# Final pipeline step for the unlabeled data: extract 14x14 patches,
# 5,000,000 requested in total. `pipeline` is built earlier in the script.
pipeline.items.append(
    preprocessing.ExtractPatches(patch_shape=(14, 14),
                                 num_patches=5 * 1000 * 1000))

#### Build the unlabeled patch dataset. ####
# Single-argument, parenthesised print behaves identically under the
# Python 2 print statement and the print function.
print("Preparing output directory for unlabeled patches...")
outdir = data_dir + '/tfd_lcn_v1'
serial.mkdir(outdir)

# The README describes the generated files, so it is written inside the
# output directory (not the current working directory). The `with` block
# closes the handle even if the write fails.
with open(outdir + '/README', 'w') as README:
    README.write(""" File generated from hossrbm/scripts/tfd/make_tfd_lcn.py. """)

print('Loading TFD unlabeled dataset...')
data = TFD('unlabeled')

print("Preprocessing the data...")
# can_fit=True is passed here and can_fit=False for the supervised folds
# below -- presumably so pipeline statistics are fitted on the unlabeled
# data only; confirm against the preprocessor implementation.
data.apply_preprocessor(preprocessor=pipeline, can_fit=True)

# Store the design matrix as a .npy file next to the pickled dataset object.
data.use_design_loc(outdir + '/unlabeled_patches.npy')
serial.save(outdir + '/unlabeled_patches.pkl', data)

#### For supervised dataset, we work on the full-image dataset ####
# Drop the last pipeline item, i.e. the ExtractPatches step appended above.
pipeline.items.pop()

#### Build supervised-training datasets ####
print("Preparing output directory for supervised data...")
for fold_i in range(5):
    # One sub-directory per fold: <outdir>/fold0 ... <outdir>/fold4.
    path = '%s/fold%i' % (outdir, fold_i)
    serial.mkdir(path)

    train_data = TFD('train', fold=fold_i, center=False,
                     shuffle=True, seed=37192)
    train_data.apply_preprocessor(preprocessor=pipeline, can_fit=False)
    # NOTE(review): the loop body may continue beyond this point in the
    # original script; only the statements visible here are reproduced.