import numpy as np

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix


def test_init_with_X_or_topo():
    # tests that constructing with topo_view works
    # tests that construction with design matrix works
    # tests that conversion from topo_view to design matrix and back works
    # tests that conversion the other way works too
    rng = np.random.RandomState([1, 2, 3])
    topo_view = rng.randn(5, 2, 2, 3)
    d1 = DenseDesignMatrix(topo_view=topo_view)
    X = d1.get_design_matrix()
    d2 = DenseDesignMatrix(X=X, view_converter=d1.view_converter)
    topo_view_2 = d2.get_topological_view()
    assert np.allclose(topo_view, topo_view_2)
    X = rng.randn(*X.shape)
    topo_view_3 = d2.get_topological_view(X)
    X2 = d2.get_design_matrix(topo_view_3)
    assert np.allclose(X, X2)
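# Aside (not part of the original test): a minimal sketch of the round trip
# exercised above. A (batch, rows, cols, channels) topological view flattens
# to a design matrix of shape (batch, rows * cols * channels) and converts
# back losslessly. Assumes only numpy and pylearn2's DenseDesignMatrix.
def _demo_topo_design_roundtrip():
    rng = np.random.RandomState(0)
    topo = rng.randn(5, 2, 2, 3)
    d = DenseDesignMatrix(topo_view=topo)
    assert d.get_design_matrix().shape == (5, 2 * 2 * 3)
    assert np.allclose(d.get_topological_view(), topo)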
from pylearn2.datasets.preprocessing import (ExtractGridPatches,
                                             ReassembleGridPatches)


def test_extract_reassemble():
    """
    Tests that ExtractGridPatches and ReassembleGridPatches
    are inverses of each other
    """
    rng = np.random.RandomState([1, 3, 7])
    topo = rng.randn(4, 3 * 5, 3 * 7, 2)
    dataset = DenseDesignMatrix(topo_view=topo)
    patch_shape = (3, 7)
    extractor = ExtractGridPatches(patch_shape, patch_shape)
    reassemblor = ReassembleGridPatches(patch_shape=patch_shape,
                                        orig_shape=topo.shape[1:3])
    dataset.apply_preprocessor(extractor)
    dataset.apply_preprocessor(reassemblor)
    new_topo = dataset.get_topological_view()
    assert new_topo.shape == topo.shape
    assert np.all(new_topo == topo)
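# Aside (illustrative, not from the original tests): the shape arithmetic
# behind the inverse pair above. With patch stride equal to patch shape, a
# (b, 15, 21, c) view splits into (15 / 3) * (21 / 7) = 15 non-overlapping
# patches per example, so the batch axis grows by a factor of 15.
def _demo_grid_patch_shapes():
    topo = np.random.RandomState(0).randn(4, 3 * 5, 3 * 7, 2)
    ds = DenseDesignMatrix(topo_view=topo)
    ds.apply_preprocessor(ExtractGridPatches((3, 7), (3, 7)))
    assert ds.get_topological_view().shape == (4 * 15, 3, 7, 2)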
def get_feats_from_cnn(rows, model=None):
    """
    Fprop `rows` through the best trained model and return the activations
    of the penultimate layer.
    """
    conf = utils.get_config()
    patch_size = conf['patch_size']
    region_size = conf['region_size']
    batch_size = None
    preds = utils.get_predictor(model=model, return_all=True)
    y = np.zeros(len(rows))
    samples = np.zeros((len(rows), region_size, region_size, 1),
                       dtype=np.float32)
    for i, row in enumerate(rows):
        print 'processing %i-th image: %s' % (i, row['image_filename'])
        try:
            samples[i] = utils.get_samples_from_image(row, False)[0]
        except ValueError as e:
            print '{1} Value error: {0}'.format(str(e),
                                                row['image_filename'])
        y[i] = utils.is_positive(row)
    ds = DenseDesignMatrix(topo_view=samples)
    pipeline = utils.get_pipeline(ds.X_topo_space.shape, patch_size,
                                  batch_size)
    pipeline.apply(ds)
    return preds[-2](ds.get_topological_view()), y
print 'compiling theano function'
f = function([V], feat)
print 'running theano function'
feat = f(X2)
feat_dataset = DenseDesignMatrix(
    X=feat,
    view_converter=DefaultViewConverter([1, 1, feat.shape[1]]))
print 'reassembling features'
ns = 32 - size + 1
depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                     patch_shape=(1, 1))
feat_dataset.apply_preprocessor(depatchifier)
print 'making topological view'
topo_feat = feat_dataset.get_topological_view()
assert topo_feat.shape[0] == X.shape[0]
print 'assembling visualizer'
n = np.ceil(np.sqrt(model.nhid))
pv3 = PatchViewer(grid_shape=(X.shape[0], num_filters),
                  patch_shape=(ns, ns), is_color=False)
pv4 = PatchViewer(grid_shape=(n, n), patch_shape=(size, size),
                  is_color=True, pad=(7, 7))
pv5 = PatchViewer(grid_shape=(1, num_filters), patch_shape=(size, size),
                  is_color=True, pad=(7, 7))
idx = sorted(range(model.nhid), key=lambda l: -topo_feat[:, :, :, l].std())
W = model.W.get_value()
weights_view = dataset.get_weights_view(W.T)
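# Aside (a hedged sketch, shapes illustrative): the "depatchify" trick used
# above views each per-patch feature row as a 1x1 pixel with nhid channels,
# then ReassembleGridPatches tiles the 1x1 pixels back into an (ns, ns)
# feature map per image.
def _demo_depatchify(ns=6, nhid=16):
    feats = np.random.RandomState(0).randn(ns * ns, nhid)
    fd = DenseDesignMatrix(X=feats,
                           view_converter=DefaultViewConverter([1, 1, nhid]))
    fd.apply_preprocessor(ReassembleGridPatches(orig_shape=(ns, ns),
                                                patch_shape=(1, 1)))
    assert fd.get_topological_view().shape == (1, ns, ns, nhid)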
def make_majority_vote():
    model_paths = ['convnet_' + str(i + 1) + '.pkl' for i in range(10)]
    out_path = 'submission.csv'

    models = []
    for model_path in model_paths:
        print('Loading ' + model_path + '...')
        try:
            with open(model_path, 'rb') as f:
                models.append(pkl.load(f))
        except Exception as e:
            try:
                with gzip.open(model_path, 'rb') as f:
                    models.append(pkl.load(f))
            except Exception as e:
                usage()
                print(model_path + " doesn't seem to be a valid model path,"
                      " I got this error when trying to load it:")
                print(e)

    # load the test set
    with open('test_data_for_pylearn2.pkl', 'rb') as f:
        dataset = pkl.load(f)
    dataset = DenseDesignMatrix(
        X=dataset,
        view_converter=DefaultViewConverter(shape=[32, 32, 1],
                                            axes=['b', 0, 1, 'c']))
    preprocessor = GlobalContrastNormalization(subtract_mean=True,
                                               sqrt_bias=0.0, use_std=True)
    preprocessor.apply(dataset)

    predictions = []
    print('Model description:')
    print('')
    print(models[1])
    print('')

    for model in models:
        model.set_batch_size(dataset.X.shape[0])
        X = model.get_input_space().make_batch_theano()
        Y = model.fprop(X)  # forward prop the test data
        y = T.argmax(Y, axis=1)
        f = function([X], y)
        x_arg = dataset.get_topological_view()
        y = f(x_arg.astype(X.dtype))
        assert y.ndim == 1
        assert y.shape[0] == dataset.X.shape[0]
        # add one to the results!
        y += 1
        predictions.append(y)

    predictions = np.array(predictions, dtype='int32')
    y = mode(predictions.T, axis=1)[0]
    y = np.array(y, dtype='int32')
    import itertools
    y = list(itertools.chain(*y))
    assert len(y) == dataset.X.shape[0]
    util.write_results(y, out_path)

    print('Wrote predictions to submission.csv.')
    return np.reshape(y, (1, -1))
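# Aside (a minimal sketch, assuming scipy.stats.mode as used above): mode
# returns a (modes, counts) pair, and axis=1 picks the most common label per
# example across models, which is exactly the majority vote.
def _demo_majority_vote():
    from scipy.stats import mode
    votes = np.array([[1, 1, 2],
                      [3, 2, 3]], dtype='int32')  # rows: examples, cols: models
    assert list(np.asarray(mode(votes, axis=1)[0]).ravel()) == [1, 3]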
from pylearn2.utils import serial

stl10 = serial.load('/data/lisa/data/stl10/stl10_32x32/train.pkl')
batch = stl10.X[24:25, :]
img = stl10.view_converter.design_mat_to_topo_view(batch)[0, ...] / 127.5

from pylearn2.gui.patch_viewer import PatchViewer
pv = PatchViewer((27, 27), (6, 6), pad=(1, 1), is_color=True)

pipeline = serial.load('/data/lisa/data/stl10/stl10_patches/preprocessor.pkl')
# drop the first preprocessor (presumably the patch extractor) so the rest
# of the pipeline can be applied to individual 6x6 patches
del pipeline.items[0]

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix, \
    DefaultViewConverter

for row in xrange(27):
    for col in xrange(27):
        patch = img[row:row + 6, col:col + 6]
        d = DenseDesignMatrix(topo_view=patch.reshape(1, 6, 6, 3),
                              view_converter=DefaultViewConverter((6, 6, 3)))
        d.apply_preprocessor(pipeline)
        pv.add_patch(d.get_topological_view()[0, ...], rescale=True)

pv.show()
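# Aside (a minimal sketch of the view_converter round trip used above,
# assuming 6x6 RGB patches): DefaultViewConverter maps between flat design
# matrix rows and (rows, cols, channels) patch views.
def _demo_view_converter_roundtrip():
    vc = DefaultViewConverter((6, 6, 3))
    flat = np.random.RandomState(0).randn(2, 6 * 6 * 3).astype('float32')
    topo = vc.design_mat_to_topo_view(flat)
    assert topo.shape == (2, 6, 6, 3)
    assert np.allclose(vc.topo_view_to_design_mat(topo), flat)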
def sar_predict(ann, image, outfile):
    '''Predict on an image using a trained model `ann`.'''
    # define the prediction function
    batch_size = ann.batch_size
    X = ann.get_input_space().make_batch_theano()
    y = ann.fprop(X)
    f = theano.function([X], y)

    # moving window
    window = ann.get_input_space().shape[0]
    rl = int(math.floor(window / 2))
    rr = int(window - rl)

    # patch-center grid
    stride = 1
    jcol = range(rl, image.shape[2] - rr, stride)
    irow = range(rl, image.shape[1] - rr, stride)
    nrow = len(irow)
    ncol = len(jcol)
    batch_size = ann.batch_size

    # recover the mean/std file recorded in the model's dataset yaml
    yaml_src = ann.dataset_yaml_src.split()
    mean_std_file = yaml_src[yaml_src.index('mean_std_file:') + 1]
    # mean_std_file = re.search('\w+...\w+', mean_std_file).group(0)
    mean_std_file = mean_std_file[1:-3]
    print "mean_std_file:" + mean_std_file
    preprocessor = Standardize(mean_std_file=mean_std_file)

    # ann.set_batch_size(batch_size)
    m = len(jcol) * len(irow)
    extra = batch_size - m % batch_size
    im_pred_tmp = np.zeros(shape=(m, ann.get_output_space().dim))
    xshape = ann.get_input_space().shape
    subimgs = np.zeros(shape=(batch_size,
                              ann.get_input_space().num_channels,
                              xshape[0], xshape[1]),
                       dtype=ann.get_input_space().dtype)
    for i in range(0, m - m % batch_size, batch_size):
        for j in range(i, i + batch_size):
            pos = [int(math.floor(j / ncol)) * stride + irow[0],
                   (j % ncol) * stride + jcol[0]]
            subimgs[j - i, :] = image[:, pos[0] - rl:pos[0] + rr,
                                      pos[1] - rl:pos[1] + rr]
        # the default input_space for a pylearn2 convnet is ('b', 0, 1, 'c'),
        # so the axes of subimgs must be permuted from (b, c, 0, 1)
        batch = DenseDesignMatrix(topo_view=np.transpose(subimgs,
                                                         (0, 2, 3, 1)),
                                  axes=('b', 0, 1, 'c'))
        preprocessor.apply(batch)
        im_pred_tmp[i:i + batch_size, :] = f(batch.get_topological_view())

    im_pred_tmp = np.transpose(im_pred_tmp)
    im_pred_tmp = im_pred_tmp.reshape(im_pred_tmp.shape[0], nrow, ncol)

    # output tif
    def WriteArrayToTiff(array, outfile):
        driver = gdal.GetDriverByName('GTiff')
        ds = driver.Create(outfile, array.shape[2], array.shape[1],
                           array.shape[0], GDT_Float32)
        nchannels = im_pred_tmp.shape[0]
        for band, i in zip(array, range(nchannels)):
            ds.GetRasterBand(i + 1).WriteArray(array[i, :, :])
        ds.FlushCache()
        ds = None

    im_pred = np.zeros(shape=(im_pred_tmp.shape[0], image.shape[1],
                              image.shape[2]))
    im_pred[:, rl:image.shape[1] - rr, rl:image.shape[2] - rr] = im_pred_tmp
    # np.save(outfile + ".pyn", im_pred)
    WriteArrayToTiff(im_pred, outfile)
    print 'prediction max is {}'.format(im_pred.max())
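# Aside (a minimal sketch, shapes illustrative): the axis permutation
# sar_predict relies on. pylearn2's default topological layout is
# ('b', 0, 1, 'c'), while the SAR patches are gathered channel-first as
# (b, c, 0, 1), so np.transpose(..., (0, 2, 3, 1)) moves channels last.
def _demo_axis_permutation():
    x = np.zeros((8, 3, 11, 11), dtype='float32')  # (b, c, 0, 1)
    assert np.transpose(x, (0, 2, 3, 1)).shape == (8, 11, 11, 3)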
print(len(models))
for model in models:
    print(model)
    model.set_batch_size(dataset.X.shape[0])
    X = model.get_input_space().make_batch_theano()
    Y = model.fprop(X)  # forward prop the test data
    y = T.argmax(Y, axis=1)
    f = function([X], y)
    x_arg = dataset.get_topological_view()
    y = f(x_arg.astype(X.dtype))
    assert y.ndim == 1
    assert y.shape[0] == dataset.X.shape[0]
    # add one to the results!
    y += 1
    predictions.append(y)
    print(y)

predictions = np.array(predictions, dtype='int32')
y = mode(predictions.T, axis=1)[0]
y = np.array(y, dtype='int32')