def load_MNIST(datafile='mnist.pkl.gz'):
    """Load the pickled MNIST splits and one-hot encode their targets.

    Parameters
    ----------
    datafile : str, optional
        Path of the gzipped pickle holding ``(train, val, test)``. Defaults
        to ``'mnist.pkl.gz'`` in the current working directory, which is the
        file that was always read before this parameter existed.

    Returns
    -------
    X, Z : matrix
        Training features (as stored in the file) and one-hot encoded
        training targets.
    VX, VZ : matrix
        Validation features and one-hot encoded validation targets.
    TX, TZ : matrix
        Test features and one-hot encoded test targets.
    image_dims : tuple
        Dimensions of the image, ``(28, 28)``.
    """
    n_classes = 10

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with gzip.open(datafile, 'rb') as f:
        train_set, val_set, test_set = cPickle.load(f)

    X, Z = train_set
    VX, VZ = val_set
    TX, TZ = test_set

    # Only the targets are encoded; the feature matrices pass through as-is.
    Z = one_hot(Z, n_classes)
    VZ = one_hot(VZ, n_classes)
    TZ = one_hot(TZ, n_classes)

    image_dims = 28, 28

    return X, Z, VX, VZ, TX, TZ, image_dims
def load_MNIST():
    """Read ``./mnist.pkl.gz`` and return its three splits.

    Returns
    -------
    X, Z : matrix
        Training features and one-hot encoded training targets.
    VX, VZ : matrix
        Validation features and one-hot encoded validation targets.
    TX, TZ : matrix
        Test features and one-hot encoded test targets.
    image_dims : tuple
        Dimensions of the image.
    """
    with gzip.open('mnist.pkl.gz', 'rb') as handle:
        train_set, val_set, test_set = cPickle.load(handle)

    # Each split is a (features, integer targets) pair.
    (X, Z), (VX, VZ), (TX, TZ) = train_set, val_set, test_set

    # Targets become indicator rows over the ten digit classes.
    Z, VZ, TZ = [one_hot(targets, 10) for targets in (Z, VZ, TZ)]

    return X, Z, VX, VZ, TX, TZ, (28, 28)
def test_one_hot():
    """Check ``one_hot`` with and without an explicit class count."""
    labels = np.array([0, 1, 2, 1, 3])

    # Reference encoding: a single 1 per row, in the column named by the label.
    expected = np.zeros((5, 4))
    expected[np.arange(labels.shape[0]), labels] = 1

    for extra_args in ((), (4,)):
        assert np.allclose(expected, one_hot(labels, *extra_args))
def test_one_hot():
    """``one_hot`` must yield one indicator row per label, inferring the
    number of classes when it is not passed."""
    arr = np.array([0, 1, 2, 1, 3])

    # Picking rows of the identity matrix gives the reference encoding.
    desired = np.eye(4)[arr]

    inferred = one_hot(arr)
    assert np.allclose(desired, inferred)

    explicit = one_hot(arr, 4)
    assert np.allclose(desired, explicit)
def load_data(pars):
    """Return the training and validation pairs of the unscaled data set.

    ``pars`` is accepted but not read. The labels are one-hot encoded over
    13 classes. The file handle is not closed here: ``X`` and ``VX`` are
    returned exactly as looked up from it.

    Returns ``((X, Z), (VX, VZ))``.
    """
    n_classes = 13
    store = h5.File("/nthome/maugust/thesis/train_val_test_crafted_real_int_wo_scaling.hdf5", "r")

    X = store["trainig_set/train_set"]
    train_labels = store["trainig_labels/real_train_labels"]
    VX = store["validation_set/val_set"]
    val_labels = store["validation_labels/real_val_labels"]

    train_pair = (X, one_hot(train_labels, n_classes))
    val_pair = (VX, one_hot(val_labels, n_classes))
    return train_pair, val_pair
def load_data(pars):
    """Look up the training and validation sets and encode their labels.

    ``pars`` is not used. Labels are one-hot encoded with 13 classes; the
    feature sets are handed back as retrieved from the (still open) file.

    Returns ``((X, Z), (VX, VZ))``.
    """
    source = h5.File('/nthome/maugust/thesis/train_val_test_crafted_real_int.hdf5', 'r')

    features = [source[key] for key in ('trainig_set/train_set',
                                        'trainig_labels/real_train_labels',
                                        'validation_set/val_set',
                                        'validation_labels/real_val_labels')]
    X, Z, VX, VZ = features

    Z = one_hot(Z, 13)
    VZ = one_hot(VZ, 13)

    return (X, Z), (VX, VZ)
def load_data(pars):
    """Load the binary-label training and validation sets into memory.

    ``pars`` is accepted for interface compatibility but not read. At most
    the first 330000 training and 140000 validation rows are kept, and the
    labels are one-hot encoded over two classes.

    Returns
    -------
    X, Z, VX, VZ
        Training features/targets and validation features/targets.
    """
    path = '/nthome/maugust/thesis/usarray_data_scaled_train_val_bin.hdf5'
    n_train, n_val = 330000, 140000

    # Everything needed is copied into memory inside the ``with`` block, so
    # the file can be closed before returning. Slicing the datasets directly
    # avoids reading the full arrays only to throw most of them away.
    # (assumes ``h5`` is h5py -- TODO confirm)
    with h5.File(path, 'r') as data:
        X = data['trainig_set/train_set'][:n_train]
        Z = data['trainig_labels/bin_train_labels'][:n_train]
        VX = data['validation_set/val_set'][:n_val]
        VZ = data['validation_labels/bin_val_labels'][:n_val]

    Z = one_hot(Z, 2)
    VZ = one_hot(VZ, 2)

    return X, Z, VX, VZ
def prepare_data(self):
    """One-hot encode the labels and trim every array to whole batches.

    ``self.data`` is read and rewritten in place. Slots 1 and 3 are label
    arrays, encoded with ``max(label) + 1`` classes. Slots (0, 1) and (2, 3)
    are then cut to a multiple of ``self.batch_size``, the multiple being
    derived from the length of slot 0 and slot 2 respectively, and converted
    to ``float32`` arrays.
    """
    for labels_slot in (1, 3):
        labels = self.data[labels_slot]
        self.data[labels_slot] = one_hot(labels, np.max(labels) + 1)

    for inputs_slot, labels_slot in ((0, 1), (2, 3)):
        # Floor division keeps the count an integer even under true
        # division, where ``/`` would give a float that cannot be used as a
        # slice bound.
        num_batches = len(self.data[inputs_slot]) // self.batch_size
        n_keep = num_batches * self.batch_size
        for slot in (inputs_slot, labels_slot):
            self.data[slot] = np.array(self.data[slot][:n_keep],
                                       dtype=np.float32)
def prepare_data(self):
    """Encode the label arrays and drop the incomplete trailing batch.

    Works on ``self.data`` in place: entries 1 and 3 are one-hot encoded
    with ``max + 1`` classes; afterwards entries 0/1 and 2/3 are truncated
    to a whole number of ``self.batch_size`` batches (counted on entry 0 and
    entry 2) and stored as ``float32`` arrays.
    """
    def as_whole_batches(array, n_keep):
        # Truncate first, then convert, exactly as done for every entry.
        return np.array(array[:n_keep], dtype=np.float32)

    self.data[1] = one_hot(self.data[1], np.max(self.data[1]) + 1)
    self.data[3] = one_hot(self.data[3], np.max(self.data[3]) + 1)

    # ``//`` rather than ``/``: the batch count feeds a slice bound and must
    # stay an integer regardless of the division semantics in effect.
    n_first = (len(self.data[0]) // self.batch_size) * self.batch_size
    self.data[0] = as_whole_batches(self.data[0], n_first)
    self.data[1] = as_whole_batches(self.data[1], n_first)

    n_second = (len(self.data[2]) // self.batch_size) * self.batch_size
    self.data[2] = as_whole_batches(self.data[2], n_second)
    self.data[3] = as_whole_batches(self.data[3], n_second)
def full_rotation(x, z):
    """Rotate a volume and its segmentation by one random angle.

    Assuming a batch size of 1. More specifically:
    x is (1, depth, channels, height, width) and
    z is (1, height*width*depth, classes)

    A single angle in [0, 360) and a random pair of the three spatial axes
    are drawn per call and applied to every modality of ``x`` and every
    class map of ``z`` (cubic spline, shape preserved). The rotated class
    maps are reduced to hard labels by argmax and re-encoded one-hot.

    Returns
    -------
    (nx, nz) : tuple
        Rotated input with ``x``'s dtype and shape, and re-encoded labels
        with ``z``'s dtype and original shape.

    NOTE: the rotated values are written through transposed/reshaped views,
    so when ``x`` (and a contiguous ``z``) are ndarrays they are modified in
    place as well.
    """
    # ``scipy.ndimage.interpolation`` is a deprecated namespace; ``rotate``
    # is public under ``scipy.ndimage``.
    from scipy.ndimage import rotate as rotate_scipy
    from breze.learn.data import one_hot

    z_original_shape = z.shape
    n_classes = z.shape[-1]

    ang = float(np.random.uniform(0, 360))
    axes = np.random.permutation(3)[:2]

    # Bring the three spatial axes to the back so each leading entry is one
    # 3-D volume: (1, channels, height, width, depth).
    nx = np.transpose(x, (0, 2, 3, 4, 1))
    # Same layout for the labels: (1, classes, height, width, depth).
    nz = np.reshape(z, (1, x.shape[3], x.shape[4], x.shape[1], n_classes))
    nz = np.transpose(nz, (0, 4, 1, 2, 3))

    nx[0] = [rotate_scipy(modality, ang, axes=axes, order=3, reshape=False)
             for modality in nx[0]]
    nx = np.transpose(nx, (0, 4, 1, 2, 3))

    nz[0] = [rotate_scipy(class_map, ang, axes=axes, order=3, reshape=False)
             for class_map in nz[0]]
    # Interpolated class maps are soft again; pick the winning class.
    nz = nz[0].argmax(axis=0)
    nz = np.reshape(nz, (-1,))
    nz = np.reshape(one_hot(nz, n_classes), z_original_shape)

    nx = np.asarray(nx, dtype=x.dtype)
    nz = np.asarray(nz, dtype=z.dtype)

    return (nx, nz)
def swirl_transform(x, z):
    """Swirl every depth slice of a volume and of its label maps alike.

    A batch size of 1 is assumed: ``x`` is laid out as
    (1, depth, channels, height, width) and ``z`` as
    (1, height*width*depth, classes).

    One strength and one radius are drawn per call and passed to ``swirl_``
    for each modality and each class map. The distorted class maps are
    reduced to hard labels with an argmax and one-hot encoded again.

    Returns ``(nx, nz)`` with the dtypes of ``x`` and ``z``; ``nz`` has the
    shape of ``z``. The results are written through views of the inputs.
    """
    from breze.learn.data import one_hot

    strength = np.random.uniform(1, 2)
    radius = np.random.randint(90, 140)

    label_shape = z.shape
    n_classes = label_shape[-1]

    def warp(volume):
        return swirl_(volume, strength, radius)

    # (1, channels, depth, height, width)
    by_channel = np.transpose(x, (0, 2, 1, 3, 4))
    # (1, classes, depth, height, width)
    by_class = np.transpose(
        np.reshape(z, (1, x.shape[3], x.shape[4], x.shape[1], n_classes)),
        (0, 4, 3, 1, 2))

    by_channel[0] = [warp(modality) for modality in by_channel[0]]
    swirled_x = np.transpose(by_channel, (0, 2, 1, 3, 4))

    by_class[0] = [warp(class_map) for class_map in by_class[0]]
    winners = by_class[0].argmax(axis=0)
    # Back to (height, width, depth) before flattening.
    winners = np.reshape(np.transpose(winners, (1, 2, 0)), (-1,))
    swirled_z = np.reshape(one_hot(winners, n_classes), label_shape)

    return (np.asarray(swirled_x, dtype=x.dtype),
            np.asarray(swirled_z, dtype=z.dtype))
def swirl_transform(x, z):
    """Apply one random swirl to the image volume and its segmentation.

    Expects a batch of exactly one sample, with
    x shaped (1, depth, channels, height, width) and
    z shaped (1, height*width*depth, classes).

    Strength is drawn from ``uniform(1, 2)`` and radius from
    ``randint(90, 140)``; both are shared by all calls to ``swirl_``.
    The swirled class maps are collapsed by argmax and re-encoded with
    ``one_hot``. Returns ``(nx, nz)`` in the dtypes of the inputs.
    """
    from breze.learn.data import one_hot

    strength = np.random.uniform(1, 2)
    radius = np.random.randint(90, 140)

    target_shape = z.shape
    n_classes = z.shape[-1]
    swap_depth_channel = (0, 2, 1, 3, 4)

    nx = np.transpose(x, swap_depth_channel)
    nz = np.reshape(z, (1, x.shape[3], x.shape[4], x.shape[1], n_classes))
    nz = np.transpose(nz, (0, 4, 3, 1, 2))

    swirled_modalities = []
    for modality in nx[0]:
        swirled_modalities.append(swirl_(modality, strength, radius))
    nx[0] = swirled_modalities
    nx = np.transpose(nx, swap_depth_channel)

    swirled_maps = []
    for class_map in nz[0]:
        swirled_maps.append(swirl_(class_map, strength, radius))
    nz[0] = swirled_maps

    # Hard labels, moved from (depth, height, width) to (height, width, depth).
    hard = np.transpose(nz[0].argmax(axis=0), (1, 2, 0))
    hard = np.reshape(hard, (-1,))
    nz = np.reshape(one_hot(hard, n_classes), target_shape)

    nx = np.asarray(nx, dtype=x.dtype)
    nz = np.asarray(nz, dtype=z.dtype)
    return (nx, nz)
def discrete(seg, n_classes):
    """Harden a soft segmentation.

    Takes the argmax of ``seg`` over axis 3, one-hot encodes the winners
    with ``n_classes`` classes and returns them in the shape of ``seg``.
    """
    winners = seg.argmax(axis=3)
    flat_winners = np.reshape(winners, (-1,))
    return np.reshape(one_hot(flat_winners, n_classes), seg.shape)
def percentile_filter(x, z):
    """Run one random percentile filter over the volume and its labels.

    A percentile is drawn from ``randint(0, 10)`` and applied with a
    (2, 2, 2) footprint to every modality of ``x`` and every class map of
    ``z``. The filtered class maps are reduced by argmax and one-hot encoded
    again into the shape of ``z``.

    Assumes x is (1, depth, channels, height, width) and z holds
    height*width*depth rows of class scores -- TODO confirm against callers.

    Returns ``(nx, nz)`` with the dtypes of ``x`` and ``z``. The filtered
    modalities are written through a transposed view of ``x``.
    """
    # Aliased so the SciPy filter is not confused with this function.
    from scipy.ndimage import percentile_filter as ndi_percentile_filter
    from breze.learn.data import one_hot

    percentile = np.random.randint(0, 10)
    footprint = (2, 2, 2)

    by_channel = np.transpose(x, (0, 2, 1, 3, 4))
    by_channel[0] = [ndi_percentile_filter(modality, percentile, footprint)
                     for modality in by_channel[0]]
    filtered_x = np.transpose(by_channel, (0, 2, 1, 3, 4))

    n_classes = z.shape[-1]
    class_maps = np.reshape(z, (x.shape[3], x.shape[4], x.shape[1], n_classes))
    class_maps = np.transpose(class_maps, (3, 0, 1, 2))
    filtered_maps = np.array([
        ndi_percentile_filter(class_map, percentile, footprint)
        for class_map in class_maps
    ])

    winners = np.reshape(filtered_maps.argmax(axis=0), (-1,))
    filtered_z = np.reshape(one_hot(winners, n_classes), z.shape)

    return (np.asarray(filtered_x, dtype=x.dtype),
            np.asarray(filtered_z, dtype=z.dtype))
def visualize_tsne(args):
    """Embed the first 5000 test samples with t-SNE and save a scatter plot.

    Parameters
    ----------
    args : mapping
        ``args['<data>']`` is the path of the HDF5 file to read and
        ``args['<output>']`` the file name of the figure, which is written
        below ``/nthome/maugust/thesis``.

    Each of the 13 classes is drawn with its own colour/marker pair and
    labelled so that the legend actually has entries.
    """
    n_classes = 13
    n_samples = 5000
    # (colour, marker) for class 0 .. 12, in that order.
    class_styles = [
        ('g', 'o'), ('b', 'v'), ('yellow', '^'), ('r', '<'), ('g', '>'),
        ('m', '8'), ('crimson', 's'), ('lawngreen', 'p'), ('gold', '*'),
        ('darkorange', 'h'), ('k', 'H'), ('magenta', 'd'),
        ('turquoise', 'D'),
    ]

    data_dir = os.path.abspath(args['<data>'])
    # The slices are read into memory, so the file can be closed right away.
    with h5.File(data_dir, 'r') as data:
        TX = data['test_set/test_set'][:n_samples]
        TZ = data['test_labels/real_test_labels'][:n_samples]
    TZ = one_hot(TZ, n_classes)
    n_input = TX.shape[1]
    print('data loaded.')

    tsne = Tsne(n_input, 2, perplexity=5)
    print('TSNE initialized.')
    TX_r = tsne.fit_transform(TX)
    print('data TSNEd.')

    fig = plt.figure(figsize=(16, 16))
    ax = fig.add_subplot(111)
    TZ_am = TZ.argmax(axis=1)
    for label, (colour, marker) in enumerate(class_styles):
        members = TZ_am == label
        # Without a label per artist ``plt.legend()`` has nothing to show.
        ax.scatter(TX_r[members, 0], TX_r[members, 1], c=colour, lw=0,
                   alpha=1, s=100, marker=marker,
                   label='class %d' % label)
    plt.legend()
    plt.savefig(os.path.join('/nthome/maugust/thesis', args['<output>']))
def discrete(seg, n_classes):
    """Replace per-class scores by a one-hot encoding of the best class.

    The best class is the argmax along axis 3 of ``seg``; the output keeps
    the shape of ``seg``.
    """
    shape = seg.shape
    hard_labels = np.reshape(seg.argmax(axis=3), (-1,))
    encoded = one_hot(hard_labels, n_classes)
    return np.reshape(encoded, shape)
def full_rotation(x, z):
    """Apply one random 3-D rotation to an image volume and its labels.

    Assuming a batch size of 1. More specifically:
    x is (1, depth, channels, height, width) and
    z is (1, height*width*depth, classes)

    The angle is drawn uniformly from [0, 360) and the rotation plane is a
    random pair of the three spatial axes; both are shared by all modalities
    and all class maps. Rotated class maps are turned back into hard labels
    by argmax and one-hot encoded into the original shape of ``z``.

    Returns
    -------
    (nx, nz) : tuple
        The rotated volume (dtype of ``x``) and labels (dtype of ``z``).

    NOTE: results are assigned into views of the arguments, so ndarray
    inputs are overwritten in place (for ``z`` only if the reshape yields a
    view).
    """
    # Imported from ``scipy.ndimage`` directly: the ``interpolation``
    # sub-namespace used before is deprecated.
    from scipy.ndimage import rotate as rotate_scipy
    from breze.learn.data import one_hot

    z_original_shape = z.shape
    n_classes = z.shape[-1]

    ang = float(np.random.uniform(0, 360))
    axes = np.random.permutation(3)[:2]

    def spin(volume):
        # Cubic spline interpolation, output shape equal to the input shape.
        return rotate_scipy(volume, ang, axes=axes, order=3, reshape=False)

    # (1, channels, height, width, depth)
    nx = np.transpose(x, (0, 2, 3, 4, 1))
    # (1, classes, height, width, depth)
    nz = np.reshape(z, (1, x.shape[3], x.shape[4], x.shape[1], n_classes))
    nz = np.transpose(nz, (0, 4, 1, 2, 3))

    nx[0] = [spin(modality) for modality in nx[0]]
    nx = np.transpose(nx, (0, 4, 1, 2, 3))

    nz[0] = [spin(class_map) for class_map in nz[0]]
    nz = nz[0].argmax(axis=0)
    nz = np.reshape(nz, (-1,))
    nz = np.reshape(one_hot(nz, n_classes), z_original_shape)

    nx = np.asarray(nx, dtype=x.dtype)
    nz = np.asarray(nz, dtype=z.dtype)

    return (nx, nz)
def make_report(pars, trainer, data):
    """Evaluate the best parameters on the test set and write a report.

    Parameters
    ----------
    pars : object
        Not read by this function.
    trainer : object
        Supplies ``model``, ``best_pars``, ``score`` and ``eval_data``.
    data : object
        Ignored; the test set is always read from a fixed HDF5 file.

    The classification error, the false positive and false negative rates
    (class 0 being the negative class) and the share of neighbouring-class
    confusions are printed and written to ``./eval_result.txt``.

    Returns
    -------
    dict
        ``train_loss``, ``val_loss`` and ``best_emp_test_loss``.
    """
    test_path = '/nthome/maugust/thesis/train_val_test_crafted_real_int.hdf5'
    model = trainer.model

    # Take a real copy: ``parameters.data`` is overwritten in place below, so
    # keeping only a reference would make the final restore a no-op.
    current_pars = model.parameters.data.copy()

    testfile = h5.File(test_path, 'r')
    try:
        TX = testfile['test_set/test_set']
        TZ = testfile['test_labels/real_test_labels']
        TZ = one_hot(TZ, 13)

        model.parameters.data[...] = trainer.best_pars

        predicted = T.argmax(model.exprs['output'], axis=1)
        target = T.argmax(model.exprs['target'], axis=1)

        n_wrong = 1 - T.eq(predicted, target).mean()
        f_n_wrong = model.function(['inpt', 'target'], n_wrong)

        # False positive: target is class 0 but something else is predicted.
        f_pos = T.mean(T.neq(predicted, 0) * T.eq(target, 0))
        f_f_pos = model.function(['inpt', 'target'], f_pos)

        # False negative: class 0 is predicted although the target is not 0.
        f_neg = T.mean(T.eq(predicted, 0) * T.neq(target, 0))
        f_f_neg = model.function(['inpt', 'target'], f_neg)

        emp_loss = f_n_wrong(TX, TZ)
        f_p = f_f_pos(TX, TZ)
        f_n = f_f_neg(TX, TZ)

        P_pos = np.argmax(model.predict(TX), axis=1)
        Z_pos = np.argmax(TZ, axis=1)

        neighbour_fails = .0
        relevant_fails = 0
        for i in np.arange(len(P_pos)):
            # Only confusions between two non-zero classes are of interest.
            if P_pos[i] > 0 and Z_pos[i] > 0 and P_pos[i] != Z_pos[i]:
                relevant_fails += 1
                if is_neighbour(P_pos[i], Z_pos[i]):
                    neighbour_fails += 1

        if relevant_fails > 0:
            neighbour_fails /= relevant_fails

        # NOTE(review): the values are fractions although the text says "%".
        emp_loss_s = 'model achieved %f%% classification error on the test set' % emp_loss
        f_p_s = '\nmodel achieved %f%% false positives on the test set' % f_p
        f_n_s = '\nmodel achieved %f%% false negatives on the test set' % f_n
        neigh_s = '\nmodel achieved %f%% neighbour misspredictions on the test set' % neighbour_fails

        print(emp_loss_s)
        print(f_p_s)
        print(f_n_s)
        print(neigh_s)
        with open(os.path.join('.', 'eval_result.txt'), 'w') as f:
            f.write(emp_loss_s)
            f.write(f_p_s)
            f.write(f_n_s)
            f.write(neigh_s)
    finally:
        testfile.close()
        # Put the parameters back even if the evaluation failed half-way.
        model.parameters.data[...] = current_pars

    return {'train_loss': trainer.score(*trainer.eval_data['train']),
            'val_loss': trainer.score(*trainer.eval_data['val']),
            'best_emp_test_loss': emp_loss}
def groundtruth_(gt):
    """One-hot encode a discrete label volume with zero-indexed labels.

    The number of classes is ``gt.max() + 1``; the result has the shape of
    ``gt`` with one trailing axis of that length appended.
    """
    n_classes = gt.max() + 1
    flat_labels = np.reshape(gt, (-1,))
    rows = np.reshape(one_hot(flat_labels, n_classes), (-1, n_classes))
    return np.reshape(rows, gt.shape + (n_classes,))
def process_gt(gt, n_classes, downsize=False):
    """Turn a label volume into a flat one-hot matrix.

    When ``downsize`` is true the volume is first zoomed by 0.5 with
    order-0 interpolation. The labels are cast to ``int8``, the first axis
    is moved to the back, and the flattened labels are one-hot encoded.

    Returns an array of shape ``(n_voxels, n_classes)``.
    """
    volume = zoom(gt, 0.5, order=0) if downsize else gt
    # NOTE(review): the cast is applied on both paths -- confirm that it was
    # not meant for the downsized volume only.
    volume = np.asarray(volume, dtype='int8')
    volume = np.transpose(volume, (1, 2, 0))
    flat_labels = np.reshape(volume, (-1,))
    return np.reshape(one_hot(flat_labels, n_classes), (-1, n_classes))
def groundtruth_(gt):
    """Apply one_hot encoding to a volume of zero-indexed integer labels.

    Classes are counted as ``gt.max() + 1`` and end up on a new last axis.
    """
    volume_shape = gt.shape
    n_classes = gt.max() + 1

    encoded = one_hot(np.reshape(gt, (-1,)), n_classes)
    encoded = np.reshape(encoded, (-1, n_classes))

    onehot_shape = volume_shape + (n_classes,)
    return np.reshape(encoded, onehot_shape)
def process_gt(gt, n_classes, downsize=False):
    """Flatten a ground-truth volume into one-hot rows.

    Optionally halves the volume (``zoom`` with factor 0.5, order 0), casts
    it to ``int8``, transposes it with axes (1, 2, 0) and encodes the
    flattened labels over ``n_classes`` classes.
    """
    if downsize:
        gt = zoom(gt, 0.5, order=0)

    # NOTE(review): cast assumed to apply whether or not ``downsize`` is set.
    labels = np.transpose(np.asarray(gt, dtype='int8'), (1, 2, 0))
    labels = np.reshape(labels, (-1,))

    encoded = one_hot(labels, n_classes)
    return np.reshape(encoded, (-1, n_classes))
def get_gt(gt, n_classes, downsize=False):
    """Return ``gt`` unchanged, or a half-size version of it.

    For ``downsize`` the labels are one-hot encoded, every class map is
    zoomed by 0.5 with linear interpolation, and the class with the highest
    interpolated value is picked per voxel. The downsized labels are
    returned as ``int8``.
    """
    if not downsize:
        return gt

    flat_labels = np.reshape(gt, (-1,))
    encoded = np.reshape(one_hot(flat_labels, n_classes),
                         gt.shape + (n_classes,))
    # One 3-D map per class, class axis first.
    class_maps = np.transpose(encoded, (3, 0, 1, 2))

    shrunk = np.array([zoom(class_map, 0.5, order=1)
                       for class_map in class_maps])
    return np.asarray(shrunk.argmax(axis=0), dtype='int8')
def visualize_tsne(args):
    """Plot a t-SNE embedding of a trained model's hidden representation.

    Parameters
    ----------
    args : mapping
        ``'<model>'``: directory holding the trainer checkpoints (the
        process changes into it), ``'<data>'``: HDF5 file with the test set,
        ``'<mode>'``: ``'cnn'`` selects minibatch-wise transformation of the
        ``'mlp-layer-2-inpt'`` expression, anything else transforms
        ``'layer-2-inpt'`` in one call, ``'<output>'``: figure file name
        below ``/nthome/maugust/thesis``.

    Raises
    ------
    IOError
        If the model directory contains no checkpoint. Previously this
        surfaced later as a ``NameError`` on ``trainer``.
    """
    # (colour, marker) for class 0 .. 12, in that order.
    class_styles = [
        ('g', 'o'), ('b', 'v'), ('yellow', '^'), ('r', '<'), ('g', '>'),
        ('m', '8'), ('crimson', 's'), ('lawngreen', 'p'), ('gold', '*'),
        ('darkorange', 'h'), ('k', 'H'), ('magenta', 'd'),
        ('turquoise', 'D'),
    ]

    model_dir = os.path.abspath(args['<model>'])
    data_dir = os.path.abspath(args['<data>'])
    os.chdir(model_dir)
    cps = contrib.find_checkpoints('.')
    if not cps:
        raise IOError('no checkpoints found in %s' % model_dir)

    # NOTE: unpickling can execute arbitrary code -- trusted files only.
    with gzip.open(cps[-1], 'rb') as fp:
        trainer = cPickle.load(fp)
    trainer.model.parameters.data[...] = trainer.best_pars

    # Both slices are read into memory, so the file is closed immediately.
    with h5.File(data_dir, 'r') as data:
        TX = data['test_set/test_set'][:5000]
        TZ = data['test_labels/real_test_labels'][:5000]
    TZ = one_hot(TZ, 13)
    print('data loaded.')

    if args['<mode>'] == 'cnn':
        f_transformed = trainer.model.function(['inpt'], 'mlp-layer-2-inpt')
        print('transform-function generated.')
        batches = minibatches(TX, trainer.model.batch_size, 0)
        trans_TX = np.concatenate(
            [f_transformed(element) for element in batches], axis=0)
    else:
        f_transformed = trainer.model.function(['inpt'], 'layer-2-inpt')
        print('transform-function generated.')
        trans_TX = f_transformed(TX)
    # NOTE(review): cast assumed to apply to both modes -- confirm.
    trans_TX = np.array(trans_TX, dtype=np.float32)
    print('data transformed')

    trans_n_input = trans_TX.shape[1]
    trans_tsne = Tsne(trans_n_input, 2, perplexity=5)
    print('TSNE initialized.')
    trans_TX_r = trans_tsne.fit_transform(trans_TX)
    print('data TSNEd')

    fig = plt.figure(figsize=(16, 16))
    ax = fig.add_subplot(111)
    TZ_am = TZ.argmax(axis=1)
    for label, (colour, marker) in enumerate(class_styles):
        members = TZ_am == label
        ax.scatter(trans_TX_r[members, 0], trans_TX_r[members, 1], c=colour,
                   lw=0, alpha=1, s=100, marker=marker)
    plt.savefig(os.path.join('/nthome/maugust/thesis', args['<output>']))
def get_gt(gt, n_classes, downsize=False):
    """Optionally halve the resolution of a label volume.

    Without ``downsize`` the input is returned as is. Otherwise each class
    gets its own indicator volume, which is zoomed by 0.5 (order 1); the
    per-voxel argmax over the zoomed volumes gives the new ``int8`` labels.
    """
    if downsize:
        target_shape = gt.shape + (n_classes,)
        indicators = one_hot(np.reshape(gt, (-1,)), n_classes)
        indicators = np.reshape(indicators, target_shape)
        indicators = np.transpose(indicators, (3, 0, 1, 2))

        zoomed = []
        for class_map in indicators:
            zoomed.append(zoom(class_map, 0.5, order=1))

        labels = np.array(zoomed).argmax(axis=0)
        return np.asarray(labels, dtype='int8')

    return gt
def percentile_filter(x, z):
    """Filter image and labels with one randomly chosen percentile.

    The percentile comes from ``randint(0, 10)`` and is used with a
    (2, 2, 2) window for all modalities of ``x`` and all class maps of
    ``z``. Labels are recovered by argmax over the filtered class maps and
    one-hot encoded into ``z.shape``.

    Presumably x is (1, depth, channels, height, width) as in the other
    transforms -- verify against the caller.

    Returns ``(nx, nz)`` in the dtypes of the inputs; ``x`` is written
    through a transposed view.
    """
    # Local alias keeps the SciPy routine apart from this function's name.
    from scipy.ndimage import percentile_filter as scipy_filter
    from breze.learn.data import one_hot

    percentile = np.random.randint(0, 10)
    swap_depth_channel = (0, 2, 1, 3, 4)

    nx = np.transpose(x, swap_depth_channel)
    filtered_modalities = []
    for modality in nx[0]:
        filtered_modalities.append(scipy_filter(modality, percentile, (2, 2, 2)))
    nx[0] = filtered_modalities
    nx = np.transpose(nx, swap_depth_channel)

    n_classes = z.shape[-1]
    nz = np.reshape(z, (x.shape[3], x.shape[4], x.shape[1], n_classes))
    nz = np.transpose(nz, (3, 0, 1, 2))
    filtered_maps = []
    for class_map in nz:
        filtered_maps.append(scipy_filter(class_map, percentile, (2, 2, 2)))

    hard = np.array(filtered_maps).argmax(axis=0)
    hard = np.reshape(hard, (-1,))
    nz = np.reshape(one_hot(hard, n_classes), z.shape)

    nx = np.asarray(nx, dtype=x.dtype)
    nz = np.asarray(nz, dtype=z.dtype)
    return (nx, nz)
def __call__(self, x):
    """Segment a volume section by section and return one-hot labels.

    The first sample of ``x`` is split into sections with ``to_sections``
    (stored as ``int16``), each section is passed through ``self.predict``,
    and the per-section argmax labels are stitched together again with
    ``from_sections``.

    Returns an array of shape ``(n_voxels, self.n_classes)``.
    """
    n_classes = self.n_classes

    modalities = np.transpose(x[0], (1, 2, 3, 0))
    sections = np.array([to_sections(modality) for modality in modalities],
                        dtype='int16')
    # mod sect h w d
    sections = np.transpose(sections, (1, 4, 0, 2, 3))

    seg_sections = []
    for section in sections:
        depth, _, height, width = section.shape
        prediction = self.predict(section[np.newaxis])
        if isinstance(prediction, gnumpy.garray):
            prediction = prediction.as_numpy_array()
        scores = np.reshape(prediction, (height, width, depth, n_classes))
        seg_sections.append(scores.argmax(axis=3))

    full_shape = (x.shape[3], x.shape[4], x.shape[1])
    final_seg = from_sections(seg_sections, original_shape=full_shape)

    flat_labels = np.reshape(final_seg, (-1,))
    return np.reshape(one_hot(flat_labels, n_classes), (-1, n_classes))
def __call__(self, x):
    """Predict a one-hot segmentation for the first sample in ``x``.

    Works on sections produced by ``to_sections``: every section is fed to
    ``self.predict``, reduced to hard labels by argmax over the class axis
    and collected; ``from_sections`` reassembles the full label volume,
    which is then flattened and encoded with ``self.n_classes`` classes.
    """
    n_classes = self.n_classes
    image = np.transpose(x[0], (1, 2, 3, 0))

    per_modality = []
    for modality in image:
        per_modality.append(to_sections(modality))
    # mod sect h w d
    sections = np.transpose(np.array(per_modality, dtype='int16'),
                            (1, 4, 0, 2, 3))

    def segment(section):
        depth, _, height, width = section.shape
        output = self.predict(section[np.newaxis])
        # GPU arrays have to be brought back to NumPy before reshaping.
        output = output.as_numpy_array() if isinstance(
            output, gnumpy.garray) else output
        return np.reshape(output,
                          (height, width, depth, n_classes)).argmax(axis=3)

    seg_sections = [segment(section) for section in sections]

    final_seg = from_sections(
        seg_sections,
        original_shape=(x.shape[3], x.shape[4], x.shape[1]))

    seg_onehot = one_hot(np.reshape(final_seg, (-1,)), n_classes)
    return np.reshape(seg_onehot, (-1, n_classes))
import climin.initialize

from breze.learn.mlp import Mlp
from breze.learn.data import one_hot

# Script fragment: loads the pickled MNIST splits and sets up the stopping
# criteria and the optimizer configuration for training.
datafile = 'mnist.pkl.gz'
# Load data.
with gzip.open(datafile,'rb') as f:
    train_set, val_set, test_set = cPickle.load(f)

# Each split is a (features, integer targets) pair.
X, Z = train_set
VX, VZ = val_set
TX, TZ = test_set

# One indicator column per digit class.
Z = one_hot(Z, 10)
VZ = one_hot(VZ, 10)
TZ = one_hot(TZ, 10)

image_dims = 28, 28

max_passes = 150
batch_size = 250
# Number of iterations in ``max_passes`` passes over the training set.
# NOTE(review): relies on ``/`` being integer division (Python 2).
max_iter = max_passes * X.shape[0] / batch_size
# Number of iterations in a single pass over the training set.
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

#optimizer = 'rmsprop', {'steprate': 0.0001, 'momentum': 0.95, 'decay': 0.8}
optimizer = 'gd', {'steprate': 0.1}
def convolutional_nets_on_CIFAR10():
    """Train a convolutional network on the whitened CIFAR-10 pickles.

    Reads ``pylearn2_gcn_whitened/train.pkl`` and
    ``pylearn2_gcn_whitened/test.pkl``, one-hot encodes the labels over ten
    classes, keeps at most 390 training and 78 validation batches of 128
    samples, adds an L2 penalty on the weights to the model loss and prints
    one tab-separated progress row every ``n_report`` iterations.

    Nothing is returned; all results go to standard output.
    """
    #### load data ####
    train_file = 'pylearn2_gcn_whitened/train.pkl'
    test_file = 'pylearn2_gcn_whitened/test.pkl'

    # Pickles are binary data: open both in 'rb' mode and close them as soon
    # as they are loaded.
    # NOTE: unpickling can execute arbitrary code -- trusted files only.
    with open(train_file, 'rb') as f:
        train_set = cPickle.load(f)
    with open(test_file, 'rb') as f:
        test_set = cPickle.load(f)

    X, Z = train_set.get_data()
    VX, VZ = test_set.get_data()
    Z = one_hot(Z, 10)
    VZ = one_hot(VZ, 10)

    X = X[:128 * 390]
    Z = Z[:128 * 390]
    VX = VX[:128 * 78]
    VZ = VZ[:128 * 78]

    X = np.array(X, dtype=np.float32)
    Z = np.array(Z, dtype=np.float32)
    VZ = np.array(VZ, dtype=np.float32)
    VX = np.array(VX, dtype=np.float32)

    #### initialize model ####
    max_passes = 500
    batch_size = 128
    # Floor division keeps both counts integral under any division mode.
    max_iter = max_passes * X.shape[0] // batch_size
    n_report = X.shape[0] // (5 * batch_size)  # five reports per pass

    stop = climin.stops.any_([
        climin.stops.after_n_iterations(max_iter),
    ])
    pause = climin.stops.modulo_n_iterations(n_report)

    optimizer = 'gd', {'steprate': 0.01, 'momentum': 0.9}

    m = Cnn(3072, [32, 64, 128], [50], 10,
            ['rectifier', 'rectifier', 'rectifier'], ['rectifier'],
            out_transfer='softmax', loss='nce', image_height=32,
            image_width=32, n_image_channel=3, optimizer=optimizer,
            batch_size=batch_size, max_iter=max_iter,
            pool_shapes=[[3, 3], [3, 3], [3, 3]],
            filter_shapes=[[5, 5], [5, 5], [5, 5]],
            pool_strides=[[2, 2], [2, 2], [2, 2]], padding=[2, 2, 2],
            lrnorm=[True, True, False],
            init_weights_stdev=[0.01, 0.1, 0.1, 0.1, 0.1])

    # L2 penalty per weight matrix, averaged over the batch.
    weight_decay = (
        0.04 * ((m.parameters.in_to_hidden**2).sum())
        + 0.04 * ((m.parameters.hidden_conv_to_hidden_conv_0**2).sum())
        + 0.04 * ((m.parameters.hidden_conv_to_hidden_conv_1**2).sum())
        + 2 * (m.parameters.hidden_conv_to_hidden_full**2).sum())
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + weight_decay

    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)

    print('max iter %s' % max_iter)

    #### train model ####
    start = time.time()
    # Set up a nice printout.
    keys = '#', 'val loss', 'seconds', 'train emp', 'val emp'
    header = '\t'.join(keys)
    print(header)
    print('-' * len(header))

    # NOTE(review): compiled but never called below.
    f_loss = m.function(['inpt', 'target'], ['loss'])

    for info in m.powerfit((X, Z), (VX, VZ), stop, pause,
                           eval_train_loss=False):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start

        # Error rates scaled by the validation set size; the training error
        # is measured on an equally large prefix of the training set.
        f_wrong_val = m.apply_minibatches_function(
            f_n_wrong, VX, VZ) * VX.shape[0]
        f_wrong_train = m.apply_minibatches_function(
            f_n_wrong, X[:len(VX)], Z[:len(VZ)]) * len(VX)
        info.update({
            'time': passed,
            'val_emp': f_wrong_val,
            'train_emp': f_wrong_train
        })
        row = '%(n_iter)i\t%(val_loss)g\t%(time)g\t%(train_emp)g\t%(val_emp)g' % info
        print(row)
# Script fragment: loads the binary-label data set and defines the candidate
# hyper-parameter values used by the loop at the end of the fragment.
data = h5.File("/local-home/moritz/PycharmProjects/usaray_learning/usarray_data_scaled_train_val_bin.hdf5", "r")
# ``[...]` reads each data set into memory.
X = data['trainig_set/train_set'][...]
Z = data['trainig_labels/bin_train_labels'][...]
VX = data['validation_set/val_set'][...]
VZ = data['validation_labels/bin_val_labels'][...]

# Report the number of rows in every array.
print len(X)
print len(Z)
print len(VX)
print len(VZ)

# Keep at most the first 333000 training and 142000 validation rows.
X = X[:333000]
Z = Z[:333000]
VX = VX[:142000]
VZ = VZ[:142000]

# Two classes, one indicator column each.
Z = one_hot(Z, 2)
VZ = one_hot(VZ, 2)

# Opening in 'w' mode and closing again leaves an empty results.txt.
results = open('results.txt','w')
results.close()

# Candidate settings; ``archs`` is indexed at random below, the other lists
# are presumably sampled the same way further down -- TODO confirm.
archs = [[100,100],[200,200],[500,500]]
funcs = [['sigmoid','sigmoid'],['tanh','tanh'],['rectifier','rectifier']]
steps =[0.1,0.01,0.001,0.0001,0.00001]
batches = [500,1000,5000,10000]
inits = [0.1,0.01,0.001,0.0001,0.00001]
wds = [0.1,0.01,0.001,0.0001,0.00001]

# Endless loop; the rest of its body lies outside this fragment.
while 1==1:
    # Uniformly random index into ``archs``.
    arch_ind = int(np.random.random_sample() * len(archs))
#with gzip.open('../patches/patches'+p_code+'/im'+str(count)+'.pkl.gz', 'rb') as f: with gzip.open('../patches/noisy_bi_images/im' + str(count) + '.pkl.gz', 'rb') as f: patches, labels = pickle.load(f) patches, labels = shuffle_ims(patches, labels) patches, labels = z_mirror_ims(patches, labels) print labels.shape for i, lbl in enumerate(labels): mean = lbl.mean() means[count - 1, i] = mean print 'Mean value: ', mean labels = np.reshape(labels, (-1, )) labels = np.asarray(labels, dtype='int16') full_x[index, :, :, 0, :, :] = np.transpose(patches, (0, 3, 1, 2)) l = np.reshape(one_hot(labels, 2), (b_size, dimprod, -1)) full_y[index, :, :, :] = np.asarray(l, dtype='float32') #plt.imshow(full_x[index,dims[2]/2,0,:,:], cmap='Greys_r') #plt.show() gt = np.reshape(labels[:dimprod], dims) #plt.imshow(gt[:,:,dims[2]/2], cmap='Greys_r') #plt.show() index += 1 count += 1 rand_indices = np.random.permutation(30) #rand_indices = np.arange(30) train_x[:, :, :, :, :] = np.reshape( full_x[rand_indices[:train_n], :, :, :, :, :], train_x.shape) train_y[:, :, :] = np.reshape(full_y[rand_indices[:train_n], :, :, :],
def _unpickle(path):
    """Load one pickle from ``path``, closing the file afterwards."""
    # Binary mode is required for pickles independent of the platform.
    # NOTE: unpickling can execute arbitrary code -- trusted files only.
    with open(path, 'rb') as handle:
        return cPickle.load(handle)


def convolutional_nets_on_CIFAR10():
    """Fit a three-layer convolutional network to whitened CIFAR-10.

    The training and test pickles below ``pylearn2_gcn_whitened/`` supply
    the data; the test set is used for validation. Labels are one-hot
    encoded (ten classes) and both sets are limited to 390 resp. 78 batches
    of 128 samples. The training loss is the model loss plus an L2 weight
    penalty; the unpenalised loss stays available as ``'true_loss'``.

    A progress table (iteration, validation loss, elapsed seconds, scaled
    training and validation error) is printed; nothing is returned.
    """
    #### load data ####
    train_set = _unpickle('pylearn2_gcn_whitened/train.pkl')
    test_set = _unpickle('pylearn2_gcn_whitened/test.pkl')

    X, Z = train_set.get_data()
    VX, VZ = test_set.get_data()
    Z = one_hot(Z, 10)
    VZ = one_hot(VZ, 10)

    n_train = 128 * 390
    n_val = 128 * 78
    X = np.array(X[:n_train], dtype=np.float32)
    Z = np.array(Z[:n_train], dtype=np.float32)
    VZ = np.array(VZ[:n_val], dtype=np.float32)
    VX = np.array(VX[:n_val], dtype=np.float32)

    #### initialize model ####
    max_passes = 500
    batch_size = 128
    # ``//`` so that the iteration counts remain integers in every case.
    max_iter = max_passes * X.shape[0] // batch_size
    n_report = X.shape[0] // (5 * batch_size)

    stop = climin.stops.any_([
        climin.stops.after_n_iterations(max_iter),
    ])
    pause = climin.stops.modulo_n_iterations(n_report)

    optimizer = 'gd', {'steprate': 0.01, 'momentum': 0.9}

    m = Cnn(3072, [32, 64, 128], [50], 10,
            ['rectifier', 'rectifier', 'rectifier'], ['rectifier'],
            out_transfer='softmax',
            loss='nce',
            image_height=32,
            image_width=32,
            n_image_channel=3,
            optimizer=optimizer,
            batch_size=batch_size,
            max_iter=max_iter,
            pool_shapes=[[3, 3], [3, 3], [3, 3]],
            filter_shapes=[[5, 5], [5, 5], [5, 5]],
            pool_strides=[[2, 2], [2, 2], [2, 2]],
            padding=[2, 2, 2],
            lrnorm=[True, True, False],
            init_weights_stdev=[0.01, 0.1, 0.1, 0.1, 0.1])

    # Sum of squared weights per layer, with a larger factor on the
    # convolution-to-dense weights, divided by the batch size.
    pars = m.parameters
    weight_decay = 0.04 * ((pars.in_to_hidden**2).sum()) + 0.04 * (
        (pars.hidden_conv_to_hidden_conv_0**2).sum()) + 0.04 * (
            (pars.hidden_conv_to_hidden_conv_1**2).sum()) + 2 * (
                pars.hidden_conv_to_hidden_full**2).sum()
    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    m.exprs['loss'] = m.exprs['loss'] + weight_decay

    n_wrong = 1 - T.eq(T.argmax(m.exprs['output'], axis=1),
                       T.argmax(m.exprs['target'], axis=1)).mean()
    f_n_wrong = m.function(['inpt', 'target'], n_wrong)

    print('max iter %s' % max_iter)

    #### train model ####
    start = time.time()
    # Set up a nice printout.
    keys = '#', 'val loss', 'seconds', 'train emp', 'val emp'
    header = '\t'.join(keys)
    print(header)
    print('-' * len(header))

    # NOTE(review): this compiled function is not used afterwards.
    f_loss = m.function(['inpt', 'target'], ['loss'])

    reports = m.powerfit((X, Z), (VX, VZ), stop, pause,
                         eval_train_loss=False)
    for info in reports:
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start

        f_wrong_val = m.apply_minibatches_function(f_n_wrong, VX,
                                                   VZ) * VX.shape[0]
        f_wrong_train = m.apply_minibatches_function(
            f_n_wrong, X[:len(VX)], Z[:len(VZ)]) * len(VX)
        info.update({
            'time': passed,
            'val_emp': f_wrong_val,
            'train_emp': f_wrong_train
        })
        row = '%(n_iter)i\t%(val_loss)g\t%(time)g\t%(train_emp)g\t%(val_emp)g' % info
        print(row)
def convert_gt_to_onehot(gt, n_classes):
    """Encode a 3-D label volume as a flat one-hot matrix.

    The first axis of ``gt`` is moved to the back before flattening, so the
    rows of the result follow that transposed order. The result has shape
    ``(n_voxels, n_classes)``.
    """
    reordered = np.transpose(gt, (1, 2, 0))
    flat_labels = np.reshape(reordered, (-1,))
    encoded = one_hot(flat_labels, n_classes)
    return np.reshape(encoded, (-1, n_classes))
#import fasttsne import theano theano.config.compute_test_value = 'ignore' #'raise' datafile = 'mnist.pkl.gz' # Load data. with gzip.open(datafile, 'rb') as f: train_set, val_set, test_set = cPickle.load(f) X, Z = train_set VX, VZ = val_set TX, TZ = test_set Z = one_hot(Z, 10) VZ = one_hot(VZ, 10) TZ = one_hot(TZ, 10) image_dims = 28, 28 X, Z, VX, VZ, TX, TZ = [ cast_array_to_local_type(i) for i in (X, Z, VX, VZ, TX, TZ) ] batch_size = 100 #optimizer = 'rmsprop', {'step_rate': 1e-4, 'momentum': 0.95, 'decay': .95, 'offset': 1e-6} #optimizer = 'adam', {'step_rate': .5, 'momentum': 0.9, 'decay': .95, 'offset': 1e-6} optimizer = 'gd' fast_dropout = True
def evaluate_mlp(args):
    """Evaluate a checkpointed model on the test set and write a report.

    Changes the working directory to ``args['<location>']``, loads the last
    checkpoint returned by ``contrib.find_checkpoints``, restores the
    trainer's best parameters into the model and dumps them to
    ``best_pars.pkl``.  It then computes the classification error, the
    false-positive and false-negative rates (class 0 is treated as the
    negative class) and the share of "neighbour" mispredictions among the
    mispredictions between non-zero classes.  The four result lines are
    printed and written to ``eval_result.txt`` in the location directory.

    Parameters
    ----------
    args : dict
        Must provide ``'<location>'`` (directory with the checkpoints),
        ``'<data>'`` (path of the HDF5 file holding the test data) and
        ``'<mode>'`` (``'cnn'`` evaluates through
        ``apply_minibatches_function``; any other value evaluates the whole
        test set with a single function call).

    Returns
    -------
    int
        Always ``0``.

    Raises
    ------
    IOError
        If no checkpoint is found in the location directory.
    """
    location = os.path.abspath(args['<location>'])
    data_path = os.path.abspath(args['<data>'])
    mode = args['<mode>']

    os.chdir(location)
    cps = contrib.find_checkpoints('.')
    if not cps:
        # Without a checkpoint there is no trainer to evaluate; fail with a
        # clear message instead of an unbound-variable error further down.
        raise IOError('no checkpoints found in %s' % location)

    # NOTE: unpickling can execute arbitrary code -- only load checkpoints
    # from a trusted source.
    with gzip.open(cps[-1], 'rb') as fp:
        trainer = cPickle.load(fp)
    model = trainer.model
    model.parameters.data[...] = trainer.best_pars
    with open('best_pars.pkl', 'wb') as fp:
        cPickle.dump(trainer.best_pars, fp)

    # Symbolic class decisions of the model and of the targets.
    predicted = T.argmax(model.exprs['output'], axis=1)
    actual = T.argmax(model.exprs['target'], axis=1)

    n_wrong = 1 - T.eq(predicted, actual).mean()
    f_n_wrong = model.function(['inpt', 'target'], n_wrong)

    # False positive: target is class 0 but the prediction is not.
    f_pos = T.mean(T.neq(predicted, 0) * T.eq(actual, 0))
    f_f_pos = model.function(['inpt', 'target'], f_pos)

    # False negative: prediction is class 0 but the target is not.
    f_neg = T.mean(T.eq(predicted, 0) * T.neq(actual, 0))
    f_f_neg = model.function(['inpt', 'target'], f_neg)

    # Keep the HDF5 file open only while its datasets are being read.
    with h5.File(data_path, 'r') as data:
        TX = data['test_set/test_set']
        TZ = one_hot(data['test_labels/real_test_labels'], 13)

        if mode == 'cnn':
            print('using cnn model')
            emp_loss = model.apply_minibatches_function(f_n_wrong, TX, TZ)
            f_p = model.apply_minibatches_function(f_f_pos, TX, TZ)
            f_n = model.apply_minibatches_function(f_f_neg, TX, TZ)
        else:
            emp_loss = f_n_wrong(TX, TZ)
            f_p = f_f_pos(TX, TZ)
            f_n = f_f_neg(TX, TZ)

        P_pos = np.argmax(model.predict(TX), axis=1)
        Z_pos = np.argmax(TZ, axis=1)

    # Among mispredictions where both classes are non-zero, count those
    # that is_neighbour() accepts.
    neighbour_fails = .0
    relevant_fails = 0
    for pred_cls, true_cls in zip(P_pos, Z_pos):
        if pred_cls > 0 and true_cls > 0 and pred_cls != true_cls:
            relevant_fails += 1
            if is_neighbour(pred_cls, true_cls):
                neighbour_fails += 1
    if relevant_fails:
        neighbour_fails /= relevant_fails

    # NOTE(review): n_wrong, f_pos and f_neg are means (fractions), yet the
    # messages label them with a percent sign.  Left unchanged so existing
    # result files stay comparable -- confirm the intended unit.
    emp_loss_s = 'model achieved %f%% classification error on the test set' % emp_loss
    f_p_s = '\nmodel achieved %f%% false positives on the test set' % f_p
    f_n_s = '\nmodel achieved %f%% false negatives on the test set' % f_n
    neigh_s = '\nmodel achieved %f%% neighbour misspredictions on the test set' % neighbour_fails

    report = (emp_loss_s, f_p_s, f_n_s, neigh_s)
    for line in report:
        # A single parenthesised argument prints identically with the
        # Python 2 print statement and the print function.
        print(line)

    with open(os.path.join(location, 'eval_result.txt'), 'w') as f:
        for line in report:
            f.write(line)

    return 0


# The source continues here with a triple-quoted (disabled) block that is cut
# off in this view:
# '''indices = np.random.rand(50) * 10000
print 'Reading image ', count #with gzip.open('../patches/patches'+p_code+'/im'+str(count)+'.pkl.gz', 'rb') as f: with gzip.open('../patches/noisy_bi_images/im'+str(count)+'.pkl.gz', 'rb') as f: patches, labels = pickle.load(f) patches, labels = shuffle_ims(patches, labels) patches, labels = z_mirror_ims(patches, labels) print labels.shape for i, lbl in enumerate(labels): mean = lbl.mean() means[count-1,i] = mean print 'Mean value: ', mean labels = np.reshape(labels, (-1,)) labels = np.asarray(labels, dtype='int16') full_x[index,:,:,0,:,:] = np.transpose(patches, (0, 3, 1, 2)) l = np.reshape(one_hot(labels, 2), (b_size, dimprod, -1)) full_y[index,:,:,:] = np.asarray(l, dtype='float32') #plt.imshow(full_x[index,dims[2]/2,0,:,:], cmap='Greys_r') #plt.show() gt = np.reshape(labels[:dimprod], dims) #plt.imshow(gt[:,:,dims[2]/2], cmap='Greys_r') #plt.show() index += 1 count += 1 rand_indices = np.random.permutation(30) #rand_indices = np.arange(30) train_x[:,:,:,:,:] = np.reshape(full_x[rand_indices[:train_n],:,:,:,:,:], train_x.shape) train_y[:,:,:] = np.reshape(full_y[rand_indices[:train_n],:,:,:], train_y.shape) train_mean = means[rand_indices[:train_n],:]
# Make sure directory 'datasets' exists:
if not os.path.exists(DATA_HOME):
    os.makedirs(DATA_HOME)

# Size constants of the dummy data set: ds holds three dimensions and dp is
# their product, i.e. the number of label positions per sample.
ds = (64, 80, 72)
dp = np.prod(np.array(ds))
n_chans = 4
x_size = 2
v_size = t_size = 1
n_classes = 5

# The shapes below use the named sizes (n_chans, v_size, t_size) rather than
# literals, so the reshapes stay consistent if a constant is changed.

# Training split: standard-normal inputs, uniformly random integer labels in
# [0, n_classes) that are flattened, one-hot encoded and reshaped per sample.
x = np.random.randn(x_size, ds[2], n_chans, ds[0], ds[1])
y = np.random.randint(low=0, high=n_classes, size=(x_size, dp))
y = one_hot(np.reshape(y, (-1, )), n_classes)
y = np.reshape(y, (x_size, dp, n_classes))

# Validation split, built the same way.
vx = np.random.randn(v_size, ds[2], n_chans, ds[0], ds[1])
vy = np.random.randint(low=0, high=n_classes, size=(v_size, dp))
vy = one_hot(np.reshape(vy, (-1, )), n_classes)
vy = np.reshape(vy, (v_size, dp, n_classes))

# Test split, built the same way.
tx = np.random.randn(t_size, ds[2], n_chans, ds[0], ds[1])
ty = np.random.randint(low=0, high=n_classes, size=(t_size, dp))
ty = one_hot(np.reshape(ty, (-1, )), n_classes)
ty = np.reshape(ty, (t_size, dp, n_classes))

# Create the HDF5 container and the float32 datasets for the training split.
f = h5py.File(os.path.join(DATA_HOME, 'dummy45.hdf5'), 'w')
train_x = f.create_dataset('train_x', x.shape, dtype='float32')
train_y = f.create_dataset('train_y', y.shape, dtype='float32')
# Make sure directory 'datasets' exists:
if not os.path.exists(DATA_HOME):
    os.makedirs(DATA_HOME)

# Size constants of the dummy data set: ds holds three dimensions and dp is
# their product, i.e. the number of label positions per sample.
ds = (64, 80, 72)
dp = np.prod(np.array(ds))
n_chans = 4
x_size = 2
v_size = t_size = 1
n_classes = 5

# The shapes below use the named sizes (n_chans, v_size, t_size) rather than
# literals, so the reshapes stay consistent if a constant is changed.

# Training split: standard-normal inputs, uniformly random integer labels in
# [0, n_classes) that are flattened, one-hot encoded and reshaped per sample.
x = np.random.randn(x_size, ds[2], n_chans, ds[0], ds[1])
y = np.random.randint(low=0, high=n_classes, size=(x_size, dp))
y = one_hot(np.reshape(y, (-1,)), n_classes)
y = np.reshape(y, (x_size, dp, n_classes))

# Validation split, built the same way.
vx = np.random.randn(v_size, ds[2], n_chans, ds[0], ds[1])
vy = np.random.randint(low=0, high=n_classes, size=(v_size, dp))
vy = one_hot(np.reshape(vy, (-1,)), n_classes)
vy = np.reshape(vy, (v_size, dp, n_classes))

# Test split, built the same way.
tx = np.random.randn(t_size, ds[2], n_chans, ds[0], ds[1])
ty = np.random.randint(low=0, high=n_classes, size=(t_size, dp))
ty = one_hot(np.reshape(ty, (-1,)), n_classes)
ty = np.reshape(ty, (t_size, dp, n_classes))

# Create the HDF5 container and the float32 datasets for the training split.
f = h5py.File(os.path.join(DATA_HOME, 'dummy45.hdf5'), 'w')
train_x = f.create_dataset('train_x', x.shape, dtype='float32')
train_y = f.create_dataset('train_y', y.shape, dtype='float32')
def convert_gt_to_onehot(gt, n_classes):
    """Flatten a ground-truth array and one-hot encode its entries.

    The axes of ``gt`` are reordered with the permutation ``(1, 2, 0)``,
    the result is flattened to one dimension and handed to ``one_hot``.

    Parameters
    ----------
    gt : array_like
        Array with exactly three axes (required by the permutation);
        presumably integer class labels -- confirm against the caller.
    n_classes : int
        Number of classes passed on to ``one_hot``.

    Returns
    -------
    array
        The output of ``one_hot`` reshaped to ``(-1, n_classes)``.
    """
    reordered = np.transpose(gt, (1, 2, 0))
    flat_labels = np.reshape(reordered, (-1,))
    encoded = one_hot(flat_labels, n_classes)
    return np.reshape(encoded, (-1, n_classes))