def eval(self, X):
    batch_size = 100
    data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size,
                                  shuffle=False, last_batch_handle='pad')
    Y = list(model.extract_feature(self.loss, self.args, self.auxs, data_iter,
                                   X.shape[0], self.xpu).values())[0]
    return np.mean(np.square(Y - X)) / 2.0
def eval(self, X):
    batch_size = 100
    data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size,
                                  shuffle=False, last_batch_handle='pad')
    Y = model.extract_feature(self.loss, self.args, self.auxs, data_iter,
                              X.shape[0], self.xpu).values()[0]
    return np.mean(np.square(Y - X)) / 2.0
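# Both eval variants above report the mean squared reconstruction error,
# halved: L = (1/2) * mean((Y - X)^2). A minimal NumPy sketch of the same
# quantity, assuming X and Y are equally shaped (N, D) arrays:
import numpy as np

def recon_loss(X, Y):
    """Mean squared reconstruction error, halved (matches eval above)."""
    return np.mean(np.square(Y - X)) / 2.0

X = np.random.rand(4, 3)
assert np.isclose(recon_loss(X, X), 0.0)  # zero for a perfect reconstruction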
def layerwise_pretrain(self, X, batch_size, n_iter, optimizer, l_rate, decay,
                       lr_scheduler=None):
    def l2_norm(label, pred):
        return np.mean(np.square(label - pred)) / 2.0
    solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate,
                    lr_scheduler=lr_scheduler)
    solver.set_metric(mx.metric.CustomMetric(l2_norm))
    solver.set_monitor(Monitor(1000))
    data_iter = mx.io.NDArrayIter([X], batch_size=batch_size,
                                  shuffle=False, last_batch_handle='roll_over')
    for i in range(self.N):
        if i == 0:
            data_iter_i = data_iter
        else:
            X_i = model.extract_feature(self.internals[i - 1], self.args,
                                        ['data'], data_iter, X.shape[0],
                                        self.xpu).values()[0]
            data_iter_i = mx.io.NDArrayIter([X_i], batch_size=batch_size,
                                            last_batch_handle='roll_over')
        solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad,
                     ['data'], data_iter_i, 0, n_iter, self.args_mult)
def refresh(i):
    if i % update_interval == 0:
        z = model.extract_feature(self.feature, args, None, test_iter, N,
                                  self.xpu).values()[0]
        p = np.zeros((z.shape[0], self.num_centers))
        self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
        # the soft assignments qi (pred)
        y_pred = p.argmax(axis=1)
        print np.std(np.bincount(y_pred)), np.bincount(y_pred)
        if y is not None:
            # compare soft assignments with known labels (unused)
            print '... Updating i = %d' % i
            print np.std(np.bincount(y.astype(np.int))), np.bincount(y.astype(np.int))
            print y_pred[0:5], y.astype(np.int)[0:5]
            print 'Clustering Acc = %f' % cluster_acc(y_pred, y)[0]
            self.acci.append(cluster_acc(y_pred, y)[0])
        if i % self.batch_size == 0 and self.ploti <= 15:
            # Visualize the progression of the embedded representation on a
            # subsample of the data. For visualization we use t-SNE
            # (van der Maaten & Hinton, 2008) applied to the embedded points zi.
            tsne = TSNE(n_components=2, perplexity=15, learning_rate=275,
                        init='pca', random_state=0, verbose=2, method='exact')
            Z_tsne = tsne.fit_transform(z)
            ax = fig.add_subplot(4, 4, 1 + self.ploti)
            plot_embedding(Z_tsne, named_y, ax,
                           title="Epoch %d z_tsne iter (%d)" % (self.ploti, i),
                           legend=False, plotcolor=True)
            self.ploti = self.ploti + 1
        ## COMPUTING the target distribution P:
        ## we compute pi by first raising qi to the second power and then
        ## normalizing by the per-cluster frequency:
        weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) provides fj
        weight *= self.num_centers / weight.sum()
        p = (p ** 2) * weight
        train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
        print np.sum(y_pred != self.y_pred), 0.001 * y_pred.shape[0]
        # For the purpose of discovering cluster assignments, we stop the
        # procedure when fewer than tol = 0.1% of points change cluster
        # assignment between two consecutive iterations.
        if i == self.batch_size * 20:
            # performs 1 epoch = 615/3 = 205*1000 epochs
            #np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
            self.y_pred = y_pred
            return True
        self.y_pred = y_pred
        self.p = p
        self.z = z
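# refresh reports cluster_acc, the usual unsupervised clustering accuracy:
# the best one-to-one matching between predicted clusters and true labels.
# The helper itself is not shown in this section; a minimal sketch using
# SciPy's Hungarian solver (an assumption -- the project may implement it
# differently), for non-negative integer labels:
import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_acc(y_pred, y_true):
    """Return (accuracy, contingency matrix) under the best cluster-to-label
    assignment; a sketch, not necessarily the project's exact helper."""
    y_true = y_true.astype(np.int64)
    D = int(max(y_pred.max(), y_true.max())) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row, col = linear_sum_assignment(-w)  # maximize the matched counts
    return w[row, col].sum() / float(y_pred.size), w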
def eval(self, X, V, lambda_v_rt):
    batch_size = 100
    data_iter = mx.io.NDArrayIter({'data': X, 'V': V,
                                   'lambda_v_rt': lambda_v_rt},
                                  batch_size=batch_size, shuffle=False,
                                  last_batch_handle='pad')  # modified by hog
    Y = model.extract_feature(self.loss[1], self.args, self.auxs, data_iter,
                              X.shape[0], self.xpu).values()[0]
    return np.sum(np.square(Y - X)) / 2.0
def cluster(self, X, y=None, update_interval=None):
    N = X.shape[0]
    if not update_interval:
        update_interval = N
    batch_size = 256
    test_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size,
                                  shuffle=False, last_batch_handle='pad')
    args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()}
    z = list(model.extract_feature(self.feature, args, None, test_iter, N,
                                   self.xpu).values())[0]
    kmeans = KMeans(self.num_centers, n_init=20)
    kmeans.fit(z)
    args['dec_mu'][:] = kmeans.cluster_centers_
    solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)

    def ce(label, pred):
        return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]

    solver.set_metric(mx.metric.CustomMetric(ce))
    label_buff = np.zeros((X.shape[0], self.num_centers))
    train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff},
                                   batch_size=batch_size, shuffle=False,
                                   last_batch_handle='roll_over')
    self.y_pred = np.zeros((X.shape[0]))

    def refresh(i):
        if i % update_interval == 0:
            z = list(model.extract_feature(self.feature, args, None,
                                           test_iter, N, self.xpu).values())[0]
            p = np.zeros((z.shape[0], self.num_centers))
            self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
            y_pred = p.argmax(axis=1)
            print(np.std(np.bincount(y_pred)), np.bincount(y_pred))
            if y is not None:
                print(np.std(np.bincount(y.astype(np.int))),
                      np.bincount(y.astype(np.int)))
                print(cluster_acc(y_pred, y)[0])
            weight = 1.0 / p.sum(axis=0)
            weight *= self.num_centers / weight.sum()
            p = (p ** 2) * weight
            train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
            print(np.sum(y_pred != self.y_pred), 0.001 * y_pred.shape[0])
            if np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
                self.y_pred = y_pred
                return True
            self.y_pred = y_pred

    solver.set_iter_start_callback(refresh)
    solver.set_monitor(Monitor(50))
    solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter,
                 0, 1000000000, {}, False)
    self.end_args = args
    if y is not None:
        return cluster_acc(self.y_pred, y)[0]
    else:
        return -1
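# The ce metric above is the batch-averaged KL divergence
# KL(P||Q) = sum_ij p_ij * log(p_ij / q_ij), with a small epsilon added to
# the predictions for numerical safety. A quick numeric check, assuming the
# rows of label and pred are probability distributions:
import numpy as np

label = np.array([[0.7, 0.3], [0.2, 0.8]])
pred = np.array([[0.6, 0.4], [0.25, 0.75]])
kl = np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]
print(kl)  # small positive value; approximately 0 when pred equals label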
def cluster(self, X, y=None, update_interval=None):
    N = X.shape[0]
    if not update_interval:
        update_interval = N
    batch_size = 256
    test_iter = mx.io.NDArrayIter({"data": X}, batch_size=batch_size,
                                  shuffle=False, last_batch_handle="pad")
    args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()}
    z = model.extract_feature(self.feature, args, None, test_iter, N,
                              self.xpu).values()[0]
    kmeans = KMeans(self.num_centers, n_init=20)
    kmeans.fit(z)
    args["dec_mu"][:] = kmeans.cluster_centers_
    solver = Solver("sgd", momentum=0.9, wd=0.0, learning_rate=0.01)

    def ce(label, pred):
        return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]

    solver.set_metric(mx.metric.CustomMetric(ce))
    label_buff = np.zeros((X.shape[0], self.num_centers))
    train_iter = mx.io.NDArrayIter({"data": X}, {"label": label_buff},
                                   batch_size=batch_size, shuffle=False,
                                   last_batch_handle="roll_over")
    self.y_pred = np.zeros((X.shape[0]))

    def refresh(i):
        if i % update_interval == 0:
            z = model.extract_feature(self.feature, args, None, test_iter, N,
                                      self.xpu).values()[0]
            p = np.zeros((z.shape[0], self.num_centers))
            self.dec_op.forward([z, args["dec_mu"].asnumpy()], [p])
            y_pred = p.argmax(axis=1)
            print np.std(np.bincount(y_pred)), np.bincount(y_pred)
            if y is not None:
                print np.std(np.bincount(y.astype(np.int))), np.bincount(y.astype(np.int))
                print cluster_acc(y_pred, y)[0]
            weight = 1.0 / p.sum(axis=0)
            weight *= self.num_centers / weight.sum()
            p = (p ** 2) * weight
            train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
            print np.sum(y_pred != self.y_pred), 0.001 * y_pred.shape[0]
            if np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
                self.y_pred = y_pred
                return True
            self.y_pred = y_pred

    solver.set_iter_start_callback(refresh)
    solver.set_monitor(Monitor(50))
    solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter,
                 0, 1000000000, {}, False)
    self.end_args = args
    if y is not None:
        return cluster_acc(self.y_pred, y)[0]
    else:
        return -1
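# dec_op.forward fills p with the soft assignments q_ij, which DEC
# (Xie et al., 2016, eq. 1) defines with a Student's t kernel between the
# embedded points and the cluster centres. The custom operator itself is not
# shown in this section; a NumPy sketch of the same computation, assuming
# degrees of freedom alpha = 1 as in the paper:
import numpy as np

def soft_assignments(z, mu, alpha=1.0):
    """Student's t soft assignments q_ij between points z (N, D) and
    centres mu (K, D); a sketch of what dec_op.forward computes."""
    d2 = ((z[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)  # squared distances, (N, K)
    q = (1.0 + d2 / alpha) ** (-(alpha + 1.0) / 2.0)
    return q / q.sum(axis=1, keepdims=True)  # normalize rows to probabilities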
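# The weight/p manipulation inside the refresh callbacks computes the DEC
# target distribution P (eq. 3): q is sharpened by squaring and normalized by
# the per-cluster soft frequency f_j = sum_i q_ij. The extra
# num_centers / weight.sum() rescaling in the code cancels in the final row
# normalization, so a standalone sketch reduces to:
import numpy as np

def target_distribution(q):
    """DEC target distribution P: p_ij proportional to q_ij^2 / f_j,
    renormalized per row."""
    weight = q ** 2 / q.sum(axis=0)
    return (weight.T / weight.sum(axis=1)).T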
def layerwise_pretrain(self, X, batch_size, n_iter, optimizer, l_rate, decay,
                       lr_scheduler=None):
    def l2_norm(label, pred):
        return np.mean(np.square(label - pred)) / 2.0
    solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate,
                    lr_scheduler=lr_scheduler)
    solver.set_metric(mx.metric.CustomMetric(l2_norm))
    solver.set_monitor(Monitor(1000))
    data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size,
                                  shuffle=False, last_batch_handle='roll_over')
    for i in range(self.N):
        if i == 0:
            data_iter_i = data_iter
        else:
            X_i = model.extract_feature(self.internals[i - 1], self.args,
                                        self.auxs, data_iter, X.shape[0],
                                        self.xpu).values()[0]
            data_iter_i = mx.io.NDArrayIter({'data': X_i},
                                            batch_size=batch_size,
                                            last_batch_handle='roll_over')
        logging.info('Pre-training layer %d...' % i)
        solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad,
                     self.auxs, data_iter_i, 0, n_iter, {}, False)
def refresh(i):
    if i % update_interval == 0:
        z = model.extract_feature(self.feature, args, None, test_iter, N,
                                  self.xpu).values()[0]
        p = np.zeros((z.shape[0], self.num_centers))
        self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
        y_pred = p.argmax(axis=1)
        print np.std(np.bincount(y_pred)), np.bincount(y_pred)
        if y is not None:
            print np.std(np.bincount(y.astype(np.int))), np.bincount(y.astype(np.int))
            print cluster_acc(y_pred, y)[0]
        weight = 1.0 / p.sum(axis=0)
        weight *= self.num_centers / weight.sum()
        p = (p ** 2) * weight
        train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
        print np.sum(y_pred != self.y_pred), 0.001 * y_pred.shape[0]
        if np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
            self.y_pred = y_pred
            return True
        self.y_pred = y_pred
def layerwise_pretrain(self, X, batch_size, n_iter, optimizer, l_rate, decay,
                       lr_scheduler=None, print_every=1000):
    def l2_norm(label, pred):
        return np.mean(np.square(label - pred)) / 2.0
    solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate,
                    lr_scheduler=lr_scheduler)
    solver.set_metric(mx.metric.CustomMetric(l2_norm))
    solver.set_monitor(Monitor(print_every))
    data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size,
                                  shuffle=True, last_batch_handle='roll_over')
    for i in range(self.N):
        if i == 0:
            data_iter_i = data_iter
        else:
            X_i = list(model.extract_feature(
                self.internals[i - 1], self.args, self.auxs, data_iter,
                X.shape[0], self.xpu).values())[0]
            data_iter_i = mx.io.NDArrayIter({'data': X_i},
                                            batch_size=batch_size,
                                            last_batch_handle='roll_over')
        logging.info('Pre-training layer %d...', i)
        solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad,
                     self.auxs, data_iter_i, 0, n_iter, {}, False)
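# A hypothetical driver for the pretraining variants above, assuming the
# project's AutoEncoderModel class (constructed as in the snippet near the
# end of this section); the hyperparameters are illustrative, not prescriptive.
import mxnet as mx
import numpy as np

X = np.random.rand(1000, 784).astype('float32')  # stand-in data
ae_model = AutoEncoderModel(mx.cpu(), [X.shape[1], 500, 500, 2000, 10],
                            pt_dropout=0.2)
ae_model.layerwise_pretrain(X, batch_size=256, n_iter=50000,
                            optimizer='sgd', l_rate=0.1, decay=0.0)
print(ae_model.eval(X))  # mean squared reconstruction error, halved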
def layerwise_pretrain(self, X, b_size, total_iter, opti, l_rate, decay,
                       scheduler_lr=None):
    def l2_norm(label, pred):
        return np.mean(np.square(label - pred)) / 2.0
    solver = Solver(opti, momentum=0.9, wd=decay, learning_rate=l_rate,
                    lr_scheduler=scheduler_lr)
    solver.set_metric(mx.metric.CustomMetric(l2_norm))
    solver.set_monitor(Monitor(1000))
    data_iter = mx.io.NDArrayIter({'data': X}, batch_size=b_size,
                                  shuffle=True, last_batch_handle='roll_over')
    for i in range(self.N):
        if i == 0:
            data_iter_i = data_iter
        else:
            X_i = model.extract_feature(self.internals[i - 1], self.args,
                                        self.auxs, data_iter, X.shape[0],
                                        self.xpu).values()[0]
            data_iter_i = mx.io.NDArrayIter({'data': X_i}, batch_size=b_size,
                                            last_batch_handle='roll_over')
        logging.info('Pre-training layer %d...' % i)
        solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad,
                     self.auxs, data_iter_i, 0, total_iter, {}, False)
# Get the final cluster memberships:
# extract the best params and the embedding space
all_iter = mx.io.NDArrayIter({'data': X}, batch_size=X.shape[0],
                             shuffle=False, last_batch_handle='pad')
## embedded points zi
aDEC = DECModel(mx.cpu(), X, num_centers, 1.0, znum,
                'Z:\\Cristina\\Section3\\NME_DEC\\SAEmodels')
mxdec_args = {key: mx.nd.array(v) for key, v in dec_args.items()
              if key != 'dec_mubestacci'}
zbestacci = model.extract_feature(aDEC.feature, mxdec_args, None, all_iter,
                                  X.shape[0], aDEC.xpu).values()[0]
# orig paper uses 256*40 (10240) points per update, about 1/6 (N) of the data
#zbestacci = dec_model['zbestacci']
pbestacci = np.zeros((zbestacci.shape[0], dec_model['num_centers']))
aDEC.dec_op.forward([zbestacci, dec_args['dec_mubestacci'].asnumpy()],
                    [pbestacci])
y = np.asarray(roi_labels)
# find the max soft assignments
W = pbestacci.argmax(axis=1)
num_clusters = len(np.unique(W))
clusters = np.unique(W)
MLE_kj = np.zeros((num_clusters, num_classes))
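# MLE_kj is set up to hold, per cluster k and class j, the class-membership
# counts from which per-cluster maximum-likelihood class estimates follow.
# A hypothetical continuation (the filling loop is not part of this section),
# assuming integer class labels y_int aligned with W:
import numpy as np

def cluster_class_likelihoods(W, y_int, num_clusters, num_classes):
    """Count class occurrences per cluster, then row-normalize into
    per-cluster class likelihoods."""
    MLE_kj = np.zeros((num_clusters, num_classes))
    for k in range(num_clusters):
        for j in range(num_classes):
            MLE_kj[k, j] = np.sum((W == k) & (y_int == j))
    row_sums = MLE_kj.sum(axis=1, keepdims=True)
    return MLE_kj / np.maximum(row_sums, 1)  # avoid division by zero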
def solve(self, X, R, V, lambda_v_rt, lambda_u, lambda_v, dir_save,
          batch_size, xpu, sym, args, args_grad, auxs, data_iter, begin_iter,
          end_iter, args_lrmult={}, debug=False):
    # input names and shapes
    input_desc = data_iter.provide_data + data_iter.provide_label
    input_names = [k for k, shape in input_desc]
    # places to store them
    input_buffs = [mx.nd.empty(shape, ctx=xpu) for k, shape in input_desc]
    args = dict(args, **dict(zip(input_names, input_buffs)))
    # list all outputs (strings)
    output_names = sym.list_outputs()
    if debug:
        sym = sym.get_internals()
        blob_names = sym.list_outputs()
        sym_group = []
        for i in range(len(blob_names)):
            if blob_names[i] not in args:
                x = sym[i]
                if blob_names[i] not in output_names:
                    x = mx.symbol.BlockGrad(x, name=blob_names[i])
                sym_group.append(x)
        sym = mx.symbol.Group(sym_group)
    # bind the network params to the network (symbol)
    exe = sym.bind(xpu, args=args, args_grad=args_grad, aux_states=auxs)
    assert len(sym.list_arguments()) == len(exe.grad_arrays)
    update_dict = {name: nd for name, nd
                   in zip(sym.list_arguments(), exe.grad_arrays) if nd}
    batch_size = input_buffs[0].shape[0]
    self.optimizer.rescale_grad = 1.0 / batch_size
    self.optimizer.set_lr_mult(args_lrmult)
    output_dict = {}
    output_buff = {}
    internal_dict = dict(zip(input_names, input_buffs))
    # exe.outputs is a list of all output ndarrays
    for key, arr in zip(sym.list_outputs(), exe.outputs):
        if key in output_names:
            output_dict[key] = arr
            output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
        else:
            internal_dict[key] = arr
    # initialize U
    U = np.mat(np.zeros((R.shape[0], V.shape[1])))
    # set lambda_v_rt to 0 in the first epoch
    lambda_v_rt_old = np.zeros(lambda_v_rt.shape)
    lambda_v_rt_old[:] = lambda_v_rt[:]
    lambda_v_rt[:, :] = 0
    epoch = 0  # epoch index
    data_iter = mx.io.NDArrayIter({'data': X, 'V': V,
                                   'lambda_v_rt': lambda_v_rt},
                                  batch_size=batch_size, shuffle=False,
                                  last_batch_handle='pad')
    data_iter.reset()
    for i in range(begin_iter, end_iter):
        if self.iter_start_callback is not None:
            if self.iter_start_callback(i):
                return
        #if i == 100:
        #    V = np.zeros(V.shape)
        #    data_iter = mx.io.NDArrayIter({'data': X, 'V': V,
        #                                   'lambda_v_rt': lambda_v_rt},
        #                                  batch_size=batch_size, shuffle=False,
        #                                  last_batch_handle='pad')
        #    data_iter.reset()
        #    for j in range(10):
        #        batch = data_iter.next()
        try:
            batch = data_iter.next()
        except StopIteration:
            # the end of an epoch
            epoch += 1
            theta = model.extract_feature(sym[0], args, auxs, data_iter,
                                          X.shape[0], xpu).values()[0]
            # update U, V and get the BCD loss
            U, V, BCD_loss = BCD_one(R, U, V, theta, lambda_u, lambda_v,
                                     dir_save, True)
            # get the reconstruction loss
            Y = model.extract_feature(sym[1], args, auxs, data_iter,
                                      X.shape[0], xpu).values()[0]
            Recon_loss = lambda_v / np.square(lambda_v_rt_old[0, 0]) * \
                np.sum(np.square(Y - X)) / 2.0
            print "Epoch %d - tr_err/bcd_err/rec_err: %.1f/%.1f/%.1f" % (
                epoch, BCD_loss + Recon_loss, BCD_loss, Recon_loss)
            fp = open(dir_save + '/cdl.log', 'a')
            fp.write("Epoch %d - tr_err/bcd_err/rec_err: %.1f/%.1f/%.1f\n" % (
                epoch, BCD_loss + Recon_loss, BCD_loss, Recon_loss))
            fp.close()
            lambda_v_rt[:] = lambda_v_rt_old[:]  # back to the normal lambda_v_rt
            data_iter = mx.io.NDArrayIter({'data': X, 'V': V,
                                           'lambda_v_rt': lambda_v_rt},
                                          batch_size=batch_size, shuffle=False,
                                          last_batch_handle='pad')
            data_iter.reset()
            batch = data_iter.next()
        for data, buff in zip(batch.data + batch.label, input_buffs):
            # copy data from the batch to input_buffs; input_buffs is used
            # during forward and backward (buffs -> args -> exe)
            data.copyto(buff)
        exe.forward(is_train=True)
        if self.monitor is not None:
            self.monitor.forward_end(i, internal_dict)
        for key in output_dict:
            # output_buff is used for computing metrics
            output_dict[key].copyto(output_buff[key])
        exe.backward()
        for key, arr in update_dict.items():
            self.updater(key, arr, args[key])
        if self.metric is not None:
            self.metric.update([input_buffs[-1]],
                               [output_buff[output_names[0]]])
        if self.monitor is not None:
            self.monitor.backward_end(i, args, update_dict, self.metric)
        if self.iter_end_callback is not None:
            if self.iter_end_callback(i):
                return
        exe.outputs[0].wait_to_read()
    #Y = model.extract_feature(sym[0], args, auxs,
    #                          data_iter, X.shape[0], xpu).values()[0]
    #print Y
    #print Y.shape
    theta = model.extract_feature(sym[0], args, auxs, data_iter, X.shape[0],
                                  xpu).values()[0]
    U, V, BCD_loss = BCD_one(R, U, V, theta, lambda_u, lambda_v, dir_save,
                             True, 20)
    return U, V, theta, BCD_loss
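# BCD_one (not shown in this section) performs the block-coordinate-descent
# update of U and V against the encoder output theta, as in collaborative
# deep learning. A hedged sketch of the classic weighted least-squares
# U-update for fixed V (the (a, b) confidence scheme and all names here are
# assumptions, not the project's exact BCD_one):
import numpy as np

def update_U(R, V, lambda_u, a=1.0, b=0.01):
    """One BCD pass over U: each row u_i solves
    (V^T C_i V + lambda_u I) u_i = V^T C_i R_i,
    with confidence c_ij = a if R_ij > 0 else b; R is a dense 0/1 array."""
    K = V.shape[1]
    U = np.zeros((R.shape[0], K))
    VtV_b = b * V.T.dot(V)  # shared low-confidence part of V^T C_i V
    for i in range(R.shape[0]):
        idx = np.flatnonzero(R[i] > 0)
        Vi = V[idx]
        A = VtV_b + (a - b) * Vi.T.dot(Vi) + lambda_u * np.eye(K)
        rhs = a * Vi.T.dot(R[i, idx])
        U[i] = np.linalg.solve(A, rhs)
    return U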
def cluster(self, X_train, y_dec_train, y_train, classes, batch_size, save_to,
            labeltype, update_interval, logger):
    N = X_train.shape[0]
    self.best_args['update_interval'] = update_interval
    self.best_args['y_dec'] = y_dec_train
    self.best_args['roi_labels'] = y_train
    self.best_args['classes'] = classes
    self.best_args['batch_size'] = batch_size
    self.logger = logger
    # selecting batch size
    # [42*t for t in range(42)] will produce 16 train epochs
    # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630]
    test_iter = mx.io.NDArrayIter({'data': X_train}, batch_size=N,
                                  shuffle=False, last_batch_handle='pad')
    args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()}
    ## embedded points zi
    self.z = model.extract_feature(self.feature, args, None, test_iter, N,
                                   self.xpu).values()[0]
    # For visualization we use t-SNE (van der Maaten & Hinton, 2008)
    # applied to the embedded points zi
    self.perplexity = 5
    self.learning_rate = 125
    # reconstruct wordy labels: list(Y) == named_y
    named_y = [classes[kc] for kc in y_dec_train]
    self.best_args['named_y'] = named_y
    # To initialize the cluster centers, we pass the data through the
    # initialized DNN to get the embedded data points and then perform
    # standard k-means clustering in the feature space Z to obtain
    # k initial centroids {mu_j}
    kmeans = KMeans(self.best_args['num_centers'], n_init=20)
    kmeans.fit(self.z)
    args['dec_mu'][:] = kmeans.cluster_centers_
    figprogress = plt.figure(figsize=(20, 15))
    print 'Batch_size = %f' % self.best_args['batch_size']
    print 'update_interval = %f' % update_interval
    self.best_args['plot_interval'] = int(8 * update_interval)
    print 'plot_interval = %f' % self.best_args['plot_interval']
    self.best_args['y_pred'] = np.zeros((X_train.shape[0]))
    self.best_args['meanAuc_cv'] = []
    self.best_args['std_auc'] = []
    self.best_args['auc_val'] = []
    self.best_args['overall_metric'] = []
    self.ploti = 0
    self.maxAUC = 10000.0
    ### Define the DEC training variables
    label_buff = np.zeros((X_train.shape[0], self.best_args['num_centers']))
    train_iter = mx.io.NDArrayIter({'data': X_train}, {'label': label_buff},
                                   batch_size=self.best_args['batch_size'],
                                   shuffle=True, last_batch_handle='roll_over')
    ### KL DIVERGENCE MINIMIZATION, eq (2):
    # the model is trained by matching the soft assignments to the target
    # distribution. To this end, we define our objective as a KL divergence
    # loss between the soft assignments qi (pred) and the auxiliary
    # distribution pi (label).
    solver = Solver('sgd', learning_rate=0.1,
                    lr_scheduler=mx.misc.FactorScheduler(100, 0.1))
    # original: learning_rate=0.01
    # try 1: Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.000125,
    #               lr_scheduler=mx.misc.FactorScheduler(20*update_interval, 0.5))
    # try 2: Solver('sgd', momentum=0.6, wd=0.05, learning_rate=0.00125,
    #               lr_scheduler=mx.misc.FactorScheduler(20*update_interval, 0.5))
    #solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)

    def ce(label, pred):
        DECmetric = np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]
        print("DECmetric = {}".format(DECmetric))
        #####################
        # Z-space MLP fully connected layers for classification
        #####################
        batch_size = 50
        # Run the classifier with cross-validation and plot the ROC curves
        cv = StratifiedKFold(n_splits=5, random_state=3)
        # Evaluate a score by cross-validation
        tprs = []
        aucs = []
        mean_fpr = np.linspace(0, 1, 100)
        cvi = 0
        for train, test in cv.split(self.Z_train, self.yZ_train):
            # Multilayer Perceptron
            MLP_train_iter = mx.io.NDArrayIter(self.Z_train[train],
                                               self.yZ_train[train],
                                               batch_size, shuffle=True)
            MLP_val_iter = mx.io.NDArrayIter(self.Z_train[test],
                                             self.yZ_train[test], batch_size)
            # Define the MLP using MXNet's symbolic interface: fully connected
            # layers sandwiched between element-wise ReLU activations on each
            # FC layer's output.
            dataMLP = mx.sym.Variable('data')
            # the first fully-connected layer and its activation function
            fc1 = mx.sym.FullyConnected(data=dataMLP, num_hidden=128)
            act1 = mx.sym.Activation(data=fc1, act_type="relu")
            fc2 = mx.sym.FullyConnected(data=act1, num_hidden=32)
            act2 = mx.sym.Activation(data=fc2, act_type="relu")
            # the data has 2 classes
            fc3 = mx.sym.FullyConnected(data=act2, num_hidden=2)
            # softmax with cross-entropy loss
            mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')
            # create a trainable module on CPU
            #mon = mx.mon.Monitor(interval=100, pattern='.*', sort=True)  # defaults to mean absolute value |x|/size(x)
            #checkpoint = mx.callback.do_checkpoint('mlp_model_params_z{}_mu{}.arg'.format(self.best_args['znum'], self.best_args['num_centers']))
            self.mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())
            self.mlp_model.fit(MLP_train_iter,  # train data
                               monitor=None,
                               optimizer='sgd',  # use SGD to train
                               optimizer_params={'learning_rate': 0.1},  # fixed learning rate
                               eval_metric='acc',  # report accuracy during training
                               num_epoch=100)  # train for at most 100 dataset passes
            # After training completes, evaluate the trained model by running
            # predictions on validation data: prob[i][j] is the probability
            # that the i-th validation sample belongs to the j-th output class.
            prob_val = self.mlp_model.predict(MLP_val_iter)
            # Compute the ROC curve and the area under the curve
            fpr, tpr, thresholds = roc_curve(self.yZ_train[test],
                                             prob_val.asnumpy()[:, 1])
            # interpolate to create an ROC with 100 points
            tprs.append(interp(mean_fpr, fpr, tpr))
            tprs[-1][0] = 0.0
            roc_auc = auc(fpr, tpr)
            print roc_auc
            aucs.append(roc_auc)
            cvi += 1
        # average across all cv folds
        mean_tpr = np.mean(tprs, axis=0)
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)
        std_auc = np.std(aucs)
        print r'cv meanROC (AUC = {0:.4f} $\pm$ {1:.4f})'.format(mean_auc, std_auc)
        Z_test_iter = mx.io.NDArrayIter(self.Z_test, None, batch_size)
        prob_test = self.mlp_model.predict(Z_test_iter)
        # Compute the ROC curve and the area under the curve
        fpr_val, tpr_val, thresholds_val = roc_curve(self.yZ_test,
                                                     prob_test.asnumpy()[:, 1])
        self.auc_val = auc(fpr_val, tpr_val)
        print r'cv test (AUC = {0:.4f})'.format(self.auc_val)
        # compute the combined Z-space metric
        overall_metric = -np.log(mean_auc) - np.log(1 - DECmetric)
        print("overall_metric: DEC+MLP = {}".format(overall_metric))
        self.best_args['overall_metric'].append(overall_metric)
        if overall_metric <= self.maxAUC:
            print '================== Improving auc_val = {}'.format(self.auc_val)
            for key, v in args.items():
                self.best_args[key] = args[key]
            self.best_args['meanAuc_cv'].append(mean_auc)
            self.best_args['std_auc'].append(std_auc)
            self.best_args['auc_val'].append(self.auc_val)
            self.best_args['pbestacci'] = self.p
            self.best_args['zbestacci'] = self.z
            self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
            #self.best_args['mlp_model'] = self.mlp_model
            self.mlp_model.save_params(os.path.join(
                save_to, 'mlp_model_params_z{}_mu{}.arg'.format(
                    self.best_args['znum'], self.best_args['num_centers'])))
            self.maxAUC = overall_metric
        return overall_metric

    def refresh(i):
        # i=3, a full epoch occurs every i=798/48
        if i % self.best_args['update_interval'] == 0:
            self.z = list(model.extract_feature(self.feature, args, None,
                                                test_iter, N,
                                                self.xpu).values())[0]
            self.p = np.zeros((self.z.shape[0], self.best_args['num_centers']))
            self.dec_op.forward([self.z, args['dec_mu'].asnumpy()], [self.p])
            self.best_args['dec_mu'] = args['dec_mu']
            # the soft assignments qi (pred)
            y_pred = self.p.argmax(axis=1)
            print np.std(np.bincount(y_pred)), np.bincount(y_pred)
            ## COMPUTING the target distribution P:
            ## we compute pi by first raising qi to the second power and then
            ## normalizing by the per-cluster frequency:
            print '\n... Updating i = %d' % i
            weight = 1.0 / self.p.sum(axis=0)  # p.sum(axis=0) provides fj
            weight *= self.best_args['num_centers'] / weight.sum()
            self.p = (self.p ** 2) * weight
            train_iter.data_list[1][:] = (self.p.T / self.p.sum(axis=1)).T
            #print np.sum(y_pred != self.best_args['y_pred']), 0.001 * y_pred.shape[0]
            #####################
            # prep the Z-space MLP fully connected layers for classification
            #####################
            # compare soft assignments with known labels (only B or M)
            print '\n... Updating MLP fully connected layers, i = %d' % i
            sep = int(self.z.shape[0] * 0.10)
            print(self.z.shape)
            datalabels = np.asarray(self.best_args['roi_labels'])
            dataZspace = np.concatenate((self.z, self.p), axis=1)  #zbestacci #dec_model['zbestacci']
            Z = dataZspace[datalabels != 'K', :]
            y = datalabels[datalabels != 'K']
            print(Z)
            # hold out a test split; 5-fold cross-validation on the rest
            self.Z_test = Z[:sep]
            self.yZ_test = np.asanyarray(y[:sep] == 'M').astype(int)
            self.Z_train = Z[sep:]
            self.yZ_train = np.asanyarray(y[sep:] == 'M').astype(int)
            print(self.Z_test.shape)
            print(self.Z_train.shape)
            if i == 0:
                self.tsne = TSNE(n_components=2, perplexity=self.perplexity,
                                 learning_rate=self.learning_rate, init='pca',
                                 random_state=0, verbose=2, method='exact')
                self.Z_tsne = self.tsne.fit_transform(dataZspace)
                # plot the initial z
                figinint = plt.figure()
                axinint = figinint.add_subplot(1, 1, 1)
                plot_embedding_unsuper_NMEdist_intenh(self.Z_tsne, named_y,
                                                      axinint,
                                                      title='kmeans init tsne:\n',
                                                      legend=True)
                figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(
                    save_to, self.best_args['znum'],
                    self.best_args['num_centers'], labeltype),
                    bbox_inches='tight')
                plt.close()
            if i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15:
                # Visualize the progression of the embedded representation on
                # a subsample of the data; t-SNE (van der Maaten & Hinton,
                # 2008) applied to the embedded points zi
                tsne = TSNE(n_components=2, perplexity=self.perplexity,
                            learning_rate=self.learning_rate, init='pca',
                            random_state=0, verbose=2, method='exact')
                Z_tsne = tsne.fit_transform(dataZspace)
                axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
                plot_embedding_unsuper_NMEdist_intenh(Z_tsne, named_y,
                                                      axprogress,
                                                      title="iter %d z_tsne" % i,
                                                      legend=False)
                self.ploti = self.ploti + 1
            # For the purpose of discovering cluster assignments, we stop the
            # procedure when fewer than tol = 0.1% of points change cluster
            # assignment between two consecutive iterations.
            if i == self.best_args['update_interval'] * 120:
                # performs 1 epoch = 615/3 = 205*1000 epochs
                return True

    # Deep-learning metric to minimize
    solver.set_metric(mx.metric.CustomMetric(ce))
    # start the solver
    solver.set_iter_start_callback(refresh)
    solver.set_monitor(Monitor(self.best_args['update_interval']))
    solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter,
                 0, 1000000000, {}, False)
    self.end_args = args
    self.best_args['end_args'] = args
    # finish
    figprogress = plt.gcf()
    figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format(
        save_to, self.best_args['znum'], self.best_args['num_centers'],
        labeltype), bbox_inches='tight')
    plt.close()
    # plot the final z
    figfinal = plt.figure()
    axfinal = figfinal.add_subplot(1, 1, 1)
    tsne = TSNE(n_components=2, perplexity=self.perplexity,
                learning_rate=self.learning_rate, init='pca', random_state=0,
                verbose=2, method='exact')
    Z_tsne = tsne.fit_transform(self.z)
    plot_embedding_unsuper_NMEdist_intenh(Z_tsne, self.best_args['named_y'],
                                          axfinal, title='final tsne',
                                          legend=True)
    figfinal.savefig('{}\\tsne_final_z{}_mu{}_{}.pdf'.format(
        save_to, self.best_args['znum'], self.best_args['num_centers'],
        labeltype), bbox_inches='tight')
    plt.close()
    outdict = {'meanAuc_cv': self.best_args['meanAuc_cv'],
               'std_auc': self.best_args['std_auc'],
               'auc_val': self.best_args['auc_val'],
               'overall_metric': self.best_args['overall_metric'],
               'dec_mu': self.best_args['dec_mu'],
               'y_pred': self.best_args['y_pred'],
               'named_y': self.best_args['named_y'],
               'classes': self.best_args['classes'],
               'num_centers': self.best_args['num_centers'],
               'znum': self.best_args['znum'],
               'update_interval': self.best_args['update_interval'],
               'batch_size': self.best_args['batch_size']}
    return outdict
def refresh(i):
    # i=3, a full epoch occurs every i=798/48
    if i % self.best_args['update_interval'] == 0:
        z = list(model.extract_feature(self.feature, args, None, test_iter, N,
                                       self.xpu).values())[0]
        p = np.zeros((z.shape[0], self.best_args['num_centers']))
        self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
        # the soft assignments qi (pred)
        y_pred = p.argmax(axis=1)
        #print np.std(np.bincount(y_dec_train)), np.bincount(y_dec_train)
        print np.std(np.bincount(y_pred)), np.bincount(y_pred)
        #####################
        # Z-space CV RF classifier METRICS
        #####################
        # compare soft assignments with known labels (only B or M)
        print '\n... Updating i = %d' % i
        allL = np.asarray(y_train)
        dataZspace = np.concatenate((z[allL != 'K', :],
                                     np.reshape(y_pred[allL != 'K'], (-1, 1))),
                                    axis=1)
        ydatalabels = np.asarray(allL[allL != 'K'] == 'M').astype(int)  # malignant is the positive class
        cv = StratifiedKFold(n_splits=5)
        RFmodel = RandomForestClassifier(n_jobs=2, n_estimators=500,
                                         random_state=0, verbose=0)
        # Evaluate a score by cross-validation
        tprs = []
        aucs = []
        mean_fpr = np.linspace(0, 1, 100)
        cvi = 0
        for train, test in cv.split(dataZspace, ydatalabels):
            probas = RFmodel.fit(dataZspace[train],
                                 ydatalabels[train]).predict_proba(dataZspace[test])
            # Compute the ROC curve and the area under the curve
            fpr, tpr, thresholds = roc_curve(ydatalabels[test], probas[:, 1])
            # interpolate to create an ROC with 100 points
            tprs.append(interp(mean_fpr, fpr, tpr))
            tprs[-1][0] = 0.0
            roc_auc = auc(fpr, tpr)
            aucs.append(roc_auc)
            cvi += 1
        mean_tpr = np.mean(tprs, axis=0)
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)
        # alternatively, cv=5 specifies the number of folds in a (Stratified)KFold:
        #scores_BorM = cross_val_score(RFmodel, data, datalabels, cv=5)
        #Acc = scores_BorM.mean()
        # compute the Z-space accuracy
        Acc = mean_auc
        print "cvRF BorM mean_auc = %f " % Acc
        #print scores_BorM.tolist()
        if i == 0:
            tsne = TSNE(n_components=2, perplexity=self.perplexity,
                        learning_rate=self.learning_rate, init='pca',
                        random_state=0, verbose=2, method='exact')
            Z_tsne = tsne.fit_transform(z)
            self.best_args['initAcc'] = Acc
            # plot the initial z
            figinint = plt.figure()
            axinint = figinint.add_subplot(1, 1, 1)
            plot_embedding_unsuper_NMEdist_intenh(
                Z_tsne, named_y, axinint,
                title='kmeans init tsne: Acc={}'.format(Acc), legend=True)
            figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(
                save_to, self.best_args['znum'], self.best_args['num_centers'],
                labeltype), bbox_inches='tight')
            plt.close()
        # save the best args
        self.best_args['acci'].append(Acc)
        if Acc >= self.maxAcc:
            print 'Improving mean_auc = {}'.format(Acc)
            for key, v in args.items():
                self.best_args[key] = args[key]
            self.maxAcc = Acc
            self.best_args['pbestacci'] = p
            self.best_args['zbestacci'] = z
            self.best_args['bestacci'].append(Acc)
            self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
        if i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15:
            # Visualize the progression of the embedded representation on a
            # subsample of the data; t-SNE (van der Maaten & Hinton, 2008)
            # applied to the embedded points zi
            tsne = TSNE(n_components=2, perplexity=self.perplexity,
                        learning_rate=self.learning_rate, init='pca',
                        random_state=0, verbose=2, method='exact')
            Z_tsne = tsne.fit_transform(z)
            axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
            plot_embedding_unsuper_NMEdist_intenh(
                Z_tsne, named_y, axprogress,
                title="Epoch %d z_tsne Acc (%f)" % (i, Acc), legend=False)
            self.ploti = self.ploti + 1
        ## COMPUTING the target distribution P:
        ## we compute pi by first raising qi to the second power and then
        ## normalizing by the per-cluster frequency:
        weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) provides fj
        weight *= self.best_args['num_centers'] / weight.sum()
        p = (p ** 2) * weight
        train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
        print np.sum(y_pred != self.best_args['y_pred']), 0.001 * y_pred.shape[0]
        # We stop the procedure when fewer than tol = 0.1% of points change
        # cluster assignment between two consecutive iterations.
        if i == self.best_args['update_interval'] * 200:
            # performs 1 epoch = 615/3 = 205*1000 epochs
            self.best_args['y_pred'] = y_pred
            self.best_args['acci'].append(Acc)
            return True
        self.best_args['y_pred'] = y_pred
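# The fold-averaged ROC above relies on interpolating each fold's curve onto
# a common FPR grid before averaging; np.interp is equivalent to the
# scipy.interp used in the code. A compact sketch of that averaging, assuming
# (fpr, tpr) pairs from sklearn's roc_curve:
import numpy as np
from sklearn.metrics import auc

def mean_roc(fold_curves, n_grid=100):
    """Average per-fold ROC curves on a common FPR grid; returns the grid,
    the mean TPR, and the mean AUC."""
    mean_fpr = np.linspace(0, 1, n_grid)
    tprs = []
    for fpr, tpr in fold_curves:
        t = np.interp(mean_fpr, fpr, tpr)
        t[0] = 0.0  # force the curve through the origin
        tprs.append(t)
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0  # and through (1, 1)
    return mean_fpr, mean_tpr, auc(mean_fpr, mean_tpr)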
def tsne_wtest(self, X_test, y_wtest, zfinal):
    ## self = dec_model
    ## embedded point zi and X_test = combX[0, :]
    X_test = np.reshape(X_test, (1, X_test.shape[0]))
    y_test = y[0]
    y_wtest = list(y)
    y_wtest.append(y_test)
    y_wtest = np.asarray(y_wtest)
    # X_wtest = np.vstack([combX, X_test])
    ########
    fighome = 'results//clusterDEC_unsuperv_QuantitativEval_wPerfm'
    znum = 10
    numc = 19
    dec_model = DECModel(mx.cpu(), combX, numc, 1.0, znum,
                         'model/NME_' + str(znum) + 'k')
    with open(fighome + os.sep + 'NME_Quantitative_NMI_numc' + str(numc) +
              '_' + str(znum) + 'z', 'rb') as fin:
        savedict = pickle.load(fin)
    N = combX.shape[0]
    #X_test.transpose().shape
    test_iter = mx.io.NDArrayIter({'data': combX}, batch_size=1,
                                  shuffle=False, last_batch_handle='pad')
    args = {k: mx.nd.array(v.asnumpy(), ctx=dec_model.xpu)
            for k, v in savedict['end_args'].items()}
    # DEC forward pass to embed the points
    z_test = model.extract_feature(dec_model.feature, args, None, test_iter,
                                   N, dec_model.xpu).values()[0]
    # For visualization we use t-SNE (van der Maaten & Hinton, 2008)
    # applied to the embedded points zi
    tsne = TSNE(n_components=2, perplexity=15, learning_rate=375, init='pca',
                random_state=0, verbose=2, method='exact')
    Z_tsne = tsne.fit_transform(z_test)
    # plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    y_tsne = savedict['clusterpts_labels']
    plot_embedding(Z_tsne, y_tsne, ax,
                   title="tsne with test y class(%s)" % (y_tsne[-1]),
                   legend=True, plotcolor=True)
    # for cluster prediction
    p_test = np.zeros((z_test.shape[0], dec_model.num_centers))
    dec_mu_final = args['dec_mu'].asnumpy()
    dec_model.dec_op.forward([z_test, dec_mu_final], [p_test])
    # the soft assignments qi (pred)
    y_test_pred = p_test.argmax(axis=1)
    print y_test_pred
def cluster_unsuperv(self, X, y, y_tsne, fighome, update_interval=None):
    N = X.shape[0]
    plotting_interval = N
    if not update_interval:
        update_interval = int(self.batch_size / 5.0)
    # selecting batch size
    # [42*t for t in range(42)] will produce 16 train epochs
    # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630]
    batch_size = self.batch_size  #615/3 42 #256
    test_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size,
                                  shuffle=False, last_batch_handle='pad')
    args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()}
    ## embedded points zi
    z = model.extract_feature(self.feature, args, None, test_iter, N,
                              self.xpu).values()[0]
    # For visualization we use t-SNE (van der Maaten & Hinton, 2008)
    # applied to the embedded points zi
    pp = 15
    tsne = TSNE(n_components=2, perplexity=pp, learning_rate=275, init='pca',
                random_state=0, verbose=2, method='exact')
    Z_tsne = tsne.fit_transform(z)
    # plot the initial z
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    plot_embedding(Z_tsne, y_tsne, ax, title="tsne with perplexity %d" % pp,
                   legend=True, plotcolor=True)
    fig.savefig(fighome + os.sep + 'tsne_init_z' + str(self.znum) + '.pdf',
                bbox_inches='tight')
    plt.close()
    # To initialize the cluster centers, we pass the data through the
    # initialized DNN to get the embedded data points and then perform
    # standard k-means clustering in the feature space Z to obtain
    # k initial centroids {mu_j}
    kmeans = KMeans(self.num_centers, n_init=20)
    kmeans.fit(z)
    args['dec_mu'][:] = kmeans.cluster_centers_
    ### KL DIVERGENCE MINIMIZATION, eq (2):
    # the model is trained by matching the soft assignments to the target
    # distribution. To this end, we define our objective as a KL divergence
    # loss between the soft assignments qi (pred) and the auxiliary
    # distribution pi (label).
    solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)

    def ce(label, pred):
        return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]

    solver.set_metric(mx.metric.CustomMetric(ce))
    label_buff = np.zeros((X.shape[0], self.num_centers))
    train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff},
                                   batch_size=batch_size, shuffle=False,
                                   last_batch_handle='roll_over')
    self.y_pred = np.zeros((X.shape[0]))
    self.solvermetric = []
    self.ploti = 0
    fig = plt.figure(figsize=(20, 15))
    print 'Batch_size = %f' % self.batch_size
    print 'update_interval = %f' % update_interval
    print 'tolerance = len(ypred)/1000 = %f' % float(0.001 * self.y_pred.shape[0])

    def refresh_unsuperv(i):
        if i % update_interval == 0:
            print '... Updating i = %d' % i
            z = model.extract_feature(self.feature, args, None, test_iter, N,
                                      self.xpu).values()[0]
            p = np.zeros((z.shape[0], self.num_centers))
            self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
            # the soft assignments qi (pred)
            y_pred = p.argmax(axis=1)
            print np.std(np.bincount(y_pred)), np.bincount(y_pred)
            if y is not None:
                # compare soft assignments with known labels (unused)
                print np.std(np.bincount(y.astype(np.int))), np.bincount(y.astype(np.int))
                print y_pred[0:5], y.astype(np.int)[0:5]
                print 'Clustering Acc = %f' % cluster_acc(y_pred, y)[0]
                self.acci.append(cluster_acc(y_pred, y)[0])
            ## COMPUTING the target distribution P:
            ## we compute pi by first raising qi to the second power and then
            ## normalizing by the per-cluster frequency:
            weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) provides fj
            weight *= self.num_centers / weight.sum()
            p = (p ** 2) * weight
            train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
            print "sum(I(y'-1 != y)) = %f" % np.sum(y_pred != self.y_pred)
            self.solvermetric.append(solver.metric.get()[1])
            print "solver.metric = %f" % solver.metric.get()[1]
            # We stop the procedure when fewer than tol = 0.1% of points
            # change cluster assignment between two consecutive iterations:
            #if np.sum(y_pred != self.y_pred) < 0.001 * y_pred.shape[0]:
            #    self.y_pred = y_pred
            #    return True
            self.y_pred = y_pred
            self.p = p
            self.z = z
        # plotting
        if i % plotting_interval == 0:
            if self.ploti <= 15:
                # Visualize the progression of the embedded representation on
                # a subsample of the data; t-SNE (van der Maaten & Hinton,
                # 2008) applied to the embedded points zi
                tsne = TSNE(n_components=2, perplexity=15, learning_rate=275,
                            init='pca', random_state=0, verbose=2,
                            method='exact')
                Z_tsne = tsne.fit_transform(self.z)
                ax = fig.add_subplot(3, 4, 1 + self.ploti)
                plot_embedding(Z_tsne, y_tsne, ax,
                               title="Epoch %d z_tsne iter (%d)" % (self.ploti, i),
                               legend=False, plotcolor=True)
                self.ploti = self.ploti + 1

    # start the solver
    solver.set_iter_start_callback(refresh_unsuperv)
    # monitor every self.batch_size iterations
    solver.set_monitor(Monitor(self.batch_size))
    solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter,
                 0, 12 * N, {}, False)
    # finish
    fig = plt.gcf()
    fig.savefig(fighome + os.sep + 'tsne_progress_k' + str(self.num_centers) +
                '_z' + str(self.znum) + '.pdf', bbox_inches='tight')
    plt.close()
    # plot the progression of the clustering loss
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(range(len(self.solvermetric)), self.solvermetric, '-.')
    ax.set_xlabel("iter")
    ax.set_ylabel("L loss for num_centers = " + str(self.num_centers))
    fig.savefig(fighome + os.sep + 'clustering_loss_numcenters' +
                str(self.num_centers) + '_z' + str(self.znum) + '.pdf',
                bbox_inches='tight')
    plt.close()
    self.end_args = args
    outdict = {'p': self.p, 'z': self.z, 'y_pred': self.y_pred}
    return outdict
def cluster(self, X_train, y_dec_train, y_train, classes, batch_size, save_to,
            labeltype, update_interval=None):
    N = X_train.shape[0]
    self.best_args['update_interval'] = update_interval
    self.best_args['y_dec'] = y_dec_train
    self.best_args['roi_labels'] = y_train
    self.best_args['classes'] = classes
    self.best_args['batch_size'] = batch_size
    # selecting batch size
    # [42*t for t in range(42)] will produce 16 train epochs
    # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630]
    test_iter = mx.io.NDArrayIter({'data': X_train}, batch_size=N,
                                  shuffle=False, last_batch_handle='pad')
    args = {k: mx.nd.array(v.asnumpy(), ctx=self.xpu)
            for k, v in self.args.items()}
    ## embedded points zi
    z = model.extract_feature(self.feature, args, None, test_iter, N,
                              self.xpu).values()[0]
    # For visualization we use t-SNE (van der Maaten & Hinton, 2008)
    # applied to the embedded points zi
    self.perplexity = 15
    self.learning_rate = 125
    # reconstruct wordy labels: list(Y) == named_y
    named_y = [classes[kc] for kc in y_dec_train]
    self.best_args['named_y'] = named_y
    # To initialize the cluster centers, we pass the data through the
    # initialized DNN to get the embedded data points and then perform
    # standard k-means clustering in the feature space Z to obtain
    # k initial centroids {mu_j}
    kmeans = KMeans(self.best_args['num_centers'], n_init=20)
    kmeans.fit(z)
    args['dec_mu'][:] = kmeans.cluster_centers_
    ### KL DIVERGENCE MINIMIZATION, eq (2):
    # the model is trained by matching the soft assignments to the target
    # distribution. To this end, we define our objective as a KL divergence
    # loss between the soft assignments qi (pred) and the auxiliary
    # distribution pi (label).
    solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)
    # alternative: lr_scheduler=mx.misc.FactorScheduler(20*update_interval, 0.4)

    def ce(label, pred):
        return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0]

    solver.set_metric(mx.metric.CustomMetric(ce))
    label_buff = np.zeros((X_train.shape[0], self.best_args['num_centers']))
    train_iter = mx.io.NDArrayIter({'data': X_train}, {'label': label_buff},
                                   batch_size=self.best_args['batch_size'],
                                   shuffle=False, last_batch_handle='roll_over')
    self.best_args['y_pred'] = np.zeros((X_train.shape[0]))
    self.best_args['acci'] = []
    self.best_args['bestacci'] = []
    self.ploti = 0
    figprogress = plt.figure(figsize=(20, 15))
    print 'Batch_size = %f' % self.best_args['batch_size']
    print 'update_interval = %f' % update_interval
    self.best_args['plot_interval'] = int(20 * update_interval)
    print 'plot_interval = %f' % self.best_args['plot_interval']
    self.maxAcc = 0.0

    def refresh(i):
        # i=3, a full epoch occurs every i=798/48
        if i % self.best_args['update_interval'] == 0:
            z = list(model.extract_feature(self.feature, args, None, test_iter,
                                           N, self.xpu).values())[0]
            p = np.zeros((z.shape[0], self.best_args['num_centers']))
            self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
            # the soft assignments qi (pred)
            y_pred = p.argmax(axis=1)
            print np.std(np.bincount(y_dec_train)), np.bincount(y_dec_train)
            print np.std(np.bincount(y_pred)), np.bincount(y_pred)
            #####################
            # Z-space CV RF classifier METRICS
            #####################
            # compare soft assignments with known labels (only B or M)
            print '\n... Updating i = %d' % i
            datalabels = np.asarray(y_train)
            dataZspace = np.concatenate((z, p), axis=1)  #zbestacci #dec_model['zbestacci']
            Xdata = dataZspace[datalabels != 'K', :]
            ydatalabels = datalabels[datalabels != 'K']
            RFmodel = RandomForestClassifier(n_jobs=2, n_estimators=500,
                                             random_state=0, verbose=0)
            # Evaluate a score by cross-validation;
            # cv=5 specifies the number of folds in a (Stratified)KFold
            scores_BorM = cross_val_score(RFmodel, Xdata, ydatalabels, cv=5)
            # compute the Z-space accuracy
            Acc = scores_BorM.mean()
            print "cvRF BorM Accuracy = %f " % Acc
            print scores_BorM.tolist()
            # use only the filledbyBC examples (the first 202 examples)
            nme_dist_label = [lab.split('_')[1]
                              for lab in self.best_args['named_y'][0:202]]
            nme_intenh_label = [lab.split('_')[2]
                                for lab in self.best_args['named_y'][0:202]]
            # compute the Z-space descriptor accuracies
            scores_dist = cross_val_score(RFmodel, z[0:202], nme_dist_label,
                                          cv=5)
            print "cvRF nme_dist Accuracy = %f " % scores_dist.mean()
            scores_intenh = cross_val_score(RFmodel, z[0:202],
                                            nme_intenh_label, cv=5)
            print "cvRF nme_intenh Accuracy = %f " % scores_intenh.mean()
            #####################
            # CALCULATE 5nn METRICS
            #####################
            labels = np.asarray(self.best_args['named_y'])
            Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=4)
            # query the closest neighbors (k=2, excluding the query point)
            nme_dist = ['Diffuse', 'Focal', 'Linear', 'MultipleRegions',
                        'Regional', 'Segmental']
            nme_intenh = ['Clumped', 'ClusteredRing', 'Heterogeneous',
                          'Homogeneous', 'Stippled or punctate']
            wnme_dist = np.zeros((len(nme_dist), len(nme_dist)),
                                 dtype=np.int64)
            wnme_intenh = np.zeros((len(nme_intenh), len(nme_intenh)),
                                   dtype=np.int64)
            BorM_diag = []
            TP = []
            TN = []
            FP = []
            FN = []
            missed = []
            NME_descript_dist = []
            NME_descript_intenh = []
            # count statistics
            for k in range(z.shape[0]):
                iclass = labels[k]
                dist, ind = Z_embedding_tree.query([z[k]], k=2)
                dist5nn, ind5nn = dist[k != ind], ind[k != ind]
                class5nn = labels[ind5nn]
                # compute DIAGNOSTIC ACC based on the similar local neighborhood
                if iclass.split('_')[0] != 'K':
                    predBorM = []
                    predBorM.append(sum([lab.split('_')[0] == 'M' for lab in class5nn]))
                    predBorM.append(sum([lab.split('_')[0] == 'B' for lab in class5nn]))
                    predBorM.append(sum([lab.split('_')[0] == 'K' for lab in class5nn]))
                    pred_BorM = [['M', 'B', 'K'][l]
                                 for l, pc in enumerate(predBorM)
                                 if pc >= max(predBorM) and max(predBorM) > 0][0]
                    # fill the confusion counts (TP if M)
                    if pred_BorM != 'K':
                        if iclass.split('_')[0] == pred_BorM:
                            BorM_diag.append(1)
                            if iclass.split('_')[0] == 'M':
                                TP.append(1)
                            if iclass.split('_')[0] == 'B':
                                TN.append(1)
                        if iclass.split('_')[0] != pred_BorM:
                            if iclass.split('_')[0] == 'M':
                                FN.append(1)
                            if iclass.split('_')[0] == 'B':
                                FP.append(1)
                    else:
                        missed.append(1)
                if k <= 202 and iclass.split('_')[1] != 'N/A':
                    # increment detections for the final NME descriptor accuracy
                    prednmed = []
                    prednmed.append(sum([lab.split('_')[1] == 'Diffuse' for lab in class5nn]))
                    prednmed.append(sum([lab.split('_')[1] == 'Focal' for lab in class5nn]))
                    prednmed.append(sum([lab.split('_')[1] == 'Linear' for lab in class5nn]))
                    prednmed.append(sum([lab.split('_')[1] == 'MultipleRegions' for lab in class5nn]))
                    prednmed.append(sum([lab.split('_')[1] == 'Regional' for lab in class5nn]))
                    prednmed.append(sum([lab.split('_')[1] == 'Segmental' for lab in class5nn]))
                    # prediction based on majority voting
                    pred_nme_dist = [nme_dist[l]
                                     for l, pc in enumerate(prednmed)
                                     if pc >= max(prednmed) and max(prednmed) > 0]
                    # compute NME ACC based on the similar local neighborhood
                    if iclass.split('_')[1] in pred_nme_dist:
                        NME_descript_dist.append(1)
                if k <= 202 and iclass.split('_')[2] != 'N/A':
                    prednmeie = []
                    prednmeie.append(sum([lab.split('_')[2] == 'Clumped' for lab in class5nn]))
                    prednmeie.append(sum([lab.split('_')[2] == 'ClusteredRing' for lab in class5nn]))
                    prednmeie.append(sum([lab.split('_')[2] == 'Heterogeneous' for lab in class5nn]))
                    prednmeie.append(sum([lab.split('_')[2] == 'Homogeneous' for lab in class5nn]))
                    prednmeie.append(sum([lab.split('_')[2] == 'Stippled or punctate' for lab in class5nn]))
                    # prediction based on majority voting
                    pred_nme_intenh = [nme_intenh[l]
                                       for l, pc in enumerate(prednmeie)
                                       if pc >= max(prednmeie) and max(prednmeie) > 0]
                    # compute NME ACC based on the similar local neighborhood
                    if iclass.split('_')[2] in pred_nme_intenh:
                        NME_descript_intenh.append(1)
            #####################
            # collect stats
            #####################
            BorM_diag_Acc = sum(BorM_diag) / float(len(datalabels))
            #Acc = BorM_diag_Acc
            print "BorM_diag_Acc = %f " % BorM_diag_Acc
            ## good if high
            TPR = sum(TP) / float(sum(datalabels == 'M'))
            print "TPR = %f " % TPR
            TNR = sum(TN) / float(sum(datalabels == 'B'))
            print "TNR = %f " % TNR
            ## bad if high
            FPR = sum(FP) / float(sum(datalabels == 'B'))
            print "FPR = %f " % FPR
            FNR = sum(FN) / float(sum(datalabels == 'M'))
            print "FNR = %f " % FNR
            # good if it decreases
            missedR = sum(np.asarray(missed)) / float(len(datalabels))
            print "missedR = %f " % missedR
            Acc5nn_nme_dist = sum(NME_descript_dist) / 202.0
            Acc5nn_nme_intenh = sum(NME_descript_intenh) / 202.0
            print "Acc5nn_nme_dist (DIST) = %f " % Acc5nn_nme_dist
            print "Acc5nn_nme_intenh (INT_ENH) = %f " % Acc5nn_nme_intenh
            if i == 0:
                tsne = TSNE(n_components=2, perplexity=self.perplexity,
                            learning_rate=self.learning_rate, init='pca',
                            random_state=0, verbose=2, method='exact')
                Z_tsne = tsne.fit_transform(z)
                self.best_args['initAcc'] = Acc
                # plot the initial z
                figinint = plt.figure()
                axinint = figinint.add_subplot(1, 1, 1)
                plot_embedding_unsuper_NMEdist_intenh(
                    Z_tsne, named_y, axinint,
                    title='kmeans init tsne: Acc={}\n RF_nme_dist={}\n RF_intenh={}'.format(
                        Acc, scores_dist.mean(), scores_intenh.mean()),
                    legend=True)
                figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(
                    save_to, self.best_args['znum'],
                    self.best_args['num_centers'], labeltype),
                    bbox_inches='tight')
                plt.close()
            # save the best args
            self.best_args['acci'].append(Acc)
            if Acc >= self.maxAcc:
                print 'Improving maxAcc = {}'.format(Acc)
                for key, v in args.items():
                    self.best_args[key] = args[key]
                self.maxAcc = Acc
                self.best_args['pbestacci'] = p
                self.best_args['zbestacci'] = z
                self.best_args['bestacci'].append(Acc)
                self.best_args['cvRF_nme_dist'] = scores_dist.mean()
                self.best_args['cvRF_nme_intenh'] = scores_intenh.mean()
                self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
                self.best_args['BorM_diag_Acc'] = BorM_diag_Acc
                self.best_args['TPR'] = TPR
                self.best_args['TNR'] = TNR
                self.best_args['FPR'] = FPR
                self.best_args['FNR'] = FNR
                self.best_args['missedR'] = missedR
                self.best_args['Acc5nn_nme_dist'] = Acc5nn_nme_dist
                self.best_args['Acc5nn_nme_intenh'] = Acc5nn_nme_intenh
            if i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15:
                # Visualize the progression of the embedded representation on
                # a subsample of the data; t-SNE (van der Maaten & Hinton,
                # 2008) applied to the embedded points zi
                tsne = TSNE(n_components=2, perplexity=self.perplexity,
                            learning_rate=self.learning_rate, init='pca',
                            random_state=0, verbose=2, method='exact')
                Z_tsne = tsne.fit_transform(z)
                axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
                plot_embedding_unsuper_NMEdist_intenh(
                    Z_tsne, named_y, axprogress,
                    title="Epoch %d z_tsne Acc (%f)" % (i, Acc), legend=False)
                self.ploti = self.ploti + 1
            ## COMPUTING the target distribution P:
            ## we compute pi by first raising qi to the second power and then
            ## normalizing by the per-cluster frequency:
            weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) provides fj
            weight *= self.best_args['num_centers'] / weight.sum()
            p = (p ** 2) * weight
            train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
            print np.sum(y_pred != self.best_args['y_pred']), 0.001 * y_pred.shape[0]
            # We stop the procedure when fewer than tol = 0.1% of points
            # change cluster assignment between two consecutive iterations.
            if i == self.best_args['update_interval'] * 100:
                # performs 1 epoch = 615/3 = 205*1000 epochs
                self.best_args['y_pred'] = y_pred
                self.best_args['acci'].append(Acc)
                return True
            self.best_args['y_pred'] = y_pred

    # start the solver
    solver.set_iter_start_callback(refresh)
    solver.set_monitor(Monitor(20))
    solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter,
                 0, 1000000000, {}, False)
    self.end_args = args
    self.best_args['end_args'] = args
    # finish
    figprogress = plt.gcf()
    figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format(
        save_to, self.best_args['znum'], self.best_args['num_centers'],
        labeltype), bbox_inches='tight')
    plt.close()
    # plot the final z
    figfinal = plt.figure()
    axfinal = figfinal.add_subplot(1, 1, 1)
    tsne = TSNE(n_components=2, perplexity=self.perplexity,
                learning_rate=self.learning_rate, init='pca', random_state=0,
                verbose=2, method='exact')
    Z_tsne = tsne.fit_transform(self.best_args['zbestacci'])
    plot_embedding_unsuper_NMEdist_intenh(
        Z_tsne, self.best_args['named_y'], axfinal,
        title='final tsne: Acc={}\n RF_nme_dist={}\n RF_intenh={}'.format(
            self.best_args['bestacci'][-1], self.best_args['cvRF_nme_dist'],
            self.best_args['cvRF_nme_intenh']),
        legend=True)
    figfinal.savefig('{}\\tsne_final_z{}_mu{}_{}.pdf'.format(
        save_to, self.best_args['znum'], self.best_args['num_centers'],
        labeltype), bbox_inches='tight')
    plt.close()
    outdict = {'initAcc': self.best_args['initAcc'],
               'acci': self.best_args['acci'],
               'bestacci': self.best_args['bestacci'],
               'pbestacci': self.best_args['pbestacci'],
               'zbestacci': self.best_args['zbestacci'],
               'dec_mubestacci': self.best_args['dec_mu'],
               'cvRF_nme_dist': self.best_args['cvRF_nme_dist'],
               'cvRF_nme_intenh': self.best_args['cvRF_nme_intenh'],
               'BorM_diag_Acc': self.best_args['BorM_diag_Acc'],
               'TPR': self.best_args['TPR'],
               'TNR': self.best_args['TNR'],
               'FPR': self.best_args['FPR'],
               'FNR': self.best_args['FNR'],
               'missedR': self.best_args['missedR'],
               'Acc5nn_nme_dist': self.best_args['Acc5nn_nme_dist'],
               'Acc5nn_nme_intenh': self.best_args['Acc5nn_nme_intenh'],
               'y_pred': self.best_args['y_pred'],
               'named_y': self.best_args['named_y'],
               'classes': self.best_args['classes'],
               'num_centers': self.best_args['num_centers'],
               'znum': self.best_args['znum'],
               'update_interval': self.best_args['update_interval'],
               'batch_size': self.best_args['batch_size']}
    return outdict
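# The long per-point loop above is a majority vote over the labels of each
# point's nearest neighbours in the embedding. A compact sketch of the same
# vote (a simplification of the inline logic, which additionally splits the
# composite labels into diagnosis and descriptor parts):
import numpy as np
from collections import Counter
from sklearn.neighbors import BallTree

def knn_majority_vote(z, labels, k=5):
    """Predict each point's label by majority vote over its k nearest
    neighbours in z, excluding the point itself."""
    tree = BallTree(z, leaf_size=4)
    _, ind = tree.query(z, k=k + 1)  # +1: each point is its own neighbour
    preds = []
    for i, neighbours in enumerate(ind):
        votes = Counter(labels[j] for j in neighbours if j != i)
        preds.append(votes.most_common(1)[0][0])
    return np.asarray(preds)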
xpu = mx.cpu()
ae_model = AutoEncoderModel(xpu, [X.shape[1], 500, 500, 2000, znum],
                            pt_dropout=0.2)
print('Loading autoencoder of znum = {}, post training'.format(znum))
ae_model.load(os.path.join(save_to,
    'SAE_zsize{}_wimgfeatures_descStats_zeromean.arg'.format(str(znum))))
data_iter = mx.io.NDArrayIter({'data': X}, batch_size=X.shape[0],
                              shuffle=False, last_batch_handle='pad')
# extract only the encoder part of the SAE
feature = ae_model.encoder
zspace = model.extract_feature(feature, ae_model.args, None, data_iter,
                               X.shape[0], xpu).values()[0]
# pool Z-space variables
datalabels = np.asarray(y)
dataZspace = zspace
#####################
# unbiased assessment: split train/held-out test
#####################
# to compare performance we discard unknown labels and keep only known ones (B or M)
Z = dataZspace[datalabels != 'K', :]
y = datalabels[datalabels != 'K']
print '\n... MLP fully connected layer trained on Z_train tested on Z_test'
sep = int(X.shape[0] * 0.10)
Z_test = Z[:sep]
print("Validation error:", ae_model.eval(val_X)) if visualize: try: from matplotlib import pyplot as plt from model import extract_feature # sample a random image #index = np.random.choice(len(X)) index = 0 original_image = X[index] #print(json.dumps(original_image)) data_iter = mx.io.NDArrayIter({'data': [original_image]}, batch_size=1, shuffle=False, last_batch_handle='pad') # reconstruct the image # X_i = list(model.extract_feature( # self.internals[i-1], self.args, self.auxs, data_iter, len(X), # self.xpu).values())[0] reconstructed_image = extract_feature(ae_model.decoder, ae_model.args, ae_model.auxs, data_iter, 1, ae_model.xpu).values()[0] print("original image") plt.imshow(original_image.reshape((WIDTH, HEIGHT))) plt.show() print("reconstructed image") plt.imshow(reconstructed_image.reshape((WIDTH, HEIGHT))) plt.show() except ImportError: logging.info("matplotlib is required for visualization")
def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = list( model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) print np.std(np.bincount(y_pred)), np.bincount(y_pred) if (i == 0): self.tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') self.Z_tsne = self.tsne.fit_transform(z) ##################### # Z-space MLP fully coneected layer for classification ##################### # compare soft assignments with known labels (only B or M) print '\n... Updating i = %f' % i sep = int(z.shape[0] * 0.10) print(z.shape) datalabels = np.asarray(y_train) dataZspace = np.concatenate( (z, p), axis=1) #zbestacci #dec_model['zbestacci'] Z = dataZspace[datalabels != 'K', :] y = datalabels[datalabels != 'K'] print(Z) # Do a 5 fold cross-validation Z_test = Z[:sep] yZ_test = np.asanyarray(y[:sep] == 'M').astype(int) Z_train = Z[sep:] yZ_train = np.asanyarray(y[sep:] == 'M').astype(int) batch_size = 1 print(Z_test.shape) print(Z_train.shape) # Run classifier with cross-validation and plot ROC curves cv = StratifiedKFold(n_splits=5) # Evaluate a score by cross-validation tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) cvi = 0 for train, test in cv.split(Z_train, yZ_train): # Multilayer Perceptron MLP_train_iter = mx.io.NDArrayIter(Z_train[train], yZ_train[train], batch_size, shuffle=True) MLP_val_iter = mx.io.NDArrayIter( Z_train[test], yZ_train[test], batch_size) # We’ll define the MLP using MXNet’s symbolic interface dataMLP = mx.sym.Variable('data') #The following code declares two fully connected layers with 128 and 64 neurons each. #Furthermore, these FC layers are sandwiched between ReLU activation layers each #one responsible for performing an element-wise ReLU transformation on the FC layer output. # The first fully-connected layer and the corresponding activation function fc1 = mx.sym.FullyConnected(data=dataMLP, num_hidden=128) act1 = mx.sym.Activation(data=fc1, act_type="tanh") # data has 2 classes fc2 = mx.sym.FullyConnected(data=act1, num_hidden=2) # Softmax with cross entropy loss mlp = mx.sym.SoftmaxOutput(data=fc2, name='softmax') # create a trainable module on CPU mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu()) mlp_model.fit( MLP_train_iter, # train data eval_data=MLP_val_iter, # validation data optimizer='sgd', # use SGD to train optimizer_params={'learning_rate': 0.01}, # use fixed learning rate batch_end_callback=mx.callback.Speedometer( batch_size), eval_metric= 'acc', #MLPacc(yZ_val, Z_val), # report accuracy during training num_epoch=100 ) # train for at most 10 dataset passes #After the above training completes, we can evaluate the trained model by running predictions on validation data. #The following source code computes the prediction probability scores for each validation data. # prob[i][j] is the probability that the i-th validation contains the j-th output class. 
prob_val = mlp_model.predict(MLP_val_iter) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve( yZ_train[test], prob_val.asnumpy()[:, 1]) # to create an ROC with 100 pts tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) print roc_auc aucs.append(roc_auc) cvi += 1 # compute across all cvs mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = np.std(aucs) print r'cv meanROC (AUC = {0:.4f} $\pm$ {0:.4f})'.format( mean_auc, std_auc) Z_test_iter = mx.io.NDArrayIter(Z_test, None, batch_size) prob_test = mlp_model.predict(Z_test_iter) # Compute ROC curve and area the curve fpr_val, tpr_val, thresholds_val = roc_curve( yZ_test, prob_test.asnumpy()[:, 1]) self.auc_val = auc(fpr_val, tpr_val) print r'cv test (AUC = {0:.4f})'.format(self.auc_val) # compute Z-space Accuracy self.Acc = mean_auc self.best_args['initAcc'] = self.Acc # plot initial z figinint = plt.figure() axinint = figinint.add_subplot(1, 1, 1) plot_embedding_unsuper_NMEdist_intenh( self.Z_tsne, named_y, axinint, title='kmeans init tsne: AUC={}\n'.format(self.Acc), legend=True) figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() if (i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axprogress, title="Epoch %d z_tsne AUC (%f)" % (i, self.Acc), legend=False) self.ploti = self.ploti + 1 ##################### # Z-space MLP fully coneected layer for classification ##################### # compare soft assignments with known labels (only B or M) print '\n... Updating MLP fully coneected layer i = %f' % i sep = int(z.shape[0] * 0.10) print(z.shape) datalabels = np.asarray(y_train) dataZspace = np.concatenate( (z, p), axis=1) #zbestacci #dec_model['zbestacci'] Z = dataZspace[datalabels != 'K', :] y = datalabels[datalabels != 'K'] print(Z) # Do a 5 fold cross-validation Z_test = Z[:sep] yZ_test = np.asanyarray(y[:sep] == 'M').astype(int) Z_train = Z[sep:] yZ_train = np.asanyarray(y[sep:] == 'M').astype(int) batch_size = 1 print(Z_test.shape) print(Z_train.shape) # Run classifier with cross-validation and plot ROC curves cv = StratifiedKFold(n_splits=5) # Evaluate a score by cross-validation tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) cvi = 0 for train, test in cv.split(Z_train, yZ_train): # Multilayer Perceptron MLP_train_iter = mx.io.NDArrayIter(Z_train[train], yZ_train[train], batch_size, shuffle=True) MLP_val_iter = mx.io.NDArrayIter( Z_train[test], yZ_train[test], batch_size) # We’ll define the MLP using MXNet’s symbolic interface dataMLP = mx.sym.Variable('data') #The following code declares two fully connected layers with 128 and 64 neurons each. #Furthermore, these FC layers are sandwiched between ReLU activation layers each #one responsible for performing an element-wise ReLU transformation on the FC layer output. 
# The first fully-connected layer and the corresponding activation function fc1 = mx.sym.FullyConnected(data=dataMLP, num_hidden=128) act1 = mx.sym.Activation(data=fc1, act_type="tanh") # data has 2 classes fc2 = mx.sym.FullyConnected(data=act1, num_hidden=2) # Softmax with cross entropy loss mlp = mx.sym.SoftmaxOutput(data=fc2, name='softmax') # create a trainable module on CPU mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu()) mlp_model.fit( MLP_train_iter, # train data optimizer='sgd', # use SGD to train optimizer_params={'learning_rate': 0.01}, # use fixed learning rate eval_metric= 'acc', #MLPacc(yZ_val, Z_val), # report accuracy during training batch_end_callback=mx.callback.Speedometer( batch_size, 100), num_epoch=100 ) # train for at most 10 dataset passes #After the above training completes, we can evaluate the trained model by running predictions on validation data. #The following source code computes the prediction probability scores for each validation data. # prob[i][j] is the probability that the i-th validation contains the j-th output class. prob_val = mlp_model.predict(MLP_val_iter) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve( yZ_train[test], prob_val.asnumpy()[:, 1]) # to create an ROC with 100 pts tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) print roc_auc aucs.append(roc_auc) cvi += 1 # compute across all cvs mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = np.std(aucs) print r'cv meanROC (AUC = {0:.4f} $\pm$ {0:.4f})'.format( mean_auc, std_auc) Z_test_iter = mx.io.NDArrayIter(Z_test, None, batch_size) prob_test = mlp_model.predict(Z_test_iter) # Compute ROC curve and area the curve fpr_val, tpr_val, thresholds_val = roc_curve( yZ_test, prob_test.asnumpy()[:, 1]) self.auc_val = auc(fpr_val, tpr_val) print r'cv test (AUC = {0:.4f})'.format(self.auc_val) # compute Z-space Accuracy self.Acc = mean_auc # save best args self.best_args['acci'].append(self.Acc) if (self.Acc >= self.maxAcc): print 'Improving maxAUC = {0:.4f}'.format(self.Acc) self.logger.info('Improving maxAUC = {0:.4f}'.format( self.Acc)) self.logger.info('in test reachedAUC = {0:.4f} \n'.format( self.auc_val)) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = self.Acc self.best_args['pbestacci'] = p self.best_args['zbestacci'] = z self.best_args['Z_tsnebestacci'] = self.Z_tsne self.best_args['bestacci'].append(self.Acc) self.best_args['auc_val'].append(self.auc_val) self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy() ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: print '\n... Updating i = %f' % i weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. # tol% = 0.001 if i == self.best_args[ 'update_interval'] * 200: # performs 1epoch = 615/3 = 205*1000epochs self.best_args['y_pred'] = y_pred self.best_args['acci'].append(self.Acc) return True self.best_args['y_pred'] = y_pred
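The per-fold ROC handling above vertically averages the fold curves on a common FPR grid before reporting a mean AUC. The same step as a standalone sketch; note the distinct {0}/{1} placeholders for mean and spread, whereas the inline prints reuse {0} twice and therefore print the mean AUC in both positions:

import numpy as np
from numpy import interp
from sklearn.metrics import roc_curve, auc

def mean_cv_roc(y_true_folds, score_folds, n_grid=100):
    # vertically average per-fold ROC curves on a common FPR grid
    mean_fpr = np.linspace(0, 1, n_grid)
    tprs, aucs = [], []
    for y_true, score in zip(y_true_folds, score_folds):
        fpr, tpr, _ = roc_curve(y_true, score)
        tpr_i = interp(mean_fpr, fpr, tpr)
        tpr_i[0] = 0.0               # anchor the curve at (0, 0)
        tprs.append(tpr_i)
        aucs.append(auc(fpr, tpr))
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0               # anchor the curve at (1, 1)
    mean_auc, std_auc = auc(mean_fpr, mean_tpr), np.std(aucs)
    print 'cv meanROC (AUC = {0:.4f} +/- {1:.4f})'.format(mean_auc, std_auc)
    return mean_auc, std_auc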
if (key == 'dec_mu'):
    pass
else:
    dec_model.args[key] = mx.nd.array(best_args[key], ctx=dec_model.xpu)
# deal with centroids
dec_model.args['dec_mu'] = best_args['dec_mu']
# extract embedding space
all_iter = mx.io.NDArrayIter({'data': X}, batch_size=X.shape[0],
                             shuffle=False, last_batch_handle='pad')
## embedded point zi
zbestacci = model.extract_feature(dec_model.feature, dec_model.args, None,
                                  all_iter, X.shape[0],
                                  dec_model.xpu).values()[0]
y = np.asarray(roi_labels)
# For visualization we use t-SNE (van der Maaten & Hinton, 2008)
# applied to the embedded points zi
tsne = TSNE(n_components=2, perplexity=15, learning_rate=200, init='pca',
            random_state=0, verbose=2, method='exact')
Z_tsne = tsne.fit_transform(zbestacci)
# plot initial z
fig = plt.figure()
def cluster_varying_mu(self, X, y_dec, roi_labels, classes, batch_size, save_to, labeltype, update_interval=None): # y = y_dec X, y_dec, roi_labels, classes, batch_size, save_to, labeltype, update_in N = X.shape[0] self.best_args['update_interval'] = update_interval self.best_args['y_dec'] = y_dec self.best_args['roi_labels'] = roi_labels self.best_args['classes'] = classes self.best_args['batch_size'] = batch_size # selecting batch size # [42*t for t in range(42)] will produce 16 train epochs # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630] test_iter = mx.io.NDArrayIter({'data': X}, batch_size=N, shuffle=False, last_batch_handle='pad') args = { k: mx.nd.array(v.asnumpy(), ctx=self.xpu) for k, v in self.args.items() } ## embedded point zi z = model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values()[0] # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It self.perplexity = 25 self.learning_rate = 200 # reconstruct wordy labels list(Y)==named_y named_y = [classes[kc] for kc in y_dec] self.best_args['named_y'] = named_y # To initialize the cluster centers, we pass the data through # the initialized DNN to get embedded data points and then # perform standard k-means clustering in the feature space Z # to obtain k initial centroids {mu j} kmeans = KMeans(self.best_args['num_centers'], n_init=20) kmeans.fit(z) args['dec_mu'][:] = kmeans.cluster_centers_ ### KL DIVERGENCE MINIMIZATION. eq(2) # our model is trained by matching the soft assignment to the target distribution. # To this end, we define our objective as a KL divergence loss between # the soft assignments qi (pred) and the auxiliary distribution pi (label) solver = Solver( 'sgd', momentum=0.9, wd=0.0, learning_rate=0.01 ) # , lr_scheduler=mx.misc.FactorScheduler(20*update_interval,0.4)) #0.01 def ce(label, pred): return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0] solver.set_metric(mx.metric.CustomMetric(ce)) label_buff = np.zeros((X.shape[0], self.best_args['num_centers'])) train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff}, batch_size=self.best_args['batch_size'], shuffle=False, last_batch_handle='roll_over') self.best_args['y_pred'] = np.zeros((X.shape[0])) self.best_args['acci'] = [] self.best_args['bestacci'] = [] self.ploti = 0 figprogress = plt.figure(figsize=(20, 15)) print 'Batch_size = %f' % self.best_args['batch_size'] print 'update_interval = %f' % update_interval self.best_args['plot_interval'] = int(5 * update_interval) print 'plot_interval = %f' % self.best_args['plot_interval'] self.maxAcc = 0.0 def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = list( model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) print np.std(np.bincount(y_pred)), np.bincount(y_pred) # use a y that only considers the filledbyBC examples # compare soft assignments with known labels print '\n... 
Updating i = %f' % i allL = np.asarray(roi_labels) data = z[allL != 'K', :] datalabels = allL[allL != 'K'] RFmodel = RandomForestClassifier(n_jobs=2, n_estimators=200, random_state=0, verbose=0) # Evaluate a score by cross-validation # integer=5, to specify the number of folds in a (Stratified)KFold, scores = cross_val_score(RFmodel, data, datalabels, cv=5) # do for overall class B and M labels = np.asarray(self.best_args['named_y']) Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=6) # This finds the indices of 5 closest neighbors nme_dist = [ 'Diffuse', 'Focal', 'Linear', 'MultipleRegions', 'Regional', 'Segmental', 'N/A' ] nme_intenh = [ 'Clumped', 'ClusteredRing', 'Heterogeneous', 'Homogeneous', 'Stippled or punctate', 'N/A' ] wnme_dist = np.zeros((len(nme_dist), len(nme_dist)), dtype=np.int64) wnme_intenh = np.zeros((len(nme_intenh), len(nme_intenh)), dtype=np.int64) NME_descriptorate = [] kznme = 0 for k in range(z.shape[0]): iclass = labels[k] dist, ind = Z_embedding_tree.query([z[k]], k=6) dist5nn, ind5nn = dist[k != ind], ind[k != ind] class5nn = labels[ind5nn] if (len(class5nn) > 0 and (iclass.split('_')[1] != 'N/A' or iclass.split('_')[1] != 'N/A')): # increment detections for final NME descriptor accuracy kznme += 1 prednmed = [] prednmed.append( sum([ lab.split('_')[1] == 'Diffuse' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Focal' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Linear' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'MultipleRegions' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Regional' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Segmental' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'N/A' for lab in class5nn ])) prednmeie = [] prednmeie.append( sum([ lab.split('_')[2] == 'Clumped' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'ClusteredRing' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'Heterogeneous' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'Homogeneous' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'Stippled or punctate' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'N/A' for lab in class5nn ])) # predicion based on majority pred_nme_dist = [ nme_dist[l] for l, pc in enumerate(prednmed) if pc >= max(prednmed) and max(prednmed) > 0 ] pred_nme_intenh = [ nme_intenh[l] for l, pc in enumerate(prednmeie) if pc >= max(prednmeie) and max(prednmeie) > 0 ] # allow a second kind of detection rate - a nme similar local neighborhood if (iclass.split('_')[1] in pred_nme_dist or iclass.split('_')[2] in pred_nme_intenh): NME_descriptorate.append(1) # for nme_dist label_nme_dist = [ l for l, pc in enumerate(nme_dist) if iclass.split('_')[1] == pc ] labelpred_nme_dist = [ l for l, pc in enumerate(prednmed) if pc >= max(prednmed) and max(prednmed) > 0 ] for u in label_nme_dist: for v in labelpred_nme_dist: wnme_dist[v, u] += 1 # for nme_intenh label_nme_intenh = [ l for l, pc in enumerate(nme_intenh) if iclass.split('_')[2] == pc ] labelpred_intenh = [ l for l, pc in enumerate(prednmeie) if pc >= max(prednmeie) and max(prednmeie) > 0 ] for u in label_nme_intenh: for v in labelpred_intenh: wnme_intenh[v, u] += 1 # compute Z-space Accuracy Acc = scores.mean() print "cvRF Z-space Accuracy = %f " % Acc print scores.tolist() NME_Acc = sum(np.asarray(NME_descriptorate)) / float(kznme) print "NME decriptor 
agreenment (NMErate) = %f " % NME_Acc indwnme_dist = linear_assignment(wnme_dist.max() - wnme_dist) indwnme_intenh = linear_assignment(wnme_intenh.max() - wnme_intenh) Acc5nn_nme_dist = sum( [wnme_dist[v, u] for v, u in indwnme_dist]) / float(kznme) Acc5nn_nme_intenh = sum( [wnme_intenh[v, u] for v, u in indwnme_intenh]) / float(kznme) print "Acc5nn_nme_dist (DIST) = %f " % Acc5nn_nme_dist print "Acc5nn_nme_intenh (INT_ENH) = %f " % Acc5nn_nme_intenh if (i == 0): tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) self.best_args['initAcc'] = Acc # plot initial z figinint = plt.figure() axinint = figinint.add_subplot(1, 1, 1) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axinint, title= 'kmeans init tsne: Acc={}\n NME_Acc={}\n Acc5nn_nme_dist={}\n Acc5nn_nme_intenh={}' .format(Acc, NME_Acc, Acc5nn_nme_dist, Acc5nn_nme_intenh), legend=True) figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() # save best args self.best_args['acci'].append(Acc) if (Acc >= self.maxAcc): print 'Improving maxAcc = {}'.format(Acc) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = Acc self.best_args['zbestacci'] = z self.best_args['bestacci'].append(Acc) self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy() self.best_args['NME_Acc'] = NME_Acc self.best_args['Acc5nn_nme_dist'] = Acc5nn_nme_dist self.best_args['Acc5nn_nme_intenh'] = Acc5nn_nme_intenh if (i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axprogress, title="Epoch %d z_tsne iter (%d)" % (self.ploti, i), legend=False) self.ploti = self.ploti + 1 ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. 
# tol% = 0.001 if i == self.best_args[ 'update_interval'] * 100: # performs 1epoch = 615/3 = 205*1000epochs self.best_args['y_pred'] = y_pred self.best_args['p'] = p self.best_args['z'] = z self.best_args['acci'].append(Acc) return True self.best_args['y_pred'] = y_pred self.best_args['p'] = p self.best_args['z'] = z # start solver solver.set_iter_start_callback(refresh) solver.set_monitor(Monitor(50)) solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter, 0, 1000000000, {}, False) self.end_args = args self.best_args['end_args'] = args # finish figprogress = plt.gcf() figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() # plot final z figfinal = plt.figure() axfinal = figfinal.add_subplot(1, 1, 1) tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(self.best_args['zbestacci']) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, self.best_args['named_y'], axfinal, title= 'final tsne: Acc={}\n NME_Acc={} \n Acc5nn_nme_dist={}\n Acc5nn_nme_intenh={}' .format(self.best_args['bestacci'][-1], self.best_args['NME_Acc'], self.best_args['Acc5nn_nme_dist'], self.best_args['Acc5nn_nme_intenh']), legend=True) figfinal.savefig('{}\\tsne_final_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() outdict = { 'acc': self.best_args['acci'], 'bestacc': self.best_args['bestacci'], 'NME_Acc': self.best_args['NME_Acc'], 'Acc5nn_nme_dist': self.best_args['Acc5nn_nme_dist'], 'Acc5nn_nme_intenh': self.best_args['Acc5nn_nme_intenh'], 'p': self.best_args['p'], 'z': self.best_args['z'], 'y_pred': self.best_args['y_pred'], 'named_y': self.best_args['named_y'], 'num_centers': self.best_args['num_centers'] } return outdict
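dec_op.forward([z, mu], [p]) is used throughout as a black box that fills p with the soft assignments qi. Under the DEC formulation this is a Student's t kernel over distances to the centroids; a NumPy sketch, assuming the degrees-of-freedom alpha = 1.0 that DECModel is constructed with in the grid search further below:

import numpy as np

def soft_assignments(z, mu, alpha=1.0):
    # q_ij proportional to (1 + ||z_i - mu_j||^2 / alpha)^(-(alpha+1)/2),
    # row-normalized; z: (N, d) embeddings, mu: (K, d) centroids
    dist2 = np.sum((z[:, None, :] - mu[None, :, :]) ** 2, axis=2)
    q = (1.0 + dist2 / alpha) ** (-(alpha + 1.0) / 2.0)
    return q / q.sum(axis=1, keepdims=True)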
def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values()[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) print np.std(np.bincount(y_pred)), np.bincount(y_pred) # use a y that only considers the filledbyBC examples # compare soft assignments with known labels print '\n... Updating i = %f' % i # do for overall class B and M Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=5) # This finds the indices of 5 closest neighbors labels = np.asarray(self.best_args['roi_labels']) Neg = sum(labels == np.unique(labels)[0]) #for B Pos = sum(labels == np.unique(labels)[2]) #for M TP = [] TN = [] for k in range(z.shape[0]): iclass = labels[k] dist, ind = Z_embedding_tree.query([z[k]], k=6) dist5nn, ind5nn = dist[k != ind], ind[k != ind] class5nn = labels[ind5nn] # exlcude U class class5nn = class5nn[class5nn != 'K'] if (len(class5nn) > 0): predc = [] for c in np.unique(class5nn): predc.append(sum(class5nn == c)) # predicion based on majority predclass = np.unique(class5nn)[predc == max(predc)] if (len(predclass) == 1): # compute TP if M if (iclass == 'M'): TP.append(predclass[0] == iclass) # compute TN if B if (iclass == 'B'): TN.append(predclass[0] == iclass) if (len(predclass) == 2): # compute TP if M if (iclass == 'M'): TP.append(predclass[1] == iclass) # compute TN if B if (iclass == 'B'): TN.append(predclass[0] == iclass) # compute TPR and TNR TPR = sum(TP) / float(Pos) TNR = sum(TN) / float(Neg) Acc = sum(TP + TN) / float(Pos + Neg) print "True Posite Rate (TPR) = %f " % TPR print "True Negative Rate (TNR) = %f " % TNR print "Accuracy (Acc) = %f " % Acc # save best args self.best_args['acci'].append(Acc) if (Acc >= self.maxAcc): print 'Improving maxAcc = {}'.format(Acc) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = Acc self.best_args['bestacci'].append(Acc) self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy() if (i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) ax = fig.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, ax, title="Epoch %d z_tsne iter (%d)" % (self.ploti, i), legend=False) self.ploti = self.ploti + 1 ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. 
            # tol% = 0.001
            if i == self.best_args['update_interval'] * 600:  # performs 1 epoch = 615/3 = 205*1000 epochs
                self.best_args['y_pred'] = y_pred
                self.best_args['p'] = p
                self.best_args['z'] = z
                self.best_args['acci'].append(Acc)
                return True
            self.best_args['y_pred'] = y_pred
            self.best_args['p'] = p
            self.best_args['z'] = z
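Each refresh prints the two quantities of the tolerance rule described in its comments (points that changed assignment vs. 0.1% of the sample) but actually stops on a fixed iteration cap. The described rule, as a small helper one could swap into the condition:

import numpy as np

def should_stop(y_pred, y_pred_last, tol=0.001):
    # stop when fewer than tol (0.1%) of points changed cluster assignment
    # between two consecutive update intervals
    delta = np.sum(y_pred != y_pred_last) / float(y_pred.shape[0])
    return delta < tol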
def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = list( model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) print np.std(np.bincount(y_pred)), np.bincount(y_pred) # use a y that only considers the filledbyBC examples # compare soft assignments with known labels print '\n... Updating i = %f' % i allL = np.asarray(roi_labels) data = z[allL != 'K', :] datalabels = allL[allL != 'K'] RFmodel = RandomForestClassifier(n_jobs=2, n_estimators=200, random_state=0, verbose=0) # Evaluate a score by cross-validation # integer=5, to specify the number of folds in a (Stratified)KFold, scores = cross_val_score(RFmodel, data, datalabels, cv=5) # do for overall class B and M labels = np.asarray(self.best_args['named_y']) Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=6) # This finds the indices of 5 closest neighbors nme_dist = [ 'Diffuse', 'Focal', 'Linear', 'MultipleRegions', 'Regional', 'Segmental', 'N/A' ] nme_intenh = [ 'Clumped', 'ClusteredRing', 'Heterogeneous', 'Homogeneous', 'Stippled or punctate', 'N/A' ] wnme_dist = np.zeros((len(nme_dist), len(nme_dist)), dtype=np.int64) wnme_intenh = np.zeros((len(nme_intenh), len(nme_intenh)), dtype=np.int64) NME_descriptorate = [] kznme = 0 for k in range(z.shape[0]): iclass = labels[k] dist, ind = Z_embedding_tree.query([z[k]], k=6) dist5nn, ind5nn = dist[k != ind], ind[k != ind] class5nn = labels[ind5nn] if (len(class5nn) > 0 and (iclass.split('_')[1] != 'N/A' or iclass.split('_')[1] != 'N/A')): # increment detections for final NME descriptor accuracy kznme += 1 prednmed = [] prednmed.append( sum([ lab.split('_')[1] == 'Diffuse' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Focal' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Linear' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'MultipleRegions' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Regional' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'Segmental' for lab in class5nn ])) prednmed.append( sum([ lab.split('_')[1] == 'N/A' for lab in class5nn ])) prednmeie = [] prednmeie.append( sum([ lab.split('_')[2] == 'Clumped' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'ClusteredRing' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'Heterogeneous' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'Homogeneous' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'Stippled or punctate' for lab in class5nn ])) prednmeie.append( sum([ lab.split('_')[2] == 'N/A' for lab in class5nn ])) # predicion based on majority pred_nme_dist = [ nme_dist[l] for l, pc in enumerate(prednmed) if pc >= max(prednmed) and max(prednmed) > 0 ] pred_nme_intenh = [ nme_intenh[l] for l, pc in enumerate(prednmeie) if pc >= max(prednmeie) and max(prednmeie) > 0 ] # allow a second kind of detection rate - a nme similar local neighborhood if (iclass.split('_')[1] in pred_nme_dist or iclass.split('_')[2] in pred_nme_intenh): NME_descriptorate.append(1) # for nme_dist label_nme_dist = [ l for l, pc in enumerate(nme_dist) if iclass.split('_')[1] == pc ] labelpred_nme_dist = [ l for l, pc in enumerate(prednmed) if pc >= max(prednmed) and max(prednmed) > 0 ] for u in label_nme_dist: 
for v in labelpred_nme_dist: wnme_dist[v, u] += 1 # for nme_intenh label_nme_intenh = [ l for l, pc in enumerate(nme_intenh) if iclass.split('_')[2] == pc ] labelpred_intenh = [ l for l, pc in enumerate(prednmeie) if pc >= max(prednmeie) and max(prednmeie) > 0 ] for u in label_nme_intenh: for v in labelpred_intenh: wnme_intenh[v, u] += 1 # compute Z-space Accuracy Acc = scores.mean() print "cvRF Z-space Accuracy = %f " % Acc print scores.tolist() NME_Acc = sum(np.asarray(NME_descriptorate)) / float(kznme) print "NME decriptor agreenment (NMErate) = %f " % NME_Acc indwnme_dist = linear_assignment(wnme_dist.max() - wnme_dist) indwnme_intenh = linear_assignment(wnme_intenh.max() - wnme_intenh) Acc5nn_nme_dist = sum( [wnme_dist[v, u] for v, u in indwnme_dist]) / float(kznme) Acc5nn_nme_intenh = sum( [wnme_intenh[v, u] for v, u in indwnme_intenh]) / float(kznme) print "Acc5nn_nme_dist (DIST) = %f " % Acc5nn_nme_dist print "Acc5nn_nme_intenh (INT_ENH) = %f " % Acc5nn_nme_intenh if (i == 0): tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) self.best_args['initAcc'] = Acc # plot initial z figinint = plt.figure() axinint = figinint.add_subplot(1, 1, 1) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axinint, title= 'kmeans init tsne: Acc={}\n NME_Acc={}\n Acc5nn_nme_dist={}\n Acc5nn_nme_intenh={}' .format(Acc, NME_Acc, Acc5nn_nme_dist, Acc5nn_nme_intenh), legend=True) figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() # save best args self.best_args['acci'].append(Acc) if (Acc >= self.maxAcc): print 'Improving maxAcc = {}'.format(Acc) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = Acc self.best_args['zbestacci'] = z self.best_args['bestacci'].append(Acc) self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy() self.best_args['NME_Acc'] = NME_Acc self.best_args['Acc5nn_nme_dist'] = Acc5nn_nme_dist self.best_args['Acc5nn_nme_intenh'] = Acc5nn_nme_intenh if (i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axprogress, title="Epoch %d z_tsne iter (%d)" % (self.ploti, i), legend=False) self.ploti = self.ploti + 1 ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. 
            # tol% = 0.001
            if i == self.best_args['update_interval'] * 100:  # performs 1 epoch = 615/3 = 205*1000 epochs
                self.best_args['y_pred'] = y_pred
                self.best_args['p'] = p
                self.best_args['z'] = z
                self.best_args['acci'].append(Acc)
                return True
            self.best_args['y_pred'] = y_pred
            self.best_args['p'] = p
            self.best_args['z'] = z
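linear_assignment above finds the best one-to-one matching between 5-NN-predicted descriptor bins and true bins before scoring, so the reported Acc5nn numbers are invariant to a permutation of the bins. A sketch of that step (linear_assignment lives in sklearn.utils.linear_assignment_ in the scikit-learn versions this code targets; scipy.optimize.linear_sum_assignment is the modern equivalent):

import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment

def best_match_accuracy(w, n_scored):
    # w[v, u]: count of times predicted bin v co-occurred with true bin u;
    # maximizing matched counts == minimizing the cost (w.max() - w)
    ind = linear_assignment(w.max() - w)
    return sum([w[v, u] for v, u in ind]) / float(n_scored)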
def cluster(self, X_train, y_dec_train, y_train, classes, batch_size, save_to, labeltype, update_interval=None): N = X_train.shape[0] self.best_args['update_interval'] = update_interval self.best_args['y_dec'] = y_dec_train self.best_args['roi_labels'] = y_train self.best_args['classes'] = classes self.best_args['batch_size'] = batch_size # selecting batch size # [42*t for t in range(42)] will produce 16 train epochs # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630] test_iter = mx.io.NDArrayIter({'data': X_train}, batch_size=N, shuffle=False, last_batch_handle='pad') args = { k: mx.nd.array(v.asnumpy(), ctx=self.xpu) for k, v in self.args.items() } ## embedded point zi z = model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values()[0] # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It self.perplexity = 15 self.learning_rate = 125 # reconstruct wordy labels list(Y)==named_y named_y = [classes[kc] for kc in y_dec_train] self.best_args['named_y'] = named_y # To initialize the cluster centers, we pass the data through # the initialized DNN to get embedded data points and then # perform standard k-means clustering in the feature space Z # to obtain k initial centroids {mu j} kmeans = KMeans(self.best_args['num_centers'], n_init=20) kmeans.fit(z) args['dec_mu'][:] = kmeans.cluster_centers_ ### KL DIVERGENCE MINIMIZATION. eq(2) # our model is trained by matching the soft assignment to the target distribution. # To this end, we define our objective as a KL divergence loss between # the soft assignments qi (pred) and the auxiliary distribution pi (label) solver = Solver( 'sgd', momentum=0.9, wd=0.0, learning_rate=0.1, lr_scheduler=mx.misc.FactorScheduler(20 * update_interval, 0.5) ) # , lr_scheduler=mx.misc.FactorScheduler(20*update_interval,0.4)) #0.01 def ce(label, pred): return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0] solver.set_metric(mx.metric.CustomMetric(ce)) label_buff = np.zeros( (X_train.shape[0], self.best_args['num_centers'])) train_iter = mx.io.NDArrayIter({'data': X_train}, {'label': label_buff}, batch_size=self.best_args['batch_size'], shuffle=False, last_batch_handle='roll_over') self.best_args['y_pred'] = np.zeros((X_train.shape[0])) self.best_args['acci'] = [] self.best_args['bestacci'] = [] self.ploti = 0 figprogress = plt.figure(figsize=(20, 15)) print 'Batch_size = %f' % self.best_args['batch_size'] print 'update_interval = %f' % update_interval self.best_args['plot_interval'] = int(20 * update_interval) print 'plot_interval = %f' % self.best_args['plot_interval'] self.maxAcc = 0.0 def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = list( model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values())[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) #print np.std(np.bincount(y_dec_train)), np.bincount(y_dec_train) print np.std(np.bincount(y_pred)), np.bincount(y_pred) ##################### # Z-space CV RF classfier METRICS ##################### # compare soft assignments with known labels (only B or M) print '\n... 
Updating i = %f' % i allL = np.asarray(y_train) dataZspace = np.concatenate( (z[allL != 'K', :], np.reshape(y_pred[allL != 'K'], (-1, 1))), axis=1) ydatalabels = np.asarray(allL[allL != 'K'] == 'M').astype( int) # malignant is positive class cv = StratifiedKFold(n_splits=5) RFmodel = RandomForestClassifier(n_jobs=2, n_estimators=500, random_state=0, verbose=0) # Evaluate a score by cross-validation tprs = [] aucs = [] mean_fpr = np.linspace(0, 1, 100) cvi = 0 for train, test in cv.split(dataZspace, ydatalabels): probas = RFmodel.fit(dataZspace[train], ydatalabels[train]).predict_proba( dataZspace[test]) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(ydatalabels[test], probas[:, 1]) # to create an ROC with 100 pts tprs.append(interp(mean_fpr, fpr, tpr)) tprs[-1][0] = 0.0 roc_auc = auc(fpr, tpr) aucs.append(roc_auc) cvi += 1 mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) # integer=5, to specify the number of folds in a (Stratified)KFold, #scores_BorM = cross_val_score(RFmodel, data, datalabels, cv=5) # compute Z-space Accuracy #Acc = scores_BorM.mean() Acc = mean_auc print "cvRF BorM mean_auc = %f " % Acc #print scores_BorM.tolist() if (i == 0): tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) self.best_args['initAcc'] = Acc # plot initial z figinint = plt.figure() axinint = figinint.add_subplot(1, 1, 1) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axinint, title='kmeans init tsne: Acc={}'.format(Acc), legend=True) figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() # save best args self.best_args['acci'].append(Acc) if (Acc >= self.maxAcc): print 'Improving mean_auc = {}'.format(Acc) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = Acc self.best_args['pbestacci'] = p self.best_args['zbestacci'] = z self.best_args['bestacci'].append(Acc) self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy() if (i > 0 and i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, axprogress, title="Epoch %d z_tsne Acc (%f)" % (i, Acc), legend=False) self.ploti = self.ploti + 1 ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. 
# tol% = 0.001 if i == self.best_args[ 'update_interval'] * 200: # performs 1epoch = 615/3 = 205*1000epochs self.best_args['y_pred'] = y_pred self.best_args['acci'].append(Acc) return True self.best_args['y_pred'] = y_pred # start solver solver.set_iter_start_callback(refresh) solver.set_monitor(Monitor(20)) solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter, 0, 1000000000, {}, False) self.end_args = args self.best_args['end_args'] = args # finish figprogress = plt.gcf() figprogress.savefig('{}\\tsne_progress_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() # plot final z figfinal = plt.figure() axfinal = figfinal.add_subplot(1, 1, 1) tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(self.best_args['zbestacci']) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, self.best_args['named_y'], axfinal, title='final tsne: Acc={}'.format(self.best_args['bestacci'][-1]), legend=True) figfinal.savefig('{}\\tsne_final_z{}_mu{}_{}.pdf'.format( save_to, self.best_args['znum'], self.best_args['num_centers'], labeltype), bbox_inches='tight') plt.close() outdict = { 'initAcc': self.best_args['initAcc'], 'acci': self.best_args['acci'], 'bestacci': self.best_args['bestacci'], 'pbestacci': self.best_args['pbestacci'], 'zbestacci': self.best_args['zbestacci'], 'dec_mubestacci': self.best_args['dec_mu'], 'y_pred': self.best_args['y_pred'], 'named_y': self.best_args['named_y'], 'classes': self.best_args['classes'], 'num_centers': self.best_args['num_centers'], 'znum': self.best_args['znum'], 'update_interval': self.best_args['update_interval'], 'batch_size': self.best_args['batch_size'] } return outdict
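For orientation, a hypothetical driver for cluster(), assembled from the DECModel constructor call used in the grid search below and the argument names used elsewhere in these snippets; the paths and hyperparameter values are placeholders, not values from the source:

import mxnet as mx

# positional args follow the DECModel(...) call in the grid search:
# (xpu, X, num_centers, alpha, znum, pretrained_SAE_path)
dec_model = DECModel(mx.cpu(), X_train, 3, 1.0, 10, 'save_to/SAEmodels')
outdict = dec_model.cluster(X_train, y_dec_train, y_train, classes,
                            batch_size=125, save_to='results',
                            labeltype='wimgfeatures',
                            update_interval=42)
print 'best cv AUC = {}'.format(outdict['bestacci'][-1])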
def solve(self, X, R, V, lambda_v_rt, lambda_u, lambda_v, dir_save,
          batch_size, xpu, sym, args, args_grad, auxs, data_iter,
          begin_iter, end_iter, args_lrmult={}, debug=False):
    # names and shapes
    input_desc = data_iter.provide_data + data_iter.provide_label
    input_names = [k for k, shape in input_desc]
    # places to store them
    input_buffs = [mx.nd.empty(shape, ctx=xpu) for k, shape in input_desc]
    args = dict(args, **dict(zip(input_names, input_buffs)))
    # list all outputs (strings)
    output_names = sym.list_outputs()
    if debug:
        sym = sym.get_internals()
        blob_names = sym.list_outputs()
        sym_group = []
        for i in range(len(blob_names)):
            if blob_names[i] not in args:
                x = sym[i]
                if blob_names[i] not in output_names:
                    x = mx.symbol.BlockGrad(x, name=blob_names[i])
                sym_group.append(x)
        sym = mx.symbol.Group(sym_group)
    # bind the network params to the network (symbol)
    exe = sym.bind(xpu, args=args, args_grad=args_grad, aux_states=auxs)
    assert len(sym.list_arguments()) == len(exe.grad_arrays)
    #print(exe.grad_arrays)
    update_dict = {
        name: nd
        for name, nd in zip(sym.list_arguments(), exe.grad_arrays)
        if nd is not None
    }
    batch_size = input_buffs[0].shape[0]
    self.optimizer.rescale_grad = 1.0 / batch_size
    self.optimizer.set_lr_mult(args_lrmult)
    output_dict = {}
    output_buff = {}
    internal_dict = dict(zip(input_names, input_buffs))
    # exe.outputs is a list of all output ndarrays
    for key, arr in zip(sym.list_outputs(), exe.outputs):
        if key in output_names:
            output_dict[key] = arr
            output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
        else:
            internal_dict[key] = arr
    # init U
    U = np.mat(np.zeros((R.shape[0], V.shape[1])))
    # set lambda_v_rt to 0 in the first epoch
    lambda_v_rt_old = np.zeros(lambda_v_rt.shape)
    lambda_v_rt_old[:] = lambda_v_rt[:]
    lambda_v_rt[:, :] = 0
    epoch = 0  # index epochs
    data_iter = mx.io.NDArrayIter(
        {'data': X, 'V': V, 'lambda_v_rt': lambda_v_rt},
        batch_size=batch_size, shuffle=False, last_batch_handle='pad')
    data_iter.reset()
    for i in range(begin_iter, end_iter):
        if self.iter_start_callback is not None:
            if self.iter_start_callback(i):
                return
        #if i==100:
        #    V = np.zeros(V.shape)
        #    data_iter = mx.io.NDArrayIter(
        #        {'data': X, 'V': V, 'lambda_v_rt': lambda_v_rt},
        #        batch_size=batch_size, shuffle=False, last_batch_handle='pad')
        #    data_iter.reset()
        #    for j in range(10):
        #        batch = data_iter.next()
        try:
            batch = data_iter.next()
        except StopIteration:  # means the end of an epoch
            epoch += 1
            theta = list(model.extract_feature(sym[0], args, auxs, data_iter,
                                               X.shape[0], xpu).values())[0]
            # update U, V and get BCD loss
            U, V, BCD_loss = BCD_one(R, U, V, theta, lambda_u, lambda_v,
                                     dir_save, True)
            # get reconstruction loss
            Y = list(model.extract_feature(sym[1], args, auxs, data_iter,
                                           X.shape[0], xpu).values())[0]
            Recon_loss = lambda_v / np.square(lambda_v_rt_old[0, 0]) * \
                np.sum(np.square(Y - X)) / 2.0
            print("Epoch %d - tr_err/bcd_err/rec_err: %.1f/%.1f/%.1f" %
                  (epoch, BCD_loss + Recon_loss, BCD_loss, Recon_loss))
            fp = open(dir_save + '/cdl.log', 'a')
            fp.write("Epoch %d - tr_err/bcd_err/rec_err: %.1f/%.1f/%.1f\n" %
                     (epoch, BCD_loss + Recon_loss, BCD_loss, Recon_loss))
            fp.close()
            lambda_v_rt[:] = lambda_v_rt_old[:]  # back to normal lambda_v_rt
            data_iter = mx.io.NDArrayIter(
                {'data': X, 'V': V, 'lambda_v_rt': lambda_v_rt},
                batch_size=batch_size, shuffle=False, last_batch_handle='pad')
            data_iter.reset()
            batch = data_iter.next()
        for data, buff in zip(batch.data + batch.label, input_buffs):
            # copy data from batch to input_buffs, which are used during
            # forward and backward: buffs -> args -> exe
            data.copyto(buff)
        exe.forward(is_train=True)
        if self.monitor is not None:
            self.monitor.forward_end(i, internal_dict)
        for key in output_dict:
            # output_buff is used for computing metrics
            output_dict[key].copyto(output_buff[key])
        exe.backward()
        for key, arr in update_dict.items():
            self.updater(key, arr, args[key])
        if self.metric is not None:
            self.metric.update([input_buffs[-1]],
                               [output_buff[output_names[0]]])
        if self.monitor is not None:
            self.monitor.backward_end(i, args, update_dict, self.metric)
        if self.iter_end_callback is not None:
            if self.iter_end_callback(i):
                return
        exe.outputs[0].wait_to_read()
    #Y = model.extract_feature(sym[0], args, auxs,
    #                          data_iter, X.shape[0], xpu).values()[0]
    #print Y
    #print Y.shape
    theta = list(model.extract_feature(sym[0], args, auxs, data_iter,
                                       X.shape[0], xpu).values())[0]
    U, V, BCD_loss = BCD_one(R, U, V, theta, lambda_u, lambda_v,
                             dir_save, True, 20)
    return U, V, theta, BCD_loss
for ik, znum in enumerate(latent_size):
    valAUC = []
    cvRSAEinitKmeansAUC = []
    for ic, num_centers in enumerate(varying_mu):
        X = combX_allNME
        y = roi_labels
        print('Loading autoencoder of znum = {}, mu = {}, post training DEC results'.format(znum, num_centers))
        dec_model = DECModel(mx.cpu(), X, num_centers, 1.0, znum,
                             'Z:\\Cristina\\Section3\\paper_notes_section3_MODIFIED\\save_to\\SAEmodels')
        # extract zSpace after initialization of autoencoder
        test_iter = mx.io.NDArrayIter({'data': X}, batch_size=X.shape[0],
                                      shuffle=False, last_batch_handle='pad')
        args = {k: mx.nd.array(v.asnumpy(), ctx=dec_model.xpu)
                for k, v in dec_model.args.items()}
        ## embedded point zi
        zspace = model.extract_feature(dec_model.feature, args, None, test_iter,
                                       X.shape[0], dec_model.xpu).values()[0]
        # compute model-based best-pspace or dec_model['pspace']
        pspace = np.zeros((zspace.shape[0], dec_model.num_centers))
        dec_model.dec_op.forward([zspace, args['dec_mu'].asnumpy()], [pspace])
        # pool Z-space variables
        datalabels = np.asarray(y)
        dataZspace = np.concatenate((zspace, pspace), axis=1)
        #####################
        # unbiased assessment: split train/held-out test
        #####################
        # to compare performance we discard unknown labels and keep only known ones (B or M)
        Z = dataZspace[datalabels != 'K', :]
        y = datalabels[datalabels != 'K']
def cluster_varying_mu(self, X, y_dec, roi_labels, classes, save_to, labeltype, update_interval=None): # y = y_dec N = X.shape[0] self.best_args['update_interval'] = update_interval self.best_args['y_dec'] = y_dec self.best_args['roi_labels'] = roi_labels self.best_args['classes'] = classes # selecting batch size # [42*t for t in range(42)] will produce 16 train epochs # [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630] batch_size = self.best_args['batch_size'] #615/3 42 #256 test_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=False, last_batch_handle='pad') args = { k: mx.nd.array(v.asnumpy(), ctx=self.xpu) for k, v in self.args.items() } ## embedded point zi z = model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values()[0] # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It self.perplexity = 15 self.learning_rate = 200 tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) # plot initial z fig = plt.figure() ax = fig.add_subplot(1, 1, 1) # reconstruct wordy labels list(Y)==named_y named_y = [classes[kc] for kc in y_dec] self.best_args['named_y'] = named_y plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, ax, title='{} tsne with perplexity {}'.format(labeltype, self.perplexity), legend=True) fig.savefig(save_to + os.sep + 'iter1_tsne_init_z' + str(self.best_args['znum']) + '_varying_mu' + str(self.best_args['num_centers']) + '.pdf', bbox_inches='tight') plt.close() # To initialize the cluster centers, we pass the data through # the initialized DNN to get embedded data points and then # perform standard k-means clustering in the feature space Z # to obtain k initial centroids {mu j} kmeans = KMeans(self.best_args['num_centers'], n_init=20) kmeans.fit(z) args['dec_mu'][:] = kmeans.cluster_centers_ ### KL DIVERGENCE MINIMIZATION. eq(2) # our model is trained by matching the soft assignment to the target distribution. 
# To this end, we define our objective as a KL divergence loss between # the soft assignments qi (pred) and the auxiliary distribution pi (label) solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01, lr_scheduler=mx.misc.FactorScheduler( 100 * update_interval, 0.5)) #0.01 def ce(label, pred): return np.sum(label * np.log(label / (pred + 0.000001))) / label.shape[0] solver.set_metric(mx.metric.CustomMetric(ce)) label_buff = np.zeros((X.shape[0], self.best_args['num_centers'])) train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff}, batch_size=N, shuffle=False, last_batch_handle='roll_over') self.best_args['y_pred'] = np.zeros((X.shape[0])) self.best_args['acci'] = [] self.best_args['bestacci'] = [] self.ploti = 0 fig = plt.figure(figsize=(20, 15)) print 'Batch_size = %f' % batch_size print 'update_interval = %f' % update_interval self.best_args['plot_interval'] = int(25 * update_interval) print 'plot_interval = %f' % self.best_args['plot_interval'] self.maxAcc = 0.0 def refresh(i): # i=3, a full epoch occurs every i=798/48 if i % self.best_args['update_interval'] == 0: z = model.extract_feature(self.feature, args, None, test_iter, N, self.xpu).values()[0] p = np.zeros((z.shape[0], self.best_args['num_centers'])) self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p]) # the soft assignments qi (pred) y_pred = p.argmax(axis=1) print np.std(np.bincount(y_pred)), np.bincount(y_pred) # use a y that only considers the filledbyBC examples # compare soft assignments with known labels print '\n... Updating i = %f' % i # do for overall class B and M Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=5) # This finds the indices of 5 closest neighbors labels = np.asarray(self.best_args['roi_labels']) Neg = sum(labels == np.unique(labels)[0]) #for B Pos = sum(labels == np.unique(labels)[2]) #for M TP = [] TN = [] for k in range(z.shape[0]): iclass = labels[k] dist, ind = Z_embedding_tree.query([z[k]], k=6) dist5nn, ind5nn = dist[k != ind], ind[k != ind] class5nn = labels[ind5nn] # exlcude U class class5nn = class5nn[class5nn != 'K'] if (len(class5nn) > 0): predc = [] for c in np.unique(class5nn): predc.append(sum(class5nn == c)) # predicion based on majority predclass = np.unique(class5nn)[predc == max(predc)] if (len(predclass) == 1): # compute TP if M if (iclass == 'M'): TP.append(predclass[0] == iclass) # compute TN if B if (iclass == 'B'): TN.append(predclass[0] == iclass) if (len(predclass) == 2): # compute TP if M if (iclass == 'M'): TP.append(predclass[1] == iclass) # compute TN if B if (iclass == 'B'): TN.append(predclass[0] == iclass) # compute TPR and TNR TPR = sum(TP) / float(Pos) TNR = sum(TN) / float(Neg) Acc = sum(TP + TN) / float(Pos + Neg) print "True Posite Rate (TPR) = %f " % TPR print "True Negative Rate (TNR) = %f " % TNR print "Accuracy (Acc) = %f " % Acc # save best args self.best_args['acci'].append(Acc) if (Acc >= self.maxAcc): print 'Improving maxAcc = {}'.format(Acc) for key, v in args.items(): self.best_args[key] = args[key] self.maxAcc = Acc self.best_args['bestacci'].append(Acc) self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy() if (i % self.best_args['plot_interval'] == 0 and self.ploti <= 15): # Visualize the progression of the embedded representation in a subsample of data # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. 
It tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(z) ax = fig.add_subplot(4, 4, 1 + self.ploti) plot_embedding_unsuper_NMEdist_intenh( Z_tsne, named_y, ax, title="Epoch %d z_tsne iter (%d)" % (self.ploti, i), legend=False) self.ploti = self.ploti + 1 ## COMPUTING target distributions P ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster: weight = 1.0 / p.sum(axis=0) # p.sum provides fj weight *= self.best_args['num_centers'] / weight.sum() p = (p**2) * weight train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T print np.sum(y_pred != self.best_args['y_pred'] ), 0.001 * y_pred.shape[0] # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations. # tol% = 0.001 if i == self.best_args[ 'update_interval'] * 600: # performs 1epoch = 615/3 = 205*1000epochs self.best_args['y_pred'] = y_pred self.best_args['p'] = p self.best_args['z'] = z self.best_args['acci'].append(Acc) return True self.best_args['y_pred'] = y_pred self.best_args['p'] = p self.best_args['z'] = z # start solver solver.set_iter_start_callback(refresh) solver.set_monitor(Monitor(100)) solver.solve(self.xpu, self.loss, args, self.args_grad, None, train_iter, 0, 1000000000, {}, False) self.end_args = args self.best_args['end_args'] = args # finish fig = plt.gcf() fig.savefig(save_to + os.sep + 'iter1_tsne_progress_z' + str(self.best_args['znum']) + '_varying_mu' + str(self.best_args['num_centers']) + '.pdf', bbox_inches='tight') plt.close() # plot final z fig = plt.figure() ax = fig.add_subplot(1, 1, 1) tsne = TSNE(n_components=2, perplexity=self.perplexity, learning_rate=self.learning_rate, init='pca', random_state=0, verbose=2, method='exact') Z_tsne = tsne.fit_transform(self.best_args['z']) plot_embedding_unsuper_NMEdist_intenh(Z_tsne, named_y, ax, title="tsne with perplexity %d" % self.perplexity, legend=True) fig.savefig(save_to + os.sep + 'tsne_final_z' + str(self.best_args['znum']) + '_varying_mu' + str(self.best_args['num_centers']) + '.pdf', bbox_inches='tight') plt.close() outdict = { 'acc': self.best_args['acci'], 'bestacc': self.best_args['bestacci'], 'p': self.best_args['p'], 'z': self.best_args['z'], 'y_pred': self.best_args['y_pred'], 'named_y': self.best_args['named_y'], 'num_centers': self.best_args['num_centers'] } return outdict
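The BallTree loop above (and its TPR/TNR variant) amounts to k-nearest-neighbour majority voting in the embedded space, with the query point itself and unknown-label ('K') neighbours dropped. Condensed into one helper under the same assumptions (ties here are broken by first occurrence rather than the explicit two-way handling used above):

import numpy as np
import sklearn.neighbors

def knn_majority_vote(z, labels, k=5, leaf_size=5, exclude='K'):
    # predict each point's class from its k nearest embedded neighbours;
    # query k+1 and drop the point itself from its own neighbourhood
    tree = sklearn.neighbors.BallTree(z, leaf_size=leaf_size)
    preds = []
    for i in range(z.shape[0]):
        dist, ind = tree.query([z[i]], k=k + 1)
        neigh = labels[ind[0][ind[0] != i]]
        neigh = neigh[neigh != exclude]      # drop unknown-label cases
        if len(neigh) == 0:
            preds.append(None)
            continue
        vals, counts = np.unique(neigh, return_counts=True)
        preds.append(vals[np.argmax(counts)])  # majority class
    return preds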
def refresh(i):  # iteration-start callback; a full epoch occurs every ~798/48 iterations
    if i % self.best_args['update_interval'] == 0:
        z = list(model.extract_feature(self.feature, args, None, test_iter,
                                       N, self.xpu).values())[0]
        # the soft assignments qi (pred)
        p = np.zeros((z.shape[0], self.best_args['num_centers']))
        self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
        y_pred = p.argmax(axis=1)
        print np.std(np.bincount(y_pred)), np.bincount(y_pred)
        #####################
        # Z-space CV RF classifier METRICS
        #####################
        # compare soft assignments with known labels (only B or M)
        print '\n... Updating i = %d' % i
        datalabels = np.asarray(y_train)
        dataZspace = np.concatenate((z, p), axis=1)  # zbestacci #dec_model['zbestacci']
        Xdata = dataZspace[datalabels != 'K', :]
        ydatalabels = datalabels[datalabels != 'K']
        RFmodel = RandomForestClassifier(n_jobs=2, n_estimators=500,
                                         random_state=0, verbose=0)
        # evaluate a score by cross-validation; cv=5 specifies the number of
        # folds in a (Stratified)KFold
        scores_BorM = cross_val_score(RFmodel, Xdata, ydatalabels, cv=5)
        # compute Z-space accuracy
        Acc = scores_BorM.mean()
        print "cvRF BorM Accuracy = %f" % Acc
        print scores_BorM.tolist()
        # use only the filledbyBC examples (first 202 examples)
        nme_dist_label = [lab.split('_')[1]
                          for lab in self.best_args['named_y'][0:202]]
        nme_intenh_label = [lab.split('_')[2]
                            for lab in self.best_args['named_y'][0:202]]
        # compute Z-space accuracy for the NME descriptors
        scores_dist = cross_val_score(RFmodel, z[0:202], nme_dist_label, cv=5)
        print "cvRF nme_dist Accuracy = %f" % scores_dist.mean()
        scores_intenh = cross_val_score(RFmodel, z[0:202], nme_intenh_label, cv=5)
        print "cvRF nme_intenh Accuracy = %f" % scores_intenh.mean()
        #####################
        # CALCULATE 5nn METRICS
        #####################
        labels = np.asarray(self.best_args['named_y'])
        Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=4)
        nme_dist = ['Diffuse', 'Focal', 'Linear', 'MultipleRegions',
                    'Regional', 'Segmental']
        nme_intenh = ['Clumped', 'ClusteredRing', 'Heterogeneous',
                      'Homogeneous', 'Stippled or punctate']
        wnme_dist = np.zeros((len(nme_dist), len(nme_dist)), dtype=np.int64)
        wnme_intenh = np.zeros((len(nme_intenh), len(nme_intenh)), dtype=np.int64)
        BorM_diag = []
        TP = []
        TN = []
        FP = []
        FN = []
        missed = []
        NME_descript_dist = []
        NME_descript_intenh = []
        # count statistics
        for k in range(z.shape[0]):
            iclass = labels[k]
            # query k=2: each point returns itself plus its single nearest
            # neighbor (a true 5-NN vote, as in the variant above, needs k=6)
            dist, ind = Z_embedding_tree.query([z[k]], k=2)
            dist5nn, ind5nn = dist[k != ind], ind[k != ind]
            class5nn = labels[ind5nn]
            # compute DIAGNOSTIC ACC based on the NME-similar local neighborhood
            if (iclass.split('_')[0] != 'K'):
                predBorM = []
                predBorM.append(sum([lab.split('_')[0] == 'M' for lab in class5nn]))
                predBorM.append(sum([lab.split('_')[0] == 'B' for lab in class5nn]))
                predBorM.append(sum([lab.split('_')[0] == 'K' for lab in class5nn]))
                pred_BorM = [['M', 'B', 'K'][l]
                             for l, pc in enumerate(predBorM)
                             if pc >= max(predBorM) and max(predBorM) > 0][0]
                if (pred_BorM != 'K'):
                    if (iclass.split('_')[0] == pred_BorM):
                        BorM_diag.append(1)
                        # confusion table: TP if M, TN if B
                        if (iclass.split('_')[0] == 'M'):
                            TP.append(1)
                        if (iclass.split('_')[0] == 'B'):
                            TN.append(1)
                    if (iclass.split('_')[0] != pred_BorM):
                        if (iclass.split('_')[0] == 'M'):
                            FN.append(1)
                        if (iclass.split('_')[0] == 'B'):
                            FP.append(1)
                else:
                    missed.append(1)
            if (k <= 202 and iclass.split('_')[1] != 'N/A'):
                # increment detections for final NME descriptor (DIST) accuracy
                prednmed = []
                prednmed.append(sum([lab.split('_')[1] == 'Diffuse' for lab in class5nn]))
                prednmed.append(sum([lab.split('_')[1] == 'Focal' for lab in class5nn]))
                prednmed.append(sum([lab.split('_')[1] == 'Linear' for lab in class5nn]))
                prednmed.append(sum([lab.split('_')[1] == 'MultipleRegions' for lab in class5nn]))
                prednmed.append(sum([lab.split('_')[1] == 'Regional' for lab in class5nn]))
                prednmed.append(sum([lab.split('_')[1] == 'Segmental' for lab in class5nn]))
                # prediction based on majority voting
                pred_nme_dist = [nme_dist[l]
                                 for l, pc in enumerate(prednmed)
                                 if pc >= max(prednmed) and max(prednmed) > 0]
                # compute NME ACC based on the NME-similar local neighborhood
                if (iclass.split('_')[1] in pred_nme_dist):
                    NME_descript_dist.append(1)
            if (k <= 202 and iclass.split('_')[2] != 'N/A'):
                prednmeie = []
                prednmeie.append(sum([lab.split('_')[2] == 'Clumped' for lab in class5nn]))
                prednmeie.append(sum([lab.split('_')[2] == 'ClusteredRing' for lab in class5nn]))
                prednmeie.append(sum([lab.split('_')[2] == 'Heterogeneous' for lab in class5nn]))
                prednmeie.append(sum([lab.split('_')[2] == 'Homogeneous' for lab in class5nn]))
                prednmeie.append(sum([lab.split('_')[2] == 'Stippled or punctate' for lab in class5nn]))
                # prediction based on majority voting
                pred_nme_intenh = [nme_intenh[l]
                                   for l, pc in enumerate(prednmeie)
                                   if pc >= max(prednmeie) and max(prednmeie) > 0]
                # compute NME ACC based on the NME-similar local neighborhood
                if (iclass.split('_')[2] in pred_nme_intenh):
                    NME_descript_intenh.append(1)
        #####################
        # collect stats
        #####################
        BorM_diag_Acc = sum(BorM_diag) / float(len(datalabels))  # Acc = BorM_diag_Acc
        print "BorM_diag_Acc = %f" % BorM_diag_Acc
        ## good if high
        TPR = sum(TP) / float(sum(datalabels == 'M'))
        print "TPR = %f" % TPR
        TNR = sum(TN) / float(sum(datalabels == 'B'))
        print "TNR = %f" % TNR
        ## bad if high
        FPR = sum(FP) / float(sum(datalabels == 'B'))
        print "FPR = %f" % FPR
        FNR = sum(FN) / float(sum(datalabels == 'M'))
        print "FNR = %f" % FNR
        # good if it decreases
        missedR = sum(np.asarray(missed)) / float(len(datalabels))
        print "missedR = %f" % missedR
        Acc5nn_nme_dist = sum(NME_descript_dist) / 202.0
        Acc5nn_nme_intenh = sum(NME_descript_intenh) / 202.0
        print "Acc5nn_nme_dist (DIST) = %f" % Acc5nn_nme_dist
        print "Acc5nn_nme_intenh (INT_ENH) = %f" % Acc5nn_nme_intenh
        if (i == 0):
            tsne = TSNE(n_components=2, perplexity=self.perplexity,
                        learning_rate=self.learning_rate, init='pca',
                        random_state=0, verbose=2, method='exact')
            Z_tsne = tsne.fit_transform(z)
            self.best_args['initAcc'] = Acc
            # plot initial z
            figinint = plt.figure()
            axinint = figinint.add_subplot(1, 1, 1)
            plot_embedding_unsuper_NMEdist_intenh(
                Z_tsne, named_y, axinint,
                title='kmeans init tsne: Acc={}\n RF_nme_dist={}\n RF_intenh={}'.format(
                    Acc, scores_dist.mean(), scores_intenh.mean()),
                legend=True)
            figinint.savefig('{}//tsne_init_z{}_mu{}_{}.pdf'.format(
                save_to, self.best_args['znum'],
                self.best_args['num_centers'], labeltype),
                bbox_inches='tight')
            plt.close()
        # save best args
        self.best_args['acci'].append(Acc)
        if (Acc >= self.maxAcc):
            print 'Improving maxAcc = {}'.format(Acc)
            for key, v in args.items():
                self.best_args[key] = args[key]
            self.maxAcc = Acc
            self.best_args['pbestacci'] = p
            self.best_args['zbestacci'] = z
            self.best_args['bestacci'].append(Acc)
            self.best_args['cvRF_nme_dist'] = scores_dist.mean()
            self.best_args['cvRF_nme_intenh'] = scores_intenh.mean()
            self.best_args['dec_mu'][:] = args['dec_mu'].asnumpy()
            self.best_args['BorM_diag_Acc'] = BorM_diag_Acc
            self.best_args['TPR'] = TPR
            self.best_args['TNR'] = TNR
            self.best_args['FPR'] = FPR
            self.best_args['FNR'] = FNR
            self.best_args['missedR'] = missedR
            self.best_args['Acc5nn_nme_dist'] = Acc5nn_nme_dist
            self.best_args['Acc5nn_nme_intenh'] = Acc5nn_nme_intenh
        if (i > 0 and i % self.best_args['plot_interval'] == 0
                and self.ploti <= 15):
            # Visualize the progression of the embedded representation on a
            # subsample of the data. For visualization we use t-SNE
            # (van der Maaten & Hinton, 2008) applied to the embedded points zi.
            tsne = TSNE(n_components=2, perplexity=self.perplexity,
                        learning_rate=self.learning_rate, init='pca',
                        random_state=0, verbose=2, method='exact')
            Z_tsne = tsne.fit_transform(z)
            axprogress = figprogress.add_subplot(4, 4, 1 + self.ploti)
            plot_embedding_unsuper_NMEdist_intenh(
                Z_tsne, named_y, axprogress,
                title="Epoch %d z_tsne Acc (%f)" % (i, Acc),
                legend=False)
            self.ploti = self.ploti + 1
        ## COMPUTING the target distribution P:
        ## we compute pi by first raising qi to the second power and then
        ## normalizing by frequency per cluster
        weight = 1.0 / p.sum(axis=0)  # p.sum(axis=0) provides the cluster frequencies fj
        weight *= self.best_args['num_centers'] / weight.sum()
        p = (p ** 2) * weight
        train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
        # points that changed assignment vs. the tol threshold (tol = 0.001)
        print np.sum(y_pred != self.best_args['y_pred']), 0.001 * y_pred.shape[0]
        # To discover cluster assignments, we stop when less than tol = 0.1% of
        # points change cluster assignment between two consecutive iterations;
        # this variant instead uses a hard stop after 100 update intervals.
        if i == self.best_args['update_interval'] * 100:
            self.best_args['y_pred'] = y_pred
            self.best_args['acci'].append(Acc)
            return True
        self.best_args['y_pred'] = y_pred
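# ---------------------------------------------------------------------------
# Aside (added sketch, not part of the original script): the inline voting
# above can be factored into a helper. This hypothetical function assumes
# `Z` is the (n_samples, znum) embedding and `labels` an array of string
# labels; it returns each point's majority label among its n_neighbors
# nearest neighbors, excluding the point itself and an optional label.
# Unlike the lists above, which keep all tied classes, ties here are broken
# arbitrarily by Counter.
from collections import Counter
from sklearn.neighbors import BallTree

def knn_majority_labels(Z, labels, n_neighbors=5, exclude='K'):
    tree = BallTree(Z, leaf_size=5)
    # ask for n_neighbors+1 points because each point is its own nearest hit
    dist, ind = tree.query(Z, k=n_neighbors + 1)
    preds = []
    for k in range(Z.shape[0]):
        neighbor_idx = ind[k][ind[k] != k][:n_neighbors]
        votes = [labels[j] for j in neighbor_idx if labels[j] != exclude]
        preds.append(Counter(votes).most_common(1)[0][0] if votes else None)
    return preds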
ae_model.layerwise_pretrain(train_X, batch_size, pretrain_num_iter, 'sgd',
                            l_rate=0.1, decay=0.0,
                            lr_scheduler=mx.misc.FactorScheduler(20000, 0.1),
                            print_every=print_every)
ae_model.finetune(train_X, batch_size, finetune_num_iter, 'sgd',
                  l_rate=0.1, decay=0.0,
                  lr_scheduler=mx.misc.FactorScheduler(20000, 0.1),
                  print_every=print_every)
ae_model.save('mnist_pt.arg')
ae_model.load('mnist_pt.arg')
print("Training error:", ae_model.eval(train_X))
print("Validation error:", ae_model.eval(val_X))
if visualize:
    try:
        from matplotlib import pyplot as plt
        from model import extract_feature
        # sample a random image
        original_image = X[np.random.choice(X.shape[0]), :].reshape(1, 784)
        data_iter = mx.io.NDArrayIter({'data': original_image}, batch_size=1,
                                      shuffle=False, last_batch_handle='pad')
        # reconstruct the image by running it through the decoder
        reconstructed_image = list(extract_feature(
            ae_model.decoder, ae_model.args, ae_model.auxs, data_iter, 1,
            ae_model.xpu).values())[0]
        print("original image")
        plt.imshow(original_image.reshape((28, 28)))
        plt.show()
        print("reconstructed image")
        plt.imshow(reconstructed_image.reshape((28, 28)))
        plt.show()
    except ImportError:
        logging.info("matplotlib is required for visualization")
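# ---------------------------------------------------------------------------
# Aside (added sketch, not part of the original script): both calls above
# pass mx.misc.FactorScheduler(20000, 0.1), which multiplies the learning
# rate by `factor` every `step` updates. Assuming that standard factor-decay
# behavior, the effective rate can be restated in plain Python:
def factor_lr(base_lr, step, factor, num_update):
    # lr(t) = base_lr * factor ** floor(t / step)
    return base_lr * factor ** (num_update // step)

# with l_rate=0.1, step=20000, factor=0.1 as above:
#   factor_lr(0.1, 20000, 0.1, 0)      -> 0.1
#   factor_lr(0.1, 20000, 0.1, 20000)  -> 0.01  (up to floating point)
#   factor_lr(0.1, 20000, 0.1, 40000)  -> 0.001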
valAUC = []
cv_SAEAUC = []
for ik, znum in enumerate(latent_size):
    X = combX_allNME
    y = roi_labels
    xpu = mx.cpu()
    ae_model = AutoEncoderModel(xpu, [X.shape[1], 500, 500, 2000, znum],
                                pt_dropout=0.2)
    print('Loading autoencoder of znum = {}, post training'.format(znum))
    ae_model.load(os.path.join(
        save_to,
        'SAE_zsize{}_wimgfeatures_descStats_zeromean.arg'.format(str(znum))))
    data_iter = mx.io.NDArrayIter({'data': X}, batch_size=X.shape[0],
                                  shuffle=False, last_batch_handle='pad')
    # extract only the encoder part of the SAE
    feature = ae_model.encoder
    zspace = list(model.extract_feature(feature, ae_model.args, None,
                                        data_iter, X.shape[0],
                                        xpu).values())[0]
    # pool Z-space variables
    datalabels = np.asarray(y)
    dataZspace = zspace
    #####################
    # unbiased assessment: split into train / held-out test
    #####################
    # to compare performance we discard unknown labels and use only the
    # known ones (B or M)
    Z = dataZspace[datalabels != 'K', :]
    y = datalabels[datalabels != 'K']
    print '\n... MLP fully connected layer trained on Z_train, tested on Z_test'
    # note: sep is 10% of all examples (including 'K'), not 10% of Z
    sep = int(X.shape[0] * 0.10)
    Z_test = Z[:sep]
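# ---------------------------------------------------------------------------
# Aside (added sketch, not the continuation of this script): the 10% hold-out
# above is taken positionally (Z[:sep]), so it inherits whatever ordering
# combX_allNME has. If a class-balanced split were wanted instead, a
# stratified hold-out could be drawn with scikit-learn (assuming
# scikit-learn >= 0.18 for sklearn.model_selection):
from sklearn.model_selection import train_test_split

Z_train_s, Z_test_s, y_train_s, y_test_s = train_test_split(
    Z, y, test_size=0.10, stratify=y, random_state=0)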