def save(self, s_file=None, s_graph=None, l_remove_loop=False):
    if s_file is not None:
        fname, ext = os.path.splitext(s_file)
        s_name = os.path.split(fname)[1]
    else:
        fname = s_graph or self.name or 'Untitled'
        s_name = fname
        ext = '.xgmml'
    s_out = self.print_header(s_name)
    S_node = []
    S_edge = []
    S_REMOVE_NODE = set(["label", "id", "canonicalName"])
    S = self.T_node.header()
    S_node.extend([s for s in S if s not in S_REMOVE_NODE and not s.startswith('graphics')])
    S_REMOVE_EDGE = set(["label", "Gene_A", "Name_A", "InteractionType",
                         "Gene_B", "Name_B", "canonicalName", "TYPE"])
    S = self.T_edge.header()
    S_edge.extend([s for s in S if s not in S_REMOVE_EDGE])
    s_out += self.print_xgmml(self.T_node, self.T_edge, "Gene", "Symbol",
                              S_node, S_edge, l_remove_loop=l_remove_loop)
    util.save_list(fname + ext, s_out, s_end='\n')
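# NOTE: every snippet in this collection persists output through a
# project-local util.save_list helper whose implementation is not shown.
# Below is a minimal, hypothetical sketch of the text-flavored variant
# assumed by calls like save_list(path, lines, s_end='\n'); the real helper
# may differ. (A few snippets instead use an object-first, pickle-style
# save_list(obj, path) -- see the sketch after process_vehicle_image.)
def save_list(s_file, S, s_end=''):
    """Write S to s_file; S may be a ready-made string or a list of lines.
    s_end is appended after each line."""
    with open(s_file, 'w') as f:
        if isinstance(S, str):
            f.write(S)
        else:
            for s in S:
                f.write(str(s) + s_end)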
def register_a_face():
    if request.method == 'POST':
        name = request.form.get('name')
        f = request.files['image']
        f.save(os.path.join(temp_filepath, f.filename))
        img_list = os.listdir(temp_filepath)
        image = face_recognition.load_image_file(os.path.join(temp_filepath, img_list[0]))
        encoded_image = face_recognition.face_encodings(image)[0]
        encoded_image_list = (util.load_list())["faces"]
        encoded_image_list.append({'name': name, 'encoded_image': encoded_image.tolist()})
        util.save_list({"faces": encoded_image_list})
        util.flush_files('temp')
        return "registered"
    if request.method == 'GET':
        encoded_image_json = util.load_list()
        return encoded_image_json
    if request.method == 'DELETE':
        util.save_list({'faces': []})
        return 'deleted'
def _fix_missing(s_file):
    'This is no longer needed, fixed cwc on May 2, 2013'
    lines = []
    with open(s_file) as f:
        for line in f:
            lines.append(re.sub(r'(?<=\s)340282346638528859811704183484516925440.000', '', line))
    util.save_list(s_file, lines)
def _strip_array_line(s_file):
    with open(s_file) as f:
        lines = f.readlines()
    s = lines[1]
    if s.startswith('AID'):
        del lines[1:2]
        util.save_list(s_file, lines)
        return s
    return ''
def make_input(self, s_file='untitled', options=None):
    if self.table is None:
        util.error_msg('Clustering.make_input: missing Clustering.table!')
    S = self.table.header()
    S_up = [s.upper() for s in S]
    opt = self.input_opt
    opt.update(options or {})
    self.input_opt = opt
    # util.index returns -1 when the column is missing (list.index would raise)
    S_miss = [s for s in opt['DATA_COLS'] if util.index(s, S) < 0]
    if len(S_miss) > 0:
        util.error_msg('Clustering.make_input: missing data column: ' + ", ".join(S_miss))
    i_id = util.index(opt['ID'], S)
    if i_id < 0:
        i_id = util.index('GENE', S_up)
    if i_id < 0:
        util.error_msg('Clustering.make_input: no column is specified as the ID!')
    opt['ID'] = S[i_id]
    if type(opt['DESCRIPTION']) is str:
        opt['DESCRIPTION'] = [opt['DESCRIPTION']]
    I_des = [util.index(s, S) for s in opt['DESCRIPTION'] if util.index(s, S) >= 0]
    if len(I_des) == 0:
        # Fall back to the ID column as the description
        I_des = [i_id]
        opt['DESCRIPTION'] = [opt['ID']]
    else:
        for i in I_des:
            self.table.iloc[:, i] = util.sarray2sarray(self.table.iloc[:, i])
    i_w = util.index(opt['WEIGHT_COL'], S)
    opt['DATA_COLS'] = self.get_default_exp_cols(opt['DATA_COLS'])
    n_exp = len(opt['DATA_COLS'])
    if n_exp == 0:
        util.error_msg('Clustering.make_input: no data column is specified!')
    S_out = []
    S_out.append('Gene\tDescription\tWeight\t' + '\t'.join(opt['DATA_COLS']))
    if opt['EXP_WEIGHT'] is None or len(opt['EXP_WEIGHT']) != n_exp:
        S_out.append('Exp\t\t' + '\t1' * n_exp)
    else:
        S_out.append('Exp\t\t\t' + '\t'.join(util.rarray2sarray(opt['EXP_WEIGHT'], s_format='%g', s_null=1.0)))
    #df.fillna('', inplace=True)
    i_cols = [S.index(s) for s in opt['DATA_COLS']]
    if opt['GENE_WEIGHT'] is not None and len(opt['GENE_WEIGHT']) == len(self.table):
        if opt['WEIGHT_COL'] == '':
            opt['WEIGHT_COL'] = 'WEIGHT'
        self.table[opt['WEIGHT_COL']] = opt['GENE_WEIGHT']
    for i in range(len(self.table)):
        s = str(self.table.iloc[i, i_id]) + '\t' + ":".join(self.table.iloc[i, I_des]) + '\t' + str(self.table.iloc[i, i_w] if i_w >= 0 else 1)
        R = np.array([x for x in self.table.iloc[i, i_cols]])
        if opt['GENE_NORMALIZE'] and opt['NORMALIZE_METHOD'] == 'Z':
            valid = util.no_nan(R)
            if len(valid) > 1:
                # Z-score against the non-NaN values (np.std(R) would be NaN
                # whenever R contains missing entries)
                R = (R - np.mean(valid)) / np.std(valid, ddof=1)
        s += '\t' + '\t'.join(['' if pd.isnull(x) else str(x) for x in R])
        S_out.append(s)
    if re.search(r'\.input$', s_file) is not None:
        s_file = re.sub(r'\.input$', '', s_file)
    util.save_list(s_file + ".input", S_out, s_end='\n')
    self.input = s_file
def add_column(s_file, R, s_name, l_separator=True):
    """Add an extra column of values R to an existing heat map.

    s_file: str, file name without extension; modifies .cdt and .atr
    R: array(int/float), values to add
    s_name: str, column name
    l_separator: bool, default True. If True, add a column of blank values
        to separate the new column from existing ones."""
    if re.search(r'\.\w{3}$', s_file):
        s_file = s_file[:-4]
    if not os.path.exists(s_file + '.cdt'):
        util.error_msg("File not exist: " + s_file + ".cdt!")
    f = open(s_file + '.cdt')
    S = []
    cnt = 0
    while True:
        line = f.readline()
        if not line:
            break
        SS = line.strip().split("\t")
        if SS[0].startswith('GENE'):
            if l_separator:
                SS.append('')
            SS.append('%.2f' % R[cnt])
            cnt += 1
        elif SS[0] == 'GID':
            if l_separator:
                SS.append('separator')
            SS.append(s_name)
        elif SS[0] == 'AID':
            X = [int(re.sub(r'\D', '', x)) for x in SS if x.startswith('ARRY')]
            n_array = max(X) + 1
            SS.append('ARRY%dX' % n_array)
            if l_separator:
                SS.append('ARRY%dX' % (n_array + 1))
        elif SS[0] == 'EWEIGHT':
            if l_separator:
                SS.append('0')
            SS.append('0')
        S.append(SS)
    f.close()
    S = ["\t".join(X) for X in S]
    util.save_list(s_file + '.cdt', S, s_end="\n")
    if os.path.exists(s_file + '.atr'):
        S = util.read_list(s_file + '.atr')
        SS = S[-1].split("\t")
        n_node = int(re.sub(r'\D', '', SS[0])) + 1
        S.append('NODE%dX\tNODE%dX\tARRY%dX\t0' % (n_node, n_node - 1, n_array))
        if l_separator:
            S.append('NODE%dX\tNODE%dX\tARRY%dX\t0' % (n_node + 1, n_node, n_array + 1))
        util.save_list(s_file + '.atr', S, s_end="\n")
def make_JTV(s_file, r_max=2.0):
    s = '''<DocumentConfig>
  <UrlExtractor/>
  <ArrayUrlExtractor/>
  <Views>
    <View type="Dendrogram" dock="1">
      <ColorExtractor contrast="%s">
        <ColorSet up="#D8181C" zero="#D8D8D8" down="#3A6C9A" missing="#D8D8D8"/>
      </ColorExtractor>
      <ArrayDrawer/>
      <GlobalXMap current="Fill">
        <FixedMap type="Fixed" scale="7.0"/>
        <FillMap type="Fill"/>
        <NullMap type="Null"/>
      </GlobalXMap>
      <GlobalYMap current="Fill">
        <FixedMap type="Fixed" scale="11.0"/>
        <FillMap type="Fill"/>
        <NullMap type="Null"/>
      </GlobalYMap>
      <ZoomXMap current="Fill">
        <FixedMap type="Fixed"/>
        <FillMap type="Fill"/>
        <NullMap type="Null"/>
      </ZoomXMap>
      <ZoomYMap current="Fill">
        <FixedMap type="Fixed"/>
        <FillMap type="Fill"/>
        <NullMap type="Null"/>
      </ZoomYMap>
      <TextView>
        <TextView>
          <GeneSummary/>
        </TextView>
        <TextView>
          <GeneSummary/>
        </TextView>
        <TextView>
          <GeneSummary/>
        </TextView>
        <TextView>
          <GeneSummary/>
        </TextView>
      </TextView>
      <ArrayNameView>
        <ArraySummary included="0"/>
      </ArrayNameView>
      <AtrSummary/>
      <GtrSummary/>
    </View>
  </Views>
</DocumentConfig>''' % str(r_max)
    util.save_list(s_file + ".jtv", s)
def process_vehicle_image():
    imw = 64
    imh = 64
    data_path = '/home/kien/PycharmProjects/data/vietai-assignment-data/vehicles/'
    car_paths = sorted(glob.glob(data_path + 'car*.jpg'))
    motorbike_paths = sorted(glob.glob(data_path + 'motorbike*.jpg'))

    # Cars (label 0): read, resize to 64x64, convert to grayscale
    train_x_car = np.zeros((imh, imw, 1500))
    train_y_car = np.zeros((1500, 1))
    for i, car_path in enumerate(car_paths):
        img = scipy.misc.imread(car_path)
        img = scipy.misc.imresize(img, (imh, imw), interp='bicubic')
        img = np.mean(img, axis=2)
        train_x_car[:, :, i] = img
    test_x_car = train_x_car[:, :, 1200:]
    test_y_car = train_y_car[1200:]
    train_x_car = train_x_car[:, :, :1200]
    train_y_car = train_y_car[:1200]

    # Motorbikes (label 1): same preprocessing
    train_x_mo = np.zeros((imh, imw, 1500))
    train_y_mo = np.ones((1500, 1))
    for i, motorbike_path in enumerate(motorbike_paths):
        img = scipy.misc.imread(motorbike_path)
        img = scipy.misc.imresize(img, (imh, imw), interp='bicubic')
        img = np.mean(img, axis=2)
        train_x_mo[:, :, i] = img
    test_x_mo = train_x_mo[:, :, 1200:]
    test_y_mo = train_y_mo[1200:]
    train_x_mo = train_x_mo[:, :, :1200]
    train_y_mo = train_y_mo[:1200]

    train_x = np.concatenate((train_x_car, train_x_mo), axis=2)
    train_y = np.concatenate((train_y_car, train_y_mo), axis=0)
    test_x = np.concatenate((test_x_car, test_x_mo), axis=2)
    test_y = np.concatenate((test_y_car, test_y_mo), axis=0)

    #plt.ion()
    #for i in range(0, 600, 24):
    #    plt.clf()
    #    plt.imshow(test_x[:, :, i], cmap='gray')
    #    plt.show()
    #    plt.pause(0.2)
    #    print("%d %d" % (i, test_y[i, 0]))
    #pdb.set_trace()
    save_list([train_x, train_y, test_x, test_y], './data/vehicles.dat')
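# NOTE: process_vehicle_image above (and the PyTorch train/test functions
# below) call save_list with the object first and the file name second, and
# store arbitrary Python objects in .dat files. That argument order suggests
# a pickle-based helper rather than the line-writer sketched earlier. A
# minimal sketch under that assumption (hypothetical, not the project's
# actual util module):
import pickle

def save_list(obj, s_file):
    # Serialize any picklable object (lists of numpy arrays included) to disk
    with open(s_file, 'wb') as f:
        pickle.dump(obj, f)

def load_list(s_file):
    # Inverse of save_list: restore the pickled object
    with open(s_file, 'rb') as f:
        return pickle.load(f)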
def save(self, s_cdtfile, l_norm_row=False, r_max=2.0):
    """r_max controls the max matrix value to be color saturated"""
    X = self.data
    s_cdtfile, s_ext = os.path.splitext(s_cdtfile)
    S_row = self.S_row
    S_description = self.S_description
    # Default gene IDs in input order; reordered below if rows were clustered
    S_gene = ["GENE%dX" % (i + 1) for i in range(X.shape[0])]
    if self.Zr is not None:
        den_r = FastCluster.linkage2order(self.Zr)
        X = X[den_r, :]
        S = []  # rows of "NODEID\tLEFT\tRIGHT\tCORRELATION"
        r_dist = max(self.Zr[:, 2].max(), 1.0)
        n = X.shape[0]
        node_cnt = 0
        S_gene = ["GENE%dX" % (x + 1) for x in den_r]
        S_row = [self.S_row[i] for i in den_r]
        S_description = [self.S_description[i] for i in den_r]
        for i, R in enumerate(self.Zr):
            node_cnt += 1
            s_left = "GENE%dX" % int(R[0] + 1) if int(R[0]) < n else "NODE%dX" % int(R[0] - n + 1)
            s_right = "GENE%dX" % int(R[1] + 1) if int(R[1]) < n else "NODE%dX" % int(R[1] - n + 1)
            S.append("NODE%dX\t%s\t%s\t%.4f" % (node_cnt, s_left, s_right, max(1.0 - R[2] / r_dist, 0.0)))
        util.save_list(s_cdtfile + '.gtr', S, s_end="\n")
    S_col = self.S_col
    if self.Zc is not None:
        den_c = FastCluster.linkage2order(self.Zc)
        X = X[:, den_c]
        S = []
        r_dist = max(self.Zc[:, 2].max(), 1.0)
        n = X.shape[1]
        node_cnt = 0
        S_array = ["ARRY%dX" % (x + 1) for x in den_c]
        S_col = [self.S_col[i] for i in den_c]
        for i, R in enumerate(self.Zc):
            node_cnt += 1
            s_left = "ARRY%dX" % int(R[0] + 1) if int(R[0]) < n else "NODE%dX" % int(R[0] - n + 1)
            s_right = "ARRY%dX" % int(R[1] + 1) if int(R[1]) < n else "NODE%dX" % int(R[1] - n + 1)
            S.append("NODE%dX\t%s\t%s\t%.4f" % (node_cnt, s_left, s_right, max(1.0 - R[2] / r_dist, 0.0)))
        util.save_list(s_cdtfile + '.atr', S, s_end="\n")
    n_exp = len(S_col)
    S = ["GID\tGENE\tNAME\tGWEIGHT\t" + "\t".join(S_col)]
    if self.Zc is not None:
        S.append("AID\t\t\t\t" + "\t".join(S_array))
    S.append("EWEIGHT\t\t\t" + "\t1" * n_exp)
    for i, R in enumerate(X):
        if l_norm_row:
            R = (R - R.mean()) / R.std()
        S.append(S_gene[i] + "\t" + S_row[i] + "\t" + S_description[i] + "\t1\t" + "\t".join(util.rarray2sarray(R, s_format="%.3f")))
    util.save_list(s_cdtfile + ".cdt", S, s_end="\n")
    import cluster
    Clustering.make_JTV(s_cdtfile, r_max=r_max)
cost = tf.reduce_sum(tf.multiply(log_dist, masks))

# Optimizer
optimizer = tf.train.AdamOptimizer(lr)

# Gradient clipping is applied to mitigate exploding gradients
gradients = optimizer.compute_gradients(cost)
capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                    for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients, global_step=global_step, name='train_op')

# Split the original data into training and test sets
source_train, source_test, target_train, target_test = train_test_split(
    source_int_text, target_int_text, test_size=0.01, random_state=42)

# Save the datasets
util.save_list('source_train', source_train)
util.save_list('target_train', target_train)
util.save_list('source_test', source_test)
util.save_list('target_test', target_test)
print("The size for test data {},{}".format(len(source_test), len(target_test)))

# Split the training data into training and validation sets
valid_size = 1280
train_source = source_train[valid_size:]
train_target = target_train[valid_size:]
valid_source = source_train[:valid_size]
valid_target = target_train[:valid_size]
print("The size for training data {},{}".format(len(train_source), len(train_target)))
def test():
    # Getting settings from config.py
    max_len = cfg.MAX_TOKEN_LEN
    num_token = cfg.NUM_OF_TOKEN
    imw = cfg.IMW
    imh = cfg.IMH

    # Test params
    is_train = False
    batch_size = 1

    # Tracking/saving
    num_ite_to_log = cfg.NUM_ITE_TO_LOG
    num_ite_to_vis = cfg.NUM_ITE_TO_VIS
    save_name = cfg.SAVE_NAME
    test_name = cfg.TEST_NAME
    vis_path = cfg.VIS_PATH
    use_cuda = cfg.CUDA and torch.cuda.is_available()
    save_path = cfg.MODEL_FOLDER
    dataset_path = cfg.DATASET_PATH + 'CROHME2013_data/TestINKML/'
    scale_factor = cfg.TEST_SCALE_FACTOR

    # Load the vocab dictionary for display purposes
    word_to_id, id_to_word = get_gt.build_vocab('mathsymbolclass.txt')
    start_id = word_to_id['<s>']
    stop_id = word_to_id['</s>']

    # Initialize the network and load its weights
    net = AGRU()
    save_files = glob.glob(save_path + save_name + '*.dat')
    if len(save_files) > 0:
        save_file = sorted(save_files)[-1]
        print('Loading network weights saved at %s...' % save_file)
        loadobj = torch.load(save_file)
        net.load_state_dict(loadobj['state_dict'])
        print('Loading done.')
    if use_cuda:
        net.cuda()
    if not is_train:
        net.train(False)

    # Get full paths to test inkml files and matching render scale factors
    inkml_list = glob.glob(dataset_path + '*.inkml')
    scale_list = [scale_factor] * len(inkml_list)
    inkml_list = np.asarray(inkml_list)
    scale_list = np.asarray(scale_list)
    #inkml_list = inkml_list[0:120]
    #scale_list = scale_list[0:120]
    num_test = len(inkml_list)
    num_ite = int(np.ceil(1.0 * num_test / batch_size))

    # Exact match and word error rate
    em = []
    wer = []
    all_pred = []
    all_gt = []

    # Main test loop
    for i in range(num_ite):
        batch_idx = range(i * batch_size, (i + 1) * batch_size)
        if batch_idx[-1] >= num_test:
            batch_idx = range(i * batch_size, num_test)
            batch_size = len(batch_idx)
        batch_x = util.batch_data(inkml_list[batch_idx], scale_list[batch_idx], is_train)
        batch_y_np = util.batch_target(inkml_list[batch_idx])
        batch_y = util.np_to_var(batch_y_np, use_cuda)

        #pred_y, attention = net(batch_x, batch_y)
        pred_y, attention = net.beam_search(batch_x, start_id, stop_id)
        pred_y = util.var_to_np(pred_y, use_cuda)
        pred_y = np.argmax(pred_y, 2)
        batch_y = np.reshape(batch_y_np, (batch_size, max_len))
        print('Finished ite %d/%d.' % (i, num_ite))

        j = 0
        pred_string = pred_y[j, :]
        pred_string = [id_to_word[idx] for idx in list(pred_string)]
        gt_string = batch_y[0, :]
        gt_string = [id_to_word[idx] for idx in list(gt_string)]
        all_pred.append(pred_string)
        all_gt.append(gt_string)
        em.append(util.exact_match(pred_string, gt_string))
        if '</s>' in pred_string:
            pred_string = pred_string[0:pred_string.index('</s>') + 1]
            gt_string = gt_string[0:gt_string.index('</s>') + 1]
        wer.append(util.levenshtein_distance(pred_string, gt_string))
        if i % 4 == 0:
            continue

        # Print to console
        print('Prediction: %s' % ' '.join(pred_string))
        print('Target: %s\n' % ' '.join(gt_string))

        # Save attention maps to files for visualization
        file_name = ntpath.basename(inkml_list[batch_idx[j]])[:-6]
        vis_path_j = vis_path + file_name + '/'
        if not os.path.exists(vis_path_j):
            os.makedirs(vis_path_j)
        tmp_x = np.sum(batch_x.data.cpu().numpy()[j, :, :, :], axis=0)
        attention_np = attention.data.cpu().numpy()[j, 1:, :, :]
        pred_string = pred_string[1:]
        for k, word in enumerate(pred_string):
            word = word.replace('/', 'slash_')
            attention_k = attention_np[k, :, :] / np.max(attention_np[k, :, :]) * 0.8
            attention_k = scipy.misc.imresize(attention_k, 16.0) / 255.0
            tmp_x = scipy.misc.imresize(tmp_x, attention_k.shape)
            attention_k += tmp_x
            attention_k[attention_k > 1] = 1
            try:
                scipy.misc.imsave(vis_path_j + ('%02d_%s.jpg' % (k, word)), attention_k)
            except FileNotFoundError:
                pdb.set_trace()
            if word == '<slash_s>':
                break
        #pdb.set_trace()

    print("Exact match count: %d/%d" % (sum(em), len(em)))
    print("Word error rate: %.5f" % np.mean(wer))
    #pdb.set_trace()  # debug breakpoint disabled
    util.save_list([em, wer, all_pred, all_gt], save_path + test_name + '.dat')
    #pdb.set_trace()  # debug breakpoint disabled
if args.frequency_results is not None:
    config['frequency_results'] = args.frequency_results

if args.profile:
    # When profiling, just run the configuration
    import cProfile
    cProfile.run("all_runs(config)", sort=2)
    sys.exit()

try:
    # Perform the actual run of the experiment
    raw_results, frequencies = all_runs(config)
    combined = sorted(combine_results(raw_results).items())
    print(combined)
    if args.output_results is not None:
        # Output the results, with the combined stuff on the first line
        util.save_list(args.output_results, [combined] + raw_results)
    if args.output_config is not None:
        # Serialize the function list
        config['function_list'] = [func.__name__ for func in config['function_list']]
        # Save the final configuration as a single file
        util.save_configuration(args.output_config, config)
    if args.frequency_results is not None:
        # Save the frequency information
        processed = frequencies_to_vector(config, frequencies)
        util.save_configuration(args.frequency_results, processed)
except KeyError as e:
    print('You must include a configuration value for', e.args[0])
def train():
    # Getting settings from config.py
    max_len = cfg.MAX_TOKEN_LEN
    num_token = cfg.NUM_OF_TOKEN
    imw = cfg.IMW
    imh = cfg.IMH

    # Training params
    is_train = True
    batch_size_const = cfg.GPU_BATCH_SIZE
    num_ite_to_update = cfg.NUM_ITE_TO_UPDATE
    lr = cfg.LR
    momentum = cfg.MOMENTUM
    lr_decay = cfg.LR_DECAY
    max_grad = cfg.MAX_GRAD_CLIP
    num_e = cfg.NUM_EPOCH

    # Tracking/saving
    last_e = -1
    global_step = 0
    running_loss = 0
    num_ite_to_log = cfg.NUM_ITE_TO_LOG
    num_ite_to_vis = cfg.NUM_ITE_TO_VIS
    num_epoch_to_save = cfg.NUM_EPOCH_TO_SAVE
    all_loss = []
    save_name = cfg.SAVE_NAME
    meta_name = cfg.META_NAME
    vis_path = cfg.VIS_PATH
    use_cuda = cfg.CUDA and torch.cuda.is_available()
    save_path = cfg.MODEL_FOLDER
    dataset_path = cfg.DATASET_PATH + 'CROHME2013_data/TrainINKML/'
    subset_list = cfg.SUBSET_LIST
    scale_factors = cfg.SCALE_FACTORS

    # Load the vocab dictionary for display purposes
    _, id_to_word = get_gt.build_vocab('mathsymbolclass.txt')

    # Initialize the network and load its weights
    net = AGRU()
    save_files = glob.glob(save_path + save_name + '*.dat')
    meta_files = glob.glob(save_path + meta_name + '*.dat')
    if len(save_files) > 0:
        save_file = sorted(save_files)[-1]
        print('Loading network weights saved at %s...' % save_file)
        loadobj = torch.load(save_file)
        net.load_state_dict(loadobj['state_dict'])
        last_e, running_loss, all_loss, lr = util.load_list(sorted(meta_files)[-1])
        print('Loading done.')
    if use_cuda:
        net.cuda()
    if not is_train:
        net.train(False)

    # Get a list of convolutional layers
    conv_layers = util.get_layers(net, lambda x: type(x) == type(net.conv1_3))

    # Get conv parameters
    conv_params = []
    for c in conv_layers:
        for p in c.parameters():
            if p.requires_grad:
                conv_params.append(p)

    # Get a list of trainable layers that are not convolutional
    other_layers = util.get_layers(
        net, lambda x: type(x) != type(net.conv1_3) and hasattr(x, 'parameters'))
    other_layers = other_layers[1:]  # The first layer is attend_GRU.AGRU

    # Get GRU parameters
    gru_params = []
    for l in other_layers:
        for p in l.parameters():
            gru_params.append(p)

    # Set different learning rates for conv layers and GRU layers
    optimizer = optim.Adam([{'params': gru_params},
                            {'params': conv_params, 'lr': lr}], lr=lr)

    # Loss function
    criterion = nn.CrossEntropyLoss(ignore_index=1)

    # Get full paths to train inkml files and matching render scale factors
    inkml_list = []
    scale_list = []
    for i, subset in enumerate(subset_list):
        subset_inkml_list = glob.glob(dataset_path + subset + '*.inkml')
        inkml_list += subset_inkml_list
        scale_list += [scale_factors[i]] * len(subset_inkml_list)
    inkml_list = np.asarray(inkml_list)
    scale_list = np.asarray(scale_list)
    #inkml_list = inkml_list[0:120]
    #scale_list = scale_list[0:120]
    num_train = len(inkml_list)
    num_ite = int(np.ceil(1.0 * num_train / batch_size_const))

    # Main train loop
    optimizer.zero_grad()
    for e in range(last_e + 1, num_e):
        permu_ind = np.random.permutation(range(num_train))
        inkml_list = inkml_list[permu_ind.astype(int)]
        scale_list = scale_list[permu_ind.astype(int)]

        if e % cfg.NUM_EPOCH_TO_DECAY == cfg.NUM_EPOCH_TO_DECAY - 1:
            lr = lr * lr_decay
            print('Current learning rate: %.8f' % lr)
            optimizer.param_groups[0]['lr'] = lr
            optimizer.param_groups[1]['lr'] = lr

        for i in range(num_ite):
            batch_idx = range(i * batch_size_const, (i + 1) * batch_size_const)
            if batch_idx[-1] >= num_train:
                batch_idx = range(i * batch_size_const, num_train)
            batch_size = len(batch_idx)
            batch_x = util.batch_data(inkml_list[batch_idx], scale_list[batch_idx], is_train)
            batch_y_np = util.batch_target(inkml_list[batch_idx])
            batch_y = util.np_to_var(batch_y_np, use_cuda)
            pred_y, attention = net(batch_x, batch_y)

            # Flatten the 3D tensor to a (batch_size*MAX_TOKEN_LEN, NUM_OF_TOKEN)
            # matrix to compute the log loss
            pred_y = pred_y.view(-1, num_token)

            # Remove the <start> token from the target and prediction vectors
            batch_y = batch_y.view(batch_size, max_len)
            batch_y = batch_y[:, 1:].contiguous()
            batch_y = batch_y.view(-1)
            pred_y = pred_y.view(batch_size, max_len, num_token)
            pred_y = pred_y[:, 1:].contiguous()
            pred_y = pred_y.view(batch_size * (max_len - 1), num_token)

            loss = criterion(pred_y, batch_y)
            loss.backward()
            running_loss += loss.data[0]
            if global_step % num_ite_to_update == (num_ite_to_update - 1):
                util.grad_clip(net, max_grad)
                optimizer.step()
                optimizer.zero_grad()
                running_loss /= num_ite_to_update
                all_loss.append(running_loss)
                running_loss = 0

            # Print to console
            if global_step % num_ite_to_log == (num_ite_to_log - 1):
                print('Finished ite %d/%d, epoch %d/%d, loss: %.5f'
                      % (i, num_ite, e, num_e, all_loss[-1]))

                # Print prediction and target; only display the first sample
                # in the batch
                pred_y_np = util.var_to_np(pred_y, use_cuda)
                pred_y_np = np.reshape(pred_y_np, (batch_size, max_len - 1, num_token))
                pred_y_np = pred_y_np[0, 0:40, :]
                pred_y_np = np.argmax(pred_y_np, axis=1)
                pred_list = [id_to_word[idx] for idx in list(pred_y_np)]
                print('Prediction: %s' % ' '.join(pred_list))
                batch_y_np = np.reshape(batch_y_np, (batch_size, max_len))
                batch_y_np = batch_y_np[0, 1:40]
                target_list = [id_to_word[idx] for idx in list(batch_y_np)]
                print('Target: %s\n' % ' '.join(target_list))

            # Save attention visualizations and the loss curve
            if global_step % num_ite_to_vis == (num_ite_to_vis - 1):
                tmp_x = util.var_to_np(batch_x, use_cuda)[0, :, :, :]
                tmp_x = np.transpose(tmp_x, (1, 2, 0))[:, :, 0:3]
                attention_np = attention.data.cpu().numpy()[0, 2:, :, :]
                for k in range(10):
                    attention_k = attention_np[k, :, :] / np.max(attention_np[k, :, :]) * 0.8
                    attention_k = scipy.misc.imresize(attention_k, 16.0, interp='bicubic') / 255.0
                    tmp_x = scipy.misc.imresize(tmp_x, attention_k.shape)
                    attention_k = np.repeat(np.expand_dims(attention_k, 2), 3, 2)
                    attention_k = attention_k * 255
                    attention_k += tmp_x
                    attention_k /= 2.0
                    attention_k[attention_k > 255] = 255
                    attention_k = attention_k.astype(np.uint8)
                    scipy.misc.imsave(vis_path + ('%02d.jpg' % k), attention_k)
                plt.clf()
                plt.plot(all_loss)
                plt.show()
                plt.savefig(vis_path + 'loss.png')

            global_step += 1

        if e % num_epoch_to_save == (num_epoch_to_save - 1):
            print('Saving at epoch %d/%d' % (e, num_e))
            torch.save({'state_dict': net.state_dict(),
                        'opt': optimizer.state_dict()},
                       save_path + save_name + ('_%03d' % e) + '.dat')
            metadata = [e, running_loss, all_loss, lr]
            util.save_list(metadata, save_path + meta_name + ('_%03d' % e) + '.dat')
            last_e = e
def color_cdt(s_file, exps=None, exp_bgcolor=None, genes=None, gene_bgcolor=None):
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")
    BG = '#ffffff'
    f = open(s_file)
    S = []
    c_first = {}
    i = 0
    while True:
        line = f.readline()
        if not line:
            break
        SS = line.strip().split("\t")
        c_first[SS[0]] = i
        i += 1
        S.append(SS)
    f.close()
    S_header = S[0]
    i_gene = util.index('GENE', S_header)
    i_name = util.index('NAME', S_header)
    i_gid = util.index('GID', S_header)
    i_w = util.index("GWEIGHT", S_header)
    offset = max([i_gene, i_name, i_gid, i_w]) + 1
    n_exp = len(S_header) - offset
    if 'EWEIGHT' not in c_first:
        # add EWEIGHT row
        i_w = max([c_first.get('GID', -1), c_first.get('AID', -1)]) + 1
        S.insert(i_w, ['EWEIGHT'] + [''] * (offset - 1) + ['1.000'] * n_exp)
        c_first['EWEIGHT'] = i_w
    i_w = util.index("GWEIGHT", S_header)
    if i_w < 0:
        # add GWEIGHT column
        i_w = offset
        S_header.insert(i_w, 'GWEIGHT')
        for i in range(1, len(S)):
            if i <= c_first['EWEIGHT']:
                S[i].insert(i_w, '')
            else:
                S[i].insert(i_w, '1.000')
        offset += 1
    i_gene_color = util.index('BGCOLOR', S_header)
    if i_gene_color < 0 and genes is not None:
        i_gene_color = offset - 1
        S_header.insert(i_gene_color, 'BGCOLOR')
        offset += 1
        for i in range(1, len(S)):
            if i <= c_first['EWEIGHT']:
                S[i].insert(i_gene_color, '')
            else:
                S[i].insert(i_gene_color, BG)
    i_exp_color = c_first.get('BGCOLOR', -1)
    if i_exp_color < 0 and exps is not None:
        i_exp_color = c_first['EWEIGHT']
        S.insert(i_exp_color, ['BGCOLOR'] + [''] * (offset - 1) + [BG] * n_exp)
        c_first['EWEIGHT'] += 1
    if genes is not None:
        c_m = Tree.color_map(genes, gene_bgcolor)
        idx = i_gene if i_gene >= 0 else i_name
        for i in range(c_first['EWEIGHT'] + 1, len(S)):
            S[i][i_gene_color] = c_m.get(S[i][idx], BG)
    if exps is not None:
        c_m = Tree.color_map(exps, exp_bgcolor)
        SS = S[c_first['EWEIGHT'] - 1]
        for i in range(offset, len(SS)):
            SS[i] = c_m.get(S_header[i], BG)
    S = ["\t".join(X) for X in S]
    util.save_list(s_file, S, s_end="\n")
def _insert_array_line(s_file, s):
    with open(s_file) as f:
        lines = f.readlines()
    lines[1:0] = [s]
    util.save_list(s_file, lines)
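# _strip_array_line and _insert_array_line are natural companions: the first
# temporarily removes the AID row from a CDT file, the second restores it.
# A hypothetical round trip (the file name 'heatmap.cdt' is invented for
# illustration):
aid_line = _strip_array_line('heatmap.cdt')
# ... rewrite or post-process heatmap.cdt without its AID row ...
if aid_line:
    _insert_array_line('heatmap.cdt', aid_line)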
def plot(self, karyotype="", symbol=None, links=None, hits=None, outputdir=None, outputfile="CircosPlot"):
    #sw = util.StopWatch()
    outputdir = outputdir if outputdir is not None else '/tmp'
    for ext in [".png", ".svg"]:
        s_file = os.path.join(outputdir, outputfile + ext)
        if os.path.exists(s_file):
            os.remove(s_file)
    if links is None:
        util.warn_msg('No link to plot, simply ignore')
        #return
    tmp = tempfile.NamedTemporaryFile(dir=outputdir, delete=False, prefix="CIRCOS_", suffix=".txt")
    conf_file = tmp.name
    S_tmp_file = [conf_file]
    s_conf = util.read_string(self.TEMPLATE)
    kary_file = re.sub('CIRCOS_', 'CIRCOS_KARYOTYPE_', conf_file)
    util.save_string(kary_file, karyotype)
    S_tmp_file.append(kary_file)
    s_conf = re.sub(r'@KARYOTYPE@', kary_file, s_conf)
    r0 = 0.90
    s_plot = ""
    if hits is None:
        hits = []
    elif type(hits) is not list:
        hits = [hits]
    for i, s_hit in enumerate(hits):
        hit_file = re.sub('CIRCOS_', 'CIRCOS_HIT_%d_' % i, conf_file)
        util.save_list(hit_file, s_hit)
        S_tmp_file.append(hit_file)
        s_plot += "<plot>\n"
        s_plot += "file = " + hit_file + "\n"
        s_plot += "r0 = " + ('%.3f' % r0) + "r\n"
        s_plot += "r1 = " + ('%.3f' % r0) + "r+70p\n"
        s_plot += "stroke_thickness = 0\n"
        s_plot += "min = 0\n"
        s_plot += "max = 2\n"
        s_plot += "color = oranges-3-seq\n"
        s_plot += "</plot>\n\n"
        r0 -= 0.05
    s_conf = re.sub(r'@PLOTS@', s_plot, s_conf)
    # Avoid using pandas here, so this script can be used in CGI on the ldweb
    # server, where numpy is not installed correctly
    #t_chr = pd.read_csv(os.path.join(Circos.HOME, "karyotype_" + pid + ".tmp"), sep=r'\s+', header=None)
    #s_conf = re.sub(r'@CHROMOSOMES@', ";".join(t_chr[2]), s_conf)
    S = karyotype.split("\n")
    S_chr = []
    for s in S:
        if s.strip() == '':
            break
        S_chr.append(re.split(Circos.DELIMITER, s)[2])
    s_conf = re.sub(r'@CHROMOSOMES@', ";".join(S_chr), s_conf)
    s_symbol = ""
    if symbol is not None:
        symbol_file = re.sub('CIRCOS_', 'CIRCOS_SYMBOL_', conf_file)
        util.save_string(symbol_file, symbol)
        S_tmp_file.append(symbol_file)
        s_symbol += "<plot>\n"
        s_symbol += "type = text\n"
        s_symbol += "color = black\n"
        s_symbol += "file = " + symbol_file + "\n"
        s_symbol += "r0 = 1.02r\n"
        s_symbol += "r1 = 1.2r\n"
        s_symbol += "label_size = 12p\n"
        s_symbol += "label_font = condensed\n"
        s_symbol += "padding = 0p\n"
        s_symbol += "rpadding = 0p\n"
        s_symbol += "</plot>\n"
    s_conf = re.sub(r'@SYMBOL@', s_symbol, s_conf)
    S_color = ['107,174,214', '116,196,118', '106,81,163']
    s_link = ""
    # Circos does not seem to work well with too many edges; it will not
    # draw edges after maybe 20000-ish
    MAX_EDGES = 10000
    if links is not None:
        if type(links) is str:
            links = [links]
        for i in range(len(links) - 1, -1, -1):
            link_file = re.sub('CIRCOS_', 'CIRCOS_LINK%02d_' % (i + 1), conf_file)
            S_tmp_file.append(link_file)
            S_edges = links[i].strip().split("\n")
            n_edge = len(S_edges) // 2
            if n_edge > MAX_EDGES:
                # randomly sample a subset of edges (pairs of lines)
                IDX = np.repeat(np.random.permutation(list(range(0, len(S_edges), 2)))[:MAX_EDGES], 2)
                IDX[list(range(1, len(IDX), 2))] += 1
                S_edges = pd.Series(S_edges)[IDX].astype(str)
                links[i] = "\n".join(S_edges)
            util.save_string(link_file, links[i])
            s_link += "<link link" + str(i + 1) + ">\n"
            s_link += "show = yes\n"
            s_link += "color = " + S_color[(i + len(S_color) - 1) % len(S_color)] + "\n"
            s_link += "file = " + link_file + "\n"
            s_link += "</link>\n\n"
    s_conf = re.sub(r'@LINKS@', s_link, s_conf)
    util.save_string(conf_file, s_conf)

    # Run Circos
    s_cmd = self.BIN + " -conf " + conf_file
    s_cmd += ' -outputdir ' + outputdir
    s_cmd += ' -outputfile ' + outputfile
    #sw.check('prepare conf file')
    print(s_cmd)
    util.unix(s_cmd, l_print=False, l_error=False)
    l_remove_temp = True
    if l_remove_temp:
        for f in S_tmp_file:
            os.remove(f)
    s_file = os.path.join(outputdir, outputfile + ".png")
    #sw.check('make circos image')
    if os.path.exists(s_file):
        return s_file
    return None