# ---------------------------------------------------------------------------
# Train the siamese network, plot the loss curves, then draw a ROC curve from
# the raw model outputs on the validation set.
# ---------------------------------------------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 20
best_val_loss = None  # None makes TRAIN start from an infinite best loss
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
save_path = 'cifar_net.pt'
model = model.to(device)

training_loss, validation_loss = TRAIN(model, train_loader, valid_loader,
                                       num_epochs, criterion, optimizer,
                                       best_val_loss, device, save_path)

plt.plot(training_loss)
plt.plot(validation_loss)
plt.show()

# Collect labels and raw outputs over the whole validation set.
total_label = []
total_output = []
model.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    for inputs1_valid, inputs2_valid, labels_valid in tqdm(valid_loader):
        inputs1_valid = inputs1_valid.to(device)
        inputs2_valid = inputs2_valid.to(device)
        labels_valid = labels_valid.to(device)
        outputs = model(inputs1_valid, inputs2_valid)
        # Tensors living on a CUDA device cannot be converted to NumPy
        # directly; bring them back to host memory first.
        total_label.extend(labels_valid.detach().cpu().numpy())
        total_output.extend(outputs.detach().cpu().numpy())

fpr, tpr, thresholds = roc_curve(total_label,
                                 total_output,
                                 pos_label=None,
                                 sample_weight=None,
                                 drop_intermediate=True)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, label="siamese, area=%0.2f)" % (roc_auc))
def test(seq2sql_model, roberta_model, model_optimizer, roberta_tokenizer,
         roberta_config, path_wikisql, test_loader, mode="dev"):
    """Run beam-search inference over ``test_loader`` and score the predictions.

    Both models are switched to eval mode. For every batch the function
    builds RoBERTa embeddings, calls ``seq2sql_model.beam_forward`` and
    compares the predicted SQL components with the ground truth, including an
    execution check against the database ``<path_wikisql>/<mode>.db``.

    ``model_optimizer`` is accepted but not referenced in this function.

    Returns:
        tuple: ``(accuracy, results, count_batchlist)`` where

        * ``accuracy`` is a 10-element list
          ``[None, select_column, select_agg, where_number, where_column,
          where_operator, where_value_index, where_value, logical_form,
          execution]``; the first slot is always ``None`` here,
        * ``results`` is a list of per-example dicts (either a predicted
          ``query`` or an ``error`` entry for skipped batches),
        * ``count_batchlist`` holds the per-batch correctness lists.
    """
    roberta_model.eval()
    seq2sql_model.eval()

    count_batchlist = []
    results = []

    count_select_column = 0  # number of correct select-column predictions
    count_select_agg = 0  # number of correct select-aggregation predictions
    count_where_number = 0  # number of correct where-number predictions
    count_where_column = 0  # number of correct where-column predictions
    count_where_operator = 0  # number of correct where-operator predictions
    count_where_value = 0  # number of correct where-value predictions
    count_where_value_index = 0  # number of correct where-value indices (on question tokens)
    count_logical_form_acc = 0  # number of examples with a correct logical form
    count_execution_acc = 0  # number of examples with a correct execution result

    # Engine for SQL querying.
    engine = DBEngine(os.path.join(path_wikisql, mode + ".db"))

    count = 0  # total number of examples seen; denominator of every accuracy
    for batch_index, batch in enumerate(tqdm(test_loader)):
        count += len(batch)

        # if batch_index > 2:
        #     break

        # Get fields
        natural_lang_utterance, natural_lang_utterance_tokenized, sql_canonical, \
            _, _, table_metadata, _, headers = load_data.get_fields(batch)

        select_column_ground, select_agg_ground, where_number_ground, \
            where_column_ground, where_operator_ground, _ = roberta_training.get_ground_truth_values(sql_canonical)

        # Embed the question tokens and the table headers with RoBERTa.
        natural_lang_embeddings, header_embeddings, question_token_length, header_token_length, header_count, \
            natural_lang_double_tokenized, punkt_to_roberta_token_indices, roberta_to_punkt_token_indices \
            = roberta_training.get_wemb_roberta(roberta_config, roberta_model, roberta_tokenizer,
                                                natural_lang_utterance_tokenized, headers,max_seq_length= 222,
                                                num_out_layers_n=2, num_out_layers_h=2)
        # natural_lang_embeddings: natural language embedding
        # header_embeddings: header embedding
        # question_token_length: token lengths of each question
        # header_token_length: header token lengths
        # header_count: the number of columns (headers) of the tables.

        # Get the ground-truth where-value index under the CoreNLP tokenization
        # scheme. It's done already on the trainset.
        where_value_index_ground_corenlp = corenlp_local.get_g_wvi_corenlp(
            batch)
        try:
            # Map the CoreNLP-level indices onto the RoBERTa token indices.
            where_value_index_ground = corenlp_local.get_g_wvi_bert_from_g_wvi_corenlp(
                punkt_to_roberta_token_indices, where_value_index_ground_corenlp)
        except:
            # Exception happens when where-condition is not found in nlu_tt.
            # In this case, that train example is not used.
            # During test, that example is considered wrongly answered.
            # e.g. train: 32.
            # The whole batch is recorded as skipped; its examples still count
            # towards `count`, so they lower every accuracy.
            for b in range(len(natural_lang_utterance)):
                curr_results = {}
                curr_results["error"] = "Skip happened"
                curr_results["nlu"] = natural_lang_utterance[b]
                curr_results["table_id"] = table_metadata[b]["id"]
                results.append(curr_results)
            continue

        # Per-example question-token knowledge; all zeros (padded to the
        # longest question in the batch) when the example carries none.
        knowledge = []
        for k in batch:
            if "bertindex_knowledge" in k:
                knowledge.append(k["bertindex_knowledge"])
            else:
                knowledge.append(max(question_token_length) * [0])

        # Per-example header knowledge; all zeros (padded to the largest
        # header count in the batch) when the example carries none.
        knowledge_header = []
        for k in batch:
            if "header_knowledge" in k:
                knowledge_header.append(k["header_knowledge"])
            else:
                knowledge_header.append(max(header_count) * [0])

        # score
        _, _, _, select_column_predict, select_agg_predict, where_number_predict, sql_predict = seq2sql_model.beam_forward(
            natural_lang_embeddings, question_token_length,
            header_embeddings, header_token_length, header_count,
            table_metadata, natural_lang_utterance_tokenized,
            natural_lang_double_tokenized, roberta_to_punkt_token_indices,
            natural_lang_utterance, beam_size=4,
            knowledge=knowledge, knowledge_header=knowledge_header)

        # sort and generate
        where_column_predict, where_operator_predict, _, sql_predict = infer_functions.sort_and_generate_pr_w(
            sql_predict)

        # Following variables are just for the consistency with no-EG case.
        where_value_index_predict = None  # not used

        for b, sql_predict_instance in enumerate(sql_predict):
            curr_results = {}
            curr_results["query"] = sql_predict_instance
            curr_results["table_id"] = table_metadata[b]["id"]
            curr_results["nlu"] = natural_lang_utterance[b]
            results.append(curr_results)

        # Calculate accuracy
        select_column_batchlist, select_agg_batchlist, where_number_batchlist, \
            where_column_batchlist, where_operator_batchlist, \
            where_value_index_batchlist, where_value_batchlist = seq2sql_model_training_functions.get_cnt_sw_list(
                select_column_ground, select_agg_ground, where_number_ground,
                where_column_ground, where_operator_ground, where_value_index_ground,
                select_column_predict, select_agg_predict, where_number_predict,
                where_column_predict, where_operator_predict, where_value_index_predict,
                sql_canonical, sql_predict,
                mode='test')

        logical_form_acc_batchlist = seq2sql_model_training_functions.get_cnt_lx_list(
            select_column_batchlist, select_agg_batchlist, where_number_batchlist,
            where_column_batchlist, where_operator_batchlist, where_value_batchlist)
        # lx stands for logical form accuracy

        # Execution accuracy test.
        execution_acc_batchlist, _, _ = seq2sql_model_training_functions.get_cnt_x_list(
            engine, table_metadata, select_column_ground, select_agg_ground,
            sql_canonical, select_column_predict, select_agg_predict, sql_predict)

        # statistics
        # ave_loss += loss.item()

        # count
        count_select_column += sum(select_column_batchlist)
        count_select_agg += sum(select_agg_batchlist)
        count_where_number += sum(where_number_batchlist)
        count_where_column += sum(where_column_batchlist)
        count_where_operator += sum(where_operator_batchlist)
        count_where_value_index += sum(where_value_index_batchlist)
        count_where_value += sum(where_value_batchlist)
        count_logical_form_acc += sum(logical_form_acc_batchlist)
        count_execution_acc += sum(execution_acc_batchlist)

        count_curr_batchlist = [
            select_column_batchlist, select_agg_batchlist, where_number_batchlist,
            where_column_batchlist, where_operator_batchlist, where_value_batchlist,
            logical_form_acc_batchlist, execution_acc_batchlist
        ]
        count_batchlist.append(count_curr_batchlist)

    # ave_loss /= cnt
    # NOTE(review): `count` is 0 for an empty loader, which makes the
    # divisions below raise ZeroDivisionError.
    select_column_acc = count_select_column / count
    select_agg_acc = count_select_agg / count
    where_number_acc = count_where_number / count
    where_column_acc = count_where_column / count
    where_operator_acc = count_where_operator / count
    where_value_index_acc = count_where_value_index / count
    where_value_acc = count_where_value / count
    logical_form_acc = count_logical_form_acc / count
    execution_acc = count_execution_acc / count

    # The leading None occupies the slot of the commented-out average loss.
    accuracy = [
        None, select_column_acc, select_agg_acc, where_number_acc,
        where_column_acc, where_operator_acc, where_value_index_acc,
        where_value_acc, logical_form_acc, execution_acc
    ]

    return accuracy, results, count_batchlist
def train(args, train_dataset, eval_dataset, model: PreTrainedModel,
          tokenizer: PreTrainedTokenizer) -> Tuple[int, float]:
    """Train ``model`` on ``train_dataset`` as a causal language model.

    Every batch is used both as input and as labels. The function sets up
    the dataloader, an AdamW optimizer with a linear warmup/decay schedule,
    optional apex fp16, DataParallel / DistributedDataParallel wrapping, and
    periodic TensorBoard logging and checkpointing.

    Args:
        args: Run configuration. ``train_batch_size`` is written to it, and
            ``num_train_epochs`` as well when ``max_steps > 0``.
        train_dataset: Dataset of token-id tensors, padded per batch.
        eval_dataset: Passed to ``evaluate`` when ``evaluate_during_training``
            is set and training is not distributed.
        model: Model to train; unwrapped from ``.module`` if already wrapped.
        tokenizer: Supplies the padding id and the embedding-table size; saved
            alongside every checkpoint.

    Returns:
        ``(global_step, mean_loss)`` where ``mean_loss`` is the accumulated
        training loss divided by ``global_step``, or ``0.0`` when no optimizer
        step was taken.

    Raises:
        ImportError: if ``args.fp16`` is set and apex is not installed.
    """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter(args.output_dir)

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

    def collate(examples: List[torch.Tensor]):
        # Pad with the tokenizer's pad id when it has one, otherwise with
        # pad_sequence's default value.
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)
        return pad_sequence(examples,
                            batch_first=True,
                            padding_value=tokenizer.pad_token_id)

    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate,
                                  drop_last=True)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    model = model.module if hasattr(
        model, "module") else model  # Take care of distributed/parallel training
    model.resize_token_embeddings(len(tokenizer))
    # add_special_tokens_(model, tokenizer)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)

    # Check if saved optimizer or scheduler states exist
    if (args.model_name_or_path and os.path.isfile(
            os.path.join(args.model_name_or_path, "optimizer.pt"))
            and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt"))):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
        )

    # Train!
    logger.info("***** Running training *****")
    logger.info(" Num examples = %d", len(train_dataset))
    logger.info(" Num Epochs = %d", args.num_train_epochs)
    logger.info(" Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        " Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info(" Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info(" Total optimization steps = %d", t_total)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if args.model_name_or_path and os.path.exists(args.model_name_or_path):
        try:
            # set global_step to global_step of last saved checkpoint from model path
            checkpoint_suffix = args.model_name_or_path.split("-")[-1].split(
                "/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) //
                                             args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (
                len(train_dataloader) // args.gradient_accumulation_steps)

            logger.info(
                " Continuing training from checkpoint, will skip to saved global_step"
            )
            logger.info(" Continuing training from epoch %d", epochs_trained)
            logger.info(" Continuing training from global step %d",
                        global_step)
            logger.info(" Will skip the first %d steps in the first epoch",
                        steps_trained_in_current_epoch)
        except ValueError:
            # The path does not end in "-<step>": treat it as a fresh start.
            logger.info(" Starting fine-tuning.")

    tr_loss, logging_loss = 0.0, 0.0

    model.zero_grad()
    train_iterator = trange(epochs_trained,
                            int(args.num_train_epochs),
                            desc="Epoch",
                            disable=None)
    if args.seed:
        set_seed(args)  # Added here for reproducibility
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=None)
        for step, batch in enumerate(epoch_iterator):

            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            inputs, labels = (batch, batch)
            # Batches padded to more than 1024 tokens are skipped entirely.
            if inputs.shape[1] > 1024:
                continue
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            outputs = model(inputs, labels=labels)
            loss = outputs[
                0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if args.local_rank == -1 and args.evaluate_during_training:
                        results = evaluate(args, model, tokenizer,
                                           eval_dataset)
                        for key, value in results.items():
                            tb_writer.add_scalar("eval_{}".format(key), value,
                                                 global_step)
                    tb_writer.add_scalar("lr",
                                         scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                         args.logging_steps, global_step)
                    logger.info(" global_step = %s, loss = %s", global_step,
                                loss.item())
                    logging_loss = tr_loss

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    checkpoint_prefix = "checkpoint"
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir,
                        "{}-{}".format(checkpoint_prefix, global_step))
                    os.makedirs(output_dir, exist_ok=True)
                    model_to_save = (
                        model.module if hasattr(model, "module") else model
                    )  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)

                    _rotate_checkpoints(args, checkpoint_prefix)

                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
                    logger.info("Saving optimizer and scheduler states to %s",
                                output_dir)

            if 0 < args.max_steps < global_step:
                epoch_iterator.close()
                break
        if 0 < args.max_steps < global_step:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    # global_step stays 0 when no optimizer step ran (e.g. zero epochs, or
    # every batch skipped); report a mean loss of 0.0 instead of raising
    # ZeroDivisionError.
    mean_loss = tr_loss / global_step if global_step > 0 else 0.0
    return global_step, mean_loss
def scaled_least_squares(function, *args):
    """Call ``least_squares(function, ...)`` once per position across ``args``.

    The positional sequences in ``args`` are zipped together; the i-th fit
    receives the i-th element of every sequence as its extra positional
    arguments. A transient progress bar labelled ``'scales'`` tracks the fits.

    Returns:
        list: one ``least_squares`` result per zipped position, in order.
    """
    per_scale_arguments = zip(*args)
    n_scales = len(args[0])
    progress = tqdm(per_scale_arguments,
                    total=n_scales,
                    leave=False,
                    desc='scales')

    fits = []
    for scaledargs in progress:
        fits.append(least_squares(function, *scaledargs))
    return fits
] net = RatingNet(512, 512, 1024, 4028, 1, use_id, embedding_size, attention, num_rounds=[link_nr, place_nr]).cuda() optimizer = torch.optim.AdamW(net.parameters(), lr=1e-4, amsgrad=True) pb = tqdm(range(10)) for epoch in pb: net.train() eloss = 0 for i, (u, m, r, label) in enumerate(tdl): out = net(ug, mg, rg, u.cuda(), m.cuda(), r.cuda()) loss = f.mse_loss(out, label.cuda().float()) optimizer.zero_grad() loss.backward() optimizer.step() eloss += math.sqrt(loss.detach().item()) pb.set_description(f"{epoch}| {eloss/(i+1)} | {i}") vloss = evaluate(net, vdl) eloss /= i
def train_multi_task(n_fold, model_class, class_parms, train_dataset, val_dataset, test_user_id):
    """Train one multi-task (gender + age) model and predict on val/test.

    Builds ``model_class(**class_parms)``, trains it for ``args.epoch`` epochs
    with AdamW and a linear warmup schedule, and logs loss and accuracy for
    the train and validation sets after every epoch. Forward and backward
    wall-clock time is accumulated into the module-level ``TIME_FORWARD`` and
    ``TIME_BACKWARD`` counters. Hyper-parameters come from the module-level
    ``args`` object.

    Args:
        n_fold: Fold number; only used in the log message.
        model_class: Callable that builds the model from ``class_parms``.
        class_parms: Keyword arguments for ``model_class``.
        train_dataset: Mapping with keys ``'x'``, ``'gender'`` and ``'age'``.
        val_dataset: Mapping with the same keys as ``train_dataset``.
        test_user_id: Inputs passed to ``predict_batch_multi_task`` for the
            test prediction.

    Returns:
        tuple: ``(model, val_ret_dict, test_ret_dict)``.
    """
    global TIME_FORWARD, TIME_BACKWARD
    train_user_id = train_dataset['x']
    train_gender = train_dataset['gender']
    train_age = train_dataset['age']
    logging.info('train number %d, val number %d' % (len(train_user_id), len(val_dataset['x'])))
    torch_dataset = AdDataset(train_user_id, train_gender, train_age)
    data_loader = Data.DataLoader(
        dataset=torch_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers = args.n_worker,
    )
    model = model_class(**class_parms).to(args.device)

    # Parameters whose name contains one of these substrings get no weight decay.
    no_decay = ["bias", "gamma","beta"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr = args.lr, weight_decay = args.weight_decay)
    # Warm up for about one epoch's worth of steps, then decay linearly over
    # the remaining training steps.
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=int(len(train_user_id)//(args.batch_size)), num_training_steps=int(len(train_user_id) / args.batch_size * args.epoch)
    )

    for epoch in range(args.epoch):
        loss_list, loss_gender_list, loss_age_list = [], [], []
        model.train()
        for step, data in enumerate(tqdm(data_loader)):
            #forward
            S = time.time()
            loss, loss_gender, loss_age, pre_gender, pre_age, _ = model(**data)
            TIME_FORWARD += time.time() - S
            loss_list.append(float(loss))
            loss_gender_list.append(float(loss_gender))
            loss_age_list.append(float(loss_age))

            #backward
            S = time.time()
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = 5)
            optimizer.step()
            scheduler.step()
            TIME_BACKWARD += time.time() - S

        # End-of-epoch evaluation on the training and validation sets.
        model.eval()
        train_acc_gender, train_acc_age = eval_data(model, train_dataset['x'], train_dataset['gender'], train_dataset['age'])
        val_acc_gender, val_acc_age = eval_data(model, val_dataset['x'], val_dataset['gender'], val_dataset['age'])
        logging.info('forward:%f backward:%f'%(TIME_FORWARD,TIME_BACKWARD))
        # "score" is the sum of the two validation accuracies.
        logging.info("flod %d epoch %d : \n loss: %f loss_gender : %f, loss_age : %f, gender : %f, %f, age : %f, %f, score:%f" %
                     (n_fold, epoch, np.mean(loss_list), np.mean(loss_gender_list), np.mean(loss_age_list),
                      train_acc_gender, val_acc_gender, train_acc_age, val_acc_age, val_acc_gender + val_acc_age))

    val_ret_dict = predict_batch_multi_task(model, val_dataset['x'])
    test_ret_dict = predict_batch_multi_task(model, test_user_id)
    return model, val_ret_dict, test_ret_dict
states = [state_ic.clone() for _ in range(ncells + 1)] # Create the equilibrium solver object for the repeated equilibrium calculation. solver = EquilibriumSolver(system) # Running the reactive transport simulation loop. We start with the completion brine injection. # + step = 0 # the current step number t = 0.0 # the current time (in seconds) # Output the initial state of the reactive transport calculation outputstate_df(step, system, reactions, states) with tqdm(total=nsteps_cb, desc="45 hours of completion brine (CB) injection") as pbar: while step < nsteps_cb: # Perform transport calculations bfluid, bsolid, b = transport(states, bfluid, bsolid, b, b_bc_cb, nelems, ifluid_species, isolid_species) # Perform reactive chemical calculations states = reactive_chemistry(solver, states, b) # Increment time step and number of time steps t += dt step += 1 # Output the current state of the reactive transport calculation outputstate_df(step, system, reactions, states)
def main(num_epoch, max_length, batch_size, model_name):
    """
    main function to train and evaluate BERT model

    Reads the review data, label-encodes the scores, tokenizes the text,
    fine-tunes ``bert-base-uncased`` for sequence classification, evaluates
    after every epoch and finally saves the model and optimizer state to
    ``<model_name>.tar``.

    :param num_epoch: number of epochs for training the model
    :param max_length: max length of the input string for training
    :param batch_size: batch size for training the model
    :param model_name: the name of the BERT model
    :return: None
    """
    # check whether uses gpu or not
    check_gpu()

    # print model info
    print('Start training BERT model.')
    print('Number of epochs: ', num_epoch)
    print('Max input length: ', max_length)
    print('Batch size: ', batch_size)

    # read in data (first 500000 rows only)
    df = pd.read_csv("s3://msia490project/processed_video_reviews.csv").head(
        500000)
    df['reviewText'] = df['reviewText'].astype(str)

    # Encode the classes for BERT as consecutive integer ids.
    encoder = LabelEncoder()
    df['score'] = encoder.fit_transform(df['score'])

    # Set X and y.
    X = df['reviewText']
    y = df['score']

    # Split data into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased',
                                                           do_lower_case=True)

    # Encoding the words in the training data into vectors.
    max_length = int(max_length)
    encoded_data_train = tokenizer.batch_encode_plus(
        X_train,
        truncation=True,
        add_special_tokens=True,
        return_attention_mask=True,
        pad_to_max_length=True,
        max_length=max_length,
        return_tensors='pt')

    # Encoding the words in the test data into vectors.
    encoded_data_test = tokenizer.batch_encode_plus(X_test,
                                                    truncation=True,
                                                    add_special_tokens=True,
                                                    return_attention_mask=True,
                                                    pad_to_max_length=True,
                                                    max_length=max_length,
                                                    return_tensors='pt')

    # Get inputs and attention masks from previously encoded data.
    input_ids_train = encoded_data_train['input_ids']
    attention_masks_train = encoded_data_train['attention_mask']
    labels_train = torch.tensor(y_train.values)

    input_ids_test = encoded_data_test['input_ids']
    attention_masks_test = encoded_data_test['attention_mask']
    labels_test = torch.tensor(y_test.values)

    # Instantiate TensorDataset
    dataset_train = TensorDataset(input_ids_train, attention_masks_train,
                                  labels_train)
    dataset_test = TensorDataset(input_ids_test, attention_masks_test,
                                 labels_test)

    # Initialize the model.
    model = transformers.BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=5,
        output_attentions=False,
        output_hidden_states=False)

    # DataLoaders for running the model
    dataloader_train = DataLoader(dataset_train,
                                  sampler=RandomSampler(dataset_train),
                                  batch_size=int(batch_size))
    dataloader_test = DataLoader(dataset_test,
                                 sampler=SequentialSampler(dataset_test),
                                 batch_size=int(batch_size))

    # Setting hyper-parameters
    optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)
    epochs = int(num_epoch)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=len(dataloader_train) * epochs)

    seed_val = 15
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # Use the GPU when one is present; fall back to the CPU instead of
    # crashing on `model.to('cuda')` on a machine without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # train the model
    model.to(device)
    for epoch in tqdm(range(1, epochs + 1)):
        model.train()
        loss_train_total = 0

        progress_bar = tqdm(dataloader_train,
                            desc='Epoch {:1d}'.format(epoch),
                            leave=False,
                            disable=False)
        for batch in progress_bar:
            model.zero_grad()
            # batch is (input_ids, attention_mask, labels); move it once.
            batch = tuple(b.to(device) for b in batch)
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[2],
            }

            outputs = model(**inputs)
            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            # Show the batch loss as returned by the model. The previous
            # code divided it by len(batch), which is the length of the
            # 3-tuple above rather than a batch size.
            progress_bar.set_postfix(
                {'training_loss': '{:.3f}'.format(loss.item())})

        # progress bar
        tqdm.write(f'\nEpoch {epoch}')
        loss_train_avg = loss_train_total / len(dataloader_train)
        tqdm.write(f'Training loss: {loss_train_avg}')

        # evaluate the model
        run_evaluation(dataloader_test, model, device, encoder, epoch,
                       model_name)

    # save the model for future use/retrain
    torch.save(
        {
            'epoch': num_epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, model_name + '.tar')
def _summed_joint_loss(criterion, pred_heats, true_heats, num_joints=21):
    """Sum ``criterion`` over the first ``num_joints`` slices of dimension 1."""
    return sum(
        criterion(pred_heats[:, i], true_heats[:, i])
        for i in range(num_joints))


def evaluate(model: Union[MultiViewHandPoseCNN, MultiViewHandPoseCNNBranch],
             data: DataLoader,
             criterion: Callable[[torch.FloatTensor, torch.FloatTensor],
                                 torch.FloatTensor],
             device: torch.device,
             projection: Optional[int] = None,
             verbose: Optional[bool] = False) -> float:
    """Compute the mean per-batch heat-map loss of ``model`` over ``data``.

    For a ``MultiViewHandPoseCNN`` the loss of a batch is the sum of the
    per-joint losses of the three predicted views (indices 0, 1, 2 of
    dimension 2 of the ground-truth heat maps). For a
    ``MultiViewHandPoseCNNBranch`` only the view selected by ``projection``
    is fed to the model and scored.

    Args:
        model: Network to evaluate; put into eval mode.
        data: Iterable yielding ``(batch_proj, batch_heats)`` pairs.
        criterion: Loss applied to one joint's predicted/true heat maps.
        device: Device the tensors are moved to before the forward pass.
        projection: View index; required for the branch model.
        verbose: Wrap ``data`` in a transient progress bar when true.

    Returns:
        Mean of the per-batch loss values.

    Raises:
        ValueError: if ``model`` is neither supported type, or is a branch
            model and ``projection`` is ``None``. Previously this case fell
            through with ``loss = 0`` and failed with an ``AttributeError``
            on ``loss.item()``.
    """
    is_full_model = isinstance(model, MultiViewHandPoseCNN)
    is_branch_model = (not is_full_model
                       and isinstance(model, MultiViewHandPoseCNNBranch)
                       and projection is not None)
    if not (is_full_model or is_branch_model):
        raise ValueError(
            "evaluate() needs a MultiViewHandPoseCNN, or a "
            "MultiViewHandPoseCNNBranch together with a projection index")

    loss_data = []

    # Activate eval mode
    model.eval()

    with torch.no_grad():
        if verbose:
            data = tqdm(data, leave=False)

        for (batch_proj, batch_heats) in data:
            if is_full_model:
                # Extract ground truth heat map: (batch_size, 21, 3, 18, 18) -> 3 x (batch_size, 21, 18, 18)
                xy_true_heats = batch_heats[:, :, 0]
                yz_true_heats = batch_heats[:, :, 1]
                zx_true_heats = batch_heats[:, :, 2]

                # Move tensors to the target device
                batch_proj = batch_proj.to(device)
                xy_true_heats = xy_true_heats.to(device)
                yz_true_heats = yz_true_heats.to(device)
                zx_true_heats = zx_true_heats.to(device)

                # Make prediction
                xy_pred_heats, yz_pred_heats, zx_pred_heats = model(batch_proj)

                # Compute loss: per-joint loss summed within and across views
                xy_loss = _summed_joint_loss(criterion, xy_pred_heats,
                                             xy_true_heats)
                yz_loss = _summed_joint_loss(criterion, yz_pred_heats,
                                             yz_true_heats)
                zx_loss = _summed_joint_loss(criterion, zx_pred_heats,
                                             zx_true_heats)
                loss = xy_loss + yz_loss + zx_loss
            else:
                # Extract ground truth heat map: (batch_size, 21, 3, 18, 18) -> (batch_size, 21, 18, 18)
                true_heats = batch_heats[:, :, projection]

                # Keep only the selected view and restore the dropped dimension
                batch_proj = batch_proj[:, projection].unsqueeze(1)
                batch_proj = batch_proj.to(device)
                true_heats = true_heats.to(device)

                # Make prediction
                pred_heats = model(batch_proj)

                # Compute loss
                loss = _summed_joint_loss(criterion, pred_heats, true_heats)

            # Update history
            loss_data.append(loss.item())

    return mean(loss_data)
def calculate_dom_distance_matrix():
    """Return the ``ndoms`` x ``ndoms`` matrix of pairwise DOM distances.

    Entry ``[i, j]`` is the Euclidean norm of the difference between the
    i-th and j-th rows returned by ``get_dom_positions()``. Progress over all
    ``ndoms**2`` index pairs is shown with a progress bar.
    """
    positions = get_dom_positions()
    distance_matrix = np.empty((ndoms, ndoms))

    index_pairs = np.ndindex(distance_matrix.shape)
    for row, col in tqdm(index_pairs, total=ndoms**2):
        separation = positions[row] - positions[col]
        distance_matrix[row, col] = np.linalg.norm(separation)

    return distance_matrix
# Create the equilibrium solver object for the repeated equilibrium calculation. solver = KineticSolver(reactions) solver.setPartition(partition) # Running the reactive transport simulation loop. We start with the completion brine injection. # + step = 0 # the current step number t = 0.0 # the current time (in seconds) # Output the initial state of the reactive transport calculation outputstate_df(step, system, states) with tqdm(total=nsteps, desc="Brine injection") as pbar: while step < nsteps: # Perform transport calculations bfluid, bsolid, b = transport(states, bfluid, bsolid, b, b_bc, nelems, ifluid_species, isolid_species) # Perform reactive chemical calculations states = reactive_chemistry(solver, states, t, dt, b) # Increment time step and number of time steps t += dt step += 1 # Output the current state of the reactive transport calculation outputstate_df(step, system, states)
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
# NOTE(review): this binds the top-level `matplotlib` package to the name
# `plt`, not `matplotlib.pyplot` — confirm that is intended.
import matplotlib as plt
# small library for seeing the progress of loops.
from tqdm.notebook import tqdm
import tensorflow as tf

# Create a TF1-compatible session on the first GPU and register it as the
# Keras backend session.
with tf.device("/gpu:0"):
    config = tf.compat.v1.ConfigProto(
        gpu_options=tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=1)
        # device_count = {'GPU': 1}
    )
    # Let the GPU memory allocation grow on demand.
    config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(session)

# Register tqdm's pandas integration.
tqdm().pandas()
# The bare string below is a disabled GPU-availability check; as a string
# expression it has no effect.
""" import tensorflow as tf device_name = tf.test.gpu_device_name() if device_name != '/device:GPU:0': raise SystemError('GPU device not found') print('Found GPU at: {}'.format(device_name)) """


def load_doc(filename):
    """
    Function for loading various files

    :param filename: Source Path
    """
    # Opening the file as read only and reading its whole content into `text`
    with open(filename) as file:
        text = file.read()
else: with open('./embeds/pickle_embed.pickle', 'rb') as handle: embeddings = pickle.load(handle) with open('./embeds/vocab_clean.pickle', 'rb') as handle: vocab = pickle.load(handle) # generate our embeddings if train_embedding_doc == True: from os import listdir test = [] vocab_embedding_docs = {} i = 0 for file in tqdm(listdir('./embeds/Vocab_occurences/pickles')): file = file.split('.')[0] try: if (len(extract_file_embedding(file))) == 300: vocab_embedding_docs[file] = extract_file_embedding(file) except: vocab_embedding_docs[file] = np.array([0 for t in range(300)]) test.append(file) pass else: with open('./embeds/doc_vocab_embed.pickle', 'rb') as handle: vocab_embedding_docs = pickle.load(handle) # Let's generate our Features
def _run_validation(net, valid_loader, criterion, device, show_progress=False):
    """Evaluate ``net`` on ``valid_loader`` in eval mode without gradients.

    Returns ``(summed_batch_loss, n_correct)``. The network is left in eval
    mode; callers that continue training must call ``net.train()`` again.
    """
    batches = tqdm(valid_loader) if show_progress else valid_loader
    summed_loss = 0.0
    n_correct = 0
    net.eval()
    with torch.no_grad():
        for inputs1_valid, inputs2_valid, labels_valid in batches:
            inputs1_valid = inputs1_valid.to(device)
            inputs2_valid = inputs2_valid.to(device)
            labels_valid = labels_valid.to(device)
            outputs = net(inputs1_valid, inputs2_valid)
            summed_loss += criterion(outputs, labels_valid).item()
            _, preds = torch.max(outputs.data, 1)
            n_correct += torch.sum(preds == labels_valid.data).item()
    return summed_loss, n_correct


def TRAIN(net, train_loader, valid_loader, num_epochs, criterion, optimizer,
          val_loss, device, save_name):
    """Train ``net`` on pairs of inputs and checkpoint the best model.

    Every 10th training step the current training loss and the mean
    validation loss are recorded. After each epoch the full validation set
    is evaluated, a summary line is printed and, when the validation loss
    improved, ``save_checkpoint(save_name, net, optimizer, best_val_loss)``
    is called.

    Args:
        net: Model called as ``net(inputs1, inputs2)``.
        train_loader: Yields ``(inputs1, inputs2, labels)`` training batches.
        valid_loader: Yields validation batches of the same form.
        num_epochs: Number of passes over ``train_loader``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the parameters of ``net``.
        val_loss: Best validation loss so far when resuming, or ``None``.
        device: Device every batch is moved to.
        save_name: Path passed to ``save_checkpoint``.

    Returns:
        tuple: ``(training_loss, validation_loss)``, the values recorded
        every 10th step.
    """
    if val_loss is None:
        best_val_loss = float("Inf")
    else:
        best_val_loss = val_loss
        print('Resume training')

    training_step = 0
    training_loss = []
    validation_loss = []
    for epoch in range(num_epochs):  # loop over the dataset multiple times
        net.train()
        running_loss = 0.0
        running_corrects = 0
        for inputs1, inputs2, labels in tqdm(train_loader):
            inputs1 = inputs1.to(device)
            inputs2 = inputs2.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = net(inputs1, inputs2)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            training_step += 1

            running_loss += loss.item()
            # NOTE(review): accuracy takes the arg-max over dimension 1. For
            # a single-logit output (e.g. with BCEWithLogitsLoss) a
            # threshold would be the meaningful prediction — confirm.
            _, preds = torch.max(outputs.data, 1)
            running_corrects += torch.sum(preds == labels.data).item()

            # The interim validation result is only used on every 10th step,
            # so the pass is only run then. Training mode is restored
            # afterwards: previously net.eval() stayed in effect for the
            # rest of the epoch.
            if training_step % 10 == 0:
                training_loss.append(loss.item())
                interim_val_loss, _ = _run_validation(net, valid_loader,
                                                      criterion, device)
                validation_loss.append(interim_val_loss / len(valid_loader))
                net.train()

        train_loss = running_loss / len(train_loader)
        train_acc = running_corrects / float(len(train_loader.dataset))

        # End-of-epoch validation (with a progress bar).
        epoch_val_loss, epoch_val_corrects = _run_validation(
            net, valid_loader, criterion, device, show_progress=True)
        valid_loss = epoch_val_loss / len(valid_loader)
        valid_acc = epoch_val_corrects / float(len(valid_loader.dataset))

        print(
            'Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}, Valid Loss: {:.4f}, Valid Acc: {:.4f}'
            .format(epoch + 1, num_epochs, train_loss, train_acc, valid_loss,
                    valid_acc))
        if valid_loss < best_val_loss:
            best_val_loss = valid_loss
            save_checkpoint(save_name, net, optimizer, best_val_loss)

    print('Finished Training')
    return training_loss, validation_loss
def train_model(status,
                epoch,
                model,
                device,
                train_loader,
                criterion_mask,
                criterion_depth,
                optimizer,
                depthweight=0.5,
                printtestimg=False,
                printinterval=2000,
                scheduler=False):
    """Train ``model`` for one pass over ``train_loader``.

    The model receives a background and an image and predicts a mask and a
    depth map. The batch loss is
    ``(1 - depthweight) * mask_loss + depthweight * depth_loss`` and is
    appended to the module-level ``train_losses`` list.

    Args:
        status: Object whose ``value`` attribute receives a progress string.
        epoch: Epoch number, used only in the status string.
        model: Network called as ``model(bg, image)``.
        device: Device every batch tensor is moved to.
        train_loader: Yields ``(bg, image, mask, depthmap)`` batches.
        criterion_mask: Loss for the mask prediction.
        criterion_depth: Criterion forwarded to ``customloss.depth_loss``.
        optimizer: Optimizer stepping the parameters of ``model``.
        depthweight: Weight of the depth loss in the combined loss.
        printtestimg: Show image/mask/depth grids every ``printinterval``
            batches when true.
        printinterval: Batch interval for the visual dump.
        scheduler: Optional scheduler stepped with the batch loss after
            every optimizer step; ``False`` disables it.

    Returns:
        The module-level ``train_losses`` list.
    """
    model.train()
    pbar = notebook.tqdm(train_loader)
    for batch_idx, (bg, image, mask, depthmap) in enumerate(pbar):
        bg, image, mask, depthmap = bg.to(device), image.to(device), mask.to(
            device), depthmap.to(device)

        # Init
        optimizer.zero_grad()

        # Predict
        predmask, preddepth = model(bg, image)
        loss_mask = criterion_mask(predmask, mask)
        #loss_depth = criterion_depth(preddepth, depthmap)
        loss_depth = customloss.depth_loss(preddepth, depthmap,
                                           criterion_depth)
        loss = ((1 - depthweight) * loss_mask) + (depthweight * loss_depth)
        # Store a detached copy. Appending `loss` itself kept the whole
        # autograd graph of every batch alive, so memory grew with each
        # iteration.
        train_losses.append(loss.detach())

        # Backpropagation
        loss.backward()
        optimizer.step()

        if (scheduler):
            scheduler.step(loss)

        pbar.set_description(desc=f'Loss={loss.item()} Batch_id={batch_idx}')
        status.value = f'epoch={epoch}, Batch_id={batch_idx}, Loss={loss}, Mask={loss_mask}, Depth={loss_depth}'

        # Periodically release cached GPU memory.
        if batch_idx % 500 == 0:
            torch.cuda.empty_cache()

        if printtestimg:
            if batch_idx % printinterval == 0:
                print('*********************** TRAINING *******************')
                print('======================= IMAGE ======================')
                print('image:', image.shape)
                visualize.show_img(
                    torchvision.utils.make_grid(image.detach().cpu()[1:5]), 8)
                print('======================= MASK =======================')
                print('actual:', mask.shape)
                visualize.show_img(
                    torchvision.utils.make_grid(mask.detach().cpu()[1:5]), 8)
                print('predicted:', predmask.shape)
                visualize.show_img(
                    torchvision.utils.make_grid(predmask.detach().cpu()[1:5]),
                    8)
                print('======================= DEPTHMAP ===================')
                print('actual:', depthmap.shape)
                visualize.show_img(
                    torchvision.utils.make_grid(depthmap.detach().cpu()[1:5]),
                    8)
                print('predicted:', preddepth.shape)
                visualize.show_img(
                    torchvision.utils.make_grid(preddepth.detach().cpu()[1:5]),
                    8)

    return train_losses
def __init__(self, filename):
    """Load a playground log file and extract BMI/Jovian/Sync data.

    The file is read line by line. Every line other than the literal
    ``'SY\n'`` is split on ``' - '`` into five fields (time, process, level,
    function, message). A ``'SY\n'`` line marks the preceding message as the
    sync point and adds a synthetic ``SYNC`` row to the log.

    Args:
        filename (string): path to the log file ('./process.log')

    Important Variables:
        logger.df (pandas.DataFrame): log dataframe
        logger.sync_df (pandas.DataFrame): the jovian output that is exactly
            labelled by the sync time
        logger.jov_df (pandas.DataFrame): the animal jovian data content
            (time, [pos_x, pos_y], [head_direction], ball_vel)
        logger.cue_df (pandas.DataFrame): the cue jovian data content

    Other variables:
        logger.n_sessions (int): number of sessions (usually just 1, there
            can be bugs with multiple sessions)
        logger.log_sessions (list): list of log sessions, each is a dataframe
        logger.sync_time (int): sync time calculated by a microcontroller
            that synced with jovian; None when no SYNC line is found
    """
    # Pick the progress bar flavour that matches the running front end.
    if isnotebook():
        from tqdm.notebook import tqdm
    else:
        from tqdm import tqdm
    # One accumulator list per log column, plus SY for the sync messages.
    time, process, level, func, msg, SY = ([] for i in range(6))
    # First pass: only find the index of the last line, for the progress bar.
    # NOTE(review): an empty file leaves `linenumber` unbound (NameError below).
    with open(filename) as f:
        for linenumber, line in enumerate(f):
            pass
    # Second pass: parse the lines.
    # NOTE(review): `linenumber` is the 0-based index of the last line, so
    # range(linenumber) reads every line except the final one.
    with open(filename) as f:
        for i in tqdm(range(linenumber)):
            line = f.readline()
            if line != 'SY\n':
                try:
                    asctime, processName, levelname, funcName, message = line.split(
                        ' - ')
                except:
                    # NOTE(review): a line that does not split into exactly
                    # five fields is ignored here, but the appends below still
                    # run with the values of the previous well-formed line
                    # (or raise NameError if there was none).
                    pass
                time.append(asctime.strip())
                process.append(processName.strip())
                level.append(levelname.strip())
                func.append(funcName.strip())
                msg.append(message.strip())
            if line == 'SY\n':
                # The message just before 'SY' is the synced one; remember it
                # and append a synthetic SYNC row carrying its timestamp.
                SY.append(msg[-1])
                sync_time_index = i  # not used further in this method
                time.append(time[-1])
                process.append('SYNC')
                level.append('INFO')
                func.append('sync')
                msg.append('last message is synced')
    print(f'Creating major data frame log.df and severl sub-dataframes',
          end='...')
    self.df = pd.DataFrame({
        'time': time,
        'process': process,
        'level': level,
        'func': func,
        'msg': msg
    })
    self.cue_df = self.select(func='_jovian', msg='cue_pos')
    self.cue_idx = self.cue_df.index  # index of cue position data in the log dataframe (reported by the _jovian process in playground)
    # Jovian rows that are not cue rows.
    self.jov_df = self.select(func='_jovian').drop(self.cue_df.index)
    self.jov_idx = self.jov_df.index  # index of jovian animal position data in the log dataframe (reported by the _jovian process in playground)
    self.reward_df = self.select(func='touched', msg='reward')
    self.touch_df = self.select(func='', msg='touch:')
    print('Done')
    if len(SY) == 0:
        print('Critical warning: no SYNC signal found')
        self.sync_time = None
    else:
        print('Find SYNC, syncing the data', end='...')
        # The first comma-separated field of the first synced message.
        self.sync_time = int(SY[0].split(',')[0])
        self.sync_idx = self.select(
            func='sync'
        ).index - 1  # index of jov timestamps that is exactly the same as SY
        self.sync_df = self.df.loc[self.sync_idx]
        self.jov_idx = self.jov_idx[
            self.jov_idx >= self.sync_idx[0]]  # only use jovian data after the sync time
        self.jov_df = self.jov_df.loc[self.jov_idx]
        self.cue_idx = self.cue_idx[self.cue_idx > self.jov_idx[
            0]]  # only use cue data after the first jovian data
        self.cue_df = self.cue_df.loc[self.cue_idx]
        self.reward_df = self.reward_df[
            self.reward_df.index > self.jov_idx[0]]
        self.touch_df = self.touch_df[
            self.touch_df.index > self.jov_idx[0]]
        print('Done')
    print(f'Finalizing all sub-dataframes', end='...')
    # Pull every float out of each message and spread the matches over columns.
    # `float_pattern` is defined outside this method.
    self.jov_pos_df = self.jov_df.msg.str.extractall(
        float_pattern).unstack().astype('float')
    self.jov_pos_df.columns = [
        'jov_time', 'jov_x', 'jov_y', 'jov_z', 'jov_hd', 'jov_ball_vel'
    ]
    self.cue_pos_df = self.cue_df.msg.str.extractall(
        float_pattern).unstack().astype('float')
    self.cue_pos_df.columns = [
        'cue1_x', 'cue1_y', 'cue1_z', 'cue2_x', 'cue2_y', 'cue2_z'
    ]
    # Name -> dataframe lookup of the sub-dataframes built above.
    self.dfs = {
        'jov_df': self.jov_df,
        'jov_pos_df': self.jov_pos_df,
        'cue_df': self.cue_df,
        'reward_df': self.reward_df,
        'touch_df': self.touch_df
    }
    print('Done')
    print(
        'Please check log.df, log.jov_pos_df, log.cue_df, log.reward_df, log.touch_df'
    )
    self.log_sessions = self.get_log_sessions()
    self.n_sessions = len(self.log_sessions)
    self.trial_index = None
def train_model(model, criterion, optimizer, scheduler, num_epochs,
                dataloaders, dataset_sizes, device, fold):
    """Train and validate ``model`` with mixed precision; keep the best weights.

    Every epoch runs a ``'train'`` phase followed by a ``'valid'`` phase.
    Whenever the validation loss is less than or equal to the best seen so
    far, the weights are remembered and also written to
    ``Fold{fold}_{best_loss}_epoch_{epoch}.bin``.

    Args:
        model: network to train; called as ``model(inputs)``.
        criterion: loss callable ``criterion(outputs, labels)``.
        optimizer: optimizer driven through the AMP gradient scaler.
        scheduler: LR scheduler, stepped after every optimizer step.
        num_epochs: number of epochs to run.
        dataloaders: mapping with ``'train'`` and ``'valid'`` loaders.
        dataset_sizes: mapping with the sample count of each phase.
        device: accepted for interface compatibility.
            NOTE(review): tensors are moved to ``CFG.device``, not to this
            argument -- confirm the two always agree.
        fold: fold identifier, used only in the checkpoint file name.

    Returns:
        ``(model, history)``: the model with the best validation weights
        loaded, and a dict with ``'train loss'`` / ``'valid loss'`` lists.
        The recorded losses are the per-batch losses *after* division by
        ``CFG.n_accumulate``.
    """
    start = time.time()
    # state_dict maps every layer's learnable parameters to tensors; keep a
    # private copy of the best one seen so far.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    history = defaultdict(list)
    # Gradient scaler for automatic mixed precision (FP16/FP32) training.
    scaler = amp.GradScaler()

    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            if is_train:
                # Training mode: dropout active, batch-norm statistics updated.
                model.train()
            else:
                # Evaluation mode: changes dropout / batch-norm behaviour.
                model.eval()

            running_loss = 0.0

            # The progress bar is deliberately disabled (disable=True).
            # The accumulation counter must be the *batch* index. The previous
            # code used the epoch index, so with CFG.n_accumulate > 1 the
            # optimizer was never stepped during most epochs.
            for batch_step, (inputs, labels) in enumerate(
                    tqdm(dataloaders[phase], disable=True)):
                print(len(labels), "labels len is")
                # Skip batches with at most one sample.
                if len(labels) <= 1:
                    continue
                inputs = inputs.to(CFG.device)
                labels = labels.to(CFG.device)

                # Forward pass; track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    with amp.autocast(enabled=True):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        # Scale so that accumulated gradients average out.
                        loss = loss / CFG.n_accumulate

                    # Backward pass only in the training phase.
                    if is_train:
                        scaler.scale(loss).backward()

                    # Step the optimizer once every n_accumulate batches.
                    # Gradients of a trailing partial group stay accumulated
                    # until the next step.
                    if is_train and (batch_step + 1) % CFG.n_accumulate == 0:
                        scaler.step(optimizer)
                        scaler.update()
                        scheduler.step()
                        # Reset the parameter gradients.
                        optimizer.zero_grad()

                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / dataset_sizes[phase]
            history[phase + ' loss'].append(epoch_loss)
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            # Deep-copy and checkpoint the model when validation improves.
            if phase == 'valid' and epoch_loss <= best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                PATH = f"Fold{fold}_{best_loss}_epoch_{epoch}.bin"
                torch.save(model.state_dict(), PATH)
        print()

    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60,
        (time_elapsed % 3600) % 60))
    print("Best Loss ", best_loss)

    # Load the best model weights before returning.
    model.load_state_dict(best_model_wts)
    return model, history
def _preprocess_docs(self, X):
    """Run ``X`` through ``self.nlp.pipe`` and tokenise every resulting doc.

    Returns a list with one ``self._get_doc_tokens(doc)`` result per document,
    in pipeline order. Progress is displayed with ``tqdm``.
    """
    return [self._get_doc_tokens(doc) for doc in tqdm(self.nlp.pipe(X))]
def my_metric(y_true, y_pred):
    """Return the mean over columns of sum(|y_true - y_pred|) / sum(y_true).

    Both sums are taken along axis 0; the final mean averages the resulting
    per-column ratios (for 1-D input there is a single ratio).
    """
    return np.mean(
        np.sum(np.abs(y_true - y_pred), axis=0) / np.sum(y_true, axis=0))


# Number of cross-validation folds.
NFOLDS = 7
from sklearn.model_selection import KFold
# Shuffled K-fold with a fixed seed so the splits are reproducible.
kf = KFold(n_splits=NFOLDS, shuffle=True, random_state=0)
targets = [
    'age', 'domain2_var1', 'domain2_var2', 'domain1_var1', 'domain1_var2'
]
# Every column that is neither a target nor the 'Id' column is a feature.
features = list(set(train_df.columns) - set(targets) - set(['Id']))
overal_score = 0.0
# One model family per target; `w` is the weight paired with that target
# (presumably its weight in the overall score -- the use is not visible here).
for target, w in tqdm([('age', 0.3), ('domain1_var1', 0.175),
                       ('domain1_var2', 0.175), ('domain2_var1', 0.175),
                       ('domain2_var2', 0.175)]):
    # Per-row buffer for the training set and per-fold buffer for the test set.
    y_oof = np.zeros(train_df.shape[0])
    y_test = np.zeros((test_df.shape[0], NFOLDS))
    print('*' * 20, target, '*' * 20)
    for i, (train_index, valid_index) in enumerate(kf.split(train_df,
                                                            train_df)):
        print('>' * 20, 'Fold-', i + 1)
        train, val = train_df.iloc[train_index], train_df.iloc[valid_index]
        X_train = train[features]
        y_train = train[target]
        X_val = val[features]
        y_val = val[target]
        train_data = lgb.Dataset(X_train, label=y_train)
        val_data = lgb.Dataset(X_val, label=y_val)
        #create model
interpolation_mode=Config.interpolation_mode, interpolation_scale=Config.interpolation_scale ) train_loader = DataLoader( train_dataset, batch_size = Config.batch_size, shuffle =True ) model = SRCNN().to(DEVICE) optimizer =torch.optim.Adam(model.parameters(), lr = Config.lr) epochs = Config.epochs model.train() for epoch in range(epochs): print("{}/{} EPOCHS".format(epoch+1, epochs)) for x,y in tqdm(train_loader): x = x.to(DEVICE) y = y.to(DEVICE) pred = model(x) loss = torch.nn.functional.mse_loss(pred, y) optimizer.zero_grad() loss.backward() optimizer.step() print(loss.item())
def least_squares(function, *args):
    """Apply ``function`` to every item produced by ``iterate(*args)``.

    Each item is unpacked as ``primary, *subargs`` and ``function`` is called
    as ``function(*subargs, primary)``, i.e. the first element is passed last.
    The results are concatenated along axis 0.

    The progress bar total is ``len(args[0])``, so at least one positional
    argument with a length is required.
    """
    progress = tqdm(iterate(*args),
                    total=len(args[0]),
                    leave=False,
                    desc='training')
    pieces = []
    for primary, *subargs in progress:
        pieces.append(function(*subargs, primary))
    return np.concatenate(pieces, axis=0)
def range_test_over_epochs(
    self,
    train_loader,
    val_loader=None,
    start_lr=None,
    end_lr=10,
    num_epochs=100,
    step_mode="linear",
    smooth_f=0.05,
    diverge_th=5,
    accumulation_steps=1,
):
    """Performs the learning rate range test, one learning rate per epoch.

    For every epoch the model is trained on all batches of ``train_loader``,
    the accuracy returned for the last batch is recorded, and the learning
    rate is advanced by one step of the chosen policy.

    Note that the recorded metric is an accuracy, but it is stored under
    ``self.history["loss"]``; the best (highest) value is kept in
    ``self.best_acc``.

    Arguments:
        train_loader (torch.utils.data.DataLoader): the training set data
            loader.
        val_loader (torch.utils.data.DataLoader, optional): accepted for
            signature compatibility; it is not used by this method.
            Default: None.
        start_lr (float, optional): the starting learning rate for the range
            test. Default: None (uses the learning rate from the optimizer).
        end_lr (float, optional): the maximum learning rate to test.
            Default: 10.
        num_epochs (int, optional): the number of epochs, and therefore the
            number of learning rates, over which the test occurs.
            Default: 100.
        step_mode (str, optional): one of the available learning rate
            policies, linear or exponential ("linear", "exp").
            Default: "linear".
        smooth_f (float, optional): the smoothing factor within the [0, 1[
            interval. Disabled if set to 0, otherwise the recorded value is
            smoothed using exponential smoothing. Default: 0.05.
        diverge_th (int, optional): accepted for signature compatibility; no
            divergence check is performed by this method. Default: 5.
        accumulation_steps (int, optional): steps for gradient accumulation,
            forwarded to ``self._train_batch``. If it is 1, gradients are
            not accumulated. Default: 1.

    Raises:
        ValueError: if ``step_mode`` is not "exp" or "linear", or if
            ``smooth_f`` is outside [0, 1[.

    Example:
        >>> lr_finder = LRFinder(net, optimizer, criterion, device="cuda")
        >>> lr_finder.range_test_over_epochs(dataloader, end_lr=100, num_epochs=100)

    Gradient accumulation is supported; example:
        >>> train_data = ...    # prepared dataset
        >>> desired_bs, real_bs = 32, 4         # batch size
        >>> accumulation_steps = desired_bs // real_bs     # required steps for accumulation
        >>> dataloader = torch.utils.data.DataLoader(train_data, batch_size=real_bs, shuffle=True)
        >>> acc_lr_finder = LRFinder(net, optimizer, criterion, device="cuda")
        >>> acc_lr_finder.range_test_over_epochs(dataloader, end_lr=10, num_epochs=100, accumulation_steps=accumulation_steps)

    Reference:
    [Training Neural Nets on Larger Batches: Practical Tips for 1-GPU, Multi-GPU & Distributed setups](
    https://medium.com/huggingface/ec88c3e51255)
    [thomwolf/gradient_accumulation](https://gist.github.com/thomwolf/ac7a7da6b1888c2eeac8ac8b9b05d3d3)
    """
    # Reset test results
    self.history = {"lr": [], "loss": []}
    self.best_acc = None

    # Move the model to the proper device
    self.model.to(self.device)

    # Check if the optimizer is already attached to a scheduler
    self._check_for_scheduler()

    # Set the starting learning rate
    if start_lr:
        self._set_learning_rate(start_lr)

    # Initialize the proper learning rate policy
    mode = step_mode.lower()
    if mode == "exp":
        lr_schedule = ExponentialLR(self.optimizer, end_lr, num_epochs)
    elif mode == "linear":
        lr_schedule = LinearLR(self.optimizer, end_lr, num_epochs)
    else:
        raise ValueError(
            "expected one of (exp, linear), got {}".format(step_mode))

    if smooth_f < 0 or smooth_f >= 1:
        raise ValueError("smooth_f is outside the range [0, 1[")

    # Number of batches per epoch. Ask the loader directly instead of
    # iterating (and loading) the whole dataset just to count; fall back to
    # counting for loaders that do not define a length.
    try:
        num_iter = len(train_loader)
    except TypeError:
        num_iter = sum(1 for _ in train_loader)

    for epoch in tqdm(range(num_epochs)):
        # Create an iterator to get data batch by batch
        iter_wrapper = DataLoaderIterWrapper(train_loader)
        train_acc = []
        for _ in range(num_iter):
            # Train on batch and retrieve its accuracy
            _, acc = self._train_batch(iter_wrapper, accumulation_steps)
            train_acc.append(acc)

        # The value recorded for the epoch is the accuracy of its last batch;
        # it is smoothed against the previous record when smooth_f > 0.
        accuracy = train_acc[-1]
        if epoch == 0:
            self.best_acc = accuracy
        else:
            if smooth_f > 0:
                accuracy = smooth_f * accuracy + (
                    1 - smooth_f) * self.history["loss"][-1]
            if accuracy > self.best_acc:
                self.best_acc = accuracy

        self.history["loss"].append(accuracy)

        # Update the learning rate
        lr_schedule.step()
        self.history["lr"].append(lr_schedule.get_lr()[0])

    # NOTE(review): "{finder_name}" is printed literally; the string is never
    # formatted.
    print(
        "Learning rate search finished. See the graph with {finder_name}.plot()"
    )
def train(self,
          train_images,
          train_labels,
          train_function=None,
          predict_function=None,
          prep=np.log,
          conc_function=updating_mean,
          *global_args):
    """Fit per-scale weights on image/label pairs and merge them over time.

    For every ``(image, labels)`` pair the local features are computed, both
    features and (pre-processed) labels are rescaled with ``calc_scales``,
    and one weight set is fitted per entry of ``self.initial_weights``. The
    new weights replace ``self.weights`` on the first trained image and are
    merged into it with ``conc_function`` afterwards. When
    ``self.initial_combined_weights`` is set, a second fit on the stacked
    per-scale predictions maintains ``self.combined_weights`` the same way.

    Args:
        train_images: sequence of images (its ``len`` drives the progress bar).
        train_labels: label arrays, paired with ``train_images`` by position.
        train_function: model function to fit; defaults to
            ``self.linear_function``.
        predict_function: accepted for interface compatibility; not used here.
        prep: transform applied to the labels before scaling, or ``None`` to
            use the labels unchanged.
        conc_function: ``conc_function(existing, new, count)`` returning the
            merged weights.
        *global_args: extra arguments for ``calc_scales``; defaults to
            ``self.global_args`` when none are given.
    """
    if train_function is None:
        train_function = self.linear_function
    if len(global_args) == 0:
        global_args = self.global_args

    def least_squares_function(x, y, params):
        # Fit `params` with scipy's curve_fit and return them as a (1, n) row.
        fitted = curve_fit(partial(self.__train_function, train_function),
                           x.reshape(-1, len(params)), y.flatten(), params)[0]
        return np.array(fitted).reshape(1, -1)

    for image, labels in tqdm(zip(train_images, train_labels),
                              total=len(train_images),
                              leave=False):
        # Previously `prept_labels` was left unbound when prep was None,
        # which raised NameError a few lines later.
        prept_labels = labels if prep is None else prep(labels)

        features = self.local_function(image)
        global_features = calc_scales(features, self.neighbours,
                                      *global_args)
        global_labels = calc_scales(prept_labels, False, *global_args)

        weights = [
            np.array(
                scaled_least_squares(least_squares_function, initial,
                                     global_features, global_labels))
            for initial in self.initial_weights
        ]

        if self.training_count == 0:
            self.weights = weights
        else:
            self.weights = [
                conc_function(existing, new, self.training_count)
                for existing, new in zip(self.weights, weights)
            ]

        if self.initial_combined_weights is not None:
            predicted_patches = [
                scaled_least_squares(train_function, w, global_features)
                for w in weights
            ]
            predicted_images = np.stack([
                unravel_patches(p, image.shape[0:2])
                for pred in predicted_patches for p in pred
            ],
                                        axis=-1)
            combined_weights = least_squares(least_squares_function,
                                             self.initial_combined_weights,
                                             predicted_images, labels)
            if self.training_count == 0:
                self.combined_weights = combined_weights
            else:
                # Store the merged result, exactly as is done for
                # self.weights; it used to be computed and then discarded.
                self.combined_weights = conc_function(
                    self.combined_weights, combined_weights,
                    self.training_count)

        self.training_count += 1
def _getCoefMatrix(gem, TFdict, alpha=1):
    """
    Calculate GRN and return CoefMatrix (network weights)

    For every gene (column of ``gem``) that is also a key of ``TFdict`` a
    Ridge regression of that gene on its candidate regulators is fitted and
    the coefficients form the gene's column. Genes without an entry, or with
    no usable regulator, get a column that is zero except for a 1 at the
    gene's own position.

    Args:
        gem (pandas.DataFrame): gene expression matrix to calculate GRN
        TFdict (dictionary): python dictionary of potential regulatory gene list
        alpha (float) : strength of regularization in Ridge.

    Returns:
        pandas.DataFrame: coefficient matrix; the index and the columns are
        both ``gem.columns``, and each column holds the weights of the
        regulators of that target gene.
    """
    genes = gem.columns
    # A set makes the membership test in the main loop O(1) per gene instead
    # of a linear scan through a list.
    genes_with_regulators = set(intersect(gem.columns, list(TFdict.keys())))
    zero_ = pd.Series(np.zeros(len(genes)), index=genes)

    def get_coef(target_gene):
        """Return the coefficient column for one target gene."""
        tmp = zero_.copy()

        # Candidate regulators present in the expression matrix, excluding
        # the target itself.
        reggenes = intersect(TFdict[target_gene], genes)
        if target_gene in reggenes:
            reggenes.remove(target_gene)

        if len(reggenes) == 0:
            tmp[target_gene] = 1
            return tmp

        # prepare learning data
        Data = gem[reggenes]
        Label = gem[target_gene]

        # model fitting
        model = Ridge(alpha=alpha, random_state=123)
        model.fit(Data, Label)
        tmp[reggenes] = model.coef_
        return tmp

    li = []
    n_calculated = 0
    for gene in tqdm(genes):
        if gene not in genes_with_regulators:
            tmp = zero_.copy()
            tmp[gene] = 1
        else:
            tmp = get_coef(gene)
            n_calculated += 1
        li.append(tmp)

    coef_matrix = pd.concat(li, axis=1)
    coef_matrix.columns = genes

    print(f"genes_in_gem: {gem.shape[1]}")
    print(f"models made for {n_calculated} genes")

    return coef_matrix
def train(seq2sql_model, roberta_model, model_optimizer, roberta_optimizer, roberta_tokenizer, roberta_config, path_wikisql, train_loader):
    """Run one training pass over ``train_loader`` and return loss/accuracy.

    For every batch the RoBERTa embeddings are computed, the seq2sql model is
    scored with the ground truth supplied, the loss is back-propagated, and
    both optimizers are stepped (``roberta_optimizer`` only when truthy).
    Predictions are then decoded and compared with the ground truth to
    accumulate per-component hit counts.

    Args:
        seq2sql_model: model producing the six score tensors.
        roberta_model: encoder passed to ``get_wemb_roberta``.
        model_optimizer: optimizer for ``seq2sql_model``.
        roberta_optimizer: optimizer for ``roberta_model``; skipped if falsy.
        roberta_tokenizer: tokenizer passed to ``get_wemb_roberta``.
        roberta_config: config passed to ``get_wemb_roberta``.
        path_wikisql: directory that contains ``train.db``.
        train_loader: iterable of batches.

    Returns:
        list: ``[average_loss, select_column_acc, select_agg_acc,
        where_number_acc, where_column_acc, where_operator_acc,
        where_value_index_acc, where_value_acc, logical_form_acc,
        execution_acc]``, each divided by the total example count.

    Notes:
        * ``count`` is incremented before a batch may be skipped, so skipped
          batches still enlarge the denominator.
        * An empty ``train_loader`` leaves ``count`` at 0 and the final
          divisions raise ``ZeroDivisionError``.
    """
    roberta_model.train()
    seq2sql_model.train()

    results = []  # never filled or returned in this function
    average_loss = 0

    count_select_column = 0  # count the # of correct predictions of select column
    count_select_agg = 0  # of selected aggregation
    count_where_number = 0  # of where number
    count_where_column = 0  # of where column
    count_where_operator = 0  # of where operator
    count_where_value = 0  # of where-value
    count_where_value_index = 0  # of where-value index (on question tokens)
    count_logical_form_acc = 0  # of logical form accuracy
    count_execution_acc = 0  # of execution accuracy

    # Engine for SQL querying.
    engine = DBEngine(os.path.join(path_wikisql, f"train.db"))
    count = 0  # count the # of examples
    for batch_index, batch in enumerate(tqdm(train_loader)):
        count += len(batch)

        # if batch_index > 2:
        #     break
        # Get fields
        # nlu  : natural language utterance
        # nlu_t: tokenized nlu
        # sql_i: canonical form of SQL query
        # sql_q: full SQL query text. Not used.
        # sql_t: tokenized SQL query
        # tb   : table metadata. No row data needed
        # hs_t : tokenized headers. Not used.
        natural_lang_utterance, natural_lang_utterance_tokenized, sql_canonical, \
            _, _, table_metadata, _, headers = load_data.get_fields(batch)

        select_column_ground, select_agg_ground, where_number_ground, \
            where_column_ground, where_operator_ground, _ = roberta_training.get_ground_truth_values(sql_canonical)
        # get ground truth where-value index under CoreNLP tokenization scheme. It's done already on trainset.

        natural_lang_embeddings, header_embeddings, question_token_length, header_token_length, header_count, \
            natural_lang_double_tokenized, punkt_to_roberta_token_indices, roberta_to_punkt_token_indices \
            = roberta_training.get_wemb_roberta(roberta_config, roberta_model, roberta_tokenizer,
                                                natural_lang_utterance_tokenized, headers,max_seq_length= 222,
                                                num_out_layers_n=2, num_out_layers_h=2)
        # natural_lang_embeddings: natural language embedding
        # header_embeddings: header embedding
        # question_token_length: token lengths of each question
        # header_token_length: header token lengths
        # header_count: the number of columns (headers) of the tables.

        where_value_index_ground_corenlp = corenlp_local.get_g_wvi_corenlp(
            batch)
        try:
            # Map the CoreNLP where-value indices onto RoBERTa token indices.
            where_value_index_ground = corenlp_local.get_g_wvi_bert_from_g_wvi_corenlp(
                punkt_to_roberta_token_indices, where_value_index_ground_corenlp)
        except:
            # Exception happens when where-condition is not found in natural_lang_double_tokenized.
            # In this case, that train example is not used.
            # During test, that example considered as wrongly answered.
            # e.g. train: 32.
            # NOTE(review): the bare except hides every other error as well,
            # and the whole batch (not one example) is skipped.
            continue

        # Per-example question knowledge vectors; a zero vector as long as the
        # longest question in the batch is used when the key is missing.
        knowledge = []
        for k in batch:
            if "bertindex_knowledge" in k:
                knowledge.append(k["bertindex_knowledge"])
            else:
                knowledge.append(max(question_token_length) * [0])

        # Same for header knowledge, sized by the largest header count.
        knowledge_header = []
        for k in batch:
            if "header_knowledge" in k:
                knowledge_header.append(k["header_knowledge"])
            else:
                knowledge_header.append(max(header_count) * [0])

        # score
        select_column_score, select_agg_score, where_number_score, where_column_score,\
            where_operator_score, where_value_score = seq2sql_model(natural_lang_embeddings, question_token_length, header_embeddings,
                                                                    header_token_length, header_count,
                                                                    g_sc=select_column_ground, g_sa=select_agg_ground,
                                                                    g_wn=where_number_ground, g_wc=where_column_ground,
                                                                    g_wo=where_operator_ground, g_wvi=where_value_index_ground,
                                                                    knowledge = knowledge,
                                                                    knowledge_header = knowledge_header)

        # Calculate loss & step
        loss = seq2sql_model_training_functions.Loss_sw_se(
            select_column_score, select_agg_score, where_number_score,
            where_column_score, where_operator_score, where_value_score,
            select_column_ground, select_agg_ground, where_number_ground,
            where_column_ground, where_operator_ground,
            where_value_index_ground)

        model_optimizer.zero_grad()
        if roberta_optimizer:
            roberta_optimizer.zero_grad()
        loss.backward()
        model_optimizer.step()
        if roberta_optimizer:
            roberta_optimizer.step()

        # Prediction
        select_column_predict, select_agg_predict, where_number_predict, \
            where_column_predict, where_operator_predict, where_val_index_predict = seq2sql_model_training_functions.pred_sw_se(
                select_column_score, select_agg_score, where_number_score,
                where_column_score, where_operator_score, where_value_score)
        where_value_string_predict, _ = seq2sql_model_training_functions.convert_pr_wvi_to_string(
            where_val_index_predict, natural_lang_utterance_tokenized,
            natural_lang_double_tokenized, roberta_to_punkt_token_indices,
            natural_lang_utterance)

        # Sort where_column_predict:
        #   Sort where_column_predict when training the model as where_operator_predict and where_val_index_predict are predicted using ground-truth where-column (g_wc)
        #   In case of 'dev' or 'test', it is not necessary as the ground-truth is not used during inference.
        where_column_predict_sorted = seq2sql_model_training_functions.sort_pr_wc(
            where_column_predict, where_column_ground)
        sql_canonical_predict = seq2sql_model_training_functions.generate_sql_i(
            select_column_predict, select_agg_predict, where_number_predict,
            where_column_predict_sorted, where_operator_predict,
            where_value_string_predict, natural_lang_utterance)

        # Calculate accuracy
        select_col_batchlist, select_agg_batchlist, where_number_batchlist, \
            where_column_batchlist, where_operator_batchlist, where_value_index_batchlist, \
            where_value_batchlist = seq2sql_model_training_functions.get_cnt_sw_list(
                select_column_ground, select_agg_ground, where_number_ground,
                where_column_ground, where_operator_ground,
                where_value_index_ground, select_column_predict,
                select_agg_predict, where_number_predict, where_column_predict,
                where_operator_predict, where_val_index_predict, sql_canonical,
                sql_canonical_predict, mode='train')

        logical_form_acc_batchlist = seq2sql_model_training_functions.get_cnt_lx_list(
            select_col_batchlist, select_agg_batchlist, where_number_batchlist,
            where_column_batchlist, where_operator_batchlist,
            where_value_batchlist)
        # lx stands for logical form accuracy

        # Execution accuracy test.
        execution_acc_batchlist, _, _ = seq2sql_model_training_functions.get_cnt_x_list(
            engine, table_metadata, select_column_ground, select_agg_ground,
            sql_canonical, select_column_predict, select_agg_predict,
            sql_canonical_predict)

        # statistics
        average_loss += loss.item()

        # count
        count_select_column += sum(select_col_batchlist)
        count_select_agg += sum(select_agg_batchlist)
        count_where_number += sum(where_number_batchlist)
        count_where_column += sum(where_column_batchlist)
        count_where_operator += sum(where_operator_batchlist)
        count_where_value_index += sum(where_value_index_batchlist)
        count_where_value += sum(where_value_batchlist)
        count_logical_form_acc += sum(logical_form_acc_batchlist)
        count_execution_acc += sum(execution_acc_batchlist)

    # Turn the running totals into per-example averages.
    average_loss /= count
    select_column_acc = count_select_column / count
    select_agg_acc = count_select_agg / count
    where_number_acc = count_where_number / count
    where_column_acc = count_where_column / count
    where_operator_acc = count_where_operator / count
    where_value_index_acc = count_where_value_index / count
    where_value_acc = count_where_value / count
    logical_form_acc = count_logical_form_acc / count
    execution_acc = count_execution_acc / count
    accuracy = [
        average_loss, select_column_acc, select_agg_acc, where_number_acc,
        where_column_acc, where_operator_acc, where_value_index_acc,
        where_value_acc, logical_form_acc, execution_acc
    ]

    return accuracy
# Show the architecture and the number of parameter tensors.
print(model)
print(len(list(model.parameters())))

# In[9]:

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# In[10]:

entropy = nn.CrossEntropyLoss()

# In[11]:

# Plain supervised training: `epoch` full passes over train_data.
epoch = 3
for i in range(epoch):
    for images, labels in tqdm(train_data):
        optimizer.zero_grad()
        y = model(images)
        loss = entropy(y, labels)
        loss.backward()
        optimizer.step()

# In[12]:

# Evaluate on the test data.
# `total` counts labels actually seen: len(test_data) is the number of
# *batches* for a batched loader, which made the reported accuracy wrong
# whenever the batch size is not 1.
correct, total = 0, 0
model.eval()  # inference behaviour for dropout / batch-norm layers
with torch.no_grad():  # no autograd bookkeeping needed while evaluating
    for images, labels in tqdm(test_data):
        y = model(images)
        prediction = torch.argmax(y, dim=1)
        correct += torch.sum((prediction == labels).float())
        total += labels.numel()
print("Accuracy : ", correct / total)
def __iter__(self):
    """Yield every entry of ``self.lines`` as a list of tokens.

    Each line is stripped of surrounding whitespace and split on single
    spaces. Progress is displayed with ``tqdm``.
    """
    for raw_line in tqdm(self.lines):
        stripped = raw_line.strip()
        yield stripped.split(' ')
def lemmatize(sentence_list, nlp):
    """Lemmatize every sentence and return the stripped results as a list.

    Each sentence is passed to ``lemmatize_text(sentence, nlp)`` and the
    returned text is stripped of surrounding whitespace. A header line is
    printed first and progress is displayed with ``tqdm``.
    """
    print("Lemmatizing Sentences")
    return [
        lemmatize_text(sentence, nlp).strip()
        for sentence in tqdm(sentence_list)
    ]
def evaluate(args, model: PreTrainedModel, tokenizer: PreTrainedTokenizer, eval_dataset, prefix="") -> Dict:
    """Compute the language-model perplexity of ``model`` on ``eval_dataset``.

    The dataset is read sequentially in batches of
    ``args.per_gpu_eval_batch_size * max(1, args.n_gpu)`` (stored back into
    ``args.eval_batch_size``); a trailing incomplete batch is dropped. Each
    batch serves as both input and labels. The mean loss over the batches is
    exponentiated to give the perplexity, which is logged and written to
    ``<args.output_dir>/<prefix>/eval_results.txt``.

    Args:
        args: run configuration; reads ``output_dir``,
            ``per_gpu_eval_batch_size``, ``n_gpu`` and ``device``.
        model: model called as ``model(inputs, labels=labels)``; the first
            element of its output is taken as the loss.
        tokenizer: supplies the padding value used when collating.
        eval_dataset: dataset of token tensors.
        prefix: sub-directory of ``args.output_dir`` for the results file,
            also shown in the log messages.

    Returns:
        ``{"perplexity": <0-dim torch.Tensor>}``.

    Raises:
        ZeroDivisionError: if no complete batch could be formed (the dataset
            is smaller than the batch size, since ``drop_last=True``).
    """
    eval_output_dir = args.output_dir
    os.makedirs(eval_output_dir, exist_ok=True)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)

    def collate(examples: List[torch.Tensor]):
        # Pad to the longest example, using the tokenizer's pad id when the
        # tokenizer has a pad token.
        if tokenizer._pad_token is None:
            return pad_sequence(examples, batch_first=True)
        return pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)

    # Evaluation always walks the dataset in order.
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=collate,
                                 drop_last=True)

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info(" Num examples = %d", len(eval_dataset))
    logger.info(" Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # Language modelling: the batch is both the input and the target.
        inputs, labels = (batch, batch)
        inputs = inputs.to(args.device)
        labels = labels.to(args.device)

        with torch.no_grad():
            outputs = model(inputs, labels=labels)
            lm_loss = outputs[0]
            # .mean() collapses the per-replica losses of DataParallel.
            eval_loss += lm_loss.mean().item()
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    perplexity = torch.exp(torch.tensor(eval_loss))

    result = {"perplexity": perplexity}

    output_eval_file = os.path.join(eval_output_dir, prefix, "eval_results.txt")
    # Only eval_output_dir itself was created above; make sure the prefix
    # sub-directory exists too, otherwise open() fails for a new prefix.
    os.makedirs(os.path.dirname(output_eval_file), exist_ok=True)
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info(" %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return result
def train(trainData, validData, device, train_config):
    """Fine-tune a BERT-to-BERT encoder-decoder and save it after every epoch.

    All random generators are seeded with 42. The model is built from
    ``'bert-base-uncased'`` for both encoder and decoder and optimised with
    AdamW (no weight decay for biases and LayerNorm parameters) under a
    linear schedule without warm-up. After each batch the arg-max predictions
    are decoded and scored against the reference questions with
    ``get_blue_score``; the running loss and score are shown on the progress
    bar. When ``validData`` is truthy, ``predict`` is run on it after each
    epoch. The whole model object is saved to
    ``train_config.save_dir + "model_<epoch>"`` after every epoch.

    Args:
        trainData: training dataset; must expose ``tokenizer`` and yield
            dicts with ``'ids'``, ``'mask_ids'``, ``'segment_ids'`` and
            ``'ques'`` tensors.
        validData: validation dataset, or a falsy value to skip prediction.
        device: device for the model and the batch tensors.
        train_config: reads ``train_batch_size``, ``num_workers``,
            ``learningRate``, ``epochs``, ``valid_batch_size``,
            ``ignore_label`` and ``save_dir``.

    Returns:
        None.
    """
    seed_val = 42
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    tokenizer = trainData.tokenizer
    vocab_size = tokenizer.vocab_size

    model = EncoderDecoderModel.from_encoder_decoder_pretrained(
        'bert-base-uncased', 'bert-base-uncased')

    trainDataloader = DataLoader(trainData,
                                 batch_size=train_config.train_batch_size,
                                 num_workers=train_config.num_workers)

    # Two parameter groups: biases and LayerNorm parameters are excluded
    # from weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay': 0.001
        },
        {
            'params': [
                p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay': 0.0
        },
    ]
    optimizer = AdamW(optimizer_parameters, lr=train_config.learningRate)

    total_len = trainData.__len__()
    # NOTE(review): this is a float and ignores the rounding-up of a short
    # final batch; len(trainDataloader) * epochs would be the exact count.
    num_steps = total_len / train_config.train_batch_size * train_config.epochs
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=num_steps)

    model.to(device)

    for epoch_i in range(0, train_config.epochs):
        print("")
        print('======== Epoch {:} / {:} ========'.format(
            epoch_i + 1, train_config.epochs))
        print('Training...')

        total_loss = AverageMeter()
        total_acc = AverageMeter()
        model.train()

        tdl = tqdm(trainDataloader, total=len(trainDataloader))
        for idx, batch in enumerate(tdl):
            ids = batch['ids'].to(device, dtype=torch.long)
            mask_ids = batch['mask_ids'].to(device, dtype=torch.long)
            seg_ids = batch['segment_ids'].to(device, dtype=torch.long)
            ques = batch['ques'].to(device, dtype=torch.long)

            model.zero_grad()
            loss, logits = model(input_ids=ids,
                                 attention_mask=mask_ids,
                                 decoder_input_ids=ids,
                                 token_type_ids=seg_ids,
                                 masked_lm_labels=ques)[:2]
            loss.backward()
            # Clip the gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

            # Arg-max token id per position, regrouped per example.
            logits = logits.view(-1, vocab_size)
            logits = logits.detach().cpu().numpy()
            orig_ques = ques.detach().cpu().numpy()
            pred_ques = np.argmax(logits, axis=1).flatten().squeeze()
            # Use the size of *this* batch: the last batch of an epoch can be
            # smaller than train_batch_size, and reshaping with the
            # configured size then fails or misaligns the predictions.
            pred_ques = np.reshape(pred_ques, (orig_ques.shape[0], -1))

            for i in range(orig_ques.shape[0]):
                cur_orignal_ques = tokenizer.decode(list(orig_ques[i]),
                                                    skip_special_tokens=True)
                cur_pred_ques = tokenizer.decode(list(pred_ques[i]),
                                                 skip_special_tokens=True)
                cur_acc = get_blue_score(cur_orignal_ques, cur_pred_ques)
                total_acc.update(cur_acc)

            total_loss.update(loss.item(), mask_ids.size(0))
            tdl.set_postfix(loss=total_loss.avg, accu=total_acc.avg)

        if validData:
            # The return value is not used here; the call is kept for
            # whatever predict() itself reports.
            predict(validData,
                    train_config.valid_batch_size,
                    device,
                    model,
                    ignore_label=train_config.ignore_label,
                    worker=train_config.num_workers)

        # Save the whole model object after every epoch.
        torch.save(model, train_config.save_dir + "model_{}".format(epoch_i))