def main():
    """Train or evaluate a LoL win-prediction model.

    Splits the preprocessed match data into a validation prefix and a
    training remainder, builds the model selected by ``arg.embed_type`` /
    ``arg.uncertainty``, then either trains it or loads a saved checkpoint
    and evaluates on the test split.
    """
    allChamp, matchComp, blueWin = preprocess.lolDataSet(arg, "train")
    _, test_x, test_y = preprocess.lolDataSet(arg, "test")
    test_x = torch.tensor(test_x).float()
    test_y = torch.tensor(test_y.reshape(-1)).long()
    val_x = torch.tensor(matchComp[0:arg.val_set_size]).float()
    val_y = torch.tensor(blueWin[0:arg.val_set_size].reshape(-1)).long()
    # BUG FIX: the original sliced [arg.val_set_size:-1], silently dropping
    # the final training sample; an open-ended slice keeps every row.
    train_x = matchComp[arg.val_set_size:]
    train_y = blueWin[arg.val_set_size:].reshape(-1)
    champDataset = ChampDataset(train_x, train_y)
    arg.champ_num = len(allChamp)

    # Log/checkpoint directories depend on the game phase and embedding setup.
    if arg.in_game:
        arg.log_path = arg.log_path + 'in-game/'
        arg.model_save_path = arg.model_save_path + 'in-game/'
    else:
        arg.log_path = arg.log_path + 'pregame/'
        arg.model_save_path = arg.model_save_path + 'pregame/'
    arg.log_path = (arg.log_path + arg.embed_type + '/' + arg.uncertainty +
                    '/' + now.strftime("%Y%m%d-%H%M%S"))
    arg.model_save_path = (arg.model_save_path + arg.embed_type + '/' +
                           arg.uncertainty)
    if not os.path.isdir(arg.model_save_path):
        os.makedirs(os.path.join(arg.model_save_path))
    if arg.saved_model_time is None:
        # Fresh run: checkpoint file is named after the current timestamp.
        arg.model_save_path = (arg.model_save_path + '/' +
                               now.strftime("%Y%m%d-%H%M%S") + '.pt')

    if arg.embed_type == 'one_hot' and arg.uncertainty == 'None':
        model = OHModel(arg)
        if arg.saved_model_time is None:
            train.NNtrain(model, arg, champDataset, val_x, val_y)
        else:
            # Reload an existing checkpoint and tune the calibration matrix.
            arg.model_save_path = (arg.model_save_path + '/' +
                                   arg.saved_model_time + '.pt')
            model.load_state_dict(torch.load(arg.model_save_path))
            option = 'mat'  # vec, mat, tem
            train.opt_matrix(model, val_x, val_y, option)
        train.eval(test_x, test_y, model, arg)
    elif arg.embed_type == 'one_hot' and arg.uncertainty == 'Data':
        model = DUOHModel(arg)
        if arg.saved_model_time is None:
            train.DUNNtrain(model, arg, champDataset, val_x, val_y)
        else:
            arg.model_save_path = (arg.model_save_path + '/' +
                                   arg.saved_model_time + '.pt')
            model.load_state_dict(torch.load(arg.model_save_path))
        train.eval(test_x, test_y, model, arg)
def test_eval(self):
    # Train an LSTM on the training split, then evaluate it on the
    # validation split.
    LSTM = train.train(
        self.train_loader, self.BATCH_SIZE, self.train_dset.vocab.vocab_size)
    # NOTE(review): `langs` is not defined in this method — presumably a
    # module-level global; confirm it exists at import time.
    train.eval(
        LSTM, self.valid_loader, self.BATCH_SIZE, self.valid_dset.vocab, langs)
def main():
    """Run CNN inference over the Shinra target set and save JSON results."""
    args = load_arg()
    print(f"Run:{args.lang}")

    # Select device and record GPU count on the arg namespace.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    args.device = torch.device("cuda" if use_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()

    # The training set supplies labels / char vocab for the target dataset.
    train_dataset, dev_dataset = load_shinra_data(args)
    dataset = ShinraTargetDataset(args,
                                  labels=train_dataset.labels,
                                  chars=train_dataset.chars)

    model = CNN(args, train_dataset.num_labels,
                class_weight=train_dataset.get_class_weight())
    checkpoint_path = os.path.join(args.pretrained_model, args.lang,
                                   "pytorch_model.bin")
    model.load_state_dict(torch.load(checkpoint_path))
    model.to(args.device)
    if args.fp16:
        model = to_fp16(args, model)
    model = to_parallel(args, model)

    _, results = eval(args, dataset, model)
    os.makedirs(args.output_dir, exist_ok=True)
    save_result(f"{args.output_dir}/{args.lang}.json", results)
def run():
    """Run the full training schedule, printing train/val loss per epoch."""
    for epoch_idx in range(args.epochs):
        # Pass the scheduler through only when one was configured; the
        # accumulation step is always forwarded.
        extra_kwargs = {"accumulation_step": args.accumulation_step}
        if args.scheduler:
            extra_kwargs["scheduler"] = scheduler
        train_loss = train(trainloader, model, criterion, optimizer, device,
                           **extra_kwargs)
        val_loss = eval(validloader, model, criterion, device)
        print(f"Epoch: {epoch_idx+1}/{args.epochs}, train_loss: {train_loss:.5f}, val_loss: {val_loss:.5f}")
def main(params):
    """Train an LSTM sentiment classifier with early stopping; plot curves.

    NOTE(review): relies on module-level `batch_size` and `device` — confirm
    they are defined at import time.
    """
    # build dataset
    train_data = pd.read_csv('./data/train_final.csv')
    tokenizer = get_tokenizer('spacy', language='en')
    # Select the pre-trained word embedding requested via params.
    if params.emb_type == "GloVe":
        embedding = GloVe(
            name=params.emb_data, dim=params.emb_dim
        )  # use glove embedding with default option(name='840B', dim=300)
    elif params.emb_type == "CharNGram":
        embedding = CharNGram()
    elif params.emb_type == "FastText":
        embedding = FastText(name=params.emb_data, dim=params.emb_dim)
    else:
        print("Wrong embedding type")
        exit()
    # First 1000 rows become the validation split; the rest is training data.
    train_data, val_data = train_data[1000:], train_data[:1000]
    train_dataset = SentimentDataset(train_data, tokenizer, embedding)
    val_dataset = SentimentDataset(val_data, tokenizer, embedding)
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    val_dataloader = DataLoader(dataset=val_dataset,
                                batch_size=batch_size,
                                shuffle=False)
    model = SentimentClassificationModel(params.emb_dim, params.hidden_dim,
                                         params.dropout).to(device)
    crit = nn.CrossEntropyLoss().to(device)
    optim = torch.optim.Adam(params=model.parameters(), lr=1e-3)
    best_val_acc = 0
    early_stop_cnt = 0
    epoch = 0
    train_loss_list = []
    train_acc_list = []
    val_acc_list = []
    # Stop once the counter reaches 5 without a validation improvement.
    # NOTE(review): the counter is incremented even right after being reset,
    # so an improving epoch leaves it at 1, not 0 — confirm this off-by-one
    # is intended.
    while early_stop_cnt != 5:
        loss_list, train_acc = train.trainer(epoch, model, train_dataloader,
                                             crit, optim, device)
        val_acc = train.eval(epoch, model, val_dataloader, device, False)
        # Persist the best model (epoch 0 is excluded from checkpointing).
        if val_acc > best_val_acc and epoch > 0:
            torch.save(model.state_dict(), './model/lstm_best.pt')
            best_val_acc = val_acc
            early_stop_cnt = 0
        early_stop_cnt += 1
        epoch += 1
        train_loss_list.extend(loss_list)
        train_acc_list.append(train_acc)
        val_acc_list.append(val_acc)
    print("Early stopping condition satisfied")
    plotting("train_loss", "steps", "loss", train_loss_list)
    plotting("train_accuracy", "epoch", "accuracy", train_acc_list)
    plotting('validation_accuracy', "epoch", "accuracy", val_acc_list)
def main():
    """Train a SampleCNN auto-tagger on MTT data, then evaluate on test."""
    print("Start loading data...")
    # One dataset per split, built from its annotation CSV.
    split_csvs = ('train_50_tags_annotations_final.csv',
                  'valid_50_tags_annotations_final.csv',
                  'test_50_tags_annotations_final.csv')
    datasets = [
        SampleLevelMTTDataset(BASE_DIR + csv_name, AUDIO_DIR, LIST_OF_TAGS,
                              NUM_TAGS)
        for csv_name in split_csvs
    ]
    train_loader, val_loader, test_loader = (
        DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
        for ds in datasets)
    print("Finished loading data!")

    # load model
    print("Load sampleCNN model")
    sampleCNN_model = model.SampleCNN(DROPOUT_RATE).to(device)

    # start training
    print("Start training!!")
    # criterion = nn.BCELoss()
    # BCEWithLogitsLoss folds the sigmoid in — the model must not apply a
    # sigmoid at its output when using this criterion.
    criterion = nn.BCEWithLogitsLoss()
    train.train(sampleCNN_model, train_loader, val_loader, criterion, LR,
                NUM_EPOCHS, device)
    print("Finished! Hopefully..")

    # test it
    print("Start testing...")
    train.eval(sampleCNN_model, test_loader, criterion, device)
def main():
    """Train MobileNetV3 with cross-entropy + center loss, keeping the best model.

    Copies the dataset from the object store, trains for args.num_epoch
    epochs, checkpoints whenever test accuracy improves, and uploads the
    checkpoint back to the object store.
    """
    homepath = os.environ['HOME']
    datapath = os.path.join(homepath, 'data')
    # NOTE(review): `mx` here vs `mox` below — presumably the same MoXing
    # module under two aliases; confirm against the import block.
    mx.file.copy_parallel(args.data_url, datapath)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MobileNetV3().to(device)
    centerloss = CenterLoss(num_classes=75, feat_dim=1280, use_gpu=True)
    cross_entropy = nn.CrossEntropyLoss()
    # Separate optimizers: one for the backbone, one for the center-loss
    # class centers.
    optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model,
                                      weight_decay=5e-04, momentum=0.9)
    optimizer_centloss = torch.optim.SGD(centerloss.parameters(),
                                         lr=args.lr_centloss)
    train_iterator, test_iterator = dataprocess(
        train_label_path=args.train_label_txt,
        data_dirtory=datapath,
        test_label_path=args.test_label_txt,
        batch_size=args.batch_size)
    if args.step > 0:
        scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.step,
                                        gamma=args.gamma)
    if not os.path.isdir(os.path.join(args.homepath, 'model')):
        os.makedirs(os.path.join(args.homepath, 'model'))
    tmp_accuracy = 0
    for epoch in range(args.num_epoch):
        if args.step > 0:
            scheduler.step()
        train_loss, train_acc = train(model=model, device=device,
                                      train_iterator=train_iterator,
                                      optimizer_model=optimizer_model,
                                      optimizer_centloss=optimizer_centloss,
                                      criterion1=cross_entropy,
                                      criterion2=centerloss,
                                      weight_centloss=args.weight)
        # NOTE(review): train uses args.weight while eval uses
        # args.weight_centloss — confirm which is intended.
        test_loss, test_acc = eval(model=model, device=device,
                                   test_iterator=test_iterator,
                                   criterion1=cross_entropy,
                                   criterion2=centerloss,
                                   weight_centloss=args.weight_centloss)
        print('|Epoch:', epoch + 1, '|Train loss', train_loss.item(),
              '|Train acc:', train_acc.item(), '|Test loss', test_loss.item(),
              '|Test acc', test_acc.item())
        if test_acc > tmp_accuracy:
            MODEL_SAVE_PATH = os.path.join(args.homepath, 'model',
                                           'mymodel_{}.pth'.format(epoch))
            # BUG FIX: nn.Module has state_dict(), not save_dict() — the
            # original raised AttributeError at the first checkpoint.
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            tmp_accuracy = test_acc
            mox.file.copy(MODEL_SAVE_PATH,
                          os.path.join(args.train_url, 'model/mymodel.pth'))
def do_quick_distillation(start_epoch=-1):
    """Distil an XLNet cloze student from cached teacher soft labels.

    Combines hard cross-entropy on gold labels with a temperature-scaled
    soft loss against teacher probabilities, weighted by config.alpha.
    Saves a checkpoint and steps the plateau scheduler once per epoch.
    """
    seed_init()
    train_dataloader, eval_dataloader = getdataLoader()
    xlnet_config = XLNetConfig.from_json_file(config.xlnet_config_root)
    student = XlnetCloze(xlnet_config)
    # Teacher predictions were precomputed and pickled offline.
    soft_labels = pickle.load(open(config.soft_label_file, "rb"))
    optimizer_grouped_parameters = get_optimizer_group(student)
    num_train_steps = int(train_dataloader.dataset.__len__() /
                          config.train_batch_size * config.num_train_epochs)
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=config.xlnet_learning_rate)
    # Plateau scheduler steps on eval accuracy ('max' mode) once per epoch.
    scheduler = ReduceLROnPlateau(optimizer, 'max', verbose=True,
                                  factor=config.decay, min_lr=config.min_lr,
                                  patience=config.patience)
    load_model(start_epoch, student, optimizer)
    if config.n_gpu > 1:
        student = nn.DataParallel(student)
    student.to(config.device)
    student.train()
    ave_loss, ave_hard_loss, ave_soft_loss, ave_train_accr = get_watch_index()
    # Resume step counting as if the earlier epochs had already run.
    global_step = (start_epoch + 1) * num_train_steps
    for epoch in trange(start_epoch + 1, config.num_train_epochs):
        student.zero_grad()
        for batch in tqdm(train_dataloader):
            input_ids, attention_mask, position, option_ids, tags, labels = batch
            input_ids, attention_mask, position, option_ids, tags, labels = to_device(
                input_ids, attention_mask, position, option_ids, tags, labels
            )
            _, student_logits = student(input_ids, attention_mask, position,
                                        option_ids, tags, labels)
            teacher_probs = get_teacher_probs(soft_labels,
                                              tags).to(config.device)
            loss_hard = F.cross_entropy(student_logits, labels,
                                        reduction="mean")
            loss_soft, teacher_probs = cross_entropy_loss_with_temperature_v2(
                student_logits, teacher_probs, config.temperature)
            # Distillation objective: the soft term is rescaled by T^2 so its
            # gradient magnitude stays comparable to the hard term.
            loss = config.alpha * loss_hard + (
                1.0 - config.alpha
            ) * config.temperature * config.temperature * loss_soft
            loss.backward()
            ave_train_accr.add(cal_accr(student_logits, labels))
            # The logged loss omits the T^2 factor used for backprop.
            ave_loss.add((config.alpha * loss_hard +
                          (1.0 - config.alpha) * loss_soft).item())
            ave_soft_loss.add(loss_soft.item())
            ave_hard_loss.add(loss_hard.item())
            optimizer.step()
            optimizer.zero_grad()
            # ipdb.set_trace()
            show_watch_index(global_step,
                             ave_teacher_accr=cal_accr(logits=teacher_probs,
                                                       labels=labels))
            if (global_step + 1) % config.show_loss_step == 0:
                now_lrs = show_lr(optimizer)
                show_watch_index(global_step, ave_hard_loss=ave_hard_loss,
                                 now_lrs=now_lrs,
                                 ave_soft_loss=ave_soft_loss,
                                 ave_loss=ave_loss)
            # Linear LR warm-up over the first warmup_proportion of steps.
            if global_step <= num_train_steps * config.warmup_proportion:
                warmup_adajust(num_train_steps, global_step, optimizer)
            global_step += 1
        eval_accr = eval(student, eval_dataloader)
        show_watch_index(epoch, eval_accr=eval_accr,
                         ave_train_accr=ave_train_accr)
        scheduler.step(eval_accr)
        save_model(epoch, student, optimizer)
def train_test(text_field, label_field, test=False):
    """Build iterators and a TextCNN, then either evaluate a snapshot or train.

    NOTE(review): the `test` parameter is never used — the code reads
    args.test instead; confirm whether the parameter is vestigial.
    """
    train_iter, dev_iter, test_iter = load_dataset(text_field, label_field,
                                                   args, device=-1,
                                                   repeat=False, shuffle=True)
    # Derive model hyper-parameters from the built vocabularies.
    args.vocabulary_size = len(text_field.vocab)
    args.embedding_dim = text_field.vocab.vectors.size()[-1]
    args.vectors = text_field.vocab.vectors
    args.class_num = len(label_field.vocab)
    args.cuda = args.device != -1 and torch.cuda.is_available()
    print('Parameters:')
    for attr, value in sorted(args.__dict__.items()):
        if attr in {'vectors'}:
            continue  # embedding matrix is too large to print
        print('\t{}={}'.format(attr.upper(), value))
    text_cnn = model.TextCNN(args)
    if args.snapshot:
        print('\nLoading model from {}...\n'.format(args.snapshot))
        text_cnn.load_state_dict(torch.load(args.snapshot))
    if args.cuda:
        device = torch.device("cuda", args.device)
        text_cnn = text_cnn.to(device)
    if args.test:
        try:
            train.eval(test_iter, text_cnn, args, True)
        except KeyboardInterrupt:
            print('Exiting from testing early')
    else:
        try:
            train.train(train_iter, dev_iter, text_cnn, args)
        except KeyboardInterrupt:
            print('Exiting from training early')
def test(params):
    """Run the saved LSTM sentiment model on the open eval set; write a CSV."""
    tokenizer = get_tokenizer('spacy', language='en')
    embedding = GloVe(name=params.emb_data, dim=params.emb_dim)

    # Load the held-out evaluation data (no labels).
    frame = pd.read_csv('./data/eval_final_open.csv')
    dataset = SentimentDataset(frame, tokenizer, embedding, False)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)

    # Restore the best checkpoint produced during training.
    model = SentimentClassificationModel(params.emb_dim, params.hidden_dim,
                                         0.3).to(device)
    model.load_state_dict(torch.load('./model/lstm_best.pt'))

    predictions = train.eval(0, model, loader, device, True)
    submission = pd.DataFrame({'Id': list(range(len(frame))),
                               'Category': predictions})
    submission.to_csv("./data/out.csv", index=False)
# Training hyper-parameters.
# NOTE(review): 'learnign_rate' is a typo for 'learning_rate'; kept as-is
# because later (unseen) code may reference this module-level name.
learnign_rate = 0.001
num_epochs = 100
# parameter of rnn
num_layer = 2
num_hidden = 128
# param of rcnn
num_sm_hidden = 100

# model
print("Load model...")
# Alternative classifiers kept for reference:
#classifier_model = model.CNNClassifier(in_channels, out_channels, voca_size, embed_dim, num_classes, kernel_sizes, dropout_p, embedding_weight)
#classifier_model = model.RNNClassifier(voca_size, embed_dim, num_hidden, num_layer, num_classes, embedding_weight)
classifier_model = model.RCNN_Classifier(voca_size, embed_dim, num_hidden,
                                         num_sm_hidden, num_layer,
                                         num_classes, embedding_weight)
if iscuda:
    classifier_model = classifier_model.cuda()

# train
print("Start Train...")
train.train(train_loader, dev_loader, classifier_model, iscuda,
            learnign_rate, num_epochs)

# eval
print("Evaluation")
train.eval(test_loader, classifier_model, iscuda)
# NOTE(review): `cuda` is inverted, AND-ed with availability, and then
# immediately deleted — the flag is never used afterwards; confirm whether
# these two lines are dead code.
cuda = (not cuda) and torch.cuda.is_available()
del cuda
# Snapshot directory is named after the current date and time.
save_dir = os.path.join(
    save_dir,
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
)  #making a Directory in snapshot named after time and date"

# model- load from model.py
cnn = model.CNN_Text(embed_num, embed_dim, class_num, kernel_num, static)
if snapshot is not None:
    print('\nLoading model from {}...'.format(snapshot))
    cnn.load_state_dict(torch.load(snapshot))

# train or predict
if predict is not None:
    # Single-sentence prediction mode.
    label = train.predict(predict, cnn, text_field, label_field)
    print('\n[Text] {}\n[Label] {}\n'.format(predict, label))
elif test:
    try:
        train.eval(test_iter, cnn)
    except Exception as e:
        print("\nSorry. The test dataset doesn't exist.\n")
else:
    print()
    try:
        train.train(train_iter, val_iter, cnn, save_dir)
    except KeyboardInterrupt:
        print('\n' + '-' * 89)
        print('Exiting from training early')
# k = 0 # for i in params: # l = 1 # print("该层的结构:" + str(list(i.size()))) # for j in i.size(): # l *= j # print("该层参数和:" + str(l)) # k = k + l # print("总参数数量和:" + str(k)) if args.snapshot is not None: print('\nLoading model from {}...'.format(args.snapshot)) cnn.load_state_dict(torch.load(args.snapshot)) if args.cuda: torch.cuda.set_device(args.device) cnn = cnn.cuda() if args.test: try: train.eval(test_data, cnn, args, flag=1) except Exception as e: print("\nSorry. The test dataset doesn't exist.\n") else: print() try: train.train(train_data, dev_data, cnn, args) except KeyboardInterrupt: print('\n' + '-' * 89) print('Exiting from training early')
try:
    if args.kfold > 0:
        # K-fold cross-validation: train a fresh CNN per fold and collect
        # each fold's final validation result.
        folds = mydatasets.TestsDS.kfold(text_field, label_field,
                                         folds=args.kfold)
        fold_results = []
        fold = 1
        for train_data, dev_data in folds:
            print(f'Starting fold {fold} (val. fold {fold}, rest train)...')
            train_iter, dev_iter = data.Iterator.splits(
                (train_data, dev_data),
                batch_sizes=(args.batch_size, len(dev_data)),
                repeat=False)
            args.embed_num = len(text_field.vocab)
            # NOTE(review): subtracting 1 presumably drops the '<unk>' label
            # from the class count — confirm against the field definition.
            args.class_num = len(label_field.vocab) - 1
            cnn = model.CNN_Text(args)
            train.train(train_iter, dev_iter, cnn, args)
            print(f'\nFinal fold {fold} validation:')
            result = train.eval(dev_iter, cnn, args)
            fold_results.append((fold, result))
            fold += 1
        print('Fold results:', fold_results)
    else:
        # Plain single train/dev run (iterators built elsewhere in the file).
        train.train(train_iter, dev_iter, cnn, args)
except KeyboardInterrupt:
    print('Exiting from training early')
def train_DBN(train_data, test_data, hidden_units,
              num_epochs_DBN=50, num_epochs_LR=100):
    """Train a logistic-regression classifier on the given features.

    The DBN pre-training stage below is commented out, so the LR model is
    fit directly on the raw features for num_epochs_LR epochs; loss is
    printed every 10 steps and train/test metrics (accuracy, precision,
    recall, F1, AUC) once per epoch.

    NOTE(review): `hidden_units` and `num_epochs_DBN` are only consumed by
    the disabled DBN stage.
    """
    train_features, train_labels = train_data
    test_features, test_labels = test_data
    # training DBN model
    #################################################################################################
    # dbn_model = DBN(visible_units=train_features.shape[1],
    #                 hidden_units=[20, hidden_units],
    #                 k=5,
    #                 learning_rate=0.01,
    #                 learning_rate_decay=True,
    #                 xavier_init=True,
    #                 increase_to_cd_k=False,
    #                 use_gpu=False)
    # dbn_model.train_static(train_features, train_labels, num_epochs=num_epochs_DBN, batch_size=32)
    # # Finishing the training DBN model
    # print('---------------------Finishing the training DBN model---------------------')
    # # using DBN model to construct features
    # train_features, _ = dbn_model.forward(train_features)
    # test_features, _ = dbn_model.forward(test_features)
    ##################################################################################################
    # training LR model
    ##################################################################################################
    # Single-output vs multi-label target shape.
    if len(train_labels.shape) == 1:
        num_classes = 1
    else:
        num_classes = train_labels.shape[1]
    # lr_model = LR(input_size=hidden_units, num_classes=num_classes)
    lr_model = LR(input_size=train_features.shape[1], num_classes=num_classes)
    optimizer = torch.optim.Adam(lr_model.parameters(), lr=0.00001)
    steps = 0
    batches_test = mini_batches(X=test_features, Y=test_labels)
    for epoch in range(1, num_epochs_LR + 1):
        # building batches for training model
        batches_train = mini_batches_update(X=train_features, Y=train_labels)
        for batch in batches_train:
            x_batch, y_batch = batch
            # NOTE(review): the CUDA branch casts only y to FloatTensor while
            # x keeps its original dtype — confirm the intended dtypes match
            # the CPU branch.
            if torch.cuda.is_available():
                x_batch, y_batch = torch.tensor(
                    x_batch).cuda(), torch.cuda.FloatTensor(y_batch)
            else:
                x_batch, y_batch = torch.tensor(x_batch).float(), torch.tensor(
                    y_batch).float()
            optimizer.zero_grad()
            predict = lr_model.forward(x_batch)
            loss = nn.BCELoss()
            loss = loss(predict, y_batch)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % 10 == 0:
                print('\rEpoch: {} step: {} - loss: {:.6f}'.format(
                    epoch, steps, loss.item()))
        print('Epoch: %i ---Training data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_train, model=lr_model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
        print('Epoch: %i ---Testing data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_test, model=lr_model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
def main(mode, args):
    """Run the binding workflow selected by `mode`.

    `mode` is one of: 'train baseline', 'policy gradient', 'test model',
    'add feature', 'write cases', 'anonymous'. Builds vocab, datasets and
    dataloaders, then dispatches to training / RL fine-tuning / evaluation /
    analysis utilities.
    """
    # build vocab
    word2index, index2word = build_all_vocab(init_vocab={
        UNK_WORD: 0,
        BOS_WORD: 1
    })
    args.vocab, args.vocab_size, args.index2word = word2index, len(
        word2index), index2word
    # get data_from_train from only_label = True, for same as train baseline
    args.only_label = True
    train_dataset = BindingDataset('train', args=args)
    # Every max-length / vocab statistic computed on the training set, so
    # dev/test tensors get padded identically.
    data_from_train = (train_dataset.tokenize_max_len,
                       train_dataset.columns_token_max_len,
                       train_dataset.columns_split_marker_max_len,
                       train_dataset.cells_token_max_len,
                       train_dataset.cells_split_marker_max_len,
                       train_dataset.pos_tag_vocab,
                       train_dataset.bert_tokenize_max_len,
                       train_dataset.bert_tokenize_marker_max_len,
                       train_dataset.bert_columns_split_max_len,
                       train_dataset.bert_columns_split_marker_max_len,
                       train_dataset.bert_cells_split_max_len,
                       train_dataset.bert_cells_split_marker_max_len)
    args.tokenize_max_len, args.columns_token_max_len, args.columns_split_marker_max_len, \
        args.cells_token_max_len, args.cells_split_marker_max_len, args.pos_tag_vocab,\
        args.bert_tokenize_max_len, args.bert_tokenize_marker_max_len, args.bert_columns_split_max_len, args.bert_columns_split_marker_max_len,\
        args.bert_cells_split_max_len, args.bert_cells_split_marker_max_len = data_from_train
    logger.info('data_from_train'), logger.info(data_from_train)
    # set only_label
    if mode == 'train baseline':
        args.only_label = True
    elif mode == 'policy gradient':
        args.only_label = False
    elif mode == 'test model':
        args.only_label = True
    elif mode == 'add feature':
        args.only_label = False
    elif mode == 'write cases':
        args.only_label = True
    elif mode == 'anonymous':
        args.only_label = False
    # build train_dataloader
    train_dataset = BindingDataset('train', args=args,
                                   data_from_train=data_from_train)
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle)
    # build dev_dataloader (never shuffled)
    args.shuffle = False
    dev_dataset = BindingDataset('dev', args=args,
                                 data_from_train=data_from_train)
    dev_dataloader = DataLoader(dataset=dev_dataset,
                                batch_size=args.batch_size,
                                shuffle=args.shuffle)
    # build test_dataloader
    # test_dataset = BindingDataset('test', args=args, data_from_train=data_from_train)
    # test_dataloader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=args.shuffle)
    # load word embedding
    if args.load_w2v:
        args.embed_matrix = load_word_embedding(args.word_dim, word2index)
    # train
    if mode == 'train baseline':
        if args.model == 'baseline':
            model = Baseline(args=args)
        elif args.model == 'gate':
            if args.bert_model is None:
                model = Gate(args=args)
            else:
                model = BertGate(args=args)
        else:
            raise NotImplementedError
        train(train_dataloader, dev_dataloader, args=args, model=model)
    elif mode == 'policy gradient':
        # Warm-start RL fine-tuning from a pre-trained checkpoint.
        model = torch.load('./res/' + args.model + '/2816_False_True_True_726425',
                           map_location=lambda storage, loc: storage.cuda(0))
        train_rl(train_dataloader, dev_dataloader, args=args, model=model)
    elif mode == 'test model':
        # also need the correct 'model' for dataloader
        model = torch.load(
            './res/policy_gradient/0.819928_True_True_True_412532',
            map_location=lambda storage, loc: storage.cuda(0))
        eval(dev_dataloader, args, model, epoch=0)
        eval_rl(dev_dataloader, args, model, epoch=0)
    elif mode == 'add feature':
        model = torch.load('./res/policy_gradient/0.804922_22-16-28',
                           map_location=lambda storage, loc: storage.cuda(0))
        res = test(dev_dataloader, args, model)
        add_abstraction('dev', res=res, args=args)
    elif mode == 'write cases':
        # Compare policy-gradient predictions against the gate baseline and
        # dump cases where they disagree and the gate model is correct.
        model = torch.load(
            './res/policy_gradient/0.819928_True_True_True_412532',
            map_location=lambda storage, loc: storage.cuda(0))
        res_pg = test(dev_dataloader, args, model, sep=' ')
        model = torch.load('./res/gate/epoch100',
                           map_location=lambda storage, loc: storage.cuda(0))
        res_gate = test(dev_dataloader, args, model, sep=' ')
        with open('cases.txt', 'w', encoding='utf-8') as f:
            for key in res_pg.keys():
                # diff between gate and policy
                if res_gate[key]['pred'] != res_pg[key]['pred']:
                    if res_gate[key]['pred'] == res_gate[key]['label']:
                        f.write(key + '\n')
                        f.write('Pred_Gate:\t\t\t\t' +
                                json.dumps(res_gate[key]['pred']) + '\n')
                        f.write('Pred_Policy_Gradient:\t' +
                                json.dumps(res_pg[key]['pred']) + '\n')
                        f.write('Label:\t\t\t\t\t' +
                                json.dumps(res_pg[key]['label']) + '\n')
                        f.write('SQL_Labels:\t\t\t\t' +
                                json.dumps(res_pg[key]['sql_labels']) +
                                '\n' + '\n')
    elif mode == 'anonymous':
        model = torch.load(
            './res/policy_gradient/0.819928_True_True_True_412532',
            map_location=lambda storage, loc: storage.cuda(0))
        res = test(train_dataloader, args, model, sep='')
        anonymous('train', res, args)
# model cnn = model.CNN_Text(args) if args.snapshot is not None: print('\nLoading model from {}...'.format(args.snapshot)) cnn.load_state_dict(torch.load(args.snapshot)) if args.cuda: torch.cuda.set_device(args.device) cnn = cnn.cuda() # train or predict if args.predict is not None: label = train.predict(args.predict, cnn, text_field, label_field, args.cuda) print('\n[Text] {}\n[Label] {}\n'.format(args.predict, label)) elif args.test: try: train.eval(test_iter, cnn, args) except Exception as e: print("\nSorry. The test dataset doesn't exist.\n") else: print() try: #train.train(X_train, y_train, X_valid, y_valid, cnn, args) train.train(X_train, y_train, X_test, y_test, cnn, args) train.eval(X_test, y_test, cnn, args) except KeyboardInterrupt: print('\n' + '-' * 89) print('Exiting from training early')
print("\nParameters:") for attr, value in sorted(args.__dict__.items()): print("\t{}={}".format(attr.upper(), value)) # model cnn = model.CNN_Text(args) if args.snapshot is not None: print('\nLoading model from {}...'.format(args.snapshot)) cnn.load_state_dict(torch.load(args.snapshot)) device = 'cuda' if torch.cuda.is_available() else 'cpu' cnn = cnn.to(device) # train or predict if args.predict is not None: label = train.predict(args.predict, cnn, text_field, label_field, device) print('\n[Text] {}\n[Label] {}\n'.format(args.predict, label)) elif args.test: try: train.eval(test_iter, cnn, args, device) except Exception as e: print("\nSorry. The test dataset doesn't exist.\n") else: print() try: train.train(train_iter, dev_iter, cnn, args, device) except KeyboardInterrupt: print('\n' + '-' * 89) print('Exiting from training early')
def main():
    """Entry point: build datasets, model and optimizer, then run training.

    Reads a YAML config, prepares train/dev loaders, instantiates the model
    selected by config.model.type, optionally restores checkpoints, and
    trains with per-epoch saving, optional evaluation and LR decay.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-config', type=str, default='config/aishell.yaml')
    parser.add_argument('-log', type=str, default='train.log')
    parser.add_argument('-mode', type=str, default='retrain')
    opt = parser.parse_args()

    configfile = open(opt.config)
    config = AttrDict(yaml.load(configfile, Loader=yaml.FullLoader))

    # Experiment directory: egs/<data name>/exp/<model type>/<save name>.
    exp_name = os.path.join('egs', config.data.name, 'exp', config.model.type,
                            config.training.save_model)
    if not os.path.isdir(exp_name):
        os.makedirs(exp_name)
    logger = init_logger(os.path.join(exp_name, opt.log))

    # Keep a copy of the config next to the logs for reproducibility.
    shutil.copyfile(opt.config, os.path.join(exp_name, 'config.yaml'))
    logger.info('Save config info.')
    os.environ["CUDA_VISIBLE_DEVICES"] = config.training.gpus

    config.training.num_gpu = num_gpus(config.training.gpus)
    num_workers = 6 * (config.training.num_gpu
                       if config.training.num_gpu > 0 else 1)
    # Effective batch size scales with GPU count when DataParallel is used.
    batch_size = config.data.batch_size * config.training.num_gpu \
        if config.training.num_gpu > 0 else config.data.batch_size

    train_dataset = LmDataset(config.data, 'train')
    train_sampler = Batch_RandomSampler(len(train_dataset),
                                        batch_size=batch_size,
                                        shuffle=config.data.shuffle)
    training_data = AudioDataLoader(dataset=train_dataset,
                                    num_workers=num_workers,
                                    batch_sampler=train_sampler)
    logger.info('Load Train Set!')

    dev_dataset = LmDataset(config.data, 'dev')
    dev_sampler = Batch_RandomSampler(len(dev_dataset),
                                      batch_size=batch_size,
                                      shuffle=config.data.shuffle)
    validate_data = AudioDataLoader(dataset=dev_dataset,
                                    num_workers=num_workers,
                                    batch_sampler=dev_sampler)
    logger.info('Load Dev Set!')

    # Seed for reproducibility; the GPU path also fixes cuDNN determinism.
    if config.training.num_gpu > 0:
        torch.cuda.manual_seed(config.training.seed)
        torch.backends.cudnn.deterministic = True
    else:
        torch.manual_seed(config.training.seed)
    logger.info('Set random seed: %d' % config.training.seed)

    if config.model.type == "transducer":
        model = Transducer(config.model)
    elif config.model.type == "ctc":
        model = CTC(config.model)
    elif config.model.type == "lm":
        model = LM(config.model)
    else:
        raise NotImplementedError

    if config.training.load_model:
        if config.training.num_gpu == 0:
            checkpoint = torch.load(config.training.load_model,
                                    map_location='cpu')
        else:
            checkpoint = torch.load(config.training.load_model)
        logger.info(str(checkpoint.keys()))
        load_model(model, checkpoint)
        # NOTE(review): the message logs config.training.new_model although
        # the checkpoint came from config.training.load_model — verify.
        logger.info('Loaded model from %s' % config.training.new_model)

    # Optionally restore encoder / decoder weights from separate checkpoints.
    if config.training.load_encoder or config.training.load_decoder:
        if config.training.load_encoder:
            checkpoint = torch.load(config.training.load_encoder)
            model.encoder.load_state_dict(checkpoint['encoder'])
            logger.info('Loaded encoder from %s' %
                        config.training.load_encoder)
        if config.training.load_decoder:
            checkpoint = torch.load(config.training.load_decoder)
            model.decoder.load_state_dict(checkpoint['decoder'])
            logger.info('Loaded decoder from %s' %
                        config.training.load_decoder)

    if config.training.num_gpu > 0:
        model = model.cuda()
        if config.training.num_gpu > 1:
            device_ids = list(range(config.training.num_gpu))
            model = torch.nn.DataParallel(model, device_ids=device_ids)
        logger.info('Loaded the model to %d GPUs' % config.training.num_gpu)

    # n_params, enc, dec = count_parameters(model)
    # logger.info('# the number of parameters in the whole model: %d' % n_params)
    # logger.info('# the number of parameters in the Encoder: %d' % enc)
    # logger.info('# the number of parameters in the Decoder: %d' % dec)
    # logger.info('# the number of parameters in the JointNet: %d' %
    #             (n_params - dec - enc))

    optimizer = Optimizer(model.parameters(), config.optim)
    logger.info('Created a %s optimizer.' % config.optim.type)

    if opt.mode == 'continue':
        # Resuming requires the checkpoint loaded above.
        if not config.training.load_model:
            raise Exception(
                "if mode is 'continue', need 'config.training.load_model'")
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        logger.info('Load Optimizer State!')
    else:
        start_epoch = 0

    # create a visualizer
    if config.training.visualization:
        visualizer = SummaryWriter(os.path.join(exp_name, 'log'))
        logger.info('Created a visualizer.')
    else:
        visualizer = None

    logger.info(model)

    for epoch in range(start_epoch, config.training.epochs):
        train(epoch, config, model, training_data, optimizer, logger,
              visualizer)
        save_name = os.path.join(
            exp_name,
            '%s.epoch%d.chkpt' % (config.training.save_model, epoch))
        save_model(model, optimizer, config, save_name)
        logger.info('Epoch %d model has been saved.' % epoch)
        if config.training.eval_or_not:
            _ = eval(epoch, config, model, validate_data, logger, visualizer)
        if epoch >= config.optim.begin_to_adjust_lr:
            optimizer.decay_lr()
            # early stop
            if optimizer.lr < 1e-6:
                logger.info('The learning rate is too low to train.')
                break
            logger.info('Epoch %d update learning rate: %.6f' %
                        (epoch, optimizer.lr))
    logger.info('The training process is OVER!')
# model if args.snapshot is None: cnn = model.CNN_Text(args) else : print('\nLoading model from [%s]...' % args.snapshot) try: cnn = torch.load(args.snapshot) except : print("Sorry, This snapshot doesn't exist."); exit() if args.cuda: cnn = cnn.cuda() #embedding copy cnn.embed.weight.data.copy_(torch.from_numpy(pretrained_embeddings)) # train if args.test : try: _ = train.eval(dev_iter, cnn, args) #test for dev except Exception as e: print("\nSorry. The test dataset doesn't exist.\n") else : print() try: train.train(train_iter, dev_iter, cnn, args) except KeyboardInterrupt: print('-' * 89) print('Exiting from training early')
def launch_model():
    """Flask endpoint: run propaganda-technique detection on submitted text.

    Reads ``full_text``, ``id`` and ``model_type`` from the POSTed form,
    selects one of four BERT variants via module-level flags, predicts
    technique spans, runs a keras aspect model over the extracted spans,
    and returns a JSON payload with the markup.

    NOTE(review): mutates module globals (BERT/JOINT/GRANU/MGN/NUM_TASK/
    MASKING/HIER) and reads/writes files in DIRECTORY_PREDICT — not safe
    for concurrent requests; confirm the server runs single-threaded.
    """
    full_text = request.form['full_text']
    id_ = request.form['id']
    model_type = request.form['model_type']
    # Exactly one variant flag becomes True, depending on the chosen path.
    global BERT, JOINT, GRANU, MGN, NUM_TASK, MASKING, HIER
    BERT = model_type == BERT_PATH
    JOINT = model_type == JOINT_BERT_PATH
    GRANU = model_type == GRANU_BERT_PATH
    MGN = model_type == MGN_SIGM_BERT_PATH
    # either of the four variants:
    # BERT = False
    # JOINT = False
    # GRANU = False
    # MGN = True
    assert BERT or JOINT or GRANU or MGN
    assert not (BERT and JOINT) and not (BERT and GRANU) and not (BERT and MGN) \
        and not (JOINT and GRANU) and not (JOINT and MGN) and not (GRANU and MGN)
    # either of the two variants
    SIGMOID_ACTIVATION = True
    RELU_ACTIVATION = False
    assert not (SIGMOID_ACTIVATION and RELU_ACTIVATION) and (
        SIGMOID_ACTIVATION or RELU_ACTIVATION)
    # Map the variant to its task count / masking / hierarchy settings.
    if BERT:
        NUM_TASK = 1
        MASKING = 0
        HIER = 0
    elif JOINT:
        NUM_TASK = 2
        MASKING = 0
        HIER = 0
    elif GRANU:
        NUM_TASK = 2
        MASKING = 0
        HIER = 1
    elif MGN:
        NUM_TASK = 2
        MASKING = 1
        HIER = 0
    else:
        raise ValueError(
            "You should choose one of bert, joint, granu and mgn in options")
    dct = {
        'NUM_TASK': NUM_TASK,
        'MASKING': MASKING,
        'SIGMOID_ACTIVATION': SIGMOID_ACTIVATION,
        'HIER': HIER
    }
    model = load_model(model_type, **dct)
    # No id supplied: draw a random one that is not already taken.
    if not id_:
        ids = get_existent_ids()
        id_ = random_module.randint(0, N)
        while id_ in ids:
            id_ = random_module.randint(0, N)
    # Persist the submitted article so the dataset loader can pick it up.
    with open(DIRECTORY_PREDICT.joinpath(f'article{id_}.txt'), 'w',
              encoding='utf-8') as f:
        f.write(full_text)
    text = overwrite_one_article(id_, directory=DIRECTORY_PREDICT)

    my_predict_dataset = PropDataset(DIRECTORY_PREDICT, is_test=True)
    my_predict_iter = data.DataLoader(dataset=my_predict_dataset,
                                      batch_size=BATCH_SIZE,
                                      shuffle=False,
                                      num_workers=1,
                                      collate_fn=pad)

    tmp_file = 'tmp.txt'
    # NOTE(review): `eval` here is the project's evaluation routine, which
    # shadows the builtin; it writes predictions to tmp_file.
    eval(model, my_predict_iter, tmp_file, criterion, binary_criterion,
         NUM_TASK=NUM_TASK)
    ids, texts = read_data(DIRECTORY_PREDICT, is_test=True)
    t_texts = clean_text(texts, ids)
    flat_texts = [sentence for article in t_texts for sentence in article]
    fi, prop_sents = convert(NUM_TASK - 1, flat_texts, tmp_file)
    prop_sents = prop_sents[id_]
    # '1' marks a propaganda sentence, '' a clean one (for the UI).
    prop_sents = ['1' if elem else '' for elem in prop_sents]
    results = remove_duplicates(fi)
    # Move the article from the predict folder into the markup archive.
    DIRECTORY_PREDICT.joinpath(f'article{id_}.txt').rename(
        DIRECTORY_MARKUP.joinpath(f'article{id_}.txt'))

    # Per-character sets of detected techniques (human-readable and raw).
    lst = [set() for _ in range(len(full_text))]
    source_lst = [set() for _ in range(len(full_text))]
    for inner_lst in results:
        # inner_lst layout: ..., technique, start, end — presumably; verify
        # against remove_duplicates/convert.
        for i in range(inner_lst[-2], inner_lst[-1]):
            lst[i].add(HUMAN_READABLE_TECHNIQUES[TECHNIQUES.index(
                inner_lst[-3])])
            source_lst[i].add(inner_lst[-3])

    # Collect the text spans predicted for this article only.
    extracts_s_e = []
    extracts = []
    categories = []
    for elem in fi:
        if elem[0] != str(id_):
            continue
        _, category, start, end = elem
        extracts_s_e.append((start, end))
        extracts.append(text[start:end])
        categories.append(category)
    extracts = [
        ' '.join(normalize(extract.strip())) for extract in extracts
        if extract
    ]
    print(f'extracts: {extracts}')
    # CHECK
    # extracts = [word for sent in extracts for word in sent.split()]

    # Vectorize the spans and pad to the longer of train/test max lengths.
    test_x, test_maxlen = get_data(extracts,
                                   vocab_size=args.vocab_size,
                                   maxlen=args.maxlen)
    test_x = sequence.pad_sequences(test_x,
                                    maxlen=max(train_maxlen, test_maxlen))
    test_length = test_x.shape[0]
    # Split into batch_size-sized chunks (plus a trailing remainder chunk).
    splits = []
    for i in range(1, test_length // args.batch_size):
        splits.append(args.batch_size * i)
    if test_length % args.batch_size:
        splits += [(test_length // args.batch_size) * args.batch_size]
    test_x = np.split(test_x, splits)

    # Run the keras aspect model inside the saved TF graph context.
    with graph.as_default():
        aspect_model = keras_load_model(os.path.join('flask_app', 'output',
                                                     'reviews',
                                                     'model_param'),
                                        custom_objects={
                                            "Attention": Attention,
                                            "Average": Average,
                                            "WeightedSum": WeightedSum,
                                            "MaxMargin": MaxMargin,
                                            "WeightedAspectEmb":
                                            WeightedAspectEmb,
                                            "max_margin_loss":
                                            U.max_margin_loss
                                        },
                                        compile=True)
        # K.function with learning_phase()=0 runs the model in inference mode.
        test_fn = K.function([
            aspect_model.get_layer('sentence_input').input,
            K.learning_phase()
        ], [
            aspect_model.get_layer('att_weights').output,
            aspect_model.get_layer('p_t').output
        ])
        aspect_probs = []
        for batch in tqdm(test_x):
            _, cur_aspect_probs = test_fn([batch, 0])
            aspect_probs.append(cur_aspect_probs)
        aspect_probs = np.concatenate(aspect_probs)
        # Top-5 aspect labels per extract, most probable first.
        label_ids = np.argsort(aspect_probs, axis=1)[:, -5:]
        for i, labels in enumerate(label_ids):
            print(
                f'{extracts[i]}: {[aspects[label] for label in labels][::-1]}')

    correct_lst = ['; '.join(list(elem)) for elem in lst]
    commands = {
        extract: ([aspects[label] for label in label_ids[i]][::-1], [])
        for i, extract in enumerate(extracts)
    }
    write_existent_dict(id_, source_lst, directory=DIRECTORY_MARKUP)
    # Clean out the predict folder for the next request.
    for f in glob.glob(f'{DIRECTORY_PREDICT}/*'):
        os.remove(f)
    return jsonify(
        result={
            'id': id_,
            'list': correct_lst,
            'text': text,
            'prop_sents': prop_sents,
            'commands': commands
        })
:return: parsed yaml """ with open(filepath, "r") as stream: result = yaml.safe_load(stream) return result if __name__ == "__main__": args = parser.parse_args() params = load_yaml(args.config) if args.config else {} if args.command == "train": train.train(output_model_dir=args.output, tb_path=args.tensorboard, device_id=args.gpu, **params) elif args.command == "eval": loss, score = train.eval(model_path=args.model, **params) print( f'Validation loss: {loss:.4f}\nValidation mAP score: {score:.4f}') elif args.command == "mc-dropout": mc_processor = mc_dropout.MCProcessor( model=args.model, nuscenes_version=params['nuscenes_version'], data_path=params["data_path"], n_scenes=params['n_scenes']) mc_processor.visualise_monte_carlo(batch_size=1, sample_id=21, n_samples=10, saving_folder=args.saving_folder)
m_model = torch.load(args.snapshot) except: print("Sorry, This snapshot doesn't exist.") exit() if args.cuda: m_model = m_model.cuda() # train or predict assert m_model is not None if args.predict is not None: label = train.predict(args.predict, m_model, text_field, label_field) print('\n[Text] {}[Label] {}\n'.format(args.predict, label)) elif args.test: try: train.eval(test_iter, m_model, args) except Exception as e: print("\nSorry. The test dataset doesn't exist.\n") else: torch.set_num_threads(3) train.train(train_iter, dev_iter, m_model, args) # 直接测试所有的模型,选出最好的 m_max = -99999 whichmax = '' dirlist = os.listdir(args.save_dir) f = open(os.path.join(args.save_dir, 'testresult'), "w+", encoding='utf-8') for attr, value in sorted(args.__dict__.items()): f.write("\t{}={} \n".format(attr.upper(), value)) f.flush() f.write('----------------------------------------------------')
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True, collate_fn=data.collate_fn) validloader = torch.utils.data.DataLoader(validset, batch_size=batch_size, shuffle=True, drop_last=True, collate_fn=data.collate_fn) testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True, drop_last=True, collate_fn=data.collate_fn) train.train(trainloader, validloader, mymodel, lr, epochs, save_dir=save_dir, early_stop=early_stop, save_interval=3, save_best=True, cuda=True, log_interval=1, test_interval=10, batch_size=batch_size) train.eval(testloader, mymodel) print("Finsh!!")
xfold, acc), file=log_file_handle) print("Completed fold {0}. Mean accuracy on Dev: {1} for CHAR".format( xfold, np.mean(acc)), file=log_file_handle) if args.eval_on_test: if args.num_experts > 0: result = train.ensemble_eval(test_iter, char_cnn, args, two_ch=args.two_ch, log_file_handle=log_file_handle) else: result = train.eval(test_iter, char_cnn, args, two_ch=args.two_ch, log_file_handle=log_file_handle) char_test_fold_accuracies.append(result) print("Completed fold {0}. Accuracy on Test: {1} for CHAR".format( xfold, result)) print("Completed fold {0}. Accuracy on Test: {1} for CHAR".format( xfold, result), file=log_file_handle) log_file_handle.flush() #continue # Word CNN training and dev if use_word:
val_dl = DataLoader(val_ds, batch_size=batch_size) # initialise grader model = BERTGrader() model.to(device) # Optimizer optimizer = torch.optim.AdamW(model.parameters(), lr=lr, eps=1e-8) # Scheduler scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[sch]) # Criterion criterion = torch.nn.MSELoss(reduction='mean') # Train for epoch in range(epochs): # train for one epoch print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr'])) train(train_dl, model, criterion, optimizer, epoch, device) scheduler.step() # evaluate as we go along eval(val_dl, model, criterion, device) # Save the trained model state = model.state_dict() torch.save(state, out_file)
def main(path=None):
    """Entry point for the CNN text-classification script.

    :param path: when given, arguments are imported rather than read from
        the command line — presumably for programmatic use; verify against
        ``import_arguments``.
    """
    if path is None:
        args = read_arguments()
    else:
        args = import_arguments()

    #### MOVED ####
    # Update some arguments
    args.cuda = (not args.no_cuda) and torch.cuda.is_available()
    del args.no_cuda
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
    # Each run saves into a timestamped subdirectory.
    args.save_dir = os.path.join(
        args.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    #### MOVED ####

    #### NEW ####
    # This uses the same tokenizer that we used when preprocessing the data
    # (NLTK for tweets); pads short token lists up to the widest CNN kernel
    # so every convolution has enough positions to slide over.
    def custom_tokenizer(text,
                         tokenizer=TweetTokenizer(),
                         max_filter_size=max(args.kernel_sizes)):
        token = tokenizer.tokenize(text)
        if len(token) < max_filter_size:
            for i in range(0, max_filter_size - len(token)):
                token.append('<PAD>')
        return token
    #### NEW ####

    # load data
    print("\nLoading data...")
    text_field = data.Field(lower=True, tokenize=custom_tokenizer)
    label_field = data.Field(sequential=False)

    #### NEW ####
    # Loads the relevant dataset: built-in MR corpus when no data path is
    # given, otherwise a custom dataset with an extra test split.
    if args.data_path is None:
        train_iter, dev_iter = mr(text_field, label_field, args,
                                  device=-1, repeat=False)
    else:
        train_iter, dev_iter, test_iter = load_dataset(text_field,
                                                       label_field, args,
                                                       device=-1,
                                                       repeat=False)
    #### NEW ####

    #### NEW ####
    # This part of the code loads the pretrained embeddings and infers the
    # embedding dimension from the loaded vectors.
    if args.embeddings is not None:
        vectors = vocab.Vectors(name=args.embeddings, cache='./cnn/')
        text_field.vocab.set_vectors(vectors.stoi, vectors.vectors,
                                     vectors.dim)
        args.text_field = text_field
        vec = torch.FloatTensor(args.text_field.vocab.vectors).shape[-1]
        args.embed_dim = vec
    #### NEW ####

    # update args and print
    args.embed_num = len(text_field.vocab)
    # Label vocab includes an <unk> entry, hence the -1.
    args.class_num = len(label_field.vocab) - 1

    print("\nParameters:")
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}={}".format(attr.upper(), value))

    # model
    cnn = model.CNN_Text(args)
    if args.snapshot is not None:
        print('\nLoading model from {}...'.format(args.snapshot))
        cnn.load_state_dict(torch.load(args.snapshot))

    if args.cuda:
        torch.cuda.set_device(args.device)
        cnn = cnn.cuda()

    # train or predict
    if args.predict is not None:
        label = train.predict(args.predict, cnn, text_field, label_field,
                              args.cuda)
        print('\n[Text] {}\n[Label] {}\n'.format(args.predict, label))
    elif args.test:
        # try/except deliberately left commented out so a missing test set
        # fails loudly instead of being swallowed.
        #try:
        train.eval(test_iter, cnn, args)
        #except Exception as e:
        #    print("\nSorry. The test dataset doesn't exist.\n")
    else:
        print()
        try:
            train.train(train_iter, dev_iter, cnn, args)
        except KeyboardInterrupt:
            print('\n' + '-' * 89)
            print('Exiting from training early')
top10ind[i] = ind break ind += 1 print("Nearer words to ", word_neighbor, " : ") for ind in top10ind: print(ind, " : ", text_field.vocab.itos[ind]) if args.cuda: # Set the device to 1 torch.cuda.set_device(args.device) twitter = twitter.cuda() writer = SummaryWriter(log_dir=args.save_dir) # train or predict if args.predict is not None: label = train.predict(args.predict, twitter, text_field, label_field, args.cuda) print('\n[Text] {}[Label] {}\n'.format(args.predict, label)) elif args.test: try: train.eval(test_iter, twitter, args) except Exception as e: print("\nSorry. The test dataset doesn't exist.\n") else: try: train.train(train_iter, dev_iter, twitter, writer, args) except KeyboardInterrupt: print('-' * 89) print('Exiting from training early')
for attr, value in sorted(args.__dict__.items()): print("\t{}={}".format(attr.upper(), value)) # model cnn = model.CNN_Text(args) if args.snapshot is not None: print('\nLoading model from {}...'.format(args.snapshot)) cnn.load_state_dict(torch.load(args.snapshot)) if args.cuda: torch.cuda.set_device(args.device) cnn = cnn.cuda() # train or predict if args.predict is not None: label = train.predict(args.predict, cnn, text_field, label_field, args.cuda) print('\n[Text] {}\n[Label] {}\n'.format(args.predict, label)) elif args.test: # try: train.eval(test_iter, cnn, args) # except Exception as e: # print("\nSorry. The test dataset doesn't exist.\n") else: print() try: train.train(train_iter, dev_iter, cnn, args) except KeyboardInterrupt: print('\n' + '-' * 89) print('Exiting from training early')
def main(config, download_resources=False, process_data=False, test_size=0.4, model_train=False, model_path=None): """ :param config: :param download_resources: :param process_data: :param test_size: :param model_train: :param model_path: :return: """ if download_resources: utils.download_data() # Get data if config['domain'] == "NER": train_chr, valid_chr, test_chr, train_word, valid_word, test_word, train_label, \ valid_label, test_label, chr_id_mappings, = data_loader.prepare_ner_data( process_data, test_size) else: train_chr, valid_chr, test_chr, train_word, valid_word, test_word, train_label, \ valid_label, test_label, chr_id_mappings, = data_loader.prepare_wjs_data( process_data, test_size) # Update config config['n_classes'] = train_label.shape[2] config['char_vocab_dim'] = len(chr_id_mappings) + 1 config['train_examples'] = train_chr.shape[0] config['validation_examples'] = valid_chr.shape[0] config['test_examples'] = test_chr.shape[0] logging.info("CONFIG:") logging.info("\n".join([k + ": " + str(v) for k, v in config.items()])) model = models.CNN_BILSTM_CRF(config) if model_train: train.train(train_word=train_word, valid_word=valid_word, train_chr=train_chr, valid_chr=valid_chr, train_label=train_label, valid_label=valid_label, num_epochs=config['train_epochs'], model=model, batch_size=config['batch_size'], config=config) # Evaluate at the end logging.info("Evaluating at the TEST set") train.eval(model, test_chr, test_word, test_label, config['batch_size']) else: if model_path: saver = tf.train.Saver() saver.restore(model.sess, model_path) # Test the model on the test set logging.info("Evaluating at the TEST set") train.eval(model, test_chr, test_word, test_label, config['batch_size']) else: print("No trained models exist! You have to train the model first.")