def train_predict(net, train_data, untrain_data, config, device, iteration,
                  pred_probs):
    config = adjust_config(config, len(train_data[0]), iteration)
    mu.train(net, train_data, config, device)
    pred_probs[device] = mu.predict_prob(net, untrain_data, config, device)
    save_checkpoint(
        {
            'state_dict': net.state_dict(),
            'epoch': iteration,
        },
        False,
        fpath=os.path.join('logs/spaco/%s.epoch%d' %
                           (config.model_name, iteration)))
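# Hedged usage sketch (assumption, not from the original source): pred_probs is
# indexed by device, which suggests one train_predict call per view/GPU with a
# shared dict collecting the unlabeled-set probabilities, e.g.:
#
#     pred_probs = {}
#     for device, config in enumerate(configs):
#         net = models.create(config.model_name).to(device)
#         train_predict(net, train_data, untrain_data, config, device,
#                       iteration=0, pred_probs=pred_probs)
#
# models.create and the per-view configs mirror the cotrain/spaco snippets
# below; the loop itself is illustrative.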
def train(stock_no, start, end, x_window_size, y_window_size, split_ratio,
          batch_size, layer_num, hidden_dim, nb_epoch):
    data, scaler_adjclose, scaler_volume = data_utils.read_data(
        stock_no, start, end)
    X, y = data_utils.window_transform_series(data,
                                              x_window_size=x_window_size,
                                              y_window_size=y_window_size)
    X_train, X_test, y_train, y_test = data_utils.tts(X, y, split_ratio)
    filepath = model_utils.model_path(stock_no)
    model_utils.train(X_train, y_train, X_test, y_test, x_window_size,
                      y_window_size, split_ratio, batch_size, layer_num,
                      hidden_dim, nb_epoch, filepath)
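# Hedged usage sketch (assumption, not from the original source): all argument
# values below are illustrative only.
#
#     train(stock_no='2330', start='2015-01-01', end='2019-12-31',
#           x_window_size=50, y_window_size=1, split_ratio=0.8,
#           batch_size=32, layer_num=2, hidden_dim=64, nb_epoch=100)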
def run_training(settings, data, vocabulary_idx_to_word,
                 vocabulary_word_to_idx, logger, use_cuda):
    reproduction_command = 'python main.py ' + '-c ' + os.path.join(
        logger.log_dir, logger.run_name + '.ini')
    logger.shout(reproduction_command)
    logger.log('# ' + reproduction_command)
    logger.log(
        'epoch\titeration\tfold\ttrain_loss\ttrain_acc\ttrain_macro_f1\ttrain_macro_f1_main\ttrain_total\tval_loss\tval_acc\tval_macro_f1\tval_macro_f1_main\tval_total\tmodel'
    )

    input_vecs, targets = data_utils.create_input_vectors(
        data, vocabulary_idx_to_word, vocabulary_word_to_idx)

    # Compute the class weights if necessary
    if settings.training.class_weights:
        class_weights = np.bincount(targets[targets != -1],
                                    minlength=settings.model.num_entities)
        class_weights = 1.0 / (
            np.sqrt(class_weights) + 1e-6
        )  # 1e-6 for numerical stability (though the inf values wouldn't be used anyway)
        settings.training.class_weights = class_weights
    else:
        settings.training.class_weights = None

    fold_indices = range(settings.data.folds)
    if settings.data.folds > 1:
        folds = data_utils.get_cv_folds(data, settings.data, logger)
    else:
        # No cross-validation:
        train_sequence_bounds = data_utils.get_sequence_bounds(
            data, settings.data.level)
        validation_sequence_bounds = []

    for fold_idx in fold_indices:
        # For bookkeeping (logging five folds in one file):
        logger.fold_idx = fold_idx

        # Select training and (if cross-validation) validation data:
        if settings.data.folds > 1:
            train_sequence_bounds = np.concatenate(
                tuple(folds[:fold_idx] + folds[fold_idx + 1:]))
            validation_sequence_bounds = folds[fold_idx]

        # Initialise model
        model = models.LSTM_basic(settings.model,
                                  padding_idx=data_utils.DUMMY_ENTITY_IDX)
        if use_cuda:
            model.cuda()

        # Train the model
        last_model, best_model = model_utils.train(
            model, input_vecs, targets, train_sequence_bounds,
            validation_sequence_bounds, settings.training,
            settings.training.no_shuffle, logger)

        # Save the best model through the logger
        logger.save_model(best_model)
def run_training(settings, idx_to_word, word_to_idx, word_vectors,
                 training_ids, logger, use_cuda):
    inputs, targets = data_utils.to_char_tensors(idx_to_word)

    # fold_indices = range(settings.data.folds)
    # if settings.data.folds > 1:
    #     pass  # TODO divide into folds
    # else:
    #     # No cross-validation:
    #     val_inputs = torch.from_numpy(np.array([]))
    #     val_targets = torch.from_numpy(np.array([]))
    #
    # for fold_idx in fold_indices:
    #     # For bookkeeping (logging five folds in one file):
    #     logger.fold_idx = fold_idx
    #
    #     # Select training and (if cross-validation) validation data:
    #     if settings.data.folds > 1:
    #         training_domains = folds[:fold_idx] + folds[fold_idx + 1:]
    #         inputs = torch.from_numpy(np.concatenate(tuple([domain['X'] for domain in training_domains]))).float()
    #         targets = torch.from_numpy(np.concatenate(tuple([domain['y'] for domain in training_domains])))
    #         val_inputs = torch.from_numpy(folds[fold_idx]['X']).float()
    #         val_targets = torch.from_numpy(folds[fold_idx]['y'])

    # Initialise model
    model = models.CharRNN(settings.model,
                           data_utils.n_characters,
                           torch.Tensor(word_vectors),
                           logger=logger)

    # CUDAfy model and datasets:
    if use_cuda:
        model.cuda()
        inputs = inputs.cuda()
        targets = targets.cuda()
        # val_inputs = val_inputs.cuda()
        # val_targets = val_targets.cuda()

    # Train the model
    last_model, best_model = model_utils.train(model, inputs, targets,
                                               training_ids, [], [],
                                               idx_to_word, word_to_idx,
                                               word_vectors, settings.training,
                                               settings.training.no_shuffle,
                                               logger=logger)

    # Save the best model through the logger
    logger.save_model(best_model)

    return best_model
def dotrain():
    parser = argparse.ArgumentParser(description='Text CNN classifier')
    parser.add_argument('--model',
                        type=str,
                        default="model/textcnn.model",
                        help='load an existing model and continue training')
    conf = Config()
    # Print the model configuration
    conf.dump()
    args = parser.parse_args()
    if not os.path.isdir("model"):
        os.mkdir("model")
    print("Processing training data")
    train_iter, text_field, label_field = data_utils.text_dataloader(
        conf.train_dir, conf.batch_size)
    # Persist the vocabularies locally with pickle
    data_utils.save_vocab(text_field.vocab, "model/text.vocab")
    data_utils.save_vocab(label_field.vocab, "model/label.vocab")
    # Add derived settings: embedding vocabulary size (vocab_num) and number of classes (class_num)
    conf.vocab_num = len(text_field.vocab)
    conf.class_num = len(label_field.vocab) - 1
    # Kernel sizes: the spans of characters each convolution covers, used to
    # capture relations between neighbouring characters, e.g. [3, 4, 5]
    conf.kernel_sizes = [int(k) for k in conf.kernel_sizes.split(',')]
    # Load or initialise the model
    if os.path.exists(args.model):
        print('Found model file, loading model: {}'.format(args.model))
        cnn = torch.load(args.model)
    else:
        cnn = model_utils.TextCNN(conf)
    # Train the model
    try:
        model_utils.train(train_iter, cnn, conf)
    except KeyboardInterrupt:
        print('-' * 80)
        print('Exiting training early.')
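# Hedged usage note (assumption, not from the original source): dotrain() parses
# its own command-line flags, so it is presumably called from a script entry
# point, e.g.:
#
#     if __name__ == '__main__':
#         dotrain()   # e.g. python train.py --model model/textcnn.model
#
# The script name "train.py" is hypothetical.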
# configure the common VAE elements
config = {
    'dim_x': list(set_info.features['image'].shape),
    'num_classes': set_info.features['label'].num_classes,
    'dim_z': args.dim_z,
    'K': set_info.features['label'].num_classes,
    'enc_arch': architectures[args.data_set][arch]['enc_arch'],
    'dec_arch': architectures[args.data_set][arch]['dec_arch'],
    'learn_rate': architectures[args.data_set][arch]['learn_rate'],
    'px_z': data_model,
    'covariance_structure': covariance_structure,
    'dropout_rate': 0.0,
    'save_dir': dir_dim_z
}

# run training
train(method=args.method,
      config=config,
      unlabelled_set=unlabelled_set,
      labelled_set=labelled_set,
      valid_set=valid_set,
      test_set=test_set,
      n_epochs=n_epochs)

# reset the graph
tf.reset_default_graph()

# close all plots
plt.close('all')
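# Hedged sketch (assumption, not from the original source): the lookups above
# imply architectures is nested as data set -> architecture name -> settings.
# A minimal example of that shape:
#
#     architectures = {
#         'mnist': {
#             'arch_0': {
#                 'enc_arch': [512, 256],
#                 'dec_arch': [256, 512],
#                 'learn_rate': 1e-3,
#             },
#         },
#     }
#
# The data set name, architecture key and layer sizes are illustrative only.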
import jieba_cut
import pandas as pd

data = pd.read_csv(
    '/Users/shen-pc/Desktop/WORK/ITS/My method/results/problem_0528_jieba.csv',
    index_col=0)
sim1 = model_utils.cal_cos_sim(data.iloc[5322]['id'], data.iloc[5323]['id'])
print('Original model result [5322 vs 5323] =', sim1)

# Try a few new problems
data_new = pd.read_csv('/Users/shen-pc/Desktop/WORK/ITS/data/real_item.csv',
                       index_col=0)
data_new.rename(columns={'problem_id': 'id'}, inplace=True)
data_new = data_new.loc[:20]
data_new = utils.pre_processing(data_new)
data_new = jieba_cut.cut(data_new)

# Retrain with the new data included:
model_new = model_utils.train(data_new)
sim2 = model_utils.cal_cos_sim(data.iloc[5322]['id'], data.iloc[5323]['id'])
sim3 = model_utils.cal_cos_sim(data.iloc[100]['id'], data.iloc[1000]['id'])
print('New model result [5322 vs 5323] =', sim2)
print('New model result [100 vs 1000] =', sim3)

# Problems that were not in the original data:
sim4 = model_utils.cal_cos_sim(data.iloc[0]['id'], data_new.iloc[0]['id'])
print('New model result [old 0 vs new 0] =', sim4)

# Most similar items:
most1 = model_utils.most_similar(data.iloc[0]['id'])
most2 = model_utils.most_similar(data_new.iloc[0]['id'])
print('\n\n\n\n', data.loc[0, 'cut'], '\n', most1, '\n\n')
print(data_new.loc[0, 'cut'], '\n', most2)
    help=
    f'Sizes of hidden layers in model classifier. Can pass multiple arguments. Default: {" ".join([str(_) for _ in def_hidden_units])}.'
)
parser.add_argument(
    '--output_units',
    nargs='?',
    default=def_output_units,
    type=int,
    help=
    f'Size of output layer, or number of prediction classes. Default is {def_output_units}.'
)
parser.add_argument(
    '--epochs',
    nargs='?',
    default=def_epochs,
    type=int,
    help=f'Number of training epochs to run. Default is {def_epochs}.')
parser.add_argument('--gpu',
                    action='store_true',
                    help='Pass this flag to use GPU if available.')

args = parser.parse_args()
print(args)

loaders = build_data_loaders(args.data_dir)
model = build_model(args.arch, args.hidden_units, args.output_units)
best_model = train(model, args.epochs, args.learning_rate, args.gpu, loaders)

now = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%dT%H%M%S')
save_checkpoint(f'{args.save_dir}/checkpoint-{args.arch}-{now}.pth', best_model,
                args.arch)
    break
summary(model.to(hyperparams["device"]),
        [input.size()[1:], input2.size()[1:]])
# We would like to use device=hyperparams['device'] although we have
# to wait for torchsummary to be fixed first.

if CHECKPOINT is not None:
    model.load_state_dict(torch.load(CHECKPOINT))

try:
    train(
        model,
        optimizer,
        loss,
        train_loader,
        hyperparams["epoch"],
        scheduler=hyperparams["scheduler"],
        device=hyperparams["device"],
        supervision=hyperparams["supervision"],
        val_loader=val_loader,
        display=viz,
    )
except KeyboardInterrupt:
    # Allow the user to stop the training
    pass

probabilities = test(model, img1, img2, hyperparams)
try:
    prediction = np.argmax(probabilities, axis=-1)
    run_results = metrics(
        prediction,
def cotrain(configs, data, iter_steps=1, train_ratio=0.2, device='cuda:0'):
    """
    Co-training implementation.
    params:
    configs: model configs for the two views
    data: dataset including train and untrain data
    iter_steps: maximum number of iteration steps
    train_ratio: labeled data ratio
    device: device used for training
    """
    assert iter_steps >= 1
    assert len(configs) == 2
    train_data, untrain_data = dp.split_dataset(
        data['train'], seed=args.seed, num_per_class=args.num_per_class)
    gt_y = data['test'][1]
    new_train_data = deepcopy(train_data)
    add_num = 8000
    for step in range(iter_steps):
        pred_probs = []
        test_preds = []
        add_ids = []
        for view in range(2):
            print('Iter step: %d, view: %d, model name: %s' %
                  (step + 1, view, configs[view].model_name))
            configs[view] = adjust_config(configs[view], len(train_data[0]),
                                          step)
            net = models.create(configs[view].model_name).to(device)
            mu.train(net, new_train_data, configs[view], device)
            mu.evaluate(net, data['test'], configs[view], device)
            save_checkpoint(
                {
                    'state_dict': net.state_dict(),
                    'epoch': step + 1,
                },
                False,
                fpath=os.path.join('logs/cotrain/%s.epoch%d' %
                                   (configs[view].model_name, step)))
            test_preds.append(
                mu.predict_prob(net, data['test'], configs[view], device))
            if len(untrain_data[0]) > configs[view].batch_size:
                pred_probs.append(
                    mu.predict_prob(net, untrain_data, configs[view], device))
                add_ids.append(
                    dp.select_ids(pred_probs[view], train_data, add_num))
        # update training data
        # import pdb; pdb.set_trace()
        pred_y = np.argmax(sum(pred_probs), axis=1)
        add_id = np.array(sum(add_ids), dtype=bool)
        fuse_y = np.argmax(sum(test_preds), axis=1)
        print('Fuse Acc:%0.4f' % np.mean(fuse_y == gt_y))
        if args.tricks:
            new_train_data, _ = dp.update_train_untrain(
                add_id, train_data, untrain_data, pred_y)
            add_num += add_num
        else:
            if len(untrain_data[0]) < 1:
                break
            new_train_data, untrain_data = dp.update_train_untrain(
                add_id, new_train_data, untrain_data, pred_y)
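# Hedged usage sketch (assumption, not from the original source): cotrain
# expects exactly two view configs and a data dict with 'train' and 'test'
# splits, and it also reads args.seed, args.num_per_class and args.tricks at
# module level. A call might look like:
#
#     configs = [config_resnet, config_densenet]  # two config objects built elsewhere
#     cotrain(configs, data, iter_steps=5, train_ratio=0.2, device='cuda:0')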
def spaco(configs,
          data,
          iter_steps=1,
          gamma=0,
          train_ratio=0.2,
          regularizer='soft'):
    """
    Self-paced co-training implementation based on PyTorch.
    params:
    configs: model configs for spaco, e.g. for ['resnet50', 'densenet121']
    data: dataset for the spaco model
    iter_steps: number of iteration rounds for spaco
    gamma: spaco hyperparameter
    train_ratio: initial training dataset ratio
    regularizer: 'soft' or 'hard' self-paced regularizer
    """
    num_view = len(configs)
    train_data, untrain_data = dp.split_dataset(
        data['train'], seed=args.seed, num_per_class=args.num_per_class)
    add_num = 4000
    pred_probs = []
    test_preds = []
    sel_ids = []
    weights = []
    start_step = 0
    ###########
    # train initial classifiers to get predictions
    ###########
    for view in range(num_view):
        configs[view] = adjust_config(configs[view], len(train_data[0]), 0)
        net = models.create(configs[view].model_name).to(view)
        mu.train(net, train_data, configs[view], device=view)
        pred_probs.append(
            mu.predict_prob(net, untrain_data, configs[view], view))
        test_preds.append(
            mu.predict_prob(net, data['test'], configs[view], view))
        acc = mu.evaluate(net, data['test'], configs[view], view)
        save_checkpoint(
            {
                'state_dict': net.state_dict(),
                'epoch': 0,
            },
            False,
            fpath=os.path.join('spaco/%s.epoch%d' %
                               (configs[view].model_name, 0)))
    pred_y = np.argmax(sum(pred_probs), axis=1)

    # initialise weights for unlabeled examples
    for view in range(num_view):
        sel_id, weight = dp.get_ids_weights(pred_probs[view], pred_y,
                                            train_data, add_num, gamma,
                                            regularizer)
        # import pdb; pdb.set_trace()
        sel_ids.append(sel_id)
        weights.append(weight)

    # start iterative training
    gt_y = data['test'][1]
    for step in range(start_step, iter_steps):
        for view in range(num_view):
            print('Iter step: %d, view: %d, model name: %s' %
                  (step + 1, view, configs[view].model_name))

            # update sample weights
            sel_ids[view], weights[view] = dp.update_ids_weights(
                view, pred_probs, sel_ids, weights, pred_y, train_data,
                add_num, gamma, regularizer)

            # update model parameters
            new_train_data, _ = dp.update_train_untrain(
                sel_ids[view], train_data, untrain_data, pred_y, weights[view])
            configs[view] = adjust_config(configs[view], len(train_data[0]),
                                          step)
            net = models.create(configs[view].model_name).cuda()
            mu.train(net, new_train_data, configs[view], device=view)

            # update predictions on the unlabeled data
            pred_probs[view] = mu.predict_prob(net, untrain_data,
                                               configs[view], device=view)

            # evaluate the current model and save it
            acc = mu.evaluate(net, data['test'], configs[view], device=view)
            predictions = mu.predict_prob(net, data['train'], configs[view],
                                          device=view)
            save_checkpoint(
                {
                    'state_dict': net.state_dict(),
                    'epoch': step + 1,
                    'predictions': predictions,
                    'accuracy': acc
                },
                False,
                fpath=os.path.join('spaco/%s.epoch%d' %
                                   (configs[view].model_name, step + 1)))
            test_preds[view] = mu.predict_prob(net, data['test'],
                                               configs[view], device=view)
        add_num += 4000 * num_view
        fuse_y = np.argmax(sum(test_preds), axis=1)
        print('Acc:%0.4f' % np.mean(fuse_y == gt_y))
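# Hedged usage sketch (assumption, not from the original source): spaco takes
# one config per view (e.g. for 'resnet50' and 'densenet121' as the docstring
# suggests) and the same data dict layout as cotrain. For example:
#
#     spaco([config_resnet, config_densenet], data, iter_steps=5,
#           gamma=0.3, regularizer='soft')
#
# The gamma value and config names are illustrative only; args.seed and
# args.num_per_class must be defined at module level.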
def train_predict(net, train_data, untrain_data, test_data, config, device,
                  pred_probs):
    mu.train(net, train_data, config, device)
    pred_probs.append(mu.predict_prob(net, untrain_data, config, device))
        sentiment_polarity_multiple=sentiment_polarity_multiple,
        nb_classes=nb_classes,
        use_extra_feature=use_extra_feature,
        ner_dict_size=ner_dict_size,
        pos_dict_size=pos_dict_size,
        ans_max_len=ans_max_len,
        que_max_len=que_max_len,
        extra_feature_dim=extra_feature_dim,
        char_w2v_embedding_matrix_path=char_embedding_matrix_path,
        word_w2v_embedding_matrix_path=word_embedding_matrix_path)

    saver = tf.train.Saver()
    sess = tf.Session()
    initializer = tf.global_variables_initializer()
    sess.run(initializer)
    if os.path.exists(''.join([pretrained_model, '.meta'])):
        print('load the pre-trained model...')
        saver.restore(sess, pretrained_model)
        print('load done...')
    print('begin training...')
    train(nb_epoch=nb_epoch,
          sess=sess,
          saver=saver,
          data_stream_train=data_stream_train,
          data_stream_valid=data_stream_valid,
          answer_understander_train=answer_understander_train,
          answer_understander_valid=answer_understander_valid,
          best_path=best_path)


# if __name__ == "__main__":
#     train_model()
data_proc.to_csv('./results/problem_0528_preprocessing.csv')
t1 = time.time()
print('Text preprocessing took:', (t1 - t0) / 60, 'min')
print('============================ text preprocessing done ============================', '\n\n')
'''
------------------------------------------------------------------------------------------------------------------------
'''
print('============================ word segmentation started ============================')
data_cut = jieba_cut.cut(data_proc)
data_cut.to_csv('./results/problem_0528_jieba.csv')
t2 = time.time()
print('Word segmentation took:', (t2 - t1) / 60, 'min')
print('============================ word segmentation done ============================', '\n\n')
'''
------------------------------------------------------------------------------------------------------------------------
'''
print('============================ model training started ============================')
d2v = model_utils.train(data_cut)
t3 = time.time()
print('Model training took:', (t3 - t2) / 60, 'min')
print('============================ model training done ============================', '\n\n')
'''
------------------------------------------------------------------------------------------------------------------------
'''
print('\n\n\n\n')
print('Total time:', (time.time() - t0) / 60, 'min')
    info.to_csv(f'{out_path}/acc/{sub_name}-same_sample-test.csv', index=False)

# ========================== For Neural Network ===========================
elif decoding_method == 'nn':
    # define params
    lr = 0.001
    n_epoch = 30
    train_percentage = 0.8
    batch_size = 32
    weight_decay = 0
    p = 0.5
    verbose = False  # if True the code will show the loss info in each batch
    train_size = int(data.shape[0] * train_percentage)
    val_size = data.shape[0] - train_size

    # define model and make dataset
    # model = VisualNet(p, selected_voxel=stability_idx)
    model = VisualNet_simple(p, n_hidden=20)
    dataset = Dataset(data, label)
    train_set, val_set = random_split(dataset, [train_size, val_size])

    # train model
    model_params, train_acc, train_loss, val_acc, val_loss = \
        train(model, train_set, val_set, batch_size, n_epoch, lr, weight_decay)

    # save and plot info
    plot_training_curve(n_epoch, train_acc, train_loss, val_acc, val_loss,
                        flag=sub_name + '_simple')
    # torch.save(model_params, pjoin(out_path, 'visual_nn.pkl'))
def run_training(model, cfg, test_features, test_labels, train_data,
                 train_labels, val_data, val_labels):
    tmp_run_path = MODEL_PATH + "/tmp_" + get_datetime()
    model_weights_path = "{}/{}".format(tmp_run_path, cfg.model_weights_name)
    model_config_path = "{}/{}".format(tmp_run_path, cfg.model_config_name)
    result_path = "{}/result.txt".format(tmp_run_path)
    os.makedirs(tmp_run_path, exist_ok=True)
    json.dump(cfg.to_json(), open(model_config_path, "w"))

    """Defining loss and optimizer"""
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.to(get_device())

    """Creating data generators"""
    test_iterator = BatchIterator(test_features, test_labels)
    train_iterator = BatchIterator(train_data, train_labels, cfg.batch_size)
    validation_iterator = BatchIterator(val_data, val_labels)

    train_loss = 999
    best_val_loss = 999
    train_acc = 0
    epochs_without_improvement = 0

    writer = SummaryWriter()

    """Running training"""
    for epoch in range(cfg.n_epochs):
        train_iterator.shuffle()
        if epochs_without_improvement == cfg.patience:
            break

        val_loss, val_cm = evaluate(model, validation_iterator, criterion)

        if val_loss < best_val_loss:
            torch.save(model.state_dict(), model_weights_path)
            best_val_loss = val_loss
            best_val_acc = val_cm.accuracy
            best_val_unweighted_acc = val_cm.unweighted_accuracy
            epochs_without_improvement = 0
            log_success(
                " Epoch: {} | Val loss improved to {:.4f} | val acc: {:.3f} | unweighted val acc: {:.3f} | train loss: {:.4f} | train acc: {:.3f} | saved model to {}."
                .format(epoch, best_val_loss, best_val_acc,
                        best_val_unweighted_acc, train_loss, train_acc,
                        model_weights_path))

        train_loss, train_cm = train(model, train_iterator, optimizer,
                                     criterion, cfg.reg_ratio)
        train_acc = train_cm.accuracy

        writer.add_scalars('all/losses', {
            "val": val_loss,
            "train": train_loss
        }, epoch)
        writer.add_scalars('all/accuracy', {
            "val": val_cm.accuracy,
            "train": train_cm.accuracy
        }, epoch)
        writer.add_scalars(
            'all/unweighted_acc', {
                "val": val_cm.unweighted_accuracy,
                "train": train_cm.unweighted_accuracy
            }, epoch)
        writer.add_scalar('val/loss', val_loss, epoch)
        writer.add_scalar('val/val_acc', val_cm.accuracy, epoch)
        writer.add_scalar('val/val_unweighted_acc', val_cm.unweighted_accuracy,
                          epoch)
        writer.add_scalar('train/loss', train_loss, epoch)
        writer.add_scalar('train/train_acc', train_cm.accuracy, epoch)
        writer.add_scalar('train/train_unweighted_acc',
                          train_cm.unweighted_accuracy, epoch)

        epochs_without_improvement += 1

        if not epoch % 1:
            log(
                f'| Epoch: {epoch+1} | Val Loss: {val_loss:.3f} | Val Acc: {val_cm.accuracy*100:.2f}% '
                f'| Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.3f}%',
                cfg.verbose)

    model.load_state_dict(torch.load(model_weights_path))
    test_loss, test_cm = evaluate(model, test_iterator, criterion)

    result = f'| Epoch: {epoch+1} | Test Loss: {test_loss:.3f} | Test Acc: {test_cm.accuracy*100:.2f}% | Unweighted Test Acc: {test_cm.unweighted_accuracy*100:.2f}%\n Confusion matrix:\n {test_cm}'
    log_major("Train acc: {}".format(train_acc))
    log_major(result)
    log_major("Hyperparameters:{}".format(cfg.to_json()))
    with open(result_path, "w") as file:
        file.write(result)

    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()

    output_path = "{}/{}_{:.3f}Acc_{:.3f}UAcc_{}".format(
        MODEL_PATH, cfg.model_name, test_cm.accuracy,
        test_cm.unweighted_accuracy, strftime("%Y-%m-%d_%H:%M:%S", gmtime()))
    os.rename(tmp_run_path, output_path)

    return test_loss
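# Hedged usage sketch (assumption, not from the original source): run_training
# needs an instantiated model plus a cfg object exposing lr, batch_size,
# n_epochs, patience, reg_ratio, verbose, model_name, model_weights_name,
# model_config_name and to_json(), together with feature/label arrays for the
# three splits:
#
#     test_loss = run_training(model, cfg, test_features, test_labels,
#                              train_data, train_labels, val_data, val_labels)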