def evaluate(args, model, device, loader, training=False):
    global BEST_ACC
    model.eval()
    correct1 = 0
    correct5 = 0
    tsize = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device, torch.float), target.to(device, torch.long)
            output = model(data)
            # topk accuracy
            c1, c5 = util.accuracy(output.data, target, topk=(1, 5))
            correct1 += c1
            correct5 += c5
            tsize += target.size(0)
    if training:
        model.train()
    acc1 = 100. * correct1 / tsize
    acc5 = 100. * correct5 / tsize
    if acc1 > BEST_ACC:
        BEST_ACC = acc1.item()
        if training:
            # storing the continuous weights of the best model, done separately from checkpoint!
            util.save_model(
                {'state_dict': model.state_dict(), 'best_acc1': BEST_ACC},
                args.save_name)
    return acc1.item(), acc5.item()
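# The `util.accuracy` helper used above is not shown in this section. The
# following is a minimal sketch of what it presumably computes (standard
# top-k accuracy), offered as an assumption rather than the project's code.
import torch

def accuracy(output, target, topk=(1,)):
    """Return the number of correct predictions for each k in `topk`."""
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()  # shape: (maxk, batch_size)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    # sum over the top-k rows; the callers above accumulate these counts
    return [correct[:k].reshape(-1).float().sum() for k in topk]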
def trainer(self):
    model = self.Model(self.args).to(self.args.device)
    self.optimizer = self._get_optimizer(model=model, optimizer=self.args.optimizer)
    print("Model: ", model)
    best_val_acc = 0.0
    best_val_loss = 100000000
    idx = 0
    for epoch in range(self.args.epochs):
        start_time = time.time()
        train_loss, train_accuracy = self.run_train(model=model)
        val_loss, val_accuracy = self.run_eval(model=model)
        if best_val_loss > val_loss:
            best_val_loss = val_loss
            best_val_acc = val_accuracy
            idx = epoch
            save_model(model, self.args)
        print('-' * 22,
              ' epoch: {:3d} / {:3d} - time: {:5.2f}s '.format(
                  epoch, self.args.epochs, time.time() - start_time),
              '-' * 22)
        # Train
        print('| Train | loss {:.4f} | acc {:.2f}% |'.format(
            train_loss, train_accuracy * 100))
        print('| Valid | loss {:.4f} | acc {:.2f}% |'.format(
            val_loss, val_accuracy * 100))
        sys.stdout.flush()
    print('BEST RESULTS')
    print('| Valid | epoch {:3d} | acc {:.2f}% |'.format(idx, best_val_acc * 100))
    sys.stdout.flush()
def step(self) -> bool:
    """Train until the next checkpoint, and evaluate.

    Returns
    -------
    bool
        Whether the computable has completed.
    """
    self.train_step()
    self.eval_step()

    # Simple stopping rule: done once we reach the max number of steps
    self._step += 1
    done = self._step >= self.epochs
    if done:
        model_name = "model.pt"
        self.model.load_state_dict(self._best_model)
        # Save metrics
        with open(os.path.join(self.log_dir, "metrics.json"), "w") as f:
            json.dump(self._log, f, indent=4, sort_keys=True)
    else:
        model_name = f"model_{self._step - 1}.pt"
    # Save model
    save_model(self.model, os.path.join(self.log_dir, model_name))
    return done
def main():
    args = parse_args()
    update_config(args.cfg)
    # create output directory
    if cfg.BASIC.CREATE_OUTPUT_DIR:
        out_dir = os.path.join(cfg.BASIC.ROOT_DIR, cfg.TRAIN.MODEL_DIR)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    # copy config file
    if cfg.BASIC.BACKUP_CODES:
        backup_dir = os.path.join(cfg.BASIC.ROOT_DIR, cfg.TRAIN.MODEL_DIR, 'code')
        backup_codes(cfg.BASIC.ROOT_DIR, backup_dir, cfg.BASIC.BACKUP_LISTS)
    fix_random_seed(cfg.BASIC.SEED)
    if cfg.BASIC.SHOW_CFG:
        pprint.pprint(cfg)
    # cudnn related settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    cudnn.enabled = cfg.CUDNN.ENABLE

    # data loaders
    train_dset = TALDataset(cfg, cfg.DATASET.TRAIN_SPLIT)
    train_loader = DataLoader(train_dset,
                              batch_size=cfg.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              drop_last=False,
                              num_workers=cfg.BASIC.WORKERS,
                              pin_memory=cfg.DATASET.PIN_MEMORY)
    val_dset = TALDataset(cfg, cfg.DATASET.VAL_SPLIT)
    val_loader = DataLoader(val_dset,
                            batch_size=cfg.TEST.BATCH_SIZE,
                            shuffle=False,
                            drop_last=False,
                            num_workers=cfg.BASIC.WORKERS,
                            pin_memory=cfg.DATASET.PIN_MEMORY)

    model = LocNet(cfg)
    model.apply(weight_init)
    model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    for epoch in range(cfg.TRAIN.BEGIN_EPOCH, cfg.TRAIN.END_EPOCH + 1):
        loss_train = train(cfg, train_loader, model, optimizer)
        print('epoch %d: loss: %f' % (epoch, loss_train))
        with open(os.path.join(cfg.BASIC.ROOT_DIR, cfg.TRAIN.LOG_FILE), 'a') as f:
            f.write("epoch %d, loss: %.4f\n" % (epoch, loss_train))
        # decay lr
        if epoch in cfg.TRAIN.LR_DECAY_EPOCHS:
            decay_lr(optimizer, factor=cfg.TRAIN.LR_DECAY_FACTOR)
        if epoch in cfg.TEST.EVAL_INTERVAL:
            save_model(cfg, epoch=epoch, model=model, optimizer=optimizer)
            out_df_ab, out_df_af = evaluation(val_loader, model, epoch, cfg)
            out_df_list = [out_df_ab, out_df_af]
            final_result_process(out_df_list, epoch, cfg, flag=0)
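# `decay_lr` is not defined in this section. A plausible sketch, assuming
# `factor` is the divisor applied to every parameter group (the real helper
# may instead multiply by a factor < 1):
def decay_lr(optimizer, factor):
    for param_group in optimizer.param_groups:
        param_group['lr'] /= factor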
def save_model(self, model_file):
    data = {
        "word2id": self.word2id,
        "label2id": self.label2id,
        "initial_proba": self.initial_proba,
        "observation_proba": self.observation_proba,
        "transition_proba": self.transition_proba,
    }
    save_model(model_file, data)
def main():
    print(tf.__version__)
    print("GPU Available: ", tf.test.is_gpu_available())
    results_dict = {'eval_rewards': []}
    args = create_argument_parser().parse_args()
    segrot, states, markpos = get_data(file=args.expert_file)
    actions = get_actions_from_segrot(segrot)
    action_dim = actions.shape[1]
    state_dim = states.shape[1]
    args.action_dim = action_dim
    args.state_dim = state_dim
    if args.curtail_length:
        states = states[0:args.curtail_length + 1]
        actions = actions[0:args.curtail_length + 1]
    num_states = states.shape[0]
    num_train = int(0.9 * num_states)
    num_test = num_states - num_train
    train_states = states[1:num_train]
    train_actions = actions[1:num_train]
    test_states = states[-num_test:]
    test_actions = actions[-num_test:]

    base_dir = os.getcwd() + '/models/IDPAgent/'
    run_number = 0
    while os.path.exists(base_dir + str(run_number)):
        run_number += 1
    base_dir = base_dir + str(run_number)
    os.makedirs(base_dir)

    idp_agent = IDPAgent(**args.__dict__)
    for epoch in trange(args.epochs):
        train(idp_agent, train_states, train_actions, args.batch_size)
        eval_rewards = evaluate_policy(idp_agent, test_states, test_actions,
                                       args.eval_episodes, args.batch_size)
        eval_reward = sum(eval_rewards) / args.eval_episodes
        eval_variance = float(np.var(eval_rewards))
        results_dict['eval_rewards'].append({
            'total_steps': epoch * states.shape[0],
            'epoch': epoch,
            'average_eval_reward': eval_reward,
            'eval_reward_variance': eval_variance
        })
        with open(args.results_file, 'w') as file:
            file.write(json.dumps(results_dict['eval_rewards']))
    utils.save_model(idp_agent.actor, base_dir)
def train_resnet80(model, pretrain_loader, val_loader):
    global lr
    global best_prec1
    lr = params.base_lr
    # model = construct_premodel(model, params)
    # model = construct_resnet18(model, params)
    model.train()
    optimizer = torch.optim.Adam(list(model.parameters()),
                                 lr=params.base_lr,
                                 betas=(0.9, 0.99))
    criterion = nn.CrossEntropyLoss().cuda()

    for epoch in range(params.start_epoch, params.start_epoch + params.num_epochs):
        adjust_learning_rate(optimizer, epoch, params.base_lr)
        # train for one epoch
        # train_batch(train_loader, model, criterion, optimizer, epoch)
        for step, (images, labels) in enumerate(pretrain_loader):
            # make images and labels variable
            images = make_variable(images)
            labels = make_variable(labels.squeeze_())
            # zero gradients for optimizer
            optimizer.zero_grad()
            # compute loss for critic
            preds = model(images)
            loss = criterion(preds, labels)
            # optimize source classifier
            loss.backward()
            optimizer.step()
            # print step info
            if (step + 1) % params.log_step_pre == 0:
                print("Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs, step + 1,
                    len(pretrain_loader), loss.item()))
        if (epoch + 1) % params.eval_step_pre == 0:
            eval_pretrain(model, val_loader)
        # save model parameters
        if (epoch + 1) % params.save_step_pre == 0:
            save_model(model, "Resnet18-{}.pt".format(epoch + 1))
    # save final model
    save_model(model, "Resnet18-final.pt")
    return model
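# `adjust_learning_rate(optimizer, epoch, base_lr)` is called above but not
# shown. A hedged sketch of a typical epoch-based step schedule; the decay
# interval and gamma below are assumptions, not the project's values.
def adjust_learning_rate(optimizer, epoch, base_lr, decay_every=30, gamma=0.1):
    lr = base_lr * (gamma ** (epoch // decay_every))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr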
def train(model, criterion, optimiser, train_iterator):
    model.train()
    total_correct = 0
    total_batches = len(train_iterator.data()) // train_iterator.batch_size
    model_predictions = []
    true_labels = []
    for epoch in range(config['num_epochs']):
        pbar = tqdm(total=total_batches)
        train_loss = 0
        epoch_correct = 0
        epoch_predictions = 0
        for i, batch in enumerate(train_iterator):
            predictions = model(batch.chars)  # forward pass
            loss = criterion(predictions, batch.label)
            train_loss += loss.item()
            label_pred = [np.argmax(p) for p in predictions.cpu().detach().numpy()]
            true_labels = true_labels + batch.label.cpu().detach().tolist()
            model_predictions = model_predictions + label_pred
            for p, tp in zip(label_pred, batch.label.cpu().detach().tolist()):
                epoch_predictions += 1
                if p == tp:
                    total_correct += 1
                    epoch_correct += 1
            pbar.set_description(
                f'{str(optimiser.param_groups[0]["lr"])} - ' +
                f'{epoch + 1}/{config["num_epochs"]} ' +
                f'Loss: {train_loss / (i + 1):.7f} ' +
                f'Acc: {epoch_correct / epoch_predictions:.7f} ' +
                f'F1: {f1_score(true_labels, model_predictions, average="macro"):.7f} ' +
                f'Total correct {total_correct} out of {len(model_predictions)}')
            # Backward and optimize
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            pbar.update(1)
        # optimiser = adjust_learning_rate(optimiser, epoch)
        if (epoch + 1) % 10 == 0:
            # the original referenced an undefined `charCNNModel` here; the
            # trained `model` is what should be checkpointed
            utils.save_model(f'modelCharCNN_large_lr_{epoch}.ckpt', model)
def main():
    data = open_data()
    vectorizer, matrix = vectorize_articles(data[TEXT_COL])
    save_model(vectorizer, 'mbti_tfidf_{}'.format(TFIDF_MAX_FEATURES))
    features = vectorizer.get_feature_names()
    save_textfile(features)
    df = convert_sparse_mat_to_df(matrix, features)
    df.to_csv(VECTORIZED_PATH, encoding='utf-8')
def train(config, train_sentences, dev_sentences, test_sentences, char_to_id,
          feature_to_id, target_to_id, id_to_char, id_to_target, logger):
    # prepare data: get a collection of lists containing indices
    train_data = prepare_dataset(train_sentences, char_to_id, target_to_id,
                                 feature_to_id, config.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, target_to_id,
                               feature_to_id, config.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, target_to_id,
                                feature_to_id, config.lower)
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_sentences), len(test_data)))

    train_batch_generator = BatchGenerator(train_data, config.batch_size)
    dev_batch_generator = BatchGenerator(dev_data, 100)
    test_batch_generator = BatchGenerator(test_data, 100)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    # number of batches per epoch
    steps_per_epoch = train_batch_generator.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, BiLSTMModel, config, load_word2vec,
                             id_to_char, logger)
        logger.info("start training")
        loss = []
        for i in range(model.config.max_epoch):
            for batch in train_batch_generator.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % model.config.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("epoch iteration: {}, step: {}/{}, "
                                "NER loss: {:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []
            # check whether the F1 score on the dev set improved; returns a bool
            best = evaluate(sess, model, "dev", dev_batch_generator,
                            id_to_target, logger)
            if best:
                save_model(sess, model, model.config.ckpt_path, logger)
            # after each epoch, evaluate the model on the test set
            evaluate(sess, model, "test", test_batch_generator, id_to_target,
                     logger)
        print("final best dev f1 score: " + str(model.best_dev_f1.eval()))
        print("final best test f1 score: " + str(model.best_test_f1.eval()))
def train_src(model, params, data_loader, device):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################
    # set train state for Dropout and BN layers
    model.train()
    # setup criterion and optimizer
    optimizer = optim.Adam(model.parameters(), lr=params.lr)
    loss_class = nn.NLLLoss()

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs_src):
        for step, (images, labels) in enumerate(data_loader):
            # make images and labels variable
            images = images.to(device)
            labels = labels.squeeze_().to(device)
            # zero gradients for optimizer
            optimizer.zero_grad()
            # compute loss for critic
            preds = model(images)
            loss = loss_class(preds, labels)
            # optimize source classifier
            loss.backward()
            optimizer.step()
            # print step info
            if (step + 1) % params.log_step_src == 0:
                # loss.data[0] is deprecated and fails on modern PyTorch;
                # loss.item() is the supported way to get a scalar
                print("Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs_src, step + 1,
                    len(data_loader), loss.item()))
        # eval model on test set
        if (epoch + 1) % params.eval_step_src == 0:
            eval(model, data_loader, flag='source')
            model.train()
        # save model parameters
        if (epoch + 1) % params.save_step_src == 0:
            save_model(model,
                       params.src_dataset + "-source-classifier-{}.pt".format(epoch + 1))
    # save final model
    save_model(model, params.src_dataset + "-source-classifier-final.pt")
    return model
def main():
    env = gym.make('CartPole-v1')
    model = PPO()
    score = 0.0
    print_interval = 20
    log = Log(__file__[:-3])
    experiment = Experiment(api_key="F8yfdGljIExZoi73No4gb1gF5",
                            project_name="reinforcement-learning",
                            workspace="zombasy")
    experiment.set_model_graph(model)

    for n_epi in range(2000):
        s = env.reset()
        done = False
        epsilon = max(0.01, args.epsilon - 0.01 * (n_epi / 200))
        while not done:
            for t in range(args.T_horizon):
                prob = model.pi(torch.from_numpy(s).float())
                m = Categorical(prob)
                a = m.sample().item()
                coin = random.random()
                if coin < epsilon:
                    a = random.randint(0, 1)
                s_prime, r, done, info = env.step(a)
                model.put_data((s, a, r / 100.0, s_prime, prob[a].item(), done))
                s = s_prime
                score += r
                if done:
                    break
            model.train_net()
        if n_epi % print_interval == 0 and n_epi != 0:
            log.info("episode :{}, avg score : {:.1f}".format(
                n_epi, score / print_interval))
            experiment.log_metric('score', score / print_interval)
            experiment.log_metric('epsilon', epsilon)
            score = 0.0
        if n_epi % 500 == 0 and n_epi != 0:
            save_model(model, 'ppo', n_epi, experiment)
    env.close()
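# A hedged sketch of the `save_model(model, name, episode, experiment)` call
# above, assuming it writes a torch checkpoint and attaches it to the Comet
# run; the filename pattern and use of `experiment.log_asset` are assumptions.
import torch

def save_model(model, name, episode, experiment=None):
    path = '{}_{}.pt'.format(name, episode)
    torch.save(model.state_dict(), path)
    if experiment is not None:
        experiment.log_asset(path)  # attach the checkpoint file to the run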
def get_model(state, args, init_model_name=None):
    if init_model_name is not None and os.path.exists(init_model_name):
        model, optimizer, state = load_model(init_model_name,
                                             return_optimizer=True,
                                             return_state=True)
    else:
        if "conv_dropout" in args:
            conv_dropout = args.conv_dropout
        else:
            conv_dropout = cfg.conv_dropout
        # positional args for CNN; the original used the set literal {1},
        # almost certainly a typo for a one-element sequence
        cnn_args = [1]
        if args.fixed_segment is not None:
            frames = cfg.frames
        else:
            frames = None
        nb_layers = 4
        cnn_kwargs = {
            "activation": cfg.activation,
            "conv_dropout": conv_dropout,
            "batch_norm": cfg.batch_norm,
            "kernel_size": nb_layers * [3],
            "padding": nb_layers * [1],
            "stride": nb_layers * [1],
            "nb_filters": [16, 16, 32, 65],
            "pooling": [(2, 2), (2, 2), (1, 4), (1, 2)],
            "aggregation": args.agg_time,
            "norm_out": args.norm_embed,
            "frames": frames,
        }
        nb_frames_staying = cfg.frames // (2 ** 2)
        model = CNN(*cnn_args, **cnn_kwargs)
        # model.apply(weights_init)
        state.update({
            'model': {
                "name": model.__class__.__name__,
                'args': cnn_args,
                "kwargs": cnn_kwargs,
                'state_dict': model.state_dict()
            },
            'nb_frames_staying': nb_frames_staying
        })
        if init_model_name is not None:
            save_model(state, init_model_name)
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    LOG.info("number of parameters in the model: {}".format(pytorch_total_params))
    return model, state
def hmm_train_eval(train_data, test_data, word2id, tag2id, remove_O=False):
    # data
    train_word_lists, train_tag_lists = train_data
    test_word_lists, test_tag_lists = test_data

    # training
    hmm_model = HMM(len(tag2id), len(word2id))
    hmm_model.train(train_word_lists, train_tag_lists, word2id, tag2id)
    save_model(hmm_model, "./ckpts/hmm.pkl")

    # evaluating
    pred_tag_lists = hmm_model.test(test_word_lists, word2id, tag2id)
    results_print(test_tag_lists, pred_tag_lists, remove_O=remove_O)
    return pred_tag_lists
def crf_train_eval(train_data, test_data, remove_O=False):
    train_word_lists, train_tag_lists = train_data
    test_word_lists, test_tag_lists = test_data

    model_file = "./ckpts/crf.pkl"
    crf_model = CRFModel()
    crf_model.train(train_word_lists, train_tag_lists)
    save_model(crf_model, model_file)
    # crf_model = load_model(model_file)

    pred_tag_lists = crf_model.test(test_word_lists)
    results_print(test_tag_lists, pred_tag_lists, remove_O=remove_O)
    return pred_tag_lists
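# The HMM/CRF helpers above save and reload plain Python objects as .pkl
# files. A minimal pickle-based sketch of `save_model`/`load_model`, with
# signatures inferred from the call sites in hmm_train_eval/crf_train_eval:
import pickle

def save_model(model, file_name):
    with open(file_name, 'wb') as f:
        pickle.dump(model, f)

def load_model(file_name):
    with open(file_name, 'rb') as f:
        return pickle.load(f)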
def evaluate(args, amodel, model, device, loader, training=False, beta=1.,
             summary_writer=None, iterations=None):
    """Evaluate the model given data."""
    global BEST_ACC
    model.eval()
    correct1 = 0
    correct5 = 0
    tsize = 0
    if training:
        # store aux-weights
        amodel.store(model)
        doround(args, model)
        if summary_writer is not None:
            for name, param in model.named_parameters():
                summary_writer.add_histogram(name,
                                             param.clone().cpu().data.numpy(),
                                             iterations)
            plus_ones = 0
            minus_ones = 0
            for name, param in model.named_parameters():
                plus_ones += torch.sum(param == 1)
                minus_ones += torch.sum(param == -1)
            summary_writer.add_scalar('plus_ones', int(plus_ones), iterations)
            summary_writer.add_scalar('minus_ones', int(minus_ones), iterations)
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device, torch.float), target.to(device, torch.long)
            output = model(data)
            # topk accuracy
            c1, c5 = util.accuracy(output.data, target, topk=(1, 5))
            correct1 += c1
            correct5 += c5
            tsize += target.size(0)
    if training:
        # restore aux-weights
        amodel.restore(model)
        if summary_writer is not None:
            for name, param in model.named_parameters():
                summary_writer.add_histogram(name + '_unquantized',
                                             param.clone().cpu().data.numpy(),
                                             iterations, bins=1000)
        model.train()
    acc1 = 100. * correct1 / tsize
    acc5 = 100. * correct5 / tsize
    if acc1 > BEST_ACC:
        BEST_ACC = acc1.item()
        if training:
            # storing the continuous weights of the best model, done separately from checkpoint!
            util.save_model({'state_dict': model.state_dict(),
                             'best_acc1': BEST_ACC,
                             'beta': beta}, args.save_name)
    return acc1.item(), acc5.item()
def save_last_model_and_exit(_1, _2):
    global model_already_saved
    if model_already_saved:
        return
    model_already_saved = True

    if os.getpid() == parent_pid:  # otherwise, dataloader workers will try to save the model too!
        logger.info("Interrupted, saving the current model")
        save_model(training_module, optimizer_G, optimizer_D, args)
        # protect from Tensorboard's "Unable to get first event timestamp
        # for run `...`: No event timestamp could be found"
        if writer is not None:
            writer.close()
    sys.exit()
def run(self):
    """Main Training Loop."""
    print('\n======== START TRAINING: {} ========\n'.format(
        datetime.datetime.now().strftime("%d-%m-%y_%H:%M:%S")))
    random.shuffle(self.data['train'])  # shuffle training data at least once
    best_dev_f1 = -1.0
    final_best_theta = 0.5
    for epoch in range(1, self.epoch + 1):
        train_f1 = self.train_epoch(epoch)
        if epoch < self.params['init_train_epochs']:
            continue
        if epoch % self.test_epoch == 0:
            # dev_f1 is already adjusted by theta
            dev_f1, zj_f1, theta, p, r = self.eval_epoch()
            if dev_f1 > best_dev_f1:
                best_dev_f1 = dev_f1
                final_best_theta = theta
                best_train_f1 = train_f1
                print("dev f1=%f, save model" % dev_f1)
                print("zj_f1 f1=%f, save model" % zj_f1)
                # print("f1_score_t f1=%f, save model" % f1_score_t)
                loaderTemp = self.loader
                save_model(self.model_folder, self, loaderTemp)
        if self.es:
            best_epoch, stop = self.early_stopping(epoch)
            if stop:
                break
    if self.es and (epoch != self.epoch):
        print('Best epoch: {}'.format(best_epoch))
        self.eval_epoch(final=True, save_predictions=True)
        self.best_epoch = best_epoch
    elif epoch == self.epoch:
        self.eval_epoch(final=True, save_predictions=True)
    print('\n======== END TRAINING: {} ========\n'.format(
        datetime.datetime.now().strftime("%d-%m-%y_%H:%M:%S")))
    with open(self.ok_file, "w") as f:
        f.write(str(best_dev_f1) + '\t' + str(final_best_theta))
def train(): print("Start init...") train_loader, test_loader = make_dataset() model = make_network() network = model() criterion = make_loss() network.cuda() optimizer = make_optim(network, cfg.warm_lr) print("Start training...") for epoch in range(cfg.epoch): start_time = time.time() if epoch + 1 == cfg.warm_up: optimizer = make_optim(network, cfg.lr) if (epoch + 1) in cfg.milestones: cfg.lr /= 10 optimizer = make_optim(network, cfg.lr) train_base(network, criterion, train_loader, optimizer, epoch) end_time = time.time() used_time = end_time - start_time _, u_minute, u_second = time2hour(used_time) l_hour, l_minute, l_second = time2hour(used_time * (cfg.epoch - epoch - 1)) print("Finish one epoch in %dm: %ds, and %dh: %dm: %ds left." % (u_minute, u_second, l_hour, l_minute, l_second)) train_base(network, criterion, test_loader, optimizer, epoch, mode='test') train_acc = cal_acc(network, train_loader, 'train') test_acc = cal_acc(network, test_loader, 'test') cfg.train_acc.append(train_acc) cfg.val_acc.append(test_acc) if (epoch + 1) % 10 == 0: save_model(network, optimizer, epoch + 1, cfg.model_dir) save_model(network, optimizer, cfg.epoch, cfg.model_dir) print('Finish Training') save_result()
def bilstm_train_and_eval(train_data, dev_data, test_data, word2id, tag2id,
                          crf=True, remove_O=False, reload_model=False):
    # data
    train_word_lists, train_tag_lists = train_data
    dev_word_lists, dev_tag_lists = dev_data
    test_word_lists, test_tag_lists = test_data

    # training
    start = time.time()
    vocab_size = len(word2id)
    out_size = len(tag2id)
    # get model_file
    if crf:
        model_name = "bilstm_crf"
    else:
        model_name = "bilstm"
    emb_size = LSTMConfig.emb_size
    hidden_size = LSTMConfig.hidden_size
    model_file = ("./weights/" + model_name + '_' + str(emb_size) + '_' +
                  str(hidden_size) + ".pkl")
    if reload_model:
        # reload trained model!
        bilstm_model = load_model(model_file)
    else:
        # train and save model!
        bilstm_model = BILSTM_Model(vocab_size, out_size, crf=crf)
        bilstm_model.train(train_word_lists, train_tag_lists, dev_word_lists,
                           dev_tag_lists, word2id, tag2id)
        # re-thinking when to save the model? after valid for each epoch?
        save_model(bilstm_model, model_file)
    print("Training finished, taken {} seconds!".format(int(time.time() - start)))

    print("Evaluating {} model:".format(model_name))
    pred_tag_lists, test_tag_lists = bilstm_model.test(test_word_lists,
                                                       test_tag_lists,
                                                       word2id, tag2id)
    results_print(test_tag_lists, pred_tag_lists, remove_O=remove_O)
    return pred_tag_lists
def main(**kwargs):
    options._parse(kwargs)
    opt = options
    torch.manual_seed(317)

    print('Setting up data...')
    transforms = T.Compose([T.ToTensor()])
    dataset = MotDataset(opt, (640, 480), augment=True, transforms=transforms)
    opt = opt.update_dataset_info_and_set_heads(opt, dataset)
    print(opt)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus
    opt.device = torch.device('cuda' if opt.gpus >= '0' else 'cpu')

    print('Creating model...')
    model = create_model('dla_34', opt.heads, opt.head_conv)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = 0

    # Get dataloader
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    trainer = BaseTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, -1, opt.device)

    if opt.multi_load_model != '':
        model, optimizer = load_model(model, opt.multi_load_model,
                                      trainer.optimizer)
    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        log_dict_train, _ = trainer.train(epoch, train_loader)
        if epoch % opt.save_every == 0:
            lr = opt.lr * 0.5
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
def evaluate(args, amodel, model, device, loader, training=False, beta=1.0):
    """Evaluate the model given data."""
    global BEST_ACC
    model.eval()
    correct1 = 0
    correct5 = 0
    tsize = 0
    if training:
        # store aux-weights
        amodel.store(model)
        # projection and rounding
        simplex(model, device, projection=args.projection, beta=beta)
        doround(model, device, scheme=args.rounding)
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device, torch.float), target.to(device, torch.long)
            output = model(data)
            # topk accuracy
            c1, c5 = util.accuracy(output.data, target, topk=(1, 5))
            correct1 += c1
            correct5 += c5
            tsize += target.size(0)
    if training:
        # restore aux-weights
        amodel.restore(model)
        model.train()
    acc1 = 100.0 * correct1 / tsize
    acc5 = 100.0 * correct5 / tsize
    if acc1 > BEST_ACC:
        BEST_ACC = acc1.item()
        if training:
            # storing the continuous weights of the best model, done separately from checkpoint!
            util.save_model(
                {"state_dict": model.state_dict(),
                 "best_acc1": BEST_ACC,
                 "beta": beta},
                args.save_name)
    return acc1.item(), acc5.item()
def main():
    args = parse_args()
    update_config(args.cfg)
    fix_random_seed(cfg.SEED)
    # cudnn related settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    cudnn.enabled = cfg.CUDNN.ENABLE

    # prepare output directory
    output_dir = os.path.join(cfg.ROOT_DIR, cfg.TRAIN.MODEL_DIR)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # copy the ssad.yaml config file to the output directory
    cfg_file = os.path.join(output_dir, args.cfg.split('/')[-1])
    shutil.copyfile(args.cfg, cfg_file)

    # data loaders
    # Notice: we discard the last data
    train_dset = TALDataset(cfg, cfg.DATASET.TRAIN_SPLIT)
    train_loader = DataLoader(train_dset,
                              batch_size=cfg.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              drop_last=False,
                              num_workers=cfg.WORKERS,
                              pin_memory=cfg.PIN_MEMORY)
    val_dset = TALDataset(cfg, cfg.DATASET.VAL_SPLIT)
    val_loader = DataLoader(val_dset,
                            batch_size=cfg.TEST.BATCH_SIZE,
                            shuffle=False,
                            drop_last=False,
                            num_workers=cfg.WORKERS,
                            pin_memory=cfg.PIN_MEMORY)

    model = LocNet(cfg)
    model.apply(weight_init)
    model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    for epoch in range(cfg.TRAIN.BEGIN_EPOCH, cfg.TRAIN.END_EPOCH + 1):
        if epoch in cfg.TRAIN.LR_DECAY_EPOCHS:
            decay_lr(optimizer, factor=cfg.TRAIN.LR_DECAY_FACTOR)
        loss_train = train(cfg, train_loader, model, optimizer)
        print('epoch %d: loss: %f' % (epoch, loss_train))
        with open(os.path.join(cfg.ROOT_DIR, cfg.TRAIN.LOG_FILE), 'a') as f:
            f.write("epoch %d, loss: %.4f\n" % (epoch, loss_train))

        if epoch % cfg.TEST.EVAL_INTERVAL == 0:
            # model
            weight_file = save_model(cfg, epoch=epoch, model=model,
                                     optimizer=optimizer)
            out_df_af, out_df_ab = evaluation(val_loader, model, epoch, cfg)
            out_df_ab['conf'] = out_df_ab['conf'] * cfg.TEST.CONCAT_AB
            out_df = pd.concat([out_df_af, out_df_ab])
            post_process(out_df, epoch, cfg, is_soft_nms=False)
def main():
    params = get_params()
    set_random_seed(params.RANDOM_SEED)
    parse_data()
    data = DatasetNorm('cutted_data')
    train_set, test_set = torch.utils.data.random_split(
        data, [len(data) - 100, 100])
    trainloader = DataLoader(dataset=train_set,
                             batch_size=params.BATCH_SIZE,
                             shuffle=True,
                             num_workers=8)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    tcnn = TempoCNN().to(device)
    wandb.init(project="tcnn")
    config = wandb.config
    config.learning_rate = 0.001
    wandb.watch(tcnn)
    if not params.LOAD_MODEL:
        model = train(tcnn, trainloader)
        save_model(model)
    else:
        model = load_model().to(device)
    testloader = DataLoader(dataset=test_set,
                            batch_size=params.BATCH_SIZE,
                            shuffle=True)
    iters = 0
    loss = 0.0
    cr_loss = nn.BCELoss()
    for i, data in enumerate(testloader, 0):
        tcnn.eval()
        mels, labels = data[0].to(device), data[1].to(device)
        pred = model(mels.unsqueeze(-1).permute(0, 3, 1, 2)).to('cpu').detach()
        res = accuracy(pred, labels)
        print(res)
        loss += cr_loss(pred.float(), labels.float().to('cpu').detach()).item()
        iters += 1
    print(loss / iters)
def train(loader, D, G, optim_D, optim_G, criterion):
    G_losses = [0]
    D_losses = [0]
    timer = Timer()
    for i in range(1, config.num_epoch + 1):
        iters = 0
        for data in loader:
            current_size = data.size(0)
            labels0 = torch.tensor([0] * current_size).to(config.device, torch.long)
            labels1 = torch.tensor([1] * current_size).to(config.device, torch.long)
            noise = torch.randn((current_size, config.latent_size, 1, 1)).to(config.device)
            D_loss = D_train(data, D, G, optim_D, criterion, current_size,
                             labels0, labels1, noise)
            G_loss = G_train(D, G, optim_G, criterion, current_size, labels0,
                             labels1, noise)
            iters += 1
            D_losses.append(D_loss)
            G_losses.append(G_loss)
            if iters % config.log_iter == 0:
                timer.save_batch_time()
                log_batch_history(i, iters, len(loader), D_losses, G_losses, timer)
        save_model(i, G, optim_G, D, optim_D)
        timer.save_epoch_time()
        log_epoch_history(i, len(loader), D_losses, G_losses, timer)
        if i % config.make_img_samples == 0:
            for x in range(5):
                make_img_samples(G)
def export_pb_file(params, output_path, pb_fname):
    """
    Freeze the model and export it as a PB file.
    :param params: the parameters used for training
    :param output_path: the output directory
    :param pb_fname: the output PB file name
    """
    num_classes = params['num_classes']
    ckpt_dir = params['checkpoint_path']
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, 299, 299, 3], name='input')
        logits = dishnet.ss_dishnet(images, num_classes, False, scope='')
        out_tensor = tf.nn.softmax(logits, name='output')
        saved_ckpt = tf.train.latest_checkpoint(ckpt_dir)
        assert saved_ckpt is not None
        with tf.Session(graph=graph) as sess:
            saver = tf.train.Saver()
            saver.restore(sess, saved_ckpt)
            if pb_fname.endswith('.pb'):
                write_pb(sess, graph, output_path, pb_fname)
            else:
                save_model(sess, images, out_tensor,
                           os.path.join(output_path, pb_fname))
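# `write_pb` above is not shown. A hedged sketch based on the standard TF1
# frozen-graph recipe; the output node name 'output' matches the softmax op
# defined in export_pb_file, everything else here is an assumption.
import os
import tensorflow as tf

def write_pb(sess, graph, output_path, pb_fname):
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), ['output'])
    with tf.gfile.GFile(os.path.join(output_path, pb_fname), 'wb') as f:
        f.write(frozen.SerializeToString())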
def main(df_path: str = '/project/cq-training-1/project1/data/catalog.helios.public.20100101-20160101.pkl',
         image_size: int = 32,
         model: str = 'dummy',
         epochs: int = 20,
         optimizer: str = 'adam',
         lr: float = 1e-4,
         batch_size: int = 100,
         subset_perc: float = 1,
         subset_dates: bool = False,
         saved_model_dir: str = None,
         seq_len: int = 6,
         seed: bool = True,
         scale_label: bool = True,
         use_csky: bool = False,
         cache: bool = True,
         timesteps_minutes: int = 15):
    # Warn if no GPU detected
    if len(tf.config.list_physical_devices('GPU')) == 0:
        logger.warning('No GPU detected, training will run on CPU.')
    elif len(tf.config.list_physical_devices('GPU')) > 1:
        logger.warning('Multiple GPUs detected, training will run on only one GPU.')

    if subset_dates and subset_perc != 1:
        raise Exception(f'Invalid configuration. Argument --subset_dates=True and --subset_perc={subset_perc}.')

    # Set random seed
    if seed:
        tf.random.set_seed(SEED)
        np.random.seed(SEED)

    # Load dataframe
    logger.info('Loading and preprocessing dataframe...')
    df = pd.read_pickle(df_path)
    df = preprocessing.preprocess(df, shuffle=False, scale_label=scale_label)
    metadata = data.Metadata(df, scale_label)

    # Pre-crop data
    logger.info('Getting crops...')
    images = data.Images(metadata, image_size)
    # images.crop(dest=SLURM_TMPDIR)
    images.crop(dest=images.shared_storage)

    # Split into train and valid
    if subset_dates:
        metadata_train, metadata_valid = metadata.split_with_dates()
    else:
        metadata, _ = metadata.split(1 - subset_perc)
        metadata_train, metadata_valid = metadata.split(VALID_PERC)
    nb_train_examples = metadata_train.get_number_of_examples()
    nb_valid_examples = metadata_valid.get_number_of_examples()
    logger.info(f'Number of training examples : {nb_train_examples}, '
                f'number of validation examples : {nb_valid_examples}')

    # Create model
    if model == 'dummy':
        model = baselines.DummyModel()
    elif model == 'sunset':
        model = baselines.SunsetModel()
    elif model == 'cnndem':
        model = baselines.ConvDemModel(image_size)
    elif model == 'sunset3d':
        model = baselines.Sunset3DModel()
    elif model == 'convlstm':
        model = baselines.ConvLSTM()
    elif model == 'cnngru':
        model = CnnGru(seq_len)
    elif model == 'cnngruatt':
        model = CnnGruAtt(seq_len)
    elif model == 'cnnlstm':
        model = LSTM_Resnet(seq_len)
    elif model == 'resnet':
        model = baselines.ResNetModel()
    else:
        raise Exception(f'Model "{model}" not recognized.')

    # Load model weights
    if saved_model_dir is not None:
        model.load_weights(os.path.join(saved_model_dir, "model"))

    # Loss and optimizer
    mse = tf.keras.losses.MeanSquaredError()
    if optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(lr)
    elif optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD(lr)
    else:
        raise Exception(f'Optimizer "{optimizer}" not recognized.')

    # Create data loaders
    dataloader_train = SequenceDataset(
        metadata_train, images, seq_len, batch_size,
        timesteps=datetime.timedelta(minutes=timesteps_minutes), cache=cache)
    dataloader_valid = SequenceDataset(
        metadata_valid, images, seq_len, batch_size,
        timesteps=datetime.timedelta(minutes=timesteps_minutes), cache=cache)

    # Training loop
    logger.info('Training...')
    losses = {'train': [], 'valid': []}
    best_valid_loss = float('inf')
    for epoch in range(epochs):
        train_epoch(model, dataloader_train, batch_size, mse, optimizer,
                    nb_train_examples, scale_label, use_csky)
        test_epoch(model, dataloader_valid, batch_size, mse,
                   nb_valid_examples, scale_label, use_csky)
        train_loss = np.sqrt(train_mse_metric.result().numpy())
        valid_loss = np.sqrt(valid_mse_metric.result().numpy())
        csky_valid_loss = np.sqrt(valid_csky_mse_metric.result().numpy())
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            utils.save_model(model)

        # Logs
        logger.info(f'Epoch {epoch} - Train Loss : {train_loss:.4f}, '
                    f'Valid Loss : {valid_loss:.4f}, '
                    f'Csky Valid Loss : {csky_valid_loss:.4f}')
        losses['train'].append(train_loss)
        losses['valid'].append(valid_loss)
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss, step=epoch)
        with test_summary_writer.as_default():
            tf.summary.scalar('loss', valid_loss, step=epoch)

    # Plot losses
    plots.plot_loss(losses['train'], losses['valid'], csky_valid_loss)
TEXT = BOWField(device, config['cut_most_freq'], term_freqs=tfs)
LABEL = torchdata.Field(use_vocab=False,
                        sequential=False,
                        preprocessing=lambda x: int(x),
                        is_target=True)
train_dataset, test_dataset = torchdata.TabularDataset.splits(
    path=config['dataset_path'],
    train=config['dataset_train'],
    test=config['dataset_test'],
    format='tsv',
    fields=[('label', LABEL), ('text', TEXT)])
train_iterator = torchdata.BucketIterator(train_dataset,
                                          batch_size=config['batch_size'],
                                          device=device)
test_iterator = torchdata.BucketIterator(test_dataset,
                                         batch_size=config['batch_size'],
                                         device=device)
TEXT.build_vocab(train_dataset)
LABEL.build_vocab(train_dataset)

num_classes, weights = get_weights([e.label for e in train_dataset.examples])
feature_size = TEXT.get_features_count()

# note: the original rebound the class name `BOWModel` to its own instance;
# a distinct instance name avoids shadowing the class
bow_model = BOWModel(input_size=feature_size,
                     num_classes=num_classes,
                     dropout=config['dropout']).to(device)
if config['load_checkpoint']:
    bow_model.load_state_dict(torch.load(config['checkpoint'], map_location=device))
print(f'Model has {utils.count_parameters(bow_model)} trainable parameters')

criterion = nn.CrossEntropyLoss(weight=torch.as_tensor(weights, device=device).float())
optimiser = torch.optim.Adam(bow_model.parameters(),
                             lr=config['learning_rate'],
                             weight_decay=config['weight_decay'])
train(bow_model, criterion, optimiser, train_iterator)
test(bow_model, test_iterator)
if config['save_model']:
    utils.save_model('modelBOW.ckpt', bow_model)
def train_src_threemodal(model1, model2, model3, train_loader1, train_loader2,
                         train_loader3, val_loader):
    global lr
    global best_prec1
    lr = params.base_lr
    # model1 = construct_resnet18(model1, params)
    # model2 = construct_resnet18(model2, params)
    # model3 = construct_resnet18(model3, params)
    model1 = construct_resnet34(model1, params)
    model2 = construct_resnet34(model2, params)
    model3 = construct_resnet34(model3, params)
    model1.train()
    model2.train()
    model3.train()

    optimizer1 = torch.optim.Adam(list(model1.parameters()),
                                  lr=params.base_lr, betas=(0.9, 0.99))
    optimizer2 = torch.optim.Adam(list(model2.parameters()),
                                  lr=params.base_lr, betas=(0.9, 0.99))
    optimizer3 = torch.optim.Adam(list(model3.parameters()),
                                  lr=params.base_lr, betas=(0.9, 0.99))
    # criterion = nn.CrossEntropyLoss().cuda()
    focalloss = FocalLoss(gamma=2)

    for epoch in range(params.start_epoch, params.start_epoch + params.num_epochs):
        adjust_learning_rate(optimizer1, epoch, params.base_lr)
        adjust_learning_rate(optimizer2, epoch, params.base_lr)
        adjust_learning_rate(optimizer3, epoch, params.base_lr)
        # train each modality for one epoch
        # train_batch(train_loader, model, criterion, optimizer, epoch)
        for step, (images, labels) in enumerate(train_loader1):
            # make images and labels variable
            images = make_variable(images)
            labels = make_variable(labels.squeeze_())
            # zero gradients for optimizer
            optimizer1.zero_grad()
            # compute loss for critic
            preds = model1(images)
            loss = focalloss(preds, labels)
            # optimize source classifier
            loss.backward()
            optimizer1.step()
            # print step info
            if (step + 1) % params.log_step_pre == 0:
                print("Color Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs, step + 1,
                    len(train_loader1), loss.item()))
        for step, (images, labels) in enumerate(train_loader2):
            images = make_variable(images)
            labels = make_variable(labels.squeeze_())
            optimizer2.zero_grad()
            preds = model2(images)
            loss = focalloss(preds, labels)
            loss.backward()
            optimizer2.step()
            if (step + 1) % params.log_step_pre == 0:
                print("Depth Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs, step + 1,
                    len(train_loader2), loss.item()))
        for step, (images, labels) in enumerate(train_loader3):
            images = make_variable(images)
            labels = make_variable(labels.squeeze_())
            optimizer3.zero_grad()
            preds = model3(images)
            loss = focalloss(preds, labels)
            loss.backward()
            optimizer3.step()
            if (step + 1) % params.log_step_pre == 0:
                print("Ir Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs, step + 1,
                    len(train_loader3), loss.item()))
        if (epoch + 1) % params.eval_step_pre == 0:
            eval_acc(model1, model2, model3, val_loader)
        # save model parameters
        if (epoch + 1) % params.save_step_pre == 0:
            save_model(model1, "MultiNet-color-{}.pt".format(epoch + 1))
            save_model(model2, "MultiNet-depth-{}.pt".format(epoch + 1))
            save_model(model3, "MultiNet-ir-{}.pt".format(epoch + 1))
    # save final models
    save_model(model1, "MultiNet-color-final.pt")
    save_model(model2, "MultiNet-depth-final.pt")
    save_model(model3, "MultiNet-ir-final.pt")
    return model1, model2, model3
df.pop("id") df.pop("_id") # df.pop("displayed_signature_count") # df.pop("displayed_supporter_count") df.pop("is_verified_victory") df.pop("description") df["status"] = df["status"].apply(lambda x: 1 if x == "victory" else 0) print df.shape y = df.pop("status") X = df X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) petitions_model.fit_weighted_rf(X_train, y_train) save_model(petitions_model, "rf_new_petitions_model") y_pred_train = petitions_model.predict(X) y_pred = petitions_model.predict(X_test) print "--------------------------TRAIN-----------------------------------" print "victories:" , sum(y) print "total:" , len(y) print "null accuracy:" , 1-(sum(y) / len(y)) print "Accuracy:", accuracy_score(y, y_pred_train) print "Precision:", precision_score(y, y_pred_train) print "Recall:", recall_score(y, y_pred_train) print "--------------------------TEST-----------------------------------"
def train_feature_fusion(model, train_loader, val_loader):
    global lr
    global best_prec1
    lr = params.base_lr
    # model = construct_premodel(model, params)
    # model = construct_resnet18(model, params)
    model = torch.nn.DataParallel(model)
    model.train()
    # optimizer = torch.optim.Adam(
    #     list(model.parameters()),
    #     lr=params.base_lr,
    #     betas=(0.9, 0.99))
    optimizer = torch.optim.SGD(list(model.parameters()),
                                lr=params.base_lr,
                                momentum=0.9,
                                weight_decay=0.0005)
    criterion = nn.CrossEntropyLoss().cuda()
    centerloss = CenterLoss(num_classes=2, feat_dim=2, use_gpu=True)
    optimzer4center = torch.optim.SGD(centerloss.parameters(), lr=0.5)
    loss_weight = 0.1
    # focalloss = FocalLoss(gamma=2)

    for epoch in range(params.start_epoch, params.start_epoch + params.num_epochs):
        adjust_learning_rate(optimizer, epoch, params.base_lr)
        # train for one epoch
        # train_batch(train_loader, model, criterion, optimizer, epoch)
        for step, (image1, image2, image3, label) in enumerate(train_loader):
            img1_name = image1[0][0]
            img1 = image1[1]
            img2_name = image2[0][0]
            img2 = image2[1]
            img3_name = image3[0][0]
            img3 = image3[1]
            image1 = make_variable(img1)
            image2 = make_variable(img2)
            image3 = make_variable(img3)
            label = make_variable(label.squeeze_())
            # img1_array = np.array(image1)
            # img2_array = np.array(image2)
            # img3_array = np.array(image3)
            # print(img1_name, img2_name, img3_name, img1.shape, img2.shape, img3.shape)
            feature_concat = torch.cat((image1, image2, image3), 1)
            feat, preds = model(feature_concat)
            loss = criterion(preds, label) + loss_weight * centerloss(feat, label)
            # loss = criterion(preds, label)
            # loss = focalloss(preds, label)
            optimizer.zero_grad()
            optimzer4center.zero_grad()
            # optimize source classifier
            loss.backward()
            optimizer.step()
            optimzer4center.step()
            # print step info
            if (step + 1) % params.log_step_pre == 0:
                print("fusion Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                    epoch + 1, params.num_epochs, step + 1,
                    len(train_loader), loss.item()))
        if (epoch + 1) % params.eval_step_pre == 0:
            eval_src(model, val_loader)
        # save model parameters
        if (epoch + 1) % params.save_step_pre == 0:
            save_model(model, "MultiNet-fusion-{}.pt".format(epoch + 1))
    # save final model
    save_model(model, "MultiNet-fusion-final.pt")
    return model
def generate_model(rf, model_name):
    collection = "featured_petitions"
    query = {"$and": [{"status": 1}]}
    target = "is_verified_victory"
    # to_pop = "status"
    # note: the original listed "num_targets" twice; the duplicate is dropped
    extract_features = ["goal_days_ratio", "milestones",
                        "num_words_letter_body", "comments_likes", "progress",
                        "news_coverages", "created_at_quarter",
                        "display_title_len", "num_bold_words_description",
                        "num_targets", "count_group_targets",
                        "num_capitalized_words_description",
                        "num_capitalized_words_display_title",
                        "count_custom_targets", "count_democrat_targets",
                        "count_republican_targets", "is_organization",
                        "is_verified_victory", "num_responses", "same_state"]
    df = read_mongo("changeorg", collection, query)
    df = df[df["days_range_end_date"] > 0]
    df = df[extract_features]
    df.fillna(0, inplace=True)
    # df.pop("display_title")
    # df.pop("letter_body")
    # df.pop("description")
    # df.pop("id")
    # df.pop("_id")
    # df.pop(to_pop)
    y = df.pop(target)
    X = df
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=42)
    ada_parameters = {'n_estimators': 300}
    rf_parameters = {
        'n_estimators': 300,
        # 'max_features': 80,
        'max_depth': None,
        'min_samples_leaf': 20,
        'random_state': 29,
        'class_weight': None
    }
    if rf:
        clf = WeightedRFClassifier()
        clf.set_params(**rf_parameters)
    else:
        clf = WeightedAdaClassifier()
        clf.set_params(**ada_parameters)

    model_pipeline = ModelPipeline(clf)
    model_pipeline.fit(X_train, y_train)
    save_model(model_pipeline, model_name)
    y_pred_train = model_pipeline.predict(X_train)
    y_pred = model_pipeline.predict(X_test)

    print "-------------------------------------------------------------------"
    print model_name
    print "--------------------------TRAIN-----------------------------------"
    print "victories:", sum(y_train)
    print "total:", len(y_train)
    print "null accuracy:", 1 - (sum(y_train) / len(y_train))
    print "Accuracy:", accuracy_score(y_train, y_pred_train)
    print "Precision:", precision_score(y_train, y_pred_train)
    print "Recall:", recall_score(y_train, y_pred_train)
    print "--------------------------TEST-----------------------------------"
    print "victories:", sum(y_test)
    print "total:", len(y_test)
    print "null accuracy:", 1 - (sum(y_test) / len(y_test))
    print "Accuracy:", accuracy_score(y_test, y_pred)
    print "Precision:", precision_score(y_test, y_pred)
    print "Recall:", recall_score(y_test, y_pred)
    print "confusion matrix"
    print confusion_matrix(y_test, y_pred, [1, 0])

    # Print the feature ranking
    print "------------------Feature ranking--------------------------------------"
    print model_pipeline.feat_importances(100)

    y_score = model_pipeline.pipeline.predict_proba(X_test)[:, 1]
    false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_score)
    roc_auc = auc(false_positive_rate, true_positive_rate)
def train_src(model, src_data_loader, tgt_data_loader_eval, device, params):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################
    # setup criterion and optimizer
    parameter_list = [
        {"params": get_parameters(model.features, 'weight'), "lr": 0.001},
        {"params": get_parameters(model.features, 'bias'), "lr": 0.002},
        {"params": get_parameters(model.fc, 'weight'), "lr": 0.01},
        {"params": get_parameters(model.fc, 'bias'), "lr": 0.02},
    ]
    optimizer = optim.SGD(parameter_list, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################
    global_step = 0
    for epoch in range(params.num_epochs):
        for step, (images, labels) in enumerate(src_data_loader):
            model.train()
            global_step += 1
            adjust_learning_rate(optimizer, global_step)
            # make images and labels variable
            images = images.to(device)
            labels = labels.to(device)
            # zero gradients for optimizer
            optimizer.zero_grad()
            # compute loss for critic
            preds = model(images)
            loss = criterion(preds, labels)
            # optimize source classifier
            loss.backward()
            optimizer.step()
            # print step info
            if global_step % params.log_step == 0:
                print("Epoch [{:4d}] Step [{:4d}]: loss={:.5f}".format(
                    epoch + 1, global_step, loss.data.item()))
            # eval model on test set
            if global_step % params.eval_step == 0:
                eval(model, src_data_loader, device)
                eval(model, tgt_data_loader_eval, device)
            # save model parameters
            if global_step % params.save_step == 0:
                save_model(model,
                           params.src_dataset + "-source-classifier-{}.pt".format(global_step),
                           params)
            # end
            if global_step > params.max_step:
                break
    # save final model
    save_model(model, params.src_dataset + "-source-classifier-final.pt", params)
    return model