# Script-level setup for the LSTM classifier run: device flag, model
# selection, visdom logging, data loading, and model construction.

# CUDA is forced on for this run (instead of probing torch.cuda.is_available()).
opt.use_cuda = True

# Model selection; the visdom environment is named after the chosen model.
opt.model = 'lstm'
opt.env = opt.model

vis = Visualizer(opt.env)

# Echo the full run configuration into the visdom log for reproducibility.
vis.log('user config:')
for key in opt.__dict__:
    if key.startswith('__'):
        continue
    vis.log('{} {}'.format(key, getattr(opt, key)))

# Build the train/test iterators (torchtext under the hood) and the model.
train_iter, test_iter = load_data(opt)
model = models.init(opt)
print(type(model))

# Move the model to the GPU when requested.
if opt.use_cuda:
    model.cuda()
def train(opt):
    """Train the Pos_generator (POS-sequence predictor) on video features.

    Reads vocab/category sizes from ``opt.data_path``, optionally resumes from
    ``opt.start_from``, and trains until the early-stopping patience counter
    (bumped on every non-improving validation checkpoint) exceeds
    ``opt.patience``.  Checkpoints and an ``infos`` pickle are written to
    ``opt.checkpoint_path``.
    """
    vis = Visualizer(env='Pos_generator')
    opt.vocab_size = get_nwords(opt.data_path)
    opt.category_size = get_nclasses(opt.data_path)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_dataset, valid_dataset, test_dataset = load_dataset_pos(opt)
    train_loader = DataLoader(train_dataset,
                              opt.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn_pos)
    model = Pos_generator(opt)
    infos = {}
    best_score = None  # here "score" is a validation LOSS: lower is better
    crit = LanguageModelCriterion()  # NOTE(review): built but unused below
    classify_crit = ClassifierCriterion()
    # Optionally resume: load best weights and the saved training infos.
    if opt.start_from is not None:
        assert os.path.isdir(opt.start_from), 'opt.start_from must be a dir'
        state_dict_path = os.path.join(opt.start_from,
                                       opt.model_name + '-bestmodel.pth')
        assert os.path.isfile(state_dict_path), 'bestmodel doesn\'t exist!'
        model.load_state_dict(torch.load(state_dict_path), strict=True)
        infos_path = os.path.join(opt.start_from,
                                  opt.model_name + '_infos_best.pkl')
        assert os.path.isfile(infos_path), 'infos of bestmodel doesn\'t exist!'
        with open(infos_path, 'rb') as f:
            infos = pickle.load(f)
        # seed == 0 means "inherit the seed of the resumed run".
        if opt.seed == 0:
            opt.seed = infos['opt'].seed
        best_score = infos.get('best_score', None)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           weight_decay=opt.weight_decay)
    train_patience = 0
    epoch = infos.get('epoch', 0)
    loss_meter = meter.AverageValueMeter()
    while True:
        # Early stopping: patience is incremented per non-improving checkpoint.
        if train_patience > opt.patience:
            break
        loss_meter.reset()
        # Step-decay learning-rate schedule.
        if opt.learning_rate_decay_start != -1 and epoch > opt.learning_rate_decay_start:
            frac = int((epoch - opt.learning_rate_decay_start) /
                       opt.learning_rate_decay_every)
            decay_factor = opt.learning_rate_decay_rate**frac
            opt.current_lr = opt.learning_rate * decay_factor
            set_lr(optimizer, opt.current_lr)
        else:
            opt.current_lr = opt.learning_rate
        # Scheduled sampling: probability ramps up with epochs, capped.
        if opt.scheduled_sampling_start != -1 and epoch > opt.scheduled_sampling_start:
            frac = int((epoch - opt.scheduled_sampling_start) /
                       opt.scheduled_sampling_increase_every)
            opt.scheduled_sample_probability = min(
                opt.scheduled_sampling_increase_probability * frac,
                opt.scheduled_sampling_max)
            model.scheduled_sample_probability = opt.scheduled_sample_probability
        # if torch.cuda.is_available():
        #     torch.cuda.synchronize()
        for i, (data, caps, caps_mask, cap_classes, class_masks, feats0,
                feats1, feat_mask, lens, gts,
                video_id) in enumerate(train_loader):
            caps = caps.to(device)
            caps_mask = caps_mask.to(device)
            cap_classes = cap_classes.to(device)
            feats0 = feats0.to(device)
            feats1 = feats1.to(device)
            feat_mask = feat_mask.to(device)
            class_masks = class_masks.to(device)
            # Right-shift the class sequence by one step (last column moved
            # to the front) so the model predicts the next class.
            cap_classes = torch.cat([cap_classes[:, -1:], cap_classes[:, :-1]],
                                    dim=1)
            new_mask = torch.zeros_like(class_masks)
            cap_classes = cap_classes.to(device)
            new_mask = new_mask.to(device)
            # Rebuild a mask per sample up to (and including) the position of
            # the first nonzero entry of class_masks.
            # NOTE(review): argwhere(...)[0][-1] takes the FIRST nonzero
            # index, not the last — confirm this is the intended boundary.
            for j in range(class_masks.size(0)):
                index = np.argwhere(
                    class_masks.cpu().numpy()[j, :] != 0)[0][-1]
                new_mask[j, :index + 1] = 1.0
            optimizer.zero_grad()
            out = model(feats0, feats1, feat_mask, caps, caps_mask,
                        cap_classes, new_mask)
            loss = classify_crit(out, cap_classes, caps_mask, class_masks)
            loss.backward()
            clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            train_loss = loss.detach()
            loss_meter.add(train_loss.item())
            # Periodic visdom plotting/logging of the running loss.
            if (i + 1) % opt.visualize_every == 0:
                vis.plot('loss', loss_meter.value()[0])
                information = 'best_score is ' + (str(best_score) if
                                                  best_score is not None else
                                                  '0.0')
                information += ' current_score is ' + str(train_loss.item())
                vis.log(information)
            # Periodic validation + checkpointing.
            if (i + 1) % opt.save_checkpoint_every == 0:
                # print('i am saving!!')
                eval_kwargs = {}
                eval_kwargs.update(vars(opt))
                val_loss = eval_extract.eval_and_extract(
                    model, classify_crit, valid_dataset, device, 'validation',
                    eval_kwargs, False)
                # This is the cross-entropy loss, a positive number; a smaller
                # loss means higher accuracy.  To turn it into a score one
                # would negate it so that larger means better.
                # NOTE(review): no negation is applied — the comparison below
                # treats best_score as "lowest validation loss", which is
                # consistent, just not the sign convention the note implies.
                current_score = val_loss.cpu().item()
                is_best = False
                if best_score is None or best_score > current_score:
                    best_score = current_score
                    is_best = True
                    train_patience = 0
                else:
                    train_patience += 1
                # Always save an iteration-tagged checkpoint + infos pickle.
                path_of_save_model = os.path.join(opt.checkpoint_path,
                                                  str(i) + '_model.pth')
                torch.save(model.state_dict(), path_of_save_model)
                infos['iteration'] = i
                infos['epoch'] = epoch
                infos['best_val_score'] = best_score
                infos['opt'] = opt
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.model_name + '.pkl'),
                        'wb') as f:
                    pickle.dump(infos, f)
                # Additionally keep a "best" copy when this checkpoint wins.
                if is_best:
                    path_of_save_model = os.path.join(
                        opt.checkpoint_path, opt.model_name + '-bestmodel.pth')
                    torch.save(model.state_dict(), path_of_save_model)
                    with open(
                            os.path.join(
                                opt.checkpoint_path,
                                'infos_' + opt.model_name + '-best.pkl'),
                            'wb') as f:
                        pickle.dump(infos, f)
        epoch += 1
def train(opt):
    """Train the ``zin`` eye-image classifier with SGD, tracking acc/kappa.

    Args:
        opt: config object; fields used here are ``visname``, ``batchsize``
            and ``printloss``.

    Side effects: plots loss/acc/kappa to visdom and saves the full model
    object as ``zoom3_<epoch>.pth`` whenever validation accuracy improves.
    """
    vis = Visualizer(opt.visname)
    model = zin()
    # NOTE(review): device ids are hard-coded; assumes 4 visible GPUs.
    model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model = model.cuda()
    loss_fun = t.nn.CrossEntropyLoss()
    # BUG FIX: the optimizer always ran at 1e-5 while the visdom log reported
    # opt.lr.  Training behavior is unchanged (still 1e-5); the log is now
    # truthful about the rate actually used.
    lr = 1e-05
    optim = t.optim.SGD(model.parameters(),
                        lr=lr,
                        weight_decay=5e-5,
                        momentum=0.9)
    loss_meter = meter.AverageValueMeter()
    con_matx = meter.ConfusionMeter(5)  # 5-class confusion matrix
    pre_acc = 0  # best validation accuracy seen so far
    # FIX: build the datasets/loaders ONCE instead of re-instantiating them
    # every epoch — DataLoader reshuffles per epoch on its own, so behavior
    # is unchanged and per-epoch setup cost disappears.
    train_data = Eye_img(train=True)
    val_data = Eye_img(train=False)
    train_loader = DataLoader(train_data, opt.batchsize, True)
    val_loader = DataLoader(val_data, opt.batchsize, False)
    for epoch in range(100):
        loss_meter.reset()
        con_matx.reset()
        for ii, (imgs, data, label) in enumerate(train_loader):
            print('epoch:{}/{}'.format(epoch, ii))
            imgs = imgs.cuda()
            data = data.cuda()
            label = label.cuda()
            optim.zero_grad()
            pre_label = model(imgs, data)
            loss = loss_fun(pre_label, label)
            loss.backward()
            optim.step()
            loss_meter.add(loss.item())
            con_matx.add(pre_label.detach(), label.detach())
            # Periodically plot the running training loss.
            if (ii + 1) % opt.printloss == 0:
                vis.plot('loss', loss_meter.value()[0])
        # Validate once per epoch; keep a checkpoint of the best model.
        val_cm, acc, kappa_ = val(model, val_loader)
        if pre_acc < acc:
            pre_acc = acc
            # NOTE(review): saves the whole DataParallel module (not a
            # state_dict) — kept as-is for compatibility with loading code.
            t.save(model, 'zoom3_{}.pth'.format(epoch))
        vis.plot('acc', acc)
        vis.plot('kappa', kappa_)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(con_matx.value()),
                    lr=lr))
# Script-level setup for the mixed-LSTM classifier run: preprocessing flag,
# RNG seed, dataset/model choice, visdom logging, data loading, model build.

# Raw data has not been preprocessed yet for this run.
opt.data_is_preprocessed = False

# Fixed random seed and dataset name.
opt.seed = 666
opt.dataset = 'kaggle'

# Model selection; the visdom environment is derived from the model name.
opt.model = 'lstm_mixed'
opt.env = opt.model + '_clf'

vis = Visualizer(opt.env)

# Echo the full run configuration into the visdom log for reproducibility.
vis.log('user config:')
for key in opt.__dict__:
    if key.startswith('__'):
        continue
    vis.log('{} {}'.format(key, getattr(opt, key)))

# torchtext-based loading; this variant also yields a validation iterator.
train_iter, val_iter, test_iter = load_data(opt)
model = models.init(opt)
print(type(model))

# Move the model to the GPU when requested.
if opt.use_cuda:
    model.cuda()
def train(opt):
    """Train the Caption_generator, optionally with self-critical RL.

    Phase 1 (``sc_flag`` False) trains with cross-entropy word loss plus a
    weighted category-classification loss.  After ``opt.self_critical_after``
    epochs, training switches to self-critical sequence training where the
    reward is either CIDEr-style (default) or an InferSent-based semantic
    similarity (``opt.eval_semantics``).  Early stopping uses a patience
    counter bumped on non-improving validation checkpoints.
    """
    vis = Visualizer(env='Caption_generator')
    opt.vocab_size = get_nwords(opt.data_path)
    opt.category_size = get_nclasses(opt.data_path)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_dataset, valid_dataset, test_dataset = load_dataset_cap(opt=opt)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn_cap)
    model = Caption_generator(opt=opt)
    # Pre-computed sentence embeddings used by the semantic reward.
    embeddings_path = os.path.join(opt.data_path, 'sentence_embeddings.pkl')
    total_embeddings = load_pkl(embeddings_path)
    infos = {}
    best_score = None  # here higher is better (CIDEr / semantic score)
    crit = LanguageModelCriterion()
    classify_crit = ClassifierCriterion()
    rl_crit = RewardCriterion()
    # --- InferSent setup (sentence encoder for the semantic reward) ---
    model_version = 2
    MODEL_PATH = opt.infersent_model_path
    assert MODEL_PATH is not None, '--infersent_model_path is None!'
    MODEL_PATH = os.path.join(MODEL_PATH, 'infersent%s.pkl' % model_version)
    params_model = {
        'bsize': 64,
        'word_emb_dim': 300,
        'enc_lstm_dim': 2048,
        'pool_type': 'max',
        'dpout_model': 0.0,
        'version': model_version
    }
    infersent_model = InferSent(params_model)
    infersent_model.load_state_dict(torch.load(MODEL_PATH))
    infersent_model = infersent_model.to(device)
    W2V_PATH = opt.w2v_path
    assert W2V_PATH is not None, '--w2v_path is None!'
    infersent_model.set_w2v_path(W2V_PATH)
    # sentences_path = os.path.join(opt.data_path, 'sentences.pkl')
    # sentences = load_pkl(sentences_path)
    # infersent_model.build_vocab(sentences, tokenize=True)
    infersent_model.build_vocab_k_words(K=100000)
    id_word = get_itow(opt.data_path)  # index -> word mapping
    # Optionally resume from a previous best checkpoint.
    if opt.start_from is not None:
        assert os.path.isdir(opt.start_from), 'opt.start_from must be a dir!'
        state_dict_path = os.path.join(opt.start_from,
                                       opt.model_name + '-bestmodel.pth')
        assert os.path.isfile(state_dict_path), 'bestmodel don\'t exist!'
        model.load_state_dict(torch.load(state_dict_path), strict=True)
        infos_path = os.path.join(opt.start_from,
                                  opt.model_name + '_infos_best.pkl')
        assert os.path.isfile(infos_path), 'infos of bestmodel don\'t exist!'
        with open(infos_path, 'rb') as f:
            infos = pickle.load(f)
        # seed == 0 means "inherit the seed of the resumed run".
        if opt.seed == 0:
            opt.seed = infos['opt'].seed
        best_score = infos.get('best_score', None)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           weight_decay=opt.weight_decay)
    train_patience = 0
    epoch = infos.get('epoch', 0)
    loss_meter = meter.AverageValueMeter()
    while True:
        # Early stopping on the patience counter.
        if train_patience > opt.patience:
            break
        loss_meter.reset()
        # Step-decay learning-rate schedule.
        if opt.learning_rate_decay_start != -1 and epoch > opt.learning_rate_decay_start:
            frac = int((epoch - opt.learning_rate_decay_start) /
                       opt.learning_rate_decay_every)
            decay_factor = opt.learning_rate_decay_rate**frac
            opt.current_lr = opt.learning_rate * decay_factor
            set_lr(optimizer, opt.current_lr)
        else:
            opt.current_lr = opt.learning_rate
        # Scheduled sampling ramp-up.
        if opt.scheduled_sampling_start != -1 and epoch > opt.scheduled_sampling_start:
            frac = int((epoch - opt.scheduled_sampling_start) /
                       opt.scheduled_sampling_increase_every)
            opt.sample_probability = min(
                opt.scheduled_sampling_increase_probability * frac,
                opt.scheduled_sampling_max_probability)
            model.sample_probability = opt.sample_probability
        # Switch to self-critical training after the configured epoch; note
        # this also permanently overrides the checkpoint interval.
        if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
            sc_flag = True
            opt.save_checkpoint_every = 250
        else:
            sc_flag = False
        # sc_flag = True
        for i, (data, caps, caps_mask, cap_classes, class_masks, feats0,
                feats1, feat_mask, pos_feat, lens, gts,
                video_id) in enumerate(train_loader):
            feats0 = feats0.to(device)
            feats1 = feats1.to(device)
            feat_mask = feat_mask.to(device)
            pos_feat = pos_feat.to(device)
            caps = caps.to(device)
            caps_mask = caps_mask.to(device)
            cap_classes = cap_classes.to(device)
            class_masks = class_masks.to(device)
            optimizer.zero_grad()
            if not sc_flag:
                # Supervised phase: word-level LM loss + weighted class loss.
                words, categories = model(feats0, feats1, feat_mask, pos_feat,
                                          caps, caps_mask)
                loss_words = crit(words, caps, caps_mask)
                loss_cate = classify_crit(categories, cap_classes, caps_mask,
                                          class_masks)
                loss = loss_words + opt.weight_class * loss_cate
            elif not opt.eval_semantics:
                # Self-critical phase with the standard (CIDEr) reward.
                sample_dict = {}
                sample_dict.update(vars(opt))
                sample_dict.update({'sample_max': 0})  # stochastic sampling
                probability_sample, sample_logprobs = model.sample(
                    feats0, feats1, feat_mask, pos_feat, sample_dict)
                reward = get_self_critical_reward(model, feats0, feats1,
                                                  feat_mask, pos_feat, gts,
                                                  probability_sample)
                reward = torch.from_numpy(reward).float()
                reward = reward.to(device)
                loss = rl_crit(sample_logprobs, probability_sample, reward)
            else:
                # Self-critical phase with the InferSent semantic reward.
                # NOTE(review): vars(opt) returns opt.__dict__ itself, so the
                # update below mutates opt (unlike the fresh dict used in the
                # branch above) — confirm this side effect is intended.
                sample_dict = vars(opt)
                sample_dict.update(vars(opt))
                sample_dict.update({'sample_max': 0})
                probability_sample, sample_logprobs = model.sample(
                    feats0, feats1, feat_mask, pos_feat, sample_dict)
                reward = get_self_critical_semantics_reward(
                    id_word, infersent_model, model, feats0, feats1, feat_mask,
                    pos_feat, gts, video_id, total_embeddings,
                    probability_sample, sample_dict)
                reward = torch.from_numpy(reward).float()
                reward = reward.to(device)
                loss = rl_crit(sample_logprobs, probability_sample, reward)
            loss.backward()
            clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            train_loss = loss.detach()
            loss_meter.add(train_loss.item())
            # Periodic visdom plotting/logging.
            if i % opt.visualize_every == 0:
                vis.plot('train_loss', loss_meter.value()[0])
                information = 'best_score is ' + (str(best_score) if
                                                  best_score is not None else
                                                  '0.0')
                information += (' reward is '
                                if sc_flag else ' loss is ') + str(
                                    train_loss.item())
                if sc_flag is False:
                    information += ' category_loss is ' + str(
                        loss_cate.cpu().item())
                vis.log(information)
            is_best = False
            # Periodic validation + checkpointing.
            if (i + 1) % opt.save_checkpoint_every == 0:
                # NOTE(review): `eval` here is a project function that shadows
                # the Python builtin of the same name.
                if not opt.eval_semantics:
                    current_loss, current_language_state = eval(
                        infersent_model, model, crit, classify_crit,
                        valid_dataset, vars(opt))
                else:
                    current_semantics_score, current_language_state = eval(
                        infersent_model, model, crit, classify_crit,
                        valid_dataset, vars(opt))
                # Selection metric: CIDEr by default, else semantic score.
                current_score = current_language_state[
                    'CIDEr'] if not opt.eval_semantics else current_semantics_score
                vis.log('{}'.format(
                    'cider score is ' if not opt.eval_semantics else
                    '###################semantics_score is ') +
                        str(current_score) + ' ' + '+' + str(i))
                if best_score is None or current_score > best_score:
                    is_best = True
                    best_score = current_score
                    train_patience = 0
                else:
                    train_patience += 1
                # Always save an iteration-tagged checkpoint + infos pickle.
                infos['opt'] = opt
                infos['iteration'] = i
                infos['best_score'] = best_score
                infos['language_state'] = current_language_state
                infos['epoch'] = epoch
                save_state_path = os.path.join(
                    opt.checkpoint_path, opt.model_name + '_' + str(i) + '.pth')
                torch.save(model.state_dict(), save_state_path)
                save_infos_path = os.path.join(
                    opt.checkpoint_path,
                    opt.model_name + '_' + 'infos_' + str(i) + '.pkl')
                with open(save_infos_path, 'wb') as f:
                    pickle.dump(infos, f)
                # Additionally keep a "best" copy when this checkpoint wins.
                if is_best:
                    save_state_path = os.path.join(
                        opt.checkpoint_path, opt.model_name + '-bestmodel.pth')
                    save_infos_path = os.path.join(
                        opt.checkpoint_path,
                        opt.model_name + '_' + 'infos_best.pkl')
                    torch.save(model.state_dict(), save_state_path)
                    with open(save_infos_path, 'wb') as f:
                        pickle.dump(infos, f)
        epoch += 1
# Build the test loader, optimizer, and run the training epochs.
# NOTE(review): this snippet is truncated — the inner batch loop continues
# beyond what is visible here (forward/backward steps are not shown).
test_loader = torch.utils.data.DataLoader(FeatureDataset(
    data_FN=data_FN, type='test', label_column=label_column),
                                          batch_size=batch_size,
                                          shuffle=False)
optimizer = optim.Adagrad(model.parameters(), lr=lr)
# acc, for report use
best_loss = 1000  # sentinel "infinity" for best-validation-loss tracking
plot_batch = 100  # plotting interval, in batches
print('begin...')
# Log the configuration of this run to visdom for reproducibility.
vis.log('env : {}'.format(vis_env))
vis.log('batch_size : {}'.format(batch_size))
vis.log('learning_rate : {}'.format(lr))
vis.log('task_type : {}'.format(task_type))
for epoch in range(epochs):
    print(epoch)
    model.train()
    for batch_index, (data, target) in enumerate(train_loader):
        # for debug use
        # if batch_index > 10:
        #     break
        index_target = target.clone()  # keep an unwrapped copy of the labels
        # Variable() wrapping — legacy pre-0.4 PyTorch autograd API.
        data, target = Variable(data), Variable(target)
# select model # opt.model = 'lstm' # opt.model = 'cnn' opt.model = 'bilstm' opt.env = opt.model + '_clf' if opt.one_hot is True: opt.env += '_one_hot' # visdom vis = Visualizer(opt.env) # vis log output vis.log('user config:') for k, v in opt.__dict__.items(): if not k.startswith('__'): vis.log('{} {}'.format(k, getattr(opt, k))) # load data # use torchtext to load train_iter, val_iter, test_iter = load_data(opt) model = models.init(opt) print(type(model)) # debug for iterator if opt.debug_iterator is True: