def main():
    """Train the ImageQA RNN model on pre-extracted VGG features.

    Loads the pickled training set, one-hot encodes the answers, pads the
    question sequences to ``maxlen``, and hands everything to ``RNN.train``.
    """
    # NOTE(review): hard-coded user-specific path — consider a CLI argument.
    os.chdir('/home/seonhoon/Desktop/workspace/ImageQA/data/')

    n_vocab = 12047   # question vocabulary size
    y_vocab = 430     # number of answer classes
    dim_word = 1024   # word embedding dimension
    dim = 1024        # RNN hidden dimension
    maxlen = 60       # maximum question length in tokens

    train = pd.read_pickle('train_vgg.pkl')

    train_x = [q for q in train['q']]
    # a[0]: keep only the first answer for each question.
    train_y = [a[0] for a in train['a']]
    train_y = np.array(train_y)[:, None]
    train_y = np_utils.to_categorical(train_y, y_vocab).astype('int32')

    train_x, train_x_mask = prepare_data(train_x, maxlen)
    train_x_img = np.array(
        [img.tolist() for img in train['cnn_feature']]).astype('float32')

    # Fixed: the original used Python 2 print statements, which are a syntax
    # error under Python 3 (the rest of this codebase uses print()).
    print('x :', train_x.shape)
    print('x_mask:', train_x_mask.shape)
    print('x_img:', train_x_img.shape)
    print('y : ', train_y.shape)

    model = RNN(n_vocab, y_vocab, dim_word, dim)
    model.train(train_x, train_x_mask, train_x_img, train_y,
                batch_size=512, epoch=50, save=15)
def main(args):
    """Run the trained C3D + RNN models over the test sessions and report
    the accumulated L1 errors for the air and bed outputs."""
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    loader = data.DataLoader(
        DataLayer(args.data_root, args.test_session_set),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    # Restore both networks and switch them to inference mode.
    c3d_model = C3D().to(device)
    c3d_model.load_state_dict(torch.load(args.c3d_pth))
    c3d_model.train(False)

    rnn_model = RNN().to(device)
    rnn_model.load_state_dict(torch.load(args.rnn_pth))
    rnn_model.train(False)

    air_criterion = nn.L1Loss().to(device)
    bed_criterion = nn.L1Loss().to(device)

    air_errors, bed_errors = 0.0, 0.0
    start = time.time()

    with torch.set_grad_enabled(False):
        for batch_idx, batch in enumerate(loader):
            c3d_data, rnn_data, air_target, bed_target, data_path = batch
            print('Processing {}/{}, {:3.3f}%'.format(
                data_path[0], str(batch_idx).zfill(5) + '.mat',
                100.0 * batch_idx / len(loader)))

            c3d_data = c3d_data.to(device)
            rnn_data = rnn_data.to(device)
            air_target = air_target.to(device)
            bed_target = bed_target.to(device)

            # C3D features seed the RNN's initial state.
            air_feature, bed_feature = c3d_model.features(c3d_data)
            init = torch.cat((air_feature, bed_feature), 1)
            air_output, bed_output = rnn_model(rnn_data, init)

            # NOTE: Save these air and bed layers for visualization
            air_layer = (air_output.to('cpu').numpy() + 1) * 412
            bed_layer = (bed_output.to('cpu').numpy() + 1) * 412

            air_errors += air_criterion(air_output, air_target).item()
            bed_errors += bed_criterion(bed_output, bed_target).item()

    end = time.time()
    print('Finish all, errors (air): {:4.2f} (bed): {:4.2f}, | '
          'total running time: {:.2f} sec'.format(
              air_errors / len(loader.dataset) * 412,
              bed_errors / len(loader.dataset) * 412,
              end - start,
          ))
def main(args):
    """Evaluate a trained two-stream model on the Test split and print accuracy."""
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    data_loader = data.DataLoader(
        DataLayer(
            data_root=osp.join(args.data_root, 'Test'),
            phase='Test',
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )

    # Guard clause: fail fast when the checkpoint file is missing.
    if not osp.isfile(args.checkpoint):
        raise RuntimeError('Cannot find the checkpoint {}'.format(
            args.checkpoint))
    checkpoint = torch.load(args.checkpoint)

    model = Model().to(device)
    model.load_state_dict(checkpoint)
    model.train(False)

    softmax = nn.Softmax(dim=1).to(device)
    corrects = 0.0

    with torch.set_grad_enabled(False):
        for batch_idx, (spatial, temporal, length, target) in enumerate(data_loader):
            # Reorder the batch by decreasing sequence length, as required
            # by pack_padded_sequence.
            order = utl.argsort(length)[::-1]
            spatial_input = torch.zeros(*spatial.shape)
            temporal_input = torch.zeros(*temporal.shape)
            target_input, length_input = [], []
            for dst, src in enumerate(order):
                spatial_input[dst] = spatial[src]
                temporal_input[dst] = temporal[src]
                target_input.append(target[src])
                length_input.append(length[src])

            spatial_input = spatial_input.to(device)
            temporal_input = temporal_input.to(device)
            target_input = torch.LongTensor(target_input).to(device)

            pack1 = pack_padded_sequence(spatial_input, length_input,
                                         batch_first=True)
            pack2 = pack_padded_sequence(temporal_input, length_input,
                                         batch_first=True)

            score = model(pack1, pack2)
            pred = torch.max(softmax(score), 1)[1].cpu()
            corrects += torch.sum(pred == target_input.cpu()).item()

    print('The accuracy is {:.4f}'.format(corrects / len(data_loader.dataset)))
def trainWithRNNGRU(rnn_token_train_features, rnn_token_train_labels,
                    rnn_token_test_features, rnn_token_test_labels,
                    results, algorithms, vocab_size, layers, isFinal,
                    doTraining, existentModel):
    """Build, optionally train, persist, and evaluate a GRU-based RNN model.

    Records the trained model under ``algorithms['RNN_GRU']`` and its test
    curves under ``results['RNN_GRU']``; returns (rnn, results, algorithms).
    """
    print("T W GRU vocab", vocab_size)

    rnn = RNN(vocab_size=vocab_size,
              create_model_callback=create_model_RNN_GRU,
              title="GRU-TRAIN-D-{}".format(layers),
              layers=layers,
              isFinal=isFinal,
              existentModel=existentModel)

    # Convert once up front instead of repeating np.array(...) at each call.
    X_train = np.array(rnn_token_train_features, dtype=float)
    y_train = np.array(rnn_token_train_labels, dtype=float)
    X_test = np.array(rnn_token_test_features, dtype=float)
    y_test = np.array(rnn_token_test_labels, dtype=float)

    print(np.array(rnn_token_train_features).shape,
          np.array(rnn_token_train_labels).shape)

    if doTraining:
        rnn.train(X=X_train, y=y_train, X_test=X_test, y_test=y_test)
    else:
        print(rnn.title, "-NOT TRAINING")

    model_name = 'GRU-D-{}.model'.format(layers)
    print("SAVING MODEL & WEIGHTS USING H5: ", model_name)
    try:
        print(rnn.model.best_estimator_.model)
        rnn.model.best_estimator_.model.save_weights(model_name + "_weights.h5")
        rnn.model.best_estimator_.model.save(model_name + ".h5")
    except Exception as e:
        print("Cannot save {} because: {}\n\n".format(model_name, str(e)))

    algorithms["RNN_GRU"] = rnn
    rnn.drawCurves(X=X_train, y=y_train)
    rnn.title = "GRU-TEST-D-{}".format(layers)
    results["RNN_GRU"] = rnn.drawCurves(X=X_test, y=y_test)
    return rnn, results, algorithms
# Transfer to Pytorch Variable
# NOTE(review): torch.autograd.Variable has been a deprecated no-op since
# PyTorch 0.4; these wrappers can be dropped once old-version support is
# no longer needed.
X_train_dep_std = Variable(torch.from_numpy(X_train_dep_std).float())
y_train_dep_std = Variable(torch.from_numpy(y_train_dep_std).float())
X_test_dep_std = Variable(torch.from_numpy(X_test_dep_std).float())

# Define rnn model
model = RNN(input_size=5, hidden_size=40, num_layers=2, class_size=1,
            dropout=0.5, rnn_type='lstm', dropout_bool=True)

# Define optimization function
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # optimize all rnn parameters

# Define loss function
loss_func = nn.MSELoss()

# Start training
# Fixed: the loop variable was named `iter`, which shadowed the builtin.
for step in range(10000):
    model.train()
    prediction = model(X_train_dep_std)
    loss = loss_func(prediction, y_train_dep_std)
    optimizer.zero_grad()  # clear gradients for this training step
    loss.backward()        # back propagation, compute gradients
    optimizer.step()
    if step % 100 == 0:
        print("iteration: %s, loss: %s" % (step, loss.item()))

# Save model
# NOTE(review): this pickles the full module object, which ties the
# checkpoint to the class definition; model.state_dict() would be more
# portable — confirm how the checkpoint is loaded before changing.
save_filename = 'checkpoints/LSTM_DOUBLE_FC.pth'
torch.save(model, save_filename)
print('Saved as %s' % save_filename)

# Start evaluating model
model.eval()
def main(args):
    """Train the two-stream (spatial + temporal) model over the phases in
    ``args.phases`` (expected to include 'Train', 'Validation', 'Test'),
    printing per-epoch losses and saving checkpoints at test intervals."""
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # One dataset and one loader per phase ('Train' / 'Validation' / 'Test').
    datasets = {
        phase: DataLayer(
            data_root=osp.join(args.data_root, phase),
            phase=phase,
        )
        for phase in args.phases
    }
    data_loaders = {
        phase: data.DataLoader(
            datasets[phase],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
        )
        for phase in args.phases
    }

    model = Model(
        input_size=args.input_size,
        hidden_size=args.hidden_size,
        bidirectional=args.bidirectional,
        num_classes=args.num_classes,
    ).apply(utl.weights_init).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    softmax = nn.Softmax(dim=1).to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)

    for epoch in range((args.start_epoch), args.start_epoch + args.epochs):
        losses = {phase: 0.0 for phase in args.phases}
        corrects = {phase: 0.0 for phase in args.phases}
        start = time.time()

        for phase in args.phases:
            # Any phase without 'Test' in its name is treated as training;
            # test phases only run at the configured intervals.
            training = 'Test' not in phase
            if training:
                model.train(True)
            else:
                if epoch in args.test_intervals:
                    model.train(False)
                else:
                    continue

            with torch.set_grad_enabled(training):
                for batch_idx, (spatial, temporal, length, target) in enumerate(data_loaders[phase]):
                    spatial_input = torch.zeros(*spatial.shape)
                    temporal_input = torch.zeros(*temporal.shape)
                    target_input = []
                    length_input = []

                    # Sort the batch by decreasing length, as required by
                    # pack_padded_sequence.
                    index = utl.argsort(length)[::-1]
                    for i, idx in enumerate(index):
                        spatial_input[i] = spatial[idx]
                        temporal_input[i] = temporal[idx]
                        target_input.append(target[idx])
                        length_input.append(length[idx])
                    spatial_input = spatial_input.to(device)
                    temporal_input = temporal_input.to(device)
                    target_input = torch.LongTensor(target_input).to(device)
                    pack1 = pack_padded_sequence(spatial_input, length_input, batch_first=True)
                    pack2 = pack_padded_sequence(temporal_input, length_input, batch_first=True)

                    score = model(pack1, pack2)
                    loss = criterion(score, target_input)
                    # Weight by batch size so the epoch average below is
                    # per-example, not per-batch.
                    losses[phase] += loss.item() * target_input.shape[0]
                    if args.debug:
                        print(loss.item())
                    if training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                    else:
                        # Accuracy is only accumulated for test phases.
                        pred = torch.max(softmax(score), 1)[1].cpu()
                        corrects[phase] += torch.sum(
                            pred == target_input.cpu()).item()
        end = time.time()

        # NOTE(review): this summary line assumes 'Train', 'Validation' and
        # 'Test' are all present in args.phases; Test numbers are stale (0.0)
        # on epochs outside args.test_intervals.
        print('Epoch {:2} | '
              'Train loss: {:.5f} Val loss: {:.5f} | '
              'Test loss: {:.5f} accuracy: {:.5f} | '
              'running time: {:.2f} sec'.format(
                  epoch,
                  losses['Train'] / len(data_loaders['Train'].dataset),
                  losses['Validation'] / len(data_loaders['Validation'].dataset),
                  losses['Test'] / len(data_loaders['Test'].dataset),
                  corrects['Test'] / len(data_loaders['Test'].dataset),
                  end - start,
              ))

        if epoch in args.test_intervals:
            torch.save(
                model.state_dict(),
                osp.join(this_dir, './state_dict-epoch-' + str(epoch) + '.pth'))
# Image-captioning training setup: cross-entropy over packed caption tokens.
criterion = nn.CrossEntropyLoss()

print('Loading dataset...')
trainset = MSCOCO(VOCAB_SIZE, train_imagepaths_and_captions, transform_train)
trainloader = torch.utils.data.DataLoader(dataset=trainset,
                                          batch_size=BATCH_SIZE,
                                          collate_fn=collate_fn,
                                          shuffle=True,
                                          drop_last=False,
                                          num_workers=NUM_WORKERS)
valset = MSCOCO_VAL(VOCAB_SIZE, val_imagepaths_and_captions, transform_val)
valloader = torch.utils.data.DataLoader(dataset=valset,
                                        batch_size=BATCH_SIZE,
                                        collate_fn=collate_fn_val,
                                        shuffle=False,
                                        drop_last=False,
                                        num_workers=NUM_WORKERS)

# TensorBoard logging.
writer = SummaryWriter(log_dir)

for epoch in range(current_epoch, EPOCHS+1):
    start_time_epoch = time.time()
    encoder.train()
    decoder.train()
    print('[%d] epoch starts training...'%epoch)
    trainloss = 0.0
    for batch_idx, (images, captions, lengths) in enumerate(trainloader, 1):
        images = images.cuda()
        captions = captions.cuda()
        # NOTE(review): recent PyTorch requires the `lengths` argument of
        # pack_padded_sequence to be a 1-D CPU int64 tensor; moving lengths
        # to CUDA here may raise on newer versions — confirm torch version.
        lengths = lengths.cuda()
        # when doing forward propagation, we do not input end word key; when
        # calculating loss, we do not count start word key.
        lengths -= 1
        # throw out the start word key when calculating loss.
        targets = rnn_utils.pack_padded_sequence(captions[:, 1:], lengths,
                                                 batch_first=True)[0]
        encoder.zero_grad()
        decoder.zero_grad()
        # NOTE(review): the rest of this training step (forward pass, loss,
        # backward, optimizer step) is not visible in this chunk.
class TextClassifier:
    """Train and evaluate an RNN text classifier over torchtext-style iterators.

    Wraps a project ``DataReader`` (train/val/test splits) plus an ``RNN``
    model; ``train`` runs SGD for ``iterations`` epochs, tracking loss and
    accuracy history and re-evaluating on the test split whenever validation
    accuracy improves.
    """

    def __init__(self, batch_size, iterations, initial_lr, hidden_size,
                 dropout, kernel_sz, num_layers):
        # Prefer the first CUDA device when available.
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        self.data = DataReader()
        train_iter, val_iter, test_iter = self.data.init_dataset(
            batch_size, ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(train_iter, 'text', 'label')
        self.val_batch_loader = BatchGenerator(val_iter, 'text', 'label')
        self.test_batch_loader = BatchGenerator(test_iter, 'text', 'label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr

        # Create the model.
        # NOTE(review): `kernel_sz` is kept for interface compatibility with
        # the former CNN variant but is unused by the RNN path.
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        self.model = RNN(emb_size=emb_size, emb_dimension=emb_dim,
                         pretrained_emb=self.data.TEXT.vocab.vectors,
                         output_size=len(self.data.LABEL.vocab),
                         num_layers=num_layers, hidden_size=hidden_size,
                         dropout=dropout)
        if self.use_cuda:
            self.model.cuda()

    def train(self, min_stride=3):
        """Run ``self.iterations`` epochs of SGD.

        Args:
            min_stride: unused; kept for interface compatibility.

        Returns:
            (train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist,
             test_acc_hist)
        """
        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []
        best_score = 0.0

        # Fixed: `test_acc` was unbound (NameError) if validation accuracy
        # never exceeded best_score on the first epoch.
        test_acc = 0.0

        # Fixed: the optimizer was re-created inside the epoch loop on every
        # iteration; plain SGD without momentum carries no state, so hoisting
        # it here behaves identically and avoids the waste.
        optimizer = optim.SGD(self.model.parameters(), lr=self.initial_lr)

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            self.model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            data_iter = iter(self.train_batch_loader)
            for i in range(len(self.train_batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(data_iter)
                optimizer.zero_grad()
                loss, logits = self.model.forward(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)
                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1
                loss.backward()
                optimizer.step()

            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.train_data))

            val_loss, val_acc = self.eval_model(self.val_batch_loader,
                                                len(self.data.val_data))
            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            # Re-evaluate on the test split only when validation improves;
            # otherwise the previous test accuracy is carried forward.
            if val_acc > best_score:
                best_score = val_acc
                test_loss, test_acc = self.eval_model(
                    self.test_batch_loader, len(self.data.test_data))

            print("Train: {Loss: " + str(total_loss / steps) +
                  ", Acc: " + str(total_acc / len(self.data.train_data)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) + " }")

            test_acc_hist.append(test_acc)

        return (train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist,
                test_acc_hist)

    def eval_model(self, batch_loader, N, min_stride=3):
        """Compute mean loss and accuracy of ``self.model`` over a loader.

        Args:
            batch_loader: iterable yielding ((x, x_len), y) batches.
            N: number of examples used to normalize both totals.
            min_stride: unused; kept for interface compatibility.

        Returns:
            (mean_loss, mean_accuracy) averaged over N examples.
        """
        self.model.eval()
        total_loss = 0.0
        total_acc = 0.0
        batch_iterator = iter(batch_loader)
        with torch.no_grad():
            for i in range(len(batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(batch_iterator)
                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)
                total_loss += loss.item()
                total_acc += acc.item()
        return (total_loss / N), (total_acc / N)
class Trainer:
    """Trainer for the CNN-encoder / RNN-decoder image-captioning model.

    Handles training, periodic COCO-style validation (CIDEr-based model
    selection), checkpointing, and optional encoder fine-tuning.
    """

    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)

        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)

        self.loss_fn = nn.CrossEntropyLoss()
        # Only decoder parameters are optimized initially; see get_params().
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)

        self.writer = SummaryWriter()
        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path, val_path):
        """Unfreeze the encoder, train both nets with separate learning
        rates for `fine_tune_epochs`, then re-freeze the encoder."""
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs,
              'epochs', '*' * 20)
        self.encoder.fine_tune()
        # Rebuild the optimizer with per-module learning rates.
        self.optimizer = torch.optim.Adam([
            {
                'params': self.encoder.parameters(),
                'lr': self.ft_encoder_lr
            },
            {
                'params': self.decoder.parameters(),
                'lr': self.ft_decoder_lr
            },
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """All parameters the model needs to optimize. The encoder is not
        trained in the initial design, so its parameters are excluded.

        :return: list of decoder parameters.
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """Train the captioning model.

        :param val_path: path for saving sentences generated during validation
        :param save_path: path for saving the model
        :param val_interval: interval (in epochs) between validations
        :param max_epochs: maximum number of training epochs
        :return: None
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()
            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap, cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder.forward(img)
                outputs = self.decoder.forward(features, cap)

                # Drop the first token (start key) on both sides and pack,
                # so padding does not contribute to the loss.
                outputs = pack_padded_sequence(outputs, cap_len - 1,
                                               batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:], cap_len - 1,
                                               batch_first=True)[0]

                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))

            # Validate every val_interval epochs; keep the best-CIDEr model.
            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """Save the best model.

        :param save_path: path of the saved checkpoint file
        :param train_epoch: current training epoch
        :return: None
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            # NOTE(review): 'tran_epoch' looks like a typo for 'train_epoch',
            # but it is a persisted checkpoint key — existing checkpoints and
            # loaders depend on it, so it is left unchanged here.
            'tran_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """Validate the model.

        :param val_path: path for saving sentences generated during validation
        :param train_epoch: current training epoch
        :return: CIDEr score of the generated captions
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                # Greedy/beam sampling; only the first sentence is kept.
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)
        return scores['CIDEr']

    def set_train(self):
        # Put both sub-networks in training mode.
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        # Put both sub-networks in evaluation mode.
        self.encoder.eval()
        self.decoder.eval()
# Train a GRU sentiment classifier on pre-trained word vectors (wv),
# checking accuracy on train/valid/test splits each epoch.
num_epoch = 100
lr = 0.002
rnn = RNN("gru", hidden_size, embedding_size, lr=lr)
rnn.build_graph()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(num_epoch):
    for _ in range(num_batch_per_epoch):
        batch = data.get_train_batch(batch_size)
        # Each tweet becomes a sequence of word vectors looked up in wv.
        batch_X = [np.array([wv[w] for w in tweet.words]) for tweet in batch]
        # Binary label: 1 for "bull", 0 otherwise.
        batch_y = [int(tweet.label == "bull") for tweet in batch]
        _, _ = rnn.train(batch_X, batch_y, sess)
    ## accuracy check, train, valid, test sets
    train_X = [np.array([wv[w] for w in tweet.words]) for tweet in data.train]
    train_y = [int(tweet.label == "bull") for tweet in data.train]
    train_ent, train_acc = rnn.cal_accuracy(train_X, train_y, sess)
    valid_X = [np.array([wv[w] for w in tweet.words]) for tweet in data.valid]
    valid_y = [int(tweet.label == "bull") for tweet in data.valid]
    _, valid_acc = rnn.cal_accuracy(valid_X, valid_y, sess)
    # NOTE(review): source is truncated here mid-statement (test-set
    # evaluation is cut off in this chunk).
    test_X = [
# Variant of the GRU training script that uses a learned embedding layer
# (word indexes) instead of pre-trained word vectors.
lr = 0.002
embedding_size = 100  ## embedding
rnn = RNN("gru", hidden_size, embedding_size, lr=lr)
rnn.build_graph(embedding=True, vocab_size=len(w2i), embedding_size=100)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(num_epoch):
    for _ in range(num_batch_per_epoch):
        batch = data.get_train_batch(batch_size)
        # Feed integer word indexes; the model embeds them internally.
        batch_X = [np.array(tweet.word_indexes) for tweet in batch]
        batch_y = [int(tweet.label=="bull") for tweet in batch]
        _, _= rnn.train(batch_X, batch_y, sess, embedding=True)
    ## accuracy check, train, valid, test sets
    train_X = [np.array(tweet.word_indexes) for tweet in data.train]
    train_y = [int(tweet.label == "bull") for tweet in data.train]
    train_ent, train_acc = rnn.cal_accuracy(train_X, train_y, sess, embedding=True)
    valid_X = [np.array(tweet.word_indexes) for tweet in data.valid]
    valid_y = [int(tweet.label == "bull") for tweet in data.valid]
    _, valid_acc = rnn.cal_accuracy(valid_X, valid_y, sess, embedding=True)
    test_X = [np.array(tweet.word_indexes) for tweet in data.test]
    test_y = [int(tweet.label == "bull") for tweet in data.test]
    _, test_acc = rnn.cal_accuracy(test_X, test_y, sess, embedding=True)
    # NOTE(review): source is truncated here mid-call (the print's remaining
    # arguments are cut off in this chunk).
    print("Current epoch", epoch, "\tCross entropy", train_ent,
import models
import torch
import torch.nn.functional as F
from torch.utils.model_zoo import load_url
from base64 import b64encode
import age_class

# Pre-trained Inception-v3 weights from the official PyTorch model zoo.
inception_url = 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth'

# Load the image CNN and switch it to inference mode (train(False) returns
# the module itself).
cnn = getCNN()
cnn.load_state_dict(load_url(inception_url, map_location=torch.device('cpu')))
cnn = cnn.train(False)

# Caption RNN weights from a local checkpoint, mapped to CPU.
rnn = RNN()
rnn.load_state_dict(torch.load('net_param.pt', torch.device('cpu')))
rnn = rnn.train(False)

# Emotion-classification CNN.
emotions = CNN_emotions()
emotions.load_state_dict(torch.load('emotions.pth', torch.device('cpu')))
emotions = emotions.train(False)

# NOTE(review): 'vacabulary' appears misspelled, but it is the attribute
# name exposed by the external `models` module — verify there before renaming.
vocabulary = models.vacabulary
batch_of_captions_into_matrix = models.batch_of_captions_into_matrix

app = Flask(__name__)

# Emotion class index -> human-readable label.
# NOTE(review): source is truncated here mid-dict in this chunk.
tags = {
    0: 'Angry',
    1: 'Disgust',
    2: 'Fear',
    3: 'Happy',