def _gcnn_block(input):
    output = GatedCNN()(input)
    return output
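# For reference: the core of a gated convolutional block, per Dauphin et al.'s
# "Language Modeling with Gated Convolutional Networks", is a gated linear
# unit, h = (X*W + b) * sigmoid(X*V + c). The GatedCNN class used above is
# defined elsewhere in this repo; the sketch below is only a minimal,
# self-contained illustration of that gating mechanism, not the repo's
# implementation, and the class name GLUConv1d is chosen here for illustration.
import torch
import torch.nn as nn

class GLUConv1d(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size):
        super().__init__()
        self.conv = nn.Conv1d(in_ch, out_ch, kernel_size)  # linear path, X*W + b
        self.gate = nn.Conv1d(in_ch, out_ch, kernel_size)  # gate path, X*V + c

    def forward(self, x):  # x: (batch, channels, seq_len)
        # Elementwise gating lets the network control which features pass through.
        return self.conv(x) * torch.sigmoid(self.gate(x))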
import collections

import torch
from torch.nn import DataParallel

# Build a vocabulary from the (vocab_size - 1) most frequent words; index 0
# is reserved for '<unk>', so known words are numbered from 1.
word_counter = collections.Counter(words).most_common(vocab_size - 1)
vocab = [w for w, _ in word_counter]
w2i = dict((w, i) for i, w in enumerate(vocab, 1))
w2i['<unk>'] = 0
print('vocab_size', vocab_size)
print('w2i size', len(w2i))

# Map words to ids (out-of-vocabulary words fall back to 0), batch them, and
# hold out the last 20% as a test split.
data = [w2i[w] if w in w2i else 0 for w in words]
data = create_batches(data, batch_size, seq_len)
split_idx = int(len(data) * 0.8)
training_data = data[:split_idx]
test_data = data[split_idx:]
print('train samples:', len(training_data))
print('test samples:', len(test_data))

model = GatedCNN(seq_len, vocab_size, embd_size, n_layers, kernel, out_chs,
                 res_block_count, vocab_size)
cuda = torch.cuda.is_available()
if cuda:
    print("cuda")
    model.cuda()

distributed_mode = False
if distributed_mode:
    # sampler = DistributedSampler(training_data, num_replicas=world_size, rank=rank)
    if cuda:
        model = DataParallel(model)
    else:
        # Legacy API, removed in newer PyTorch releases; imported lazily so the
        # non-distributed path still runs on current versions.
        from torch.nn.parallel import DistributedDataParallelCPU
        model = DistributedDataParallelCPU(model)
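# create_batches is referenced above but not defined in this snippet. A
# hypothetical reconstruction, consistent with how its output is consumed
# ((X, Y) pairs where X is a (batch_size, seq_len) block of word ids and Y
# holds the next word for each row), might look like this:
def create_batches(ids, batch_size, seq_len):
    batches = []
    last_start = len(ids) - seq_len - 1  # last position with a next-word target
    for i in range(0, last_start - batch_size + 1, batch_size):
        X = [ids[j:j + seq_len] for j in range(i, i + batch_size)]  # input windows
        Y = [ids[j + seq_len] for j in range(i, i + batch_size)]    # next-word targets
        batches.append((X, Y))
    return batches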
import collections
import time
from datetime import datetime

import torch
import torch.nn as nn
import torch.distributed as dist


def train(gpu, args):
    # Global rank of this process: node index * GPUs per node + local GPU id.
    rank = args.nr * args.gpus + gpu
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.world_size, rank=rank)
    torch.manual_seed(0)

    # Build the vocabulary and batched splits exactly as above.
    words = read_words(
        '/users/PAS1588/liuluyu0378/lab1/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled',
        seq_len, kernel[0])
    word_counter = collections.Counter(words).most_common(vocab_size - 1)
    vocab = [w for w, _ in word_counter]
    w2i = dict((w, i) for i, w in enumerate(vocab, 1))
    w2i['<unk>'] = 0
    print('vocab_size', vocab_size)
    print('w2i size', len(w2i))

    data = [w2i[w] if w in w2i else 0 for w in words]
    data = create_batches(data, batch_size, seq_len)
    split_idx = int(len(data) * 0.8)
    training_data = data[:split_idx]
    test_data = data[split_idx:]
    print('train samples:', len(training_data))
    print('test samples:', len(test_data))

    model = GatedCNN(seq_len, vocab_size, embd_size, n_layers, kernel,
                     out_chs, res_block_count, vocab_size)
    torch.cuda.set_device(gpu)
    model.cuda(gpu)

    # Wrap the model for multi-process training. DistributedDataParallel (not
    # DataParallel) is the correct wrapper here: each spawned process owns one
    # GPU, and gradients are all-reduced across processes.
    model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
    print("model transferred")

    optimizer = torch.optim.Adadelta(model.parameters())
    loss_fn = nn.NLLLoss()  # expects log-probabilities from the model

    # Shard the training data across processes; the sampler handles shuffling,
    # so the loader itself must not.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        training_data, num_replicas=args.world_size, rank=rank)
    train_loader = torch.utils.data.DataLoader(dataset=training_data,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               sampler=train_sampler)
    start = datetime.now()
    print("loaded")

    for epoch in range(args.epochs):
        a = time.time()
        print('----epoch', epoch)
        train_sampler.set_epoch(epoch)  # reshuffle the shards each epoch
        for batch_ct, (X, Y) in enumerate(train_loader):
            X = to_var(torch.LongTensor(X))  # (bs, seq_len)
            Y = to_var(torch.LongTensor(Y))  # (bs,)
            pred = model(X)                  # (bs, ans_size)
            loss = loss_fn(pred, Y)
            if batch_ct % 100 == 0:
                print('loss: {:.4f}'.format(loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        b = time.time()
        print('current performance at epoch', epoch, 'time:', b - a)
    if gpu == 0:
        print("Training complete in: " + str(datetime.now() - start))
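# train(gpu, args) is written for a spawn-style launcher, but none appears in
# this snippet. A minimal sketch following the standard torch.multiprocessing
# pattern for env:// initialization is shown below; the flag names (--nodes,
# --gpus, --nr, --epochs) mirror the attributes train() reads, and the
# rendezvous address and port are placeholder values.
import os
import argparse
import torch.multiprocessing as mp


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nodes', default=1, type=int)
    parser.add_argument('--gpus', default=1, type=int, help='GPUs per node')
    parser.add_argument('--nr', default=0, type=int, help='rank of this node')
    parser.add_argument('--epochs', default=2, type=int)
    args = parser.parse_args()
    args.world_size = args.gpus * args.nodes
    # init_method='env://' reads the rendezvous address from the environment.
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '8888'
    # One training process per local GPU; each receives its local gpu index.
    mp.spawn(train, nprocs=args.gpus, args=(args,))


if __name__ == '__main__':
    main()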
if FLAGS.model == "cnn":  # opening condition truncated in the original; flag value assumed
    cnn = TextCNN(
        sequence_length=x_train.shape[1],
        num_classes=len(y_train[1]),
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=FLAGS.embedding_dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda,
        learning_rate=FLAGS.learning_rate)
elif FLAGS.model == "gate_cnn":
    from gated_cnn import GatedCNN
    cnn = GatedCNN(
        sequence_length=x_train.shape[1],
        num_classes=len(y_train[1]),
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=FLAGS.embedding_dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda,
        learning_rate=FLAGS.learning_rate)
elif FLAGS.model == "gate_cnn_nopadding":
    from gated_cnn_nopadding import GatedCNN_nopadding
    cnn = GatedCNN_nopadding(
        sequence_length=x_train.shape[1],
        num_classes=len(y_train[1]),
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=FLAGS.embedding_dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda,
        learning_rate=FLAGS.learning_rate)
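# All three branches pass identical constructor arguments; only the class
# differs. A table-driven equivalent is sketched below. It assumes all three
# model classes have been imported up front rather than inside the branches;
# otherwise it is the same call as above.
model_classes = {
    "cnn": TextCNN,
    "gate_cnn": GatedCNN,
    "gate_cnn_nopadding": GatedCNN_nopadding,
}
cnn = model_classes[FLAGS.model](
    sequence_length=x_train.shape[1],
    num_classes=len(y_train[1]),
    vocab_size=len(vocab_processor.vocabulary_),
    embedding_size=FLAGS.embedding_dim,
    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
    num_filters=FLAGS.num_filters,
    l2_reg_lambda=FLAGS.l2_reg_lambda,
    learning_rate=FLAGS.learning_rate)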