# Encode the 944K phrase pairs and group them for support-set training
train_data_944K = data.encode_phrase_pairs_RLTR(phrase_pairs_944K, emb_dict)
train_data_944K = data.group_train_data_RLTR_for_support(train_data_944K)

# Load the full and weak 944K dictionaries
dict944k = data.get944k(DICT_944K)
log.info("Reading dict944k from %s is done. %d pairs in dict944k.",
         DICT_944K, len(dict944k))
dict944k_weak = data.get944k(DICT_944K_WEAK)
log.info("Reading dict944k_weak from %s is done. %d pairs in dict944k_weak.",
         DICT_944K_WEAK, len(dict944k_weak))

# Shuffle the training data, split it into train/test sets, and log the run configuration
rand = np.random.RandomState(data.SHUFFLE_SEED)
rand.shuffle(train_data)
train_data, test_data = data.split_train_test(train_data, TRAIN_RATIO)
log.info("Training data converted, got %d samples", len(train_data))
log.info("Train set has %d phrases, test %d", len(train_data), len(test_data))
log.info("Batch size is %d", args.batches)
log.info("Number of support sets is %d", args.supportsets)

# Report which model variants are enabled
if args.att:
    log.info("Using attention mechanism to train the SEQ2SEQ model...")
else:
    log.info("Training the SEQ2SEQ model without attention mechanism...")
if args.lstm:
    log.info("Using LSTM cells to train the SEQ2SEQ model...")
else:
    log.info("Using RNN cells to train the SEQ2SEQ model...")
if args.embed_grad:
    log.info(
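# For reference only: one plausible implementation of a ratio-based split such
# as data.split_train_test(train_data, TRAIN_RATIO). The real helper in the
# data module may differ; the default ratio of 0.95 is an assumption.
def split_train_test_sketch(items, train_ratio=0.95):
    # Keep the first train_ratio fraction for training, the rest for testing
    split_idx = int(len(items) * train_ratio)
    return items[:split_idx], items[split_idx:]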
# Parse arguments and select the training device
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")

saves_path = os.path.join(SAVES_DIR, args.name)
os.makedirs(saves_path, exist_ok=True)

# Load phrase pairs and the embedding dictionary, then persist the dictionary
phrase_pairs, emb_dict = data.load_data(genre_filter=args.data)
log.info("Obtained %d phrase pairs with %d uniq words",
         len(phrase_pairs), len(emb_dict))
data.save_emb_dict(saves_path, emb_dict)
end_token = emb_dict[data.END_TOKEN]

# Encode, shuffle, split, and group the training data
train_data = data.encode_phrase_pairs(phrase_pairs, emb_dict)
rand = np.random.RandomState(data.SHUFFLE_SEED)
rand.shuffle(train_data)
train_data, test_data = data.split_train_test(train_data)
log.info("Training data converted, got %d samples", len(train_data))
train_data = data.group_train_data(train_data)
test_data = data.group_train_data(test_data)
log.info("Train set has %d phrases, test %d", len(train_data), len(test_data))

# Reverse mapping from token index back to word, used when decoding model output
rev_emb_dict = {idx: word for word, idx in emb_dict.items()}

# Build the seq2seq model and restore weights from a previous training run
net = model.PhraseModel(emb_size=model.EMBEDDING_DIM, dict_size=len(emb_dict),
                        hid_size=model.HIDDEN_STATE_SIZE).to(device)
log.info("Model: %s", net)

writer = SummaryWriter(comment="-" + args.name)
net.load_state_dict(torch.load(args.load))
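# For reference only: how a reverse dictionary such as rev_emb_dict is
# typically used to turn a sequence of decoder output indices back into text.
# The helper name untokenize() is hypothetical and not part of the listing.
def untokenize(indices, rev_emb_dict, end_token):
    words = []
    for idx in indices:
        if idx == end_token:  # stop at the end-of-sequence token
            break
        # Fall back to a placeholder string for unknown indices
        words.append(rev_emb_dict.get(idx, "#UNK"))
    return " ".join(words)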
# Parse arguments and select the training device
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")

saves_path = os.path.join(SAVES_DIR, args.name)
os.makedirs(saves_path, exist_ok=True)

# Load phrase pairs and the embedding dictionary, then persist the dictionary
phrase_pairs, emb_dict = data.load_data(genre_filter=args.data)
log.info("Obtained %d phrase pairs with %d uniq words",
         len(phrase_pairs), len(emb_dict))
data.save_emb_dict(saves_path, emb_dict)
end_token = emb_dict[data.END_TOKEN]

# Encode, shuffle, and split the training data
train_data = data.encode_phrase_pairs(phrase_pairs, emb_dict)
rand = np.random.RandomState(data.SHUFFLE_SEED)
rand.shuffle(train_data)
log.info("Training data converted, got %d samples", len(train_data))
train_data, test_data = data.split_train_test(train_data)
log.info("Train set has %d phrases, test %d", len(train_data), len(test_data))

# Build the seq2seq model
net = model.PhraseModel(emb_size=model.EMBEDDING_DIM, dict_size=len(emb_dict),
                        hid_size=model.HIDDEN_STATE_SIZE).to(device)
log.info("Model: %s", net)

writer = SummaryWriter(comment="-" + args.name)
optimiser = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# Training loop: track the loss and BLEU score for each epoch
best_bleu = None
for epoch in range(MAX_EPOCHES):
    losses = []
    bleu_sum = 0.0
    bleu_count = 0
    for batch in data.iterate_batches(train_data, BATCH_SIZE):
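# The listing above is cut off inside the inner batch loop. The following is
# an illustrative sketch of one possible body for that loop, assuming a
# standard teacher-forcing cross-entropy step. The helpers pack_batch, encode,
# get_encoded_item, and decode_teacher are assumptions, not names confirmed by
# the listing, and torch.nn.functional is assumed imported as F.
for batch in data.iterate_batches(train_data, BATCH_SIZE):
    optimiser.zero_grad()
    # Pack the batch into padded input tensors and per-sample target sequences
    input_seq, out_seq_list, _, out_idx = model.pack_batch(batch, net.emb, device)
    enc = net.encode(input_seq)
    net_results, net_targets = [], []
    for idx, out_seq in enumerate(out_seq_list):
        ref_indices = out_idx[idx][1:]                 # targets, skipping the BEGIN token
        enc_item = net.get_encoded_item(enc, idx)
        r = net.decode_teacher(enc_item, out_seq)      # decode with teacher forcing
        net_results.append(r)
        net_targets.extend(ref_indices)
    results_v = torch.cat(net_results)
    targets_v = torch.LongTensor(net_targets).to(device)
    loss_v = F.cross_entropy(results_v, targets_v)     # token-level cross-entropy
    loss_v.backward()
    optimiser.step()
    losses.append(loss_v.item())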