def main(_):
    """Evaluate a saved captioning model or train a new one, per ``FLAGS.eval``.

    Evaluation branch: loads the test features and the stored vocabulary,
    restores the model from ``FLAGS.checkpoint_file``, decodes greedily and
    writes a JSON list of ``{"caption": ..., "id": ...}`` records to
    ``FLAGS.output``.

    Training branch: either preprocesses the captions and builds the word2vec
    matrix (``FLAGS.prepro``) or reloads the pickled results of a previous
    preprocessing run, then loads training / validation (and, when
    ``FLAGS.task_dir`` is set, task) data and trains a ``CapGenModel``.

    Args:
        _: Ignored positional argument.
    """
    print("\nParameters: ")
    for k, v in sorted(FLAGS.__flags.items()):
        print("{} = {}".format(k, v))

    if not os.path.exists("./prepro/"):
        os.makedirs("./prepro/")

    if FLAGS.eval:
        print("Evaluation...")
        feats, test_id = data_utils.load_test_data(FLAGS.test_id,
                                                   FLAGS.test_dir)
        vocab_processor = VocabularyProcessor.restore(FLAGS.vocab)

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            model = load_model(sess, FLAGS.checkpoint_file, vocab_processor)
            sentences = greedy_inference(sess, model, feats, vocab_processor)
            # sentences = beam_search(sess, model, feats, vocab_processor)

            ans = [{"caption": sentence, "id": test_id[idx]}
                   for idx, sentence in enumerate(sentences)]
            # Context manager so the output file is flushed and closed even
            # if serialisation fails.
            with open(FLAGS.output, 'w') as out_file:
                json.dump(ans, out_file)
    else:
        if FLAGS.prepro:
            print("Start preprocessing data...")
            vocab_processor, train_dict = data_utils.load_text_data(
                train_lab=FLAGS.train_lab,
                prepro_train_p=FLAGS.prepro_train,
                vocab_path=FLAGS.vocab)
            print("Vocabulary size: {}".format(
                len(vocab_processor._reverse_mapping)))

            print("Start dumping word2vec matrix...")
            w2v_W = data_utils.build_w2v_matrix(vocab_processor,
                                                FLAGS.w2v_data,
                                                FLAGS.vector_file,
                                                FLAGS.embedding_dim)
        else:
            # NOTE(review): unpickling executes arbitrary code; these paths
            # must only ever point at files produced by our own prepro run.
            with open(FLAGS.prepro_train, 'rb') as train_file:
                train_dict = cPickle.load(train_file)
            vocab_processor = VocabularyProcessor.restore(FLAGS.vocab)
            with open(FLAGS.w2v_data, 'rb') as w2v_file:
                w2v_W = cPickle.load(w2v_file)

        print("Start generating training data...")
        feats, encoder_in_idx, decoder_in = data_utils.gen_train_data(
            FLAGS.train_dir, FLAGS.train_lab, train_dict)

        print("Start generating validation data...")
        v_encoder_in, truth_captions = data_utils.load_valid(
            FLAGS.valid_dir, FLAGS.valid_lab)

        # Task data is optional; Data receives None for both when absent.
        t_encoder_in = None
        files = None
        if FLAGS.task_dir is not None:
            t_encoder_in, files = data_utils.load_task(FLAGS.task_dir)

        print('feats size: {}, training size: {}'.format(
            len(feats), len(encoder_in_idx)))
        print(encoder_in_idx.shape, decoder_in.shape)
        print(v_encoder_in.shape, len(truth_captions))

        data = Data(feats, encoder_in_idx, decoder_in, v_encoder_in,
                    truth_captions, t_encoder_in, files)
        model = CapGenModel(data, w2v_W, vocab_processor)
        model.build_model()
        model.train()
def run(dataset, DF_layers, DI_layers, n_negs, alpha, gpu='0'):
    """Train and evaluate a J-NCF model on one dataset, logging to ``output/``.

    Every epoch trains on shuffled (user, item, rating) triples with
    ``n_negs`` sampled negatives per positive, then computes HR@10 / NDCG@10
    on the test split. Per-epoch metrics and the best epoch (by HR) are
    printed and written to
    ``output/<dataset>_J-NCF_<DF_layers>_<DI_layers>_n_<n_negs>.txt``.

    Args:
        dataset: One of 'ml1m', 'ml100k', 'yelp', 'amusic', 'agames'.
        DF_layers: Forwarded to ``JNCF`` as its first argument.
        DI_layers: Forwarded to ``JNCF`` as its second argument.
        n_negs: Number of negative items sampled per training batch row.
        alpha: Weight of the pair-wise loss; the point-wise loss is weighted
            by ``1 - alpha``.
        gpu: Value assigned to ``CUDA_VISIBLE_DEVICES``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    import os

    # dataset -> (epochs, multiplier of 100 that gives the eval batch size).
    dataset_settings = {
        'ml1m': (100, 151),
        'ml100k': (100, 41),
        'yelp': (50, 81),
        'amusic': (100, 3),
        'agames': (100, 34),
    }
    # Fail fast: previously an unknown name surfaced much later as a
    # NameError on `epochs`.
    if dataset not in dataset_settings:
        raise ValueError('unknown dataset {!r}; expected one of {}'.format(
            dataset, sorted(dataset_settings)))
    epochs, eval_multiplier = dataset_settings[dataset]
    eval_batch_size = 100 * eval_multiplier

    # Must be set before the first CUDA query below.
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu
    print("##### {} Negative Samples experiment on {} DF: {} DI: {}".format(
        n_negs, dataset, DF_layers, DI_layers))

    learning_rate = 0.0001
    batch_size = 256

    if torch.cuda.is_available():
        device = torch.device('cuda')
        FloatTensor = torch.cuda.FloatTensor
    else:
        device = torch.device('cpu')
        FloatTensor = torch.FloatTensor

    # NOTE(review): numpy's RNG (used by np.random.shuffle below) is not
    # seeded here, so batch order is not reproducible across runs.
    manualSeed = 706
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    print('CUDA Available:', torch.cuda.is_available())

    file_name = 'output/' + dataset + '_J-NCF_' + str(DF_layers) + '_' + str(
        DI_layers) + '_n_' + str(n_negs) + '.txt'
    os.makedirs('output', exist_ok=True)

    # `with` guarantees the log is flushed and closed even if training raises.
    with open(file_name, 'w') as output:
        # Datasets
        (user_matrix, item_matrix, train_u, train_i, train_r, neg_candidates,
         u_cnt, user_rating_max) = data_utils.load_train_data(dataset)
        if dataset == 'ml1m':
            test_users, test_items = data_utils.load_test_ml1m()
        else:
            test_users, test_items = data_utils.load_test_data(dataset)

        n_users, n_items = user_matrix.shape[0], user_matrix.shape[1]
        user_array = user_matrix.toarray()
        item_array = item_matrix.toarray()

        # Model
        model = JNCF(DF_layers, DI_layers, n_users, n_items,
                     'concat').to(device)  # 'multi' or 'concat'
        pair_loss_function = TOP1  # TOP1 or BPR
        point_loss_function = torch.nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Initialise all three so the final summary cannot hit an unbound
        # name when HR never rises above zero.
        best_hr, best_ndcg, best_epoch = 0.0, 0.0, 0

        for epoch in range(epochs):
            # Train
            model.train()  # Enable dropout (if have).
            idxlist = np.arange(len(train_u))
            np.random.shuffle(idxlist)
            epoch_loss, epoch_pair_loss, epoch_point_loss = .0, .0, .0
            epoch_i_point_loss, epoch_j_point_loss = .0, .0

            start_time = time.time()
            for batch_idx, start_idx in enumerate(
                    range(0, len(idxlist), batch_size)):
                end_idx = min(start_idx + batch_size, len(idxlist))
                idx = idxlist[start_idx:end_idx]

                u_ids = train_u.take(idx)
                i_ids = train_i.take(idx)
                i_ratings = train_r.take(idx)

                users = FloatTensor(user_array.take(u_ids, axis=0))
                items = FloatTensor(item_array.take(i_ids, axis=0))
                labels = FloatTensor(i_ratings)
                rating_max = FloatTensor(user_rating_max.take(u_ids, axis=0))

                Y_ui = labels / rating_max  # for Normalized BCE
                # for Negative samples point-wise loss
                Y_uj = torch.zeros_like(Y_ui)

                optimizer.zero_grad()
                point_loss, pair_loss = 0., 0.

                # Negative Sampling
                neg_items_list = [
                    one_negative_sampling(u_ids, neg_candidates)
                    for _ in range(n_negs)
                ]

                # NOTE(review): the original indentation was lost in SOURCE;
                # the statements through optimizer.step() are kept inside
                # this loop in their original order. Confirm whether
                # backward()/step() were meant to run once per negative
                # sample or once per batch.
                for neg_ids in neg_items_list:
                    items_j = FloatTensor(item_array.take(neg_ids, axis=0))
                    y_i, y_j = model(users, items, items_j)

                    # positive items i
                    i_point_loss = point_loss_function(y_i, Y_ui)
                    # negative items j
                    j_point_loss = point_loss_function(y_j, Y_uj)
                    point_loss = i_point_loss + j_point_loss
                    pair_loss = pair_loss_function(y_i, y_j, n_negs)

                    loss = alpha * pair_loss + (1 - alpha) * point_loss

                    epoch_loss += loss.item()
                    epoch_pair_loss += pair_loss.item()
                    epoch_point_loss += point_loss.item()
                    epoch_i_point_loss += i_point_loss.item()
                    epoch_j_point_loss += j_point_loss.item()

                    loss.backward()
                    optimizer.step()
            train_time = time.time() - start_time

            # Evaluate
            model.eval()
            HR, NDCG = [], []
            e_batch_size = eval_batch_size // 100  # faster eval
            # No gradients are needed for scoring; skip graph construction.
            with torch.no_grad():
                for start_idx in range(0, len(test_users), eval_batch_size):
                    end_idx = min(start_idx + eval_batch_size,
                                  len(test_users))
                    u_ids = test_users[start_idx:end_idx]
                    i_ids = test_items[start_idx:end_idx]

                    users = FloatTensor(user_array.take(u_ids, axis=0))
                    items = FloatTensor(item_array.take(i_ids, axis=0))

                    preds, _ = model(users, items, items)

                    # NOTE(review): chunking assumes every eval batch holds
                    # exactly e_batch_size groups of 100 rows; a shorter
                    # final batch would be split unevenly - confirm the
                    # test set size is a multiple of eval_batch_size.
                    preds = torch.chunk(preds.detach().cpu(), e_batch_size)
                    chunked_items = torch.chunk(torch.IntTensor(i_ids),
                                                e_batch_size)

                    for i, pred in enumerate(preds):
                        _, indices = torch.topk(pred, 10)
                        recommends = torch.take(chunked_items[i],
                                                indices).numpy().tolist()
                        # The first entry of each chunk is used as the
                        # ground-truth item.
                        gt_item = chunked_items[i][0].item()
                        HR.append(hit(gt_item, recommends))
                        NDCG.append(ndcg(gt_item, recommends))

            mean_hr, mean_ndcg = np.mean(HR), np.mean(NDCG)

            # Average the accumulated losses over the number of batches.
            e_loss = epoch_loss / (batch_idx + 1)
            e_pair = epoch_pair_loss / (batch_idx + 1)
            e_point = epoch_point_loss / (batch_idx + 1)
            e_i_point = epoch_i_point_loss / (batch_idx + 1)
            e_j_point = epoch_j_point_loss / (batch_idx + 1)

            text_1 = '[Epoch {:03d}]'.format(epoch) + '\ttrain: ' + time.strftime(
                '%M: %S', time.gmtime(train_time)) + '\tHR: {:.4f}\tNDCG: {:.4f}\n'.format(
                    mean_hr, mean_ndcg)
            text_2 = 'Loss: {:.6f}\tPair: {:.4f}\tPoint: {:.4f}\ti_point: {:.4f}\tj_point: {:.4f}\n'.format(
                e_loss, e_pair, e_point, e_i_point, e_j_point)
            # Strip the trailing newline for the console; keep it in the file.
            print(text_1[:-1])
            print(text_2[:-1])
            output.write(text_1)
            output.write(text_2)

            if mean_hr > best_hr:
                best_hr, best_ndcg, best_epoch = mean_hr, mean_ndcg, epoch

        result = 'DF: {} DI: {}. Best epoch {:02d}: HR = {:.4f}, NDCG = {:.4f}\n'.format(
            DF_layers, DI_layers, best_epoch, best_hr, best_ndcg)
        print(result[:-1])
        output.write(result)
def train(self):
    """Train the model for ``FLAGS.epoch`` epochs, checkpointing after each.

    Variables are freshly initialised when ``FLAGS.checkpoint_file`` is the
    empty string; otherwise they are restored from that checkpoint. After
    every epoch the accumulated mean cost is printed, a checkpoint is saved
    under ``self.checkpoint_prefix``, and inference is run on the task
    (``FLAGS.pred_task``) or validation encoder inputs. Beam search is run
    as well for every model class except ``CaptionGeneratorBasic``.
    """
    # Ceiling division so a trailing partial batch still gets its own step.
    batch_num = (self.data.length + FLAGS.batch_size - 1) // FLAGS.batch_size
    current_step = 0
    # Only the CaptionGeneratorSS model is fed the extra `use_pred` mask.
    feeds_use_pred = self.model.__class__.__name__ == "CaptionGeneratorSS"

    with self.sess.as_default():
        if FLAGS.checkpoint_file == "":
            self.sess.run(tf.global_variables_initializer())
        else:
            # Restore into this object's session; a bare `sess` is not
            # defined anywhere in this method.
            self.saver.restore(self.sess, FLAGS.checkpoint_file)

        for ep in range(FLAGS.epoch):
            cost = 0.
            # NOTE(review): the keyword is spelled `widget` here; confirm it
            # against the installed progressbar package's constructor.
            pbar = pb.ProgressBar(
                widget=[pb.Percentage(), pb.Bar(), pb.ETA()],
                maxval=batch_num).start()
            print("Epoch {}".format(ep + 1))

            for b in range(batch_num):
                e_in, d_in_idx, d_out_idx = self.data.next_batch(
                    FLAGS.batch_size)

                feed_dict = {
                    self.model.e_in: e_in,
                    self.model.d_in_idx: d_in_idx,
                    self.model.d_out_idx: d_out_idx
                }
                if feeds_use_pred:
                    # Boolean mask, True where a uniform draw exceeds the
                    # epoch-dependent threshold from sigmoid_decay.
                    rand_matrix = np.random.rand(d_in_idx.shape[0],
                                                 d_in_idx.shape[1])
                    feed_dict[self.model.use_pred] = (
                        rand_matrix > self.sigmoid_decay(ep))

                loss, _, _ = self.sess.run(
                    [self.model.cost, self.global_step, self.updates],
                    feed_dict=feed_dict)
                current_step = tf.train.global_step(self.sess,
                                                    self.global_step)
                # Running mean of the per-batch cost over the epoch.
                cost += loss / batch_num
                pbar.update(b + 1)
            pbar.finish()

            print(">>cost: {}".format(cost))
            path = self.saver.save(self.sess,
                                   self.checkpoint_prefix,
                                   global_step=current_step)
            print("\nSaved model checkpoint to {}\n".format(path))

            if FLAGS.pred_task:
                encoder_in = self.data.t_encoder_in
            else:
                encoder_in = self.data.v_encoder_in

            self.inference(encoder_in, task=FLAGS.pred_task)
            if self.model.__class__.__name__ != "CaptionGeneratorBasic":
                self.BeamSearch(encoder_in, task=FLAGS.pred_task)

    # NOTE(review): in SOURCE the statements below were wrapped in
    # unbalanced triple-quoted strings. They are treated as disabled code
    # and kept here for reference only - confirm before re-enabling.
    #
    # feats, test_id = data_utils.load_test_data(FLAGS.test_id, FLAGS.test_dir)
    # sentences = self.inference(feats)
    # with open("{}_Greedy_out.txt".format(FLAGS.hidden), 'w') as f:
    #     for idx, sentence in enumerate(sentences):
    #         f.write(test_id[idx] + ' :\t' + sentence + '\n')
    # sentences = self.BeamSearch(feats)
    # with open("{}_Beam_out.txt".format(FLAGS.hidden), 'w') as f:
    #     for idx, sentence in enumerate(sentences):
    #         f.write(test_id[idx] + ' :\t' + sentence + '\n')
elif model_name == '3':
    # K.set_learning_phase(1)
    model = Dave_dropout()
    save_model_name = './Model3.h5'
else:
    # NOTE(review): this branch only prints; execution continues below,
    # where `model` and `save_model_name` are used without being assigned
    # in this branch - confirm whether it should abort instead.
    print(bcolors.FAIL + 'invalid model name, must one of 1, 2 or 3' + bcolors.ENDC)

# the data, shuffled and split between train and test sets
train_generator, samples_per_epoch = load_train_data(batch_size=batch_size, shape=(100, 100))

# training
# steps_per_epoch is the sample count divided by the batch size, rounded up.
model.fit_generator(train_generator,
                    steps_per_epoch=math.ceil(samples_per_epoch * 1. / batch_size),
                    epochs=nb_epoch, workers=8, use_multiprocessing=True)
print(bcolors.OKGREEN + 'Model trained' + bcolors.ENDC)

# evaluation
K.set_learning_phase(0)
# `samples_per_epoch` is rebound here to the value returned for the test set.
test_generator, samples_per_epoch = load_test_data(batch_size=batch_size, shape=(100, 100))
# NOTE(review): the value returned by evaluate_generator is discarded.
model.evaluate_generator(test_generator,
                         steps=math.ceil(samples_per_epoch * 1. / batch_size))

# save model
model.save_weights(save_model_name)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import numpy as np
import data_utils

data_dim = 128
timesteps = 128

train_data = data_utils.load_training_data()
test_data = data_utils.load_test_data()
# loader = data_utils.DataLoader(data=data,batch_size=train_config.batch_size, num_steps=train_config.num_steps)
data_loader = data_utils.DataLoader(train_data, 7352, 1)
# x_test, y_test = data_utils.DataLoader(train_data, 128, 1).next_batch()

# Three stacked LSTMs, each with 256 units and each emitting its full output
# sequence, followed by three Dense layers applied to that sequence.
# NOTE(review): the first layer declares input_shape=(1, 1), while
# `timesteps` and `data_dim` above are not used in this section - confirm
# the intended input shape (batch_size, timesteps, data_dim).
# NOTE(review): softmax over a single output unit always evaluates to 1.0 -
# confirm the intended output size / activation.
model = Sequential([
    LSTM(256, return_sequences=True, input_shape=(1, 1)),
    LSTM(256, return_sequences=True),
    LSTM(256, return_sequences=True),
    Dense(128, activation="sigmoid", name="DENSE1"),
    Dense(72, activation="sigmoid", name="DENSE2"),
    Dense(1, activation='softmax'),
])
# Load the training setup selected by the train id argument.
train_config = load_train_setup(args.train_id)
# Stays None unless the training branch below runs; it is handed to test()
# either way.
trained_model = None
PLOT = args.plot

if args.train:
    # Load the train data
    train_ids, x_train, y_train = dsb.load_train_data(
        path_to_train="../input/train/",
        img_size=train_config["img_size"],
        num_channels=3)
    train_dataset = NpDataset(x=x_train, y=y_train, ids=train_ids)
    # train the models
    # Only k-fold training is supported; note the data above is loaded
    # before this check runs.
    if not train_config["kfold"]:
        raise NotImplementedError("Non-kfold training is not implemented")
    trained_model = kfold(train_dataset,
                          train_config,
                          args.train_id,
                          num_completed=args.num_completed)

if args.test:
    # Load the test data
    test_ids, x_test, sizes_test = dsb.load_test_data(
        path_to_test="../input/test/",
        img_size=train_config["img_size"],
        num_channels=3)
    # The test dataset carries inputs and ids only (no targets).
    test_dataset = NpDataset(x=x_test, ids=test_ids)
    test(test_dataset,
         sizes_test,
         train_config,
         args.train_id,
         model=trained_model)
NUM_TRAIN = 670 NUM_TEST = 65 # In[4]: # Load the clusters train_cluster_ids = np.load("../clusters/train_clusters.npz") test_cluster_ids = np.load("../clusters/test_clusters.npz") in_set = np.vectorize(lambda a, s: a in s) # In[5]: # Load the training data test_ids, X_test, sizes_test = dsb.load_test_data(path_to_test=PATH_TO_TEST, img_size=None, num_channels=NUM_CHANNELS, mode='rgb') train_ids, X_train, Y_train = dsb.load_train_data(path_to_train=PATH_TO_TRAIN, img_size=None, num_channels=NUM_CHANNELS, mode='rgb') print("Number of training samples: %s" % len(train_ids)) print("X-train shape: {}".format(X_train.shape)) print("Y-train shape: {}".format(Y_train.shape)) print("X-test shape: {}".format(X_test.shape)) # Get indexes from clusters train_clusters = np.zeros(NUM_TRAIN, dtype=int) test_clusters = np.zeros(NUM_TEST, dtype=int) train_clusters[in_set(train_ids, {a