# --- CML training run ---------------------------------------------------
# Hyper-parameters come from the training config object `tc`.
batch_size = tc.batch_size
test_batch_size = tc.test_batch_size
display_itr = tc.display_itr

# Wrap the raw train/val/test interactions as implicit-feedback datasets.
train_dataset = ImplicitDatasetWithExplicitConversion(
    raw_data['train_data'], raw_data['max_user'], raw_data['max_item'],
    name='Train')
val_dataset = ImplicitDatasetWithExplicitConversion(
    raw_data['val_data'], raw_data['max_user'], raw_data['max_item'],
    name='Val')
test_dataset = ImplicitDatasetWithExplicitConversion(
    raw_data['test_data'], raw_data['max_user'], raw_data['max_item'],
    name='Test')

model = CML(batch_size=batch_size, max_user=train_dataset.max_user(),
            max_item=train_dataset.max_item(), dim_embed=20, opt='Adam',
            sess_config=config)
sampler = PairwiseSamplerWithExplicitConversion(
    dataset=train_dataset, batch_size=batch_size, num_process=3)
model_trainer = ImplicitModelTrainer(
    batch_size=batch_size, test_batch_size=test_batch_size,
    train_dataset=train_dataset, model=model, sampler=sampler)

# Ranking metrics evaluated on the validation set during training.
auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
precision_evaluator = Precision(
    precision_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
ndcg_evaluator = NDCG(ndcg_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

# FIX: the original formatted this timestamp by calling datetime.now() twice
# and string-splitting each result; the two calls could observe different
# instants (e.g. straddling midnight). A single now() + strftime emits the
# identical "HH:MM:SS YYYY-MM-DD" text.
print(datetime.datetime.now().strftime('%H:%M:%S %Y-%m-%d'))
print(fileToLoad)

model_trainer.train(num_itr=int(10), display_itr=display_itr,
                    eval_datasets=[val_dataset],
                    evaluators=[auc_evaluator, recall_evaluator,
                                precision_evaluator, ndcg_evaluator])
print("Save")
model.save("./model", 3)
# --- VisualCML training run ---------------------------------------------
# Feature-aware CML variant: item content features (item_f_source) are
# projected by an MLP with layer sizes `dims` down to the 50-d embedding.
model = VisualCML(
    batch_size=batch_size,
    max_user=raw_data['max_user'],
    max_item=raw_data['max_item'],
    l2_reg=0.001,
    l2_reg_mlp=0.001,
    dropout_rate=0.5,
    dim_embed=50,
    item_f_source=raw_data['item_features'],
    dims=[1028, 128, 50],
    sess_config=sess_config,
    opt='Adam')

# Pairwise (positive, negative) sampling over the training interactions.
sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset,
                          num_process=5)
model_trainer = ImplicitModelTrainer(
    batch_size=batch_size,
    test_batch_size=test_batch_size,
    item_serving_size=item_serving_size,
    train_dataset=train_dataset,
    model=model,
    sampler=sampler)

auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

# Evaluate on both validation and test splits every display_itr iterations,
# scoring each positive against 1000 sampled negatives.
model_trainer.train(num_itr=int(1e5), display_itr=display_itr,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator, recall_evaluator],
                    num_negatives=1000)
# --- YoutubeRec training run --------------------------------------------
test_batch_size = 100
display_itr = 10000

# FIX: this fragment used Python 2 `print 'x'` statements while the rest of
# the file uses the Python 3 print() function; converted for consistency
# (printed text is unchanged). The bare strings look like debug tracers.
print('yuck')
train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'],
                                raw_data['max_item'], name='Train')
print('h')
val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'],
                              raw_data['max_item'], name='Val')
print('e')
test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'],
                               raw_data['max_item'], name='Test')
print('here 1')
youtube_model = YoutubeRec(batch_size=batch_size,
                           max_user=train_dataset.max_user(),
                           max_item=train_dataset.max_item(),
                           dim_embed=20, sess_config=sess_config, opt='Adam')
# bpr_model = FeatureBasedBPR(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(),
#                             dim_embed=20, opt='Adam', sess_config=sess_config)
print('here 2')
sampler = GeneralSampler(batch_size=batch_size, dataset=train_dataset,
                         num_process=1, genre_f=raw_data['song_to_genre'])
print('here 3')
model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     train_dataset=train_dataset,
                                     model=youtube_model, sampler=sampler)
print('here 4')
auc_evaluator = AUC()
print('here 5')
model_trainer.train(num_itr=int(1e6), display_itr=display_itr,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator])
print('here 6')
name='Val') test_dataset = ImplicitDataset(raw_data=csv, max_user=max_users, max_item=max_items, name='Test') bpr_model = BPR(batch_size=1000, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), dim_embed=20, opt='Adam') print("before sampler") sampler = PairwiseSampler(batch_size=1000, dataset=train_dataset) print("after sampler") auc_evaluator = AUC() print("after evaluator") model_trainer = ImplicitModelTrainer(batch_size=1000, test_batch_size=100, train_dataset=train_dataset, model=bpr_model, sampler=sampler) print("after implicit") model_trainer.train(num_itr=10, display_itr=10, eval_datasets=[val_dataset, test_dataset], evaluators=[auc_evaluator])
# NOTE(review): this fragment starts mid-call -- `l2_reg=l2_reg)` closes a
# model constructor whose head is outside this view -- and it ends with a
# bare `elif recommender == "PMF":` whose enclosing `if` and body are also
# outside the view, so indentation cannot be reconstructed safely; code is
# kept byte-identical.
# Flow: build a weighted pointwise sampler; derive an eval-save path from
# ten CLI args (disabled when dataset_name == "amazon_book"); wire up the
# trainer; train with AUC and Recall@{10,50,100,300} on the focused
# val/test splits, using `num_negatives` sampled negatives per positive.
l2_reg=l2_reg) #Sampler sampler = PointwiseWSampler(batch_size=batch_size, dataset=train_dataset, num_process=num_process, exp_factor=exp_factor, pos_ratio=pos_ratio) #Model Trainer eval_save_prefix = '/home/ubuntu/openrec/models/fhmf_models/{0}_{1}_{2}_{3}_{4}_{5}_{6}_{7}_{8}_{9}/'.format( sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7], sys.argv[8], sys.argv[9], sys.argv[10]) if dataset_name == "amazon_book": eval_save_prefix = None model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, train_dataset=train_dataset, model=fhmf_model, sampler=sampler, eval_save_prefix=eval_save_prefix) #Evaluators auc_evaluator = AUC() recall_evaluator = Recall(recall_at=[10, 50, 100, 300]) #mse_evaluator = MSE() #Train model_trainer.train(num_itr=num_itr, display_itr=display_itr, eval_datasets=[focused_val_set, focused_test_set], evaluators=[auc_evaluator, recall_evaluator], num_negatives=num_negatives) elif recommender == "PMF":
name='Val') test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') ncml_model = NCML(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), dim_embed=20, neg_num=5, l2_reg=None, opt='Adam', sess_config=None) sampler = NPairwiseSampler(batch_size=batch_size, dataset=train_dataset, negativenum=5, num_process=5) model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, train_dataset=train_dataset, model=ncml_model, sampler=sampler) auc_evaluator = AUC() model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset], evaluators=[auc_evaluator], num_negatives=200)
def run_full_eval(user_id) -> list:
    """Evaluate each ensemble member and an evolved ensemble for one user.

    Builds a per-user test set from raw_data['test_data'], evaluates the
    three individual recommenders, then runs a DEAP-style genetic search
    over ensemble weights and evaluates the best individual found.

    Depends on module globals: raw_data, tc, combined_recommender,
    evaluators, toolbox, tools, POP_SIZE, MAX_GENERATIONS,
    CROSS_OVER_PROBABILITY, MUTATION_PROBABILITY.

    Returns a 5-element list:
      [0..2] per-model _evaluate_full results,
      [3]    best evolved individual (the ensemble weights),
      [4]    _evaluate_full result for that best ensemble.
    """
    global model_trainer
    global test_dataset
    global test1Temp
    test1Temp = []
    # final results array to populate
    results = [None, None, None, None, None]
    USER = user_id
    # set up user's test set: keep only this user's interactions, de-duplicated
    for entry in raw_data['test_data']:
        if entry['user_id'] == USER:
            test1Temp.append(entry)
    uniq = np.array(np.unique(test1Temp))
    raw_data['test_1_data'] = uniq
    test_dataset = ImplicitDatasetWithExplicitConversion(
        raw_data['test_1_data'], raw_data['max_user'], raw_data['max_item'],
        name='Test')
    # NOTE(review): the trainer is pointed at the per-user TEST set as its
    # train_dataset -- presumably intentional for evaluation-only use; verify.
    sampler = PairwiseSamplerWithExplicitConversion(dataset=test_dataset,
                                                    batch_size=tc.batch_size,
                                                    num_process=3)
    model_trainer = ImplicitModelTrainer(batch_size=tc.batch_size,
                                         test_batch_size=tc.test_batch_size,
                                         train_dataset=test_dataset,
                                         model=combined_recommender,
                                         sampler=sampler)
    # Reaches into trainer privates to install the eval manager and clear
    # excluded positives for this user.
    model_trainer._eval_manager = ImplicitEvalManager(evaluators=evaluators)
    model_trainer._excluded_positives = {}
    model_trainer._excluded_positives[USER] = set()
    # evaluate each individual model (one-hot ensemble per member)
    for x in range(0, 3):
        ensemble = [0, 0, 0]
        print('individual ensemble ', end='')
        print(ensemble)
        ensemble[x] = 1
        combined_recommender.set_ensemble(ensemble)
        ind_results = model_trainer._evaluate_full(test_dataset)
        results[x] = ind_results
    # genetic evolution (fixed seed for reproducibility)
    random.seed(64)
    pop = toolbox.population(n=POP_SIZE)
    print("=== EVOLUTION PHASE ===")
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
    print("Evaluated %i individuals" % len(pop))
    fits = [ind.fitness.values[0] for ind in pop]
    gen_count = 0
    glob_min = 1
    glob_max = 0
    # Stop when a fitness reaches 2 or the generation budget is spent.
    while max(fits) < 2 and gen_count < MAX_GENERATIONS:
        gen_count = gen_count + 1
        print('gen')
        offspring = toolbox.select(pop, len(pop))
        offspring = list(map(toolbox.clone, offspring))
        # Mate consecutive pairs of offspring.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            # cross two individuals with probability CXPB
            if random.random() < CROSS_OVER_PROBABILITY:
                toolbox.mate(child1, child2)
                # fitness values of the children
                # must be recalculated later
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            # mutate an individual with probability MUTPB
            if random.random() < MUTATION_PROBABILITY:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        print(" Evaluated %i individuals" % len(invalid_ind))
        # The population is entirely replaced by the offspring
        pop[:] = offspring
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        glob_min = min(glob_min, min(fits))
        glob_max = max(glob_max, max(fits))
        # NOTE(review): mean/sum2/std (and glob_min/glob_max) are computed
        # but never printed or returned -- likely leftover stats code.
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
    best_individual = tools.selBest(pop, 1)[0]
    print("Best individual is %s, %s" % (best_individual,
                                         best_individual.fitness.values))
    results[3] = best_individual
    combined_recommender.set_ensemble(best_individual)
    results[4] = model_trainer._evaluate_full(test_dataset)
    return results