Example #1
batch_size = tc.batch_size
test_batch_size = tc.test_batch_size
display_itr = tc.display_itr
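# Mini-batch size, test batch size, and evaluation interval come from the shared training config tc.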

train_dataset = ImplicitDatasetWithExplicitConversion(
    raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train')
val_dataset = ImplicitDatasetWithExplicitConversion(
    raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val')
test_dataset = ImplicitDatasetWithExplicitConversion(
    raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test')

model = CML(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(),
            dim_embed=20, opt='Adam', sess_config=config)
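# CML (Collaborative Metric Learning) embeds users and items in a shared 20-dimensional metric space and trains with Adam.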
sampler = PairwiseSamplerWithExplicitConversion(
    dataset=train_dataset, batch_size=batch_size, num_process=3)
model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size,
                                     train_dataset=train_dataset, model=model, sampler=sampler)
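# The trainer ties the model, pairwise sampler, and training set together and drives the optimization and evaluation loop.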

auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
precision_evaluator = Precision(
    precision_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
ndcg_evaluator = NDCG(ndcg_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
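# Evaluation metrics: AUC plus Recall, Precision, and NDCG at cutoffs 10 through 100.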

now = str(datetime.datetime.now()).split('.')[0]
date_part, time_part = now.split(' ')
print(time_part + ' ' + date_part)
print(fileToLoad)
model_trainer.train(num_itr=int(10), display_itr=display_itr, eval_datasets=[val_dataset],
                    evaluators=[auc_evaluator, recall_evaluator, precision_evaluator, ndcg_evaluator])

print("Save")
model.save("./model", 3)
Example #2
model = VisualCML(batch_size=batch_size,
                  max_user=raw_data['max_user'],
                  max_item=raw_data['max_item'],
                  l2_reg=0.001,
                  l2_reg_mlp=0.001,
                  dropout_rate=0.5,
                  dim_embed=50,
                  item_f_source=raw_data['item_features'],
                  dims=[1028, 128, 50],
                  sess_config=sess_config,
                  opt='Adam')
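# VisualCML augments CML with item features (item_f_source) fed through an MLP (dims=[1028, 128, 50]) with dropout and L2 regularization on both the embeddings and the MLP weights.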
sampler = PairwiseSampler(batch_size=batch_size,
                          dataset=train_dataset,
                          num_process=5)
model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     item_serving_size=item_serving_size,
                                     train_dataset=train_dataset,
                                     model=model,
                                     sampler=sampler)

auc_evaluator = AUC()
recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

model_trainer.train(num_itr=int(1e5),
                    display_itr=display_itr,
                    eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator, recall_evaluator],
                    num_negatives=1000)
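# Train for 1e5 iterations, reporting AUC and Recall on the validation and test sets every display_itr iterations; num_negatives controls how many negative items are sampled during evaluation.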
Example #3
test_batch_size = 100
display_itr = 10000

print('yuck')
train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train')
print('h')
val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val')
print('e')
test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test')
print('here 1')

youtube_model = YoutubeRec(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), 
                dim_embed=20, sess_config=sess_config, opt='Adam')
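# YoutubeRec model with 20-dimensional embeddings and Adam; a FeatureBasedBPR alternative is left commented out below.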

# bpr_model = FeatureBasedBPR(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), 
#                 dim_embed=20, opt='Adam', sess_config=sess_config)

print('here 2')
sampler = GeneralSampler(batch_size=batch_size, dataset=train_dataset, num_process=1, genre_f=raw_data['song_to_genre'])
print('here 3')
model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size,
    train_dataset=train_dataset, model=youtube_model, sampler=sampler)
print('here 4')
auc_evaluator = AUC()
print('here 5')
model_trainer.train(num_itr=int(1e6), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset],
                    evaluators=[auc_evaluator])
print('here 6')


Example #4
                                  name='Val')
    test_dataset = ImplicitDataset(raw_data=csv,
                                   max_user=max_users,
                                   max_item=max_items,
                                   name='Test')

    bpr_model = BPR(batch_size=1000,
                    max_user=train_dataset.max_user(),
                    max_item=train_dataset.max_item(),
                    dim_embed=20,
                    opt='Adam')
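    # BPR (Bayesian Personalized Ranking) with 20-dimensional embeddings, optimized with Adam.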

    print("before sampler")
    sampler = PairwiseSampler(batch_size=1000, dataset=train_dataset)
    print("after sampler")

    auc_evaluator = AUC()
    print("after evaluator")

    model_trainer = ImplicitModelTrainer(batch_size=1000,
                                         test_batch_size=100,
                                         train_dataset=train_dataset,
                                         model=bpr_model,
                                         sampler=sampler)
    print("after implicit")

    model_trainer.train(num_itr=10,
                        display_itr=10,
                        eval_datasets=[val_dataset, test_dataset],
                        evaluators=[auc_evaluator])
Example #5

                      l2_reg=l2_reg)
    #Sampler
    sampler = PointwiseWSampler(batch_size=batch_size,
                                dataset=train_dataset,
                                num_process=num_process,
                                exp_factor=exp_factor,
                                pos_ratio=pos_ratio)
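    # Pointwise sampler; pos_ratio sets the share of positive examples drawn per batch.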
    #Model Trainer
    eval_save_prefix = '/home/ubuntu/openrec/models/fhmf_models/{0}_{1}_{2}_{3}_{4}_{5}_{6}_{7}_{8}_{9}/'.format(
        sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5],
        sys.argv[6], sys.argv[7], sys.argv[8], sys.argv[9], sys.argv[10])
    if dataset_name == "amazon_book":
        eval_save_prefix = None
    model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                         test_batch_size=test_batch_size,
                                         train_dataset=train_dataset,
                                         model=fhmf_model,
                                         sampler=sampler,
                                         eval_save_prefix=eval_save_prefix)
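    # eval_save_prefix is the directory prefix under which evaluation output is written (disabled above for the amazon_book dataset).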
    #Evaluators
    auc_evaluator = AUC()
    recall_evaluator = Recall(recall_at=[10, 50, 100, 300])
    #mse_evaluator = MSE()
    #Train
    model_trainer.train(num_itr=num_itr,
                        display_itr=display_itr,
                        eval_datasets=[focused_val_set, focused_test_set],
                        evaluators=[auc_evaluator, recall_evaluator],
                        num_negatives=num_negatives)

elif recommender == "PMF":
Example #6
                              name='Val')
test_dataset = ImplicitDataset(raw_data['test_data'],
                               raw_data['max_user'],
                               raw_data['max_item'],
                               name='Test')

ncml_model = NCML(batch_size=batch_size,
                  max_user=train_dataset.max_user(),
                  max_item=train_dataset.max_item(),
                  dim_embed=20,
                  neg_num=5,
                  l2_reg=None,
                  opt='Adam',
                  sess_config=None)
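# NCML with 20-dimensional embeddings and 5 negatives per positive (neg_num); no L2 regularization or custom session config.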
sampler = NPairwiseSampler(batch_size=batch_size,
                           dataset=train_dataset,
                           negativenum=5,
                           num_process=5)
model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     train_dataset=train_dataset,
                                     model=ncml_model,
                                     sampler=sampler)
auc_evaluator = AUC()

model_trainer.train(num_itr=int(1e5),
                    display_itr=display_itr,
                    eval_datasets=[val_dataset],
                    evaluators=[auc_evaluator],
                    num_negatives=200)
Example #7
def run_full_eval(user_id):
    global model_trainer
    global test_dataset
    global test1Temp

    test1Temp = []
    # final results array to populate
    results = [None, None, None, None, None]

    USER = user_id
    # set up user's test set
    for entry in raw_data['test_data']:
        if entry['user_id'] == USER:
            test1Temp.append(entry)

    uniq = np.array(np.unique(test1Temp))
    raw_data['test_1_data'] = uniq
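    # raw_data['test_1_data'] now holds only this user's unique test interactions.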

    test_dataset = ImplicitDatasetWithExplicitConversion(
        raw_data['test_1_data'],
        raw_data['max_user'],
        raw_data['max_item'],
        name='Test')

    sampler = PairwiseSamplerWithExplicitConversion(dataset=test_dataset,
                                                    batch_size=tc.batch_size,
                                                    num_process=3)

    model_trainer = ImplicitModelTrainer(batch_size=tc.batch_size,
                                         test_batch_size=tc.test_batch_size,
                                         train_dataset=test_dataset,
                                         model=combined_recommender,
                                         sampler=sampler)

    model_trainer._eval_manager = ImplicitEvalManager(evaluators=evaluators)
    model_trainer._excluded_positives = {}
    model_trainer._excluded_positives[USER] = set()
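    # Manually attach an eval manager and an empty excluded-positives set so _evaluate_full can score this user over the full item set.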

    # evaluate each individual model

    for x in range(0, 3):
        ensemble = [0, 0, 0]
        ensemble[x] = 1
        print('individual ensemble ', end='')
        print(ensemble)
        combined_recommender.set_ensemble(ensemble)
        ind_results = model_trainer._evaluate_full(test_dataset)
        results[x] = ind_results

    # genetic evolution

    random.seed(64)
    pop = toolbox.population(n=POP_SIZE)
    print("=== EVOLUTION PHASE ===")

    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    print("Evaluated %i individuals" % len(pop))
    fits = [ind.fitness.values[0] for ind in pop]

    gen_count = 0
    glob_min = 1
    glob_max = 0

    while max(fits) < 2 and gen_count < MAX_GENERATIONS:
        gen_count = gen_count + 1
        print('gen')
        offspring = toolbox.select(pop, len(pop))

        offspring = list(map(toolbox.clone, offspring))

        for child1, child2 in zip(offspring[::2], offspring[1::2]):

            # cross two individuals with probability CROSS_OVER_PROBABILITY
            if random.random() < CROSS_OVER_PROBABILITY:
                toolbox.mate(child1, child2)

                # fitness values of the children
                # must be recalculated later
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            # mutate an individual with probability MUTATION_PROBABILITY
            if random.random() < MUTATION_PROBABILITY:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        print("  Evaluated %i individuals" % len(invalid_ind))

        # The population is entirely replaced by the offspring
        pop[:] = offspring

        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]

        length = len(pop)
        glob_min = min(glob_min, min(fits))
        glob_max = max(glob_max, max(fits))
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5

    best_individual = tools.selBest(pop, 1)[0]
    print("Best individual is %s, %s" %
          (best_individual, best_individual.fitness.values))
    results[3] = best_individual
    combined_recommender.set_ensemble(best_individual)
    results[4] = model_trainer._evaluate_full(test_dataset)
    return results