Example #1
    def __init__(self, embedding_file, graph_path, params={}):
        Evaluation.__init__(self)

        self._embedding_file = embedding_file
        self._graph_path = graph_path
        self._directed = params.get('directed', False)

        self.results = None
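The fragment above is the constructor of an Evaluation subclass that checks a node embedding against a graph. A minimal instantiation sketch; the subclass name GraphEmbeddingEvaluation and the file paths are assumptions, only the constructor arguments come from the fragment:

# Hypothetical: GraphEmbeddingEvaluation and both paths are placeholders.
evaluation = GraphEmbeddingEvaluation('model.emb', 'graph.edgelist',
                                      params={'directed': True})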
Example #2
def _analyze_hybrid(model: HybridModel, evaluater: Evaluation, train, test)\
        -> Tuple[EvaluationResultHybrid, EvaluationResultHybrid]:

    model.fit_init(*train)
    result_before_x = evaluater.evaluate_hybrid(model, *test)

    model.fit_cross()
    result_after_x = evaluater.evaluate_hybrid(model, *test)

    return result_before_x, result_after_x
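The train and test arguments are the ([user_indices, item_indices], ratings) pairs built by the cross-validation helpers in the later examples. A minimal wiring sketch, assuming the imports shown there and that hybrid_config is a ready-to-use config object; a full run would also call evaluater.update_parts(...) first, as evaluate_hybrid_single does below.

from evaluation.evaluation import Evaluation
from hybrid_model.hybrid import HybridModel
from hybrid_model.config import hybrid_config
from hybrid_model.dataset import get_dataset
from util import kfold

# Build one train/test split the same way evaluate_hybrid_single does.
(inds_u, inds_i, y, users_features, items_features) = get_dataset('ml100k').data
xval_train, xval_test = list(kfold.kfold(5, inds_u))[0]

train = ([inds_u[xval_train], inds_i[xval_train]], y[xval_train])
test = ([inds_u[xval_test], inds_i[xval_test]], y[xval_test])

model = HybridModel(users_features, items_features, hybrid_config)
result_before_x, result_after_x = _analyze_hybrid(model, Evaluation(), train, test)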
Example #3
def _analyze_hybrid_as_model(model: HybridModel, evaluater: Evaluation, train, test)\
        -> Tuple[EvaluationResult, EvaluationResult, EvaluationResult]:

    model.fit_init(*train)
    result_before_x = evaluater.evaluate_hybrid(model, *test)

    model.fit_cross()
    result_hybrid = evaluater.evaluate(model, *test)

    return result_hybrid, result_before_x.cf, result_before_x.md
Example #4
def evaluate_hybrid_single(dataset: Dataset,
                           config,
                           coldstart=False,
                           cs_type='user',
                           n_entries=0,
                           evaluater=None,
                           n_fold=5):
    (inds_u, inds_i, y, users_features, items_features) = dataset.data

    if coldstart and cs_type == 'user':
        if n_entries == 0:
            fold = kfold.kfold_entries(n_fold, inds_u)
        else:
            fold = kfold.kfold_entries_plus(n_fold, inds_u, n_entries)
    elif coldstart and cs_type == 'item':
        if n_entries == 0:
            fold = kfold.kfold_entries(n_fold, inds_i)
        else:
            fold = kfold.kfold_entries_plus(n_fold, inds_i, n_entries)
    elif coldstart:
        raise ValueError('unknown cs_type')
    else:
        fold = kfold.kfold(n_fold, inds_u)

    fold = list(fold)

    xval_train, xval_test = fold[2]

    if evaluater is None:
        evaluater = Evaluation()

    # Dataset training
    inds_u_train = inds_u[xval_train]
    inds_i_train = inds_i[xval_train]
    y_train = y[xval_train]

    # Dataset testing
    inds_u_test = inds_u[xval_test]
    inds_i_test = inds_i[xval_test]
    y_test = y[xval_test]

    user_dist = np.bincount(inds_u_train, minlength=dataset.n_users)
    item_dist = np.bincount(inds_i_train, minlength=dataset.n_items)
    evaluater.update_parts(user_dist, item_dist)

    train = ([inds_u_train, inds_i_train], y_train)
    test = ([inds_u_test, inds_i_test], y_test)

    model = HybridModel(users_features, items_features, config)

    result_before, result_after = _analyze_hybrid(model, evaluater, train,
                                                  test)

    return result_before, result_after
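A hedged call sketch for the function above, reusing get_dataset and hybrid_config as imported in the other examples (their suitability here is an assumption):

dataset = get_dataset('ml100k')
result_before, result_after = evaluate_hybrid_single(dataset, hybrid_config,
                                                     coldstart=True, cs_type='user')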
Example #5
    def __init__(self, config):
        BaseAgent.__init__(self, config)
        Evaluation.__init__(self, self.config.metrics_k)
        self.metrics_k = config.metrics_k
        self.state_config = self.config.hyperparameters["State"]
        self.embedding = self.get_embedding().to(self.device)
        self.embedding_dim = self.embedding.embedding_dim

        # Initialize state-module
        self.state_agg = RNNStateAgg(self.embedding,
                                     state_config=self.state_config,
                                     reward_range=[0, 1],
                                     with_rewards=False).to(self.device)
        self.state_size = self.state_agg.state_size
        self.state_optimizer = self.create_state_optimizer()

        if self.config.hyperparameters[
                "state-only-pretrain"] or self.config.hyperparameters[
                    "pretrain"]:
            save_dir = Path(config.file_to_save_model).parent / "pretrain"
            self.pretrain_model_saver = ModelSaver(save_dir)

        if self.config.hyperparameters["state-only-pretrain"]:
            self.output_layer = torch.nn.Linear(
                self.state_size,
                self.environment.action_space.n).to(self.device)
            self.output_layer_optimizer = torch.optim.Adam(
                self.output_layer.parameters())

        self.user_history_mask_items = None
        self.masking_enabled = self.hyperparameters.get("history_masking")

        self.model_saver = ModelSaver(Path(config.file_to_save_model).parent)
        self.exploration_strategy = Epsilon_Greedy_Exploration(self.config)

        self.last_done = np.zeros(self.environment.num_envs, dtype=bool)
Example #6
def main(collection_path, stopwords_path):
    """Main Method that produces the interpolate curve Precision/Recall from the query set and the MAP"""

    #Creation of the CACM index
    index = IndexCACMMemory()
    index.parserCacm("CACM/cacm.all")
    index.parserCacm(collection_path)
    index.tokenizerCacm()
    index.manage_tokens_collectionCacm(stopwords_path)
    index.index_inverse()
    index.weight_calculation_index()

    #Get the queries as a dictionary from the query.text file
    queries = loop_query_test()

    #List for recall precision interpolation
    L = []
    #average precision list
    ap_list = []
    #E F measures list
    EF_list = []
    #R precision list
    R_list = []

    for query in queries:
        relevant_doc = get_doc_relevants_query(query)
        A = Evaluation(query=queries[query], sample_test=relevant_doc)
        A.precision_for_relevant_doc(index)
        A.interpolate_rappel_precision()
        L.append(A.rappel_precision_interpolation)
        #Computing average precision
        ap_list += [A.average_precision(index)]
        #Computes E, F measures
        EF_list += [A.compute_measures(index, 20)]
        #Computes R precision
        R_list += [A.rprecision(index)]

    #Calculate mean average precision
    map_v = mean(ap_list)
    print("The mean average precision is {}".format(str(map_v)))

    #Calculate the average of each value of recall
    interpolate_general = calculate_average(L)

    #Plot the interpolated Recall/Precision curve
    rappels = list(interpolate_general.keys())
    precisions = list(interpolate_general.values())
    plt.scatter(rappels, precisions)
    plt.title('Precision/Recall interpolation')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.show()

    #Print E,F,R measures for the queries
    for i in range(len(queries)):
        if EF_list[i][0] is None:
            print(
                "For query {}:\n"
                "It has no relevant documents, so the statistics cannot be computed\n"
                .format(str(queries[i + 1])))
        else:
            print(
                "For query {0}:\n the E measure is {1}, the F measure is {2} and the R precision is {3}\n"
                .format(str(queries[i + 1]), str(EF_list[i][0]), EF_list[i][1],
                        R_list[i]))
Example #7
def train(session_name=None):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Cnn(config)  # model builder
    trainer = Training(config)  # training ops
    evaluation = Evaluation(config)  # evaluation ops
    logger = Logger(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.BATCH_SIZE)

        #   training
        print('Building model...')
        predictions_training = model.inference(x=data_set.training_set.x,
                                               mode_name=config.MODE.TRAINING)
        loss_training = evaluation.loss(predictions=predictions_training,
                                        labels=data_set.training_set.y,
                                        mode_name=config.MODE.TRAINING)
        accuracy_training = evaluation.accuracy(
            predictions=predictions_training,
            labels=data_set.training_set.y,
            mode_name=config.MODE.TRAINING)
        global_step_tensor = tf.contrib.framework.get_or_create_global_step()
        train_op = trainer.train(
            loss=loss_training,
            global_step=global_step_tensor,
            num_examples_per_epoch_for_train=data_set.training_set.size)

        tf.get_variable_scope().reuse_variables()

        #   validation
        predictions_validation = model.inference(
            x=data_set.validation_set.x, mode_name=config.MODE.VALIDATION)
        loss_validation = evaluation.loss(predictions=predictions_validation,
                                          labels=data_set.validation_set.y,
                                          mode_name=config.MODE.VALIDATION)
        accuracy_validation = evaluation.accuracy(
            predictions=predictions_validation,
            labels=data_set.validation_set.y,
            mode_name=config.MODE.VALIDATION)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name, sess.graph)
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)
            sessions_helper.restore()

            global_step = 0
            epoch = 0
            step = 0
            start_time = datetime.now()
            print('Starting training with {} epochs {} steps each...'.format(
                config.EPOCHS,
                int(data_set.training_set.size / config.BATCH_SIZE)))
            print()
            try:
                for epoch in range(config.EPOCHS):
                    for step in range(
                            int(data_set.training_set.size /
                                config.BATCH_SIZE)):
                        start_time_op = time.time()
                        _, summary, loss_training_value, accuracy_training_value = sess.run(
                            [
                                train_op, merged, loss_training,
                                accuracy_training
                            ])
                        duration = time.time() - start_time_op
                        global_step = tf.train.global_step(
                            sess, global_step_tensor)
                        logger.log(global_step=global_step,
                                   epoch=epoch + 1,
                                   step=step + 1,
                                   duration=duration,
                                   loss=loss_training_value,
                                   accuracy=accuracy_training_value,
                                   mode=config.MODE.TRAINING)

                        if global_step % config.LOG_PERIOD == 0:  # update tensorboard
                            summary_writer.add_summary(summary, global_step)

                        if global_step == 1 or global_step % config.SAVE_PERIOD == 0:  # save model
                            sessions_helper.save(
                                global_step_tensor=global_step_tensor,
                                message='Initial saving...')

                        if math.isnan(loss_training_value):
                            print("loss is NaN, breaking training...")
                            exit(-1)

                        if loss_training_value <= config.TARGET_LOSS:  # early stop with good results

                            sessions_helper.save(
                                global_step_tensor=global_step_tensor,
                                message=
                                'Model reached {} which is less than target loss, saving model...'
                                .format(loss_training_value))

                            sessions_helper.end()

                            return session_name

                    # validate
                    loss_validation_value, accuracy_validation_value = sess.run(
                        [loss_validation, accuracy_validation])
                    logger.log(global_step=global_step,
                               epoch=epoch + 1,
                               step=step + 1,
                               duration=1,
                               loss=loss_validation_value,
                               accuracy=accuracy_validation_value,
                               mode=config.MODE.VALIDATION)

                sessions_helper.save(
                    global_step_tensor=global_step_tensor,
                    message='OutOfRangeError occurred, saving model...')
                print("Restarting training...")
                train(session_name)

            except KeyboardInterrupt:

                sessions_helper.save(
                    global_step_tensor=global_step_tensor,
                    message='User requested to stop training, saving model...')
                sessions_helper.end()

                return session_name

            sessions_helper.save(
                global_step_tensor=global_step_tensor,
                message="Training finished in {}, saving model...".format(
                    datetime.now() - start_time))
            sessions_helper.end()

            return session_name
Example #8
def test(session_name=None, is_visualize=False):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Clstmnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING,
                                              reuse_lstm=None)
        mse = evaluation.loss(predictions=predictions_testing,
                              labels=data_set.testing_set.y,
                              mode_name=config.MODE.TESTING)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)

            sessions_helper.restore()

            print()
            summary = None
            start_time = time.time()
            mses = []
            actual_labels = []
            predicted_labels = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)

                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step,
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()

                    example_image, actual_label, predicted_label, mse_result = sess.run(
                        [
                            data_set.testing_set.x, data_set.testing_set.y,
                            predictions_testing, mse
                        ])

                    mses.append(mse_result)
                    actual_labels.append(actual_label)
                    predicted_labels.append(predicted_label)

                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label, example_image,
                                                   mse_result)

            summary_writer.add_summary(summary, 1)

            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: MSE @ 1 = %.9f' %
                  (datetime.now(), np.array(mses).mean()))

            visualization.display_on_map(actual_labels, predicted_labels,
                                         session_name,
                                         np.array(mses).mean())

            sessions_helper.end()
Example #9
    def __init__(self, train_env, k):
        Evaluation.__init__(self, k)
        self.train_env = train_env
Example #10
def test(session_name=None, is_visualize=False):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()  # general settings
    data_sets = DataSet(config)  # data sets retrieval
    model = Cnn(config)  # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING)
        is_correct = evaluation.correct_number(predictions_testing,
                                               data_set.testing_set.y)
        predictions_testing = tf.argmax(predictions_testing, 1)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)

            sessions_helper.restore()

            print()
            true_count = 0
            summary = None
            start_time = time.time()
            labels = []
            predictions = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)

                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step,
                        int(data_set.testing_set.size /
                            config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()

                    example_image, actual_label, predicted_label, is_correct_result = sess.run(
                        [
                            data_set.testing_set.x, data_set.testing_set.y,
                            predictions_testing, is_correct
                        ])
                    true_count += np.sum(is_correct_result)

                    labels.append(actual_label)
                    predictions.append(predicted_label)

                    if is_visualize:
                        visualization.show_example(predicted_label,
                                                   actual_label, example_image,
                                                   is_correct_result)

            summary_writer.add_summary(summary, 1)

            np_labels = np.array(labels)
            np_predictions = np.array(predictions)

            conf_matrix = tf.confusion_matrix(
                labels=tf.squeeze(np_labels),
                predictions=tf.squeeze(np_predictions),
                num_classes=config.NUM_CLASSES)
            print()
            c_m = sess.run(conf_matrix)
            print(c_m)

            precision = true_count / data_set.testing_set.size
            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: accuracy @ 1 = %.3f' %
                  (datetime.now(), precision * 100))

            sessions_helper.end()
Example #11
    if with_attribute and not reweighting and False:
        trainer = AttributeTrainer(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            num_attrs=len(attrs),
            save_dir=save_dir,
            optimizer=None,
            summary_writer=None,
            attribute_list=attrs,
            with_attribute=with_attribute,
            num_classes=CONFIG['DATASET']['NUM_CATEGORY'],
            criterion=nn.CrossEntropyLoss(),
            config=CONFIG)

    if torch.cuda.is_available():
        model = model.cuda()

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    model.eval()
    #if with_attribute:
    #    trainer.eval(epoch=100)
    evaluate = Evaluation(model=model,
                          dataloader=val_loader,
                          classes=class_names,
                          ten_crops=CONFIG['TESTING']['TEN_CROPS'],
                          with_attribute=CONFIG['MODEL']['WITH_ATTRIBUTE'])
    evaluate.test(topk=(1, 2, 5))
Example #12
def evaluate_models_xval(dataset: Dataset,
                         models: List[EvalModel],
                         coldstart=False,
                         cs_type='user',
                         n_entries=0,
                         evaluater=None,
                         n_fold=5,
                         repeat=1):
    (inds_u, inds_i, y, users_features, items_features) = dataset.data

    folds = []

    for _ in range(repeat):
        if coldstart and cs_type == 'user':
            if n_entries == 0:
                fold = kfold.kfold_entries(n_fold, inds_u)
            else:
                fold = kfold.kfold_entries_plus(n_fold, inds_u, n_entries)
        elif coldstart and cs_type == 'item':
            if n_entries == 0:
                fold = kfold.kfold_entries(n_fold, inds_i)
            else:
                fold = kfold.kfold_entries_plus(n_fold, inds_i, n_entries)
        elif coldstart:
            raise ValueError('unknown cs_type')
        else:
            fold = kfold.kfold(n_fold, inds_u)

        folds.extend(list(fold))

    if evaluater is None:
        evaluater = Evaluation()

    # Create results list
    results = {}
    for name, model_type, config in models:
        if issubclass(model_type, AbstractModel):
            results[name] = evaluater.get_results_class()

        elif issubclass(model_type, HybridModel):
            results[name] = evaluater.get_results_class()
            results[
                name + '_' +
                config.model_type_cf.__name__] = evaluater.get_results_class()
            results[
                name + '_' +
                config.model_type_md.__name__] = evaluater.get_results_class()

        else:
            raise TypeError('Invalid model_type')

    for xval_train, xval_test in folds:

        # Dataset training
        inds_u_train = inds_u[xval_train]
        inds_i_train = inds_i[xval_train]
        y_train = y[xval_train]

        # Dataset testing
        inds_u_test = inds_u[xval_test]
        inds_i_test = inds_i[xval_test]
        y_test = y[xval_test]

        user_dist = np.bincount(inds_u_train, minlength=dataset.n_users)
        item_dist = np.bincount(inds_i_train, minlength=dataset.n_items)
        evaluater.update_parts(user_dist, item_dist)

        train = ([inds_u_train, inds_i_train], y_train)
        test = ([inds_u_test, inds_i_test], y_test)

        for name, model_type, config in models:

            if issubclass(model_type, AbstractModelCF):
                model = model_type(dataset.n_users, dataset.n_items, config)
                result = _analyze_model(model, evaluater, train, test)
                results[name].add(result)

            elif issubclass(model_type, AbstractModelMD):
                model = model_type(users_features, items_features, config)
                result = _analyze_model(model, evaluater, train, test)
                results[name].add(result)

            elif issubclass(model_type, HybridModel):
                model = model_type(users_features, items_features, config)
                result_hybrid, result_cf, result_md = _analyze_hybrid_as_model(
                    model, evaluater, train, test)

                results[name].add(result_hybrid)
                results[name + '_' +
                        config.model_type_cf.__name__].add(result_cf)
                results[name + '_' +
                        config.model_type_md.__name__].add(result_md)

    return results
Example #13
def _analyze_model(model, evaluation: Evaluation, train, test)\
        -> EvaluationResult:
    model.fit(*train)
    result = evaluation.evaluate(model, *test)
    return result
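The model passed in is any model from this code base that exposes fit/predict; evaluate_models_xval above constructs CF models as model_type(dataset.n_users, dataset.n_items, config). A sketch with a hypothetical class name (SomeModelCF is an assumption):

# SomeModelCF stands in for any AbstractModelCF subclass; dataset, config, train and
# test are prepared exactly as in evaluate_models_single / evaluate_models_xval.
model = SomeModelCF(dataset.n_users, dataset.n_items, config)
result = _analyze_model(model, Evaluation(), train, test)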
Example #14
def evaluate_hybrid_xval(dataset: Dataset,
                         config,
                         coldstart=False,
                         cs_type='user',
                         n_entries=0,
                         evaluater=None,
                         n_fold=5,
                         repeat=1):
    (inds_u, inds_i, y, users_features, items_features) = dataset.data

    folds = []

    for _ in range(repeat):
        if coldstart and cs_type == 'user':
            if n_entries == 0:
                fold = kfold.kfold_entries(n_fold, inds_u)
            else:
                fold = kfold.kfold_entries_plus(n_fold, inds_u, n_entries)
        elif coldstart and cs_type == 'item':
            if n_entries == 0:
                fold = kfold.kfold_entries(n_fold, inds_i)
            else:
                fold = kfold.kfold_entries_plus(n_fold, inds_i, n_entries)
        elif coldstart:
            raise ValueError('unknown cs_type')
        else:
            fold = kfold.kfold(n_fold, inds_u)

        folds.extend(list(fold))

    if evaluater is None:
        evaluater = Evaluation()

    # Create results list
    results_before = evaluater.get_results_hybrid_class()
    results_after = evaluater.get_results_hybrid_class()

    for xval_train, xval_test in folds:

        # Dataset training
        inds_u_train = inds_u[xval_train]
        inds_i_train = inds_i[xval_train]
        y_train = y[xval_train]
        n_train = len(y_train)

        # Dataset testing
        inds_u_test = inds_u[xval_test]
        inds_i_test = inds_i[xval_test]
        y_test = y[xval_test]

        user_dist = np.bincount(inds_u_train, minlength=dataset.n_users)
        item_dist = np.bincount(inds_i_train, minlength=dataset.n_items)
        evaluater.update_parts(user_dist, item_dist)

        train = ([inds_u_train, inds_i_train], y_train)
        test = ([inds_u_test, inds_i_test], y_test)

        model = HybridModel(users_features, items_features, config)

        result_before, result_after = _analyze_hybrid(model, evaluater, train,
                                                      test)

        results_before.add(result_before)
        results_after.add(result_after)

    return results_before, results_after
Example #15
                    "save_dir": save_dir,
                    "optimizer": optimizer,
                    "summary_writer": summary_writer,
                    "attribute_list": attrs,
                    "with_attribute": with_attribute,
                    "num_classes": CONFIG['DATASET']['NUM_CATEGORY'],
                    "criterion": criterion,
                    "config": CONFIG}
    if with_attribute and not reweighting and False:
        trainer = AttributeTrainer(**trainer_args)
    else:
        trainer = ClassificationTrainer(**trainer_args)

    evaluate = Evaluation(model=model,
                          dataloader=val_loader,
                          classes=class_names,
                          ten_crops=CONFIG['TESTING']['TEN_CROPS'],
                          with_attribute=with_attribute)

    epochs = CONFIG['TRAINING']['EPOCH']
    best_top1 = 0.0
    for epoch in range(epochs):
        print("Epoch {}".format(epoch + 1))
        epoch_time = time.time()
        trainer.train(epoch)
        scheduler.step()
        print("LR1/LR2: [{}/{}], Train Time: {:.2f}".format(
            optimizer.param_groups[0]['lr'],
            optimizer.param_groups[1]['lr'],
            time.time() - epoch_time
        ))
Example #16
np.random.seed(0)

from evaluation.evaluation import Evaluation, EvaluationResults, EvaluationResultsHybrid
from hybrid_model.hybrid import HybridModel
from hybrid_model.config import hybrid_config
from hybrid_model.dataset import get_dataset
from util import kfold

user_coldstart = False
n_entries = 0
n_fold = 5

epochs = 10

evaluation = Evaluation()
dataset = get_dataset('ml100k')

results_models = [EvaluationResultsHybrid() for _ in range(epochs + 1)]
results_hybrid = [EvaluationResults() for _ in range(epochs + 1)]

(inds_u, inds_i, y, users_features, items_features) = dataset.data

if user_coldstart:
    if n_entries == 0:
        fold = kfold.kfold_entries(n_fold, inds_u)
    else:
        fold = kfold.kfold_entries_plus(n_fold, inds_u, n_entries)
else:
    fold = kfold.kfold(n_fold, inds_u)
Example #17
def evaluate_models_single(dataset: Dataset,
                           models: List[EvalModel],
                           coldstart=False,
                           cs_type='user',
                           n_entries=0,
                           evaluater=None,
                           n_fold=5):
    (inds_u, inds_i, y, users_features, items_features) = dataset.data

    if coldstart and cs_type == 'user':
        if n_entries == 0:
            fold = kfold.kfold_entries(n_fold, inds_u)
        else:
            fold = kfold.kfold_entries_plus(n_fold, inds_u, n_entries)
    elif coldstart and cs_type == 'item':
        if n_entries == 0:
            fold = kfold.kfold_entries(n_fold, inds_i)
        else:
            fold = kfold.kfold_entries_plus(n_fold, inds_i, n_entries)
    elif coldstart:
        raise ValueError('unknown cs_type')
    else:
        fold = kfold.kfold(n_fold, inds_u)

    fold = list(fold)

    xval_train, xval_test = fold[2]

    # Dataset training
    inds_u_train = inds_u[xval_train]
    inds_i_train = inds_i[xval_train]
    y_train = y[xval_train]

    # Dataset testing
    inds_u_test = inds_u[xval_test]
    inds_i_test = inds_i[xval_test]
    y_test = y[xval_test]

    if evaluater is None:
        evaluater = Evaluation()

    user_dist = np.bincount(inds_u_train, minlength=dataset.n_users)
    item_dist = np.bincount(inds_i_train, minlength=dataset.n_items)
    evaluater.update_parts(user_dist, item_dist)

    train = ([inds_u_train, inds_i_train], y_train)
    test = ([inds_u_test, inds_i_test], y_test)

    results = {}

    for name, model_type, config in models:

        if issubclass(model_type, AbstractModelCF):
            model = model_type(dataset.n_users, dataset.n_items, config)
            results[name] = _analyze_model(model, evaluater, train, test)

        elif issubclass(model_type, AbstractModelMD):
            model = model_type(users_features, items_features, config)
            results[name] = _analyze_model(model, evaluater, train, test)

        elif issubclass(model_type, HybridModel):
            model = model_type(users_features, items_features, config)
            results[name],\
            results[name + '_' + config.model_type_cf.__name__],\
            results[name + '_' + config.model_type_md.__name__] = \
                _analyze_hybrid_as_model(model, evaluater, train, test)

        else:
            raise TypeError('Invalid model_type')

    return results
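The models argument is a list of (name, model_class, config) tuples, as the unpacking above shows. A hedged usage sketch, assuming HybridModel, hybrid_config and get_dataset import as in the other examples:

models = [('hybrid', HybridModel, hybrid_config)]
results = evaluate_models_single(get_dataset('ml100k'), models)
for name, result in results.items():
    print(name, result)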