Ejemplo n.º 1
0
    business_stats: Dict[str, Business] = Business.load_from_file(
        business_stats_file)

    print('[ %04ds ] Files loaded' % (time.time() - start_time))

    all_user_features = ['NO_FEAT']
    all_business_features = Business.collect_business_features(business_stats)

    dataset = Dataset()
    dataset.fit(User.extract_user_ids(user_stats),
                Business.extract_business_ids(business_stats),
                user_features=all_user_features,
                item_features=all_business_features)

    user_features = dataset.build_user_features(
        User.build_user_features(user_stats,
                                 User.extract_user_ids(user_stats)), True)

    business_features = dataset.build_item_features(
        Business.build_business_features(
            business_stats, Business.extract_business_ids(business_stats)),
        True)

    print('[ %04ds ] Dataset initialized' % (time.time() - start_time))

    user_avg, user_std = Review.extract_user_average_and_std(training_set)
    normalized_training_reviews = Review.normalize_by_user(
        training_set, user_avg)
    training_interactions = Review.extract_sparse_interaction_matrix(
        normalized_training_reviews)

    interaction_matrix, interaction_weight = dataset.build_interactions(
Ejemplo n.º 2
0
    print('[ %04ds ] Files loaded' % (time.time() - start_time))

    all_user_features = ['NO_FEAT']
    all_business_features = Business.collect_business_features(business_stats)

    all_user_ids = User.extract_user_ids(user_stats)
    all_business_ids = Business.extract_business_ids(business_stats)

    dataset = Dataset()
    dataset.fit(all_user_ids,
                all_business_ids,
                user_features=all_user_features,
                item_features=all_business_features)

    user_features = dataset.build_user_features(
        User.build_user_features(user_stats, all_user_ids), True)

    business_features = dataset.build_item_features(
        Business.build_business_features(business_stats, all_business_ids),
        True)

    print('[ %04ds ] Dataset initialized' % (time.time() - start_time))

    user_avg, user_std = Review.extract_user_average_and_std(training_set)
    normalized_training_reviews = Review.normalize_by_user(
        training_set, user_avg)
    training_interactions = Review.extract_sparse_interaction_matrix(
        normalized_training_reviews)
    training_user_ids = Review.extract_user_ids(normalized_training_reviews)
    training_business_ids = Review.extract_business_ids(
        normalized_training_reviews)