Example #1
File: main.py Project: kimiyoung/cf
import logging
import math
import time

import numpy as np

import data     # project-local module providing read_train/read_test/read_cluster (assumed)
import rating   # project-local module providing the rating models (assumed)


def run(args):
    """Run the rating algorithm selected by the arguments.

    Args:
        args: the parsed command-line arguments.
    """
    train = data.read_train(args.training_file, impute=False)
    test = data.read_test(args.test_file)

    start_time = time.time()

    """initialize the model."""
    if args.rating_method == 'user-user':
        model = rating.user_user(train, args.weighting_method, args.metric, args.k)
    elif args.rating_method == 'movie-movie':
        model = rating.item_item(train, args.weighting_method, args.metric, args.k)
    elif args.rating_method == 'pcc':
        model = rating.user_user(train, args.weighting_method, args.metric, args.k, True)
    elif args.rating_method == 'bipartite-user':
        clu_user = data.read_cluster(args.ucluster_file)
        model = rating.bipartite_user(train, clu_user, args.weighting_method, args.metric, args.k)
    elif args.rating_method == 'bipartite-movie':
        clu_movie = data.read_cluster(args.mcluster_file)
        model = rating.bipartite_item(train, clu_movie, args.weighting_method, args.metric, args.k)

    """predict the ratings. assume all models have a query API."""
    py = []
    for i, query in enumerate(test):
        if i % 1000 == 0:
            logging.info('{} out of {}'.format(i, len(test)))
        py.append(model.query(query[0], query[1]))

    """time the program."""
    end_time = time.time()
    print 'running time =', end_time - start_time

    """output the prediction to files. used for test."""
    if args.output_file:
        fout = open(args.output_file, 'w')
        for ppy in py:
            fout.write('{}\n'.format(ppy))
        fout.close()

    """evaluate the results. used for validation."""
    if args.ground_truth:
        gold = data.read_gold(args.ground_truth)
        print 'RMSE =', math.sqrt(((gold - np.array(py)) ** 2).mean())
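For context, here is a minimal sketch of the command-line setup that run appears to expect. Every flag name below is inferred from the attribute accesses in the function body (and from the logging pattern in Example #2), not confirmed by the source:

import argparse
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

parser = argparse.ArgumentParser()
parser.add_argument('training_file')
parser.add_argument('test_file')
parser.add_argument('--rating_method', default='user-user',
                    choices=['user-user', 'movie-movie', 'pcc', 'bipartite-user', 'bipartite-movie'])
parser.add_argument('--weighting_method')
parser.add_argument('--metric')
parser.add_argument('--k', type=int, default=10)
parser.add_argument('--ucluster_file')
parser.add_argument('--mcluster_file')
parser.add_argument('--output_file')
parser.add_argument('--ground_truth')

run(parser.parse_args())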
Example #2
import argparse
import logging
import random
import time

import data                     # project-local module (assumed)
from cluster import bipartite   # import path for bipartite is an assumption


if __name__ == '__main__':
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

    random.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('training_file', help='the file name of the training data')
    parser.add_argument('output_movie', help='the output file for movie clusters')
    parser.add_argument('output_user', help='the output file for user clusters')
    parser.add_argument('--k_movie', help='the number of movie clusters', type=int, default=200)
    parser.add_argument('--k_user', help='the number of user clusters', type=int, default=400)
    parser.add_argument('--iter', help='the number of iterations', type=int, default=10)
    args = parser.parse_args()

    mat = data.read_train(args.training_file, impute=True)

    start_time = time.time()
    
    res_row, res_col = bipartite(args.k_movie, args.k_user, args.iter, mat)

    end_time = time.time()

    with open(args.output_movie, 'w') as fout:
        for e in res_row:
            fout.write('{}\n'.format(e))

    with open(args.output_user, 'w') as fout:
        for e in res_col:
            fout.write('{}\n'.format(e))
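The bipartite function itself is not shown. Below is a hypothetical stub illustrating the interface assumed above: rows (movies) are assigned to k_movie clusters and columns (users) to k_user clusters, with mat assumed to be a 2-D NumPy array. The random assignment is a placeholder, not the project's actual co-clustering algorithm:

import numpy as np

def bipartite(k_movie, k_user, n_iter, mat):
    # Placeholder: the real implementation presumably alternates between
    # clustering rows and columns for n_iter rounds.
    rng = np.random.default_rng(0)
    res_row = rng.integers(k_movie, size=mat.shape[0])
    res_col = rng.integers(k_user, size=mat.shape[1])
    return res_row.tolist(), res_col.tolist()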
Example #3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 10 15:37:16 2018

@author: ashima.garg
"""

import config
import data
import model

if __name__ == "__main__":
    data = data.Data()
    data.read_train(config.TRAIN_X_PATH, config.TRAIN_Y_PATH)
    data.preprocess()
    data.split()
    print("data read")
    
    model = model.Model()
    model.build()
    print("model build")
    
    model.train(data)
    print("model trained")
    
    model.test(data)
    print("model tested")
    # Prediction path, commented out in the original:
    # data.read_test(config.TEST_X_PATH)
    # data.preprocess()
    # print("model predicted")
Example #4
from nltk.corpus import stopwords

from sklearn.metrics import accuracy_score, confusion_matrix

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Train dataset (read_train / read_test are project-local helpers whose import is not shown)
train_df = read_train(
    "/Users/Ricou/Desktop/ANDRE/machine_learning/tweet_sentiment_extraction/data/train.csv"
)
print(train_df.head())
print(train_df.shape)

# Test dataset
test_df = read_test(
    "/Users/Ricou/Desktop/ANDRE/machine_learning/tweet_sentiment_extraction/data/test.csv"
)
#print(test_df.head())
print(test_df.shape)

print("Text preprocessing")
# Check for missing values in the train dataset
print(f'Training null Values:\n{train_df.isnull().sum()}\n')
print(f'Test null Values:\n{test_df.isnull().sum()}')
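The Keras imports above suggest the next step tokenizes the tweets and pads them to a fixed length. A minimal sketch using those imports; the column name 'text', the vocabulary size, and maxlen are assumptions:

tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_df['text'].astype(str))

# Convert each tweet to a sequence of word indices, padded to a common length.
X_train = pad_sequences(tokenizer.texts_to_sequences(train_df['text'].astype(str)), maxlen=50)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_df['text'].astype(str)), maxlen=50)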
Example #5
def train_ld():
    """Build a Loader over the training data with batch size 1."""
    ds = data.Dataset()
    ds.datas = data.read_train()
    ld = data.Loader(ds, 1)
    return ld
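A hypothetical usage, assuming data.Loader is iterable and yields one-sample batches:

ld = train_ld()
for batch in ld:
    pass  # each batch holds a single training example (batch size 1)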