Beispiel #1
0
def train_loss(model, epoch, dataset='ml1m'):
    """Compute the sample-weighted mean MSE of ``model`` on the training split.

    Args:
        model: rating model taking (user_inputs, movie_inputs) and returning
            predicted scores.
        epoch: epoch index, used only for the printed/returned summary.
        dataset: 'ml100k' selects the ml100k pickle, anything else ml1m.

    Returns:
        A one-line summary string "Epoch {epoch} MSE: {mse}\n".
    """
    loss_function = nn.MSELoss()
    if dataset == 'ml100k':
        datasets = MovieRankDataset(pkl_file='ml100k_train.p')
    else:
        datasets = MovieRankDataset(pkl_file='ml1m_train.pkl')
    dataloader = DataLoader(datasets, batch_size=100,
                            shuffle=False)

    total_loss = 0.0
    num = 0
    # Evaluation only: hoist no_grad around the whole loop instead of
    # re-entering it per batch.
    with torch.no_grad():
        for batch in dataloader:
            user_inputs = batch['user_inputs']
            movie_inputs = batch['movie_inputs']
            target = batch['target'].to(device)

            batch_len = user_inputs['uid'].shape[0]
            num += batch_len
            predictions = model(user_inputs, movie_inputs)  # batch x score
            loss = loss_function(predictions, target)
            # .item() keeps the accumulator a plain float rather than a
            # chain of tensors.
            total_loss += loss.item() * batch_len
    # Guard against an empty dataset instead of raising ZeroDivisionError.
    epoch_loss = total_loss / num if num else float('nan')
    print("Epoch {} loss:{}".format(epoch, epoch_loss))
    # Do not shadow the builtin `str`.
    return "Epoch {} MSE: {}\n".format(epoch, epoch_loss)
def train(model, num_epochs=5, lr=0.0001):
    """Train ``model`` with Adam + MSE on the 'data.p' dataset.

    Logs the batch loss to TensorBoard every 20 batches, prints the
    summed epoch loss, and dumps all scalars to ./test.json on exit.

    Args:
        model: rating model returning (prediction, user_feature,
            movie_feature) — only the prediction is trained on here.
        num_epochs: number of passes over the dataset.
        lr: Adam learning rate.
    """
    loss_function = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    datasets = MovieRankDataset(pkl_file='data.p')
    dataloader = DataLoader(datasets, batch_size=256, shuffle=True)

    writer = SummaryWriter()
    batches_per_epoch = len(dataloader)
    for epoch in range(num_epochs):
        loss_all = 0.0
        for i_batch, sample_batch in enumerate(dataloader):

            user_inputs = sample_batch['user_inputs']
            movie_inputs = sample_batch['movie_inputs']
            target = sample_batch['target'].to(device)

            model.zero_grad()

            tag_rank, _, _ = model(user_inputs, movie_inputs)

            loss = loss_function(tag_rank, target)
            if i_batch % 20 == 0:
                # Use a monotonically increasing global step; the original
                # `i_batch * 20` reset every epoch and mis-scaled the axis.
                writer.add_scalar('data/loss', loss,
                                  epoch * batches_per_epoch + i_batch)
                print(loss)

            # Accumulate a Python float: summing graph-attached tensors
            # would retain every batch's autograd graph (memory leak).
            loss_all += loss.item()
            loss.backward()
            optimizer.step()
        print('Epoch {}:\t loss:{}'.format(epoch, loss_all))
    writer.export_scalars_to_json("./test.json")
    writer.close()
Beispiel #3
0
def evaluate(model, epoch, dataset='ml1m'):
    """Compute the sample-weighted mean MSE of ``model`` on the test split.

    The original implementation returned only the LAST batch's loss; with
    batch_size=1000 and a larger test set that silently dropped most of the
    data. This version averages over every batch, weighted by batch size.

    Args:
        model: rating model taking (user_inputs, movie_inputs).
        epoch: epoch index, used only for the returned summary line.
        dataset: 'ml100k' selects the ml100k pickle, anything else ml1m.

    Returns:
        A one-line summary string "Epoch {epoch} MSE: {mse}\n".
    """
    loss_function = nn.MSELoss()
    if dataset == 'ml100k':
        datasets = MovieRankDataset(pkl_file='ml100k_test.p')
    else:
        datasets = MovieRankDataset(pkl_file='ml1m_test.pkl')
    dataloader = DataLoader(datasets, batch_size=1000,
                            shuffle=False)

    total_loss = 0.0
    num = 0
    with torch.no_grad():
        for batch in dataloader:
            user_inputs = batch['user_inputs']
            movie_inputs = batch['movie_inputs']
            target = batch['target'].to(device)
            predictions = model(user_inputs, movie_inputs)  # batch x score
            loss = loss_function(predictions, target)
            batch_len = target.shape[0]
            total_loss += loss.item() * batch_len
            num += batch_len
    # Guard against an empty test set.
    mse = total_loss / num if num else float('nan')
    # Do not shadow the builtin `str`.
    return "Epoch {} MSE: {}\n".format(epoch, mse)
Beispiel #4
0
def train_eval(model,
               num_epochs=5,
               lr=0.001,
               batch_size=64,
               dataset='ml1m',
               loss_file='loss_file.txt',
               evaluate_file='evaluate_file.txt'):
    """Train ``model`` and record per-epoch train/test MSE summaries.

    After each epoch, `evaluate` and `train_loss` are run and their summary
    lines collected; once training finishes, the accumulated lines are
    written to ``evaluate_file`` and ``loss_file`` respectively.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_set = MovieRankDataset(
        pkl_file='ml100k_train.p' if dataset == 'ml100k' else 'ml1m_train.pkl')
    loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    eval_lines = []
    loss_lines = []
    for epoch in range(num_epochs):
        for batch_idx, batch in enumerate(loader):
            user_inputs = batch['user_inputs']
            movie_inputs = batch['movie_inputs']
            target = batch['target'].to(device)

            model.zero_grad()
            prediction = model(user_inputs, movie_inputs)
            loss = criterion(prediction, target)

            # Periodic progress print (every 19th batch).
            if batch_idx % 19 == 0:
                print("Epoch {}:{}".format(epoch, loss))

            loss.backward()
            optimizer.step()

        eval_lines.append(evaluate(model, epoch, dataset=dataset))
        loss_lines.append(train_loss(model, epoch, dataset=dataset))

    with open(evaluate_file, 'w') as f:
        f.write("".join(eval_lines))
    with open(loss_file, 'w') as f:
        f.write("".join(loss_lines))
Beispiel #5
0
def saveMovieAndUserFeature(model):
    '''
    Save Movie and User feature into HD

    Runs the model over the (de-duplicated) dataset without gradients,
    collects each unique user's and movie's learned feature vector plus
    its raw input fields, and pickles both mappings under Params/.
    '''

    batch_size = 256

    datasets = MovieRankDataset(pkl_file='data.p', drop_dup=True)
    dataloader = DataLoader(datasets,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=4)

    # format: {id(int) : feature(numpy array)}
    user_feature_dict = {}
    movie_feature_dict = {}
    movies = {}
    users = {}
    with torch.no_grad():
        for i_batch, sample_batch in enumerate(dataloader):
            user_inputs = sample_batch['user_inputs']
            movie_inputs = sample_batch['movie_inputs']

            # B x 1 x 200 = 256 x 1 x 200  -- assumed from original comment;
            # model returns (prediction, user_feature, movie_feature).
            _, feature_user, feature_movie = model(user_inputs, movie_inputs)

            feature_user = feature_user.cpu().numpy()
            feature_movie = feature_movie.cpu().numpy()

            for i in range(user_inputs['uid'].shape[0]):
                uid = user_inputs['uid'][i]  # uid
                mid = movie_inputs['mid'][i]  # mid
                # Hoist the .item() calls: one scalar conversion per row
                # instead of four.
                uid_key = uid.item()
                mid_key = mid.item()

                # `key not in d` -- no need for .keys().
                if uid_key not in users:
                    users[uid_key] = {
                        'uid': uid,
                        'gender': user_inputs['gender'][i],
                        'age': user_inputs['age'][i],
                        'job': user_inputs['job'][i]
                    }
                if mid_key not in movies:
                    movies[mid_key] = {
                        'mid': mid,
                        'mtype': movie_inputs['mtype'][i],
                        'mtext': movie_inputs['mtext'][i]
                    }

                # Keep the FIRST occurrence, matching the original
                # `if key not in dict` guards.
                user_feature_dict.setdefault(uid_key, feature_user[i])
                movie_feature_dict.setdefault(mid_key, feature_movie[i])

            print('Solved: {} samples'.format((i_batch + 1) * batch_size))
    feature_data = {
        'feature_user': user_feature_dict,
        'feature_movie': movie_feature_dict
    }
    dict_user_movie = {'user': users, 'movie': movies}
    print(len(dict_user_movie['user']))
    print(len(feature_data['feature_movie']))
    pkl.dump(feature_data, open('Params/feature_data.pkl', 'wb'))
    pkl.dump(dict_user_movie, open('Params/user_movie_dict.pkl', 'wb'))