# DataFrame.append is deprecated (removed in pandas 2.0); concatenate the two splits instead.
data_rating = pd.concat([data_rating_train, data_rating_test], ignore_index=True)

print('Range of userId is [{}, {}]'.format(data_rating.userId.min(), data_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(data_rating.itemId.min(), data_rating.itemId.max()))

# Read the grouping information
if args.pretrain_grouping:
    data_grouping = pd.read_csv(args.data_grouping, sep=",", header=0, names=['friendId', 'tagId', 'score'], engine='python')
    config['num_friends_pretrain'] = int(data_grouping.friendId.max() + 1)
    config['num_items_pretrain'] = int(data_grouping.tagId.max() + 1)
    del data_grouping
    print("Grouping data loaded.")

# Process the tweet
vocab = Word()
tweet = vocab.load_tweets(data_tweet, args.max_seq_len)
pad_word = vocab.pad
tweet_pad = np.full(shape=(1, args.max_seq_len), fill_value=pad_word, dtype=np.int64)
tweet = np.vstack([tweet, tweet_pad])
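# Why the extra all-pad row: it gives downstream lookups one fixed index that can
# stand in for "user/item has no tweet". This is a hedged sketch of that assumption,
# using only names defined above (`tweet`, `pad_word`); it is not taken from the source.
no_tweet_idx = tweet.shape[0] - 1                 # index of the appended all-pad row
assert (tweet[no_tweet_idx] == pad_word).all()    # the row contains only pad tokens
# e.g. tweet[np.array([0, 3, no_tweet_idx])] yields a (3, max_seq_len) batch in which
# the last entry acts as the "missing tweet" placeholder.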

# config
config['num_users'], config['num_items'] = int(data_rating.userId.max() + 1), int(data_rating.itemId.max() + 1)
config['user_friends'], config['user_tweets'], config['num_friends'] = load_friends_tweets(args.data_profile)
args.tweet = tweet
config['args'] = args
config['vocab'] = vocab
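# `load_friends_tweets` is implemented elsewhere in the repo. The helper below is a
# hypothetical reconstruction (it assumes `args.data_profile` is a CSV with
# userId,friendId,tweetId columns -- not confirmed by the source) and is shown only
# to document the expected return values: per-user friend lists, per-user tweet
# lists, and the number of distinct friends. It is NOT the real implementation.
def _load_friends_tweets_sketch(path):
    profile = pd.read_csv(path, sep=",", header=0,
                          names=['userId', 'friendId', 'tweetId'], engine='python')
    user_friends = profile.groupby('userId')['friendId'].apply(list).to_dict()   # userId -> [friendId, ...]
    user_tweets = profile.groupby('userId')['tweetId'].apply(list).to_dict()     # userId -> [tweetId, ...]
    num_friends = int(profile.friendId.max() + 1)
    return user_friends, user_tweets, num_friends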

# Specify the exact model
model = sys.argv[1] if len(sys.argv) == 2 else "gmf"
if model.lower() == "gmf":      # use the name parsed above, not args.model
    config['group'] = False
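# Hedged sketch: guard against unexpected model names before building the engine.
# Only "gmf" is confirmed by this excerpt; any other accepted names would be
# assumptions, so unknown values are simply rejected here.
known_models = {"gmf"}
if model.lower() not in known_models:
    raise ValueError("Unsupported model '{}'".format(model))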
Example #2
print('Range of userId is [{}, {}]'.format(data_rating.userId.min(), data_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(data_rating.itemId.min(), data_rating.itemId.max()))
print('Range of tweetId is [{}, {}]'.format(data_rating.tweetId.min(), data_rating.tweetId.max()))

# Read the grouping information
if args.pretrain_grouping:
    data_grouping = pd.read_csv(args.data_grouping, sep=",", header=0, names=['friendId', 'tagId', 'score'], engine='python')
    config['num_friends_pretrain'] = int(data_grouping.friendId.max() + 1)
    config['num_items_pretrain'] = int(data_grouping.tagId.max() + 1)
    del data_grouping

args.item_num = int(data_rating.itemId.max() + 1)

# Process the tweet
vocab = Word()
tweet = vocab.load_tweets(data_tweet, max_len=200)

# Read the grouping information
data_grouping = pd.read_csv(args.data_grouping, sep=",", header=0, names=['friendId', 'tagId', 'score'], engine='python')
config['num_friends_pretrain'] = int(data_grouping.friendId.max() + 1)
config['num_items_pretrain'] = int(data_grouping.tagId.max() + 1)
del data_grouping

# config
config['num_users'], config['num_items'] = int(data_rating.userId.max() + 1), int(data_rating.itemId.max() + 1)
config['user_friends'], config['num_friends'] = load_friends(args.data_friends)
args.tweet = tweet
config['args'] = args
config['vocab'] = vocab

# Specify the exact model