def __init__(self, use_normalized_data=True, start_from_zero=False, learning_rate=0.001):
    """Set up logging, load the ratings data, and initialize the PMF
    hyper-parameters and the latent factor matrices U (users) / V (items)."""
    init_logger(log_file='log/pmf.log', log_level=logging.INFO)
    self.exp_id = datetime.today().strftime('%Y%m%d%H%M%S')

    # NOTE(review): `ratings_file` is not a parameter of this method; it is
    # presumably a module-level global -- confirm it is defined before use.
    self.ratings_file = ratings_file
    self.load_data()
    self.obs_num = self.ratings_vector.shape[0]

    self.use_normalized_data = use_normalized_data
    # Whether user_id/item_id in the data start from 0 (the Douban data does).
    self.start_from_zero = start_from_zero
    if self.use_normalized_data:
        self.generate_normalized_ratings()
    self.split_data()

    # Training hyper-parameters.
    self.learning_rate = learning_rate
    self.epsilon = learning_rate   # learning rate (kept under both names)
    self.lamb = 0.01               # regularization parameter
    self.momentum = 0.8
    self.max_epoch = 1000          # number of training iterations
    self.feat_num = 10             # number of latent features

    # uid/vid counts are taken from the uids that appear in the observations;
    # how best to split the data is an open question as well.
    self.user_num = self.ratings_vector[:, 0].max()
    self.item_num = self.ratings_vector[:, 1].max()
    self.U_shape = (self.user_num, self.feat_num)
    self.V_shape = (self.item_num, self.feat_num)
    # U: user feature matrix, V: item feature matrix, both drawn from a
    # standard Gaussian.
    self.U = np.random.standard_normal(self.U_shape)
    self.V = np.random.standard_normal(self.V_shape)
def __init__(self, use_normalized_data=True, start_from_zero=False, learning_rate=0.001):
    """Initialize the PMF model: logging, data loading, hyper-parameters and
    randomly initialized latent factor matrices."""
    init_logger(log_file='log/pmf.log', log_level=logging.INFO)
    self.exp_id = datetime.today().strftime('%Y%m%d%H%M%S')
    # NOTE(review): `ratings_file` is not defined in this method's scope;
    # it is presumably a module-level global -- verify against the full file.
    self.ratings_file = ratings_file
    self.load_data()
    self.obs_num = self.ratings_vector.shape[0]
    self.use_normalized_data = use_normalized_data
    # True when user_id/item_id start at 0 in the data set (Douban does).
    self.start_from_zero = start_from_zero
    if self.use_normalized_data:
        self.generate_normalized_ratings()
    self.split_data()
    self.learning_rate = learning_rate
    self.epsilon = learning_rate  # learning rate
    self.lamb = 0.01              # regularization strength
    self.momentum = 0.8
    self.max_epoch = 1000         # iteration budget
    self.feat_num = 10
    # The id ranges come from the uids seen in the observations; how to
    # split the data is also an open question.
    self.user_num = self.ratings_vector[:, 0].max()
    self.item_num = self.ratings_vector[:, 1].max()
    self.U_shape = (self.user_num, self.feat_num)
    self.V_shape = (self.item_num, self.feat_num)
    # Latent factor matrices sampled from a standard normal distribution:
    # U holds user features, V holds item features.
    self.U = np.random.standard_normal(self.U_shape)
    self.V = np.random.standard_normal(self.V_shape)
def set_logfile(config, args):
    """Pick the FMG log file name from *config*, store it under
    config['log_filename'], and initialize logging.

    *args* is accepted for interface compatibility but unused here.
    """
    dt, exp_type, sn = config['dt'], config['exp_type'], config['sn']
    if exp_type == 'vary_mg':
        # The regularization sweep gets its own file per reg value.
        log_filename = 'log/fmg_%s_%s_split%s_reg%s.log' % (
            dt, exp_type, sn, config['reg'])
    else:
        log_filename = 'log/fmg_%s_%s_split%s.log' % (dt, exp_type, sn)
    config['log_filename'] = log_filename
    init_logger('', config['log_filename'], logging.INFO, False)
def init_conifg(dt_arg, reg, exp_type, eps, K=10, F=10):
    """Set module globals for one experiment run.

    Selects the rating filename for the data set *dt_arg*, builds the log
    file name for the given *exp_type* (1 = run-once, 2 = vary reg_v),
    assigns a fresh ``exp_id`` and initializes the module ``logger``.

    Raises:
        ValueError: if *exp_type* is neither 1 nor 2.  (Previously this fell
        through and crashed later with an UnboundLocalError on
        ``log_filename``.)
    """
    global rating_filename
    global logger
    global exp_id
    global dt

    dt = dt_arg
    if dt == 'yelp':
        rating_filename = 'ratings_filter5'
    elif dt in ('yelp-200k', 'yelp-50k', 'yelp-10k', 'yelp-5k', 'yelp-100k',
                'douban', 'cikm-yelp',
                'amazon-200k', 'amazon-50k', 'amazon-100k', 'amazon-10k',
                'amazon-5k'):
        # All of these data sets share the plain 'ratings' file.
        rating_filename = 'ratings'
    elif dt == 'yelp-sample':
        rating_filename = ''
    elif dt in ('ml-100k', 'ml-1m', 'ml-10m'):
        rating_filename = '%s-rating' % dt
    elif dt == 'amazon-app':
        rating_filename = 'filter5_ratings_Apps_for_Android'
    # NOTE: an unrecognized dt leaves the global rating_filename untouched,
    # matching the original behavior.

    if exp_type == 1:
        log_filename = 'log/%s_fm_glasso_once_reg%s_eps%s_K%s_F%s.log' % (
            dt, reg, eps, K, F)
    elif exp_type == 2:
        log_filename = 'log/%s_fm_glasso_regv%s_eps%s_K%s_F%s.log' % (
            dt, reg, eps, K, F)
    else:
        raise ValueError('unsupported exp_type: %r (expected 1 or 2)' % (exp_type,))

    exp_id = int(time.time())
    logger = init_logger('exp_%s' % exp_id, log_filename, logging.INFO, False)
def set_logfile(config, args):
    """Build the glasso log file name from *config*/*args*, store it under
    config['log_filename'], initialize logging and log the full config.

    The name depends on which regularization keys are present: a single
    'reg' (optionally with the motif id parsed from args.mg), or the
    separate 'reg_W'/'reg_P' pair.
    """
    if config.get('reg'):
        if args.mg:
            # Fix: use a raw string for the regex -- '\d' in a plain literal
            # is an invalid escape sequence (SyntaxWarning on Python >= 3.12).
            motif = re.search(r'm\d', args.mg).group(0)
            log_filename = 'log/%s_%s_%s_%s_reg%s.log' % (
                config['dt'], config.get('log_filename'), "glasso", motif,
                config.get('reg'))
        else:
            log_filename = 'log/%s_%s_%s_reg%s.log' % (
                config['dt'], config.get('log_filename'), "glasso",
                config.get('reg'))
    else:
        log_filename = 'log/%s_%s_%s_regW%s_regP%s.log' % (
            config['dt'], config.get('log_filename'), "glasso",
            config.get('reg_W'), config.get('reg_P'))
    config['log_filename'] = log_filename
    init_logger('exp_%s' % config['exp_id'], config['log_filename'],
                logging.INFO, False)
    logging.info('******\n%s\n******', config)
import time
import logging
import random
from datetime import datetime
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from logging_util import init_logger
from rec_dal import RECDAL
from constant import LR_log_file, vali_output_path, test_output_path

# Data-access layer and logging are initialized at import time.
dal = RECDAL()
init_logger(log_file=LR_log_file, log_level=logging.INFO, print_console=True)

#sql for offline prediction
# NOTE(review): `train_pos_sql` is assigned twice -- the first (wide) query
# below is dead code, immediately overwritten by the narrower one.  Inside
# the dead query, `l3d_buys` also lacks the `s.` prefix the other columns
# use.  Confirm which query is actually intended before cleaning up.
train_pos_sql = '''
    select l.user_id, l.item_id, s.looks, s.stores, s.carts, s.buys, s.total,
    s.l3d_looks, s.l3d_stores, s.l3d_carts, l3d_buys, s.l3d_total,
    s.lc_date_delta, s.y_looks, s.y_stores, s.y_carts, s.y_buys, s.y_total,
    s.lc_date_delta, s.item_total, s.item_l3d_total, s.item_yes_total, l.label
    from split_20141217_labels as l
    join split_20141217_stats as s
    on l.user_id=s.user_id and l.item_id=s.item_id
    where l.label = 1
'''
train_pos_sql = '''
    select s.user_id, s.item_id, s.buys, s.l3d_buys, s.y_buys, s.total,
    s.l3d_total, s.y_total, s.item_total, s.item_l3d_total, s.item_yes_total,
    s.lc_date_delta, l.label
    from split_20141217_labels as l
    join split_20141217_stats as s
    on l.user_id=s.user_id and l.item_id=s.item_id
    where l.label = 1
'''
train_neg_sql = '''
# Fix: `time` and `os` are used below (timestamp, os.path.join) but were
# never imported, causing a NameError as soon as the module loads.
import os
import time
import tempfile
import platform
import datetime
import argparse
from configparser import ConfigParser, ExtendedInterpolation
from typing import List, Dict, Tuple
import subprocess
import release_task_reader
from urllib.request import urlretrieve
from urllib.error import HTTPError
from release_task_reader import ReleaseTask
from installer_utils import PackagingError
from runner import exec_cmd, async_exec_cmd
from logging_util import init_logger

log = init_logger(__name__, debug_mode=False)
# Wall-clock timestamp captured once at import, used to tag this run.
timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d--%H:%M:%S')


class QtRepositoryLayout:
    """Describes the on-disk layout of a Qt package repository for one
    license, rooted at <root_path>/<license>/."""

    def __init__(self, root_path: str, license: str, repo_domain: str) -> None:
        self.root_path = root_path
        self.license = license
        self.repo_domain = repo_domain
        # Repository lifecycle stage directory names.
        self.pending = "pending"
        self.staging = "staging"
        self.production = "production"
        # <root_path>/<license>/<pending|staging|production>/<repo_domain>/
        # /data/online_repositories/opensource/pending|staging|production/qtsdkrepository/
        self.base_repo_path = os.path.join(self.root_path, self.license)
def run_all_epinions(): # run(path_str) for path_str in ['ratings_only']: run(path_str) for path_str in [ 'UUB_m1', 'UUB_m2', 'UUB_m3', 'UUB_m4', 'UUB_m5', 'UUB_m6', 'UUB_m7' ]: for n in range(11): alpha = n * 0.1 path_str1 = '%s_%s' % (path_str, alpha) print 'run for ', path_str1 run(path_str1) if __name__ == '__main__': global dir_ if len(sys.argv) == 3: dt = sys.argv[1] split_num = sys.argv[2] dir_ = 'data/%s/exp_split/%s/' % (dt, split_num) log_filename = 'log/%s_mf_feature_geneartion_split%s.log' % (dt, split_num) exp_id = int(time.time()) logger = init_logger('exp_%s' % exp_id, log_filename, logging.INFO, False) run_all_epinions() else: print 'please speficy the data and path_str' sys.exit(0)
# NOTE(review): this chunk appears to be the tail of a larger script --
# `filename` and `res` below must be defined earlier in the file (likely
# inside a grid-search routine whose def is above this view); confirm the
# intended indentation/scope against the full source.
# Write the grid-search results, one tab-separated (F, K, eps, lamb, rmse)
# row per line.
fw = open('data/yelp/samples/grid_res_%s' % filename, 'w+')
fw.write('\n'.join(
    ['%s\t%s\t%s\t%s\t%s' % (f, k, e, l, r) for f, k, e, l, r in res]))
fw.close()

if __name__ == '__main__':
    if len(sys.argv) == 3:
        dt = sys.argv[1]
        # NOTE(review): `global` at module level is a no-op; these lines
        # have no effect here.
        global logger
        global dir_
        dir_ = 'data/%s/' % dt
        exp_id = int(time.time())
        log_filename = 'log/%s_mf.log' % dt
        logger = init_logger('exp_%s' % str(exp_id), log_filename,
                             logging.INFO, False)
        if int(sys.argv[2]) == 1:
            # Baseline MF run with fixed hyper-parameters.
            filename = dir_ + 'ratings.txt'
            K = 10          # number of latent features
            eps = 10        # learning rate
            lamb = 1        # regularization strength
            max_iter = 500  # iteration budget
            exp_rmses = []
            for i in range(1):
                exp_rmses.append(
                    run_basedline(filename, K, eps, lamb, max_iter,
                                  silent_run=False))