def __init__(self, **kwargs):
    """Build the train/valid/test splits from the 2-click session CSV.

    The parent constructor is invoked first with empty ``X``/``y``; the
    CSV is then read, ordered by ``SessionID``, truncated to its first
    1,000,000 rows, split via ``Dataset_by_Session.split_df`` and each
    split is registered through ``set_train``/``set_valid``/``set_test``
    with two-class one-hot labels.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSys2ClickSession, self).__init__(X=None, y=None, **kwargs)

    feature_cols = [
        'Price', 'ItemMaxPrice', 'ItemMinPrice', 'ItemTotalClicks',
        'ItemTotalBuys', 'ItemBuyingProbability',
        'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6',
        'cat7', 'cat8', 'cat9', 'cat10', 'cat11', 'cat12',
        'ItemDuration', 'ItemMonth', 'ItemDay', 'ItemHourInMins', 'Sales',
    ]
    label_cols = ['BuyInSession']

    df = pandas.read_csv(
        '/home/wuzz/ihpc/dataset/yoochoose_data/2_clicks_sess.csv')
    # DataFrame.sort() no longer exists in current pandas; sort_values() is
    # its replacement. Fall back to sort() only on releases that predate
    # sort_values().
    if hasattr(df, 'sort_values'):
        df = df.sort_values('SessionID')
    else:
        df = df.sort('SessionID')
    df = df[:1000000]

    # NOTE(review): self.ratio is presumably set by the parent constructor
    # above -- confirm against the base class.
    data = Dataset_by_Session(df=df, sort_by_session=True,
                              train_valid_test=self.ratio)
    train_df, valid_df, test_df = data.split_df()

    def one_hot_labels(frame):
        # The label column comes back as an (n, 1) array; flatten it to
        # length n before the two-class one-hot encoding.
        return make_one_hot(frame[label_cols].values.reshape(len(frame)), 2)

    self.set_train(X=train_df[feature_cols].values,
                   y=one_hot_labels(train_df))
    self.set_valid(X=valid_df[feature_cols].values,
                   y=one_hot_labels(valid_df))
    self.set_test(X=test_df[feature_cols].values,
                  y=one_hot_labels(test_df))
def __init__(self, **kwargs):
    """Populate the train/valid/test splits from the sessions CSV.

    Calls the parent constructor with empty ``X``/``y``, builds a
    ``Dataset_by_Session`` from the CSV, splits its features and labels,
    and registers each split with two-class one-hot labels.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSys, self).__init__(X=None, y=None, **kwargs)

    # NOTE(review): self.ratio is presumably set by the parent constructor
    # above -- confirm against the base class.
    sessions = Dataset_by_Session(
        csv_path='/home/wuzz/ihpc/onlinebehavior/sessions.csv',
        train_valid_test=self.ratio)
    parts = sessions.split(features=sessions.feature, labels=sessions.label)

    train_X = parts['train_fea']
    train_y = make_one_hot(parts['train_lbl'], 2)
    self.set_train(X=train_X, y=train_y)

    valid_X = parts['valid_fea']
    valid_y = make_one_hot(parts['valid_lbl'], 2)
    self.set_valid(X=valid_X, y=valid_y)

    test_X = parts['test_fea']
    test_y = make_one_hot(parts['test_lbl'], 2)
    self.set_test(X=test_X, y=test_y)
def __init__(self, **kwargs):
    """Load the pre-split jitter dataset and register the three splits.

    Calls the parent constructor with empty ``X``/``y``, then reads
    ``train_jitter.csv``, ``valid.csv`` and ``test.csv`` from the data
    directory (each without session sorting) and registers their
    features with two-class one-hot labels.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSysJitter, self).__init__(X=None, y=None, **kwargs)
    root = '/home/wuzz/recsys2015/data/jitter_stdr0_1_dup_15'

    def read_split(filename):
        # Every split is read the same way; only the file name differs.
        return Dataset_by_Session(sort_by_session=False,
                                  csv_path=root + filename)

    train_set = read_split('/train_jitter.csv')
    valid_set = read_split('/valid.csv')
    test_set = read_split('/test.csv')

    registrations = (
        (self.set_train, train_set),
        (self.set_valid, valid_set),
        (self.set_test, test_set),
    )
    for register, split in registrations:
        register(X=split.feature, y=make_one_hot(split.label, 2))
def __init__(self, **kwargs):
    """Register train/valid/test splits read from the sessions CSV.

    The parent constructor runs first with empty ``X``/``y``. The CSV is
    wrapped in a ``Dataset_by_Session``, its features and labels are
    split, and each split is registered with two-class one-hot labels.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSys, self).__init__(X=None, y=None, **kwargs)

    source_csv = '/home/wuzz/ihpc/onlinebehavior/sessions.csv'
    # NOTE(review): self.ratio is presumably provided by the parent
    # constructor -- confirm against the base class.
    dataset = Dataset_by_Session(csv_path=source_csv,
                                 train_valid_test=self.ratio)
    split = dataset.split(features=dataset.feature, labels=dataset.label)

    self.set_train(
        X=split['train_fea'],
        y=make_one_hot(split['train_lbl'], 2),
    )
    self.set_valid(
        X=split['valid_fea'],
        y=make_one_hot(split['valid_lbl'], 2),
    )
    self.set_test(
        X=split['test_fea'],
        y=make_one_hot(split['test_lbl'], 2),
    )
def __init__(self, **kwargs):
    """Register the jitter dataset's train/valid/test splits.

    After calling the parent constructor with empty ``X``/``y``, the
    three CSV files in the jitter data directory are each loaded into a
    ``Dataset_by_Session`` (session sorting disabled), and their features
    are registered alongside two-class one-hot labels.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSysJitter, self).__init__(X=None, y=None, **kwargs)

    base = '/home/wuzz/recsys2015/data/jitter_stdr0_1_dup_15'
    train_csv = base + '/train_jitter.csv'
    valid_csv = base + '/valid.csv'
    test_csv = base + '/test.csv'

    # All three files are read before any split is registered.
    train_part = Dataset_by_Session(sort_by_session=False, csv_path=train_csv)
    valid_part = Dataset_by_Session(sort_by_session=False, csv_path=valid_csv)
    test_part = Dataset_by_Session(sort_by_session=False, csv_path=test_csv)

    train_targets = make_one_hot(train_part.label, 2)
    self.set_train(X=train_part.feature, y=train_targets)

    valid_targets = make_one_hot(valid_part.label, 2)
    self.set_valid(X=valid_part.feature, y=valid_targets)

    test_targets = make_one_hot(test_part.label, 2)
    self.set_test(X=test_part.feature, y=test_targets)
def save_gaussian_one_hot(y, save_dir, std):
    """One-hot encode ``y``, apply Gaussian noise, and save the result.

    The labels are encoded with ``make_one_hot(y, 1998)``, passed through
    ``Gaussian(std=std).apply`` and written with ``np.save`` to
    ``<save_dir>/sample_y_onehot_gaussian_noise_std<std>.npy``.

    Args:
        y: labels handed to ``make_one_hot``.
        save_dir: directory that receives the ``.npy`` file.
        std: value given to ``Gaussian`` and embedded in the file name.
    """
    # Parenthesised single-argument print behaves the same whether print is
    # a statement (Python 2) or a function (Python 3).
    print('saving')
    noisy_one_hot = Gaussian(std=std).apply(make_one_hot(y, 1998))
    # '%s' already applies str() to its argument, so no explicit str(std).
    out_path = "%s/sample_y_onehot_gaussian_noise_std%s.npy" % (save_dir, std)
    with open(out_path, 'wb') as yout:
        np.save(yout, noisy_one_hot)
    print('saving done!')
def __init__(self, **kwargs):
    """Load posterior arrays from disk and register the dataset splits.

    The parent constructor is called with empty ``X``/``y``. Four ``.npy``
    files are then read from the posteriors directory: ``train_y.npy`` and
    ``test_y.npy`` are used as inputs, ``train_lbl.npy`` and
    ``test_lbl.npy`` as labels (one-hot encoded with two classes).

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSysPosterior, self).__init__(X=None, y=None, **kwargs)
    sav_dir = '/home/wuzz/ihpc/dataset/posteriors'

    def load_npy(name):
        # .npy is a binary format: the file must be opened in 'rb' mode,
        # otherwise np.load fails on Python 3 and newline translation can
        # corrupt the data on Windows.
        with open(sav_dir + name, 'rb') as fin:
            return np.load(fin)

    train_X = load_npy('/train_y.npy')
    train_y = load_npy('/train_lbl.npy')
    test_X = load_npy('/test_y.npy')
    test_y = load_npy('/test_lbl.npy')

    self.set_train(X=train_X, y=make_one_hot(train_y, 2))
    # NOTE(review): the validation split is fed the test arrays; no separate
    # validation files are read here -- confirm this is intended.
    self.set_valid(X=test_X, y=make_one_hot(test_y, 2))
    self.set_test(X=test_X, y=make_one_hot(test_y, 2))
def __init__(self, **kwargs):
    """Load the sample features and labels and pass them to the parent.

    ``sample_y.npy`` is read and one-hot encoded with 1998 classes,
    ``sample_X.npy`` is read as the inputs, and both are handed to the
    parent constructor.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    # Named data_dir rather than dir to avoid shadowing the builtin.
    data_dir = '/home/stuwzhz/datasets/spectral-features/npy2'
    # .npy files are binary; 'rb' is required for np.load on Python 3 and
    # avoids newline translation on Windows.
    with open('%s/sample_y.npy' % data_dir, 'rb') as yin, \
            open('%s/sample_X.npy' % data_dir, 'rb') as Xin:
        y = np.load(yin)
        y = make_one_hot(y, 1998)
        X = np.load(Xin)
    super(I2R_Posterior_NoisyFeat_Sample, self).__init__(X=X, y=y, **kwargs)
def save_gaussian_one_hot(y, save_dir, std):
    """Save a Gaussian-noised one-hot encoding of ``y`` as a ``.npy`` file.

    ``y`` is encoded via ``make_one_hot(y, 1998)``, transformed by
    ``Gaussian(std=std).apply`` and stored with ``np.save`` at
    ``<save_dir>/sample_y_onehot_gaussian_noise_std<std>.npy``.

    Args:
        y: labels handed to ``make_one_hot``.
        save_dir: directory that receives the ``.npy`` file.
        std: value given to ``Gaussian`` and embedded in the file name.
    """
    # A parenthesised single string prints identically under Python 2's
    # print statement and Python 3's print function.
    print('saving')
    one_hot_y = make_one_hot(y, 1998)
    gaussian = Gaussian(std=std)
    one_hot_y = gaussian.apply(one_hot_y)
    # '%s' formatting already stringifies std; an explicit str() is redundant.
    target = "%s/sample_y_onehot_gaussian_noise_std%s.npy" % (save_dir, std)
    with open(target, 'wb') as yout:
        np.save(yout, one_hot_y)
    print('saving done!')
def __init__(self, **kwargs):
    """Register train/valid/test splits built from the 2-click session CSV.

    After the parent constructor runs with empty ``X``/``y``, the CSV is
    read, ordered by ``SessionID`` and cut to its first 1,000,000 rows.
    ``Dataset_by_Session.split_df`` supplies three data frames; the
    feature columns of each become ``X`` and the ``BuyInSession`` column
    becomes a two-class one-hot ``y``.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(RecSys2ClickSession, self).__init__(X=None, y=None, **kwargs)

    features = (
        ['Price', 'ItemMaxPrice', 'ItemMinPrice', 'ItemTotalClicks',
         'ItemTotalBuys', 'ItemBuyingProbability']
        + ['cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6',
           'cat7', 'cat8', 'cat9', 'cat10', 'cat11', 'cat12']
        + ['ItemDuration', 'ItemMonth', 'ItemDay', 'ItemHourInMins', 'Sales']
    )
    label = ['BuyInSession']

    df = pandas.read_csv(
        '/home/wuzz/ihpc/dataset/yoochoose_data/2_clicks_sess.csv')
    # Current pandas has no DataFrame.sort(); sort_values() replaces it.
    # Very old releases only offer sort(), so pick whichever exists.
    sort_by = getattr(df, 'sort_values', None)
    if sort_by is None:
        sort_by = df.sort
    df = sort_by('SessionID')[:1000000]

    # NOTE(review): self.ratio is presumably set by the parent constructor
    # above -- confirm against the base class.
    data = Dataset_by_Session(df=df, sort_by_session=True,
                              train_valid_test=self.ratio)
    train_df, valid_df, test_df = data.split_df()

    def to_xy(frame):
        # Labels are selected as an (n, 1) array and flattened to length n
        # before the two-class one-hot encoding.
        inputs = frame[features].values
        targets = make_one_hot(frame[label].values.reshape(len(frame)), 2)
        return inputs, targets

    train_X, train_y = to_xy(train_df)
    self.set_train(X=train_X, y=train_y)
    valid_X, valid_y = to_xy(valid_df)
    self.set_valid(X=valid_X, y=valid_y)
    test_X, test_y = to_xy(test_df)
    self.set_test(X=test_X, y=test_y)
def __init__(self, one_hot=False, **kwargs):
    """Load ``train.npy`` and pass the derived ``X``/``y`` to the parent.

    The loaded array is split into inputs and labels by ``self.make_Xy``.
    Labels are either one-hot encoded with 8 classes or reshaped into a
    single column.

    Args:
        one_hot: when true, encode ``y`` with ``make_one_hot(y, 8)``;
            otherwise reshape ``y`` to ``(len(y), 1)``.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    # Named data_dir rather than dir to avoid shadowing the builtin.
    data_dir = '/Volumes/Storage/Unilever_Challenge/dataset'
    # .npy is binary: open with 'rb' so np.load works on Python 3 and the
    # bytes are not altered by newline translation on Windows.
    with open(data_dir + '/train.npy', 'rb') as Xin:
        data = np.load(Xin)

    X, y = self.make_Xy(data)
    if one_hot:
        y = make_one_hot(y, 8)
    else:
        y = y.reshape((y.shape[0], 1))
    super(Unilever, self).__init__(X=X, y=y, **kwargs)
def __init__(self, **kwargs):
    """Load sample labels, keep those above 150, and use them as X and y.

    ``sample_y.npy`` is read, entries not greater than 150 are dropped,
    and the remainder is one-hot encoded with 1998 classes. The same
    encoded array is passed to the parent constructor as both ``X`` and
    ``y``.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    # Named data_dir rather than dir to avoid shadowing the builtin.
    data_dir = '/home/stuwzhz/datasets/spectral-features/npy2'
    # .npy is binary: 'rb' is required for np.load on Python 3 and avoids
    # newline translation on Windows.
    with open('%s/sample_y.npy' % data_dir, 'rb') as yin:
        y = np.load(yin)

    kept = np.asarray([e for e in y if e > 150])
    encoded = make_one_hot(kept, 1998)
    super(I2R_Posterior_Gaussian_Noisy_Sample, self).__init__(
        X=encoded, y=encoded, **kwargs)
def __init__(self, **kwargs):
    """Build the dataset from sample labels greater than 150.

    Reads ``sample_y.npy``, filters out entries that are not greater than
    150, one-hot encodes the survivors with 1998 classes, and hands that
    single array to the parent constructor as both ``X`` and ``y``.

    Args:
        **kwargs: forwarded unchanged to the parent constructor.
    """
    # Avoid the name dir, which would shadow the builtin.
    npy_dir = '/home/stuwzhz/datasets/spectral-features/npy2'
    label_path = '%s/sample_y.npy' % npy_dir
    # Binary mode is needed: np.load cannot parse a text-mode stream on
    # Python 3, and text mode may rewrite bytes on Windows.
    with open(label_path, 'rb') as yin:
        y = np.load(yin)

    y_tmp = np.asarray([e for e in y if e > 150])
    y_tmp = make_one_hot(y_tmp, 1998)
    super(I2R_Posterior_Gaussian_Noisy_Sample, self).__init__(
        X=y_tmp, y=y_tmp, **kwargs)