Esempio n. 1
0
    def __init__(self, **kwargs):
        """Load the 2-click session CSV and register train/valid/test splits.

        Reads ``2_clicks_sess.csv``, orders the rows by ``SessionID``, keeps
        the first 1,000,000 rows, splits them through ``Dataset_by_Session``
        with ``self.ratio`` and registers each split with its
        ``BuyInSession`` labels one-hot encoded over 2 classes.

        ``**kwargs`` is forwarded unchanged to the parent constructor.
        """
        super(RecSys2ClickSession, self).__init__(X=None, y=None, **kwargs)
        FEATURE = [
            'Price', 'ItemMaxPrice', 'ItemMinPrice', 'ItemTotalClicks',
            'ItemTotalBuys', 'ItemBuyingProbability', 'cat1', 'cat2', 'cat3',
            'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9', 'cat10', 'cat11',
            'cat12', 'ItemDuration', 'ItemMonth', 'ItemDay', 'ItemHourInMins',
            'Sales'
        ]
        LABEL = ['BuyInSession']

        df = pandas.read_csv(
            '/home/wuzz/ihpc/dataset/yoochoose_data/2_clicks_sess.csv')
        # DataFrame.sort was removed in pandas 0.20; sort_values replaces it.
        # Fall back to the old name only on pandas releases that predate it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values('SessionID')
        else:
            df = df.sort('SessionID')
        df = df[:1000000]

        data = Dataset_by_Session(df=df,
                                  sort_by_session=True,
                                  train_valid_test=self.ratio)
        train_df, valid_df, test_df = data.split_df()

        # Each split is registered the same way: feature columns as X and the
        # flattened label column one-hot encoded over 2 classes as y.
        for register, part in ((self.set_train, train_df),
                               (self.set_valid, valid_df),
                               (self.set_test, test_df)):
            labels = part[LABEL].values.reshape(len(part))
            register(X=part[FEATURE].values, y=make_one_hot(labels, 2))
Esempio n. 2
0
    def __init__(self, **kwargs):
        """Build the train/valid/test splits from the sessions CSV.

        ``**kwargs`` is forwarded unchanged to the parent constructor.
        """
        super(RecSys, self).__init__(X=None, y=None, **kwargs)

        sessions = Dataset_by_Session(
            csv_path='/home/wuzz/ihpc/onlinebehavior/sessions.csv',
            train_valid_test=self.ratio)
        parts = sessions.split(features=sessions.feature,
                               labels=sessions.label)

        # (setter, feature key, label key) for each split, in registration
        # order; labels are one-hot encoded over 2 classes.
        plan = (
            (self.set_train, 'train_fea', 'train_lbl'),
            (self.set_valid, 'valid_fea', 'valid_lbl'),
            (self.set_test, 'test_fea', 'test_lbl'),
        )
        for register, fea_key, lbl_key in plan:
            register(X=parts[fea_key], y=make_one_hot(parts[lbl_key], 2))
Esempio n. 3
0
 def __init__(self, **kwargs):
     """Load the pre-split jitter CSV files and register them as splits.

     ``**kwargs`` is forwarded unchanged to the parent constructor.
     """
     super(RecSysJitter, self).__init__(X=None, y=None, **kwargs)
     data_dir = '/home/wuzz/recsys2015/data/jitter_stdr0_1_dup_15'

     # All three files are loaded before any split is registered.
     loaded = [
         Dataset_by_Session(sort_by_session=False, csv_path=data_dir + suffix)
         for suffix in ('/train_jitter.csv', '/valid.csv', '/test.csv')
     ]

     # Register in train, valid, test order with 2-class one-hot labels.
     setters = (self.set_train, self.set_valid, self.set_test)
     for register, split in zip(setters, loaded):
         register(X=split.feature, y=make_one_hot(split.label, 2))
Esempio n. 4
0
    def __init__(self, **kwargs):
        """Build the train/valid/test splits from the sessions CSV.

        ``**kwargs`` is forwarded unchanged to the parent constructor.
        """
        super(RecSys, self).__init__(X=None, y=None, **kwargs)

        sessions = Dataset_by_Session(
            csv_path='/home/wuzz/ihpc/onlinebehavior/sessions.csv',
            train_valid_test=self.ratio)
        parts = sessions.split(features=sessions.feature,
                               labels=sessions.label)

        # (setter, feature key, label key) for each split, in registration
        # order; labels are one-hot encoded over 2 classes.
        plan = (
            (self.set_train, 'train_fea', 'train_lbl'),
            (self.set_valid, 'valid_fea', 'valid_lbl'),
            (self.set_test, 'test_fea', 'test_lbl'),
        )
        for register, fea_key, lbl_key in plan:
            register(X=parts[fea_key], y=make_one_hot(parts[lbl_key], 2))
Esempio n. 5
0
 def __init__(self, **kwargs):
     """Load the pre-split jitter CSV files and register them as splits.

     ``**kwargs`` is forwarded unchanged to the parent constructor.
     """
     super(RecSysJitter, self).__init__(X=None, y=None, **kwargs)
     data_dir = '/home/wuzz/recsys2015/data/jitter_stdr0_1_dup_15'

     # All three files are loaded before any split is registered.
     loaded = [
         Dataset_by_Session(sort_by_session=False, csv_path=data_dir + suffix)
         for suffix in ('/train_jitter.csv', '/valid.csv', '/test.csv')
     ]

     # Register in train, valid, test order with 2-class one-hot labels.
     setters = (self.set_train, self.set_valid, self.set_test)
     for register, split in zip(setters, loaded):
         register(X=split.feature, y=make_one_hot(split.label, 2))
Esempio n. 6
0
def save_gaussian_one_hot(y, save_dir, std):
    """One-hot encode ``y``, run it through ``Gaussian(std)`` and save it.

    The labels are encoded over 1998 classes, passed through
    ``Gaussian(std=std).apply`` and written with ``np.save`` to
    ``<save_dir>/sample_y_onehot_gaussian_noise_std<std>.npy``.

    Parameters
    ----------
    y : labels accepted by ``make_one_hot``.
    save_dir : directory the ``.npy`` file is written into.
    std : value handed to ``Gaussian`` and embedded in the file name.
    """
    # Parenthesised single-argument print behaves identically on Python 2
    # and Python 3, unlike the bare print statement.
    print('saving')
    one_hot_y = make_one_hot(y, 1998)
    one_hot_y = Gaussian(std=std).apply(one_hot_y)

    out_path = "%s/sample_y_onehot_gaussian_noise_std%s.npy" % (save_dir, std)
    with open(out_path, 'wb') as yout:
        np.save(yout, one_hot_y)
    # Report success only after the file has been closed.
    print('saving done!')
Esempio n. 7
0
    def __init__(self, **kwargs):
        """Load saved posterior arrays and register them as dataset splits.

        Inputs come from ``train_y.npy`` / ``test_y.npy`` and labels from
        ``train_lbl.npy`` / ``test_lbl.npy``; labels are one-hot encoded over
        2 classes. ``**kwargs`` is forwarded to the parent constructor.
        """
        super(RecSysPosterior, self).__init__(X=None, y=None, **kwargs)

        sav_dir = '/home/wuzz/ihpc/dataset/posteriors'

        def load_npy(name):
            # .npy is a binary format: text mode breaks np.load on Python 3
            # and can corrupt the bytes on platforms that translate newlines.
            with open(sav_dir + name, 'rb') as fin:
                return np.load(fin)

        train_X = load_npy('/train_y.npy')
        train_y = load_npy('/train_lbl.npy')
        test_X = load_npy('/test_y.npy')
        test_y = load_npy('/test_lbl.npy')

        self.set_train(X=train_X, y=make_one_hot(train_y, 2))
        # NOTE(review): the validation split reuses the test arrays; no
        # separate validation files are read here. Confirm this is intended.
        self.set_valid(X=test_X, y=make_one_hot(test_y, 2))
        self.set_test(X=test_X, y=make_one_hot(test_y, 2))
Esempio n. 8
0
    def __init__(self, **kwargs):
        """Load saved posterior arrays and register them as dataset splits.

        Inputs come from ``train_y.npy`` / ``test_y.npy`` and labels from
        ``train_lbl.npy`` / ``test_lbl.npy``; labels are one-hot encoded over
        2 classes. ``**kwargs`` is forwarded to the parent constructor.
        """
        super(RecSysPosterior, self).__init__(X=None, y=None, **kwargs)

        sav_dir = '/home/wuzz/ihpc/dataset/posteriors'

        def load_npy(name):
            # .npy is a binary format: text mode breaks np.load on Python 3
            # and can corrupt the bytes on platforms that translate newlines.
            with open(sav_dir + name, 'rb') as fin:
                return np.load(fin)

        train_X = load_npy('/train_y.npy')
        train_y = load_npy('/train_lbl.npy')
        test_X = load_npy('/test_y.npy')
        test_y = load_npy('/test_lbl.npy')

        self.set_train(X=train_X, y=make_one_hot(train_y, 2))
        # NOTE(review): the validation split reuses the test arrays; no
        # separate validation files are read here. Confirm this is intended.
        self.set_valid(X=test_X, y=make_one_hot(test_y, 2))
        self.set_test(X=test_X, y=make_one_hot(test_y, 2))
Esempio n. 9
0
File: i2r.py Progetto: hycis/Pynet
    def __init__(self, **kwargs):
        """Load the sample features and labels and pass them to the parent.

        ``sample_y.npy`` is one-hot encoded over 1998 classes and used as
        ``y``; ``sample_X.npy`` is used as ``X``. ``**kwargs`` is forwarded
        unchanged to the parent constructor.
        """
        # Named data_dir rather than dir so the builtin is not shadowed.
        data_dir = '/home/stuwzhz/datasets/spectral-features/npy2'

        # .npy is a binary format, so both files are opened with 'rb'; text
        # mode breaks np.load on Python 3 and on newline-translating platforms.
        with open('%s/sample_y.npy' % data_dir, 'rb') as yin, \
                open('%s/sample_X.npy' % data_dir, 'rb') as Xin:
            y = make_one_hot(np.load(yin), 1998)
            X = np.load(Xin)

        super(I2R_Posterior_NoisyFeat_Sample, self).__init__(X=X, y=y, **kwargs)
Esempio n. 10
0
def save_gaussian_one_hot(y, save_dir, std):
    """One-hot encode ``y``, run it through ``Gaussian(std)`` and save it.

    The labels are encoded over 1998 classes, passed through
    ``Gaussian(std=std).apply`` and written with ``np.save`` to
    ``<save_dir>/sample_y_onehot_gaussian_noise_std<std>.npy``.

    Parameters
    ----------
    y : labels accepted by ``make_one_hot``.
    save_dir : directory the ``.npy`` file is written into.
    std : value handed to ``Gaussian`` and embedded in the file name.
    """
    # Parenthesised single-argument print behaves identically on Python 2
    # and Python 3, unlike the bare print statement.
    print('saving')
    one_hot_y = make_one_hot(y, 1998)
    one_hot_y = Gaussian(std=std).apply(one_hot_y)

    out_path = "%s/sample_y_onehot_gaussian_noise_std%s.npy" % (save_dir, std)
    with open(out_path, 'wb') as yout:
        np.save(yout, one_hot_y)
    # Report success only after the file has been closed.
    print('saving done!')
Esempio n. 11
0
    def __init__(self, **kwargs):
        """Load the 2-click session CSV and register train/valid/test splits.

        Reads ``2_clicks_sess.csv``, orders the rows by ``SessionID``, keeps
        the first 1,000,000 rows, splits them through ``Dataset_by_Session``
        with ``self.ratio`` and registers each split with its
        ``BuyInSession`` labels one-hot encoded over 2 classes.

        ``**kwargs`` is forwarded unchanged to the parent constructor.
        """
        super(RecSys2ClickSession, self).__init__(X=None, y=None, **kwargs)
        FEATURE = ['Price', 'ItemMaxPrice', 'ItemMinPrice', 'ItemTotalClicks', 'ItemTotalBuys', 'ItemBuyingProbability',
                   'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9', 'cat10', 'cat11', 'cat12',
                   'ItemDuration', 'ItemMonth', 'ItemDay', 'ItemHourInMins', 'Sales']
        LABEL = ['BuyInSession']

        df = pandas.read_csv('/home/wuzz/ihpc/dataset/yoochoose_data/2_clicks_sess.csv')
        # DataFrame.sort was removed in pandas 0.20; sort_values replaces it.
        # Fall back to the old name only on pandas releases that predate it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values('SessionID')
        else:
            df = df.sort('SessionID')
        df = df[:1000000]

        data = Dataset_by_Session(df=df,
                                  sort_by_session=True,
                                  train_valid_test=self.ratio)
        train_df, valid_df, test_df = data.split_df()

        # Each split is registered the same way: feature columns as X and the
        # flattened label column one-hot encoded over 2 classes as y.
        for register, part in ((self.set_train, train_df),
                               (self.set_valid, valid_df),
                               (self.set_test, test_df)):
            labels = part[LABEL].values.reshape(len(part))
            register(X=part[FEATURE].values, y=make_one_hot(labels, 2))
Esempio n. 12
0
    def __init__(self, one_hot=False, **kwargs):
        """Load ``train.npy`` and hand the derived ``X``/``y`` to the parent.

        Parameters
        ----------
        one_hot : bool
            When true, ``y`` is one-hot encoded over 8 classes; otherwise it
            is reshaped to a single column of shape ``(len(y), 1)``.
        **kwargs
            Forwarded unchanged to the parent constructor.
        """
        # Named data_dir rather than dir so the builtin is not shadowed.
        data_dir = '/Volumes/Storage/Unilever_Challenge/dataset'
        # .npy is a binary format, so open with 'rb'; text mode breaks
        # np.load on Python 3 and on newline-translating platforms.
        with open(data_dir + '/train.npy', 'rb') as Xin:
            data = np.load(Xin)

        X, y = self.make_Xy(data)
        if one_hot:
            y = make_one_hot(y, 8)
        else:
            y = y.reshape((y.shape[0], 1))

        super(Unilever, self).__init__(X=X, y=y, **kwargs)
Esempio n. 13
0
    def __init__(self, **kwargs):
        """Load the sample features and labels and pass them to the parent.

        ``sample_y.npy`` is one-hot encoded over 1998 classes and used as
        ``y``; ``sample_X.npy`` is used as ``X``. ``**kwargs`` is forwarded
        unchanged to the parent constructor.
        """
        # Named data_dir rather than dir so the builtin is not shadowed.
        data_dir = '/home/stuwzhz/datasets/spectral-features/npy2'

        # .npy is a binary format, so both files are opened with 'rb'; text
        # mode breaks np.load on Python 3 and on newline-translating platforms.
        with open('%s/sample_y.npy' % data_dir, 'rb') as yin, \
                open('%s/sample_X.npy' % data_dir, 'rb') as Xin:
            y = make_one_hot(np.load(yin), 1998)
            X = np.load(Xin)

        super(I2R_Posterior_NoisyFeat_Sample, self).__init__(X=X,
                                                             y=y,
                                                             **kwargs)
Esempio n. 14
0
    def __init__(self, one_hot=False, **kwargs):
        """Load ``train.npy`` and hand the derived ``X``/``y`` to the parent.

        Parameters
        ----------
        one_hot : bool
            When true, ``y`` is one-hot encoded over 8 classes; otherwise it
            is reshaped to a single column of shape ``(len(y), 1)``.
        **kwargs
            Forwarded unchanged to the parent constructor.
        """
        # Named data_dir rather than dir so the builtin is not shadowed.
        data_dir = '/Volumes/Storage/Unilever_Challenge/dataset'
        # .npy is a binary format, so open with 'rb'; text mode breaks
        # np.load on Python 3 and on newline-translating platforms.
        with open(data_dir + '/train.npy', 'rb') as Xin:
            data = np.load(Xin)

        X, y = self.make_Xy(data)
        if one_hot:
            y = make_one_hot(y, 8)
        else:
            y = y.reshape((y.shape[0], 1))

        super(Unilever, self).__init__(X=X, y=y, **kwargs)
Esempio n. 15
0
File: i2r.py Progetto: hycis/Pynet
    def __init__(self, **kwargs):
        """Build the dataset from the sample labels greater than 150.

        Loads ``sample_y.npy``, keeps the entries strictly greater than 150,
        one-hot encodes them over 1998 classes and passes that single array
        to the parent constructor as both ``X`` and ``y``. ``**kwargs`` is
        forwarded unchanged.
        """
        # Named data_dir rather than dir so the builtin is not shadowed.
        data_dir = '/home/stuwzhz/datasets/spectral-features/npy2'

        # .npy is a binary format, so open with 'rb'; text mode breaks
        # np.load on Python 3 and on newline-translating platforms.
        with open('%s/sample_y.npy' % data_dir, 'rb') as yin:
            y = np.load(yin)

        # Filtering needs only the loaded array, so it runs after the file
        # has been closed.
        kept = np.asarray([e for e in y if e > 150])
        one_hot = make_one_hot(kept, 1998)

        super(I2R_Posterior_Gaussian_Noisy_Sample, self).__init__(X=one_hot, y=one_hot, **kwargs)
Esempio n. 16
0
    def __init__(self, **kwargs):
        """Build the dataset from the sample labels greater than 150.

        Loads ``sample_y.npy``, keeps the entries strictly greater than 150,
        one-hot encodes them over 1998 classes and passes that single array
        to the parent constructor as both ``X`` and ``y``. ``**kwargs`` is
        forwarded unchanged.
        """
        # Named data_dir rather than dir so the builtin is not shadowed.
        data_dir = '/home/stuwzhz/datasets/spectral-features/npy2'

        # .npy is a binary format, so open with 'rb'; text mode breaks
        # np.load on Python 3 and on newline-translating platforms.
        with open('%s/sample_y.npy' % data_dir, 'rb') as yin:
            y = np.load(yin)

        # Filtering needs only the loaded array, so it runs after the file
        # has been closed.
        kept = np.asarray([e for e in y if e > 150])
        one_hot = make_one_hot(kept, 1998)

        super(I2R_Posterior_Gaussian_Noisy_Sample, self).__init__(X=one_hot,
                                                                  y=one_hot,
                                                                  **kwargs)