예제 #1
0
파일: dnn.py 프로젝트: PKostya/kaggle
        self.name = name
        self.start, self.stop = start, stop
        self.ls = None

    def __call__(self, nn, train_history):
        """Set the attribute ``self.name`` of ``nn`` to this epoch's scheduled value.

        The schedule is built lazily on the first call: ``nn.max_epochs``
        evenly spaced values running from ``self.start`` to ``self.stop``.
        The epoch number is read from the last entry of ``train_history``
        and used (minus one) as the index into that schedule.
        """
        schedule = self.ls
        if schedule is None:
            # First call: the number of steps is only known once `nn` is available.
            schedule = np.linspace(self.start, self.stop, nn.max_epochs)
            self.ls = schedule

        current_epoch = train_history[-1]['epoch']
        # Index with epoch - 1, then cast through the file's float32 helper.
        scheduled = float32(schedule[current_epoch - 1])
        target = getattr(nn, self.name)
        target.set_value(scheduled)

if __name__ == '__main__':       
    # Script entry point (only its beginning is visible in this excerpt):
    # load the data, fit the feature pipeline on train + test together, and
    # compute summary statistics of the log-transformed target.

    # Neither `train_size` nor `cls` is used in the visible part of the
    # script; presumably they are consumed further down -- TODO confirm.
    train_size = 0.75
    cls = RandomForestClassifier()
    train_df_orig = RevenueCompetition.load_data()
    # Pull the 'revenue' column out as a float32 array, then drop it so the
    # remaining columns no longer include it.
    y = train_df_orig['revenue'].values.astype('float32')
    del train_df_orig['revenue']

    # Second load with train=False -- presumably the test set; verify
    # against RevenueCompetition.load_data.
    test_df_orig = RevenueCompetition.load_data(train=False)

    # Stack train and test rows so the transform below is fitted on both.
    # NOTE(review): if these are pandas DataFrames, `DataFrame.append` was
    # removed in pandas 2.0 (pd.concat is the replacement) -- confirm the
    # pandas version this script targets.
    full_df_orig = train_df_orig.append(test_df_orig)
    
    print("Transforming...")
    # Project transform (with rescale=True) chained with StandardScaler,
    # fitted on the combined frame.
    tr = make_pipeline(RevenueTransform(rescale=True), StandardScaler())
    tr.fit(full_df_orig)

    print('Classify the outliers...')
    # Natural log of the target and its mean / standard deviation.
    # NOTE(review): np.log gives -inf or NaN for non-positive inputs --
    # assumes revenue is strictly positive; confirm.
    ly = np.log(y)
    ym = ly.mean()
    ys = ly.std()
예제 #2
0
파일: linear.py 프로젝트: PKostya/kaggle
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.svm import SVR

from revenue import RevenueCompetition, RevenueTransform

if __name__ == '__main__':
    # Script entry point (only its beginning is visible in this excerpt):
    # build the estimators, load the data, fit the feature transform on
    # train + test together, and start analysing the log-transformed target.
        
    # Not used in the visible part of the script; presumably consumed
    # further down -- TODO confirm.
    train_size = 0.75
    cls = RandomForestClassifier()
    # NOTE(review): this RandomForestRegressor is rebound to an SVR two
    # statements later without being used in between, so the forest
    # configuration below has no effect on what `reg` refers to afterwards.
    reg = RandomForestRegressor(n_estimators=20, max_features=5, max_depth=None,
                                 min_samples_split=2, min_samples_leaf=1,
                                 max_leaf_nodes=None, bootstrap=True,
                                 oob_score=False, n_jobs=-1)
    reg = SVR(C=10., gamma=0.1)
    train_df_orig = RevenueCompetition.load_data()
    # Pull the 'revenue' column out as the target array, then drop it so the
    # remaining columns no longer include it.
    y = train_df_orig['revenue'].values
    del train_df_orig['revenue']

    # Second load with train=False -- presumably the test set; verify
    # against RevenueCompetition.load_data.
    test_df_orig = RevenueCompetition.load_data(train=False)

    # Stack train and test rows so the transform below is fitted on both.
    # NOTE(review): if these are pandas DataFrames, `DataFrame.append` was
    # removed in pandas 2.0 (pd.concat is the replacement) -- confirm the
    # pandas version this script targets.
    full_df = train_df_orig.append(test_df_orig)
    
    print("Transforming...")
    tr = RevenueTransform(rescale=True)
    tr.fit(full_df)
    # Transform only the training rows and take `.values` of the result
    # (assumes transform returns a DataFrame-like object -- TODO confirm).
    X = tr.transform(train_df_orig).values

    print('Classify the outliers...')
    # Natural log of the target and its mean.
    # NOTE(review): np.log gives -inf or NaN for non-positive inputs --
    # assumes revenue is strictly positive; confirm.
    ly = np.log(y)
    ym = ly.mean()