예제 #1
0
파일: tasks.py 프로젝트: kaluzhny/airbnb
    def load_data(self):
        """Evaluate the classifier on a held-out slice of the training data.

        The labelled data produced by ``TrainingDataTask`` is split once with
        a stratified shuffle split (test fraction ``task_core.cv_ratio``,
        seeded with ``task_core.n_seed``). The held-out part is then narrowed
        by ``divide_by_has_sessions`` and handed to ``run_model`` together
        with the remaining training rows. The resulting probabilities are
        printed and scored against the held-out labels.

        Returns:
            dict: ``{'Score': <value returned by score()>}``.
        """
        n_classes = len(le_.classes_)
        core = self.task_core

        # Full labelled data set; it is split below instead of loading the
        # real test set.
        all_x, all_y = TrainingDataTask(core).run()

        # A single split is requested, so only the first fold is taken.
        splitter = StratifiedShuffleSplit(all_y, 1, test_size=core.cv_ratio,
                                          random_state=core.n_seed)
        fit_idxs, holdout_idxs = list(splitter)[0]

        holdout_x = all_x.filter_rows_by_idxs(holdout_idxs)
        holdout_y = all_y[holdout_idxs]
        fit_x = all_x.filter_rows_by_idxs(fit_idxs)
        fit_y = all_y[fit_idxs]
        # Drop the references to the unsplit data, as the original rebinding did.
        del all_x, all_y

        # Only the first (x, y) pair returned here is used for evaluation.
        # NOTE(review): the original comment says "2014 only for test" --
        # presumably the rows that have session data; confirm against
        # divide_by_has_sessions.
        holdout_x, holdout_y, _, _ = divide_by_has_sessions(holdout_x, holdout_y)

        print('running prediction model')
        probabilities = run_model(fit_x, fit_y, holdout_x, n_classes, self.classifier,
                                  core.n_threads, core.n_seed, core.cache_dir)

        print_probabilities(probabilities)
        return {'Score': score(probabilities, holdout_y)}
예제 #2
0
파일: tasks.py 프로젝트: kaluzhny/airbnb
    def load_data(self):
        """Predict on the test data and write the submission file.

        Loads the test set via ``TestDataTask`` and the labelled training set
        via ``TrainingDataTask``, passes both feature sets through
        ``sync_columns_2``, and calls ``run_model`` on the result. The
        returned probabilities are printed and then saved, together with
        ``x_test.ids_``, to ``task_core.submission_file``.

        Returns:
            None. The output is the printed probabilities and whatever
            ``save_submission`` writes.
        """
        n_classes = len(le_.classes_)
        core = self.task_core

        # Test data is loaded before the training data, as in the original.
        test_x = TestDataTask(core).run()
        fit_x, fit_y = TrainingDataTask(core).run()

        # NOTE(review): presumably aligns the feature columns of the two
        # sets -- confirm against sync_columns_2.
        test_x, fit_x = sync_columns_2(test_x, fit_x)

        probabilities = run_model(fit_x, fit_y, test_x, n_classes, self.classifier,
                                  core.n_threads, core.n_seed, core.cache_dir)

        print_probabilities(probabilities)

        save_submission(test_x.ids_, probabilities, core.submission_file)