fcols = [col for col in orig_dataset.columns if col in settings.FEATURES] catconversion = FeatureUnion([feature_sets.CATEGORICAL_CONVERSION], n_jobs=1) dataset = pd.DataFrame(data=catconversion.fit_transform(orig_dataset), columns=fcols, index=orig_dataset.index) target = FeatureColumnsExtractor( settings.TARGET).fit_transform(orig_dataset).apply(nonlinearity) # import time # before = time.time() pipeline = overall_pipeline() cv = KFold(len(target), n_folds=4, random_state=2, shuffle=False) submission = SqrtHazardSubmission(pipeline, 'XGB_Direct_Reduced', cv=cv) submission.fit(dataset, target, perform_cv=True, scoring=scorer_normalized_gini, n_jobs=1, verbose=3) original_test_set = pd.read_csv(settings.TEST_FILE) test_set = pd.DataFrame(data=catconversion.transform(original_test_set), columns=fcols, index=original_test_set.index) predictions = submission.predict(test_set) submission.create_submission(predictions, original_test_set,
fcols = [col for col in orig_dataset.columns if col in settings.FEATURES] catconversion = FeatureUnion([feature_sets.CATEGORICAL_CONVERSION], n_jobs=1) dataset = pd.DataFrame(data=catconversion.fit_transform(orig_dataset), columns=fcols, index=orig_dataset.index) target = FeatureColumnsExtractor( settings.TARGET).fit_transform(orig_dataset).apply(nonlinearity) # import time # before = time.time() pipeline = overall_pipeline() cv = KFold(len(target), n_folds=4, random_state=2, shuffle=False) submission = SqrtHazardSubmission(pipeline, 'XGB_SVM_Ensemble', cv=cv) submission.fit(dataset, target, perform_cv=True, scoring=scorer_normalized_gini, n_jobs=2, verbose=3) original_test_set = pd.read_csv(settings.TEST_FILE) test_set = pd.DataFrame(data=catconversion.transform(original_test_set), columns=fcols, index=original_test_set.index) predictions = submission.predict(test_set) submission.create_submission(predictions, original_test_set,