def test_custom_learner_model_loading():
    """Ensure a saved custom-learner model reproduces its original predictions."""
    num_labels = 10
    # Give the last label half the probability mass; split the rest evenly.
    minority_weight = 0.5 / (num_labels - 1)
    class_weights = [minority_weight] * (num_labels - 1) + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Persist both feature sets as NDJ files for the experiment configs.
    for subdir, fs in (('train', train_fs), ('test', test_fs)):
        fs_path = join(_my_dir, subdir, 'test_model_custom_learner.jsonlines')
        NDJWriter(fs_path, fs).write()

    # Train the custom learner and save its model to disk.
    save_template = join(_my_dir, 'configs',
                         'test_model_save_custom_learner.template.cfg')
    run_configuration(fill_in_config_paths(save_template), quiet=True)

    # Load the first round of predictions into memory, then delete the
    # predictions file so the next run has to regenerate it.
    outprefix = 'test_model_custom_learner'
    pred_file = join(
        _my_dir, 'output',
        '{}_{}_CustomLogisticRegressionWrapper'
        '_predictions.tsv'.format(outprefix, outprefix))
    saved_preds = read_predictions(pred_file)
    os.unlink(pred_file)

    # Reload the saved model and generate predictions a second time.
    load_template = join(_my_dir, 'configs',
                         'test_model_load_custom_learner.template.cfg')
    run_configuration(fill_in_config_paths(load_template),
                      overwrite=False, quiet=True)
    reloaded_preds = read_predictions(pred_file)

    # The reloaded model must predict exactly what the original did.
    assert_array_equal(saved_preds, reloaded_preds)
def test_ablation_cv_feature_hasher_all_combos_sampler():
    """
    Test ablation all-combos + cross-validation + feature hashing + samplers
    """
    template = join(
        _my_dir, 'configs',
        'test_ablation_feature_hasher_sampler_all_combos.template.cfg')
    run_configuration(fill_in_config_paths(template), quiet=True,
                      ablation=None)

    # The summary should hold
    # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners
    # = 220 rows.
    # NOTE(review): the summary filename below omits "sampler" while the
    # results glob includes it — confirm both names against the config.
    summary_path = join(_my_dir, 'output',
                        'ablation_cv_feature_hasher_all_combos_summary.tsv')
    with open(summary_path) as summary_file:
        summary_reader = csv.DictReader(summary_file, dialect=csv.excel_tab)
        eq_(check_ablation_rows(summary_reader), 220)

    # Expect 10 ablated featuresets * 2 learners = 20 results files.
    results_pattern = join(
        _my_dir, 'output',
        'ablation_cv_feature_hasher_sampler_all_combos*.results')
    eq_(len(glob(results_pattern)), 20)
def test_logistic_custom_learner():
    """Verify the custom wrapper predicts identically to plain LogisticRegression."""
    num_labels = 10
    # Give the last label half the probability mass; split the rest evenly.
    minority_weight = 0.5 / (num_labels - 1)
    class_weights = [minority_weight] * (num_labels - 1) + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Persist both feature sets as NDJ files for the experiment config.
    for subdir, fs in (('train', train_fs), ('test', test_fs)):
        fs_path = join(_my_dir, subdir,
                       'test_logistic_custom_learner.jsonlines')
        NDJWriter(fs_path, fs).write()

    # Run both learners (custom wrapper and built-in) via the config.
    template = join(_my_dir, 'configs',
                    'test_logistic_custom_learner.template.cfg')
    run_configuration(fill_in_config_paths(template), quiet=True)

    # Both prediction files should agree exactly.
    outprefix = 'test_logistic_custom_learner'
    custom_preds = read_predictions(
        join(_my_dir, 'output',
             ('{}_{}_CustomLogisticRegressionWrapper'
              '_predictions.tsv'.format(outprefix, outprefix))))
    builtin_preds = read_predictions(
        join(_my_dir, 'output',
             ('{}_{}_LogisticRegression_predictions.tsv'.format(
                 outprefix, outprefix))))
    assert_array_equal(custom_preds, builtin_preds)
def test_majority_class_custom_learner():
    """Check that the majority-class learner always predicts the dominant label."""
    num_labels = 10
    # The last class carries half the probability mass, so it is the
    # majority label; the remaining mass is split evenly.
    minority_weight = 0.5 / (num_labels - 1)
    class_weights = [minority_weight] * (num_labels - 1) + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Persist both feature sets as NDJ files for the experiment config.
    for subdir, fs in (('train', train_fs), ('test', test_fs)):
        fs_path = join(_my_dir, subdir,
                       'test_majority_class_custom_learner.jsonlines')
        NDJWriter(fs_path, fs).write()

    # Train and predict with the custom majority-class learner.
    template = join(_my_dir, 'configs',
                    'test_majority_class_custom_learner.template.cfg')
    run_configuration(fill_in_config_paths(template), quiet=True)

    outprefix = 'test_majority_class_custom_learner'
    preds = read_predictions(
        join(_my_dir, 'output',
             ('{}_{}_MajorityClassLearner_predictions.tsv'.format(
                 outprefix, outprefix))))

    # Every prediction should be the majority label, num_labels - 1.
    expected = np.full(len(preds), float(num_labels - 1))
    assert_array_equal(preds, expected)
def test_class_map():
    """
    Test class maps

    Runs the class-map experiment config and checks that the resulting
    LogisticRegression accuracy in the JSON results file is ~0.5.
    """
    make_class_map_data()

    config_template_path = join(_my_dir, 'configs',
                                'test_class_map.template.cfg')
    config_path = fill_in_config_paths(config_template_path)
    run_configuration(config_path, quiet=True)

    # Read the JSON results file produced by the experiment.
    with open(join(_my_dir, 'output',
                   ('test_class_map_test_class_map_Logistic'
                    'Regression.results.json'))) as f:
        outd = json.load(f)

    logistic_result_score = outd[0]['accuracy']
    assert_almost_equal(logistic_result_score, 0.5)
def test_ablation_cv():
    """
    Test ablation + cross-validation
    """
    config_template_path = join(_my_dir, 'configs',
                                'test_ablation.template.cfg')
    config_path = fill_in_config_paths(config_template_path)
    run_configuration(config_path, quiet=True, ablation=1)

    # read in the summary file and make sure it has
    # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
    # lines
    with open(join(_my_dir, 'output', 'ablation_cv_plain_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 154)

    # make sure there are 7 ablated featuresets * 2 learners = 14 results
    # files (the assertion below checks 14; an earlier comment said 12,
    # which contradicted both the arithmetic and the assertion)
    num_result_files = len(
        glob(join(_my_dir, 'output', 'ablation_cv_plain*.results')))
    eq_(num_result_files, 14)