# Assumed imports for the excerpts below; _my_dir is the test directory
# configured elsewhere in the test module.
import csv
import logging
import os
import sys
from itertools import chain
from os.path import join

from numpy.testing import assert_almost_equal

from skll import Learner
from skll.data import Reader, Writer, safe_float
from skll.metrics import use_score_func

logger = logging.getLogger(__name__)


def test_ids_to_floats():
    """Test that example IDs are read as floats when ids_to_floats is set."""
    path = join(_my_dir, 'train', 'test_input_2examples_1.jsonlines')

    examples = Reader.for_path(path, ids_to_floats=True, quiet=True).read()
    assert isinstance(examples.ids[0], float)

    examples = Reader.for_path(path, quiet=True).read()
    assert not isinstance(examples.ids[0], float)
    assert isinstance(examples.ids[0], str)
def test_backward_compatibility():
    """
    Test to validate backward compatibility.
    """
    predict_path = join(_my_dir, 'backward_compatibility',
                        ('v0.9.17_test_summary_test_summary_'
                         'LogisticRegression.predictions'))
    model_path = join(_my_dir, 'backward_compatibility',
                      ('v0.9.17_test_summary_test_summary_LogisticRegression.'
                       '{}.model').format(sys.version_info[0]))
    test_path = join(_my_dir, 'backward_compatibility',
                     'v0.9.17_test_summary.jsonlines')

    learner = Learner.from_file(model_path)
    examples = Reader.for_path(test_path, quiet=True).read()
    new_predictions = learner.predict(examples)[:, 1]

    with open(predict_path) as predict_file:
        old_predictions = [float(line.strip()) for line in predict_file]
    assert_almost_equal(new_predictions, old_predictions)
def compute_eval_from_predictions(examples_file, predictions_file,
                                  metric_names):
    """
    Compute evaluation metrics from prediction files after you have run an
    experiment.

    :param examples_file: a SKLL examples file (in .jsonlines or other format)
    :param predictions_file: a SKLL predictions output TSV file with id
                             and prediction column names
    :param metric_names: a list of SKLL metric names
                         (e.g., [pearson, unweighted_kappa])

    :returns: a dictionary from metric names to values
    """
    # read gold standard labels
    data = Reader.for_path(examples_file).read()
    gold = dict(zip(data.ids, data.labels))

    # read predictions
    pred = {}
    with open(predictions_file) as pred_file:
        reader = csv.reader(pred_file, dialect=csv.excel_tab)
        next(reader)  # skip header
        for row in reader:
            pred[row[0]] = safe_float(row[1])

    # make a sorted list of example ids in order to match up
    # labels and predictions
    if set(gold.keys()) != set(pred.keys()):
        raise ValueError('The example and prediction IDs do not match.')
    example_ids = sorted(gold.keys())

    res = {}
    for metric_name in metric_names:
        score = use_score_func(metric_name,
                               [gold[ex_id] for ex_id in example_ids],
                               [pred[ex_id] for ex_id in example_ids])
        res[metric_name] = score
    return res
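# A minimal usage sketch for compute_eval_from_predictions() above. The file
# paths here are hypothetical placeholders; any SKLL examples file and a
# matching predictions TSV would work the same way.
def _example_compute_eval_usage():
    scores = compute_eval_from_predictions('dev/examples.jsonlines',
                                           'output/predictions.tsv',
                                           ['pearson', 'unweighted_kappa'])
    for name, value in scores.items():
        print('{}: {:.3f}'.format(name, value))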
def main():
    """
    Create directories and split CSV files into subsets.
    """
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - '
                                '%(message)s'),
                        level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Create dictionary of subsets to use for creating split feature files
    subset_dict = {'vitals': ['Sex', 'Age'],
                   'socioeconomic': ['Pclass', 'Fare'],
                   'family': ['SibSp', 'Parch'],
                   'misc': ['Embarked']}
    features_to_keep = list(chain(*subset_dict.values()))

    # Create directories to store files
    if not os.path.exists('titanic/train'):
        logger.info('Creating titanic/train directory')
        os.makedirs('titanic/train')
    if not os.path.exists('titanic/dev'):
        logger.info('Creating titanic/dev directory')
        os.makedirs('titanic/dev')
    if not os.path.exists('titanic/train+dev'):
        logger.info('Creating titanic/train+dev directory')
        os.makedirs('titanic/train+dev')
    if not os.path.exists('titanic/test'):
        logger.info('Creating titanic/test directory')
        os.makedirs('titanic/test')

    usecols_train = features_to_keep + ['PassengerId', 'Survived']
    usecols_test = features_to_keep + ['PassengerId']

    # Read and write training FeatureSet
    train_fs = Reader.for_path('titanic/train.csv', label_col='Survived',
                               id_col='PassengerId', drop_blanks=True,
                               pandas_kwargs={'usecols': usecols_train},
                               quiet=False, sparse=False).read()
    train_fs.filter(features=features_to_keep)
    num_train_dev = len(train_fs)
    num_train = int((num_train_dev / 5) * 4)
    writer = Writer.for_path('titanic/train/.csv', train_fs[:num_train],
                             id_col='PassengerId', label_col='Survived',
                             quiet=False, subsets=subset_dict)
    writer.write()

    # Write the train+dev set, used to train the model that generates
    # predictions on the test set
    writer = Writer.for_path('titanic/train+dev/.csv', train_fs,
                             label_col='Survived', id_col='PassengerId',
                             quiet=False, subsets=subset_dict)
    writer.write()

    # Write dev FeatureSet
    writer = Writer.for_path('titanic/dev/.csv', train_fs[num_train:],
                             label_col='Survived', id_col='PassengerId',
                             quiet=False, subsets=subset_dict)
    writer.write()

    # Read and write test FeatureSet
    test_fs = Reader.for_path('titanic/test.csv', label_col='Survived',
                              drop_blanks=True,
                              pandas_kwargs={'usecols': usecols_test},
                              quiet=False, sparse=False).read()
    test_fs.filter(features=features_to_keep)
    num_test = len(test_fs)
    test_fs.ids = list(range(num_train_dev + 1,
                             num_test + num_train_dev + 1))
    writer = Writer.for_path('titanic/test/.csv', test_fs,
                             id_col='PassengerId', quiet=False,
                             subsets=subset_dict)
    writer.write()
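# A small sanity-check sketch for main() above. When `subsets` is passed to
# Writer.for_path() with a '.csv' template path, SKLL writes one file per
# subset (e.g. titanic/train/vitals.csv, titanic/train/family.csv). The
# function below is illustrative and assumes main() has already run; it
# reads one of those subset files back into a FeatureSet.
def _example_read_subset():
    vitals_fs = Reader.for_path('titanic/train/vitals.csv',
                                label_col='Survived', id_col='PassengerId',
                                quiet=True).read()
    print(vitals_fs)  # FeatureSet restricted to the 'vitals' features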
def compute_eval_from_predictions(examples_file, predictions_file,
                                  metric_names, prediction_method=None):
    """
    Compute evaluation metrics from prediction files after you have run an
    experiment.

    Parameters
    ----------
    examples_file : str
        Path to a SKLL examples file (in .jsonlines or other format).
    predictions_file : str
        Path to a SKLL predictions output TSV file with id and prediction
        column names.
    metric_names : list of str
        A list of SKLL metric names (e.g., [pearson, unweighted_kappa]).
    prediction_method : str or None
        Indicates how to get a single class prediction from the
        probabilities. Currently supported options are "highest", which
        selects the class with the highest probability, and
        "expected_value", which calculates an expected value over integer
        classes and rounds to the nearest int. If the predictions file does
        not contain probabilities, this should be set to None.

    Returns
    -------
    dict
        Maps metric names to corresponding values.

    Raises
    ------
    ValueError
        If the requested prediction method is 'expected_value' but the
        class names cannot be converted to ints.
    """
    # read gold standard labels
    data = Reader.for_path(examples_file).read()
    gold = dict(zip(data.ids, data.labels))

    # read predictions
    pred = {}
    with open(predictions_file) as pred_file:
        reader = csv.reader(pred_file, dialect=csv.excel_tab)
        header = next(reader)

        # If there are more than two columns, assume column 0 contains the
        # ids, and columns 1-n contain class probabilities. Convert them to
        # a class prediction using the specified `prediction_method`.
        if len(header) > 2:
            classes = [c for c in header[1:] if c]
            if prediction_method is None:
                prediction_method = "highest"
                logger.info("No prediction method specified. Using "
                            "'highest'.")
            if prediction_method == 'expected_value':
                try:
                    classes = [int(c) for c in classes]
                except ValueError:
                    raise ValueError("Class names must be integers to use "
                                     "the 'expected_value' prediction "
                                     "method.")
            for row in reader:
                probabilities = [safe_float(p) for p in row[1:]]
                prediction = get_prediction_from_probabilities(
                    classes, probabilities, prediction_method)
                pred[row[0]] = safe_float(prediction)
        else:
            if prediction_method is not None:
                logger.warning("A prediction method was provided, but the "
                               "predictions file doesn't contain "
                               "probabilities. Ignoring prediction method "
                               "'{}'.".format(prediction_method))
            for row in reader:
                pred[row[0]] = safe_float(row[1])

    # make a sorted list of example ids in order to match up
    # labels and predictions
    if set(gold.keys()) != set(pred.keys()):
        raise ValueError('The example and prediction IDs do not match.')
    example_ids = sorted(gold.keys())

    res = {}
    for metric_name in metric_names:
        score = use_score_func(metric_name,
                               [gold[ex_id] for ex_id in example_ids],
                               [pred[ex_id] for ex_id in example_ids])
        res[metric_name] = score
    return res
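# get_prediction_from_probabilities() is called above but defined elsewhere.
# Below is a minimal sketch of what such a helper could look like, covering
# the two methods described in the docstring ("highest" and
# "expected_value"); it is an illustration, not SKLL's actual implementation.
def get_prediction_from_probabilities(classes, probabilities,
                                      prediction_method):
    if prediction_method == 'highest':
        # choose the class with the largest probability
        return classes[probabilities.index(max(probabilities))]
    elif prediction_method == 'expected_value':
        # probability-weighted mean over integer classes, rounded to the
        # nearest integer
        expected = sum(c * p for c, p in zip(classes, probabilities))
        return int(round(expected))
    else:
        raise ValueError("Unknown prediction method: "
                         "'{}'".format(prediction_method))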