import os
import argparse

from sample.models.model import Model
from sample.models.model_list import get_models
# Import path assumed to match parse_json_classify in sample.parsing.parser:
from sample.parsing.parser import parse_json_accuracy as parse_accuracy
import sample.utils.printer as printer

'''
This script allows the training of any model that correctly subclasses Model.
It trains, evaluates, and stores the model. Currently, it uses the default
transform_function (used in preprocessing) of each class.
'''
# TODO: figure out how to allow the user to pass a function. But this is hard
# over the command line...

# All of the available models to train
models = get_models()

parser = argparse.ArgumentParser()
parser.add_argument(
    'model_type',
    help='The type of model to train.')
parser.add_argument(
    'data',
    help='The data to train on. Either a JSON formatted string or a path to '
         'a file. If it is a file, use the --isfile flag.')
parser.add_argument(
    'models_folder',
    help='The parent folder to place the folder containing the new model in.')
parser.add_argument(
    'model_name',
    help='The name of the folder to store the new model in. If the folder '
         'already exists, its existing contents will be overridden. If it '
         'does not, it will be created.')
parser.add_argument(
    'group_size',
    help='OPTIONAL ARGUMENT. The first of a pair that dictates the '
         'evaluation that will be produced by the training when it is '
         'finished. The second is seq_size. This will generate a report for '
         'every element of the cartesian product of the sets '
         'range(group_size) and range(seq_size). Each pair '
         '(group_size, seq_size) generates a full report where each '
         'prediction is generated over seq_size data points and is '
         'considered correct if it has a correct prediction within the '
         'first group_size guesses (ordered by likelihood). Note that '
         'unless you held out data you will be unable to obtain an unbiased '
         'evaluation.',
    nargs='?', type=int, default=3)
parser.add_argument(
    'seq_size',
    help='OPTIONAL ARGUMENT. The second of the pair described under '
         'group_size.',
    nargs='?', type=int, default=3)  # default assumed to mirror group_size
parser.add_argument(
    '--readable',
    help='Pass this flag if you want human readable output printed to terminal',
    action='store_true')
parser.add_argument(
    '--isfile',
    help='Pass this flag if you are passing the data as a path to a json file',
    action='store_true')
args = parser.parse_args()

path = args.models_folder + '/' + args.model_name + '/'
model_type = get_models()[Model.load_type(path)]

data = args.data
if args.isfile:
    with open(args.data) as f:
        data = f.read()

# A tuple containing all the info needed to classify.
loaded_model = Model.load_classification_pipeline(model_type, path)
preprocessor = loaded_model.preprocessor
X, Y = parse_accuracy(data, preprocessor.encoder, preprocessor.normalizer)

eval_group_size = args.group_size
eval_seq_size = args.seq_size
# One report per element of range(group_size) x range(seq_size), as described
# in the group_size help text.
evaluation = {(i, j): loaded_model.evaluate_in_pipeline(X, Y, preprocessor.get_encoder(), i, j)
              for i in range(eval_group_size)
              for j in range(eval_seq_size)}
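
# Example invocation (a hypothetical sketch: the script name, the 'dnn' model
# type, and the paths below are illustrative assumptions, not taken from the
# repo):
#
#   python train.py dnn data/iris.json saved_models my_model --isfile --readable
#
# With the defaults above, this produces an evaluation dict keyed by
# (group_size, seq_size) pairs from range(3) x range(3).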
import re
import shutil

import numpy as np
import pytest

from sample.models.dnn import Dnn
from sample.models.rnn import Rnn
from sample.models.model import Model
from sample.preprocessing.preprocessor import Preprocessor
from sample.preprocessing.gaussian_normalizer import Gaussian_normalizer
from sample.preprocessing.categorical_encoder import Categorical_encoder
from sample.parsing.parser import parse_json_classify as parse_classify
from sample.models.model_list import get_models

models_to_test = get_models().values()


@pytest.mark.timeout(5)
@pytest.mark.parametrize("data", ["data/iris.json"])
def test_models_pipeline_with_read_write(data):
    '''
    Tests the exact calls used in train and classify in order, including
    file writes and reads. Deletes test files when done. Verifies that the
    outputs of save_pipeline_output line up with load_classification_pipeline,
    that evaluation is formatted correctly (printing won't work otherwise),
    that classification meets a few basic structural criteria, and generally
    that the pipeline runs through without crashing.
    '''
    path = 'test_models/test_models_pipeline_with_read_write'
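    # A rough sketch of the flow the docstring describes (the helper calls
    # here are assumptions based on names used elsewhere in the repo, not its
    # exact API):
    #   1. train each model in models_to_test and save it under `path`
    #      via save_pipeline_output
    #   2. read it back with Model.load_classification_pipeline and check the
    #      loaded pipeline lines up with what was saved
    #   3. evaluate/classify and check the output structure
    #   4. shutil.rmtree(path) to delete the test files when done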