Example #1
import argparse
import os

from sample.models.model import Model
from sample.models.model_list import get_models
import sample.utils.printer as printer

'''
This script allows the training of any model that correctly subclasses Model.
It trains, evaluates, and stores the model. Currently, it uses the default
transform_function (used in preprocessing) of each class.
'''

# TODO: figure out how to allow the user to pass a custom function. This is hard to do over the command line...
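# A minimal sketch of one way to address the TODO (hypothetical, not part of
# this repo): let the user pass a dotted import path such as
# "mypackage.transforms.scale" on the command line and resolve it to a
# callable at runtime.
import importlib

def resolve_callable(dotted_path):
    # Split "package.module.attr" into a module path and an attribute name,
    # import the module, and return the named attribute.
    module_path, _, attr = dotted_path.rpartition('.')
    return getattr(importlib.import_module(module_path), attr)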

# All of the available models to train
models = get_models()

parser = argparse.ArgumentParser()

parser.add_argument('model_type', help='The type of model to train.')
parser.add_argument('data', help='The data to train on. Either a JSON formatted string or a path to a file. ' +
  'If it is a file, use the --isfile flag.')
parser.add_argument('models_folder', help='The parent folder in which to place the folder containing the new model')
parser.add_argument('model_name', help='The name of the folder to store the new model in. If the folder already ' +
  'exists, its existing contents will be overwritten. If it does not, it will be created.')
parser.add_argument('group_size', help='OPTIONAL ARGUMENT. The first of a pair that dictates the evaluation ' +
  'produced when training finishes. The second is seq_size. This will generate a report for every element of the ' +
  'cartesian product of the sets range(group_size) and range(sequence_size). Each pair (group_size, sequence_size) ' +
  'generates a full report where each prediction is generated over sequence_size data points and is ' +
  'considered correct if a correct prediction appears within the first group_size guesses ' +
  '(ordered by likelihood). Note that unless you held out data you will be unable to trust the resulting evaluation.')
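# To make the help text above concrete (numbers are hypothetical): with
# group_size=2 and sequence_size=3, a report is produced for every (i, j) in
# range(2) x range(3), i.e. six reports, where report (i, j) scores each
# prediction over j data points and counts it correct if the true label
# appears among the first i guesses.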
Example #2
                    nargs='?',
                    type=int,
                    default=3)
parser.add_argument(
    '--readable',
    help='Pass this flag if you want human readable output printed to terminal',
    action='store_true')
parser.add_argument(
    '--isfile',
    help='Pass this flag if you are passing the data as a path to a json file',
    action='store_true')
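# Hypothetical invocation (the positional arguments are cut off above, so
# only the two flags are shown with certainty):
#   python <script>.py ... --isfile --readable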

args = parser.parse_args()

path = args.models_path + '/' + args.model_name + '/'
model_type = get_models()[Model.load_type(path)]

if args.isfile:
    with open(args.dataToTest) as f:
        data = f.read()
else:
    # The data was passed directly as a JSON-formatted string.
    data = args.dataToTest

# A tuple containing all the info needed to classify
loaded_model = Model.load_classification_pipeline(model_type, path)
preprocessor = loaded_model.preprocessor
X, Y = parse_accuracy(data, preprocessor.encoder, preprocessor.normalizer)
eval_group_size = args.evalGroupSize
eval_seq_size = args.evalSeqSize
evaluation = {(i, j):
              loaded_model.evaluate_in_pipeline(X, Y,
                                                preprocessor.get_encoder(), i,
                                                j)
              for i in range(eval_group_size)
              for j in range(eval_seq_size)}
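# The resulting dict maps each (group_size, sequence_size) pair to one full
# report; e.g. evaluation[(1, 2)] holds the report for predictions scored
# over 2 data points and judged on the single most likely guess
# (illustrative; the exact report format comes from evaluate_in_pipeline).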

Example #3

import pytest
import re
import shutil

from sample.models.dnn import Dnn
from sample.models.rnn import Rnn
from sample.models.model import Model
from sample.preprocessing.preprocessor import Preprocessor

from sample.preprocessing.gaussian_normalizer import Gaussian_normalizer
from sample.preprocessing.categorical_encoder import Categorical_encoder
from sample.parsing.parser import parse_json_classify as parse_classify
from sample.models.model_list import get_models

import numpy as np

models_to_test = get_models().values()


@pytest.mark.timeout(5)
@pytest.mark.parametrize("data", ["data/iris.json"])
def test_models_pipeline_with_read_write(data):
    '''
    Tests the exact calls used in train and classify in order, including file
    writes and reads. Deletes test files when done.

    Verifies that the outputs of save_pipeline_output line up with
    load_classification_pipeline, that the evaluation is formatted correctly
    (print won't work otherwise), that classification meets a few basic
    structural criteria, and generally that the pipeline runs through
    without crashing.
    '''
    path = 'test_models/test_models_pipeline_with_read_write'