def main(table, schema):
    """Train, evaluate and save a RandomForestModel for the given table.

    Logs a banner through the root logger, assembles a dictionary of
    candidate parameter values, hands it to ``RandomForestModel`` as
    keyword arguments, and then runs the model's ``train``, ``evaluate``
    and ``save`` steps in that order.

    Args:
        table: Forwarded unchanged as the first argument of
            ``RandomForestModel``.
        schema: Forwarded unchanged as the second argument of
            ``RandomForestModel``.
    """
    get_header(get_root_logger(), 'Building a model to predict Trump tweets')

    # Template rooted at whatever get_path() returns for this file; the
    # sub-directory name is filled in when the model is saved.
    path_template = get_path(__file__) + '/{0}'

    ngram_ranges = [(1, 1), (1, 2), (1, 3)]
    # Ten values produced by logspace(1, 3, num=10), each converted with int().
    estimator_counts = list(map(int, logspace(1, 3, num=10)))

    params = {
        'features__text_processing__vect__ngram_range': ngram_ranges,
        'clf__n_estimators': estimator_counts,
    }

    model = RandomForestModel(table, schema, **params)
    model.train()
    model.evaluate()
    model.save(path_template.format('saved_models'))
# Command-line script: parse the options below and load a delimited file
# through FileLoader.
import argparse

from etl.load.src import FileLoader
from lib.utils.lw import get_header, get_root_logger


def main(args):
    """Build a FileLoader from the parsed arguments and run its load step.

    Args:
        args: Namespace providing ``config``, ``file``, ``table``,
            ``schema`` and ``delim`` attributes; the first four are passed
            positionally to ``FileLoader`` and ``delim`` as a keyword.
    """
    FileLoader(
        args.config,
        args.file,
        args.table,
        args.schema,
        delim=args.delim,
    ).load()


if __name__ == '__main__':
    logger = get_root_logger()
    get_header(logger, 'Loading CSV data')

    cli = argparse.ArgumentParser()
    # These four options share the same plain (optional, string) definition.
    for flag in ('--config', '--file', '--table', '--schema'):
        cli.add_argument(flag)
    cli.add_argument('--delim', default=',')

    args = cli.parse_args()
    # The literal word "tab" stands in for a tab character, which is
    # awkward to type on a shell command line.
    if args.delim == 'tab':
        args.delim = '\t'

    main(args)