parser.add_argument('--cn') parser.add_argument('--mn') parser.add_argument('--mt') parser.add_argument('--cf') args = parser.parse_args() collection_name = args.cn model_name = args.mn model_type = args.mt config_path = args.cf config = ConfigParser.ConfigParser() config.read(config_path) root_logger.info('Get train data.') data_rep = DataRepository(uri=config.get('defaults', 'MONGO_URI')) X = [] Y = [] for row in data_rep.get(collection_name, { '$or': [{'Y': [1]}, {'Y': [0]}]}): root_logger.info(row['meta']) X.append(row['X']) Y.append(row['Y']) root_logger.info('We have {0} rows to train'.format(len(Y))) root_logger.info('Train model.') model = StatModelFactory.create(model_type) model.train(X, Y) model_storage = AzureBlobStorage(
config_path = args.cf
source_type = args.st

config = ConfigParser.ConfigParser()
config.read(config_path)

# (stat_season, games_season)
seasons = [
    (2012, 2012),
    (2013, 2013),
    (2014, 2014),
    (2015, 2015),
    (2015, 2016),
]

mongo_uri = config.get('defaults', 'MONGO_URI')
data_rep = DataRepository(uri=mongo_uri)

# Load the games of every configured season pair in turn.
# date_from / date_to are defined earlier in the script (outside this excerpt).
for season in seasons:
    stat_season, games_season = season
    root_logger.info(
        'stat_season: {0}, games_season: {1}'.format(
            stat_season, games_season))

    ds = SportReferenceDataSource(
        base_url=__source_type_base_url_map[source_type],
        team_stat_season=stat_season,
        games_season=games_season,
        game_type=source_type,
        row_parse_strategy=__source_type_map[source_type],
        cache_team_stats=True)

    load_window = {'date_from': date_from, 'date_to': date_to}
    X, Y, metadata = ds.load(load_window)
# Write outcomes (Y) for the 2015/2016 season back to the stored rows of
# `source_name`, locating each stored row by its metadata fields.
date_from = date_utils.try_parse(args.df)
date_to = date_utils.try_parse(args.dt)
source_name = args.sn
source_type = args.st

config = ConfigParser.ConfigParser()
config.read('scripts.cfg')

ds = SportReferenceDataSource(
    base_url=__source_type_base_url_map[source_type],
    team_stat_season=2015,
    games_season=2016,
    game_type=source_type,
    row_parse_strategy=__source_type_map[source_type],
    cache_team_stats=True)
X, Y, metadata = ds.load(dict(date_from=date_from, date_to=date_to))

data_rep = DataRepository(uri=config.get('defaults', 'MONGO_URI'))

# Kept for any later code that reads it. Guarded so an empty load no longer
# raises IndexError, and materialised as a list so it is indexable on both
# Python 2 and Python 3.
meta_keys = list(metadata[0].keys()) if metadata else []

for i, outcome in enumerate(Y):
    # NOTE(review): this skips every falsy outcome, which includes 0 as well
    # as None. If 0 is a legitimate outcome that should be written back, this
    # check probably wants to be `outcome is None` -- confirm with the data
    # source before changing it.
    if not outcome:
        continue

    # Build the filter from this row's own key/value pairs. Pairing the keys
    # of row 0 with the values of row i by position silently produces a wrong
    # filter whenever a row's key set or key order differs from row 0.
    row_filter = {
        'meta.{0}'.format(key): value
        for key, value in metadata[i].items()
    }

    res = data_rep.update(source_name, row_filter, {'Y': [outcome]})
    root_logger.info(res)