예제 #1
0
    # Command-line options. Their values are read below as:
    #   --cn -> collection_name, --mn -> model_name,
    #   --mt -> model_type,      --cf -> config_path.
    parser.add_argument('--cn')
    parser.add_argument('--mn')
    parser.add_argument('--mt')
    parser.add_argument('--cf')
    args = parser.parse_args()
    collection_name = args.cn
    # NOTE(review): model_name is not used in the visible part of this
    # function; presumably it is consumed further down -- confirm.
    model_name = args.mn
    model_type = args.mt
    config_path = args.cf

    # Load settings from the config file given by --cf.
    config = ConfigParser.ConfigParser()
    config.read(config_path)

    root_logger.info('Get train data.')

    # The repository URI comes from the MONGO_URI option of the
    # 'defaults' config section.
    data_rep = DataRepository(uri=config.get('defaults', 'MONGO_URI'))
    X = []
    Y = []
    # The filter asks for rows whose 'Y' field is [1] or [0].
    # NOTE(review): this looks like a MongoDB-style $or query; how
    # DataRepository.get interprets it is not visible here -- confirm.
    for row in data_rep.get(collection_name, { '$or': [{'Y': [1]}, {'Y': [0]}]}):
        # Every fetched row's 'meta' field is logged at INFO level.
        root_logger.info(row['meta'])

        # Collect the row's 'X' and 'Y' fields in parallel lists.
        X.append(row['X'])
        Y.append(row['Y'])

    root_logger.info('We have {0} rows to train'.format(len(Y)))
    root_logger.info('Train model.')

    # Create the model selected by --mt and train it on the collected rows.
    model = StatModelFactory.create(model_type)
    model.train(X, Y)

    model_storage = AzureBlobStorage(
예제 #2
0
    # `args` is parsed above the visible part of this function.
    config_path = args.cf
    source_type = args.st

    # Load settings from the config file given by --cf.
    config = ConfigParser.ConfigParser()
    config.read(config_path)

    # Each entry is a (stat_season, games_season) pair: the first value is
    # passed as team_stat_season and the second as games_season below.
    # The last pair is the only one where the two values differ.
    seasons = [
        (2012, 2012),
        (2013, 2013),
        (2014, 2014),
        (2015, 2015),
        (2015, 2016),
        ]

    # The repository URI comes from the MONGO_URI option of the
    # 'defaults' config section.
    data_rep = DataRepository(uri=config.get('defaults', 'MONGO_URI'))

    for season in seasons:
        root_logger.info('stat_season: {0}, games_season: {1}'
                                                .format(season[0], season[1]))

        # A new data source is built for every season pair; the base URL
        # and the row parse strategy are both looked up by source_type.
        ds = SportReferenceDataSource(
                            base_url=__source_type_base_url_map[source_type],
                            team_stat_season=season[0],
                            games_season=season[1],
                            game_type=source_type,
                            row_parse_strategy=__source_type_map[source_type],
                            cache_team_stats=True)
        # date_from / date_to are defined above the visible part of this
        # function; the same range is used for every season.
        X, Y, metadata = ds.load(dict(date_from=date_from, date_to=date_to))

예제 #3
0
    # `args` is parsed above the visible part of this function.
    date_from = date_utils.try_parse(args.df)
    date_to = date_utils.try_parse(args.dt)
    source_name = args.sn
    source_type = args.st

    # Settings are always read from 'scripts.cfg' (path relative to the
    # current working directory).
    config = ConfigParser.ConfigParser()
    config.read('scripts.cfg')

    # The base URL and the row parse strategy are both looked up by
    # source_type; the seasons are fixed to 2015 stats / 2016 games.
    ds = SportReferenceDataSource(
                        base_url=__source_type_base_url_map[source_type],
                        team_stat_season=2015,
                        games_season=2016,
                        game_type=source_type,
                        row_parse_strategy=__source_type_map[source_type],
                        cache_team_stats=True)
    X, Y, metadata = ds.load(dict(date_from=date_from, date_to=date_to))

    # The repository URI comes from the MONGO_URI option of the
    # 'defaults' config section.
    data_rep = DataRepository(uri=config.get('defaults', 'MONGO_URI'))

    # For every loaded row that has a truthy label, update the stored
    # record matching that row's metadata with the label.
    for i, label in enumerate(Y):
        # NOTE(review): this skips every falsy label, which includes 0 as
        # well as None -- confirm that a label of 0 should not be stored.
        if not label:
            continue
        # Build the filter from this row's own key/value pairs so a key is
        # never paired with a value taken from a different position, and
        # so an empty `metadata` list no longer raises IndexError.
        meta_filter = {
            'meta.{0}'.format(key): value
            for key, value in metadata[i].items()
        }
        res = data_rep.update(source_name, meta_filter, {'Y': [label]})
        # Logged once per update. This line is space-indented because a
        # tab mixed with spaces here is a TabError under Python 3.
        root_logger.info(res)