コード例 #1
0
ファイル: DataScience.py プロジェクト: erangell/mwt-ds
        headers = ['timestamp']
        for n in list(itertools.islice(metrics, 1))[0].names:
            headers.extend(['{0} cost'.format(n), '{0} action'.format(n)])
        headers.extend(['prob', 'file'])

        data = itertools.chain.from_iterable(
            map(lambda x: x.tabulate_data(), metrics))

        if top:
            data = itertools.islice(data, top)

        return tabulate(data, headers)

    # Reproduce training by replaying the trackback files.
    # Materialize the checkpoint listing for the range
    # [start_date_withlookback, end_date] so it can be handed to the pool.
    model_history = list(
        common.get_checkpoint_models(block_blob_service,
                                     start_date_withlookback, end_date))
    # Build one CheckpointedModel per listed entry using a pool of 5 workers.
    # NOTE(review): a lambda can only be dispatched by a thread-backed pool
    # (e.g. multiprocessing.dummy.Pool); a process pool would fail to pickle
    # it -- confirm which Pool this file imports.
    with Pool(5) as p:
        model_history = p.map(
            lambda x: common.CheckpointedModel(block_blob_service, x[
                0], cache_folder, x[1], x[2]), model_history)
        # Register every model that carries an id so it can be found by id.
        for m in model_history:
            if m.model_id is not None:
                global_model_idx[m.model_id] = m

    # Order the history by each model's `ts` attribute.
    model_history.sort(key=lambda jd: jd.ts)

    # Create the scoring directory for the [start_date, end_date] range.
    # exist_ok=True replaces a separate exists()/makedirs() pair, which could
    # raise if the directory appeared between the check and the creation.
    scoring_dir = os.path.join(cache_folder, 'scoring')
    os.makedirs(scoring_dir, exist_ok=True)
コード例 #2
0
ファイル: DataScience.py プロジェクト: Microsoft/mwt-ds
    def tabulate_metrics(metrics, top=None):
        """Render the rows of every metric object as one table.

        Column headers are ``'timestamp'``, then a ``'<name> cost'`` /
        ``'<name> action'`` pair for each entry in the ``names`` attribute of
        the first element of *metrics*, then ``'prob'`` and ``'file'``.
        Rows are the concatenation of each element's ``tabulate_data()``.

        Args:
            metrics: Iterable of objects exposing ``names`` and
                ``tabulate_data()``. A one-shot iterator is accepted; its
                first element is no longer lost to the header peek.
            top: When truthy, only the first ``top`` rows are kept. ``None``
                (the default) or ``0`` keeps every row.

        Returns:
            The result of ``tabulate(data, headers)``. For an empty *metrics*
            only the fixed columns are passed as headers, with no rows,
            instead of raising ``IndexError``.
        """
        headers = ['timestamp']

        # Peek at the first element through a single explicit iterator, then
        # chain it back in front, so the peek never drops a row source.
        missing = object()
        metrics_iter = iter(metrics)
        first = next(metrics_iter, missing)
        if first is not missing:
            for n in first.names:
                headers.extend(['{0} cost'.format(n), '{0} action'.format(n)])
            metrics_iter = itertools.chain([first], metrics_iter)
        headers.extend(['prob', 'file'])

        # Rows stay lazy: tabulate_data() is only called as rows are consumed.
        data = itertools.chain.from_iterable(
            m.tabulate_data() for m in metrics_iter)

        if top:
            data = itertools.islice(data, top)

        return tabulate(data, headers)

    # Replay training from the trackback files: list the checkpoint entries
    # for the range [start_date_withlookback, end_date].
    model_history = list(
        common.get_checkpoint_models(
            block_blob_service, start_date_withlookback, end_date))

    def _load_checkpoint(entry):
        # Wrap one listed entry (its first three fields) in a CheckpointedModel.
        return common.CheckpointedModel(
            block_blob_service, entry[0], cache_folder, entry[1], entry[2])

    # Construct the models with a pool of 5 workers, then index those that
    # carry an id.
    with Pool(5) as p:
        model_history = p.map(_load_checkpoint, model_history)
        for m in model_history:
            if m.model_id is None:
                continue
            global_model_idx[m.model_id] = m

    # Sort in place by each model's `ts` attribute.
    model_history.sort(key=lambda model: model.ts)

    # Make sure the scoring directory for the [start_date, end_date] range
    # exists under the cache folder.
    scoring_dir = os.path.join(cache_folder, 'scoring')
    os.makedirs(scoring_dir, exist_ok=True)

    for local_date in common.dates_in_range(start_date, end_date):
        scoring_dir_date = os.path.join(scoring_dir, local_date.strftime('%Y/%m/%d'))
        if os.path.exists(scoring_dir_date):