def testAggregationNoValues(self):
   """Single metric with no values."""
   slices = (
       InstanceMetricData("id", ()),
   )
   result = aggregation.aggregate(slices)
   self.assertEqual(len(result), 0)
Example #2
def view_clusters():
    session = create_session()
    clusters = session.query(Cluster).all()
    states = concat_dfs(cluster.state_df() for cluster in clusters)
    level_info_data = get_level_info_data()
    price_settings = get_price_settings()

    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }
    if not states.empty:
        results = aggregate_by_types(states, aggregate_for_entity)
        for key, (_, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")

        cluster_dbus = (aggregate(df=states,
                                  col="interval_dbu",
                                  by="cluster_id",
                                  since_days=7).rename(columns={
                                      'interval_dbu': 'dbu'
                                  }).dbu.to_dict())
    else:
        cluster_dbus = {cluster.cluster_id: 0.0 for cluster in clusters}

    clusters_by_type = {}
    for cluster in clusters:
        clusters_by_type.setdefault(cluster.cluster_type(), []).append(cluster)

    return render_template('clusters.html',
                           clusters_by_type=clusters_by_type,
                           price_settings=price_settings,
                           data=level_info_data,
                           cluster_dbus=cluster_dbus,
                           time_stats=time_stats_dict)
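
Note: all of the view functions in these examples delegate to a small pandas helper called aggregate. Its definition is not part of the listing; judging only from the call sites (df=, col=, by=, aggfunc=, since_days=), a plausible sketch might look like the following, where the since_days cutoff on a 'timestamp' column is an assumption:

import pandas as pd

def aggregate(df, col, by=None, aggfunc='sum', since_days=None):
    # Hypothetical reconstruction from the call sites in these examples;
    # the project's real helper may differ.
    if since_days is not None:
        cutoff = pd.Timestamp.now() - pd.Timedelta(days=since_days)
        df = df[df['timestamp'] >= cutoff]  # assumes a 'timestamp' column
    if by is None:
        # No grouping key: reduce the column to a single scalar.
        return df[col].agg(aggfunc)
    # Grouped: return a DataFrame indexed by the grouping key(s).
    return df.groupby(by)[[col]].agg(aggfunc)
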
Example #3
import glob
import os
import sys
from math import ceil

import numpy as np


def aggregate_and_save(src_fold,
                       method,
                       dst_fold,
                       fea_idx,
                       total_order=0,
                       order=0):
    if not os.path.exists(dst_fold):
        os.mkdir(dst_fold)
    src_fold = os.path.join(src_fold, '*.npy')

    fea_list = glob.glob(src_fold)
    # Optionally process only the `order`-th of `total_order` shards of the
    # file list, so that several workers can split the work.
    if total_order != 0 and order != 0:
        length = len(fea_list)
        step = int(ceil(float(length) / total_order))
        fea_list = fea_list[(order - 1) * step:min(order * step, length)]
    idx = 0
    for path in fea_list:
        fold, name = os.path.split(path)
        save_path = os.path.join(dst_fold, name)
        # Skip files that were already aggregated in a previous run.
        if os.path.exists(save_path):
            idx += 1
            if not idx % 100:
                sys.stdout.write('\rEncoding %d th img: %s' % (idx, name))
                sys.stdout.flush()
            continue
        X = np.load(path)
        y = aggregate(X, fea_idx, method)

        np.save(save_path, y)
        idx += 1
        if not idx % 100:
            sys.stdout.write('\rEncoding %d th img: %s' % (idx, name))
            sys.stdout.flush()
    sys.stdout.write('\n')
    sys.stdout.flush()
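
The total_order and order parameters let several workers split the file list; for example, worker 2 of 4 processes only the second quarter of the features. A usage sketch (the paths and the method name are illustrative):

# Hypothetical call: shard the .npy features across 4 workers, run shard 2.
aggregate_and_save(src_fold='features/raw',
                   method='max',
                   dst_fold='features/agg',
                   fea_idx=0,
                   total_order=4,
                   order=2)
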
Example #4
def testAggregationSingleValue(self):
   """Single metric with single value."""
   timestamp = datetime.datetime.utcnow()
   slices = (
       InstanceMetricData("id", (
           MetricRecord(timestamp, 100.0),
       )),
   )
   result = aggregation.aggregate(slices)
   self.assertEqual(len(result), 1)
   self.assertIsInstance(result[0], tuple)
   self.assertSequenceEqual(result[0], (timestamp, 100.0))
Example #5
def system_model(config, logger, log_dir):
    '''
    Build the system model: load the EXIOBASE supply and use tables,
    aggregate them, create electricity-grid and exclusive-by-product
    markets, assemble the model SUT and derive the IOT from it.
    '''
    _name = system_model.__name__  # name used for logging
    logger.info(LogMessage(_name, 'Starting the system model.'))

    exio_v, exio_u = load_exiobase.get_sut(config.get('exio_data', 'ddir'),
                                           config.get('exio_data', 'supply'),
                                           config.get('exio_data', 'use'),
                                           logger)
    iot_names, country_dic, prod_dic, country_list = (
        load_exiobase.get_aggregated_product_names(
            config.get('exio_data', 'ddir'),
            config.get('exio_data', 'aggregated_names'),
            logger))

    aggregation_matrix, N_reg, N_prod, N_sec = agg.get_aggregation_matrix(
        config.get('exio_data', 'ddir'),
        config.get('exio_data', 'aggregation_matrix'),
        config.get('exio_data', 'calvals_matrix'), log_dir,
        config.get('project_info', 'aggregation_report_file'), logger)
    exio_vagg, exio_uagg = agg.aggregate(exio_v, exio_u, aggregation_matrix,
                                         logger)
    all_excl_byprods = get_exclusive_byproducts(exio_vagg, exio_uagg, N_reg,
                                                iot_names, logger)
    excl_byproducts, market_names, grid_electricity, elec_markets = (
        create_market_and_product_names(all_excl_byprods, N_reg,
                                        country_list, logger))

    (V_without_elec, U_without_elec, V_elecmarkets, U_elecmarkets,
     elec_market_product_supply, elec_market_product_use) = (
         create_electricity_grids(exio_vagg, exio_uagg, N_reg,
                                  N_sec, iot_names, logger))

    (V_markets, U_markets, v_market_excl_byproduct,
     u_market_excl_byproduct, excl_market_products_supply,
     excl_market_products_use) = (
         create_excl_byprod_markets(
             V_without_elec, U_without_elec, excl_byproducts, prod_dic,
             country_dic, all_excl_byprods, N_sec, iot_names, logger))

    V_model, U_model = assemble_SUT(
        V_markets, U_markets, V_elecmarkets, U_elecmarkets,
        elec_market_product_supply, elec_market_product_use,
        v_market_excl_byproduct, u_market_excl_byproduct,
        excl_market_products_supply, excl_market_products_use, logger)

    Z_model, A_model = make_IOT(V_model, U_model, logger)
Example #6
def view_users():
    session = create_session()
    users = session.query(User).all()

    level_info_data = get_level_info_data()

    for user in users:
        user.dbu = aggregate(df=user.state_df(),
                             col='interval_dbu',
                             since_days=7)
    users = sorted(users, key=lambda user: user.dbu, reverse=True)
    states = concat_dfs(user.state_df() for user in users)

    # Daily count of distinct active users over the last 7 days
    active_users = (aggregate(df=states,
                              col='user_id',
                              by=get_time_grouper('timestamp'),
                              aggfunc='nunique',
                              since_days=7).reindex(get_time_index(7),
                                                    fill_value=0))
    active_users['ts'] = active_users.index.format()

    # Daily DBU consumption over the last 7 days
    dbus = (aggregate(df=states,
                      col='interval_dbu',
                      by=get_time_grouper('timestamp'),
                      aggfunc='sum',
                      since_days=7).reindex(get_time_index(7), fill_value=0))
    active_users['sum_dbus'] = dbus.interval_dbu
    active_users['average_dbu'] = ((active_users.sum_dbus /
                                    active_users.user_id).fillna(0.))

    return render_template('users.html',
                           users=users,
                           active_users=active_users.to_dict('records'),
                           data=level_info_data)
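
get_time_grouper and get_time_index are likewise not shown in these examples. Given how the results are grouped and then reindexed onto a fixed window, daily-resolution versions might look like this (the one-day frequency is an assumption):

import pandas as pd

def get_time_grouper(key):
    # Assumed daily resolution; the real helper may use another frequency.
    return pd.Grouper(key=key, freq='1D')

def get_time_index(days):
    # A fixed daily index ending today, used to fill gaps with zeros.
    end = pd.Timestamp.now().normalize()
    return pd.date_range(end=end, periods=days, freq='1D')
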
Example #7
def testAggregationMultipleValues(self):
   """Single metric with multiple values at different timestamps."""
   timestamp2 = datetime.datetime.utcnow()
   timestamp1 = timestamp2 - datetime.timedelta(minutes=5)
   slices = (
       InstanceMetricData("id", (
           MetricRecord(timestamp1, 100.0),
           MetricRecord(timestamp2, 50.0),
       )),
   )
   result = aggregation.aggregate(slices)
   self.assertEqual(len(result), 2)
   self.assertIsInstance(result[0], tuple)
   self.assertIsInstance(result[1], tuple)
   self.assertSequenceEqual(result[0], (timestamp1, 100.0))
   self.assertSequenceEqual(result[1], (timestamp2, 50.0))
Example #8
def testAggregationMultipleMetricsAligned(self):
   """Multiple metrics with matching timestamps."""
   timestamp2 = datetime.datetime.utcnow()
   timestamp1 = timestamp2 - datetime.timedelta(minutes=5)
   slices = (
       InstanceMetricData("id1", (
           MetricRecord(timestamp1, 100.0),
           MetricRecord(timestamp2, 50.0),
       )),
       InstanceMetricData("id2", (
           MetricRecord(timestamp1, 80.0),
           MetricRecord(timestamp2, 30.0),
       )),
   )
   result = aggregation.aggregate(slices)
   self.assertEqual(len(result), 2)
   self.assertIsInstance(result[0], tuple)
   self.assertIsInstance(result[1], tuple)
   self.assertSequenceEqual(result[0], (timestamp1, 90.0))
   self.assertSequenceEqual(result[1], (timestamp2, 40.0))
Example #9
def view_user(username):
    session = create_session()
    try:
        user = (session.query(User).filter(User.username == username).one())
    except Exception:
        return view_missing(type="user", id=username)
    states = user.state_df()

    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }
    if not states.empty:
        workspaces = (concat_dfs({
            (w.workspace.id, w.workspace.name): w.workspace.state_df()
            for w in user.user_workspaces
        }).reset_index([0, 1]).rename(columns={
            'level_0': 'workspace_id',
            'level_1': 'workspace_name'
        }))

        last7_workspaces = (aggregate(
            df=workspaces,
            col='interval_dbu',
            by=['workspace_id', 'workspace_name'],
            since_days=7).rename(columns={'interval_dbu': 'last7dbu'}))

        all_workspaces = (aggregate(
            df=workspaces,
            col='interval_dbu',
            by=['workspace_id', 'workspace_name'
                ]).rename(columns={'interval_dbu': 'alltimedbu'}))

        workspaces_dict = (pd.merge(
            all_workspaces,
            last7_workspaces,
            how='left',
            left_index=True,
            right_index=True).fillna(0.0).reset_index().sort_values(
                'last7dbu').to_dict('records'))

        price_settings = get_price_settings()
        results = aggregate_by_types(states, aggregate_for_entity)

        cost_summary_dict = {}
        for key, (cost_summary, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")
            cost_summary = cost_summary.to_dict()
            cost = cost_summary['interval_dbu'] * price_settings[key]
            weekly_cost = (cost_summary['weekly_interval_dbu_sum'] *
                           price_settings[key])
            cost_summary['cost'] = cost
            cost_summary['weekly_cost'] = weekly_cost
            cost_summary_dict[key] = cost_summary

        # Not every user has both 'interactive' and 'job' runs, so sum each
        # cost field across whichever types are actually present.
        present_key = next(iter(cost_summary_dict))
        cost_summary_dict = {
            key: sum(cost_summary_dict[type_][key] for type_ in results)
            for key in cost_summary_dict[present_key]
        }
    else:
        workspaces_dict = [{
            'workspace_id': w.workspace.id,
            'workspace_name': w.workspace.name,
            'last7dbu': 0.0,
            'alltimedbu': 0.0
        } for w in user.user_workspaces]
        cost_summary_dict = {
            "interval": 0.0,
            "interval_dbu": 0.0,
            "weekly_interval_sum": 0.0,
            "weekly_interval_dbu_sum": 0.0,
            "cost": 0.0,
            "weekly_cost": 0.0
        }

    return render_template('user.html',
                           user=user,
                           workspaces=workspaces_dict,
                           cost=cost_summary_dict,
                           time_stats=time_stats_dict)
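
aggregate_by_types and aggregate_for_entity are also only visible through their call sites: the views expect a mapping from cluster type ('interactive' or 'job') to a (cost_summary, time_stats) pair. A hypothetical sketch of the dispatcher, assuming the state frame carries a cluster_type column:

def aggregate_by_types(states, aggfunc):
    # Hypothetical reconstruction: split the states by cluster type and
    # aggregate each group separately; the real helper may differ.
    return {
        cluster_type: aggfunc(type_states)
        for cluster_type, type_states in states.groupby('cluster_type')
    }
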
Example #10
def view_workspace(workspace_id):
    session = create_session()
    try:
        workspace = (session.query(Workspace).filter(
            Workspace.id == workspace_id).one())
    except Exception:
        return view_missing(type="workspace", id=workspace_id)
    states = workspace.state_df()
    numbjobs_dict = get_running_jobs(workspace.jobruns)
    price_settings = get_price_settings()

    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }
    if not states.empty:
        results = aggregate_by_types(states, aggregate_for_entity)
        cost_summary_dict = {}
        for key, (cost_summary, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")
            cost_summary = cost_summary.to_dict()
            cost = cost_summary['interval_dbu'] * price_settings[key]
            weekly_cost = (cost_summary['weekly_interval_dbu_sum'] *
                           price_settings[key])
            cost_summary['cost'] = cost
            cost_summary['weekly_cost'] = weekly_cost
            cost_summary_dict[key] = cost_summary

        # Not every workspace has both 'interactive' and 'job' runs, so sum
        # each cost field across whichever types are actually present.
        present_key = next(iter(cost_summary_dict))
        cost_summary_dict = {
            key: sum(cost_summary_dict[type_][key] for type_ in results)
            for key in cost_summary_dict[present_key]
        }

        top_users = (aggregate(
            df=states, col='interval_dbu', by='user_id',
            since_days=7).reset_index().rename(columns={
                'interval_dbu': 'dbu'
            }).sort_values('dbu', ascending=False))
        # Drop the placeholder user id before taking the top three.
        top_users_dict = (top_users.loc[~top_users.user_id.isin(['UNKNOWN'])].
                          to_dict("records")[:3])
    else:
        cost_summary_dict = {
            "interval": 0.0,
            "interval_dbu": 0.0,
            "weekly_interval_sum": 0.0,
            "weekly_interval_dbu_sum": 0.0,
            "cost": 0.0,
            "weekly_cost": 0.0
        }
        top_users_dict = []

    clusters_by_type = {}
    for cluster in workspace.clusters:
        clusters_by_type.setdefault(cluster.cluster_type(), []).append(cluster)

    return render_template('workspace.html',
                           workspace=workspace,
                           clusters_by_type=clusters_by_type,
                           cost=cost_summary_dict,
                           time_stats=time_stats_dict,
                           top_users=top_users_dict,
                           numjobs=numbjobs_dict,
                           empty=states.empty)
Example #11
def testAggregationEmptyTuple(self):
   """No values in input tuple."""
   result = aggregation.aggregate(())
   self.assertSequenceEqual(result, ())
Example #12
def testAggregationEmptyList(self):
   """No values in input list."""
   result = aggregation.aggregate([])
   self.assertSequenceEqual(result, ())
Example #13
def main():
    filename = "./Dataframes/" + "The big one" + ".csv"

    patterns = ["World", "Champ", "Candidates", "Interzonal", "PCA"]
    # patterns = ["2000"]
    # patterns = ["World"]
    # patterns = ["Dae"]
    directory = r'/home/jake/Downloads/pgn/'
    # directory = r'./pgn/'
    # directory = r'./pgn/FISC'
    dfs = []
    list_of_games = []
    for entry in os.scandir(directory):
        if (entry.path.endswith(".pgn") and entry.is_file()
                and any(pattern in entry.name[:-4] for pattern in patterns)):
            print(entry.path)
            df, games = aggregate(entry.path)

            rows = []
            for game in games:
                lost_pieces = process_game(game)
                rows.append(lost_pieces)
            df["Lost pieces"] = rows

            dfs.append(df)
            list_of_games.extend(games)

    df = pd.concat(dfs, ignore_index=True)
    # filename = "./pgn/DeLaBourdonnais.pgn"

    # filename = "./pgn/testing.pgn"

    # meta data and game list
    # df.to_csv(filename)

    # pieces = [chess.ROOK, chess.KNIGHT]
    # Every piece type except the king, queen first.
    pieces = list(chess.PIECE_TYPES)[:-1][::-1]

    # Type casting and filtering: cast Elo ratings to int, dropping games
    # where either player's rating is missing or malformed.
    has_elo = (df["WhiteElo"].apply(lambda x: x not in ["", "?", None, np.nan])
               & df["BlackElo"].apply(lambda x: x not in ["", "?", None, np.nan]))
    df_elo = df[has_elo].astype({"WhiteElo": 'int', "BlackElo": 'int'})
    df["Date"] = pd.to_datetime(
        df["Date"].apply(lambda x: x.replace("?", "")[:4].replace(".", ""))
    )  # type cast dates to dates, assumes year is present in all data

    # plt.rcParams['figure.figsize'] = (10, 8)
    plt.rcParams.update({'font.size': 14})

    # ELO: bin games by White's rating at normal-distribution quantiles
    # (median, +/-1 sigma at 0.1587/0.8413, +/-2 sigma at 0.0228/0.9772).
    minimum, low, low_mid, mid, mid_high, high, maximum = df_elo[
        "WhiteElo"].quantile([0, 0.0228, 0.1587, 0.5, 0.8413, 0.9772, 1])
    bins_ranges = [minimum, low, low_mid, mid, mid_high, high, maximum]
    bins = []
    col_labels = []
    for lower, upper in zip(bins_ranges[:-1], bins_ranges[1:]):
        print(f"Elo in range {lower:.0f}-{upper:.0f}")
        bins.append(df_elo[in_range(lower, df_elo["WhiteElo"], upper)])
        col_labels.append(
            f"{lower:.0f} - {upper:.0f}\n{len(bins)-4 if len(bins) != 1 else '-∞'}σ to {len(bins)-3 if len(bins) != 6 else '∞'}σ"
        )
    bintype = "ELO"

    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")

    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")

    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK,
                                 bintype)
    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE,
                                 bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype)

    # DATE
    minimum, low, mid, high, maximum = df["Date"].quantile(
        [0, 0.25, 0.5, 0.75, 1])
    bins_ranges = [minimum, low, mid, high, maximum]
    # bins_ranges = [pd.to_datetime("1700"), pd.to_datetime("1900"), pd.to_datetime("1980"), pd.to_datetime("2010"), pd.to_datetime("2025")]

    bins = []
    col_labels = []
    for lower, upper in zip(bins_ranges[:-1], bins_ranges[1:]):
        print(f"Date in range {lower.year}-{upper.year}")
        bins.append(df[in_range(lower, df["Date"], upper)])
        col_labels.append(
            f"{lower.year} to {upper.year}\nQ{len(bins)-1} - Q{len(bins)}")
    bintype = "DATE"

    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")

    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")

    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK,
                                 bintype)
    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE,
                                 bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype)

    # fig, axs = plot_heatmap_single_piece(df, [chess.PAWN], username="******", cmap=sns.color_palette("viridis", as_cmap=True))
    # fig, axs = plot_heatmap_single_piece(df, [chess.PAWN], bintype)

    for piece in chess.PIECE_TYPES[:-1]:
        # fig, axs = plot_heatmap_single_piece(df, [piece], username="******")
        fig, axs = plot_heatmap_single_piece(df, [piece])

        # fig, axs = plot_hist_single_piece(df, [piece], username="******")
        fig, axs = plot_hist_single_piece(df, [piece])

    # plt.show()

    # show(df)

    print(len(df))
    return df, list_of_games