def testAggregationNoValues(self):
    """Single metric with no values."""
    slices = (InstanceMetricData("id", ()),)

    result = aggregation.aggregate(slices)

    self.assertEqual(len(result), 0)
def view_clusters():
    session = create_session()
    clusters = session.query(Cluster).all()
    states = concat_dfs(cluster.state_df() for cluster in clusters)
    level_info_data = get_level_info_data()
    price_settings = get_price_settings()
    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }

    if not states.empty:
        results = aggregate_by_types(states, aggregate_for_entity)
        for key, (_, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")

        cluster_dbus = (aggregate(df=states,
                                  col="interval_dbu",
                                  by="cluster_id",
                                  since_days=7)
                        .rename(columns={'interval_dbu': 'dbu'})
                        .dbu.to_dict())
    else:
        cluster_dbus = {cluster.cluster_id: 0.0 for cluster in clusters}

    clusters_by_type = {}
    for cluster in clusters:
        clusters_by_type.setdefault(cluster.cluster_type(), []).append(cluster)

    return render_template('clusters.html',
                           clusters_by_type=clusters_by_type,
                           price_settings=price_settings,
                           data=level_info_data,
                           cluster_dbus=cluster_dbus,
                           time_stats=time_stats_dict)
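# The views in this module all call a pandas helper named `aggregate` whose
# definition is not shown here. The following is a minimal sketch inferred
# from the call sites (df, col, by, aggfunc, since_days); the 'timestamp'
# column name and the trailing-window logic are assumptions, not the
# module's actual implementation.
import pandas as pd

def aggregate(df, col, by=None, aggfunc='sum', since_days=None):
    """Sketch: optionally window `df` to the last `since_days` days, then
    aggregate `col`, grouped by `by` when given (scalar result otherwise)."""
    if since_days is not None:
        # Assumes a naive-UTC 'timestamp' column on the state DataFrames.
        cutoff = (pd.Timestamp.utcnow().tz_localize(None) -
                  pd.Timedelta(days=since_days))
        df = df[df['timestamp'] >= cutoff]
    if by is None:
        return df[col].agg(aggfunc)
    return df.groupby(by)[[col]].agg(aggfunc)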
def aggregate_and_save(src_fold, method, dst_fold, fea_idx,
                       total_order=0, order=0):
    """Aggregate every .npy feature file in src_fold and save the result to
    dst_fold. total_order/order optionally shard the file list so several
    workers can split the job: worker `order` (1-based) of `total_order`
    takes its contiguous slice."""
    if not os.path.exists(dst_fold):
        os.mkdir(dst_fold)

    fea_list = glob.glob(os.path.join(src_fold, '*.npy'))
    if total_order != 0 and order != 0:
        length = len(fea_list)
        step = int(ceil(float(length) / total_order))
        fea_list = fea_list[(order - 1) * step:min(order * step, length)]

    idx = 0
    for path in fea_list:
        _, name = os.path.split(path)
        save_path = os.path.join(dst_fold, name)
        if os.path.exists(save_path):
            # Already encoded; skip but keep the progress counter moving.
            idx += 1
            if not idx % 100:
                sys.stdout.write('\rEncoding %dth img: %s' % (idx, name))
                sys.stdout.flush()
            continue
        X = np.load(path)
        y = aggregate(X, fea_idx, method)
        np.save(save_path, y)
        idx += 1
        if not idx % 100:
            sys.stdout.write('\rEncoding %dth img: %s' % (idx, name))
            sys.stdout.flush()
    sys.stdout.write('\n')
    sys.stdout.flush()
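# Usage sketch for the sharding parameters (the paths, method name, and
# fea_idx below are placeholders, not values from this project): with
# total_order workers, worker k of N takes the k-th contiguous slice of the
# file list. For example, with 10 files and total_order=4, step =
# ceil(10 / 4) = 3, so the workers process files [0:3], [3:6], [6:9],
# and [9:10] respectively.
#
#   aggregate_and_save('feats/raw', 'mean', 'feats/agg', fea_idx,
#                      total_order=4, order=k)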
def testAggregationSingleValue(self):
    """Single metric with single value."""
    timestamp = datetime.datetime.utcnow()
    slices = (
        InstanceMetricData("id", (MetricRecord(timestamp, 100.0),)),
    )

    result = aggregation.aggregate(slices)

    self.assertEqual(len(result), 1)
    self.assertIsInstance(result[0], tuple)
    self.assertSequenceEqual(result[0], (timestamp, 100.0))
def system_model(config, logger, log_dir):
    '''Build the system model: load and aggregate the EXIOBASE supply and
    use tables, construct electricity grids and exclusive-by-product
    markets, and assemble the result into IOT matrices.'''
    _name = system_model.__name__  # name used for logging
    logger.info(LogMessage(_name, 'Starting the system model.'))

    exio_v, exio_u = load_exiobase.get_sut(
        config.get('exio_data', 'ddir'),
        config.get('exio_data', 'supply'),
        config.get('exio_data', 'use'),
        logger)

    iot_names, country_dic, prod_dic, country_list = \
        load_exiobase.get_aggregated_product_names(
            config.get('exio_data', 'ddir'),
            config.get('exio_data', 'aggregated_names'),
            logger)

    aggregation_matrix, N_reg, N_prod, N_sec = agg.get_aggregation_matrix(
        config.get('exio_data', 'ddir'),
        config.get('exio_data', 'aggregation_matrix'),
        config.get('exio_data', 'calvals_matrix'),
        log_dir,
        config.get('project_info', 'aggregation_report_file'),
        logger)

    exio_vagg, exio_uagg = agg.aggregate(exio_v, exio_u,
                                         aggregation_matrix, logger)

    all_excl_byprods = get_exclusive_byproducts(exio_vagg, exio_uagg,
                                                N_reg, iot_names, logger)

    excl_byproducts, market_names, grid_electricity, elec_markets = \
        create_market_and_product_names(all_excl_byprods, N_reg,
                                        country_list, logger)

    V_without_elec, U_without_elec, V_elecmarkets, U_elecmarkets, \
        elec_market_product_supply, elec_market_product_use = \
        create_electricity_grids(exio_vagg, exio_uagg, N_reg, N_sec,
                                 iot_names, logger)

    V_markets, U_markets, v_market_excl_byproduct, \
        u_market_excl_byproduct, excl_market_products_supply, \
        excl_market_products_use = create_excl_byprod_markets(
            V_without_elec, U_without_elec, excl_byproducts, prod_dic,
            country_dic, all_excl_byprods, N_sec, iot_names, logger)

    V_model, U_model = assemble_SUT(
        V_markets, U_markets, V_elecmarkets, U_elecmarkets,
        elec_market_product_supply, elec_market_product_use,
        v_market_excl_byproduct, u_market_excl_byproduct,
        excl_market_products_supply, excl_market_products_use, logger)

    Z_model, A_model = make_IOT(V_model, U_model, logger)
def view_users():
    session = create_session()
    users = session.query(User).all()
    level_info_data = get_level_info_data()

    for user in users:
        user.dbu = aggregate(df=user.state_df(),
                             col='interval_dbu',
                             since_days=7)
    users = sorted(users, key=lambda user: user.dbu, reverse=True)

    states = concat_dfs(user.state_df() for user in users)

    # Daily count of distinct active users over the last 7 days
    active_users = (aggregate(df=states,
                              col='user_id',
                              by=get_time_grouper('timestamp'),
                              aggfunc='nunique',
                              since_days=7)
                    .reindex(get_time_index(7), fill_value=0))
    active_users['ts'] = active_users.index.format()

    # Daily DBU usage, and the average DBU per active user
    dbus = (aggregate(df=states,
                      col='interval_dbu',
                      by=get_time_grouper('timestamp'),
                      aggfunc='sum',
                      since_days=7)
            .reindex(get_time_index(7), fill_value=0))
    active_users['sum_dbus'] = dbus.interval_dbu
    active_users['average_dbu'] = ((active_users.sum_dbus /
                                    active_users.user_id).fillna(0.))

    return render_template('users.html',
                           users=users,
                           active_users=active_users.to_dict('records'),
                           data=level_info_data)
def testAggregationMultipleValues(self):
    """Single metric with multiple values at different timestamps."""
    timestamp2 = datetime.datetime.utcnow()
    timestamp1 = timestamp2 - datetime.timedelta(minutes=5)
    slices = (
        InstanceMetricData("id", (
            MetricRecord(timestamp1, 100.0),
            MetricRecord(timestamp2, 50.0),
        )),
    )

    result = aggregation.aggregate(slices)

    self.assertEqual(len(result), 2)
    self.assertIsInstance(result[0], tuple)
    self.assertIsInstance(result[1], tuple)
    self.assertSequenceEqual(result[0], (timestamp1, 100.0))
    self.assertSequenceEqual(result[1], (timestamp2, 50.0))
def testAggregationMultipleMetricsAligned(self):
    """Multiple metrics with matching timestamps."""
    timestamp2 = datetime.datetime.utcnow()
    timestamp1 = timestamp2 - datetime.timedelta(minutes=5)
    slices = (
        InstanceMetricData("id1", (
            MetricRecord(timestamp1, 100.0),
            MetricRecord(timestamp2, 50.0),
        )),
        InstanceMetricData("id2", (
            MetricRecord(timestamp1, 80.0),
            MetricRecord(timestamp2, 30.0),
        )),
    )

    result = aggregation.aggregate(slices)

    self.assertEqual(len(result), 2)
    self.assertIsInstance(result[0], tuple)
    self.assertIsInstance(result[1], tuple)
    # Values from both instances are averaged at each timestamp:
    # (100 + 80) / 2 and (50 + 30) / 2.
    self.assertSequenceEqual(result[0], (timestamp1, 90.0))
    self.assertSequenceEqual(result[1], (timestamp2, 40.0))
def view_user(username):
    session = create_session()
    try:
        user = (session.query(User)
                .filter(User.username == username)
                .one())
    except Exception:
        return view_missing(type="user", id=username)

    states = user.state_df()
    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }

    if not states.empty:
        workspaces = (concat_dfs({
            (w.workspace.id, w.workspace.name): w.workspace.state_df()
            for w in user.user_workspaces
        })
            .reset_index([0, 1])
            .rename(columns={'level_0': 'workspace_id',
                             'level_1': 'workspace_name'}))

        last7_workspaces = (aggregate(df=workspaces,
                                      col='interval_dbu',
                                      by=['workspace_id', 'workspace_name'],
                                      since_days=7)
                            .rename(columns={'interval_dbu': 'last7dbu'}))
        all_workspaces = (aggregate(df=workspaces,
                                    col='interval_dbu',
                                    by=['workspace_id', 'workspace_name'])
                          .rename(columns={'interval_dbu': 'alltimedbu'}))
        workspaces_dict = (pd.merge(all_workspaces,
                                    last7_workspaces,
                                    how='left',
                                    left_index=True,
                                    right_index=True)
                           .fillna(0.0)
                           .reset_index()
                           .sort_values('last7dbu')
                           .to_dict('records'))

        price_settings = get_price_settings()
        results = aggregate_by_types(states, aggregate_for_entity)
        cost_summary_dict = {}
        for key, (cost_summary, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")
            cost_summary = cost_summary.to_dict()
            cost = cost_summary['interval_dbu'] * price_settings[key]
            weekly_cost = (cost_summary['weekly_interval_dbu_sum'] *
                           price_settings[key])
            cost_summary['cost'] = cost
            cost_summary['weekly_cost'] = weekly_cost
            cost_summary_dict[key] = cost_summary

        # Either cluster type ('interactive' or 'job') may be missing, so
        # sum each cost field over whichever types are actually present.
        present_key = list(cost_summary_dict.keys())[0]
        cost_summary_dict = {
            key: sum(cost_summary_dict[typ][key] for typ in results.keys())
            for key in cost_summary_dict[present_key]
        }
    else:
        workspaces_dict = [{
            'workspace_id': w.workspace.id,
            'workspace_name': w.workspace.name,
            'last7dbu': 0.0,
            'alltimedbu': 0.0
        } for w in user.user_workspaces]
        cost_summary_dict = {
            "interval": 0.0,
            "interval_dbu": 0.0,
            "weekly_interval_sum": 0.0,
            "weekly_interval_dbu_sum": 0.0,
            "cost": 0.0,
            "weekly_cost": 0.0
        }

    return render_template('user.html',
                           user=user,
                           workspaces=workspaces_dict,
                           cost=cost_summary_dict,
                           time_stats=time_stats_dict)
def view_workspace(workspace_id):
    session = create_session()
    try:
        workspace = (session.query(Workspace)
                     .filter(Workspace.id == workspace_id)
                     .one())
    except Exception:
        return view_missing(type="workspace", id=workspace_id)

    states = workspace.state_df()
    numbjobs_dict = get_running_jobs(workspace.jobruns)
    price_settings = get_price_settings()
    time_stats_dict = {
        'interactive': empty_timeseries(),
        'job': empty_timeseries()
    }

    if not states.empty:
        results = aggregate_by_types(states, aggregate_for_entity)
        cost_summary_dict = {}
        for key, (cost_summary, time_stats) in results.items():
            time_stats_dict[key] = time_stats.to_dict("records")
            cost_summary = cost_summary.to_dict()
            cost = cost_summary['interval_dbu'] * price_settings[key]
            weekly_cost = (cost_summary['weekly_interval_dbu_sum'] *
                           price_settings[key])
            cost_summary['cost'] = cost
            cost_summary['weekly_cost'] = weekly_cost
            cost_summary_dict[key] = cost_summary

        # Either cluster type ('interactive' or 'job') may be missing, so
        # sum each cost field over whichever types are actually present.
        present_key = list(cost_summary_dict.keys())[0]
        cost_summary_dict = {
            key: sum(cost_summary_dict[typ][key] for typ in results.keys())
            for key in cost_summary_dict[present_key]
        }

        top_users = (aggregate(df=states,
                               col='interval_dbu',
                               by='user_id',
                               since_days=7)
                     .reset_index()
                     .rename(columns={'interval_dbu': 'dbu'})
                     .sort_values('dbu', ascending=False))
        # Drop the placeholder user id ('UNKONWN' is the literal stored
        # upstream) and keep the top three consumers.
        top_users_dict = (top_users.loc[~top_users.user_id.isin(['UNKONWN'])]
                          .to_dict("records")[:3])
    else:
        cost_summary_dict = {
            "interval": 0.0,
            "interval_dbu": 0.0,
            "weekly_interval_sum": 0.0,
            "weekly_interval_dbu_sum": 0.0,
            "cost": 0.0,
            "weekly_cost": 0.0
        }
        top_users_dict = []  # matches the list of records built above

    clusters_by_type = {}
    for cluster in workspace.clusters:
        clusters_by_type.setdefault(cluster.cluster_type(), []).append(cluster)

    return render_template('workspace.html',
                           workspace=workspace,
                           clusters_by_type=clusters_by_type,
                           cost=cost_summary_dict,
                           time_stats=time_stats_dict,
                           top_users=top_users_dict,
                           numjobs=numbjobs_dict,
                           empty=states.empty)
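# The cost branches above all go through `aggregate_by_types`, whose
# definition is not shown. Below is a minimal sketch of the shape the call
# sites imply; the 'cluster_type' column name and the groupby split are
# assumptions about the real helper.
def aggregate_by_types(states, aggregate_for_entity):
    """Sketch (assumed): split the state rows by cluster type and apply the
    per-entity aggregator, yielding {type: (cost_summary, time_stats)}."""
    return {
        cluster_type: aggregate_for_entity(type_states)
        for cluster_type, type_states in states.groupby('cluster_type')
    }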
def testAggregationEmptyTuple(self):
    """No values in input tuple."""
    result = aggregation.aggregate(())
    self.assertSequenceEqual(result, ())

def testAggregationEmptyList(self):
    """No values in input list."""
    result = aggregation.aggregate([])
    self.assertSequenceEqual(result, ())
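# Taken together, the tests above pin down aggregation.aggregate: flatten
# the per-instance slices into (timestamp, value) tuples in ascending
# timestamp order, averaging values that share a timestamp. A minimal
# sketch that satisfies those expectations (not the module's actual code):
from collections import defaultdict

def aggregate_sketch(slices):
    """Average values across instances at each timestamp; return
    (timestamp, mean) tuples sorted by timestamp."""
    values_by_ts = defaultdict(list)
    for _instance_id, records in slices:  # InstanceMetricData pairs
        for ts, value in records:         # MetricRecord pairs
            values_by_ts[ts].append(value)
    return tuple((ts, sum(vals) / len(vals))
                 for ts, vals in sorted(values_by_ts.items()))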
def main():
    filename = "./Dataframes/" + "The big one" + ".csv"
    patterns = ["World", "Champ", "Candidates", "Interzonal", "PCA"]
    # patterns = ["2000"]
    # patterns = ["World"]
    # patterns = ["Dae"]
    directory = r'/home/jake/Downloads/pgn/'
    # directory = r'./pgn/'
    # directory = r'./pgn/FISC'

    dfs = []
    list_of_games = []
    for entry in os.scandir(directory):
        if (entry.path.endswith(".pgn") and entry.is_file()) and \
                any(pattern in entry.name[:-4] for pattern in patterns):
            print(entry.path)
            df, games = aggregate(entry.path)
            rows = []
            for game in games:
                lost_pieces = process_game(game)
                rows.append(lost_pieces)
            df["Lost pieces"] = rows
            dfs.append(df)
            list_of_games.extend(games)
    df = pd.concat(dfs, ignore_index=True)
    # filename = "./pgn/DeLaBourdonnais.pgn"
    # filename = "./pgn/testing.pgn"
    # meta data and game list
    # df.to_csv(filename)

    # pieces = [chess.ROOK, chess.KNIGHT]
    pieces = list(chess.PIECE_TYPES)[:-1][::-1]  # every type but the king, queen first

    # Type casting and filtering: cast Elo to int after dropping rows with
    # missing or placeholder Elo values.
    df_elo = df[df["WhiteElo"].apply(lambda x: x not in ["", "?", None, np.nan]) &
                df["BlackElo"].apply(lambda x: x not in ["", "?", None, np.nan])] \
        .astype({"WhiteElo": 'int', "BlackElo": 'int'})
    # Cast dates to datetimes; assumes the year is present in all data.
    df["Date"] = pd.to_datetime(
        df["Date"].apply(lambda x: x.replace("?", "")[:4].replace(".", "")))

    # plt.rcParams['figure.figsize'] = (10, 8)
    plt.rcParams.update({'font.size': 14})

    # ELO bins: edges at (approximately) the -2σ, -1σ, median, +1σ and +2σ
    # quantiles of the white Elo distribution.
    minimum, low, low_mid, mid, mid_high, high, maximum = \
        df_elo["WhiteElo"].quantile([0, 0.028, 0.1587, 0.5, 0.8413, 0.9772, 1])
    bins_ranges = [minimum, low, low_mid, mid, mid_high, high, maximum]
    bins = []
    col_labels = []
    for lower, upper in zip(bins_ranges[:-1], bins_ranges[1:]):
        print(f"Elo in range {lower:.0f}-{upper:.0f}")
        bins.append(df_elo[in_range(lower, df_elo["WhiteElo"], upper)])
        col_labels.append(
            f"{lower:.0f} - {upper:.0f}\n"
            f"{len(bins) - 4 if len(bins) != 1 else '-∞'}σ to "
            f"{len(bins) - 3 if len(bins) != 6 else '∞'}σ")
    bintype = "ELO"
    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")
    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")
    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK, bintype)
    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE, bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype)

    # DATE bins: quartile edges of the game dates.
    minimum, low, mid, high, maximum = df["Date"].quantile(
        [0, 0.25, 0.5, 0.75, 1])
    bins_ranges = [minimum, low, mid, high, maximum]
    # bins_ranges = [pd.to_datetime("1700"), pd.to_datetime("1900"), pd.to_datetime("1980"), pd.to_datetime("2010"), pd.to_datetime("2025")]
    bins = []
    col_labels = []
    for lower, upper in zip(bins_ranges[:-1], bins_ranges[1:]):
        print(f"Date in range {lower.year}-{upper.year}")
        bins.append(df[in_range(lower, df["Date"], upper)])
        col_labels.append(
            f"{lower.year} to {upper.year}\nQ{len(bins) - 1} - Q{len(bins)}")
    bintype = "DATE"
    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")
    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype, username="******")
    # fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype, username="******")
    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.BLACK, bintype)
    fig, axs = plot_heatmap_grid(bins, pieces, col_labels, chess.WHITE, bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.BLACK, bintype)
    fig, axs = plot_hist_grid(bins, pieces, col_labels, chess.WHITE, bintype)

    # fig, axs = plot_heatmap_single_piece(df, [chess.PAWN], username="******", cmap=sns.color_palette("viridis", as_cmap=True))
    # fig, axs = plot_heatmap_single_piece(df, [chess.PAWN], bintype)
    for piece in chess.PIECE_TYPES[:-1]:
        # fig, axs = plot_heatmap_single_piece(df, [piece], username="******")
        fig, axs = plot_heatmap_single_piece(df, [piece])
        # fig, axs = plot_hist_single_piece(df, [piece], username="******")
        fig, axs = plot_hist_single_piece(df, [piece])

    # plt.show()
    # show(df)
    print(len(df))
    return df, list_of_games
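# Hypothetical helper assumed by the binning loops above (its real
# definition lives elsewhere in the project): a boolean mask selecting
# Series values inside [lower, upper). Whether the real helper includes
# the upper edge is unknown.
def in_range(lower, series, upper):
    return (series >= lower) & (series < upper)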