Code Example #1
from itertools import combinations

import pandas as pd
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

# load_dataset, demo_single_run_silent and compute_metrics are
# project-local helpers defined elsewhere in the repository.


def evaluate_many(n='all', workers=0):
    """Evaluate every layer pair (or n random runs) and save a CSV report."""
    reports = []
    output_file = f'output/fao_report_{n}.csv'
    if n == 'all':
        dataset = load_dataset(drop_small_layers=True)
        layer_pairs = list(combinations(dataset.layer_names.index, r=2))
    else:
        # demo_single_run_silent presumably picks a random pair when given None.
        layer_pairs = [None] * n

    if workers == 0:
        # Serial run; Ctrl+C stops early and saves a partial report.
        for layer_pair in tqdm(layer_pairs):
            try:
                reports.append(demo_single_run_silent(layer_pair))
            except KeyboardInterrupt:
                output_file = output_file.replace('.csv', '_partial.csv')
                break
    else:
        reports = process_map(demo_single_run_silent,
                              layer_pairs,
                              max_workers=workers,
                              chunksize=64)

    # Drop failed or skipped runs before building the report.
    reports = list(filter(None, reports))
    print('Total completed', len(reports), 'out of', len(layer_pairs))

    df = pd.DataFrame.from_records(reports)
    df.to_csv(output_file, index=False)
    compute_metrics(output_file)
    return df
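
A minimal usage sketch, assuming this module's helpers are importable; the parameter values below are arbitrary:

df_quick = evaluate_many(n=5, workers=0)     # serial run on 5 random pairs, Ctrl+C-safe
df_full = evaluate_many(n='all', workers=8)  # parallel run over all layer pairs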
Code Example #2
import random

# load_dataset, filter_by_layer, random_layer_split,
# layer_split_with_no_observables and LayerSplit are project-local.


def fao_layer_sample(layer_id=None,
                     hidden_ratio=0.5,
                     random_state=None) -> LayerSplit:
    """
    Select a random layer, then split its nodes into 'observed' and 'hidden'
    parts. Edges adjacent to any hidden node are also considered 'hidden'.

    Args:
        layer_id (int, optional): Chosen at random by default.
        hidden_ratio (float): Ratio of nodes to hide (0.5 by default).
                              The resulting ratio of hidden edges is
                              typically larger. If the ratio is 1.0, the
                              observed edge list is empty, so the
                              reconstruction algorithm has no observed
                              edges to rely on.
        random_state (int): Random seed for splitting the nodes.

    Returns:
        LayerSplit: Data class containing nodes and edges for each partition:
                    'observed', 'hidden' and 'full'.
    """
    dataset = load_dataset()
    if layer_id is None:
        layer_id = random.choice(dataset.layer_names.index)

    edges = filter_by_layer(dataset.edges, layer_id)

    if hidden_ratio != 1:
        return random_layer_split(edges=edges,
                                  layer_id=layer_id,
                                  hidden_ratio=hidden_ratio,
                                  random_state=random_state)
    else:
        return layer_split_with_no_observables(edges=edges, layer_id=layer_id)
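
For illustration, a sketch of consuming the split; the partition names follow the docstring, but the exact LayerSplit field layout is an assumption:

split = fao_layer_sample(hidden_ratio=0.5, random_state=0)
print(split.observed, split.hidden)  # assumed fields named after the partitions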
Code Example #3
import matplotlib.pyplot as plt

import fao_data
# compute_multiplexity is defined alongside this function.


def plot_multiplexity_histogram(dataset=None):
    """Plot a histogram of pairwise layer multiplexity values."""
    if dataset is None:
        dataset = fao_data.load_dataset(drop_small_layers=True)
    multiplexities = compute_multiplexity(dataset)
    plt.rc('font', size=14)
    plt.hist(multiplexities, bins=32, facecolor='g', alpha=0.75)
    plt.ylabel('frequency')
    plt.xlabel('multiplexity value')
    ax = plt.gca()
    # Hide y-axis ticks and labels: only the shape of the distribution matters.
    ax.yaxis.set_ticklabels([])
    ax.yaxis.set_ticks([])
    plt.tight_layout()
    plt.savefig('output/mltplx_hist.svg')
    plt.savefig('output/mltplx_hist.png', dpi=1200)
    plt.show()
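
One practical caveat: plt.savefig does not create missing directories. A small guard like the following (assuming the hard-coded output/ path above is relative to the working directory) avoids a FileNotFoundError:

import os

os.makedirs('output', exist_ok=True)
plot_multiplexity_histogram()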
Code Example #4
import random

import numpy as np
import pandas as pd
from IPython.display import display
from tqdm.contrib.concurrent import process_map

# load_dataset, fao_layer_sample, _run_single_eval, METRICS_DISPLAY,
# describe_mean_std and the dbcm, ipf, random_baseline modules are
# project-local.


def demo_evaluate_multiple_layers(n=None,
                                  layer_ids=None,
                                  num_seeds=2,
                                  num_workers=6):
    if layer_ids is None:
        layer_ids = load_dataset(drop_small_layers=True).layer_names.index
        if n is not None:
            # Sample without replacement so no layer is evaluated twice
            # (random.choices would sample with replacement).
            layer_ids = random.sample(layer_ids.tolist(), k=n)

    seeds = np.arange(num_seeds)
    # Each experiment is (name, reconstruction function, *keyword overrides).
    experiments = [
        ('Random', random_baseline.reconstruct_layer_sample),
        ('MaxEnt', ipf.reconstruct_layer_sample, ('ipf_steps', 0)),
        ('IPF', ipf.reconstruct_layer_sample_unconsciously),
        # ('IPF enforced', ipf.reconstruct_layer_sample),
        ('IPF enforced', ipf.reconstruct_v2),
        ('DBCM', dbcm.reconstruct_layer_sample, ('enforce_observed', False)),
        ('DBCM enforced', dbcm.reconstruct_layer_sample, ('enforce_observed',
                                                          True)),
    ]

    # Build one run per (layer, seed, experiment) combination.
    index_keys = []
    runs = []
    for layer_id in layer_ids:
        for seed in seeds:
            sample = fao_layer_sample(layer_id, random_state=seed)
            for name, reconstruct_func, *kwargs in experiments:
                index_keys.append((layer_id, name, seed))
                runs.append((sample, reconstruct_func, dict(kwargs)))

    results_list = process_map(_run_single_eval,
                               runs,
                               chunksize=3,
                               max_workers=num_workers,
                               smoothing=0)
    results_df = pd.DataFrame(
        results_list,
        index=pd.MultiIndex.from_tuples(index_keys,
                                        names=['layer_id', 'name', 'seed']),
    )
    print('Stats by layer')
    display(results_df[METRICS_DISPLAY].groupby(
        level=['layer_id', 'name']).agg(describe_mean_std))

    print('Stats by method')
    display(results_df[METRICS_DISPLAY].groupby(
        level=['name']).agg(describe_mean_std))
    return results_df
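
A hedged invocation sketch; the layer ids here are placeholders rather than values taken from the dataset:

results = demo_evaluate_multiple_layers(layer_ids=[1, 2],
                                        num_seeds=3,
                                        num_workers=4)
results.to_csv('output/eval_subset.csv')  # illustrative follow-up, path assumed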
Code Example #5
import random
from typing import List, Optional

# load_dataset, filter_by_layer, multilayer_sample and MultiLayerSplit
# are project-local.


def fao_multilayer_sample(
        layer_ids: Optional[List[int]] = None,
        hidden_ratio: float = 0.5,
        random_state: Optional[int] = None) -> MultiLayerSplit:
    """Sample a pair of layers and split them into observed/hidden parts."""
    dataset = load_dataset(drop_small_layers=True)
    if layer_ids is None:
        layer_ids = random.sample(dataset.layer_names.index.tolist(), k=2)
    elif len(layer_ids) != 2:
        raise NotImplementedError('Only two layers at a time are supported')

    edges = filter_by_layer(dataset.edges, layer_ids)

    return multilayer_sample(edges=edges,
                             layer_ids=layer_ids,
                             hidden_ratio=hidden_ratio,
                             random_state=random_state)
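
As with the single-layer variant, a usage sketch with placeholder layer ids:

pair_split = fao_multilayer_sample(layer_ids=[3, 7], random_state=1)
random_pair = fao_multilayer_sample(random_state=1)  # samples two random layers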
Code Example #6
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import squareform

import fao_data
# compute_multiplexity is defined alongside this function.


def plot_multiplexity(dataset=None):
    """Plot the layer-by-layer multiplexity matrix as a heatmap."""
    if dataset is None:
        dataset = fao_data.load_dataset(drop_small_layers=True)
    n = len(dataset.layer_names)
    multiplexities = compute_multiplexity(dataset)
    # Expand the condensed pairwise vector into a symmetric matrix,
    # setting the diagonal (self-multiplexity) to 1.
    multiplexities_2d = squareform(multiplexities) + np.eye(n)
    plt.figure(figsize=(4, 3.4), dpi=200)
    plt.imshow(multiplexities_2d, cmap='Greens')
    plt.colorbar(shrink=0.95)
    ax = plt.gca()
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    plt.tight_layout()
    plt.savefig('output/mltplx_heatmap.svg')
    plt.savefig('output/mltplx_heatmap.png', dpi=1200)
    plt.show()
Code Example #7
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error


def _multiplexity_metrics(orig_mltplx, pred_mltplx, orig_mltrcp, pred_mltrcp):
    # NOTE: hypothetical signature; the source excerpt begins mid-function.
    # Compares target vs. predicted multiplexity and multi-reciprocity
    # and collects MAE / MAPE error metrics.
    res = dict(mltplx_tgt=orig_mltplx,
               mltplx_pred=pred_mltplx,
               mltplx_mae=mean_absolute_error([orig_mltplx], [pred_mltplx]),
               mltplx_mape=mean_absolute_percentage_error([orig_mltplx],
                                                          [pred_mltplx]),
               mltrcp_mae=mean_absolute_error([orig_mltrcp], [pred_mltrcp]),
               mltrcp_mape=mean_absolute_percentage_error([orig_mltrcp],
                                                          [pred_mltrcp]))
    return res


if __name__ == '__main__':
    import numpy as np

    from fao_data import load_dataset

    dataset = load_dataset()
    N = 10
    layer_subset = dataset.layer_names.index[:N]
    common = np.zeros(shape=(N, N))
    mltplx = np.zeros(shape=(N, N))
    mltrcp = np.zeros(shape=(N, N))

    # Pairwise stats for the first N layers; the boolean flag presumably
    # switches multiplexity to its reciprocal variant.
    for i, layer_a in enumerate(layer_subset):
        for j, layer_b in enumerate(layer_subset):
            common[i, j] = count_common_links(dataset.edges, layer_a, layer_b,
                                              False)
            mltplx[i, j] = multiplexity(dataset.edges, layer_a, layer_b, False)
            mltrcp[i, j] = multiplexity(dataset.edges, layer_a, layer_b, True)

    print(common)
    print(mltplx.round(2))
    print(mltrcp.round(2))