Example #1
def comp_link_sim_matrices(cfg):
    """
    Compute link similarity matrices for given network densities.
    specified in ``cfg['density_range']`` across
    ``cfg['all_fnames']``
    Results are saved in ``cfg['outdata_dir'])``

    Parameters
    ----------
    cfg : dict
        brainnets config dict
    """
    config.require(cfg, [
        "all_fnames", "blacklist_fname", "density_range", "include_mst",
        "n_cpus"
    ])
    densities = cfg['density_range']
    # pair each density with the shared cfg for the worker pool
    arg_list = list(zip(densities, [cfg] * len(densities)))
    link_sim_mat_list = ch.run_in_parallel(_comp_link_sim_mat_worker,
                                           arg_list,
                                           cfg["n_cpus"],
                                           chunksize=1)
    out_dict = {
        settings.densities_tag: densities,
        settings.common_links_tag: link_sim_mat_list,
        settings.config_tag: cfg
    }
    dataio.save_pickle(fnc.get_fname(cfg, settings.common_links_tag), out_dict)
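A minimal usage sketch, with hypothetical file paths and values; the required keys mirror the ``config.require`` call above, plus ``outdata_dir`` for the output location:

# Hypothetical usage sketch -- paths and values are placeholders.
cfg = {
    'all_fnames': ['subject_1.mat', 'subject_2.mat'],  # hypothetical inputs
    'blacklist_fname': 'blacklist.pkl',                # hypothetical
    'density_range': [0.01, 0.02, 0.05],               # network densities
    'include_mst': True,
    'n_cpus': 4,
    'outdata_dir': '/tmp/brainnets_out',               # hypothetical
}
comp_link_sim_matrices(cfg)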
Example #2
def do_end(fname, out_fname, results):
    """ Prints which work ended and saves the results"""
    dataio.save_pickle(out_fname, results)
    assert (settings.config_tag in results)
    if settings.be_verbose:
        print "finished " + fname
        sys.stdout.flush()
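A one-line usage sketch (arguments hypothetical; per the assert above, ``results`` must carry the config under ``settings.config_tag``):

# Hypothetical call: persist results and report completion.
results = {settings.config_tag: cfg, 'my_metric': 0.42}  # hypothetical payload
do_end('subject_1.mat', '/tmp/results.pkl', results)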
Example #3
def comp_consistent_link_distances(cfg):
    """Computes the distances of consistently appearing links
        (amongst the networks defined by the fNames)

        Saves the results to the specified outdata_dir

    Parameters
    ----------
    cfg : dict
        the brainnets config dict
    """
    config.require(cfg, [
        'all_fnames', 'blacklist_fname', 'node_info_fname', 'density_range',
        'include_mst'
    ])

    start_mat, ok_nodes = ch.do_start(cfg['all_fnames'][0],
                                      cfg['blacklist_fname'])
    out_dict = {settings.densities_tag: cfg['density_range']}
    link_distances = []
    for d in cfg['density_range']:
        start_net = netgen.make_net_from_unfiltered_data(
            start_mat, ok_nodes, d, include_mst=cfg['include_mst'])
        for fname in cfg['all_fnames'][1:]:
            mat, ok_nodes = ch.do_start(fname, cfg['blacklist_fname'])
            net = netgen.make_net_from_unfiltered_data(
                mat, ok_nodes, d, include_mst=cfg['include_mst'])
            start_net = start_net.intersection(net)
        link_distances.append(get_link_distances_for_net(start_net, cfg))

    out_dict[settings.link_distance_tag] = link_distances

    out_fname = fnc.get_fname(cfg, settings.link_distance_common_tag)
    dataio.save_pickle(out_fname, out_dict)
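A hedged usage sketch (paths hypothetical; ``node_info_fname`` is required here in addition to the keys used in Example #1):

# Hypothetical usage sketch for consistent-link distances.
cfg = {
    'all_fnames': ['subject_1.mat', 'subject_2.mat'],  # hypothetical
    'blacklist_fname': 'blacklist.pkl',                # hypothetical
    'node_info_fname': 'node_info.pkl',                # hypothetical
    'density_range': [0.01, 0.02],
    'include_mst': True,
}
comp_consistent_link_distances(cfg)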
Example #4
def _compute_coms_worker(args):
    """
    Computes Louvain communities.
    """
    fname, cfg, com_det_method, com_det_options = args
    graph = netgen.get_graph_from_bare_data(
        fname, cfg['blacklist_fname'], cfg['density'],
        include_mst=cfg['include_mst'], weighted=False)
    membershiplists = []
    for i in range(cfg['n_it_comdet']):
        clustering = com_det_method(graph, **com_det_options)
        if isinstance(clustering, igraph.clustering.VertexDendrogram):
            clustering = clustering.as_clustering()
        membershiplists.append(clustering.membership)
    coms = np.array(membershiplists)
    ok_nodes = dataio.get_ok_nodes(cfg['blacklist_fname'])
    # expand communities to non-filtered indices
    unfiltered_coms = []
    for i, com in enumerate(coms):
        uf_com = dataio.expand_1D_node_vals_to_non_blacklisted_array(
            com, ok_nodes
        )
        unfiltered_coms.append(uf_com)
    unfiltered_coms = np.array(unfiltered_coms)
    com_det_method_tag = igraph_com_det_method_to_tag(com_det_method)
    out_fname = fnc.get_ind_fname(fname, cfg, com_det_method_tag)
    out_dict = {com_det_method_tag: unfiltered_coms,
                settings.config_tag: cfg}
    dataio.save_pickle(out_fname, out_dict)
    print "finished " + fname
    return unfiltered_coms
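A sketch of how the worker might be invoked directly; the igraph method shown is one possible choice, and ``cfg`` is assumed to carry at least ``blacklist_fname``, ``density``, ``include_mst`` and ``n_it_comdet`` (per the worker body above):

import igraph

# Hypothetical invocation -- file name and options are placeholders.
args = (
    'subject_1.mat',                 # hypothetical input file
    cfg,                             # brainnets config dict (see above)
    igraph.Graph.community_infomap,  # any igraph community detection method
    {},                              # keyword options passed to the method
)
unfiltered_coms = _compute_coms_worker(args)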
Example #5
def _compute_louvain_coms_worker(args):
    """
    Computes Louvain communities.
    """
    fname, cfg = args
    coms = []
    mods = []
    print "started " + fname
    graph = netgen.get_graph_from_bare_data(
        fname, cfg['blacklist_fname'], cfg['density'],
        include_mst=cfg['include_mst'], weighted=False)
    louvain_coms_dict = \
        gencomps.get_louvain_partitions(graph, False, cfg['n_it_comdet'])
    coms.extend(louvain_coms_dict[settings.louvain_cluster_tag])
    coms = np.array(coms)
    ok_nodes = dataio.get_ok_nodes(cfg['blacklist_fname'])
    # expand communities to non-filtered indices
    unfiltered_coms = []
    for i, com in enumerate(coms):
        uf_com = dataio.expand_1D_node_vals_to_non_blacklisted_array(
            com, ok_nodes
        )
        unfiltered_coms.append(uf_com)
    unfiltered_coms = np.array(unfiltered_coms)
    mods.extend(louvain_coms_dict[settings.modularity_tag])
    mods = np.array(mods)
    out_fname = fnc.get_ind_fname(fname, cfg, settings.louvain_cluster_tag)
    out_dict = {settings.louvain_cluster_tag: unfiltered_coms,
                settings.modularity_tag: mods,
                settings.config_tag: cfg}
    dataio.save_pickle(out_fname, out_dict)

    print "finished " + fname
    return unfiltered_coms, mods
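This worker is typically fanned out over subjects; a hedged sketch, assuming ``cfg`` also carries ``all_fnames`` and ``n_cpus`` as in Example #1:

# Hypothetical fan-out over subjects using the helper from Example #1.
arg_list = [(fname, cfg) for fname in cfg['all_fnames']]
results = ch.run_in_parallel(_compute_louvain_coms_worker, arg_list,
                             cfg['n_cpus'], chunksize=1)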
Example #6
def comp_scaled_inclusivity_for_two_fname_groups(cfg):
    """
    Computes node-wise scaled inclusivity within each of the two
    filename groups and saves the results separately per group.
    """
    config.require(
        cfg, ["density", "blacklist_fname",
              "group_1_mat_fnames", "group_2_mat_fnames"])
    fname_groups = [cfg['group_1_mat_fnames'], cfg['group_2_mat_fnames']]
    for i, fname_group in enumerate(fname_groups):
        clus = []
        for mat_fname in fname_group:
            clusters_fname = fnc.get_ind_fname(
                mat_fname,
                cfg,
                settings.louvain_cluster_tag
            )
            subject_clusters = dataio.load_pickle(clusters_fname)
            clus.append(subject_clusters[settings.louvain_cluster_tag])
        partitions = aux.expand_first_axis(np.array(clus))
        partitions = partitions[:, dataio.get_ok_nodes(cfg['blacklist_fname'])]
        assert np.logical_not(np.isnan(partitions)).all()
        node_SIs = gencomps.comp_scaled_inclusivity(partitions)
        out_dict = {settings.scaled_inclusivity_tag: node_SIs,
                    settings.config_tag: cfg}
        out_fname = fnc.get_group_fname(
            cfg, settings.scaled_inclusivity_tag, i)
        dataio.save_pickle(out_fname, out_dict)
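A hedged usage sketch (filenames hypothetical; per-subject Louvain results are assumed to exist already, e.g. from the workers above):

# Hypothetical two-group scaled-inclusivity computation.
cfg = {
    'density': 0.02,
    'blacklist_fname': 'blacklist.pkl',                  # hypothetical
    'group_1_mat_fnames': ['ctrl_1.mat', 'ctrl_2.mat'],  # hypothetical
    'group_2_mat_fnames': ['pat_1.mat', 'pat_2.mat'],    # hypothetical
}
comp_scaled_inclusivity_for_two_fname_groups(cfg)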
Example #7
def comp_consensus_scaled_inclusivity(cfg, group_id, n_to_consider=None):
    """
    Parameters
    ----------
    cfg : dict
        brainnets config dictionary
    group_id : int
        0 or 1 -- the group for which the scaled inclusivity should be computed
    n_to_consider : int or 'best', optional
        number of partitions per subject to use; 'best' selects each
        subject's maximum-modularity partition; defaults to all partitions
    """
    config.require(
        cfg, ["density", "blacklist_fname",
              "group_1_mat_fnames", "group_2_mat_fnames"])

    if group_id == 0:
        fname_group = cfg['group_1_mat_fnames']
    elif group_id == 1:
        fname_group = cfg['group_2_mat_fnames']
    else:
        raise ValueError('Param group_id should be either 0 or 1')
    consensus_com_fname = fnc.get_group_fname(
        cfg, settings.louvain_consensus_tag, group_id)
    consensus_com = \
        dataio.load_pickle(consensus_com_fname)[settings.louvain_cluster_tag]

    clus = []
    for mat_fname in fname_group:
        clusters_fname = fnc.get_ind_fname(
            mat_fname,
            cfg,
            settings.louvain_cluster_tag
        )
        data = dataio.load_pickle(clusters_fname)
        subject_clusters = data[settings.louvain_cluster_tag]

        if n_to_consider is not None:
            if isinstance(n_to_consider, int):
                subject_clusters = subject_clusters[:n_to_consider]
            elif n_to_consider == 'best':
                max_mod_i = np.argmax(data[settings.modularity_tag])
                subject_clusters = subject_clusters[max_mod_i]
                subject_clusters = subject_clusters.reshape(
                    1, len(subject_clusters))
            else:
                raise ValueError(
                    "n_to_consider should be an integer or 'best'!")
        clus.append(subject_clusters)

    partitions = aux.expand_first_axis(np.array(clus))
    ok_nodes = dataio.get_ok_nodes(cfg['blacklist_fname'])
    partitions = partitions[:, ok_nodes]
    consensus_com = consensus_com[ok_nodes]
    assert np.logical_not(np.isnan(partitions)).all()
    assert len(consensus_com) == len(partitions[0])

    node_SIs = gencomps.comp_scaled_inclusivity_for_ref_partition(
        consensus_com, partitions, normalize=True)
    out_dict = {settings.scaled_inclusivity_tag:
                node_SIs, settings.config_tag: cfg}
    out_fname = fnc.get_group_fname(
        cfg, settings.louvain_consensus_si_tag, group_id)
    dataio.save_pickle(out_fname, out_dict)
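Usage sketch (hypothetical; assumes the group's consensus partition has been computed and saved beforehand):

# Use all partitions of each subject:
comp_consensus_scaled_inclusivity(cfg, group_id=0)
# Use only the first 10 partitions of each subject:
comp_consensus_scaled_inclusivity(cfg, group_id=0, n_to_consider=10)
# Use only each subject's maximum-modularity partition:
comp_consensus_scaled_inclusivity(cfg, group_id=1, n_to_consider='best')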
Example #8
def comp_consensus_partition(cfg, fnames_tag, out_fname,
                             n_clu_for_mcla='median',
                             n_to_consider=None,
                             comdet_tag=None):
    """
    Computes a consensus partition.

    Parameters
    ----------
    cfg : dict
        a brainnets config dictionary
    fnames_tag : str
        the cfg key of the filename group for which the consensus
        partition is computed
    out_fname : str
        the filename to which the consensus partition is saved
    n_clu_for_mcla : int or "median"
        maximum number of clusters in the consensus partition;
        if "median", the median number of clusters across the input
        partitions is used as the maximum
    n_to_consider : int or "best", optional
        number of partitions to consider for obtaining the consensus;
        defaults to considering *all* partitions;
        if "best", uses the partition with maximum modularity
        (when modularity values are available)
    comdet_tag : str, optional
        e.g. "infomap";
        defaults to settings.louvain_cluster_tag (legacy)

    Returns
    -------
    out_dict : dict
        dictionary containing the consensus partition
    """
    config.require(cfg, [fnames_tag, 'blacklist_fname', 'density'])

    ok_nodes = dataio.get_ok_nodes(cfg['blacklist_fname'])
    if comdet_tag is None:
        comdet_tag = settings.louvain_cluster_tag

    # load clusterings
    clusterings = None
    for fname in cfg[fnames_tag]:
        indfname = fnc.get_ind_fname(fname, cfg, comdet_tag)
        data = dataio.load_pickle(indfname)
        clus_raw = data[comdet_tag]

        assert len(clus_raw[0]) >= np.sum(ok_nodes)
        if n_to_consider is not None:
            if isinstance(n_to_consider, int):
                clus_raw = clus_raw[:n_to_consider]
            elif n_to_consider == 'best':
                max_mod_i = np.argmax(data[settings.modularity_tag])
                clus_raw = clus_raw[max_mod_i]
                clus_raw = clus_raw.reshape(1, len(clus_raw))
            else:
                raise ValueError(
                    "n_to_consider should be an integer or 'best'!")

        clus = clus_raw[:, ok_nodes]
        if clusterings is None:
            # for first encounter
            clusterings = np.copy(clus)
        else:
            clusterings = np.vstack((clusterings, clus))

    # this should usually hold, unless the workflow is non-standard
    # (sanity check added to guard against an old bug)
    assert len(clusterings) == len(clus) * len(cfg[fnames_tag])

    consensus_clu = gencomps.comp_consensus_partition(
        clusterings, n_clu_for_mcla)
    consensus_clu = dataio.expand_1D_node_vals_to_non_blacklisted_array(
        consensus_clu, ok_nodes, default_value=-1)
    out_dict = {comdet_tag: consensus_clu,
                settings.config_tag: cfg}

    dataio.save_pickle(out_fname, out_dict)
    return out_dict
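A hedged usage sketch (cfg and output path hypothetical; assumes per-subject partitions were already computed and saved under the chosen comdet tag):

# Hypothetical consensus-partition call for the first filename group.
out_dict = comp_consensus_partition(
    cfg,
    fnames_tag='group_1_mat_fnames',  # which filename list in cfg to use
    out_fname='/tmp/consensus.pkl',   # hypothetical output path
    n_clu_for_mcla='median',
    n_to_consider='best',
)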