コード例 #1
0
ファイル: k_core.py プロジェクト: wtgme/ohsn
def coreness_features(g):
    """Plot the correlation between each vertex's k-core index and its feature value.

    For every field returned by iot.read_fields(), the feature is attached to
    the vertices as 'pof', vertices carrying the missing-value sentinel
    (-1000000000.0) are dropped, and a correlation plot is written to
    data/corerel/<field>.pdf.
    """
    g = g.as_undirected(mode="collapse")
    g.vs['core'] = g.shell_index(mode='ALL')
    for field in iot.read_fields():
        gt.add_attribute(g, 'pof', 'fed', 'com', field)
        # Select once; index the same vertex sequence for both attributes.
        valid = g.vs.select(pof_ne=-1000000000.0)
        pt.correlation(valid['core'], valid['pof'], 'K-Core', 'Feature',
                       'data/corerel/'+field+'.pdf')
コード例 #2
0
ファイル: k_core.py プロジェクト: abiraja2004/ohsn
def coreness_features(g):
    """Correlate the k-core (shell index) of vertices with their feature values.

    Each field from iot.read_fields() is loaded onto the graph as 'pof';
    vertices whose value equals the missing-data sentinel are excluded and a
    K-Core-vs-Feature correlation plot is saved under data/corerel/.
    """
    missing = -1000000000.0  # sentinel marking vertices with no feature value
    g = g.as_undirected(mode="collapse")
    g.vs['core'] = g.shell_index(mode='ALL')
    for field in iot.read_fields():
        gt.add_attribute(g, 'pof', 'fed', 'com', field)
        cores = g.vs.select(pof_ne=missing)['core']
        feats = g.vs.select(pof_ne=missing)['pof']
        pt.correlation(cores, feats, 'K-Core', 'Feature',
                       'data/corerel/' + field + '.pdf')
コード例 #3
0
ファイル: net_stat.py プロジェクト: abiraja2004/ohsn
def rank_feature(gc, dbname, comname, db_field_names, directed=True):
    """Plot in-degree against each feature of interest on the giant component.

    Works on the weak giant component of *gc*.  For every field in
    *db_field_names* the feature is attached to the vertices as 'foi'
    (read from <dbname>.<comname> by gt.add_attribute); values outside the
    central 95% are trimmed on both the feature and the in-degree, and a
    correlation plot is written to data/<field>.pdf.

    Parameters
    ----------
    gc : igraph.Graph
    dbname, comname : str
        Database / collection the feature values are read from.
    db_field_names : iterable of str
    directed : bool
        Unused; kept for interface compatibility with callers.
    """
    g = gt.giant_component(gc, 'WEAK')

    # In-degree is the network attribute correlated against every feature.
    g.vs['nt'] = g.degree(type="in")
    netatt = g.vs['nt']

    # The degree percentiles do not depend on the feature being processed,
    # so compute them once instead of on every loop iteration.
    maxd, mind = np.percentile(netatt, 97.5), np.percentile(netatt, 2.5)

    for db_field_name in db_field_names:
        g = gt.add_attribute(g, 'foi', dbname, comname, db_field_name)
        raw_values = np.array(g.vs['foi'])
        values = drop_initials(raw_values)

        # Only fields with enough observations are worth plotting.
        if len(values) > 100:
            # Trim feature outliers outside the 2.5th-97.5th percentile.
            maxv, minv = np.percentile(values, 97.5), np.percentile(values, 2.5)
            vs = g.vs(foi_ge=minv, foi_le=maxv)
            sg = g.subgraph(vs)

            # Trim in-degree outliers the same way.
            vs = sg.vs(nt_ge=mind, nt_le=maxd)
            sg = sg.subgraph(vs)

            pt.correlation(sg.vs['nt'], sg.vs['foi'], 'Indegree', 'Feature',
                           'data/' + db_field_name + '.pdf')
コード例 #4
0
ファイル: net_stat.py プロジェクト: wtgme/ohsn
def rank_feature(gc, dbname, comname, db_field_names, directed=True):
    """Plot in-degree against each feature of interest on the giant component.

    Operates on the weak giant component of *gc*.  Each field in
    *db_field_names* is loaded onto the vertices as "foi" (from
    <dbname>.<comname> via gt.add_attribute); both the feature and the
    in-degree are trimmed to their central 95%, then a correlation plot is
    saved as data/<field>.pdf.

    Parameters
    ----------
    gc : igraph.Graph
    dbname, comname : str
        Database / collection the feature values are read from.
    db_field_names : iterable of str
    directed : bool
        Unused; kept for interface compatibility with callers.
    """
    g = gt.giant_component(gc, "WEAK")

    # In-degree is the network attribute correlated against every feature.
    g.vs["nt"] = g.degree(type="in")
    netatt = g.vs["nt"]

    # netatt never changes inside the loop, so its percentiles are
    # loop-invariant and computed once here.
    maxd, mind = np.percentile(netatt, 97.5), np.percentile(netatt, 2.5)

    for db_field_name in db_field_names:
        g = gt.add_attribute(g, "foi", dbname, comname, db_field_name)
        raw_values = np.array(g.vs["foi"])
        values = drop_initials(raw_values)

        # Only fields with enough observations are worth plotting.
        if len(values) > 100:
            # Keep only the central 95% of the feature distribution.
            maxv, minv = np.percentile(values, 97.5), np.percentile(values, 2.5)
            vs = g.vs(foi_ge=minv, foi_le=maxv)
            sg = g.subgraph(vs)

            # Keep only the central 95% of the in-degree distribution.
            vs = sg.vs(nt_ge=mind, nt_le=maxd)
            sg = sg.subgraph(vs)

            pt.correlation(sg.vs["nt"], sg.vs["foi"], "Indegree", "Feature", "data/" + db_field_name + ".pdf")
コード例 #5
0
ファイル: net_stat.py プロジェクト: wtgme/ohsn
def feature_assort_friend(g, dbname, comname, db_field_names, directed=True):
    """Permutation test of feature assortativity for each field.

    Using iGraph.  Assigning values different from zero or one to the
    adjacency matrix will be translated to one, unless the graph is weighted,
    in which case the numbers will be treated as weights.

    For every field in *db_field_names*, the observed 'foi' assortativity is
    compared with 3000 random reassignments of the feature values over the
    vertices (a two-tailed permutation test).  One CSV report line is printed
    per field, with significance stars appended.

    Returns
    -------
    dict
        Maps each report line (including its stars) to abs(z-score).
    """
    node_size, edge_size = g.vcount(), g.ecount()
    outputs = {}
    print("Feature, #Nodes, #Edges, %Nodes, %Edges, D_assort, F_assort, F_assort, Mean, STD, z_score, p_value")
    for db_field_name in db_field_names:
        g = gt.add_attribute(g, "foi", dbname, comname, db_field_name)
        raw_values = np.array(g.vs["foi"])
        values = drop_initials(raw_values)

        # Skip sparsely observed fields.
        if len(values) <= 100:
            continue

        maxv, minv = max(values), min(values)
        vs = g.vs.select(foi_ge=minv, foi_le=maxv)
        sg = g.subgraph(vs)
        t_node_size, t_edge_size = len(sg.vs), len(sg.es)
        # Compute once and reuse (the original computed it twice).
        raw_assort = sg.assortativity("foi", "foi", directed=directed)
        output = (
            db_field_name
            + ","
            + str(t_node_size)
            + ","
            + str(t_edge_size)
            + ","
            + str(float(t_node_size) / node_size)
            + ","
            + str(float(t_edge_size) / edge_size)
            + ","
            + str(sg.assortativity_degree(directed=directed))
            + ","
            + str(raw_assort)
            + ","
        )

        # Null model: shuffle the feature over all vertices in place and
        # recompute the assortativity on the re-filtered subgraph.
        ass_list = []
        for _ in range(3000):
            np.random.shuffle(raw_values)
            g.vs["foi"] = raw_values
            vs = g.vs.select(foi_ge=minv, foi_le=maxv)
            sg = g.subgraph(vs)
            ass_list.append(sg.assortativity("foi", "foi", directed=directed))
        ass_list = np.array(ass_list)
        amean, astd = np.mean(ass_list), np.std(ass_list)

        # Two-tailed p-value against the permutation distribution.
        absobserved = abs(raw_assort)
        pval = (np.sum(ass_list >= absobserved) + np.sum(ass_list <= -absobserved)) / float(len(ass_list))
        zscore = (raw_assort - amean) / astd  # NOTE(review): nan/inf if astd == 0

        output += str(raw_assort) + "," + str(amean) + "," + str(astd) + "," + str(zscore) + "," + str(pval)
        # Append significance stars BEFORE printing; the original printed the
        # line without the marker it then attached to the dict key.
        if pval < 0.001:
            output += "***"
        elif pval < 0.01:
            output += "**"
        elif pval < 0.05:
            output += "*"
        print(output)
        outputs[output] = abs(zscore)
    return outputs
コード例 #6
0
ファイル: net_stat.py プロジェクト: abiraja2004/ohsn
def feature_assort_friend(g, dbname, comname, db_field_names, directed=True):
    '''Permutation test of feature assortativity for each field.

    Using iGraph.  Assigning values different from zero or one to the
    adjacency matrix will be translated to one, unless the graph is weighted,
    in which case the numbers will be treated as weights.

    Each field's observed 'foi' assortativity is compared against 3000
    shuffles of the feature values over the vertices (two-tailed test); one
    CSV line per field is printed with significance stars, and a dict mapping
    report line -> abs(z-score) is returned.
    '''
    node_size, edge_size = g.vcount(), g.ecount()
    outputs = {}
    print(
        'Feature, #Nodes, #Edges, %Nodes, %Edges, D_assort, F_assort, F_assort, Mean, STD, z_score, p_value'
    )
    for db_field_name in db_field_names:
        g = gt.add_attribute(g, 'foi', dbname, comname, db_field_name)
        raw_values = np.array(g.vs['foi'])
        values = drop_initials(raw_values)

        # Ignore fields with too few observations.
        if len(values) <= 100:
            continue

        maxv, minv = max(values), min(values)
        vs = g.vs.select(foi_ge=minv, foi_le=maxv)
        sg = g.subgraph(vs)
        t_node_size, t_edge_size = len(sg.vs), len(sg.es)
        # Compute the observed assortativity once and reuse it
        # (the original called sg.assortativity twice).
        raw_assort = sg.assortativity('foi', 'foi', directed=directed)
        output = db_field_name + ',' + str(t_node_size) + ',' + str(t_edge_size) + ',' \
                 + str(float(t_node_size)/node_size) + ',' + str(float(t_edge_size)/edge_size) + ',' \
                 + str(sg.assortativity_degree(directed=directed)) + ',' \
                 + str(raw_assort) + ','

        # Null model: shuffle feature values in place and recompute.
        ass_list = []
        for _ in range(3000):
            np.random.shuffle(raw_values)
            g.vs['foi'] = raw_values
            vs = g.vs.select(foi_ge=minv, foi_le=maxv)
            sg = g.subgraph(vs)
            ass_list.append(
                sg.assortativity('foi', 'foi', directed=directed))
        ass_list = np.array(ass_list)
        amean, astd = np.mean(ass_list), np.std(ass_list)

        # Two-tailed p-value against the permutation distribution.
        absobserved = abs(raw_assort)
        pval = (np.sum(ass_list >= absobserved) +
                np.sum(ass_list <= -absobserved)) / float(len(ass_list))
        zscore = (raw_assort - amean) / astd  # NOTE(review): nan/inf if astd == 0

        output += str(raw_assort) + ',' + str(amean) + ',' + str(
            astd) + ',' + str(zscore) + ',' + str(pval)
        # Stars are appended before printing so the printed report carries the
        # significance marker (the original printed the unstarred line).
        if pval < 0.001:
            output += '***'
        elif pval < 0.01:
            output += '**'
        elif pval < 0.05:
            output += '*'
        print(output)
        outputs[output] = abs(zscore)
    return outputs
コード例 #7
0
def network_assort():
    """Permutation test of feature assortativity on each network type.

    For every network graph (follow, retweet, communication — 'edfollow' is
    deliberately skipped via gs[1:]) and every field from iot.read_fields(),
    compares the observed 'foi' assortativity with 1000 shuffled null samples,
    prints one CSV line per field with significance stars, and finally prints
    the share of fields that reached each significance level.
    """
    gs = ['edfollow', 'follow', 'retweet', 'communication']
    fields = iot.read_fields()
    for gf in gs[1:]:
        g = gt.Graph.Read_GraphML('data/' + gf + '_net.graphml')
        sigs = []
        for field in fields:
            g = gt.add_attribute(g, 'foi', 'depression', 'com', field)
            raw_values = np.array(g.vs['foi'])
            values = drop_initials(raw_values)
            if len(values) > 100:
                output = gf + ',' + field.split('.')[-1] + ','
                maxv, minv = max(values), min(values)
                vs = g.vs.select(foi_ge=minv, foi_le=maxv)
                sg = g.subgraph(vs)
                raw_assort = sg.assortativity('foi', 'foi', directed=True)

                # Null distribution: 1000 in-place shuffles of the feature.
                ass_list = []
                for _ in range(1000):
                    np.random.shuffle(raw_values)
                    g.vs['foi'] = raw_values
                    vs = g.vs.select(foi_ge=minv, foi_le=maxv)
                    sg = g.subgraph(vs)
                    ass_list.append(
                        sg.assortativity('foi', 'foi', directed=True))

                ass_list = np.array(ass_list)
                amean, astd = np.mean(ass_list), np.std(ass_list)
                # Two-tailed p-value against the permutation distribution.
                absobserved = abs(raw_assort)
                pval = (np.sum(ass_list >= absobserved) +
                        np.sum(ass_list <= -absobserved)) / float(
                            len(ass_list))
                zscore = (raw_assort - amean) / astd
                output += format(raw_assort, '.2f') + ',' + format(amean, '.2f') + ',' + \
                          format(astd, '.2f') + ',' + format(zscore, '.2f') + ',' + format(pval, '.3f') + ','

                # Map the p-value to its significance marker once instead of
                # four copy-pasted branches.
                if pval < 0.001:
                    stars = '***'
                elif pval < 0.01:
                    stars = '**'
                elif pval < 0.05:
                    stars = '*'
                else:
                    stars = ''
                if stars:
                    output += stars
                    # Only positively assortative significant fields counted,
                    # as in the original — TODO confirm this asymmetry is intended.
                    if raw_assort > 0:
                        sigs.append(stars)
                else:
                    sigs.append('N')
                print(output)
        c = Counter(sigs)
        print(c)
        for sig, cou in c.items():
            # Single concatenated string keeps the same space-separated
            # output under both Python 2 and Python 3.
            print(str(sig) + ' ' + str(1.0 * cou / len(fields)))