Example No. 1
import networkx as nx


def numTriangles(G, n):
    EG = nx.ego_graph(G, n)
    #each triangle is counted three times, once at each node
    numTri = sum(nx.triangles(EG).values()) / 3
    print("number of triangles in ego-network: " + str(numTri))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
numTriangles(convert(politicalSet), 0)
Example No. 2
import networkx as nx


def sizeConnectedComponents(G):
    num = nx.number_connected_components(G)
    print("number of connected components: " + str(num))
    lenEachCC = [len(c) for c in nx.connected_components(G)]
    print("list of their sizes: " + str(lenEachCC))


from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
sizeConnectedComponents(convert(emailSet))
Example No. 3
import networkx as nx


def numEdges(G, n):
    EG = nx.ego_graph(G, n)
    numEdges = EG.number_of_edges()
    print("number of edges in ego-network: " + str(numEdges))


from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
numEdges(convert(emailSet), 212)
Example No. 4
import networkx as nx

def numTriangles(G):
    #each triangle is counted three times, once at each node
    num = sum(nx.triangles(G).values())/3
    print("Number of triangles: " + str(num))


def numTriangles2():
    G = nx.Graph()
    G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4)])
    num = sum(nx.triangles(G).values())/3
    print(nx.triangles(G))
    print("Number of triangles: " + str(num))

from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
graph = convert(emailSet)
#numTriangles(graph)
numTriangles2()
Example No. 5
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt


#Signature reconstructed from the call below (parameter names assumed); this
#excerpt ignores both arguments and rebuilds G from a genPPM helper module.
def computeTopSVal(G, k):
    G = genPPM.generate()
    print(G.number_of_nodes())
    print(G.number_of_edges())

    n = G.number_of_nodes()
    nArr = []
    for i in range(n):
        nArr.append(i + 1)
    print(nArr)

    #Express graph as graph adjacency matrix / Numpy matrix
    M = nx.to_numpy_matrix(G)
    #Find all singular values of matrix
    s = np.linalg.svd(M)[1]
    print(s)

    #Draw loglog graph (note: the data is already log10-transformed and the
    #axes below are also set to log scale)
    plt.plot(np.log10(nArr),
             np.log10(s),
             linestyle='None',
             marker='x',
             markeredgecolor='blue')
    plt.xscale('log')
    plt.yscale('log')
    plt.show()


from src.preprocess.csvToGraph import convert
politicalSet = 'data/datasetPolitical/datasetPolitical_final2.csv'
computeTopSVal(convert(politicalSet), 600)
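The excerpt above plots the singular-value spectrum of a generated graph. Below is a minimal, self-contained sketch of the same idea, not part of the repository; to_numpy_array and planted_partition_graph are illustrative choices, and the values are plotted directly on log-log axes.

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt


def plot_singular_spectrum(G):
    #Dense adjacency matrix (to_numpy_array is the non-deprecated API)
    A = nx.to_numpy_array(G)
    #Singular values only, returned in descending order
    s = np.linalg.svd(A, compute_uv=False)
    ranks = np.arange(1, len(s) + 1)
    plt.loglog(ranks, s, linestyle='None', marker='x', markeredgecolor='blue')
    plt.xlabel("rank")
    plt.ylabel("singular value")
    plt.show()


#Toy usage on a random planted-partition graph
plot_singular_spectrum(nx.planted_partition_graph(4, 50, 0.4, 0.02, seed=1))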
Example No. 6
        kArr.append(k)
        accuracyArr.append(accuracy)

        k = k + 1

    plotkError(kArr, accuracyArr)


import matplotlib.pyplot as plt  #needed by plotkError (not shown in this excerpt)


def plotkError(kArr, accuracyArr):
    plt.plot(kArr,
             accuracyArr,
             linestyle='None',
             marker='x',
             markeredgecolor='blue')
    plt.title(
        "k={1...50}, Linear-linear graph, average classification accuracy vs. k, political set"
    )
    plt.ylabel("Classification Accuracy")
    plt.xlabel("k")
    plt.show()


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
#from src.svd.computeTruncated import computeTrun
#M = computeTrun(convert(politicalSet))
#classifyData(M)
classifyDataGeneric(convert(politicalSet))
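The excerpt above is the tail of a sweep that evaluates classification accuracy for each k and plots the curve. A minimal sketch of that sweep-and-plot pattern follows, assuming a hypothetical evaluate(k) callback in place of the repository's classifier.

import matplotlib.pyplot as plt


def sweep_k(evaluate, k_min=1, k_max=50):
    #Run evaluate(k) for each k and plot accuracy vs. k on linear-linear axes
    kArr, accuracyArr = [], []
    k = k_min
    while k <= k_max:
        accuracy = evaluate(k)  #hypothetical callback returning accuracy in [0, 1]
        kArr.append(k)
        accuracyArr.append(accuracy)
        k = k + 1
    plt.plot(kArr, accuracyArr, linestyle='None', marker='x', markeredgecolor='blue')
    plt.title("average classification accuracy vs. k")
    plt.ylabel("Classification Accuracy")
    plt.xlabel("k")
    plt.show()


#Toy usage with a dummy accuracy curve
sweep_k(lambda k: 1.0 / (1.0 + abs(k - 10)))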
Example No. 7
import networkx as nx
import scipy as sp
import scipy.sparse         #makes sp.sparse available explicitly
import scipy.sparse.linalg  #needed for sp.sparse.linalg.svds

def largestSingularValue(G, n):
    EG = nx.ego_graph(G, n)
    #Express graph as graph adjacency matrix / SciPy sparse matrix
    M = sp.sparse.csc_matrix(nx.to_scipy_sparse_matrix(EG)).asfptype()
    #Find greatest singular value of matrix
    s = sp.sparse.linalg.svds(M, 1)[1]
    print("largest singular value of adjacency of ego-network: " + str(s))

from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
largestSingularValue(convert(emailSet), 212)
Example No. 8
import networkx as nx
#import snap


def diameter(G):
    diameter = nx.diameter(G)
    print("diameter: " + str(diameter))


#def effectiveDiameter(G):
#effDiam = snap.GetBfsEffDiam(G, 10, false)
#print("90-percentile effective diameter: " + str(effDiam))

from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
diameter(convert(politicalSet))
#effectiveDiameter(graph)
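The snap-based effective-diameter helper is commented out above. Below is a pure-networkx sketch of a 90th-percentile effective diameter computed from exact BFS distances; this is an assumption about what the intended snap call measures, and erdos_renyi_graph is only a toy stand-in.

import networkx as nx
import numpy as np


def effectiveDiameter(G, percentile=90):
    #Collect all finite, non-zero shortest-path lengths via BFS from every node
    dists = []
    for _, lengths in nx.all_pairs_shortest_path_length(G):
        dists.extend(d for d in lengths.values() if d > 0)
    #The effective diameter is the distance below which the given percentile
    #of reachable node pairs falls
    return np.percentile(dists, percentile)


print("90-percentile effective diameter: " +
      str(effectiveDiameter(nx.erdos_renyi_graph(100, 0.05, seed=1))))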
Example No. 9
        indexed_anomaly_score = []
        for i in range(len(anomaly_score)):
            indexed_anomaly_score.append([anomaly_score[i], testIndexes[i]])
        #print("indexed anomaly score: ", indexed_anomaly_score)
        indexed_anomaly_score.sort(key=sortFirst)
        #print("indexed anomaly score sorted: ", indexed_anomaly_score)

        count = 0
        for i in range(n):
            if indexed_anomaly_score[i][1] in outlierIndexes:
                print("found correct anomaly index: ",
                      str(indexed_anomaly_score[i][1]))
                count = count + 1
        print("count: " + str(count))
        accuracy_score = float(count) / n
        print("accuracy score: " + str(accuracy_score))
        accuracyArr.append(accuracy_score)

    averageAccuracy = sum(accuracyArr, 0.0) / len(accuracyArr)
    print("average accuracy score: " + str(averageAccuracy))
    return averageAccuracy


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
M = computeAllProperties(convert(politicalSet))
detectIsolation(M)
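The excerpt above ranks test points by anomaly score (ascending, via sortFirst) and counts how many of the top n land in the known outlier set. A compact, self-contained sketch of that precision-at-n computation follows; all names here are illustrative.

def precision_at_n(anomaly_scores, test_indexes, outlier_indexes, n):
    #Pair each score with its index and rank ascending, mirroring the excerpt's
    #sort; lower scores are treated as more anomalous
    ranked = sorted(zip(anomaly_scores, test_indexes), key=lambda pair: pair[0])
    hits = sum(1 for _, idx in ranked[:n] if idx in outlier_indexes)
    return float(hits) / n


#Toy usage: indexes 7 and 9 are the planted outliers
print(precision_at_n([0.9, 0.1, 0.8, 0.2], [3, 7, 5, 9], {7, 9}, 2))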
Example No. 10
    )
    plt.ylabel("Anomaly Accuracy")
    plt.xlabel("k")
    plt.show()


#imports used below (not shown in this excerpt)
import pickle
import pandas as pd
import matplotlib.pyplot as plt


def printMatrix(infile):
    lookup_classify = pickle.load(
        open('data/datasetPolitical/blogs_classify.pkl', 'rb'))
    first = [k for k, v in lookup_classify.items() if v == 0]
    second = [k for k, v in lookup_classify.items() if v == 1]

    df = pd.read_csv(infile)
    df.columns = ['source', 'target', 'weight']

    plt.plot(df[df.columns[0]],
             df[df.columns[1]],
             linestyle='None',
             marker='.',
             markeredgecolor='blue')
    plt.ylabel("col")
    plt.xlabel("row")
    plt.show()


from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final1.csv'
detectIsolationGeneric(convert(politicalSet))
#printMatrix(politicalSet)
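The names detectIsolation / detectIsolationGeneric suggest an isolation-forest detector, but the implementation is not shown in these excerpts. Below is a hedged sketch of how such a detector is typically run with scikit-learn's IsolationForest on a toy feature matrix; this is purely an assumption about the method.

import numpy as np
from sklearn.ensemble import IsolationForest

#Toy feature matrix: 100 inliers plus 5 planted outliers far from the cluster
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(0, 1, size=(100, 2)),
               rng.normal(6, 1, size=(5, 2))])

clf = IsolationForest(contamination=0.05, random_state=0)
clf.fit(X)
#decision_function: lower scores are more anomalous, matching the ascending
#sort used in the earlier excerpt
scores = clf.decision_function(X)
print("lowest-scoring points: " + str(np.argsort(scores)[:5]))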
Example No. 11
        #Calculate Frobenius norm for matrix A-B
        normAB = sp.linalg.norm(A - B)

        error = normAB / normA
        if (error <= 0.1):
            finalU = U

        kArr.append(k)
        errorArr.append(error)

        k = k + 20

    plotkError(kArr, errorArr)
    return finalU

def plotkError(kArr, errorArr):
    #Draw linear-linear plot (the log-scale axes are commented out below)
    plt.plot(kArr, errorArr, linestyle='None',
             marker='x', markeredgecolor='blue')
    plt.title("Linear-linear graph, political set")
    plt.ylabel("Reconstruction Error")
    plt.xlabel("k")
    #plt.xscale('log')
    #plt.yscale('log')
    plt.show()

from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
computeTrun(convert(politicalSet))
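The excerpt above measures the relative Frobenius error of a rank-k reconstruction while sweeping k. Below is a self-contained sketch of the same measurement using a dense SVD on a small built-in graph; nx.karate_club_graph is only an illustrative stand-in for the datasets.

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


def reconstruction_error_sweep(G, ks):
    #Relative Frobenius error of the rank-k SVD reconstruction of G's adjacency matrix
    A = nx.to_numpy_array(G)
    U, s, Vt = np.linalg.svd(A)
    normA = np.linalg.norm(A)
    errors = []
    for k in ks:
        B = (U[:, :k] * s[:k]) @ Vt[:k, :]  #rank-k reconstruction
        errors.append(np.linalg.norm(A - B) / normA)
    plt.plot(ks, errors, linestyle='None', marker='x', markeredgecolor='blue')
    plt.xlabel("k")
    plt.ylabel("Reconstruction Error")
    plt.show()


reconstruction_error_sweep(nx.karate_club_graph(), range(1, 31, 5))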
Example No. 12
import networkx as nx


def localClustering(G, n):
    c = nx.clustering(G, n)
    print("local clustering coefficient: " + str(c))


from src.preprocess.csvToGraph import convert
emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'
localClustering(convert(emailSet), 212)