import networkx as nx


def numTriangles(G, n):
    EG = nx.ego_graph(G, n)
    # each triangle is counted three times, once at each of its nodes
    numTri = sum(nx.triangles(EG).values()) / 3
    print("number of triangles in ego-network: " + str(numTri))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

numTriangles(convert(politicalSet), 0)
import networkx as nx


def sizeConnectedComponents(G):
    num = nx.number_connected_components(G)
    print("number of connected components: " + str(num))
    lenEachCC = [len(c) for c in nx.connected_components(G)]
    print("list of their sizes: " + str(lenEachCC))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

sizeConnectedComponents(convert(emailSet))
import networkx as nx


def numEdges(G, n):
    EG = nx.ego_graph(G, n)
    numEdges = EG.number_of_edges()
    print("number of edges in ego-network: " + str(numEdges))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

numEdges(convert(emailSet), 212)
import networkx as nx


def numTriangles(G):
    # each triangle is counted three times, once at each of its nodes
    num = sum(nx.triangles(G).values()) / 3
    print("Number of triangles: " + str(num))


def numTriangles2():
    # sanity check on a toy graph: a single triangle (1, 2, 3) plus a pendant edge (3, 4)
    G = nx.Graph()
    G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4)])
    num = sum(nx.triangles(G).values()) / 3
    print(nx.triangles(G))
    print("Number of triangles: " + str(num))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

graph = convert(emailSet)
#numTriangles(graph)
numTriangles2()
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

# NOTE: fragment -- genPPM (the project's graph generator) is assumed to be
# imported elsewhere, and computeTopSVal is assumed to be defined earlier in
# this module.

G = genPPM.generate()
print(G.number_of_nodes())
print(G.number_of_edges())

n = G.number_of_nodes()
nArr = []
for i in range(n):
    nArr.append(i + 1)
print(nArr)

#Express graph as graph adjacency matrix / NumPy matrix
M = nx.to_numpy_matrix(G)

#Find all singular values of matrix
s = np.linalg.svd(M)[1]
print(s)

#Draw loglog graph
plt.plot(np.log10(nArr),
         np.log10(s),
         linestyle='None',
         marker='x',
         markeredgecolor='blue')
plt.xscale('log')
plt.yscale('log')
plt.show()

from src.preprocess.csvToGraph import convert

politicalSet = 'data/datasetPolitical/datasetPolitical_final2.csv'

computeTopSVal(convert(politicalSet), 600)
import matplotlib.pyplot as plt

# NOTE: fragment -- the first four statements below are the tail of the
# k-sweep loop inside classifyDataGeneric (the loop header and the rest of
# that function are not shown here); kArr, accuracyArr, accuracy and k are
# defined in that loop.
kArr.append(k)
accuracyArr.append(accuracy)
k = k + 1
plotkError(kArr, accuracyArr)


def plotkError(kArr, accuracyArr):
    plt.plot(kArr,
             accuracyArr,
             linestyle='None',
             marker='x',
             markeredgecolor='blue')
    plt.title(
        "k={1...50}, Linear-linear graph, average classification accuracy vs. k, political set"
    )
    plt.ylabel("Classification Accuracy")
    plt.xlabel("k")
    plt.show()


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

#from src.svd.computeTruncated import computeTrun
#M = computeTrun(convert(politicalSet))
#classifyData(M)
classifyDataGeneric(convert(politicalSet))
import networkx as nx
import scipy as sp
import scipy.sparse
import scipy.sparse.linalg


def largestSingularValue(G, n):
    EG = nx.ego_graph(G, n)
    #Express graph as graph adjacency matrix / SciPy sparse matrix
    M = sp.sparse.csc_matrix(nx.to_scipy_sparse_matrix(EG)).asfptype()
    #Find greatest singular value of matrix
    s = sp.sparse.linalg.svds(M, 1)[1]
    print("largest singular value of adjacency of ego-network: " + str(s))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

largestSingularValue(convert(emailSet), 212)
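# A quick, self-contained check (illustrative, not part of the original module)
# that scipy.sparse.linalg.svds with k=1 returns the largest singular value:
# compare it against the full dense SVD on a small random sparse matrix.
import numpy as np
import scipy.sparse
import scipy.sparse.linalg

A = scipy.sparse.random(30, 30, density=0.2, format='csc', random_state=0)
s_top = scipy.sparse.linalg.svds(A, k=1)[1][0]
s_all = np.linalg.svd(A.toarray(), compute_uv=False)
print(s_top, s_all.max())  # the two values should agree closely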
import networkx as nx
#import snap


def diameter(G):
    diameter = nx.diameter(G)
    print("diameter: " + str(diameter))


#def effectiveDiameter(G):
#    effDiam = snap.GetBfsEffDiam(G, 10, False)
#    print("90-percentile effective diameter: " + str(effDiam))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

diameter(convert(politicalSet))
#effectiveDiameter(graph)
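# A minimal sketch (not part of the original module): nx.diameter raises
# NetworkXError when a graph has more than one connected component, so if a
# converted graph turns out to be disconnected, one option is to report the
# diameter of its largest component instead. Names below are illustrative.
import networkx as nx


def diameterOfLargestComponent(G):
    # restrict to the subgraph induced by the largest connected component
    largestCC = max(nx.connected_components(G), key=len)
    H = G.subgraph(largestCC)
    print("diameter of largest component: " + str(nx.diameter(H)))


# Example on a toy disconnected graph: a path 0-1-2 plus an isolated edge 3-4.
diameterOfLargestComponent(nx.Graph([(0, 1), (1, 2), (3, 4)]))  # prints 2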
# NOTE: fragment -- the lines down to "return averageAccuracy" are the tail of
# detectIsolation; anomaly_score, testIndexes, outlierIndexes, n, accuracyArr
# and the sortFirst key function are all defined earlier in the full module,
# as are computeAllProperties and detectIsolation themselves.

indexed_anomaly_score = []
for i in range(len(anomaly_score)):
    indexed_anomaly_score.append([anomaly_score[i], testIndexes[i]])
#print("indexed anomaly score: ", indexed_anomaly_score)

indexed_anomaly_score.sort(key=sortFirst)
#print("indexed anomaly score sorted: ", indexed_anomaly_score)

count = 0
for i in range(n):
    if indexed_anomaly_score[i][1] in outlierIndexes:
        print("found correct anomaly index: ", str(indexed_anomaly_score[i][1]))
        count = count + 1
print("count: " + str(count))

accuracy_score = float(count) / n
print("accuracy score: " + str(accuracy_score))
accuracyArr.append(accuracy_score)

averageAccuracy = sum(accuracyArr, 0.0) / len(accuracyArr)
print("average accuracy score: " + str(averageAccuracy))
return averageAccuracy


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

M = computeAllProperties(convert(politicalSet))
detectIsolation(M)
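# A minimal, self-contained sketch of the scoring idea above (hypothetical
# names and data, not the project's detectIsolation): rank points by anomaly
# score, take the first n after sorting, and measure what fraction of the
# known outlier indexes appear among them. This assumes sortFirst orders the
# pairs by their score (first element), ascending.
def topNAccuracy(scores, indexes, outlierIndexes, n):
    ranked = sorted(zip(scores, indexes), key=lambda pair: pair[0])
    hits = sum(1 for _, idx in ranked[:n] if idx in outlierIndexes)
    return float(hits) / n


# Example: scores where lower means "more anomalous", two known outliers.
print(topNAccuracy([0.9, 0.1, 0.8, 0.2], [10, 11, 12, 13], {11, 13}, 2))  # 1.0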
import pickle

import matplotlib.pyplot as plt
import pandas as pd

# NOTE: fragment -- the dangling ")" below closes a plotting call whose opening
# is not shown; the surrounding routine and detectIsolationGeneric are defined
# earlier in the full module.
)
plt.ylabel("Anomaly Accuracy")
plt.xlabel("k")
plt.show()


def printMatrix(infile):
    lookup_classify = pickle.load(
        open('data/datasetPolitical/blogs_classify.pkl', 'rb'))
    first = [k for k, v in lookup_classify.items() if v == 0]
    second = [k for k, v in lookup_classify.items() if v == 1]
    df = pd.read_csv(infile)
    df.columns = ['source', 'target', 'weight']
    plt.plot(df[df.columns[0]],
             df[df.columns[1]],
             linestyle='None',
             marker='.',
             markeredgecolor='blue')
    plt.ylabel("col")
    plt.xlabel("row")
    plt.show()


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final1.csv'

detectIsolationGeneric(convert(politicalSet))
#printMatrix(politicalSet)
import scipy as sp
import scipy.linalg
import matplotlib.pyplot as plt

# NOTE: fragment -- the lines down to "return finalU" are the tail of
# computeTrun (truncated SVD with reconstruction-error tracking); A, B, U,
# normA, finalU, k, kArr and errorArr are set up earlier in that function
# (not shown here).

#Calculate Frobenius norm for matrix A-B
normAB = sp.linalg.norm(A - B)
error = normAB / normA
if (error <= 0.1):
    finalU = U
kArr.append(k)
errorArr.append(error)
k = k + 20
plotkError(kArr, errorArr)
return finalU


def plotkError(kArr, errorArr):
    plt.plot(kArr,
             errorArr,
             linestyle='None',
             marker='x',
             markeredgecolor='blue')
    plt.title("Linear-linear graph, political set")
    plt.ylabel("Reconstruction Error")
    plt.xlabel("k")
    #plt.xscale('log')
    #plt.yscale('log')
    plt.show()


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

computeTrun(convert(politicalSet))
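# A minimal, self-contained sketch (illustrative names, not the project's
# computeTrun): build a rank-k approximation A_k = U_k S_k V_k^T from the SVD
# and measure the relative Frobenius reconstruction error
# ||A - A_k||_F / ||A||_F, which is the quantity tracked as `error` above.
import numpy as np


def rankKError(A, k):
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    Ak = U[:, :k] @ np.diag(s[:k]) @ Vt[:k, :]
    return np.linalg.norm(A - Ak) / np.linalg.norm(A)


# Example on a small random matrix: the error shrinks as k grows.
A = np.random.rand(20, 20)
for k in (1, 5, 10, 20):
    print(k, rankKError(A, k))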
import networkx as nx


def localClustering(G, n):
    c = nx.clustering(G, n)
    print("local clustering coefficient: " + str(c))


from src.preprocess.csvToGraph import convert

emailSet = 'data/datasetEmail/datasetEmail_final.csv'
politicalSet = 'data/datasetPolitical/datasetPolitical_final.csv'

localClustering(convert(emailSet), 212)
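# A tiny sanity check (not part of the original module) for nx.clustering:
# in a triangle (1, 2, 3) with a pendant edge (3, 4), node 3 has three
# neighbours and one edge among them, so its local clustering coefficient is
# 1 / C(3, 2) = 1/3, while nodes 1 and 2 have coefficient 1.0.
import networkx as nx

T = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4)])
print(nx.clustering(T, 3))  # 0.333...
print(nx.clustering(T))     # {1: 1.0, 2: 1.0, 3: 0.333..., 4: 0}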