def DegreeClassify(dataPath='../LinkAnalyticsData/UTK_problem/'):
    """Load the 'Moria_2' and 'Standelf_2' graphs and compute node degrees.

    For each name the file ``<dataPath>/<name>.graph`` is read with
    ``GU.readData`` and the resulting graph is passed to ``nx.degree``.

    NOTE(review): the degrees are computed but neither used nor returned,
    so this looks like an unfinished stub -- confirm what classification
    step was intended before building on it.

    Args:
        dataPath: Directory that contains the ``.graph`` files.

    Returns:
        None.
    """
    filenames = ('Moria_2', 'Standelf_2')

    for f in filenames:
        # Reading in the Graph (the '.graph' extension is appended here,
        # the names above are bare stems).
        MG = GU.readData(os.path.join(dataPath, f) + '.graph')
        deg = nx.degree(MG)  # currently unused, see the docstring note
def GetEdgeDistributions(dataPath='../LinkAnalyticsData/UTK_problem/'):
    """Save one log-scaled histogram per edge attribute for each graph.

    Reads 'Moria_1.graph' and 'Standelf_1.graph' from *dataPath* and, for
    each of the attributes 'calls', 'texts', 'days' and 'secs', plots the
    values returned by ``GU.GetAttr`` as a 100-bin histogram with a
    logarithmic frequency axis.  Each plot is written to
    ``<name>_<attr>_distribution.png`` in the current working directory,
    where ``<name>`` is the part of the file name before the first '_'.

    Args:
        dataPath: Directory that contains the ``.graph`` files.

    Returns:
        None.
    """
    edge_attrs = ('calls', 'texts', 'days', 'secs')

    for graph_file in ('Moria_1.graph', 'Standelf_1.graph'):
        # Reading in the Graph
        graph = GU.readData(os.path.join(dataPath, graph_file))

        # 'Moria_1.graph' -> 'Moria_1' -> 'Moria'
        stem = graph_file.split('.')[0]
        name = stem.split('_')[0]

        # Distribution of each Attribute
        for attr in edge_attrs:
            values = GU.GetAttr(graph, attr)

            # Plotting
            pyplot.hist(values, 100)
            pyplot.yscale('log')
            pyplot.grid(True)
            pyplot.ylabel("Frequency")
            pyplot.xlabel(attr)
            pyplot.title(name + " " + attr + " distribution")
            pyplot.savefig(name + "_" + attr + "_distribution.png")

            # Wipe the current figure so the next histogram starts clean.
            pyplot.clf()
def readData(filename='../LinkAnalyticsData/UTK_problem/Moria_1.graph'):
    """Create a dataset for ANN training from the graph stored in *filename*.

    Every edge ``(u, v)`` of the graph becomes one sample with
    2 + 4 + 2 = 8 inputs, in this order::

        closeness(u), degree(u),
        calls, secs, texts, days,      # attributes of the edge itself
        degree(v), closeness(v)

    The dataset is declared with two classes ("there or not"), but this
    function only ever adds samples with target ``[1]``.

    Args:
        filename: Path of the graph file, read with ``GU.readData``.

    Returns:
        The populated ``ClassificationDataSet``.

    Raises:
        KeyError: If an edge lacks one of the 'calls', 'secs', 'texts' or
            'days' attributes.
    """
    numInputs = 2 + 4 + 2
    alldata = ClassificationDataSet(numInputs, 1, nb_classes=2)

    MG = GU.readData(filename)
    closeness = nx.closeness_centrality(MG)
    degree = nx.degree(MG)

    # The timer deliberately starts after the graph metrics are computed;
    # it only measures the conversion of edges into samples.
    startTime = datetime.now()

    # Computing the data: one sample per edge, added as we go instead of
    # first materialising a full intermediate list.
    for u, v, edata in MG.edges(data=True):
        sample = [closeness[u], degree[u],
                  edata['calls'], edata['secs'],
                  edata['texts'], edata['days'],
                  degree[v], closeness[v]]
        alldata.addSample(sample, [1])

    # Single-argument print() is valid on both Python 2 and Python 3.  The
    # two spaces before %s reproduce the old `print "... in ", delta`
    # output exactly (trailing space of the literal + separator space).
    elapsed = datetime.now() - startTime
    print("Converted to data in  %s" % (elapsed,))

    return alldata
def GetDataDistributions(dataPath='../LinkAnalyticsData/UTK_problem/'):
    """Finds the distribution of the cumulative node and edge weights.

    Reads 'Moria_1.graph' and 'Standelf_1.graph' from *dataPath*, converts
    each graph with ``GU.ConvertToSingle`` and, for each of the node
    attributes 'calls', 'texts', 'degree' and 'secs', saves a histogram of
    the per-node values to ``<name>_<attr>_cum_distribution.png`` in the
    current working directory.

    The histogram uses 25 base-2 logarithmically spaced bin edges running
    from 2 up to the third-largest value, so values outside that range
    (including the two largest) are not drawn.

    NOTE(review): ``GU.ConvertToSingle`` presumably collapses the
    multigraph into a simple graph carrying cumulative per-node
    attributes -- confirm against its definition.

    Args:
        dataPath: Directory that contains the ``.graph`` files.

    Returns:
        None.

    Raises:
        IndexError: If a graph has fewer than three nodes, because the
            third-largest value is used as the upper bin edge.
    """
    filenames = ('Moria_1.graph', 'Standelf_1.graph')
    attrs = ('calls', 'texts', 'degree', 'secs')

    for f in filenames:
        # Reading in the Graph
        MG = GU.readData(os.path.join(dataPath, f))
        g = GU.ConvertToSingle(MG)

        # 'Moria_1.graph' -> 'Moria'; does not depend on the attribute.
        name = f.split('.')[0].split('_')[0]

        for attr in attrs:
            x = [g.node[n][attr] for n in g.nodes()]

            # Bin edges are computed before the figure is opened so that a
            # failure here cannot leave an orphaned figure behind.
            largest = heapq.nlargest(3, x)
            bins = np.logspace(1, np.log2(largest[2]), 25, base=2)

            # Plotting the Data
            fig = pyplot.figure()
            pyplot.hist(x, bins=bins)
            pyplot.ylabel("Frequency")
            pyplot.xlabel(attr)
            title = name + " " + attr + " distribution"
            pyplot.title(title)
            pyplot.savefig(name + "_" + attr + "_cum_distribution.png")

            # pyplot keeps every figure alive until it is closed; release
            # it now that the image is on disk.
            pyplot.close(fig)