def __init__(self, fileName):
    """Build the labelled graph, save it, and reload it as SNAP and NetworkX graphs.

    Args:
        fileName: stored as ``self.fileName``; presumably the path of the
            input data file -- confirm against ``getRootDir``/``getLblGraph``.

    NOTE(review): ``attrVal1`` and ``attrVal2`` are neither parameters nor
    locals of this method; they must resolve from an enclosing/module scope,
    otherwise this constructor raises ``NameError`` -- confirm.
    """
    self.fileName = fileName
    self.rootDir = self.getRootDir()
    self.attrVal1 = attrVal1
    self.attrVal2 = attrVal2
    self.graphName = self.getGraphName()

    # Vector holding the two attribute values, in order.
    self.lblV = snap.TStrV()
    for attrVal in (attrVal1, attrVal2):
        self.lblV.Add(attrVal)

    # Node count with attached label.
    self.lblNH = snap.TStrIntH()
    # Edge count with attached src/dst labels.
    self.lblEH = snap.TIntIntH()

    # An earlier variant (commented out in the original) built the graph
    # with self.getGraph(snap.PUNGraph) instead of getLblGraph().
    self.G = self.getLblGraph()
    self.saveGraph()

    # Presumably saveGraph() wrote this edge-list file -- verify. The same
    # file is then read back by both SNAP and NetworkX.
    relPath = self.rootDir + "/" + self.graphName + ".txt"
    edgeListPath = os.path.abspath(relPath)
    self.snapG = snap.LoadEdgeList(snap.PUNGraph, edgeListPath)
    self.nxG = nx.read_edgelist(edgeListPath)

    self.gNAH = self.getNodeIdLabel(self.G)
def getNEStats(self):
    """Count nodes per label and edges per (source label, destination label) id.

    Walks every node of ``self.G`` and tallies the label returned by
    ``self.walkNodeAttributes``; then walks every edge and tallies the id
    returned by ``self.getEdgeLbl`` for the labels of its two endpoints.

    Returns:
        A ``(lblNH, lblEH)`` tuple: ``lblNH`` is a ``snap.TStrIntH`` mapping
        node label -> number of nodes, ``lblEH`` is a ``snap.TIntIntH``
        mapping edge-label id -> number of edges.
    """
    lblEH = snap.TIntIntH()
    lblNH = snap.TStrIntH()

    # Node pass: one tally per node label.
    NI = self.G.BegNI()
    while NI < self.G.EndNI():
        label = self.walkNodeAttributes(NI.GetId())
        if label not in lblNH:
            lblNH[label] = 0
        lblNH[label] += 1
        NI.Next()

    # Edge pass: one tally per edge-label id derived from the endpoint labels.
    EI = self.G.BegEI()
    while EI < self.G.EndEI():
        srcLbl = self.walkNodeAttributes(EI.GetSrcNId())
        dstLbl = self.walkNodeAttributes(EI.GetDstNId())
        EdgeLblId = self.getEdgeLbl(srcLbl, dstLbl)
        if EdgeLblId not in lblEH:
            lblEH[EdgeLblId] = 0
        lblEH[EdgeLblId] += 1
        EI.Next()

    return lblNH, lblEH
def __init__(self, filename, mVals, pVal, tau):
    """Store the model parameters and create the empty SNAP containers.

    Args:
        filename: stored as ``self.fileName`` and passed to
            ``self.getParentDir`` to derive ``self.rootDir``.
        mVals: stored unchanged as ``self.mVals``.
        pVal: stored as ``self.pVal``; ``self.pVals`` holds
            ``[pVal, 1 - pVal]``.
        tau: stored unchanged as ``self.tau``.
    """
    # The original read the undefined name ``fileName`` here (the parameter
    # is spelled ``filename``), which raises NameError unless a global of
    # that name happens to exist.
    self.fileName = filename
    self.mVals = mVals
    self.pVal = pVal
    self.pVals = [pVal, 1 - pVal]
    self.tau = tau

    self.nLH = snap.TIntStrH()
    # Node count with attached label.
    self.lblNH = snap.TStrIntH()
    # Edge count with attached src/dst labels.
    self.lblEH = snap.TIntIntH()
    self.RH = snap.TIntFltPrH()
    self.BH = snap.TIntFltPrH()
    self.cRV = snap.TIntV()
    self.cBV = snap.TIntV()

    # Kept in the original order: getGraph() runs before graphName and
    # rootDir are set.
    self.G = self.getGraph(snap.PUNGraph)
    self.NG = snap.TNEANet()
    self.graphName = self.getGraphName()
    self.rootDir = self.getParentDir(self.fileName)
    self.absrootDir = os.path.abspath(self.rootDir)

    self.cR_count = 0
    self.cB_count = 0
    self.RH_count = 0
    self.BH_count = 0
import snap

# First load: read "c.txt" as an undirected graph, taking columns 0 and 1
# as the two endpoint columns.
print("LoadEdgeListStr 1")
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1)

# Second load: same file and columns, additionally passing a TStrIntH hash
# as the last argument; the result replaces the graph from the first load.
print("LoadEdgeListStr 2")
mapping = snap.TStrIntH()
companyEmployeeGraph = snap.LoadEdgeListStr(
    snap.PUNGraph, "c.txt", 0, 1, mapping)
company_name = df_counts.loc[c, 'organization'] #Strip special characters from company_name company_name = re.sub('[^0-9a-zA-Z]+', '', company_name) Graph = snap.PUNGraph.New() #Extract data for company of interest company_info = df_all[df_all['assignee_id'] == c][[ 'patent_id', 'date', 'inventor_id' ]].sort_values(by='patent_id') #Get date of oldest patent oldest_patent = max_date.year - min(company_info['date'].tolist()).year #Get all nodes (inventors) nodes = company_info['inventor_id'].drop_duplicates().tolist() #Get all potential edges (patent info) patents = company_info['patent_id'].drop_duplicates().tolist() #Hash of inventor_id -> node_id (for easier graph manipulation) inventor_id_to_index = snap.TStrIntH() metadata = {} metadata['number_of_patents'] = len(patents) metadata['oldest_patent'] = oldest_patent print metadata #Add all nodes to graph for i in range(0, len(nodes)): Graph.AddNode(i) inventor_id_to_index[nodes[i]] = i #Add all edges to graph for p in patents: #Get small connected component formed by a patent with multiple authors mini_cc = company_info[company_info['patent_id'] == p]['inventor_id'].tolist()