Exemple #1
0
    def __init__(self, fileName):
        """Initialise the instance from ``fileName``.

        Stores the file name, derives the root directory and graph name via
        the instance's own helpers, builds ``self.G`` with ``getLblGraph()``,
        calls ``saveGraph()``, and then loads ``<rootDir>/<graphName>.txt``
        once with snap and once with networkx.

        Args:
            fileName: stored as ``self.fileName``; presumably the path of the
                input graph file (verify against the helpers that read it).

        NOTE(review): ``attrVal1`` and ``attrVal2`` are neither parameters of
        this method nor assigned inside it, so they must resolve to names
        defined outside this block (e.g. module-level globals). If no such
        names exist this constructor raises ``NameError`` -- confirm where
        they come from.
        """
        self.fileName = fileName
        self.rootDir = self.getRootDir()
        # See NOTE(review) in the docstring: these two names are not local.
        self.attrVal1 = attrVal1
        self.attrVal2 = attrVal2

        self.graphName = self.getGraphName()

        # String vector holding the two attribute values, in order.
        self.lblV = snap.TStrV()
        self.lblV.Add(attrVal1)
        self.lblV.Add(attrVal2)

        self.lblNH = snap.TStrIntH()  # Node count with attached label

        self.lblEH = snap.TIntIntH()  # Edge count with attached src dst labels

        #self.G = self.getGraph(snap.PUNGraph)
        # Built only after the label containers above exist; presumably
        # getLblGraph()/saveGraph() rely on them -- keep this ordering.
        self.G = self.getLblGraph()
        self.saveGraph()

        # Absolute path of "<rootDir>/<graphName>.txt"; presumably the file
        # that saveGraph() has just written -- TODO confirm.
        ufileName = os.path.abspath(self.rootDir + "/" + self.graphName +
                                    ".txt")
        # The same edge list is loaded twice: as a snap undirected graph and
        # as a networkx graph.
        self.snapG = snap.LoadEdgeList(snap.PUNGraph, ufileName)
        self.nxG = nx.read_edgelist(ufileName)

        # Result of getNodeIdLabel() on the labelled graph; presumably a
        # node id -> label mapping (verify against that helper).
        self.gNAH = self.getNodeIdLabel(self.G)
    def getNEStats(self):
        """Tally nodes per label and edges per edge-label id in ``self.G``.

        Each node is labelled with ``self.walkNodeAttributes(node_id)``. Each
        edge is mapped to an id with ``self.getEdgeLbl(src_label, dst_label)``,
        where the two labels are those of its source and destination nodes.

        Returns:
            A ``(lblNH, lblEH)`` tuple:
            ``lblNH`` is a ``snap.TStrIntH`` mapping node label -> number of
            nodes carrying it; ``lblEH`` is a ``snap.TIntIntH`` mapping
            edge-label id -> number of edges carrying it.
        """
        lblNH = snap.TStrIntH()
        lblEH = snap.TIntIntH()

        # Node pass: advance the snap iterator until it reaches EndNI().
        NI = self.G.BegNI()
        while NI < self.G.EndNI():
            label = self.walkNodeAttributes(NI.GetId())
            # Seed unseen keys explicitly so the increment below always
            # operates on an existing entry.
            if label not in lblNH:
                lblNH[label] = 0
            lblNH[label] += 1
            NI.Next()

        # Edge pass: same idea, keyed by the id derived from both end labels.
        EI = self.G.BegEI()
        while EI < self.G.EndEI():
            srcLbl = self.walkNodeAttributes(EI.GetSrcNId())
            dstLbl = self.walkNodeAttributes(EI.GetDstNId())
            EdgeLblId = self.getEdgeLbl(srcLbl, dstLbl)

            if EdgeLblId not in lblEH:
                lblEH[EdgeLblId] = 0
            lblEH[EdgeLblId] += 1
            EI.Next()

        return lblNH, lblEH
    def __init__(self, filename, mVals, pVal, tau):
        """Initialise the instance's parameters, snap containers and graph.

        Args:
            filename: stored as ``self.fileName`` and passed to
                ``getParentDir()``; presumably the path of the input graph
                file (verify against ``getGraph()``).
            mVals: stored unchanged as ``self.mVals``.
            pVal: stored as ``self.pVal``; ``self.pVals`` is set to
                ``[pVal, 1 - pVal]``.
            tau: stored unchanged as ``self.tau``.
        """
        # The parameter is spelled ``filename``; the attribute keeps the
        # camel-case name that the rest of the class reads.
        self.fileName = filename
        self.mVals = mVals
        self.pVal = pVal
        self.pVals = [pVal, 1 - pVal]
        self.tau = tau
        self.nLH = snap.TIntStrH()
        self.lblNH = snap.TStrIntH()  # Node count with attached label
        self.lblEH = snap.TIntIntH()  # Edge count with attached src dst labels

        # Integer-keyed hashes of float pairs.
        self.RH = snap.TIntFltPrH()
        self.BH = snap.TIntFltPrH()

        # Integer vectors.
        self.cRV = snap.TIntV()
        self.cBV = snap.TIntV()

        # The helpers below are called in the original order because they
        # may read attributes assigned above.
        self.G = self.getGraph(snap.PUNGraph)
        self.NG = snap.TNEANet()
        self.graphName = self.getGraphName()
        self.rootDir = self.getParentDir(self.fileName)
        self.absrootDir = os.path.abspath(self.rootDir)

        # Counters, all starting at zero.
        self.cR_count = 0
        self.cB_count = 0

        self.RH_count = 0
        self.BH_count = 0
Exemple #4
0
import snap

# Load "c.txt" as an undirected graph, taking the source and destination
# node names from columns 0 and 1.
# print() with one parenthesised string prints the same text on Python 2
# and Python 3, whereas the bare print statement only parses on Python 2.
print("LoadEdgeListStr 1")
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1)

# Load the same file again, this time also passing a string -> int hash;
# presumably snap fills it with the node-name -> node-id mapping (check
# against the installed snap.py version). The graph from the first call
# is replaced.
print("LoadEdgeListStr 2")
mapping = snap.TStrIntH()
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1,
                                            mapping)
    company_name = df_counts.loc[c, 'organization']
    #Strip special characters from company_name
    company_name = re.sub('[^0-9a-zA-Z]+', '', company_name)
    Graph = snap.PUNGraph.New()
    #Extract data for company of interest
    company_info = df_all[df_all['assignee_id'] == c][[
        'patent_id', 'date', 'inventor_id'
    ]].sort_values(by='patent_id')
    #Get date of oldest patent
    oldest_patent = max_date.year - min(company_info['date'].tolist()).year
    #Get all nodes (inventors)
    nodes = company_info['inventor_id'].drop_duplicates().tolist()
    #Get all potential edges (patent info)
    patents = company_info['patent_id'].drop_duplicates().tolist()
    #Hash of inventor_id -> node_id (for easier graph manipulation)
    inventor_id_to_index = snap.TStrIntH()

    metadata = {}
    metadata['number_of_patents'] = len(patents)
    metadata['oldest_patent'] = oldest_patent
    print metadata

    #Add all nodes to graph
    for i in range(0, len(nodes)):
        Graph.AddNode(i)
        inventor_id_to_index[nodes[i]] = i
    #Add all edges to graph
    for p in patents:
        #Get small connected component formed by a patent with multiple authors
        mini_cc = company_info[company_info['patent_id'] ==
                               p]['inventor_id'].tolist()