Ejemplo n.º 1
0
def genDonorFeatures(year, weightF, graphFiles=None, bigraph=None, adjMat=None, newToOldIDs=None):
    """Generate and pickle donor features for each unipartite graph file.

    Args:
        year: Integer year; used to build the on-disk data paths.
        weightF: Weighting name; used in the timer label and as the suffix
            of the adjacency-matrix file name.
        graphFiles: Optional collection of graph file names. When falsy it is
            obtained from getGraphFiles(year, weightF).
        bigraph: Optional already-loaded bipartite graph. When falsy it is
            loaded from 'Data/Bipartite-Graphs/<year>.graph'.
        adjMat: Optional adjacency matrix. When None it is unpickled from
            disk and converted with .tocsc(); a caller-supplied matrix is
            passed through unchanged.
        newToOldIDs: Optional node-ID mapping. When None it is unpickled
            from 'Data/Unipartite-NodeMappings/<year>.newToOld'.

    Side effects:
        Writes one 'Data/Features/<graph file>.features' pickle per entry in
        graphFiles.
    """
    timer = Timer('Generating donor features for %d %s' % (year, weightF))

    # Fall back to the on-disk inputs for anything the caller did not supply.
    graphFiles = graphFiles or getGraphFiles(year, weightF)
    bigraph = bigraph or graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
    if adjMat is None:
        # Identity check on purpose: only a missing matrix triggers a load.
        adjMat = pickler.load('Data/Unipartite-Matrix/%d.%s' % (year, weightF)).tocsc()
    if newToOldIDs is None:
        newToOldIDs = pickler.load('Data/Unipartite-NodeMappings/%d.newToOld' % year)
    timer.markEvent('Loaded bigraph, adj matrix, and newToOld mapping')

    for graphFile in graphFiles:
        donorGraph = graph_funcs.loadGraph('Data/Unipartite-Graphs/%s' % graphFile, snap.TUNGraph)
        timer.markEvent('Loaded graph %s' % graphFile)

        donorFeatures = feature_extractor.generateFeatures(
            year, bigraph, donorGraph, newToOldIDs, adjMat)
        timer.markEvent('Generated features')

        pickler.save(donorFeatures, 'Data/Features/%s.features' % graphFile)
        timer.markEvent('Saved features')

    timer.finish()
Ejemplo n.º 2
0
def genRecipFeatures(year, weightF, graphFiles=None, bigraph=None):
    """Compute and save recipient features for each unipartite graph file.

    For every graph file, the previously pickled donor features are loaded
    from 'Data/Features/<graph file>.features', combined with donation
    amounts and categorical feature data derived from the bipartite graph,
    and handed to recip_feature_extractor.saveFeatures with the target
    'Data/Recip-Features/<graph file>'.

    Args:
        year: Integer year; used to build the bipartite graph path.
        weightF: Weighting name; used in the timer label and to look up the
            graph files when none are supplied.
        graphFiles: Optional collection of graph file names. When falsy it is
            obtained from getGraphFiles(year, weightF).
        bigraph: Optional already-loaded bipartite graph. When falsy it is
            loaded from 'Data/Bipartite-Graphs/<year>.graph'.
    """
    timer = Timer('Generating recip features for %d %s' % (year, weightF))

    if not graphFiles:
        graphFiles = getGraphFiles(year, weightF)
    if not bigraph:
        bigraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)

    # These depend only on the bipartite graph, so compute them once up front
    # and reuse them for every graph file.
    (receiptsFromDonor, totalReceipts, totalDonations) = (
        recip_feature_extractor.getDonationAmounts(bigraph))
    (partialFeatures, fullFeatures) = (
        recip_feature_extractor.getCategoricalGraphFeatures(bigraph))

    timer.markEvent('Loaded bigraph, donor amounts, and categorical feature funcs')

    for graphFile in graphFiles:
        loadedDonorFeatures = pickler.load('Data/Features/%s.features' % graphFile)
        timer.markEvent('Loaded donor features for graph %s' % graphFile)

        computed = recip_feature_extractor.getRecipFeatures(
            bigraph,
            loadedDonorFeatures,
            receiptsFromDonor,
            totalReceipts,
            totalDonations,
            partialFeatures,
            fullFeatures,
        )
        timer.markEvent('Calculated recip features')

        outputPath = 'Data/Recip-Features/%s' % graphFile
        recip_feature_extractor.saveFeatures(bigraph, computed, outputPath)
        timer.markEvent('Saved recip features')

    timer.finish()
Ejemplo n.º 3
0
def runFullPipeline(year):
    """Run donor features, recipient features and results for every weighting.

    The bipartite graph and the new-to-old node mapping are loaded once and
    shared across all weightings. For each weighting the adjacency matrix is
    loaded, donor features are generated, the matrix is released, recipient
    features are generated, and the output of getResults is pickled to
    'Data/pruning_optimizations.<year>.<weighting>'.

    Args:
        year: Integer year; used to build every on-disk data path.
    """
    timing = Timer('Running pipeline for %d' % year)

    weightings = ('adamic', 'cosine', 'jaccard', 'jaccard2', 'weighted_adamic')
    bigraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
    newToOldIDs = pickler.load('Data/Unipartite-NodeMappings/%d.newToOld' % year)

    for weightF in weightings:
        graphFiles = getGraphFiles(year, weightF)

        # Convert to CSC at load time, matching what genDonorFeatures does
        # when it loads the matrix itself. Chaining the calls means the
        # freshly unpickled matrix is not kept alive alongside the converted
        # one.
        adjMat = pickler.load('Data/Unipartite-Matrix/%d.%s' % (year, weightF)).tocsc()
        timing.markEvent('Loaded everything for donor features')
        genDonorFeatures(year, weightF, graphFiles=graphFiles, bigraph=bigraph,
                         adjMat=adjMat, newToOldIDs=newToOldIDs)
        del adjMat  # free the incredible amount of memory for the adjacency matrix

        genRecipFeatures(year, weightF, graphFiles=graphFiles, bigraph=bigraph)
        results = getResults(year, weightF, graphFiles=graphFiles)
        pickler.save(results, 'Data/pruning_optimizations.%d.%s' % (year, weightF))
        timing.markEvent('Finished with %s' % weightF)

    timing.finish()
Ejemplo n.º 4
0
def createDonorDonorGraph(year, weightF):
    """Build the donor-donor graph and one adjacency matrix per weighting.

    Two donor nodes are connected when their candidate sets (as returned by
    getDonorInfos) intersect. For every such pair the edge is added to the
    unipartite graph and each weight is recorded symmetrically, at
    (newID1, newID2) and (newID2, newID1).

    Args:
        year: Integer year; selects 'Data/Bipartite-Graphs/<year>.graph'.
        weightF: Callable invoked as
            weightF(oldID1, oldID2, sharedCands, numDonations, totalAmount,
                    cands, transactions, amounts, totalReceipts).
            It must return a mapping with the keys 'jaccard', 'jaccard2',
            'affinity', 'cosine', 'adamic' and 'weighted_adamic'.

    Returns:
        A 9-tuple: (unipartiteGraph, jaccardAdjMat, jaccard2AdjMat,
        affinityAdjMat, cosineAdjMat, adamicAdjMat, weightedAdamicAdjMat,
        newToOld, oldToNew). Each matrix is a scipy CSR matrix of shape
        (N, N) with N = len(newToOld).
    """
    timing = Timer('creating donor-donor graph for %d' % year)

    # Load the original bipartite graph
    bipartiteGraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' %
                                           year)

    # Load the info about each donor and their recipients
    numDonations, totalAmount, cands, transactions, amounts, totalReceipts = getDonorInfos(
        bipartiteGraph)
    timing.markEvent('Got info about donor nodes')

    # Create initial unipartite graph with just nodes and node attributes
    unipartiteGraph, oldToNew, newToOld = cloneBipartiteNodes(
        bipartiteGraph, cands)
    timing.markEvent('Finished cloning nodes')

    # One value list per weighting, in the order the matrices are returned.
    weightNames = ('jaccard', 'jaccard2', 'affinity', 'cosine', 'adamic',
                   'weighted_adamic')
    weightData = dict((name, []) for name in weightNames)
    rows = []
    cols = []

    # Materialise the key list once: calling .keys() per outer iteration
    # rebuilds the list every time on Python 2, and a Python 3 key view
    # cannot be sliced at all.
    newIDs = list(newToOld.keys())

    # Add the weighted edges for every relevant pair of donor nodes
    for i, newID1 in enumerate(newIDs):
        oldID1 = newToOld[newID1]
        for newID2 in newIDs[i + 1:]:
            oldID2 = newToOld[newID2]

            sharedCands = cands[oldID1].intersection(cands[oldID2])
            if not sharedCands:
                continue

            # Calculate the weights
            weights = weightF(oldID1, oldID2, sharedCands, numDonations,
                              totalAmount, cands, transactions, amounts,
                              totalReceipts)

            # Record both (i, j) and (j, i) so every matrix is symmetric.
            rows.extend((newID1, newID2))
            cols.extend((newID2, newID1))
            for name in weightNames:
                value = weights[name]
                weightData[name].extend((value, value))

            # Add the edge between the two nodes
            unipartiteGraph.AddEdge(newID1, newID2)

        nodesDone = i + 1
        if nodesDone % 100 == 0:
            timing.markEvent('Finished %d outer loops out of %d' %
                             (nodesDone, unipartiteGraph.GetNodes()))

    N = len(newToOld)
    adjMats = [sp.csr_matrix((weightData[name], (rows, cols)), shape=(N, N))
               for name in weightNames]
    (jaccardAdjMat, jaccard2AdjMat, affinityAdjMat, cosineAdjMat,
     adamicAdjMat, weightedAdamicAdjMat) = adjMats

    timing.finish()
    return (unipartiteGraph, jaccardAdjMat, jaccard2AdjMat, affinityAdjMat,
            cosineAdjMat, adamicAdjMat, weightedAdamicAdjMat, newToOld,
            oldToNew)
Ejemplo n.º 5
0
    return partialFeatures, fullFeatures


################################################################################
# Module command-line behavior #
################################################################################

if __name__ == '__main__':
    #weightings = ('jaccard', 'jaccard2', 'affinity', 'cosine', 'adamic', 'weighted_adamic')
    #weightings = ('adamic', 'weighted_adamic')
    weightings = ('jaccard2', )
    for year in sys.argv[1:]:
        year = int(year)
        timing = Timer('Generating features for %d' % year)
        graph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
        receiptsFromDonor, totalReceipts, totalDonations = getDonationAmounts(
            graph)
        partialFeatures, fullFeatures = getCategoricalGraphFeatures(graph)

        baselineFeatures = \
            getBaselineFeatures(graph, receiptsFromDonor, totalReceipts, totalDonations, partialFeatures, fullFeatures)
        saveFeatures(graph, baselineFeatures,
                     'Data/Recip-Features/%d.baseline' % year)
        timing.markEvent('Generated baseline features')

        for weighting in weightings:
            donorFeatures = pickler.load('Data/Features/%d%s.features' \
                    % (year, weighting))
            recipFeatures = getRecipFeatures(graph, donorFeatures,
                                             receiptsFromDonor, totalReceipts,
Ejemplo n.º 6
0
                communities[nid] = communityIndex
        communityIndex += 1

    return communities


################################################################################
# Module command-line behavior #
################################################################################

if __name__ == '__main__':
    # Every command-line argument is parsed as an integer year and processed
    # independently.
    for arg in sys.argv[1:]:
        year = int(arg)
        timing = Timer('creating unipartite graph for %d' % year)

        # Inputs shared by every weighting for this year: the bipartite
        # graph, the unipartite graph (loaded as snap.TUNGraph) and the
        # pickled newToOld node mapping.
        bipartiteGraph = graph_funcs.loadGraph(
            'Data/Bipartite-Graphs/%d.graph' % year)
        unipartiteGraph = graph_funcs.loadGraph(
            'Data/Unipartite-Graphs/%d.graph' % year, snap.TUNGraph)
        newToOldIDs = pickler.load('Data/Unipartite-NodeMappings/%d.newToOld' %
                                   year)
        timing.markEvent('Loaded input graphs/matrices.')

        # Only 'jaccard2' is currently enabled; the commented-out line below
        # is the full list of weightings.
        #for weightF in ['jaccard', 'affinity', 'jaccard2', 'cosine', 'adamic', 'weighted_adamic']:
        for weightF in ['jaccard2']:
            print '******* %s *******' % weightF
            # Load this weighting's adjacency matrix and convert it to CSC
            # before handing it to generateFeatures.
            adjMatrix = pickler.load('Data/Unipartite-Matrix/%d.%s' %
                                     (year, weightF))
            adjMatrix = adjMatrix.tocsc()

            features = generateFeatures(year, bipartiteGraph, unipartiteGraph,
                                        newToOldIDs, adjMatrix)