def genRecipFeatures(year, weightF, graphFiles=None, bigraph=None):
    timing = Timer('Generating recip features for %d %s' % (year, weightF))
    if not graphFiles:
        graphFiles = getGraphFiles(year, weightF)
    if not bigraph:
        bigraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
    receiptsFromDonor, totalReceipts, totalDonations = \
        recip_feature_extractor.getDonationAmounts(bigraph)
    partialFeatures, fullFeatures = \
        recip_feature_extractor.getCategoricalGraphFeatures(bigraph)
    timing.markEvent('Loaded bigraph, donor amounts, and categorical feature funcs')

    for gf in graphFiles:
        donorFeatures = pickler.load('Data/Features/%s.features' % gf)
        timing.markEvent('Loaded donor features for graph %s' % gf)

        recipFeatures = recip_feature_extractor.getRecipFeatures(
            bigraph, donorFeatures, receiptsFromDonor, totalReceipts,
            totalDonations, partialFeatures, fullFeatures)
        timing.markEvent('Calculated recip features')

        recip_feature_extractor.saveFeatures(bigraph, recipFeatures,
                                             'Data/Recip-Features/%s' % gf)
        timing.markEvent('Saved recip features')

    timing.finish()
def genDonorFeatures(year, weightF, graphFiles=None, bigraph=None,
                     adjMat=None, newToOldIDs=None):
    timing = Timer('Generating donor features for %d %s' % (year, weightF))
    if not graphFiles:
        graphFiles = getGraphFiles(year, weightF)
    if not bigraph:
        bigraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
    if adjMat is None:
        adjMat = pickler.load('Data/Unipartite-Matrix/%d.%s' % (year, weightF))
        adjMat = adjMat.tocsc()
    if newToOldIDs is None:
        newToOldIDs = pickler.load('Data/Unipartite-NodeMappings/%d.newToOld' % year)
    timing.markEvent('Loaded bigraph, adj matrix, and newToOld mapping')

    for gf in graphFiles:
        unigraph = graph_funcs.loadGraph('Data/Unipartite-Graphs/%s' % gf,
                                         snap.TUNGraph)
        timing.markEvent('Loaded graph %s' % gf)

        features = feature_extractor.generateFeatures(year, bigraph, unigraph,
                                                      newToOldIDs, adjMat)
        timing.markEvent('Generated features')

        pickler.save(features, 'Data/Features/%s.features' % gf)
        timing.markEvent('Saved features')

    timing.finish()
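# Usage sketch for the two generators above (2000 and 'jaccard2' are
# hypothetical arguments; with no keyword arguments each function loads the
# graph, adjacency matrix, and ID mapping itself, while runFullPipeline below
# passes preloaded copies so they are only read from disk once):
#
#     genDonorFeatures(2000, 'jaccard2')
#     genRecipFeatures(2000, 'jaccard2')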
def getDonationAmounts(graph):
    timing = Timer('Getting candidate, donor, and cand-donor donation amounts')

    # A dictionary from rnodeids to dictionaries from cnodeids to ints
    # indicating the total donations from that donor to that candidate.
    receiptsFromDonor = defaultdict(lambda: defaultdict(int))

    # A dictionary from rnodeids to ints indicating the total amount donated
    # to that candidate.
    totalReceipts = defaultdict(int)

    # A dictionary from cnodeids to ints indicating the total amount donated
    # by that donor.
    totalDonations = defaultdict(int)

    # For each donation, note it in the relevant dictionaries
    for edge in graph.Edges():
        donor = edge.GetSrcNId()
        recip = edge.GetDstNId()
        amount = graph.GetIntAttrDatE(edge.GetId(), 'amount')

        receiptsFromDonor[recip][donor] += amount
        totalReceipts[recip] += amount
        totalDonations[donor] += amount

    timing.finish()
    return receiptsFromDonor, totalReceipts, totalDonations
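# Usage sketch for getDonationAmounts (2000 is a hypothetical year; the input
# is the bipartite donor-candidate graph whose edges carry an int 'amount'
# attribute, as the loop above assumes):
#
#     bigraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/2000.graph')
#     receiptsFromDonor, totalReceipts, totalDonations = getDonationAmounts(bigraph)
#     # e.g. the fraction of donor d's total giving that went to recipient r:
#     # receiptsFromDonor[r][d] / float(totalDonations[d])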
def set_binary_model(self, model_file_path: str):
    timer: Timer = Timer()
    Logger().start_analyzing("Loading binary Word2VecModel")
    self.model = KeyedVectors.load_word2vec_format(model_file_path, binary=True)
    Logger().finish_analyzing(timer.get_duration(), "Loading binary Word2VecModel")
def generateFeatures(year, bipartite, unipartite, newToOldIDs, adjMatrix):
    timing = Timer('generating features for %d' % year)

    bipartiteFeatures = extractBipartiteFeatures(bipartite)
    timing.markEvent('Extracted bipartite features.')

    rawUnifeatures, componentFeatureFunc, CNMFeatureFunc = \
        extractUnipartiteFeatures(unipartite, adjMatrix)
    unipartiteFeatures = convertNewToOldIDs(rawUnifeatures, newToOldIDs)
    timing.markEvent('Extracted unipartite features.')

    # Append unipartite features to bipartite features for each node, returning
    # a combined feature dictionary. If the donor is not in the unipartite
    # feature graph then we just take the default values (since the node falls
    # below the unipartite threshold from sqlToGraphs):
    features = {}
    for donorNode in graph_funcs.getDonors(bipartite):
        oldNID = donorNode.GetId()
        if oldNID in unipartiteFeatures:
            features[oldNID] = bipartiteFeatures[oldNID] + unipartiteFeatures[oldNID]
        else:
            features[oldNID] = bipartiteFeatures[oldNID] + \
                defaultUnipartiteFeatures(componentFeatureFunc, CNMFeatureFunc)

    timing.finish()
    return features
def loadRecipients(dbNames, filepath):
    timing = Timer('loading Recipients table')

    # CSV column indices to extract, and the transform applied to each column
    extractors = [0, 7, 8, 10, 12, 13, 14, 15, 16, 22, 23, 39, 46, 47, 61,
                  62, 63, 64, 65]
    transforms = [int, str, safeInt, party, str, str, incumb, float, float,
                  int, gender, safeInt, winner, safeFloat, safeFloat,
                  safeFloat, candStatus, int, candOrComm]

    observedKeys = set()

    for db in dbNames:
        initRecipientTable(db)

    with open(filepath, 'r') as f:
        reader = csv.reader(f)
        reader.next()  # skip column headers

        for i, block in enumerate(generateChunk(reader, extractors, transforms)):
            newBlock = filterRecipients(block, observedKeys)
            for db in dbNames:
                commitRecipBlock(db, newBlock)

    timing.finish()
def trainAndTestModels(year, extension, X=None, Y=None, k=10,
                       clf=linear_model.LinearRegression(), transF=None,
                       decomp_func=None):
    timing = Timer('Running regression for %d.%s' % (year, extension))
    if X is None or Y is None:
        X, Y = pickler.load('Data/Recip-Features/%d.%s' % (year, extension))
    if transF:
        Y = transF(Y)
    timing.markEvent('Loaded X and Y')

    rsquareds = []

    # Train and test the regression model on each k-fold set
    kf = KFold(len(Y), k)
    for train, test in kf:
        X_train, X_test = X[train], X[test]
        Y_train, Y_test = Y[train], Y[test]
        if decomp_func:
            decomp_func.fit(X_train)
            X_train = decomp_func.transform(X_train)
            X_test = decomp_func.transform(X_test)
        clf.fit(X_train, Y_train)
        rsquareds.append(clf.score(X_test, Y_test))
    timing.markEvent('Ran regression')

    timing.finish()
    return rsquareds
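# Usage sketch (hypothetical values; PCA and the log transform are
# illustrative choices for the decomp_func/transF hooks, not defaults of
# this pipeline):
#
#     from sklearn import decomposition
#     rsquareds = trainAndTestModels(2000, 'jaccard2',
#                                    transF=np.log1p,
#                                    decomp_func=decomposition.PCA(n_components=10))
#     print sum(rsquareds) / len(rsquareds)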
def __init__(self, path: str, supported_extensions: List[str]):
    self.timer = Timer()
    self.path = path
    self.relative_path = PathExtractor().get_relative_path(path)
    self.supported_extensions = supported_extensions
    self.file_name = PathExtractor().get_file_name(path)
    self.extension = PathExtractor().get_file_extension(self.file_name)
def loadContributors(dbNames, filepath):
    timing = Timer('loading Contributors table')
    extractors = [0, 1, 2, 3]
    transforms = [int, indiv, str, safeFloat]

    for db in dbNames:
        initContributorsTable(db)

    with open(filepath, 'rb') as f:
        reader = csv.reader(f)
        reader.next()  # skip column headers

        for i, block in enumerate(generateChunk(reader, extractors, transforms)):
            for db in dbNames:
                commitContribBlock(db, block)

    timing.finish()
def main():
    script_name: str = PathExtractor().get_file_name(sys.argv[0])
    if len(sys.argv) != 2:
        Logger().usage(f'python {script_name} <wiki.en.raw.txt>')
        return

    file_path = sys.argv[1]
    if PathValidator().is_valid_files([file_path]):
        Logger().info(f'Input file: "{file_path}"')
        Logger().info("Starting to remove stopwords")

        timer = Timer()
        remove_stopwords(file_path)
        Logger().finish_script(timer.get_duration(), script_name)
def main():
    script_name: str = PathExtractor().get_file_name(sys.argv[0])
    if len(sys.argv) != 2:
        Logger().usage(f"python {script_name} <wiki.en.filtered.txt>")
        return

    file_path = sys.argv[1]
    if PathValidator().is_valid_files([file_path]):
        Logger().info(f'Input file: "{file_path}"')
        Logger().info("Starting to lemmatize text")

        timer = Timer()
        lemmatize_text(file_path, timer)
        Logger().finish_script(timer.get_duration(), script_name)
def getResults(year, weightF, graphFiles=None):
    timing = Timer('Running regressions for %d %s' % (year, weightF))
    results = []
    if not graphFiles:
        graphFiles = getGraphFiles(year, weightF)

    for gf in graphFiles:
        X, Y = pickler.load('Data/Recip-Features/%s' % gf)
        rsquareds = cfscore_predictions.trainAndTestModels(year, weightF, X=X, Y=Y)
        results.append([weightF, gf, rsquareds])

    timing.finish()
    return results
def main():
    script_name: str = PathExtractor().get_file_name(sys.argv[0])
    if len(sys.argv) != 2:
        Logger().usage(f'python {script_name} <en.wiki-latest-pages-articles.xml.bz2>')
        return

    file_path: str = sys.argv[1]
    if PathValidator().is_valid_files([file_path]):
        Logger().info(f'Input file: "{file_path}"')
        Logger().info(f'Starting to create wiki corpus from "{file_path}"')

        timer = Timer()
        get_corpus(file_path)
        Logger().finish_script(timer.get_duration(), script_name)
def loadTransactionFile(dbName, csvName, year):
    timing = Timer('loading Transactions_%d into table' % year)

    extractors = [0, 1, 2, 3, 4, 5, 13, 27, 28, 29, 33, 34, 36, 37]
    transforms = [int, str, str, strToFltToInt, str, strToFltToInt, indiv,
                  str, party, candOrComm, str, str, safeFloat, safeFloat]

    initTransactionsTable(dbName)

    with open(csvName, 'r') as f:
        reader = csv.reader(f)
        reader.next()  # skip column headers

        for i, block in enumerate(generateChunk(reader, extractors, transforms)):
            newBlock = filterTransactions(block)
            commitTransBlock(dbName, newBlock)

    timing.finish()
def getSortedMatrixVals(filename):
    timing = Timer('Getting sorted matrix vals')
    adjMat = pickler.load(filename)
    timing.markEvent('Loaded adjacency matrix')

    N = adjMat.shape[0]
    xIndices, yIndices = adjMat.nonzero()
    timing.markEvent('Loaded nonzero indices')

    data = adjMat[xIndices, yIndices]
    timing.markEvent('Loaded nonzero vals')

    flat = np.ravel(data)
    timing.markEvent('Flattened data')

    vals = zip(xIndices, yIndices, flat)
    timing.markEvent('Zipped values')

    # Sort the (row, col, weight) triples from heaviest to lightest edge
    vals.sort(key=lambda v: v[2], reverse=True)
    timing.markEvent('Sorted values')

    return vals, N
def main():
    script_name: str = PathExtractor().get_file_name(sys.argv[0])
    timer = Timer()

    if len(sys.argv) < 2 or len(sys.argv) > 3:
        Logger().usage(f"python {script_name} <file_or_directory_path> [<word2vec.model>]")
        return

    # Optional second argument: a pre-trained word2vec model
    if len(sys.argv) == 3:
        model_path = PathExtractor().get_absolute_path(sys.argv[2])
        if not PathValidator().is_valid_files([model_path]):
            return
        Word2VecModel.instance.set_model(model_path)

    project_path = PathExtractor().get_absolute_path(sys.argv[1])
    if PathValidator().is_valid_paths([project_path]):
        parse(project_path)
        Logger().finish_script(timer.get_duration(), script_name)
def calcAverageWeights(graph, adjMat):
    neighbors = defaultdict(list)
    timing = Timer('Calculating average weights')

    # Get all the nodes that a node borders in the graph
    for edge in graph.Edges():
        nodeid1 = edge.GetSrcNId()
        nodeid2 = edge.GetDstNId()
        neighbors[nodeid1].append(nodeid2)
        neighbors[nodeid2].append(nodeid1)
    timing.markEvent('Got all neighbors')

    # Get the average weight per node connected to
    weights = {}
    i = 0
    for nodeid in neighbors:
        rows = neighbors[nodeid]
        weights[nodeid] = adjMat[rows, nodeid].sum() / float(len(rows))
        i += 1
        if i % 1000 == 0:
            timing.markEvent('Done with %d out of %d' % (i, len(neighbors)))

    return weights
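# Toy check of the averaging step above (assumes scipy.sparse as sp; the
# matrix is symmetric, so slicing column nodeid at the neighbor rows picks up
# that node's incident edge weights; node 0 borders only node 1, so its
# average is the lone weight 2.0):
#
#     adjMat = sp.csr_matrix([[0.0, 2.0], [2.0, 0.0]])
#     # adjMat[[1], 0].sum() / float(1) == 2.0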
def extractUnipartiteFeatures(unipartiteGraph, adjMat):
    timing = Timer('extracting unipartite features')

    features = defaultdict(list)
    componentFeatureFunc, CNMFeatureFunc, idToCNM = getUnipartiteSurfaceFeatures(
        unipartiteGraph, adjMat, features)
    timing.markEvent('1. Extracted surface features')

    # Average weight of edges:
    avgWeights = calcAverageWeights(unipartiteGraph, adjMat)
    timing.markEvent('2. Computed average weights.')

    # Size of connected component (currently disabled):
    # cnctComponents = calcCnctComponents(unipartiteGraph)
    timing.markEvent('3. Computed connected components.')

    # Size of CNM community:
    communities = calcCommunities(idToCNM)
    timing.markEvent('4. Computed CNM communities.')

    # Pagerank:
    pageRanks = snap.TIntFltH()
    snap.GetPageRank(unipartiteGraph, pageRanks)
    timing.markEvent('5. Computed PageRank.')

    # Combine the graph-wide features with the existing surface features:
    for nid in features:
        features[nid].append(avgWeights[nid])
        # features[nid].append(cnctComponents[nid])
        features[nid].append(communities[nid])
        features[nid].append(pageRanks[nid])

    timing.finish()
    return features, componentFeatureFunc, CNMFeatureFunc
def processYearAndWeight(year, weighting, percents=None, thresholds=None):
    timing = Timer('Running for year %d and weight %s' % (year, weighting))

    adjMatFile = 'Data/Unipartite-Matrix/%d.%s' % (year, weighting)
    sortedVals, N = getSortedMatrixVals(adjMatFile)
    timing.markEvent('Got sorted vals')

    if percents:
        for p in percents:
            outfile = 'Data/Unipartite-Graphs/%d.%s_percent_%f.graph' \
                % (year, weighting, p)
            graph = pruneGraphByPercent(sortedVals, N, p)
            graph_funcs.saveGraph(graph, outfile)
            timing.markEvent('Finished for %f percent' % p)

    if thresholds:
        for t in thresholds:
            outfile = 'Data/Unipartite-Graphs/%d.%s_threshold_%f.graph' \
                % (year, weighting, t)
            graph = pruneGraphByThreshold(sortedVals, N, t)
            graph_funcs.saveGraph(graph, outfile)
            timing.markEvent('Finished for threshold %f' % t)

    timing.finish()
def getRecipFeatures(graph, donorFeatures, receiptsFromDonor, totalReceipts,
                     totalDonations, partialFeatures, fullFeatures,
                     includeDonorFeatures=False):
    timing = Timer('Getting recipient features')
    recipFeatures = {}

    for recipNode in graph_funcs.getRecipients(graph, cfs=True):
        rnodeid = recipNode.GetId()

        # Add a donor feature indicating what percent of this donor's
        # donations went to this candidate.
        for donor in receiptsFromDonor[rnodeid]:
            pct = receiptsFromDonor[rnodeid][donor] / float(totalDonations[donor])
            donorFeatures[donor].append(pct)

        if includeDonorFeatures:
            recipFeatures[rnodeid] = np.append(
                getPartialNodeRecipSpecificFeatures(graph, rnodeid),
                processDonorFeaturesForRecip(donorFeatures,
                                             receiptsFromDonor[rnodeid]))
        else:
            recipFeatures[rnodeid] = \
                processDonorFeaturesForRecip(donorFeatures,
                                             receiptsFromDonor[rnodeid])

        # Remove the temporarily added feature for what percent of this
        # donor's donations went to this candidate.
        for donor in receiptsFromDonor[rnodeid]:
            donorFeatures[donor].pop()

    timing.finish()
    return recipFeatures
def runFullPipeline(year):
    timing = Timer('Running pipeline for %d' % year)
    weightings = ('adamic', 'cosine', 'jaccard', 'jaccard2', 'weighted_adamic')

    bigraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
    newToOldIDs = pickler.load('Data/Unipartite-NodeMappings/%d.newToOld' % year)

    for weightF in weightings:
        graphFiles = getGraphFiles(year, weightF)
        adjMat = pickler.load('Data/Unipartite-Matrix/%d.%s' % (year, weightF))
        timing.markEvent('Loaded everything for donor features')

        genDonorFeatures(year, weightF, graphFiles=graphFiles, bigraph=bigraph,
                         adjMat=adjMat, newToOldIDs=newToOldIDs)
        del adjMat  # free the incredible amount of memory for the adjacency matrix

        genRecipFeatures(year, weightF, graphFiles=graphFiles, bigraph=bigraph)

        results = getResults(year, weightF, graphFiles=graphFiles)
        pickler.save(results, 'Data/pruning_optimizations.%d.%s' % (year, weightF))
        timing.markEvent('Finished with %s' % weightF)

    timing.finish()
def init(self):
    self.timer = Timer(3000000).start()
################################################################################
# Module command-line behavior                                                 #
################################################################################

if __name__ == '__main__':
    # All available extensions:
    # extensions = ('jaccard', 'jaccard2', 'affinity', 'cosine', 'adamic',
    #               'weighted_adamic', 'baseline')
    extensions = ('jaccard2', 'baseline')

    for year in sys.argv[1:]:
        year = int(year)
        timing = Timer('Running regressions for %d' % year)
        for extension in extensions:
            rsquareds = trainAndTestModels(year, extension)
            avgRSq = sum(rsquareds) / len(rsquareds)
            print '%d %s: %f' % (year, extension, avgRSq)
            with open('Data/Results/%d.%s' % (year, extension), 'w') as f:
                f.write('K-fold validation results:\n')
                f.write('Average: %f\n\n' % avgRSq)
                for i, r in enumerate(rsquareds):
                    f.write('%d: %f\n' % (i, r))
    # 'Factor Analysis': FactorAnalysis(n_components='mle'),
    # 'ICA': FastICA(n_components='mle'),
}
clfs = {
    'OLS': linear_model.LinearRegression(),
    'Random Forest': ensemble.RandomForestRegressor(),
}
extensions = ('jaccard', 'jaccard2', 'cosine', 'adamic', 'weighted_adamic')

results = {}
resultsList = []
years = [int(arg) for arg in sys.argv[1:]]
timing = Timer('Running everything')

for year in years:
    timing.markEvent('Running for year %d' % year)
    results[year] = {}
    for extension in extensions:
        timing.markEvent('Running for extension %s' % extension)
        results[year][extension] = {}
        for clfname, clf in clfs.iteritems():
            timing.markEvent('Running for classifier %s' % clfname)
            results[year][extension][clfname] = {}
            for decompname, decompFunction in decompFunctions.iteritems():
                timing.markEvent('Running for decomp function %s' % decompname)
                rsquareds = cfscore_predictions.trainAndTestModels(
                    year, extension, clf=clf, decomp_func=decompFunction)
                resultsList.append(
                    (year, extension, clfname, decompname, tuple(rsquareds)))
            communities[nid] = 0.0
        else:
            communities[nid] = communityIndex
            communityIndex += 1

    return communities


################################################################################
# Module command-line behavior                                                 #
################################################################################

if __name__ == '__main__':
    for arg in sys.argv[1:]:
        year = int(arg)
        timing = Timer('creating unipartite graph for %d' % year)

        bipartiteGraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
        unipartiteGraph = graph_funcs.loadGraph('Data/Unipartite-Graphs/%d.graph' % year,
                                                snap.TUNGraph)
        newToOldIDs = pickler.load('Data/Unipartite-NodeMappings/%d.newToOld' % year)
        timing.markEvent('Loaded input graphs/matrices.')

        # All weightings:
        # for weightF in ['jaccard', 'affinity', 'jaccard2', 'cosine', 'adamic',
        #                 'weighted_adamic']:
        for weightF in ['jaccard2']:
            print '******* %s *******' % weightF
            adjMatrix = pickler.load('Data/Unipartite-Matrix/%d.%s' % (year, weightF))
            adjMatrix = adjMatrix.tocsc()
    fullFeatures['winner'] = getIntAttrFeatureVec(graph, 'winner', full=True)

    return partialFeatures, fullFeatures


################################################################################
# Module command-line behavior                                                 #
################################################################################

if __name__ == '__main__':
    # All weightings:
    # weightings = ('jaccard', 'jaccard2', 'affinity', 'cosine', 'adamic',
    #               'weighted_adamic')
    weightings = ('jaccard2',)

    for year in sys.argv[1:]:
        year = int(year)
        timing = Timer('Generating features for %d' % year)

        graph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)
        receiptsFromDonor, totalReceipts, totalDonations = getDonationAmounts(graph)
        partialFeatures, fullFeatures = getCategoricalGraphFeatures(graph)

        baselineFeatures = getBaselineFeatures(graph, receiptsFromDonor,
                                               totalReceipts, totalDonations,
                                               partialFeatures, fullFeatures)
        saveFeatures(graph, baselineFeatures,
                     'Data/Recip-Features/%d.baseline' % year)
        timing.markEvent('Generated baseline features')

        for weighting in weightings:
            donorFeatures = pickler.load('Data/Features/%d%s.features'
                                         % (year, weighting))
            recipFeatures = getRecipFeatures(graph, donorFeatures,
def getNonzeroElems(year, weightF):
    timing = Timer('Loading nonzero elems for year %d and weightF %s' % (year, weightF))
    adjMat = pickler.load('Data/Unipartite-Matrix/%d.%s' % (year, weightF))
    timing.finish()
    return adjMat[adjMat.nonzero()]
def createDonorDonorGraph(year, weightF):
    timing = Timer('creating donor-donor graph for %d' % year)

    # Load the old bipartite graph
    bipartiteGraph = graph_funcs.loadGraph('Data/Bipartite-Graphs/%d.graph' % year)

    # Load the info about each donor and their recipients
    numDonations, totalAmount, cands, transactions, amounts, totalReceipts = \
        getDonorInfos(bipartiteGraph)
    timing.markEvent('Got info about donor nodes')

    # Create initial unipartite graph with just nodes and node attributes
    unipartiteGraph, oldToNew, newToOld = cloneBipartiteNodes(bipartiteGraph, cands)
    timing.markEvent('Finished cloning nodes')

    jaccardData = []
    jaccard2Data = []
    affinityData = []
    cosineData = []
    adamicData = []
    weightedAdamicData = []
    r = []
    c = []

    # Add the weighted edges for every relevant pair of donor nodes
    nodesDone = 0
    for i, newID1 in enumerate(newToOld.keys()):
        oldID1 = newToOld[newID1]
        for newID2 in newToOld.keys()[i + 1:]:
            oldID2 = newToOld[newID2]
            sharedCands = cands[oldID1].intersection(cands[oldID2])
            if not sharedCands:
                continue

            # Calculate the weight
            weights = weightF(oldID1, oldID2, sharedCands, numDonations,
                              totalAmount, cands, transactions, amounts,
                              totalReceipts)

            # Record each weight in both directions so the adjacency
            # matrices come out symmetric
            r.append(newID1)
            r.append(newID2)
            c.append(newID2)
            c.append(newID1)
            jaccardData.append(weights['jaccard'])
            jaccardData.append(weights['jaccard'])
            jaccard2Data.append(weights['jaccard2'])
            jaccard2Data.append(weights['jaccard2'])
            affinityData.append(weights['affinity'])
            affinityData.append(weights['affinity'])
            cosineData.append(weights['cosine'])
            cosineData.append(weights['cosine'])
            adamicData.append(weights['adamic'])
            adamicData.append(weights['adamic'])
            weightedAdamicData.append(weights['weighted_adamic'])
            weightedAdamicData.append(weights['weighted_adamic'])

            # Add the edge between the two nodes
            unipartiteGraph.AddEdge(newID1, newID2)

        nodesDone += 1
        if nodesDone % 100 == 0:
            timing.markEvent('Finished %d outer loops out of %d' %
                             (nodesDone, unipartiteGraph.GetNodes()))

    N = len(newToOld)
    jaccardAdjMat = sp.csr_matrix((jaccardData, (r, c)), shape=(N, N))
    jaccard2AdjMat = sp.csr_matrix((jaccard2Data, (r, c)), shape=(N, N))
    affinityAdjMat = sp.csr_matrix((affinityData, (r, c)), shape=(N, N))
    cosineAdjMat = sp.csr_matrix((cosineData, (r, c)), shape=(N, N))
    adamicAdjMat = sp.csr_matrix((adamicData, (r, c)), shape=(N, N))
    weightedAdamicAdjMat = sp.csr_matrix((weightedAdamicData, (r, c)), shape=(N, N))

    timing.finish()
    return (unipartiteGraph, jaccardAdjMat, jaccard2AdjMat, affinityAdjMat,
            cosineAdjMat, adamicAdjMat, weightedAdamicAdjMat, newToOld, oldToNew)
def getCorrel(year, weightFs):
    timing = Timer('Getting correlation matrix for year %d' % year)
    append = lambda x, y: np.append(x, y, axis=0)
    data = reduce(append, [getNonzeroElems(year, weightF) for weightF in weightFs])
    timing.finish()
    return np.corrcoef(data)
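# Usage sketch (2000 is a hypothetical year; the weightings mirror
# runFullPipeline above). The result is a len(weightFs) x len(weightFs)
# matrix of pairwise correlations between the nonzero weights of each
# scheme; stacking row-wise works because createDonorDonorGraph builds every
# matrix from the same (r, c) indices, so the nonzero patterns line up:
#
#     weightFs = ('adamic', 'cosine', 'jaccard', 'jaccard2', 'weighted_adamic')
#     correl = getCorrel(2000, weightFs)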
# Weighted Adamic-Adar Similarity Index (see slide 8 of
# http://www.slideshare.net/hajimesasaki1/picmet15sasaki20150805ppt):
def weightedAdamic(id1, id2, sharedCands, numDonations, totalAmount, cands,
                   transactions, amounts, totalReceipts):
    score = sum([(amounts[id1][cand] + amounts[id2][cand]) /
                 (1.0 + math.log(totalReceipts[cand], 10))
                 for cand in sharedCands])
    return 'weighted_adamic', score


################################################################################
# Module command-line behavior                                                 #
################################################################################

if __name__ == '__main__':
    overallTiming = Timer('all unipartite graphs')
    for arg in sys.argv[1:]:
        year = int(arg)
        timing = Timer('Creating unipartite graph for %d' % year)

        graph, wmat1, wmat2, wmat3, wmat4, wmat5, wmat6, newToOld, oldToNew = \
            createDonorDonorGraph(year, getWeightScores)

        # Save the SNAP graph:
        outfile = 'Data/Unipartite-Graphs/%d.graph' % year
        graph_funcs.saveGraph(graph, outfile)

        # Save the weight matrices:
        matrixPrefix = 'Data/Unipartite-Matrix/%d' % year
        pickler.save(wmat1, matrixPrefix + '.jaccard')
        pickler.save(wmat2, matrixPrefix + '.jaccard2')