def outputGraph(self, outf, compute_all_edges=False):
    """
    For debugging purposes: write the similarity/relevance graph to a file.

    The output consists of a ``nodedef>`` section with one row per entry of
    ``self.authors``, followed by an ``edgedef>`` section with one row per
    edge (this looks like the GDF graph format -- TODO confirm).

    :param outf: writable text file-like object receiving the graph.
    :param compute_all_edges: when false (the default), only the edges
        stored in ``self.edge_set`` are written (the spanning graph of each
        connected component). When true, ``self.classify`` is re-run on
        every unordered pair of authors and the full graph is written.
    """
    print('nodedef>name VARCHAR,label VARCHAR,pid VARCHAR,visibility VARCHAR,relevance FLOAT,relevant VARCHAR', file=outf)
    for author_pk, author in self.authors.items():
        paper = author.paper
        visibility = paper.visibility
        # Papers from 2012 or earlier are reported as unlabelled, whatever
        # visibility they carry.
        if paper.year <= 2012:
            visibility = 'NOT_LABELLED'
        row = [
            author_pk,
            unidecode(paper.title),
            paper.id,
            visibility,
            # Raw score, or None when no relevance is known for this author.
            self.relevance.get(author_pk),
            # An author without a score counts as not relevant.
            self.relevance.get(author_pk, -1.) > 0.,
        ]
        print(nocomma(row), file=outf)

    print('edgedef>node1 VARCHAR,node2 VARCHAR', file=outf)
    if compute_all_edges:
        author_pks = list(self.authors)
        # Visit each unordered pair once: every author against all the
        # authors that precede it.
        for position, x in enumerate(author_pks):
            for y in author_pks[:position]:
                if self.classify(x, y):
                    print(nocomma([x, y]), file=outf)
    else:
        for x, y in self.edge_set:
            # Entries whose second endpoint is None do not describe an edge.
            if y is not None:
                print(nocomma([x, y]), file=outf)
def outputGraph(self, author_set, outfile, logf):
    """
    Write the graph obtained by classifying every pair of the given authors.

    A ``nodedef>`` section with one row per author is written to `outfile`,
    followed by an ``edgedef>`` section holding one row for each pair on
    which ``self.classifyData`` returns a truthy value. The outcome of every
    pairwise classification is also logged to `logf`, one line per pair, as
    ``<pk1>-<pk2><TAB><output>``.

    :param author_set: iterable of authors; each must expose ``pk`` and
        ``paper`` (with ``title``, ``id``, ``year`` and ``visibility``).
    :param outfile: writable text file-like object receiving the graph.
    :param logf: writable text file-like object receiving the
        classification log.
    """
    # Materialize the input once, so that the node rows, the feature vectors
    # and the pairwise loop all see the same authors in the same order, even
    # when `author_set` can only be iterated a single time.
    authors = list(author_set)

    print('nodedef>name VARCHAR,label VARCHAR,pid VARCHAR,visibility VARCHAR', file=outfile)
    author_data = []
    for author in authors:
        paper = author.paper
        visibility = paper.visibility
        # Papers from 2012 or earlier are reported as unlabelled.
        if paper.year <= 2012:
            visibility = 'NOT_LABELLED'
        # Compute the per-author data once; it is reused for every pair below.
        author_data.append(self.lstData(author))
        print(nocomma([author.pk, unidecode(paper.title), paper.id, visibility]), file=outfile)

    print('edgedef>node1 VARCHAR,node2 VARCHAR', file=outfile)
    # Classify each unordered pair once: every author against its predecessors.
    for i, (first, first_data) in enumerate(zip(authors, author_data)):
        for j in range(i):
            second = authors[j]
            output = self.classifyData(first_data, author_data[j])
            print(f"{first.pk!s}-{second.pk!s}\t{output!s}", file=logf)
            if output:
                print(nocomma([first.pk, second.pk]), file=outfile)
def test_nocomma(self):
    """
    Check the comma-separated line produced by nocomma.

    The expected values cover plain fields, a field that itself contains a
    comma, and empty / newline-only fields.
    """
    cases = [
        (['a', 'b', 'cd'], 'a,b,cd'),
        (['a,', 'b'], 'a,b'),
        (['abc', '', '\n', 'def'], 'abc, , ,def'),
    ]
    for fields, expected in cases:
        self.assertEqual(nocomma(fields), expected)