Esempio n. 1
0
 def outputGraph(self, outf, compute_all_edges=False):
     """
     For debugging purposes: write the similarity/relevance graph to a file.

     The output starts with a ``nodedef>`` header followed by one row per
     entry of ``self.authors``, then an ``edgedef>`` header followed by one
     row per edge.  (The headers look like the GDF graph format -- confirm
     against whatever tool consumes the file.)

     :param outf: writable text file-like object that receives the graph
     :param compute_all_edges: if false (the default), only the edges stored
         in ``self.edge_set`` are written, i.e. the spanning graph of each
         connected component; if true, ``self.classify`` is re-run on every
         pair of authors and the full graph is written
     """
     print('nodedef>name VARCHAR,label VARCHAR,pid VARCHAR,visibility VARCHAR,relevance FLOAT,relevant VARCHAR', file=outf)
     for x, v in self.authors.items():
         paper = v.paper
         visibility = paper.visibility
         # Papers dated 2012 or earlier are reported as NOT_LABELLED,
         # whatever visibility is stored on them.
         if paper.year <= 2012:
             visibility = 'NOT_LABELLED'
         row = [
             x,
             unidecode(paper.title),
             paper.id,
             visibility,
             self.relevance.get(x, None),
             # Authors without a relevance score count as not relevant.
             self.relevance.get(x, -1.) > 0.,
         ]
         print(nocomma(row), file=outf)

     print('edgedef>node1 VARCHAR,node2 VARCHAR', file=outf)
     if not compute_all_edges:
         for x, y in self.edge_set:
             # Entries whose second endpoint is None are skipped.
             if y is not None:
                 print(nocomma([x, y]), file=outf)
         return

     # Full graph: classify every unordered pair exactly once, pairing each
     # author with all the authors that precede it in the key order.
     author_pks = list(self.authors.keys())
     for i, x in enumerate(author_pks):
         for y in author_pks[:i]:
             if self.classify(x, y):
                 print(nocomma([x, y]), file=outf)
Esempio n. 2
0
 def outputGraph(self, author_set, outfile, logf):
     """
     Write the graph over ``author_set`` to ``outfile`` and log every
     pairwise classification to ``logf``.

     ``outfile`` receives a ``nodedef>`` header with one row per author,
     then an ``edgedef>`` header with one row per pair for which
     ``self.classifyData`` returns a truthy value.  ``logf`` receives one
     ``<pk>-<pk>\\t<output>`` line for every pair, whatever the outcome.

     :param author_set: iterable of author objects; each must expose ``pk``
         and ``paper`` (with ``title``, ``id``, ``visibility``, ``year``)
     :param outfile: writable text file-like object for the graph
     :param logf: writable text file-like object for the per-pair log
     """
     print('nodedef>name VARCHAR,label VARCHAR,pid VARCHAR,visibility VARCHAR', file=outfile)
     # Materialise the iterable once.  Iterating author_set a second time
     # (as was done before) yields nothing for one-shot iterables, which
     # silently dropped every edge; a single list also guarantees that
     # authors[i] and author_data[i] describe the same author.
     authors = list(author_set)
     author_data = []
     for author in authors:
         paper = author.paper
         visibility = paper.visibility
         # Papers dated 2012 or earlier are reported as NOT_LABELLED,
         # whatever visibility is stored on them.
         if paper.year <= 2012:
             visibility = 'NOT_LABELLED'
         author_data.append(self.lstData(author))
         print(nocomma([author.pk, unidecode(paper.title), paper.id, visibility]), file=outfile)

     print('edgedef>node1 VARCHAR,node2 VARCHAR', file=outfile)
     # Classify every unordered pair exactly once: each author against all
     # the authors that precede it in the list.
     for i, author in enumerate(authors):
         for j in range(i):
             other = authors[j]
             output = self.classifyData(author_data[i], author_data[j])
             print("{!s}-{!s}\t{!s}".format(author.pk, other.pk, output), file=logf)
             if output:
                 print(nocomma([author.pk, other.pk]), file=outfile)
Esempio n. 3
0
 def test_nocomma(self):
     # Each case pairs the list of fields handed to nocomma with the exact
     # line it is expected to produce: plain fields are comma-joined, a
     # comma inside a field is dropped, and empty or newline-only fields
     # come out as a single space.
     cases = (
         (['a', 'b', 'cd'], 'a,b,cd'),
         (['a,', 'b'], 'a,b'),
         (['abc', '', '\n', 'def'], 'abc, , ,def'),
     )
     for fields, expected in cases:
         self.assertEqual(nocomma(fields), expected)