Esempio n. 1
0
 def test_purge(self):
     """Exercise purge() on corpora joined from identical, equal and
     different graphbanks, and after all graph pairs are deleted."""
     first_fn = "data/corpus-1.pgc"
     second_fn = "data/corpus-2.pgc"

     # Case 1: a corpus joined with itself shares the very same graphbank
     # objects, so purging must leave the graphbanks untouched.
     merged = ParallelGraphCorpus(inf=first_fn)
     merged += merged
     before = merged._graphbanks()
     self.assertEqual(len(before), 2)
     merged.purge()
     self.assertEqual(before, merged._graphbanks())

     # Case 2: the same file loaded twice gives equal but distinct
     # graphbanks; purging is expected to collapse the four into two.
     merged = ParallelGraphCorpus(inf=first_fn)
     extra = ParallelGraphCorpus(inf=first_fn)
     merged += extra
     before = merged._graphbanks()
     self.assertEqual(len(before), 4)
     merged.purge()
     self.assertEqual(len(merged._graphbanks()), 2)

     # Case 3: corpora read from different files have different
     # graphbanks, so purging must keep all four of them.
     merged = ParallelGraphCorpus(inf=first_fn)
     extra = ParallelGraphCorpus(inf=second_fn)
     merged += extra
     before = merged._graphbanks()
     self.assertEqual(len(before), 4)
     merged.purge()
     self.assertEqual(before, merged._graphbanks())

     # Case 4: once every graph pair is removed, the corpus no longer
     # depends on any graphbank.
     del merged[:]
     remaining = merged._graphbanks()
     self.assertEqual(len(remaining), 0)
Esempio n. 2
0
    )


args = parser.parse_args()

# Resolve the command-line file argument(s) into the list of corpus files
# to join (presumably by glob expansion -- confirm against multiglob).
pgc_fns = multiglob(args.file)

# The code below indexes pgc_fns[0]; report an empty result as a usage
# error instead of letting it surface as a bare IndexError traceback.
if not pgc_fns:
    parser.error("no corpus files found for %r" % (args.file,))

def log(s):
    """Write the progress message `s` to stderr, prefixed with "***".

    Nothing is written unless `args.verbose` is true.
    """
    if args.verbose:
        # The former "print >>sys.stderr" form is Python 2-only syntax and
        # raises TypeError under Python 3; a plain write emits the same
        # line on both versions.
        sys.stderr.write("*** %s\n" % (s,))
        
        
# The first file seeds the joined corpus; every other file is added to it.
first_fn = pgc_fns[0]
log("Reading corpus from " + first_fn)
corpus = ParallelGraphCorpus(inf=first_fn)

for other_fn in pgc_fns[1:]:
    log("Joining corpus from " + other_fn)
    # In-place addition (__iadd__) also checks that the other corpus is
    # compatible with respect to relations and meta-data.
    corpus += ParallelGraphCorpus(inf=other_fn)

# Joining can leave duplicate graphbanks in memory; purge them before output.
log("Purging corpus")
corpus.purge()

log("Writing corpus")
corpus.write(pprint=args.format)

Esempio n. 3
0
                    "--verbose",
                    action="store_true",
                    help="verbose ouput to stderr")

args = parser.parse_args()

# Resolve the command-line file argument(s) into the list of corpus files
# to join (presumably by glob expansion -- confirm against multiglob).
pgc_fns = multiglob(args.file)

# The code below indexes pgc_fns[0]; report an empty result as a usage
# error instead of letting it surface as a bare IndexError traceback.
if not pgc_fns:
    parser.error("no corpus files found for %r" % (args.file,))


def log(s):
    """Write the progress message `s` to stderr, prefixed with "***".

    Nothing is written unless `args.verbose` is true.
    """
    if args.verbose:
        # The former "print >> sys.stderr" form is Python 2-only syntax and
        # raises TypeError under Python 3; a plain write emits the same
        # line on both versions.
        sys.stderr.write("*** %s\n" % (s,))


# The first file seeds the joined corpus; every other file is added to it.
first_fn = pgc_fns[0]
log("Reading corpus from " + first_fn)
corpus = ParallelGraphCorpus(inf=first_fn)

for other_fn in pgc_fns[1:]:
    log("Joining corpus from " + other_fn)
    # In-place addition (__iadd__) also checks that the other corpus is
    # compatible with respect to relations and meta-data.
    corpus += ParallelGraphCorpus(inf=other_fn)

# Joining can leave duplicate graphbanks in memory; purge them before output.
log("Purging corpus")
corpus.purge()

log("Writing corpus")
corpus.write(pprint=args.format)