Esempio n. 1
0
def pgc_zip(zip_filename, pgc_filenames):
    """
    Pack parallel graph corpus files and their graphbank files into a zip.

    Every archive member is stored under a single top-level directory whose
    name is the base name of the zip file without its extension. Only the
    base name of each source file is kept, so the directory layout of the
    inputs is flattened.

    @param zip_filename: path of the zip archive to create (opened in "w"
    mode, so an existing file is overwritten)

    @param pgc_filenames: corpus file name(s) or pattern(s), passed to
    multiglob() for expansion
    """
    arch_dir = os.path.splitext(os.path.basename(zip_filename))[0]
    zip_arch = zipfile.ZipFile(zip_filename, "w")

    # try/finally guarantees the archive is closed (and its central
    # directory written) even if reading a corpus or adding a file fails
    try:
        for corpus_filename in multiglob(pgc_filenames):
            # add corpus to archive
            arch_filename = os.path.join(arch_dir,
                                         os.path.basename(corpus_filename))
            zip_arch.write(corpus_filename, arch_filename)

            # the corpus is opened only to find out which graphbank files
            # it refers to; presumably LOAD_NONE skips loading the graphs
            # themselves -- TODO confirm
            corpus = ParallelGraphCorpus(inf=corpus_filename,
                                         graph_loading=LOAD_NONE)

            for gb in corpus._graphbanks():
                gb_filename = gb.get_file_path()
                # add graphbank files to archive
                arch_filename = os.path.join(arch_dir,
                                             os.path.basename(gb_filename))
                zip_arch.write(gb_filename, arch_filename)
    finally:
        zip_arch.close()
Esempio n. 2
0
def pgc_zip(zip_filename, pgc_filenames):
    """
    Create a zip archive holding parallel graph corpora plus the graphbank
    files they reference.

    Members are placed in one directory inside the archive, named after the
    zip file (base name, extension stripped); source files are stored under
    their base name only.

    @param zip_filename: path of the archive to write; opened with mode "w"

    @param pgc_filenames: corpus file name(s) or pattern(s) handed to
    multiglob()
    """
    zip_arch = zipfile.ZipFile(zip_filename, "w")

    # Close the archive on every exit path: without this an exception in
    # the loop would leave the file handle open and the zip unfinalised.
    try:
        arch_dir = os.path.splitext(os.path.basename(zip_filename))[0]

        for corpus_filename in multiglob(pgc_filenames):
            # add corpus to archive
            zip_arch.write(
                corpus_filename,
                os.path.join(arch_dir, os.path.basename(corpus_filename)))

            # open the corpus just to enumerate its graphbanks
            # (LOAD_NONE presumably avoids reading the graphs -- verify)
            corpus = ParallelGraphCorpus(inf=corpus_filename,
                                         graph_loading=LOAD_NONE)

            for gb in corpus._graphbanks():
                gb_filename = gb.get_file_path()
                # add graphbank files to archive
                zip_arch.write(
                    gb_filename,
                    os.path.join(arch_dir, os.path.basename(gb_filename)))
    finally:
        zip_arch.close()
Esempio n. 3
0
# -f/--format: boolean switch; per its help text it requests indented XML
parser.add_argument(
    "-f", "--format",
    action="store_true",
    help="output indented XML"
    )

# -V/--verbose: boolean switch that turns on the progress messages
# written to stderr (help text typo "ouput" corrected)
parser.add_argument(
    "-V", "--verbose",
    action="store_true",
    help="verbose output to stderr"
    )


args = parser.parse_args()

# file names to process; multiglob presumably expands file name patterns
# -- confirm against its definition
pgc_fns = multiglob(args.file)

def log(s):
    """Print s to stderr, preceded by "***", if the verbose flag is set."""
    if not args.verbose:
        return
    print >>sys.stderr, "***", s
        
        
# The first file seeds the corpus; each remaining file is read into its own
# corpus and merged into the first one in place.
# NOTE(review): indexing pgc_fns[0] assumes at least one file name was
# found; an empty sequence would raise IndexError here -- confirm upstream.
log("Reading corpus from " + pgc_fns[0])
        
corpus = ParallelGraphCorpus(inf=pgc_fns[0])

for fn in pgc_fns[1:]:
    log("Joining corpus from " + fn)
    # __iadd__ also checks if another corpus is compatible w.r.t. relations
    # and meta-data
    corpus += ParallelGraphCorpus(inf=fn)
Esempio n. 4
0
                    "--config",
                    metavar="FILE",
                    help="configuration file to set up a corpus aligner")

# -x/--clear: forwarded to the aligner as its "clear" argument
parser.add_argument(
    "-x", "--clear",
    action="store_true",
    help="remove all existing alignments",
)

# -i/--in-place: write each corpus back to the file it was read from
parser.add_argument(
    "-i", "--in-place",
    action="store_true",
    help="modify input file(s)",
)

args = parser.parse_args()

# Choose the aligner: the stock CorpusAligner unless a configuration file
# was supplied, in which case that file is loaded as a module and used to
# set up the aligner.
if not args.config:
    from daeso_nl.ga.corpus import CorpusAligner
    corpus_aligner = CorpusAligner()
else:
    config = imp.load_source("config", args.config)
    corpus_aligner = set_up_corpus_aligner(config)

# Align every corpus file in turn and write out the result.
for inf in multiglob(args.pgc_files):
    corpus = ParallelGraphCorpus(inf=inf)
    corpus_aligner.align(corpus, clear=args.clear)

    if args.in_place:
        # overwrite the input file
        corpus.write(outf=inf, pprint=True)
        continue

    # no output file given: write() uses its own default destination
    corpus.write(pprint=True)
Esempio n. 5
0
# -x/--clear is passed on to the aligner; -i/--in-place selects writing
# each corpus back to its input file.
parser.add_argument("-x", "--clear", action="store_true",
                    help="remove all existing alignments")
parser.add_argument("-i", "--in-place", action="store_true",
                    help="modify input file(s)")

args = parser.parse_args()

# Without a configuration file fall back on the default CorpusAligner;
# otherwise load the file as a module and build the aligner from it.
if not args.config:
    from daeso_nl.ga.corpus import CorpusAligner
    corpus_aligner = CorpusAligner()
else:
    config = imp.load_source("config", args.config)
    corpus_aligner = set_up_corpus_aligner(config)

# Process the corpus files one by one.
for inf in multiglob(args.pgc_files):
    corpus = ParallelGraphCorpus(inf=inf)
    corpus_aligner.align(corpus, clear=args.clear)

    if not args.in_place:
        # no output file given: write() uses its own default destination
        corpus.write(pprint=True)
    else:
        # replace the input file with the aligned corpus
        corpus.write(outf=inf, pprint=True)
Esempio n. 6
0
                    help="parallel graph corpus filename, "
                    "or quoted file name pattern for parallel graph corpora")

# -f/--format: boolean switch; per its help text it requests indented XML
parser.add_argument("-f",
                    "--format",
                    action="store_true",
                    help="output indented XML")

# -V/--verbose: boolean switch enabling progress messages on stderr
# (help text typo "ouput" corrected)
parser.add_argument("-V",
                    "--verbose",
                    action="store_true",
                    help="verbose output to stderr")

args = parser.parse_args()

# file names to process; multiglob presumably expands file name patterns
# -- confirm against its definition
pgc_fns = multiglob(args.file)


def log(s):
    """Write s to stderr with a "***" prefix; silent unless verbose is on."""
    if not args.verbose:
        return
    print >> sys.stderr, "***", s


# Read the first file into the corpus that the following files are joined to.
# NOTE(review): pgc_fns[0] assumes at least one file name was found; an
# empty sequence would raise IndexError here -- confirm upstream.
log("Reading corpus from " + pgc_fns[0])

corpus = ParallelGraphCorpus(inf=pgc_fns[0])

for fn in pgc_fns[1:]:
    log("Joining corpus from " + fn)
    # __iadd__ also checks if another corpus is compatible w.r.t. relations
    # and meta-data