def write_annotation_file(anno_filename, doc):
    """
    Serialise a GlozzDocument as XML at the given path.

    Delegates to ``glozz.write_annotation_file``, always supplying the
    module's ``STAC_OUTPUT_SETTINGS`` as the output settings.

    :param anno_filename: path of the annotation file to write
    :param doc: the GlozzDocument to write out
    """
    output_settings = STAC_OUTPUT_SETTINGS
    glozz.write_annotation_file(anno_filename, doc,
                                settings=output_settings)
def write_glozz(gdoc, path_stub):
    """
    Write a Glozz document as a .ac/.aa pair.

    The document text is encoded as UTF-8 and written to
    ``path_stub + '.ac'``; the document itself is then written to
    ``path_stub + '.aa'`` via ``glozz.write_annotation_file``.

    Note that ``gdoc.hashcode`` is overwritten in place with the value
    ``glozz.hashcode`` computes over the encoded text, before the .aa
    file is written.

    :param gdoc: document to save; must provide ``text()``
    :param path_stub: output path without the .ac/.aa extension
    """
    # The payload is encoded bytes, so wrap it in a bytes buffer.
    # io.BytesIO exists on both Python 2.6+ and Python 3, whereas the
    # StringIO module is Python 2 only.
    from io import BytesIO

    ac_path = path_stub + '.ac'
    aa_path = path_stub + '.aa'
    gdoc_bytes = gdoc.text().encode('utf-8')

    with open(ac_path, 'wb') as ac_f:
        ac_f.write(gdoc_bytes)

    gdoc.hashcode = glozz.hashcode(BytesIO(gdoc_bytes))
    glozz.write_annotation_file(aa_path, gdoc)
def save_document(output_dir, k, doc):
    """
    Save a document as a Glozz .ac/.aa pair

    The .aa file is always written, from a shallow copy of ``doc`` whose
    ``hashcode`` is recomputed over the UTF-8 encoded text (``doc`` itself
    is left untouched). The .ac file is only written when ``k.stage`` is
    ``'unannotated'``.

    :param output_dir: directory passed on to ``output_path_stub``
    :param k: key of the document; its ``stage`` picks the output settings
    :param doc: the document to save
    """
    path_stub = output_path_stub(output_dir, k)
    mk_parent_dirs(path_stub)
    encoded_text = doc.text().encode('utf-8')
    unannotated = (k.stage == 'unannotated')

    # .aa file
    if unannotated:
        aa_settings = stac_unannotated_output_settings
    else:
        aa_settings = stac_output_settings
    doc_copy = copy.copy(doc)
    doc_copy.hashcode = glozz.hashcode(BytesIO(encoded_text))
    glozz.write_annotation_file(path_stub + ".aa",
                                doc_copy,
                                settings=aa_settings)

    # .ac file
    if not unannotated:
        return
    with open(path_stub + ".ac", 'wb') as ac_file:
        ac_file.write(encoded_text)
def main():
    """
    Add annotations to every 'units' and 'discourse' document found under
    the directory given on the command line, writing each result to
    ``<Directory>/<game>/<stage>/<annotator>/<game>_<part>.aa``.

    command line: python nonling_annotations-v2.py ../../data/pilotnonling/test/

    :raises Exception: if a document with any other stage is encountered
        (should not happen, since the corpus is filtered beforehand)
    """
    # Local import: os.path.join makes the output path correct whether or
    # not the directory argument carries a trailing separator.
    import os

    def to_annotate(fileId):
        """Return True for the stages this script annotates."""
        return fileId.stage in ('units', 'discourse')

    parser = argparse.ArgumentParser()
    parser.add_argument('Directory',
                        help = 'directory where the files to annotate are')
    args = parser.parse_args()
    Directory = args.Directory

    reader = STAC.Reader(Directory)
    subset = reader.filter(reader.files(), to_annotate)
    corpus = reader.slurp(subset, verbose=True)

    for key in corpus.keys():
        doc = corpus[key]
        # NOTE(review): relies on str(key) being four space-separated
        # fields, "game [part] stage annotator" -- confirm against the
        # key type's string representation.
        data = str(key).split(' ')
        game = data[0]
        part = data[1][1:-1]  # the integer that interests us is between brackets
        stage = data[2]
        metal = data[3]
        print(game, part, stage, metal)

        if stage == 'units':
            newdoc = add_units_annotations(doc)
        elif stage == 'discourse':
            newdoc = add_discourse_annotations(doc)
        else:
            raise Exception("main : you shouldn't be here!")

        aa_path = os.path.join(Directory, game, stage, metal,
                               game + '_' + part + '.aa')
        GLOZZ.write_annotation_file(aa_path, newdoc)
def write_annotation_file(anno_filename, doc):
    """
    Serialise a GlozzDocument as XML at the given path.

    Delegates to ``glozz.write_annotation_file``, always supplying the
    module's ``stac_output_settings`` as the output settings.

    :param anno_filename: path of the annotation file to write
    :param doc: the GlozzDocument to write out
    """
    output_settings = stac_output_settings
    glozz.write_annotation_file(anno_filename, doc,
                                settings=output_settings)