def easy_file2ds_string_list(file, date="DATE", validators="VALIDATOR"):
    """Print and return a textual "sentence(...)" record per entry of an Easy XML file.

    The file is parsed with ``EasyReader.read_xml``.  For every key ``i`` of
    the parsed result, one string of the following shape is built::

        sentence(
        id(<i>)
        date(<date>)
        validators(<validators>)
        sentence_form(<token forms, each followed by one space>)
        surf_deps(
        <entry['deps'].triples2string(True)>)
        features(
        )
        )

    Each string is printed as soon as it is built.

    Parameters:
        file: value handed unchanged to ``EasyReader.read_xml``.
        date: text placed inside ``date(...)``; must be a string.
        validators: text placed inside ``validators(...)``; must be a string.

    Returns:
        A dict mapping each key of the parsed file to its formatted string.
    """
    er = EasyReader()
    chunksdeps = er.read_xml(file)
    ds_string_list = {}
    for i in chunksdeps.keys():
        entry = chunksdeps[i]
        dg = entry['deps']
        tokens = entry['chunks'].tokens
        # Every token form is followed by a space, so the sentence keeps a
        # trailing space before the closing parenthesis.
        sentence = "".join([tokens[t].forme + " " for t in tokens.keys()])
        # Assemble the record in one pass instead of repeated "+=".
        ds_string_list[i] = "".join([
            "sentence(\n",
            "id(" + str(i) + ")\n",
            "date(" + date + ")\n",
            "validators(" + validators + ")\n",
            "sentence_form(",
            sentence,
            ")\n",
            "surf_deps(\n",
            dg.triples2string(True),
            ")\n",
            # The features section is always emitted empty.
            "features(\n",
            ")\n",
            ")\n",
        ])
        # Single parenthesised argument: same output under the Python 2
        # print statement and the Python 3 print function.
        print(ds_string_list[i])
    # Hand the records back to the caller instead of discarding them.
    return ds_string_list
def easy_file2dgraph_list(file,add=None):
    """Collect the 'deps' object of every entry of an Easy XML file.

    The file is parsed with ``EasyReader.read_xml``; the result maps each
    key of the parsed data to that entry's ``'deps'`` value.

    Parameters:
        file: value handed unchanged to ``EasyReader.read_xml``.
        add: when truthy, the collected dict is passed together with this
            value to ``normalize_dgraph_list`` and that function's result
            is returned instead.

    Returns:
        The dict of ``'deps'`` objects, or the output of
        ``normalize_dgraph_list`` when ``add`` is truthy.
    """
    parsed = EasyReader().read_xml(file)
    graphs = dict((key, parsed[key]['deps']) for key in parsed.keys())
    if not add:
        return graphs
    return normalize_dgraph_list(graphs, add)
else: print "\n# "+dossier_easy+" nest pas un dossier!" sys.exit() else: sys.exit() if dossier_txt: if os.path.isdir(dossier_txt): pass else: print "\n# "+ dossier_txt +" nexiste pas: il sera cree!" os.mkdir(dossier_txt) else: sys.exit() reader = EasyReader() print "Lecture du dossier easy:" sentences_per_file = reader.get_sentences_dir(dossier_easy) print "Transformation en .txt" # Ecriture des fichiers texte correspondant. for file in sentences_per_file.keys(): filetxt = re.search("(.*?)\.xml$",file).group(1) + ".txt" filename = os.path.join(dossier_txt,filetxt) fichier = open(filename, 'w') # marie : désolée ça passe pas chez moi... try: fichier.write(sentences_per_file[file]) except UnicodeEncodeError: fichier.write(sentences_per_file[file].encode('iso-8859-15'))
########
# MAIN #
########

# Echo the evaluation switches that are in effect for this run.
print("--------------------------------------------------------------")
print("EVAL_ENONCE\t=\t" + str(eval_enonce))
print("EVAL_CAT-ENONCE\t=\t" + str(eval_cat_enonce))
print("EVAL_CAT\t=\t" + str(eval_cat))
print("EVAL_CONFUSION\t=\t" + str(eval_confusion))
print("--------------------------------------------------------------")

# Without a reference there is nothing to parse: stop without any message.
if reference is None:
    sys.exit()

if os.path.isdir(reference):
    # Directory: the reader parses it as a whole via read_dir_xml.
    print("Parsing du dossier reference: " + reference)
    easyReader_reference = EasyReader()
    chunksdeps_per_file_ref = easyReader_reference.read_dir_xml(reference)
elif os.path.isfile(reference):
    # Single file: wrap its parse result in a one-entry dict keyed by the
    # last "/"-separated component of the path.
    print("Parsing du fichier reference: " + reference)
    reference_file = re.search("/?([^/]+)$", reference).group(1)
    easyReader_reference = EasyReader()
    chunksdeps_per_file_ref = {
        reference_file: easyReader_reference.read_xml(reference),
    }
else:
    # Neither an existing directory nor an existing file: report and stop.
    print("\n# " + reference + " nest pas un dossier!")
    sys.exit()