def export_wnsql_synsets(args):
    """Dump lemmas, definitions and examples from WordNet SQL to text files.

    Four UTF-8 text files are written under ``./data`` (resolved against the
    current working directory; the directory itself is not created here):

    * ``wn30_lemmas.txt``      - canonical synset ID and lemma, tab-separated,
      sorted by synset ID and then by lemma.
    * ``wn30_lemmas_noss.txt`` - the lemma alone, in the same order.
    * ``wn30_defs.txt``        - canonical synset ID and definition,
      tab-separated, sorted by synset ID.
    * ``wn30_exes.txt``        - canonical synset ID and example sentence,
      tab-separated, sorted by synset ID.

    Args:
        args: Parsed command-line arguments; forwarded unchanged to
            ``show_info`` and ``get_wn``.
    """
    print("Exporting synsets' info (lemmas/defs/examples) from WordnetSQL "
          "(Princeton Wordnet 3.0) to text file")
    show_info(args)
    output_with_sid_file = os.path.abspath('./data/wn30_lemmas.txt')
    output_without_sid_file = os.path.abspath('./data/wn30_lemmas_noss.txt')
    output_defs = os.path.abspath('./data/wn30_defs.txt')
    output_exes = os.path.abspath('./data/wn30_exes.txt')
    wn = get_wn(args)

    # Extract lemmas
    # NOTE(review): this query wraps synsetid in str() while the two queries
    # below pass it as-is -- confirm whether the column types really differ.
    synsets_lemmas = sorted(
        ((SynsetID.from_string(str(r.synsetid)).to_canonical(), r.lemma)
         for r in wn.get_all_synsets()),
        key=itemgetter(0, 1))
    # An explicit encoding keeps the output identical on every platform
    # instead of depending on the locale's default encoding.
    with open(output_with_sid_file, 'w', encoding='utf-8') as with_sid, \
            open(output_without_sid_file, 'w', encoding='utf-8') as without_sid:
        for row in synsets_lemmas:
            with_sid.write('%s\t%s\n' % row)
            without_sid.write('%s\n' % (row[1], ))  # just the lemma

    # Extract synset definitions
    # sorted() is stable, so rows sharing a synset ID keep the query order.
    synsets_defs = sorted(
        ((SynsetID.from_string(r.synsetid).to_canonical(), r.definition)
         for r in wn.schema.ss.select(orderby='synsetid')),
        key=itemgetter(0))
    with open(output_defs, 'w', encoding='utf-8') as def_file:
        for row in synsets_defs:
            def_file.write('%s\t%s\n' % row)

    # Extract examples
    synsets_examples = sorted(
        ((SynsetID.from_string(r.synsetid).to_canonical(), r.sample)
         for r in wn.schema.ex.select(orderby='synsetid')),
        key=itemgetter(0))
    with open(output_exes, 'w', encoding='utf-8') as ex_file:
        for row in synsets_examples:
            ex_file.write('%s\t%s\n' % row)

    # summary
    print("Data has been extracted to:")
    print(" + {}".format(output_with_sid_file))
    print(" + {}".format(output_without_sid_file))
    print(" + {}".format(output_defs))
    print(" + {}".format(output_exes))
    print("Done!")
def export_gwnsql_synsets(args):
    """Dump lemmas, definitions and examples from Gloss WordNet to text files.

    Synsets come from ``get_gwnxml(args).synsets`` when ``args.mockup`` is
    truthy, otherwise from ``gwn.all_synsets()``. The collection's
    ``synsets`` list is sorted in place by canonical synset ID before
    anything is written.

    Four UTF-8 text files are written under ``./data`` (resolved against the
    current working directory; the directory itself is not created here):

    * ``glosstag_lemmas.txt``      - canonical synset ID and term,
      tab-separated; terms of one synset are sorted alphabetically.
    * ``glosstag_lemmas_noss.txt`` - the term alone, in the same order.
    * ``glosstag_defs.txt``        - synset ID and text of every gloss whose
      category is ``'def'``.
    * ``glosstag_exes.txt``        - synset ID and text of every gloss whose
      category is ``'ex'``.

    Glosses of any other category are skipped.

    Args:
        args: Parsed command-line arguments; ``args.mockup`` selects the
            data source, and the object is forwarded to ``show_info``,
            ``get_gwn`` and (in mockup mode) ``get_gwnxml``.
    """
    print("Exporting synsets' info (lemmas/defs/examples) from GlossWordNet "
          "(SQLite) to text file")
    show_info(args)
    output_with_sid_file = os.path.abspath('./data/glosstag_lemmas.txt')
    output_without_sid_file = os.path.abspath(
        './data/glosstag_lemmas_noss.txt')
    output_defs = os.path.abspath('./data/glosstag_defs.txt')
    output_exes = os.path.abspath('./data/glosstag_exes.txt')
    gwn = get_gwn(args)

    # Extract synsets' lemmas, definitions and examples
    if args.mockup:
        synsets = get_gwnxml(args).synsets
    else:
        synsets = gwn.all_synsets()
    synsets.synsets.sort(key=lambda x: x.sid.to_canonical())

    # An explicit encoding keeps the output identical on every platform
    # instead of depending on the locale's default encoding.
    with open(output_defs, 'w', encoding='utf-8') as def_file, \
            open(output_exes, 'w', encoding='utf-8') as ex_file, \
            open(output_with_sid_file, 'w', encoding='utf-8') as with_sid, \
            open(output_without_sid_file, 'w', encoding='utf-8') as without_sid:
        for ss in synsets:
            canonical_sid = ss.sid.to_canonical()
            for t in sorted(ss.terms, key=lambda x: x.term):
                with_sid.write('%s\t%s\n' % (canonical_sid, t.term))
                without_sid.write('%s\n' % (t.term, ))
            # NOTE(review): the lemma file uses sid.to_canonical() while the
            # two files below format ss.sid directly -- confirm both render
            # the same ID string.
            for gloss in ss.glosses:
                if gloss.cat == 'def':
                    def_file.write('{sid}\t{d}\n'.format(sid=ss.sid,
                                                         d=gloss.text()))
                elif gloss.cat == 'ex':
                    ex_file.write('{sid}\t{ex}\n'.format(sid=ss.sid,
                                                         ex=gloss.text()))

    # summary
    print("Data has been extracted to:")
    print(" + {}".format(output_with_sid_file))
    print(" + {}".format(output_without_sid_file))
    print(" + {}".format(output_defs))
    print(" + {}".format(output_exes))
    print("Extracted synsets: {}".format(len(synsets)))
    print("Done!")
def glosstag2ntumc(args):
    """Placeholder for the Glosstag to NTU-MC conversion task.

    Prints a banner, shows the current configuration through ``show_info``
    and reports that the feature is still to be developed. No data is
    converted.

    Args:
        args: Parsed command-line arguments; forwarded to ``show_info``.
    """
    print("Extracting Glosstag to NTU-MC")
    show_info(args)
    # The conversion itself has not been written yet.
    print("To be developed")