Ejemplo n.º 1
0
# Collect the ids of the synsets that will be carried over.
added_synsets = set()
for synset_obj in old.synsets_get_generator():
    ili = synset_obj.get_ili()
    synset_id = synset_obj.get_id()

    # A synset is selected when it has an ILI together with a non-empty id,
    # or when its id is listed in odwn_ids (either reason is sufficient).
    has_ili_and_id = ili is not None and bool(synset_id)
    listed_in_odwn = synset_id in odwn_ids
    if has_ili_and_id or listed_in_odwn:
        added_synsets.add(synset_id)


# add LexicalEntries
# For every lexical entry whose synset id is in added_synsets, append a
# <LexicalEntry> element (with a <Lemma> child) to lexicon_el.
# NOTE(review): nothing in the visible part of this loop adds to
# added_sense_ids; presumably that happens further down in the loop body
# -- confirm, otherwise the duplicate-sense check below never filters anything.
added_sense_ids = set()
for counter, le_obj in enumerate(old.les_get_generator()): # mw not taken into account

    synset_id = le_obj.get_synset_id()
    sense_id = le_obj.get_sense_id()

    # skip entries that have no synset id or whose sense id is already recorded
    if all([synset_id,
            sense_id not in added_sense_ids]):
        # only entries attached to a selected synset are emitted
        if synset_id in added_synsets:

            # TODO: add sense examples
            # counter advances for every entry yielded by the generator,
            # including skipped ones, so the 'w<N>' ids are not consecutive
            lexical_entry_el = etree.SubElement(lexicon_el, 'LexicalEntry',
                                                attrib={'id': 'w%s' % counter})

            # only the first item of get_pos() is written as partOfSpeech
            etree.SubElement(lexical_entry_el, 'Lemma',
                             attrib={'writtenForm': le_obj.get_lemma(),
                                     'partOfSpeech': le_obj.get_pos()[0]})
Ejemplo n.º 2
0
                               'user_input',
                               'Anneleen',
                               'synsets_5_10.bin')

# Load the pickled annotation mapping from disk.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only load annotation files that come from a trusted source.
with open(annotation_path,'rb') as infile:
    annotation = pickle.load(infile)


#STEP X: remove le_objs
# Gather every le id that any annotation value marks for removal.
# A plain loop is used (instead of a comprehension run only for its side
# effects) so no throwaway list of None values is built.
to_remove = set()
for value in annotation.values():
    if 'le_ids_to_remove' in value:
        to_remove.update(value['le_ids_to_remove'])

num_to_remove = len(to_remove)
logger.info('%s le ids found to remove' % num_to_remove)

# NOTE(review): entries are removed while les_get_generator() is still being
# consumed; confirm the generator tolerates removal during iteration.
for le_obj in my_parser.les_get_generator():
    if le_obj.get_id() in to_remove:
        le_obj.remove_me()


#STEP X: RUN STATS
my_parser.get_stats(verbose=True)

#STEP X: export it to version 1.2
my_parser.export(output_path)

logger.info('finished conversion')
# Collect the ids of the synsets that will be carried over.
added_synsets = set()
for synset_obj in old.synsets_get_generator():
    ili = synset_obj.get_ili()
    synset_id = synset_obj.get_id()

    # A synset is selected when it has an ILI together with a non-empty id,
    # or when its id is listed in odwn_ids (either reason is sufficient).
    has_ili_and_id = ili is not None and bool(synset_id)
    listed_in_odwn = synset_id in odwn_ids
    if has_ili_and_id or listed_in_odwn:
        added_synsets.add(synset_id)


# add LexicalEntries
# For every lexical entry whose synset id is in added_synsets, append a
# <LexicalEntry> element (with a <Lemma> child) to lexicon_el.
# NOTE(review): nothing in the visible part of this loop adds to
# added_sense_ids; presumably that happens further down in the loop body
# -- confirm, otherwise the duplicate-sense check below never filters anything.
added_sense_ids = set()
for counter, le_obj in enumerate(old.les_get_generator()): # mw not taken into account

    synset_id = le_obj.get_synset_id()
    sense_id = le_obj.get_sense_id()

    # skip entries that have no synset id or whose sense id is already recorded
    if all([synset_id,
            sense_id not in added_sense_ids]):
        # only entries attached to a selected synset are emitted
        if synset_id in added_synsets:

            # TODO: add sense examples
            # counter advances for every entry yielded by the generator,
            # including skipped ones, so the 'w<N>' ids are not consecutive
            lexical_entry_el = etree.SubElement(lexicon_el, 'LexicalEntry',
                                                attrib={'id': 'w%s' % counter})

            # only the first item of get_pos() is written as partOfSpeech
            etree.SubElement(lexical_entry_el, 'Lemma',
                             attrib={'writtenForm': le_obj.get_lemma(),
                                     'partOfSpeech': le_obj.get_pos()[0]})