Beispiel #1
0
    def _load_from_xml(self, filename: str):
        """Populate this object from the XML file at ``filename``.

        Existing state is reset through ``self._clean()`` first. Every child
        of the XML root is read as one synset, and each of its sub-elements is
        handled according to its tag:

        * ``ID``      -- creates the ``Synset`` from the element text.
        * ``POS``     -- one of 'n', 'v', 'r', 'a', mapped to ``Synset.Pos``.
        * ``SYNONYM`` -- the text of each child becomes a literal and the text
          of each child's first sub-element becomes its sense (``""`` when
          that text is missing). Literals containing '_' additionally
          contribute their '_'-separated parts as literals. Every resulting
          literal is indexed in ``self._literal2synset``.
        * ``STAMP``, ``DEF``, ``DOMAIN`` -- stored from the element text.
        * ``ILR``     -- adds an edge to ``self._graph`` from this synset's id
          to the id in the element text, labelled with the text of the
          element's first child; the label is also recorded in
          ``self._relation_types``.
        * ``SUMO``    -- stores the element text and maps the text of the
          first child ('+', '=', '@', '[', ':') to ``Synset.SumoType``.
        * ``SENTIWN`` -- stores the child texts converted to floats.

        The finished synset is stored in ``self._synsets`` under its id.

        Args:
            filename: Path of the XML file to parse.

        Raises:
            KeyError: If a POS or SUMO type character is not one of the
                values listed above.
        """
        self._clean()

        # These tables never change, so build them once per load instead of
        # once per POS / SUMO element.
        pos_by_char = {
            'n': Synset.Pos.NOUN,
            'v': Synset.Pos.VERB,
            'r': Synset.Pos.ADVERB,
            'a': Synset.Pos.ADJECTIVE,
        }
        sumotype_by_char = {
            '+': Synset.SumoType.HYPERNYM,
            '=': Synset.SumoType.EQUIVALENT,
            '@': Synset.SumoType.INSTANCE,
            '[': Synset.SumoType.BRACKET,
            ':': Synset.SumoType.POINTS,
        }

        parser = et.XMLParser(encoding="utf-8")
        root = et.parse(filename, parser).getroot()

        for child in root:
            # NOTE(review): stays None until an ID element is seen, so ID must
            # precede the other tags; a child without any ID element makes the
            # attribute accesses below fail with AttributeError.
            synset = None

            for element in child:
                tag = element.tag

                if tag == 'ID':
                    synset = Synset(element.text)

                elif tag == 'POS':
                    synset.pos = pos_by_char[element.text]

                elif tag == 'SYNONYM':
                    try:
                        synset.literals = [literal.text for literal in element]
                    except TypeError:
                        # Best effort: report the offending synset and go on
                        # with whatever literals it currently holds.
                        print(synset.id)

                    synset.literals_senses = [
                        literal[0].text if literal[0].text is not None else ""
                        for literal in element
                    ]

                    # Iterate over a snapshot: add_literal() may grow the
                    # literal list, and the added parts never contain '_', so
                    # they need no further splitting.
                    for literal in list(synset.literals):
                        literal_parts = literal.split('_')
                        if len(literal_parts) > 1:
                            for literal_part in literal_parts:
                                if literal_part not in synset.literals:
                                    synset.add_literal(literal_part)

                    for literal in synset.literals:
                        self._literal2synset[literal].append(synset.id)

                elif tag == 'STAMP':
                    synset.stamp = element.text

                elif tag == 'ILR':
                    # The element text is the target synset id; its first
                    # child carries the relation label.
                    relation = element[0].text
                    self._relation_types.add(relation)
                    self._graph.add_edge(synset.id,
                                         element.text,
                                         label=relation)

                elif tag == 'DEF':
                    synset.definition = element.text

                elif tag == 'DOMAIN':
                    synset.domain = element.text

                elif tag == 'SUMO':
                    synset.sumo = element.text
                    synset.sumotype = sumotype_by_char[element[0].text]

                elif tag == 'SENTIWN':
                    synset.sentiwn = [
                        float(subelement.text) for subelement in element
                    ]

            self._synsets[synset.id] = synset
Beispiel #2
0
def demo_create_and_edit_synsets():
    """Demonstrate creating and editing synsets and relations.

    The demo runs in two parts and prints a message after each step:

    1. A standalone ``Synset`` is created and its pos, literals, senses,
       definition, sumo and sumotype are modified.
    2. A ``rowordnet.RoWordNet`` instance is used to generate synset ids,
       add synsets, add/remove a literal (followed by ``reindex_literals``)
       and add/remove a relation between two synsets.

    Takes no arguments and returns nothing.
    """
    print("\n\nThis demo shows how to create and edit synsets & relations.\n" +
          "_" * 70)

    # Create a synset. It is recommended to obtain the id from the rowordnet
    # method 'generate_synset_id' (used further below) instead of writing it
    # by hand; see the function 'demo_basic_rowordnet_operations' for more
    # details.
    manual_id = "my_id"
    synset = Synset(manual_id)
    print("\n\tSynset with id '{}' has been created.".format(manual_id))

    # Print the synset.
    print("\n\tPrint this synset:")
    print(synset)

    # Set a pos of type verb.
    synset.pos = Synset.Pos.VERB
    print("\tSynset's pos has been changed to '{}'".format(synset.pos))

    # Add a literal.
    literal = "tigru"
    sense = "1"
    synset.add_literal(literal=literal, sense=sense)
    print(
        "\n\tA new literal '{}' with sense '{}' has been added to the synset with id '{}'"
        .format(literal, sense, synset.id))
    print("\tNumber of literals for synset with id '{}': {}".format(
        synset.id, len(synset.literals)))

    # Remove the literal that was just added.
    synset.remove_literal(literal=literal)
    print("\n\tThe literal '{}' has been removed from the synset with id '{}'".
          format(literal, synset.id))
    print("\tNumber of literals for synset with id '{}': {}".format(
        synset.id, len(synset.literals)))

    # Add several literals at once.
    print("\n\tAdding literals to a synset. Initially we create them:")
    literals = ['lup', 'vuple', 'caine']
    print("\tDirect addition of {} literals to synset with id '{}'".format(
        len(literals), synset.id))
    synset.literals = literals
    print("\tNumber of literals for synset with id '{}': {}".format(
        synset.id, len(synset.literals)))

    # Add several senses at once.
    print(
        "\n\tAdding senses to a synset's literals. Initially we create them:")
    literals_senses = ['1', '2', 'x']
    print("\tDirect addition of {} senses to synset with id '{}'".format(
        len(literals_senses), synset.id))
    synset.literals_senses = literals_senses
    print("\tNumber of senses for synset '{}': {}".format(
        synset.id, len(synset.literals_senses)))

    # Set a definition.
    synset.definition = "Animal carnivor"
    print("\tSynset's definition has been changed to '{}'".format(
        synset.definition))

    # Set a sumo.
    synset.sumo = "Animal"
    print("\tSynset's sumo has been changed to '{}'".format(synset.sumo))

    # Set a sumotype.
    synset.sumotype = Synset.SumoType.INSTANCE
    print("\tSynset's sumotype has been changed to '{}'".format(
        synset.sumotype))

    # Generate a new id with the default prefix and suffix.
    wn = rowordnet.RoWordNet()
    generated_id = wn.generate_synset_id()
    print(
        "\n\tNew id '{}' generated with default prefix 'ENG30-' and suffix '-n'"
        .format(generated_id))
    # Generate a new id with a custom prefix and suffix.
    prefix = 'ENG31-'
    suffix = '-v'
    new_id = wn.generate_synset_id(prefix=prefix, suffix=suffix)
    print("\tNew id '{}' generated with prefix '{}' and suffix '{}'".format(
        new_id, prefix, suffix))

    # Create a synset from the id generated with the defaults.
    synset = Synset(generated_id)
    print("\n\tSynset with id '{}' has been created".format(synset.id))
    # Add the synset to the rowordnet.
    wn.add_synset(synset)
    print("\n\tAdded synset with id '{}' to the rowordnet".format(synset.id))

    # Add a literal to an existing synset: take the first id returned by
    # wn.synsets() and fetch its synset by calling the wordnet with that id.
    literal = 'iepure'
    sense = '1'
    first_synset_id = wn.synsets()[0]
    synset = wn(first_synset_id)
    synset.add_literal(literal, sense)
    # Tell the rowordnet that the synset's literals have changed. This step is
    # necessary for a correct internal representation.
    wn.reindex_literals()
    print(
        "\n\tAdded literal with literal '{}' and sense '{}' to the synset '{}'. "
        "Number of synsets containing literal '{}': {}".format(
            literal, sense, synset.id, literal, len(wn.synsets(literal))))

    # Remove the previous literal from the synset.
    synset.remove_literal(literal)
    # Again, the rowordnet has to be told that the synset's literals have
    # changed.
    wn.reindex_literals()
    print(
        "\tRemoved literal with literal '{}' from the synset '{}'. Number of synsets containing literal '{}': {}"
        .format(literal, synset.id, literal, len(wn.synsets(literal))))

    # Generate a new synset and add it to the rowordnet.
    prefix = 'ENG31-'
    suffix = '-n'
    new_id = wn.generate_synset_id(prefix, suffix)
    new_synset = Synset(new_id)
    wn.add_synset(new_synset)
    print("\n\tAdded new synset with id '{}' to the rowordnet".format(
        new_synset.id))

    # Add a relation of type 'hypernym' from 'synset' to 'new_synset'.
    relation = 'hypernym'
    wn.add_relation(synset.id, new_synset.id, relation)
    print(
        "\n\tAdded '{}' relation from synset with id '{}' to synset with id '{}'"
        .format(relation, synset.id, new_synset.id))

    # Remove the relation from 'synset' to 'new_synset' again.
    wn.remove_relation(synset.id, new_synset.id)
    print("\tRemoved relation from synset with id '{}' to synset with id '{}'".
          format(synset.id, new_synset.id))