Esempio n. 1
0
def test_member_comparisons():
    """Sanity-check compare_members() between VerbNet 3.2 and 3.3.

    Parses both versions, computes the member changes and reports on stdout:

    * every change filed under a class whose members (compared by the first
      element of each member's name) do not include the changed element;
    * every change filed under a falsy key (the deletions) whose element can
      still be found by name among the 3.3 members.

    Nothing is asserted; the function only prints.
    """
    from compare_versions import compare_members

    from_vn = VerbNetParser(version="3.2")
    to_vn = VerbNetParser(version="3.3")

    from_vn.parse_files()
    to_vn.parse_files()

    from_vn_members = from_vn.get_all_members()
    to_vn_members = to_vn.get_all_members()

    changes_by_class = compare_members(from_vn_members, to_vn_members)

    for class_id, class_changes in changes_by_class.items():
        if not class_id:
            # Deletions: flag members that still show up somewhere in to_vn.
            for change in class_changes:
                still_present = search.find_members(
                    to_vn_members, name=change.element_name)
                if still_present:
                    print("%s marked as Deleted could be in %s" %
                          (change.element_name, ', '.join(still_present)))
            continue

        for change in class_changes:
            class_member_names = [
                m.name[0] for m in to_vn.get_verb_class(class_id).members
            ]
            if change.element_name in class_member_names:
                # The element is where the change says it is; nothing to report.
                continue

            located_in = search.find_members(to_vn_members,
                                             name=change.element_name)
            print("Failed, %s is in %s: " %
                  (change.element_name, ', '.join(located_in)))
            print({class_id: change.__dict__})
Esempio n. 2
0
def find_in_old_versions(ann, old_vns):
    """Look up the members an annotation may refer to in an older VerbNet.

    :param ann: annotation providing ``exists_in()``, ``vn_class`` and ``verb``
    :param old_vns: iterable of older VerbNet objects, in lookup order
    :return: the result of ``search.find_members`` on the members of the
        first entry of ``old_vns`` -- restricted to ``ann.vn_class`` when the
        annotation exists in that version, matched by verb name only
        otherwise -- or an empty list when ``old_vns`` is empty
    """
    for old_vn in old_vns:
        all_old_members = old_vn.get_members()
        if ann.exists_in(old_vn):
            return search.find_members(all_old_members,
                                       class_ID=ann.vn_class,
                                       name=ann.verb)
        # NOTE(review): this also returns during the first iteration, so any
        # later entries of old_vns are never consulted. Behaviour is kept
        # unchanged here -- confirm whether all versions should be searched.
        return search.find_members(all_old_members, name=ann.verb)

    # Nothing to search: hand back an empty result rather than an implicit
    # None, so callers can iterate over the return value unconditionally.
    return []
Esempio n. 3
0
def update_annotation_line(ann_line, new_vn, old_vns, log):
    """Re-map one annotation line onto the "new" VerbNet version.

    :param ann_line: raw annotation line, parsed with ``Annotation``
    :param new_vn: VerbNet version the annotation should point at
    :param old_vns: older VerbNet versions, passed to find_in_old_versions()
    :param log: object whose ``write`` method receives the outcome message
    :return: ``str(ann)`` when the annotation already exists in ``new_vn`` or
        was updated to a single match; ``None`` when the lookup produced
        several distinct matches or none at all
    """
    ann = Annotation(ann_line)

    # The verb is already mapped directly to the desired version: keep it.
    if ann.exists_in(new_vn):
        log.write("SUCCESS: %s is still a reference to %s in %s" %
                  (ann.verb, ann.verb, ann.class_ID))
        return str(ann)

    old_members = find_in_old_versions(ann, old_vns)
    all_new_members = new_vn.get_all_members()

    # Look every old member up in the new version by name and wordnet mapping.
    candidates = []
    for old_member in old_members:
        candidates.extend(
            search.find_members(all_new_members,
                                name=old_member.name,
                                wn=old_member.wn))

    # Ambiguities in previous versions may all point to the same verb in the
    # new version, so keep only the first member per (name, class id) pair.
    seen_keys = []
    matches = []
    for candidate in candidates:
        key = (candidate.name, candidate.class_id())
        if key not in seen_keys:
            seen_keys.append(key)
            matches.append(candidate)

    if not matches:
        # The verb no longer exists in VN.
        log.write("ERROR: %s no longer exists in VerbNet" % ann.verb)
        return None

    if len(matches) > 1:
        # Several distinct targets: ambiguous, leave the decision to a human.
        possible_classes = ' OR '.join([m.class_id() for m in matches])
        log.write(
            "ERROR: %s no longer belongs to %s and could belong to %s" %
            (ann.verb, ann.class_ID, possible_classes))
        return None

    target = matches[0]
    # Update first: the message below is formatted from the updated annotation.
    ann.update_vn_info(target)
    log.write("SUCCESS: Found %s in %s in VerbNet version %s" %
              (ann.verb, ann.class_ID, target.version()))
    return str(ann)
Esempio n. 4
0
def move_member(member_name, current_classname, new_classname):
    """Move a member from one verb class to another.

    Uses the ``vn`` object from the enclosing scope to resolve both classes.

    :param member_name: name of the member to move
    :param current_classname: class the member is looked up in and removed from
    :param new_classname: class the member is added to
    :return: (current_class_xml, new_class_xml), the ``pp()`` output of both
        classes after the move
    :raises Exception: if the member is not found in ``current_classname`` or
        ``new_classname`` does not resolve to a class
    """
    found = search.find_members(name=member_name,
                                class_ID=current_classname)
    if not found:
        raise Exception("%s is not in the class %s in VerbNet version %s" %
                        (member_name, current_classname, vn.version))

    # Should only ever be one unique member name per class
    member = found[0]

    target_class = vn.get_verb_class(new_classname)
    source_class = vn.get_verb_class(current_classname)

    if not target_class:
        raise Exception("%s is not a class in VerbNet version %s" %
                        (new_classname, vn.version))

    source_class.remove_member(member)
    target_class.add_member(member)
    return (source_class.pp(), target_class.pp())
Esempio n. 5
0
def generate_groupings():
    """Write one grouping inventory XML file per distinct member name.

    For every distinct member name found by the parser, builds a
    ``root > inventory`` tree holding one ``sense`` element per member with
    that name. Each sense carries the first example of every frame of the
    member's class and a ``mappings`` element with the member's wordnet
    keys plus empty ``pb``, ``vn`` and ``fn`` children. The tree is written
    to ``groupings/<name>-v.xml``.
    """
    # CHANGE DIRECTORY to desired verbnet version;
    vn_directory = "../../vn_versions/verbnet3.3/"
    vnp = VerbNetParser(directory=vn_directory)

    doctype_string = '<!DOCTYPE inventory SYSTEM "inventory.dtd">'
    member_names = {m.name for m in vnp.get_members()}

    for member_name in member_names:
        root = etree.Element("root")
        inv = etree.Element("inventory", lemma=member_name+"-v")
        members = search.find_members(members = vnp.get_members(), name=member_name)

        for index, member in enumerate(members):
            sense = etree.Element("sense", group="1", n=str(index), name=member.vnc + ".xml", type="")

            # One example line per frame of the member's class.
            frames = vnp.verb_classes_dict[member.vnc].frames
            examples = etree.Element("examples")
            examples.text = "\n".join(f.examples[0] for f in frames)
            sense.append(examples)

            mappings = etree.Element("mappings")
            mappings.append(etree.Element("wn", version="", lemma=" ".join(member.wn)))
            # Placeholders for the mappings that are not filled in here.
            for tag in ("pb", "vn", "fn"):
                mappings.append(etree.Element(tag))
            sense.append(mappings)

            inv.append(sense)

        root.append(inv)

        with open("groupings/" + member_name + "-v.xml", "wb") as output:
            output.write(etree.tostring(root, xml_declaration=True, encoding="UTF-8", doctype=doctype_string, pretty_print=True))
Esempio n. 6
0
def compare_members(from_vn_members, to_vn_members):
    """Diff the members of two VerbNet versions.

    Returns a dict of {to_vn_class: [Change objects]}. Changes that have no
    destination member (deletions) are collected under the key ``None``.
    Changes of several members that end up in the same class accumulate in
    that class's list.

    update: member still in class, but attributes changed
    delete: member removed from class, and not moved to a new VN class
    insert: member inserted to the class, and is newly added to VN
    move: member moved from the class to a new vn class

    :param from_vn_members: members of the version being compared from
    :param to_vn_members: members of the version being compared to
    """
    all_changes = {}

    for from_vn_member in from_vn_members:
        # Find that member in to_vn_members, and if there are differences,
        # record them
        possible_to_vn_members = search.find_members(
            to_vn_members, name=from_vn_member.name, wn=from_vn_member.wn)
        changes = []
        to_vn_member = None
        attr_diffs = None

        # First case is that this member is in the same class in to_vn
        possible_match = [m for m in possible_to_vn_members
                          if m.class_id() == from_vn_member.class_id()]

        if possible_match:  # If member is in the same class
            to_vn_member = possible_match[0]
            attr_diffs = from_vn_member.compare_attrs(to_vn_member)
        # If there are many possible members, none of which are in the same class
        elif len(possible_to_vn_members) > 1:
            # Then try to see if any of those ALSO exist in from_vn
            # This is to identify an instance of this member in a NEW class
            # to say that this is where it moved to
            for possible_to_vn_member in possible_to_vn_members:
                already_in_from_vn = search.find_members(
                    from_vn_members,
                    class_ID=possible_to_vn_member.class_id(),
                    name=from_vn_member.name)
                if not already_in_from_vn:
                    to_vn_member = possible_to_vn_member
                    changes.append(Change(from_vn_member.name, "member", "move",
                                          from_vn_member.class_id()))
                    # Compare the attributes
                    attr_diffs = from_vn_member.compare_attrs(to_vn_member)
        elif len(possible_to_vn_members) == 1:
            to_vn_member = possible_to_vn_members[0]
            changes.append(Change(from_vn_member.name, "member", "move",
                                  from_vn_member.class_id()))
            # Compare the attributes
            attr_diffs = from_vn_member.compare_attrs(to_vn_member)
        else:
            changes.append(Change(from_vn_member.name, "member", "delete",
                                  from_vn_member.class_id()))

        if attr_diffs:  # If member has updates to its attributes
            changes.append(Change(
                from_vn_member.name, "member", "update", from_vn_member.class_id(),
                ', '.join(["%s: %s" % (attr, diff) for attr, diff in attr_diffs.items()])))

        if changes:
            key = to_vn_member.class_id() if to_vn_member else None
            # Accumulate instead of assigning: many members share a class (and
            # every deletion shares the None key), so a plain assignment would
            # discard the changes recorded for earlier members.
            all_changes.setdefault(key, []).extend(changes)

    # Lastly, we have to get all to_vn_members that did not exist in
    # from_vn_members to record all of the rest of the insert operations.
    # Members are reduced to (name, class id) pairs so they can be hashed
    # for the set difference.
    to_keys = {(m.name, m.class_id()) for m in to_vn_members}
    from_keys = {(m.name, m.class_id()) for m in from_vn_members}
    for name, class_ID in to_keys - from_keys:
        # NOTE(review): name is passed wrapped in a list here, unlike the
        # other find_members calls in this function -- confirm this is what
        # find_members expects.
        inserted_member = search.find_members(to_vn_members, class_ID=class_ID, name=[name])
        if inserted_member:
            all_changes.setdefault(inserted_member[0].class_id(), []).append(
                Change(inserted_member[0].name, "member", "insert",
                       notes=inserted_member[0].pp()))

    return all_changes
Esempio n. 7
0
def update_annotation_line(ann_line, new_vn, old_vns, log=None):
    """
    Re-map one annotation line onto new_vn

    Logs changes to log if one is given

    Updates the stats counters from the enclosing scope: index 4 for every
    line processed, 0 for lines already valid in new_vn, 1 for lines updated
    to a single match, 2 for ambiguous lines, 3 for lines without a match

    Returns a tuple of (bool marking if the update was successful, updated_annotation)
    """
    # Semlink annotations have mappings, and thus more attributes in a line
    field_count = len(ann_line.strip().split())
    ann_class = (annotation.SemLinkAnnotation
                 if field_count > 5 else annotation.VnAnnotation)
    ann = ann_class(ann_line)

    stats[4] += 1
    print("Searching for %s from %s...." % (ann.verb, ann.vn_class))

    # The verb is already mapped directly to the desired "new" version of VN
    if ann.exists_in(new_vn):
        if log:
            log.write(
                "SUCCESS: %s is still a reference to %s in %s in VerbNet version %s"
                % (ann.verb, ann.verb, ann.vn_class, new_vn.version))
        stats[0] += 1
        return (True, str(ann))

    possible_old_vn_members = find_in_old_versions(ann, old_vns)
    all_new_members = new_vn.get_members()

    # Look every old member up in the new version by name and wordnet mapping
    candidates = []
    for old_member in possible_old_vn_members:
        candidates.extend(
            search.find_members(all_new_members,
                                name=old_member.name,
                                wn=old_member.wn))

    # Ambiguities in previous versions may all point to the same verb in the
    # new version: a verb that appears in n classes of the old version yields
    # n lookups, and when they all resolve to the same member of the same
    # class the duplicates have to go. Keep the first member seen for every
    # (name, class id) pair.
    seen_keys = []
    matches = []
    for candidate in candidates:
        key = (candidate.name, candidate.class_id())
        if key not in seen_keys:
            seen_keys.append(key)
            matches.append(candidate)

    if len(matches) == 1:  # The verb maps to new version in a new class
        target = matches[0]
        if log:
            log.write(
                "SUCCESS: Found %s from %s in %s in VerbNet version %s" %
                (ann.verb, ann.vn_class, target.class_id(), target.version))
        stats[1] += 1
        # Updated only after logging, so the message shows the old class.
        ann.update_vn_info(target)
        return (True, str(ann))

    if matches:  # More than one distinct target: there is ambiguity
        if log:
            possible_classes = ' OR '.join([m.class_id() for m in matches])
            log.write(
                "ERROR: %s no longer belongs to %s and could belong to %s in VerbNet version %s"
                % (ann.verb, ann.vn_class, possible_classes,
                   matches[0].version))
        stats[2] += 1
    else:  # Otherwise this verb no longer exists in VN
        if log:
            log.write(
                "ERROR: %s from %s in an old version of VerbNet does not have an exact match in version %s"
                % (ann.verb, ann.vn_class, new_vn.version))
        stats[3] += 1

    return (False, str(ann))