def _dep(xml, source_element='basic-dependencies'):
    """Yield token annotations, then one relation per dependency in `xml`.

    Every annotation produced by `_pos(xml)` is yielded first, unchanged.
    After that, each `sentence` element is expected to hold exactly one
    child element named `source_element`; every `dep` child of that element
    is yielded as a BinaryRelationAnnotation linking the governor token's
    annotation to the dependent token's annotation. Relation ids are
    numbered 'R1', 'R2', ... across the whole document.

    Raises AssertionError if a sentence does not have exactly one
    `source_element` child or if a `dep` element has no `type` attribute.
    """
    root = _soup(xml)
    # The return value is not used anywhere below.
    _token_by_ids(root)

    # sentence id -> token id -> annotation, filled from the _pos triples.
    annotations = defaultdict(dict)
    for sentence_no, token_no, token_ann in _pos(xml):
        annotations[sentence_no][token_no] = token_ann
        yield token_ann

    next_rel_no = 1
    for sentence in _find_sentences_element(root).iter('sentence'):
        sentence_no = int(sentence.get('id'))
        containers = sentence.findall(source_element)
        assert len(containers) == 1

        for child in containers[0]:
            # Only 'dep' children describe a dependency; ignore anything else.
            if child.tag == 'dep':
                relation_type = child.get('type')
                assert relation_type is not None

                governor_no = int(child.find('governor').get('idx'))
                dependent_no = int(child.find('dependent').get('idx'))

                relation_id = 'R%s' % next_rel_no
                governor_ann_id = annotations[sentence_no][governor_no].id
                dependent_ann_id = annotations[sentence_no][dependent_no].id
                yield BinaryRelationAnnotation(
                    relation_id, relation_type,
                    'Governor', governor_ann_id,
                    'Dependent', dependent_ann_id,
                    '')
                next_rel_no += 1
def _create_relation(ann_obj, projectconf, mods, origin, target, type,
                     attributes, old_type, old_target, undo_resp=None):
    """Create a new binary relation or edit an existing one.

    When both `old_type` and `old_target` are None a new relation of `type`
    from `origin` to `target` is created, registered with `mods` and added
    to `ann_obj`. Otherwise the existing relation identified by the old
    type/target (falling back to the new values for whichever is None) is
    looked up and, if its type or target differs from the requested ones,
    updated in place and recorded as a change in `mods`.

    In both cases `attributes` (parsed with `_parse_attributes`) are then
    applied to the resulting relation via `_set_attributes`.

    Arguments:
    ann_obj -- annotation container providing get_relations, get_new_id
               and add_annotation
    projectconf -- project configuration providing get_relation_types and
                   get_relation_by_type
    mods -- modification tracker receiving addition/change notifications
    origin, target -- annotations whose `.id` become the relation arguments
    type -- relation type to create or change to
    attributes -- raw attribute specification, passed to _parse_attributes
    old_type, old_target -- previous type / target id when editing, or None
    undo_resp -- optional dict passed through to _set_attributes; a fresh
                 dict is used when omitted

    Returns the created or edited relation, or None if the relation to
    edit could not be found (an error is sent through Messager).
    """
    # A literal {} default would be shared between calls.
    if undo_resp is None:
        undo_resp = {}

    attributes = _parse_attributes(attributes)

    if old_type is not None or old_target is not None:
        assert type in projectconf.get_relation_types(), (
            ('attempting to convert relation to non-relation "%s" ' % (target.type, )) +
            ('(legit types: %s)' % (unicode(projectconf.get_relation_types()), )))

        sought_target = (old_target if old_target is not None else target.id)
        sought_type = (old_type if old_type is not None else type)

        # We are to change the type, target, and/or attributes
        # NOTE(review): the lookup matches on target and type only; the
        # relation's origin (arg1) is not compared -- confirm this is
        # intended when several origins share a target and type.
        found = None
        for candidate in ann_obj.get_relations():
            if candidate.arg2 == sought_target and candidate.type == sought_type:
                found = candidate
                break

        if found is None:
            # TODO: better response
            Messager.error('_create_relation: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))
        elif found.arg2 == target.id and found.type == type:
            # no changes to type or target
            pass
        else:
            # type and/or target changed, mark.
            # Snapshot the textual form before mutating so the tracker can
            # report the old state.
            before = unicode(found)
            found.arg2 = target.id
            found.type = type
            mods.change(before, found)

        target_ann = found
    else:
        # Create a new annotation
        new_id = ann_obj.get_new_id('R')
        rel = projectconf.get_relation_by_type(type)
        assert rel is not None and len(rel.arg_list) == 2
        a1l, a2l = rel.arg_list
        target_ann = BinaryRelationAnnotation(new_id, type, a1l, origin.id,
                                              a2l, target.id, '\t')
        mods.addition(target_ann)
        ann_obj.add_annotation(target_ann)

    # process attributes
    if target_ann is not None:
        _set_attributes(ann_obj, target_ann, attributes, mods, undo_resp)
    elif attributes is not None:
        Messager.error('_create_relation: cannot set arguments: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))

    return target_ann
def _dep(xml, source_element='basic-dependencies'):
    """Yield token annotations, then one relation per dependency in `xml`.

    Every annotation produced by `_pos(xml)` is yielded first. Then, for
    each `sentence` element, the dependency container named by
    `source_element` is located -- either as a child element with that tag
    or as a `dependencies` element whose `type` attribute equals
    `source_element` -- and each non-root `dep` inside it is yielded as a
    BinaryRelationAnnotation from the governor token's annotation to the
    dependent token's annotation. Relation ids are numbered 'R1', 'R2', ...
    across the whole document.

    Raises AssertionError if a sentence does not contain exactly one
    matching dependency container or if a `dep` element has no `type`.
    """
    soup = _soup(xml)
    # The return value is not used below; the call itself is retained.
    _token_by_ids(soup)

    # sentence id -> token id -> annotation, filled from the _pos triples.
    ann_by_ids = defaultdict(dict)
    for s_id, t_id, ann in _pos(xml):
        ann_by_ids[s_id][t_id] = ann
        yield ann

    curr_rel_id = 1
    # Element.iter() replaces getiterator(), which newer ElementTree
    # versions no longer provide.
    for sent_e in _find_sentences_element(soup).iter('sentence'):
        sent_id = int(sent_e.get('id'))

        # Attempt to find dependencies as distinctly named elements as they
        # were stored in the Stanford XML format prior to 2013.
        deps_e = sent_e.findall(source_element)
        if len(deps_e) == 0:
            # Perhaps we are processing output following the newer standard,
            # check for the same identifier but as a type attribute for
            # general "dependencies" elements. Use .get() so an element
            # without a type attribute is skipped rather than raising
            # KeyError.
            deps_e = [e for e in sent_e.iter('dependencies')
                      if e.get('type') == source_element]
        assert len(deps_e) == 1
        deps_e = deps_e[0]

        for dep_e in deps_e:
            if dep_e.tag != 'dep':
                # To be on the safe side
                continue

            dep_type = dep_e.get('type')
            assert dep_type is not None

            if dep_type == 'root':
                # Skip dependencies to the root node, this behaviour conforms
                # with how we treated the pre-2013 format.
                continue

            gov_tok_id = int(dep_e.find('governor').get('idx'))
            dep_tok_id = int(dep_e.find('dependent').get('idx'))

            yield BinaryRelationAnnotation(
                'R%s' % curr_rel_id, dep_type,
                'Governor', ann_by_ids[sent_id][gov_tok_id].id,
                'Dependent', ann_by_ids[sent_id][dep_tok_id].id,
                ''
            )
            curr_rel_id += 1
mods = ModificationTracker() cidmap = {} for cid, ann in ((i, a) for i, a in json_resp.iteritems() if _is_relation(a)): assert 'rel_type' in ann, 'Tagger response lacks rel_type' rel_type = ann['rel_type'] assert 'arg1' in ann, 'Tagger response lacks arg1' arg1 = ann['arg1'] assert 'arg2' in ann, 'Tagger response lacks arg2' arg2 = ann['arg2'] _id = ann_obj.get_new_id('R') cidmap[cid] = _id tb = BinaryRelationAnnotation(_id, rel_type, 'Arg1', arg1, 'Arg2', arg2, "") mods.addition(tb) ann_obj.add_annotation(tb) mod_resp = mods.json_response() mod_resp['annotations'] = _json_from_ann(ann_obj) return mod_resp if __name__ == '__main__': # Silly test, but helps tag('/BioNLP-ST_2011_ID_devel', 'PMC1874608-01-INTRODUCTION', 'random')