def main(): """Quick tests.""" a = Attribute('hour', ['0,...,23']) a2 = Attribute('minute', ['0,...,59']) r_ahead = Relation('R1(h1,m1,h2,m2) <=> h1 > h2 or (h1 = h2 and m1 > m2)', ['hour', 'minute', 'hour', 'minute'], 1) r_behind = Relation('R2(h1,m1,h2,m2) <=> h1 < h2 or (h1 = h2 and m1 < m2)', ['hour', 'minute', 'hour', 'minute'], 2) r_pm = Relation('R3(h1) <=> h1 > 12', ['hour'], 3) r_am = Relation('R4(h1) <=> h1 < 12', ['hour'], 4) attribute_structure = AttributeStructure(a, a2, r_ahead, r_behind, r_pm, r_am) ahead_rs = RelationSymbol('Ahead', 4) behind_rs = RelationSymbol('Behind', 4) pm_rs = RelationSymbol('PM', 1) vocabulary = Vocabulary(['C1', 'C2'], [ahead_rs, behind_rs, pm_rs], ['V1', 'V2']) profiles = [[ ahead_rs, ('hour', 1), ('minute', 1), ('hour', 2), ('minute', 2) ], [behind_rs, ('hour', 1), ('minute', 1), ('hour', 2), ('minute', 2)], [pm_rs, ('hour', 1)]] mapping = {ahead_rs: 1, behind_rs: 2, pm_rs: 3} ai = AttributeInterpretation(vocabulary, attribute_structure, mapping, profiles) print ai == ai
def ReadNextRelation(self):
    """
    Read in the next relation; it might need to be read from disk or it
    might already be in the buffer.
    """
    if self.filePointer is None:
        return None
    line = self.filePointer.readline()
    # print 'Line:', line
    if len(line) > 0:
        line = line.strip()
    else:
        return None
    if len(line) < 3:
        return self.ReadNextRelation()
    if line.find('set ') == 0:
        end = line.find('\t')
        if end == -1:
            end = -2
        self.currentArticle = (line[5:end]).strip()
        return self.ReadNextRelation()
    elif line.find('endset;') == 0:
        return self.ReadNextRelation()
    elif len(self.currentArticle) < 3:
        return self.ReadNextRelation()
    else:
        # end = line.find('+')
        r = Relation(line, self.currentArticle)
        if r.success:
            return r
        else:
            return Relation('', '')
def get_matches(images, dist):
    # Create the parameters for FLANN
    index_params = {'algorithm': 1, 'trees': 5}
    search_params = {'checks': 50}

    # Create the FLANN object
    flann = cv.FlannBasedMatcher(index_params, search_params)

    # Loop through the images
    for image_a, image_b in combinations(images, 2):
        # Get the descriptions
        desc_a = image_a.descriptions
        desc_b = image_b.descriptions

        # Get the matches
        matches = flann.knnMatch(desc_a, desc_b, k=2)

        # The matches indices
        match_a = []
        match_b = []

        # Filter the matches
        for m, n in matches:
            if m.distance < dist * n.distance:
                # Add the point
                match_a.append(m.queryIdx)
                match_b.append(m.trainIdx)

        # Update the matches
        match_a = np.array(match_a).reshape((-1, 1))
        match_b = np.array(match_b).reshape((-1, 1))

        # Set the matches
        match_a, match_b = (np.append(match_a, match_b, axis=1),
                            np.append(match_b, match_a, axis=1))

        # Get the points
        points_a = image_a.points[match_a[:, 0]]
        points_b = image_b.points[match_b[:, 0]]

        # Calculate the essential matrix
        # F, mask = cv.findFundamentalMat(points_a, points_b)
        # E = np.matmul(image_b.intrinsic.T, np.matmul(F, image_a.intrinsic))
        E, mask = cv.findEssentialMat(points_a, points_b, image_a.intrinsic)
        # E, mask = cv.findEssentialMat(points_a, points_b)

        # Remove the points that don't match the mask
        match_a = match_a[(mask == 1)[:, 0]]
        match_b = match_b[(mask == 1)[:, 0]]

        # Create the relations
        rel_a = Relation(E, image_a, image_b, match_a, match_b)
        rel_b = Relation(E, image_b, image_a, match_b, match_a)

        # Add the relations
        image_a.add_relation(rel_a)
        image_b.add_relation(rel_b)
def extract_date_of_birth(sentence):
    data = sentence['annotation']
    predicate = "DateOfBirth"
    results = []
    string = ""
    rel = Relation("", "", "")
    for items in data:
        word = items[1]
        type = items[4]
        new = word + "(" + type + ") "
        string += new
    quoteObj = re.search(r'(\"\(I-PERSON\)) (\w*)\(I-PERSON\) (\"\(I-PERSON\))', string)
    if quoteObj is not None:
        quote1 = str(quoteObj.group(1))
        quote1 = re.sub(r'\(I-PERSON\)', '', quote1)
        name = str(quoteObj.group(2))
        quote2 = str(quoteObj.group(3))
        quote2 = re.sub(r'\(I-PERSON\)', '', quote2)
        enclosedName = quote1 + name + quote2 + "(I-PERSON)"
        string = re.sub(r'(\"\(I-PERSON\)) (\w*)\(I-PERSON\) (\"\(I-PERSON\))', enclosedName, string)
    fullDateObj = re.search(
        r'(\w*\.*\(B-PERSON\)) (\"*\w*\.*\"*\(I-PERSON\))? ?(\"*\w*\.*\"*\(I-PERSON\))? ?(\"*\w*\.*\"*\(I-PERSON\))?.*born.*?(\w*\(B-DATE\)) ?(.*\(I-DATE\))?',
        string)
    if fullDateObj is not None:
        firstName = str(fullDateObj.group(1))
        secondName = str(fullDateObj.group(2))
        thirdName = str(fullDateObj.group(3))
        fourthName = str(fullDateObj.group(4))
        month = str(fullDateObj.group(5))
        day_Year = str(fullDateObj.group(6))
        firstName = re.sub(r'\(B-PERSON\)', '', firstName)
        secondName = re.sub(r'\(I-PERSON\)', '', secondName)
        thirdName = re.sub(r'\(I-PERSON\)', '', thirdName)
        fourthName = re.sub(r'\(I-PERSON\)', '', fourthName)
        fullName = firstName + " " + secondName + " " + thirdName + " " + fourthName
        fullName = fullName.replace(" None", "")
        month = re.sub(r'\(B-DATE\)| ', '', month)
        day_Year = re.sub(r'\(I-DATE\)', '', day_Year)
        day_Year = re.sub(r' , ', ', ', day_Year)
        day_Year = day_Year[0:20]
        day_YearObj = re.search(r'^(.*?[0-9]{4}).*', day_Year)
        if day_YearObj is not None:
            day_Year = str(day_YearObj.group(1))
        monthObj = re.search(r'(^[0-9]{4}).*', month)
        if monthObj is not None:
            day_Year = "None"
        DOB = month + " " + day_Year
        DOB = DOB.replace(" None", "")
        rel = Relation(fullName, predicate, DOB)
        results.append(rel)
    return results
def getRelations(self, doc: Doc) -> [Relation]:
    relations = []
    matches = self._matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        hypernym = span.root.text
        hyponym = span.text.split()[-1]
        relations.append(Relation(hypernym, hyponym))
        for right in span.rights:
            if right.pos_ == "NOUN":
                relations.append(Relation(hypernym, right.text))
    return relations
def main(): """quick dev tests.""" from interval import Interval from relationSymbol import RelationSymbol from vocabulary import Vocabulary from attribute_interpretation import AttributeInterpretation from formula import Formula from assumption_base import AssumptionBase from attribute import Attribute from relation import Relation from attribute_structure import AttributeStructure from attribute_system import AttributeSystem from constant_assignment import ConstantAssignment from named_state import NamedState from context import Context from variable_assignment import VariableAssignment a = Attribute('hour', [Interval(0, 23)]) a2 = Attribute('minute', [Interval(0, 59)]) r_pm = Relation('R1(h1) <=> h1 > 11', ['hour'], 1) r_am = Relation('R2(h1) <=> h1 <= 11', ['hour'], 2) r_ahead = Relation('R3(h1,m1,h2,m2) <=> h1 > h2 or (h1 = h2 and m1 > m2)', ['hour', 'minute', 'hour', 'minute'], 3) r_behind = Relation('R4(h1,m1,h2,m2) <=> h1 < h2 or (h1 = h2 and m1 < m2)', ['hour', 'minute', 'hour', 'minute'], 4) attribute_structure = AttributeStructure(a, a2, r_ahead, r_behind, r_pm, r_am) pm_rs = RelationSymbol('PM', 1) am_rs = RelationSymbol('AM', 1) ahead_rs = RelationSymbol('Ahead', 4) behind_rs = RelationSymbol('Behind', 4) vocabulary = Vocabulary(['C1', 'C2'], [pm_rs, am_rs, ahead_rs, behind_rs], ['V1', 'V2']) objs = ['s1', 's2', 's3'] asys = AttributeSystem(attribute_structure, objs) const_mapping_2 = {'C1': 's1'} p2 = ConstantAssignment(vocabulary, asys, const_mapping_2) ascriptions_1 = { ("hour", "s1"): [13, 15, 17], ("minute", "s1"): [10], ("hour", "s2"): [1, 3, 5], ("minute", "s2"): [10], ("hour", "s3"): [1, 3, 5], ("minute", "s3"): [10] } named_state_4 = NamedState(asys, p2, ascriptions_1)
def generate_nonexp_relations(self, article):
    for para in article.paragraphs:
        for s1, s2 in zip(para.sentences[:-1], para.sentences[1:]):
            if not article.has_exp_relation(s1.id):
                # TODO: Add detail implementation
                rel = Relation()
                rel.article = article
                rel.doc_id = article.id

                rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
                rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
                rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
                rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

                rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
                rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
                rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
                rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

                article.nonexp_relations.append(rel)
def _process_parsed_conn(self, articles, which='test'):
    """Generate an explicit relation for each true discourse connective."""
    connParser = Connective()
    conn_feat_name = FILE_PATH + '/../tmp/conn.feat'
    conn_feat_file = codecs.open(conn_feat_name, 'w', 'utf-8')
    checked_conns = []
    for art in articles:
        checked_conns.append(connParser.print_features(art, which, conn_feat_file))
    conn_feat_file.close()

    conn_pred_name = FILE_PATH + '/../tmp/conn.pred'
    Corpus.test_with_opennlp(conn_feat_name, connParser.model_file, conn_pred_name)
    conn_res = [
        l.strip().split()[-1]
        for l in codecs.open(conn_pred_name, 'r', 'utf-8')
    ]

    assert len(checked_conns) == len(articles), 'article size not match'
    s = 0
    for art, cand_conns in zip(articles, checked_conns):
        length = len(cand_conns)
        cand_res = conn_res[s:s + length]
        s += length
        for conn, label in zip(cand_conns, cand_res):
            if label == '1':
                rel = Relation()
                rel.doc_id = art.id
                rel.rel_type = 'Explicit'
                rel.article = art
                rel.conn_leaves = conn
                rel.conn_addr = [n.leaf_id for n in conn]
                art.exp_relations.append(rel)
    assert s == len(conn_res), 'conn size not match'
def getdatabaserelation(self):
    reloid = 1262
    if not self.pg_database_attr:
        self.pg_database_attr = [
            PgAttribute95(1262, "datname", 19, -1, Catalog95.NAMEDATALEN, 1, 0, -1, -1, False, 'p', 'c', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datdba", 26, -1, 4, 2, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "encoding", 23, -1, 4, 3, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datcollate", 19, -1, Catalog95.NAMEDATALEN, 4, 0, -1, -1, False, 'p', 'c', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datctype", 19, -1, Catalog95.NAMEDATALEN, 5, 0, -1, -1, False, 'p', 'c', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datistemplate", 16, -1, 1, 6, 0, -1, -1, True, 'p', 'c', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datallowconn", 16, -1, 1, 7, 0, -1, -1, True, 'p', 'c', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datconnlimit", 23, -1, 4, 8, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datlastsysoid", 26, -1, 4, 9, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datfrozenxid", 28, -1, 4, 10, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datminmxid", 28, -1, 4, 11, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "dattablespace", 26, -1, 4, 12, 0, -1, -1, True, 'p', 'i', True, False, False, True, 0, 0),
            PgAttribute95(1262, "datacl", 1034, -1, -1, 13, 1, -1, -1, False, 'x', 'i', False, False, False, True, 0, 0)
        ]
    relfilenode = RelFileNode()
    relfilenode.space_node = Catalog95.GLOBALTABLESPACE_OID
    relfilenode.db_node = 0
    relfilenode.rel_node = self.getsharerelmap()[reloid]
    databaserelation = Relation(reloid, 'pg_database', self.pg_database_attr, relfilenode)
    return databaserelation
def getRelations(self, doc: Doc) -> [Relation]:
    relations = []
    matches = self._matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        for sent in doc.sents:
            for token in sent:
                # Find the relation
                if token.text == "including" and token.head.i == span.root.i:
                    for token2 in sent:
                        # First hyponym
                        if token2.head.i == token.i:
                            results = set()
                            results.add(span.text.split()[-1])
                            # Other hyponyms
                            for token3 in sent:
                                startToken = token3
                                while startToken and startToken.head.i != sent.root.i and startToken.i != token2.i:
                                    if startToken.pos_ == "NOUN":
                                        results.add(startToken.text)
                                    startToken = startToken.head
                            if len(results) > 0:
                                hypernym = span.text.split()[0].replace(',', '')
                                for result in results:
                                    relations.append(Relation(hypernym, result))
    return relations
def read_relations(rel_path):
    rel_dict = defaultdict(list)
    for x in open(rel_path):
        rel = Relation()
        rel.init_with_annotation(json.loads(x))
        rel_dict[rel.doc_id].append(rel)
    return rel_dict
def relation(self, type):
    relation = Relation()
    relation.setId1(self.currentPersonId)
    relation.setId2(self.CurrentEntityId)
    relation.setType(type)
    relation.setStrength(self.getRelationStrength(self.person.getReference()))
    self.dao.insertRelation('t_relation', relation, self.cur, self.conn)
def main():
    customer = Relation(Schema("Customer", "id, fname, lname, age, height"))
    customer.add(id=18392, fname="Frank", lname="Smith", age=45, height="5'8")
    customer.add(id=48921, fname="Jane", lname="Doe", age=42, height="5'6")

    print(customer)
    # output:
    #
    # Customer
    # id     fname  lname  age  height
    # 48921  Jane   Doe    42   5'6
    # 18392  Frank  Smith  45   5'8

    print()
    print(Pi["fname", "id"](customer))
    # output:
    #
    # Customer__fname_id
    # fname  id
    # Frank  18392
    # Jane   48921

    print()
    print(Sigma[lambda tup: tup.age > 43](customer))
    # output:
    #
    # Customer
    # id     fname  lname  age  height
    # 18392  Frank  Smith  45   5'8

    print()
    print(Rho["MySchema"](Pi["fname", "id"](customer)))
def create_relation_dictionary(config, document, rel_dict=None):
    if not rel_dict:
        rel_dict = {}
    for r in document.getRelations().getRelations():
        rel_type = r.getType()
        matching_rel_configs = [rc for rc in config['relations'] if rel_type in rc["types"]]
        if not len(matching_rel_configs):
            continue
        annotation_from = r.getAnnotationFrom()
        annotation_to = r.getAnnotationTo()
        for rel_conf in matching_rel_configs:
            if (any(re.match(_from, annotation_from.getType()) for _from in rel_conf['from'])
                    and any(re.match(_to, annotation_to.getType()) for _to in rel_conf['to'])) \
                    or ("allow_reversed" in rel_conf and rel_conf["allow_reversed"]
                        and any(re.match(_from, annotation_from.getType()) for _from in rel_conf['to'])
                        and any(re.match(_to, annotation_to.getType()) for _to in rel_conf['from'])):
                relation_position = get_relation_position(r)
                # print(rel_type, annotation_from.getType(), annotation_to.getType())
                # if relation_position in rel_dict:
                #     print(relation_position, "already in dict", rel_type, rel_dict[relation_position].getType())
                rel_dict[relation_position] = Relation(rel_type, annotation_from, annotation_to)
                continue
        # if any(
        #         any(re.match(_from, annotation_from.getType()) for _from in rel_conf['from']) and
        #         any(re.match(_to, annotation_to.getType()) for _to in rel_conf['to'])
        #         for rel_conf in matching_rel_configs):
        #     relation_position = self.get_relation_position(r)
        #     true_rel_dict[relation_position] = Relation(rel_type, annotation_from, annotation_to)
    return rel_dict
def _split(rel: Relation, fd: FD) -> tuple:
    """Splits a relation on a FD according to BCNF."""
    r1 = rel.closure(fd.determinants)
    r2 = rel.relation - (r1 - fd.determinants)

    new_r1 = Relation()
    new_r1.relation = r1
    new_r1.fds = rel.get_fds_copy()

    new_r2 = Relation()
    new_r2.relation = r2
    new_r2.fds = rel.get_fds_copy()

    return new_r1, new_r2
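# A minimal sketch (not part of the original module) of how _split above could
# drive a full BCNF decomposition. It assumes the same local API seen in _split
# plus two hypothetical members: rel.fds is iterable over FD objects, and each
# FD exposes `determinants` and `dependents` as attribute sets.
def bcnf_decompose(rel: Relation) -> list:
    for fd in rel.fds:
        is_superkey = rel.closure(fd.determinants) == rel.relation
        is_trivial = fd.dependents <= fd.determinants
        if not is_superkey and not is_trivial:
            # fd violates BCNF: split on it and recurse on both halves
            r1, r2 = _split(rel, fd)
            return bcnf_decompose(r1) + bcnf_decompose(r2)
    return [rel]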
def getRelations(self, doc: Doc) -> [Relation]:
    relations = []
    matches = self._matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        candidates = set()
        for sent in doc.sents:
            for token in sent:
                # find relation
                if token.i == span.root.i:
                    for token2 in sent:
                        # first hyponym
                        if token2.head.i == token.i:
                            for token3 in sent:
                                startToken = token3
                                while startToken and startToken.head.i != sent.root.i and startToken.i != token2.i:
                                    if startToken.pos_ == 'NOUN':
                                        candidates.add(startToken)
                                    startToken = startToken.head
        if len(candidates) > 0:
            hypernym = span.text.split()[0].replace(',', '')
            for candidate in candidates:
                relations.append(Relation(hypernym, candidate.text, self.__matcherId))
    return relations
def getRelations(self, doc: Doc) -> [Relation]:
    relations = []
    matches = self._matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]
        firstToken = span.root.head
        results = [firstToken]
        while firstToken and firstToken.head.pos_ == "NOUN":
            results.append(firstToken.head)
            firstToken = firstToken.head
        hypernym = span.text.split()[-1]
        relations.append(Relation(hypernym, span.text.split()[0]))
        if len(results) > 0:
            for result in results:
                relations.append(Relation(hypernym, result.text))
    return relations
def main(): """Main method; quick testing.""" a, b, c = Attribute("a", []), Attribute("b", []), Attribute("c", []) r = Relation("R1(a,b) <=> ", ["a", "b"], 1) astr = AttributeStructure() print astr + a + b + r
def add_to_catalog(self, catalog_obj, node_id):
    '''Return the relation object (existing or newly created).'''
    tmp_lib = self.lib.parent_catalogs
    tmp = tmp_lib[str(catalog_obj._id) + '#' + node_id]
    if tmp is not None:
        return Relation(_id=tmp)
    rr = catalog_obj.recommend_subcatalog(node_id, self)
    tmp_lib[str(catalog_obj._id) + '#' + node_id] = rr._id
    return rr
def generate_nonexp_relations(self, article):
    for s1, s2 in zip(article.sentences[:-1], article.sentences[1:]):
        if not article.has_exp_inter_relation(s1.id):
            # TODO: Add detail implementation
            rel = Relation()
            rel.article = article
            rel.doc_id = article.id

            rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
            rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
            rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
            rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
            rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

            rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
            rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
            rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
            rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
            rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

            article.nonexp_relations.append(rel)

    # sentence intra nonexp relation
    for sen in article.sentences:
        tree = sen.tree
        if len(sen.clauses) <= 1:
            continue
        for c1, c2 in zip(sen.clauses[:-1], sen.clauses[1:]):
            if not article.has_exp_intra_relation(sen.id):
                rel = Relation()
                rel.article = article
                rel.doc_id = article.id

                rel.arg1s['parsed'] = tree.find_subtrees(c1)
                rel.arg1_leaves = self.remove_leading_tailing_punc(c1)
                rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                rel.arg1_sid = sen.id
                rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

                rel.arg2s['parsed'] = tree.find_subtrees(c2)
                rel.arg2_leaves = self.remove_leading_tailing_punc(c2)
                rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                rel.arg2_sid = sen.id
                rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

                article.nonexp_relations.append(rel)
def get_all_relations(self):
    '''
    Return a generator of instances of class Relation.

    @rtype: generator
    @return: generator of instances of class Relation
    '''
    path_to_rels = "SynsetRelations/SynsetRelation"
    for relation_el in self.synset_el.iterfind(path_to_rels):
        yield Relation(relation_el)
def recommend_subcatalog(self, node_id, subcatalog_obj):
    rr = Relation(attrs={"relation_type": "catalog-%s" %
                         subcatalog_obj.__class__.__name__})
    rr.set_relation_set(self, subcatalog_obj)
    tmp = self.get_node_list(node_id, 'catalogs')
    tmp.push(rr._id)
    self.lib.relations_list.push(rr._id)
    self.get_node_dict(node_id)['subcatalog_count'] += 1
    self.do_update()
    return rr
def recommend_article(self, node_id, article_obj):
    if self.get_node_dict(node_id)['title'] is None:
        return None
    rr = Relation(attrs={"relation_type": "catalog-%s" % article_obj.cls_name})
    rr.set_relation_set(self, article_obj)
    tmp_list = self.get_node_list(node_id, 'articles')
    tmp_list.push(rr._id)
    self.lib.relations_list.push(rr._id)
    self.get_node_dict(node_id)['article_count'] += 1
    self.do_update()
    return rr
def test_get_attributes_closure(self):
    complete_attrs = set(list('ABCDEFGHI'))
    dep_pairs = [('A', 'B'),
                 ('A', 'C'),
                 ('CG', 'H'),
                 ('CG', 'I'),
                 ('B', 'H')]
    deps = dependency_generater(dep_pairs)
    r = Relation(attrs=complete_attrs, deps=deps)
    attrs = set(['A', 'G'])
    expect_closure = set(['A', 'B', 'C', 'G', 'H', 'I'])
    closure = r.get_attrs_closure(attrs)
    self.assertEqual(expect_closure, closure)
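# A minimal, self-contained sketch (not the tested implementation) of the
# standard attribute-closure fixpoint that get_attrs_closure is expected to
# compute: keep adding the right-hand side of every dependency whose left-hand
# side is already contained in the closure, until nothing changes.
def attrs_closure(attrs, dep_pairs):
    closure = set(attrs)
    changed = True
    while changed:
        changed = False
        for lhs, rhs in dep_pairs:
            if set(lhs) <= closure and not set(rhs) <= closure:
                closure |= set(rhs)
                changed = True
    return closure

# With the dep_pairs from the test above, attrs_closure({'A', 'G'}, dep_pairs)
# yields {'A', 'B', 'C', 'G', 'H', 'I'}, matching expect_closure.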
def extract_has_parent(sentence):
    # extract the has parent relation from sentence.
    out = []
    predicate = "HasParent"
    annotation = sentence["annotation"]
    text = sentence["text"]
    words = iob_2_ner_span_parent(ioblist=[[x[1], x[3], x[4]] for x in annotation])
    parents = []
    son = ''
    find_born = False
    for i in range(len(words)):
        if words[i][1] == 'PERSON':
            if son == '':
                son = words[i][0]
            else:
                if i > 0 and (words[i - 1][1] == 'TO' or words[i - 1][1] == 'RELATION'):
                    if words[i - 1][1] == 'RELATION':
                        parents.append(words[i][0])
                    else:
                        for j in range(i - 1):
                            if words[j][1] == 'BORN':
                                find_born = True
                                break
                        if find_born:
                            parents.append(words[i][0])
                            find_born = False
                elif i > 0 and words[i - 1][1] == 'AND' and len(parents) != 0:
                    parents.append(words[i][0])
                elif i > 2 and words[i - 2][1] == 'AND' and words[i - 1][1] == 'Mr':
                    parents.append(words[i][0])
                elif i > 2 and words[i - 2][1] == 'TO' and words[i - 1][1] == 'Mr':
                    parents.append(words[i][0])
                elif i > 2 and words[i - 2][1] == 'RELATION' and words[i - 1][1] != 'PERSON':
                    parents.append(words[i][0])
    if len(parents) != 0:
        for p in parents:
            rel = Relation(son, predicate, p)
            out.append(rel)
    return out
def extract_date_of_birth(sentence):
    # extract the date of birth relation from the sentence
    results = []
    person = []
    date = []
    predicate = "DateOfBirth"
    annotation = sentence["annotation"]
    text = sentence["text"]
    ner = iob_2_ner_span(ioblist=[[x[1], x[3], x[4]] for x in annotation])
    for i in range(len(ner)):
        if ner[i][1] == 'O' and ('born' in ner[i][0] or 'Born' in ner[i][0]):
            for n in range(len(ner)):
                if ner[n][1] == 'PERSON':
                    person.append([ner[n][0], n])
            for m in range(len(ner)):
                if ner[m][1] == 'DATE':
                    date.append([ner[m][0], m])
            if len(date) == 1:
                if date[0][1] > person[0][1]:
                    rel = Relation(person[0][0], predicate, date[0][0])
                    results.append(rel)
                elif date[0][1] < person[0][1]:
                    rel = Relation(person[0][0], predicate, date[0][0])
                    results.append(rel)
            if len(date) >= 2:
                if person[0][1] > date[0][1]:
                    rel = Relation(person[0][0], predicate, date[0][0])
                    results.append(rel)
                elif person[0][1] < date[0][1]:
                    rel = Relation(person[0][0], predicate, date[0][0])
                    results.append(rel)
    return results
def getRelation(table, fromName, toName):
    """Read the relation mapping table."""
    relation = Relation(fromName, toName)
    for line in sopen(table):
        if line.startswith('#'):
            continue
        flds = line.rstrip().split()
        chs = flds[0]
        for cht in flds[1:]:
            relation.add(chs, cht)
    return relation
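# A hypothetical illustration (not taken from the original source) of the table
# format that getRelation above can read: '#' lines are comments, and each data
# line holds one source form followed by whitespace-separated target forms,
# for example a simplified character and its traditional variants:
#
#   # simplified  traditional ...
#   发  發 髮
#   后  後 后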
def create_char_relation(df_bibleTA_distilled):
    """Create all Character and Relation objects from the distilled CSV.

    The CSV is generated by the distillDataFrame function in eval_graph.py and
    has the following form:

        ,character_A,character_B,emotion
        0, God,Abraham,0.0
        1, God,ye,0.0

    One thing to remember is that the relations are distinct, so the same
    relationship never appears in two separate rows.
    """
    # Each time we save a character or a relation we would have to do expensive
    # I/O operations on a .pkl file, so everything is collected first and saved once.
    character_list = []
    character_name_list = []
    relation_list = []
    for _, row in df_bibleTA_distilled.iterrows():
        character_A_name, character_B_name, emotion = (
            row["character_A"].lstrip(),
            row["character_B"].lstrip(),
            row["emotion"],
        )
        # check if we already encountered the character in a previous loop iteration
        if character_A_name in character_name_list:
            # get the character from the character_list
            for character in character_list:
                if character.get_name() == character_A_name:
                    character_A = character
            character_A_exists = False
        else:
            character_A = Character(character_A_name)
            character_name_list.append(character_A_name)
            character_list.append(character_A)
            character_A_exists = True
        if character_B_name in character_name_list:
            # get the character from the character_list
            for character in character_list:
                if character.get_name() == character_B_name:
                    character_B = character
            character_B_exists = False
        else:
            character_B = Character(character_B_name)
            character_name_list.append(character_B_name)
            character_list.append(character_B)
            character_B_exists = True
        relation = Relation(character_A, character_B, emotion)
        character_A.add_relation(relation)
        character_B.add_relation(relation)
        relation_list.append(relation)

    picklehandler = PickleHandler()
    picklehandler.save_override_character_list(character_list)
    picklehandler.save_override_relation_list(relation_list)
def extract_parent_relations(sentence):
    parents = r"""
    BORN: {<VBD>?<VBN><IN|PERSON|CC>*}
    ADDNINFO: {<-LRB-><.|..|PERSON|DATE|BORN|PARENTS>*<-RRB->}
    PARENTS: {<IN><.|..|...|DATE|HYPH>*<PERSON><.|..|...|DATE|ADDNINFO|HYPH>*<CC><.|..|...|DATE|BORN|PRP.>*<PERSON>}
             {<BORN><IN><PERSON>}
             {<BORN>*<PERSON><CC><PERSON>}
             {<DT|NN|IN|DATE>+<PERSON><CC>*<PERSON>*}
    RELATION: {<BORN>*<.|..|...|DATE|ADDNINFO|PRP.>*<PERSON><BORN>*<.|..|...|DATE|ADDNINFO|BORN|PRP.>*<PARENTS>}
    """
    results = []
    predicate = "HasParent"
    annotation = sentence["annotation"]
    text = sentence["text"]
    tagged_sentence = [(x[1], x[3], x[4]) for x in annotation]
    token_list = build_sentence_tree_parent(tagged_sentence)
    cp = nltk.RegexpParser(parents, loop=3)
    # print(text)
    PARENT_RELATION = cp.parse(token_list)
    # print(PARENT_RELATION)
    # print("Person List")
    relation_list = []
    for subtree in PARENT_RELATION.subtrees(filter=lambda t: t.label() == 'RELATION'):
        subject = []
        parent_names = []
        ts = ()
        for info in subtree:
            if type(info) != type(ts) and info.label() == 'PERSON':
                subject.extend([x[0] for x in info.leaves()])
        for parents_rel in subtree.subtrees(filter=lambda t: t.label() == 'PARENTS'):
            for node in parents_rel:
                if type(node) != type(ts) and node.label() == 'PERSON':
                    parent_names.append(" ".join([x[0] for x in node.leaves()]))
        # print(subject)
        # print(parent_names)
        for name in parent_names:
            rel = Relation(" ".join(subject), predicate, name)
            relation_list.append(rel)
    return relation_list
def get_relations(self, reltype):
    '''
    Return the list of instances of class Relation that match the relation type.

    @type reltype: str
    @param reltype: relation type (the most typical are has_hyperonym and has_hyponym)
    @rtype: list
    @return: list of instances of class Relation
    '''
    xml_query = '''SynsetRelations/SynsetRelation[@relType="%s"]''' % reltype
    return [
        Relation(relation_el)
        for relation_el in self.synset_el.iterfind(xml_query)
    ]