def get_ends_phrases(self, story, found_mv_phrase, assume=True):
    """Attach phrase and compound details to the ends part of ``story``.

    Args:
        story: Story whose ``ends`` part is updated in place.
        found_mv_phrase: When falsy, the main-verb phrase and type are derived
            here through ``MinerUtility.get_phrasal_verb``.
        assume: When true, the main object's phrase and compound are resolved
            too; when false that step is skipped.

    Returns:
        The same ``story`` object.
    """
    ends = story.ends

    if assume:
        target = ends.main_object

        # Later noun chunks overwrite earlier ones, so the last match is kept.
        for chunk in ends.text.noun_chunks:
            if target.main in chunk:
                target.phrase = chunk

        if target.phrase:
            head = target.main

            # Prefer the token directly to the left when it is a compound
            # modifier attached to the main object.
            left_is_compound = False
            if head.i > 0:
                left = head.nbor(-1)
                left_is_compound = (
                    NLPUtility.is_compound(left) and left.head == head
                )

            if left_is_compound:
                target.compound = [head.nbor(-1), head]
            else:
                for token in target.phrase:
                    if NLPUtility.is_compound(token) and token.head == target.main:
                        target.compound = [token, target.main]

    ends_subj = ends.subject.main
    subject_word = str.lower(ends_subj.text)
    # An empty subject or the pronoun 'I' gets no phrase/compound lookup.
    if subject_word not in ('', 'i'):
        for chunk in ends.text.noun_chunks:
            if ends.subject.main in chunk:
                ends.subject.phrase = chunk

        if ends.subject.phrase:
            for token in ends.subject.phrase:
                if NLPUtility.is_compound(token) and token.head == ends.subject.main:
                    ends.subject.compound = [token, ends.subject.main]

    if not found_mv_phrase:
        phrasal = MinerUtility.get_phrasal_verb(
            story, ends.main_verb.main, 'ends.text')
        ends.main_verb.phrase = MinerUtility.get_span(
            story, phrasal[0], 'ends.text')
        ends.main_verb.type = phrasal[1]

    return story
def make(stories, weights):
    """Pair every token of every story part with its weight.

    Args:
        stories: Iterable of stories exposing ``has_ends`` and the parts
            ``role``, ``means`` and (when ``has_ends`` is true) ``ends``,
            each with an iterable ``text``.
        weights: Sequence of ``[case, weight]`` pairs. Cases are used as
            dictionary keys, so they are assumed to be hashable
            (presumably strings -- confirm against the caller).

    Returns:
        A list of ``WeightedToken`` objects in story/part/token order. Tokens
        whose case has no entry in ``weights`` get a weight of ``0.0``.
    """
    # For a case that occurs more than once the first entry wins, which is
    # what a front-to-back scan of ``weights`` would return.
    weight_by_case = {}
    for entry in weights:
        weight_by_case.setdefault(entry[0], entry[1])

    weighted_tokens = []
    for story in stories:
        parts = ['role', 'means', 'ends'] if story.has_ends else ['role', 'means']
        for part in parts:
            # Plain attribute lookup; no need to eval a built-up expression.
            for token in getattr(story, part).text:
                weight = weight_by_case.get(NLPUtility.case(token), 0.0)
                weighted_tokens.append(WeightedToken(token, weight))

    return weighted_tokens
def count_occurence(self, cm, sl, stories):
    """Count in which structural positions each known term occurs.

    For every story token whose case is present in the index of ``cm``, the
    story number is appended to the matching entries of ``sl`` and ``cm`` is
    updated through ``self.add`` for each category the token falls into.

    Args:
        cm: Matrix-like object with an ``index``; it is re-bound to the
            result of every ``self.add`` call.
        sl: List of ``[case, story_numbers]`` entries, mutated in place.
        stories: Iterable of stories to scan.

    Returns:
        The tuple ``(cm, sl)``.
    """
    for story in stories:
        for token in story.data:
            case = NLPUtility.case(token)
            if case not in cm.index.values:
                continue

            for entry in sl:
                if entry[0] == case:
                    entry[1].append(story.number)

            if self.is_phrasal('role.functional_role', token, story) == 1:
                cm = self.add(cm, case, 'Functional Role')
            elif self.is_phrasal('role.functional_role', token, story) == 2:
                cm = self.add(cm, case, 'Functional Role Compound')

            if self.is_phrasal('means.main_object', token, story) == 1:
                cm = self.add(cm, case, 'Main Object')
            elif self.is_phrasal('means.main_object', token, story) == 2:
                cm = self.add(cm, case, 'Main Object Compound')

            if self.is_freeform('means', token, story) == 1:
                cm = self.add(cm, case, 'Means Free Form Noun')

            # Either a phrasal hit on the ends main object or a free-form
            # hit counts, but only when the story has a free-form ends part.
            if story.ends.free_form and (
                    self.is_phrasal('ends.main_object', token, story) > 0
                    or self.is_freeform('ends', token, story) == 1):
                cm = self.add(cm, case, 'Ends Free Form Noun')

    return cm, sl
def get_role_means_ends(self, matrix, stories):
    """Flag, per story, in which parts each matrix term occurs.

    For every story part (role, means, ends) that has a truthy ``indicator``,
    the cell at row ``case`` and column ``(story.txtnr(), <part label>)`` is
    set to ``1`` for each ``case`` of the matrix index that occurs among the
    cases of that part's tokens.

    Args:
        matrix: pandas ``DataFrame`` whose index holds the term cases; it is
            modified in place. Columns are addressed with
            ``(story.txtnr(), 'Role' | 'Means' | 'Ends')`` tuples --
            presumably a two-level column index; confirm against the caller.
        stories: Iterable of stories.

    Returns:
        The same ``matrix`` object.
    """
    cases = matrix.index.values
    labelled_parts = (('role', 'Role'), ('means', 'Means'), ('ends', 'Ends'))

    for story in stories:
        for attr, label in labelled_parts:
            part = getattr(story, attr)
            if not part.indicator:
                continue

            # Computed once per story part instead of once per matrix row.
            present = [NLPUtility.case(token) for token in part.text]
            column = (story.txtnr(), label)
            for case in cases:
                if case in present:
                    # DataFrame.set_value was removed in pandas 1.0; ``.at``
                    # is the scalar setter that replaces it.
                    matrix.at[case, column] = 1

    return matrix
def get_namedict(self, tokens):
    """Map each token's ``lemma`` to the value of ``NLPUtility.case(token)``.

    When several tokens share a lemma, the last one determines the value.
    """
    return {token.lemma: NLPUtility.case(token) for token in tokens}
def get_verbs(story, span):
    """Return the story span made up of the verbs found in ``span``.

    Tokens for which ``NLPUtility.is_verb`` is true are collected in order
    and handed to ``MinerUtility.get_span`` with its default part.
    """
    verb_tokens = [token for token in span if NLPUtility.is_verb(token)]
    return MinerUtility.get_span(story, verb_tokens)
def get_nouns(story, span):
    """Return the tokens of ``span`` for which ``NLPUtility.is_noun`` is true.

    ``story`` is accepted for signature parity with the sibling helpers but
    is not used.
    """
    return [token for token in span if NLPUtility.is_noun(token)]
def get_lowest_threshold(self, relationship):
    """Return the lowest token weight found for ``relationship``.

    The weighted tokens come from ``self.get_weighted_tokens``. When there
    are none, ``1000.0`` is returned. Otherwise the search starts at the
    first token's weight and is lowered by any token whose lower-cased case
    differs from ``self.sysname``.

    NOTE(review): the first token seeds the minimum even when it is the
    system name itself -- confirm whether that is intended.
    """
    tokens = self.get_weighted_tokens(relationship)
    if not tokens:
        return 1000.0

    lowest = tokens[0].weight
    for candidate in tokens:
        # Exclude system name object from filter
        is_system = str.lower(NLPUtility.get_case(candidate)) == self.sysname
        if not is_system and candidate.weight < lowest:
            lowest = candidate.weight

    return lowest
def remove_verbs(self, matrix, stories):
    """Drop the rows of ``matrix`` that correspond to pure verbs.

    A case counts as a verb when all story tokens with that case collapse to
    a single distinct token and ``NLPUtility.is_verb`` accepts it. Verbs whose
    ``'sum'`` value is greater than zero are kept in the matrix.

    Args:
        matrix: pandas ``DataFrame`` indexed by case, with a ``'sum'`` column.
            Cases are used as dictionary keys, so they are assumed hashable.
        stories: Iterable of stories exposing ``data`` (their tokens).

    Returns:
        A filtered copy of ``matrix`` without the removable verb rows.
    """
    # Group the tokens by case in one pass instead of rescanning every story
    # for each row of the matrix.
    tokens_by_case = {}
    for story in stories:
        for token in story.data:
            tokens_by_case.setdefault(NLPUtility.case(token), []).append(token)

    verbs = []
    for case in matrix.index.values.tolist():
        occurrences = tokens_by_case.get(case, [])
        if len(set(occurrences)) == 1 and NLPUtility.is_verb(occurrences[0]):
            verbs.append(case)

    # Build a new list rather than removing from ``verbs`` while iterating
    # it, which skipped the element following every removal.
    removable = [verb for verb in verbs if not (matrix.loc[verb, 'sum'] > 0)]

    # ``~`` inverts a boolean mask; unary ``-`` on a boolean array is a
    # TypeError on current NumPy.
    return matrix[~matrix.index.isin(removable)]
def make_patterns(self, user_stories, threshold):
    """Identify patterns in the stories and build the ontology from them.

    Stores the lower-cased case of the first story's system name in
    ``self.sysname``, lets a ``PatternIdentifier`` process every story,
    filters the identified relationships with ``self.apply_threshold`` and
    passes the result to ``self.create``.

    Args:
        user_stories: Non-empty sequence of stories (the first one is read
            for the system name).
        threshold: Threshold forwarded to ``apply_threshold`` and ``create``.

    Returns:
        ``self.onto``.
    """
    identifier = PatternIdentifier(self.weighted_tokens)

    first_story = user_stories[0]
    self.sysname = str.lower(NLPUtility.case(first_story.system.main))

    for story in user_stories:
        identifier.identify(story)

    kept = self.apply_threshold(identifier.relationships, threshold)
    self.create(kept, user_stories, threshold, identifier.roles)

    return self.onto
def create(self, relationships, stories, threshold, roles):
    """Fill ``self.onto`` and ``self.prolog`` from the identified relationships.

    Each relationship ``r`` is read positionally: ``r[0]`` is passed through
    as the first argument of the store calls, ``r[1]`` and ``r[3]`` are turned
    into names with ``NLPUtility.get_case``, ``r[2]`` is compared against
    ``Pattern`` members and ``r[4]`` (read for every non-parent pattern)
    provides the relation name.

    Args:
        relationships: Iterable of relationship records as described above.
        stories: Stories searched by ``self.find_story`` for weighted tokens.
        threshold: Minimum weight for a weighted token to get its own class.
        roles: Iterable of records whose ``r[0]`` and ``r[1]`` are used to
            register role classes.

    Returns:
        None; the work is done through calls on ``self.onto`` and
        ``self.prolog``.
    """
    # NOTE(review): ``used`` is filled below but never read in this method.
    used = []

    for r in relationships:
        pre = NLPUtility.get_case(r[1])
        post = NLPUtility.get_case(r[3])

        # Parent pattern: ``post`` is passed as third argument when the class
        # for ``pre`` is requested, and an 'isa' relationship is recorded.
        if r[2] == Pattern.parent:
            self.onto.get_class_by_name(r[0], pre, post)
            self.prolog.new_relationship(r[0], pre, 'isa', post)

        # Only non-parent patterns carry a relation name in r[4].
        if r[2] != Pattern.parent:
            rel = NLPUtility.get_case(r[4])

        if r[2] == Pattern.subj_dobj or r[2] == Pattern.compound_has:
            self.onto.get_class_by_name(r[0], pre)
            self.onto.get_class_by_name(r[0], post)
            self.prolog.new_relationship(r[0], pre, rel, post)

            if r[2] == Pattern.subj_dobj:
                self.make_can_relationship(r[0], pre, rel, post)
            else:
                self.make_has_relationship(r[0], pre, rel, post)

        # Both ends of every relationship are requested from the prolog store.
        self.prolog.get_class_by_name(r[0], pre)
        self.prolog.get_class_by_name(r[0], post)

        used.append(pre)
        used.append(post)

    # Every sufficiently weighted token gets a class for each story that
    # ``find_story`` reports for it.
    for wo in self.weighted_tokens:
        if wo.weight >= threshold:
            in_stories = self.find_story(wo, stories)
            for in_story in in_stories:
                self.onto.get_class_by_name(in_story, wo.case)

    # Roles are requested with an empty third argument and True as the fourth.
    for r in roles:
        self.onto.get_class_by_name(r[0], NLPUtility.get_case(r[1]), '', True)
def remove_indicators(self, matrix, stories, nlp):
    """Drop the rows of ``matrix`` that belong to story indicators.

    The indicator strings of each story's role, means and (when
    ``story.has_ends``) ends parts are joined with spaces and parsed with
    ``nlp``; the case of every resulting token, plus every entry of
    ``story.indicators``, is a removal candidate. Candidates whose ``'sum'``
    value is greater than zero are kept in the matrix.

    Args:
        matrix: pandas ``DataFrame`` indexed by case, with a ``'sum'`` column.
        stories: Iterable of stories.
        nlp: Callable that turns a string into an iterable of tokens.

    Returns:
        A filtered copy of ``matrix`` without the removable indicator rows.
    """
    indicators = []
    for story in stories:
        text = story.role.indicator + " " + story.means.indicator
        if story.has_ends:
            text += " " + story.ends.indicator

        indicators.extend(NLPUtility.case(token) for token in nlp(text))
        indicators.extend(story.indicators)

    index = matrix.index
    # Build a new list rather than removing from ``indicators`` while
    # iterating it, which skipped the element following every removal.
    # Candidates absent from the index cannot match a row anyway, so they are
    # ignored instead of raising KeyError on the ``.loc`` lookup.
    removable = [
        indicator for indicator in indicators
        if indicator in index and not (matrix.loc[indicator, 'sum'] > 0)
    ]

    # ``~`` inverts a boolean mask; unary ``-`` on a boolean array is a
    # TypeError on current NumPy.
    return matrix[~index.isin(removable)]
def get_parts(self, class_name, story):
    """Tell in which parts of ``story`` the class name occurs.

    The class name is split on whitespace and looked up, with
    ``Utility.is_sublist``, in the token cases of the role, means and ends
    parts. For a one-word class name, tokens that belong to a compound of the
    respective part are left out of the lookup.

    Args:
        class_name: Name of the class; may consist of several words.
        story: Story to inspect.

    Returns:
        A list containing any of ``'Role'``, ``'Means'`` and ``'Ends'``.
    """
    case = class_name.split()
    single_word = len(case) == 1

    # NOTE(review): the main-object compound is appended as ONE nested
    # element, so a token is never equal to it in the membership test below;
    # ``extend`` may have been intended. Left unchanged -- confirm.
    means_compounds = []
    means_compounds.append(story.means.main_object.compound)
    ends_compounds = story.ends.compounds

    if story.means.free_form and len(story.means.compounds) > 0:
        if isinstance(story.means.compounds[0], list):
            # The outer ``for`` clause has to come first; the reversed order
            # raised NameError on ``sublist``.
            mc = [item
                  for sublist in story.means.compounds
                  for item in sublist]
        else:
            mc = story.means.compounds
        means_compounds.extend(mc)

    if len(ends_compounds) > 0 and isinstance(ends_compounds[0], list):
        ends_compounds = [item
                          for sublist in story.ends.compounds
                          for item in sublist]

    role = []
    means = []
    ends = []

    for token in story.data:
        if token in story.role.text:
            if not single_word or token not in story.role.functional_role.compound:
                role.append(NLPUtility.case(token))
        if token in story.means.text:
            if not single_word or token not in means_compounds:
                means.append(NLPUtility.case(token))
        if story.has_ends and token in story.ends.text:
            if not single_word or token not in ends_compounds:
                ends.append(NLPUtility.case(token))

    rme = []
    if Utility.is_sublist(case, role):
        rme.append('Role')
    if Utility.is_sublist(case, means):
        rme.append('Means')
    if Utility.is_sublist(case, ends):
        rme.append('Ends')

    return rme
def get_compound_nouns(story, span):
    """Collect compound-noun pairs from the nouns of ``span``.

    For every noun returned by ``MinerUtility.get_nouns`` each child that
    ``NLPUtility.is_compound`` accepts yields a ``[child, noun]`` pair.

    Args:
        story: Story passed through to the ``MinerUtility`` helpers.
        span: Iterable of tokens to search.

    Returns:
        The first collected pair (a two-item list) when any pair was found,
        otherwise an empty list.
    """
    compounds = []
    nouns = MinerUtility.get_nouns(story, span)

    for token in nouns:
        for child in token.children:
            if NLPUtility.is_compound(child):
                # Replace to take rightmost child
                if child.idx < token.idx:
                    # NOTE(review): entries are removed from ``compounds``
                    # while it is being iterated, so the entry following a
                    # removed one is skipped -- confirm this is acceptable.
                    for compound in compounds:
                        if child in compound or token in compound:
                            compounds.remove(compound)
                compounds.append([child, token])

    # NOTE(review): re-binding the loop variable does not change
    # ``compounds``; the ``get_span`` results are discarded.
    for c in compounds:
        c = MinerUtility.get_span(story, c)

    # Every entry is a list, so a non-empty result collapses to its first pair.
    if compounds and type(compounds[0]) is list:
        compounds = compounds[0]

    return compounds
def get_functional_role(self, story):
    """Determine the functional role (main token and compound) of a story.

    A one-token role text is the functional role itself. Otherwise compound
    pairs are searched in the role text -- cut off at the last 'with' / 'w/'
    token when there is one -- and the rightmost pair is stored as the
    compound, its last item becoming the main token. Without any compound,
    the token that is its own head becomes the main token.

    NOTE(review): the cut-off position is ``token.i`` but is used as an
    offset into ``story.role.text``; the two only agree if the role text
    starts at token 0 -- confirm.

    Returns:
        The same ``story`` object.
    """
    role_text = story.role.text
    functional_role = story.role.functional_role

    with_i = -1
    for token in role_text:
        if MinerUtility.lower(token.text) in ('with', 'w/'):
            with_i = token.i
    candidates = role_text[0:with_i] if with_i > 0 else role_text

    # If there is just one word
    if len(role_text) == 1:
        functional_role.main = role_text[0]
        return story

    pairs = [[token, token.head]
             for token in candidates
             if NLPUtility.is_compound(token)]
    # Every entry is a two-item list, so with one or more pairs the
    # rightmost one is picked; with none the empty list itself is stored.
    functional_role.compound = pairs[-1] if pairs else pairs

    if functional_role.compound:
        # If it is a compound
        functional_role.main = functional_role.compound[-1]
    else:
        # Get head of tree
        for token in role_text:
            if token is token.head:
                functional_role.main = token

    return story
def get_mobj_and_mv(self, story, part='means'):
    """Find the main verb and main object of a story part and store them.

    The results are written to ``story.means`` when ``part`` is ``'means'``
    and to ``story.ends`` (together with the subject) otherwise. Afterwards
    the matching ``get_<part>_phrases`` method of this object is called.

    Args:
        story: Story to analyse; it is updated in place.
        part: Name of the story attribute to analyse (default ``'means'``).
            The part's ``text`` must hold at least one token, and at least
            two when no verb can be found.

    Returns:
        The story as returned by ``get_<part>_phrases``.
    """
    part_name = str(part)
    # Plain attribute lookup; no need to eval a built-up expression.
    text = getattr(story, part_name).text

    simple = False
    found_verb = False
    found_obj = False
    found_mv_phrase = False

    # Empty lists act as 'not found yet' markers.
    subject = []
    main_verb = []
    main_object = []
    mv_phrase = []

    # Simple case if the subj and dobj are linked by a verb
    for token in text:
        if NLPUtility.is_subject(token):
            subject = token
            if NLPUtility.is_verb(token.head):
                found_verb = True
                main_verb = token.head
                break

    # Without a subject, fall back to the first token of the part.
    if type(subject) is list:
        subject = text[0]

    for token in text:
        if NLPUtility.is_dobj(token):
            found_obj = True

            if token.pos_ == "PRON":
                # If it is a pronoun, look for a preposition with a pobj
                f = False
                for child in token.head.children:
                    if child.dep_ == "prep" and child.right_edge.dep_ == "pobj" and not f:
                        token = child.right_edge
                        mv_phrase = [main_verb, child]
                        f = True
                        found_mv_phrase = True
            elif token.pos_ == "ADJ" or token.pos_ == "ADV":
                # Set to right edge if there is an adj/adv as dobj, and
                # possibly make a verb phrase
                f = False
                for child in token.children:
                    if child.dep_ == "prep" and not f:
                        for grandchild in child.children:
                            if grandchild.dep_ == "pobj":
                                mv_phrase = [main_verb, token, child]
                                token = grandchild
                                f = True
                                found_mv_phrase = True

            if token.head == main_verb:
                simple = True

            main_object = token
            break

    # If the root of the sentence is a verb
    if not simple:
        for token in text:
            if token.dep_ == 'ROOT' and NLPUtility.is_verb(token):
                found_verb = True
                main_verb = token
                break

    # If no main verb could be found it is the second word (directly after 'I')
    # Possibly a NLP error...
    if not found_verb:
        main_verb = text[1]

    # If the sentence contains no dobj it must be another obj
    if not found_obj:
        for token in text:
            if token.dep_[1:] == 'obj':
                found_obj = True
                main_object = token
                break

    # If none is found it points to the unknown 'system part'
    # + get phrases for main_object and main_verb
    if not found_obj and part == 'means':
        main_object = story.system.main

    if part == 'means':
        story.means.main_verb.main = main_verb
        story.means.main_object.main = main_object

        if found_mv_phrase:
            story.means.main_verb.phrase = MinerUtility.get_span(
                story, mv_phrase, 'means.text')
            story.means.main_verb.type = "II"
    else:
        story.ends.subject.main = subject
        story.ends.main_verb.main = main_verb
        story.ends.main_object.main = main_object

        if found_mv_phrase:
            story.ends.main_verb.phrase = MinerUtility.get_span(
                story, mv_phrase, 'ends.text')
            story.ends.main_verb.type = "II"

    # Dispatch to get_means_phrases / get_ends_phrases. When the object is
    # the system itself, the phrase lookup for the object is switched off.
    get_phrases = getattr(self, 'get_' + part_name + '_phrases')
    if main_object == story.system.main:
        story = get_phrases(story, found_mv_phrase, False)
    else:
        story = get_phrases(story, found_mv_phrase)

    return story
class Constructor:
    """Builds the ontology (and its Prolog counterpart) for a set of user stories."""

    def __init__(self, nlp, user_stories, matrix):
        """Store the inputs and derive ``[case, weight]`` rows from ``matrix['sum']``."""
        self.nlp = nlp
        self.user_stories = user_stories
        self.weights = matrix['sum'].reset_index().values.tolist()

    def make(self, ontname, threshold, link):
        """Create the ontologies and render them.

        Args:
            ontname: Name handed to both ``Ontology`` objects.
            threshold: Threshold forwarded to ``PatternFactory.make_patterns``.
            link: When truthy, classes are linked to the stories they occur in.

        Returns:
            A 5-tuple: rendered ontology, rendered Prolog ontology, the
            ontology object, the Prolog ontology object, and a list of
            ``[role name with '/' replaced by '_', rendered per-role output]``.
        """
        weighted_tokens = WeightAttacher.make(self.user_stories, self.weights)

        self.onto = Ontology(ontname, self.user_stories)
        self.prolog = Ontology(ontname, self.user_stories)

        pf = PatternFactory(self.onto, self.prolog, weighted_tokens)
        self.onto = pf.make_patterns(self.user_stories, threshold)
        self.prolog = pf.prolog

        if link:
            self.link_to_story(self.onto.classes, self.user_stories)

        g = Generator(self.onto.classes, self.onto.relationships)
        g_prolog = Generator(self.prolog.classes, self.prolog.relationships, False)

        per_role_out = []
        per_role_onto = self.get_per_role(self.user_stories, link)
        for p in per_role_onto:
            per_role_out.append([p[0].replace('/', '_'), p[1].prt(self.onto)])

        return g.prt(self.onto), g_prolog.prt(
            self.prolog), self.onto, self.prolog, per_role_out

    def link_to_story(self, classes, stories):
        """Record, for every class, the stories (and story parts) it occurs in."""
        used_stories = []

        for cl in classes:
            for story in cl.stories:
                # Negative story numbers are not linked.
                if story >= 0:
                    s = self.get_story(int(story), stories)
                    part_name = self.get_parts(cl.name, s)

                    # for part in part_name:
                    #     n = s.txtnr() + part
                    #     self.onto.get_class_by_name(-1, n, s.txtnr())
                    #     self.onto.new_relationship(-1, cl.name, cl.name + 'OccursIn' + n, n)
                    self.onto.new_relationship(
                        -1, cl.name, cl.name + 'OccursIn' + s.txtnr(), s.txtnr())

                    for part in part_name:
                        self.prolog.new_relationship(-1, cl.name, part, s.txtnr())

                    used_stories.append(s.txtnr())

        for story in used_stories:
            self.onto.get_class_by_name(-1, story, 'UserStory')

    def get_per_role(self, stories, link):
        """Build one generator per distinct role.

        Roles are compared case-insensitively; the first spelling seen is
        kept. ``stories`` is not read -- ``self.user_stories`` is used.

        Args:
            stories: Unused; kept for interface compatibility.
            link: Flag forwarded unchanged to ``get_generator``.

        Returns:
            A list of ``[role, generator]`` pairs in order of first occurrence.
        """
        roles_link = []
        roles = []
        per_role_ontos = []

        # Get a list of roles and a list where the stories are linked to their roles
        for story in self.user_stories:
            roles_link.append([story.role.t, story.number])
            if str.lower(story.role.t) not in [str.lower(s) for s in roles]:
                roles.append(story.role.t)

        # Get a list of stories per role and get the generator object for these stories
        for role in roles:
            stories_per_role = []
            # The loop variables must not be called ``link``: that shadowed
            # the parameter, so get_generator received the last
            # [role, number] pair instead of the flag.
            for role_name, story_number in roles_link:
                if str.lower(role) == str.lower(role_name):
                    stories_per_role.append(story_number)

            per_role_ontos.append(
                [role, self.get_generator(role, stories_per_role, link)])

        return per_role_ontos

    def get_generator(self, role, spr, link):
        """Create a ``Generator`` limited to the classes/relationships of ``spr``.

        Args:
            role: Not read by this method.
            spr: Story numbers belonging to the role.
            link: When truthy, relationships whose range is ``'US' + <story>``
                and the corresponding story classes are included as well.
        """
        role_classes = []
        role_relationships = []
        cl_names = []

        # Get classes
        for cl in self.onto.classes:
            for story in cl.stories:
                if story >= 0 and story in spr and cl.name not in cl_names:
                    role_classes.append(cl)
                    cl_names.append(cl.name)
                    if cl.parent != '':
                        for cp in self.onto.classes:
                            if cp.name == cl.parent:
                                role_classes.append(cp)

            # Get the general classes
            if cl.stories[0] == -1:
                if cl.name == 'FunctionalRole' or cl.name == 'Person':
                    role_classes.append(cl)

        story_classes = []

        # Get all relationships belonging to these classes
        for rel in self.onto.relationships:
            for story in rel.stories:
                if rel.domain in cl_names and rel.range in cl_names and story in spr:
                    role_relationships.append(rel)

            # If 'link' add these classes too
            if link:
                for story in spr:
                    if rel.domain in cl_names and rel.range == 'US' + str(
                            story):
                        role_relationships.append(rel)
                        story_classes.append(rel.range)

        # Retrieve all classes for the relationships created in link
        if link:
            for cl in self.onto.classes:
                for c in story_classes:
                    if cl.name == c:
                        role_classes.append(cl)
                if cl.name == 'UserStory':
                    role_classes.append(cl)

        return Generator(role_classes, role_relationships)

    def get_story(self, nr, stories):
        """Return the story whose ``number`` equals ``nr``, or ``False`` if none does."""
        for story in stories:
            if nr == story.number:
                return story
        return False

    def get_parts(self, class_name, story):
        # Collects, per story part, the token cases in which ``class_name``
        # is looked up. For a one-word class name, tokens belonging to a
        # compound of the part are left out.
        case = class_name.split()

        means_compounds = []
        means_compounds.append(story.means.main_object.compound)
        ends_compounds = story.ends.compounds

        if story.means.free_form:
            if len(story.means.compounds) > 0:
                if type(story.means.compounds[0]) is list:
                    # The outer ``for`` clause has to come first; the
                    # reversed order raised NameError on ``sublist``.
                    mc = [
                        item for sublist in story.means.compounds
                        for item in sublist
                    ]
                else:
                    mc = story.means.compounds
                means_compounds.extend(mc)

        if len(ends_compounds) > 0:
            if type(ends_compounds[0]) is list:
                ends_compounds = [
                    item for sublist in story.ends.compounds
                    for item in sublist
                ]

        role = []
        means = []
        ends = []
        rme = []

        for token in story.data:
            if token in story.role.text:
                if len(case) != 1:
                    role.append(NLPUtility.case(token))
                elif token not in story.role.functional_role.compound:
                    role.append(NLPUtility.case(token))
            if token in story.means.text:
                if len(case) != 1:
                    means.append(NLPUtility.case(token))
                elif token not in means_compounds:
                    means.append(NLPUtility.case(token))
def __init__(self, token, weight):
    """Keep a token together with its weight and its case.

    The case is whatever ``NLPUtility.case`` returns for ``token``.
    """
    self.token = token
    case = NLPUtility.case(token)
    self.case, self.weight = case, weight
def get_span(story, li, part='data'):
    """Return the items of a story part that correspond to ``li``.

    Args:
        story: Story to read from.
        li: Passed to ``NLPUtility.get_idx`` to obtain the positions.
        part: Attribute path on ``story``, optionally dotted (for example
            ``'means.text'``); the resolved object is indexed by position.

    Returns:
        A list with ``<part>[i]`` for every position ``i``.
    """
    indices = list(NLPUtility.get_idx(li))
    if not indices:
        return []

    # Walk the dotted path once with getattr instead of evaluating a built-up
    # expression for every single index.
    container = story
    for name in str(part).split('.'):
        container = getattr(container, name)

    return [container[i] for i in indices]
def get_factor_part(self, matrix, story, part):
    """Add the score of every known token of one story part to ``matrix``.

    For each token of ``story.<part>.text`` whose case is in the matrix
    index, ``self.add`` is called with the case, ``story.txtnr()`` and the
    result of ``self.score_<part>(token, story)``.

    Args:
        matrix: Matrix-like object with an ``index``; re-bound to the result
            of every ``self.add`` call.
        story: Story being scored.
        part: Name of the story part, which also selects the scoring method.

    Returns:
        The matrix after all additions.
    """
    part_name = str(part)

    # Plain attribute lookups; no need to eval built-up expressions.
    for token in getattr(story, part_name).text:
        case = NLPUtility.case(token)
        if case in matrix.index.values:
            matrix = self.add(
                matrix, case, story.txtnr(),
                getattr(self, 'score_' + part_name)(token, story))

    return matrix
def find_story(self, w_token, stories):
    """Return the numbers of the stories that contain the weighted token.

    A story matches when ``w_token.case`` equals the case of one of the
    tokens in ``story.data``.
    """
    return [
        story.number
        for story in stories
        if w_token.case in [NLPUtility.case(token) for token in story.data]
    ]
def get_mobj_and_mv(self, story, part='means'):
    """Find the main verb and main object of a story part and store them.

    The results are written to ``story.means`` when ``part`` is ``'means'``
    and to ``story.ends`` (together with the subject) otherwise. Afterwards
    the matching ``get_<part>_phrases`` method of this object is called. The
    word 'can' is never accepted as the main verb through the subject's head.

    Args:
        story: Story to analyse; it is updated in place.
        part: Name of the story attribute to analyse (default ``'means'``).
            The part's ``text`` must hold at least one token, and up to three
            when no verb can be found.

    Returns:
        The story as returned by ``get_<part>_phrases``.
    """
    part_name = str(part)
    # Plain attribute lookup; no need to eval a built-up expression.
    text = getattr(story, part_name).text

    simple = False
    found_verb = False
    found_obj = False
    found_mv_phrase = False

    # Empty lists act as 'not found yet' markers.
    subject = []
    main_verb = []
    main_object = []
    mv_phrase = []

    # Simple case if the subj and dobj are linked by a verb
    for token in text:
        if NLPUtility.is_subject(token):
            subject = token
            # BC: a head that is the word 'can' does not count as main verb.
            if NLPUtility.is_verb(
                    token.head) and str.lower(token.head.text) != 'can':
                found_verb = True
                main_verb = token.head
                break

    # Without a subject, fall back to the first token of the part.
    if type(subject) is list:
        subject = text[0]

    for token in text:
        if NLPUtility.is_dobj(token):
            found_obj = True

            if token.pos_ == "PRON":
                # If it is a pronoun, look for a preposition with a pobj
                f = False
                for child in token.head.children:
                    if child.dep_ == "prep" and child.right_edge.dep_ == "pobj" and not f:
                        token = child.right_edge
                        mv_phrase = [main_verb, child]
                        f = True
                        found_mv_phrase = True
            elif token.pos_ == "ADJ" or token.pos_ == "ADV":
                # Set to right edge if there is an adj/adv as dobj, and
                # possibly make a verb phrase
                f = False
                for child in token.children:
                    if child.dep_ == "prep" and not f:
                        for grandchild in child.children:
                            if grandchild.dep_ == "pobj":
                                mv_phrase = [main_verb, token, child]
                                token = grandchild
                                f = True
                                found_mv_phrase = True

            if token.head == main_verb:
                simple = True

            main_object = token
            break

    # If the root of the sentence is a verb
    if not simple:
        for token in text:
            if token.dep_ == 'ROOT' and NLPUtility.is_verb(token):
                found_verb = True
                main_verb = token
                break

    # If no main verb could be found it is the second word (directly after 'I')
    # Possibly a NLP error...
    if not found_verb:
        # BC: for the means part, or when the second word is 'can', the
        # third word is taken instead of the second.
        if part_name == 'means' or str.lower(text[1].text) == 'can':
            main_verb = text[2]
        else:
            main_verb = text[1]

    # If the sentence contains no dobj it must be another obj
    if not found_obj:
        for token in text:
            if token.dep_[1:] == 'obj':
                found_obj = True
                main_object = token
                break

    # If none is found it points to the unknown 'system part'
    # + get phrases for main_object and main_verb
    if not found_obj and part == 'means':
        main_object = story.system.main

    if part == 'means':
        story.means.main_verb.main = main_verb
        story.means.main_object.main = main_object

        if found_mv_phrase:
            story.means.main_verb.phrase = MinerUtility.get_span(
                story, mv_phrase, 'means.text')
            story.means.main_verb.type = "II"
    else:
        story.ends.subject.main = subject
        story.ends.main_verb.main = main_verb
        story.ends.main_object.main = main_object

        if found_mv_phrase:
            story.ends.main_verb.phrase = MinerUtility.get_span(
                story, mv_phrase, 'ends.text')
            story.ends.main_verb.type = "II"

    # Dispatch to get_means_phrases / get_ends_phrases. When no object was
    # found at all (still the empty-list marker) or the object is the system
    # itself, the phrase lookup for the object is switched off.
    get_phrases = getattr(self, 'get_' + part_name + '_phrases')
    if type(main_object) is list or main_object == story.system.main:
        story = get_phrases(story, found_mv_phrase, False)
    else:
        story = get_phrases(story, found_mv_phrase)

    return story
for token in story.data: if token in story.role.text: if len(case) != 1: role.append(NLPUtility.case(token)) elif token not in story.role.functional_role.compound: role.append(NLPUtility.case(token)) if token in story.means.text: if len(case) != 1: means.append(NLPUtility.case(token)) elif token not in means_compounds: means.append(NLPUtility.case(token)) if story.has_ends: if token in story.ends.text: if len(case) != 1: ends.append(NLPUtility.case(token)) elif token not in ends_compounds: ends.append(NLPUtility.case(token)) if Utility.is_sublist(case, role): rme.append('Role') if Utility.is_sublist(case, means): rme.append('Means') if Utility.is_sublist(case, ends): rme.append('Ends') return rme