def _get_factor_part(self, matrix, story, part):
    """Add the score of every known token in one story part to the matrix.

    For each token in ``story.<part>.text`` whose case (``get_case(token)``)
    is a row label of ``matrix``, the value returned by
    ``self.score_<part>(token, story)`` is added to the cell at
    ``(case, story.txtnr())``.

    Args:
        matrix: Object exposing ``index.values`` and ``at``; updated in place.
        story: Object exposing ``txtnr()`` and a ``<part>`` attribute that has
            an iterable ``text``.
        part: Name of the story part. It must name an attribute of ``story``
            and have a matching ``score_<part>`` method on ``self``.

    Returns:
        The same ``matrix`` object that was passed in.
    """
    # Plain attribute lookup replaces eval(): identical result for an
    # identifier-like part name, without executing a constructed string.
    tokens = getattr(story, part).text

    # Row labels are not changed by this loop, so build the lookup once.
    known_cases = set(matrix.index.values)

    for token in tokens:
        case = get_case(token)
        if case in known_cases:
            # The scorer is resolved only when needed, as the original did.
            score = getattr(self, 'score_' + part)(token, story)
            matrix.at[case, story.txtnr()] += score

    return matrix
def get_role_means_ends(self, matrix, stories):
    """Link cases in ``matrix`` to the story parts in which they occur.

    For every story and each of its role, means and ends parts that has a
    truthy ``indicator``, the cell ``(case, (story.txtnr(), <Part>))`` is set
    to 1 for every row label ``case`` that equals ``get_case(t)`` for some
    token ``t`` in that part's ``text``.

    Args:
        matrix: Object exposing ``index.values`` and ``at``; updated in place.
        stories: Iterable of stories exposing ``role``, ``means``, ``ends``
            (each with ``indicator`` and ``text``) and ``txtnr()``.

    Returns:
        The same ``matrix`` object that was passed in.
    """
    part_labels = (('role', 'Role'), ('means', 'Means'), ('ends', 'Ends'))

    for s in stories:
        for attr, label in part_labels:
            part = getattr(s, attr)
            if not part.indicator:
                continue

            # Compute the part's cases once rather than once per matrix row.
            cases = {get_case(t) for t in part.text}

            for c in matrix.index.values:
                if c in cases:
                    # The row label is always the case. The Means branch
                    # previously used the story object as the row label, which
                    # added a stray row and left the case unmarked.
                    matrix.at[c, (s.txtnr(), label)] = 1

    return matrix
def count_occurence(self, cm, sl, stories):
    """Count, per case, how often its tokens occur in each kind of story part.

    Every token of every story is reduced to its case. When that case is a
    row label of ``cm``, the story number is appended to the matching entry
    of ``sl`` and the relevant counters in ``cm`` are incremented.

    Args:
        cm: Count matrix indexed by case with the six counter columns.
        sl: List of ``[case, story_numbers]`` pairs; the inner lists are
            extended in place.
        stories: Iterable of stories exposing ``data``, ``number`` and
            ``ends.free_form``.

    Returns:
        The tuple ``(cm, sl)``.
    """
    # (part path, column when is_phrasal == 1, column when is_phrasal == 2)
    phrasal_columns = (
        ('role.functional_role', 'Functional Role', 'Functional Role Compound'),
        ('means.main_object', 'Main Object', 'Main Object Compound'),
    )

    for story in stories:
        for token in story.data:
            case = get_case(token)
            if case not in cm.index.values:
                continue

            # Record the story number against every entry for this case.
            for label, story_numbers in sl:
                if label == case:
                    story_numbers.append(story.number)

            for path, single_col, compound_col in phrasal_columns:
                if self.is_phrasal(path, token, story) == 1:
                    cm.at[case, single_col] += 1
                elif self.is_phrasal(path, token, story) == 2:
                    cm.at[case, compound_col] += 1

            if self.is_freeform('means', token, story) == 1:
                cm.at[case, 'Means Free Form Noun'] += 1

            if story.ends.free_form and (
                    self.is_phrasal('ends.main_object', token, story) > 0
                    or self.is_freeform('ends', token, story) == 1):
                cm.at[case, 'Ends Free Form Noun'] += 1

    return cm, sl
def make(stories, weights):
    """Pair every token of every story part with its weight.

    Args:
        stories: Iterable of stories exposing ``has_ends`` and the parts
            ``role``, ``means`` (and ``ends`` when ``has_ends`` is truthy),
            each with an iterable ``text``.
        weights: Sequence of ``[case, weight]`` pairs.

    Returns:
        A list with one ``WeightedToken(token, weight)`` per token, in story
        and part order. Tokens whose case has no entry in ``weights`` get
        weight ``0.0``.
    """
    # One lookup table instead of rescanning ``weights`` for every token.
    # setdefault keeps the first entry for a case, as the front-to-back scan
    # with ``break`` did.
    weight_by_case = {}
    for weight in weights:
        weight_by_case.setdefault(weight[0], weight[1])

    weighted_tokens = []
    for story in stories:
        parts = ['role', 'means', 'ends'] if story.has_ends else ['role', 'means']
        for part in parts:
            # getattr replaces eval('story.<part>.text'): same lookup, no
            # string execution.
            for token in getattr(story, part).text:
                w = weight_by_case.get(get_case(token), 0.0)
                weighted_tokens.append(WeightedToken(token, w))

    return weighted_tokens
def get_lowest_threshold(self, relationship):
    """Return the lowest token weight found for ``relationship``.

    The weighted tokens come from ``self.get_weighted_tokens(relationship)``.
    Tokens whose lower-cased case equals ``self.sysname`` are skipped while
    scanning for a lower weight.

    Returns:
        The lowest weight found, or ``1000.0`` when there are no weighted
        tokens.
    """
    tokens = self.get_weighted_tokens(relationship)

    # NOTE(review): the starting value is the first token's weight even when
    # that token is the system name - confirm this is intended.
    lowest = tokens[0].weight if tokens else 1000.0

    for candidate in tokens:
        # Exclude system name object from filter
        is_system = str.lower(get_case(candidate)) == self.sysname
        if not is_system and candidate.weight < lowest:
            lowest = candidate.weight

    return lowest
def make_patterns(self, user_stories, threshold):
    """Identify patterns in the user stories and populate the ontology.

    Stores the lower-cased case of the first story's ``system.main`` in
    ``self.sysname``, runs a ``PatternIdentifier`` over every story, filters
    the identified relationships with ``self.apply_threshold`` and hands the
    result to ``self.create``.

    Returns:
        ``self.onto``.
    """
    identifier = PatternIdentifier(self.weighted_tokens)

    system_case = get_case(user_stories[0].system.main)
    self.sysname = str.lower(system_case)

    for user_story in user_stories:
        identifier.identify(user_story)

    kept = self.apply_threshold(identifier.relationships, threshold)
    self.create(kept, user_stories, threshold, identifier.roles)

    return self.onto
def _remove_indicators(self, matrix, stories, nlp):
    """Remove the stories' indicator words from ``matrix``.

    For each story the role and means indicators (and the ends indicator when
    ``story.has_ends``) are joined with spaces, run through ``nlp`` and
    reduced to cases. Those cases plus the entries of ``story.indicators``
    are passed to ``self._remove_from``.

    Returns:
        Whatever ``self._remove_from(matrix, indicators)`` returns.
    """
    collected = []

    for story in stories:
        phrase = story.role.indicator + " " + story.means.indicator
        if story.has_ends:
            phrase += " " + story.ends.indicator

        collected.extend(get_case(token) for token in nlp(phrase))
        collected.extend(story.indicators)

    return self._remove_from(matrix, collected)
def create(self, relationships, stories, threshold, roles):
    """Register classes and relationships in the ontology and prolog stores.

    Each relationship ``r`` is indexed positionally: ``r[0]`` is passed
    through as the first argument of every store call (presumably a story
    identifier - TODO confirm), ``r[1]`` and ``r[3]`` are reduced to the
    cases ``pre`` and ``post``, ``r[2]`` is compared against ``Pattern``
    members and ``r[4]`` is reduced to the relationship name.

    After the relationships, a class is registered for every weighted token
    whose weight is at least ``threshold`` (once per story it occurs in), and
    one for every entry in ``roles``.

    Nothing is returned; the work is done through calls on ``self.onto`` and
    ``self.prolog``.
    """
    # NOTE(review): ``used`` is filled below but never read in this method.
    used = []

    for r in relationships:
        pre = get_case(r[1])
        post = get_case(r[3])

        # Parent pattern: a class with a parent in the ontology, an 'isa'
        # relationship in prolog.
        if r[2] == Pattern.parent:
            self.onto.get_class_by_name(r[0], pre, post)
            self.prolog.new_relationship(r[0], pre, 'isa', post)

        # Only non-parent patterns read r[4] for a relationship name.
        if r[2] != Pattern.parent:
            rel = get_case(r[4])

        if r[2] == Pattern.subj_dobj or r[2] == Pattern.compound_has:
            self.onto.get_class_by_name(r[0], pre)
            self.onto.get_class_by_name(r[0], post)
            self.prolog.new_relationship(r[0], pre, rel, post)

            # subj_dobj goes to make_can_relationship, compound_has to
            # make_has_relationship.
            if r[2] == Pattern.subj_dobj:
                self.make_can_relationship(r[0], pre, rel, post)
            else:
                self.make_has_relationship(r[0], pre, rel, post)

        self.prolog.get_class_by_name(r[0], pre)
        self.prolog.get_class_by_name(r[0], post)

        used.append(pre)
        used.append(post)

    # Classes for every sufficiently weighted token, per story it occurs in.
    for wo in self.weighted_tokens:
        if wo.weight >= threshold:
            in_stories = self.find_story(wo, stories)
            for in_story in in_stories:
                self.onto.get_class_by_name(in_story, wo.case)

    # Role classes use an empty third argument and True as the fourth.
    for r in roles:
        self.onto.get_class_by_name(r[0], get_case(r[1]), '', True)
def _remove_verbs(self, matrix, stories):
    """Remove rows of ``matrix`` whose only matching token is a verb.

    A row label (case) is selected for removal when the tokens across all
    stories whose ``get_case`` equals that label form a set of exactly one
    element and ``is_verb`` is truthy for the first of them.

    Args:
        matrix: Object exposing ``index.values.tolist()``.
        stories: Iterable of stories exposing an iterable ``data`` of tokens.

    Returns:
        Whatever ``self._remove_from(matrix, verbs)`` returns.
    """
    # Group the tokens by case in one pass. The previous form rescanned every
    # token of every story (calling get_case each time) for every row label.
    tokens_by_case = {}
    for story in stories:
        for token in story.data:
            tokens_by_case.setdefault(get_case(token), []).append(token)

    verbs = []
    for case in matrix.index.values.tolist():
        matches = tokens_by_case.get(case, [])
        if len(set(matches)) == 1 and is_verb(matches[0]):
            verbs.append(case)

    return self._remove_from(matrix, verbs)
def generate(self, stories, all_words, nlp):
    """Build the weight, count and role/means/ends matrices for the stories.

    Args:
        stories: Iterable of stories exposing ``txtnr()``.
        all_words: Text containing all words; whitespace runs are collapsed
            to single spaces before it is passed to ``nlp``.
        nlp: Callable that turns a string into an iterable of tokens.

    Returns:
        The tuple ``(w_us, count_matrix, stories_list, rme_us)``.
    """
    collapsed = ' '.join(all_words.split())
    words = [get_case(token) for token in nlp(collapsed)]
    ids = [story.txtnr() for story in stories]

    def unique_rows(frame):
        # Keep one row per index label, at the positions np.unique reports.
        positions = np.unique(frame.index, return_index=True)[1]
        return frame.iloc[positions]

    # Add weighted scores to the words in the term-by-US matrix
    w_us = unique_rows(pd.DataFrame(0.0, index=words, columns=ids))
    w_us = self.get_factor(w_us, stories)
    w_us['sum'] = w_us.sum(axis=1)
    w_us = self._remove_indicators(w_us, stories, nlp)
    w_us = self._remove_verbs(w_us, stories)

    # Link to US part
    us_ids, rme = self._get_rme(stories)
    rme_cols = pd.MultiIndex.from_arrays(
        [us_ids, rme], names=['user_story', 'part'])
    rme_us = unique_rows(pd.DataFrame(0, index=words, columns=rme_cols))
    rme_us = self.get_role_means_ends(rme_us, stories)

    # Per-case occurrence counters and the list of stories per case
    colnames = [
        'Functional Role',
        'Functional Role Compound',
        'Main Object',
        'Main Object Compound',
        'Means Free Form Noun',
        'Ends Free Form Noun',
    ]
    stories_list = [[label, []] for label in list(w_us.index.values)]
    count_matrix = pd.DataFrame(0, index=w_us.index, columns=colnames)
    count_matrix, stories_list = self.count_occurence(
        count_matrix, stories_list, stories)

    return w_us, count_matrix, stories_list, rme_us
def _print_rel(rel):
    """Print a relationship as ``<source> -- <pattern> -> <target>``.

    ``rel[1]`` and ``rel[3]`` are reduced to their case with ``get_case``;
    ``rel[2]`` is printed as-is.
    """
    source = get_case(rel[1])
    pattern = rel[2]
    target = get_case(rel[3])
    print(source, "--", pattern, "->", target)
def find_story(self, w_token, stories):
    """Return the numbers of the stories that contain ``w_token``'s case.

    A story matches when ``w_token.case`` equals ``get_case(t)`` for some
    token ``t`` in ``story.data``.
    """
    def mentions(story):
        return w_token.case in [get_case(token) for token in story.data]

    return [story.number for story in stories if mentions(story)]
class Constructor:
    """Build ontology and prolog outputs from a set of user stories.

    ``weights`` holds one ``[case, sum]`` pair per row of the weight matrix
    passed to the constructor.
    """

    def __init__(self, nlp, user_stories, matrix):
        """Store the inputs and flatten the matrix's 'sum' column.

        Args:
            nlp: Stored as ``self.nlp``; not used further in this class.
            user_stories: The stories to process.
            matrix: Object whose ``'sum'`` column supports
                ``reset_index().values.tolist()``.
        """
        self.nlp = nlp
        self.user_stories = user_stories
        self.weights = matrix['sum'].reset_index().values.tolist()

    def make(self, ontname, threshold, link):
        """Create the ontology, the prolog store and the per-role ontologies.

        Args:
            ontname: Name passed to both ``Ontology`` instances.
            threshold: Threshold passed to ``PatternFactory.make_patterns``.
            link: When truthy, classes are linked to their user stories.

        Returns:
            A tuple ``(OntologyGenerator, PrologGenerator, per_role_out)``
            where ``per_role_out`` is a list of ``[role, text]`` pairs with
            ``'/'`` replaced by ``'_'`` in the role name.
        """
        weighted_tokens = WeightAttacher.make(self.user_stories, self.weights)

        self.onto = Ontology(ontname, self.user_stories)
        self.prolog = Ontology(ontname, self.user_stories)

        pf = PatternFactory(self.onto, self.prolog, weighted_tokens)
        self.onto = pf.make_patterns(self.user_stories, threshold)
        self.prolog = pf.prolog

        if link:
            self.link_to_story(self.onto.classes, self.user_stories)

        per_role_onto = self.get_per_role(self.user_stories, link)
        per_role_out = [
            [p[0].replace('/', '_'), str(p[1])] for p in per_role_onto
        ]

        return (OntologyGenerator(self.onto), PrologGenerator(self.prolog),
                per_role_out)

    def link_to_story(self, classes, stories):
        """Add 'OccursIn' relationships between classes and their stories.

        For every non-negative story number on a class, one relationship is
        added to the ontology and one per story part (as returned by
        ``get_parts``) to the prolog store. Each linked story is then
        registered as a ``'UserStory'`` class in the ontology.
        """
        used_stories = []

        for cl in classes:
            for story in cl.stories:
                if story >= 0:
                    s = self.get_story(int(story), stories)
                    parts = self.get_parts(cl.name, s)

                    self.onto.new_relationship(
                        -1, cl.name, cl.name + 'OccursIn' + s.txtnr(),
                        s.txtnr())

                    for part in parts:
                        self.prolog.new_relationship(-1, cl.name, part,
                                                     s.txtnr())

                    used_stories.append(s.txtnr())

        for story in used_stories:
            self.onto.get_class_by_name(-1, story, 'UserStory')

    def get_per_role(self, stories, link):
        """Return one ``[role, generator]`` pair per distinct role.

        Roles are compared case-insensitively and the first spelling seen is
        kept.

        Args:
            stories: Accepted for interface compatibility; the method reads
                ``self.user_stories``.
            link: Forwarded unchanged to ``get_generator``.
        """
        roles_link = []
        roles = []
        per_role_ontos = []

        # Get a list of roles and a list where the stories are linked to
        # their roles
        for story in self.user_stories:
            roles_link.append([story.role.t, story.number])
            if str.lower(story.role.t) not in [str.lower(s) for s in roles]:
                roles.append(story.role.t)

        # Get a list of stories per role and get the generator object for
        # these stories
        for role in roles:
            # The loop variables must not be called ``link``: that shadowed
            # the parameter and handed get_generator a (always truthy)
            # [role, number] list instead of the caller's flag.
            stories_per_role = [
                number for role_name, number in roles_link
                if str.lower(role) == str.lower(role_name)
            ]
            per_role_ontos.append(
                [role, self.get_generator(role, stories_per_role, link)])

        return per_role_ontos

    def get_generator(self, role, spr, link):
        """Return an ``OntologyGenerator`` limited to one role's stories.

        Args:
            role: The role name; not read by this method.
            spr: Story numbers belonging to the role.
            link: When truthy, relationships to ``'US<number>'`` classes and
                the ``'UserStory'`` class are included as well.
        """
        role_classes = []
        role_relationships = []
        cl_names = []

        # Get classes
        for cl in self.onto.classes:
            for story in cl.stories:
                if story >= 0 and story in spr and cl.name not in cl_names:
                    role_classes.append(cl)
                    cl_names.append(cl.name)
                    if cl.parent != '':
                        for cp in self.onto.classes:
                            if cp.name == cl.parent:
                                role_classes.append(cp)

            # Get the general classes
            if cl.stories[0] == -1:
                if cl.name == 'FunctionalRole' or cl.name == 'Person':
                    role_classes.append(cl)

        story_classes = []

        # Get all relationships belonging to these classes
        for rel in self.onto.relationships:
            for story in rel.stories:
                if (rel.domain in cl_names and rel.range in cl_names
                        and story in spr):
                    role_relationships.append(rel)

            # If 'link' add these classes too
            if link:
                for story_nr in spr:
                    if (rel.domain in cl_names
                            and rel.range == 'US' + str(story_nr)):
                        role_relationships.append(rel)
                        story_classes.append(rel.range)

        # Retrieve all classes for the relationships created in link
        if link:
            for cl in self.onto.classes:
                for c in story_classes:
                    if cl.name == c:
                        role_classes.append(cl)
                if cl.name == 'UserStory':
                    role_classes.append(cl)

        # Shallow copy: only the class and relationship lists are replaced.
        onto = copy.copy(self.onto)
        onto.classes, onto.relationships = role_classes, role_relationships

        return OntologyGenerator(onto)

    def get_story(self, nr, stories):
        """Return the story whose ``number`` equals ``nr``, or ``False``."""
        for story in stories:
            if nr == story.number:
                return story
        return False

    def get_parts(self, class_name, story):
        """Return which story parts contain the class name.

        ``class_name`` is split on whitespace. For a single-word name, tokens
        that belong to a compound of the part are left out before matching.

        Returns:
            A list holding any of ``'Role'``, ``'Means'`` and ``'Ends'``, in
            that order.
        """
        case = class_name.split()

        means_compounds = []
        means_compounds.append(story.means.main_object.compound)
        ends_compounds = flatten(story.ends.compounds)

        if story.means.free_form:
            if len(story.means.compounds) > 0:
                means_compounds.extend(flatten(story.means.compounds))

        role = []
        means = []
        ends = []
        rme = []

        # One pass over the story. A second, partial role/means pass would
        # append every token twice and let is_sublist match across the seam.
        for token in story.data:
            if token in story.role.text:
                if len(case) != 1:
                    role.append(get_case(token))
                elif token not in story.role.functional_role.compound:
                    role.append(get_case(token))

            if token in story.means.text:
                if len(case) != 1:
                    means.append(get_case(token))
                elif token not in means_compounds:
                    means.append(get_case(token))

            if story.has_ends:
                if token in story.ends.text:
                    if len(case) != 1:
                        ends.append(get_case(token))
                    elif token not in ends_compounds:
                        ends.append(get_case(token))

        if is_sublist(case, role):
            rme.append('Role')
        if is_sublist(case, means):
            rme.append('Means')
        if is_sublist(case, ends):
            rme.append('Ends')

        return rme