예제 #1
0
	def get_ends_phrases(self, story, found_mv_phrase, assume=True):
		"""Fill in phrase and compound information for the ends part of a story.

		Args:
			story: story whose ``ends`` part is updated in place.
			found_mv_phrase: when falsy, the main verb phrase and type are looked
				up through ``MinerUtility.get_phrasal_verb``.
			assume: when truthy, the main object's phrase and compound are
				derived from the noun chunks of the ends text.

		Returns:
			The same ``story`` object.
		"""
		ends = story.ends

		if assume:
			obj = ends.main_object
			# The last noun chunk containing the main object wins
			for chunk in ends.text.noun_chunks:
				if obj.main in chunk:
					obj.phrase = chunk
			if obj.phrase:
				head = obj.main
				# Prefer the directly preceding token when it is a compound of the head
				if head.i > 0 and NLPUtility.is_compound(head.nbor(-1)) and head.nbor(-1).head == head:
					obj.compound = [head.nbor(-1), head]
				else:
					for token in obj.phrase:
						if NLPUtility.is_compound(token) and token.head == obj.main:
							obj.compound = [token, obj.main]

		subject = ends.subject
		subject_text = str.lower(subject.main.text)

		# An empty subject or the pronoun 'I' gets no phrase/compound
		if subject_text not in ('', 'i'):
			for chunk in ends.text.noun_chunks:
				if subject.main in chunk:
					subject.phrase = chunk

			if subject.phrase:
				for token in subject.phrase:
					if NLPUtility.is_compound(token) and token.head == subject.main:
						subject.compound = [token, subject.main]

		if not found_mv_phrase:
			verb = ends.main_verb
			phrasal = MinerUtility.get_phrasal_verb(story, verb.main, 'ends.text')
			verb.phrase = MinerUtility.get_span(story, phrasal[0], 'ends.text')
			verb.type = phrasal[1]

		return story
예제 #2
0
	def make(stories, weights):
		"""Wrap every token of every story part in a ``WeightedToken``.

		Args:
			stories: iterable of stories; the ``role`` and ``means`` parts are
				always read, ``ends`` only when ``story.has_ends`` is truthy.
			weights: sequence of ``[case, weight]`` pairs.

		Returns:
			A list of ``WeightedToken`` objects in story/part/token order. Tokens
			whose case has no entry in ``weights`` get weight ``0.0``.
		"""
		# Build the lookup once; setdefault keeps the FIRST pair for a case,
		# which is what a front-to-back scan of `weights` would find.
		weight_by_case = {}
		for weight in weights:
			weight_by_case.setdefault(weight[0], weight[1])

		weighted_tokens = []
		for story in stories:
			parts = ['role', 'means', 'ends'] if story.has_ends else ['role', 'means']

			for part in parts:
				# Plain attribute access instead of eval()
				for token in getattr(story, part).text:
					case = NLPUtility.case(token)
					weighted_tokens.append(WeightedToken(token, weight_by_case.get(case, 0.0)))

		return weighted_tokens
예제 #3
0
	def count_occurence(self, cm, sl, stories):
		"""Record where each known case occurs and in which grammatical role.

		Args:
			cm: matrix whose index holds the known cases; updated through
				``self.add`` for every matching category.
			sl: list of ``[case, story_numbers]`` pairs; the story number is
				appended to every pair whose case matches a token.
			stories: stories whose ``data`` tokens are inspected.

		Returns:
			The tuple ``(cm, sl)``.
		"""
		for story in stories:
			for token in story.data:
				c = NLPUtility.case(token)
				if c not in cm.index.values:
					continue

				for s in sl:
					if s[0] == c:
						s[1].append(story.number)

				# Each classification is computed once and then compared,
				# instead of re-running is_phrasal for every branch.
				role_kind = self.is_phrasal('role.functional_role', token, story)
				if role_kind == 1:
					cm = self.add(cm, c, 'Functional Role')
				elif role_kind == 2:
					cm = self.add(cm, c, 'Functional Role Compound')

				object_kind = self.is_phrasal('means.main_object', token, story)
				if object_kind == 1:
					cm = self.add(cm, c, 'Main Object')
				elif object_kind == 2:
					cm = self.add(cm, c, 'Main Object Compound')

				if self.is_freeform('means', token, story) == 1:
					cm = self.add(cm, c, 'Means Free Form Noun')

				if story.ends.free_form:
					if self.is_phrasal('ends.main_object', token, story) > 0 or self.is_freeform('ends', token, story) == 1:
						cm = self.add(cm, c, 'Ends Free Form Noun')

		return cm, sl
예제 #4
0
	def get_role_means_ends(self, matrix, stories):
		"""Flag, per story, in which parts (Role/Means/Ends) each case occurs.

		Args:
			matrix: DataFrame indexed by case; the cell at
				``(case, (story.txtnr(), part_label))`` is set to 1 when the
				case occurs in that part of the story.
			stories: stories to inspect. A part is skipped when its
				``indicator`` is falsy.

		Returns:
			The same ``matrix`` object, modified in place.
		"""
		cases = matrix.index.values
		parts = (('role', 'Role'), ('means', 'Means'), ('ends', 'Ends'))

		for story in stories:
			for attr, label in parts:
				part = getattr(story, attr)
				if not part.indicator:
					continue

				# Compute the cases of this part once instead of once per matrix row
				present = {NLPUtility.case(token) for token in part.text}

				for case in cases:
					if case in present:
						# DataFrame.set_value was removed from pandas; .at is the
						# scalar setter that replaces it.
						matrix.at[case, (story.txtnr(), label)] = 1

		return matrix
예제 #5
0
	def get_namedict(self, tokens):
		"""Map each token's ``lemma`` to the value ``NLPUtility.case`` gives for it.

		When several tokens share a lemma, the last one in ``tokens`` wins.
		"""
		return {token.lemma: NLPUtility.case(token) for token in tokens}
예제 #6
0
	def get_verbs(story, span):
		"""Return the span built by ``MinerUtility.get_span`` from the verbs in ``span``.

		Tokens are kept, in order, when ``NLPUtility.is_verb`` accepts them.
		"""
		verb_tokens = [token for token in span if NLPUtility.is_verb(token)]
		return MinerUtility.get_span(story, verb_tokens)
예제 #7
0
	def get_nouns(story, span):
		"""Return the tokens of ``span`` that ``NLPUtility.is_noun`` accepts, in order.

		``story`` is not used by this function.
		"""
		return [token for token in span if NLPUtility.is_noun(token)]
예제 #8
0
	def get_lowest_threshold(self, relationship):
		"""Return the lowest token weight found in a relationship.

		The token whose lowercased case equals ``self.sysname`` is excluded, so
		the system name can never decide the threshold. Previously the minimum
		was seeded with the first token's weight, which let the system name slip
		through whenever it happened to be the first token.

		Args:
			relationship: passed unchanged to ``self.get_weighted_tokens``.

		Returns:
			``1000.0`` when there are no weighted tokens; otherwise the smallest
			weight among the non-system tokens. If every token is the system
			name, the first token's weight is returned.
		"""
		wt = self.get_weighted_tokens(relationship)

		if not wt:
			return 1000.0

		# Exclude system name object from filter
		candidates = [
			w.weight for w in wt
			if str.lower(NLPUtility.get_case(w)) != self.sysname
		]

		if candidates:
			return min(candidates)

		# Only system-name tokens: keep the historical fallback
		return wt[0].weight
예제 #9
0
	def remove_verbs(self, matrix, stories):
		"""Drop the rows of ``matrix`` whose case is a verb with a zero 'sum'.

		Args:
			matrix: DataFrame indexed by case, with a ``'sum'`` column.
			stories: stories whose ``data`` tokens are matched against the index.

		Returns:
			A filtered DataFrame; rows with ``sum > 0`` are always kept.
		"""
		# Group the tokens by case in one pass instead of rescanning every
		# story for every row of the matrix.
		tokens_by_case = {}
		for story in stories:
			for token in story.data:
				tokens_by_case.setdefault(NLPUtility.case(token), []).append(token)

		verbs = []
		for case in matrix.index.values.tolist():
			pos = tokens_by_case.get(case, [])

			# NOTE(review): `pos` holds tokens, not POS tags, so this condition is
			# only true when exactly one distinct token has this case. Kept as
			# is; confirm whether a set of POS tags was intended.
			if len(set(pos)) == 1 and NLPUtility.is_verb(pos[0]):
				verbs.append(case)

		# Build a new list: removing from `verbs` while iterating over it
		# skipped the element that follows every removal.
		verbs = [verb for verb in verbs if not matrix.loc[verb, 'sum'] > 0]

		# `~` is the boolean negation; unary minus on a boolean mask is rejected
		# by current numpy.
		return matrix[~matrix.index.isin(verbs)]
예제 #10
0
	def make_patterns(self, user_stories, threshold):
		"""Identify patterns in the stories and build the ontology from them.

		Stores the lowercased case of the first story's system token in
		``self.sysname``, lets a ``PatternIdentifier`` process every story,
		filters the found relationships with ``self.apply_threshold`` and hands
		the result to ``self.create``.

		Returns:
			``self.onto``.
		"""
		identifier = PatternIdentifier(self.weighted_tokens)
		system_token = user_stories[0].system.main
		self.sysname = str.lower(NLPUtility.case(system_token))

		for user_story in user_stories:
			identifier.identify(user_story)

		kept = self.apply_threshold(identifier.relationships, threshold)
		self.create(kept, user_stories, threshold, identifier.roles)

		return self.onto
예제 #11
0
	def create(self, relationships, stories, threshold, roles):
		"""Create the ontology and prolog entries for the given relationships.

		Args:
			relationships: sequences laid out as
				``[story, pre_token, pattern, post_token, relation_token]``; the
				relation token is only read for non-parent patterns.
			stories: stories searched by ``self.find_story``.
			threshold: minimum weight a weighted token needs to get a class.
			roles: sequences of ``[story, role_token]``.
		"""
		for entry in relationships:
			pre = NLPUtility.get_case(entry[1])
			post = NLPUtility.get_case(entry[3])
			story_nr = entry[0]
			pattern = entry[2]

			if pattern == Pattern.parent:
				self.onto.get_class_by_name(story_nr, pre, post)
				self.prolog.new_relationship(story_nr, pre, 'isa', post)

			if pattern != Pattern.parent:
				rel = NLPUtility.get_case(entry[4])

			if pattern == Pattern.subj_dobj or pattern == Pattern.compound_has:
				self.onto.get_class_by_name(story_nr, pre)
				self.onto.get_class_by_name(story_nr, post)
				self.prolog.new_relationship(story_nr, pre, rel, post)

				if pattern == Pattern.subj_dobj:
					self.make_can_relationship(story_nr, pre, rel, post)
				else:
					self.make_has_relationship(story_nr, pre, rel, post)

			# Both ends of every relationship get a prolog class
			self.prolog.get_class_by_name(story_nr, pre)
			self.prolog.get_class_by_name(story_nr, post)

		# Every sufficiently weighted token gets a class in each story it occurs in
		for weighted in self.weighted_tokens:
			if weighted.weight >= threshold:
				for in_story in self.find_story(weighted, stories):
					self.onto.get_class_by_name(in_story, weighted.case)

		for role in roles:
			self.onto.get_class_by_name(role[0], NLPUtility.get_case(role[1]), '', True)
예제 #12
0
	def remove_indicators(self, matrix, stories, nlp):
		"""Drop the rows of ``matrix`` that belong to indicator words with a zero 'sum'.

		Args:
			matrix: DataFrame indexed by case, with a ``'sum'`` column.
			stories: stories providing the role/means/ends indicator strings and
				an ``indicators`` iterable.
			nlp: callable that turns the joined indicator string into tokens.

		Returns:
			A filtered DataFrame; rows with ``sum > 0`` are always kept.
		"""
		indicators = []

		for story in stories:
			ind = story.role.indicator + " " + story.means.indicator
			if story.has_ends:
				ind += " " + story.ends.indicator

			indicators.extend(NLPUtility.case(t) for t in nlp(ind))
			indicators.extend(story.indicators)

		# Build a new list: removing from `indicators` while iterating over it
		# skipped the element that follows every removal. Indicators that are not
		# in the index have no row to protect and simply stay in the list.
		indicators = [
			indicator for indicator in indicators
			if not (indicator in matrix.index and matrix.loc[indicator, 'sum'] > 0)
		]

		# `~` is the boolean negation; unary minus on a boolean mask is rejected
		# by current numpy.
		return matrix[~matrix.index.isin(indicators)]
예제 #13
0
	def get_parts(self, class_name, story):
		"""Return the story parts ('Role', 'Means', 'Ends') a class name occurs in.

		The class name is split on whitespace and looked up as a sub-list of the
		token cases of each part (``Utility.is_sublist``). For single-word class
		names, tokens that belong to a compound of that part are left out.

		Args:
			class_name: name of the class, possibly consisting of several words.
			story: story whose role, means and ends parts are inspected.

		Returns:
			A list containing any of ``'Role'``, ``'Means'``, ``'Ends'``.
		"""
		case = class_name.split()

		def flatten(compounds):
			# Compounds are either a flat token list or a list of token lists.
			# The loops must go outer-to-inner; the reversed order raised a
			# NameError on `sublist`.
			if len(compounds) > 0 and isinstance(compounds[0], list):
				return [item for sublist in compounds for item in sublist]
			return compounds

		# The main object's compound is itself a list of tokens, so merge its
		# items instead of nesting the list as a single element (a nested list can
		# never match a token in the membership test below).
		main_compound = story.means.main_object.compound
		means_compounds = list(main_compound) if isinstance(main_compound, list) else [main_compound]
		if story.means.free_form:
			means_compounds.extend(flatten(story.means.compounds))

		ends_compounds = flatten(story.ends.compounds)

		single_word = len(case) == 1
		role = []
		means = []
		ends = []

		for token in story.data:
			if token in story.role.text:
				if not single_word or token not in story.role.functional_role.compound:
					role.append(NLPUtility.case(token))
			if token in story.means.text:
				if not single_word or token not in means_compounds:
					means.append(NLPUtility.case(token))
			if story.has_ends and token in story.ends.text:
				if not single_word or token not in ends_compounds:
					ends.append(NLPUtility.case(token))

		rme = []
		if Utility.is_sublist(case, role):
			rme.append('Role')
		if Utility.is_sublist(case, means):
			rme.append('Means')
		if Utility.is_sublist(case, ends):
			rme.append('Ends')

		return rme
예제 #14
0
	def get_compound_nouns(story, span):
		"""Return the first compound noun pair found in ``span``.

		For every noun in ``span`` each child accepted by
		``NLPUtility.is_compound`` forms a ``[child, noun]`` pair. When the child
		precedes the noun, earlier pairs sharing either token are replaced.

		Returns:
			The first remaining ``[child, noun]`` pair, or an empty list when
			there is none.
		"""
		compounds = []
		nouns = MinerUtility.get_nouns(story, span)

		for token in nouns:
			for child in token.children:
				if NLPUtility.is_compound(child):
					# Replace to take rightmost child
					if child.idx < token.idx:
						# Rebuild the list: removing entries while iterating over
						# it skipped the entry after every removal.
						compounds = [
							compound for compound in compounds
							if child not in compound and token not in compound
						]
					compounds.append([child, token])

		# NOTE(review): the original also called MinerUtility.get_span on every
		# pair but threw the result away, so it never affected the return value;
		# that dead loop is gone. Confirm whether spans were meant to be returned.

		# Every entry is a [child, noun] list; only the first one is returned
		if compounds:
			return compounds[0]

		return compounds
예제 #15
0
	def get_functional_role(self, story):
		"""Determine the functional role (main token and compound) of a story.

		A single-token role text is its own main token. Otherwise the rightmost
		compound found before a trailing 'with' / 'w/' becomes the compound and
		its last item the main token; without a compound the main token is the
		token that is its own head.

		Returns:
			The same ``story`` object.
		"""
		role = story.role
		functional_role = role.functional_role

		# Index of the last 'with' / 'w/' token, -1 when there is none
		with_i = -1
		for token in role.text:
			if MinerUtility.lower(token.text) in ('with', 'w/'):
				with_i = token.i

		candidates = role.text[0:with_i] if with_i > 0 else role.text

		# If there is just one word
		if len(role.text) == 1:
			functional_role.main = role.text[0]
			return story

		pairs = [
			[token, token.head]
			for token in candidates
			if NLPUtility.is_compound(token)
		]

		# pick rightmost pair; stays an empty list when nothing was found
		functional_role.compound = pairs[-1] if pairs else pairs

		if functional_role.compound:
			# If it is a compound
			functional_role.main = functional_role.compound[-1]
		else:
			# Get head of tree
			for token in role.text:
				if token is token.head:
					functional_role.main = token

		return story
예제 #16
0
	def get_mobj_and_mv(self, story, part='means'):
		"""Find the main verb and main object of one part of a story.

		The results are stored on ``story.means`` or ``story.ends`` (for the ends
		part the subject is stored as well). Afterwards the matching
		``get_<part>_phrases`` method is called to fill in the phrases.

		Args:
			story: story to analyse; updated in place.
			part: name of the part to analyse; ``'means'`` is handled
				explicitly, any other value is treated as the ends part.

		Returns:
			The story returned by ``get_<part>_phrases``.
		"""
		# Plain attribute access instead of eval()
		text = getattr(story, str(part)).text

		simple = False
		found_verb = False
		found_obj = False
		found_mv_phrase = False
		subject = []
		main_verb = []
		main_object = []
		mv_phrase = []

		# Simple case if the subj and dobj are linked by a verb
		for token in text:
			if NLPUtility.is_subject(token):
				subject = token
				if NLPUtility.is_verb(token.head):
					found_verb = True
					main_verb = token.head
					break

		# No subject found: fall back to the first token of the part
		if isinstance(subject, list):
			subject = text[0]

		for token in text:
			if NLPUtility.is_dobj(token):
				found_obj = True

				if token.pos_ == "PRON": # If it is a pronoun, look for a preposition with a pobj
					f = False
					for child in token.head.children:
						if child.dep_ == "prep" and child.right_edge.dep_ == "pobj" and not f:
							token = child.right_edge
							mv_phrase = [main_verb, child]
							f = True
							found_mv_phrase = True
				elif token.pos_ == "ADJ" or token.pos_ == "ADV": # Set to right edge if there is an adj/adv as dobj, and possibly make a verb phrase
					f = False
					for child in token.children:
						if child.dep_ == "prep" and not f:
							for grandchild in child.children:
								if grandchild.dep_ == "pobj":
									mv_phrase = [main_verb, token, child]
									token = grandchild
									f = True
									found_mv_phrase = True
				if token.head == main_verb:
					simple = True

				main_object = token

				break

		# If the root of the sentence is a verb
		if not simple:
			for token in text:
				if token.dep_ == 'ROOT' and NLPUtility.is_verb(token):
					found_verb = True
					main_verb = token
					break

		# If no main verb could be found it is the second word (directly after 'I')
		# Possibly a NLP error...
		if not found_verb:
			main_verb = text[1]

		# If the sentence contains no dobj it must be another obj
		if not found_obj:
			for token in text:
				if token.dep_[1:] == 'obj':
					found_obj = True
					main_object = token
					break

		# If none is found it points to the unknown 'system part'
		# + get phrases for main_object and main_verb
		if not found_obj and part == 'means':
			main_object = story.system.main

		if part == 'means':
			story.means.main_verb.main = main_verb
			story.means.main_object.main = main_object
			if found_mv_phrase:
				story.means.main_verb.phrase = MinerUtility.get_span(story, mv_phrase, 'means.text')
				story.means.main_verb.type = "II"
		else:
			story.ends.subject.main = subject
			story.ends.main_verb.main = main_verb
			story.ends.main_object.main = main_object
			if found_mv_phrase:
				story.ends.main_verb.phrase = MinerUtility.get_span(story, mv_phrase, 'ends.text')
				story.ends.main_verb.type = "II"

		# Look the phrase method up by name instead of eval()-ing a call string
		get_phrases = getattr(self, 'get_' + str(part) + '_phrases')

		# An object that was never found is still the initial empty list; in that
		# case nothing may be assumed about it either.
		if isinstance(main_object, list) or main_object == story.system.main:
			story = get_phrases(story, found_mv_phrase, False)
		else:
			story = get_phrases(story, found_mv_phrase)

		return story
class Constructor:
    def __init__(self, nlp, user_stories, matrix):
        self.nlp = nlp
        self.user_stories = user_stories
        self.weights = matrix['sum'].reset_index().values.tolist()

    def make(self, ontname, threshold, link):
        """Build the ontology and prolog models and render them.

        Args:
            ontname: name passed to both ``Ontology`` objects.
            threshold: passed to ``PatternFactory.make_patterns``.
            link: when truthy, classes are linked to their stories first.

        Returns:
            A 5-tuple: rendered ontology, rendered prolog, the ontology object,
            the prolog object and a list of ``[role_name, rendered_output]``
            pairs in which ``'/'`` in the role name is replaced by ``'_'``.
        """
        stories = self.user_stories
        weighted_tokens = WeightAttacher.make(stories, self.weights)

        self.onto = Ontology(ontname, stories)
        self.prolog = Ontology(ontname, stories)

        factory = PatternFactory(self.onto, self.prolog, weighted_tokens)
        self.onto = factory.make_patterns(stories, threshold)
        self.prolog = factory.prolog

        if link:
            self.link_to_story(self.onto.classes, stories)

        onto_generator = Generator(self.onto.classes, self.onto.relationships)
        prolog_generator = Generator(self.prolog.classes,
                                     self.prolog.relationships, False)

        per_role_out = [
            [entry[0].replace('/', '_'), entry[1].prt(self.onto)]
            for entry in self.get_per_role(stories, link)
        ]

        onto_text = onto_generator.prt(self.onto)
        prolog_text = prolog_generator.prt(self.prolog)
        return onto_text, prolog_text, self.onto, self.prolog, per_role_out

    def link_to_story(self, classes, stories):
        used_stories = []

        for cl in classes:
            for story in cl.stories:
                if story >= 0:
                    s = self.get_story(int(story), stories)
                    part_name = self.get_parts(cl.name, s)

                    # for part in part_name:
                    #	n = s.txtnr() + part
                    #	self.onto.get_class_by_name(-1, n, s.txtnr())
                    #	self.onto.new_relationship(-1, cl.name, cl.name + 'OccursIn' + n, n)
                    self.onto.new_relationship(
                        -1, cl.name, cl.name + 'OccursIn' + s.txtnr(),
                        s.txtnr())

                    for part in part_name:
                        self.prolog.new_relationship(-1, cl.name, part,
                                                     s.txtnr())

                    used_stories.append(s.txtnr())

        for story in used_stories:
            self.onto.get_class_by_name(-1, story, 'UserStory')

    def get_per_role(self, stories, link):
        roles_link = []
        roles = []
        stories_per_role = []
        per_role_ontos = []

        # Get a list of roles and a list where the stories are linked to their roles
        for story in self.user_stories:
            roles_link.append([story.role.t, story.number])
            if str.lower(story.role.t) not in [str.lower(s) for s in roles]:
                roles.append(story.role.t)

        # Get a list of stories per role and get the generator object for these stories
        for role in roles:
            stories_per_role = []
            for link in roles_link:
                if str.lower(role) == str.lower(link[0]):
                    stories_per_role.append(link[1])

            per_role_ontos.append(
                [role, self.get_generator(role, stories_per_role, link)])

        return per_role_ontos

    def get_generator(self, role, spr, link):
        """Create a ``Generator`` restricted to the classes of one role.

        Args:
            role: not used by this method.
            spr: story numbers that belong to the role.
            link: when truthy, relationships to ``'US<number>'`` classes and
                the ``'UserStory'`` class are included as well.

        Returns:
            A ``Generator`` built from the selected classes and relationships.
        """
        all_classes = self.onto.classes
        selected = []
        selected_names = []

        # Get classes
        for cl in all_classes:
            for story_nr in cl.stories:
                if not (story_nr >= 0 and story_nr in spr
                        and cl.name not in selected_names):
                    continue

                selected.append(cl)
                selected_names.append(cl.name)

                # Pull in the parent class, which is looked up by name
                if cl.parent != '':
                    for candidate in all_classes:
                        if candidate.name == cl.parent:
                            selected.append(candidate)

            # Get the general classes
            if cl.stories[0] == -1 and cl.name in ('FunctionalRole', 'Person'):
                selected.append(cl)

        relationships = []
        story_class_names = []

        # Get all relationships belonging to these classes
        for rel in self.onto.relationships:
            for story_nr in rel.stories:
                if (rel.domain in selected_names
                        and rel.range in selected_names and story_nr in spr):
                    relationships.append(rel)

            # If 'link' add these classes too
            if link:
                for story_nr in spr:
                    if (rel.domain in selected_names
                            and rel.range == 'US' + str(story_nr)):
                        relationships.append(rel)
                        story_class_names.append(rel.range)

        # Retrieve all classes for the relationships created in link
        if link:
            for cl in all_classes:
                for name in story_class_names:
                    if cl.name == name:
                        selected.append(cl)
                if cl.name == 'UserStory':
                    selected.append(cl)

        return Generator(selected, relationships)

    def get_story(self, nr, stories):
        for story in stories:
            if nr == story.number:
                return story
        return False

    def get_parts(self, class_name, story):
        case = class_name.split()

        means_compounds = []
        means_compounds.append(story.means.main_object.compound)
        ends_compounds = story.ends.compounds

        if story.means.free_form:
            if len(story.means.compounds) > 0:
                if type(story.means.compounds[0]) is list:
                    mc = [
                        item for item in sublist
                        for sublist in story.means.compounds
                    ]
                else:
                    mc = story.means.compounds
                means_compounds.extend(mc)

        if len(ends_compounds) > 0:
            if type(ends_compounds[0]) is list:
                ends_compounds = [
                    item for item in sublist
                    for sublist in story.ends.compounds
                ]

        role = []
        means = []
        ends = []
        rme = []

        for token in story.data:
            if token in story.role.text:
                if len(case) != 1:
                    role.append(NLPUtility.case(token))
                elif token not in story.role.functional_role.compound:
                    role.append(NLPUtility.case(token))
            if token in story.means.text:
                if len(case) != 1:
                    means.append(NLPUtility.case(token))
                elif token not in means_compounds:
                    means.append(NLPUtility.case(token))
예제 #18
0
	def __init__(self, token, weight):
		"""Keep a token with its weight and the case ``NLPUtility.case`` gives it."""
		self.token, self.weight = token, weight
		self.case = NLPUtility.case(token)
예제 #19
0
	def get_span(story, li, part='data'):
		"""Return the items of a story attribute at the indices derived from ``li``.

		Args:
			story: object holding the attribute named by ``part``.
			li: passed to ``NLPUtility.get_idx`` to obtain the indices.
			part: attribute path on ``story``; may be dotted (the callers in
				this code base pass e.g. ``'means.text'``).

		Returns:
			A list with one item per index, in index-list order.
		"""
		from operator import attrgetter

		idxlist = NLPUtility.get_idx(li)

		# attrgetter follows dotted paths, so the attribute is resolved once and
		# without eval()-ing a string on every iteration.
		target = attrgetter(str(part))(story)

		return [target[i] for i in idxlist]
 def __init__(self, token, weight):
     """Store the token, its weight, and the token's case from ``NLPUtility.case``."""
     self.weight = weight
     self.token = token
     self.case = NLPUtility.case(self.token)
예제 #21
0
	def get_factor_part(self, matrix, story, part):
		"""Add the score of every known token of one story part to the matrix.

		Args:
			matrix: matrix whose index holds the known cases.
			story: story providing the part and the column name (``txtnr()``).
			part: name of the story part; the score comes from the method
				``self.score_<part>``.

		Returns:
			The matrix as returned by the last ``self.add`` call, or the input
			matrix when no token matched.
		"""
		# getattr replaces eval() for both the attribute and the method lookup
		for token in getattr(story, str(part)).text:
			case = NLPUtility.case(token)
			if case in matrix.index.values:
				matrix = self.add(
					matrix,
					case,
					story.txtnr(),
					getattr(self, 'score_' + str(part))(token, story),
				)

		return matrix
예제 #22
0
	def find_story(self, w_token, stories):
		"""Return the numbers of the stories that contain the weighted token's case.

		A story matches when ``w_token.case`` equals the ``NLPUtility.case`` of
		any token in ``story.data``.
		"""
		return [
			story.number
			for story in stories
			if w_token.case in [NLPUtility.case(t) for t in story.data]
		]
예제 #23
0
    def get_mobj_and_mv(self, story, part='means'):
        """Find the main verb and main object of one part of a story.

        The results are stored on ``story.means`` or ``story.ends`` (for the
        ends part the subject is stored as well). Afterwards the matching
        ``get_<part>_phrases`` method is called to fill in the phrases.

        Args:
            story: story to analyse; updated in place.
            part: name of the part to analyse; ``'means'`` is handled
                explicitly, any other value is treated as the ends part.

        Returns:
            The story returned by ``get_<part>_phrases``.
        """
        # Plain attribute access instead of eval()
        text = getattr(story, str(part)).text

        simple = False
        found_verb = False
        found_obj = False
        found_mv_phrase = False
        subject = []
        main_verb = []
        main_object = []
        mv_phrase = []

        # Simple case if the subj and dobj are linked by a verb
        for token in text:
            if NLPUtility.is_subject(token):
                subject = token
                # The word 'can' is never accepted as the main verb here
                if NLPUtility.is_verb(
                        token.head) and str.lower(token.head.text) != 'can':
                    found_verb = True
                    main_verb = token.head
                    break

        # No subject found: fall back to the first token of the part
        if isinstance(subject, list):
            subject = text[0]

        for token in text:
            if NLPUtility.is_dobj(token):
                found_obj = True

                if token.pos_ == "PRON":  # If it is a pronoun, look for a preposition with a pobj
                    f = False
                    for child in token.head.children:
                        if child.dep_ == "prep" and child.right_edge.dep_ == "pobj" and not f:
                            token = child.right_edge
                            mv_phrase = [main_verb, child]
                            f = True
                            found_mv_phrase = True
                elif token.pos_ == "ADJ" or token.pos_ == "ADV":  # Set to right edge if there is an adj/adv as dobj, and possibly make a verb phrase
                    f = False
                    for child in token.children:
                        if child.dep_ == "prep" and not f:
                            for grandchild in child.children:
                                if grandchild.dep_ == "pobj":
                                    mv_phrase = [main_verb, token, child]
                                    token = grandchild
                                    f = True
                                    found_mv_phrase = True
                if token.head == main_verb:
                    simple = True

                main_object = token

                break

        # If the root of the sentence is a verb
        if not simple:
            for token in text:
                if token.dep_ == 'ROOT' and NLPUtility.is_verb(token):
                    found_verb = True
                    main_verb = token
                    break

        # If no main verb could be found, fall back to a fixed position: the
        # third token for the means part or when the second token is 'can',
        # otherwise the second token. Possibly a NLP error...
        if not found_verb:
            if str(part) == 'means' or str.lower(text[1].text) == 'can':
                main_verb = text[2]
            else:
                main_verb = text[1]

        # If the sentence contains no dobj it must be another obj
        if not found_obj:
            for token in text:
                if token.dep_[1:] == 'obj':
                    found_obj = True
                    main_object = token
                    break

        # If none is found it points to the unknown 'system part'
        # + get phrases for main_object and main_verb
        if not found_obj and part == 'means':
            main_object = story.system.main

        if part == 'means':
            story.means.main_verb.main = main_verb
            story.means.main_object.main = main_object
            if found_mv_phrase:
                story.means.main_verb.phrase = MinerUtility.get_span(
                    story, mv_phrase, 'means.text')
                story.means.main_verb.type = "II"
        else:
            story.ends.subject.main = subject
            story.ends.main_verb.main = main_verb
            story.ends.main_object.main = main_object
            if found_mv_phrase:
                story.ends.main_verb.phrase = MinerUtility.get_span(
                    story, mv_phrase, 'ends.text')
                story.ends.main_verb.type = "II"

        # Look the phrase method up by name instead of eval()-ing a call string
        get_phrases = getattr(self, 'get_' + str(part) + '_phrases')

        # An object that was never found is still the initial empty list; in
        # that case nothing may be assumed about it.
        if isinstance(main_object, list) or main_object == story.system.main:
            story = get_phrases(story, found_mv_phrase, False)
        else:
            story = get_phrases(story, found_mv_phrase)

        return story
        for token in story.data:
            if token in story.role.text:
                if len(case) != 1:
                    role.append(NLPUtility.case(token))
                elif token not in story.role.functional_role.compound:
                    role.append(NLPUtility.case(token))
            if token in story.means.text:
                if len(case) != 1:
                    means.append(NLPUtility.case(token))
                elif token not in means_compounds:
                    means.append(NLPUtility.case(token))
            if story.has_ends:
                if token in story.ends.text:
                    if len(case) != 1:
                        ends.append(NLPUtility.case(token))
                    elif token not in ends_compounds:
                        ends.append(NLPUtility.case(token))

        if Utility.is_sublist(case, role):
            rme.append('Role')

        if Utility.is_sublist(case, means):
            rme.append('Means')

        if Utility.is_sublist(case, ends):
            rme.append('Ends')

        return rme