Ejemplo n.º 1
0
    def _insert_new_terms_to_tables(self):
        """
        Promote this iteration's frequent candidates into the term tables.

        For opinions and then for aspects: the current-iteration table is
        merged into the raw table, every candidate whose occurrence list is
        at least as long as the matching minimum frequency becomes part of
        the fresh "previous iteration" collection, the current-iteration
        table is emptied, and the fresh collection is added to the
        accumulated candidate collection.
        """
        # ---- opinion terms ----
        self.opinion_candidate_list_prev_iter = {}
        self.opinion_candidate_list_raw = _merge_tables(
            self.opinion_candidate_list_raw, self.opinion_candidate_list_curr_iter
        )
        for occurrences in self.opinion_candidate_list_curr_iter.values():
            if len(occurrences) < self.min_freq_opinion_candidate:
                continue
            opinion_term = _set_opinion_term_polarity(occurrences)
            self.opinion_candidate_list_prev_iter[str(opinion_term)] = opinion_term
        self.opinion_candidate_list_curr_iter = {}
        # Newly accepted terms override accumulated ones that share a key.
        combined_opinions = dict(self.opinion_candidate_list)
        combined_opinions.update(self.opinion_candidate_list_prev_iter)
        self.opinion_candidate_list = combined_opinions

        # ---- aspect terms ----
        self.aspects_candidate_list_prev_iter = []
        self.aspect_candidate_list_raw = _merge_tables(
            self.aspect_candidate_list_raw, self.aspect_candidate_list_curr_iter
        )
        for occurrences in self.aspect_candidate_list_curr_iter.values():
            if len(occurrences) < self.min_freq_aspect_candidate:
                continue
            # The first recorded occurrence supplies term, POS and lemma.
            head = occurrences[0]
            aspect_term = AspectTerm(head.term, head.pos, head.lemma)
            if aspect_term in self.aspects_candidate_list_prev_iter:
                continue
            self.aspects_candidate_list_prev_iter.append(aspect_term)
        self.aspect_candidate_list_curr_iter = {}
        # Build a new list rather than extending in place.
        self.aspect_candidate_list = (
            self.aspect_candidate_list + self.aspects_candidate_list_prev_iter
        )
Ejemplo n.º 2
0
    def extract_terms_from_doc(self, parsed_doc):
        """Apply the acquisition rules to each dependency relation of a document.

        Every non-root relation of every sentence is checked against the
        opinion and aspect terms accepted in the previous iteration; the rule
        outputs are handed to ``_add_opinion_term`` and ``_add_aspect_term``.

        Args:
            parsed_doc (ParsedDocument): Input parsed document.
        """
        for text, parsed_sent in parsed_doc.sent_iter():
            relations = _get_rel_list(parsed_sent)

            for relation in relations:
                if relation.rel == 'root':
                    continue

                gov_opinion = self.opinion_candidate_list_prev_iter.get(relation.gov.text)
                dep_opinion = self.opinion_candidate_list_prev_iter.get(relation.dep.text)
                opinion_terms = []
                aspect_terms = []

                # Rule 1: exactly one side of the relation is a known opinion term.
                if bool(gov_opinion) != bool(dep_opinion):
                    opinion_terms.append(rule_1(relation, gov_opinion, dep_opinion, text))

                # Rules 2-4: only the dependent is a known opinion term.
                if not gov_opinion and dep_opinion:
                    opinion_terms.append(rule_2(relation, relations, dep_opinion, text))
                    aspect_terms.append(rule_3(relation, relations, text))
                    aspect_terms.append(rule_4(relation, relations, text))

                # Rules 5-6: the governor is a known aspect term, the dependent is not.
                known_aspects = self.aspects_candidate_list_prev_iter
                if (
                    known_aspects
                    and AspectTerm.from_token(relation.gov) in known_aspects
                    and AspectTerm.from_token(relation.dep) not in known_aspects
                ):
                    opinion_terms.append(rule_5(relation, text))
                    aspect_terms.append(rule_6(relation, relations, text))

                self._add_opinion_term(opinion_terms)
                self._add_aspect_term(aspect_terms)
Ejemplo n.º 3
0
    def _add_aspect_term(self, terms):
        """
        add new aspect term to table.
        Args:
            terms (list of CandidateTerm): candidate terms list
        """
        for term in terms:
            if term:
                term_entry = AspectTerm(term.term, term.pos)
                if term_entry not in self.init_aspect_dict and \
                        term_entry not in self.aspect_candidate_list and not\
                        self.ASPECT_STOP_LIST.is_in_stop_list(term.term[0]):
                    _insert_new_term_to_table(
                        term, self.aspect_candidate_list_curr_iter)

        return True