def _insert_new_terms_to_tables(self):
    """Fold the current iteration's candidates into the accumulated tables.

    For opinions and then for aspects, this method:

    * resets the ``*_prev_iter`` container,
    * merges the current-iteration table into the raw table via
      ``_merge_tables``,
    * refills the ``*_prev_iter`` container from the current-iteration
      entries whose occurrence list is at least as long as the matching
      ``min_freq_*_candidate`` threshold,
    * empties the current-iteration table, and
    * extends the accumulated candidate table with the refilled
      ``*_prev_iter`` container.
    """
    # ------------------------------ opinion terms ------------------------------
    self.opinion_candidate_list_prev_iter = {}
    self.opinion_candidate_list_raw = _merge_tables(
        self.opinion_candidate_list_raw,
        self.opinion_candidate_list_curr_iter,
    )
    for occurrences in self.opinion_candidate_list_curr_iter.values():
        if len(occurrences) < self.min_freq_opinion_candidate:
            continue
        opinion = _set_opinion_term_polarity(occurrences)
        # Keyed by the term's string form.
        self.opinion_candidate_list_prev_iter[str(opinion)] = opinion
    self.opinion_candidate_list_curr_iter = {}

    # Build a fresh dict; on key clashes the entries from this iteration win.
    combined_opinions = dict(self.opinion_candidate_list)
    combined_opinions.update(self.opinion_candidate_list_prev_iter)
    self.opinion_candidate_list = combined_opinions

    # ------------------------------ aspect terms -------------------------------
    self.aspects_candidate_list_prev_iter = []
    self.aspect_candidate_list_raw = _merge_tables(
        self.aspect_candidate_list_raw,
        self.aspect_candidate_list_curr_iter,
    )
    for occurrences in self.aspect_candidate_list_curr_iter.values():
        if len(occurrences) < self.min_freq_aspect_candidate:
            continue
        # The first recorded occurrence supplies the fields of the new entry.
        head = occurrences[0]
        aspect = AspectTerm(head.term, head.pos, head.lemma)
        if aspect not in self.aspects_candidate_list_prev_iter:
            self.aspects_candidate_list_prev_iter.append(aspect)
    self.aspect_candidate_list_curr_iter = {}

    # Concatenate into a new list rather than extending in place.
    self.aspect_candidate_list = (
        self.aspect_candidate_list + self.aspects_candidate_list_prev_iter
    )
def extract_terms_from_doc(self, parsed_doc):
    """Extract candidate terms for sentences in parsed document.

    Each relation of each sentence whose ``rel`` is not ``'root'`` is run
    through the acquisition rules. The rule results are then passed to
    ``_add_opinion_term`` and ``_add_aspect_term``.

    Args:
        parsed_doc (ParsedDocument): Input parsed document.
    """
    for text, parsed_sent in parsed_doc.sent_iter():
        relations = _get_rel_list(parsed_sent)
        for rel_entry in relations:
            if rel_entry.rel == 'root':
                continue

            known_opinions = self.opinion_candidate_list_prev_iter
            gov_seen = known_opinions.get(rel_entry.gov.text)
            dep_seen = known_opinions.get(rel_entry.dep.text)
            gov_known = bool(gov_seen)
            dep_known = bool(dep_seen)
            opinions = []
            aspects = []

            # =========================== acquisition rules ==============================

            # Exactly one side of the relation is a known opinion term.
            if gov_known != dep_known:
                opinions.append(rule_1(rel_entry, gov_seen, dep_seen, text))

            # Only the dependent is a known opinion term.
            if dep_known and not gov_known:
                opinions.append(rule_2(rel_entry, relations, dep_seen, text))
                aspects.append(rule_3(rel_entry, relations, text))
                aspects.append(rule_4(rel_entry, relations, text))

            # The governor is a known aspect term and the dependent is not.
            known_aspects = self.aspects_candidate_list_prev_iter
            if (
                known_aspects
                and AspectTerm.from_token(rel_entry.gov) in known_aspects
                and AspectTerm.from_token(rel_entry.dep) not in known_aspects
            ):
                opinions.append(rule_5(rel_entry, text))
                aspects.append(rule_6(rel_entry, relations, text))

            self._add_opinion_term(opinions)
            self._add_aspect_term(aspects)
def _add_aspect_term(self, terms):
    """Add new aspect terms to the current-iteration table.

    A candidate is inserted only when it is truthy, its ``AspectTerm`` entry
    is in neither ``init_aspect_dict`` nor ``aspect_candidate_list``, and the
    first element of its ``term`` is not in the aspect stop list.

    Args:
        terms (list of CandidateTerm): candidate terms list

    Returns:
        bool: True (see the review note on the return statement).
    """
    for candidate in terms:
        if not candidate:
            continue
        entry = AspectTerm(candidate.term, candidate.pos)
        if entry in self.init_aspect_dict:
            continue
        if entry in self.aspect_candidate_list:
            continue
        if self.ASPECT_STOP_LIST.is_in_stop_list(candidate.term[0]):
            continue
        _insert_new_term_to_table(candidate, self.aspect_candidate_list_curr_iter)
    # NOTE(review): the source this was reformatted from had lost its
    # indentation, so the nesting level of this return is an assumption --
    # confirm it belongs after the loop rather than inside it.
    return True