def __parse_opinion(line, objects_list):
    """Build a ``RefOpinion`` from one serialized opinion line.

    The first ``len(ContextsReader.OPINION_KEY)`` characters of `line` are
    dropped, then three delimited fields are read from the remainder:

    * ``b:(<int>)``       -- integer passed to ``Label.from_int``;
    * ``oi:[<int>,<int>]`` -- left and right object indices;
    * ``si:{<text>}``     -- text stored on the opinion via ``set_tag``.

    :param line: serialized opinion text.
    :param objects_list: list handed to ``RefOpinion`` as its ``owner``.
    :return: the created ``RefOpinion`` with its tag set.
    :raises ValueError: when a marker or its closing character is missing,
        when the index field does not split into exactly two parts, or when
        a numeric field is not a valid integer.
    """
    assert isinstance(objects_list, list)

    def _between(text, opening, closing):
        # Text enclosed by the first `opening` marker and the first
        # `closing` character found from that marker onwards.
        start = text.index(opening)
        stop = text.index(closing, start)
        return text[start + len(opening):stop]

    payload = line[len(ContextsReader.OPINION_KEY):]

    sentiment = Label.from_int(int(_between(payload, u'b:(', u')')))

    left_raw, right_raw = _between(payload, u'oi:[', u']').split(u',')
    left_id = int(left_raw)
    right_id = int(right_raw)

    parsed = RefOpinion(left_index=left_id,
                        right_index=right_id,
                        sentiment=sentiment,
                        owner=objects_list)

    parsed.set_tag(_between(payload, u'si:{', u'}'))
    return parsed
def read_opinions(filepath, synonyms, custom_opin_ends_iter=None,
                  read_sentiment=True, skip_non_added=True):
    """Read opinions into a new ``OpinionCollection``.

    :param filepath: forwarded to ``__iter_opinion_end_values``; only used
        when `custom_opin_ends_iter` is None.
    :param synonyms: ``SynonymsCollection`` given to the created collection.
    :param custom_opin_ends_iter: optional callable; when provided it is
        called with `read_sentiment` and must yield
        ``(left_value, right_value, sentiment)`` triples.
    :param read_sentiment: bool forwarded to whichever iterator is used.
    :param skip_non_added: when True, an opinion the collection refuses to
        add is reported with ``print`` and skipped; when False an
        ``Exception`` carrying the same message is raised instead.
    :return: the populated ``OpinionCollection``.
    :raises Exception: if an opinion was not added and `skip_non_added`
        is False.
    """
    assert isinstance(synonyms, SynonymsCollection)
    assert callable(custom_opin_ends_iter) or custom_opin_ends_iter is None
    assert isinstance(read_sentiment, bool)
    assert isinstance(skip_non_added, bool)

    opinions = OpinionCollection(opinions=[], synonyms=synonyms)

    if custom_opin_ends_iter is None:
        it = __iter_opinion_end_values(filepath, read_sentiment)
    else:
        it = custom_opin_ends_iter(read_sentiment)

    for left_value, right_value, sentiment in tqdm(it, "Reading opinions:"):
        o = Opinion(value_left=left_value,
                    value_right=right_value,
                    sentiment=Label.from_int(sentiment))

        # Only an explicit False result counts as "not added".
        if opinions.try_add_opinion(o) is not False:
            continue

        # The message is built only on the failure path, so successfully
        # added opinions never pay for (or fail in) the formatting.
        msg = "Warning: opinion '{}->{}' was skipped!".format(
            o.value_left, o.value_right)

        if not skip_non_added:
            raise Exception(msg)
        print(msg)

    return opinions
def create_test_opinions(test_collections, labels, synonyms_filepath, stemmer):
    """Create one ``OpinionCollection`` per test collection from flat labels.

    `labels` is consumed sequentially: one entry per opinion vector, in
    iteration order across all of `test_collections`. Every vector gets its
    label assigned through ``set_label``. A non-neutral opinion is added to
    the collection unless ``has_opinion_by_synonyms`` already reports it, in
    which case a "Failed for o=..." line is printed. Opinions labelled with
    ``NeutralLabel`` are neither added nor reported.

    :param test_collections: list of iterables of opinion vectors.
    :param labels: ``np.ndarray`` of values convertible with ``int``.
    :param synonyms_filepath: path given to ``SynonymsCollection.from_file``.
    :param stemmer: ``Stemmer`` used for the synonyms and the collections.
    :return: list of ``OpinionCollection``, one per input collection.
    """
    assert isinstance(test_collections, list)
    assert isinstance(labels, np.ndarray)
    assert isinstance(stemmer, Stemmer)

    synonyms = SynonymsCollection.from_file(synonyms_filepath, stemmer=stemmer)

    result = []
    position = 0  # running offset into `labels`, shared by all collections

    for collection in test_collections:
        opinions = OpinionCollection(None, synonyms, stemmer)

        for vector in collection:
            label = Label.from_int(int(labels[position]))
            vector.set_label(label)

            o = opinions.create_opinion(vector.value_left,
                                        vector.value_right,
                                        vector.label)

            already_present = opinions.has_opinion_by_synonyms(o)
            is_neutral = isinstance(label, NeutralLabel)

            if not is_neutral:
                if already_present:
                    print("Failed for o={}".format(o.to_unicode().encode('utf-8')))
                else:
                    opinions.add_opinion(o)

            position += 1

        result.append(opinions)

    return result
def decide_label_of_pair_in_title_optional(self, i, j, title_objects, title_frames):
    """Decide the label for the title object pair (i, j), if there is one.

    The decision is based on the frame variants that
    ``__get_frames_within`` returns for the span between the end of the
    left object and the term preceding the right object. Debug counters on
    `self` are updated along the way.

    ``None`` is returned when:

    * either object is rejected by ``__ner_types_limitation.is_auth``;
    * no frame variants were found in the span;
    * the number of polarities differs from the number of frame variants;
    * the resulting value lies strictly between -1 and 1.

    :param i: index of the left object in `title_objects`.
    :param j: index of the right object in `title_objects`.
    :param title_objects: provider of objects via ``get_object(index)``.
    :param title_frames: frame variants passed to ``__get_frames_within``.
    :return: ``Label`` built with ``Label.from_int``, or ``None``.
    """
    self.__debug_title_opinions_checked += 1

    # Both objects have to pass the type limitation, left one first.
    left_obj = title_objects.get_object(i)
    if not self.__ner_types_limitation.is_auth(left_obj):
        self.__debug_title_opinions_with_objs_non_valid_by_type += 1
        return None

    right_obj = title_objects.get_object(j)
    if not self.__ner_types_limitation.is_auth(right_obj):
        self.__debug_title_opinions_with_objs_non_valid_by_type += 1
        return None

    # Span boundaries: right after the left object up to the term that
    # precedes the right object.
    left_bound = left_obj.get_bound()
    right_bound = right_obj.get_bound()
    span_start = left_bound.TermIndex + left_bound.Length
    span_end = right_bound.TermIndex - 1

    frames_between = self.__get_frames_within(
        left_in=span_start,
        right_in=span_end,
        text_frame_variants=title_frames)

    polarities, inversions = get_frames_polarities(
        text_frame_variants=frames_between,
        frames=self.Settings.Frames)

    self.__debug_title_opinions_processed_by_frames += 1

    if len(frames_between) == 0:
        self.__debug_title_opinions_with_empty_frames += 1
        return None

    if len(frames_between) != len(polarities):
        self.__debug_title_opinions_with_polarities_missed += 1
        return None

    int_labels = []
    for position, polarity in enumerate(polarities):
        resolved = optional_invert_label(polarity.Label, inversions[position])
        int_labels.append(resolved.to_int())

    label = mean(int_labels)

    # Force to negative if there is a negative example.
    if -1 in int_labels:
        label = -1

    if -1 < label < 1:
        self.__debug_title_opinions_with_unknown_label += 1
        return None

    self.__debug_valid += 1
    return Label.from_int(int(label))
def create_label_from_relations(relation_labels, label_creation_mode):
    """Combine the labels of several relations into a single label.

    * ``LabelCalculationMode.FIRST_APPEARED``: the first element of
      `relation_labels` is returned as is.
    * ``LabelCalculationMode.AVERAGE``: a ``LabelPair`` is built whose
      forward/backward parts are the sign of the summed ``Forward`` /
      ``Backward`` integer values of all elements.

    When ``DebugKeys.PredictLabel`` is set, the integer values of the input
    labels and of the result are printed.

    :param relation_labels: list of labels; for the AVERAGE mode every
        element must expose ``Forward`` and ``Backward``.
    :param label_creation_mode: unicode, one of the ``LabelCalculationMode``
        values.
    :return: the resulting label, or ``None`` when `label_creation_mode`
        matches neither of the two modes above.
    :raises IndexError: in FIRST_APPEARED mode if `relation_labels` is empty.
    """
    assert isinstance(relation_labels, list)
    assert isinstance(label_creation_mode, unicode)

    label = None

    if label_creation_mode == LabelCalculationMode.FIRST_APPEARED:
        label = relation_labels[0]

    if label_creation_mode == LabelCalculationMode.AVERAGE:
        forwards = [r.Forward.to_int() for r in relation_labels]
        backwards = [r.Backward.to_int() for r in relation_labels]
        # np.sign returns a NumPy scalar; cast it to a plain int so that
        # Label.from_int receives the same kind of argument as elsewhere.
        label = LabelPair(
            forward=Label.from_int(int(np.sign(sum(forwards)))),
            backward=Label.from_int(int(np.sign(sum(backwards)))))

    if DebugKeys.PredictLabel:
        # Single-argument print(...) behaves the same as the print
        # statement under Python 2.
        print([r.to_int() for r in relation_labels])
        print("Result: {}".format(label.to_int()))

    # TODO: Correct label
    return label
def calculate_label(relation_labels):
    """Combine `relation_labels` according to the configured mode.

    The mode is read from ``self.Settings.RelationLabelCalculationMode``.
    ``self`` is not a parameter of this function; it is taken from the
    surrounding scope.
    NOTE(review): presumably this function is nested inside a method --
    confirm against the enclosing definition.

    * ``LabelCalculationMode.FIRST_APPEARED``: the first label is returned.
    * ``LabelCalculationMode.AVERAGE``: the sign of the summed integer
      values of all labels is converted back with ``Label.from_int``.

    When ``DebugKeys.PredictLabel`` is set, the integer values of the input
    labels and of the result are printed.

    :param relation_labels: list of labels exposing ``to_int()``.
    :return: the resulting label, or ``None`` when the configured mode
        matches neither of the two modes above.
    :raises IndexError: in FIRST_APPEARED mode if `relation_labels` is empty.
    """
    assert isinstance(relation_labels, list)

    mode = self.Settings.RelationLabelCalculationMode
    label = None

    if mode == LabelCalculationMode.FIRST_APPEARED:
        label = relation_labels[0]

    if mode == LabelCalculationMode.AVERAGE:
        total = sum(r.to_int() for r in relation_labels)
        # np.sign returns a NumPy scalar; cast it to a plain int so that
        # Label.from_int receives the same kind of argument as elsewhere.
        label = Label.from_int(int(np.sign(total)))

    if DebugKeys.PredictLabel:
        # Single-argument print(...) behaves the same as the print
        # statement under Python 2.
        print([r.to_int() for r in relation_labels])
        print("Result: {}".format(label.to_int()))

    return label