def __parse_opinion(line, objects_list):
    """
    Build a RefOpinion from one serialized opinion line.

    The leading len(ContextsReader.OPINION_KEY) characters are cut off, then
    three delimited fields are looked up by their opening markers:
        b:(<int>)         -- integer converted with Label.from_int
        oi:[<int>,<int>]  -- left and right object indices
        si:{<text>}       -- text stored on the opinion through set_tag

    line: text of the opinion line
    objects_list: list
        handed over to RefOpinion as `owner`.
    return: RefOpinion
    raises: ValueError when a marker or its closing bracket is not found,
        or when an index is not an integer.
    """
    assert (isinstance(objects_list, list))

    payload = line[len(ContextsReader.OPINION_KEY):]

    def field(opening, closing):
        # Text between `opening` and the first `closing` that follows it.
        begin = payload.index(opening)
        end = payload.index(closing, begin)
        return payload[begin + len(opening):end]

    sentiment = Label.from_int(int(field(u'b:(', u')')))

    left_text, right_text = field(u'oi:[', u']').split(u',')
    opinion = RefOpinion(left_index=int(left_text),
                         right_index=int(right_text),
                         sentiment=sentiment,
                         owner=objects_list)

    opinion.set_tag(field(u'si:{', u'}'))
    return opinion
def predict(self, dest_data_type=DataType.Test):
    """
    Label every relation of `dest_data_type`, save the resulting opinion
    collections and return the evaluation result.

    Steps:
      1. labels of the relation collection are reset;
      2. relations are processed in batches: the network labels are fetched
         through self.sess.run and the label at position `group_index` is
         applied to every relation of that group;
      3. for every news index an OpinionCollection is filled (the first
         label of each list is used) and saved;
      4. self._evaluate is called and its result returned.

    dest_data_type: key of self.relation_collections (DataType.Test by default)
    """
    relations = self.relation_collections[dest_data_type]
    relations.reset_labels()

    for relation_groups in relations.iter_by_linked_relations_groups(
            self.Settings.BatchSize):
        batch = Batch(relation_groups, self.Settings.GroupSize)
        feed_dict = self.create_feed_dict(batch, dest_data_type)
        uint_labels = self.sess.run([self.network.Labels],
                                    feed_dict=feed_dict)[0]

        # One predicted label per group, shared by all relations in it.
        for group_index, group in enumerate(batch.iter_groups):
            for relation in group:
                assert (isinstance(relation, ExtractedRelation))
                relations.apply_label(
                    label=Label.from_uint(int(uint_labels[group_index])),
                    relation_id=relation.relation_id)

    def first_label(labels):
        return labels[0]

    for news_ID in self.io.get_data_indices(dest_data_type):
        collection = OpinionCollection(None,
                                       self.synonyms,
                                       self.Settings.Stemmer)
        relations.fill_opinion_collection(collection,
                                          news_ID,
                                          first_label,
                                          debug_check_collection=False)
        output_filepath = self.io.get_opinion_output_filepath(
            news_ID, self.io.get_model_root(dest_data_type))
        collection.save(output_filepath)

    return self._evaluate(dest_data_type, self.Settings.Stemmer)
def read_opinions(filepath, synonyms, custom_opin_ends_iter=None,
                  read_sentiment=True, skip_non_added=True):
    """
    Collect opinions into a new OpinionCollection.

    filepath: passed to __iter_opinion_end_values when no custom iterator
        is given.
    synonyms: SynonymsCollection
    custom_opin_ends_iter: callable or None
        when given, called with `read_sentiment`; expected to yield
        (left_value, right_value, sentiment) triples.
    read_sentiment: bool
    skip_non_added: bool
        True  -- an opinion rejected by try_add_opinion is reported with
                 a printed warning;
        False -- the same situation raises an Exception.
    return: OpinionCollection
    """
    assert (isinstance(synonyms, SynonymsCollection))
    assert (custom_opin_ends_iter is None or callable(custom_opin_ends_iter))
    assert (isinstance(read_sentiment, bool))
    assert (isinstance(skip_non_added, bool))

    collection = OpinionCollection(opinions=[], synonyms=synonyms)

    if custom_opin_ends_iter is None:
        ends_iter = __iter_opinion_end_values(filepath, read_sentiment)
    else:
        ends_iter = custom_opin_ends_iter(read_sentiment)

    for left_value, right_value, sentiment in tqdm(ends_iter, "Reading opinions:"):
        opinion = Opinion(value_left=left_value,
                          value_right=right_value,
                          sentiment=Label.from_int(sentiment))

        was_added = collection.try_add_opinion(opinion)
        warning = "Warning: opinion '{}->{}' was skipped!".format(
            opinion.value_left, opinion.value_right)

        # Only an explicit False counts as a rejection.
        if was_added is not False:
            continue

        if skip_non_added:
            print(warning)
        else:
            raise Exception(warning)

    return collection
def create_test_opinions(test_collections, labels, synonyms_filepath, stemmer):
    """
    Turn predicted labels into opinion collections, one per test collection.

    test_collections: list
        each item is iterated to obtain opinion vectors.
    labels: np.ndarray
        consumed sequentially, one entry per opinion vector over all
        collections in order.
    synonyms_filepath: passed to SynonymsCollection.from_file
    stemmer: Stemmer
    return: list of OpinionCollection

    Every vector receives its label via set_label. An opinion is added only
    when its label is not a NeutralLabel and has_opinion_by_synonyms reports
    it as absent; a non-neutral opinion that is already present is reported
    with a printed message.
    """
    assert (isinstance(test_collections, list))
    assert (isinstance(labels, np.ndarray))
    assert (isinstance(stemmer, Stemmer))

    synonyms = SynonymsCollection.from_file(synonyms_filepath, stemmer=stemmer)

    result = []
    next_label = 0
    for vectors in test_collections:
        collection = OpinionCollection(None, synonyms, stemmer)

        for vector in vectors:
            label = Label.from_int(int(labels[next_label]))
            next_label += 1

            vector.set_label(label)
            opinion = collection.create_opinion(vector.value_left,
                                                vector.value_right,
                                                vector.label)

            already_present = collection.has_opinion_by_synonyms(opinion)
            if isinstance(label, NeutralLabel):
                continue

            if already_present:
                print("Failed for o={}".format(
                    opinion.to_unicode().encode('utf-8')))
            else:
                collection.add_opinion(opinion)

        result.append(collection)

    return result
def decide_label_of_pair_in_title_optional(self, i, j, title_objects, title_frames):
    """
    Decide the label for the pair of title objects (i, j), or return None.

    i, j: indices passed to title_objects.get_object (left and right object)
    title_objects: provides get_object(index)
    title_frames: frame variants of the title, filtered by __get_frames_within
    return: Label, or None when the pair is rejected.

    The pair is rejected (with the matching debug counter incremented) when:
      * either object fails the NER types limitation;
      * no frame variants lie between the two objects;
      * the number of polarities differs from the number of frame variants;
      * the resulting value lies strictly between -1 and 1.
    """
    self.__debug_title_opinions_checked += 1

    # Left object is fetched and checked first, then the right one.
    checked = []
    for object_index in (i, j):
        candidate = title_objects.get_object(object_index)
        if not self.__ner_types_limitation.is_auth(candidate):
            self.__debug_title_opinions_with_objs_non_valid_by_type += 1
            return None
        checked.append(candidate)
    left_obj, right_obj = checked

    # Getting object bounds
    left_bound = left_obj.get_bound()
    right_bound = right_obj.get_bound()

    frames_between = self.__get_frames_within(
        left_in=left_bound.TermIndex + left_bound.Length,
        right_in=right_bound.TermIndex - 1,
        text_frame_variants=title_frames)

    polarities, inversions = get_frames_polarities(
        text_frame_variants=frames_between,
        frames=self.Settings.Frames)

    self.__debug_title_opinions_processed_by_frames += 1

    frames_count = len(frames_between)
    if frames_count == 0:
        self.__debug_title_opinions_with_empty_frames += 1
        return None

    if frames_count != len(polarities):
        self.__debug_title_opinions_with_polarities_missed += 1
        return None

    int_labels = []
    for position, polarity in enumerate(polarities):
        inverted = optional_invert_label(polarity.Label, inversions[position])
        int_labels.append(inverted.to_int())

    value = mean(int_labels)

    # Force to negative if there is a negative example
    if -1 in int_labels:
        value = -1

    if -1 < value < 1:
        self.__debug_title_opinions_with_unknown_label += 1
        return None

    self.__debug_valid += 1
    return Label.from_int(int(value))
def create_label_from_relations(relation_labels, label_creation_mode):
    """
    Combine the labels of several relations into a single label.

    relation_labels: list
        in AVERAGE mode every item must expose `Forward` and `Backward`
        labels.
    label_creation_mode: unicode
        LabelCalculationMode.FIRST_APPEARED -- the first item is returned;
        LabelCalculationMode.AVERAGE -- a LabelPair built from the sign of
        the summed forward values and the sign of the summed backward values.
    return: the selected/created label; None when the mode matches neither
        constant.
    """
    assert (isinstance(relation_labels, list))
    assert (isinstance(label_creation_mode, unicode))

    result = None

    if label_creation_mode == LabelCalculationMode.FIRST_APPEARED:
        result = relation_labels[0]

    if label_creation_mode == LabelCalculationMode.AVERAGE:
        forward_total = sum(pair.Forward.to_int() for pair in relation_labels)
        backward_total = sum(pair.Backward.to_int() for pair in relation_labels)
        result = LabelPair(forward=Label.from_int(np.sign(forward_total)),
                           backward=Label.from_int(np.sign(backward_total)))

    if DebugKeys.PredictLabel:
        print([item.to_int() for item in relation_labels])
        print("Result: {}".format(result.to_int()))

    # TODO: Correct label
    return result
def calculate_label(relation_labels):
    """
    Reduce the labels of one relation group to a single label.

    relation_labels: list of labels exposing to_int()
    return: the first label (FIRST_APPEARED mode), a Label created from the
        sign of the summed integer values (AVERAGE mode), or None when the
        configured mode matches neither constant.

    NOTE(review): `self` is not a parameter here; it is read from the
    enclosing scope.
    """
    assert(isinstance(relation_labels, list))

    mode = self.Settings.RelationLabelCalculationMode
    result = None

    if mode == LabelCalculationMode.FIRST_APPEARED:
        result = relation_labels[0]

    if mode == LabelCalculationMode.AVERAGE:
        total = sum([item.to_int() for item in relation_labels])
        result = Label.from_int(np.sign(total))

    if DebugKeys.PredictLabel:
        print([item.to_int() for item in relation_labels])
        print("Result: {}".format(result.to_int()))

    return result
def from_file(cls, filepath, synonyms_filepaths, stemmer, debug=False):
    """
    Read opinions from one or several comma-separated text files.

    filepath: unicode or list
        single filepath or list of filepaths.
    synonyms_filepaths: unicode or list
        single filepath (SynonymsCollection.from_file) or list of
        filepaths (SynonymsCollection.from_files).
    stemmer: Stemmer
    debug: bool
    return: instance created as cls(opinions, synonyms, stemmer, debug)
    raises: Exception when `synonyms_filepaths` is neither unicode nor list.

    Every non-empty line is expected to hold at least three comma-separated
    values: left entity, right entity, sentiment. Shorter lines are reported
    with a printed message and skipped.
    """
    assert (isinstance(filepath, unicode) or isinstance(filepath, list))
    assert (isinstance(stemmer, Stemmer))

    if isinstance(synonyms_filepaths, unicode):
        synonyms = SynonymsCollection.from_file(synonyms_filepaths,
                                                stemmer=stemmer,
                                                debug=debug)
    elif isinstance(synonyms_filepaths, list):
        synonyms = SynonymsCollection.from_files(synonyms_filepaths,
                                                 stemmer=stemmer,
                                                 debug=debug)
    else:
        raise Exception("Unsupported type for 'synonyms_filepaths'")

    # The second branch used to re-test `unicode`, so a list of filepaths
    # was silently ignored and no file was read.
    filepaths = []
    if isinstance(filepath, unicode):
        filepaths.append(filepath)
    elif isinstance(filepath, list):
        filepaths = filepath

    opinions = []
    for fp in filepaths:
        with io.open(fp, "r", encoding='utf-8') as f:
            for i, line in enumerate(f):
                if line == '\n':
                    continue

                args = line.strip().split(',')

                if len(args) < 3:
                    print("should be at least 3 arguments: {}, '{}'".format(
                        i, line.encode('utf-8')))
                    continue

                entity_left = args[0].strip()
                entity_right = args[1].strip()
                sentiment = Label.from_str(args[2].strip())

                opinions.append(
                    Opinion(entity_left, entity_right, sentiment, stemmer))

    return cls(opinions, synonyms, stemmer, debug)
def apply_labels(self, uint_labels, minibatch):
    """
    uint_labels: list of int
        each label could be as follows: 0 -- neutral, and 1 -- positive,
        2 -- negative
    minibatch: MiniBatch

    Assigns one label to every sample of every bag. Labels are consumed
    in order, so `uint_labels` must follow the order in which samples
    appear when walking the bags of the minibatch.
    """
    assert (isinstance(uint_labels, list))
    assert (isinstance(minibatch, MiniBatch))

    # Flat walk over all samples, bag after bag.
    ordered_samples = (sample
                       for bag in minibatch.bags
                       for sample in bag.samples)

    for position, sample in enumerate(ordered_samples):
        self._find_relation_and_set_label(
            sample.position,
            Label.from_uint(uint_labels[position]))
def from_file(cls, filepath, synonyms):
    """
    Read opinions from a comma-separated UTF-8 text file.

    filepath: path of the file to read
    synonyms: SynonymsCollection
    return: instance created as cls(opinions, synonyms)

    Lines that consist of a single newline are skipped. Every other line
    must contain at least three comma-separated values (left entity, right
    entity, sentiment); this is enforced with an assertion.
    """
    assert (isinstance(synonyms, SynonymsCollection))

    parsed = []
    with io.open(filepath, "r", encoding='utf-8') as source:
        for raw_line in source:
            if raw_line == '\n':
                continue

            parts = raw_line.strip().split(',')
            assert (len(parts) >= 3)

            left = parts[0].strip()
            right = parts[1].strip()
            label = Label.from_str(parts[2].strip())

            parsed.append(Opinion(left, right, label))

    return cls(parsed, synonyms)
def from_file(cls, filepath):
    """
    Read the vectors from *.vectors.txt file

    Every line is comma-separated: left opinion value, right opinion value,
    the float components of the vector and, as the last item, the label
    string. Entries are stored under the key built from the two opinion
    values; a repeated key overwrites the earlier entry.

    filepath: path of the file to read
    return: instance created as cls(vectors)
    """
    loaded = {}
    with io.open(filepath, 'r', encoding='utf-8') as source:
        for row in source:
            cells = row.split(',')

            left_value = cells[0].strip()
            right_value = cells[1].strip()
            label = Label.from_str(cells[-1].strip())

            # Everything between the two values and the label is numeric.
            components = np.array([float(cell) for cell in cells[2:-1]])

            key = cls.___create_key(left_value, right_value)
            loaded[key] = OpinionVector(left_value,
                                        right_value,
                                        components,
                                        label)

    return cls(loaded)
def predict(self, dest_data_type=DataType.Test):
    """
    Label every relation of `dest_data_type`, save relations, raw
    predictions and opinion collections, and return the evaluation result.

    Steps:
      1. labels of the relations collection are reset;
      2. bags are processed in minibatches: labels and outputs are fetched
         through self.sess.run (together with the network log parameters),
         a label is applied per sample and the sample output is stored in
         the prediction collection;
      3. relations and predictions are saved for the last fit epoch;
      4. for every news index an OpinionCollection is filled, using
         `calculate_label` to reduce label lists, and saved;
      5. self._evaluate is called and its result returned.

    dest_data_type: unicode
        key of the relations and bags collections.
    """

    def calculate_label(relation_labels):
        # Reduces the labels of one group to a single label according to
        # the configured calculation mode.
        assert(isinstance(relation_labels, list))

        mode = self.Settings.RelationLabelCalculationMode
        chosen = None

        if mode == LabelCalculationMode.FIRST_APPEARED:
            chosen = relation_labels[0]

        if mode == LabelCalculationMode.AVERAGE:
            total = sum([item.to_int() for item in relation_labels])
            chosen = Label.from_int(np.sign(total))

        if DebugKeys.PredictLabel:
            print([item.to_int() for item in relation_labels])
            print("Result: {}".format(chosen.to_int()))

        return chosen

    assert(isinstance(dest_data_type, unicode))

    relations = self._relations_collections[dest_data_type]
    relations.reset_labels()
    predictions = RelationPredictionResultCollection(len(relations))

    bags = self.bags_collection[dest_data_type]
    for bags_group in bags.iter_by_groups(self.Settings.BagsPerMinibatch):
        minibatch = MiniBatch(bags_group)
        feed_dict = self.create_feed_dict(minibatch, data_type=dest_data_type)

        log_names, log_params = self.network.Log
        fetches = [self.network.Labels, self.network.Output] + log_params
        result = self.sess.run(fetches, feed_dict=feed_dict)
        uint_labels, output = result[0], result[1]

        if DebugKeys.PredictBatchDisplayLog:
            self._display_log(log_names, result[2:])

        # apply labels
        labeled_samples = 0
        for sample_index, sample in enumerate(minibatch.iter_by_samples()):
            relations.apply_label(
                Label.from_uint(int(uint_labels[sample_index])),
                sample.RelationID)
            predictions.add(sample.RelationID,
                            RelationPredictionResult(output[sample_index]))
            labeled_samples += 1

        # Every fetched label must have been consumed by exactly one sample.
        assert(labeled_samples == len(uint_labels))

    assert(relations.debug_check_all_relations_has_labels())
    relations.debug_labels_statistic(dest_data_type)

    # Compose Result
    relations.save(
        self.io.get_relations_filepath(data_type=dest_data_type,
                                       epoch=self._last_fit_epoch_index))
    predictions.save(
        self.io.get_relations_prediction_filepath(data_type=dest_data_type,
                                                  epoch=self._last_fit_epoch_index))

    for news_ID in self.io.get_data_indices(dest_data_type):
        # NOTE(review): lowercase `self.settings` here, while the rest of
        # the method reads `self.Settings` -- confirm both exist.
        collection = OpinionCollection(None, self.synonyms, self.settings.Stemmer)
        relations.fill_opinion_collection(collection, news_ID, calculate_label)

        output_filepath = self.io.get_opinion_output_filepath(
            news_ID, self.io.get_model_root(dest_data_type))
        collection.save(output_filepath)

    return self._evaluate(dest_data_type, self.Settings.Stemmer)
def __frame_polarity_from_args(args):
    """
    Create a FramePolarity from a sequence of at least four items.

    args: indexable sequence
        args[0] -- passed as `src`
        args[1] -- passed as `dest`
        args[2] -- label string, converted with Label.from_str
        args[3] -- passed as `prob` without conversion
    return: FramePolarity
    """
    src_value = args[0]
    dest_value = args[1]
    polarity_label = Label.from_str(args[2])
    prob_value = args[3]

    return FramePolarity(src=src_value,
                         dest=dest_value,
                         label=polarity_label,
                         prob=prob_value)