def predict(self, dest_data_type=DataType.Test):
    """
    Label the relations of `dest_data_type` with the network output,
    save one opinion collection per news id and evaluate the result.

    dest_data_type: key used to select the relation collection, the
        feed dict flavour and the io paths (DataType.Test by default)

    returns: the value produced by self._evaluate for this data type
    """
    def take_first(labels):
        # label selector handed to fill_opinion_collection:
        # always picks element 0 of the labels it is given
        return labels[0]

    relations = self.relation_collections[dest_data_type]
    relations.reset_labels()

    # Pass 1: run the network batch by batch and label the relations.
    batches = relations.iter_by_linked_relations_groups(self.Settings.BatchSize)
    for linked_groups in batches:
        batch = Batch(linked_groups, self.Settings.GroupSize)
        feed = self.create_feed_dict(batch, dest_data_type)
        uint_labels = self.sess.run([self.network.Labels], feed_dict=feed)[0]

        # uint_labels holds one entry per group; each relation of the
        # group receives the label decoded from that entry.
        for group_position, relations_group in enumerate(batch.iter_groups):
            for relation in relations_group:
                assert isinstance(relation, ExtractedRelation)
                relations.apply_label(
                    label=Label.from_uint(int(uint_labels[group_position])),
                    relation_id=relation.relation_id)

    # Pass 2: compose and save an opinion collection for every news id.
    model_root_of = self.io.get_model_root
    for news_ID in self.io.get_data_indices(dest_data_type):
        opinions = OpinionCollection(None, self.synonyms, self.Settings.Stemmer)
        relations.fill_opinion_collection(
            opinions, news_ID, take_first, debug_check_collection=False)
        target_path = self.io.get_opinion_output_filepath(
            news_ID, model_root_of(dest_data_type))
        opinions.save(target_path)

    return self._evaluate(dest_data_type, self.Settings.Stemmer)
def apply_labels(self, uint_labels, minibatch):
    """
    Assign labels to the relations referenced by the samples of a minibatch.

    uint_labels: list of int
        each label could be as follows: 0 -- neutral, and 1 -- positive,
        2 -- negative
    minibatch: MiniBatch
        its bags are visited in order and, inside a bag, its samples in
        order; the k-th visited sample receives uint_labels[k], so labels
        and samples are supposed to share the same order.
    """
    assert isinstance(uint_labels, list)
    assert isinstance(minibatch, MiniBatch)

    # Flatten bags -> samples lazily so a single running index covers
    # the whole minibatch.
    samples_in_order = (
        sample for bag in minibatch.bags for sample in bag.samples)

    for order, sample in enumerate(samples_in_order):
        decoded = Label.from_uint(uint_labels[order])
        self._find_relation_and_set_label(sample.position, decoded)
def predict(self, dest_data_type=DataType.Test):
    """
    Predict labels for the relations of `dest_data_type`, save the
    results and evaluate them.

    Steps performed:
      1. reset the labels of the relation collection;
      2. run the network over minibatches of bags, apply the predicted
         label to the relation of every sample and record the network
         output for that relation in a prediction collection;
      3. save the relation collection and the prediction collection to
         the paths self.io gives for the last fit epoch;
      4. for every news id fill an OpinionCollection (labels are reduced
         with `calculate_label`) and save it;
      5. return the result of self._evaluate.

    dest_data_type: unicode
        key selecting the relation/bag collections and the io paths.

    returns: the value produced by self._evaluate for this data type
    raises: ValueError when Settings.RelationLabelCalculationMode is
        neither FIRST_APPEARED nor AVERAGE.
    """

    def calculate_label(relation_labels):
        """
        Reduce a list of labels to a single label according to
        Settings.RelationLabelCalculationMode.

        relation_labels: list of labels (objects providing to_int())
        """
        assert(isinstance(relation_labels, list))

        mode = self.Settings.RelationLabelCalculationMode
        if mode == LabelCalculationMode.FIRST_APPEARED:
            label = relation_labels[0]
        elif mode == LabelCalculationMode.AVERAGE:
            # Sign of the summed integer labels; int() converts the numpy
            # scalar returned by np.sign into a plain Python int.
            total = sum(item.to_int() for item in relation_labels)
            label = Label.from_int(int(np.sign(total)))
        else:
            # Fail loudly instead of handing None to the opinion collection.
            raise ValueError(
                "Unsupported RelationLabelCalculationMode: {}".format(mode))

        if DebugKeys.PredictLabel:
            # Single-argument parenthesised print: same output under the
            # Python 2 print statement and the Python 3 print function.
            print([item.to_int() for item in relation_labels])
            print("Result: {}".format(label.to_int()))

        return label

    assert(isinstance(dest_data_type, unicode))

    relations = self._relations_collections[dest_data_type]
    relations.reset_labels()
    prediction_collection = RelationPredictionResultCollection(len(relations))

    bags_groups = self.bags_collection[dest_data_type].iter_by_groups(
        self.Settings.BagsPerMinibatch)

    for bags_group in bags_groups:
        minibatch = MiniBatch(bags_group)
        feed_dict = self.create_feed_dict(minibatch, data_type=dest_data_type)

        log_names, log_params = self.network.Log
        result = self.sess.run(
            [self.network.Labels, self.network.Output] + log_params,
            feed_dict=feed_dict)
        uint_labels = result[0]
        output = result[1]

        if DebugKeys.PredictBatchDisplayLog:
            # everything after labels and output are the fetched log values
            self._display_log(log_names, result[2:])

        # Apply labels: the i-th sample of the minibatch takes the i-th
        # predicted label and the i-th output row.
        samples_count = 0
        for sample_index, sample in enumerate(minibatch.iter_by_samples()):
            label = Label.from_uint(int(uint_labels[sample_index]))
            relations.apply_label(label, sample.RelationID)
            prediction_collection.add(
                sample.RelationID,
                RelationPredictionResult(output[sample_index]))
            samples_count += 1

        # every predicted label must have been consumed by a sample
        assert(samples_count == len(uint_labels))

    assert(relations.debug_check_all_relations_has_labels())

    relations.debug_labels_statistic(dest_data_type)

    # Compose Result
    relations.save(
        self.io.get_relations_filepath(
            data_type=dest_data_type,
            epoch=self._last_fit_epoch_index))

    prediction_collection.save(
        self.io.get_relations_prediction_filepath(
            data_type=dest_data_type,
            epoch=self._last_fit_epoch_index))

    for news_ID in self.io.get_data_indices(dest_data_type):
        # self.Settings (not self.settings): consistent with every other
        # settings access in this method, including the _evaluate call.
        collection = OpinionCollection(None, self.synonyms, self.Settings.Stemmer)
        relations.fill_opinion_collection(collection, news_ID, calculate_label)
        collection.save(
            self.io.get_opinion_output_filepath(
                news_ID, self.io.get_model_root(dest_data_type)))

    return self._evaluate(dest_data_type, self.Settings.Stemmer)