def _write_inference_result(self, sample_ids, labels, weights, prediction_score,
                            prediction_score_per_coordinate, task_index,
                            schema_params: SchemaParams, output_dir):
    """ Write inference results. """
    output_avro_schema = get_inference_output_avro_schema(
        self.metadata,
        True,
        schema_params,
        has_weight=self._has_feature(schema_params.weight_column_name))
    parsed_schema = parse_schema(output_avro_schema)

    records = []
    for rec_id, rec_label, rec_weight, rec_prediction_score, rec_prediction_score_per_coordinate in \
            zip(sample_ids, labels, weights, prediction_score, prediction_score_per_coordinate):
        rec = {schema_params.uid_column_name: int(rec_id),
               schema_params.prediction_score_column_name: float(rec_prediction_score),
               schema_params.prediction_score_per_coordinate_column_name: float(rec_prediction_score_per_coordinate)}
        if self._has_label(schema_params.label_column_name):
            rec[schema_params.label_column_name] = int(rec_label)
        if self._has_feature(schema_params.weight_column_name):
            rec[schema_params.weight_column_name] = int(rec_weight)
        records.append(rec)

    output_file = os.path.join(output_dir, f"part-{task_index:05d}.avro")
    error_msg = f"worker {task_index} encountered error in writing inference results"
    with tf1.gfile.GFile(output_file, 'wb') as f:
        try_write_avro_blocks(f, parsed_schema, records, None, error_msg)
    logging(f"Worker {task_index} saved inference result to {output_file}")
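
# The functions in this section depend on a try_write_avro_blocks helper that is
# not shown here. Below is a minimal sketch of what such a helper could look
# like, assuming fastavro's standard writer API; `logging` is assumed to be the
# same logging helper used above. The real helper may differ (e.g. in retry or
# error handling), so treat this as an illustration, not the actual implementation.
import fastavro

def try_write_avro_blocks(f, parsed_schema, records, success_msg, error_msg):
    # Write one block of records to the open Avro file object, logging the
    # supplied message on success or failure.
    try:
        fastavro.writer(f, parsed_schema, records)
        if success_msg:
            logging(success_msg)
    except Exception:
        if error_msg:
            logging(error_msg)
        raise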

def _write_inference_result(self, sample_ids, labels, weights, scores,
                            scores_and_offsets, task_index, schema_params, output_dir):
    """ Write inference results. """
    photon_ml_writer = PhotonMLWriter(schema_params=schema_params)
    output_avro_schema = photon_ml_writer.get_inference_output_avro_schema(
        self.metadata,
        self._has_label(schema_params[constants.LABEL]),
        True,
        has_weight=self._has_feature(schema_params[constants.SAMPLE_WEIGHT]))
    parsed_schema = parse_schema(output_avro_schema)

    records = []
    for rec_id, rec_label, rec_weight, rec_score, rec_score_and_offset in \
            zip(sample_ids, labels, weights, scores, scores_and_offsets):
        rec = {schema_params[constants.SAMPLE_ID]: int(rec_id),
               schema_params[constants.PREDICTION_SCORE]: float(rec_score),
               schema_params[constants.PREDICTION_SCORE_PER_COORDINATE]: float(rec_score_and_offset)}
        if self._has_label(schema_params[constants.LABEL]):
            rec[schema_params[constants.LABEL]] = int(rec_label)
        if self._has_feature(schema_params[constants.SAMPLE_WEIGHT]):
            rec[schema_params[constants.SAMPLE_WEIGHT]] = int(rec_weight)
        records.append(rec)

    output_file = os.path.join(output_dir, "part-{0:05d}.avro".format(task_index))
    error_msg = "worker {} encountered error in writing inference results".format(task_index)
    with tf1.gfile.GFile(output_file, 'wb') as f:
        try_write_avro_blocks(f, parsed_schema, records, None, error_msg)
    logging("Worker {} saved inference result to {}".format(task_index, output_file))
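
# For reference, the Avro schema returned by the schema helpers above plausibly
# describes a record like the one below. The exact field names come from
# schema_params at runtime; the names and types here are illustrative guesses,
# not the actual output of get_inference_output_avro_schema.
EXAMPLE_INFERENCE_OUTPUT_SCHEMA = {
    "type": "record",
    "name": "InferenceResult",
    "fields": [
        {"name": "uid", "type": "long"},
        {"name": "predictionScore", "type": "float"},
        {"name": "predictionScorePerCoordinate", "type": "float"},
        # label and weight are only present when the input provides them,
        # mirroring the _has_label / _has_feature checks above.
        {"name": "label", "type": ["null", "int"], "default": None},
        {"name": "weight", "type": ["null", "int"], "default": None},
    ],
}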

def _write_inference_result(self, sample_ids, labels, weights, prediction_score,
                            prediction_score_per_coordinate, task_index,
                            schema_params: SchemaParams, output_dir):
    """ Write inference results. """
    output_avro_schema = get_inference_output_avro_schema(
        self.metadata,
        True,
        schema_params,
        has_weight=self._has_feature(schema_params.weight_column_name))
    parsed_schema = parse_schema(output_avro_schema)

    records = []
    for rec_id, rec_label, rec_weight, rec_prediction_score, rec_prediction_score_per_coordinate in \
            zip(sample_ids, labels, weights, prediction_score, prediction_score_per_coordinate):
        rec = {schema_params.uid_column_name: int(rec_id),
               schema_params.prediction_score_column_name: float(rec_prediction_score),
               schema_params.prediction_score_per_coordinate_column_name: float(rec_prediction_score_per_coordinate)}
        if self._has_label(schema_params.label_column_name):
            rec[schema_params.label_column_name] = float(rec_label)
        if self._has_feature(schema_params.weight_column_name):
            rec[schema_params.weight_column_name] = int(rec_weight)
        records.append(rec)

    # Write to a local file, then copy to the destination directory if it is remote.
    remote_is_hdfs = output_dir.startswith("hdfs://")
    local_file_name = f"part-{task_index:05d}.avro"
    output_file = local_file_name if remote_is_hdfs else os.path.join(output_dir, local_file_name)
    error_msg = f"worker {task_index} encountered error in writing inference results"
    with open(output_file, 'wb') as f:
        try_write_avro_blocks(f, parsed_schema, records, None, error_msg)
    logging(f"Worker {task_index} has written inference result to local file {output_file}")

    if remote_is_hdfs:
        copy_files([output_file], output_dir)
        os.remove(output_file)
        logging(f"Worker {task_index} has copied inference result to directory {output_dir}")
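
# copy_files is assumed to copy local files into a (possibly remote, e.g.
# hdfs://) directory. A minimal sketch using TensorFlow's gfile, which handles
# HDFS paths, assuming tf1 is tensorflow.compat.v1 as in the functions above;
# the actual helper may be implemented differently.
def copy_files(input_files, output_dir):
    # Copy each local file into output_dir and return the destination paths.
    copied = []
    for input_file in input_files:
        destination = os.path.join(output_dir, os.path.basename(input_file))
        tf1.gfile.Copy(input_file, destination, overwrite=True)
        copied.append(destination)
    return copied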

def save_batch(self, f, batch_score, output_file, n_records, n_batch):
    validation_results = []
    validation_schema = fastavro.parse_schema(self.get_inference_output_avro_schema())
    # Save one batch of scores.
    try:
        predict_val = batch_score['scores']
        ids = batch_score['uid']
        labels = batch_score['label']
        weights = batch_score['weight']
        n_records += self.append_validation_results(validation_results, predict_val, ids, labels, weights)
        n_batch += 1
    except tf.errors.OutOfRangeError:
        logger.info('Iterated through all batches. Finished evaluation at batch {0}.'.format(n_batch))
    else:
        try_write_avro_blocks(f, validation_schema, validation_results, None,
                              self.create_error_message(n_batch, output_file))
    return n_records, n_batch
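
# append_validation_results is assumed to turn one batch of parallel arrays into
# Avro-ready dicts and report how many rows were added. A minimal sketch; the
# field names are illustrative and would need to match the schema returned by
# get_inference_output_avro_schema.
def append_validation_results(self, validation_results, predict_val, ids, labels, weights):
    for uid, score, label, weight in zip(ids, predict_val, labels, weights):
        validation_results.append({'uid': int(uid),
                                   'predictionScore': float(score),
                                   'label': int(label),
                                   'weight': float(weight)})
    # Number of records appended in this batch.
    return len(ids)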