Example #1

    def _write_inference_result(self, sample_ids, labels, weights, prediction_score,
                                prediction_score_per_coordinate, task_index, schema_params: SchemaParams, output_dir):
        """ Write inference results. """
        output_avro_schema = get_inference_output_avro_schema(
            self.metadata,
            True,
            schema_params,
            has_weight=self._has_feature(schema_params.weight_column_name))
        parsed_schema = parse_schema(output_avro_schema)

        records = []
        for rec_id, rec_label, rec_weight, rec_prediction_score, rec_prediction_score_per_coordinate in \
                zip(sample_ids, labels, weights, prediction_score, prediction_score_per_coordinate):
            rec = {schema_params.uid_column_name: int(rec_id),
                   schema_params.prediction_score_column_name: float(rec_prediction_score),
                   schema_params.prediction_score_per_coordinate_column_name: float(rec_prediction_score_per_coordinate)}
            if self._has_label(schema_params.label_column_name):
                rec[schema_params.label_column_name] = int(rec_label)
            if self._has_feature(schema_params.weight_column_name):
                rec[schema_params.weight_column_name] = int(rec_weight)
            records.append(rec)

        output_file = os.path.join(output_dir, f"part-{task_index:05d}.avro")
        error_msg = f"worker {task_index} encountered error in writing inference results"
        with tf1.gfile.GFile(output_file, 'wb') as f:
            try_write_avro_blocks(f, parsed_schema, records, None, error_msg)
        logging(f"Worker {task_index} saved inference result to {output_file}")
Example #2

    def _write_inference_result(self, sample_ids, labels, weights, scores,
                                scores_and_offsets, task_index, schema_params, output_dir):
        """ Write inference results. """
        photon_ml_writer = PhotonMLWriter(schema_params=schema_params)
        output_avro_schema = photon_ml_writer.get_inference_output_avro_schema(
            self.metadata,
            self._has_label(schema_params[constants.LABEL]),
            True,
            has_weight=self._has_feature(schema_params[constants.SAMPLE_WEIGHT]))
        parsed_schema = parse_schema(output_avro_schema)

        records = []
        for rec_id, rec_label, rec_weight, rec_score, rec_score_and_offset in \
                zip(sample_ids, labels, weights, scores, scores_and_offsets):
            rec = {schema_params[constants.SAMPLE_ID]: int(rec_id),
                   schema_params[constants.PREDICTION_SCORE]: float(rec_score),
                   schema_params[constants.PREDICTION_SCORE_PER_COORDINATE]: float(rec_score_and_offset)
                   }
            if self._has_label(schema_params[constants.LABEL]):
                rec[schema_params[constants.LABEL]] = int(rec_label)
            if self._has_feature(schema_params[constants.SAMPLE_WEIGHT]):
                rec[schema_params[constants.SAMPLE_WEIGHT]] = int(rec_weight)
            records.append(rec)

        output_file = os.path.join(output_dir, "part-{0:05d}.avro".format(task_index))
        error_msg = "worker {} encountered error in writing inference results".format(task_index)
        with tf1.gfile.GFile(output_file, 'wb') as f:
            try_write_avro_blocks(f, parsed_schema, records, None, error_msg)
        logging("Worker {} saved inference result to {}".format(task_index, output_file))
Example #3

    def _write_inference_result(self, sample_ids, labels, weights,
                                prediction_score,
                                prediction_score_per_coordinate, task_index,
                                schema_params: SchemaParams, output_dir):
        """ Write inference results. """
        output_avro_schema = get_inference_output_avro_schema(
            self.metadata,
            True,
            schema_params,
            has_weight=self._has_feature(schema_params.weight_column_name))
        parsed_schema = parse_schema(output_avro_schema)

        records = []
        for rec_id, rec_label, rec_weight, rec_prediction_score, rec_prediction_score_per_coordinate in \
                zip(sample_ids, labels, weights, prediction_score, prediction_score_per_coordinate):
            rec = {schema_params.uid_column_name: int(rec_id),
                   schema_params.prediction_score_column_name: float(rec_prediction_score),
                   schema_params.prediction_score_per_coordinate_column_name: float(rec_prediction_score_per_coordinate)}
            if self._has_label(schema_params.label_column_name):
                rec[schema_params.label_column_name] = float(rec_label)
            if self._has_feature(schema_params.weight_column_name):
                rec[schema_params.weight_column_name] = int(rec_weight)
            records.append(rec)

        # Write to a local file then copy to the destination directory
        remote_is_hdfs = output_dir.startswith("hdfs://")
        local_file_name = f"part-{task_index:05d}.avro"
        output_file = local_file_name if remote_is_hdfs else os.path.join(
            output_dir, local_file_name)
        error_msg = f"worker {task_index} encountered error in writing inference results"
        with open(output_file, 'wb') as f:
            try_write_avro_blocks(f, parsed_schema, records, None, error_msg)
        logging(
            f"Worker {task_index} has written inference result to local file {output_file}"
        )
        if remote_is_hdfs:
            copy_files([output_file], output_dir)
            os.remove(output_file)
            logging(
                f"Worker {task_index} has copied inference result to directory {output_dir}"
            )
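
Example #3 stages the Avro file locally and only copies it out when the destination is on HDFS. copy_files is not defined here; a minimal sketch, assuming TensorFlow's gfile filesystem abstraction handles hdfs:// URIs:

import os

import tensorflow.compat.v1 as tf1


def copy_files(local_paths, dest_dir):
    # Hypothetical sketch: copy each local file into dest_dir, which may be a
    # remote (e.g. hdfs://) directory reachable through tf1.gfile.
    for path in local_paths:
        dest = os.path.join(dest_dir, os.path.basename(path))
        tf1.gfile.Copy(path, dest, overwrite=True)
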
Example #4
    def save_batch(self, f, batch_score, output_file, n_records, n_batch):
        validation_results = []
        validation_schema = fastavro.parse_schema(self.get_inference_output_avro_schema())
        # Save one batch of scores.
        try:
            predict_val = batch_score['scores']
            ids = batch_score['uid']
            labels = batch_score['label']
            weights = batch_score['weight']
            n_records += self.append_validation_results(validation_results,
                                                        predict_val,
                                                        ids,
                                                        labels,
                                                        weights)
            n_batch += 1
        except tf.errors.OutOfRangeError:
            # The input iterator is exhausted, so there is no batch left to write.
            logger.info(
                'Iterated through one batch. Finished evaluating work at batch {0}.'.format(n_batch))
        else:
            # Write the Avro block only when a batch was successfully read.
            try_write_avro_blocks(f, validation_schema, validation_results, None,
                                  self.create_error_message(n_batch, output_file))
        return n_records, n_batch
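
For context, a caller would invoke save_batch once per batch, threading the running counters through each call. A hedged usage sketch (writer, batches, and output_file are assumed names, not part of the original source):

n_records, n_batch = 0, 0
with open(output_file, 'wb') as f:
    for batch_score in batches:  # e.g. dicts with 'scores', 'uid', 'label', 'weight'
        n_records, n_batch = writer.save_batch(f, batch_score, output_file,
                                               n_records, n_batch)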