Example no. 1
def detectinfo(ctx, id, want_fused):
    print(json_format.MessageToJson(ctx.obj.pipeclient.detect_info(id, want_fused)))
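For readers unfamiliar with json_format: the pipeclient call above is assumed to return a protobuf message. A minimal, self-contained sketch with the well-known Struct type shows what MessageToJson produces:

from google.protobuf import json_format, struct_pb2

msg = struct_pb2.Struct()
msg['id'] = 'abc123'
msg['want_fused'] = True

# MessageToJson returns a pretty-printed JSON string (2-space indent by default).
print(json_format.MessageToJson(msg))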
Example no. 2
def _serialize_eval_config(eval_config: config.EvalConfig) -> Text:
    return json_format.MessageToJson(
        config_pb2.EvalConfigAndVersion(eval_config=eval_config,
                                        version=tfma_version.VERSION_STRING))
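The inverse direction goes through json_format.Parse; a hedged sketch, assuming the same Text/config/config_pb2 imports as the snippet above:

def _deserialize_eval_config(serialized: Text) -> config.EvalConfig:
    # json_format.Parse fills the target message in place and also returns it.
    wrapper = json_format.Parse(serialized, config_pb2.EvalConfigAndVersion())
    return wrapper.eval_config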
Example no. 3
    def _infer(self, request):
        """Runs inference on the stored examples and returns the results as JSON.

    Args:
      request: A request that should contain 'inference_address', 'model_name',
        'model_type', 'model_version', 'model_signature' and 'label_vocab_path'.

    Returns:
      A JSON response containing the inference results and the label vocabulary.
    """
        vocab_path = request.args.get('label_vocab_path')
        if vocab_path:
            try:
                with tf.gfile.GFile(vocab_path, 'r') as f:
                    label_vocab = [line.rstrip('\n') for line in f]
            except tf.errors.NotFoundError as err:
                tf.logging.error('error reading vocab file: %s', err)
                label_vocab = []
        else:
            label_vocab = []

        try:
            if request.method != 'GET':
                tf.logging.error('%s requests are forbidden.', request.method)
                return http_util.Respond(request,
                                         {'error': 'invalid non-GET request'},
                                         'application/json',
                                         code=405)

            serving_bundle = inference_utils.ServingBundle(
                request.args.get('inference_address'),
                request.args.get('model_name'), request.args.get('model_type'),
                request.args.get('model_version'),
                request.args.get('model_signature'))
            indices_to_infer = sorted(self.updated_example_indices)
            examples_to_infer = [
                self.examples[index] for index in indices_to_infer
            ]

            # Get inference results proto and combine with indices of inferred
            # examples and respond with this data as json.
            inference_result_proto = oss_utils.call_servo(
                examples_to_infer, serving_bundle)
            new_inferences = inference_utils.wrap_inference_results(
                inference_result_proto)
            infer_json = json_format.MessageToJson(
                new_inferences, including_default_value_fields=True)
            infer_obj = json.loads(infer_json)
            resp = {'indices': indices_to_infer, 'results': infer_obj}
            self.updated_example_indices = set()
            return http_util.Respond(request, {
                'inferences': json.dumps(resp),
                'vocab': json.dumps(label_vocab)
            }, 'application/json')
        except common_utils.InvalidUserInputError as e:
            return http_util.Respond(request, {'error': e.message},
                                     'application/json',
                                     code=400)
        except AbortionError as e:
            return http_util.Respond(request, {'error': e.details},
                                     'application/json',
                                     code=400)
Example no. 4
    def testWellKnownInAnyMessage(self):
        message = any_pb2.Any()
        int32_value = wrappers_pb2.Int32Value()
        int32_value.value = 1234
        message.Pack(int32_value)
        self.assertEqual(
            json.loads(json_format.MessageToJson(message, True)),
            json.loads(
                '{\n'
                '  "@type": \"type.googleapis.com/google.protobuf.Int32Value\",\n'
                '  "value": 1234\n'
                '}\n'))
        parsed_message = any_pb2.Any()
        self.CheckParseBack(message, parsed_message)

        timestamp = timestamp_pb2.Timestamp()
        message.Pack(timestamp)
        self.assertEqual(
            json.loads(json_format.MessageToJson(message, True)),
            json.loads(
                '{\n'
                '  "@type": "type.googleapis.com/google.protobuf.Timestamp",\n'
                '  "value": "1970-01-01T00:00:00Z"\n'
                '}\n'))
        self.CheckParseBack(message, parsed_message)

        duration = duration_pb2.Duration()
        duration.seconds = 1
        message.Pack(duration)
        self.assertEqual(
            json.loads(json_format.MessageToJson(message, True)),
            json.loads(
                '{\n'
                '  "@type": "type.googleapis.com/google.protobuf.Duration",\n'
                '  "value": "1s"\n'
                '}\n'))
        self.CheckParseBack(message, parsed_message)

        field_mask = field_mask_pb2.FieldMask()
        field_mask.paths.append('foo.bar')
        field_mask.paths.append('bar')
        message.Pack(field_mask)
        self.assertEqual(
            json.loads(json_format.MessageToJson(message, True)),
            json.loads(
                '{\n'
                '  "@type": "type.googleapis.com/google.protobuf.FieldMask",\n'
                '  "value": "foo.bar,bar"\n'
                '}\n'))
        self.CheckParseBack(message, parsed_message)

        struct_message = struct_pb2.Struct()
        struct_message['name'] = 'Jim'
        message.Pack(struct_message)
        self.assertEqual(
            json.loads(json_format.MessageToJson(message, True)),
            json.loads(
                '{\n'
                '  "@type": "type.googleapis.com/google.protobuf.Struct",\n'
                '  "value": {"name": "Jim"}\n'
                '}\n'))
        self.CheckParseBack(message, parsed_message)

        nested_any = any_pb2.Any()
        int32_value.value = 5678
        nested_any.Pack(int32_value)
        message.Pack(nested_any)
        self.assertEqual(
            json.loads(json_format.MessageToJson(message, True)),
            json.loads(
                '{\n'
                '  "@type": "type.googleapis.com/google.protobuf.Any",\n'
                '  "value": {\n'
                '    "@type": "type.googleapis.com/google.protobuf.Int32Value",\n'
                '    "value": 5678\n'
                '  }\n'
                '}\n'))
        self.CheckParseBack(message, parsed_message)
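Going the other way, json_format.Parse resolves the @type URL of well-known types automatically; a small round-trip sketch:

from google.protobuf import any_pb2, duration_pb2, json_format

any_msg = json_format.Parse(
    '{"@type": "type.googleapis.com/google.protobuf.Duration", "value": "1.5s"}',
    any_pb2.Any())

duration = duration_pb2.Duration()
assert any_msg.Unpack(duration)  # Unpack returns True when the type URL matches.
assert duration.seconds == 1 and duration.nanos == 500000000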
Example no. 5
    def _set_examples(self, examples):
        self.examples = [json_format.MessageToJson(ex) for ex in examples]
        self.updated_example_indices = set(range(len(examples)))
        self._generate_sprite()
Example no. 6
    def from_protobuf(msg):
        data = json_format.MessageToJson(msg)  # , problem_pb2.ProblemPerformanceMetric)
        return Metric.from_json(data)
Example no. 7
    def from_protobuf(msg):
        d = json.loads(json_format.MessageToJson(msg))
        logger.debug("Got msg json: %s" % str(d))
        return Value.from_json(d)
Example no. 8
def main():
    parser = argparse.ArgumentParser(
        description='Runs a benchmark of Marshmallow.')
    parser.add_argument(
        '--items',
        type=str,
        default='1,10,100,1000',
        help='Comma-separated list of number of items in the protobuf')
    args = parser.parse_args()
    items = [int(x.strip()) for x in args.items.split(',')]
    print('***** Benchmark Results *****')
    for item_count in items:
        print(f'\n{item_count} Items per proto:')
        baseline = CppAddressBook()
        for _ in range(item_count):
            new_person = baseline.people.add()
            new_person.name = f'{random_string(4)} {random_string(5)}'
            new_person.email = f'{random_string(6)}@gmail.com'
            new_person.id = 1234
            for _ in range(3):
                new_phone = new_person.phones.add()
                new_phone.number = f'+1425{random.randint(1000000,9999999)}'
                new_phone.type = CppPerson.PhoneType.MOBILE
        baseline_proto = baseline.SerializeToString()

        cython_address_book = CyAddressBook()
        cpp_address_book = CppAddressBook()
        cpp_address_book.ParseFromString(baseline_proto)
        cython_address_book.ParseFromString(baseline_proto)
        json_str = json_format.MessageToJson(cpp_address_book)
        py_dict = json.loads(json_str)

        print('\t*** Compute ***')
        json_timeit_result = run_timeit(lambda: json.loads(json_str))
        print(f'\tjson.loads:                \t{json_timeit_result:,.2f}ns')
        cpp_timeit_result = run_timeit(
            lambda: cpp_address_book.ParseFromString(baseline_proto))
        print(f'\tBaseline ParseFromString:  \t{cpp_timeit_result:,.2f}ns')
        cython_timeit_result = run_timeit(
            lambda: cython_address_book.ParseFromString(baseline_proto))
        print(
            f'\tCython   ParseFromString:  \t{cython_timeit_result:,.2f}ns ({cpp_timeit_result / cython_timeit_result:,.2f} X Speedup)'
        )
        json_timeit_result = run_timeit(lambda: json.dumps(py_dict))
        print(f'\tjson.dumps:                \t{json_timeit_result:,.2f}ns')
        cpp_timeit_result = run_timeit(
            lambda: cpp_address_book.SerializeToString())
        print(f'\tBaseline SerializeToString:\t{cpp_timeit_result:,.2f}ns')
        cython_timeit_result = run_timeit(
            lambda: cython_address_book.SerializeToString())
        print(
            f'\tCython   SerializeToString:\t{cython_timeit_result:,.2f}ns ({cpp_timeit_result / cython_timeit_result:,.2f} X Speedup)'
        )
        cpp_timeit_result = run_timeit(
            lambda: json_format.MessageToJson(cpp_address_book))
        print(f'\tBaseline MessageToJson:    \t{cpp_timeit_result:,.2f}ns')
        cython_timeit_result = run_timeit(
            lambda: cython_address_book.to_json())
        print(
            f'\tCython   MessageToJson:    \t{cython_timeit_result:,.2f}ns ({cpp_timeit_result / cython_timeit_result:,.2f} X Speedup)'
        )
        json_timeit_result = run_timeit(lambda: list(py_dict['people']))
        print(f'\tPython Dictionary Iterate:  \t{json_timeit_result:,.2f}ns')
        cpp_timeit_result = run_timeit(lambda: list(cpp_address_book.people))
        print(f'\tBaseline Iterate:          \t{cpp_timeit_result:,.2f}ns')
        cython_timeit_result = run_timeit(
            lambda: list(cython_address_book.people))
        print(
            f'\tCython   Iterate:          \t{cython_timeit_result:,.2f}ns ({cpp_timeit_result / cython_timeit_result:,.2f} X Speedup)'
        )
        cpp_person = list(cpp_address_book.people)[0]
        cython_person = list(cython_address_book.people)[0]
        python_person = py_dict['people'][0]
        json_timeit_result = run_timeit(lambda: python_person['name'])
        print(
            f'\tPython Dictionary Field Access:\t{json_timeit_result:,.2f}ns')
        cpp_timeit_result = run_timeit(lambda: cpp_person.name)
        print(f'\tBaseline Field Access:        \t{cpp_timeit_result:,.2f}ns')
        cython_timeit_result = run_timeit(lambda: cython_person.name)
        print(
            f'\tCython   Field Access:        \t{cython_timeit_result:,.2f}ns ({cpp_timeit_result / cython_timeit_result:,.2f} X Speedup)'
        )

        cython_memory_result = measure_memory(CyAddressBook, baseline_proto,
                                              5000)
        cpp_memory_result = measure_memory(CppAddressBook, baseline_proto,
                                           5000)

        cython_allocated_memory = cython_memory_result[
            'allocated'] - cython_memory_result['baseline']
        cpp_allocated_memory = cpp_memory_result[
            'allocated'] - cpp_memory_result['baseline']
        percentage_drop = ((cpp_allocated_memory - cython_allocated_memory) /
                           cpp_allocated_memory) * 100
        drop_label = 'Decrease'
        if percentage_drop < 0:
            percentage_drop = abs(percentage_drop)
            drop_label = 'Increase'

        print('\n\t*** Memory ***')
        print(
            f'\tBaseline Memory for 5k protos:\t{cpp_allocated_memory:,.2f}MB')
        print(
            f'\tCython   Memory for 5k protos:\t{cython_allocated_memory:,.2f}MB  ({percentage_drop:,.2f}% {drop_label})'
        )
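random_string, run_timeit and measure_memory are helpers defined elsewhere in the benchmark script; a purely hypothetical sketch of run_timeit, assuming it returns the average time per call in nanoseconds:

import timeit

def run_timeit(fn, number=100_000):
    # Hypothetical helper: average wall-clock time of fn in nanoseconds. The
    # real benchmark may use a different iteration count or timeit.repeat.
    total_seconds = timeit.timeit(fn, number=number)
    return total_seconds / number * 1e9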
Example no. 9
    def as_json(self) -> str:
        return json_format.MessageToJson(self.as_proto, sort_keys=True)
Example no. 10
def vidmanip(ctx, vid, out):
    client = ctx.obj.client
    print(json_format.MessageToJson(client.vid_manip(vid, out)))
Example no. 11
def write_defines(net_param, root, model_name):
    class_name = model_name.upper()
    weight_prefix = model_name.lower()
    model_name_cpp = model_name + '.cpp'
    model_name_hpp = model_name + '.hpp'
    model_name_weights_hpp = model_name + '_weights.hpp'

    net = NetParam()
    net.CopyFrom(net_param)

    blob_counts, blob_names, blob_types = [], [], []
    for blob in net.blob:
        blob_type = blob.type if blob.HasField('type') else 'float'
        if blob_type == 'float':
            blob_counts.append(str(len(blob.data_f)))
        elif blob_type == 'int':
            blob_counts.append(str(len(blob.data_i)))
        elif blob_type == 'unsigned char':
            assert len(blob.data_b) == 1
            blob_counts.append(str(len(blob.data_b[0])))
        else:
            raise ValueError('Unknown blob type', blob_type)
        blob_names.append('{}_{}_'.format(weight_prefix, find_replace(blob.name, ['/', '-', ':'], '_')))
        blob_types.append(blob_type)
        blob.ClearField('data_f')
        blob.ClearField('data_i')
        blob.ClearField('data_b')

    proto_str = text_format.MessageToString(net)
    json_str = json_format.MessageToJson(net, preserving_proto_field_name=True)
    custom_str = convert_custom(net_param)

    split_count = 10000

    proto_split_off = len(proto_str) % split_count
    proto_split_num = len(proto_str) // split_count + (proto_split_off > 0)
    json_split_off = len(json_str) % split_count
    json_split_num = len(json_str) // split_count + (json_split_off > 0)
    custom_split_off = len(custom_str) % split_count
    custom_split_num = len(custom_str) // split_count + (custom_split_off > 0)

    proto_split_names = ['proto_model_{}_'.format(n) for n in range(proto_split_num)]
    json_split_names = ['json_model_{}_'.format(n) for n in range(json_split_num)]
    custom_split_names = ['custom_model_{}_'.format(n) for n in range(custom_split_num)]

    ########## write network proto definition to cpp ##########
    with open('{}/{}'.format(root, model_name_cpp), 'w') as cpp_file:
        cpp_file.write('#include "{}"\n'.format(model_name_hpp))
        cpp_file.write('#include "{}"\n\n'.format(model_name_weights_hpp))

        cpp_file.write('namespace Shadow {\n\n')

        offset = 0
        for proto_split_name in proto_split_names:
            cpp_file.write('const std::string {} = \nR"({})";\n\n'.format(proto_split_name, proto_str[offset: offset + split_count]))
            offset += split_count
        cpp_file.write('const std::string {}::proto_model_{{\n    {}\n}};\n\n'.format(class_name, ' + '.join(proto_split_names)))

        offset = 0
        for json_split_name in json_split_names:
            cpp_file.write('const std::string {} = \nR"({})";\n\n'.format(json_split_name, json_str[offset: offset + split_count]))
            offset += split_count
        cpp_file.write('const std::string {}::json_model_{{\n    {}\n}};\n\n'.format(class_name, ' + '.join(json_split_names)))

        offset = 0
        for custom_split_name in custom_split_names:
            cpp_file.write('const std::string {} = \nR"({})";\n\n'.format(custom_split_name, custom_str[offset: offset + split_count]))
            offset += split_count
        cpp_file.write('const std::string {}::custom_model_{{\n    {}\n}};\n\n'.format(class_name, ' + '.join(custom_split_names)))

        cpp_file.write('const std::vector<int> {}::counts_{{\n    {}\n}};\n\n'.format(class_name, ', '.join(blob_counts)))
        cpp_file.write('const std::vector<const void *> {}::weights_{{\n    {}\n}};\n\n'.format(class_name, ',\n    '.join(blob_names)))
        cpp_file.write('const std::vector<std::string> {}::types_{{\n    "{}"\n}};\n\n'.format(class_name, '",\n    "'.join(blob_types)))

        cpp_file.write('}  // namespace Shadow\n')

    ########## write network proto definition to hpp ##########
    with open('{}/{}'.format(root, model_name_hpp), 'w') as hpp_file:
        hpp_file.write('#ifndef SHADOW_{}_HPP\n'.format(class_name) +
                       "#define SHADOW_{}_HPP\n\n".format(class_name))

        hpp_file.write('#include <cstring>\n' +
                       '#include <string>\n' +
                       '#include <vector>\n\n')

        hpp_file.write('namespace Shadow {\n\n')

        hpp_file.write('class {} {{\n'.format(class_name) +
                       ' public:\n')
        hpp_file.write('#if defined(USE_Protobuf)\n')
        hpp_file.write('  static const std::string &proto_model() { return proto_model_; }\n')
        hpp_file.write('#elif defined(USE_JSON)\n')
        hpp_file.write('  static const std::string &json_model() { return json_model_; }\n')
        hpp_file.write('#else\n')
        hpp_file.write('  static const std::string &custom_model() { return custom_model_; }\n')
        hpp_file.write('#endif\n\n')

        hpp_file.write('  static const std::vector<const void *> &weights() { return weights_; }\n')
        hpp_file.write('  static const std::vector<std::string> &types() { return types_; }\n')
        hpp_file.write('  static const std::vector<int> &counts() { return counts_; }\n\n')

        hpp_file.write('  static const void *weights(int n) { return weights_[n]; }\n')
        hpp_file.write('  static const std::string &types(int n) { return types_[n]; }\n')
        hpp_file.write('  static const int counts(int n) { return counts_[n]; }\n\n')

        hpp_file.write(' private:\n')
        hpp_file.write('  static const std::string proto_model_;\n')
        hpp_file.write('  static const std::string json_model_;\n')
        hpp_file.write('  static const std::string custom_model_;\n\n')

        hpp_file.write('  static const std::vector<const void *> weights_;\n')
        hpp_file.write('  static const std::vector<std::string> types_;\n')
        hpp_file.write('  static const std::vector<int> counts_;\n')
        hpp_file.write('};\n\n')

        hpp_file.write('}  // namespace Shadow\n\n')

        hpp_file.write('#endif  // SHADOW_{}_HPP\n'.format(class_name))

    ########## write extern weights definition to hpp ##########
    with open('{}/{}'.format(root, model_name_weights_hpp), 'w') as weights_file:
        weights_file.write('#ifndef SHADOW_{}_WEIGHTS_HPP\n'.format(class_name))
        weights_file.write('#define SHADOW_{}_WEIGHTS_HPP\n\n'.format(class_name))

        weights_file.write('namespace Shadow {\n\n')

        for blob_name, blob_type in zip(blob_names, blob_types):
            weights_file.write('extern const {} {}[];\n'.format(blob_type, blob_name))

        weights_file.write('\n}  // namespace Shadow\n\n')

        weights_file.write('#endif  // SHADOW_{}_WEIGHTS_HPP\n'.format(class_name))
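find_replace, used above to sanitize blob names, is not shown; a plausible sketch that treats it as a simple multi-substring replacement helper:

def find_replace(string, old_list, new):
    # Hypothetical helper: replace every occurrence of each substring in
    # old_list (here '/', '-' and ':') with the replacement string.
    for old in old_list:
        string = string.replace(old, new)
    return string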
Example no. 12
def imgmanip(ctx, img, out):
    client = ctx.obj.client
    print(json_format.MessageToJson(client.img_manip(img, out)))
Example no. 13
def taginfo(ctx):
    print(json_format.MessageToJson(ctx.obj.pipeclient.detection_tag_info()))
Example no. 14
def deletedetection(ctx, id):
    print(json_format.MessageToJson(ctx.obj.pipeclient.delete_detection(id)))
Example no. 15
    def resolve_input_artifacts(
        self,
        input_channels: Dict[Text, types.Channel],
        exec_properties: Dict[Text, Any],
        driver_args: data_types.DriverArgs,
        pipeline_info: data_types.PipelineInfo,
    ) -> Dict[Text, List[types.Artifact]]:
        """Overrides BaseDriver.resolve_input_artifacts()."""
        del driver_args  # unused
        del pipeline_info  # unused

        input_config = example_gen_pb2.Input()
        json_format.Parse(exec_properties['input_config'], input_config)

        input_dict = channel_utils.unwrap_channel_dict(input_channels)
        for input_list in input_dict.values():
            for single_input in input_list:
                absl.logging.info('Processing input %s.' % (single_input.uri))
                absl.logging.info('single_input %s.' % (single_input))
                absl.logging.info('single_input.artifact %s.' %
                                  (single_input.artifact))

                # Set the fingerprint of input.
                split_fingerprints = []
                select_span = None
                for split in input_config.splits:
                    # If SPAN is specified, pipeline will process the latest span, note
                    # that this span number must be the same for all splits and it will
                    # be stored in metadata as the span of input artifact.
                    if _SPAN_SPEC in split.pattern:
                        latest_span = self._retrieve_latest_span(
                            single_input.uri, split)
                        if select_span is None:
                            select_span = latest_span
                        if select_span != latest_span:
                            raise ValueError(
                                'Latest span should be the same for each split: %s != %s'
                                % (select_span, latest_span))
                        split.pattern = split.pattern.replace(
                            _SPAN_SPEC, select_span)

                    pattern = os.path.join(single_input.uri, split.pattern)
                    split_fingerprints.append(
                        io_utils.generate_fingerprint(split.name, pattern))
                fingerprint = '\n'.join(split_fingerprints)
                single_input.set_string_custom_property(
                    _FINGERPRINT, fingerprint)
                if select_span is None:
                    select_span = '0'
                single_input.set_string_custom_property(_SPAN, select_span)

                matched_artifacts = []
                for artifact in self._metadata_handler.get_artifacts_by_uri(
                        single_input.uri):
                    if (artifact.custom_properties[_FINGERPRINT].string_value
                            == fingerprint) and (
                                artifact.custom_properties[_SPAN].string_value
                                == select_span):
                        matched_artifacts.append(artifact)

                if matched_artifacts:
                    # TODO(b/138845899): consider use span instead of id.
                    # If there are multiple matches, get the latest one for caching.
                    # Using id because spans are the same for matched artifacts.
                    latest_artifact = max(matched_artifacts,
                                          key=lambda artifact: artifact.id)
                    absl.logging.info('latest_artifact %s.' %
                                      (latest_artifact))
                    absl.logging.info('type(latest_artifact) %s.' %
                                      (type(latest_artifact)))

                    single_input.set_artifact(latest_artifact)
                else:
                    # TODO(jyzhao): whether driver should be read-only for metadata.
                    [new_artifact] = self._metadata_handler.publish_artifacts(
                        [single_input])  # pylint: disable=unbalanced-tuple-unpacking
                    absl.logging.info('Registered new input: %s' %
                                      (new_artifact))
                    single_input.set_artifact(new_artifact)

        exec_properties['input_config'] = json_format.MessageToJson(
            input_config, sort_keys=True)
        return input_dict
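_retrieve_latest_span is referenced above but not shown; a plausible sketch, assuming _SPAN_SPEC is the literal span marker (e.g. '{SPAN}') inside split.pattern and that the driver simply globs for the largest span number:

import os
import re

import tensorflow as tf

def _retrieve_latest_span(uri, split):
    # Hypothetical sketch: replace the span marker with a wildcard, glob the
    # input location, and return the largest span number found as a string
    # (the caller substitutes it back into split.pattern).
    glob_pattern = os.path.join(uri, split.pattern.replace(_SPAN_SPEC, '*'))
    span_re = re.compile(
        re.escape(os.path.join(uri, split.pattern)).replace(
            re.escape(_SPAN_SPEC), r'(\d+)'))
    spans = []
    for path in tf.io.gfile.glob(glob_pattern):
        match = span_re.match(path)
        if match:
            spans.append(int(match.group(1)))
    if not spans:
        raise ValueError('No input found for pattern %s.' % glob_pattern)
    return str(max(spans))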
Example no. 16
def create_python_job(python_module_path: str,
                      project: str,
                      gcp_resources: str,
                      location: str,
                      temp_location: str,
                      requirements_file_path: str = '',
                      args: Optional[str] = '[]'):
    """Creates a Dataflow python job.

  Args:
    python_module_path: The gcs path to the python file to run.
    project: Required. The project of which the resource will be launched.
    gcp_resources: A placeholder output for returning the gcp_resources proto.
    location: Required. The region of which the resource will be launched.
    temp_location: A GCS path for Dataflow to stage temporary job files created
      during the execution of the pipeline.
    requirements_file_path: Optional, the gcs path to the pip requirements file.
    args: The JsonArray list of args to pass to the python file. It can include
      '--requirements_file' or '--setup_file' to configure the workers; however,
      the path provided needs to be a GCS path.


  Returns:
    An instance of GcpResources proto with the Dataflow job ID, which is stored
    in the gcp_resources path.
  Raises:
    RuntimeError: If the execution does not return a job ID.
  """
    job_id = None
    if requirements_file_path:
        install_requirements(requirements_file_path)
    args_list = []
    if args:
        args_list = json.loads(args)

    python_file_path = stage_file(python_module_path)
    # If --setup_file or --requirements_file are provided stage them locally.
    for idx, param in enumerate(args_list):
        if param in ('--requirements_file', '--setup_file'):
            args_list[idx + 1] = stage_file(args_list[idx + 1])
            logging.info('Staging %s at %s locally.', param,
                         args_list[idx + 1])

    cmd = prepare_cmd(project, location, python_file_path, args_list,
                      temp_location)
    sub_process = Process(cmd)
    for line in sub_process.read_lines():
        logging.info('DataflowRunner output: %s', line)
        job_id, location = extract_job_id_and_location(line)
        if job_id:
            logging.info('Found job id %s and location %s.', job_id, location)
            # Write the job proto to output.
            job_resources = gcp_resources_pb2.GcpResources()
            job_resource = job_resources.resources.add()
            job_resource.resource_type = 'DataflowJob'
            job_resource.resource_uri = f'https://dataflow.googleapis.com/v1b3/projects/{project}/locations/{location}/jobs/{job_id}'

            with open(gcp_resources, 'w') as f:
                f.write(json_format.MessageToJson(job_resources))
            break
    if not job_id:
        raise RuntimeError(
            'No dataflow job was found when running the python file.')
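extract_job_id_and_location, prepare_cmd, stage_file, install_requirements and Process come from the surrounding launcher module; a hedged sketch of extract_job_id_and_location, assuming the DataflowRunner logs a monitoring-console URL of the form .../dataflow/jobs/<location>/<job_id>:

import re
from typing import Optional, Tuple

# Hypothetical pattern; the exact log format emitted by the runner is an assumption.
_JOB_URL_RE = re.compile(
    r'console\.cloud\.google\.com/dataflow/jobs/([^/]+)/([a-z0-9_\-]+)')

def extract_job_id_and_location(line: str) -> Tuple[Optional[str], Optional[str]]:
    """Returns (job_id, location) if the line contains the console URL, else (None, None)."""
    match = _JOB_URL_RE.search(line)
    if match:
        return match.group(2), match.group(1)
    return None, None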
Example no. 17
def main(argv):
    """Calls ModelService.ImportModelEvaluation."""
    parser = argparse.ArgumentParser(
        prog='Vertex Model Service evaluation importer', description='')
    parser.add_argument('--metrics',
                        dest='metrics',
                        type=str,
                        required=True,
                        default=argparse.SUPPRESS)
    parser.add_argument('--metrics_explanation',
                        dest='metrics_explanation',
                        type=str,
                        default=None)
    parser.add_argument('--explanation',
                        dest='explanation',
                        type=str,
                        default=None)
    parser.add_argument('--problem_type',
                        dest='problem_type',
                        type=str,
                        required=True,
                        default=argparse.SUPPRESS)
    parser.add_argument('--model_name',
                        dest='model_name',
                        type=str,
                        required=True,
                        default=argparse.SUPPRESS)
    parser.add_argument('--gcp_resources',
                        dest='gcp_resources',
                        type=_make_parent_dirs_and_return_path,
                        required=True,
                        default=argparse.SUPPRESS)
    parsed_args, _ = parser.parse_known_args(argv)

    _, project_id, _, location, _, model_id = parsed_args.model_name.split('/')
    api_endpoint = location + '-aiplatform.googleapis.com'
    resource_uri_prefix = f'https://{api_endpoint}/v1/'

    with open(parsed_args.metrics) as metrics_file:
        model_evaluation = {
            'metrics':
            to_value(
                next(
                    iter(
                        json.loads(metrics_file.read())['slicedMetrics'][0]
                        ['metrics'].values()))),
            'metrics_schema_uri':
            PROBLEM_TYPE_TO_SCHEMA_URI.get(parsed_args.problem_type),
        }

    if parsed_args.explanation and parsed_args.explanation == "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}":
        # metrics_explanation must contain explanation_gcs_path when provided.
        logging.error('"explanation" must contain explanations when provided.')
        sys.exit(13)
    elif parsed_args.explanation:
        explanation_file_name = parsed_args.explanation if not parsed_args.explanation.startswith(
            'gs://') else '/gcs' + parsed_args.explanation[4:]
    elif parsed_args.metrics_explanation and parsed_args.metrics_explanation != "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}":
        explanation_file_name = parsed_args.metrics_explanation if not parsed_args.metrics_explanation.startswith(
            'gs://') else '/gcs' + parsed_args.metrics_explanation[4:]
    else:
        explanation_file_name = None
    if explanation_file_name:
        with open(explanation_file_name) as explanation_file:
            model_evaluation['model_explanation'] = {
                'mean_attributions': [{
                    'feature_attributions':
                    to_value(
                        json.loads(explanation_file.read())['explanation']
                        ['attributions'][0]['featureAttributions'])
                }]
            }

    import_model_evaluation_response = aiplatform.gapic.ModelServiceClient(
        client_info=gapic_v1.client_info.ClientInfo(
            user_agent='google-cloud-pipeline-components', ),
        client_options={
            'api_endpoint': api_endpoint,
        }).import_model_evaluation(
            parent=parsed_args.model_name,
            model_evaluation=model_evaluation,
        )
    model_evaluation_name = import_model_evaluation_response.name

    # Write the model evaluation resource to GcpResources output.
    model_eval_resources = GcpResources()
    model_eval_resource = model_eval_resources.resources.add()
    model_eval_resource.resource_type = RESOURCE_TYPE
    model_eval_resource.resource_uri = f'{resource_uri_prefix}{model_evaluation_name}'

    with open(parsed_args.gcp_resources, 'w') as f:
        f.write(json_format.MessageToJson(model_eval_resources))
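to_value is assumed to wrap a plain Python object into a google.protobuf.Value; a minimal sketch built on json_format:

import json

from google.protobuf import json_format, struct_pb2

def to_value(obj):
    # Hypothetical helper: Value accepts any JSON-compatible object, so a
    # round trip through json.dumps is enough for a sketch.
    return json_format.Parse(json.dumps(obj), struct_pb2.Value())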
Example no. 18
    def __init__(
        self,
        component: tfx_base_component.BaseComponent,
        component_launcher_class: Type[
            base_component_launcher.BaseComponentLauncher],
        depends_on: Set[dsl.ContainerOp],
        pipeline: tfx_pipeline.Pipeline,
        pipeline_name: Text,
        pipeline_root: dsl.PipelineParam,
        tfx_image: Text,
        kubeflow_metadata_config: Optional[
            kubeflow_pb2.KubeflowMetadataConfig],
    ):
        """Creates a new Kubeflow-based component.

    This class essentially wraps a dsl.ContainerOp construct in Kubeflow
    Pipelines.

    Args:
      component: The logical TFX component to wrap.
      component_launcher_class: the class of the launcher to launch the
        component.
      depends_on: The set of upstream KFP ContainerOp components that this
        component will depend on.
      pipeline: The logical TFX pipeline to which this component belongs.
      pipeline_name: The name of the TFX pipeline.
      pipeline_root: The pipeline root specified, as a dsl.PipelineParam
      tfx_image: The container image to use for this component.
      kubeflow_metadata_config: Configuration settings for connecting to the
        MLMD store in a Kubeflow cluster.
    """
        component_launcher_class_path = '.'.join([
            component_launcher_class.__module__,
            component_launcher_class.__name__
        ])

        arguments = [
            '--pipeline_name',
            pipeline_name,
            '--pipeline_root',
            pipeline_root,
            '--kubeflow_metadata_config',
            json_format.MessageToJson(kubeflow_metadata_config),
            '--additional_pipeline_args',
            json.dumps(pipeline.additional_pipeline_args),
            '--component_launcher_class_path',
            component_launcher_class_path,
            '--serialized_component',
            json_utils.dumps(component),
        ]

        if pipeline.enable_cache:
            arguments.append('--enable_cache')

        self.container_op = dsl.ContainerOp(
            name=component.id.replace('.', '_'),
            command=_COMMAND,
            image=tfx_image,
            arguments=arguments,
        )

        tf.logging.info('Adding upstream dependencies for component {}'.format(
            self.container_op.name))
        for op in depends_on:
            tf.logging.info('   ->  Component: {}'.format(op.name))
            self.container_op.after(op)

        # TODO(b/140172100): Document the use of additional_pipeline_args.
        if _WORKFLOW_ID_KEY in pipeline.additional_pipeline_args:
            # Allow overriding pipeline's run_id externally, primarily for testing.
            self.container_op.add_env_variable(
                k8s_client.V1EnvVar(
                    name=_WORKFLOW_ID_KEY,
                    value=pipeline.additional_pipeline_args[_WORKFLOW_ID_KEY]))
        else:
            # Add the Argo workflow ID to the container's environment variable so it
            # can be used to uniquely place pipeline outputs under the pipeline_root.
            field_path = "metadata.labels['workflows.argoproj.io/workflow']"
            self.container_op.add_env_variable(
                k8s_client.V1EnvVar(
                    name=_WORKFLOW_ID_KEY,
                    value_from=k8s_client.V1EnvVarSource(
                        field_ref=k8s_client.V1ObjectFieldSelector(
                            field_path=field_path))))
Example no. 19
    def from_protobuf(msg):
        metric = Metric.from_protobuf(msg.metric)
        targets = [json_format.MessageToJson(target) for target in msg.targets]
        val = Value.from_protobuf(msg.value)
        return Score(metric, msg.fold, targets, val)
Example no. 20
def construct_response_json(
        user_model: SeldonComponent,
        is_request: bool,
        client_request_raw: Union[List, Dict],
        client_raw_response: Union[np.ndarray, str, bytes, dict]) -> Union[List, Dict]:
    """
    This function converts a raw REST response into a JSON object that has the same structure
    as the SeldonMessage proto. This is necessary because the conversion through the
    SeldonMessage proto changes the numeric type of every int in the JSON into a float.

    Parameters
    ----------
    user_model
       Client user class
    is_request
       Whether this is part of the request flow as opposed to the response flow
    client_request_raw
       The request received in JSON format
    client_raw_response
       The raw client response from their model

    Returns
    -------
       A SeldonMessage JSON response

    """
    response = {}

    if "jsonData" in client_request_raw:
        response["jsonData"] = client_raw_response
    elif isinstance(client_raw_response, (bytes, bytearray)):
        base64_data = base64.b64encode(client_raw_response)
        response["binData"] = base64_data.decode("utf-8")
    elif isinstance(client_raw_response, str):
        response["strData"] = client_raw_response
    else:
        is_np = isinstance(client_raw_response, np.ndarray)
        is_list = isinstance(client_raw_response, list)
        if not (is_np or is_list):
            raise SeldonMicroserviceException(
                "Unknown data type returned as payload (must be list or np array):"
                    + str(client_raw_response))
        if is_np:
            np_client_raw_response = client_raw_response
            list_client_raw_response = client_raw_response.tolist()
        else:
            np_client_raw_response = np.array(client_raw_response)
            list_client_raw_response = client_raw_response

        response["data"] = {}
        if "data" in client_request_raw:
            if np.issubdtype(np_client_raw_response.dtype, np.number):
                if "tensor" in client_request_raw["data"]:
                    default_data_type = "tensor"
                    result_client_response = {
                        "values": np_client_raw_response.ravel().tolist(),
                        "shape": np_client_raw_response.shape
                    }
                elif "tftensor" in client_request_raw["data"]:
                    default_data_type = "tftensor"
                    tf_json_str = json_format.MessageToJson(
                            tf.make_tensor_proto(np_client_raw_response))
                    result_client_response = json.loads(tf_json_str)
                else:
                    default_data_type = "ndarray"
                    result_client_response = list_client_raw_response
            else:
                default_data_type = "ndarray"
                result_client_response = list_client_raw_response
        else:
            if np.issubdtype(np_client_raw_response.dtype, np.number):
                default_data_type = "tensor"
                result_client_response = {
                    "values": np_client_raw_response.ravel().tolist(),
                    "shape": np_client_raw_response.shape
                }
            else:
                default_data_type = "ndarray"
                result_client_response = list_client_raw_response

        response["data"][default_data_type] = result_client_response

        if is_request:
            req_names = client_request_raw.get("data", {}).get("names", [])
            names = client_feature_names(user_model, req_names)
        else:
            names = client_class_names(user_model, np_client_raw_response)
        response["data"]["names"] = names

    response["meta"] = {}
    tags = client_custom_tags(user_model)
    if tags:
        response["meta"]["tags"] = tags
    metrics = client_custom_metrics(user_model)
    if metrics:
        response["meta"]["metrics"] = metrics
    puid = client_request_raw.get("meta", {}).get("puid", None)
    if puid:
        response["meta"]["puid"] = puid

    return response
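A hedged usage sketch; EchoModel is a stand-in component with none of the optional SeldonComponent hooks, so the exact contents of 'meta' depend on the seldon-core helper defaults:

class EchoModel:
    """Stand-in user component with no custom tags, metrics or class names."""

request = {"strData": "hello"}
response = construct_response_json(EchoModel(), False, request, "HELLO")
# Roughly: {"strData": "HELLO", "meta": {}}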
Example no. 21
    def testEmptyMessageToJson(self):
        message = json_format_proto3_pb2.TestMessage()
        self.assertEqual(json_format.MessageToJson(message), '{}')
        parsed_message = json_format_proto3_pb2.TestMessage()
        self.CheckParseBack(message, parsed_message)
Example no. 22
def execute_hmd(document, model_name):
    """
    HMD
    :param      document:        document
    :param      model_name:         Model Name
    :return:    dict mapping each detected category to its matched rule keys
    """
    hmd_dict = dict()
    model_path = '{0}/trained/hmd/{1}__0.hmdmodel'.format(
        os.getenv('MAUM_ROOT'), model_name)
    if not os.path.exists(model_path):
        raise Exception('model does not exist : {0}'.format(model_path))
    model_value = load_model(model_path)
    output_list = list()
    detect_category_dict = dict()
    for rules in model_value.rules:
        strs_list = list()
        category = rules.categories[0]
        dtc_cont = rules.rule
        detect_keyword_list = split_input(dtc_cont)
        for idx in range(len(detect_keyword_list)):
            detect_keyword = detect_keyword_list[idx].split("|")
            strs_list.append(detect_keyword)
        ws = ''
        output = ''
        tmp_result = []
        output += '{0}\t'.format(category)
        output_list += vec_word_combine(tmp_result, output, strs_list, ws, 0)
    for item in output_list:
        item = item.strip()
        if len(item) < 1 or item[0] == '#':
            continue
        item_list = item.split("\t")
        t_loc = len(item_list) - 1
        cate = ''
        for idx in range(t_loc):
            if idx != 0:
                cate += '_'
            cate += item_list[idx]
            if item_list[t_loc] not in hmd_dict:
                hmd_dict[item_list[t_loc]] = [[cate]]
            else:
                hmd_dict[item_list[t_loc]].append([cate])
    json_data = json.loads(json_format.MessageToJson(document, True))
    word_list = list()
    for sentence in json_data['document']['sentences']:
        for words in sentence['words']:
            tagged_text = words['taggedText']
            tagged_text_list = tagged_text.split()
            for tagged_word in tagged_text_list:
                word = tagged_word.split("/")[0]
                word_list.append(word)
    nlp_sent = " ".join(word_list)
    s_tmp = " {0} ".format(nlp_sent)
    vec_space = list()
    vec_ne = list()
    rmv_ne = list()
    cnt = 0
    flag = False
    for idx in range(len(s_tmp)):
        if s_tmp[idx] == ' ':
            vec_space.append(cnt)
        elif (idx + 1 != len(s_tmp)) and s_tmp[idx:idx + 2] == '__':
            if flag:
                t_word = s_tmp[idx + 2:s_tmp[idx + 2:].find(' ') + idx + 2]
                tmp_ne.append(cnt)
                tmp_ne.append(t_word)
                vec_ne.append(tmp_ne)
                cnt = cnt - 2 - len(t_word)
                r_tmp_ne.append(idx)
                r_tmp_ne.append(idx + 2 + len(t_word))
                rmv_ne.append(r_tmp_ne)
                flag = False
            else:
                tmp_ne = list()
                tmp_ne.append(cnt)
                r_tmp_ne = list()
                r_tmp_ne.append(idx)
                flag = True
                cnt -= 2
        cnt += 1
    rmv_ne.reverse()
    for ne in rmv_ne:
        s_tmp = s_tmp[:ne[0]] + s_tmp[ne[0] + 2:ne[1]] + s_tmp[ne[2]:]
    b_check = False
    for key in hmd_dict.keys():
        tmp_line = s_tmp
        vec_key = key.split('$')
        tmp, b_print = find_hmd(vec_key, document.sentence, tmp_line,
                                vec_space)
        if b_print:
            b_check = True
            for item in hmd_dict[key]:
                if item[0] not in detect_category_dict:
                    detect_category_dict[item[0]] = [key]
                else:
                    detect_category_dict[item[0]].append(key)
    return detect_category_dict
Example no. 23
    def CheckParseBack(self, message, parsed_message):
        json_format.Parse(json_format.MessageToJson(message), parsed_message)
        self.assertEqual(message, parsed_message)
Example no. 24
            model_config = triton_client.get_model_config(model_name)

        nonmatch = list()
        expected_files = [
            f for f in os.listdir(FLAGS.expected_dir)
            if (os.path.isfile(os.path.join(FLAGS.expected_dir, f)) and (
                f.startswith("expected")))
        ]
        for efile in expected_files:
            with open(os.path.join(FLAGS.expected_dir, efile)) as f:
                config = text_format.Parse(f.read(), mc.ModelConfig())

            if pair[1] == "http":
                config_json = json.loads(
                    json_format.MessageToJson(
                        config, preserving_proto_field_name=True))
                if config_json == model_config:
                    sys.exit(0)
            else:
                if config == model_config.config:
                    sys.exit(0)

        nonmatch.append(config)

    print("Model config doesn't match any expected output:")
    print("Model config:")
    print(model_config)
    for nm in nonmatch:
        print("Non-matching:")
        print(nm)
Example no. 25
    def __init__(self,
                 component: tfx_base_node.BaseNode,
                 component_launcher_class: Type[
                     base_component_launcher.BaseComponentLauncher],
                 depends_on: Set[dsl.ContainerOp],
                 pipeline: tfx_pipeline.Pipeline,
                 pipeline_name: Text,
                 pipeline_root: dsl.PipelineParam,
                 tfx_image: Text,
                 kubeflow_metadata_config: Optional[
                     kubeflow_pb2.KubeflowMetadataConfig],
                 component_config: base_component_config.BaseComponentConfig,
                 tfx_ir: Optional[pipeline_pb2.Pipeline] = None,
                 pod_labels_to_attach: Optional[Dict[Text, Text]] = None):
        """Creates a new Kubeflow-based component.

    This class essentially wraps a dsl.ContainerOp construct in Kubeflow
    Pipelines.

    Args:
      component: The logical TFX component to wrap.
      component_launcher_class: the class of the launcher to launch the
        component.
      depends_on: The set of upstream KFP ContainerOp components that this
        component will depend on.
      pipeline: The logical TFX pipeline to which this component belongs.
      pipeline_name: The name of the TFX pipeline.
      pipeline_root: The pipeline root specified, as a dsl.PipelineParam
      tfx_image: The container image to use for this component.
      kubeflow_metadata_config: Configuration settings for connecting to the
        MLMD store in a Kubeflow cluster.
      component_config: Component config to launch the component.
      tfx_ir: The TFX intermediate representation of the pipeline.
      pod_labels_to_attach: Optional dict of pod labels to attach to the
        GKE pod.
    """
        component_launcher_class_path = '.'.join([
            component_launcher_class.__module__,
            component_launcher_class.__name__
        ])

        serialized_component = utils.replace_placeholder(
            json_utils.dumps(node_wrapper.NodeWrapper(component)))

        arguments = [
            '--pipeline_name',
            pipeline_name,
            '--pipeline_root',
            pipeline_root,
            '--kubeflow_metadata_config',
            json_format.MessageToJson(message=kubeflow_metadata_config,
                                      preserving_proto_field_name=True),
            '--beam_pipeline_args',
            json.dumps(pipeline.beam_pipeline_args),
            '--additional_pipeline_args',
            json.dumps(pipeline.additional_pipeline_args),
            '--component_launcher_class_path',
            component_launcher_class_path,
            '--serialized_component',
            serialized_component,
            '--component_config',
            json_utils.dumps(component_config),
            '--node_id',
            component.id,
        ]

        if tfx_ir is not None:
            arguments += ['--tfx_ir', json_format.MessageToJson(tfx_ir)]
        else:
            logging.info('No tfx_ir is given. Proceeding without tfx_ir.')

        if pipeline.enable_cache:
            arguments.append('--enable_cache')

        self.container_op = dsl.ContainerOp(
            name=component.id.replace('.', '_'),
            command=_COMMAND,
            image=tfx_image,
            arguments=arguments,
            output_artifact_paths={
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
            },
        )

        logging.info('Adding upstream dependencies for component %s',
                     self.container_op.name)
        for op in depends_on:
            logging.info('   ->  Component: %s', op.name)
            self.container_op.after(op)

        # TODO(b/140172100): Document the use of additional_pipeline_args.
        if _WORKFLOW_ID_KEY in pipeline.additional_pipeline_args:
            # Allow overriding pipeline's run_id externally, primarily for testing.
            self.container_op.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=_WORKFLOW_ID_KEY,
                    value=pipeline.additional_pipeline_args[_WORKFLOW_ID_KEY]))
        else:
            # Add the Argo workflow ID to the container's environment variable so it
            # can be used to uniquely place pipeline outputs under the pipeline_root.
            field_path = "metadata.labels['workflows.argoproj.io/workflow']"
            self.container_op.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=_WORKFLOW_ID_KEY,
                    value_from=k8s_client.V1EnvVarSource(
                        field_ref=k8s_client.V1ObjectFieldSelector(
                            field_path=field_path))))

        if pod_labels_to_attach:
            for k, v in pod_labels_to_attach.items():
                self.container_op.add_pod_label(k, v)
Example no. 26
def struct_to_dict(struct_obj):
  # type: (struct_pb2.Struct) -> dict
  return json.loads(json_format.MessageToJson(struct_obj))
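A short usage sketch:

from google.protobuf import struct_pb2

struct_obj = struct_pb2.Struct()
struct_obj['name'] = 'Jim'
struct_obj['score'] = 2.5

assert struct_to_dict(struct_obj) == {'name': 'Jim', 'score': 2.5}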
Example no. 27
def __make_json_response_error(message):
    res = rest_api_pb2.Response()
    res.error.error_message = message
    return json_format.MessageToJson(res)
Example no. 28
class ExecutorTest(tf.test.TestCase, absl.testing.parameterized.TestCase):

  @absl.testing.parameterized.named_parameters(('evaluation_w_eval_config', {
      'eval_config':
          json_format.MessageToJson(
              tfma.EvalConfig(slicing_specs=[
                  tfma.SlicingSpec(feature_keys=['trip_start_hour']),
                  tfma.SlicingSpec(
                      feature_keys=['trip_start_day', 'trip_miles']),
              ]),
              preserving_proto_field_name=True)
  }))
  def testEvaluation(self, exec_properties):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    baseline_model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
    input_dict = {
        constants.EXAMPLES_KEY: [examples],
        constants.MODEL_KEY: [model],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    output_dict = {
        constants.EVALUATION_KEY: [eval_output],
        constants.BLESSING_KEY: [blessing_output],
    }

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
    self.assertFalse(
        tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))

  @absl.testing.parameterized.named_parameters(('legacy_feature_slicing', {
      'feature_slicing_spec':
          json_format.MessageToJson(
              evaluator_pb2.FeatureSlicingSpec(specs=[
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_hour']),
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_day', 'trip_miles']),
              ]),
              preserving_proto_field_name=True),
  }))
  def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    input_dict = {
        constants.EXAMPLES_KEY: [examples],
        constants.MODEL_KEY: [model],
    }

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    output_dict = {
        constants.EVALUATION_KEY: [eval_output],
        constants.BLESSING_KEY: [blessing_output],
    }

    try:
      # Need to import the following module so that the fairness indicator
      # post-export metric is registered.  This may raise an ImportError if the
      # currently-installed version of TFMA does not support fairness
      # indicators.
      import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
      exec_properties['fairness_indicator_thresholds'] = [
          0.1, 0.3, 0.5, 0.7, 0.9
      ]
    except ImportError:
      absl.logging.warning(
          'Not testing fairness indicators because a compatible TFMA version '
          'is not installed.')

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
    self.assertFalse(
        tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))

  @absl.testing.parameterized.named_parameters(
      (
          'eval_config_w_validation',
          {
              'eval_config':
                  json_format.MessageToJson(
                      tfma.EvalConfig(
                          model_specs=[
                              tfma.ModelSpec(label_key='tips'),
                          ],
                          metrics_specs=[
                              tfma.MetricsSpec(metrics=[
                                  tfma.config.MetricConfig(
                                      class_name='ExampleCount',
                                      # Count > 0, OK.
                                      threshold=tfma.config.MetricThreshold(
                                          value_threshold=tfma
                                          .GenericValueThreshold(
                                              lower_bound={'value': 0}))),
                              ]),
                          ],
                          slicing_specs=[tfma.SlicingSpec()]),
                      preserving_proto_field_name=True)
          },
          True,
          True),
      (
          'eval_config_w_validation_fail',
          {
              'eval_config':
                  json_format.MessageToJson(
                      tfma.EvalConfig(
                          model_specs=[
                              tfma.ModelSpec(
                                  name='baseline1',
                                  label_key='tips',
                                  is_baseline=True),
                              tfma.ModelSpec(
                                  name='candidate1', label_key='tips'),
                          ],
                          metrics_specs=[
                              tfma.MetricsSpec(metrics=[
                                  tfma.config.MetricConfig(
                                      class_name='ExampleCount',
                                      # Count < -1, NOT OK.
                                      threshold=tfma.config.MetricThreshold(
                                          value_threshold=tfma
                                          .GenericValueThreshold(
                                              upper_bound={'value': -1}))),
                              ]),
                          ],
                          slicing_specs=[tfma.SlicingSpec()]),
                      preserving_proto_field_name=True)
          },
          False,
          True),
      (
          'no_baseline_model_ignore_change_threshold_validation_pass',
          {
              'eval_config':
                  json_format.MessageToJson(
                      tfma.EvalConfig(
                          model_specs=[
                              tfma.ModelSpec(
                                  name='baseline',
                                  label_key='tips',
                                  is_baseline=True),
                              tfma.ModelSpec(
                                  name='candidate', label_key='tips'),
                          ],
                          metrics_specs=[
                              tfma.MetricsSpec(metrics=[
                                  tfma.config.MetricConfig(
                                      class_name='ExampleCount',
                                      # Count > 0, OK.
                                      threshold=tfma.config.MetricThreshold(
                                          value_threshold=tfma
                                          .GenericValueThreshold(
                                              lower_bound={'value': 0}))),
                                  tfma.config.MetricConfig(
                                      class_name='Accuracy',
                                      # Should be ignored due to no baseline.
                                      threshold=tfma.config.MetricThreshold(
                                          change_threshold=tfma
                                          .GenericChangeThreshold(
                                              relative={'value': 0},
                                              direction=tfma.MetricDirection
                                              .LOWER_IS_BETTER))),
                              ]),
                          ],
                          slicing_specs=[tfma.SlicingSpec()]),
                      preserving_proto_field_name=True)
          },
          True,
          False))
  def testDoValidation(self, exec_properties, blessed, has_baseline):
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    examples = standard_artifacts.Examples()
    examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
    model = standard_artifacts.Model()
    baseline_model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    input_dict = {
        constants.EXAMPLES_KEY: [examples],
        constants.MODEL_KEY: [model],
    }
    if has_baseline:
      input_dict[constants.BASELINE_MODEL_KEY] = [baseline_model]

    # Create output dict.
    eval_output = standard_artifacts.ModelEvaluation()
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    blessing_output = standard_artifacts.ModelBlessing()
    blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
    output_dict = {
        constants.EVALUATION_KEY: [eval_output],
        constants.BLESSING_KEY: [blessing_output],
    }

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
    self.assertTrue(
        tf.io.gfile.exists(os.path.join(eval_output.uri, 'validations')))
    if blessed:
      self.assertTrue(
          tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))
    else:
      self.assertTrue(
          tf.io.gfile.exists(os.path.join(blessing_output.uri, 'NOT_BLESSED')))
Example no. 29
def conformer_to_json(conformer):
    return json_format.MessageToJson(conformer,
                                     preserving_proto_field_name=True,
                                     including_default_value_fields=True)
Example no. 30
def systembatch(ctx, dir, analytic_id, fuser_id, out, tag):
    print(json_format.MessageToJson(ctx.obj.pipeclient.detect_batch(dir=dir, analytic_id=analytic_id, fuser_id=fuser_id, output_dir=out, tags=tag)))