Example #1
 def test_set_sample_data(self):
     test_array = ["test1", "test2", "test3"]
     temp_dir = tempfile.mkdtemp()
     inp = inputs.Sketchpad()
     out = outputs.Label()
     networking.build_template(temp_dir, inp, out)
     networking.set_sample_data_in_config_file(temp_dir, test_array)
     # TODO(abidlabs): find a better approach -- the config file is invalid JSON
     # until the following parameters are set
     networking.set_always_flagged_in_config_file(temp_dir, False)
     networking.set_disabled_in_config_file(temp_dir, False)
     config_file = os.path.join(temp_dir, 'static/config.json')
     with open(config_file) as json_file:
         data = json.load(json_file)
         self.assertEqual(test_array, data["sample_inputs"])
Example #2
 def test_postprocessing_dict(self):
     orig_label = {
         3: 0.7,
         1: 0.2,
         0: 0.1
     }
     true_label = {outputs.Label.LABEL_KEY: 3,
                   outputs.Label.CONFIDENCES_KEY: [
                       {outputs.Label.LABEL_KEY: 3, outputs.Label.CONFIDENCE_KEY: 0.7},
                       {outputs.Label.LABEL_KEY: 1, outputs.Label.CONFIDENCE_KEY: 0.2},
                       {outputs.Label.LABEL_KEY: 0, outputs.Label.CONFIDENCE_KEY: 0.1},
                   ]}
     out = outputs.Label()
     label = out.postprocess(orig_label)
     self.assertDictEqual(label, true_label)
Example #3
 def test_postprocessing_1D_array(self):
     array = np.array([0.1, 0.2, 0, 0.7, 0])
     true_label = {
         outputs.Label.LABEL_KEY:
         3,
         outputs.Label.CONFIDENCES_KEY: [
             {
                 outputs.Label.LABEL_KEY: 3,
                 outputs.Label.CONFIDENCE_KEY: 0.7
             },
             {
                 outputs.Label.LABEL_KEY: 1,
                 outputs.Label.CONFIDENCE_KEY: 0.2
             },
             {
                 outputs.Label.LABEL_KEY: 0,
                 outputs.Label.CONFIDENCE_KEY: 0.1
             },
         ]
     }
     out = outputs.Label()
     label = json.loads(out.postprocess(array))
     self.assertDictEqual(label, true_label)
Example #4
def get_huggingface_interface(model_name, api_key, alias):
    model_url = "https://huggingface.co/{}".format(model_name)
    api_url = "https://api-inference.huggingface.co/models/{}".format(model_name)
    print("Fetching model from: {}".format(model_url))

    if api_key is not None:
        headers = {"Authorization": f"Bearer {api_key}"}
    else:
        headers = {}

    # Check whether the model exists and, if so, get its pipeline tag
    response = requests.request("GET", api_url, headers=headers)
    assert response.status_code == 200, "Invalid model name or src"
    p = response.json().get('pipeline_tag')

    def encode_to_base64(r: requests.Response) -> str:
        base64_repr = base64.b64encode(r.content).decode('utf-8')
        data_prefix = ";base64,"
        if data_prefix in base64_repr:
            return base64_repr
        else:
            content_type = r.headers.get('content-type')
            return "data:{};base64,".format(content_type) + base64_repr
        

    pipelines = {
        'audio-classification': {
            # example model: https://hf.co/ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition
            'inputs': inputs.Audio(label="Input", source="upload",
                                   type="filepath"),
            'outputs': outputs.Label(label="Class", type="confidences"),
            'preprocess': lambda i: base64.b64decode(i['data'].split(",")[1]),  # convert the base64 representation to binary
            'postprocess': lambda r: {i["label"].split(", ")[0]: i["score"] for i in r.json()}
        },
        'automatic-speech-recognition': {
            # example model: https://hf.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
            'inputs': inputs.Audio(label="Input", source="upload",
                                   type="filepath"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda i: base64.b64decode(i['data'].split(",")[1]),  # convert the base64 representation to binary
            'postprocess': lambda r: r.json()["text"]
        },
        'feature-extraction': {
            # example model: hf.co/julien-c/distilbert-feature-extraction
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Dataframe(label="Output"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r.json()[0],
        },
        'fill-mask': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: {i["token_str"]: i["score"] for i in r.json()}
        },
        'image-classification': {
            # Example: https://huggingface.co/google/vit-base-patch16-224
            'inputs': inputs.Image(label="Input Image", type="filepath"),
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda i: base64.b64decode(i.split(",")[1]),  # convert the base64 representation to binary
            'postprocess': lambda r: {i["label"].split(", ")[0]: i["score"] for i in r.json()}
        },
        # TODO: support image segmentation pipeline -- should we add a new output component type?
        # 'image-segmentation': {
        #     # Example: https://hf.co/facebook/detr-resnet-50-panoptic
        #     'inputs': inputs.Image(label="Input Image", type="filepath"),
        #     'outputs': outputs.Image(label="Segmentation"),
        #     'preprocess': lambda i: base64.b64decode(i.split(",")[1]),  # convert the base64 representation to binary
        #     'postprocess': lambda x: base64.b64encode(x.json()[0]["mask"]).decode('utf-8'),
        # },        
        # TODO: also: support NER pipeline, object detection, table question answering
        'question-answering': {
            'inputs': [inputs.Textbox(label="Context", lines=7), inputs.Textbox(label="Question")],
            'outputs': [outputs.Textbox(label="Answer"), outputs.Label(label="Score")],
            'preprocess': lambda c, q: {"inputs": {"context": c, "question": q}},
            'postprocess': lambda r: (r.json()["answer"], r.json()["score"]),
        },
        'summarization': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Summary"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r.json()[0]["summary_text"]
        },
        'text-classification': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: {i["label"].split(", ")[0]: i["score"] for i in r.json()[0]}
        },
        'text-generation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r.json()[0]["generated_text"],
        },
        'text2text-generation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Generated Text"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r.json()[0]["generated_text"]
        },
        'translation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Translation"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r.json()[0]["translation_text"]
        },
        'zero-shot-classification': {
            'inputs': [inputs.Textbox(label="Input"),
                       inputs.Textbox(label="Possible class names ("
                                            "comma-separated)"),
                       inputs.Checkbox(label="Allow multiple true classes")],
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda i, c, m: {"inputs": i, "parameters":
            {"candidate_labels": c, "multi_class": m}},
            'postprocess': lambda r: {r.json()["labels"][i]: r.json()["scores"][i] for i in
                                      range(len(r.json()["labels"]))}
        },
        'sentence-similarity': {
            # example model: hf.co/sentence-transformers/distilbert-base-nli-stsb-mean-tokens
            'inputs': [
                inputs.Textbox(label="Source Sentence", default="That is a happy person"),
                inputs.Textbox(lines=7, label="Sentences to compare to", placeholder="Separate each sentence by a newline"),
            ],
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda src, sentences: {"inputs": {
                "source_sentence": src,
                "sentences": [s for s in sentences.splitlines() if s != ""],
            }},
            'postprocess': lambda r: { f"sentence {i}": v for i, v in enumerate(r.json()) },
        },
        'text-to-speech': {
            # example model: hf.co/julien-c/ljspeech_tts_train_tacotron2_raw_phn_tacotron_g2p_en_no_space_train
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Audio(label="Audio"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': encode_to_base64,
        },
        'text-to-image': {
            # example model: hf.co/osanseviero/BigGAN-deep-128
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Image(label="Output"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': encode_to_base64,
        },
    }

    if p is None or p not in pipelines:
        raise ValueError("Unsupported pipeline type: {}".format(p))
    
    pipeline = pipelines[p]

    def query_huggingface_api(*params):
        # Build the API payload from the input components
        data = pipeline['preprocess'](*params)
        if isinstance(data, dict):  # HF doesn't allow additional parameters for binary files (e.g. images or audio files)
            data.update({'options': {'wait_for_model': True}})
            data = json.dumps(data)
        response = requests.request("POST", api_url, headers=headers, data=data)        
        if response.status_code != 200:
            raise ValueError("Could not complete request to HuggingFace API, Error {}".format(response.status_code))
        output = pipeline['postprocess'](response)
        return output
    
    if alias is None:
        query_huggingface_api.__name__ = model_name
    else:
        query_huggingface_api.__name__ = alias

    interface_info = {
        'fn': query_huggingface_api, 
        'inputs': pipeline['inputs'],
        'outputs': pipeline['outputs'],
        'title': model_name,
    }

    return interface_info
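
For context, a minimal usage sketch of the helper above, under the assumption that the gradio package containing it is importable and that its Interface class accepts these kwargs; the model name is only an illustrative placeholder:

import gradio as gr

# Sketch only (not from the original example): the dict returned above is meant
# to be unpacked into gradio's Interface constructor, which accepts fn, inputs,
# outputs and title. The model name is a placeholder chosen for illustration.
interface_info = get_huggingface_interface(
    "distilbert-base-uncased-finetuned-sst-2-english",  # placeholder model
    api_key=None,
    alias=None,
)
demo = gr.Interface(**interface_info)
demo.launch()
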
Example #5
def load_from_pipeline(pipeline):
    """
    Gets the appropriate Interface kwargs for a given Hugging Face transformers.Pipeline.
    pipeline (transformers.Pipeline): the transformers.Pipeline from which to create an interface
    Returns:
    (dict): a dictionary of kwargs that can be used to construct an Interface object
    """
    try:
        import transformers
    except ImportError:
        raise ImportError("transformers not installed. Please try `pip install transformers`")
    if not isinstance(pipeline, transformers.Pipeline):
        raise ValueError("pipeline must be a transformers.Pipeline")
    
    # Handle the different pipelines. The hasattr() checks make sure the pipeline class exists
    # in the version of the transformers library that the user has installed.
    if hasattr(transformers, 'AudioClassificationPipeline') and isinstance(pipeline, transformers.AudioClassificationPipeline):
        pipeline_info = {
            'inputs': inputs.Audio(label="Input", source="microphone",
                                   type="filepath"),
            'outputs': outputs.Label(label="Class", type="confidences"),
            'preprocess': lambda i: {"inputs": i},
            'postprocess': lambda r: {i["label"].split(", ")[0]: i["score"] for i in r}
        }
    elif hasattr(transformers, 'AutomaticSpeechRecognitionPipeline') and isinstance(pipeline, transformers.AutomaticSpeechRecognitionPipeline):
        pipeline_info = {
            'inputs': inputs.Audio(label="Input", source="microphone",
                                   type="filepath"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda i: {"inputs": i},
            'postprocess': lambda r: r["text"]
        }
    elif hasattr(transformers, 'FeatureExtractionPipeline') and isinstance(pipeline, transformers.FeatureExtractionPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Dataframe(label="Output"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r[0],
        }
    elif hasattr(transformers, 'FillMaskPipeline') and isinstance(pipeline, transformers.FillMaskPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: {i["token_str"]: i["score"] for i in r}
        }
    elif hasattr(transformers, 'ImageClassificationPipeline') and isinstance(pipeline, transformers.ImageClassificationPipeline):
        pipeline_info = {
            'inputs': inputs.Image(label="Input Image", type="filepath"),
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda i: {"images": i},
            'postprocess': lambda r: {i["label"].split(", ")[0]: i["score"] for i in r}
        }
    elif hasattr(transformers, 'QuestionAnsweringPipeline') and isinstance(pipeline, transformers.QuestionAnsweringPipeline):
        pipeline_info = {
            'inputs': [inputs.Textbox(label="Context", lines=7), inputs.Textbox(label="Question")],
            'outputs': [outputs.Textbox(label="Answer"), outputs.Label(label="Score")],
            'preprocess': lambda c, q: {"context": c, "question": q},
            'postprocess': lambda r: (r["answer"], r["score"]),
        }
    elif hasattr(transformers, 'SummarizationPipeline') and isinstance(pipeline, transformers.SummarizationPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input", lines=7),
            'outputs': outputs.Textbox(label="Summary"),
            'preprocess': lambda x: {"inputs": x},
            'postprocess': lambda r: r[0]["summary_text"]
        }
    elif hasattr(transformers, 'TextClassificationPipeline') and isinstance(pipeline, transformers.TextClassificationPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda x: [x],
            'postprocess': lambda r: {i["label"].split(", ")[0]: i["score"] for i in r}
        }
    elif hasattr(transformers, 'TextGenerationPipeline') and isinstance(pipeline, transformers.TextGenerationPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda x: {"text_inputs": x},
            'postprocess': lambda r: r[0]["generated_text"],
        }
    elif hasattr(transformers, 'TranslationPipeline') and isinstance(pipeline, transformers.TranslationPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Translation"),
            'preprocess': lambda x: [x],
            'postprocess': lambda r: r[0]["translation_text"]
        }
    elif hasattr(transformers, 'Text2TextGenerationPipeline') and isinstance(pipeline, transformers.Text2TextGenerationPipeline):
        pipeline_info = {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Generated Text"),
            'preprocess': lambda x: [x],
            'postprocess': lambda r: r[0]["generated_text"]
        }
    elif hasattr(transformers, 'ZeroShotClassificationPipeline') and isinstance(pipeline, transformers.ZeroShotClassificationPipeline):
        pipeline_info = {
            'inputs': [inputs.Textbox(label="Input"),
                       inputs.Textbox(label="Possible class names ("
                                            "comma-separated)"),
                       inputs.Checkbox(label="Allow multiple true classes")],
            'outputs': outputs.Label(label="Classification", type="confidences"),
            'preprocess': lambda i, c, m: {"sequences": i, 
                "candidate_labels": c, "multi_label": m},
            'postprocess': lambda r: {r["labels"][i]: r["scores"][i] for i in
                                      range(len(r["labels"]))}
        }
    else:
        raise ValueError("Unsupported pipeline type: {}".format(type(pipeline)))
    
    # define the function that will be called by the Interface
    def fn(*params):
        data = pipeline_info["preprocess"](*params)
        # special cases that need to be handled differently
        if isinstance(pipeline, (transformers.TextClassificationPipeline, 
                                 transformers.Text2TextGenerationPipeline,
                                 transformers.TranslationPipeline)):
            data = pipeline(*data)
        else:
            data = pipeline(**data)
        # print("Before postprocessing", data)
        output = pipeline_info["postprocess"](data)
        return output
    
    interface_info = pipeline_info.copy()
    interface_info["fn"] = fn
    del interface_info["preprocess"]
    del interface_info["postprocess"]
    
    # define the title of the Interface
    interface_info["title"] = pipeline.model.__class__.__name__

    return interface_info
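
A similarly hedged usage sketch, assuming transformers' pipeline() factory and gradio's Interface are available; any concrete task would do as long as it maps to one of the branches above:

import gradio as gr
from transformers import pipeline

# Sketch only: build a local transformers pipeline, convert it to Interface
# kwargs with the function above, and launch a demo. The task chosen here
# ("sentiment-analysis", a TextClassificationPipeline) is just an example.
pl = pipeline("sentiment-analysis")
interface_info = load_from_pipeline(pl)
gr.Interface(**interface_info).launch()
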
Example #6
def get_huggingface_interface(model_name, api_key, alias):
    api_url = "https://api-inference.huggingface.co/models/{}".format(
        model_name)
    if api_key is not None:
        headers = {"Authorization": f"Bearer {api_key}"}
    else:
        headers = {}

    # Check whether the model exists and, if so, get its pipeline tag
    response = requests.request("GET", api_url, headers=headers)
    assert response.status_code == 200, "Invalid model name or src"
    p = response.json().get('pipeline_tag')

    def post_process_binary_body(r: requests.Response):
        with tempfile.NamedTemporaryFile(delete=False) as fp:
            fp.write(r.content)
            return fp.name

    pipelines = {
        'question-answering': {
            'inputs': [
                inputs.Textbox(label="Context", lines=7),
                inputs.Textbox(label="Question")
            ],
            'outputs':
            [outputs.Textbox(label="Answer"),
             outputs.Label(label="Score")],
            'preprocess':
            lambda c, q: {
                "inputs": {
                    "context": c,
                    "question": q
                }
            },
            'postprocess':
            lambda r: (r["answer"], r["score"]),
            # 'examples': [['My name is Sarah and I live in London', 'Where do I live?']]
        },
        'text-generation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["generated_text"],
            # 'examples': [['My name is Clara and I am']]
        },
        'summarization': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Summary"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["summary_text"]
        },
        'translation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Translation"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["translation_text"]
        },
        'text2text-generation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Generated Text"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["generated_text"]
        },
        'text-classification': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Label(label="Classification"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: {
                'Negative': r[0][0]["score"],
                'Positive': r[0][1]["score"]
            }
        },
        'fill-mask': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': "label",
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: {i["token_str"]: i["score"]
                                      for i in r}
        },
        'zero-shot-classification': {
            'inputs': [
                inputs.Textbox(label="Input"),
                inputs.Textbox(label="Possible class names ("
                               "comma-separated)"),
                inputs.Checkbox(label="Allow multiple true classes")
            ],
            'outputs':
            "label",
            'preprocess':
            lambda i, c, m: {
                "inputs": i,
                "parameters": {
                    "candidate_labels": c,
                    "multi_class": m
                }
            },
            'postprocess':
            lambda r:
            {r["labels"][i]: r["scores"][i]
             for i in range(len(r["labels"]))}
        },
        'automatic-speech-recognition': {
            'inputs': inputs.Audio(label="Input", source="upload",
                                   type="file"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda i: {
                "inputs": i
            },
            'postprocess': lambda r: r["text"]
        },
        'image-classification': {
            'inputs': inputs.Image(label="Input Image", type="file"),
            'outputs': outputs.Label(label="Classification"),
            'preprocess': lambda i: i,
            'postprocess':
            lambda r: {i["label"].split(", ")[0]: i["score"]
                       for i in r}
        },
        'feature-extraction': {
            # example model: hf.co/julien-c/distilbert-feature-extraction
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Dataframe(label="Output"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0],
        },
        'sentence-similarity': {
            # example model: hf.co/sentence-transformers/distilbert-base-nli-stsb-mean-tokens
            'inputs': [
                inputs.Textbox(label="Source Sentence",
                               default="That is a happy person"),
                inputs.Textbox(
                    lines=7,
                    label="Sentences to compare to",
                    placeholder="Separate each sentence by a newline"),
            ],
            'outputs':
            outputs.Label(label="Classification"),
            'preprocess':
            lambda src, sentences: {
                "inputs": {
                    "source_sentence": src,
                    "sentences":
                    [s for s in sentences.splitlines() if s != ""],
                }
            },
            'postprocess':
            lambda r: {f"sentence {i}": v
                       for i, v in enumerate(r)},
        },
        'text-to-speech': {
            # example model: hf.co/julien-c/ljspeech_tts_train_tacotron2_raw_phn_tacotron_g2p_en_no_space_train
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Audio(label="Audio"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': post_process_binary_body,
        },
        'text-to-image': {
            # example model: hf.co/osanseviero/BigGAN-deep-128
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Image(label="Output"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': post_process_binary_body,
        },
    }

    if p is None or p not in pipelines:
        print("Warning: no interface information found")

    pipeline = pipelines[p]

    def query_huggingface_api(*input):
        payload = pipeline['preprocess'](*input)
        if p == 'automatic-speech-recognition' or p == 'image-classification':
            with open(input[0].name, "rb") as f:
                data = f.read()
        else:
            payload.update({'options': {'wait_for_model': True}})
            data = json.dumps(payload)
        response = requests.request("POST",
                                    api_url,
                                    headers=headers,
                                    data=data)
        if response.status_code == 200:
            if p == 'text-to-speech' or p == 'text-to-image':
                output = pipeline['postprocess'](response)
            else:
                result = response.json()
                output = pipeline['postprocess'](result)
        else:
            raise ValueError(
                "Could not complete request to HuggingFace API, Error {}".
                format(response.status_code))
        return output

    if alias is None:
        query_huggingface_api.__name__ = model_name
    else:
        query_huggingface_api.__name__ = alias

    interface_info = {
        'fn': query_huggingface_api,
        'inputs': pipeline['inputs'],
        'outputs': pipeline['outputs'],
        'title': model_name,
        # 'examples': pipeline['examples'],
    }

    return interface_info
Example #7
 def test_postprocessing_int(self):
     true_label_array = np.array([[[3]]])
     true_label = {outputs.Label.LABEL_KEY: 3}
     out = outputs.Label()
     label = json.loads(out.postprocess(true_label_array))
     self.assertDictEqual(label, true_label)
Example #8
 def test_postprocessing_1D_array_no_confidences(self):
     array = np.array([0.1, 0.2, 0, 0.7, 0])
     true_label = {outputs.Label.LABEL_KEY: 3}
     out = outputs.Label(show_confidences=False)
     label = json.loads(out.postprocess(array))
     self.assertDictEqual(label, true_label)
Example #9
 def test_postprocessing_string(self):
     string = 'happy'
     out = outputs.Label()
     label = json.loads(out.postprocess(string))
     self.assertDictEqual(label, {outputs.Label.LABEL_KEY: string})
Example #10
 def test_path_exists(self):
     out = outputs.Label()
     path = outputs.BASE_OUTPUT_INTERFACE_JS_PATH.format(out.get_name())
     self.assertTrue(os.path.exists(os.path.join(PACKAGE_NAME, path)))
Example #11
def get_huggingface_interface(model_name, api_key, alias):
    api_url = "https://api-inference.huggingface.co/models/{}".format(
        model_name)
    if api_key is not None:
        headers = {"Authorization": f"Bearer {api_key}"}
    else:
        headers = {}

    # Check whether the model exists and, if so, get its pipeline tag
    response = requests.request("GET", api_url, headers=headers)
    assert response.status_code == 200, "Invalid model name or src"
    p = response.json().get('pipeline_tag')

    pipelines = {
        'question-answering': {
            'inputs': [
                inputs.Textbox(label="Context", lines=7),
                inputs.Textbox(label="Question")
            ],
            'outputs':
            [outputs.Textbox(label="Answer"),
             outputs.Label(label="Score")],
            'preprocess':
            lambda c, q: {
                "inputs": {
                    "context": c,
                    "question": q
                }
            },
            'postprocess':
            lambda r: (r["answer"], r["score"]),
            # 'examples': [['My name is Sarah and I live in London', 'Where do I live?']]
        },
        'text-generation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["generated_text"],
            # 'examples': [['My name is Clara and I am']]
        },
        'summarization': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Summary"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["summary_text"]
        },
        'translation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Translation"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["translation_text"]
        },
        'text2text-generation': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Textbox(label="Generated Text"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: r[0]["generated_text"]
        },
        'text-classification': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': outputs.Label(label="Classification"),
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: {
                'Negative': r[0][0]["score"],
                'Positive': r[0][1]["score"]
            }
        },
        'fill-mask': {
            'inputs': inputs.Textbox(label="Input"),
            'outputs': "label",
            'preprocess': lambda x: {
                "inputs": x
            },
            'postprocess': lambda r: {i["token_str"]: i["score"]
                                      for i in r}
        },
        'zero-shot-classification': {
            'inputs': [
                inputs.Textbox(label="Input"),
                inputs.Textbox(label="Possible class names ("
                               "comma-separated)"),
                inputs.Checkbox(label="Allow multiple true classes")
            ],
            'outputs':
            "label",
            'preprocess':
            lambda i, c, m: {
                "inputs": i,
                "parameters": {
                    "candidate_labels": c,
                    "multi_class": m
                }
            },
            'postprocess':
            lambda r:
            {r["labels"][i]: r["scores"][i]
             for i in range(len(r["labels"]))}
        },
        'automatic-speech-recognition': {
            'inputs': inputs.Audio(label="Input", source="upload",
                                   type="file"),
            'outputs': outputs.Textbox(label="Output"),
            'preprocess': lambda i: {
                "inputs": i
            },
            'postprocess': lambda r: r["text"]
        },
        'image-classification': {
            'inputs': inputs.Image(label="Input Image", type="file"),
            'outputs': outputs.Label(label="Classification"),
            'preprocess': lambda i: i,
            'postprocess':
            lambda r: {i["label"].split(", ")[0]: i["score"]
                       for i in r}
        }
    }

    if p is None or p not in pipelines:
        print("Warning: no interface information found")

    pipeline = pipelines[p]

    def query_huggingface_api(*input):
        payload = pipeline['preprocess'](*input)
        if p == 'automatic-speech-recognition' or p == 'image-classification':
            with open(input[0].name, "rb") as f:
                data = f.read()
        else:
            payload.update({'options': {'wait_for_model': True}})
            data = json.dumps(payload)
        response = requests.request("POST",
                                    api_url,
                                    headers=headers,
                                    data=data)
        if response.status_code == 200:
            result = json.loads(response.content.decode("utf-8"))
            output = pipeline['postprocess'](result)
        else:
            raise ValueError(
                "Could not complete request to HuggingFace API, Error {}".
                format(response.status_code))
        return output

    if alias is None:
        query_huggingface_api.__name__ = model_name
    else:
        query_huggingface_api.__name__ = alias

    interface_info = {
        'fn': query_huggingface_api,
        'inputs': pipeline['inputs'],
        'outputs': pipeline['outputs'],
        'title': model_name,
        # 'examples': pipeline['examples'],
    }

    return interface_info
Example #12
 def test_postprocessing_int(self):
     label = 3
     true_label = {outputs.Label.LABEL_KEY: '3'}
     out = outputs.Label()
     label = out.postprocess(label)
     self.assertDictEqual(label, true_label)
Example #13
 def test_postprocessing_array(self):
     array = np.array([0.1, 0.2, 0, 0.7, 0])
     out = outputs.Label()
     self.assertRaises(ValueError, out.postprocess, array)