Example #1
  def process(self, element, *_args, **_kwargs):
    """Encode the function instance.

    This DoFn takes a tokenized function string and
    encodes it into a base64 string of the TFExample
    binary format. The "function_tokens" are encoded
    and stored under the "instances" key in a format
    ready for consumption by TensorFlow SavedModel
    estimators. The encoder comes from the
    Tensor2Tensor problem provided in the constructor.

    Args:
      element: A Python dict of the form,
        {
          "nwo": "STRING",
          "path": "STRING",
          "function_name": "STRING",
          "lineno": "STRING",
          "original_function": "STRING",
          "function_tokens": "STRING",
          "docstring_tokens": "STRING",
        }

    Yields:
      An updated Python dict of the form
        {
          "nwo": "STRING",
          "path": "STRING",
          "function_name": "STRING",
          "lineno": "STRING",
          "original_function": "STRING",
          "function_tokens": "STRING",
          "docstring_tokens": "STRING",
          "instances": [
            {
              "input": {
                "b64": "STRING",
              }
            }
          ]
        }
    """
    encoder = get_encoder(self.problem, self.data_dir)
    encoded_function = encode_query(encoder, element.get(self.function_tokens_key))

    element[self.instances_key] = [{'input': {'b64': encoded_function}}]
    yield element
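
For context, here is a minimal sketch of how a DoFn like the one above might be wired into an Apache Beam pipeline. The wrapper class EncodeFunctionTokens, its constructor arguments, the file paths, and the inline base64 placeholder (standing in for get_encoder/encode_query) are assumptions for illustration, not part of the original example.

import base64
import json

import apache_beam as beam


class EncodeFunctionTokens(beam.DoFn):
  """Hypothetical wrapper around a process() method like the one above."""

  def __init__(self, problem, data_dir):
    self.problem = problem
    self.data_dir = data_dir

  def process(self, element, *_args, **_kwargs):
    # Placeholder for get_encoder()/encode_query() from the original module:
    # base64-encode the raw token string so the sketch stays self-contained.
    encoded_function = base64.b64encode(
        element['function_tokens'].encode('utf-8')).decode('utf-8')
    element['instances'] = [{'input': {'b64': encoded_function}}]
    yield element


with beam.Pipeline() as pipeline:
  _ = (
      pipeline
      | 'ReadFunctions' >> beam.io.ReadFromText('functions.jsonl')
      | 'ParseJson' >> beam.Map(json.loads)
      | 'EncodeFunctions' >> beam.ParDo(
          EncodeFunctionTokens(problem='github_function_docstring',
                               data_dir='/tmp/data'))
      | 'Serialize' >> beam.Map(json.dumps)
      | 'WriteEncoded' >> beam.io.WriteToText('encoded_functions')
  )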
Example #2
    def embed(self, query_str):
        """Get query embedding from TFServing

    This involves encoding the input query
    for the TF Serving service
    """
        encoder = get_encoder(self._problem, self._data_dir)
        encoded_query = encode_query(encoder, query_str)
        data = {"instances": [{"input": {"b64": encoded_query}}]}

        response = requests.post(url=self._serving_url,
                                 headers={'content-type': 'application/json'},
                                 data=json.dumps(data))

        result = response.json()
        result['predictions'] = [
            preds['outputs'] for preds in result['predictions']
        ]
        return result
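
The request/response shape here follows the TF Serving REST predict API: the client POSTs {"instances": [...]} and receives {"predictions": [...]}. Below is a minimal sketch of that round trip in isolation; the serving URL, model name, and the plain base64 stand-in for encode_query() are assumptions.

import base64
import json

import requests

# Assumed TF Serving REST endpoint of the form /v1/models/<name>:predict.
SERVING_URL = 'http://localhost:8501/v1/models/code_search:predict'

# Stand-in for encode_query(); the real code base64-encodes a serialized
# TFExample produced by the Tensor2Tensor encoder.
encoded_query = base64.b64encode(b'parse json file').decode('utf-8')

payload = {'instances': [{'input': {'b64': encoded_query}}]}
response = requests.post(url=SERVING_URL,
                         headers={'content-type': 'application/json'},
                         data=json.dumps(payload))
response.raise_for_status()

result = response.json()
# Each prediction is assumed to carry an "outputs" field, as in embed() above.
embeddings = [pred['outputs'] for pred in result['predictions']]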
Example #3
  def embed(self, query_str):
    """This function gets the vector embedding from
    the target inference server. The steps involved are
    encoding the input query and decoding the responses
    from the TF Serving service
    TODO(sanyamkapoor): This code is still under construction
    and only representative of the steps needed to build the
    embedding
    """
    encoder, decoder = get_encoder_decoder(self._problem, self._data_dir)
    encoded_query = encode_query(encoder, query_str)
    data = {"instances": [{"input": {"b64": encoded_query}}]}

    response = requests.post(url=self._serving_url,
                             headers={'content-type': 'application/json'},
                             data=json.dumps(data))

    result = response.json()
    for prediction in result['predictions']:
      prediction['outputs'] = decoder.decode(prediction['outputs'])

    return result['predictions'][0]['outputs']
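
Once embed() returns a single embedding vector, a typical follow-up is ranking pre-computed function embeddings by cosine similarity against the query. The sketch below assumes embed() yields a flat list of floats; the function name and top_k parameter are illustrative only.

import numpy as np


def rank_functions(query_embedding, function_embeddings, top_k=5):
  """Return indices of the top_k function embeddings closest to the query."""
  query = np.asarray(query_embedding, dtype=np.float32)
  matrix = np.asarray(function_embeddings, dtype=np.float32)

  # Cosine similarity between the query and every stored function embedding.
  scores = matrix @ query / (
      np.linalg.norm(matrix, axis=1) * np.linalg.norm(query) + 1e-8)
  return np.argsort(-scores)[:top_k].tolist()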
Example #4
    def process(self, element):
        # Build the Tensor2Tensor encoder for the configured problem and
        # encode the function tokens into a base64 TFExample string.
        encoder = get_encoder(self.problem, self.data_dir)
        encoded_function = encode_query(encoder, element['function_tokens'])

        # Store the encoded function under "instances" in the format expected
        # by the TF Serving prediction API, then emit the updated element.
        element['instances'] = [{'input': {'b64': encoded_function}}]
        yield element