コード例 #1
0
def build_query_encoder(problem, data_dir, embed_code=False):
    """Create a callable that encodes queries for a given problem.

    Args:
      problem: The name of the T2T problem to use.
      data_dir: Directory containing the data. This should include the
        vocabulary.
      embed_code: Flag forwarded to `query.encode_query`; selects between
        natural-language and code embeddings (presumably True means code --
        confirm against `query.encode_query`).

    Returns:
      A `functools.partial` wrapping `query.encode_query` with the encoder
      and `embed_code` already bound as its first two positional arguments.
    """
    text_encoder = query.get_encoder(problem, data_dir)
    # Bind the encoder and the flag now; callers supply the remaining args.
    return functools.partial(query.encode_query, text_encoder, embed_code)
コード例 #2
0
  def process(self, element, *_args, **_kwargs):
    """Attach an encoded serving payload to a function record.

    The value stored in `element` under `self.function_tokens_key`
    (a tokenized function string) is encoded into a base64 string of
    TFExample binary format and stored under `self.instances_key`, in a
    layout ready for consumption by TensorFlow SavedModel estimators.
    The encoder is obtained from the Tensor2Tensor problem and data
    directory held on this instance.

    The incoming dict is modified in place and then yielded.

    Args:
      element: A Python dict of the form,
        {
          "nwo": "STRING",
          "path": "STRING",
          "function_name": "STRING",
          "lineno": "STRING",
          "original_function": "STRING",
          "function_tokens": "STRING",
          "docstring_tokens": "STRING",
        }
      *_args: Ignored.
      **_kwargs: Ignored.

    Yields:
      The same dict with one additional entry,
        {
          ...
          "instances": [
            {
              "input": {
                "b64": "STRING",
              }
            }
          ]
        }
    """
    text_encoder = get_encoder(self.problem, self.data_dir)
    # `.get` is used, so a missing tokens key passes None to encode_query.
    function_tokens = element.get(self.function_tokens_key)
    b64_example = encode_query(text_encoder, function_tokens)

    instance = {'input': {'b64': b64_example}}
    element[self.instances_key] = [instance]
    yield element
コード例 #3
0
def start_search_server(argv=None):
    """Launch the code-search REST server.

    Starts a Flask server which maintains an in-memory index and a
    reverse-lookup database of Python files, queryable through a simple
    REST API. The same server also serves the UI.

    Steps performed, in order:
      1. Parse command line arguments.
      2. Ensure the temporary directory exists.
      3. Load every row of the lookup CSV into memory.
      4. Copy the index file into the temporary directory unless a file
         with the same base name is already there.
      5. Build the embedding function, search engine and server, then run
         the server.

    Args:
      argv: A list of strings representing command line arguments.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    args = arguments.parse_arguments(argv)

    if not os.path.isdir(args.tmp_dir):
        os.makedirs(args.tmp_dir)

    # NOTE(review): verbosity is set to INFO above, so these debug messages
    # are presumably not emitted -- confirm whether that is intended.
    tf.logging.debug('Reading {}'.format(args.lookup_file))
    with tf.gfile.Open(args.lookup_file) as lookup_file:
        lookup_data = list(csv.reader(lookup_file))

    index_basename = os.path.basename(args.index_file)
    local_index_path = os.path.join(args.tmp_dir, index_basename)

    tf.logging.debug('Reading {}'.format(args.index_file))
    # Reuse a previously copied index if one is already in tmp_dir.
    if not os.path.isfile(local_index_path):
        tf.gfile.Copy(args.index_file, local_index_path)

    text_encoder = query.get_encoder(args.problem, args.data_dir)
    encode_fn = functools.partial(query.encode_query, text_encoder)
    embedding_fn = functools.partial(embed_query, encode_fn, args.serving_url)

    engine = CodeSearchEngine(local_index_path, lookup_data, embedding_fn)
    server = CodeSearchServer(
        engine,
        args.ui_dir,
        host=args.host,
        port=args.port,
    )
    server.run()
コード例 #4
0
ファイル: search_engine.py プロジェクト: inc0/examples
    def embed(self, query_str):
        """Get the embedding of a query from TF Serving.

        The query is encoded for the TF Serving service, POSTed as JSON to
        `self._serving_url`, and the JSON response is post-processed so
        that each entry of `predictions` is replaced by its `outputs`
        value.

        Args:
          query_str: The query to embed; passed as-is to `encode_query`.

        Returns:
          The decoded JSON response, with `result['predictions']` rewritten
          to a list of the per-prediction `outputs` values.
        """
        text_encoder = get_encoder(self._problem, self._data_dir)
        b64_query = encode_query(text_encoder, query_str)
        payload = {"instances": [{"input": {"b64": b64_query}}]}

        # NOTE(review): no timeout or HTTP status check on this request --
        # confirm whether callers rely on that.
        http_response = requests.post(
            url=self._serving_url,
            headers={'content-type': 'application/json'},
            data=json.dumps(payload),
        )

        result = http_response.json()
        # Flatten each prediction down to just its 'outputs' field.
        outputs = []
        for prediction in result['predictions']:
            outputs.append(prediction['outputs'])
        result['predictions'] = outputs
        return result
コード例 #5
0
    def process(self, element):
        """Add an encoded `instances` entry to `element` and yield it.

        The value under `element['function_tokens']` is encoded with the
        encoder for `self.problem` / `self.data_dir`, and the result is
        stored as `element['instances'] = [{'input': {'b64': ...}}]`
        (presumably a base64 string, given the key name -- confirm against
        `encode_query`). The dict is modified in place.

        Args:
          element: A dict that must contain a 'function_tokens' key.

        Yields:
          The same dict, now carrying the 'instances' entry.
        """
        text_encoder = get_encoder(self.problem, self.data_dir)
        b64_payload = encode_query(text_encoder, element['function_tokens'])

        instance = {'input': {'b64': b64_payload}}
        element['instances'] = [instance]
        yield element