def build_query_encoder(problem, data_dir, embed_code=False):
    """Return a callable that encodes queries for the given problem.

    The encoder is looked up once via ``query.get_encoder`` and then bound,
    together with ``embed_code``, as the leading positional arguments of
    ``query.encode_query``.

    Args:
      problem: The name of the T2T problem to use.
      data_dir: Directory containing the data. This should include the
        vocabulary.
      embed_code: Whether to compute embeddings for natural language or
        code. Forwarded unchanged to ``query.encode_query``.

    Returns:
      A ``functools.partial`` wrapping ``query.encode_query`` with the
      encoder and ``embed_code`` pre-bound; callers supply the remaining
      arguments.
    """
    text_encoder = query.get_encoder(problem, data_dir)
    return functools.partial(query.encode_query, text_encoder, embed_code)
def process(self, element, *_args, **_kwargs):
    """Encode one function record and attach the result to it.

    The tokens stored under ``self.function_tokens_key`` are encoded with
    the encoder returned by ``get_encoder(self.problem, self.data_dir)``
    and the encoded value is stored under ``self.instances_key``. Per the
    original documentation, the encoded value is a base64 string of a
    TFExample in binary format, laid out for consumption by TensorFlow
    SavedModel estimators.

    The incoming dict is updated in place and then yielded; extra
    positional and keyword arguments are accepted and ignored.

    Args:
      element: A Python dict of the form,
        {
          "nwo": "STRING",
          "path": "STRING",
          "function_name": "STRING",
          "lineno": "STRING",
          "original_function": "STRING",
          "function_tokens": "STRING",
          "docstring_tokens": "STRING",
        }

    Yields:
      The same dict with one additional entry, of the form
        {
          "nwo": "STRING",
          "path": "STRING",
          "function_name": "STRING",
          "lineno": "STRING",
          "original_function": "STRING",
          "function_tokens": "STRING",
          "docstring_tokens": "STRING",
          "instances": [
            {
              "input": {
                "b64": "STRING",
              }
            }
          ]
        }
    """
    function_encoder = get_encoder(self.problem, self.data_dir)
    # .get() means a record without the tokens key passes None to the
    # encoder instead of raising KeyError here.
    tokens = element.get(self.function_tokens_key)
    instance = {'input': {'b64': encode_query(function_encoder, tokens)}}
    element[self.instances_key] = [instance]
    yield element
def start_search_server(argv=None):
    """Build the code search engine and run the search server.

    Per the original documentation this starts a Flask REST server that
    keeps an in-memory index and a reverse-lookup database of Python files,
    queryable through a simple REST API, and also serves the UI.

    Steps performed here:
      1. Parse the command line arguments.
      2. Create ``args.tmp_dir`` if it is not already a directory.
      3. Load every row of the CSV lookup file into memory.
      4. Copy the index file into ``args.tmp_dir`` unless a file with the
         same base name is already there.
      5. Assemble the query encoder and embedding function, then construct
         and run the server.

    Args:
      argv: A list of strings representing command line arguments.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    args = arguments.parse_arguments(argv)

    if not os.path.isdir(args.tmp_dir):
        os.makedirs(args.tmp_dir)

    # NOTE(review): verbosity is set to INFO above, so the debug-level
    # messages below are presumably not emitted -- confirm that is intended.
    tf.logging.debug('Reading {}'.format(args.lookup_file))
    with tf.gfile.Open(args.lookup_file) as lookup_file:
        lookup_data = list(csv.reader(lookup_file))

    index_basename = os.path.basename(args.index_file)
    tmp_index_file = os.path.join(args.tmp_dir, index_basename)

    tf.logging.debug('Reading {}'.format(args.index_file))
    # An existing local copy is reused rather than copied again.
    if not os.path.isfile(tmp_index_file):
        tf.gfile.Copy(args.index_file, tmp_index_file)

    # Layer the callables: encoder -> query encoding -> embedding lookup
    # against the serving URL.
    encoder = query.get_encoder(args.problem, args.data_dir)
    query_encoder = functools.partial(query.encode_query, encoder)
    embedding_fn = functools.partial(
        embed_query, query_encoder, args.serving_url)

    search_engine = CodeSearchEngine(tmp_index_file, lookup_data, embedding_fn)
    CodeSearchServer(
        search_engine,
        args.ui_dir,
        host=args.host,
        port=args.port,
    ).run()
def embed(self, query_str):
    """Get the embedding for a query from the serving endpoint.

    The query string is encoded with the encoder for ``self._problem`` and
    ``self._data_dir``, wrapped in an ``{"instances": [...]}`` JSON body,
    and POSTed to ``self._serving_url``.

    Args:
      query_str: The query to embed.

    Returns:
      The decoded JSON response, with its ``'predictions'`` entry replaced
      by a list holding the ``'outputs'`` value of each prediction.
    """
    encoder = get_encoder(self._problem, self._data_dir)
    b64_query = encode_query(encoder, query_str)
    payload = json.dumps({"instances": [{"input": {"b64": b64_query}}]})

    response = requests.post(
        url=self._serving_url,
        headers={'content-type': 'application/json'},
        data=payload,
    )
    result = response.json()

    # Keep only the 'outputs' field of each prediction.
    outputs = []
    for prediction in result['predictions']:
        outputs.append(prediction['outputs'])
    result['predictions'] = outputs
    return result
def process(self, element):
    """Encode an element's function tokens and attach them as instances.

    Reads ``element['function_tokens']``, encodes it with the encoder
    returned by ``get_encoder(self.problem, self.data_dir)``, and stores
    the result under ``element['instances']`` as
    ``[{'input': {'b64': <encoded>}}]``. The element is updated in place.

    Args:
      element: A dict with a ``'function_tokens'`` entry.

    Yields:
      The same dict, now carrying the ``'instances'`` entry.

    Raises:
      KeyError: If ``element`` is a dict without a ``'function_tokens'``
        key.
    """
    function_encoder = get_encoder(self.problem, self.data_dir)
    tokens = element['function_tokens']
    instance = {'input': {'b64': encode_query(function_encoder, tokens)}}
    element['instances'] = [instance]
    yield element