Example #1
def search(self, query, language='python', topk=5):
    """Return the top-k code definitions most similar to a natural-language query."""
    predictions = []
    # Embed the query with the same model that produced the code embeddings.
    query_embedding = self.model.get_query_representations([{
        'docstring_tokens': tokenize_docstring_from_string(query),
        'language': language,
    }])[0]
    # Approximate nearest-neighbour lookup in the per-language Annoy index.
    idxs, distances = self.indices[language].get_nns_by_vector(
        query_embedding, topk, search_k=10000, include_distances=True)
    for i, idx in enumerate(idxs):
        definition = self.definitions[language][idx]
        predictions.append({
            "id": definition['sha'],
            "name": definition['identifier'],
            "func": definition['function'],
            "languages": [language],
            "scores": [{
                "name": "similarity",
                "score": distances[i],
            }],
        })
    return predictions
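
Example #1 assumes that self.indices[language] is a prebuilt Annoy index over the code embeddings and that self.definitions[language] is the list of definitions in the same order. A minimal sketch of how such an index might be built is shown below; the helper name build_language_index, the 'angular' metric, and the assumption that get_code_representations returns a 2-D NumPy array are illustrative, not taken from the example.

from annoy import AnnoyIndex

def build_language_index(model, definitions, language, n_trees=200):
    """Hypothetical helper: embed code definitions and index them with Annoy."""
    code_representations = model.get_code_representations([{
        'code': d['function'],
        'code_tokens': d['function_tokens'],
        'language': language,
    } for d in definitions])
    # Assumes a 2-D array of embeddings; 'angular' is an assumed distance metric.
    index = AnnoyIndex(code_representations.shape[1], 'angular')
    for i, vector in enumerate(code_representations):
        index.add_item(i, vector)
    index.build(n_trees)
    return index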
Example #2
def query_model(query, model, indices, language, topk=100):
    """Embed a natural-language query and return the top-k nearest code items."""
    query_embedding = model.get_query_representations([{
        'docstring_tokens': tokenize_docstring_from_string(query),
        'language': language,
    }])[0]
    # `indices` is expected to be an Annoy index built over the code embeddings.
    idxs, distances = indices.get_nns_by_vector(
        query_embedding, topk, include_distances=True)
    return idxs, distances
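
A possible way to call query_model from Example #2, assuming an Annoy index whose item ids line up with a definitions list; model, annoy_index, and definitions are placeholders that must already exist in the caller's scope.

# Illustrative usage only: item i in annoy_index corresponds to definitions[i].
idxs, distances = query_model('read a csv file into a dataframe',
                              model, annoy_index, 'python', topk=5)
for idx, distance in zip(idxs, distances):
    print(definitions[idx]['identifier'], distance)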
Example #3
                    language), 'rb'))
        # Each loaded definition exposes: 'nwo', 'sha', 'path', 'language', 'identifier',
        # 'parameters', 'argument_list', 'return_statement', 'docstring', 'docstring_summary',
        # 'docstring_tokens', 'function', 'function_tokens', 'url', 'score'
        indexes = [{
            'code': d['function'],
            'code_tokens': d['function_tokens'],
            'language': d['language'],
        } for d in tqdm(definitions)]
        code_representations = model.get_code_representations(indexes)

        # Embed every query with the same model, then retrieve with exact KNN (cosine distance).
        query_embeddings = []
        for query in queries:
            query_embedding = model.get_query_representations([{
                'docstring_tokens': tokenize_docstring_from_string(query),
                'language': language,
            }])[0]
            query_embeddings.append(query_embedding)

        nn = NearestNeighbors(n_neighbors=100, metric='cosine', n_jobs=-1)
        nn.fit(code_representations)
        _, nearest_neighbor_indices = nn.kneighbors(query_embeddings)

        for query_idx, query in enumerate(queries):
            for query_nearest_code_idx in nearest_neighbor_indices[query_idx, :]:
                predictions.append({
                    'query': query,
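
Example #3 breaks off in the middle of the predictions.append call. For reference, the scikit-learn KNN step it relies on can be exercised on its own; the sketch below uses random vectors in place of the real code and query embeddings, so the numbers are meaningless but the API calls match.

# Self-contained sketch of the exact-KNN step from Example #3, with random vectors
# standing in for the real code and query embeddings (illustration only).
import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
code_representations = rng.normal(size=(1000, 128))  # stand-in for model.get_code_representations(...)
query_embeddings = rng.normal(size=(3, 128))         # stand-in for model.get_query_representations(...)

nn = NearestNeighbors(n_neighbors=5, metric='cosine', n_jobs=-1)
nn.fit(code_representations)
distances, nearest_neighbor_indices = nn.kneighbors(query_embeddings)
print(nearest_neighbor_indices.shape)  # (3, 5): five nearest code vectors per query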