Ejemplo n.º 1
0
def build_short_mode_text(method_info):
  """Build the short-mode text describing a method.

  The class name, the method name and every parameter type are each passed
  through ``ParserUtil.extractNLwords`` and the resulting words joined with
  single spaces; the return type is only lower-cased.

  Args:
    method_info: mapping with the keys 'methodName', 'className',
      'returnType' (a string) and 'paramTypes' (an iterable of type names).

  Returns:
    "" when no words are extracted from the method name; otherwise
    "<class> | <method> | <return type> | <param types>", where the
    parameter types are separated by " , ".
  """
  def words_of(identifier):
    # Space-joined words that ParserUtil extracts from one identifier.
    return " ".join(ParserUtil.extractNLwords([identifier]))

  method_words = words_of(method_info['methodName'])
  if not method_words:
    return ""

  # Fields are evaluated in the order class, return type, parameter types.
  fields = [
    words_of(method_info['className']),
    method_words,
    method_info['returnType'].lower(),
    " , ".join(words_of(param_type) for param_type in method_info['paramTypes']),
  ]
  return " | ".join(fields)
Ejemplo n.º 2
0
    def search_snippets(self, query, with_score=False, cur_snippet=None):
        """Run ``query`` through ``self.es`` and re-rank the hits with ``self.bert_manager``.

        In short mode (``self.short_mode`` truthy) the ranker compares
        "<class words> | <method words>" texts built from ``cur_snippet`` and
        from each hit's ``_source``. Otherwise it compares the query string
        found inside ``query`` with each hit's space-joined ``tokenSequence``.

        Args:
            query: search request body. Outside short mode, ``query['query']``
                must contain either a ``multi_match`` clause or a ``bool``
                clause whose first ``should`` entry is a ``multi_match``.
            with_score: when true, return ``(source, score)`` tuples instead
                of bare ``_source`` documents.
            cur_snippet: mapping with 'className' and 'methodName'; required
                in short mode, unused otherwise.

        Returns:
            The hits' ``_source`` documents (or ``(source, score)`` tuples)
            ordered by descending ranker score; ties keep the search order.

        Raises:
            TypeError: short mode is active but ``cur_snippet`` is ``None``.
            Exception: outside short mode, ``query['query']`` has neither a
                ``multi_match`` nor a ``bool`` clause.
        """
        if self.short_mode and cur_snippet is None:
            # Fail before the search round trip instead of crashing later with
            # an opaque "'NoneType' object is not subscriptable".
            raise TypeError(
                "search_snippets: cur_snippet is required when short_mode is enabled")

        response = self.es.search(index=[self.index],
                                  doc_type=self.type,
                                  body=query)
        hits = response['hits']['hits']

        if self.short_mode:

            def name_text(info):
                # "<class words> | <method words>" for one snippet mapping.
                return " | ".join(
                    " ".join(ParserUtil.extractNLwords([info[field]]))
                    for field in ('className', 'methodName'))

            query_snippet_text = name_text(cur_snippet)
            candidate_texts = [name_text(hit['_source']) for hit in hits]
        else:
            clause = query['query']
            if "multi_match" in clause:
                # BasicQueryBuilder
                query_snippet_text = clause['multi_match']['query']
            elif "bool" in clause:
                # CombineQueryBuilder
                query_snippet_text = clause['bool']['should'][0][
                    'multi_match']['query']
            else:
                raise Exception(
                    "search_snippets: unsupported query clause(s) %r; "
                    "expected 'multi_match' or 'bool'" % (list(clause),))
            candidate_texts = [
                " ".join(hit['_source']['tokenSequence']) for hit in hits
            ]

        scores = self.bert_manager.rank(query_snippet_text, candidate_texts)
        # sorted() is stable, so equal scores keep their search order.
        ranked = sorted(enumerate(scores),
                        key=lambda pair: pair[1],
                        reverse=True)

        if with_score:
            return [(hits[i]['_source'], score) for i, score in ranked]
        return [hits[i]['_source'] for i, _ in ranked]
Ejemplo n.º 3
0
def process(method_info_dict, data, key1, key2, wf, short_mode):
  """Write JSON-line records for the pair of methods referenced by ``data``.

  ``data`` is modified in place: 'textA' and 'textB' are set to the
  short-mode texts of the two methods. When both are non-empty, ``data`` is
  written to ``wf`` as one JSON line. When ``short_mode`` is false, further
  records are written for the ratios 1, 2, 3, 5 and 10: 'textA' comes from
  the first ``len(seq1) // ratio`` tokens of the first method and 'textB'
  from all tokens of the second method.

  Args:
    method_info_dict: mapping from method id to method info; each info is
      passed to ``build_short_mode_text`` and must carry 'tokenSequence' when
      ``short_mode`` is false.
    data: record whose ``key1`` / ``key2`` entries are method ids.
    key1: key in ``data`` holding the first method id.
    key2: key in ``data`` holding the second method id.
    wf: writable text stream that receives the JSON lines.
    short_mode: when true, only the short-mode record is written.

  Returns:
    None. Nothing is written and ``data`` is left untouched when either
    method id is missing from ``method_info_dict``.
  """
  if data[key1] not in method_info_dict or data[key2] not in method_info_dict:
    return
  info_a = method_info_dict[data[key1]]
  info_b = method_info_dict[data[key2]]
  data['textA'] = build_short_mode_text(info_a)
  data['textB'] = build_short_mode_text(info_b)

  if data['textA'] != "" and data['textB'] != "":
    # The short-mode record is written regardless of the short_mode flag.
    wf.write('%s\n' % json.dumps(data, ensure_ascii=False))
  if short_mode:
    return

  seq1 = info_a['tokenSequence']
  seq2 = info_b['tokenSequence']

  # text_b does not depend on the ratio, so extract and validate it once
  # rather than on every iteration.
  # NOTE(review): assumes ParserUtil.extractNLwords is deterministic and has
  # no side effects -- confirm.
  text_b = " ".join(ParserUtil.extractNLwords(seq2))
  if text_b == "" or text_b.endswith("| "):
    return

  for ratio in (1, 2, 3, 5, 10):
    text_a = " ".join(ParserUtil.extractNLwords(seq1[:len(seq1) // ratio]))
    if text_a == "" or text_a.endswith("| "):
      continue
    data_copy = copy.deepcopy(data)
    data_copy['textA'] = text_a
    data_copy['textB'] = text_b
    data_copy['ratio'] = ratio
    wf.write('%s\n' % json.dumps(data_copy, ensure_ascii=False))
Ejemplo n.º 4
0
 def diag_reset_adsl_line(self):
     """Call ``RequestsUtil.diagnostic_dsl_html(4)`` and return the parsed response.

     NOTE(review): going by the method name, code 4 presumably resets the
     ADSL line -- confirm against the device API.
     """
     page = RequestsUtil.diagnostic_dsl_html(4)
     return ParserUtil.parse_diagnostic_responce(page)
Ejemplo n.º 5
0
 def diag_get_dls_line_status(self):
     """Call ``RequestsUtil.diagnostic_dsl_html(3)`` and return the parsed response.

     NOTE(review): going by the method name, code 3 presumably queries the
     DSL line status -- confirm against the device API.
     """
     page = RequestsUtil.diagnostic_dsl_html(3)
     return ParserUtil.parse_diagnostic_responce(page)
Ejemplo n.º 6
0
 def diag_get_atm_loopback_test(self):
     """Call ``RequestsUtil.diagnostic_dsl_html(2)`` and return the parsed response.

     NOTE(review): going by the method name, code 2 presumably runs the ATM
     loopback test -- confirm against the device API.
     """
     page = RequestsUtil.diagnostic_dsl_html(2)
     return ParserUtil.parse_diagnostic_responce(page)
Ejemplo n.º 7
0
 def diag_get_atm_status(self):
     """Call ``RequestsUtil.diagnostic_dsl_html(1)`` and return the parsed response.

     NOTE(review): going by the method name, code 1 presumably queries the
     ATM status -- confirm against the device API.
     """
     page = RequestsUtil.diagnostic_dsl_html(1)
     return ParserUtil.parse_diagnostic_responce(page)
Ejemplo n.º 8
0
 def traceroute_ipv6(self, target):
     """Call ``RequestsUtil.diagnostic_ping_html(target, 3)`` and return the parsed response.

     NOTE(review): going by the method name, mode 3 presumably selects an
     IPv6 traceroute to ``target`` -- confirm against the device API.
     """
     page = RequestsUtil.diagnostic_ping_html(target, 3)
     return ParserUtil.parse_diagnostic_responce(page)
Ejemplo n.º 9
0
 def ping(self, target):
     """Call ``RequestsUtil.diagnostic_ping_html(target, 1)`` and return the parsed response.

     NOTE(review): going by the method name, mode 1 presumably selects a
     ping of ``target`` -- confirm against the device API.
     """
     page = RequestsUtil.diagnostic_ping_html(target, 1)
     return ParserUtil.parse_diagnostic_responce(page)
Ejemplo n.º 10
0
    def get_status(self, force=None):
        """Return the parsed status, reusing the cached value while it is fresh.

        The cached ``self.status`` is returned only when ``force`` is falsy,
        the cached value is truthy, and ``self.status_time + CACHE_TIME`` is
        not earlier than ``time()``. Otherwise the status page is fetched and
        parsed again, and both ``self.status`` and ``self.status_time`` are
        updated.

        Args:
            force: when truthy, always refetch.

        Returns:
            The value stored in ``self.status``.
        """
        if not force and self.status:
            expires_at = self.status_time + CACHE_TIME
            if not expires_at < time():
                return self.status

        page = RequestsUtil.get_status_html()
        self.status = ParserUtil.parse_status(page)
        self.status_time = time()
        return self.status
Ejemplo n.º 11
0
    def get_connections(self, force=None):
        """Return the parsed connections, reusing the cached value while it is fresh.

        The cached ``self.connections`` is returned only when ``force`` is
        falsy, the cached value is truthy, and
        ``self.connections_time + CACHE_TIME`` is not earlier than ``time()``.
        Otherwise the connections page is fetched and parsed again, and both
        ``self.connections`` and ``self.connections_time`` are updated.

        Args:
            force: when truthy, always refetch.

        Returns:
            The value stored in ``self.connections``.
        """
        if not force and self.connections:
            expires_at = self.connections_time + CACHE_TIME
            if not expires_at < time():
                return self.connections

        page = RequestsUtil.get_connections_html()
        self.connections = ParserUtil.parse_connections(page)
        self.connections_time = time()
        return self.connections
Ejemplo n.º 12
0
                search_results = retriever.search_snippets(basic_query)
            search_results = deduplicate(snippet, search_results)

            if user_bert:
                if short_mode:
                    ## short-bert mode
                    query_snippet_text = build_short_mode_text(snippet)
                    candidate_texts = [
                        build_short_mode_text(res) for res in search_results
                    ]
                    scores = short_bert_manager.rank(query_snippet_text,
                                                     candidate_texts)
                else:
                    ## full-bert mode
                    query_snippet_text = " ".join(
                        ParserUtil.extractNLwords(text_tokens))
                    candidate_texts = [
                        " ".join(
                            ParserUtil.extractNLwords(res['tokenSequence']))
                        for res in search_results
                    ]
                    scores = full_bert_manager.rank(query_snippet_text,
                                                    candidate_texts)
                sorted_scores = sorted([(i, score)
                                        for i, score in enumerate(scores)],
                                       key=lambda d: d[1],
                                       reverse=True)
                # 如果bert得分不高,就返回文本匹配的结果
                tmp_indices = []
                for i, score in sorted_scores[:max_size]:
                    if score >= 0.4:
Ejemplo n.º 13
0
    def search_codes(self):
        """Handle a code-search request and return the results as a JSON string.

        The JSON request body is read from ``cherrypy.request`` and must
        provide 'codeContextTokens', 'snippet' (with 'tokenSequence' and,
        when BERT re-ranking runs, 'lineCodes', 'className' and
        'methodName') and 'useBert'.

        Steps:
          1. Build an extended query (when ``self.do_extend``) or a basic
             query asking for ``self.max_size * 10`` candidates, and run it
             through ``self.retriever``.
          2. De-duplicate the scored hits against the query snippet.
          3. If both the request and ``self.args.use_bert`` enable it,
             re-rank with the short or the full BERT manager.
          4. Keep at most ``self.max_size`` results, print a debug summary,
             and serialise.

        Returns:
            JSON text of a list of ``{'methodInfo': ..., 'score': float}``.
        """
        content_length = int(cherrypy.request.headers['Content-Length'])
        payload = json.loads(cherrypy.request.body.read(content_length))
        context_tokens = payload['codeContextTokens']
        snippet = payload['snippet']
        bert_requested = payload['useBert']

        text_tokens = snippet['tokenSequence']
        candidate_limit = self.max_size * 10
        if self.do_extend:
            predicted_tokens = self.lm_infer.infer(context_tokens,
                                                   text_tokens,
                                                   self.extend_token_len)
            es_query = self.extend_query_builder.build_query(
                text_tokens, predicted_tokens, candidate_limit)
        else:
            es_query = self.basic_query_builder.build_query(
                text_tokens, candidate_limit)
        hits = self.retriever.search_snippets(es_query, with_score=True)

        distinct_results = deduplicate(snippet, hits, with_score=True)

        if bert_requested and self.args.use_bert:
            if len(snippet['lineCodes']) <= 2:
                # Short-bert mode: compare "<class words> | <method words>".

                def as_text(info):
                    return " | ".join(
                        " ".join(ParserUtil.extractNLwords([info[field]]))
                        for field in ('className', 'methodName'))

                query_text = as_text(snippet)
                candidate_texts = [
                    as_text(info) for info, _ in distinct_results
                ]
                ranker = self.short_bert_manager
            else:
                # Full-bert mode: compare the words of whole token sequences.

                def as_text(info):
                    return " ".join(
                        ParserUtil.extractNLwords(info['tokenSequence']))

                query_text = as_text(snippet)
                candidate_texts = [
                    as_text(info) for info, _ in distinct_results
                ]
                ranker = self.full_bert_manager
            scores = ranker.rank(query_text, candidate_texts)

            # Candidate positions ordered by descending score; ties keep
            # their pre-rerank order because sorted() is stable.
            ranking = sorted(enumerate(scores),
                             key=lambda pair: pair[1],
                             reverse=True)

            chosen = []
            for position, score in ranking[:self.max_size]:
                if score >= 0.0:
                    chosen.append(position)
                    continue
                # First score below 0.0: stop, then pad with the leading
                # pre-rerank positions that were not selected yet.
                already_chosen = set(chosen)
                pad_span = min(self.max_size, len(ranking))
                chosen.extend(idx for idx in range(pad_span)
                              if idx not in already_chosen)
                break
            distinct_results = [distinct_results[idx] for idx in chosen]

        distinct_results = [
            {'methodInfo': entry[0], 'score': float(entry[1])}
            for entry in distinct_results[:self.max_size]
        ]
        response = json.dumps(distinct_results)

        # Debug trace: query tokens, result count, (rank, methodId) pairs.
        print(" ".join(text_tokens))
        print("res size:", len(distinct_results))
        method_ids = [(rank, entry['methodInfo']['methodId'])
                      for rank, entry in enumerate(distinct_results, start=1)]
        print(method_ids)
        print('=' * 80)

        return response