def code_method(idx):
    """Extract the package and method columns of ``file_<idx>_Method.csv``.

    Reads ``path_from + 'file_<idx>_Method.csv'``. For every line, everything
    up to and including the first ``;`` is discarded and the remainder is
    split on ``,``. When that yields exactly three fields, the second one is
    recorded as the package and the third as the method; any other shape
    records the ``[]`` placeholder for both. One entry is produced per input
    line.

    The two lists of newline-terminated strings are passed to ``cm.save_txt``
    with the targets ``path_to + 'method/method<idx>.txt'`` and
    ``path_to + 'package/package<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file names.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_Method.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    methods = []
    packages = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [:-1] would cut a real character.
        if row.endswith('\n'):
            row = row[:-1]
        fields = row[row.index(';') + 1:].split(',')
        if len(fields) == 3:
            packages.append(fields[1] + '\n')
            methods.append(fields[2] + '\n')
        else:
            packages.append('[]\n')
            methods.append('[]\n')

    cm.save_txt(path_to + 'method/method' + str(idx) + '.txt', methods)
    cm.save_txt(path_to + 'package/package' + str(idx) + '.txt', packages)
    print('method over: ' + str(idx))
def code_source(idx):
    """Clean the source-code column of ``file_<idx>_SourceCode.csv``.

    Reads ``path_from + 'file_<idx>_SourceCode.csv'``. For every line,
    everything up to and including the first ``;`` is discarded. A remainder
    that is exactly ``[]`` is kept as the ``[]`` placeholder. Otherwise the
    remainder is split on ``;`` and rebuilt as follows:

    * an empty segment contributes a literal ``;``;
    * any other segment is stripped and then dropped when it starts with
      ``//``, ``/*``, ``*`` or ``@``, or ends with ``*`` or ``*/``
      (presumably comment and annotation lines -- confirm against the CSV
      format);
    * surviving segments are concatenated without a separator.

    Runs of spaces are then collapsed to one space and runs of ``;`` to a
    single ``;``. The resulting newline-terminated strings are passed to
    ``cm.save_txt`` with the target ``path_to + 'source/source<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file name.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_SourceCode.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    skipped_prefixes = ('//', '/*', '*', '@')
    skipped_suffixes = ('*', '*/')

    cleaned = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [:-1] would cut a real character
        # (turning an unterminated '[]' row into '[').
        if row.endswith('\n'):
            row = row[:-1]
        payload = row[row.index(';') + 1:]
        if payload == '[]':
            cleaned.append('[]\n')
            continue

        kept = []
        for segment in payload.split(';'):
            if segment == '':
                # Two adjacent separators: emit a ';' in the output.
                kept.append(';')
                continue
            segment = segment.strip()
            if segment.startswith(skipped_prefixes) or segment.endswith(skipped_suffixes):
                continue
            kept.append(segment)

        code = ''.join(kept)
        code = re.sub(' +', ' ', code).strip()
        code = re.sub(';+', ';', code)
        cleaned.append(code + '\n')

    cm.save_txt(path_to + 'source/source' + str(idx) + '.txt', cleaned)
    print('source over: ' + str(idx))
def reranking(path_parsed_queries, path_queries, path_jdk, path_fuzzy_search, path_rerank):
    """Re-rank the fuzzy-search responses of each query and save the top hits.

    Steps, per the calls made below:

    1. Load the parsed queries and the JDK data with ``cm.load_pkl`` and
       reduce every query to the list of the first element of each of its
       items. Load the query texts with ``cm.load_txt``.
    2. For each of the first 99 queries, load ``respond<i>.pkl`` and
       ``cmd<i>.pkl`` from ``path_fuzzy_search``.
    3. Score every response with ``matcher_name`` on its
       ``['_source']['method']`` value, keep the 100 highest-scoring ones,
       then add a second score from ``matcher_api`` on
       ``['_source']['parsed']``.
    4. Sort by (name score, API score), both descending, and collect the
       query text, the ``['_source']['source']`` of up to 10 best responses,
       and a ``'\\n'`` entry.
    5. Pass the collected list to ``cm.save_txt`` with ``path_rerank``.

    Args:
        path_parsed_queries: Passed to ``cm.load_pkl`` to obtain the queries.
        path_queries: Passed to ``cm.load_txt`` to obtain the query texts.
        path_jdk: Passed to ``cm.load_pkl``; the result is forwarded to
            ``matcher_api``.
        path_fuzzy_search: Prefix of the ``respond<i>.pkl`` / ``cmd<i>.pkl``
            files.
        path_rerank: Target handed to ``cm.save_txt``.

    Returns:
        None.
    """
    queries = cm.load_pkl(path_parsed_queries)
    jdk = cm.load_pkl(path_jdk)
    for pos, parsed in enumerate(queries):
        queries[pos] = [item[0] for item in parsed]
    queries_txt = cm.load_txt(path_queries)

    lines = []
    # NOTE(review): the query count is hard-coded, and the first progress
    # message still prints '-50' while the second prints '-99' -- confirm.
    for i in range(99):
        respond = cm.load_pkl(path_fuzzy_search + 'respond' + str(i) + '.pkl')
        query_cmd = cm.load_pkl(path_fuzzy_search + 'cmd' + str(i) + '.pkl')
        query = queries[i]
        query_txt = queries_txt[i]

        # Pass 1: score every response by method name.
        scores = []
        for j, hit in enumerate(respond):
            print(str(i) + '-50, iter-1, ' + str(j) + '-' + str(len(respond)))
            method = hit['_source']['method']
            cmd = query_cmd[j]
            scores.append([j, matcher_name(query, method, cmd)])
        scores.sort(key=operator.itemgetter(1), reverse=True)
        scores = scores[:100]

        # Pass 2: add the API score to the surviving candidates.
        for j, entry in enumerate(scores):
            print(str(i + 1) + '-99, iter-2, ' + str(j) + '-' + str(len(scores)))
            parsed_code = respond[entry[0]]['_source']['parsed']
            entry.append(matcher_api(query, parsed_code, jdk))
        scores.sort(key=operator.itemgetter(1, 2), reverse=True)

        if '\n' not in query_txt:
            query_txt += '\n'
        lines.append(query_txt)
        for entry in scores[:10]:
            lines.append(respond[entry[0]]['_source']['source'])
        # NOTE(review): emitted once per query, also when it has no results --
        # confirm against the expected output layout.
        lines.append('\n')

    cm.save_txt(path_rerank, lines)
def code_return(idx):
    """Extract the return column of ``file_<idx>_Return.csv``.

    Reads ``path_from + 'file_<idx>_Return.csv'``. For every line, everything
    up to and including the first ``;`` is discarded; an empty remainder is
    replaced by the ``[]`` placeholder. The resulting newline-terminated
    strings are passed to ``cm.save_txt`` with the target
    ``path_to + 'return/return<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file name.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_Return.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    returns = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [:-1] would cut a real character.
        if row.endswith('\n'):
            row = row[:-1]
        payload = row[row.index(';') + 1:]
        returns.append((payload or '[]') + '\n')

    cm.save_txt(path_to + 'return/return' + str(idx) + '.txt', returns)
    print('return over: ' + str(idx))
def code_modifier(idx):
    """Extract the modifiers column of ``file_<idx>_Modifiers.csv``.

    Reads ``path_from + 'file_<idx>_Modifiers.csv'``. For every line,
    everything up to and including the first ``;`` is discarded, and the
    first and last character of the remainder are removed (presumably
    enclosing brackets -- confirm against the CSV format). An empty result is
    replaced by the ``[]`` placeholder. The resulting newline-terminated
    strings are passed to ``cm.save_txt`` with the target
    ``path_to + 'modifier/modifier<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file name.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_Modifiers.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    modifiers = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [+2:-2] slice would then cut two
        # real characters instead of one.
        if row.endswith('\n'):
            row = row[:-1]
        payload = row[row.index(';') + 1:]
        inner = payload[1:-1]
        modifiers.append((inner or '[]') + '\n')

    cm.save_txt(path_to + 'modifier/modifier' + str(idx) + '.txt', modifiers)
    print('modifier over: ' + str(idx))
def code_javadoc(idx):
    """Flatten the Javadoc column of ``file_<idx>_Javadoc.csv``.

    Reads ``path_from + 'file_<idx>_Javadoc.csv'``. For every line,
    everything up to and including the first ``;`` is discarded; in the
    remainder every ``;`` becomes a space, runs of spaces are collapsed to
    one, and surrounding whitespace is stripped. The resulting
    newline-terminated strings are passed to ``cm.save_txt`` with the target
    ``path_to + 'javadoc/javadoc<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file name.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_Javadoc.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    javadocs = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [:-1] would cut a real character.
        if row.endswith('\n'):
            row = row[:-1]
        text = row[row.index(';') + 1:].replace(';', ' ')
        text = re.sub(' +', ' ', text).strip()
        javadocs.append(text + '\n')

    cm.save_txt(path_to + 'javadoc/javadoc' + str(idx) + '.txt', javadocs)
    print('javadoc over: ' + str(idx))
def code_comment(idx):
    """Flatten the comment column of ``file_<idx>_Comment.csv``.

    Reads ``path_from + 'file_<idx>_Comment.csv'``. For every line,
    everything up to and including the first ``;`` is discarded, and the
    first and last character of the remainder are removed (presumably
    enclosing brackets -- confirm against the CSV format). In what is left,
    every ``;`` becomes a space, runs of spaces are collapsed to one, and
    surrounding whitespace is stripped; an empty result is replaced by the
    ``[]`` placeholder. The resulting newline-terminated strings are passed
    to ``cm.save_txt`` with the target
    ``path_to + 'comment/comment<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file name.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_Comment.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    comments = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [+2:-2] slice would then cut two
        # real characters instead of one.
        if row.endswith('\n'):
            row = row[:-1]
        payload = row[row.index(';') + 1:]
        text = payload[1:-1].replace(';', ' ')
        text = re.sub(' +', ' ', text).strip()
        comments.append((text or '[]') + '\n')

    cm.save_txt(path_to + 'comment/comment' + str(idx) + '.txt', comments)
    print('comment over: ' + str(idx))
def code_parsed(idx):
    """Extract one field per entry from ``file_<idx>_ParsedCode.csv``.

    Reads ``path_from + 'file_<idx>_ParsedCode.csv'``. For every line,
    everything up to and including the first ``;`` is discarded. If the
    remainder contains a ``;`` it is split on ``;``, and from every entry
    that has exactly three ``,``-separated fields the second field is kept;
    the kept fields are joined with ``,``. A remainder without any ``;``, or
    an empty join result, yields the ``[]`` placeholder. The resulting
    newline-terminated strings are passed to ``cm.save_txt`` with the target
    ``path_to + 'parsed/parsed<idx>.txt'``.

    Args:
        idx: Number of the input file; reused in the output file name.

    Returns:
        None. Returns early, without writing anything, when the input file
        does not exist.

    Raises:
        ValueError: If a line contains no ``;``.
    """
    csv_path = path_from + 'file_' + str(idx) + '_ParsedCode.csv'
    if not os.path.exists(csv_path):
        return
    with open(csv_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    parsed = []
    for row in rows:
        # Drop the line terminator only when it is present: the last line of
        # a file may lack one, and a blind [:-1] would cut a real character.
        if row.endswith('\n'):
            row = row[:-1]
        payload = row[row.index(';') + 1:]

        kept = []
        # A payload without any ';' is deliberately left unparsed, matching
        # the established behaviour for such rows.
        if ';' in payload:
            for entry in payload.split(';'):
                fields = entry.split(',')
                if len(fields) == 3:
                    kept.append(fields[1])

        joined = ','.join(kept)
        parsed.append((joined or '[]') + '\n')

    cm.save_txt(path_to + 'parsed/parsed' + str(idx) + '.txt', parsed)
    print('parsed over: ' + str(idx))