Ejemplo n.º 1
0
def generate_indices_from_benchmark(writer, counter):
    """Index the Java files found under ``source_path`` into ``writer``.

    For every path returned by ``java_files_from_dir(source_path)`` a new
    ``Document`` is created with a stored, non-indexed ``file`` field holding
    the path.  The file is read as UTF-8 (undecodable bytes ignored), parsed
    with ``parse(..., resolve=False)`` and passed to
    ``add_code_keyword_into_document``; the document is added to ``writer``
    only when that helper returns a truthy value.

    Args:
        writer: Object exposing ``addDocument(document)`` (presumably a
            Lucene index writer -- confirm against the caller).
        counter: Passed through to ``add_code_keyword_into_document``; its
            ``typed_method_call_count`` attribute is printed as progress.

    Note:
        Relies on the module-level names ``source_path`` and ``hashes``,
        neither of which is defined in this function.
    """
    seen = 0
    indexed = 0
    for javafile in java_files_from_dir(source_path):
        seen += 1
        if seen % 1000 == 0:  # progress report every 1000 files
            print("Counter: %s" % seen)
            print("typed_method_call" + str(counter.typed_method_call_count))
        document = Document()
        document.add(Field("file", javafile, Field.Store.YES, Field.Index.NO))
        try:
            # The with-statement closes the file on exit, so no explicit
            # close() call is needed afterwards.
            with codecs.open(javafile, "r", encoding='utf-8', errors='ignore') as f:
                file_content = f.read().encode("utf-8", errors='ignore')

            ast = parse(file_content, resolve=False)
            if add_code_keyword_into_document(document, file_content, ast, counter):
                writer.addDocument(document)
                indexed += 1
                if indexed % 1000 == 0:
                    print("Wrote:: %s files" % indexed)
        except Exception as e:
            # Best effort: report the failing file's error and move on to the
            # next file instead of aborting the whole run.
            print("Error: %s" % e)
    print("Number of files: %s" % seen)
    print("Number of duplicates: %s" % len(hashes))
    print("%s files has been indexed" % indexed)
Ejemplo n.º 2
0
def transform_body(body):
    """Extract parsed code snippets and short code hints from an HTML body.

    The body is split on ``</code>``; for every piece containing ``<code>``
    the text after the first ``<code>`` is HTML-unescaped with
    ``MyUtils.unescape_html``.  Text containing both ``.`` and ``(`` is kept
    as a snippet; text shorter than 25 characters is kept as a hint (for
    snippet-like text only when the piece contains no ``<pre``).

    Returns:
        A ``(asts, code_hints)`` tuple: ``asts`` is the list of truthy
        results of ``parse(snippet, resolve=True)``, ``code_hints`` is the
        set of tokens produced by ``MyUtils.tokenize`` over all hints.
    """
    open_tag = "<code>"
    inline_limit = 25  # shorter text is treated as an inline code block

    snippets = []
    hint_texts = []
    for chunk in body.split("</code>"):
        start = chunk.find(open_tag)
        if start == -1:
            continue
        text = MyUtils.unescape_html(chunk[start + len(open_tag):])
        if "." in text and "(" in text:
            snippets.append(text)
            # Heuristic: short and not inside a <pre> block => inline code.
            if len(text) < inline_limit and "<pre" not in chunk:
                hint_texts.append(text)
        elif len(text) < inline_limit:
            hint_texts.append(text)

    tokens = [token
              for hint in hint_texts
              for token in MyUtils.tokenize(hint)]
    code_hints = set(tokens)

    parsed = [parse(snippet, resolve=True) for snippet in snippets]
    asts = [tree for tree in parsed if tree]

    return asts, code_hints
Ejemplo n.º 3
0
def Generator(code):
    file_content = code
    print '1. Origianl Query : ', file_content
    ast = parse(file_content, resolve=False)
    query = add_code_keyword_into_document(file_content, ast)
    print "2. Right after alternation & before the removing stop words : ", query
    query = remove_unified_stop_lists(query)
    print '3. Right after the stop words removing : ', query
    return query
def Generator(code):
    """Turn a code query into a keyword query with stop words removed.

    The code is parsed with ``parse(code, resolve=False)`` (the original
    comment says this parses Java code using newJavaParser), converted by
    ``add_code_keyword_into_document`` and finally filtered by
    ``remove_unified_stop_lists``.

    Returns:
        Whatever ``remove_unified_stop_lists`` returns for the query.
    """
    tree = parse(code, resolve=False)
    keywords = add_code_keyword_into_document(code, tree)
    return remove_unified_stop_lists(keywords)
Ejemplo n.º 5
0
def generate_indices_from_projects(writer, counter):
    """Index the Java files found under the hard-coded GCJ 2014 directory.

    Each path returned by ``java_files_from_dir`` is printed, then a Lucene
    ``Document`` is created whose stored, non-indexed ``file`` field holds
    the root directory plus the path's components from the seventh onward.
    The file is read as UTF-8 (undecodable bytes ignored), parsed with
    ``parse(..., resolve=False)`` and passed to
    ``add_code_keyword_into_document``; the document is added to ``writer``
    only when that helper returns a truthy value.

    Args:
        writer: Object exposing ``addDocument(document)``.
        counter: Passed through to ``add_code_keyword_into_document``; its
            ``typed_method_call_count`` attribute is printed as progress.

    Note:
        Relies on the module-level name ``hashes``, which is not defined in
        this function.
    """
    repo_root = "/Users/Falcon/Downloads/GCJ_Repository/2014"
    seen = 0
    indexed = 0
    for javafile in java_files_from_dir(repo_root):  # Java files only
        print(javafile)
        seen += 1
        if seen % 1000 == 0:  # progress report every 1000 files
            print("Counter: %s" % seen)
            print("typed_method_call" + str(counter.typed_method_call_count))
        document = Document()  # Lucene Document object

        # Drop the first six "/"-separated components (as many as repo_root
        # itself has) and re-attach the remainder to repo_root.
        tail = javafile.split("/")[6:]
        project_path = "".join("/" + part for part in tail)
        changed_path = repo_root + project_path
        document.add(
            Field("file", changed_path, Field.Store.YES, Field.Index.NO))

        try:
            with codecs.open(javafile, "r", encoding='utf-8',
                             errors='ignore') as handle:
                file_content = handle.read().encode("utf-8", errors='ignore')

            tree = parse(file_content, resolve=False)
            if not add_code_keyword_into_document(document, file_content,
                                                  tree, counter):
                continue
            writer.addDocument(document)
            indexed += 1
            if indexed % 1000 == 0:
                print("Wrote:: %s files" % indexed)
        except Exception as e:
            # Report the error and carry on with the next file.
            print("Error: %s" % e)
    print("Number of files: %s" % seen)
    print("Number of duplicates: %s" % len(hashes))
    print("%s files has been indexed" % indexed)
Ejemplo n.º 6
0
def generate_indices_from_projects(writer, counter):
    """Index the Java files found under the hard-coded IJA dataset directory.

    For each path returned by ``java_files_from_dir`` a ``Document`` is
    created whose stored, non-indexed ``file`` field holds the dataset
    directory plus the path's components from the seventh onward.  The file
    is read as UTF-8 (undecodable bytes ignored), parsed with
    ``parse(..., resolve=False)`` and passed to
    ``add_code_keyword_into_document``; the document is added to ``writer``
    only when that helper returns a truthy value.

    Args:
        writer: Object exposing ``addDocument(document)``.
        counter: Passed through to ``add_code_keyword_into_document``; its
            ``typed_method_call_count`` attribute is printed as progress.

    Note:
        Relies on the module-level name ``hashes``, which is not defined in
        this function.
    """
    dataset_dir = "/Users/Falcon/Desktop/IJA/dataset/"
    seen = 0
    indexed = 0
    for javafile in java_files_from_dir(dataset_dir):
        seen += 1
        if seen % 1000 == 0:  # progress report every 1000 files
            print("Counter: %s" % seen)
            print("typed_method_call" + str(counter.typed_method_call_count))
        document = Document()

        # Drop the first six "/"-separated components and re-attach the
        # remainder to the dataset directory (written without its trailing
        # slash, since every remaining component is prefixed with "/").
        tail = javafile.split("/")[6:]
        project_path = "".join("/" + part for part in tail)
        changed_path = "/Users/Falcon/Desktop/IJA/dataset" + project_path
        document.add(Field("file", changed_path, Field.Store.YES, Field.Index.NO))

        try:
            with codecs.open(javafile, "r", encoding='utf-8', errors='ignore') as handle:
                file_content = handle.read().encode("utf-8", errors='ignore')

            tree = parse(file_content, resolve=False)
            if not add_code_keyword_into_document(document, file_content, tree, counter):
                continue
            writer.addDocument(document)
            indexed += 1
            if indexed % 1000 == 0:
                print("Wrote:: %s files" % indexed)
        except Exception as e:
            # Report the error and carry on with the next file.
            print("Error: %s" % e)
    print("Number of files: %s" % seen)
    print("Number of duplicates: %s" % len(hashes))
    print("%s files has been indexed" % indexed)
Ejemplo n.º 7
0
    def __init__(self, snippet, sources):
        """Parse ``snippet`` and set up empty name-lookup tables.

        Args:
            snippet: Source text handed to ``parse(snippet, resolve=True)``;
                the parse result is stored as ``self.snippet``.
            sources: Stored unchanged as ``self.sources``.

        ``class_PQN_to_FQN`` maps a key to a list; ``method_PQN_to_FQN``
        maps a key to a dict with empty ``"fqn"`` and ``"class"`` lists.
        (PQN/FQN presumably mean partially/fully qualified name -- confirm.)
        """
        def _empty_method_entry():
            # Fresh dict per missing key so entries never share lists.
            return {"fqn": [], "class": []}

        self.snippet = parse(snippet, resolve=True)
        self.sources = sources

        self.class_PQN_to_FQN = defaultdict(list)
        self.method_PQN_to_FQN = defaultdict(_empty_method_entry)