def load_gitsearch_items(self, github_items):
	"""Wrap each entry of ``github_items`` and keep the ones not seen before.

	Every entry is wrapped in a ``GitSearchItem``. The item is appended to
	``self.items`` only when the ``md5`` of its stringified ``file_path`` is
	not already in ``self.hashes``; that hash is then recorded. The matched
	terms of the items added by this call are collected without duplicates
	and stored as a list in ``self.global_matched_terms``.

	:param github_items: iterable of raw results accepted by ``GitSearchItem``.
	"""
	new_terms = set()
	for github_item in github_items:
		item = GitSearchItem(github_item)
		# De-duplication key is derived from the file path only.
		path_hash = md5(str(item.file_path))
		if path_hash in self.hashes:
			continue

		self.items.append(item)
		self.hashes.add(path_hash)
		# Grow the accumulator in place rather than copying it for every item.
		new_terms.update(item.matched_terms)

	# NOTE(review): this assignment replaces any earlier value, so only terms
	# from items added during this call are kept -- confirm that is intended.
	self.global_matched_terms = list(new_terms)
Esempio n. 2
0
def add_code_keyword_into_document(document, file_content, node, counter):
	"""Add the fields describing one piece of code to ``document``.

	Three fields are always added: ``line_numbers`` (string form of
	``dict(node["line_numbers"])``, stored but not indexed), ``hash``
	(``str(md5(file_content))``, stored but not indexed) and ``code``
	(``so_tokenizer(file_content, False)``, stored and analyzed).

	After that, every truthy entry of each characteristic list in ``node``
	becomes its own field and bumps the matching ``*_count`` attribute on
	``counter``.

	:param document: object whose ``add`` method receives the created fields.
	:param file_content: raw content that is hashed and tokenized.
	:param node: mapping holding ``line_numbers`` and the characteristic lists.
	:param counter: object carrying the ``*_count`` attributes to increment.
	:return: True when at least one typed method call, class instance
		creation, called method or literal was added; False otherwise.
	"""
	keep = Field.Store.YES
	analyzed = Field.Index.ANALYZED
	unindexed = Field.Index.NO

	line_numbers_text = str(dict(node["line_numbers"]))
	document.add(Field("line_numbers", line_numbers_text, keep, unindexed))
	content_hash = str(md5(file_content))
	document.add(Field("hash", content_hash, keep, unindexed))
	tokenized_code = so_tokenizer(file_content, False)
	document.add(Field("code", tokenized_code, keep, analyzed))

	# Becomes True once one of the flag-relevant characteristics is stored.
	stored_any = False

	# filter(None, ...) skips falsy entries such as empty strings or None.
	for typed_call in filter(None, node["typed_method_call"]):
		document.add(Field("typed_method_call", typed_call, keep, analyzed))
		counter.typed_method_call_count += 1
		stored_any = True

	# Parent classes are analyzed but, unlike the other fields, not stored.
	for parent in filter(None, node["extends"]):
		document.add(Field("extends", parent, Field.Store.NO, analyzed))
		counter.extends_count += 1

	# Used classes are converted with str() before being added.
	for used_class in filter(None, node["used_classes"]):
		document.add(Field("used_classes", str(used_class), keep, analyzed))
		counter.used_classes_count += 1

	for creation in filter(None, node["class_instance_creation"]):
		document.add(Field("class_instance_creation", creation, keep, analyzed))
		counter.class_instance_creation_count += 1
		stored_any = True

	for method in filter(None, node["methods"]):
		document.add(Field("methods", method, keep, analyzed))
		counter.methods_count += 1

	for called in filter(None, node["methods_called"]):
		document.add(Field("methods_called", called, keep, analyzed))
		counter.methods_called_count += 1
		stored_any = True

	for unresolved in filter(None, node["unresolved_method_calls"]):
		document.add(Field("unresolved_method_calls", unresolved, keep, analyzed))
		counter.unresolved_method_calls_count += 1

	# Literals go into a StringField instead of an analyzed Field.
	for literal in filter(None, node["literals"]):
		document.add(StringField("literals", literal, keep))
		counter.literals_count += 1
		stored_any = True

	return stored_any
Esempio n. 3
0
    def load_gitsearch_items(self, github_items, search_count, targetpath):
        """Rank ``github_items`` by score and keep the ones not seen before.

        The entries are sorted by their ``score`` attribute, highest first,
        and numbered from 1 in that order. Each entry is wrapped in a
        ``GitSearchItem`` together with ``search_count``, its rank and
        ``targetpath``. An item is appended to ``self.items`` only when the
        ``md5`` of its stringified ``file_path`` is not already in
        ``self.hashes``; that hash is then recorded. The matched terms of the
        items added by this call are collected without duplicates and stored
        as a list in ``self.global_matched_terms``.

        :param github_items: iterable of results exposing a ``score`` attribute.
        :param search_count: passed through unchanged to ``GitSearchItem``.
        :param targetpath: passed through unchanged to ``GitSearchItem``.
        """
        # Re-sort the results here so that rank follows descending score.
        by_score = sorted(github_items, key=attrgetter('score'), reverse=True)

        new_terms = set()
        # Ranks start at 1 and are consumed by skipped duplicates as well.
        for rank, github_item in enumerate(by_score, 1):
            item = GitSearchItem(github_item, search_count, rank, targetpath)
            # De-duplication key is derived from the file path only.
            path_hash = md5(str(item.file_path))
            if path_hash in self.hashes:
                continue

            self.items.append(item)
            self.hashes.add(path_hash)
            # Grow the accumulator in place rather than copying it per item.
            new_terms.update(item.matched_terms)

        # NOTE(review): this assignment replaces any earlier value, so only
        # terms from items added during this call are kept -- confirm intent.
        self.global_matched_terms = list(new_terms)