def test_single_repo_oxygen(self):
    """Check that the algorithm runner reproduces a direct Oxygen call."""
    expected = oxygen(self.modules1)
    actual = run_single_repo(self.modules1, OXYGEN)

    # The result type does not overload the equality operator (yet),
    # so the JSON representations are compared instead.
    expected_json = expected.json()
    actual_json = actual.json()
    assert expected_json == actual_json
def _func_def_generic(self, second, algorithm, single):
    """Run `algorithm` against the reference modules and `second`.

    When `single` is truthy, `self.reference + second` is analyzed as one
    repository; otherwise `self.reference` and `second` are analyzed as
    two separate repositories.

    Returns the detection result after asserting that it is a
    `DetectionResult` instance.
    """
    if single:
        outcome = run_single_repo(self.reference + second, algorithm)
    else:
        outcome = run_two_repos(self.reference, second, algorithm)

    assert outcome is not None
    assert isinstance(outcome, DetectionResult)
    return outcome
def _repo_basename(path):
    """Return the last component of `path`, ignoring trailing separators."""
    # Imported locally so this helper has no module-level dependency.
    import os

    # normpath() strips trailing separators, so "dir/repo/" yields "repo"
    # instead of an empty string; os.path also understands the platform's
    # native separator rather than only '/'.
    return os.path.basename(os.path.normpath(path))


def main():
    """Entry point of the application.

    Parses the command-line arguments, loads the modules of the first
    (and, if given, the second) repository, runs the selected algorithm
    and writes the detection result to a JSON file in the current
    working directory. The file is named
    ``clones_<algorithm>_<repo>[_<repo2>]_<timestamp>.json``.

    If a `UserInputError` is raised along the way, its message (if any)
    is logged and the process exits with the error's code.
    """
    try:
        # Parse command start arguments
        repos, algorithm = handle_cli_args()

        time_snap("Arguments handled; Repositories parsed")

        module_list_1 = get_modules_from_dir(repos[0])
        time_snap("First repository modules parsed")

        if not module_list_1:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")

        if repos[1]:
            module_list_2 = get_modules_from_dir(repos[1])
            time_snap("Second repository modules parsed")

            if not module_list_2:
                raise UserInputError(
                    f"Second repository is empty: \"{repos[1]}\"")

            log.info("Running a two-repository analysis...")
            result = run_two_repos(module_list_1, module_list_2, algorithm)

        else:
            log.info("Running a single-repository analysis...")
            result = run_single_repo(module_list_1, algorithm)

        time_snap("Analysis completed")

        # Save detection result to a JSON file.
        name_parts = ["clones", algorithm, _repo_basename(repos[0])]
        if repos[1]:
            name_parts.append(_repo_basename(repos[1]))
        name_parts.append(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        json_filename = "_".join(name_parts) + ".json"

        # Explicit encoding keeps the output independent of the locale.
        with open(json_filename, "w", encoding="utf-8") as f:
            f.write(result.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
def hello():
    """Serve the index page, optionally filled with analysis results.

    Reads the HTML template from `_INDEX_HTML`. If the request carries a
    `repo` query argument, the repository is analyzed with the "oxygen"
    algorithm and the detected clones are rendered as a nested HTML list;
    if the analysis raises `UserInputError`, its message is shown instead.

    Returns the template with the `#LOG#` placeholder replaced by that
    output (an empty string when no `repo` argument was given).
    """
    # Imported locally so this handler has no extra module-level dependency.
    from html import escape

    with open(_INDEX_HTML, "r", encoding="utf-8") as f:
        webpage = f.read()

    output = ""

    repo = request.args.get("repo")
    if repo:
        try:
            modules = get_modules_from_repo(repo)
            result = run_single_repo(modules, "oxygen")

            # Clone values and origins come from the analyzed repository,
            # i.e. from untrusted input, so they are HTML-escaped before
            # being embedded in the page.
            output = "<ol>" + "".join(
                "<li>" + escape(c.value) + f" - Weight: {c.match_weight}" +
                "<ul>" +
                "".join(
                    "<li>" + escape(orig) +
                    f" - Similarity: {sim * 100:g} %" + "</li>"
                    for orig, sim in c.origins.items()
                ) +
                "</ul></li>"
                for c in result.clones
            ) + "</ol>"

        except UserInputError as ex:
            # The message may echo the user-supplied repository string, so
            # it is escaped as well; a missing message becomes empty output
            # rather than breaking str.replace() below.
            output = escape(ex.message or "")

    return webpage.replace("#LOG#", output)
def analyze_repo(repo_info, repo_id, algorithm=OXYGEN):
    """Analyze the repo using the specified algorithm. Store results in db.

    Steps:
      1. Clone or pull the repository; on failure the repository's status
         is set to the 'err_clone' state and the function returns.
      2. Parse the modules in `repo_info.dir`; return early if there are
         none.
      3. Run the single-repository analysis and, in one transaction,
         insert the commit, its clusters and their origins, then set the
         repository's status to the 'done' state.
      4. Run pattern extraction in a second transaction.

    `PG_Error` raised while working with an open connection is passed to
    `handle_pg_error`. The connection is always closed before returning.

    Arguments:
        repo_info: Repository descriptor; must provide `clone_or_pull()`,
            `dir` and `hash`.
        repo_id: Primary key of the repository's row in the `repos` table.
        algorithm: Algorithm passed to `run_single_repo`.
    """
    log.info(f"Analyzing repository: {repo_info}")

    # Connect before entering the `try` block. If the connection itself
    # cannot be established there is nothing to hand to the error handler
    # and nothing to close, so the original connection error propagates
    # instead of being masked by an UnboundLocalError on `conn`.
    conn = pg_conn(db_url)

    try:
        if repo_info.clone_or_pull():
            log.success(
                f"Repository has been successfully cloned: {repo_info}")

        else:
            log.warning(f"Unable to clone repository: {repo_info}")

            conn.run(
                """UPDATE repos SET status = (SELECT id FROM states WHERE name = 'err_clone') WHERE id = %s;""",
                repo_id)

            return

        modules = get_modules_from_dir(repo_info.dir)

        if not modules:
            log.warning("Repository contains no Python module")
            return

        result = run_single_repo(modules, algorithm)

        # Insert repository analysis into database all at once
        with conn.transaction():
            commit_id = conn.one(
                """INSERT INTO commits (repo_id, hash) VALUES (%s, %s) RETURNING id;""",
                repo_id, repo_info.hash)

            for c in result.clones:
                cluster_id = conn.one(
                    """INSERT INTO clusters (commit_id, "value", weight) VALUES (%s, %s, %s) RETURNING id;""",
                    commit_id, c.value, c.match_weight)

                for o, s in c.origins.items():
                    conn.run(
                        """INSERT INTO origins (cluster_id, file, line, col_offset, similarity) VALUES (%s, %s, %s, %s, %s);""",
                        cluster_id, o.file, o.line, o.col_offset, s)

            log.success(
                f"Repository has been successfully analyzed: {repo_info}")

            # The status flips to 'done' together with the inserted data.
            conn.run(
                """UPDATE repos SET status = (SELECT id FROM states WHERE name = 'done') WHERE id = %s;""",
                repo_id)

        # Once done with the regular analysis, run pattern extraction
        with conn.transaction():
            _extract_patterns(conn, commit_id, modules)

        log.success(f"Pattern extraction from was successful: {repo_info}")

    except PG_Error as ex:
        handle_pg_error(ex, conn, repo_id)

    finally:
        conn.close()
def test_single_repo_iodine(self):
    """Single-repo Iodine is not implemented yet, so it must be rejected."""
    expect_user_error = raises(UserInputError)

    with expect_user_error:
        run_single_repo(self.modules1, IODINE)
def test_single_repo_chlorine(self):
    """Check that the algorithm runner reproduces a direct Chlorine call."""
    expected = chlorine_single_repo(self.modules1)
    actual = run_single_repo(self.modules1, CHLORINE)

    # Results are compared through their JSON representations.
    expected_json = expected.json()
    actual_json = actual.json()
    assert expected_json == actual_json