def test_simple_test_1():
    """Compare two-repo Chlorine output with the stored expected result.

    Uses the "simple_test_1" fixture: modules are loaded from its "source1"
    and "source2" directories and the JSON-serialized detection result must
    equal the contents of "result.json".
    """
    test_path = path_join(chlorine_double_test_dir, "simple_test_1")

    # Read the expected result inside a context manager so the file handle
    # is closed even if parsing fails.
    with open(path_join(test_path, "result.json")) as expected_file:
        expected = json.load(expected_file)

    modules1 = get_modules_from_dir(path_join(test_path, "source1"))
    modules2 = get_modules_from_dir(path_join(test_path, "source2"))

    # Round-trip through JSON so both sides are plain Python structures.
    actual = json.loads(chlorine_two_repos(modules1, modules2).json())

    assert expected == actual
def setUp(self):
    """Load the "func_def" fixture module sets onto the test instance.

    After this runs, ``self.reference``, ``self.different``, ``self.type1``
    and ``self.type2`` hold the modules parsed from the sub-directories of
    the same names.
    """
    base_dir = path_join(test_data_dir, "func_def")

    # Resolve every fixture directory first, then parse them in a fixed
    # order (reference, different, type1, type2).
    fixture_names = ("reference", "different", "type1", "type2")
    fixture_dirs = [path_join(base_dir, name) for name in fixture_names]

    for name, directory in zip(fixture_names, fixture_dirs):
        setattr(self, name, get_modules_from_dir(directory))
def test_chlorine_two_not_none():
    """Two-repo Chlorine run on the test repositories yields clones."""
    # Both inputs must parse to something before the algorithm is run.
    first_modules = get_modules_from_dir(test_repo1_dir)
    assert first_modules

    second_modules = get_modules_from_dir(test_repo2_dir)
    assert second_modules

    detection = chlorine_two_repos(first_modules, second_modules)

    # The result object itself and its clone collection must be truthy.
    assert detection
    assert detection.clones
def main():
    """Entry point of the application.

    Parses the command line, loads modules from the first repository and,
    if a second one was given, from the second repository as well. It then
    runs the selected algorithm in single- or two-repository mode and writes
    the JSON result to a file named
    ``clones_<algorithm>_<repo>[_<repo2>]_<timestamp>.json``.

    A ``UserInputError`` (for example, an empty repository) is logged when it
    carries a message, and the process exits with the error's code.
    """
    try:
        # Parse command start arguments
        repos, algorithm = handle_cli_args()
        time_snap("Arguments handled; Repositories parsed")

        module_list_1 = get_modules_from_dir(repos[0])
        time_snap("First repository modules parsed")

        if not module_list_1:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")

        if repos[1]:
            module_list_2 = get_modules_from_dir(repos[1])
            time_snap("Second repository modules parsed")

            if not module_list_2:
                raise UserInputError(
                    f"Second repository is empty: \"{repos[1]}\"")

            log.info("Running a two-repository analysis...")
            result = run_two_repos(module_list_1, module_list_2, algorithm)

        else:
            log.info("Running a single-repository analysis...")
            result = run_single_repo(module_list_1, algorithm)

        time_snap("Analysis completed")

        # Save detection result to a JSON file.
        # Strip trailing slashes before taking the last path component so
        # that "path/to/repo/" still yields "repo" instead of an empty name.
        name_parts = ["clones", algorithm,
                      repos[0].rstrip("/").rsplit("/", 1)[-1]]

        if repos[1]:
            name_parts.append(repos[1].rstrip("/").rsplit("/", 1)[-1])

        name_parts.append(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        json_filename = "_".join(name_parts) + ".json"

        with open(json_filename, "w") as f:
            f.write(result.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
def run_test(test_dir):
    """Run single-repo Chlorine on a fixture and compare with its result.

    Arguments:
        test_dir -- name of the fixture directory inside
            ``chlorine_single_test_dir``; it must contain a "source"
            directory and a "result.json" file.

    Raises:
        AssertionError -- if the JSON-serialized detection result differs
            from the stored expected result.
    """
    test_path = path_join(chlorine_single_test_dir, test_dir)

    # Use a context manager so the expected-result file is always closed.
    with open(path_join(test_path, "result.json")) as expected_file:
        expected = json.load(expected_file)

    modules = get_modules_from_dir(path_join(test_path, "source"))

    # Round-trip through JSON so both sides are plain Python structures.
    actual = json.loads(chlorine_single_repo(modules).json())

    assert expected == actual
def test_chlorine_single_not_none():
    """Single-repo Chlorine run on the first test repository yields clones."""
    parsed_modules = get_modules_from_dir(test_repo1_dir)
    assert parsed_modules

    detection = chlorine_single_repo(parsed_modules)

    # The result object itself and its clone collection must be truthy.
    assert detection
    assert detection.clones
def test_simple_1():
    """Compare Oxygen output for "simple_test_1" with the expected result.

    Modules are loaded from the fixture's "source1" directory and the
    JSON-serialized detection result must equal the contents of
    "result.json".
    """
    test_path = path_join(oxygen_test_dir, "simple_test_1")

    # Use a context manager so the expected-result file is always closed.
    with open(path_join(test_path, "result.json")) as expected_file:
        expected = json.load(expected_file)

    modules = get_modules_from_dir(path_join(test_path, "source1"))

    # Round-trip through JSON so both sides are plain Python structures.
    actual = json.loads(oxygen(modules).json())

    assert expected == actual
def test_oxygen_not_none():
    """Oxygen run on the first test repository yields a non-empty result."""
    parsed_modules = get_modules_from_dir(test_repo1_dir)
    assert parsed_modules

    detection = oxygen(parsed_modules)

    # The result object itself and its clone collection must be truthy.
    assert detection
    assert detection.clones
def main():
    """Entry point of the application.

    Parses the command line, loads modules from both repositories, runs the
    selected algorithm on the pair and writes the JSON result to a
    timestamped ``clones_*.json`` file in the current working directory.
    A ``UserInputError`` is logged (when it has a message) and turned into a
    process exit with the error's code.
    """
    try:
        # Parse command line arguments
        repos, algorithm = handle_cli_args()
        time_snap("Cloned repositories")

        # Find all functions and parse their syntax tree using the TreeNode
        # wrapper
        log.info("Parsing methods in repositories...")

        first_modules = get_modules_from_dir(repos[0])
        if not first_modules:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")
        time_snap("Parsed first repository")

        second_modules = get_modules_from_dir(repos[1])
        if not second_modules:
            raise UserInputError(f"Second repository is empty: \"{repos[1]}\"")
        time_snap("Parsed second repository")

        log.info("Beginning full analysis...")
        report = run_two_repos(first_modules, second_modules, algorithm)
        time_snap("Analysis completed")

        # Create output directory if it doesn't exist and print output
        target_dir = os.getcwd()
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        target_name = f"clones_{timestamp}.json"

        os.makedirs(target_dir, exist_ok=True)

        target_file = os.path.join(target_dir, target_name)
        with open(target_file, "w") as handle:
            handle.write(report.json())

    except UserInputError as err:
        if err.message:
            log.error(err.message)

        sys.exit(err.code)
def test_get_modules():
    """Modules parsed from a valid directory are non-empty lists of TreeNode.

    The parse result must be a non-empty list, every entry of it must be a
    list, and every element of those inner lists must be a TreeNode.
    """
    parsed = get_modules_from_dir(test_repo1_dir)

    assert isinstance(parsed, list)
    assert len(parsed) > 0

    for module_nodes in parsed:
        assert isinstance(module_nodes, list)

        for node in module_nodes:
            assert isinstance(node, TreeNode)
def main():
    """
    Entry point of the application.

    Parses the command-line arguments, loads modules from both repositories
    and runs the Iodine analysis on the pair. A ``UserInputError`` is logged
    (when it has a message) and turned into a process exit with its code.
    """
    try:
        # Parse command line arguments
        repos = handle_args(sys.argv)
        time_snap("Cloned repositories")

        # ------- FOR TESTING PURPOSES ------------

        # Find all functions and parse their syntax tree using the TreeNode
        # wrapper
        log.info("Parsing methods in repositories...")

        first_modules = get_modules_from_dir(repos[0])
        if not first_modules:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")
        time_snap("Parsed first repository")

        second_modules = get_modules_from_dir(repos[1])
        if not second_modules:
            raise UserInputError(f"Second repository is empty: \"{repos[1]}\"")
        time_snap("Parsed second repository")

        log.info("Beginning full analysis...")
        iodine(first_modules, second_modules)
        time_snap("Analysis completed")

    except UserInputError as err:
        if err.message:
            log.error(err.message)

        sys.exit(err.code)
def analyze_repo(repo_info, repo_id, algorithm=OXYGEN):
    """Analyze the repo using the specified algorithm. Store results in db.

    The repository is cloned or pulled, its modules are parsed, and the
    detection result (commit, clusters and their origins) is inserted in a
    single transaction. The repository status is then set to 'done' and
    pattern extraction runs in a second transaction. If cloning fails, the
    status is set to 'err_clone' instead. If no modules are found, only a
    warning is logged and the status is left unchanged.

    Arguments:
        repo_info -- repository object; this function uses its
            ``clone_or_pull()`` method and its ``dir`` and ``hash`` attributes.
        repo_id -- id of the repository's row in the ``repos`` table.
        algorithm -- algorithm passed through to ``run_single_repo``.

    Raises:
        Whatever ``pg_conn`` raises if the connection cannot be opened. The
        connection is created before the ``try`` block so that such a failure
        is reported as itself, rather than being masked by an
        ``UnboundLocalError`` on ``conn`` in the ``except``/``finally`` clauses.
    """
    log.info(f"Analyzing repository: {repo_info}")

    # Connect outside the try block: the handlers below need a valid `conn`.
    conn = pg_conn(db_url)

    try:
        if repo_info.clone_or_pull():
            log.success(
                f"Repository has been successfully cloned: {repo_info}")

        else:
            log.warning(f"Unable to clone repository: {repo_info}")

            conn.run(
                "UPDATE repos SET status = "
                "(SELECT id FROM states WHERE name = 'err_clone') "
                "WHERE id = %s;",
                repo_id)

            return

        modules = get_modules_from_dir(repo_info.dir)

        if not modules:
            log.warning("Repository contains no Python module")
            return

        result = run_single_repo(modules, algorithm)

        # Insert repository analysis into database all at once
        with conn.transaction():
            commit_id = conn.one(
                "INSERT INTO commits (repo_id, hash) "
                "VALUES (%s, %s) RETURNING id;",
                repo_id, repo_info.hash)

            for c in result.clones:
                cluster_id = conn.one(
                    'INSERT INTO clusters (commit_id, "value", weight) '
                    'VALUES (%s, %s, %s) RETURNING id;',
                    commit_id, c.value, c.match_weight)

                # Each origin maps to its similarity value.
                for o, s in c.origins.items():
                    conn.run(
                        "INSERT INTO origins "
                        "(cluster_id, file, line, col_offset, similarity) "
                        "VALUES (%s, %s, %s, %s, %s);",
                        cluster_id, o.file, o.line, o.col_offset, s)

            log.success(
                f"Repository has been successfully analyzed: {repo_info}")

        conn.run(
            "UPDATE repos SET status = "
            "(SELECT id FROM states WHERE name = 'done') "
            "WHERE id = %s;",
            repo_id)

        # Once done with the regular analysis, run pattern extraction
        with conn.transaction():
            _extract_patterns(conn, commit_id, modules)

        log.success(f"Pattern extraction from was successful: {repo_info}")

    except PG_Error as ex:
        handle_pg_error(ex, conn, repo_id)

    finally:
        # Always release the connection, including on the early returns.
        conn.close()
def setUp(self):
    """Parse both test repositories into ``modules1`` and ``modules2``."""
    # First source repository.
    first_modules = get_modules_from_dir(test_repo1_dir)
    self.modules1 = first_modules

    # Second source repository.
    second_modules = get_modules_from_dir(test_repo2_dir)
    self.modules2 = second_modules
def setUp(self):
    """Parse the first test repository into ``modules1``."""
    first_modules = get_modules_from_dir(test_repo1_dir)
    self.modules1 = first_modules