def run_two_repos(modules1, modules2, algorithm):
    """
    Dispatch a two-repository clone detection run to the chosen algorithm.

    Arguments:
        modules1 {list[list[TreeNode]]} -- Modules of the first repository.
        modules2 {list[list[TreeNode]]} -- Modules of the second repository.
        algorithm {string} -- Name of the clone detection algorithm to run.

    Raises:
        UserInputError -- If the algorithm cannot be run on two
                          repositories or its name is not recognized.

    Returns:
        DetectionResult -- Result produced by the selected algorithm.
    """
    if algorithm == CHLORINE:
        return chlorine_two_repos(modules1, modules2)

    if algorithm == IODINE:
        return iodine(modules1, modules2)

    # Oxygen is a recognized name, but it is rejected here with its own
    # message so the user can tell it apart from a mistyped algorithm name.
    if algorithm == OXYGEN:
        message = f"\"{algorithm}\" can only be run with one repository"
    else:
        message = f"Invalid algorithm name: \"{algorithm}\""

    raise UserInputError(message)
def get_repo_analysis(repo_path):
    """
    Get analysis of a repository given its path.

    Only one of the possible return values will be returned.
    Use `isinstance` to determine the type of the returned value.

    Arguments:
        repo_path {string} -- Git repository path, or a bare name made of
                              word characters, dots and dashes that is
                              looked up among repositories in the database.
                              Surrounding whitespace is ignored.

    Raises:
        UserInputError -- If the path is neither a parsable repository path
                          nor a valid bare name, or if a bare name matches
                          no repository. This error is not handled here.

    Returns:
        string -- Message describing the state of the repo's analysis.
        DetectionResult -- Detection result retrieved from the database.
        list[dict] -- List of repositories matching the repository path.
    """
    # Strip leading and trailing whitespace from the repo path.
    repo_path = repo_path.strip()

    # Bind both names before entering the `try` block. The `except` and
    # `finally` clauses reference them, and without these defaults a failure
    # in parsing or connecting would surface as an UnboundLocalError that
    # hides the real problem.
    conn = None
    repo_id = None

    try:
        repo_info = RepoInfo.parse_repo_info(repo_path)
        conn = pg_conn(db_url)

        if not repo_info:
            # The path could not be parsed; only a bare name is acceptable.
            if not re.fullmatch(r"^[\w\.\-]+$", repo_path):
                raise UserInputError("Invalid Git repository path format")

            repos = _find_repos_by_metadata(conn, repo_path)

            # No repository matches the given repository path.
            if not repos:
                raise UserInputError(
                    "No matching repository found in the database")

            # Exactly one matching repository.
            if len(repos) == 1:
                return _get_repo_summary(conn, repos[0])

            # Multiple repositories match; let the caller choose among them.
            return repos

        repo_id = _try_insert_repo(conn, repo_info)

        # A non-None id triggers a background analysis of the repository.
        # NOTE(review): presumably this means the repo was newly inserted.
        if repo_id is not None:
            Thread(target=analyze_repo, args=(repo_info, repo_id)).start()
            return "The repository has been added to the queue"

        repo_dict = _get_repo_dict_from_repoinfo(conn, repo_info)
        return _get_repo_summary(conn, repo_dict)

    except PG_Error as ex:
        # `conn` is None here when the connection itself could not be opened.
        handle_pg_error(ex, conn, repo_id)
        return "Database error"

    finally:
        # Close the connection only if one was actually established.
        if conn is not None:
            conn.close()
def _repo_basename(repo_path):
    """Return the last component of a repository path, ignoring trailing separators."""
    # Imported locally so this helper does not rely on a file-level import.
    import os

    return os.path.basename(os.path.normpath(repo_path))


def main():
    """
    Entry point of the application.

    Parses the command line arguments, loads the modules of one or two
    repositories, runs the selected clone detection algorithm and writes
    the result to a timestamped JSON file in the current working directory.

    A `UserInputError` raised along the way is logged (when it carries a
    message) and the process exits with the error's code.
    """
    try:
        # Parse command start arguments
        repos, algorithm = handle_cli_args()
        time_snap("Arguments handled; Repositories parsed")

        module_list_1 = get_modules_from_dir(repos[0])
        time_snap("First repository modules parsed")

        if not module_list_1:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")

        # A falsy second entry selects the single-repository analysis.
        if repos[1]:
            module_list_2 = get_modules_from_dir(repos[1])
            time_snap("Second repository modules parsed")

            if not module_list_2:
                raise UserInputError(
                    f"Second repository is empty: \"{repos[1]}\"")

            log.info("Running a two-repository analysis...")
            result = run_two_repos(module_list_1, module_list_2, algorithm)
        else:
            log.info("Running a single-repository analysis...")
            result = run_single_repo(module_list_1, algorithm)

        time_snap("Analysis completed")

        # Save detection result to a JSON file named after the algorithm,
        # the repository name(s) and the current timestamp.
        # The repository name is the last path component, so a trailing
        # separator no longer produces an empty name.
        name_parts = ["clones", algorithm, _repo_basename(repos[0])]

        if repos[1]:
            name_parts.append(_repo_basename(repos[1]))

        name_parts.append(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        json_filename = "_".join(name_parts) + ".json"

        # Explicit UTF-8 keeps the output independent of the platform locale.
        with open(json_filename, "w", encoding="utf-8") as f:
            f.write(result.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
def main():
    """
    Entry point of the application.

    Parses the command line arguments, loads the modules of both
    repositories, runs the selected clone detection algorithm on them and
    writes the result to a timestamped JSON file in the current working
    directory.

    A `UserInputError` raised along the way is logged (when it carries a
    message) and the process exits with the error's code.
    """
    try:
        # Parse command line arguments
        repos, algorithm = handle_cli_args()
        time_snap("Cloned repositories")

        # Load the modules of each repository; an empty result is an error.
        log.info("Parsing methods in repositories...")

        first_modules = get_modules_from_dir(repos[0])
        if not first_modules:
            raise UserInputError(f"First repository is empty: \"{repos[0]}\"")
        time_snap("Parsed first repository")

        second_modules = get_modules_from_dir(repos[1])
        if not second_modules:
            raise UserInputError(f"Second repository is empty: \"{repos[1]}\"")
        time_snap("Parsed second repository")

        log.info("Beginning full analysis...")
        detection = run_two_repos(first_modules, second_modules, algorithm)
        time_snap("Analysis completed")

        # Write the result into the current working directory, making sure
        # the directory exists before opening the file.
        target_dir = os.getcwd()
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        report_name = "clones_" + timestamp + ".json"

        os.makedirs(target_dir, exist_ok=True)
        report_path = os.path.join(target_dir, report_name)

        with open(report_path, "w") as report:
            report.write(detection.json())

    except UserInputError as ex:
        if ex.message:
            log.error(ex.message)

        sys.exit(ex.code)
def run_single_repo(modules, algorithm):
    """
    Dispatch a single-repository clone detection run to the chosen algorithm.

    Arguments:
        modules {list[list[TreeNode]]} -- Modules of the repository.
        algorithm {string} -- Name of the clone detection algorithm to run.

    Raises:
        UserInputError -- If the algorithm cannot be run on a single
                          repository or its name is not recognized.

    Returns:
        DetectionResult -- Result produced by the selected algorithm.
    """
    if algorithm == OXYGEN:
        return oxygen(modules)

    if algorithm == CHLORINE:
        return chlorine_single_repo(modules)

    # Iodine is a recognized name, but it is rejected here with its own
    # message so the user can tell it apart from a mistyped algorithm name.
    if algorithm == IODINE:
        message = f"\"{algorithm}\" can only be run with two repositories"
    else:
        message = f"Invalid algorithm name: \"{algorithm}\""

    raise UserInputError(message)