def statistic_dataset():
    # statistic [max, min, avg] of [python files, LOC, import lines] on the GitHub dataset
    print("Statistic Github dataset")
    root = EGO_GITHUB_ROOT
    meta_path = os.path.join(root, "metadata.json")
    metadata = read_object_from_file(meta_path)
    statistic_all(root, metadata)
def statistic_pkgs_ego():
    # statistic packages installed by the PyEGo-generated Dockerfiles
    print("Statistic PyEGo-installed packages")
    ego_root = EGO_GITHUB_ROOT
    meta_path = os.path.join(ego_root, "metadata.json")
    metadata = read_object_from_file(meta_path)
    count_ego_all(ego_root, metadata=metadata)
def statistic_pkgs_me():
    # statistic packages installed by the DockerizeMe-generated Dockerfiles
    print("Statistic DockerizeMe-installed packages")
    me_root = ME_GITHUB_ROOT_39
    ego_root = EGO_GITHUB_ROOT
    meta_path = os.path.join(ego_root, "metadata.json")
    metadata = read_object_from_file(meta_path)
    count_ego_all(me_root, metadata=metadata)
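
# A minimal sketch of the kind of aggregation statistic_all() performs over the
# dataset, assuming each project directory holds plain .py files. The helpers
# project_stats and summarize are hypothetical illustrations, not part of the
# real codebase.
def project_stats(project_root):
    """Count python files, total LOC, and import lines for one project."""
    import os  # also imported at module top; repeated here for self-containedness
    n_files, loc, import_lines = 0, 0, 0
    for dirpath, _, filenames in os.walk(project_root):
        for name in filenames:
            if not name.endswith(".py"):
                continue
            n_files += 1
            with open(os.path.join(dirpath, name), errors="ignore") as f:
                for line in f:
                    loc += 1
                    if line.lstrip().startswith(("import ", "from ")):
                        import_lines += 1
    return n_files, loc, import_lines

def summarize(values):
    """Return (max, min, avg) for a list of per-project counts."""
    return max(values), min(values), sum(values) / len(values)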
def test_time_ego():
    # test the time cost of PyEGo on the GitHub dataset
    root = EGO_GITHUB_ROOT
    metadata_path = os.path.join(root, "metadata.json")
    metadata = read_object_from_file(metadata_path)
    start = datetime.now()
    batch_test(root, metadata, "PyEGo", generate_only=True)
    end = datetime.now()
    cost = end - start
    print("total: {}s, avg: {}s/item".format(cost.seconds, 1. * cost.seconds / 100))
def test_time_pipreqs():
    # test the time cost of pipreqs on the GitHub dataset
    # NOTE: the measurement is unreliable here, because os.popen launches the
    # command asynchronously, so the subprocess may still be running when the
    # timer stops
    start = datetime.now()
    root = REQS_GITHUB_ROOT_39
    metadata_path = os.path.join(root, "metadata.json")
    metadata = read_object_from_file(metadata_path)
    batch_test(root, metadata, "pipreqs", generate_only=True)
    end = datetime.now()
    cost = end - start
    print("total: {}s, avg: {}s/item".format(cost.seconds, 1. * cost.seconds / 100))
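
# Both timing functions above divide by a hardcoded 100, which presumably
# matches the dataset size. A minimal sketch of a variant that derives the item
# count from the metadata instead; report_time_cost is a hypothetical helper,
# and len(metadata) assumes one metadata entry per project.
def report_time_cost(start, end, metadata):
    cost = (end - start).total_seconds()
    n_items = len(metadata)  # assumption: one metadata entry per project
    print("total: {:.1f}s, avg: {:.2f}s/item".format(cost, cost / n_items))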
def commit(commit_message):
    '''
    Commit current index to disk.
    '''
    repo_path = find_pygit_repo()
    current_index_path = repo_path + '/' + globalVars.current_index_file_name
    commit_log_path = repo_path + '/' + globalVars.commit_log
    if not os.path.exists(current_index_path):
        sys.stdout.write('Nothing to commit.')
    else:
        # This should be a more interesting data structure.
        if os.path.exists(commit_log_path):
            commit_log_list = utils.read_object_from_file(commit_log_path)
        else:
            commit_log_list = []
        # do stuff with commit_log_list
        current_index_dict = utils.read_object_from_file(current_index_path)
        for filename, file_contents in current_index_dict.items():
            # do nothing for now. Compute hash later
            pass
        # Use the first file name as the representative hash string.
        representative_hash_string = utils.compute_string_hash(next(iter(current_index_dict)))
        if not os.path.exists(globalVars.blob_object_location):
            utils.write_error_message_and_exit("Broken pygit repo. Cannot find blob objects location")
        else:
            current_commit_file_name = globalVars.blob_object_location + '/' + representative_hash_string
            utils.write_object_to_file(current_commit_file_name, current_index_dict)
            commit_log_list.append((representative_hash_string, current_commit_file_name))
            utils.write_object_to_file(commit_log_path, commit_log_list)
            os.remove(current_index_path)
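
# The loop in commit() leaves per-file content hashing as a TODO. A minimal
# sketch of what that step might look like; compute_blob_hash is a hypothetical
# helper, not part of the real utils module.
import hashlib

def compute_blob_hash(file_contents):
    """Return a stable SHA-1 hex digest for a file's contents."""
    data = file_contents if isinstance(file_contents, bytes) else file_contents.encode('utf-8')
    return hashlib.sha1(data).hexdigest()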
def compare_pkgs_ego_pipreqs():
    # statistic packages installed in projects solved by both PyEGo and pipreqs
    # requires the execution logs of PyEGo and pipreqs
    print("Compare PyEGo with Pipreqs-3.9")
    ego_root = EGO_GITHUB_ROOT
    ego_log = EGO_GITHUB_LOG
    reqs_root = REQS_GITHUB_ROOT_39
    reqs_log = REQS_GITHUB_LOG_39
    meta_path = os.path.join(ego_root, "metadata.json")
    metadata = read_object_from_file(meta_path)
    count_same_ego_reqs(ego_root, ego_log, reqs_root, reqs_log, metadata=metadata)
def generate_github_overview():
    ego_log = TestResults("PyEGo", "PyEGo", dataset="github", log_path=EGO_GITHUB_LOG)
    me_log_39 = TestResults("DockerizeMe-3.9", "DockerizeMe", dataset="github", log_path=ME_GITHUB_LOG_39)
    reqs_log_39 = TestResults("pipreqs-3.9", "PyEGo", dataset="github", log_path=REQS_GITHUB_LOG_39)
    logs = [ego_log, me_log_39, reqs_log_39]
    meta_path = os.path.join(EGO_GITHUB_ROOT, "metadata.json")
    metadata = read_object_from_file(meta_path)
    # replace project ids with project names in each log
    logs = [convert_id_to_name(log, metadata) for log in logs]
    output = "./result_github.csv"
    generate_overview(logs, output)
def compare_pkgs_ego_me():
    # statistic packages installed in projects solved by both PyEGo and DockerizeMe
    # requires the execution logs of PyEGo and DockerizeMe
    # (results of DockerizeMe-3.8 and DockerizeMe-3.9 are the same)
    print("Compare PyEGo with DockerizeMe")
    ego_root = EGO_GITHUB_ROOT
    ego_log = EGO_GITHUB_LOG
    me_root = ME_GITHUB_ROOT_39
    me_log = ME_GITHUB_LOG_39
    meta_path = os.path.join(ego_root, "metadata.json")
    metadata = read_object_from_file(meta_path)
    count_same_ego_me(ego_root, ego_log, me_root, me_log, dataset="github", metadata=metadata)
def add(relative_file_path):
    try:
        pygit_repo_path = utils.find_pygit_repo()
    except utils.RepoNotFoundException:
        sys.stderr.write('Could not find pygit repo.')
        sys.exit(41)
    # Check if file path exists
    if not os.path.exists(relative_file_path):
        sys.stderr.write('File to add does not exist.')
        sys.exit(43)
    # Check if previously tracked and so forth for other status information.
    start_cwd = os.getcwd()
    os.chdir(pygit_repo_path + '/.pygit')
    index_set = utils.read_object_from_file(globalVars.index_file_name)
    index_set.add(relative_file_path)
    utils.write_object_to_file(globalVars.index_file_name, index_set)
    os.chdir(start_cwd)
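
# The "previously tracked" check in add() is deferred. A minimal sketch of how
# it might guard against staging a path twice, assuming index_set holds relative
# paths; add_if_untracked is a hypothetical helper, not part of the real code.
def add_if_untracked(relative_file_path, index_set):
    """Stage the path only if it is not already in the index."""
    if relative_file_path in index_set:
        sys.stderr.write('File is already tracked.\n')
        return False
    index_set.add(relative_file_path)
    return True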
def run_test_pipreqs(pyver="3.8"):
    # run pipreqs on the GitHub dataset; results are logged in
    # log/github_test.<YYYYMMDD>.log
    # note: root is fixed to the 3.9 dataset directory even when pyver differs
    root = REQS_GITHUB_ROOT_39
    metadata_path = os.path.join(root, "metadata.json")
    metadata = read_object_from_file(metadata_path)
    batch_test(root, metadata, "pipreqs", False, pyver)
def run_test_ego():
    # run PyEGo on the GitHub dataset; results are logged in
    # log/github_test.<YYYYMMDD>.log
    root = EGO_GITHUB_ROOT
    metadata_path = os.path.join(root, "metadata.json")
    metadata = read_object_from_file(metadata_path)
    batch_test(root, metadata, "PyEGo")
import os
import re

import config
from ModuleParser.file_parser import parse_import_modules
from ModuleParser.folder_parser import extract_all_py_filepath, parse_custom_top_levels
from ModuleParser.module_filter import filter_custom_modules, apart_standard_modules
from utils import read_object_from_file, write_object_to_file
from ModuleParser.neo4j_reader import get_pyvers_by_module, get_all_pyvers, \
    get_python_features, get_pkgvers_by_module_pyvers, get_os_by_pkg, get_std_top_modules, get_rank_by_pkg

# Load standard-library top-level modules from cache, or fetch and cache them.
std_top_levels = read_object_from_file(config.STD_TOP_CACHE_PATH)
if not std_top_levels:
    std_top_levels = get_std_top_modules()
    write_object_to_file(config.STD_TOP_CACHE_PATH, std_top_levels)


def parse_modules(root):
    # root may be either a file or a folder
    # extract all custom top-level modules and python files
    custom_top_levels = list()
    all_py_filepath = list()
    if os.path.isdir(root):
        custom_top_levels.extend(parse_custom_top_levels(root, need_init=False))
        all_py_filepath.extend(extract_all_py_filepath(root))
    elif root.endswith(".py"):
        all_py_filepath.append(root)
    else:
        return None, None, None, None
    # extract top and second level modules used
        pkg, method = pkgmth.split("#")
        if method == "pip" or pkg == "python":
            continue
        dep_dict[pkgmth] = dict()
        ver_dict = pkgver_dict[pkgmth]
        for ver in ver_dict:
            dep_dict[pkgmth][ver] = list()
    return dep_dict


def convert_pkgvers_to_constrain_dict(pkgvers):
    constrain_dict = dict()
    for pkgmth in pkgvers:
        pkg, method = pkgmth.split("#")
        if method == "pip" or pkg == "python":
            constrain_dict[pkg] = pkgvers[pkgmth]
        else:
            constrain_dict[pkgmth] = pkgvers[pkgmth]
    return constrain_dict


# Load pip dependencies from cache, or fetch and cache them.
pip_deps_dict = read_object_from_file(config.PIP_DEPS_CACHE_PATH)
if not pip_deps_dict:
    print("Caching pip dependencies in file...")
    pip_deps_dict = get_pip_deps()
    write_object_to_file(config.PIP_DEPS_CACHE_PATH, pip_deps_dict)
    print("Dependencies cached.")
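
# The std_top_levels and pip_deps_dict loads above repeat the same
# read-or-compute caching pattern. A minimal generic sketch of that pattern;
# cached_load is a hypothetical helper, not part of the real codebase.
def cached_load(cache_path, compute_fn):
    """Return the cached object at cache_path, computing and caching it if absent."""
    obj = read_object_from_file(cache_path)
    if not obj:
        obj = compute_fn()
        write_object_to_file(cache_path, obj)
    return obj

# Usage sketch:
# std_top_levels = cached_load(config.STD_TOP_CACHE_PATH, get_std_top_modules)
# pip_deps_dict = cached_load(config.PIP_DEPS_CACHE_PATH, get_pip_deps)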