def _code_jam():
    """
    Build the coverage report for the codejam projects and write it to disk.

    Every entry of ``properties.META_RESULTS_FOLDER`` whose name matches the
    codejam naming pattern is treated as a project. For each entry listed
    under a project, ``coverage`` is called and its ``'code_coverage'`` value
    is collected. One ``stat.Stat`` report is emitted per project, followed by
    one report over all collected values.

    The joined report is written to ``<RESULTS_FOLDER>/codejam/coverage.md``.

    :return: The complete report text (sections joined with newlines)
    """
    name_pattern = r'Y\d+rUtils\d+P\d+'
    overall_percentages = []
    sections = []
    candidates = sorted(cache.list_dir(properties.META_RESULTS_FOLDER, is_absolute=False))
    for project_name in candidates:
        # Skip anything that is not named like a codejam project.
        if not re.match(name_pattern, project_name):
            continue
        LOGGER.info("For project %s" % project_name)
        users_root = "%s/%s" % (properties.META_RESULTS_FOLDER, project_name)
        # coverage() receives the "<project>/<user>" path, not the path under
        # META_RESULTS_FOLDER.
        project_percentages = [
            coverage(os.path.join(project_name, user))['code_coverage']
            for user in cache.list_dir(users_root, is_absolute=False)
        ]
        sections.append("\n### Project: %s" % project_name)
        sections.append(stat.Stat(project_percentages).report())
        overall_percentages.extend(project_percentages)
    LOGGER.info("Aggregating ...")
    sections.append("\n### Total Coverage")
    sections.append(stat.Stat(overall_percentages).report())
    report = "\n".join(sections)
    target_file = os.path.join(properties.RESULTS_FOLDER, "codejam", "coverage.md")
    cache.write_file(target_file, report)
    return report
def save_only_mixed_clusters(dataset, mixed_file_base_name):
    """
    Keep only the clusters whose functions come from more than one source.

    For every folder under ``<clusters folder of dataset>/cluster_testing``,
    the pickle ``<mixed_file_base_name>.pkl`` is loaded as a mapping of
    cluster label to functions. A cluster is kept when its label is not -1,
    it does not hold exactly one function, and its functions have at least
    two distinct ``source`` values. The kept clusters are written next to the
    input as ``only_mixed.txt`` (function bodies grouped per cluster) and
    ``only_mixed.pkl``.

    :param dataset: Name of dataset
    :param mixed_file_base_name: Base name (without ``.pkl``) of the clusters
      file to read in each folder, eg. java_python
    :return: None
    """
    testing_root = os.path.join(lib.get_clusters_folder(dataset), "cluster_testing")
    for folder in sorted(cache.list_dir(testing_root, is_absolute=False)):
        LOGGER.info("Processing '%s' ..." % folder)
        folder_path = os.path.join(testing_root, folder)
        all_clusters = cache.load_pickle(os.path.join(folder_path, "%s.pkl" % mixed_file_base_name))
        # Label -1 is presumably the "unclustered" label - TODO confirm.
        mixed_clusters = {
            label: members
            for label, members in all_clusters.items()
            if label != -1
            and len(members) != 1
            and len({member.source for member in members}) > 1
        }
        LOGGER.info("For folder = %s, # of mixed clusters = %d" % (folder, len(mixed_clusters)))
        lines = []
        for label, members in mixed_clusters.items():
            lines.append("\n\n****** Cluster %d ******" % label)
            lines.extend(member.body for member in members)
        cache.write_file(os.path.join(folder_path, "only_mixed.txt"), "\n".join(lines))
        cache.save_pickle(os.path.join(folder_path, "only_mixed.pkl"), mixed_clusters)
def save_only_target_functions(dataset, mixed_file_base_name, target_language):
    """
    Keep only the clusters made up entirely of target-language functions.

    For every folder under ``<clusters folder of dataset>/cluster_testing``,
    the pickle ``<mixed_file_base_name>.pkl`` is loaded as a mapping of
    cluster label to functions. A cluster is kept when its label is not -1,
    it does not hold exactly one function, and every function's ``source``
    equals ``target_language``. The kept clusters are written next to the
    input as ``only_<target_language>.txt`` (function bodies grouped per
    cluster) and ``only_<target_language>.pkl``.

    :param dataset: Name of dataset
    :param mixed_file_base_name: Base name (without ``.pkl``) of the clusters
      file to read in each folder, eg. java_python
    :param target_language: Value of ``source`` that every function in a kept
      cluster must have
    :return: None
    """
    testing_root = os.path.join(lib.get_clusters_folder(dataset), "cluster_testing")
    folder_names = sorted(cache.list_dir(testing_root, is_absolute=False))
    for folder in folder_names:
        LOGGER.info("Processing '%s' ..." % folder)
        folder_path = os.path.join(testing_root, folder)
        # NOTE(review): the folder was just listed, so this is presumably a
        # no-op safeguard - confirm cache.mkdir tolerates existing folders.
        cache.mkdir(folder_path)
        all_clusters = cache.load_pickle(os.path.join(folder_path, "%s.pkl" % mixed_file_base_name))
        target_clusters = {}
        for label, members in all_clusters.items():
            if label == -1 or len(members) == 1:
                continue
            member_sources = [member.source for member in members]
            # Keep only non-empty clusters with no function from another source.
            if member_sources and all(src == target_language for src in member_sources):
                target_clusters[label] = members
        LOGGER.info("For folder = %s, # of '%s' clusters = %d" % (folder, target_language, len(target_clusters)))
        lines = []
        for label, members in target_clusters.items():
            lines.append("\n\n****** Cluster %d ******" % label)
            lines.extend(member.body for member in members)
        cache.write_file(os.path.join(folder_path, "only_%s.txt" % target_language), "\n".join(lines))
        cache.save_pickle(os.path.join(folder_path, "only_%s.pkl" % target_language), target_clusters)