def connected_components(dataset, base_folder):
    base_folder_path = os.path.join(properties.META_RESULTS_FOLDER, dataset, base_folder)
    contents = ["# Epsilons and methods"]
    for file_path in sorted(cache.list_files(base_folder_path, is_absolute=True)):
        if not file_path.endswith(".csv"):
            continue
        # File names follow the pattern "eps_<epsilon with '_' in place of '.'>.csv".
        epsilon = cache.get_file_name(file_path).split(".")[0].split("_", 1)[1].replace("_", ".")
        LOGGER.debug(file_path)
        graph = networkx.Graph()
        contents.append("## eps = %s" % epsilon)
        with open(file_path) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=",")
            next(csv_reader, None)  # Skip the header row.
            for row in csv_reader:
                graph.add_edge(row[0], row[1])
        n_clusters = networkx.number_connected_components(graph)
        contents.append("#### \\# Functionalities = %d" % n_clusters)
        contents.append("```")
        for i, component in enumerate(networkx.connected_components(graph)):
            contents.append("%d: %s" % (i, ",\n\t".join(component)))
        # Alternative listing with component members sorted numerically:
        # for i, component in enumerate(networkx.connected_components(graph)):
        #     contents.append("%d: %s" % (i, ",".join(map(str, sorted(map(int, component))))))
        contents.append("```")
        LOGGER.info("For epsilon = %s, # clusters = %d" % (epsilon, n_clusters))
    write_file = os.path.join(base_folder_path, "components.md")
    cache.write_file(write_file, "\n".join(contents))
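# Hedged sketch (not part of the original module): illustrates the file-name
# convention assumed by connected_components above. A name such as "eps_0_05"
# is hypothetical; epsilon is encoded with underscores in place of the decimal
# point, and parsing simply reverses that substitution.
def _example_parse_epsilon(file_stem="eps_0_05"):
    # "eps_0_05" -> "0_05" -> "0.05"
    return file_stem.split(".")[0].split("_", 1)[1].replace("_", ".")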
def extract_metadata_for_folder(dataset, problem_id=None):
    sys.path.append(properties.PYTHON_PROJECTS_HOME)
    function_store = get_function_store(dataset)
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset)
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for file_path in cache.list_files(root_folder, check_nest=True, is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if not file_name.startswith(a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s' ..." % helper.get_simple_name(file_path))
        for func in helper.get_generated_functions(file_path):
            function_name = func.__name__
            valid, func_key = is_executable_function(dataset, func, False)
            LOGGER.debug("%s, %s, valid=%s" % (function_name, func_key, valid))
            if valid:
                meta_data = {
                    "name": function_name,
                    "body": inspect.getsource(func),
                    "inputKey": func_key,
                    "filePath": file_path
                }
                function_store.save_py_metadata(meta_data)
    sys.path.remove(properties.PYTHON_PROJECTS_HOME)
def execute(dataset, root_folder):
    for file_path in cache.list_files(root_folder, check_nest=True, is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if file_name == "__init__" or file_name.startswith(a_consts.GENERATED_PREFIX):
            continue
        get_meta_for_file(dataset, file_path)
def export_methods(dataset, problem):
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset, problem)
    for file_path in cache.list_files(root_folder, check_nest=True, is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if file_name == "__init__" or file_name.startswith(a_consts.GENERATED_PREFIX):
            continue
        generate.generate_for_file(dataset, file_path)
def execute_problem(dataset, problem_id=None):
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset)
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for file_path in cache.list_files(root_folder, check_nest=True, is_absolute=True):
        if not cache.get_file_name(file_path).startswith(a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s'" % helper.get_simple_name(file_path))
        execute_file(dataset, file_path)
def runner(force=False):
    store = mongo_driver.MongoStore(props.DATASET)
    if force:
        store.delete_file_stmts(props.TYPE_PYTHON)
    py_files = get_converted_files()
    for i, py_file in enumerate(py_files):
        file_name = cache.get_file_name(py_file)
        if store.load_stmts_for_file_name(py_file):
            LOGGER.info("Processed %s. Moving on ... " % file_name)
            continue
        LOGGER.info("Processing %d / %d ... " % (i + 1, len(py_files)))
        LOGGER.info("Processing %s ... " % file_name)
        lines = PARSER.parse_file(py_file)
        store.store_file_stmts(py_file, lines, props.TYPE_PYTHON)
def convert_notebook(notebook_path):
    write_folder = cache.get_parent_folder(notebook_path)
    file_name = cache.get_file_name(notebook_path)
    write_file = os.path.join(write_folder, "%s.%s" % (file_name, props.TYPE_PYTHON))
    if cache.file_exists(write_file):
        LOGGER.info("'%s' already converted. Moving on ... " % file_name)
        return
    LOGGER.info("Converting filename '%s' ... " % file_name)
    with open(notebook_path) as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)
    exporter = PythonExporter()
    try:
        source, meta = exporter.from_notebook_node(nb)
    except nbformat.validator.NotebookValidationError:
        LOGGER.error("Validation error while converting '%s'." % notebook_path)
        return
    # Write the exported source as UTF-8 bytes.
    with open(write_file, 'wb') as fh:
        fh.write(source.encode('utf-8'))
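# Hedged, self-contained sketch (not part of the original module) of the same
# nbconvert flow for a single notebook; the default path below is illustrative.
def _example_notebook_to_py(notebook_path="example.ipynb"):
    import nbformat
    from nbconvert import PythonExporter
    # Read the notebook as-is and export its cells to a Python source string.
    nb = nbformat.read(notebook_path, as_version=nbformat.NO_CONVERT)
    source, _meta = PythonExporter().from_notebook_node(nb)
    return source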
def get_valid_functions_from_folder(dataset, problem_id=None):
    total_valid_functions = 0
    accessed_keys = set()
    root_folder = properties.PYTHON_PROJECTS_HOME
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for file_path in cache.list_files(root_folder, check_nest=True, is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if not file_name.startswith(a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s'" % helper.get_simple_name(file_path))
        valid_keys, n_generated_functions = get_valid_function_keys_from_file(dataset, file_path)
        LOGGER.info("Valid Functions: %d / %d\n" % (len(valid_keys), n_generated_functions))
        accessed_keys.update(valid_keys)
        total_valid_functions += len(valid_keys)
    LOGGER.info("Total valid functions: %d" % total_valid_functions)
    LOGGER.info("Accessed keys: %s" % accessed_keys)
def convert_notebook(notebook_path, force=False):
    write_folder = cache.get_parent_folder(notebook_path)
    file_name = cache.get_file_name(notebook_path)
    write_file = os.path.join(write_folder, "%s.%s" % (file_name, props.TYPE_R))
    if not force and cache.file_exists(write_file):
        LOGGER.info("'%s' already converted. Moving on ... " % file_name)
        return
    LOGGER.info("Converting filename '%s' ... " % file_name)
    source_code_lines = []
    with open(notebook_path) as fh:
        json_obj = json.load(fh)
        for cell in json_obj['cells']:
            # Keep only non-empty code cells.
            if cell['cell_type'] == 'code' and cell['source']:
                source_code_lines += cell['source']
    source_code = "\n".join(source_code_lines)
    # Write the collected source as UTF-8 bytes.
    with open(write_file, 'wb') as fh:
        fh.write(source_code.encode('utf-8'))
    # Discard the converted file if it does not compile as valid R.
    if not functions.r_compile(write_file, del_compiled=True):
        cache.delete_file(write_file)
        return False
    return True
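# Hedged sketch (not part of the original module) of the minimal .ipynb JSON
# shape the loop above relies on: code cells store their source as a list of
# lines, and only non-empty code cells contribute to the exported script. The
# cell contents are illustrative.
def _example_collect_code_cells():
    notebook_json = {
        "cells": [
            {"cell_type": "markdown", "source": ["# notes"]},
            {"cell_type": "code", "source": ["x <- 1", "print(x)"]},
        ]
    }
    lines = []
    for cell in notebook_json["cells"]:
        if cell["cell_type"] == "code" and cell["source"]:
            lines += cell["source"]
    return "\n".join(lines)  # -> "x <- 1\nprint(x)"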