Example #1
def connected_components(dataset, base_folder):
    base_folder_path = os.path.join(properties.META_RESULTS_FOLDER, dataset,
                                    base_folder)
    contents = ["# Epsilons and methods"]
    for file_path in sorted(
            cache.list_files(base_folder_path, is_absolute=True)):
        if not file_path.endswith(".csv"):
            continue
        # e.g. "eps_0_05.csv" -> epsilon "0.05" (assumed file-naming scheme)
        epsilon = cache.get_file_name(file_path).split(".")[0].split(
            "_", 1)[1].replace("_", ".")
        print(file_path)
        graph = networkx.Graph()
        contents.append("## eps = %s" % epsilon)
        with open(file_path) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=",")
            next(csv_reader, None)  # skip the header row
            for row in csv_reader:
                graph.add_edge(row[0], row[1])
            n_clusters = networkx.number_connected_components(graph)
            contents.append("#### \# Functionalities = %d" % n_clusters)
            contents.append("```")
            # One line per component: index, then a tab-indented member list.
            for i, component in enumerate(
                    networkx.connected_components(graph)):
                contents.append("%d: %s" % (i, ",\n\t".join(component)))
            contents.append("```")
        LOGGER.info("For epsilon = %s, # clusters = %d" %
                    (epsilon, n_clusters))
    write_file = os.path.join(base_folder_path, "components.md")
    cache.write_file(write_file, "\n".join(contents))
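
A minimal, self-contained sketch of the networkx pattern Example #1 relies on: build an undirected graph from edge pairs, count its connected components, then enumerate the members of each one. The edge list here is made up for illustration.

import networkx

# Hypothetical edges: each pair marks two methods judged similar for some epsilon.
edges = [("f1", "f2"), ("f2", "f3"), ("g1", "g2")]
graph = networkx.Graph()
for left, right in edges:
    graph.add_edge(left, right)

print(networkx.number_connected_components(graph))  # -> 2
for i, component in enumerate(networkx.connected_components(graph)):
    print("%d: %s" % (i, ", ".join(sorted(component))))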
Example #2
def extract_metadata_for_folder(dataset, problem_id=None):
    sys.path.append(properties.PYTHON_PROJECTS_HOME)
    function_store = get_function_store(dataset)
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset)
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for file_path in cache.list_files(root_folder,
                                      check_nest=True,
                                      is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if not file_name.startswith(a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s' ..." % helper.get_simple_name(file_path))
        for func in helper.get_generated_functions(file_path):
            function_name = func.__name__
            valid, func_key = is_executable_function(dataset, func, False)
            print(function_name, func_key, valid)
            if valid:
                meta_data = {
                    "name": function_name,
                    "body": inspect.getsource(func),
                    "inputKey": func_key,
                    "filePath": file_path
                }
                function_store.save_py_metadata(meta_data)
    sys.path.remove(properties.PYTHON_PROJECTS_HOME)
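
Example #2 stores each function body with inspect.getsource, which returns the exact text of a definition as it appears in its source file. A tiny standalone illustration (the function itself is made up):

import inspect

def scaled(x, factor=2):
    # An arbitrary example function.
    return x * factor

# Prints the full 'def scaled(...)' block, comments included; this is
# the kind of text stored under the "body" key above.
print(inspect.getsource(scaled))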
Example #3
def execute(dataset, root_folder):
    for file_path in cache.list_files(root_folder,
                                      check_nest=True,
                                      is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if file_name == "__init__" or file_name.startswith(
                a_consts.GENERATED_PREFIX):
            continue
        get_meta_for_file(dataset, file_path)
Example #4
def export_methods(dataset, problem):
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset,
                               problem)
    for file_path in cache.list_files(root_folder,
                                      check_nest=True,
                                      is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if file_name == "__init__" or file_name.startswith(
                a_consts.GENERATED_PREFIX):
            continue
        generate.generate_for_file(dataset, file_path)
Example #5
def execute_problem(dataset, problem_id=None):
    root_folder = os.path.join(properties.PYTHON_PROJECTS_HOME, dataset)
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for file_path in cache.list_files(root_folder,
                                      check_nest=True,
                                      is_absolute=True):
        if not cache.get_file_name(file_path).startswith(
                a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s'" % helper.get_simple_name(file_path))
        execute_file(dataset, file_path)
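
Examples #3 through #5 share the same walk-and-filter loop built on the project-specific cache.list_files helper. Assuming check_nest=True means "recurse into subfolders" and is_absolute=True means "yield absolute paths", a rough stdlib equivalent of the pattern might look like this (the root path and "gen_" prefix are hypothetical):

import os

def list_files(root_folder):
    # Recursively yield absolute paths of all files under root_folder.
    for folder, _, file_names in os.walk(root_folder):
        for file_name in file_names:
            yield os.path.abspath(os.path.join(folder, file_name))

for file_path in list_files("/tmp/projects"):
    name = os.path.splitext(os.path.basename(file_path))[0]
    if name == "__init__" or name.startswith("gen_"):
        continue
    print("Would process: %s" % file_path)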
Example #6
def runner(force=False):
  store = mongo_driver.MongoStore(props.DATASET)
  if force:
    store.delete_file_stmts(props.TYPE_PYTHON)
  py_files = get_converted_files()
  for i, py_file in enumerate(py_files):
    file_name = cache.get_file_name(py_file)
    if store.load_stmts_for_file_name(file_name):
      LOGGER.info("Processed %s. Moving on ... " % file_name)
      continue
    LOGGER.info("Processing %d / %d ... " % (i + 1, len(py_files)))
    LOGGER.info("Processing %s ... " % file_name)
    lines = PARSER.parse_file(py_file)
    store.store_file_stmts(py_file, lines, props.TYPE_PYTHON)
Example #7
def convert_notebook(notebook_path):
  write_folder = cache.get_parent_folder(notebook_path)
  file_name = cache.get_file_name(notebook_path)
  write_file = os.path.join(write_folder, "%s.%s" % (file_name, props.TYPE_PYTHON))
  if cache.file_exists(write_file):
    LOGGER.info("'%s' already converted. Moving on ... " % file_name)
    return
  else:
    LOGGER.info("Converting filename '%s' ... " % file_name)
  with open(notebook_path) as fh:
    nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)
  exporter = PythonExporter()
  try:
    source, meta = exporter.from_notebook_node(nb)
  except nbformat.validator.NotebookValidationError:
    LOGGER.error("Validation error while converting '%s'." % notebook_path)
    return
  with open(write_file, 'w', encoding='utf-8') as fh:
    fh.write(source)
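
The nbformat/nbconvert calls in Example #7 can be exercised on their own; this sketch converts one notebook (the paths are hypothetical) into a Python source string and writes it out in text mode:

import nbformat
from nbconvert import PythonExporter

with open("example.ipynb") as fh:
    nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

# from_notebook_node returns the exported source plus a resources dict.
source, resources = PythonExporter().from_notebook_node(nb)
with open("example.py", "w", encoding="utf-8") as fh:
    fh.write(source)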
Example #8
def get_valid_functions_from_folder(dataset, problem_id=None):
    total_valid_functions = 0
    accessed_keys = set()
    root_folder = properties.PYTHON_PROJECTS_HOME
    if problem_id:
        root_folder = os.path.join(root_folder, problem_id)
    for file_path in cache.list_files(root_folder,
                                      check_nest=True,
                                      is_absolute=True):
        file_name = cache.get_file_name(file_path)
        if not file_name.startswith(a_consts.GENERATED_PREFIX):
            continue
        LOGGER.info("Processing '%s'" % helper.get_simple_name(file_path))
        valid_keys, n_generated_functions = get_valid_function_keys_from_file(
            dataset, file_path)
        LOGGER.info("Valid Functions: %d / %d\n" %
                    (len(valid_keys), n_generated_functions))
        accessed_keys.update(valid_keys)
        total_valid_functions += len(valid_keys)
    LOGGER.info("Total valid functions: %d" % total_valid_functions)
    print(accessed_keys)
Example #9
def convert_notebook(notebook_path, force=False):
    write_folder = cache.get_parent_folder(notebook_path)
    file_name = cache.get_file_name(notebook_path)
    write_file = os.path.join(write_folder,
                              "%s.%s" % (file_name, props.TYPE_R))
    if not force and cache.file_exists(write_file):
        LOGGER.info("'%s' already converted. Moving on ... " % file_name)
        return
    else:
        LOGGER.info("Converting filename '%s' ... " % file_name)
    source_code_lines = []
    with open(notebook_path) as fh:
        json_obj = json.load(fh)
        cells = json_obj['cells']
        for cell in cells:
            if cell['cell_type'] == 'code' and cell['source']:
                source_code_lines += cell['source']
    source_code = "\n".join(source_code_lines)
    with open(write_file, 'w', encoding='utf-8') as fh:
        fh.write(source_code)
    if not functions.r_compile(write_file, del_compiled=True):
        cache.delete_file(write_file)
        return False
    return True
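
Example #9 skips nbconvert entirely and leans on the .ipynb JSON layout: a top-level "cells" list whose code cells carry their text as a list of line strings under "source". A standalone sketch of that extraction step, with a hypothetical notebook path:

import json

with open("analysis.ipynb") as fh:
    cells = json.load(fh)["cells"]

# Concatenate each code cell's lines, then join cells with newlines.
source_code = "\n".join(
    "".join(cell["source"])
    for cell in cells
    if cell["cell_type"] == "code" and cell["source"]
)
print(source_code)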