Ejemplo n.º 1
0
def java_is_parsed():
    """Report whether a GitHub project has already been parsed.

    Reads a JSON request body that must contain ``github_short_url`` and
    ``parsing_type`` (``'all'`` or ``'packageclassonly'``).

    Returns:
        A ``(response, status)`` tuple:
        * ``{'is-parsed': result}``, 200 when the lookup succeeds;
        * ``{'Error': ...}``, 400 when the request body is invalid;
        * ``{'Error': ...}``, 500 when the Neo4j driver cannot be obtained or
          the query fails.
    """
    payload = request.json
    if not payload or 'github_short_url' not in payload or 'parsing_type' not in payload:
        return jsonify({'Error': 'Must POST JSON request with github_short_url and parsing_type fields'}), 400

    if payload['parsing_type'] not in ('all', 'packageclassonly'):
        return jsonify({'Error': "Parsing type must either be 'all' or 'packageclassonly'"}), 400

    task = {
        'github_short_url': payload['github_short_url'],
        'parsing_type': payload['parsing_type'],
    }

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                # NOTE(review): is_package_parsed is referenced unqualified
                # here; confirm it is in scope and accepts
                # (github_short_url, parsing_type).
                result = session.read_transaction(
                    is_package_parsed,
                    task.get('github_short_url'),
                    task.get('parsing_type'))
                return jsonify({'is-parsed': result}), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'Error': 'Error occurred connecting to neo4j'}), 500
        finally:
            # No `return` here: returning from `finally` would discard the
            # response produced by the try/except branches above.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'Error': 'Error occurred fetching neo4j driver'}), 500
Ejemplo n.º 2
0
def fetch_project(github_short_url):
    """Fetch a project's description and record it, with its poms, in Neo4j.

    Requests ``/java/project/<github_short_url>?remote=true`` from the depends
    service. When the response is 200 and carries a ``github_repo_name``, the
    project node is added, flagged with ``projectsearch = "True"``, and every
    entry of the response's ``pom`` list is handed to ``parse_pom`` with
    priority ``'high'``.

    Errors are printed via ``traceback`` and never propagated to the caller.

    Args:
        github_short_url: Project identifier interpolated into the depends
            service URL.

    Returns:
        None.
    """
    try:
        driver = utils.get_neo4j()
        try:
            # Looked up inside the inner try so that a failure here still
            # closes the driver in the finally clause below.
            depends_service_url = utils.get_depends_service()

            # request information about the project from the pom-search-service
            response = requests.get("{}/java/project/{}?remote=true".format(depends_service_url, github_short_url))
            if response.status_code != 200:
                return

            # the response is returned as JSON, parse it into a python object
            parsed_response = json.loads(response.content)

            # retrieve the name of the repository from the response
            repo_name = parsed_response.get('github_repo_name')
            if repo_name is None:
                return

            with driver.session() as session:
                # add the repository to neo4j as a new vertex
                session.write_transaction(neo4j_queries.add_project_node, repo_name)

                # record that this project has been searched
                session.write_transaction(neo4j_queries.add_attribute_to_project, repo_name, "projectsearch", "True")

                # Parse each pom file associated with the project. This will
                # initiate searches for any repositories dependent on
                # artifacts produced by this repository. A missing 'pom'
                # entry is treated as "no pom files" rather than an error.
                for pom in parsed_response.get('pom') or []:
                    parse_pom(session, pom, repo_name, 'high')
        except Exception:
            traceback.print_exc()
        finally:
            driver.close()
    except Exception:
        traceback.print_exc()
        return
Ejemplo n.º 3
0
def create_parse_project_task():
    """Queue a project for AST parsing unless it is already parsed or pending.

    Reads a JSON request body that must contain ``github_short_url`` and
    ``parsing_type`` (``'all'`` or ``'packageclassonly'``). The project's
    ``ast-parsed`` attribute is consulted first; when it equals the requested
    parsing type, ``'in-progress'``, ``'all'`` or ``'queued'`` no job is
    enqueued. Otherwise the attribute is set to ``'queued'`` and ``parse_repo``
    is enqueued on ``q`` with a 7200 second job timeout.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'task': task}``, 200 when a job was enqueued;
        * ``{'task': task, 'state': <current state>}``, 200 when the project
          was already parsed/queued and nothing was enqueued;
        * ``{'Error': ...}``, 400 for an invalid request body;
        * ``{'Error': ...}``, 500 when the Neo4j driver cannot be obtained.
    """
    payload = request.json
    if not payload or 'github_short_url' not in payload or 'parsing_type' not in payload:
        return jsonify({'Error': 'Must POST JSON request with github_short_url and parsing_type fields'}), 400

    if payload['parsing_type'] not in ('all', 'packageclassonly'):
        return jsonify({'Error': "Parsing type must either be 'all' or 'packageclassonly'"}), 400

    task = {
        'github_short_url': payload['github_short_url'],
        'parsing_type': payload['parsing_type'],
    }

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                # check if the project has already been parsed, or if parsing is in progress. If it is, then
                # don't requeue the project for parsing
                result = session.write_transaction(neo4j_queries.retrieve_attribute_value, task['github_short_url'], 'ast-parsed')
                if result in (task['parsing_type'], 'in-progress', 'all', 'queued'):
                    print("AST tree already parsed for project {}".format(task['github_short_url']))
                    # A view must return a response; a bare `return` here
                    # made the framework fail instead of answering the client.
                    return jsonify({'task': task, 'state': result}), 200

                session.write_transaction(neo4j_queries.add_attribute_to_project, task['github_short_url'], 'ast-parsed', 'queued')
        except Exception:
            # Best effort: a failed state check is logged and the job is
            # still enqueued below.
            traceback.print_exc()
        finally:
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'Error': 'Error occurred fetching neo4j driver'}), 500

    q.enqueue(parse_repo, task.get('github_short_url'), task.get('parsing_type'), job_timeout=7200)

    return jsonify({'task': task}), 200
Ejemplo n.º 4
0
def is_ast_parsed(group, project):
    """Return the stored ``projectsearch`` state of a project.

    Args:
        group: First component of the project identifier.
        project: Second component of the project identifier; the attribute is
            looked up under ``"<group>/<project>"``.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'state': ...}``, 200 when the project exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    print(group)
    print(project)

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.project_exists,
                                                group, project):
                    return jsonify({
                        'status':
                        'ERROR',
                        'reason':
                        'Project does not exist in Neo4j. Have you submitted a parse job to /init/dependents-search/pom yet?'
                    }), 400

                # NOTE(review): the function name refers to AST parsing but
                # the attribute read is 'projectsearch' -- confirm intended.
                ast_parse_state = session.read_transaction(
                    neo4j_queries.retrieve_project_attribute_value,
                    "{}/{}".format(group, project), 'projectsearch')
                print(ast_parse_state)
                return jsonify({
                    'status': 'ok',
                    'state': ast_parse_state
                }), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 5
0
def retrieve_project_calls(group, project):
    """Return the result of the ``all_project_dependencies`` query for a project.

    Args:
        group: First component of the project identifier.
        project: Second component of the project identifier.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'data': ...}``, 200 when the project exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.project_exists,
                                                group, project):
                    return jsonify({
                        'status':
                        'ERROR',
                        'reason':
                        'Project does not exist in Neo4j. Have you submitted a parse job to /init/dependents-search/pom yet?'
                    }), 400

                result = session.read_transaction(
                    neo4j_queries.all_project_dependencies, group, project)
                return jsonify({'status': 'ok', 'data': result}), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 6
0
def retrieve_dependents_of_node(group, project):
    """Return the result of the ``dependents_from_node`` query for one node.

    The node is selected by the ``label`` and ``id`` query-string arguments of
    the current request.

    Args:
        group: First component of the project identifier.
        project: Second component of the project identifier.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'data': ...}``, 200 when the project exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    node_label = request.args.get('label')
    node_id = request.args.get('id')

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.project_exists,
                                                group, project):
                    return jsonify({
                        'status':
                        'ERROR',
                        'reason':
                        'Project does not exist in Neo4j. Have you submitted a parse job to /init/dependents-search/pom yet?'
                    }), 400

                result = session.read_transaction(
                    neo4j_queries.dependents_from_node, group, project,
                    node_label, node_id)
                return jsonify({'status': 'ok', 'data': result}), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 7
0
def retrieve_hierarchy(group, project):
    """
    This endpoint retrieves all children of the project that are called by at
    least one dependent project.

    Args:
        group: First component of the project identifier.
        project: Second component of the project identifier.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'data': ...}``, 200 with the result of the
          ``project_hierarchy`` query when the project exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.project_exists,
                                                group, project):
                    return jsonify({
                        'status':
                        'ERROR',
                        'reason':
                        'Project does not exist in Neo4j. Have you submitted a parse job to /init/dependents-search/pom yet?'
                    }), 400

                result = session.read_transaction(
                    neo4j_queries.project_hierarchy, group, project)
                return jsonify({'status': 'ok', 'data': result}), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 8
0
def fetch_package_transitive_dependents(group, project):
    """Return transitive-dependent statistics for an artifact.

    Runs three read queries (transitive dependents, total cached dependents,
    dependents found so far) and extrapolates an estimated transitive count as
    ``transitive_count / searched_count * cached_count``.

    Args:
        group: Artifact group passed to the Neo4j queries.
        project: Artifact name passed to the Neo4j queries.

    Returns:
        A ``(response, status)`` tuple:
        * the statistics payload with ``'status': 'ok'``, 200 when the
          artifact exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    print(group)
    print(project)

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.artifact_exists,
                                                group, project):
                    return jsonify({
                        'status': 'ERROR',
                        'reason': 'Artifact does not exist in Neo4j'
                    }), 400

                transitive_artifacts_search = session.read_transaction(
                    neo4j_queries.get_transitive_artifact_dependents,
                    group, project)
                artifacts_cache_count = session.read_transaction(
                    neo4j_queries.get_artifact_dependents_total_cached,
                    group, project)
                artifacts_search_count = session.read_transaction(
                    neo4j_queries.get_artifact_dependents_count, group,
                    project)

                # With no dependents searched yet there is nothing to
                # extrapolate from; report 0 instead of dividing by zero
                # (which previously surfaced as a 500 response).
                if artifacts_search_count:
                    transitive_count = transitive_artifacts_search.get('count') or 0
                    estimated_transitive_artifacts_count = int(
                        float(transitive_count / artifacts_search_count) *
                        float(artifacts_cache_count or 0))
                else:
                    estimated_transitive_artifacts_count = 0

                return jsonify({
                    'status': 'ok',
                    'transitive-artifacts-search':
                    transitive_artifacts_search,
                    'artifacts-cache': {
                        'count': artifacts_cache_count
                    },
                    'artifacts-search': {
                        'count': artifacts_search_count
                    },
                    'predictions': {
                        'estimated-transitive-artifacts-count':
                        estimated_transitive_artifacts_count
                    }
                }), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 9
0
def fetch_dependent_ast(group, project):
    """Return the result of the ``ast_tree_dependent`` query for a dependent.

    The dependent project is identified by the ``group`` and ``repo``
    query-string arguments; ``label`` and ``id`` select the sub-node passed to
    the query.

    Args:
        group: First component of the identifier of the depended-on project.
        project: Second component of the identifier of the depended-on project.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'ast': ...}``, 200 when both projects exist;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when either is missing;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    print(group)
    print(project)

    dependent_group = request.args.get('group')
    print(dependent_group)
    dependent_repo = request.args.get('repo')
    print(dependent_repo)

    sub_node_label = request.args.get('label')
    sub_node_id = request.args.get('id')

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                # Short-circuits: the dependent is only checked when the
                # depended-on project exists.
                both_exist = session.read_transaction(
                    neo4j_queries.project_exists, group,
                    project) and session.read_transaction(
                        neo4j_queries.project_exists, dependent_group,
                        dependent_repo)
                if not both_exist:
                    return jsonify({
                        'status': 'ERROR',
                        'reason': 'Artifact does not exist in Neo4j'
                    }), 400

                ast_result = session.read_transaction(
                    neo4j_queries.ast_tree_dependent, group, project,
                    dependent_group, dependent_repo, sub_node_label,
                    sub_node_id)

                return jsonify({
                    'status': 'ok',
                    'ast': ast_result,
                }), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 10
0
def are_artifacts_parsed(group, project):
    """Return a project's packages annotated with their dependents-search state.

    Each artifact returned by ``get_project_packages`` gets a ``search-state``
    entry holding its ``dependentsearch`` attribute, or ``"not-searched"``
    when that attribute is unset.

    Args:
        group: First component of the project identifier.
        project: Second component of the project identifier.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'artifacts': [...]}``, 200 when the project exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    print(group)
    print(project)

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.project_exists,
                                                group, project):
                    return jsonify({
                        'status':
                        'ERROR',
                        'reason':
                        'Project does not exist in Neo4j. Have you submitted a parse job to /init/dependents-search/pom yet?'
                    }), 400

                artifacts = session.read_transaction(
                    neo4j_queries.get_project_packages, group, project)

                for artifact in artifacts:
                    search_state = session.read_transaction(
                        neo4j_queries.retrieve_artifact_attribute_value,
                        artifact.get("group"), artifact.get("artifact"),
                        'dependentsearch')
                    if search_state is None:
                        search_state = "not-searched"
                    artifact["search-state"] = search_state

                return jsonify({
                    'status': 'ok',
                    'artifacts': artifacts
                }), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 11
0
def fetch_project_dependents(group, project):
    """Return a project's dependents together with the cached total.

    Args:
        group: First component of the project identifier.
        project: Second component of the project identifier.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'status': 'ok', 'projects-search': ..., 'projects-cache':
          {'count': ...}}``, 200 when the project exists;
        * ``{'status': 'ERROR', 'reason': ...}``, 400 when it does not;
        * ``{'status': 'SERVER_ERROR'}``, 500 on any Neo4j failure.
    """
    print(group)
    print(project)

    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                if not session.read_transaction(neo4j_queries.project_exists,
                                                group, project):
                    return jsonify({
                        'status':
                        'ERROR',
                        'reason':
                        'Project does not exist in Neo4j. Have you submitted a parse job to /init/dependents-search/pom yet?'
                    }), 400

                projects_search = session.read_transaction(
                    neo4j_queries.get_project_dependents, group, project)
                projects_search_count = session.read_transaction(
                    neo4j_queries.get_project_dependents_total_cached,
                    group, project)

                return jsonify({
                    'status': 'ok',
                    'projects-search': projects_search,
                    'projects-cache': {
                        'count': projects_search_count
                    }
                }), 200
        except Exception:
            traceback.print_exc()
            return jsonify({'status': 'SERVER_ERROR'}), 500
        finally:
            # The session is already closed by the `with` block; close the
            # driver itself so its connections are released.
            driver.close()
    except Exception:
        traceback.print_exc()
        return jsonify({'status': 'SERVER_ERROR'}), 500
Ejemplo n.º 12
0
def create_parse_package_task():
    """Queue a dependents search for a package.

    Reads a JSON request body that must contain ``group``, ``artifact``,
    ``start``, ``end`` and ``parent``; ``priority`` is optional. Jobs with
    priority ``'high'`` go to ``q_medium``, all others to ``q_low``.

    Returns:
        A ``(response, status)`` tuple:
        * ``{'task': task}``, 200 when a ``fetch_package`` job was enqueued;
        * ``{'state': 'completed'}``, 202 when the package is already searched;
        * ``{'state': 'in-progress'}``, 201 when a search is running and the
          requested ``start`` is <= 1;
        * ``{'Error': ...}``, 400 when the body is missing or incomplete.
    """
    payload = request.json
    if not payload:
        # A bare integer is not a valid view return value; send a real
        # response carrying the 400 status instead.
        return jsonify({'Error': 'Must POST JSON request'}), 400

    required_fields = ('group', 'artifact', 'start', 'end', 'parent')
    if any(field not in payload for field in required_fields):
        return jsonify({'Error': 'Must POST JSON request with group, artifact, start, end and parent fields'}), 400

    task = {
        'group': payload['group'],
        'artifact': payload['artifact'],
        'start': payload['start'],
        'end': payload['end'],
        'priority': payload.get('priority'),
        'parent': payload['parent']
    }

    driver = utils.get_neo4j()
    try:
        with driver.session() as session:
            # Guard against re-searching the same repository. This is an
            # imperfect solution, as it only guards against the parsing being
            # initiated again with a start value for the records to be
            # retrieved during search <= 1. It doesn't guard against a search
            # which overlaps with a previous search.
            #
            # For example, it doesn't guard against the following
            #     1. search request received to search identified repositories 1 to 100
            #     2. search request received to search identified repositories 50 to 100.
            #
            #     In this instance, the repositories 50 to 100 will be searched twice.
            parsed = session.read_transaction(neo4j_queries.is_package_parsed,
                                              task.get('group'),
                                              task.get('artifact'))

            if parsed == "completed":
                print("Parsing already completed on repository")
                return jsonify({'state': "completed"}), 202

            if parsed == "in-progress" and task.get("start") <= 1:
                print("Parsing already in progress on repository")
                return jsonify({'state': "in-progress"}), 201

    except Exception:
        # Best effort: a failed state check is logged and the job is still
        # enqueued below.
        traceback.print_exc()
    finally:
        driver.close()

    # NOTE(review): 'high' priority is routed to q_medium, as in the original.
    queue = q_medium if task.get('priority') == 'high' else q_low

    # `job_timeout` matches the keyword used by the other enqueue call in this
    # code base; a plain `timeout` keyword is not reserved by newer RQ
    # releases and would be forwarded to fetch_package instead.
    queue.enqueue(fetch_package,
                  task.get('group'),
                  task.get('artifact'),
                  task.get('start'),
                  task.get('end'),
                  task.get('parent'),
                  task.get('priority'),
                  job_timeout=3600)

    return jsonify({'task': task}), 200
Ejemplo n.º 13
0
def _set_ast_parsed_state(git_short_url, state):
    """Best-effort update of a project's 'ast-parsed' attribute; never raises."""
    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                session.write_transaction(
                    neo4j_queries.update_attribute_of_project,
                    git_short_url, 'ast-parsed', state)
        except Exception:
            traceback.print_exc()
        finally:
            driver.close()
    except Exception:
        traceback.print_exc()


def _run_command(args, cwd):
    """Run *args* without a shell in *cwd*; return the exit status (1 if it cannot start)."""
    try:
        return subprocess.call(args, cwd=cwd)
    except OSError:
        traceback.print_exc()
        return 1


def parse_repo(git_short_url, parsing_type):
    """Clone a GitHub project, run the Java parser on it and load the result into Neo4j.

    The project's ``ast-parsed`` attribute tracks progress: it is set to
    ``'in-progress'`` before work starts, to ``'failed'`` when cloning,
    parsing or the Neo4j export fails, and to ``parsing_type`` on success.
    Nothing is done when the attribute already equals ``parsing_type``,
    ``'in-progress'`` or ``'all'``.

    Args:
        git_short_url: GitHub ``owner/repo`` path; interpolated into the clone
            URL and passed to the parser via ``-i``.
        parsing_type: ``'all'`` or ``'packageclassonly'``; any other value is
            rejected with a printed error.

    Returns:
        None.
    """
    print("Parsing project: " + git_short_url)

    if parsing_type not in ('all', 'packageclassonly'):
        print("ERROR: parsing type must either be 'all' or 'packageclassonly'")
        print(parsing_type)
        return

    # add an attribute to the project, describing the parsing that has taken place
    try:
        driver = utils.get_neo4j()
        try:
            with driver.session() as session:
                # TODO: a potential race condition exists here. If two jobs are submitted to parse a repo,
                # and both are accepted by two separate parsing workers at the same time, then both may query
                # the parsing state at the same time, and both may carry out parsing. This won't actually cause
                # any errors, but it does waste resources
                current_state = session.write_transaction(
                    neo4j_queries.retrieve_attribute_value, git_short_url,
                    'ast-parsed')
                if current_state in (parsing_type, 'in-progress', 'all'):
                    print("AST tree already parsed for project {}".format(
                        git_short_url))
                    return

                session.write_transaction(
                    neo4j_queries.update_attribute_of_project, git_short_url,
                    'ast-parsed', 'in-progress')
        except Exception:
            # Best effort: a failed state check is logged and parsing proceeds.
            traceback.print_exc()
        finally:
            driver.close()
    except Exception:
        traceback.print_exc()
        return

    # create a new temporary directory
    with tempdir() as dirpath:
        print("using dir " + dirpath)

        # git_short_url comes from the job submitter, so commands are run as
        # argument lists (no shell) and with an explicit cwd instead of
        # os.chdir, which would leave the worker inside a deleted directory.
        clone_url = "https://github.com/{}.git".format(git_short_url)
        print("git clone " + clone_url)
        _run_command(["git", "clone", "--", clone_url], dirpath)  # clone the git repo

        cloned_dir = os.listdir(dirpath)
        if len(cloned_dir) != 1:
            # Without this the project would stay 'in-progress' forever and
            # could never be queued again.
            print("Error occurred cloning repository")
            _set_ast_parsed_state(git_short_url, 'failed')
            return

        repo_path = os.path.join(dirpath, cloned_dir[0])

        # download all dependencies to ./target/dependencies
        _run_command(["mvn", "dependency:copy-dependencies"], repo_path)

        # Run parser
        print("Parsing Java Project to produce AST tree")
        parser_status = _run_command([
            "java", "-Xmx1g", "-jar",
            "/java-parser/target/java_parser_cli.jar-jar-with-dependencies.jar",
            "-i", git_short_url,
            "-j", repo_path,
            "-s", repo_path,
            "-l", "cypher",
            "-t", parsing_type,
            "-o", os.path.join(dirpath, "output.cypher"),
        ], repo_path)
        if parser_status != 0:
            print("Error occurred parsing AST tree")
            _set_ast_parsed_state(git_short_url, 'failed')
            return

        print("Exporting to neo4j")
        # Export output to neo4j, subprocess.call returns the status code of the call.
        # The shell is kept here for the pipe and $NEO4J_* expansion; the only
        # interpolated value is the temporary directory path.
        export_to_neo4j_output = subprocess.call(
            "set -eo pipefail; cat {}/output.cypher | cypher-shell -a $NEO4J_IP -u $NEO4J_USER -p $NEO4J_PASS"
            .format(dirpath),
            shell=True,
            executable='/bin/bash',
            cwd=repo_path)
        if export_to_neo4j_output != 0:
            _set_ast_parsed_state(git_short_url, 'failed')
            print("Error occurred adding call graph to Neo4j")
            return

        # add an attribute to the project, describing the parsing that has taken place
        print("Completed parsing of project")
        _set_ast_parsed_state(git_short_url, parsing_type)
Ejemplo n.º 14
0
def fetch_package(package_group, package_artifact, search_start, search_end, parent_project, continuation_priority):
    """Search one page of a package's dependents and record them in Neo4j.

    The artifact's ``dependentsearch`` attribute is set to ``"in-progress"``,
    the depends service is queried for the requested page, and every returned
    project is added to Neo4j with an edge to ``parent_project`` and its poms
    passed to ``parse_pom`` with priority ``'low'``. When more results remain,
    the next page is requested from the queue manager. The attribute becomes
    ``"completed"`` once ``search_end`` reaches the total count, or
    ``"failed"`` if an exception occurs during the search.

    Errors are printed and never propagated to the caller.

    Args:
        package_group: Artifact group.
        package_artifact: Artifact name.
        search_start: Index of the first result of this page (>= 0).
        search_end: Index just past the last result of this page
            (>= ``search_start``).
        parent_project: Project the discovered dependents depend on.
        continuation_priority: Priority forwarded with the next-page request.

    Returns:
        None.
    """
    # validate that all arguments are correct; None bounds are rejected here
    # rather than failing the comparisons below with a TypeError
    if (package_group is None or package_artifact is None
            or search_start is None or search_end is None
            or search_start < 0 or search_end < search_start):
        print("Cannot parse package " + str(package_group or '') + "." + str(package_artifact or ''))
        return

    try:
        driver = utils.get_neo4j()
        try:
            # Looked up inside the inner try so that a failure here still
            # closes the driver in the finally clause below.
            depends_service_url = utils.get_depends_service()
            queue_manager = utils.get_queue_manager()

            with driver.session() as session:
                try:
                    # don't carry out parsing if the search is already completed
                    parsed = session.read_transaction(neo4j_queries.is_package_parsed, package_group, package_artifact)
                    if parsed == "completed":
                        print("Package " + package_group + "." + package_artifact + " at parsing state " + parsed)
                        return

                    print("Parsing package " + package_group + "." + package_artifact)
                    session.write_transaction(neo4j_queries.add_attribute_to_artifact, package_group, package_artifact, "dependentsearch", "in-progress")

                    # search for dependents using the pom-search-service
                    # NOTE(review): the `end` query parameter receives the page
                    # size (search_end - search_start), not search_end --
                    # confirm against the dependents service API.
                    response = requests.get("{}/java/package/{}/{}/dependents/local?pom=true&start={}&end={}".format(depends_service_url, package_group, package_artifact, search_start, search_end - search_start))
                    if response.status_code != 200:
                        print("Couldn't retrieve dependents from dependents service")
                        return

                    parsed_response = json.loads(response.content)

                    print("Received packages from dependents service " + package_group + "." + package_artifact)

                    total_count = parsed_response.get("total_count")

                    # returned repositories are paginated - meaning that only a subset of all are returned
                    # at any one time. If the current end to the paginated search is less than the total count,
                    # then send a request to the queue manager to carry out the search with the next set of
                    # paginated results
                    if search_end < total_count:
                        # Identify the new end position for pagination, maintaining the same number of returned
                        # results as in this search
                        new_search_end = min(search_end + (search_end - search_start), total_count)

                        # request the next search
                        next_parse_response = requests.post("http://{}/dependents/package".format(queue_manager), json={'group': package_group, 'artifact': package_artifact, 'start': search_end, 'end': new_search_end, 'parent': parent_project, 'priority': continuation_priority})
                        if next_parse_response.status_code != 200:
                            print("Error occurred trying to request parsing of next section")

                    # store the total count of dependents to the artifact
                    session.write_transaction(neo4j_queries.add_attribute_to_artifact, package_group, package_artifact, "total_count", total_count)

                    # for each identified dependent project, add it to Neo4j
                    for project in parsed_response.get("projects"):
                        repo_name = project.get('github_repo_name')

                        session.write_transaction(neo4j_queries.add_project_node, repo_name)
                        session.write_transaction(neo4j_queries.add_project_node, parent_project)
                        session.write_transaction(neo4j_queries.add_project_depends_project_edge, repo_name, parent_project)

                        for pom in project.get("pom"):
                            parse_pom(session, pom, repo_name, 'low')

                    # if search_end >= total_count, then add attribute to package stating it has been parsed.
                    if search_end >= total_count:
                        print("adding")
                        session.write_transaction(neo4j_queries.update_attribute_of_artifact, package_group, package_artifact, "dependentsearch", "completed")
                        print("added")
                except Exception:
                    traceback.print_exc()
                    session.write_transaction(neo4j_queries.update_attribute_of_artifact, package_group, package_artifact, "dependentsearch", "failed")
        except Exception:
            traceback.print_exc()
        finally:
            # No `return` here: returning from `finally` would also swallow
            # KeyboardInterrupt/SystemExit raised while the job is running.
            driver.close()
    except Exception:
        traceback.print_exc()
        print("Error occurred parsing package " + str(package_group or '') + "." + str(package_artifact or ''))
        return
Ejemplo n.º 15
0
def createTreeFromEdges(edges, vertices, group, project, sub_node_label,
                        sub_node_id):
    """Assemble vertices and edges into a forest of nested node dicts.

    Usage figures for each node are fetched with the ``dependent_method_usage``
    query and merged into the node built for every vertex. Edges are then used
    to nest children under their parents; a node that becomes a child is
    removed from the returned top-level list.

    Args:
        edges: Iterable of ``(parent_id, child_id)`` pairs keyed like
            ``vertices``.
        vertices: Mapping of node key to a graph node exposing ``_properties``
            (with an ``id`` entry) and ``_labels``.
        group: Passed through to the usage query.
        project: Passed through to the usage query.
        sub_node_label: Passed through to the usage query.
        sub_node_id: Passed through to the usage query.

    Returns:
        list: The node dicts that were not attached to any parent.

    Raises:
        KeyError: If a vertex has no ``id`` property or no usage record.
    """
    # Usage details keyed by the node's 'id' property.
    node_usages = {}

    driver = utils.get_neo4j()
    try:
        with driver.session() as session:
            result = session.read_transaction(dependent_method_usage, group,
                                              project, sub_node_label,
                                              sub_node_id)
            for record in result:
                node = record.get('node')
                properties = getattr(node, '_properties')
                label = list(getattr(node, '_labels'))[0]
                element_id = properties.get('id')

                node_usages[element_id] = {
                    'label': label,
                    'id': element_id,
                    'usage': record.get("usage"),
                    'project': getattr(record.get("proj"),
                                       '_properties').get('id'),
                    'distinct_usage': record.get("usage_dist"),
                    'properties': properties,
                    'name': "{}: {}".format(label, element_id),
                }
    finally:
        # The session is closed by the `with` block; close the driver itself
        # so its connections are released even when the query fails.
        driver.close()

    nodes = {}
    forest = []
    for node_id, vertex in vertices.items():
        properties = getattr(vertex, '_properties')
        element_id = properties["id"]
        usage = node_usages[element_id]

        nodes[node_id] = {
            'id': element_id,
            # display name is the last dotted component of the id
            'name': element_id.split('.')[-1],
            'properties': properties,
            'label': usage["label"],
            'size': usage["usage"],
            'children': [],
            'value': usage["usage"],
            'usage': usage["usage"],
            'distinct_usage': usage["distinct_usage"],
            'project': usage["project"],
        }
        forest.append(nodes[node_id])

    # Must remove shortest paths to ensure there is no duplication. Java hierarchy naming can be used to ensure only direct children are linked.
    # The issue is that project names at the top level don't follow this pattern, so extra work must be done to identify which children are direct children of the
    # project level.
    roots = [
        nodes[child_id]["id"] for parent_id, child_id in edges
        if nodes[parent_id]["project"] == nodes[parent_id]["id"]
    ]

    # Drop every candidate root that is prefixed by another candidate root.
    for outer_root in list(roots):
        roots = [
            root for root in roots
            if not root.startswith(outer_root) or root == outer_root
        ]
    root_ids = set(roots)

    for parent_id, child_id in edges:
        parent = nodes[parent_id]
        child = nodes[child_id]

        parent_is_project = parent["project"] == parent["id"]
        # True when the child's id is exactly "<parent id>.<last component>".
        is_direct_child = (parent["id"] + '.' + child["id"].split(".")[-1]
                           == child["id"])

        if (parent_is_project and child["id"] in root_ids) or is_direct_child:
            parent['children'].append(child)

            if child in forest:
                forest.remove(child)

    # forest now holds only the nodes that were not attached to a parent
    return forest