Example #1
    def crawl(self, repository_id, pull_request_id):
        """
        Entry point for this class
        """
        if (repository_id is None) or (pull_request_id is None):
            print("could not get work item one of the id's was None")
            print(repository_id)
            print(pull_request_id)
            return

        graph = GraphBuilder().GetNewGraph()
        pull_request = PullRequest.select(graph, pull_request_id).first()
        if pull_request is None:
            print("Could not continue, pullrequest was not in db")
            return
        url = self.pull_request_workitems_url(repository_id, pull_request.Id)
        data = self.get_data(url)
        if data is None:
            return
        if "value" not in data:
            logging.info("no work items linked")
            return
        for raw in data["value"]:
            work_item = self.make_work_item(raw)
            if work_item is not None:
                self.link_to_pull_request(work_item, pull_request)
                self.fill_in_the_rest(work_item, graph)
                transaction = graph.begin()
                transaction.merge(work_item)
                transaction.commit()  # commit the merge before pushing property updates
                graph.push(work_item)
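Example #1's merge-then-push calls use py2neo's object-graph mapping. A minimal standalone sketch of that save pattern, assuming py2neo v3-style GraphObject models (the WorkItem fields here are illustrative, not the project's real model):

    from py2neo import Graph
    from py2neo.ogm import GraphObject, Property

    class WorkItem(GraphObject):
        __primarykey__ = "Id"
        Id = Property()
        Title = Property()

    graph = Graph()  # connection details come from your environment/config
    item = WorkItem()
    item.Id = 42
    item.Title = "example"
    graph.merge(item)  # create-or-match on the primary key
    graph.push(item)   # write local property changes back to Neo4j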
Example #2

    def __init__(self):
        """
        class init
        """
        self.graph = GraphBuilder().GetNewGraph()
        self.config = configparser.ConfigParser()
        self.config.read_file(open('default.cfg'))
Example #3
    def crawl(self, project_name, url=None):
        """
        This method is recursive: each response ends with a nextLink url that
        must be followed until isLastBatch is reached.
        """
        if project_name is None:
            print("ProjectId is needed to link work items")
            return

        if url is None:
            url = self.get_url(project_name)

        data = self.vsts.make_request(url)
        if data is None:
            return
        if "values" not in data:
            logging.info("no work items linked")
            return
        for raw in data["values"]:
            graph = GraphBuilder().GetNewGraph()
            r = self.build_relationship(graph, raw)
            print("adding workitem and relationships")
            graph.create(r)

        if data.get("nextLink"):
            if data.get("isLastBatch"):
                print("reached the end of linked work items for project " +
                      project_name)
                return
            next_url = data["nextLink"]
            self.crawl(project_name, next_url)
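The recursion above follows VSTS's nextLink continuation until isLastBatch. The same walk can be written iteratively, which avoids recursion-depth limits on projects with many batches; a sketch under the same assumptions (get_url, self.vsts.make_request, and build_relationship behave as in the original):

    def crawl_iterative(self, project_name):
        """Hypothetical iterative variant of the recursive crawl above."""
        url = self.get_url(project_name)
        while url:
            data = self.vsts.make_request(url)
            if data is None or "values" not in data:
                return
            for raw in data["values"]:
                graph = GraphBuilder().GetNewGraph()
                graph.create(self.build_relationship(graph, raw))
            if data.get("isLastBatch"):
                return
            url = data.get("nextLink")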
Example #4
    def get_pull_request_ids(self, repository_id):
        """
        Get list of pull request ids
        """
        graph = GraphBuilder().GetNewGraph()
        qry = "MATCH (n:Repository{{Id:'{}'}})-[]-(r:PullRequest) RETURN r.Id as Id".format(
            repository_id)
        pull_reqs = list(graph.run(qry))
        ids = []
        for _id in pull_reqs:
            ids.append(_id.get("Id"))
        return ids
Example #5
    def get_repository_ids(self, project_name):
        """
        Get list of repository ids
        """
        graph = GraphBuilder().GetNewGraph()
        repo_qry = "MATCH (n:Repository)-[]-(p:Project{{Name:'{}'}}) RETURN n.Id as Id".format(
            project_name)
        repo_ids = list(graph.run(repo_qry))
        ids = []
        for _id in repo_ids:
            ids.append(_id.get("Id"))
        return ids
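Examples #4 and #5 are the same query-then-collect shape with a different Cypher string. A hypothetical helper both could delegate to (run_id_query is not in the original code):

    def run_id_query(qry):
        """Run a Cypher query and collect the Id column into a list."""
        graph = GraphBuilder().GetNewGraph()
        return [record.get("Id") for record in graph.run(qry)]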
Example #6
    def __init__(self):
        # assumes: from os.path import exists, join; import tensorflow as tf
        self.ctpn = CTPN()
        self.parser = OutputParser()
        self.graph_builder = GraphBuilder()
        if exists(join('model', 'ctpn.h5')):
            self.ctpn = tf.keras.models.load_model(join('model', 'ctpn.h5'), compile=False)
Example #7
    def get_pull_request_ids(self, project_name):
        """
        From neo4j, get all of the pull request ids for a given project.
        """
        graph = GraphBuilder().GetNewGraph()
        qry = '''MATCH (pr:PullRequest)-[]-
                 (r:Repository)-[]-(p:Project{{Name:"{}"}})
                 RETURN pr.Id as Id'''.format(project_name)
        print(qry)
        raw_pull_request_ids = list(graph.run(qry))
        ids = []
        for item in raw_pull_request_ids:
            ids.append(item.get("Id"))
        # dropping the reference lets the result set be garbage collected
        raw_pull_request_ids = None
        return ids
Example #8
    def crawl(self, raw_data):
        """
        starts doing the crawling work
        """
        graph = GraphBuilder().GetNewGraph()
        proj = self.map_and_save_project(raw_data, graph)
        if proj is not None:
            self.add_teams_to_repo(proj, graph)
        print("Finished Adding Projects Teams and Users")
Example #9
    def crawl(self, pull_request_id):
        '''
        Crawls the comments and puts them in Neo4J
        '''
        graph = GraphBuilder().GetNewGraph()
        pull_request = PullRequest.select(graph, pull_request_id).first()
        if pull_request is None:
            return
        for repo in pull_request.ForRepository:
            self.copy_over_comments(repo.Id, pull_request)
        print("finished adding comments")
Example #10
    def copy_over_comments(self, repository_id, pull_request):
        '''
        Copy VSTS comments to Neo4j
        '''
        print("adding comments for pull_request_id" + str(pull_request.Id))
        url = self.generate_vsts_url(repository_id, pull_request.Id)
        data = self.get_vsts_comments(url)
        if data is None:
            logging.warning("no comments from vsts for pull request " + str(pull_request.Id))
            return

        for item in data["value"]:
            graph = GraphBuilder().GetNewGraph()
            #vsts comment thread not python thread
            thread = self.make_thread_node(item, graph)
            print("working thread " + str(thread.Id))
            for raw_comment in item.get("comments"):
                if self.exclude_system_comments and not self.is_user_comment(raw_comment):
                    continue
                else:
                    thread.PartOf.add(pull_request)
                    comment = self.make_comment_node(raw_comment, thread.Id, graph, url)
                    print("saving comment " + str(comment.Id))
                    graph.merge(comment)
                    print("saved comment " + str(comment.Id))
                    #this should save the thread too
                    comment.PartOf.add(thread)
                    self.link_to_parent_comment(comment, raw_comment, thread.Id, graph)
                    self.link_to_author(comment, raw_comment, graph)
                    graph.push(comment)
                    print("added links for comment " + str(comment.Id))
Example #11
    def crawl(self, project_name):
        """
        Gets Repositories for a given project
        """
        url = (
            "%s/DefaultCollection/%s/_apis/git/repositories?api-version=%s" %
            (self.instance, project_name, self.api_version))
        data = self.vsts.make_request(url)

        for r in data["value"]:
            graph = GraphBuilder().GetNewGraph()
            #print(r["id"])
            repo = Repository()
            repo.Id = r.get("id")
            repo.Name = r.get("name")
            repo.Url = r.get("url")

            raw_proj = r.get("project")
            proj = Project()
            proj.Id = raw_proj.get("id")
            proj.Name = raw_proj.get("name")
            proj.Url = raw_proj.get("url")

            repo_proj = Project.select(graph, proj.Id).first()
            # todo: may not need to do this
            if repo_proj is None:
                # project node is not in the graph yet, so create it
                proj_tx = graph.begin()
                proj_tx.create(proj)
                proj_tx.commit()

            repo.BelongsTo.add(proj)
            print("Adding Repo: ")
            print(repo.Name)
            transaction = graph.begin()
            transaction.merge(repo)
            transaction.commit()  # commit the merge before pushing property updates
            graph.push(repo)
        print("Finished mapping repos")
Example #12
    def crawl(self, project_name):
        '''
        For a single project, gets the pull requests from VSTS and saves
        them to a neo4j database instance.
        The list of repositories comes from neo4j, so that import must be done first.

        :param project_name:
        '''

        graph = GraphBuilder().GetNewGraph()
        repo_ids = self.get_repo_ids(graph, project_name)
        for repo_id in repo_ids:
            skip = 0  #part of vsts pagination
            while True:
                url = self.get_vsts_pull_request_url(project_name, repo_id,
                                                     skip)
                raw_pulls = self.vsts.make_request(url)
                if not self.has_data_to_parse(raw_pulls):
                    break
                skip = skip + self.num_per_request  #increment pagination for vsts api call
                for raw_pull_req in raw_pulls["value"]:
                    self.map_and_save_pull_request(graph, raw_pull_req)

        print("Ending PullRequest Crawl for Project " + project_name)
class PostProcessingCommands(object):
    """
    Adds extra goodness to the Neo4j data model after the data has been imported.
    """
    def __init__(self):
        """
        class init
        """
        self.graph = GraphBuilder().GetNewGraph()
        self.config = configparser.ConfigParser()
        self.config.read_file(open('default.cfg'))

    @property
    def developer_names(self):
        """
        List of developers to add a label for in Neo4j
        """
        devs = self.config['DEFAULT']['developer_names'].replace(
            '"', '').replace("\r", '').replace("\n", '').split(",")
        return devs

    @property
    def data_developers(self):
        """
        List of database developers to add a label for in Neo4j
        """
        devs = self.config['DEFAULT']['database_developers'].replace(
            '"', '').replace("\r", '').replace("\n", '').split(",")
        return devs
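Both properties parse comma-separated names out of default.cfg. The file they expect looks roughly like this (the names are purely illustrative):

    [DEFAULT]
    developer_names = "alice,bob,carol"
    database_developers = "dave,erin"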

    def add_bug_label(self):
        """
        Finds work items of type bug and adds the label of bug.
        This makes it easier to query and also visualize the various work item types.
        """
        qry = """MATCH (b:WorkItem{WorkItemType:'Bug'})
                set b :Bug
                return count(b)"""
        self.graph.run(qry)
        print("Added Bug label to work items")

    def add_user_story_label(self):
        """
        Finds work items of type User Story and adds the label of UserStory.
        This makes it easier to query and also visualize the various work item types.
        """
        qry = """MATCH (n:WorkItem{WorkItemType:'User Story'})
                set n :UserStory
                return count(n)"""
        self.graph.run(qry)
        print("Added User Story label to work items")

    def add_tasks_label(self):
        """
        Finds work items of type Task and adds the label of Task.
        This makes it easier to query and also visualize the various work item types.
        """
        qry = """MATCH (n:WorkItem{WorkItemType:'Task'})
                set n :Task
                return count(n)"""
        self.graph.run(qry)
        print("Added Task label to work items")

    def add_created_timestamp(self):
        """
        Finds all nodes with a CreatedDate and adds a CreatedTimestamp
        """
        qry = """MATCH (n)
                Where exists( n.CreatedDate)
                set n.CreatedTimestamp = apoc.date.parse(left(replace(n.CreatedDate,"T"," "),19),"ms","yyyy-MM-dd HH:mm:ss")
                return count(n) as n"""
        result = self.graph.evaluate(qry)
        print("Added CreatedTimestamps: Records Changed: {}".format(result))

    def add_creation_timestamp(self):
        """
        Handles CreationDate rather than CreatedDate, but still writes to the
        existing CreatedTimestamp property instead of a separate CreationTimestamp.
        Finds all nodes with a CreationDate and adds a CreatedTimestamp.
        """
        qry = """MATCH (n)
                Where exists( n.CreationDate)
                set n.CreatedTimestamp = apoc.date.parse(left(replace(n.CreationDate,"T"," "),19),"ms","yyyy-MM-dd HH:mm:ss")
                return count(n) as n"""
        result = self.graph.evaluate(qry)
        print("Added CreatedTimestamps for CreationDate: Records Changed: {}".
              format(result))

    def add_closed_timestamp(self):
        """
        Finds all nodes with a ClosedDate and adds a ClosedTimestamp
        """
        qry = """MATCH (n)
                Where exists( n.ClosedDate)
                set n.ClosedTimestamp = apoc.date.parse(left(replace(n.ClosedDate,"T"," "),19),"ms","yyyy-MM-dd HH:mm:ss")
                return count(n)"""
        result = self.graph.evaluate(qry)
        print("Added ClosedTimestamps: Records Changed: {}".format(result))

    def add_published_timestamp(self):
        """
        Finds all nodes with a PublishedDate and adds a PublishedTimestamp
        """
        qry = """MATCH (n)
                Where exists( n.PublishedDate)
                set n.PublishedTimestamp = apoc.date.parse(left(replace(n.PublishedDate,"T"," "),19),"ms","yyyy-MM-dd HH:mm:ss")
                return count(n)"""
        result = self.graph.evaluate(qry)
        print("Added PublishedTimestap: Records Changed: {}".format(result))

    def add_developer_label(self):
        """
        Given a list of names, adds a Developer label
        """
        developer_names = self.developer_names
        for dev in developer_names:
            qry = """MATCH (n:Person{{Name:"{}"}})
                    set n :Developer
                    """.format(dev)
            self.graph.run(qry)
        print("Added Developers labels to devlist")

    def add_database_developer_label(self):
        """
        Given a list of names, adds a DatabaseDev label
        """

        for dev in self.data_developers:
            qry = """MATCH (n:Person{{Name:"{}"}})
                    set n :DatabaseDev
                    """.format(dev)
            self.graph.run(qry)
        print("Added Developers labels to devlist")

    def run_all_commands(self):
        """
        Runs all the commands
        """
        print("Executing post processing commands")
        self.add_developer_label()
        self.add_database_developer_label()
        self.add_bug_label()
        self.add_user_story_label()
        self.add_tasks_label()
        self.add_created_timestamp()
        self.add_creation_timestamp()
        self.add_closed_timestamp()
        self.add_published_timestamp()
        print("Finished running post processing commands")
Example #14
    def crawl(self, raw_data):
        """
        starts doing the crawling work
        """
        graph = GraphBuilder().GetNewGraph()
        proj = self.map_and_save_project(raw_data, graph)
        if proj is not None:
            self.add_teams_to_repo(proj, graph)
        print("Finished Adding Projects Teams and Users")

if __name__ == '__main__':
    print("starting Projects Teams and Users")
    #set to false for easier debugging, but it is slower
    RUN_MULTITHREADED = True

    GRAPH = GraphBuilder()
    GRAPH.create_unique_constraints()

    VSTS = VstsInfo(None, None, ignore_cache=True)

    #todo: clean up this signature mess and just pass in VSTS
    WORKER = ProjectsTeamsUsersWorker(VSTS.get_request_settings(), VSTS.project_whitelist, VSTS)
    PROJECTS_URL = WORKER.get_vsts_projects_url()
    RAW = WORKER.vsts.make_request(PROJECTS_URL)
    PROJECTS = RAW["value"]

    if RUN_MULTITHREADED:
        with Pool(5) as p:
            p.map(WORKER.crawl, PROJECTS)
    else:
        for PROJ in PROJECTS:
            WORKER.crawl(PROJ)