コード例 #1
0
ファイル: koji.py プロジェクト: vaibhav19sharma/estuary-api
    def get_koji_builds(self, start_date, end_date):
        """
        Retrieve Koji build records from Teiid.

        :param datetime.datetime start_date: lower bound on the build creation time
        :param datetime.datetime end_date: upper bound on the build creation time
        :return: a list of dictionaries
        :rtype: list
        """
        log.info('Getting all Koji builds since {0} until {1}'.format(start_date, end_date))
        # Select every build whose creation event falls inside the requested window,
        # joining in the creation event time, package name, and owner details.
        query = """
            SELECT
                events.time as creation_time,
                build.completion_time,
                build.epoch,
                build.extra,
                build.id,
                brew.users.name as owner_name,
                brew.users.krb_principal as owner_username,
                package.name as package_name,
                build.release,
                build.start_time,
                build.state,
                build.task_id,
                build.version
            FROM build
            LEFT JOIN events ON build.create_event = events.id
            LEFT JOIN package ON build.pkg_id = package.id
            LEFT JOIN brew.users ON build.owner = brew.users.id
            WHERE events.time IS NOT NULL AND events.time >= '{0}' AND events.time <= '{1}'
            ORDER BY build.id
            """.format(start_date, end_date)

        return self.teiid.query(sql=query)
コード例 #2
0
ファイル: bugzilla.py プロジェクト: pombredanne/estuary-api
    def get_bugzilla_bugs(self, start_date, end_date):
        """
        Retrieve Bugzilla bug records from Teiid.

        :param datetime.datetime start_date: lower bound on the bug's last-modified time
        :param datetime.datetime end_date: upper bound on the bug's last-modified time
        :return: list of dictionaries containing bug info
        :rtype: list
        """
        log.info('Getting all Bugzilla bugs since {0} until {1}'.format(start_date, end_date))
        # Fetch Red Hat classified bugs modified within the window, joining in the
        # product/classification names and the email addresses of the people involved.
        query = """
            SELECT bugs.*, products.name AS product_name, classifications.name AS classification,
                assigned.login_name AS assigned_to_email, reported.login_name AS reported_by_email,
                qa.login_name AS qa_contact_email
            FROM bugzilla.bugs AS bugs
            LEFT JOIN bugzilla.products AS products ON bugs.product_id = products.id
            LEFT JOIN bugzilla.classifications AS classifications
                ON products.classification_id = classifications.id
            LEFT JOIN bugzilla.profiles AS assigned ON bugs.assigned_to = assigned.userid
            LEFT JOIN bugzilla.profiles AS reported ON bugs.reporter = reported.userid
            LEFT JOIN bugzilla.profiles AS qa ON bugs.qa_contact = qa.userid
            WHERE classifications.name = 'Red Hat' AND bugs.delta_ts >= '{0}'
                AND bugs.delta_ts <= '{1}'
            ORDER BY bugs.creation_ts DESC;
            """.format(start_date, end_date)

        return self.teiid.query(sql=query)
コード例 #3
0
ファイル: freshmaker.py プロジェクト: pombredanne/estuary-api
    def run(self, since=None, until=None):
        """
        Run the Freshmaker scraper.

        The Freshmaker API is always scraped in full, so the since/until window
        is ignored (a warning is logged if one is supplied).

        :param str since: a datetime to start scraping data from (ignored)
        :param str until: a datetime to scrape data until (ignored)
        """
        if since or until:
            # log.warn is deprecated in the logging module; use log.warning.
            # Also fix the missing space that produced "theFreshmaker" in the
            # concatenated message.
            log.warning('Ignoring the since/until parameter; They do not apply to the '
                        'Freshmaker scraper')
        log.info('Starting initial load of Freshmaker events')
        self.query_api_and_update_neo4j()
        log.info('Initial load of Freshmaker events complete!')
コード例 #4
0
ファイル: errata.py プロジェクト: vaibhav19sharma/estuary-api
    def get_advisories(self, since, until):
        """
        Retrieve Errata Tool advisories from Teiid.

        :param datetime.datetime since: lower bound on the advisory's updated_at time
        :param datetime.datetime until: upper bound on the advisory's updated_at time
        :return: a list of dictionaries
        :rtype: list
        """
        log.info('Getting Errata advisories since {0} until {1}'.format(
            since, until))
        # Join the advisory row with its current state, product, and the users
        # involved (assignee, package owner, reporter).
        query = """\
            SELECT
                main.actual_ship_date,
                main.fulladvisory as advisory_name,
                assigned_users.login_name AS assigned_to,
                main.content_types,
                states.current as state,
                main.created_at,
                main.id AS id,
                main.issue_date,
                package_users.login_name AS package_owner,
                products.name as product_name,
                products.short_name as product_short_name,
                main.release_date,
                reporter_users.login_name AS reporter,
                main.security_impact,
                main.security_sla,
                main.status_updated_at AS status_time,
                main.synopsis,
                main.errata_type AS type,
                main.update_date,
                main.updated_at
            FROM Errata_public.errata_main AS main
            LEFT JOIN Errata_public.state_indices as states
                ON main.current_state_index_id = states.id
            LEFT JOIN Errata_public.errata_products as products
                ON main.product_id = products.id
            LEFT JOIN Errata_public.users AS assigned_users
                ON main.assigned_to_id = assigned_users.id
            LEFT JOIN Errata_public.users AS package_users
                ON main.package_owner_id = package_users.id
            LEFT JOIN Errata_public.users AS reporter_users
                ON main.reporter_id = reporter_users.id
            WHERE main.updated_at >= '{0}' AND main.updated_at <= '{1}'
            ORDER BY main.id;
        """.format(since, until)
        return self.teiid.query(query)
コード例 #5
0
ファイル: errata.py プロジェクト: vaibhav19sharma/estuary-api
    def get_attached_bugs(self, advisory_id):
        """
        Look up the Bugzilla bugs attached to one advisory in Teiid.

        :param int advisory_id: the advisory ID
        :return: a list of a dictionaries
        :rtype: list
        """
        log.info(
            'Getting Bugzilla bugs tied to the advisory with ID {0}'.format(
                advisory_id))
        # Only the bug IDs are needed; the bug details live in the Bugzilla scraper.
        query = """\
            SELECT filed_bugs.bug_id as id_
            FROM Errata_public.filed_bugs as filed_bugs
            WHERE filed_bugs.errata_id = {0};
        """.format(advisory_id)
        return self.teiid.query(query)
コード例 #6
0
ファイル: errata.py プロジェクト: vaibhav19sharma/estuary-api
    def get_advisory_states(self, advisory_id):
        """
        Look up the state history of one advisory in Teiid.

        :param int advisory_id: the advisory ID
        :return: a list of a dictionaries
        :rtype: list
        """
        log.info('Getting states tied to the advisory with ID {0}'.format(
            advisory_id))
        # Each state row carries the user who made the state change.
        query = """\
            SELECT states.created_at, states.id, states.current as name, states.updated_at,
                users.login_name AS username
            FROM Errata_public.state_indices as states
            LEFT JOIN Errata_public.users as users ON states.who_id = users.id
            WHERE errata_id = {}
            ORDER BY states.id;
        """.format(advisory_id)
        return self.teiid.query(query)
コード例 #7
0
    def update_neo4j(self, bugs):
        """
        Push Bugzilla bug data from Teiid into Neo4j.

        :param list bugs: a list of dictionaries
        """
        log.info('Beginning to upload data to Neo4j')
        total = len(bugs)
        uploaded = 0

        for record in bugs:
            # The bytes(...).decode() round-trip mirrors the original handling of
            # the text fields (a no-op for str input on Python 3).
            bug = BugzillaBug.create_or_update({
                'id_': record['bug_id'],
                'severity': record['bug_severity'],
                'status': record['bug_status'],
                'creation_time': record['creation_ts'],
                'modified_time': record['delta_ts'],
                'priority': record['priority'],
                'product_name': bytes(record['product_name'], 'utf-8').decode(),
                'product_version': record['version'],
                'classification': record['classification'],
                'resolution': record['resolution'],
                'target_milestone': record['target_milestone'],
                'votes': record['votes'],
                'short_description': bytes(record['short_desc'], 'utf-8').decode(),
            })[0]

            uploaded += 1
            log.info('Uploaded {0} bugs out of {1}'.format(uploaded, total))

            # Attach User nodes for the assignee, reporter, and QA contact when set
            if record['assigned_to']:
                assignee = self.create_user_node(record['assigned_to_email'])
                bug.conditional_connect(bug.assignee, assignee)

            if record['reporter']:
                reporter = self.create_user_node(record['reported_by_email'])
                bug.conditional_connect(bug.reporter, reporter)

            if record['qa_contact']:
                qa_contact = self.create_user_node(record['qa_contact_email'])
                bug.conditional_connect(bug.qa_contact, qa_contact)
コード例 #8
0
ファイル: errata.py プロジェクト: vaibhav19sharma/estuary-api
    def get_associated_builds(self, advisory_id):
        """
        Look up the Brew builds mapped to one advisory in Teiid.

        :param int advisory_id: the advisory ID
        :return: a list of a dictionaries
        :rtype: list
        """
        log.info('Getting Brew builds tied to the advisory with ID {0}'.format(
            advisory_id))
        # removed_index_id is selected so the caller can tell whether the build
        # was removed from the advisory.
        query = """\
            SELECT brew_builds.id as id_, packages.name, brew_builds.release, removed_index_id,
                brew_builds.version
            FROM Errata_public.errata_brew_mappings as brew_mappings
            LEFT JOIN Errata_public.brew_builds AS brew_builds
                ON brew_builds.id = brew_mappings.brew_build_id
            LEFT JOIN Errata_public.packages AS packages
                ON brew_builds.package_id = packages.id WHERE errata_id = {0};
        """.format(advisory_id)
        return self.teiid.query(query)
コード例 #9
0
    def get_distgit_data(self, since, until):
        """
        Retrieve dist-git commit and Bugzilla reference data from Teiid.

        :param datetime.datetime since: lower bound on the commit date
        :param datetime.datetime until: upper bound on the commit date
        :return: a list of dictionaries
        :rtype: list
        """
        log.info('Getting dist-git commits since {0} until {1}'.format(since, until))
        # Join each commit with its push metadata and any referenced Bugzilla bugs
        query = """\
            SELECT c.commit_id, c.author, c.author_date, c.commit_date, c.log_message,
                c.sha, bz.bugzilla_id, bz.type as bugzilla_type, p.module, p.ref
            FROM gitbz.git_commits as c
            LEFT JOIN gitbz.git_push_commit_map as map ON c.commit_id = map.commit_id
            LEFT JOIN gitbz.git_pushes as p ON p.push_id = map.push_id
            LEFT JOIN gitbz.redhat_bugzilla_references as bz ON c.commit_id = bz.commit_id
            WHERE c.commit_date >= '{0}' AND c.commit_date <= '{1}'
            ORDER BY c.commit_date DESC;
        """.format(since, until)
        return self.teiid.query(query)
コード例 #10
0
ファイル: distgit.py プロジェクト: sarah256/estuary-api
    def run(self, since=None, until=None):
        """
        Run the dist-git scraper.

        Fetches the commit data from Teiid, chunks it, and uploads each chunk to
        Neo4j in a separate worker process (at most two alive at a time).

        :param str since: a datetime to start scraping data from
        :param str until: a datetime to scrape data until
        """
        log.info('Starting initial load of dist-git commits')
        # Fall back to the scraper's configured defaults when no bounds are given
        if since is None:
            start_date = self.default_since
        else:
            start_date = timestamp_to_date(since)

        if until is None:
            end_date = self.default_until
        else:
            end_date = timestamp_to_date(until)
        results = self.get_distgit_data(start_date, end_date)
        total_results = len(results)
        log.info('Successfully fetched {0} results from Teiid'.format(
            total_results))
        # Overwrite results with the formatted results so we don't have to store both in RAM
        results = list(self._get_result_chunks(results))
        # Upload the results to Neo4j using multi-processing to process chunks of results. We don't
        # use pool so that way the process doesn't get reused and the RAM is returned to the OS.
        # This will aid in a work-around for a memory leak from one of the libraries used that
        # couldn't be tracked down.
        procs = []
        concurrent_procs = 2
        for i, result in enumerate(results):
            # Only check if we've reached the process limit after it's technically possible
            if i >= concurrent_procs:
                # Prune finished processes; if the limit is still reached, block
                # on the oldest live process before spawning another one
                active_procs = [_proc for _proc in procs if _proc.is_alive()]
                if len(active_procs) >= concurrent_procs:
                    log.debug(
                        'There are already {0} processes running. Will wait until one of '
                        'them completes.'.format(len(active_procs)))
                    active_procs[0].join()
            # Each worker reconfigures the DB URL itself, hence it is passed in
            proc = Process(target=self._update_neo4j,
                           args=(neomodel_config.DATABASE_URL, total_results,
                                 result))
            proc.start()
            procs.append(proc)

        for proc in procs:
            # Wait for all the processes to finish
            proc.join()
        log.info('Initial load of dist-git commits complete!')
コード例 #11
0
ファイル: bugzilla.py プロジェクト: pombredanne/estuary-api
    def run(self, since=None, until=None):
        """
        Run the Bugzilla scraper end to end.

        :param str since: a datetime to start scraping data from
        :param str until: a datetime to scrape data until
        """
        log.info('Starting initial load of Bugzilla bugs')
        # Fall back to the scraper's configured defaults when no bounds are given
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        bugs = self.get_bugzilla_bugs(start_date, end_date)
        log.info('Successfully fetched {0} bugs from teiid'.format(len(bugs)))
        self.update_neo4j(bugs)
        log.info('Initial load of Bugzilla bugs complete!')
コード例 #12
0
ファイル: errata.py プロジェクト: vaibhav19sharma/estuary-api
    def run(self, since=None, until=None):
        """
        Run the Errata Tool scraper end to end.

        :param str since: a datetime to start scraping data from
        :param str until: a datetime to scrape data until
        """
        log.info('Starting initial load of Errata advisories')
        # Fall back to the scraper's configured defaults when no bounds are given
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        advisories = self.get_advisories(start_date, end_date)
        log.info('Successfully fetched {0} advisories from Teiid'.format(
            len(advisories)))
        self.update_neo4j(advisories)
        log.info('Initial load of Errata advisories complete!')
コード例 #13
0
    def run(self, since=None, until=None):
        """
        Run the dist-git scraper end to end.

        :param str since: a datetime to start scraping data from
        :param str until: a datetime to scrape data until
        """
        log.info('Starting initial load of dist-git commits and pushes')
        # Fall back to the scraper's configured defaults when no bounds are given
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        results = self.get_distgit_data(start_date, end_date)
        log.info('Successfully fetched {0} results from Teiid'.format(
            len(results)))
        self.update_neo4j(results)
        log.info('Initial load of dist-git commits and pushes complete!')
コード例 #14
0
ファイル: koji.py プロジェクト: vaibhav19sharma/estuary-api
    def run(self, since=None, until=None):
        """
        Run the Koji scraper end to end.

        :param str since: a datetime to start scraping data from
        :param str until: a datetime to scrape data until
        """
        log.info('Starting initial load for Koji')
        # Fall back to the scraper's configured defaults when no bounds are given
        start_date = self.default_since if since is None else utils.timestamp_to_date(since)
        end_date = self.default_until if until is None else utils.timestamp_to_date(until)

        builds = self.get_koji_builds(start_date, end_date)
        log.info('Successfully fetched {0} builds from teiid'.format(len(builds)))
        self.update_neo4j(builds)
        log.info('Initial load of Koji builds complete!')
コード例 #15
0
ファイル: errata.py プロジェクト: vaibhav19sharma/estuary-api
    def update_neo4j(self, advisories):
        """
        Update Neo4j with Errata Tool advisories from Teiid.

        For every advisory this creates/updates the Advisory node, connects the
        assignee/package owner/reporter User nodes, the advisory's state
        history, its attached Bugzilla bugs, and its attached Brew builds.

        :param list advisories: a list of dictionaries of advisories
        """
        count = 0
        for advisory in advisories:
            count += 1
            log.info('Processing advisory {0}/{1}'.format(
                count, len(advisories)))
            # The content_types column is a string with YAML in it, so convert it to a list.
            # Bug fix: parse the current advisory's value; previously this always read
            # advisories[0], giving every advisory the first one's content types.
            content_types = yaml.safe_load(advisory['content_types'])
            adv = Advisory.create_or_update({
                'actual_ship_date': advisory['actual_ship_date'],
                'advisory_name': advisory['advisory_name'],
                'content_types': content_types,
                'created_at': advisory['created_at'],
                'id_': advisory['id'],
                'issue_date': advisory['issue_date'],
                'product_name': advisory['product_name'],
                'product_short_name': advisory['product_short_name'],
                'release_date': advisory['release_date'],
                'security_impact': advisory['security_impact'],
                'security_sla': advisory['security_sla'],
                'state': advisory['state'],
                'status_time': advisory['status_time'],
                'synopsis': advisory['synopsis'],
                'type_': advisory['type'],
                'update_date': advisory['update_date'],
                'updated_at': advisory['updated_at'],
            })[0]

            # The login names are email addresses; the username is the local part
            assigned_to = User.get_or_create(
                {'username': advisory['assigned_to'].split('@')[0]})[0]
            adv.conditional_connect(adv.assigned_to, assigned_to)
            package_owner = User.get_or_create(
                {'username': advisory['package_owner'].split('@')[0]})[0]
            adv.conditional_connect(adv.package_owner, package_owner)
            reporter = User.get_or_create(
                {'username': advisory['reporter'].split('@')[0]})[0]
            adv.conditional_connect(adv.reporter, reporter)

            # Attach the advisory's state history and who created each state
            for state in self.get_advisory_states(advisory['id']):
                adv_state = AdvisoryState.create_or_update({
                    'id_': state['id'],
                    'name': state['name'],
                    'created_at': state['created_at'],
                    'updated_at': state['updated_at'],
                })[0]
                adv_state.conditional_connect(adv_state.advisory, adv)
                state_creator = User.get_or_create(
                    {'username': state['username'].split('@')[0]})[0]
                adv_state.conditional_connect(adv_state.creator, state_creator)

            for attached_bug in self.get_attached_bugs(advisory['id']):
                bug = BugzillaBug.get_or_create(attached_bug)[0]
                adv.attached_bugs.connect(bug)

            for associated_build in self.get_associated_builds(advisory['id']):
                # If this is set, that means it was once part of the advisory but not anymore.
                # This relationship needs to be deleted if it exists.
                if associated_build['removed_index_id']:
                    build = KojiBuild.nodes.get_or_none(
                        id_=associated_build['id_'])
                    if build:
                        adv.attached_builds.disconnect(build)
                else:
                    # This key shouldn't be stored in Neo4j
                    del associated_build['removed_index_id']
                    build = KojiBuild.get_or_create(associated_build)[0]
                    adv.attached_builds.connect(build)
コード例 #16
0
ファイル: koji.py プロジェクト: vaibhav19sharma/estuary-api
    def update_neo4j(self, builds):
        """
        Update Neo4j with Koji build information from Teiid.

        For every build this creates/updates the (Container)KojiBuild node, its
        owner, its current tags, the Koji task that produced it (plus child
        tasks), and the dist-git commit it was built from when that can be
        determined from the task request.

        :param list builds: a list of dictionaries
        """
        log.info('Beginning to upload data to Neo4j')
        count = 0

        for build_dict in builds:
            build_params = dict(
                id_=build_dict['id'],
                epoch=build_dict['epoch'],
                state=build_dict['state'],
                creation_time=build_dict['creation_time'],
                start_time=build_dict['start_time'],
                completion_time=build_dict['completion_time'],
                extra=build_dict['extra'],
                name=build_dict['package_name'],
                version=build_dict['version'],
                release=build_dict['release']
            )

            package_name = build_dict['package_name']
            # "extra" holds a JSON string (or None); tolerate missing/malformed values
            try:
                extra_json = json.loads(build_dict['extra'])
            except (ValueError, TypeError):
                extra_json = {}

            # Heuristics for determining if a build is a container build since,
            # currently, there is no definitive way to do it: either the extra
            # metadata references a container Koji build, or the package name
            # follows the container/docker naming convention.
            container_build = bool(extra_json and extra_json.get('container_koji_build_id')) \
                or package_name.endswith(('-container', '-docker'))

            if container_build:
                build = ContainerKojiBuild.create_or_update(build_params)[0]
            else:
                build = KojiBuild.create_or_update(build_params)[0]

            # Prefer the Kerberos principal's local part; fall back to the plain name
            if build_dict['owner_username']:
                username = build_dict['owner_username'].split('@')[0]
            else:
                username = build_dict['owner_name']
            user = User.get_or_create(dict(username=username))[0]
            build.conditional_connect(build.owner, user)

            # Sync the build's tags: connect current ones, then disconnect any
            # previously-connected tags that are no longer applied
            tags = self.get_build_tags(build_dict['id'])
            current_tag_ids = set()
            for _tag in tags:
                current_tag_ids.add(_tag['tag_id'])
                tag = KojiTag.create_or_update(dict(
                    id_=_tag['tag_id'],
                    name=_tag['tag_name']
                ))[0]

                tag.builds.connect(build)

            # _tag.id_ must be cast as an int because it is stored as a string in Neo4j since
            # it's a UniqueIdProperty
            connected_tags = {int(_tag.id_): _tag for _tag in build.tags.all()}
            extra_connected_tag_ids = set(connected_tags.keys()) - current_tag_ids
            for tag_id in extra_connected_tag_ids:
                build.tags.disconnect(connected_tags[tag_id])

            count += 1
            log.info('Uploaded {0} builds out of {1}'.format(count, len(builds)))

            # Determine which Koji task produced the build. extra_json was
            # already parsed above, so reuse it instead of re-parsing the JSON.
            container_koji_task_id = extra_json.get('container_koji_task_id')
            if build_dict['task_id']:
                task_id = build_dict['task_id']
            elif container_koji_task_id:
                task_id = container_koji_task_id
            else:
                # Continue if the task_id is None
                continue

            # Getting task related to the current build. Bug fix: check the
            # result BEFORE indexing/parsing it — the original indexed [0] and
            # parsed the XML first, so the "no task" guard could never fire and
            # an empty result raised IndexError instead of being skipped.
            tasks = self.get_task(task_id)
            if not tasks:
                # Continue if no corresponding task found
                continue
            task_dict = tasks[0]

            # The task request is XML; extract the dist-git commit hash from the
            # first <string> element that looks like a git URL ("...#<hash>")
            xml_root = ET.fromstring(task_dict['request'])
            commit_hash = None
            for child in xml_root.iter('string'):
                if child.text and child.text.startswith('git'):
                    commit_hash = child.text.rsplit('#', 1)[1]
                    break

            task = KojiTask.create_or_update(dict(
                id_=task_dict['id'],
                weight=task_dict['weight'],
                create_time=task_dict['create_time'],
                start_time=task_dict['start_time'],
                completion_time=task_dict['completion_time'],
                state=task_dict['state'],
                priority=task_dict['priority'],
                arch=task_dict['arch'],
                method=task_dict['method']
            ))[0]

            # Defining Relationships
            task.builds.connect(build)
            task.conditional_connect(task.owner, user)
            if commit_hash:
                commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
                build.conditional_connect(build.commit, commit)

            child_tasks = self.get_task_children(task_dict['id'])

            if not child_tasks:
                # Continue if no corresponding child task found
                continue

            for child_task_dict in child_tasks:
                child_task = KojiTask.create_or_update(dict(
                    id_=child_task_dict['id'],
                    weight=child_task_dict['weight'],
                    create_time=child_task_dict['create_time'],
                    start_time=child_task_dict['start_time'],
                    completion_time=child_task_dict['completion_time'],
                    state=child_task_dict['state'],
                    priority=child_task_dict['priority'],
                    arch=child_task_dict['arch'],
                    method=child_task_dict['method']
                ))[0]
                child_task.conditional_connect(child_task.parent, task)
コード例 #17
0
ファイル: distgit.py プロジェクト: sarah256/estuary-api
    def _update_neo4j(neo4j_url, total_results, counter_and_results):
        """
        Update Neo4j results via mapping with multiprocessing.

        :param str neo4j_url: database url for Neo4j
        :param int total_results: the total number of results that will be processed. This is used
        for a logging statement about progress.
        :param tuple counter_and_results: a tuple where the first index is the current counter and
        the second index is a list of dictionaries representing results from Teiid
        """
        previous_total = counter_and_results[0]
        results = counter_and_results[1]
        # Since _update_neo4j will be run in a separate process, we must configure the database
        # URL every time the method is run.
        neomodel_config.DATABASE_URL = neo4j_url
        # Create a thread pool with 4 threads to speed up queries to cgit
        pool = ThreadPool(4)
        try:
            counter = 0
            for result in results:
                # Refresh the cgit lookup cache every 200 results (the first
                # iteration, counter == 0, always populates it)
                if counter % 200 == 0:
                    until = counter + 200
                    if until > len(results):
                        until = len(results)
                    # Because of the joins in the SQL query, we end up with several rows with the
                    # same commit hash and we only want to query cgit once per commit
                    unique_commits = set([(c['module'], c['sha'])
                                          for c in results[counter:until]])
                    log.debug(
                        'Getting the author email addresses from cgit in parallel '
                        'for results {0} to {1}'.format(counter, until))
                    repos_info = {
                        r['commit']: r
                        for r in pool.map(DistGitScraper._get_repo_info,
                                          unique_commits)
                    }
                    # This is no longer needed so it can be cleared to save RAM
                    del unique_commits
                counter += 1
                log.info('Processing commit entry {0}/{1}'.format(
                    previous_total + counter, total_results))
                repo_info = repos_info[result['sha']]
                if not repo_info.get('namespace'):
                    # Without a namespace the repo node can't be created
                    log.info(
                        'Skipping nodes creation with commit ID {0}'.format(
                            result['commit_id']))
                    continue

                log.debug(
                    'Creating nodes associated with commit ID {0}'.format(
                        result['commit_id']))
                repo = DistGitRepo.get_or_create({
                    'namespace': repo_info['namespace'],
                    'name': result['module'],
                })[0]
                commit = DistGitCommit.create_or_update({
                    'author_date': result['author_date'],
                    'commit_date': result['commit_date'],
                    'hash_': result['sha'],
                    # In case we get unicode characters in Python 2
                    'log_message': bytes(result['log_message'], 'utf-8').decode(),
                })[0]
                bug = BugzillaBug.get_or_create({'id_':
                                                 result['bugzilla_id']})[0]

                log.debug(
                    'Creating the user nodes associated with commit ID {0}'.
                    format(result['commit_id']))
                author = User.create_or_update({
                    'username': repo_info['author_username'],
                    'email': repo_info['author_email'],
                })[0]

                log.debug(
                    'Creating the relationships associated with commit ID {0}'.
                    format(result['commit_id']))
                repo.commits.connect(commit)

                commit.conditional_connect(commit.author, author)

                # The bugzilla_type column says how the commit refers to the bug
                if result['bugzilla_type'] == 'related':
                    commit.related_bugs.connect(bug)
                elif result['bugzilla_type'] == 'resolves':
                    commit.resolved_bugs.connect(bug)
                elif result['bugzilla_type'] == 'reverted':
                    commit.reverted_bugs.connect(bug)
                # This is no longer needed so it can be cleared to save RAM
                del repo_info
        finally:
            # Bug fix: shut down the cgit thread pool (it was previously leaked)
            pool.close()
            pool.join()
            # Close the DB connection after this is done processing
            db.driver.close()
コード例 #18
0
    def update_neo4j(self, results):
        """
        Update Neo4j with the dist-git commit and push information from Teiid.

        :param list results: a list of dictionaries, one per commit/push row
            from the Teiid query (several rows may share the same commit hash
            because of the SQL joins)
        """
        # Worker pool used to fetch author/committer info from cgit in parallel.
        # It is explicitly closed/joined in the finally block so the worker
        # processes don't leak if an exception is raised mid-way.
        pool = Pool(processes=8)
        try:
            counter = 0
            repos_info = {}
            for result in results:
                # Refresh the cgit metadata cache once per batch of 200 rows
                if counter % 200 == 0:
                    until = min(counter + 200, len(results))
                    # Because of the joins in the SQL query, we end up with several rows with
                    # the same commit hash and we only want to query cgit once per commit
                    unique_commits = {(c['module'], c['sha'])
                                      for c in results[counter:until]}
                    log.debug(
                        'Getting the author and committer email addresses from cgit in parallel '
                        'for results {0} to {1}'.format(counter, until))
                    repos_info = {}
                    for _r in pool.map(DistGitScraper._get_repo_info,
                                       unique_commits):
                        r = json.loads(_r)
                        repos_info[r['commit']] = r
                    # This is no longer needed so it can be cleared to save RAM
                    del unique_commits
                    # A lot of RAM was allocated or used up, so let's call gc.collect() to
                    # ensure it is removed
                    gc.collect()
                counter += 1
                log.info('Processing commit and push entry {0}/{1}'.format(
                    str(counter), str(len(results))))
                repo_info = repos_info[result['sha']]
                if not repo_info.get('namespace'):
                    # Without a namespace we can't identify the repo, so skip the row
                    log.info(
                        'Skipping nodes creation with commit ID {0} and push ID {1}'
                        .format(result['commit_id'], result['push_id']))
                    continue

                log.debug(
                    'Creating nodes associated with commit ID {0} and push ID {1}'.
                    format(result['commit_id'], result['push_id']))
                repo = DistGitRepo.get_or_create({
                    'namespace': repo_info['namespace'],
                    'name': result['module'],
                })[0]
                # The ref looks like "refs/heads/<branch>"; use [-1] so a ref
                # without a "/" doesn't raise an IndexError
                branch_name = result['ref'].rsplit('/', 1)[-1]
                branch = DistGitBranch.get_or_create({
                    'name': branch_name,
                    'repo_namespace': repo_info['namespace'],
                    'repo_name': result['module'],
                })[0]
                commit = DistGitCommit.create_or_update({
                    'author_date': result['author_date'],
                    'commit_date': result['commit_date'],
                    'hash_': result['sha'],
                    # In case we get unicode characters in Python 2
                    'log_message': bytes(result['log_message'], 'utf-8').decode(),
                })[0]
                push = DistGitPush.get_or_create({
                    'id_': result['push_id'],
                    'push_date': result['push_date'],
                    'push_ip': result['push_ip'],
                })[0]
                bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]

                log.debug(
                    'Creating the user nodes associated with commit ID {0} and push ID {1}'
                    .format(result['commit_id'], result['push_id']))
                author = User.create_or_update({
                    'username': repo_info['author_username'],
                    'email': repo_info['author_email'],
                })[0]
                committer = User.create_or_update({
                    'username': repo_info['committer_username'],
                    'email': repo_info['committer_email'],
                })[0]
                pusher = User.get_or_create({'username': result['pusher']})[0]

                log.debug(
                    'Creating the relationships associated with commit ID {0} and push ID {1}'
                    .format(result['commit_id'], result['push_id']))
                # Everyone involved counts as a contributor to the repo and branch
                repo.contributors.connect(author)
                repo.contributors.connect(committer)
                repo.contributors.connect(pusher)
                repo.commits.connect(commit)
                repo.pushes.connect(push)
                repo.branches.connect(branch)

                branch.contributors.connect(author)
                branch.contributors.connect(committer)
                branch.contributors.connect(pusher)
                branch.commits.connect(commit)
                branch.pushes.connect(push)

                push.conditional_connect(push.pusher, pusher)
                push.commits.connect(commit)

                commit.conditional_connect(commit.author, author)
                commit.conditional_connect(commit.committer, committer)

                if repo_info['parent']:
                    parent_commit = DistGitCommit.get_or_create(
                        {'hash_': repo_info['parent']})[0]
                    commit.conditional_connect(commit.parent, parent_commit)

                # Connect the bug based on how the commit message references it
                if result['bugzilla_type'] == 'related':
                    commit.related_bugs.connect(bug)
                elif result['bugzilla_type'] == 'resolves':
                    commit.resolved_bugs.connect(bug)
                elif result['bugzilla_type'] == 'reverted':
                    commit.reverted_bugs.connect(bug)
                # This is no longer needed so it can be cleared to save RAM
                del repo_info
        finally:
            # Reap the worker processes even when an exception escapes the loop
            pool.close()
            pool.join()
コード例 #19
0
    def update_neo4j(self, builds):
        """
        Update Neo4j with Koji build information from Teiid.

        Creates/updates a build node per entry (container, module, or plain
        RPM build), connects the owner, and — when a task with a git URL is
        found — connects the build to its dist-git commit.

        :param list builds: a list of dictionaries with Koji build data
        """
        # Uploads builds data to their respective nodes
        log.info('Beginning to upload data to Neo4j')
        count = 0

        for build_dict in builds:
            build_params = dict(
                id_=build_dict['id'],
                epoch=build_dict['epoch'],
                state=build_dict['state'],
                creation_time=build_dict['creation_time'],
                start_time=build_dict['start_time'],
                completion_time=build_dict['completion_time'],
                name=build_dict['package_name'],
                version=build_dict['version'],
                release=build_dict['release']
            )

            # "extra" is stored as JSON text; tolerate NULL or malformed values
            try:
                extra_json = json.loads(build_dict['extra'])
            except (ValueError, TypeError):
                extra_json = {}

            if self.is_container_build(build_dict):
                # A container build is an operator build if it shipped an
                # operator-manifests archive
                build_params['operator'] = bool(
                    extra_json.get('typeinfo', {}).get('operator-manifests', {}).get('archive')
                )
                try:
                    build = ContainerKojiBuild.create_or_update(build_params)[0]
                except neomodel.exceptions.ConstraintValidationFailed:
                    # This must have errantly been created as a KojiBuild instead of a
                    # ContainerKojiBuild, so let's fix that.
                    build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
                    if not build:
                        # If there was a constraint validation failure and the build isn't just the
                        # wrong label, then we can't recover.
                        raise
                    build.add_label(ContainerKojiBuild.__label__)
                    build = ContainerKojiBuild.create_or_update(build_params)[0]
            elif self.is_module_build(build_dict):
                module_extra_info = extra_json.get('typeinfo', {}).get('module')
                try:
                    build_params['context'] = module_extra_info.get('context')
                    build_params['mbs_id'] = module_extra_info.get('module_build_service_id')
                    build_params['module_name'] = module_extra_info.get('name')
                    build_params['module_stream'] = module_extra_info.get('stream')
                    build_params['module_version'] = module_extra_info.get('version')
                    build = ModuleKojiBuild.create_or_update(build_params)[0]
                except neomodel.exceptions.ConstraintValidationFailed:
                    # This must have errantly been created as a KojiBuild instead of a
                    # ModuleKojiBuild, so let's fix that.
                    build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
                    if not build:
                        # If there was a constraint validation failure and the build isn't just the
                        # wrong label, then we can't recover.
                        raise
                    build.add_label(ModuleKojiBuild.__label__)
                    build = ModuleKojiBuild.create_or_update(build_params)[0]
            else:
                build = KojiBuild.create_or_update(build_params)[0]

            username = build_dict['owner_name']
            user = User.get_or_create(dict(username=username))[0]
            build.conditional_connect(build.owner, user)

            # module_extra_info is only set on the module branch above, and the
            # label check guarantees that branch ran for this build
            if build.__label__ == ModuleKojiBuild.__label__:
                module_build_tag_name = module_extra_info.get('content_koji_tag')
                if module_build_tag_name:
                    module_components = self.get_tag_info(module_build_tag_name)
                    # Some modules don't have components
                    if module_components:
                        for item in module_components:
                            module_component = KojiBuild.get_or_create(dict(
                                id_=item['build_id']
                            ))[0]
                            build.components.connect(module_component)

                        # Recurse so each component build gets its full node data
                        component_builds = self.get_build_info(
                            [item['build_id'] for item in module_components])
                        self.update_neo4j(component_builds)

            count += 1
            log.info('Uploaded {0} builds out of {1}'.format(count, len(builds)))

            container_koji_task_id = extra_json.get('container_koji_task_id')
            if build_dict['task_id']:
                task_id = build_dict['task_id']
            elif container_koji_task_id:
                task_id = container_koji_task_id
            else:
                # Continue if the task_id is None
                continue
            # Getting task related to the current build
            try:
                task_dict = self.get_task(task_id)[0]
            except IndexError:
                continue

            commit_hash = None
            # Only look for the commit hash if the build is an RPM or container
            if task_dict['method'] in ('build', 'buildContainer'):
                xml_root = ET.fromstring(task_dict['request'])
                for child in xml_root.iter('string'):
                    if child.text and child.text.startswith('git'):
                        # The hash is the URL fragment (git://...#<hash>); guard
                        # against URLs without a fragment to avoid an IndexError
                        if '#' in child.text:
                            commit_hash = child.text.rsplit('#', 1)[1]
                        break

            if commit_hash:
                commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
                build.conditional_connect(build.commit, commit)
コード例 #20
0
    def update_neo4j(self, advisories):
        """
        Update Neo4j with Errata Tool advisories from Teiid.

        For each advisory this creates/updates the Advisory node, reconciles
        the attached Koji builds (connecting, reconnecting with a new
        ``time_attached``, or disconnecting removed builds), promotes the
        advisory to a ContainerAdvisory when a container build is attached,
        and connects the assigned-to/reporter users and attached bugs.

        :param list advisories: a list of dictionaries of advisories
        """
        count = 0
        for advisory in advisories:
            count += 1
            log.info('Processing advisory {0}/{1}'.format(
                count, len(advisories)))
            # The content_types column is a string with YAML in it, so convert it to a list
            content_types = yaml.safe_load(advisory['content_types'])
            adv = Advisory.create_or_update({
                'actual_ship_date':
                advisory['actual_ship_date'],
                'advisory_name':
                advisory['advisory_name'],
                'content_types':
                content_types,
                'created_at':
                advisory['created_at'],
                'id_':
                advisory['id'],
                'issue_date':
                advisory['issue_date'],
                'product_name':
                advisory['product_name'],
                'product_short_name':
                advisory['product_short_name'],
                'release_date':
                advisory['release_date'],
                'security_impact':
                advisory['security_impact'],
                'security_sla':
                advisory['security_sla'],
                'state':
                advisory['state'],
                'status_time':
                advisory['status_time'],
                'synopsis':
                advisory['synopsis'],
                'update_date':
                advisory['update_date'],
            })[0]
            # Tracks whether we've already promoted this advisory to
            # ContainerAdvisory, so the label is added at most once below
            container_adv = False

            for associated_build in self.get_associated_builds(advisory['id']):
                # Even if a node has two labels in the database, Neo4j returns the node
                # only with the specific label you asked for. Hence we check for labels
                # ContainerKojiBuild and KojiBuild separately for the same node.
                build = ContainerKojiBuild.nodes.get_or_none(
                    id_=associated_build['id_'])
                if not build:
                    build = KojiBuild.nodes.get_or_none(
                        id_=associated_build['id_'])

                if build and not container_adv:
                    # A container build attached to the advisory makes it a
                    # container advisory
                    if build.__label__ == 'ContainerKojiBuild':
                        adv.add_label(ContainerAdvisory.__label__)
                        container_adv = True

                # If this is set, that means it was once part of the advisory but not anymore.
                # This relationship needs to be deleted if it exists.
                if associated_build['removed_index_id']:
                    if build:
                        adv.attached_builds.disconnect(build)
                else:
                    # Query Teiid and create the entry only if the build is not present in Neo4j
                    if not build:
                        attached_build = self.get_koji_build(
                            associated_build['id_'])
                        if attached_build:
                            if self.is_container_build(attached_build):
                                build = ContainerKojiBuild.get_or_create(
                                    {'id_': associated_build['id_']})[0]
                            else:
                                build = KojiBuild.get_or_create(
                                    {'id_': associated_build['id_']})[0]

                    # This will happen only if we do not find the build we are looking for in Teiid
                    # which shouldn't usually happen under normal conditions
                    if not build:
                        # NOTE(review): log.warn is deprecated; prefer log.warning
                        log.warn(
                            'The Koji build with ID {} was not found in Teiid!'
                            .format(associated_build['id_']))
                        continue

                    # Builds created in the fallback above bypass the promotion
                    # check earlier in the loop, so re-check the labels here
                    if adv.__label__ != ContainerAdvisory.__label__ \
                            and build.__label__ == ContainerKojiBuild.__label__:
                        adv.add_label(ContainerAdvisory.__label__)

                    # Keep the time_attached property on the relationship in
                    # sync: replace the relationship if the timestamp changed,
                    # otherwise create it fresh
                    attached_rel = adv.attached_builds.relationship(build)
                    time_attached = associated_build['time_attached']
                    if attached_rel:
                        if attached_rel.time_attached != time_attached:
                            adv.attached_builds.replace(
                                build, {'time_attached': time_attached})
                    else:
                        adv.attached_builds.connect(
                            build, {'time_attached': time_attached})

            # The Teiid values look like email addresses; only the local part
            # is used as the username
            assigned_to = User.get_or_create(
                {'username': advisory['assigned_to'].split('@')[0]})[0]
            adv.conditional_connect(adv.assigned_to, assigned_to)
            reporter = User.get_or_create(
                {'username': advisory['reporter'].split('@')[0]})[0]
            adv.conditional_connect(adv.reporter, reporter)

            for attached_bug in self.get_attached_bugs(advisory['id']):
                bug = BugzillaBug.get_or_create(attached_bug)[0]
                adv.attached_bugs.connect(bug)