Ejemplo n.º 1
0
    def run(self, since=None, until=None):
        """
        Run the dist-git scraper.

        The fetched results are split into chunks by ``_get_result_chunks`` and every chunk is
        handed to ``_update_neo4j`` in its own short-lived process, with at most two of those
        processes alive at the same time. A process that ends with a non-zero exit code is
        logged as an error rather than being silently ignored.

        :param str since: a datetime to start scraping data from; ``self.default_since`` is used
            when this is None
        :param str until: a datetime to scrape data until; ``self.default_until`` is used when
            this is None
        """
        log.info('Starting initial load of dist-git commits')
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        results = self.get_distgit_data(start_date, end_date)
        total_results = len(results)
        log.info('Successfully fetched {0} results from Teiid'.format(
            total_results))
        # Overwrite results with the formatted results so we don't have to store both in RAM
        results = list(self._get_result_chunks(results))
        # Upload the results to Neo4j using multi-processing to process chunks of results. We don't
        # use pool so that way the process doesn't get reused and the RAM is returned to the OS.
        # This will aid in a work-around for a memory leak from one of the libraries used that
        # couldn't be tracked down.
        procs = []
        concurrent_procs = 2
        for i, result in enumerate(results):
            # Only check if we've reached the process limit after it's technically possible
            if i >= concurrent_procs:
                active_procs = [_proc for _proc in procs if _proc.is_alive()]
                if len(active_procs) >= concurrent_procs:
                    log.debug(
                        'There are already {0} processes running. Will wait until one of '
                        'them completes.'.format(len(active_procs)))
                    # Blocking on the oldest live process frees up one slot before starting
                    # the next upload
                    active_procs[0].join()
            proc = Process(target=self._update_neo4j,
                           args=(neomodel_config.DATABASE_URL, total_results,
                                 result))
            proc.start()
            procs.append(proc)

        failed_procs = 0
        for proc in procs:
            # Wait for all the processes to finish
            proc.join()
            # After join() the exit code is set; anything but 0 means the child did not exit
            # cleanly (unhandled exception, sys.exit(N), or termination by a signal)
            if proc.exitcode != 0:
                failed_procs += 1
                log.error('The process {0} exited with exit code {1}'.format(
                    proc.name, proc.exitcode))

        if failed_procs:
            # Don't claim success when part of the upload did not finish cleanly
            log.error(
                'Initial load of dist-git commits finished, but {0} of {1} upload processes '
                'failed'.format(failed_procs, len(procs)))
        else:
            log.info('Initial load of dist-git commits complete!')
Ejemplo n.º 2
0
    def run(self, since=None, until=None):
        """
        Fetch Bugzilla bugs for a time window and pass them to ``update_neo4j``.

        :param str since: a datetime to start scraping data from; when None,
            ``self.default_since`` is used
        :param str until: a datetime to scrape data until; when None,
            ``self.default_until`` is used
        """
        log.info('Starting initial load of Bugzilla bugs')
        # Fall back to the scraper's defaults for whichever boundary was not supplied
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        fetched_bugs = self.get_bugzilla_bugs(start_date, end_date)
        bug_count = len(fetched_bugs)
        log.info('Successfully fetched {0} bugs from teiid'.format(bug_count))
        self.update_neo4j(fetched_bugs)
        log.info('Initial load of Bugzilla bugs complete!')
Ejemplo n.º 3
0
    def run(self, since=None, until=None):
        """
        Fetch Errata Tool advisories for a time window and pass them to ``update_neo4j``.

        :param str since: a datetime to start scraping data from; when None,
            ``self.default_since`` is used
        :param str until: a datetime to scrape data until; when None,
            ``self.default_until`` is used
        """
        log.info('Starting initial load of Errata advisories')
        # Fall back to the scraper's defaults for whichever boundary was not supplied
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        fetched_advisories = self.get_advisories(start_date, end_date)
        advisory_count = len(fetched_advisories)
        log.info('Successfully fetched {0} advisories from Teiid'.format(advisory_count))
        self.update_neo4j(fetched_advisories)
        log.info('Initial load of Errata advisories complete!')
Ejemplo n.º 4
0
    def run(self, since=None, until=None):
        """
        Fetch dist-git data for a time window and pass it to ``update_neo4j``.

        :param str since: a datetime to start scraping data from; when None,
            ``self.default_since`` is used
        :param str until: a datetime to scrape data until; when None,
            ``self.default_until`` is used
        """
        log.info('Starting initial load of dist-git commits and pushes')
        # Fall back to the scraper's defaults for whichever boundary was not supplied
        start_date = self.default_since if since is None else timestamp_to_date(since)
        end_date = self.default_until if until is None else timestamp_to_date(until)

        distgit_data = self.get_distgit_data(start_date, end_date)
        fetched_count = len(distgit_data)
        log.info('Successfully fetched {0} results from Teiid'.format(fetched_count))
        self.update_neo4j(distgit_data)
        log.info('Initial load of dist-git commits and pushes complete!')
Ejemplo n.º 5
0
    def run(self, since=None, until=None):
        """
        Fetch Koji builds for a time window and pass them to ``update_neo4j``.

        :param str since: a datetime to start scraping data from; when None,
            ``self.default_since`` is used
        :param str until: a datetime to scrape data until; when None,
            ``self.default_until`` is used
        """
        log.info('Starting initial load for Koji')
        # Builds are fetched between these two dates; a boundary the caller did not supply
        # falls back to the scraper's default
        start_date = self.default_since if since is None else utils.timestamp_to_date(since)
        end_date = self.default_until if until is None else utils.timestamp_to_date(until)

        fetched_builds = self.get_koji_builds(start_date, end_date)
        build_count = len(fetched_builds)
        log.info('Successfully fetched {0} builds from teiid'.format(build_count))
        self.update_neo4j(fetched_builds)
        log.info('Initial load of Koji builds complete!')
Ejemplo n.º 6
0
def test_timestamp_to_date(input_date, expected_date):
    """Check that ``timestamp_to_date`` turns the given input into the expected date object."""
    converted = timestamp_to_date(input_date)
    assert converted == expected_date