Example #1
0
    def _update_existing_references(self, repo_id, import_type):
        pairs = self._get_new_commit_file_pairs(repo_id)
        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                pairs, self._num_processes) if len(i) > 0
        ]

        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes,
                                             queue_intervals, results)

        for interval in intervals:
            commit_file_extractor = Code2DbCommitFile(
                self._db_name, self._git_repo_path, interval, import_type,
                self._config, self._log_path)
            queue_intervals.put(commit_file_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes,
                                              queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
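Every example in this section relies on the same producer/consumer helpers from multiprocessing_util. The following is a minimal sketch of what those helpers might look like, inferred only from how they are called in these snippets; the Consumer class, the use of None as the poison pill, and the chunking strategy are assumptions, not the project's actual implementation.

import multiprocessing


class Consumer(multiprocessing.Process):
    # pulls callable task objects off the shared queue and runs them until it
    # receives a poison pill (None), then exits
    def __init__(self, task_queue, result_queue):
        super(Consumer, self).__init__()
        self._task_queue = task_queue
        self._result_queue = result_queue

    def run(self):
        while True:
            task = self._task_queue.get()
            if task is None:
                # poison pill: acknowledge it so join() can complete, then stop
                self._task_queue.task_done()
                break
            result = task()
            self._task_queue.task_done()
            self._result_queue.put(result)


def get_tasks_intervals(elements, num_chunks):
    # splits a list of ids into at most num_chunks contiguous slices
    if not elements:
        return []
    chunk_size = -(-len(elements) // num_chunks)  # ceiling division
    return [elements[i:i + chunk_size]
            for i in range(0, len(elements), chunk_size)]


def start_consumers(num_consumers, task_queue, result_queue):
    # spawns one Consumer process per worker; all workers read the same queue
    for _ in range(num_consumers):
        Consumer(task_queue, result_queue).start()


def add_poison_pills(num_consumers, task_queue):
    # one pill per consumer so every worker eventually unblocks and exits
    for _ in range(num_consumers):
        task_queue.put(None)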
Example #2
0
    def _update_channels(self, instant_messaging_id):
        # updates the channels of an instant messaging service
        channel_ids = self._dao.get_channel_ids(instant_messaging_id)

        if channel_ids:
            intervals = [
                i for i in multiprocessing_util.get_tasks_intervals(
                    channel_ids, len(self._tokens)) if len(i) > 0
            ]

            queue_extractors = multiprocessing.JoinableQueue()
            results = multiprocessing.Queue()

            # Start consumers
            multiprocessing_util.start_consumers(len(self._tokens),
                                                 queue_extractors, results)

            for i in range(len(intervals)):
                channel_extractor = SlackChannel2Db(
                    self._db_name, instant_messaging_id, intervals[i],
                    self._tokens[i], self._config, self._log_path)
                queue_extractors.put(channel_extractor)

            # Add end-of-queue markers
            multiprocessing_util.add_poison_pills(len(self._tokens),
                                                  queue_extractors)

            # Wait for all of the tasks to finish
            queue_extractors.join()
Example #3
0
    def _get_channels(self, instant_messaging_id):
        # processes Slack channels
        channel_ids = self._get_channel_ids(instant_messaging_id)

        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                channel_ids, len(self._tokens)) if len(i) > 0
        ]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens),
                                             queue_extractors, results)

        pos = 0
        for interval in intervals:
            channel_extractor = SlackChannel2Db(
                self._db_name, instant_messaging_id, interval,
                self._tokens[pos], self._config, self._log_path)
            queue_extractors.put(channel_extractor)
            pos += 1

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens),
                                              queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
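The manual pos counter in _get_channels pairs each interval with one API token so that no two workers share a token. A sketch of the same pairing written with zip, purely as an alternative formulation and not the project's code:

        for interval, token in zip(intervals, self._tokens):
            channel_extractor = SlackChannel2Db(
                self._db_name, instant_messaging_id, interval,
                token, self._config, self._log_path)
            queue_extractors.put(channel_extractor)

Since get_tasks_intervals never produces more intervals than there are tokens, zip pairs every interval with a distinct token.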
Example #4
0
    def _get_topics(self, forum_id):
        # updates topics of a forum
        topic_ids = self._dao.get_topic_own_ids(forum_id)

        if topic_ids:
            intervals = [
                i for i in multiprocessing_util.get_tasks_intervals(
                    topic_ids, len(self._tokens)) if len(i) > 0
            ]

            queue_extractors = multiprocessing.JoinableQueue()
            results = multiprocessing.Queue()

            # Start consumers
            multiprocessing_util.start_consumers(len(self._tokens),
                                                 queue_extractors, results)

            for i in range(len(intervals)):
                topic_extractor = StackOverflowTopic2Db(
                    self._db_name, forum_id, intervals[i], self._tokens[i],
                    self._config, self._log_path)
                queue_extractors.put(topic_extractor)

            # Add end-of-queue markers
            multiprocessing_util.add_poison_pills(len(self._tokens),
                                                  queue_extractors)

            # Wait for all of the tasks to finish
            queue_extractors.join()
Example #5
0
    def _get_topics(self, forum_id):
        # updates the topics of a forum
        topic_ids = self._dao.get_topic_ids(forum_id)

        if topic_ids:
            self._update_topics_info(forum_id)

            intervals = [
                i for i in multiprocessing_util.get_tasks_intervals(
                    topic_ids, self._num_processes) if len(i) > 0
            ]

            queue_extractors = multiprocessing.JoinableQueue()
            results = multiprocessing.Queue()

            # Start consumers
            multiprocessing_util.start_consumers(self._num_processes,
                                                 queue_extractors, results)

            for interval in intervals:
                topic_extractor = EclipseTopic2Db(self._db_name, forum_id,
                                                  interval, self._config,
                                                  self._log_path)
                queue_extractors.put(topic_extractor)

            # Add end-of-queue markers
            multiprocessing_util.add_poison_pills(self._num_processes,
                                                  queue_extractors)

            # Wait for all of the tasks to finish
            queue_extractors.join()
Example #6
0
    def _insert_issue_data(self, repo_id, issue_tracker_id):
        # processes issue data
        imported = self._dao.get_already_imported_issue_ids(
            issue_tracker_id, repo_id)
        issues = list(
            set(self._querier.get_issue_ids(self._before_date)) -
            set(imported))

        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                issues, len(self._tokens)) if len(i) > 0
        ]

        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens),
                                             queue_intervals, results)

        pos = 0
        for interval in intervals:
            issue_extractor = GitHubIssue2Db(self._db_name, repo_id,
                                             issue_tracker_id, self._url,
                                             interval, self._tokens[pos],
                                             self._config, self._log_path)
            queue_intervals.put(issue_extractor)
            pos += 1

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens),
                                              queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
Example #7
0
    def _update_issues(self):
        # updates issues
        project_id = self._dao.select_project_id(self._project_name)
        repo_id = self._dao.select_repo_id(project_id, self._repo_name)
        issue_tracker_id = self._dao.select_issue_tracker_id(repo_id, self._issue_tracker_name)
        issue_tracker_url = self._url

        if issue_tracker_id:
            cursor = self._dao.get_cursor()
            query = "SELECT i.own_id FROM issue i " \
                    "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                    "WHERE issue_tracker_id = %s AND repo_id = %s " \
                    "ORDER BY i.own_id ASC;"
            arguments = [issue_tracker_id, repo_id]
            self._dao.execute(cursor, query, arguments)

            issues = []
            row = self._dao.fetchone(cursor)

            while row:
                issues.append(row[0])
                row = self._dao.fetchone(cursor)
            self._dao.close_cursor(cursor)

            if issues:
                intervals = [i for i in multiprocessing_util.get_tasks_intervals(issues, self._num_processes)
                             if len(i) > 0]

                self._update_issue_content(repo_id, issue_tracker_id, intervals, issue_tracker_url)
                self._update_issue_dependency(repo_id, issue_tracker_id, intervals, issue_tracker_url)
Example #8
0
    def _insert_issue_dependencies(self, repo_id, issue_tracker_id):
        # processes issue dependency data
        issues = self._dao.get_already_imported_issue_ids(
            issue_tracker_id, repo_id)
        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                issues, self._num_processes) if len(i) > 0
        ]

        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes,
                                             queue_intervals, results)

        for interval in intervals:
            issue_dependency_extractor = BugzillaIssueDependency2Db(
                self._db_name, repo_id, issue_tracker_id, self._url,
                self._product, interval, self._config, self._log_path)
            queue_intervals.put(issue_dependency_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes,
                                              queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
Example #9
0
    def _get_topics(self, forum_id):
        # processes Stack Overflow questions
        topic_imported = self._dao.get_topic_own_ids(forum_id)
        topic_ids = list(set(self._querier.get_topic_ids(self._search_query, self._before_date)) - set(topic_imported))
        topic_ids.sort()

        intervals = [i for i in multiprocessing_util.get_tasks_intervals(topic_ids, len(self._tokens)) if len(i) > 0]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens), queue_extractors, results)

        pos = 0
        for interval in intervals:
            topic_extractor = StackOverflowTopic2Db(self._db_name, forum_id, interval, self._tokens[pos],
                                                    self._config, self._log_path)
            queue_extractors.put(topic_extractor)
            pos += 1

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens), queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
Example #10
0
    def _update_issues(self):
        # updates issues
        project_id = self._dao.select_project_id(self._project_name)
        repo_id = self._dao.select_repo_id(project_id, self._repo_name)
        issue_tracker_id = self._dao.select_issue_tracker_id(repo_id, self._issue_tracker_name)
        issue_tracker_url = self._url

        if issue_tracker_id:
            imported = self._dao.get_already_imported_issue_ids(issue_tracker_id, repo_id)

            if imported:
                intervals = [i for i in multiprocessing_util.get_tasks_intervals(imported, len(self._tokens))
                             if len(i) > 0]

                self._update_issue_content(repo_id, issue_tracker_id, intervals, issue_tracker_url)
                self._update_issue_dependency(repo_id, issue_tracker_id, intervals, issue_tracker_url)
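Each extractor placed on a queue in these examples (Code2DbCommitFile, SlackChannel2Db, StackOverflowTopic2Db, EclipseTopic2Db, GitHubIssue2Db, BugzillaIssueDependency2Db) has to be a picklable task object that a consumer process can execute. Assuming the consumer simply invokes the task, a hypothetical extractor would look roughly as follows; the class name, the __call__ protocol, and the body are illustrative assumptions, not the actual implementation of any of the classes above.

class ExampleExtractor2Db(object):
    # hypothetical task object: keeps only picklable state in __init__ and
    # does its real work inside __call__, since it runs in a child process
    # spawned by start_consumers
    def __init__(self, db_name, parent_id, interval, config, log_path):
        self._db_name = db_name
        self._parent_id = parent_id
        self._interval = interval
        self._config = config
        self._log_path = log_path

    def __call__(self):
        # each worker processes only the ids in its own interval
        for own_id in self._interval:
            self._extract_single(own_id)
        return 0

    def _extract_single(self, own_id):
        # placeholder for the per-id extraction and insert logic
        pass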