def _update_existing_references(self, repo_id, import_type):
    pairs = self._get_new_commit_file_pairs(repo_id)
    intervals = [i for i in multiprocessing_util.get_tasks_intervals(pairs, self._num_processes) if len(i) > 0]

    queue_intervals = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers (they must read from the task queue, not the interval list)
    multiprocessing_util.start_consumers(self._num_processes, queue_intervals, results)

    for interval in intervals:
        commit_file_extractor = Code2DbCommitFile(self._db_name, self._git_repo_path, interval, import_type,
                                                  self._config, self._log_path)
        queue_intervals.put(commit_file_extractor)

    # Add end-of-queue markers
    multiprocessing_util.add_poison_pills(self._num_processes, queue_intervals)

    # Wait for all of the tasks to finish
    queue_intervals.join()
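# Every method in this section uses the same fan-out pattern: split the work
# into intervals, start one consumer process per worker, enqueue one extractor
# object per interval, then add poison pills and join the queue. Below is a
# minimal sketch of what the multiprocessing_util helpers could look like. The
# names match the calls in this section, but the bodies are assumptions for
# illustration, not the actual implementation.
import multiprocessing


def get_tasks_intervals(elements, num_processes):
    # Assumed behavior: split `elements` into at most `num_processes` chunks.
    # Callers filter out empty chunks with `if len(i) > 0`.
    chunk_size = max(1, len(elements) // num_processes + (len(elements) % num_processes > 0))
    return [elements[i:i + chunk_size] for i in range(0, len(elements), chunk_size)]


class Consumer(multiprocessing.Process):
    # Assumed consumer: pulls extractor objects off the joinable queue and
    # runs them until it sees a poison pill (None).
    def __init__(self, task_queue, result_queue):
        super(Consumer, self).__init__()
        self._task_queue = task_queue
        self._result_queue = result_queue

    def run(self):
        while True:
            task = self._task_queue.get()
            if task is None:  # poison pill: no more work for this consumer
                self._task_queue.task_done()
                break
            result = task()  # extractor tasks are assumed to be callable
            self._result_queue.put(result)
            self._task_queue.task_done()


def start_consumers(num_consumers, task_queue, result_queue):
    for _ in range(num_consumers):
        Consumer(task_queue, result_queue).start()


def add_poison_pills(num_consumers, task_queue):
    # One pill per consumer so every process exits its run() loop,
    # which in turn lets task_queue.join() return.
    for _ in range(num_consumers):
        task_queue.put(None)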
def _update_channels(self, instant_messaging_id):
    # updates the channels of an instant messaging service
    channel_ids = self._dao.get_channel_ids(instant_messaging_id)

    if channel_ids:
        intervals = [i for i in multiprocessing_util.get_tasks_intervals(channel_ids, len(self._tokens)) if len(i) > 0]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens), queue_extractors, results)

        for pos, interval in enumerate(intervals):
            channel_extractor = SlackChannel2Db(self._db_name, instant_messaging_id, interval,
                                                self._tokens[pos], self._config, self._log_path)
            queue_extractors.put(channel_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens), queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
def _get_channels(self, instant_messaging_id):
    # processes Slack channels
    channel_ids = self._get_channel_ids(instant_messaging_id)
    intervals = [i for i in multiprocessing_util.get_tasks_intervals(channel_ids, len(self._tokens)) if len(i) > 0]

    queue_extractors = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers
    multiprocessing_util.start_consumers(len(self._tokens), queue_extractors, results)

    # pair each interval with its own API token, one per consumer
    for pos, interval in enumerate(intervals):
        channel_extractor = SlackChannel2Db(self._db_name, instant_messaging_id, interval,
                                            self._tokens[pos], self._config, self._log_path)
        queue_extractors.put(channel_extractor)

    # Add end-of-queue markers
    multiprocessing_util.add_poison_pills(len(self._tokens), queue_extractors)

    # Wait for all of the tasks to finish
    queue_extractors.join()
def _get_topics(self, forum_id):
    # updates the topics of a forum
    topic_ids = self._dao.get_topic_own_ids(forum_id)

    if topic_ids:
        intervals = [i for i in multiprocessing_util.get_tasks_intervals(topic_ids, len(self._tokens)) if len(i) > 0]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens), queue_extractors, results)

        for pos, interval in enumerate(intervals):
            topic_extractor = StackOverflowTopic2Db(self._db_name, forum_id, interval,
                                                    self._tokens[pos], self._config, self._log_path)
            queue_extractors.put(topic_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens), queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
def _get_topics(self, forum_id):
    # updates the topics of a forum
    topic_ids = self._dao.get_topic_ids(forum_id)

    if topic_ids:
        self._update_topics_info(forum_id)
        intervals = [i for i in multiprocessing_util.get_tasks_intervals(topic_ids, self._num_processes) if len(i) > 0]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes, queue_extractors, results)

        for interval in intervals:
            topic_extractor = EclipseTopic2Db(self._db_name, forum_id, interval, self._config, self._log_path)
            queue_extractors.put(topic_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes, queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
def _insert_issue_data(self, repo_id, issue_tracker_id):
    # processes issue data, skipping issues that were already imported
    imported = self._dao.get_already_imported_issue_ids(issue_tracker_id, repo_id)
    issues = list(set(self._querier.get_issue_ids(self._before_date)) - set(imported))
    intervals = [i for i in multiprocessing_util.get_tasks_intervals(issues, len(self._tokens)) if len(i) > 0]

    queue_intervals = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers
    multiprocessing_util.start_consumers(len(self._tokens), queue_intervals, results)

    for pos, interval in enumerate(intervals):
        issue_extractor = GitHubIssue2Db(self._db_name, repo_id, issue_tracker_id, self._url, interval,
                                         self._tokens[pos], self._config, self._log_path)
        queue_intervals.put(issue_extractor)

    # Add end-of-queue markers
    multiprocessing_util.add_poison_pills(len(self._tokens), queue_intervals)

    # Wait for all of the tasks to finish
    queue_intervals.join()
def _update_issues(self):
    # updates issues
    project_id = self._dao.select_project_id(self._project_name)
    repo_id = self._dao.select_repo_id(project_id, self._repo_name)
    issue_tracker_id = self._dao.select_issue_tracker_id(repo_id, self._issue_tracker_name)
    issue_tracker_url = self._url

    if issue_tracker_id:
        cursor = self._dao.get_cursor()
        query = "SELECT i.own_id FROM issue i " \
                "JOIN issue_tracker it ON i.issue_tracker_id = it.id " \
                "WHERE issue_tracker_id = %s AND repo_id = %s " \
                "ORDER BY i.own_id ASC;"
        arguments = [issue_tracker_id, repo_id]
        self._dao.execute(cursor, query, arguments)

        issues = []
        row = self._dao.fetchone(cursor)
        while row:
            issues.append(row[0])
            row = self._dao.fetchone(cursor)
        self._dao.close_cursor(cursor)

        if issues:
            intervals = [i for i in multiprocessing_util.get_tasks_intervals(issues, self._num_processes) if len(i) > 0]

            self._update_issue_content(repo_id, issue_tracker_id, intervals, issue_tracker_url)
            self._update_issue_dependency(repo_id, issue_tracker_id, intervals, issue_tracker_url)
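# For reference: the GitHub variant of _update_issues further below fetches
# the same list of already-imported issue ids through a DAO helper instead of
# a raw query. Assuming that helper runs an equivalent SELECT, the manual
# cursor loop above could be reduced to a single call:
issues = self._dao.get_already_imported_issue_ids(issue_tracker_id, repo_id)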
def _insert_issue_dependencies(self, repo_id, issue_tracker_id):
    # processes issue dependency data
    issues = self._dao.get_already_imported_issue_ids(issue_tracker_id, repo_id)
    intervals = [i for i in multiprocessing_util.get_tasks_intervals(issues, self._num_processes) if len(i) > 0]

    queue_intervals = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers
    multiprocessing_util.start_consumers(self._num_processes, queue_intervals, results)

    for interval in intervals:
        issue_dependency_extractor = BugzillaIssueDependency2Db(self._db_name, repo_id, issue_tracker_id,
                                                                self._url, self._product, interval,
                                                                self._config, self._log_path)
        queue_intervals.put(issue_dependency_extractor)

    # Add end-of-queue markers
    multiprocessing_util.add_poison_pills(self._num_processes, queue_intervals)

    # Wait for all of the tasks to finish
    queue_intervals.join()
def _get_topics(self, forum_id):
    # processes Stack Overflow questions, skipping topics that were already imported
    topic_imported = self._dao.get_topic_own_ids(forum_id)
    topic_ids = list(set(self._querier.get_topic_ids(self._search_query, self._before_date)) - set(topic_imported))
    topic_ids.sort()
    intervals = [i for i in multiprocessing_util.get_tasks_intervals(topic_ids, len(self._tokens)) if len(i) > 0]

    queue_extractors = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Start consumers
    multiprocessing_util.start_consumers(len(self._tokens), queue_extractors, results)

    for pos, interval in enumerate(intervals):
        topic_extractor = StackOverflowTopic2Db(self._db_name, forum_id, interval,
                                                self._tokens[pos], self._config, self._log_path)
        queue_extractors.put(topic_extractor)

    # Add end-of-queue markers
    multiprocessing_util.add_poison_pills(len(self._tokens), queue_extractors)

    # Wait for all of the tasks to finish
    queue_extractors.join()
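# A side note on the shared pattern: each queue-based method above creates a
# `results` queue but never reads from it, so the extractor tasks appear to be
# run purely for their database side effects. If each task returned one value
# (as in the Consumer sketch near the top of this section), the results could
# be drained after the join; a sketch, assuming one result per interval:
collected = []
for _ in range(len(intervals)):
    collected.append(results.get())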
def _update_issues(self):
    # updates issues
    project_id = self._dao.select_project_id(self._project_name)
    repo_id = self._dao.select_repo_id(project_id, self._repo_name)
    issue_tracker_id = self._dao.select_issue_tracker_id(repo_id, self._issue_tracker_name)
    issue_tracker_url = self._url

    if issue_tracker_id:
        imported = self._dao.get_already_imported_issue_ids(issue_tracker_id, repo_id)

        if imported:
            intervals = [i for i in multiprocessing_util.get_tasks_intervals(imported, len(self._tokens)) if len(i) > 0]

            self._update_issue_content(repo_id, issue_tracker_id, intervals, issue_tracker_url)
            self._update_issue_dependency(repo_id, issue_tracker_id, intervals, issue_tracker_url)