Example #1
    def process_queries(self, queries):
        # If this is an old Trac version, update the timeline.
        if self.tm.old_trac:
            # Set up the TracTimeline instance
            try:
                self.timeline = TracTimeline.all_timelines.get(
                    base_url=self.tm.get_base_url())
            except TracTimeline.DoesNotExist:
                self.timeline = TracTimeline(base_url=self.tm.get_base_url())
            # Check when the timeline was last updated.
            timeline_age = datetime.datetime.utcnow() - self.timeline.last_polled
            # Set up timeline URL.
            timeline_url = urlparse.urljoin(
                self.timeline.base_url,
                "timeline?ticket=on&daysback=%d&format=rss" %
                (timeline_age.days + 1))
            # Add the URL to the waiting list
            self.add_url_to_waiting_list(url=timeline_url,
                                         callback=self.handle_timeline_rss)

        # Add all the queries to the waiting list
        for query in queries:
            query_url = query.get_query_url()
            self.add_url_to_waiting_list(url=query_url,
                                         callback=self.handle_query_csv)
            query.last_polled = datetime.datetime.utcnow()
            query.save()

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()
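
For reference, here is a minimal standalone sketch of the timeline-URL construction used in process_queries() above. The base URL and last-polled time are made-up placeholders, not values taken from a real tracker.

import datetime
import urlparse

# Hypothetical stand-ins for self.timeline.base_url and self.timeline.last_polled.
base_url = "http://trac.example.org/"
last_polled = datetime.datetime.utcnow() - datetime.timedelta(days=3)

# Same arithmetic as above: one extra day covers possible timezone differences.
timeline_age = datetime.datetime.utcnow() - last_polled
timeline_url = urlparse.urljoin(
    base_url,
    "timeline?ticket=on&daysback=%d&format=rss" % (timeline_age.days + 1))
print timeline_url
# -> http://trac.example.org/timeline?ticket=on&daysback=4&format=rss
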
Example #2
    def __init__(self, base_url, tracker_name):
        self.tracker_name = tracker_name
        try:
            self.timeline = TracTimeline.all_timelines.get(base_url = base_url)
        except TracTimeline.DoesNotExist:
            self.timeline = TracTimeline(base_url = base_url)
        # Unsure if this is required here, but can't hurt.
        self.timeline.save()
Example #3
    def process_queries(self, queries):
        # If this is an old Trac version, update the timeline.
        if self.tm.old_trac:
            # Set up the TracTimeline instance
            try:
                self.timeline = TracTimeline.all_timelines.get(base_url = self.tm.get_base_url())
            except TracTimeline.DoesNotExist:
                self.timeline = TracTimeline(base_url = self.tm.get_base_url())
            # Check when the timeline was last updated.
            timeline_age = datetime.datetime.utcnow() - self.timeline.last_polled
            # Set up timeline URL.
            timeline_url = urlparse.urljoin(self.timeline.base_url,
                                "timeline?ticket=on&daysback=%d&format=rss" %
                                (timeline_age.days + 1))
            # Add the URL to the waiting list
            self.add_url_to_waiting_list(
                url=timeline_url,
                callback=self.handle_timeline_rss)

        # Add all the queries to the waiting list
        for query in queries:
            query_url = query.get_query_url()
            self.add_url_to_waiting_list(
                    url=query_url,
                    callback=self.handle_query_csv)
            query.last_polled = datetime.datetime.utcnow()
            query.save()

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()
Example #4
class TracBugImporter(BugImporter):
    def __init__(self, *args, **kwargs):
        # Create a list to store bug ids obtained from queries.
        self.bug_ids = []
        # Call the parent __init__.
        super(TracBugImporter, self).__init__(*args, **kwargs)

    def process_queries(self, queries):
        # If this is an old Trac version, update the timeline.
        if self.tm.old_trac:
            # Set up the TracTimeline instance
            try:
                self.timeline = TracTimeline.all_timelines.get(
                    base_url=self.tm.get_base_url())
            except TracTimeline.DoesNotExist:
                self.timeline = TracTimeline(base_url=self.tm.get_base_url())
            # Check when the timeline was last updated.
            timeline_age = datetime.datetime.utcnow() - self.timeline.last_polled
            # Set up timeline URL.
            timeline_url = urlparse.urljoin(
                self.timeline.base_url,
                "timeline?ticket=on&daysback=%d&format=rss" %
                (timeline_age.days + 1))
            # Add the URL to the waiting list
            self.add_url_to_waiting_list(url=timeline_url,
                                         callback=self.handle_timeline_rss)

        # Add all the queries to the waiting list
        for query in queries:
            query_url = query.get_query_url()
            self.add_url_to_waiting_list(url=query_url,
                                         callback=self.handle_query_csv)
            query.last_polled = datetime.datetime.utcnow()
            query.save()

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()

    def handle_timeline_rss(self, timeline_rss):
        # There are two steps to updating the timeline.
        # First step is to use the actual timeline to update the date_reported and
        # last_touched fields for each bug.

        # Parse the returned timeline RSS feed.
        feed = feedparser.parse(timeline_rss)
        for entry in feed.entries:
            # Format the data.
            entry_url = entry.link.rsplit("#", 1)[0]
            entry_date = datetime.datetime(*entry.date_parsed[0:6])
            entry_status = entry.title.split("): ", 1)[0].rsplit(" ", 1)[1]

            try:
                tb_times = self.timeline.tracbugtimes_set.get(
                    canonical_bug_link=entry_url)
            except TracBugTimes.DoesNotExist:
                tb_times = TracBugTimes(canonical_bug_link=entry_url,
                                        timeline=self.timeline)

            # Set the date values as appropriate.
            if 'created' in entry_status:
                tb_times.date_reported = entry_date
            if tb_times.last_touched < entry_date:
                tb_times.last_touched = entry_date
                # Store entry status as well for use in second step.
                tb_times.latest_timeline_status = entry_status

            # Save the TracBugTimes object.
            tb_times.save()

        # Second step is to use the RSS feed for each individual bug to update the
        # last_touched field. This would be unnecessary if the timeline showed
        # update events as well as creation and closing ones, and in fact later
        # versions of Trac have this option - but then the later versions of Trac
        # also hyperlink to the timeline from the bug, making this all moot.
        # Also, we cannot just use the RSS feed for everything, as it is missing
        # the date_reported time, as well as a lot of information about the bug
        # itself (e.g. Priority).
        for tb_times in self.timeline.tracbugtimes_set.all():
            # Check that the bug has not been seen as 'closed' in the timeline.
            # This will reduce network load by not grabbing the RSS feed of bugs
            # whose last_touched info is definitely correct.
            if 'closed' not in tb_times.latest_timeline_status:
                self.add_url_to_waiting_list(url=tb_times.canonical_bug_link +
                                             '?format=rss',
                                             callback=self.handle_bug_rss,
                                             callback_args=tb_times)

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()

    def handle_bug_rss(self, bug_rss, tb_times):
        feed = feedparser.parse(bug_rss)
        comment_dates = [
            datetime.datetime(*e.date_parsed[0:6]) for e in feed.entries
        ]
        # Check if there are comments to grab from.
        if comment_dates:
            tb_times.last_polled = max(comment_dates)
            tb_times.save()

    def handle_query_csv(self, query_csv):
        # Turn the string into a list so csv.DictReader can handle it.
        query_csv_list = query_csv.split('\n')
        dictreader = csv.DictReader(query_csv_list)
        self.bug_ids.extend([int(line['id']) for line in dictreader])

    def prepare_bug_urls(self):
        # Pull bug_ids out of the internal storage. This is done in case the
        # list is simultaneously being written to, in which case just copying
        # the entire thing followed by deleting the contents could lead to
        # lost IDs.
        bug_id_list = []
        while self.bug_ids:
            bug_id_list.append(self.bug_ids.pop())

        # Convert the obtained bug ids to URLs.
        bug_url_list = [
            urlparse.urljoin(self.tm.get_base_url(), "ticket/%d" % bug_id)
            for bug_id in bug_id_list
        ]

        # Get the sub-list of URLs that are fresh.
        fresh_bug_urls = mysite.search.models.Bug.all_bugs.filter(
            canonical_bug_link__in=bug_url_list,
            last_polled__lt=datetime.datetime.now() -
            datetime.timedelta(days=1)).values_list('canonical_bug_link',
                                                    flat=True)

        # Remove the fresh URLs to be left with stale or new URLs.
        for bug_url in fresh_bug_urls:
            bug_url_list.remove(bug_url)

        # Put the bug list in the form required for process_bugs.
        # The second entry of the tuple is None as Trac never supplies data via queries.
        bug_list = [(bug_url, None) for bug_url in bug_url_list]

        # And now go on to process the bug list
        self.process_bugs(bug_list)

    def process_bugs(self, bug_list):
        # If there are no bug URLs, finish now.
        if not bug_list:
            self.determine_if_finished()
            return

        for bug_url, _ in bug_list:
            # Create a TracBugParser instance to store the bug data
            tbp = TracBugParser(bug_url)

            self.add_url_to_waiting_list(url=tbp.bug_csv_url,
                                         callback=self.handle_bug_csv,
                                         c_args={'tbp': tbp},
                                         errback=self.errback_bug_data,
                                         e_args={'tbp': tbp})

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()

    def handle_bug_csv(self, bug_csv, tbp):
        # Pass the TracBugParser the CSV data
        tbp.set_bug_csv_data(bug_csv)

        # Now fetch the bug HTML
        self.add_url_to_waiting_list(url=tbp.bug_html_url,
                                     callback=self.handle_bug_html,
                                     c_args={'tbp': tbp},
                                     errback=self.errback_bug_data,
                                     e_args={'tbp': tbp})

    def errback_bug_data(self, failure, tbp):
        # For some unknown reason, some trackers choose to delete some bugs entirely instead
        # of just marking them as closed. That is fine for bugs we haven't yet pulled, but
        # if the bug is already being tracked then we get a 404 error. This catcher looks
        # for a 404 and deletes the bug if it occurs.
        if failure.check(
                twisted.web.error.Error
        ) and failure.value.status == twisted.web.http.NOT_FOUND:
            try:
                bug = mysite.search.models.Bug.all_bugs.get(
                    canonical_bug_link=tbp.bug_url)
                bug.delete()
            except mysite.search.models.Bug.DoesNotExist:
                pass
            # To keep the callback chain happy, explicitly return None.
            return None
        else:
            # Pass the Failure on.
            return failure

    def handle_bug_html(self, bug_html, tbp):
        # Pass the TracBugParser the HTML data
        tbp.set_bug_html_data(bug_html)

        # Get the parsed data dict from the TracBugParser
        data = tbp.get_parsed_data_dict(self.tm)
        if self.tm.old_trac:
            # It's an old version of Trac that doesn't have links from the
            # bugs to the timeline. So we need to fetch these times from
            # the database built earlier.
            (data['date_reported'],
             data['last_touched']) = self.timeline.get_times(tbp.bug_url)

        # Get or create a Bug object to put the parsed data in.
        try:
            bug = mysite.search.models.Bug.all_bugs.get(
                canonical_bug_link=tbp.bug_url)
        except mysite.search.models.Bug.DoesNotExist:
            bug = mysite.search.models.Bug(canonical_bug_link=tbp.bug_url)

        # Fill the Bug
        for key in data:
            value = data[key]
            setattr(bug, key, value)

        # Save the project onto it
        # Project name is generated from the bug_project_name_format property
        # of the TrackerModel.
        project_from_name, _ = mysite.search.models.Project.objects.get_or_create(
            name=self.generate_bug_project_name(tbp))
        # Manually save() the Project to ensure that if it was created then it has
        # a display_name.
        project_from_name.save()
        bug.project = project_from_name

        # Store the tracker that generated the Bug, update last_polled and save it!
        bug.tracker = self.tm
        bug.last_polled = datetime.datetime.utcnow()
        bug.save()

    def generate_bug_project_name(self, tbp):
        return self.tm.bug_project_name_format.format(
            tracker_name=self.tm.tracker_name, component=tbp.component)

    def determine_if_finished(self):
        # If we got here then there are no more URLs in the waiting list.
        # So if self.bug_ids is also empty then we are done.
        if self.bug_ids:
            self.prepare_bug_urls()
        else:
            self.finish_import()
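
As a quick illustration of the handle_query_csv() step above, the following standalone sketch parses a made-up Trac query result in CSV format into ticket ids. The exact columns are an assumption; only the 'id' column is actually read.

import csv

# Hypothetical CSV payload in the shape handle_query_csv() expects:
# a header row with an 'id' column, as produced by a Trac query in CSV format.
query_csv = "id,summary,status\n12,Fix the frobnicator,new\n57,Docs typo,closed"

# Same approach as handle_query_csv(): split into lines so csv.DictReader
# can iterate over them, then pull out the ticket ids.
query_csv_list = query_csv.split('\n')
dictreader = csv.DictReader(query_csv_list)
bug_ids = [int(line['id']) for line in dictreader]
print bug_ids
# -> [12, 57]
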
Example #5
class TracBugTimeline(object):
    def __init__(self, base_url, tracker_name):
        self.tracker_name = tracker_name
        try:
            self.timeline = TracTimeline.all_timelines.get(base_url = base_url)
        except TracTimeline.DoesNotExist:
            self.timeline = TracTimeline(base_url = base_url)
        # Unsure if this is required here, but can't hurt.
        self.timeline.save()

    def generate_timeline_entries_from_rss(self, days_back):
        rss_url = urlparse.urljoin(self.timeline.base_url,
                            "timeline?ticket=on&daysback=%d&format=rss" % days_back)
        logging.info("[Trac] Fetching timeline RSS...")
        feed = feedparser.parse(rss_url)
        for entry in feed.entries:
            yield entry

    def update(self):
        logging.info("[Trac] Started refreshing timeline for tracker named %s." % self.tracker_name)

        # Check when the timeline was last updated.
        timeline_age = datetime.datetime.utcnow() - self.timeline.last_polled

        # First step is to use the actual timeline to update the date_reported and
        # last_touched fields for each bug.
        # Add one to days count here to account for possible timezone differences.
        for entry in self.generate_timeline_entries_from_rss(timeline_age.days + 1):
            # Format the data.
            entry_url = entry.link.rsplit("#", 1)[0]
            entry_date = datetime.datetime(*entry.date_parsed[0:6])
            entry_status = entry.title.split("): ", 1)[0].rsplit(" ", 1)[1]

            logging.info("[Trac] Updating %s entry on %s for %s" % (entry_status, entry_date, entry_url))
            # Get or create a TracBugTimes object.
            try:
                tb_times = self.timeline.tracbugtimes_set.get(canonical_bug_link = entry_url)
            except TracBugTimes.DoesNotExist:
                tb_times = TracBugTimes(canonical_bug_link = entry_url,
                                        timeline = self.timeline)

            # Set the date values as appropriate.
            if 'created' in entry_status:
                tb_times.date_reported = entry_date
            if tb_times.last_touched < entry_date:
                tb_times.last_touched = entry_date
                # Store entry status as well for use in second step.
                tb_times.latest_timeline_status = entry_status

            # Save the TracBugTimes object.
            tb_times.save()

        # Second step is to use the RSS feed for each individual bug to update the
        # last_touched field. This would be unnecessary if the timeline showed
        # update events as well as creation and closing ones, and in fact later
        # versions of Trac have this option - but then the later versions of Trac
        # also hyperlink to the timeline from the bug, making this all moot.
        # Also, we cannot just use the RSS feed for everything, as it is missing
        # the date_reported time, as well as a lot of information about the bug
        # itself (e.g. Priority).
        for tb_times in self.timeline.tracbugtimes_set.all():
            # Check that the bug has not been seen as 'closed' in the timeline.
            # This will reduce network load by not grabbing the RSS feed of bugs
            # whose last_touched info is definitely correct.
            if 'closed' not in tb_times.latest_timeline_status:
                logging.info("[Trac] Grabbing RSS feed for %s" % tb_times.canonical_bug_link)
                feed = feedparser.parse(tb_times.canonical_bug_link + '?format=rss')
                comment_dates =  [datetime.datetime(*e.date_parsed[0:6]) for e in feed.entries]
                # Check if there are comments to grab from.
                if comment_dates:
                    tb_times.last_polled = max(comment_dates)
                    tb_times.save()

        # Finally, update the timeline's last_polled.
        self.timeline.last_polled = datetime.datetime.utcnow()
        self.timeline.save()

    def get_times(self, bug_url):
        bug_times = self.timeline.tracbugtimes_set.get(canonical_bug_link = bug_url)
        return (bug_times.date_reported, bug_times.last_touched)
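
A minimal usage sketch of the TracBugTimeline class above. The import path, base URL, and tracker name are assumptions, and running it requires the Django models (TracTimeline, TracBugTimes) plus network access to the Trac instance.

# Hypothetical import path; adjust to wherever TracBugTimeline actually lives.
from mysite.customs.bugtrackers.trac import TracBugTimeline

timeline = TracBugTimeline(base_url="http://trac.example.org/",
                           tracker_name="ExampleTracker")
# Refresh date_reported/last_touched data from the timeline and per-bug feeds.
timeline.update()
# Look up the stored times for a single ticket.
date_reported, last_touched = timeline.get_times(
    "http://trac.example.org/ticket/1234")
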
Example #6
class TracBugImporter(BugImporter):
    def __init__(self, *args, **kwargs):
        # Create a list to store bug ids obtained from queries.
        self.bug_ids = []
        # Call the parent __init__.
        super(TracBugImporter, self).__init__(*args, **kwargs)

    def process_queries(self, queries):
        # If this is an old Trac version, update the timeline.
        if self.tm.old_trac:
            # Set up the TracTimeline instance
            try:
                self.timeline = TracTimeline.all_timelines.get(base_url = self.tm.get_base_url())
            except TracTimeline.DoesNotExist:
                self.timeline = TracTimeline(base_url = self.tm.get_base_url())
            # Check when the timeline was last updated.
            timeline_age = datetime.datetime.utcnow() - self.timeline.last_polled
            # Set up timeline URL.
            timeline_url = urlparse.urljoin(self.timeline.base_url,
                                "timeline?ticket=on&daysback=%d&format=rss" %
                                (timeline_age.days + 1))
            # Add the URL to the waiting list
            self.add_url_to_waiting_list(
                url=timeline_url,
                callback=self.handle_timeline_rss)

        # Add all the queries to the waiting list
        for query in queries:
            query_url = query.get_query_url()
            self.add_url_to_waiting_list(
                    url=query_url,
                    callback=self.handle_query_csv)
            query.last_polled = datetime.datetime.utcnow()
            query.save()

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()

    def handle_timeline_rss(self, timeline_rss):
        # There are two steps to updating the timeline.
        # First step is to use the actual timeline to update the date_reported and
        # last_touched fields for each bug.

        # Parse the returned timeline RSS feed.
        feed = feedparser.parse(timeline_rss)
        for entry in feed.entries:
            # Format the data.
            entry_url = entry.link.rsplit("#", 1)[0]
            entry_date = datetime.datetime(*entry.date_parsed[0:6])
            entry_status = entry.title.split("): ", 1)[0].rsplit(" ", 1)[1]

            try:
                tb_times = self.timeline.tracbugtimes_set.get(canonical_bug_link = entry_url)
            except TracBugTimes.DoesNotExist:
                tb_times = TracBugTimes(canonical_bug_link = entry_url,
                                        timeline = self.timeline)

            # Set the date values as appropriate.
            if 'created' in entry_status:
                tb_times.date_reported = entry_date
            if tb_times.last_touched < entry_date:
                tb_times.last_touched = entry_date
                # Store entry status as well for use in second step.
                tb_times.latest_timeline_status = entry_status

            # Save the TracBugTimes object.
            tb_times.save()
            
        # Second step is to use the RSS feed for each individual bug to update the
        # last_touched field. This would be unnecessary if the timeline showed
        # update events as well as creation and closing ones, and in fact later
        # versions of Trac have this option - but then the later versions of Trac
        # also hyperlink to the timeline from the bug, making this all moot.
        # Also, we cannot just use the RSS feed for everything, as it is missing
        # the date_reported time, as well as a lot of information about the bug
        # itself (e.g. Priority).
        for tb_times in self.timeline.tracbugtimes_set.all():
            # Check that the bug has not been seen as 'closed' in the timeline.
            # This will reduce network load by not grabbing the RSS feed of bugs
            # whose last_touched info is definitely correct.
            if 'closed' not in tb_times.latest_timeline_status:
                self.add_url_to_waiting_list(
                        url=tb_times.canonical_bug_link + '?format=rss',
                        callback=self.handle_bug_rss,
                        callback_args=tb_times)

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()

    def handle_bug_rss(self, bug_rss, tb_times):
        feed = feedparser.parse(bug_rss)
        comment_dates =  [datetime.datetime(*e.date_parsed[0:6]) for e in feed.entries]
        # Check if there are comments to grab from.
        if comment_dates:
            tb_times.last_polled = max(comment_dates)
            tb_times.save()

    def handle_query_csv(self, query_csv):
        # Turn the string into a list so csv.DictReader can handle it.
        query_csv_list = query_csv.split('\n')
        dictreader = csv.DictReader(query_csv_list)
        self.bug_ids.extend([int(line['id']) for line in dictreader])

    def prepare_bug_urls(self):
        # Pull bug_ids out of the internal storage. This is done in case the
        # list is simultaneously being written to, in which case just copying
        # the entire thing followed by deleting the contents could lead to
        # lost IDs.
        bug_id_list = []
        while self.bug_ids:
            bug_id_list.append(self.bug_ids.pop())

        # Convert the obtained bug ids to URLs.
        bug_url_list = [urlparse.urljoin(self.tm.get_base_url(),
                                "ticket/%d" % bug_id) for bug_id in bug_id_list]

        # Get the sub-list of URLs that are fresh.
        fresh_bug_urls = mysite.search.models.Bug.all_bugs.filter(
                canonical_bug_link__in = bug_url_list,
                last_polled__lt = datetime.datetime.now() - datetime.timedelta(days = 1)
            ).values_list('canonical_bug_link', flat=True)

        # Remove the fresh URLs to be left with stale or new URLs.
        for bug_url in fresh_bug_urls:
            bug_url_list.remove(bug_url)

        # Put the bug list in the form required for process_bugs.
        # The second entry of the tuple is None as Trac never supplies data via queries.
        bug_list = [(bug_url, None) for bug_url in bug_url_list]

        # And now go on to process the bug list
        self.process_bugs(bug_list)

    def process_bugs(self, bug_list):
        # If there are no bug URLs, finish now.
        if not bug_list:
            self.determine_if_finished()
            return

        for bug_url, _ in bug_list:
            # Create a TracBugParser instance to store the bug data
            tbp = TracBugParser(bug_url)

            self.add_url_to_waiting_list(
                    url=tbp.bug_csv_url,
                    callback=self.handle_bug_csv,
                    c_args={'tbp': tbp},
                    errback=self.errback_bug_data,
                    e_args={'tbp': tbp})

        # URLs are now all prepped, so start pushing them onto the reactor.
        self.push_urls_onto_reactor()

    def handle_bug_csv(self, bug_csv, tbp):
        # Pass the TracBugParser the CSV data
        tbp.set_bug_csv_data(bug_csv)

        # Now fetch the bug HTML
        self.add_url_to_waiting_list(
                url=tbp.bug_html_url,
                callback=self.handle_bug_html,
                c_args={'tbp': tbp},
                errback=self.errback_bug_data,
                e_args={'tbp': tbp})

    def errback_bug_data(self, failure, tbp):
        # For some unknown reason, some trackers choose to delete some bugs entirely instead
        # of just marking them as closed. That is fine for bugs we haven't yet pulled, but
        # if the bug is already being tracked then we get a 404 error. This catcher looks
        # for a 404 and deletes the bug if it occurs.
        if failure.check(twisted.web.error.Error) and failure.value.status == twisted.web.http.NOT_FOUND:
            try:
                bug = mysite.search.models.Bug.all_bugs.get(
                    canonical_bug_link=tbp.bug_url)
                bug.delete()
            except mysite.search.models.Bug.DoesNotExist:
                pass
            # To keep the callback chain happy, explicitly return None.
            return None
        else:
            # Pass the Failure on.
            return failure

    def handle_bug_html(self, bug_html, tbp):
        # Pass the TracBugParser the HTML data
        tbp.set_bug_html_data(bug_html)

        # Get the parsed data dict from the TracBugParser
        data = tbp.get_parsed_data_dict(self.tm)
        if self.tm.old_trac:
            # It's an old version of Trac that doesn't have links from the
            # bugs to the timeline. So we need to fetch these times from
            # the database built earlier.
            (data['date_reported'], data['last_touched']) = self.timeline.get_times(tbp.bug_url)

        # Get or create a Bug object to put the parsed data in.
        try:
            bug = mysite.search.models.Bug.all_bugs.get(
                canonical_bug_link=tbp.bug_url)
        except mysite.search.models.Bug.DoesNotExist:
            bug = mysite.search.models.Bug(canonical_bug_link = tbp.bug_url)

        # Fill the Bug
        for key in data:
            value = data[key]
            setattr(bug, key, value)

        # Save the project onto it
        # Project name is generated from the bug_project_name_format property
        # of the TrackerModel.
        project_from_name, _ = mysite.search.models.Project.objects.get_or_create(name=self.generate_bug_project_name(tbp))
        # Manually save() the Project to ensure that if it was created then it has
        # a display_name.
        project_from_name.save()
        bug.project = project_from_name

        # Store the tracker that generated the Bug, update last_polled and save it!
        bug.tracker = self.tm
        bug.last_polled = datetime.datetime.utcnow()
        bug.save()

    def generate_bug_project_name(self, tbp):
        return self.tm.bug_project_name_format.format(tracker_name=self.tm.tracker_name,
                                                      component=tbp.component)

    def determine_if_finished(self):
        # If we got here then there are no more URLs in the waiting list.
        # So if self.bug_ids is also empty then we are done.
        if self.bug_ids:
            self.prepare_bug_urls()
        else:
            self.finish_import()
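
Finally, a small sketch of how generate_bug_project_name() uses the TrackerModel's bug_project_name_format: it is a str.format() template that can reference the tracker name and the ticket's component. The template string below is a made-up example.

# Hypothetical template standing in for self.tm.bug_project_name_format.
bug_project_name_format = "{tracker_name} ({component})"
print bug_project_name_format.format(tracker_name="ExampleTrac",
                                     component="web-ui")
# -> ExampleTrac (web-ui)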