def handle(self, *args, **options):
        """Refresh repository metadata and commits for every project.

        Iterates over all ``Project`` rows, reports progress to ``Job`` before
        each one, then calls ``fetch_metadata(fetch_pypi=False)`` and
        ``fetch_commits()`` on it.  A failure on one package never aborts the
        run; the loop simply moves on to the next package.
        """
        github = github_login(token=settings.GITHUB_TOKEN)

        projects_count = Project.objects.count()
        for index, package in enumerate(Project.objects.iterator()):
            Job.update_progress(total_parts=projects_count,
                                total_parts_complete=index)
            logging.info("{} ...".format(package.name))
            print("{} ...".format(package.name))

            # Simple attempt to deal with Github rate limiting: keep waiting
            # until the remaining quota is back above the safety margin.  The
            # previous `while True: ... break` form slept at most once and
            # then carried on even if the quota was still exhausted.
            # NOTE(review): relies on `ratelimit_remaining` being re-read on
            # every access (it is a live property in github3.py) -- confirm
            # for the client library in use.
            while github.ratelimit_remaining < 50:
                sleep(120)

            try:
                try:
                    package.fetch_metadata(fetch_pypi=False)
                    package.fetch_commits()
                except Exception as e:
                    # Wrap any failure together with the package name.
                    # Presumably PackageUpdaterException records/reports the
                    # error when constructed -- verify against its definition.
                    raise PackageUpdaterException(e, package.name)
            except PackageUpdaterException:
                pass  # We've already caught the error so let's move on now

            # Fixed pause between packages.
            sleep(5)
    def handle(self, *args, **kwargs):
        """Run every timeline rulebook once and print what it produced.

        Rulebooks are visited in random order (``order_by('?')``).  For each
        one, progress is reported to ``Job`` and every event returned by
        ``fetch_new_events()`` is printed.  A rulebook that raises is reported
        on stdout and skipped; the remaining rulebooks are still processed.
        A summary with the number of events and of successfully processed
        rulebooks is printed at the end.
        """
        events_counter = 0
        rulebooks_counter = 0  # rulebooks that finished without raising

        rulebooks = TimelineEventInserterRulebook.objects.all()
        rulebooks_count = rulebooks.count()
        # `processed` counts every rulebook already visited (failed or not) so
        # that reported progress keeps advancing even when rulebooks fail.
        for processed, rulebook in enumerate(rulebooks.order_by('?')):
            print("Project: {}, Rulebook: {}".format(rulebook.project,
                                                     rulebook.name))
            Job.update_progress(total_parts=rulebooks_count,
                                total_parts_complete=processed)

            try:
                for event in rulebook.fetch_new_events():
                    events_counter += 1
                    print("\t[{}][{}] {}".format(str(event.project),
                                                 str(event.date), event.name))
                    print("\t{}\n".format(event.url))
            except Exception as e:
                # TODO: report sentry
                print("Exception during processing rulebook for project: {}".
                      format(rulebook.project))
                # Show what actually went wrong instead of dropping it.
                print("\t{!r}".format(e))
            else:
                rulebooks_counter += 1

        print(
            "\nStats: {} new TimelineEvents were created thanks to rules in {} rulebooks"
            .format(events_counter, rulebooks_counter))
 def handle(self, *args, **options):
     """Retrieve article content for posts that do not have an article yet.

     NOTE: this block uses Python 2-only syntax (``print`` statement,
     ``except X, e``).

     Options read: ``dryrun``, ``year``, ``month`` and ``force``.  With
     ``dryrun`` set, only the number of matching posts is printed.  Otherwise
     every matching post is processed, progress is reported on stdout and to
     ``Job``, and failures are stored on the post itself.
     """
     dryrun = options['dryrun']
     q = Post.objects.all_articleless()
     
     # Keep retrying until we get a legitimate error code
     # explaining the failure.
     q = q.filter(article_content_error_code__isnull=True)
     
     #q = q.filter(article_content_success__isnull=True)
     
     #TODO:retry article_content_success=False but with error_code__isnull=False?
     # Optional narrowing by publication year and/or month.
     year = options['year']
     month = options['month']
     if year:
         q = q.filter(date_published__year=year)
     if month:
         q = q.filter(date_published__month=month)
     #q = q.only('id', )
     # Descending publication date: most recently published posts first.
     q = q.order_by('-date_published')
     total = q.count()
     i = 0
     success_count = 0 # successfully retrieved non-empty content
     error_count = 0 # any type of exception was thrown
     meh_count = 0 # no errors were thrown, even if we didn't get content
     print '%i posts without an article.' % (total,)
     if dryrun:
         # Dry run: the count above is all that is reported.
         return
     for post in q.iterator():
         i += 1
         # '\r' plus the trailing comma rewrites the same console line.
         print '\rProcessing post %i (%i of %i, %i success, %i errors, %i mehs)...' \
             % (post.id, i, total, success_count, error_count, meh_count),
         sys.stdout.flush()
         Job.update_progress(total_parts=total, total_parts_complete=i)
         try:
             post.retrieve_article_content(force=options['force'])
             # Exactly one of the two counters goes up: "success" when the
             # stripped content is non-empty, "meh" otherwise.
             success_count += bool(len((post.article_content or '').strip()))
             meh_count += not bool(len((post.article_content or '').strip()))
         except urllib2.HTTPError, e:
             # HTTP failure: store the code and reason on the post.
             error_count += 1
             print>>sys.stderr
             print>>sys.stderr, 'Error: Unable to retrieve %s: %s' % (post.link, e)
             post.article_content_error_code = e.code
             post.article_content_error_reason = e.reason
             post.article_content_success = False
             post.save()
         except Exception, e:
             # Any other failure: clear the content and store the traceback
             # text; no error code is set in this branch.
             post.article_content_success = False
             ferr = StringIO()
             traceback.print_exc(file=ferr)
             post.article_content = None
             post.article_content_error = ferr.getvalue()
             post.save()
             error_count += 1
             print>>sys.stderr
             print>>sys.stderr, 'Error: Unable to retrieve %s: %s' % (post.link, e)
# Example #4
# 0
 def print_progress(self, clear=True, newline=True):
     """Write one status line per stripe to stdout and update the job.

     Output is throttled: nothing happens when the previous refresh was less
     than half a second ago.

     ``self.progress`` maps a stripe key to a 6-tuple
     ``(current, total, sub_current, sub_total, eta, message)``.  When a
     stripe has a total but no ETA, one is extrapolated from the elapsed time
     since ``self.start_times[stripe]``.  Afterwards the per-stripe counts
     stored in ``self.stripe_counts`` are summed and passed to
     ``Job.update_progress``.

     :param clear: clear the screen and print the header first.
     :param newline: end each line with a newline; otherwise prefix it with
         a carriage return so the line is overwritten in place.
     """
     # total_seconds() keeps the fractional part.  The `.seconds` attribute is
     # a whole number, so comparing it to 0.5 throttled at a full second.
     if self.last_progress_refresh \
     and (datetime.now() - self.last_progress_refresh).total_seconds() < 0.5:
         return
     bar_length = 10
     if clear:
         sys.stdout.write('\033[2J\033[H') #clear screen
         sys.stdout.write('Importing attributes\n')
     for stripe, msg_parts in sorted(self.progress.items()):
         (current, total, sub_current, sub_total, eta, message) = msg_parts
         sub_status = ''
         if total:
             if not eta:
                 if current:
                     # Linear extrapolation from the time spent so far.
                     start_time = self.start_times[stripe]
                     current_seconds = time.time() - start_time
                     total_seconds = float(total)/current*current_seconds
                     remaining_seconds = int(total_seconds - current_seconds)
                     eta = timezone.now() + timedelta(seconds=remaining_seconds)
                 else:
                     # Nothing completed yet: no rate to extrapolate from
                     # (this used to divide by zero).
                     eta = '?'

             self.stripe_counts[stripe] = (current, total)
             percent = current/float(total)
             bar = ('=' * int(percent * bar_length)).ljust(bar_length)
             percent = int(percent * 100)
         else:
             # Unknown total: empty bar and placeholders.
             eta = eta or '?'
             percent = 0
             bar = ('=' * int(percent * bar_length)).ljust(bar_length)
             percent = '?'
             total = '?'
         if sub_current and sub_total:
             sub_status = '(subtask %s of %s) ' % (sub_current, sub_total)
         sys.stdout.write(
             (('' if newline else '\r') + \
             "%s [%s] %s of %s %s%s%% eta=%s: %s"+('\n' if newline else '')) \
                 % (stripe, bar, current, total, sub_status, percent, eta, message))
     sys.stdout.flush()
     self.last_progress_refresh = datetime.now()

     # Update job.
     overall_current_count = 0
     overall_total_count = 0
     for current, total in self.stripe_counts.values():
         overall_current_count += current
         overall_total_count += total
     # Job is only consulted when there is something to report.
     if overall_total_count and Job:
         Job.update_progress(
             total_parts_complete=overall_current_count,
             total_parts=overall_total_count,
         )
# Example #5
# 0
    def parse(self) -> None:
        """Parse the schedule of every full-time group, reporting progress.

        For ``ScheduleType.TEACH`` progress is reported per value yielded by
        ``__parse_teach`` for a group; for ``ScheduleType.SESSION`` it is
        reported once per group after ``__parse_session`` returns.

        Raises:
            GroupListIsEmpty: when there are no full-time groups.
        """
        groups = Group.objects.filter(study_form=Group.FULL_TIME)
        groups_count = groups.count()
        if not groups_count:
            raise GroupListIsEmpty()

        weeks_per_group = config.WEEKS_IN_SEMESTER
        maximum = groups_count * weeks_per_group

        for group_num, group in enumerate(groups):
            if self.type == ScheduleType.TEACH:
                # Parts already accounted for by the groups before this one.
                finished_before = group_num * weeks_per_group
                for week in self.__parse_teach(group):
                    Job.update_progress(
                        total_parts_complete=finished_before + week,
                        total_parts=maximum,
                    )
                continue

            if self.type == ScheduleType.SESSION:
                self.__parse_session(group)
                Job.update_progress(
                    total_parts_complete=group_num,
                    total_parts=groups_count,
                )
# Example #6
# 0
def job_counter(total: int):
    """Endless generator that bumps the job progress on every step.

    Each ``next()`` reports one more completed part (starting at 1) out of
    ``total`` to ``Job.update_progress`` and then yields ``None``.  The
    generator never stops by itself.
    """
    completed = 1
    while True:
        Job.update_progress(total_parts_complete=completed, total_parts=total)
        yield
        completed += 1
 def handle(self, *args, **options):
     """Sleep for ``options['seconds']`` seconds, one second at a time.

     Before each one-second sleep the number of seconds already elapsed is
     reported to ``Job.update_progress`` and printed to stdout.
     """
     duration = int(options['seconds'])
     elapsed = 0
     while elapsed < duration:
         Job.update_progress(total_parts=duration, total_parts_complete=elapsed)
         print('%i of %i' % (elapsed, duration))
         time.sleep(1)
         elapsed += 1
 def handle(self, *args, **options):
     """Count up to ``options['seconds']``, pausing one second per step.

     Every step first publishes the current step index through
     ``Job.update_progress`` and prints it, then sleeps for one second.
     """
     total_ticks = int(options['seconds'])
     for tick in range(total_ticks):
         Job.update_progress(
             total_parts=total_ticks,
             total_parts_complete=tick,
         )
         status_line = '%i of %i' % (tick, total_ticks)
         print(status_line)
         time.sleep(1)
    def handle(self, *args, **options):
        """Retrieve article content for posts that do not have an article yet.

        Options read: ``dryrun``, ``year``, ``month`` and ``force``.  With
        ``dryrun`` set, only the number of matching posts is printed.
        Otherwise every matching post is processed (most recently published
        first), progress is written to stdout and reported to ``Job`` when it
        is available, failures are stored on the post itself, and a short
        summary is printed at the end.
        """
        dryrun = options['dryrun']
        q = Post.objects.all_articleless()

        # Keep retrying until we get a legitimate error code
        # explaining the failure.
        q = q.filter(article_content_error_code__isnull=True)

        #TODO:retry article_content_success=False but with error_code__isnull=False?
        year = options['year']
        month = options['month']
        if year:
            q = q.filter(date_published__year=year)
        if month:
            q = q.filter(date_published__month=month)
        q = q.order_by('-date_published')
        total = q.count()
        i = 0
        success_count = 0  # successfully retrieved non-empty content
        error_count = 0  # any type of exception was thrown
        meh_count = 0  # no errors were thrown, even if we didn't get content
        print('%i posts without an article.' % (total, ))
        if dryrun:
            return
        for post in q.iterator():
            i += 1
            # '\r' without a newline rewrites the same console line.
            sys.stdout.write('\rProcessing post %i (%i of %i, %i success, %i errors, %i mehs)...' \
                % (post.id, i, total, success_count, error_count, meh_count))
            sys.stdout.flush()
            if Job:
                Job.update_progress(total_parts=total, total_parts_complete=i)
            try:
                post.retrieve_article_content(force=options['force'])
                # Non-empty stripped content is a success, anything else a
                # "meh"; exactly one of the two counters goes up.
                if (post.article_content or '').strip():
                    success_count += 1
                else:
                    meh_count += 1
            except HTTPError as e:
                # HTTP failure: store the code and reason on the post.
                error_count += 1
                print(file=sys.stderr)
                print('Error: Unable to retrieve %s: %s' % (post.link, e),
                      file=sys.stderr)
                post.article_content_error_code = e.code
                post.article_content_error_reason = e.reason
                post.article_content_success = False
                post.save()
            except Exception as e:
                # Any other failure: clear the content and store the traceback
                # text; no error code is set in this branch.
                post.article_content_success = False
                post.article_content = None
                post.article_content_error = traceback.format_exc()
                post.save()
                error_count += 1
                # Blank line on stderr so the message does not share a line
                # with the '\r' progress output (this used to print the repr
                # of the stderr object to stdout instead).
                print(file=sys.stderr)
                print('Error: Unable to retrieve %s: %s' % (post.link, e),
                      file=sys.stderr)
        # Terminate the in-place progress line (a bare `print` is a no-op
        # expression in Python 3).
        print()
        print('-' * 80)
        print('%i successes' % success_count)
        print('%i errors' % error_count)