def handle(self, *args, **options):
    """Refresh metadata and commit history for every Project from GitHub.

    Progress is reported through Job.update_progress. A failure while
    updating one package is logged and skipped so a single bad package
    cannot abort the whole run.
    """
    github = github_login(token=settings.GITHUB_TOKEN)
    projects_count = Project.objects.count()
    for index, package in enumerate(Project.objects.iterator()):
        Job.update_progress(total_parts=projects_count, total_parts_complete=index)
        logging.info("{} ...".format(package.name))
        print("{} ...".format(package.name))
        # Simple attempt to deal with Github rate limiting.
        # BUG FIX: the original `while True: if ... sleep(120); break` slept
        # at most once and then proceeded regardless of the remaining budget;
        # actually wait until the rate limit recovers.
        while github.ratelimit_remaining < 50:
            sleep(120)
        try:
            package.fetch_metadata(fetch_pypi=False)
            package.fetch_commits()
        except Exception as e:
            # The original wrapped the error in PackageUpdaterException and
            # immediately swallowed it, hiding every failure. Log it instead,
            # then move on to the next package (same best-effort semantics).
            logging.warning("Failed to update package %s: %s", package.name, e)
        sleep(5)
def handle(self, *args, **kwargs):
    """Run every TimelineEventInserterRulebook and create its new events.

    Rulebooks are processed in random order; a failure in one rulebook is
    reported and skipped so the remaining rulebooks still run.
    """
    events_counter = 0
    rulebooks_counter = 0  # rulebooks that completed without raising
    rulebooks = TimelineEventInserterRulebook.objects.all()
    rulebooks_count = rulebooks.count()
    # BUG FIX: progress is keyed on the loop index rather than on
    # rulebooks_counter — the original used the success counter, so the
    # progress bar stalled whenever a rulebook raised.
    for index, rulebook in enumerate(rulebooks.order_by('?')):
        print("Project: {}, Rulebook: {}".format(rulebook.project, rulebook.name))
        Job.update_progress(total_parts=rulebooks_count, total_parts_complete=index)
        try:
            for event in rulebook.fetch_new_events():
                events_counter += 1
                print("\t[{}][{}] {}".format(str(event.project), str(event.date), event.name))
                print("\t{}\n".format(event.url))
            rulebooks_counter += 1
        except Exception as e:
            # TODO: report sentry
            # BUG FIX: the original discarded the caught exception entirely;
            # surface it so failures are diagnosable.
            print("Exception during processing rulebook for project: {}".format(rulebook.project))
            print("\t{!r}".format(e))
    print(
        "\nStats: {} new TimelineEvents were created thanks to rules in {} rulebooks"
        .format(events_counter, rulebooks_counter))
def handle(self, *args, **options):
    """Fetch article content for posts that don't have any yet.

    NOTE(review): legacy Python 2 dialect (print statements, urllib2,
    `except X, e` syntax); a Python 3 port of this command exists in the
    same file. Kept byte-identical here.

    Options: dryrun (report count and exit), year/month (restrict by
    publication date), force (re-retrieve even if content exists).
    """
    dryrun = options['dryrun']
    q = Post.objects.all_articleless()
    # Keep retrying until we get a legitimate error code
    # explaining the failure.
    q = q.filter(article_content_error_code__isnull=True)
    #q = q.filter(article_content_success__isnull=True)
    #TODO:retry article_content_success=False but with error_code__isnull=False?
    year = options['year']
    month = options['month']
    if year:
        q = q.filter(date_published__year=year)
    if month:
        q = q.filter(date_published__month=month)
    #q = q.only('id', )
    # Newest posts first.
    q = q.order_by('-date_published')
    total = q.count()
    i = 0
    success_count = 0 # successfully retrieved non-empty content
    error_count = 0 # any type of exception was thrown
    meh_count = 0 # no errors were thrown, even if we didn't get content
    print '%i posts without an article.' % (total,)
    if dryrun:
        return
    for post in q.iterator():
        i += 1
        # Trailing comma: overwrite the same console line (no newline).
        print '\rProcessing post %i (%i of %i, %i success, %i errors, %i mehs)...' \
            % (post.id, i, total, success_count, error_count, meh_count),
        sys.stdout.flush()
        Job.update_progress(total_parts=total, total_parts_complete=i)
        try:
            post.retrieve_article_content(force=options['force'])
            # A retrieval either yielded content (success) or silently
            # yielded nothing (meh); exactly one of these increments.
            success_count += bool(len((post.article_content or '').strip()))
            meh_count += not bool(len((post.article_content or '').strip()))
        except urllib2.HTTPError, e:
            # HTTP failure: persist the code/reason so this post is not
            # retried (see the error_code filter above).
            error_count += 1
            print>>sys.stderr
            print>>sys.stderr, 'Error: Unable to retrieve %s: %s' % (post.link, e)
            post.article_content_error_code = e.code
            post.article_content_error_reason = e.reason
            post.article_content_success = False
            post.save()
        except Exception, e:
            # Any other failure: store the full traceback on the post.
            post.article_content_success = False
            ferr = StringIO()
            traceback.print_exc(file=ferr)
            post.article_content = None
            post.article_content_error = ferr.getvalue()
            post.save()
            error_count += 1
            print>>sys.stderr
            print>>sys.stderr, 'Error: Unable to retrieve %s: %s' % (post.link, e)
def print_progress(self, clear=True, newline=True):
    """Render a per-stripe progress bar to stdout and push overall progress to Job.

    self.progress maps stripe -> (current, total, sub_current, sub_total,
    eta, message) — presumably populated by the import workers; TODO confirm.
    Refreshes are throttled to at most one every half-second.
    """
    # BUG FIX: timedelta.seconds is an int component of the delta, so
    # `< 0.5` was only true at exactly 0 and the sub-second throttle never
    # worked as intended; total_seconds() is the real elapsed float.
    if self.last_progress_refresh \
            and (datetime.now() - self.last_progress_refresh).total_seconds() < 0.5:
        return
    bar_length = 10
    if clear:
        sys.stdout.write('\033[2J\033[H')  # clear screen, cursor home
        sys.stdout.write('Importing attributes\n')
    for stripe, msg_parts in sorted(self.progress.items()):
        (current, total, sub_current, sub_total, eta, message) = msg_parts
        sub_status = ''
        if total:
            # Estimate ETA from throughput so far unless the caller gave one.
            # BUG FIX: guard current > 0 — the original divided by `current`
            # and raised ZeroDivisionError for a not-yet-started stripe.
            if not eta and current:
                start_time = self.start_times[stripe]
                current_seconds = time.time() - start_time
                total_seconds = float(total) / current * current_seconds
                remaining_seconds = int(total_seconds - current_seconds)
                eta = timezone.now() + timedelta(seconds=remaining_seconds)
            self.stripe_counts[stripe] = (current, total)
            percent = current / float(total)
            bar = ('=' * int(percent * bar_length)).ljust(bar_length)
            percent = int(percent * 100)
        else:
            # Total unknown: empty bar and '?' placeholders.
            eta = eta or '?'
            bar = ' ' * bar_length
            percent = '?'
            total = '?'
        if sub_current and sub_total:
            sub_status = '(subtask %s of %s) ' % (sub_current, sub_total)
        sys.stdout.write(
            (('' if newline else '\r')
             + "%s [%s] %s of %s %s%s%% eta=%s: %s" + ('\n' if newline else ''))
            % (stripe, bar, current, total, sub_status, percent, eta, message))
        sys.stdout.flush()
    self.last_progress_refresh = datetime.now()
    # Aggregate all stripes into one overall figure for the Job record.
    # (.items() replaces six.iteritems — equivalent on both Python 2 and 3.)
    overall_current_count = 0
    overall_total_count = 0
    for stripe, (current, total) in self.stripe_counts.items():
        overall_current_count += current
        overall_total_count += total
    if overall_total_count and Job:
        Job.update_progress(
            total_parts_complete=overall_current_count,
            total_parts=overall_total_count,
        )
def parse(self) -> None:
    """Parse schedules for all full-time groups, reporting progress to Job.

    TEACH schedules report week-granular progress; SESSION schedules report
    group-granular progress.

    Raises:
        GroupListIsEmpty: when there are no full-time groups to parse.
    """
    groups = Group.objects.filter(study_form=Group.FULL_TIME)
    groups_count = groups.count()
    if groups_count == 0:
        raise GroupListIsEmpty()
    weeks_per_group = config.WEEKS_IN_SEMESTER
    maximum = groups_count * weeks_per_group
    for group_num, group in enumerate(groups):
        if self.type == ScheduleType.TEACH:
            # Each parsed week advances the bar one step within this
            # group's slice of the overall total.
            for week in self.__parse_teach(group):
                Job.update_progress(
                    total_parts_complete=group_num * weeks_per_group + week,
                    total_parts=maximum,
                )
        elif self.type == ScheduleType.SESSION:
            self.__parse_session(group)
            # BUG FIX: group_num + 1 — the original reported group_num, an
            # off-by-one that left the job at (count-1)/count forever.
            Job.update_progress(total_parts_complete=group_num + 1, total_parts=groups_count)
def job_counter(total: int):
    """Infinite generator: every next() advances Job progress by one step.

    `total` is only forwarded to Job.update_progress as the denominator;
    the generator itself never terminates on its own.
    """
    completed = 0
    while True:
        completed += 1
        Job.update_progress(total_parts_complete=completed, total_parts=total)
        yield
def handle(self, *args, **options):
    """Sleep for options['seconds'] seconds, ticking Job progress each second."""
    seconds = int(options['seconds'])
    for elapsed in range(seconds):
        Job.update_progress(total_parts=seconds, total_parts_complete=elapsed)
        print('%i of %i' % (elapsed, seconds))
        time.sleep(1)
    # BUG FIX: the loop only ever reported 0..seconds-1, so the job
    # appeared stuck just short of done; report completion explicitly.
    Job.update_progress(total_parts=seconds, total_parts_complete=seconds)
def handle(self, *args, **options):
    """Sleep for options['seconds'] seconds, ticking Job progress each second."""
    seconds = int(options['seconds'])
    elapsed = 0
    while elapsed < seconds:
        Job.update_progress(total_parts=seconds, total_parts_complete=elapsed)
        print('%i of %i' % (elapsed, seconds))
        time.sleep(1)
        elapsed += 1
def handle(self, *args, **options):
    """Fetch article content for posts that don't have any yet (Python 3).

    Options: dryrun (report count and exit), year/month (restrict by
    publication date), force (re-retrieve even if content exists).
    """
    dryrun = options['dryrun']
    q = Post.objects.all_articleless()
    # Keep retrying until we get a legitimate error code
    # explaining the failure.
    q = q.filter(article_content_error_code__isnull=True)
    #q = q.filter(article_content_success__isnull=True)
    #TODO:retry article_content_success=False but with error_code__isnull=False?
    year = options['year']
    month = options['month']
    if year:
        q = q.filter(date_published__year=year)
    if month:
        q = q.filter(date_published__month=month)
    #q = q.only('id', )
    q = q.order_by('-date_published')
    total = q.count()
    i = 0
    success_count = 0  # successfully retrieved non-empty content
    error_count = 0  # any type of exception was thrown
    meh_count = 0  # no errors were thrown, even if we didn't get content
    print('%i posts without an article.' % (total, ))
    if dryrun:
        return
    for post in q.iterator():
        i += 1
        # \r with no newline: overwrite the same console status line.
        sys.stdout.write('\rProcessing post %i (%i of %i, %i success, %i errors, %i mehs)...' \
            % (post.id, i, total, success_count, error_count, meh_count))
        sys.stdout.flush()
        if Job:
            Job.update_progress(total_parts=total, total_parts_complete=i)
        try:
            post.retrieve_article_content(force=options['force'])
            # Hoisted the duplicated strip() expression: a retrieval either
            # yielded content (success) or silently yielded nothing (meh).
            got_content = bool((post.article_content or '').strip())
            success_count += got_content
            meh_count += not got_content
        except HTTPError as e:
            # HTTP failure: persist code/reason so this post is not retried
            # (see the error_code filter above).
            error_count += 1
            print(file=sys.stderr)
            print('Error: Unable to retrieve %s: %s' % (post.link, e),
                  file=sys.stderr)
            post.article_content_error_code = e.code
            post.article_content_error_reason = e.reason
            post.article_content_success = False
            post.save()
        except Exception as e:
            # Any other failure: store the full traceback on the post.
            post.article_content_success = False
            ferr = StringIO()
            traceback.print_exc(file=ferr)
            post.article_content = None
            post.article_content_error = ferr.getvalue()
            post.save()
            error_count += 1
            # BUG FIX: the original called print(sys.stderr), which printed
            # the stream's repr to stdout instead of a blank separator line
            # to stderr (compare the HTTPError branch above).
            print(file=sys.stderr)
            print('Error: Unable to retrieve %s: %s' % (post.link, e),
                  file=sys.stderr)
    # BUG FIX: bare `print` in Python 3 is just a reference to the builtin
    # and does nothing; print() emits the intended blank line.
    print()
    print('-' * 80)
    print('%i successes' % success_count)
    print('%i errors' % error_count)