def process_document(a_file):
    """ Process a file with Google Drive.

        Delegates to `convert_with_google_drive`, which is expected to
        populate and save File.html and File.text or, for a pdf,
        File.is_pdf and File.embed_url; for either, it saves a
        File.gdrive_url.

        Any exception raised by the conversion is reported on stdout and
        turned into a False return value rather than propagated.

        :a_file: A `models.File` instance associated with a document or
                 pdf file
        :returns: True on success, else False
    """
    print("Processing document: %s -- %s" % (a_file.id, a_file.title))
    try:
        convert_with_google_drive(a_file)
    except Exception as e:
        # Best effort: one bad document must not abort the caller, so the
        # failure is reported and signalled through the return value.
        print("\terror processing doc: %s -- %s" % (a_file.id, a_file.title))
        # repr() keeps the report safe for non-ASCII exception messages.
        print("\terror detail: %r" % (e,))
        return False
    # The documented contract is True on success; falling off the end
    # would hand callers None instead.
    return True
def handle(self, *args, **options):
    """ Loop over local model.File objects, regenerate .html and .text

        Every File whose `file` field is non-null and whose path exists
        on disk is passed to `convert_with_google_drive`. Afterwards a
        summary is written to self.stdout: total run time, every counter
        entry and the mean per-file processing time.

        :args: positional command arguments (unused)
        :options: keyword command options (unused)
        :returns: None
    """
    counter = Counter()
    # Per-file durations, kept apart from `counter` so that a real mean
    # can be reported in the summary.
    durations = []
    full_timer = Timer()

    with full_timer:
        for fp in File.objects.filter(file__isnull=False).all():
            inner_time = Timer()
            with inner_time:
                # don't try to process files that are HTML only in the db
                if not os.path.exists(fp.file.path):
                    counter['file paths not found'] += 1
                    continue

                # The document is processed directly here instead of being
                # queued through the celery task (tasks.process_document).
                try:
                    convert_with_google_drive(fp)
                except HttpError:
                    counter['files errored'] += 1
                # Counts every attempted file, including ones that errored.
                counter['files processed'] += 1

            elapsed = inner_time.duration_in_seconds()
            # Durations are still tallied in the counter so the summary
            # listing keeps showing them.
            counter[elapsed] += 1
            durations.append(elapsed)

    # Each piece is written with its own call on purpose: the stream may
    # treat line endings per call, so merging the strings could alter the
    # output.
    self.stdout.write('\n\n\n')
    self.stdout.write('#' * 40)
    self.stdout.write('\n')
    self.stdout.write('Processing complete')
    self.stdout.write('\n')
    self.stdout.write('Time to completion in seconds:')
    self.stdout.write('\n')
    self.stdout.write('\t %s' % full_timer.duration_in_seconds())

    for string, count in counter.items():
        self.stdout.write('\n')
        self.stdout.write('\t%s:' % string)
        self.stdout.write('\t\t%s' % count)
        self.stdout.write('\n')

    self.stdout.write('\n')
    self.stdout.write('Mean processing time:')
    self.stdout.write('\n')
    if durations:
        # assumes duration_in_seconds() returns a number -- TODO confirm
        # float() forces true division on Python 2 as well.
        mean_seconds = sum(durations) / float(len(durations))
        self.stdout.write('\t%s' % mean_seconds)
    else:
        # Nothing was timed, so there is no mean to report.
        self.stdout.write('\tn/a (no files processed)')