Ejemplo n.º 1
0
def process_document(a_file):
    """ Process a file with Google Drive
        populates and saves the File.html, File.text
        or if a pdf, File.is_pdf and File.embed_url
        for either, it saves a File.gdrive_url

        The work itself is delegated to `convert_with_google_drive`; any
        exception it raises is caught and reported on stdout instead of
        being propagated to the caller.

        :a_file: A `models.File` instance associated, document or pdf file
        :returns: True on success, else False
    """
    print("Processing document: %s -- %s" % (a_file.id, a_file.title))
    try:
        convert_with_google_drive(a_file)
    except Exception as exc:
        print("\terror processing doc: %s -- %s" % (a_file.id, a_file.title))
        # Also report the cause, which used to be silently discarded.
        # %r is used so a non-ASCII exception message cannot itself raise
        # while being formatted.
        print("\t%r" % (exc,))
        return False
    # Honour the documented contract: the success path previously fell off
    # the end of the function and returned None (falsy) instead of True.
    return True
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """ Loop over local model.File objects, regenerate .html and .text

            Every File returned by the `file__isnull=False` query whose
            underlying path exists on disk is converted synchronously with
            `convert_with_google_drive`. A summary (total time, named
            tallies and the mean per-file time) is then written to
            `self.stdout`.

            :args: unused
            :options: unused
        """
        counter = Counter()
        # Per-file durations are kept apart from the named tallies. Using
        # them as Counter keys polluted the tally listing and made it
        # impossible to report a real mean.
        durations = []
        full_timer = Timer()
        with full_timer:
            for fp in File.objects.filter(file__isnull=False).all():
                inner_time = Timer()
                with inner_time:
                    # don't try to process files that are HTML only in the db
                    if not os.path.exists(fp.file.path):
                        counter['file paths not found'] += 1
                        continue

                    # Alternative (disabled): kick off a celery task instead
                    #tasks.process_document.delay(fp)

                    try:
                        # Process the document directly
                        convert_with_google_drive(fp)
                    except HttpError:
                        counter['files errored'] += 1
                    # Tallies every attempted conversion, including the
                    # ones that were just counted as errored.
                    counter['files processed'] += 1

                # Only reached for files that were not skipped above.
                # NOTE(review): assumes duration_in_seconds() returns a
                # number -- confirm against Timer.
                durations.append(inner_time.duration_in_seconds())

        self.stdout.write('\n\n\n')
        self.stdout.write('#' * 40)
        self.stdout.write('\n')
        self.stdout.write('Processing complete')
        self.stdout.write('\n')
        self.stdout.write('Time to completion in seconds:')
        self.stdout.write('\n')
        self.stdout.write('\t %s' % full_timer.duration_in_seconds())

        for string, count in counter.items():
            self.stdout.write('\n')
            self.stdout.write('\t%s:' % string)
            self.stdout.write('\t\t%s' % count)
            self.stdout.write('\n')

        self.stdout.write('\n')
        self.stdout.write('Mean processing time:')
        self.stdout.write('\n')
        if durations:
            # float() keeps the division exact on Python 2 even if the
            # timer reports whole seconds.
            mean_seconds = float(sum(durations)) / len(durations)
            self.stdout.write('\t%s' % mean_seconds)
        else:
            # Nothing was converted, so there is no mean to report.
            self.stdout.write('\tn/a')