Python split_scan Examples

Programming Language: Python

Namespace/Package Name: scanning.tasks

Method/Function: split_scan

Examples at hotexamples.com: 5

Python split_scan - 5 examples found. These are the top rated real world Python examples of scanning.tasks.split_scan extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def load_test_data():
    data_file = os.path.join(settings.MEDIA_ROOT, "test", "test_data.yaml")
    uploader = User.objects.get(username='******')
    commenter = User.objects.create(username="******")
    with open(data_file) as fh:
        data = yaml.safe_load(fh)

    orgs = {}

    print "Setting site..."
    site = Site.objects.get_current()
    site.domain = data['site']['domain']
    site.name = data['site']['name']
    site.save()

    print "Adding admins..."
    for admin_data in data['admins']:
        user, created = User.objects.get_or_create(
            username=admin_data['username'],
            is_superuser=True,
            is_staff=True,
        )
        user.set_password(admin_data['password'])
        user.save()

    print "Adding orgs..."
    for org_data in data['orgs']:
        org, created = Organization.objects.get_or_create(
            name=org_data['name'],
            personal_contact=org_data['personal_contact'],
            slug=slugify(org_data['name']),
            public=org_data['public'],
            mailing_address=org_data['mailing_address'],
            about=org_data.get('about', ''),
            footer=org_data.get('footer', ''),
        )
        orgs[org_data['name']] = org
        for mod_data in org_data['moderators']:
            u, created = User.objects.get_or_create(
                username=mod_data['username'])
            u.set_password(mod_data['password'])
            u.save()
            org.moderators.add(u)
            Group.objects.get(name='moderators').user_set.add(u)
    for org_data in data['orgs']:
        mail_handled_by = org_data.get('outgoing_mail_handled_by', None)
        if mail_handled_by:
            org = Organization.objects.get(name=org_data['name'])
            mailer = Organization.objects.get(name=mail_handled_by)
            org.outgoing_mail_handled_by = mailer
            org.save()

    print "Building pdfs and users..."
    for user_data in data['users']:
        user, created = User.objects.get_or_create(
            username=slugify(user_data['name']))
        if user_data.get('managed', False):
            random_mailing_address = "\n".join([
                # Prisoner number
                "#%s" %
                "".join(random.choice(string.digits) for a in range(8)),
                # Street
                "%s Cherry Tree Lane" %
                "".join(random.choice(string.digits) for a in range(3)),
                # City, state, zip
                "City Name, %s  %s" % (
                    random.choice(US_STATES)[0],
                    "".join(random.choice(string.digits) for a in range(5)),
                )
            ])
        else:
            random_mailing_address = ""

        user.profile.display_name = user_data['name']
        user.profile.mailing_address = random_mailing_address
        user.profile.blogger = user_data.get('blogger', False)
        user.profile.managed = user_data.get('managed', False)
        user.profile.consent_form_received = user_data.get(
            'consent_form_received', False)
        user.profile.blog_name = user_data.get('blog_name', None) or ''
        user.profile.save()

        for org_name in user_data['orgs']:
            orgs[org_name].members.add(user)

        for corresp in user_data['correspondence']:
            direction, content = corresp.items()[0]
            if direction == "received":
                # Build Scan
                pdf = build_pdf(content['parts'], user.profile)
                path = tasks.move_scan_file(filename=pdf)
                scan = Scan.objects.create(uploader=uploader,
                                           org=orgs[org_name],
                                           author=user,
                                           pdf=os.path.relpath(
                                               path, settings.MEDIA_ROOT),
                                           under_construction=True,
                                           processing_complete=True,
                                           created=content['date'])
                # execute synchronously
                tasks.split_scan(scan_id=scan.pk)
                # Build Documents
                page_count = 1  # ignore envelope
                for part in content['parts']:
                    page_count += part["pages"]
                    if part["type"] == "ignore":
                        continue
                    document = Document.objects.create(
                        scan=scan,
                        editor=uploader,
                        author=user,
                        type=part["type"],
                        date_written=content["date"],
                        created=content["date"],
                        title=part.get("title", None) or "",
                    )
                    for i, page_index in enumerate(
                            range(page_count - part["pages"], page_count)):
                        scanpage = scan.scanpage_set.get(order=page_index)
                        DocumentPage.objects.create(document=document,
                                                    scan_page=scanpage,
                                                    order=i)
                    # execute synchronously
                    if part["type"] in ("profile", "post"):
                        document.status = "published"
                    else:
                        document.status = "unpublishable"
                    document.highlight_transform = '{"document_page_id": %s, "crop": [44.5, 58.66667175292969, 582.5, 288.6666717529297]}' % document.documentpage_set.all(
                    )[0].pk
                    document.save()
                    tasks.update_document_images(document.pk)
                    for comment in part.get('comments', []):
                        Comment.objects.create(
                            user=commenter,
                            comment=
                            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis.",
                            document=document,
                            created=comment['date'],
                        )
        # Finish received scans before parsing letters, to ensure comments/etc
        # are there yet.
        for corresp in user_data['correspondence']:
            direction, content = corresp.items()[0]
            if direction == "sent":
                letter = Letter(
                    type=content['type'],
                    auto_generated=True,
                    sender=uploader,
                    created=content['date'],
                    sent=content['date'],
                    recipient=user,
                    org=Organization.objects.get(name=user_data['orgs'][0]))
                if content['type'] == "comments":
                    letter.save()
                    comments = Comment.objects.unmailed().filter(
                        document__author=user, created__lt=content['date'])
                    for comment in comments:
                        letter.comments.add(comment)
                elif content['type'] == "letter":
                    letter.body = content['body']
                letter.save()

Example #2

Show file

File: fetch_mailboxforwarding_mail.py Project: flexpeace/btb

    def handle(self, *args, **kwargs):
        base_url = "https://www.mailboxforwarding.com/"
        
        if (not hasattr(settings, "MAILBOX_FORWARDING") or 
                not "username" in settings.MAILBOX_FORWARDING or
                not "password" in settings.MAILBOX_FORWARDING):
            print "Requires MAILBOX_FORWARDING settings, e.g.:"
            print 'MAILBOX_FORWARDING = {'
            print '  "username": "******",'
            print '  "password": "******",'
            print '}'
            print "exit 1"
            sys.exit(1)

        sess = requests.Session()
        res = sess.post(base_url + "manage/login.php", {
            "action": "login",
            "email": settings.MAILBOX_FORWARDING["username"],
            "password": settings.MAILBOX_FORWARDING["password"],
            "loginsubmit.x": "0",
            "loginsubmit.y": "0"
        })
        # This is a dslightly dirty hack -- we're matching a javascript data
        # structure with a regex, converting the quotes to doubles so it resembles
        # JSON, and then loading it as JSON.  This may prove brittle.
        match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL)
        text = match.group(1)
        text = text.replace('"', '\\"')
        text = text.replace("'", '"')
        data = json.loads(text)

        scans = {}
        packages = {}
        for a,b,date,c,kind,status,dl in data:
            match = re.search("pdfview.php\?id=(\d+)", dl)
            if match:
                id_ = match.group(1)
            else:
                id_ = None

            obj = {"date": date, "kind": kind, "status": status}

            if status == "Scanned":
                scans[id_] = obj
            else:
                packages[id_] = obj

        uploader = User.objects.get(username="******")
        org = Organization.objects.get(pk=1) #TODO: generalize this? 

        new_count = 0
        for id_,details in scans.iteritems():
            source_id = "mailboxforwarding.com-{}".format(id_)
            if Scan.objects.filter(source_id=source_id).exists():
                continue
            new_count += 1

            print "Downloading", source_id

            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh:
                res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_))
                fh.write(res.content)
                name = fh.name

            path = tasks.move_scan_file(filename=name)
            scan = Scan.objects.create(
                uploader=uploader,
                pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                under_construction=True,
                org=org,
                source_id=source_id
            )
            tasks.split_scan(scan=scan)

        if packages:
            print "Manual action needed on the following at " \
                  "https://www.mailboxforwarding.com/:"
            for id_,details in packages.iteritems():
                new_count += 1
                print details
        print "Examined {} letters, {} new.".format(len(data), new_count)

Example #3

Show file

File: import_test_data.py Project: arthurlutz/btb

def load_test_data():
    data_file = os.path.join(settings.MEDIA_ROOT, "test", "test_data.yaml")
    uploader = User.objects.get(username='******')
    commenter = User.objects.create(username="******")
    with open(data_file) as fh:
        data = yaml.safe_load(fh)

    orgs = {}

    print "Setting site..."
    site = Site.objects.get_current()
    site.domain = data['site']['domain']
    site.name = data['site']['name']
    site.save()

    print "Adding admins..."
    for admin_data in data['admins']:
        user, created = User.objects.get_or_create(
                username=admin_data['username'],
                is_superuser=True,
                is_staff=True,
        )
        user.set_password(admin_data['password'])
        user.save()

    print "Adding orgs..."
    for org_data in data['orgs']:
        org, created = Organization.objects.get_or_create(
                name=org_data['name'],
                personal_contact=org_data['personal_contact'],
                slug=slugify(org_data['name']),
                public=org_data['public'],
                mailing_address=org_data['mailing_address'],
                about=org_data.get('about', ''),
                footer=org_data.get('footer', ''),
        )
        orgs[org_data['name']] = org
        for mod_data in org_data['moderators']:
            u, created = User.objects.get_or_create(
                    username=mod_data['username']
            )
            u.set_password(mod_data['password'])
            u.save()
            org.moderators.add(u)
            Group.objects.get(name='moderators').user_set.add(u)
    for org_data in data['orgs']:
        mail_handled_by = org_data.get('outgoing_mail_handled_by', None)
        if mail_handled_by:
            org = Organization.objects.get(name=org_data['name'])
            mailer = Organization.objects.get(name=mail_handled_by)
            org.outgoing_mail_handled_by = mailer
            org.save()

    print "Building pdfs and users..."
    for user_data in data['users']:
        user, created = User.objects.get_or_create(
                username=slugify(user_data['name'])
        )
        if user_data.get('managed', False):
            random_mailing_address = "\n".join([
                # Prisoner number
                "#%s" % "".join(random.choice(string.digits) for a in range(8)),
                # Street
                "%s Cherry Tree Lane" % "".join(
                    random.choice(string.digits) for a in range(3)),
                # City, state, zip
                "City Name, %s  %s" % (
                    random.choice(US_STATES)[0],
                    "".join(random.choice(string.digits) for a in range(5)),
                )
            ])
        else:
            random_mailing_address = ""

        user.profile.display_name = user_data['name']
        user.profile.mailing_address = random_mailing_address
        user.profile.blogger = user_data.get('blogger', False)
        user.profile.managed = user_data.get('managed', False)
        user.profile.consent_form_received = user_data.get('consent_form_received', False)
        user.profile.blog_name = user_data.get('blog_name', None) or ''
        user.profile.save()

        for org_name in user_data['orgs']:
            orgs[org_name].members.add(user)

        for corresp in user_data['correspondence']:
            direction, content = corresp.items()[0]
            if direction == "received":
                # Build Scan
                pdf = build_pdf(content['parts'], user.profile) 
                path = tasks.move_scan_file(filename=pdf)
                scan = Scan.objects.create(
                        uploader=uploader,
                        org=orgs[org_name],
                        author=user,
                        pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                        under_construction=True,
                        processing_complete=True,
                        created=content['date'])
                # execute synchronously
                tasks.split_scan(scan_id=scan.pk)
                # Build Documents
                page_count = 1 # ignore envelope
                for part in content['parts']:
                    page_count += part["pages"]
                    if part["type"] == "ignore":
                        continue
                    document = Document.objects.create(
                            scan=scan,
                            editor=uploader,
                            author=user,
                            type=part["type"],
                            date_written=content["date"],
                            created=content["date"],
                            title=part.get("title", None) or "",
                    )
                    for i, page_index in enumerate(
                            range(page_count - part["pages"], page_count)):
                        scanpage = scan.scanpage_set.get(order=page_index)
                        DocumentPage.objects.create(
                                document=document,
                                scan_page=scanpage,
                                order=i)
                    # execute synchronously
                    if part["type"] in ("profile", "post"):
                        document.status = "published"
                    else:
                        document.status = "unpublishable"
                    document.highlight_transform = '{"document_page_id": %s, "crop": [44.5, 58.66667175292969, 582.5, 288.6666717529297]}' % document.documentpage_set.all()[0].pk
                    document.save()
                    tasks.update_document_images(document.pk)
                    for comment in part.get('comments', []):
                        Comment.objects.create(
                                user=commenter,
                                comment="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis.",
                                document=document,
                                created=comment['date'],
                        )
        # Finish received scans before parsing letters, to ensure comments/etc
        # are there yet.
        for corresp in user_data['correspondence']:
            direction, content = corresp.items()[0]
            if direction == "sent":
                letter = Letter(type=content['type'], 
                        auto_generated=True, 
                        sender=uploader,
                        created=content['date'],
                        sent=content['date'],
                        recipient=user,
                        org=Organization.objects.get(name=user_data['orgs'][0]))
                if content['type'] == "comments":
                    letter.save()
                    comments = Comment.objects.unmailed().filter(
                            document__author=user,
                            created__lt=content['date']
                    )
                    for comment in comments:
                        letter.comments.add(comment)
                elif content['type'] == "letter":
                    letter.body = content['body']
                letter.save()

Example #4

Show file

File: fetch_mailboxforwarding_mail.py Project: yourcelf/btb

    def handle(self, *args, **kwargs):
        base_url = "https://www.mailboxforwarding.com/"

        if (not hasattr(settings, "MAILBOX_FORWARDING")
                or not "username" in settings.MAILBOX_FORWARDING
                or not "password" in settings.MAILBOX_FORWARDING):
            print "Requires MAILBOX_FORWARDING settings, e.g.:"
            print 'MAILBOX_FORWARDING = {'
            print '  "username": "******",'
            print '  "password": "******",'
            print '}'
            print "exit 1"
            sys.exit(1)

        sess = requests.Session()
        res = sess.post(
            base_url + "manage/login.php", {
                "action": "login",
                "email": settings.MAILBOX_FORWARDING["username"],
                "password": settings.MAILBOX_FORWARDING["password"],
                "loginsubmit.x": "0",
                "loginsubmit.y": "0"
            })
        # This is a slightly dirty hack -- we're matching a javascript data
        # structure with a regex, converting the quotes to doubles so it resembles
        # JSON, and then loading it as JSON.  This may prove brittle.
        match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text,
                          re.DOTALL)
        if not match:
            raise Exception("Can't find data. Are login creds correct?")
        text = match.group(1)
        text = text.replace('"', '\\"')
        text = text.replace("'", '"')
        data = json.loads(text)

        scans = {}
        packages = {}
        for checkbox, date, envelope, type_status, dl in data:
            details = {}
            match = re.search("Status: <b>([^<]+)</b>.*Type: <b>([^<]+)</b>",
                              type_status)
            if not match:
                raise Exception("Can't match type/status")
            details['kind'] = match.group(2)
            details['status'] = match.group(1)

            if details['kind'] == "Letter" and details['status'] != "Scanned":
                continue

            match = re.search("pdfview.php\?id=(\d+)", dl)
            if match:
                id_ = match.group(1)
            else:
                # TODO: Handle packages correctly
                continue
                #raise Exception("Can't find ID")

            match = re.search("src=\"([^\"]+)\"", envelope)
            if not match:
                raise Exception("Can't match envelope image")
            details['envelope'] = match.group(1)

            if details['status'] == "Scanned":
                scans[id_] = details
            elif details['kind'] != "Letter":
                packages[id_] = details

        uploader = User.objects.get(username="******")
        org = Organization.objects.get(pk=1)  #TODO: generalize this?

        new_count = 0
        for id_, details in scans.iteritems():
            source_id = "mailboxforwarding.com-{}".format(id_)
            if Scan.objects.filter(source_id=source_id).exists():
                continue
            new_count += 1

            print "Downloading pdf", source_id
            res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_))
            in_pdf_fh = StringIO()
            in_pdf_fh.write(res.content)
            in_pdf_fh.seek(0)
            reader = PdfFileReader(in_pdf_fh)

            print "Downloading envelope", details['envelope']
            res = sess.get(details['envelope'])
            in_envelope_fh = StringIO()
            in_envelope_fh.write(res.content)
            in_envelope_fh.seek(0)
            img = Image.open(in_envelope_fh)
            out_envelope_fh = StringIO()
            img.save(out_envelope_fh, "pdf")
            envelope_reader = PdfFileReader(out_envelope_fh)

            writer = PdfFileWriter()
            writer.addPage(envelope_reader.getPage(0))
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

            with tempfile.NamedTemporaryFile(suffix=".pdf",
                                             delete=False) as fh:
                writer.write(fh)
                dest_pdf_name = fh.name

            in_envelope_fh.close()
            out_envelope_fh.close()
            in_pdf_fh.close()

            path = tasks.move_scan_file(filename=dest_pdf_name)
            scan = Scan.objects.create(uploader=uploader,
                                       pdf=os.path.relpath(
                                           path, settings.MEDIA_ROOT),
                                       under_construction=True,
                                       org=org,
                                       source_id=source_id)
            tasks.split_scan(scan=scan)

        if packages:
            print "Manual action needed on the following at " \
                  "https://www.mailboxforwarding.com/:"
            for id_, details in packages.iteritems():
                new_count += 1
                print details
        print "Examined {} letters, {} new.".format(len(data), new_count)

Example #5

Show file

File: fetch_mailboxforwarding_mail.py Project: yourcelf/btb

    def handle(self, *args, **kwargs):
        base_url = "https://www.mailboxforwarding.com/"
        
        if (not hasattr(settings, "MAILBOX_FORWARDING") or 
                not "username" in settings.MAILBOX_FORWARDING or
                not "password" in settings.MAILBOX_FORWARDING):
            print "Requires MAILBOX_FORWARDING settings, e.g.:"
            print 'MAILBOX_FORWARDING = {'
            print '  "username": "******",'
            print '  "password": "******",'
            print '}'
            print "exit 1"
            sys.exit(1)

        sess = requests.Session()
        res = sess.post(base_url + "manage/login.php", {
            "action": "login",
            "email": settings.MAILBOX_FORWARDING["username"],
            "password": settings.MAILBOX_FORWARDING["password"],
            "loginsubmit.x": "0",
            "loginsubmit.y": "0"
        })
        # This is a slightly dirty hack -- we're matching a javascript data
        # structure with a regex, converting the quotes to doubles so it resembles
        # JSON, and then loading it as JSON.  This may prove brittle.
        match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL)
        if not match:
            raise Exception("Can't find data. Are login creds correct?")
        text = match.group(1)
        text = text.replace('"', '\\"')
        text = text.replace("'", '"')
        data = json.loads(text)

        scans = {}
        packages = {}
        for checkbox, date, envelope, type_status, dl in data:
            details = {}
            match = re.search("Type: <b>([^<]+)</b>.*Status: <b>([^<]+)</b>", type_status)
            if not match:
                raise Exception("Can't match type/status")
            details['kind'] = match.group(1)
            details['status'] = match.group(2)

            if details['kind'] == "Letter" and details['status'] != "Scanned":
                continue

            match = re.search("pdfview.php\?id=(\d+)", dl)
            if match:
                id_ = match.group(1)
            else:
                # TODO: Handle packages correctly
                continue
                #raise Exception("Can't find ID")

            match = re.search("src=\"([^\"]+)\"", envelope)
            if not match:
                raise Exception("Can't match envelope image")
            details['envelope'] = match.group(1)


            if details['status'] == "Scanned":
                scans[id_] = details
            elif details['kind'] != "Letter":
                packages[id_] = details

        uploader = User.objects.get(username="******")
        org = Organization.objects.get(pk=1) #TODO: generalize this? 

        new_count = 0
        for id_, details in scans.iteritems():
            source_id = "mailboxforwarding.com-{}".format(id_)
            if Scan.objects.filter(source_id=source_id).exists():
                continue
            new_count += 1

            print "Downloading pdf", source_id
            res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_))
            in_pdf_fh = StringIO()
            in_pdf_fh.write(res.content)
            in_pdf_fh.seek(0)
            reader = PdfFileReader(in_pdf_fh)

            print "Downloading envelope", details['envelope']
            res = sess.get(details['envelope'])
            in_envelope_fh = StringIO()
            in_envelope_fh.write(res.content)
            in_envelope_fh.seek(0)
            img = Image.open(in_envelope_fh)
            out_envelope_fh = StringIO()
            img.save(out_envelope_fh, "pdf")
            envelope_reader = PdfFileReader(out_envelope_fh)

            writer = PdfFileWriter()
            writer.addPage(envelope_reader.getPage(0))
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh:
                writer.write(fh)
                dest_pdf_name = fh.name

            in_envelope_fh.close()
            out_envelope_fh.close()
            in_pdf_fh.close()

            path = tasks.move_scan_file(filename=dest_pdf_name)
            scan = Scan.objects.create(
                uploader=uploader,
                pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                under_construction=True,
                org=org,
                source_id=source_id
            )
            tasks.split_scan(scan=scan)

        if packages:
            print "Manual action needed on the following at " \
                  "https://www.mailboxforwarding.com/:"
            for id_, details in packages.iteritems():
                new_count += 1
                print details
        print "Examined {} letters, {} new.".format(len(data), new_count)