Ejemplo n.º 1
0
Archivo: views.py Proyecto: Shidash/btb
def scan_add(request):
    """Display and process the form for uploading a scan.

    A valid upload whose name ends in ``.zip`` is copied to a named temporary
    file whose path is handed to ``tasks.process_zip``.  Any other valid
    upload is passed to ``tasks.move_scan_file``, recorded as a ``Scan`` and
    handed to ``tasks.split_scan``.  In both cases the response redirects to
    ``moderation.wait_for_processing`` for the queued task.  When the form is
    unbound or invalid, the upload form is rendered instead.
    """
    FormClass = get_org_upload_form(request.user)
    form = FormClass(request.POST or None, request.FILES or None, types={
        "pdf": "application/pdf",
        "zip": "application/zip",
    })
    if form.is_valid():
        upload = request.FILES['file']
        if upload.name.lower().endswith(".zip"):
            # delete=False keeps the file on disk after the handle is closed;
            # only its path is handed to the task.
            with tempfile.NamedTemporaryFile(delete=False, suffix="scans.zip") as fh:
                for chunk in upload.chunks():
                    fh.write(chunk)
                zip_path = fh.name
            # Queue the task only after the ``with`` block has flushed and
            # closed the file, so the task cannot read a partially written
            # archive.
            task_id = tasks.process_zip.delay(
                filename=zip_path,
                uploader_id=request.user.pk,
                org_id=form.cleaned_data['organization'].pk,
                redirect=reverse("moderation.home"),
            )
        else:
            path = tasks.move_scan_file(uploaded_file=upload)
            scan = Scan.objects.create(
                uploader=request.user,
                # Stored relative to MEDIA_ROOT.
                pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                under_construction=True,
                org=form.cleaned_data['organization'])
            task_id = tasks.split_scan.delay(
                scan_id=scan.pk,
                redirect=reverse("moderation.edit_scan", args=[scan.pk]))
        return redirect('moderation.wait_for_processing', task_id)
    return render(request, "scanning/upload.html", {'form': form})
Ejemplo n.º 2
0
Archivo: views.py Proyecto: catstar/btb
def scan_replace(request, scan_id=None):
    """Replace the PDF behind an existing scan with a newly uploaded one.

    Raises PermissionDenied when ``Scan.objects.org_filter`` yields no scan
    with ``scan_id`` for the requesting user.  On a valid upload the scan's
    files are deleted, the scan is pointed at the new file and re-split;
    otherwise the replace form is rendered.
    """
    try:
        scan = Scan.objects.org_filter(request.user, pk=scan_id).get()
    except Scan.DoesNotExist:
        raise PermissionDenied

    accepted_types = {"pdf": "application/pdf"}
    form = ScanUploadForm(request.POST or None, request.FILES or None,
                          types=accepted_types)
    if not form.is_valid():
        return render(request, "scanning/replace.html", {"form": form})

    # Store the new file first, then drop the old files and repoint the scan.
    new_path = tasks.move_scan_file(uploaded_file=request.FILES["file"])
    scan.full_delete(filesonly=True)
    scan.uploader = request.user
    scan.pdf = os.path.relpath(new_path, settings.MEDIA_ROOT)
    scan.save()

    edit_url = reverse("moderation.edit_scan", args=[scan.pk])
    task_id = tasks.split_scan.delay(scan_id=scan.pk, redirect=edit_url)
    return redirect("moderation.wait_for_processing", task_id)
Ejemplo n.º 3
0
Archivo: views.py Proyecto: Shidash/btb
def scan_replace(request, scan_id=None):
    """Swap the PDF of an existing scan for a freshly uploaded file.

    The scan is looked up through ``Scan.objects.org_filter`` for the
    requesting user; PermissionDenied is raised when it is not found.  A
    valid upload replaces the scan's files and queues ``tasks.split_scan``;
    anything else re-renders the replace form.
    """
    try:
        scan = Scan.objects.org_filter(request.user, pk=scan_id).get()
    except Scan.DoesNotExist:
        raise PermissionDenied

    form = ScanUploadForm(
        request.POST or None,
        request.FILES or None,
        types={"pdf": "application/pdf"},
    )
    if not form.is_valid():
        return render(request, "scanning/replace.html", {'form': form})

    stored_path = tasks.move_scan_file(uploaded_file=request.FILES['file'])
    # Remove the previous files before pointing the scan at the new one.
    scan.full_delete(filesonly=True)
    scan.uploader = request.user
    scan.pdf = os.path.relpath(stored_path, settings.MEDIA_ROOT)
    scan.save()

    after_split = reverse("moderation.edit_scan", args=[scan.pk])
    task_id = tasks.split_scan.delay(scan_id=scan.pk, redirect=after_split)
    return redirect('moderation.wait_for_processing', task_id)
Ejemplo n.º 4
0
def new_scan(filename=None, uploaded_file=None, uploader_id=None, scan_id=None):
    """
    Given an absolute filename or a Django UploadedFile instance, write the
    file to MEDIA_ROOT, then hand off processing to the asynchronous task to
    create scans.

    When ``scan_id`` is given, that scan's existing files are deleted and the
    scan is pointed at the new file before ``tasks.process_scan`` is queued
    with the scan's id.  Otherwise the task is queued with the destination
    filename and ``uploader_id``.

    Returns the id of the queued task.

    Raises ``Scan.DoesNotExist`` when ``scan_id`` matches no scan, and
    ``Exception`` when neither ``filename`` nor ``uploaded_file`` is given or
    when ``uploader_id`` matches no user.
    """
    # Look the scan up first (same exception order as before), but do not
    # touch its files yet.
    scan = Scan.objects.get(pk=scan_id) if scan_id else None

    if not (filename or uploaded_file):
        raise Exception("Requires one of filename or uploaded_file")
    try:
        uploader = User.objects.get(pk=uploader_id)
    except User.DoesNotExist:
        raise Exception("Requires valid uploader_id.")

    # Delete the old files only once the arguments are known to be usable, so
    # an invalid call cannot destroy an existing scan's files and then fail.
    if scan is not None:
        scan.full_delete(filesonly=True)

    dest = tasks.move_scan_file(uploaded_file, filename)
    after_processing = reverse("moderation.home") + "#/process"
    if scan is not None:
        after_processing += "/scan/%s" % scan_id

    task_kwargs = {'redirect': after_processing}
    if scan is not None:
        scan.pdf = dest
        scan.uploader = uploader
        scan.save()
        task_kwargs['scan_id'] = scan.id
    else:
        task_kwargs['filename'] = dest
        task_kwargs['uploader_id'] = uploader_id

    task_id = tasks.process_scan.delay(**task_kwargs).task_id

    # Create a TaskMeta for us to look at while processing happens.
    TaskMeta.objects.create(task_id=task_id, status="PENDING",
            result=str(task_kwargs),
            date_done=datetime.datetime.now())

    return task_id
Ejemplo n.º 5
0
def load_test_data():
    """Populate the database from ``MEDIA_ROOT/test/test_data.yaml``.

    In order: sets the current Site's domain and name, creates the admin
    users, creates the organizations with their moderators, then for every
    user in the data fills in the profile, adds the user to their orgs,
    builds a Scan with Documents, DocumentPages and Comments for each
    "received" correspondence entry, and finally a Letter for each "sent"
    entry.

    Python 2 only as written (print statements, ``dict.items()[0]``).
    """
    data_file = os.path.join(settings.MEDIA_ROOT, "test", "test_data.yaml")
    # The uploader must already exist (``get`` raises otherwise); the
    # commenter is always created here.
    uploader = User.objects.get(username='******')
    commenter = User.objects.create(username="******")
    with open(data_file) as fh:
        data = yaml.safe_load(fh)

    # Organization instances keyed by name, filled in while adding orgs.
    orgs = {}

    print "Setting site..."
    site = Site.objects.get_current()
    site.domain = data['site']['domain']
    site.name = data['site']['name']
    site.save()

    print "Adding admins..."
    for admin_data in data['admins']:
        user, created = User.objects.get_or_create(
            username=admin_data['username'],
            is_superuser=True,
            is_staff=True,
        )
        user.set_password(admin_data['password'])
        user.save()

    print "Adding orgs..."
    for org_data in data['orgs']:
        org, created = Organization.objects.get_or_create(
            name=org_data['name'],
            personal_contact=org_data['personal_contact'],
            slug=slugify(org_data['name']),
            public=org_data['public'],
            mailing_address=org_data['mailing_address'],
            about=org_data.get('about', ''),
            footer=org_data.get('footer', ''),
        )
        orgs[org_data['name']] = org
        for mod_data in org_data['moderators']:
            u, created = User.objects.get_or_create(
                username=mod_data['username'])
            u.set_password(mod_data['password'])
            u.save()
            org.moderators.add(u)
            # The 'moderators' group must already exist (``get`` raises
            # otherwise).
            Group.objects.get(name='moderators').user_set.add(u)
    # Second pass over the orgs: presumably done separately so that the org
    # named in 'outgoing_mail_handled_by' has already been created above.
    for org_data in data['orgs']:
        mail_handled_by = org_data.get('outgoing_mail_handled_by', None)
        if mail_handled_by:
            org = Organization.objects.get(name=org_data['name'])
            mailer = Organization.objects.get(name=mail_handled_by)
            org.outgoing_mail_handled_by = mailer
            org.save()

    print "Building pdfs and users..."
    for user_data in data['users']:
        user, created = User.objects.get_or_create(
            username=slugify(user_data['name']))
        # Only managed users get a (randomly generated) mailing address.
        if user_data.get('managed', False):
            random_mailing_address = "\n".join([
                # Prisoner number
                "#%s" %
                "".join(random.choice(string.digits) for a in range(8)),
                # Street
                "%s Cherry Tree Lane" %
                "".join(random.choice(string.digits) for a in range(3)),
                # City, state, zip
                "City Name, %s  %s" % (
                    random.choice(US_STATES)[0],
                    "".join(random.choice(string.digits) for a in range(5)),
                )
            ])
        else:
            random_mailing_address = ""

        user.profile.display_name = user_data['name']
        user.profile.mailing_address = random_mailing_address
        user.profile.blogger = user_data.get('blogger', False)
        user.profile.managed = user_data.get('managed', False)
        user.profile.consent_form_received = user_data.get(
            'consent_form_received', False)
        user.profile.blog_name = user_data.get('blog_name', None) or ''
        user.profile.save()

        for org_name in user_data['orgs']:
            orgs[org_name].members.add(user)

        for corresp in user_data['correspondence']:
            # Takes the first (direction, content) pair of the entry --
            # assumes each entry is a single-item mapping; verify against the
            # YAML file.
            direction, content = corresp.items()[0]
            if direction == "received":
                # Build Scan
                pdf = build_pdf(content['parts'], user.profile)
                path = tasks.move_scan_file(filename=pdf)
                # NOTE(review): ``org_name`` is whatever the membership loop
                # above left behind, i.e. the last entry of
                # user_data['orgs'].  If that list is empty it is undefined
                # (first user) or stale (later users) -- confirm intent; the
                # Letter below uses user_data['orgs'][0] instead.
                scan = Scan.objects.create(uploader=uploader,
                                           org=orgs[org_name],
                                           author=user,
                                           pdf=os.path.relpath(
                                               path, settings.MEDIA_ROOT),
                                           under_construction=True,
                                           processing_complete=True,
                                           created=content['date'])
                # execute synchronously
                tasks.split_scan(scan_id=scan.pk)
                # Build Documents
                page_count = 1  # ignore envelope
                for part in content['parts']:
                    # page_count is the running end index; "ignore" parts
                    # still advance it but produce no Document.
                    page_count += part["pages"]
                    if part["type"] == "ignore":
                        continue
                    document = Document.objects.create(
                        scan=scan,
                        editor=uploader,
                        author=user,
                        type=part["type"],
                        date_written=content["date"],
                        created=content["date"],
                        title=part.get("title", None) or "",
                    )
                    # Attach this part's scan pages (the last part["pages"]
                    # indexes before page_count), ordered from 0.
                    for i, page_index in enumerate(
                            range(page_count - part["pages"], page_count)):
                        scanpage = scan.scanpage_set.get(order=page_index)
                        DocumentPage.objects.create(document=document,
                                                    scan_page=scanpage,
                                                    order=i)
                    # Only profiles and posts are published.
                    if part["type"] in ("profile", "post"):
                        document.status = "published"
                    else:
                        document.status = "unpublishable"
                    # Fixed crop rectangle on the document's first page.
                    document.highlight_transform = '{"document_page_id": %s, "crop": [44.5, 58.66667175292969, 582.5, 288.6666717529297]}' % document.documentpage_set.all(
                    )[0].pk
                    document.save()
                    # execute synchronously
                    tasks.update_document_images(document.pk)
                    # One placeholder comment per entry in the part's
                    # 'comments' list; only the date comes from the data.
                    for comment in part.get('comments', []):
                        Comment.objects.create(
                            user=commenter,
                            comment=
                            "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis.",
                            document=document,
                            created=comment['date'],
                        )
        # Handle every received scan before the sent letters, so that the
        # comments created above already exist when "comments" letters
        # collect them.
        for corresp in user_data['correspondence']:
            direction, content = corresp.items()[0]
            if direction == "sent":
                letter = Letter(
                    type=content['type'],
                    auto_generated=True,
                    sender=uploader,
                    created=content['date'],
                    sent=content['date'],
                    recipient=user,
                    org=Organization.objects.get(name=user_data['orgs'][0]))
                if content['type'] == "comments":
                    # Saved before the comments are attached -- presumably
                    # because ``letter.comments`` is a many-to-many relation
                    # that needs a primary key; TODO confirm.
                    letter.save()
                    comments = Comment.objects.unmailed().filter(
                        document__author=user, created__lt=content['date'])
                    for comment in comments:
                        letter.comments.add(comment)
                elif content['type'] == "letter":
                    letter.body = content['body']
                letter.save()
Ejemplo n.º 6
0
def edit_profile(request, user_id=None):
    """Show and process the profile, user and profile-scan edit forms.

    Raises PermissionDenied when the requesting user may edit neither the
    profile nor the user identified by ``user_id``.  On a valid POST the
    forms are saved; if a file was uploaded, a Scan is created and the
    response redirects to the processing wait page, otherwise to the
    profile page.  In every other case the edit template is rendered.
    """
    may_edit_profile = can_edit_profile(request.user, user_id)
    may_edit_user = can_edit_user(request.user, user_id)
    if not (may_edit_profile or may_edit_user):
        raise PermissionDenied

    user = get_object_or_404(User, pk=user_id)
    # Most recently modified published profile document, if there is one.
    published_profiles = Document.objects.filter(
        type="profile", status="published", author=user
    ).order_by('-modified')
    try:
        document = published_profiles[0]
    except IndexError:
        document = None

    # XXX Could probably simplify the permissions backflips by assuming that an
    # editor using this interface either has permissions to edit both
    # profile/user, or neither.

    user_form = profile_form = scan_upload_form = None
    ProfileForm = get_profile_form(request.user)
    if request.method != 'POST':
        # Unbound forms for display only.
        if may_edit_profile:
            profile_form = ProfileForm(instance=user.profile)
            scan_upload_form = ProfileUploadForm()
        if may_edit_user:
            user_form = UserFormNoEmail(instance=user)
    else:
        if may_edit_profile:
            profile_form = ProfileForm(request.POST, instance=user.profile)
            scan_upload_form = ProfileUploadForm(request.POST, request.FILES)
        if may_edit_user:
            user_form = UserFormNoEmail(request.POST, instance=user)

        # Forms the user may not edit stay None and count as valid; the
        # generator stops at the first invalid form.
        candidates = (profile_form, user_form, scan_upload_form)
        if all(not form or form.is_valid() for form in candidates):
            if profile_form:
                profile_form.save()
            if user_form:
                user_form.save()
            if scan_upload_form and 'file' in request.FILES:
                pdf = move_scan_file(uploaded_file=request.FILES['file'])
                scan = Scan.objects.create(uploader=user, author=user, pdf=pdf)
                profile_url = reverse('profiles.profile_show', args=[user_id])
                task_id = process_scan_to_profile.delay(scan.pk, profile_url)
                return redirect('moderation.wait_for_processing', task_id=task_id)
            messages.success(request, _("Changes saved."))
            return redirect('profiles.profile_show', user_id)

    context = {
        'document': document,
        'profile_form': profile_form,
        'user_form': user_form,
        'scan_upload_form': scan_upload_form,
        'profile': user.profile,
        'can_edit_profile': may_edit_profile,
        'can_edit_user': may_edit_user,
    }
    return render(request, "profiles/profile_edit.html", context)
Ejemplo n.º 7
0
    def handle(self, *args, **kwargs):
        base_url = "https://www.mailboxforwarding.com/"
        
        if (not hasattr(settings, "MAILBOX_FORWARDING") or 
                not "username" in settings.MAILBOX_FORWARDING or
                not "password" in settings.MAILBOX_FORWARDING):
            print "Requires MAILBOX_FORWARDING settings, e.g.:"
            print 'MAILBOX_FORWARDING = {'
            print '  "username": "******",'
            print '  "password": "******",'
            print '}'
            print "exit 1"
            sys.exit(1)

        sess = requests.Session()
        res = sess.post(base_url + "manage/login.php", {
            "action": "login",
            "email": settings.MAILBOX_FORWARDING["username"],
            "password": settings.MAILBOX_FORWARDING["password"],
            "loginsubmit.x": "0",
            "loginsubmit.y": "0"
        })
        # This is a dslightly dirty hack -- we're matching a javascript data
        # structure with a regex, converting the quotes to doubles so it resembles
        # JSON, and then loading it as JSON.  This may prove brittle.
        match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL)
        text = match.group(1)
        text = text.replace('"', '\\"')
        text = text.replace("'", '"')
        data = json.loads(text)

        scans = {}
        packages = {}
        for a,b,date,c,kind,status,dl in data:
            match = re.search("pdfview.php\?id=(\d+)", dl)
            if match:
                id_ = match.group(1)
            else:
                id_ = None

            obj = {"date": date, "kind": kind, "status": status}

            if status == "Scanned":
                scans[id_] = obj
            else:
                packages[id_] = obj

        uploader = User.objects.get(username="******")
        org = Organization.objects.get(pk=1) #TODO: generalize this? 

        new_count = 0
        for id_,details in scans.iteritems():
            source_id = "mailboxforwarding.com-{}".format(id_)
            if Scan.objects.filter(source_id=source_id).exists():
                continue
            new_count += 1

            print "Downloading", source_id

            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh:
                res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_))
                fh.write(res.content)
                name = fh.name

            path = tasks.move_scan_file(filename=name)
            scan = Scan.objects.create(
                uploader=uploader,
                pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                under_construction=True,
                org=org,
                source_id=source_id
            )
            tasks.split_scan(scan=scan)

        if packages:
            print "Manual action needed on the following at " \
                  "https://www.mailboxforwarding.com/:"
            for id_,details in packages.iteritems():
                new_count += 1
                print details
        print "Examined {} letters, {} new.".format(len(data), new_count)
Ejemplo n.º 8
0
    def handle(self, *args, **kwargs):
        """Fetch newly scanned letters from mailboxforwarding.com as Scans.

        Logs in with the ``MAILBOX_FORWARDING`` settings, extracts the mail
        listing embedded in the returned page, and for every "Scanned" entry
        that has no Scan with the matching ``source_id`` yet downloads the
        PDF and the envelope image, writes a combined PDF with the envelope
        as its first page, and creates and splits a Scan for it.  Remaining
        non-letter entries are printed as needing manual action.

        Exits with status 1 when the settings are missing; raises
        ``Exception`` when the listing or an expected field cannot be
        matched.
        """
        base_url = "https://www.mailboxforwarding.com/"

        if (not hasattr(settings, "MAILBOX_FORWARDING")
                or not "username" in settings.MAILBOX_FORWARDING
                or not "password" in settings.MAILBOX_FORWARDING):
            print "Requires MAILBOX_FORWARDING settings, e.g.:"
            print 'MAILBOX_FORWARDING = {'
            print '  "username": "******",'
            print '  "password": "******",'
            print '}'
            print "exit 1"
            sys.exit(1)

        sess = requests.Session()
        res = sess.post(
            base_url + "manage/login.php", {
                "action": "login",
                "email": settings.MAILBOX_FORWARDING["username"],
                "password": settings.MAILBOX_FORWARDING["password"],
                "loginsubmit.x": "0",
                "loginsubmit.y": "0"
            })
        # This is a slightly dirty hack -- we're matching a javascript data
        # structure with a regex, converting the quotes to doubles so it resembles
        # JSON, and then loading it as JSON.  This may prove brittle.
        match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text,
                          re.DOTALL)
        if not match:
            raise Exception("Can't find data. Are login creds correct?")
        text = match.group(1)
        text = text.replace('"', '\\"')
        text = text.replace("'", '"')
        data = json.loads(text)

        # Both dicts map the pdfview id to the details parsed from the row.
        scans = {}
        packages = {}
        # Each row unpacks into five cells; the checkbox and date cells are
        # not used below.
        for checkbox, date, envelope, type_status, dl in data:
            details = {}
            # Status is captured first in the cell, kind ("Type") second.
            match = re.search("Status: <b>([^<]+)</b>.*Type: <b>([^<]+)</b>",
                              type_status)
            if not match:
                raise Exception("Can't match type/status")
            details['kind'] = match.group(2)
            details['status'] = match.group(1)

            # Letters that have not been scanned yet are skipped entirely.
            if details['kind'] == "Letter" and details['status'] != "Scanned":
                continue

            match = re.search("pdfview.php\?id=(\d+)", dl)
            if match:
                id_ = match.group(1)
            else:
                # TODO: Handle packages correctly
                continue
                #raise Exception("Can't find ID")

            # The envelope image URL is taken from the src attribute in the
            # envelope cell.
            match = re.search("src=\"([^\"]+)\"", envelope)
            if not match:
                raise Exception("Can't match envelope image")
            details['envelope'] = match.group(1)

            # NOTE(review): rows without a pdfview id were skipped above, so
            # ``packages`` only collects non-letter, non-scanned rows that
            # do carry a pdf link.
            if details['status'] == "Scanned":
                scans[id_] = details
            elif details['kind'] != "Letter":
                packages[id_] = details

        uploader = User.objects.get(username="******")
        org = Organization.objects.get(pk=1)  #TODO: generalize this?

        new_count = 0
        for id_, details in scans.iteritems():
            source_id = "mailboxforwarding.com-{}".format(id_)
            # Skip anything that already has a Scan with this source_id.
            if Scan.objects.filter(source_id=source_id).exists():
                continue
            new_count += 1

            print "Downloading pdf", source_id
            res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_))
            # Buffer the downloaded PDF in memory for the reader.
            in_pdf_fh = StringIO()
            in_pdf_fh.write(res.content)
            in_pdf_fh.seek(0)
            reader = PdfFileReader(in_pdf_fh)

            print "Downloading envelope", details['envelope']
            res = sess.get(details['envelope'])
            in_envelope_fh = StringIO()
            in_envelope_fh.write(res.content)
            in_envelope_fh.seek(0)
            # Re-save the envelope image in "pdf" format into a second
            # buffer so its first page can be added to the output.
            img = Image.open(in_envelope_fh)
            out_envelope_fh = StringIO()
            img.save(out_envelope_fh, "pdf")
            envelope_reader = PdfFileReader(out_envelope_fh)

            # Output order: envelope page first, then every page of the PDF.
            writer = PdfFileWriter()
            writer.addPage(envelope_reader.getPage(0))
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

            # delete=False: the file is handed to move_scan_file by name
            # after the handle is closed.
            with tempfile.NamedTemporaryFile(suffix=".pdf",
                                             delete=False) as fh:
                writer.write(fh)
                dest_pdf_name = fh.name

            # The buffers are closed only after writer.write() -- presumably
            # because the readers still read from them until then; TODO
            # confirm.
            in_envelope_fh.close()
            out_envelope_fh.close()
            in_pdf_fh.close()

            path = tasks.move_scan_file(filename=dest_pdf_name)
            scan = Scan.objects.create(uploader=uploader,
                                       pdf=os.path.relpath(
                                           path, settings.MEDIA_ROOT),
                                       under_construction=True,
                                       org=org,
                                       source_id=source_id)
            # Called directly rather than through ``.delay``.
            tasks.split_scan(scan=scan)

        if packages:
            print "Manual action needed on the following at " \
                  "https://www.mailboxforwarding.com/:"
            for id_, details in packages.iteritems():
                # Entries needing manual action are counted as new as well.
                new_count += 1
                print details
        print "Examined {} letters, {} new.".format(len(data), new_count)
Ejemplo n.º 9
0
def edit_profile(request, user_id=None):
    """Render and handle the profile / user / profile-scan edit page.

    PermissionDenied is raised when the requesting user is allowed to edit
    neither the profile nor the user given by ``user_id``.  A valid POST
    saves the forms and redirects -- to the processing wait page when a
    file was uploaded, to the profile page otherwise.  Every other request
    renders the edit template.
    """
    #FIXME: org permission here
    allow_profile = can_edit_profile(request.user, user_id)
    allow_user = can_edit_user(request.user, user_id)
    if not (allow_profile or allow_user):
        raise PermissionDenied

    user = get_object_or_404(User, pk=user_id)
    # Newest published profile document by modification time, or None.
    profile_docs = Document.objects.filter(
        type="profile", status="published", author=user)
    try:
        document = profile_docs.order_by('-modified')[0]
    except IndexError:
        document = None

    # XXX Could probably simplify the permissions backflips by assuming that an
    # editor using this interface either has permissions to edit both
    # profile/user, or neither.

    profile_form = None
    scan_upload_form = None
    user_form = None
    ProfileForm = get_profile_form(request.user)
    posted = request.method == 'POST'

    if posted:
        if allow_profile:
            profile_form = ProfileForm(request.POST, instance=user.profile)
            scan_upload_form = ProfileUploadForm(request.POST, request.FILES)
        if allow_user:
            user_form = UserFormNoEmail(request.POST, instance=user)

        # A form the user may not edit is None and passes; checking stops
        # at the first form that fails validation.
        to_check = [profile_form, user_form, scan_upload_form]
        if all(not frm or frm.is_valid() for frm in to_check):
            if profile_form:
                profile_form.save()
            if user_form:
                user_form.save()
            if scan_upload_form and 'file' in request.FILES:
                pdf = move_scan_file(uploaded_file=request.FILES['file'])
                scan = Scan.objects.create(
                    uploader=user,
                    author=user,
                    pdf=pdf,
                )
                show_url = reverse('profiles.profile_show', args=[user_id])
                task_id = process_scan_to_profile.delay(scan.pk, show_url)
                return redirect('moderation.wait_for_processing',
                                task_id=task_id)
            messages.success(request, _("Changes saved."))
            return redirect('profiles.profile_show', user_id)
    else:
        # Unbound forms for display only.
        if allow_profile:
            profile_form = ProfileForm(instance=user.profile)
            scan_upload_form = ProfileUploadForm()
        if allow_user:
            user_form = UserFormNoEmail(instance=user)

    template_context = {
        'document': document,
        'profile_form': profile_form,
        'user_form': user_form,
        'scan_upload_form': scan_upload_form,
        'profile': user.profile,
        'can_edit_profile': allow_profile,
        'can_edit_user': allow_user,
    }
    return render(request, "profiles/profile_edit.html", template_context)
Ejemplo n.º 10
0
def load_test_data():
    """Populate the database with demo content described by a YAML fixture.

    Reads ``<MEDIA_ROOT>/test/test_data.yaml`` and then, in order:

    1. sets the current Site's domain and name;
    2. creates (or promotes) the admin users and sets their passwords;
    3. creates the organizations, their moderators, and the
       ``outgoing_mail_handled_by`` links between organizations;
    4. for every user in the fixture: fills in the profile, adds the user to
       their organizations, builds a Scan plus Documents, DocumentPages and
       Comments for each "received" correspondence entry, and afterwards a
       Letter for each "sent" entry.

    The uploader account is looked up and must already exist, as must the
    ``moderators`` Group whenever a moderator is listed; the commenter account
    is created unconditionally.
    """
    data_file = os.path.join(settings.MEDIA_ROOT, "test", "test_data.yaml")
    uploader = User.objects.get(username='******')
    commenter = User.objects.create(username="******")
    with open(data_file) as fh:
        data = yaml.safe_load(fh)

    # Organization objects keyed by name, for membership and scan lookups.
    orgs = {}

    print("Setting site...")
    site = Site.objects.get_current()
    site.domain = data['site']['domain']
    site.name = data['site']['name']
    site.save()

    print("Adding admins...")
    for admin_data in data['admins']:
        # Look up by username only: putting the flags in the lookup would try
        # to create a duplicate username when the account already exists
        # without them.  The flags are then enforced on whichever row we got.
        user, created = User.objects.get_or_create(
            username=admin_data['username'],
        )
        user.is_superuser = True
        user.is_staff = True
        user.set_password(admin_data['password'])
        user.save()

    print("Adding orgs...")
    for org_data in data['orgs']:
        # ``name`` is the identifying key (it is also what the lookups below
        # use); everything else only applies when the row is first created,
        # so re-running with edited fixture text reuses the existing org.
        org, created = Organization.objects.get_or_create(
            name=org_data['name'],
            defaults={
                'personal_contact': org_data['personal_contact'],
                'slug': slugify(org_data['name']),
                'public': org_data['public'],
                'mailing_address': org_data['mailing_address'],
                'about': org_data.get('about', ''),
                'footer': org_data.get('footer', ''),
            },
        )
        orgs[org_data['name']] = org
        for mod_data in org_data['moderators']:
            moderator, created = User.objects.get_or_create(
                username=mod_data['username']
            )
            moderator.set_password(mod_data['password'])
            moderator.save()
            org.moderators.add(moderator)
            Group.objects.get(name='moderators').user_set.add(moderator)

    # Second pass, once every org exists, so one org can point at another.
    for org_data in data['orgs']:
        mail_handled_by = org_data.get('outgoing_mail_handled_by', None)
        if mail_handled_by:
            org = Organization.objects.get(name=org_data['name'])
            mailer = Organization.objects.get(name=mail_handled_by)
            org.outgoing_mail_handled_by = mailer
            org.save()

    # Placeholder body shared by every generated comment.
    filler_comment = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis."

    print("Building pdfs and users...")
    for user_data in data['users']:
        user, created = User.objects.get_or_create(
            username=slugify(user_data['name'])
        )
        managed = user_data.get('managed', False)

        profile = user.profile
        profile.display_name = user_data['name']
        # Only managed users get a (fake) postal address.
        profile.mailing_address = _random_mailing_address() if managed else ""
        profile.blogger = user_data.get('blogger', False)
        profile.managed = managed
        profile.consent_form_received = user_data.get(
            'consent_form_received', False)
        profile.blog_name = user_data.get('blog_name', None) or ''
        profile.save()

        user_orgs = user_data['orgs']
        for org_name in user_orgs:
            orgs[org_name].members.add(user)

        # Scans are filed under the user's last-listed org.  This used to be
        # read from the leaked loop variable above, which silently reused the
        # previous user's org whenever this user listed none.
        # NOTE(review): a user without orgs now gets a scan with org=None --
        # confirm that Scan.org accepts None.
        scan_org = orgs[user_orgs[-1]] if user_orgs else None

        for corresp in user_data['correspondence']:
            # Each entry is a one-item mapping: {direction: content}.
            direction, content = list(corresp.items())[0]
            if direction != "received":
                continue

            # Build Scan
            pdf = build_pdf(content['parts'], profile)
            path = tasks.move_scan_file(filename=pdf)
            scan = Scan.objects.create(
                uploader=uploader,
                org=scan_org,
                author=user,
                pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                under_construction=True,
                processing_complete=True,
                created=content['date'])
            # execute synchronously
            tasks.split_scan(scan_id=scan.pk)

            # Build Documents
            page_count = 1  # ignore envelope
            for part in content['parts']:
                first_page = page_count
                # Ignored parts still occupy pages in the scan, so advance
                # the counter before deciding whether to skip the part.
                page_count += part["pages"]
                if part["type"] == "ignore":
                    continue
                document = Document.objects.create(
                    scan=scan,
                    editor=uploader,
                    author=user,
                    type=part["type"],
                    date_written=content["date"],
                    created=content["date"],
                    title=part.get("title", None) or "",
                )
                for order, page_index in enumerate(
                        range(first_page, page_count)):
                    scanpage = scan.scanpage_set.get(order=page_index)
                    DocumentPage.objects.create(
                        document=document,
                        scan_page=scanpage,
                        order=order)
                if part["type"] in ("profile", "post"):
                    document.status = "published"
                else:
                    document.status = "unpublishable"
                document.highlight_transform = '{"document_page_id": %s, "crop": [44.5, 58.66667175292969, 582.5, 288.6666717529297]}' % document.documentpage_set.all()[0].pk
                document.save()
                # execute synchronously
                tasks.update_document_images(document.pk)
                for comment in part.get('comments', []):
                    Comment.objects.create(
                        user=commenter,
                        comment=filler_comment,
                        document=document,
                        created=comment['date'],
                    )

        # Finish all received scans before building letters, so that the
        # comments a "comments" letter collects already exist.
        for corresp in user_data['correspondence']:
            direction, content = list(corresp.items())[0]
            if direction != "sent":
                continue
            letter = Letter(
                type=content['type'],
                auto_generated=True,
                sender=uploader,
                created=content['date'],
                sent=content['date'],
                recipient=user,
                org=Organization.objects.get(name=user_data['orgs'][0]))
            if content['type'] == "comments":
                # The letter is saved before comments are attached to it.
                letter.save()
                comments = Comment.objects.unmailed().filter(
                    document__author=user,
                    created__lt=content['date']
                )
                for comment in comments:
                    letter.comments.add(comment)
            elif content['type'] == "letter":
                letter.body = content['body']
            letter.save()


def _random_mailing_address():
    """Return a made-up three-line mailing address with random digits."""
    return "\n".join([
        # Prisoner number
        "#%s" % "".join(random.choice(string.digits) for a in range(8)),
        # Street
        "%s Cherry Tree Lane" % "".join(
            random.choice(string.digits) for a in range(3)),
        # City, state, zip
        "City Name, %s  %s" % (
            random.choice(US_STATES)[0],
            "".join(random.choice(string.digits) for a in range(5)),
        )
    ])
Ejemplo n.º 11
0
    def handle(self, *args, **kwargs):
        """Import newly scanned letters from mailboxforwarding.com as Scans.

        Logs in with the ``MAILBOX_FORWARDING`` credentials from settings,
        scrapes the item listing embedded in the returned page, and for every
        scanned item whose ``source_id`` is not yet stored on a Scan:
        downloads the PDF and the envelope image, prepends the envelope as the
        first page, creates a Scan for the merged PDF and splits it.

        Non-letter items that are not scanned are printed at the end as
        needing manual action.

        Exits the process with status 1 when the credentials are not
        configured.  Raises ``Exception`` when the listing cannot be found or
        a row cannot be parsed; a failed PDF or envelope download raises the
        error produced by ``raise_for_status()``.
        """
        base_url = "https://www.mailboxforwarding.com/"

        credentials = getattr(settings, "MAILBOX_FORWARDING", None)
        if (not credentials or
                "username" not in credentials or
                "password" not in credentials):
            print("Requires MAILBOX_FORWARDING settings, e.g.:")
            print('MAILBOX_FORWARDING = {')
            print('  "username": "******",')
            print('  "password": "******",')
            print('}')
            print("exit 1")
            sys.exit(1)

        sess = requests.Session()
        res = sess.post(base_url + "manage/login.php", {
            "action": "login",
            "email": credentials["username"],
            "password": credentials["password"],
            "loginsubmit.x": "0",
            "loginsubmit.y": "0"
        })
        # This is a slightly dirty hack -- we're matching a javascript data
        # structure with a regex, converting the quotes to doubles so it
        # resembles JSON, and then loading it as JSON.  This may prove brittle.
        match = re.search(
            r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL)
        if not match:
            raise Exception("Can't find data. Are login creds correct?")
        text = match.group(1)
        # Escape existing double quotes first, then promote the single-quoted
        # javascript strings to JSON double-quoted strings.
        text = text.replace('"', '\\"')
        text = text.replace("'", '"')
        data = json.loads(text)

        # Raw strings: "\?" and "\d" are not valid escapes in a plain string
        # literal, and the dot in "pdfview.php" is meant literally.
        type_status_re = re.compile(
            r"Type: <b>([^<]+)</b>.*Status: <b>([^<]+)</b>")
        pdf_id_re = re.compile(r"pdfview\.php\?id=(\d+)")
        envelope_src_re = re.compile(r'src="([^"]+)"')

        scans = {}
        packages = {}
        # Each row has five cells; only the envelope, type/status and
        # download cells are used.
        for _checkbox, _date, envelope, type_status, dl in data:
            details = {}
            match = type_status_re.search(type_status)
            if not match:
                raise Exception("Can't match type/status")
            details['kind'] = match.group(1)
            details['status'] = match.group(2)

            # Letters that have not been scanned yet have nothing to import.
            if details['kind'] == "Letter" and details['status'] != "Scanned":
                continue

            match = pdf_id_re.search(dl)
            if not match:
                # TODO: Handle packages correctly
                continue
            id_ = match.group(1)

            match = envelope_src_re.search(envelope)
            if not match:
                raise Exception("Can't match envelope image")
            details['envelope'] = match.group(1)

            if details['status'] == "Scanned":
                scans[id_] = details
            elif details['kind'] != "Letter":
                packages[id_] = details

        uploader = User.objects.get(username="******")
        org = Organization.objects.get(pk=1)  # TODO: generalize this?

        new_count = 0
        for id_, details in scans.items():
            source_id = "mailboxforwarding.com-{}".format(id_)
            # Already imported on a previous run.
            if Scan.objects.filter(source_id=source_id).exists():
                continue
            new_count += 1

            print("Downloading pdf %s" % source_id)
            res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_))
            # Fail here rather than handing an error page to the PDF parser.
            res.raise_for_status()
            in_pdf_fh = StringIO()
            in_pdf_fh.write(res.content)
            in_pdf_fh.seek(0)
            reader = PdfFileReader(in_pdf_fh)

            print("Downloading envelope %s" % details['envelope'])
            res = sess.get(details['envelope'])
            res.raise_for_status()
            in_envelope_fh = StringIO()
            in_envelope_fh.write(res.content)
            in_envelope_fh.seek(0)
            # Convert the envelope image to a PDF so it can be merged in.
            img = Image.open(in_envelope_fh)
            out_envelope_fh = StringIO()
            img.save(out_envelope_fh, "pdf")
            envelope_reader = PdfFileReader(out_envelope_fh)

            # Envelope first, then every page of the scanned letter.
            writer = PdfFileWriter()
            writer.addPage(envelope_reader.getPage(0))
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

            # delete=False: the file must outlive the ``with`` block so that
            # move_scan_file can pick it up by name below.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh:
                writer.write(fh)
                dest_pdf_name = fh.name

            in_envelope_fh.close()
            out_envelope_fh.close()
            in_pdf_fh.close()

            path = tasks.move_scan_file(filename=dest_pdf_name)
            scan = Scan.objects.create(
                uploader=uploader,
                pdf=os.path.relpath(path, settings.MEDIA_ROOT),
                under_construction=True,
                org=org,
                source_id=source_id
            )
            tasks.split_scan(scan=scan)

        if packages:
            print("Manual action needed on the following at "
                  "https://www.mailboxforwarding.com/:")
            for id_, details in packages.items():
                # Packages needing manual action are included in the "new"
                # total reported below.
                new_count += 1
                print(details)
        print("Examined {} letters, {} new.".format(len(data), new_count))