def load_test_data(): data_file = os.path.join(settings.MEDIA_ROOT, "test", "test_data.yaml") uploader = User.objects.get(username='******') commenter = User.objects.create(username="******") with open(data_file) as fh: data = yaml.safe_load(fh) orgs = {} print "Setting site..." site = Site.objects.get_current() site.domain = data['site']['domain'] site.name = data['site']['name'] site.save() print "Adding admins..." for admin_data in data['admins']: user, created = User.objects.get_or_create( username=admin_data['username'], is_superuser=True, is_staff=True, ) user.set_password(admin_data['password']) user.save() print "Adding orgs..." for org_data in data['orgs']: org, created = Organization.objects.get_or_create( name=org_data['name'], personal_contact=org_data['personal_contact'], slug=slugify(org_data['name']), public=org_data['public'], mailing_address=org_data['mailing_address'], about=org_data.get('about', ''), footer=org_data.get('footer', ''), ) orgs[org_data['name']] = org for mod_data in org_data['moderators']: u, created = User.objects.get_or_create( username=mod_data['username']) u.set_password(mod_data['password']) u.save() org.moderators.add(u) Group.objects.get(name='moderators').user_set.add(u) for org_data in data['orgs']: mail_handled_by = org_data.get('outgoing_mail_handled_by', None) if mail_handled_by: org = Organization.objects.get(name=org_data['name']) mailer = Organization.objects.get(name=mail_handled_by) org.outgoing_mail_handled_by = mailer org.save() print "Building pdfs and users..." for user_data in data['users']: user, created = User.objects.get_or_create( username=slugify(user_data['name'])) if user_data.get('managed', False): random_mailing_address = "\n".join([ # Prisoner number "#%s" % "".join(random.choice(string.digits) for a in range(8)), # Street "%s Cherry Tree Lane" % "".join(random.choice(string.digits) for a in range(3)), # City, state, zip "City Name, %s %s" % ( random.choice(US_STATES)[0], "".join(random.choice(string.digits) for a in range(5)), ) ]) else: random_mailing_address = "" user.profile.display_name = user_data['name'] user.profile.mailing_address = random_mailing_address user.profile.blogger = user_data.get('blogger', False) user.profile.managed = user_data.get('managed', False) user.profile.consent_form_received = user_data.get( 'consent_form_received', False) user.profile.blog_name = user_data.get('blog_name', None) or '' user.profile.save() for org_name in user_data['orgs']: orgs[org_name].members.add(user) for corresp in user_data['correspondence']: direction, content = corresp.items()[0] if direction == "received": # Build Scan pdf = build_pdf(content['parts'], user.profile) path = tasks.move_scan_file(filename=pdf) scan = Scan.objects.create(uploader=uploader, org=orgs[org_name], author=user, pdf=os.path.relpath( path, settings.MEDIA_ROOT), under_construction=True, processing_complete=True, created=content['date']) # execute synchronously tasks.split_scan(scan_id=scan.pk) # Build Documents page_count = 1 # ignore envelope for part in content['parts']: page_count += part["pages"] if part["type"] == "ignore": continue document = Document.objects.create( scan=scan, editor=uploader, author=user, type=part["type"], date_written=content["date"], created=content["date"], title=part.get("title", None) or "", ) for i, page_index in enumerate( range(page_count - part["pages"], page_count)): scanpage = scan.scanpage_set.get(order=page_index) DocumentPage.objects.create(document=document, scan_page=scanpage, order=i) # execute synchronously if part["type"] in ("profile", "post"): document.status = "published" else: document.status = "unpublishable" document.highlight_transform = '{"document_page_id": %s, "crop": [44.5, 58.66667175292969, 582.5, 288.6666717529297]}' % document.documentpage_set.all( )[0].pk document.save() tasks.update_document_images(document.pk) for comment in part.get('comments', []): Comment.objects.create( user=commenter, comment= "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis.", document=document, created=comment['date'], ) # Finish received scans before parsing letters, to ensure comments/etc # are there yet. for corresp in user_data['correspondence']: direction, content = corresp.items()[0] if direction == "sent": letter = Letter( type=content['type'], auto_generated=True, sender=uploader, created=content['date'], sent=content['date'], recipient=user, org=Organization.objects.get(name=user_data['orgs'][0])) if content['type'] == "comments": letter.save() comments = Comment.objects.unmailed().filter( document__author=user, created__lt=content['date']) for comment in comments: letter.comments.add(comment) elif content['type'] == "letter": letter.body = content['body'] letter.save()
def handle(self, *args, **kwargs): base_url = "https://www.mailboxforwarding.com/" if (not hasattr(settings, "MAILBOX_FORWARDING") or not "username" in settings.MAILBOX_FORWARDING or not "password" in settings.MAILBOX_FORWARDING): print "Requires MAILBOX_FORWARDING settings, e.g.:" print 'MAILBOX_FORWARDING = {' print ' "username": "******",' print ' "password": "******",' print '}' print "exit 1" sys.exit(1) sess = requests.Session() res = sess.post(base_url + "manage/login.php", { "action": "login", "email": settings.MAILBOX_FORWARDING["username"], "password": settings.MAILBOX_FORWARDING["password"], "loginsubmit.x": "0", "loginsubmit.y": "0" }) # This is a dslightly dirty hack -- we're matching a javascript data # structure with a regex, converting the quotes to doubles so it resembles # JSON, and then loading it as JSON. This may prove brittle. match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL) text = match.group(1) text = text.replace('"', '\\"') text = text.replace("'", '"') data = json.loads(text) scans = {} packages = {} for a,b,date,c,kind,status,dl in data: match = re.search("pdfview.php\?id=(\d+)", dl) if match: id_ = match.group(1) else: id_ = None obj = {"date": date, "kind": kind, "status": status} if status == "Scanned": scans[id_] = obj else: packages[id_] = obj uploader = User.objects.get(username="******") org = Organization.objects.get(pk=1) #TODO: generalize this? new_count = 0 for id_,details in scans.iteritems(): source_id = "mailboxforwarding.com-{}".format(id_) if Scan.objects.filter(source_id=source_id).exists(): continue new_count += 1 print "Downloading", source_id with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh: res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_)) fh.write(res.content) name = fh.name path = tasks.move_scan_file(filename=name) scan = Scan.objects.create( uploader=uploader, pdf=os.path.relpath(path, settings.MEDIA_ROOT), under_construction=True, org=org, source_id=source_id ) tasks.split_scan(scan=scan) if packages: print "Manual action needed on the following at " \ "https://www.mailboxforwarding.com/:" for id_,details in packages.iteritems(): new_count += 1 print details print "Examined {} letters, {} new.".format(len(data), new_count)
def load_test_data(): data_file = os.path.join(settings.MEDIA_ROOT, "test", "test_data.yaml") uploader = User.objects.get(username='******') commenter = User.objects.create(username="******") with open(data_file) as fh: data = yaml.safe_load(fh) orgs = {} print "Setting site..." site = Site.objects.get_current() site.domain = data['site']['domain'] site.name = data['site']['name'] site.save() print "Adding admins..." for admin_data in data['admins']: user, created = User.objects.get_or_create( username=admin_data['username'], is_superuser=True, is_staff=True, ) user.set_password(admin_data['password']) user.save() print "Adding orgs..." for org_data in data['orgs']: org, created = Organization.objects.get_or_create( name=org_data['name'], personal_contact=org_data['personal_contact'], slug=slugify(org_data['name']), public=org_data['public'], mailing_address=org_data['mailing_address'], about=org_data.get('about', ''), footer=org_data.get('footer', ''), ) orgs[org_data['name']] = org for mod_data in org_data['moderators']: u, created = User.objects.get_or_create( username=mod_data['username'] ) u.set_password(mod_data['password']) u.save() org.moderators.add(u) Group.objects.get(name='moderators').user_set.add(u) for org_data in data['orgs']: mail_handled_by = org_data.get('outgoing_mail_handled_by', None) if mail_handled_by: org = Organization.objects.get(name=org_data['name']) mailer = Organization.objects.get(name=mail_handled_by) org.outgoing_mail_handled_by = mailer org.save() print "Building pdfs and users..." for user_data in data['users']: user, created = User.objects.get_or_create( username=slugify(user_data['name']) ) if user_data.get('managed', False): random_mailing_address = "\n".join([ # Prisoner number "#%s" % "".join(random.choice(string.digits) for a in range(8)), # Street "%s Cherry Tree Lane" % "".join( random.choice(string.digits) for a in range(3)), # City, state, zip "City Name, %s %s" % ( random.choice(US_STATES)[0], "".join(random.choice(string.digits) for a in range(5)), ) ]) else: random_mailing_address = "" user.profile.display_name = user_data['name'] user.profile.mailing_address = random_mailing_address user.profile.blogger = user_data.get('blogger', False) user.profile.managed = user_data.get('managed', False) user.profile.consent_form_received = user_data.get('consent_form_received', False) user.profile.blog_name = user_data.get('blog_name', None) or '' user.profile.save() for org_name in user_data['orgs']: orgs[org_name].members.add(user) for corresp in user_data['correspondence']: direction, content = corresp.items()[0] if direction == "received": # Build Scan pdf = build_pdf(content['parts'], user.profile) path = tasks.move_scan_file(filename=pdf) scan = Scan.objects.create( uploader=uploader, org=orgs[org_name], author=user, pdf=os.path.relpath(path, settings.MEDIA_ROOT), under_construction=True, processing_complete=True, created=content['date']) # execute synchronously tasks.split_scan(scan_id=scan.pk) # Build Documents page_count = 1 # ignore envelope for part in content['parts']: page_count += part["pages"] if part["type"] == "ignore": continue document = Document.objects.create( scan=scan, editor=uploader, author=user, type=part["type"], date_written=content["date"], created=content["date"], title=part.get("title", None) or "", ) for i, page_index in enumerate( range(page_count - part["pages"], page_count)): scanpage = scan.scanpage_set.get(order=page_index) DocumentPage.objects.create( document=document, scan_page=scanpage, order=i) # execute synchronously if part["type"] in ("profile", "post"): document.status = "published" else: document.status = "unpublishable" document.highlight_transform = '{"document_page_id": %s, "crop": [44.5, 58.66667175292969, 582.5, 288.6666717529297]}' % document.documentpage_set.all()[0].pk document.save() tasks.update_document_images(document.pk) for comment in part.get('comments', []): Comment.objects.create( user=commenter, comment="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec a diam lectus. Sed sit amet ipsum mauris. Maecenas congue ligula ac quam viverra nec consectetur ante hendrerit. Donec et mollis dolor. Praesent et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt congue enim, ut porta lorem lacinia consectetur. Donec ut libero sed arcu vehicula ultricies a non tortor. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ut gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id dolor. Pellentesque auctor nisi id magna consequat sagittis.", document=document, created=comment['date'], ) # Finish received scans before parsing letters, to ensure comments/etc # are there yet. for corresp in user_data['correspondence']: direction, content = corresp.items()[0] if direction == "sent": letter = Letter(type=content['type'], auto_generated=True, sender=uploader, created=content['date'], sent=content['date'], recipient=user, org=Organization.objects.get(name=user_data['orgs'][0])) if content['type'] == "comments": letter.save() comments = Comment.objects.unmailed().filter( document__author=user, created__lt=content['date'] ) for comment in comments: letter.comments.add(comment) elif content['type'] == "letter": letter.body = content['body'] letter.save()
def handle(self, *args, **kwargs): base_url = "https://www.mailboxforwarding.com/" if (not hasattr(settings, "MAILBOX_FORWARDING") or not "username" in settings.MAILBOX_FORWARDING or not "password" in settings.MAILBOX_FORWARDING): print "Requires MAILBOX_FORWARDING settings, e.g.:" print 'MAILBOX_FORWARDING = {' print ' "username": "******",' print ' "password": "******",' print '}' print "exit 1" sys.exit(1) sess = requests.Session() res = sess.post( base_url + "manage/login.php", { "action": "login", "email": settings.MAILBOX_FORWARDING["username"], "password": settings.MAILBOX_FORWARDING["password"], "loginsubmit.x": "0", "loginsubmit.y": "0" }) # This is a slightly dirty hack -- we're matching a javascript data # structure with a regex, converting the quotes to doubles so it resembles # JSON, and then loading it as JSON. This may prove brittle. match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL) if not match: raise Exception("Can't find data. Are login creds correct?") text = match.group(1) text = text.replace('"', '\\"') text = text.replace("'", '"') data = json.loads(text) scans = {} packages = {} for checkbox, date, envelope, type_status, dl in data: details = {} match = re.search("Status: <b>([^<]+)</b>.*Type: <b>([^<]+)</b>", type_status) if not match: raise Exception("Can't match type/status") details['kind'] = match.group(2) details['status'] = match.group(1) if details['kind'] == "Letter" and details['status'] != "Scanned": continue match = re.search("pdfview.php\?id=(\d+)", dl) if match: id_ = match.group(1) else: # TODO: Handle packages correctly continue #raise Exception("Can't find ID") match = re.search("src=\"([^\"]+)\"", envelope) if not match: raise Exception("Can't match envelope image") details['envelope'] = match.group(1) if details['status'] == "Scanned": scans[id_] = details elif details['kind'] != "Letter": packages[id_] = details uploader = User.objects.get(username="******") org = Organization.objects.get(pk=1) #TODO: generalize this? new_count = 0 for id_, details in scans.iteritems(): source_id = "mailboxforwarding.com-{}".format(id_) if Scan.objects.filter(source_id=source_id).exists(): continue new_count += 1 print "Downloading pdf", source_id res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_)) in_pdf_fh = StringIO() in_pdf_fh.write(res.content) in_pdf_fh.seek(0) reader = PdfFileReader(in_pdf_fh) print "Downloading envelope", details['envelope'] res = sess.get(details['envelope']) in_envelope_fh = StringIO() in_envelope_fh.write(res.content) in_envelope_fh.seek(0) img = Image.open(in_envelope_fh) out_envelope_fh = StringIO() img.save(out_envelope_fh, "pdf") envelope_reader = PdfFileReader(out_envelope_fh) writer = PdfFileWriter() writer.addPage(envelope_reader.getPage(0)) for page in range(reader.getNumPages()): writer.addPage(reader.getPage(page)) with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh: writer.write(fh) dest_pdf_name = fh.name in_envelope_fh.close() out_envelope_fh.close() in_pdf_fh.close() path = tasks.move_scan_file(filename=dest_pdf_name) scan = Scan.objects.create(uploader=uploader, pdf=os.path.relpath( path, settings.MEDIA_ROOT), under_construction=True, org=org, source_id=source_id) tasks.split_scan(scan=scan) if packages: print "Manual action needed on the following at " \ "https://www.mailboxforwarding.com/:" for id_, details in packages.iteritems(): new_count += 1 print details print "Examined {} letters, {} new.".format(len(data), new_count)
def handle(self, *args, **kwargs): base_url = "https://www.mailboxforwarding.com/" if (not hasattr(settings, "MAILBOX_FORWARDING") or not "username" in settings.MAILBOX_FORWARDING or not "password" in settings.MAILBOX_FORWARDING): print "Requires MAILBOX_FORWARDING settings, e.g.:" print 'MAILBOX_FORWARDING = {' print ' "username": "******",' print ' "password": "******",' print '}' print "exit 1" sys.exit(1) sess = requests.Session() res = sess.post(base_url + "manage/login.php", { "action": "login", "email": settings.MAILBOX_FORWARDING["username"], "password": settings.MAILBOX_FORWARDING["password"], "loginsubmit.x": "0", "loginsubmit.y": "0" }) # This is a slightly dirty hack -- we're matching a javascript data # structure with a regex, converting the quotes to doubles so it resembles # JSON, and then loading it as JSON. This may prove brittle. match = re.search(r"Ext\.grid\.dummyData = (\[.*\]\]);", res.text, re.DOTALL) if not match: raise Exception("Can't find data. Are login creds correct?") text = match.group(1) text = text.replace('"', '\\"') text = text.replace("'", '"') data = json.loads(text) scans = {} packages = {} for checkbox, date, envelope, type_status, dl in data: details = {} match = re.search("Type: <b>([^<]+)</b>.*Status: <b>([^<]+)</b>", type_status) if not match: raise Exception("Can't match type/status") details['kind'] = match.group(1) details['status'] = match.group(2) if details['kind'] == "Letter" and details['status'] != "Scanned": continue match = re.search("pdfview.php\?id=(\d+)", dl) if match: id_ = match.group(1) else: # TODO: Handle packages correctly continue #raise Exception("Can't find ID") match = re.search("src=\"([^\"]+)\"", envelope) if not match: raise Exception("Can't match envelope image") details['envelope'] = match.group(1) if details['status'] == "Scanned": scans[id_] = details elif details['kind'] != "Letter": packages[id_] = details uploader = User.objects.get(username="******") org = Organization.objects.get(pk=1) #TODO: generalize this? new_count = 0 for id_, details in scans.iteritems(): source_id = "mailboxforwarding.com-{}".format(id_) if Scan.objects.filter(source_id=source_id).exists(): continue new_count += 1 print "Downloading pdf", source_id res = sess.get("{}manage/pdfview.php?id={}".format(base_url, id_)) in_pdf_fh = StringIO() in_pdf_fh.write(res.content) in_pdf_fh.seek(0) reader = PdfFileReader(in_pdf_fh) print "Downloading envelope", details['envelope'] res = sess.get(details['envelope']) in_envelope_fh = StringIO() in_envelope_fh.write(res.content) in_envelope_fh.seek(0) img = Image.open(in_envelope_fh) out_envelope_fh = StringIO() img.save(out_envelope_fh, "pdf") envelope_reader = PdfFileReader(out_envelope_fh) writer = PdfFileWriter() writer.addPage(envelope_reader.getPage(0)) for page in range(reader.getNumPages()): writer.addPage(reader.getPage(page)) with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as fh: writer.write(fh) dest_pdf_name = fh.name in_envelope_fh.close() out_envelope_fh.close() in_pdf_fh.close() path = tasks.move_scan_file(filename=dest_pdf_name) scan = Scan.objects.create( uploader=uploader, pdf=os.path.relpath(path, settings.MEDIA_ROOT), under_construction=True, org=org, source_id=source_id ) tasks.split_scan(scan=scan) if packages: print "Manual action needed on the following at " \ "https://www.mailboxforwarding.com/:" for id_, details in packages.iteritems(): new_count += 1 print details print "Examined {} letters, {} new.".format(len(data), new_count)