def prelim_doc_form(request, template_name='document_submit.html'):
    """Handle the preliminary document submission form.

    When the bound ``PrelimDocumentForm`` validates, the uploaded file is
    wrapped in a ``Document``, ``connect_dc_doc()`` is called on it, and a
    ``PoliticalBuy`` referencing that document is created and given the
    broadcasters chosen in the form. The user is then redirected to the
    ``politicalbuy_edit`` view for the new buy.

    In every other case (unbound or invalid form) the form is rendered
    with ``template_name``.
    """
    form = PrelimDocumentForm(request.POST or None, request.FILES or None)

    # Nothing to process yet (or validation failed): show the form.
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    form.save(commit=False)
    upload = request.FILES['file']

    # Build the document record around the uploaded file.
    document = Document(
        file=upload,
        title=upload.name,
        user=request.user,
        access_level=DOCUMENTCLOUD_DEFAULT_ACCESS_LEVEL,
    )
    # Push it up before persisting the local record.
    document.connect_dc_doc()
    document.save()

    buy = PoliticalBuy(documentcloud_doc=document)
    # First save happens before the broadcasters are attached,
    # second one after the assignment.
    buy.save(request.user)
    buy.broadcasters = form.cleaned_data['broadcasters']
    buy.save(request.user)

    return redirect('politicalbuy_edit', uuid_key=buy.uuid_key)
def make_ad_buy_from_pdf_file(pdf_file):
    """Fetch an FCC PDF and create a Document and PoliticalBuy for it.

    The PDF at ``pdf_file.raw_url`` is downloaded with ``read_url``, written
    under ``SCRAPER_LOCAL_DOC_DIR``, and saved onto a new public ``Document``.
    A ``PoliticalBuy`` pointing at that document is then created, flagged as
    an FCC document, linked back to ``pdf_file`` and, when the file's folder
    has a broadcaster, associated with it. Finally
    ``pdf_file.in_document_cloud`` is set to True and saved.

    Args:
        pdf_file: object providing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True, always.

    Raises:
        IndexError: if there is no ``User`` row to own the records.
    """
    pdf_url = pdf_file.raw_url
    # The first User returned by the query owns everything created here.
    auser = User.objects.all()[0]

    tempfile_name = urllib2.unquote(urlparse(pdf_url).path)
    tempfile_name = tempfile_name.lstrip('/')
    # Flatten the URL path into one file name: each "/" becomes a literal
    # "%%" (no %-formatting is applied to this string).
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    with open(tempfile_full, "wb") as local_copy:
        local_copy.write(page)
    print("wrote the pdf")

    print("creating doc")
    d = Document(title=tempfile_name,
                 description="From the FCC's political files",
                 user=auser,
                 access_level='public')

    # Reopen the local copy in binary mode (it is a PDF) and make sure the
    # handle is released. It stays open until the document has been fully
    # saved, in case Document still reads from it during those calls.
    with open(tempfile_full, "rb") as pdf_handle:
        d.file.save('new', File(pdf_handle))
        print("saved via local")
        d.connect_dc_doc()
        d.save()
    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # Saved once before broadcasters are added, and again afterwards.
    pol_buy.save(auser)
    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
    pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
def make_ad_buy_from_pdf_file(pdf_file):
    """Fetch an FCC PDF and create a Document and PoliticalBuy for it.

    The PDF at ``pdf_file.raw_url`` is downloaded with ``read_url``, written
    under ``SCRAPER_LOCAL_DOC_DIR``, and saved onto a new public ``Document``.
    A ``PoliticalBuy`` pointing at that document is then created, flagged as
    an FCC document, linked back to ``pdf_file`` and, when the file's folder
    has a broadcaster, associated with it. Finally
    ``pdf_file.in_document_cloud`` is set to True and saved.

    Args:
        pdf_file: object providing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True, always.

    Raises:
        IndexError: if there is no ``User`` row to own the records.
    """
    pdf_url = pdf_file.raw_url
    # The first User returned by the query owns everything created here.
    auser = User.objects.all()[0]

    tempfile_name = urllib2.unquote(urlparse(pdf_url).path)
    tempfile_name = tempfile_name.lstrip('/')
    # Flatten the URL path into one file name: each "/" becomes a literal
    # "%%" (no %-formatting is applied to this string).
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    with open(tempfile_full, "wb") as local_copy:
        local_copy.write(page)
    print("wrote the pdf")

    print("creating doc")
    d = Document(title=tempfile_name,
                 description="From the FCC's political files",
                 user=auser,
                 access_level='public')

    # Reopen the local copy in binary mode (it is a PDF) and make sure the
    # handle is released. It stays open until the document has been fully
    # saved, in case Document still reads from it during those calls.
    with open(tempfile_full, "rb") as pdf_handle:
        d.file.save('new', File(pdf_handle))
        print("saved via local")
        d.connect_dc_doc()
        d.save()
    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # Saved once before broadcasters are added, and again afterwards.
    pol_buy.save(auser)
    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
    pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
def handle(self, *args, **options):
    """Create local Document records for the DocumentCloud project's docs.

    Looks up the project identified by ``DOCUMENTCLOUD_PROJECT_ID`` and walks
    its document ids. For every id without a ``DocumentCloudProperties`` row
    the document is fetched from DocumentCloud and, unless this is a dry run,
    its PDF is stored on a new ``Document`` and both records are saved.

    Options read:
        dry_run: when truthy, nothing is written to the database or storage.
        verbosity: values above 1 log each document; above 2 also log the
            title, description and access level of new documents.

    Raises:
        CommandError: if ``DOCUMENTCLOUD_PROJECT_ID`` is falsy or the project
            lookup returns a falsy value.
    """
    dry_run = options.get('dry_run')
    # Depending on the Django version, verbosity may arrive as a string
    # (e.g. '2'); coerce it so the numeric comparisons below are meaningful.
    raw_verbosity = options.get('verbosity', 1)
    verbosity = 1 if raw_verbosity is None else int(raw_verbosity)

    if dry_run:
        self.stdout.write(
            'Dry run. No new document records will be created.\n')

    client = get_client()
    self.stdout.write('Connecting to DocumentCloud...\n')
    project = client.projects.get(
        id=DOCUMENTCLOUD_PROJECT_ID) if DOCUMENTCLOUD_PROJECT_ID else None
    if not project:
        raise CommandError(
            'No DOCUMENTCLOUD_PROJECT_ID set in settings. Cannot proceed\n')

    self.stdout.write('Pulling document list for "{0}"...\n\n'.format(
        project.title))
    document_id_list = project.document_ids
    new_docs_list = []

    for doc_id in document_id_list:
        if verbosity > 1:
            self.stdout.write('Checking "{0}"\n'.format(doc_id))

        try:
            DocumentCloudProperties.objects.get(dc_id=doc_id)
        except DocumentCloudProperties.DoesNotExist:
            pass
        else:
            # Already known locally; nothing to create for this id.
            if verbosity > 1:
                self.stdout.write(
                    'DocumentCloudProperties record for "{0}" already exists\n'
                    .format(doc_id))
            continue

        dc_obj = client.documents.get(id=doc_id)
        new_doc_props = DocumentCloudProperties(
            dc_id=dc_obj.id, dc_url=dc_obj.canonical_url)
        if verbosity > 1:
            self.stdout.write('Creating record for {0}\n'.format(dc_obj.id))
        if verbosity > 2:
            self.stdout.write(
                'with:\n\tTitle: {title}\n\tDescription {description}\n\tAccess: {access}\n'
                .format(title=dc_obj.title,
                        description=dc_obj.description,
                        access=dc_obj.access))

        new_doc = Document(title=dc_obj.title,
                           description=dc_obj.description,
                           access_level=dc_obj.access)
        filename = os.path.basename('{0}.pdf'.format(dc_obj.id))
        if verbosity > 1:
            self.stdout.write('Saving file named {0}\n'.format(filename))

        if dry_run:
            # A dry run stops before anything is written.
            continue

        djfp = File(TemporaryFile())
        try:
            # Stage the PDF in a temporary file and rewind it for the save.
            djfp.write(dc_obj.pdf)
            djfp.seek(0)
            try:
                new_doc.file.save(filename, djfp)
                if not new_doc.file.closed:
                    new_doc.file.close()
            except AttributeError as e:
                # NOTE(review): an AttributeError here is treated as
                # non-fatal; the reason is not evident from this code.
                if verbosity > 1:
                    self.stderr.write(
                        'Error saving doc:\n\t"{error_message}"\n'.format(
                            error_message=repr(e)))
                file_url = new_doc.file.url
                if file_url:
                    self.stdout.write(
                        "New file at:\n\t{0}\n".format(file_url))
        finally:
            # Always release the temporary file, even if fetching or
            # saving the PDF failed.
            djfp.close()

        new_doc_props.save()
        new_doc.dc_properties = new_doc_props
        new_doc.save()
        new_docs_list.append(doc_id)

    # Blank line between the per-document output and the summary.
    self.stdout.write('\n')
    self.stdout.write(
        'Added {num_docs} new Document records (out of {num_dc_docs})\n'
        .format(num_docs=len(new_docs_list),
                num_dc_docs=len(document_id_list)))
def handle(self, *args, **options):
    """Create local Document records for the DocumentCloud project's docs.

    Looks up the project identified by ``DOCUMENTCLOUD_PROJECT_ID`` and walks
    its document ids. For every id without a ``DocumentCloudProperties`` row
    the document is fetched from DocumentCloud and, unless this is a dry run,
    its PDF is stored on a new ``Document`` and both records are saved.

    Options read:
        dry_run: when truthy, nothing is written to the database or storage.
        verbosity: values above 1 log each document; above 2 also log the
            title, description and access level of new documents.

    Raises:
        CommandError: if ``DOCUMENTCLOUD_PROJECT_ID`` is falsy or the project
            lookup returns a falsy value.
    """
    dry_run = options.get("dry_run")
    # Depending on the Django version, verbosity may arrive as a string
    # (e.g. "2"); coerce it so the numeric comparisons below are meaningful.
    raw_verbosity = options.get("verbosity", 1)
    verbosity = 1 if raw_verbosity is None else int(raw_verbosity)

    if dry_run:
        self.stdout.write("Dry run. No new document records will be created.\n")

    client = get_client()
    self.stdout.write("Connecting to DocumentCloud...\n")
    project = client.projects.get(id=DOCUMENTCLOUD_PROJECT_ID) if DOCUMENTCLOUD_PROJECT_ID else None
    if not project:
        raise CommandError("No DOCUMENTCLOUD_PROJECT_ID set in settings. Cannot proceed\n")

    self.stdout.write('Pulling document list for "{0}"...\n\n'.format(project.title))
    document_id_list = project.document_ids
    new_docs_list = []

    for doc_id in document_id_list:
        if verbosity > 1:
            self.stdout.write('Checking "{0}"\n'.format(doc_id))

        try:
            DocumentCloudProperties.objects.get(dc_id=doc_id)
        except DocumentCloudProperties.DoesNotExist:
            pass
        else:
            # Already known locally; nothing to create for this id.
            if verbosity > 1:
                self.stdout.write('DocumentCloudProperties record for "{0}" already exists\n'.format(doc_id))
            continue

        dc_obj = client.documents.get(id=doc_id)
        new_doc_props = DocumentCloudProperties(dc_id=dc_obj.id, dc_url=dc_obj.canonical_url)
        if verbosity > 1:
            self.stdout.write("Creating record for {0}\n".format(dc_obj.id))
        if verbosity > 2:
            self.stdout.write(
                "with:\n\tTitle: {title}\n\tDescription {description}\n\tAccess: {access}\n".format(
                    title=dc_obj.title, description=dc_obj.description, access=dc_obj.access
                )
            )

        new_doc = Document(title=dc_obj.title, description=dc_obj.description, access_level=dc_obj.access)
        filename = os.path.basename("{0}.pdf".format(dc_obj.id))
        if verbosity > 1:
            self.stdout.write("Saving file named {0}\n".format(filename))

        if dry_run:
            # A dry run stops before anything is written.
            continue

        djfp = File(TemporaryFile())
        try:
            # Stage the PDF in a temporary file and rewind it for the save.
            djfp.write(dc_obj.pdf)
            djfp.seek(0)
            try:
                new_doc.file.save(filename, djfp)
                if not new_doc.file.closed:
                    new_doc.file.close()
            except AttributeError as e:
                # NOTE(review): an AttributeError here is treated as
                # non-fatal; the reason is not evident from this code.
                if verbosity > 1:
                    self.stderr.write('Error saving doc:\n\t"{error_message}"\n'.format(error_message=repr(e)))
                file_url = new_doc.file.url
                if file_url:
                    self.stdout.write("New file at:\n\t{0}\n".format(file_url))
        finally:
            # Always release the temporary file, even if fetching or
            # saving the PDF failed.
            djfp.close()

        new_doc_props.save()
        new_doc.dc_properties = new_doc_props
        new_doc.save()
        new_docs_list.append(doc_id)

    # Blank line between the per-document output and the summary.
    self.stdout.write("\n")
    self.stdout.write(
        "Added {num_docs} new Document records (out of {num_dc_docs})\n".format(
            num_docs=len(new_docs_list), num_dc_docs=len(document_id_list)
        )
    )