Exemple #1
0
def prelim_doc_form(request, template_name='document_submit.html'):
    """Process the preliminary document-submission form.

    When the form is unbound or invalid, the template is rendered with the
    form so it can be (re)displayed. When it validates, the uploaded file is
    wrapped in a ``Document``, passed through ``connect_dc_doc()``, attached
    to a new ``PoliticalBuy`` together with the selected broadcasters, and
    the response redirects to ``politicalbuy_edit`` for that buy.

    Args:
        request: the incoming HTTP request; ``POST``, ``FILES`` and ``user``
            are read from it.
        template_name: template used to render the form.

    Returns:
        A rendered form page, or a redirect to the new buy's edit view.
    """
    form = PrelimDocumentForm(request.POST or None, request.FILES or None)

    # Guard clause: nothing valid to process, so show the form.
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    form.save(commit=False)
    upload = request.FILES['file']

    # Build the DocumentCloud-backed model from the upload, then push it.
    document = Document(file=upload,
                        title=upload.name,
                        user=request.user,
                        access_level=DOCUMENTCLOUD_DEFAULT_ACCESS_LEVEL)
    document.connect_dc_doc()
    document.save()

    # The buy is saved once before the broadcasters are assigned and once
    # after (presumably the relation needs a saved row first -- confirm).
    buy = PoliticalBuy(documentcloud_doc=document)
    buy.save(request.user)
    buy.broadcasters = form.cleaned_data['broadcasters']
    buy.save(request.user)

    return redirect('politicalbuy_edit', uuid_key=buy.uuid_key)
def make_ad_buy_from_pdf_file(pdf_file):
    """Turn a scraped FCC PDF record into a Document and a PoliticalBuy.

    The PDF at ``pdf_file.raw_url`` is fetched with ``read_url`` and written
    under ``SCRAPER_LOCAL_DOC_DIR``. That local copy is saved into a new
    public ``Document``, which is then passed through ``connect_dc_doc()``.
    A ``PoliticalBuy`` flagged as an FCC document is created for it, linked
    to ``pdf_file`` and, when the file's folder has one, to its broadcaster.
    Finally ``pdf_file.in_document_cloud`` is set and saved.

    Args:
        pdf_file: record exposing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True once every step has completed.
    """
    pdf_url = pdf_file.raw_url
    # NOTE(review): uses whichever user the default queryset yields first and
    # raises IndexError when there are no users -- confirm this is intended.
    auser = User.objects.all()[0]

    tempfile_name = urllib2.unquote(urlparse(pdf_url).path)
    tempfile_name = tempfile_name.lstrip('/')
    # Flatten the URL path into a single file name. str.replace is not a
    # format string, so every "/" becomes the two characters "%%".
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    # Context manager guarantees the handle is released even if write fails.
    with open(tempfile_full, "wb") as local_copy:
        local_copy.write(page)
    print("wrote the pdf")

    # Read the PDF back in binary mode. The handle stays open until the
    # Document has been connected and saved, then is closed (it used to be
    # leaked for the life of the process).
    with open(tempfile_full, "rb") as pdf_handle:
        djangofile = File(pdf_handle)

        print("creating doc")
        d = Document(title=tempfile_name,
                     description="From the FCC's political files",
                     user=auser,
                     access_level='public')

        d.file.save('new', djangofile)
        print("saved via local")
        d.connect_dc_doc()
        d.save()

    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    pol_buy.save(auser)

    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
        pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
Exemple #3
0
def prelim_doc_form(request, template_name='document_submit.html'):
    """Process the preliminary document-submission form.

    When the form is unbound or invalid, the template is rendered with the
    form so it can be (re)displayed. When it validates, the uploaded file is
    wrapped in a ``Document``, passed through ``connect_dc_doc()``, attached
    to a new ``PoliticalBuy`` together with the selected broadcasters, and
    the response redirects to ``politicalbuy_edit`` for that buy.

    Args:
        request: the incoming HTTP request; ``POST``, ``FILES`` and ``user``
            are read from it.
        template_name: template used to render the form.

    Returns:
        A rendered form page, or a redirect to the new buy's edit view.
    """
    form = PrelimDocumentForm(request.POST or None, request.FILES or None)

    # Guard clause: nothing valid to process, so show the form.
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    form.save(commit=False)
    upload = request.FILES['file']

    # Build the DocumentCloud-backed model from the upload, then push it.
    document = Document(file=upload,
                        title=upload.name,
                        user=request.user,
                        access_level=DOCUMENTCLOUD_DEFAULT_ACCESS_LEVEL)
    document.connect_dc_doc()
    document.save()

    # The buy is saved once before the broadcasters are assigned and once
    # after (presumably the relation needs a saved row first -- confirm).
    buy = PoliticalBuy(documentcloud_doc=document)
    buy.save(request.user)
    buy.broadcasters = form.cleaned_data['broadcasters']
    buy.save(request.user)

    return redirect('politicalbuy_edit', uuid_key=buy.uuid_key)
Exemple #4
0
def make_ad_buy_from_pdf_file(pdf_file):
    """Turn a scraped FCC PDF record into a Document and a PoliticalBuy.

    The PDF at ``pdf_file.raw_url`` is fetched with ``read_url`` and written
    under ``SCRAPER_LOCAL_DOC_DIR``. That local copy is saved into a new
    public ``Document``, which is then passed through ``connect_dc_doc()``.
    A ``PoliticalBuy`` flagged as an FCC document is created for it, linked
    to ``pdf_file`` and, when the file's folder has one, to its broadcaster.
    Finally ``pdf_file.in_document_cloud`` is set and saved.

    Args:
        pdf_file: record exposing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True once every step has completed.
    """
    pdf_url = pdf_file.raw_url
    # NOTE(review): uses whichever user the default queryset yields first and
    # raises IndexError when there are no users -- confirm this is intended.
    auser = User.objects.all()[0]

    tempfile_name = urllib2.unquote(urlparse(pdf_url).path)
    tempfile_name = tempfile_name.lstrip('/')
    # Flatten the URL path into a single file name. str.replace is not a
    # format string, so every "/" becomes the two characters "%%".
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    # Context manager guarantees the handle is released even if write fails.
    with open(tempfile_full, "wb") as local_copy:
        local_copy.write(page)
    print("wrote the pdf")

    # Read the PDF back in binary mode. The handle stays open until the
    # Document has been connected and saved, then is closed (it used to be
    # leaked for the life of the process).
    with open(tempfile_full, "rb") as pdf_handle:
        djangofile = File(pdf_handle)

        print("creating doc")
        d = Document(title=tempfile_name,
                     description="From the FCC's political files",
                     user=auser,
                     access_level='public')

        d.file.save('new', djangofile)
        print("saved via local")
        d.connect_dc_doc()
        d.save()

    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    pol_buy.save(auser)

    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
        pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
Exemple #5
0
    def handle(self, *args, **options):
        """Create local Document records for a DocumentCloud project's documents.

        Iterates over every document id of the project identified by
        ``DOCUMENTCLOUD_PROJECT_ID``. For each id that has no matching
        ``DocumentCloudProperties`` row, the remote document is fetched and a
        ``DocumentCloudProperties`` / ``Document`` pair is built, with the
        remote PDF stored in ``Document.file``.

        Options read:
            dry_run: when truthy, no PDF is written and no record is saved;
                the ids are still counted in the final summary.
            verbosity: levels above 1 and above 2 print progressively more
                detail (defaults to 1).

        Raises:
            CommandError: if ``DOCUMENTCLOUD_PROJECT_ID`` is falsy or the
                project lookup returns a falsy value.
        """
        dry_run = options.get('dry_run')
        verbosity = options.get('verbosity', 1)

        if dry_run:
            self.stdout.write(
                'Dry run. No new document records will be created.\n')

        client = get_client()
        self.stdout.write('Connecting to DocumentCloud...\n')
        project = client.projects.get(
            id=DOCUMENTCLOUD_PROJECT_ID) if DOCUMENTCLOUD_PROJECT_ID else None

        if not project:
            raise CommandError(
                'No DOCUMENTCLOUD_PROJECT_ID set in settings. Cannot proceed\n'
            )

        self.stdout.write('Pulling document list for "{0}"...\n\n'.format(
            project.title))
        document_id_list = project.document_ids
        new_docs_list = []
        for doc_id in document_id_list:
            if verbosity > 1:
                self.stdout.write('Checking "{0}"\n'.format(doc_id))
            try:
                # Only existence matters; a missing row raises DoesNotExist.
                DocumentCloudProperties.objects.get(dc_id=doc_id)
                if verbosity > 1:
                    self.stdout.write(
                        'DocumentCloudProperties record for "{0}" already exists\n'
                        .format(doc_id))
            except DocumentCloudProperties.DoesNotExist:
                dc_obj = client.documents.get(id=doc_id)
                new_doc_props = DocumentCloudProperties(
                    dc_id=dc_obj.id, dc_url=dc_obj.canonical_url)
                if verbosity > 1:
                    self.stdout.write('Creating record for {0}\n'.format(
                        dc_obj.id))
                    if verbosity > 2:
                        self.stdout.write(
                            'with:\n\tTitle: {title}\n\tDescription {description}\n\tAccess: {access}\n'
                            .format(title=dc_obj.title,
                                    description=dc_obj.description,
                                    access=dc_obj.access))
                new_doc = Document(title=dc_obj.title,
                                   description=dc_obj.description,
                                   access_level=dc_obj.access)
                filename = os.path.basename('{0}.pdf'.format(dc_obj.id))
                if verbosity > 1:
                    self.stdout.write(
                        'Saving file named {0}\n'.format(filename))
                if not dry_run:
                    djfp = File(TemporaryFile())
                    try:
                        djfp.write(dc_obj.pdf)
                        djfp.seek(0)
                        try:
                            new_doc.file.save(filename, djfp)
                            if not new_doc.file.closed:
                                new_doc.file.close()
                        except AttributeError as e:
                            # Best effort: an AttributeError from the file
                            # save is tolerated and only reported when
                            # verbose. NOTE(review): the cause is not
                            # visible from here -- confirm it is expected.
                            if verbosity > 1:
                                self.stderr.write(
                                    'Error saving doc:\n\t"{error_message}"\n'.
                                    format(error_message=repr(e)))
                                if new_doc.file.url and new_doc.file.url != '':
                                    self.stdout.write(
                                        "New file at:\n\t{0}\n".format(
                                            new_doc.file.url))
                    finally:
                        # Release the temporary file even when the write or
                        # the save raises something other than AttributeError.
                        djfp.close()
                    new_doc_props.save()
                    new_doc.dc_properties = new_doc_props
                    new_doc.save()
                # Counted in dry runs too, so the summary shows what a real
                # run would add.
                new_docs_list.append(doc_id)
            self.stdout.write('\n')
        self.stdout.write(
            'Added {num_docs} new Document records (out of {num_dc_docs})\n'
            .format(num_docs=len(new_docs_list),
                    num_dc_docs=len(document_id_list)))
    def handle(self, *args, **options):
        """Create local Document records for a DocumentCloud project's documents.

        Iterates over every document id of the project identified by
        ``DOCUMENTCLOUD_PROJECT_ID``. For each id that has no matching
        ``DocumentCloudProperties`` row, the remote document is fetched and a
        ``DocumentCloudProperties`` / ``Document`` pair is built, with the
        remote PDF stored in ``Document.file``.

        Options read:
            dry_run: when truthy, no PDF is written and no record is saved;
                the ids are still counted in the final summary.
            verbosity: levels above 1 and above 2 print progressively more
                detail (defaults to 1).

        Raises:
            CommandError: if ``DOCUMENTCLOUD_PROJECT_ID`` is falsy or the
                project lookup returns a falsy value.
        """
        dry_run = options.get("dry_run")
        verbosity = options.get("verbosity", 1)

        if dry_run:
            self.stdout.write("Dry run. No new document records will be created.\n")

        client = get_client()
        self.stdout.write("Connecting to DocumentCloud...\n")
        project = client.projects.get(id=DOCUMENTCLOUD_PROJECT_ID) if DOCUMENTCLOUD_PROJECT_ID else None

        if not project:
            raise CommandError("No DOCUMENTCLOUD_PROJECT_ID set in settings. Cannot proceed\n")

        self.stdout.write('Pulling document list for "{0}"...\n\n'.format(project.title))
        document_id_list = project.document_ids
        new_docs_list = []
        for doc_id in document_id_list:
            if verbosity > 1:
                self.stdout.write('Checking "{0}"\n'.format(doc_id))
            try:
                # Only existence matters; a missing row raises DoesNotExist.
                DocumentCloudProperties.objects.get(dc_id=doc_id)
                if verbosity > 1:
                    self.stdout.write('DocumentCloudProperties record for "{0}" already exists\n'.format(doc_id))
            except DocumentCloudProperties.DoesNotExist:
                dc_obj = client.documents.get(id=doc_id)
                new_doc_props = DocumentCloudProperties(dc_id=dc_obj.id, dc_url=dc_obj.canonical_url)
                if verbosity > 1:
                    self.stdout.write("Creating record for {0}\n".format(dc_obj.id))
                    if verbosity > 2:
                        self.stdout.write(
                            "with:\n\tTitle: {title}\n\tDescription {description}\n\tAccess: {access}\n".format(
                                title=dc_obj.title, description=dc_obj.description, access=dc_obj.access
                            )
                        )
                new_doc = Document(title=dc_obj.title, description=dc_obj.description, access_level=dc_obj.access)
                filename = os.path.basename("{0}.pdf".format(dc_obj.id))
                if verbosity > 1:
                    self.stdout.write("Saving file named {0}\n".format(filename))
                if not dry_run:
                    djfp = File(TemporaryFile())
                    try:
                        djfp.write(dc_obj.pdf)
                        djfp.seek(0)
                        try:
                            new_doc.file.save(filename, djfp)
                            if not new_doc.file.closed:
                                new_doc.file.close()
                        except AttributeError as e:
                            # Best effort: an AttributeError from the file
                            # save is tolerated and only reported when
                            # verbose. NOTE(review): the cause is not
                            # visible from here -- confirm it is expected.
                            if verbosity > 1:
                                self.stderr.write(
                                    'Error saving doc:\n\t"{error_message}"\n'.format(error_message=repr(e))
                                )
                                if new_doc.file.url and new_doc.file.url != "":
                                    self.stdout.write("New file at:\n\t{0}\n".format(new_doc.file.url))
                    finally:
                        # Release the temporary file even when the write or
                        # the save raises something other than AttributeError.
                        djfp.close()
                    new_doc_props.save()
                    new_doc.dc_properties = new_doc_props
                    new_doc.save()
                # Counted in dry runs too, so the summary shows what a real
                # run would add.
                new_docs_list.append(doc_id)
            self.stdout.write("\n")
        self.stdout.write(
            "Added {num_docs} new Document records (out of {num_dc_docs})\n".format(
                num_docs=len(new_docs_list), num_dc_docs=len(document_id_list)
            )
        )