コード例 #1
0
    def handle_noargs(self, **options):
        """Create a PoliticalBuy for every Document that is not attached to one.

        For each orphan document, the callsign is read from the document's
        ``dc_data`` mapping and matched against ``Broadcaster.callsign`` with a
        ``startswith`` lookup. Documents with no callsign, with no matching
        broadcaster, or with several matching broadcasters are reported on
        stderr and skipped.

        Raises:
            CommandError: if the required user cannot be found.
            Exception: any error raised while building or saving a
                PoliticalBuy is written to stderr and then re-raised.
        """
        orphan_docs = Document.objects.filter(politicalbuy__isnull=True)
        self.stdout.write('Found {0} orphan docs (not attached to PoliticalBuy records)\n'.format(orphan_docs.count()))

        try:
            user = User.objects.get(username='******')
        except User.DoesNotExist:
            raise CommandError("Couldn't find user 'auto', which is required to populate moderation fields. Exiting.")

        for orphan_obj in orphan_docs:
            doc_meta = orphan_obj.dc_data
            callsign = doc_meta.get('callsign')
            if callsign is None:
                self.stderr.write('No callsign on "{0}". Skipping...\n'.format(orphan_obj.title))
                continue

            try:
                broadcaster = Broadcaster.objects.get(callsign__startswith=callsign)
            except Broadcaster.DoesNotExist:
                # Report the callsign that was searched for: `broadcaster` is
                # unbound (or left over from a previous iteration) when the
                # lookup fails.
                self.stderr.write("Can't find a Broadcaster with a callsign that matches {0}. Skipping...\n".format(callsign))
                continue
            except Broadcaster.MultipleObjectsReturned:
                self.stderr.write("document's callsign, {0}, matches multiple broadcasters. Skipping...\n".format(callsign))
                continue

            try:
                pb_obj = PoliticalBuy(documentcloud_doc=orphan_obj)
                try:
                    pb_obj.full_clean()
                except ValidationError as e:
                    # Validation problems are reported but do not block the
                    # save, as before. Write text, not the exception object.
                    self.stderr.write(repr(e) + '\n')
                pb_obj.save(user)
                # NOTE(review): `broadcasters` is used like a many-to-many
                # manager; Django requires the owning instance to be saved
                # before such a relation can be modified, so add after save().
                pb_obj.broadcasters.add(broadcaster)
            except Exception as e:
                self.stderr.write(repr(e))
                # Bare raise keeps the original traceback.
                raise
コード例 #2
0
ファイル: put_pdfs.py プロジェクト: dcloud/fcc_political_ads
def make_ad_buy_from_pdf_file(pdf_file):
    """Store an FCC PDF locally, wrap it in a Document and create its PoliticalBuy.

    The content returned by ``read_url(pdf_file.raw_url)`` is written under
    ``SCRAPER_LOCAL_DOC_DIR``, saved onto a new ``Document``, and a
    ``PoliticalBuy`` pointing at that document and at ``pdf_file`` is saved.
    If the PDF's folder has a broadcaster it is added to the buy. Finally
    ``pdf_file.in_document_cloud`` is set to True and ``pdf_file`` is saved.

    Args:
        pdf_file: object exposing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True once all records have been saved.
    """
    pdf_url = pdf_file.raw_url
    # The first user returned by the query is the one passed to PoliticalBuy.save().
    auser = User.objects.all()[0]

    # Flatten the URL path into a single file name by replacing "/" with "%%".
    tempfile_name = urllib2.unquote(urlparse(pdf_url).path)
    tempfile_name = tempfile_name.lstrip('/')
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    # Context manager closes the handle even if the write fails.
    with open(tempfile_full, "wb") as local_pdf:
        local_pdf.write(page)
    print("wrote the pdf")

    print("creating doc")
    d = Document(title=tempfile_name,
                 description="From the FCC's political files",
                 user=auser,
                 access_level='public')

    # PDFs are binary, so read in "rb" mode. The handle stays open across the
    # same calls that ran with it open before, and is closed afterwards
    # instead of being leaked.
    with open(tempfile_full, "rb") as pdf_handle:
        d.file.save('new', File(pdf_handle))
        print("saved via local")
        d.connect_dc_doc()
        d.save()

    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # The buy is saved before `broadcasters.add()` is called on it.
    pol_buy.save(auser)

    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
        pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
コード例 #3
0
ファイル: put_pdfs.py プロジェクト: dwillis/fcc_political_ads
def make_ad_buy_from_pdf_file(pdf_file):
    """Store an FCC PDF locally, wrap it in a Document and create its PoliticalBuy.

    The content returned by ``read_url(pdf_file.raw_url)`` is written to a
    file under ``SCRAPER_LOCAL_DOC_DIR`` and saved onto a new ``Document``.
    A ``PoliticalBuy`` referencing that document and ``pdf_file`` is then
    saved, and the folder's broadcaster (when present) is added to it. The
    PDF record is flagged with ``in_document_cloud = True`` and saved.

    Args:
        pdf_file: object exposing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True once all records have been saved.
    """
    pdf_url = pdf_file.raw_url
    # The first user returned by the query is the one passed to PoliticalBuy.save().
    auser = User.objects.all()[0]

    # Build a flat local file name from the URL path ("/" becomes "%%").
    tempfile_name = urllib2.unquote(urlparse(pdf_url).path)
    tempfile_name = tempfile_name.lstrip('/')
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    # `with` guarantees the handle is closed even when the write raises.
    with open(tempfile_full, "wb") as out_handle:
        out_handle.write(page)
    print("wrote the pdf")

    print("creating doc")
    d = Document(title=tempfile_name, description="From the FCC's political files", user=auser, access_level='public')

    # Open in binary mode (the content is a PDF) and close the handle once
    # the document has been saved; previously it was never closed.
    with open(tempfile_full, "rb") as in_handle:
        d.file.save('new', File(in_handle))
        print("saved via local")
        d.connect_dc_doc()
        d.save()

    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # The buy is saved before `broadcasters.add()` is called on it.
    pol_buy.save(auser)

    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
        pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
コード例 #4
0
    def handle_noargs(self, **options):
        """Create a PoliticalBuy for every Document that is not attached to one.

        Each orphan document's callsign (taken from its ``dc_data`` mapping)
        is matched against ``Broadcaster.callsign`` using a ``startswith``
        lookup. A document is reported on stderr and skipped when it has no
        callsign, when no broadcaster matches, or when several match.

        Raises:
            CommandError: if the required user cannot be found.
            Exception: any error raised while building or saving a
                PoliticalBuy is written to stderr and then re-raised.
        """
        orphan_docs = Document.objects.filter(politicalbuy__isnull=True)
        self.stdout.write(
            'Found {0} orphan docs (not attached to PoliticalBuy records)\n'.
            format(orphan_docs.count()))

        try:
            user = User.objects.get(username='******')
        except User.DoesNotExist:
            raise CommandError(
                "Couldn't find user 'auto', which is required to populate moderation fields. Exiting."
            )

        for orphan_obj in orphan_docs:
            doc_meta = orphan_obj.dc_data
            callsign = doc_meta.get('callsign')
            if callsign is None:
                self.stderr.write('No callsign on "{0}". Skipping...\n'.format(
                    orphan_obj.title))
                continue

            try:
                broadcaster = Broadcaster.objects.get(
                    callsign__startswith=callsign)
            except Broadcaster.DoesNotExist:
                # `broadcaster` is unbound (or stale from an earlier
                # iteration) when the lookup fails, so report the callsign
                # that was searched for.
                self.stderr.write(
                    "Can't find a Broadcaster with a callsign that matches {0}. Skipping...\n"
                    .format(callsign))
                continue
            except Broadcaster.MultipleObjectsReturned:
                self.stderr.write(
                    "document's callsign, {0}, matches multiple broadcasters. Skipping...\n"
                    .format(callsign))
                continue

            try:
                pb_obj = PoliticalBuy(documentcloud_doc=orphan_obj)
                try:
                    pb_obj.full_clean()
                except ValidationError as e:
                    # Validation problems are logged as text and, as before,
                    # do not prevent the save.
                    self.stderr.write(repr(e) + '\n')
                pb_obj.save(user)
                # NOTE(review): `broadcasters` is used like a many-to-many
                # manager; Django requires the owning instance to be saved
                # before such a relation can be modified, so add after save().
                pb_obj.broadcasters.add(broadcaster)
            except Exception as e:
                self.stderr.write(repr(e))
                # Bare raise keeps the original traceback.
                raise
コード例 #5
0
def make_ad_buy_from_pdf_file(pdf_file_pk):
    """Create a PoliticalBuy from the PDF_File whose primary key is given.

    Descriptive fields are copied from the PDF_File onto a new PoliticalBuy;
    several string values are truncated (31, 255 or 7 characters) before
    being assigned. When the PDF_File has a ``facility_id`` that matches
    exactly one Broadcaster, that broadcaster is attached and the buy is
    marked public.

    Args:
        pdf_file_pk: primary key of the PDF_File to process.

    Returns:
        None if no PDF_File has that primary key; True once the
        PoliticalBuy has been saved.
    """
    try:
        pdf_file = PDF_File.objects.get(pk=pdf_file_pk)
    except PDF_File.DoesNotExist:
        return None

    # The first user returned by the query is the one passed to PoliticalBuy.save().
    auser = User.objects.all()[0]
    print("processing %s" % (pdf_file.__dict__))

    pol_buy = PoliticalBuy()
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file

    # Call candidate_type() once and reuse the result.
    candidate_type = pdf_file.candidate_type()
    pol_buy.candidate_type = candidate_type[:31] if candidate_type else None

    raw_name = pdf_file.raw_name_guess
    pol_buy.fcc_folder_name = raw_name[:255] if raw_name else None
    pol_buy.nielsen_dma = pdf_file.nielsen_dma
    pol_buy.dma_id = pdf_file.dma_id
    pol_buy.community_state = (
        pdf_file.community_state[:7] if pdf_file.community_state else None)
    pol_buy.upload_time = pdf_file.upload_time
    # Both contract dates are set to the upload time.
    pol_buy.contract_start_date = pdf_file.upload_time
    pol_buy.contract_end_date = pdf_file.upload_time
    # Concatenate directly instead of str(): on Python 2, str() of a unicode
    # value containing non-ASCII characters raises UnicodeEncodeError.
    pol_buy.advertiser_display_name = (
        (raw_name or "") + " - " + pdf_file.file_name())
    pol_buy.broadcaster_callsign = (
        pdf_file.callsign[:7] if pdf_file.callsign else None)
    pol_buy.in_document_cloud = pdf_file.in_document_cloud

    # The buy is saved before `broadcasters.add()` is called on it.
    pol_buy.save(auser)

    if pdf_file.facility_id:
        try:
            thisbroadcaster = Broadcaster.objects.get(
                facility_id=pdf_file.facility_id)
        except (Broadcaster.DoesNotExist,
                Broadcaster.MultipleObjectsReturned):
            # Best effort: with no unique match the buy stays unlinked and
            # is_public is not set.
            pass
        else:
            pol_buy.broadcasters.add(thisbroadcaster)
            pol_buy.is_public = True
            pol_buy.save(auser)

    # Return True whenever the buy was created; previously this line was
    # nested under the facility_id check, so a buy created for a PDF with no
    # facility_id returned None, the same as "PDF not found".
    return True
コード例 #6
0
def make_ad_buy_from_pdf_file(pdf_file):
    """Create a PoliticalBuy for ``pdf_file`` unless one already exists.

    Descriptive fields are copied from ``pdf_file`` onto a new PoliticalBuy.
    When the PDF's folder has a broadcaster, its callsign is recorded, the
    broadcaster is attached to the buy and the buy is marked public.

    Args:
        pdf_file: PDF record exposing ``pk``, ``folder.broadcaster`` and the
            descriptive attributes read below.

    Returns:
        False if a PoliticalBuy already references ``pdf_file``; True after
        a new one has been saved.
    """
    # exists() answers "is there already a buy?" without raising
    # MultipleObjectsReturned when duplicates are present.
    if PoliticalBuy.objects.filter(related_FCC_file__pk=pdf_file.pk).exists():
        print("Found buy")
        return False

    # The first user returned by the query is the one passed to PoliticalBuy.save().
    auser = User.objects.all()[0]
    broadcaster = pdf_file.folder.broadcaster

    pol_buy = PoliticalBuy()
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file

    pol_buy.candidate_type = pdf_file.candidate_type()
    pol_buy.fcc_folder_name = pdf_file.raw_name_guess
    pol_buy.nielsen_dma = pdf_file.nielsen_dma
    pol_buy.dma_id = pdf_file.dma_id
    pol_buy.community_state = pdf_file.community_state
    pol_buy.upload_time = pdf_file.upload_time
    # Both contract dates are set to the upload time.
    pol_buy.contract_start_date = pdf_file.upload_time
    pol_buy.contract_end_date = pdf_file.upload_time
    # Treat a missing name guess as empty so the concatenation cannot fail
    # with a TypeError on None.
    pol_buy.advertiser_display_name = (
        (pdf_file.raw_name_guess or "") + "-" + pdf_file.file_name())
    # Only read the callsign when a broadcaster is present; the
    # unconditional attribute access raised AttributeError for folders
    # without one, even though the code below already allows for that case.
    if broadcaster:
        pol_buy.broadcaster_callsign = broadcaster.callsign
    pol_buy.in_document_cloud = pdf_file.in_document_cloud

    # The buy is saved before `broadcasters.add()` is called on it.
    pol_buy.save(auser)

    if broadcaster:
        pol_buy.broadcasters.add(broadcaster)
        pol_buy.is_public = True
        pol_buy.save(auser)

    return True
コード例 #7
0
def make_ad_buy_from_pdf_file(pdf_file_pk):
    """Create a PoliticalBuy from the PDF_File whose primary key is given.

    Fields are copied from the PDF_File onto a new PoliticalBuy, truncating
    several string values (31, 255 or 7 characters) before assignment. If
    the PDF_File's ``facility_id`` matches exactly one Broadcaster, that
    broadcaster is attached and the buy is marked public.

    Args:
        pdf_file_pk: primary key of the PDF_File to process.

    Returns:
        None if no PDF_File has that primary key; True once the
        PoliticalBuy has been saved.
    """
    try:
        pdf_file = PDF_File.objects.get(pk=pdf_file_pk)
    except PDF_File.DoesNotExist:
        return None

    # The first user returned by the query is the one passed to PoliticalBuy.save().
    auser = User.objects.all()[0]
    print("processing %s" % (pdf_file.__dict__))

    pol_buy = PoliticalBuy()
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file

    # Call candidate_type() once and reuse the result.
    candidate_type = pdf_file.candidate_type()
    pol_buy.candidate_type = candidate_type[:31] if candidate_type else None

    name_guess = pdf_file.raw_name_guess
    pol_buy.fcc_folder_name = name_guess[:255] if name_guess else None
    pol_buy.nielsen_dma = pdf_file.nielsen_dma
    pol_buy.dma_id = pdf_file.dma_id
    pol_buy.community_state = pdf_file.community_state[:7] if pdf_file.community_state else None
    pol_buy.upload_time = pdf_file.upload_time
    # Both contract dates are set to the upload time.
    pol_buy.contract_start_date = pdf_file.upload_time
    pol_buy.contract_end_date = pdf_file.upload_time
    # Concatenate directly instead of str(): on Python 2, str() of a unicode
    # value containing non-ASCII characters raises UnicodeEncodeError.
    pol_buy.advertiser_display_name = (name_guess or "") + " - " + pdf_file.file_name()
    pol_buy.broadcaster_callsign = pdf_file.callsign[:7] if pdf_file.callsign else None
    pol_buy.in_document_cloud = pdf_file.in_document_cloud

    # The buy is saved before `broadcasters.add()` is called on it.
    pol_buy.save(auser)

    if pdf_file.facility_id:
        try:
            thisbroadcaster = Broadcaster.objects.get(facility_id=pdf_file.facility_id)
        except (Broadcaster.DoesNotExist, Broadcaster.MultipleObjectsReturned):
            # Best effort: with no unique match the buy stays unlinked and
            # is_public is not set.
            pass
        else:
            pol_buy.broadcasters.add(thisbroadcaster)
            pol_buy.is_public = True
            pol_buy.save(auser)

    # Return True whenever the buy was created; previously this line was
    # nested under the facility_id check, so a buy created for a PDF with no
    # facility_id returned None, the same as "PDF not found".
    return True