def handle_noargs(self, **options):
    """Create a PoliticalBuy for every Document that is not attached to one.

    For each orphan document the ``'callsign'`` entry of its ``dc_data`` is
    matched (prefix match) against ``Broadcaster.callsign``.  Documents with
    no callsign, no matching broadcaster, or more than one matching
    broadcaster are reported on stderr and skipped.

    Raises:
        CommandError: if the user needed to populate the moderation fields
            cannot be found.
        Exception: anything raised while creating the PoliticalBuy is logged
            to stderr and re-raised unchanged.
    """
    orphan_docs = Document.objects.filter(politicalbuy__isnull=True)
    self.stdout.write(
        'Found {0} orphan docs (not attached to PoliticalBuy records)\n'.format(
            orphan_docs.count()))

    # NOTE(review): the username literal below does not match the 'auto'
    # named in the error message -- confirm which one is intended.
    try:
        user = User.objects.get(username='******')
    except User.DoesNotExist:
        raise CommandError(
            "Couldn't find user 'auto', which is required to populate "
            "moderation fields. Exiting.")

    for orphan_obj in orphan_docs:
        callsign = orphan_obj.dc_data.get('callsign')
        if callsign is None:
            self.stderr.write(
                'No callsign on "{0}". Skipping...\n'.format(orphan_obj.title))
            continue

        try:
            broadcaster = Broadcaster.objects.get(callsign__startswith=callsign)
        except Broadcaster.DoesNotExist:
            # Report the callsign we searched for: ``broadcaster`` is not
            # bound (or is left over from an earlier iteration) when the
            # lookup fails.
            self.stderr.write(
                "Can't find a Broadcaster with a callsign that matches {0}. "
                "Skipping...\n".format(callsign))
            continue
        except Broadcaster.MultipleObjectsReturned:
            self.stderr.write(
                "document's callsign, {0}, matches multiple broadcasters. "
                "Skipping...\n".format(callsign))
            continue

        try:
            pb_obj = PoliticalBuy(documentcloud_doc=orphan_obj)
            try:
                pb_obj.full_clean()
            except ValidationError as e:
                # Validation problems are reported but do not stop the save.
                self.stderr.write(repr(e))
            pb_obj.save(user)
            # The many-to-many relation can only be used once the instance
            # has been saved and has a primary key.
            pb_obj.broadcasters.add(broadcaster)
        except Exception as e:
            self.stderr.write(repr(e))
            # Bare raise keeps the original traceback.
            raise
def make_ad_buy_from_pdf_file(pdf_file):
    """Download an FCC PDF, store it as a Document and create its PoliticalBuy.

    The PDF at ``pdf_file.raw_url`` is fetched with ``read_url``, written to
    ``SCRAPER_LOCAL_DOC_DIR``, saved into a new public ``Document`` and
    connected via ``connect_dc_doc()``.  A ``PoliticalBuy`` flagged as an FCC
    document is then created for it, linked to ``pdf_file`` and, when the
    file's folder has one, to that folder's broadcaster.  Finally
    ``pdf_file.in_document_cloud`` is set and saved.

    Args:
        pdf_file: object exposing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True once every step has completed.
    """
    pdf_url = pdf_file.raw_url
    # The first user in the table is used as the owner / acting user.
    auser = User.objects.all()[0]

    # Flatten the URL path into a single file name: path separators are
    # replaced with the literal text "%%".
    tempfile_name = urllib2.unquote(urlparse(pdf_url).path).lstrip('/')
    tempfile_name_fixed = tempfile_name.replace("/", "%%")
    print("temp name is %s" % (tempfile_name_fixed,))
    tempfile_full = SCRAPER_LOCAL_DOC_DIR + "/" + tempfile_name_fixed

    page = read_url(pdf_url)
    print("read the pdf")
    with open(tempfile_full, "wb") as local_copy:
        local_copy.write(page)
    print("wrote the pdf")

    # Reopen in binary mode (the payload is a PDF) and keep the handle open
    # until the document has been stored and connected, then close it.
    with open(tempfile_full, "rb") as pdf_handle:
        print("creating doc")
        d = Document(title=tempfile_name,
                     description="From the FCC's political files",
                     user=auser,
                     access_level='public')
        d.file.save('new', File(pdf_handle))
        print("saved via local")
        d.connect_dc_doc()
        d.save()
    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # Save before touching the many-to-many relation below.
    pol_buy.save(auser)

    if pdf_file.folder.broadcaster:
        pol_buy.broadcasters.add(pdf_file.folder.broadcaster)
        pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
def make_ad_buy_from_pdf_file(pdf_file):
    """Fetch an FCC PDF, save it as a Document and build a PoliticalBuy for it.

    Steps, in order: download ``pdf_file.raw_url`` with ``read_url``; write
    the bytes under ``SCRAPER_LOCAL_DOC_DIR``; save that file into a new
    public ``Document`` and call ``connect_dc_doc()`` on it; create a
    ``PoliticalBuy`` marked as an FCC document that points at ``pdf_file``;
    attach the folder's broadcaster if there is one; mark ``pdf_file`` as
    being in DocumentCloud.

    Args:
        pdf_file: object exposing ``raw_url``, ``folder.broadcaster``,
            ``in_document_cloud`` and ``save()``.

    Returns:
        True once every step has completed.
    """
    source_url = pdf_file.raw_url
    # The first user in the table is used as the owner / acting user.
    auser = User.objects.all()[0]

    # Turn the URL path into one flat file name; each "/" becomes the
    # literal text "%%".
    doc_title = urllib2.unquote(urlparse(source_url).path).lstrip('/')
    flat_name = doc_title.replace("/", "%%")
    print("temp name is %s" % (flat_name,))
    local_path = SCRAPER_LOCAL_DOC_DIR + "/" + flat_name

    pdf_bytes = read_url(source_url)
    print("read the pdf")
    with open(local_path, "wb") as out_handle:
        out_handle.write(pdf_bytes)
    print("wrote the pdf")

    # Binary mode because the content is a PDF; the handle stays open while
    # the Document is stored and connected and is closed afterwards.
    with open(local_path, "rb") as in_handle:
        print("creating doc")
        d = Document(title=doc_title,
                     description="From the FCC's political files",
                     user=auser,
                     access_level='public')
        d.file.save('new', File(in_handle))
        print("saved via local")
        d.connect_dc_doc()
        d.save()
    print("save 2")

    pol_buy = PoliticalBuy(documentcloud_doc=d)
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # Save before touching the many-to-many relation below.
    pol_buy.save(auser)

    folder_broadcaster = pdf_file.folder.broadcaster
    if folder_broadcaster:
        pol_buy.broadcasters.add(folder_broadcaster)
        pol_buy.save(auser)

    # Record that this file has been uploaded.
    pdf_file.in_document_cloud = True
    pdf_file.save()
    return True
def handle_noargs(self, **options):
    """Attach a new PoliticalBuy to each Document that has none.

    The ``'callsign'`` value in a document's ``dc_data`` is looked up with a
    prefix match on ``Broadcaster.callsign``.  A document is skipped, with a
    message on stderr, when it has no callsign or when the lookup finds zero
    or several broadcasters.

    Raises:
        CommandError: if the user needed to populate the moderation fields
            cannot be found.
        Exception: anything raised while creating the PoliticalBuy is logged
            to stderr and re-raised unchanged.
    """
    orphan_docs = Document.objects.filter(politicalbuy__isnull=True)
    self.stdout.write(
        'Found {0} orphan docs (not attached to PoliticalBuy records)\n'.format(
            orphan_docs.count()))

    # NOTE(review): the username literal below does not match the 'auto'
    # named in the error message -- confirm which one is intended.
    try:
        user = User.objects.get(username='******')
    except User.DoesNotExist:
        raise CommandError(
            "Couldn't find user 'auto', which is required to populate "
            "moderation fields. Exiting.")

    for orphan_obj in orphan_docs:
        doc_meta = orphan_obj.dc_data
        callsign = doc_meta.get('callsign')
        if callsign is None:
            self.stderr.write(
                'No callsign on "{0}". Skipping...\n'.format(orphan_obj.title))
            continue

        try:
            broadcaster = Broadcaster.objects.get(callsign__startswith=callsign)
        except Broadcaster.DoesNotExist:
            # Format the searched-for callsign; no broadcaster object exists
            # for this document when the lookup fails.
            self.stderr.write(
                "Can't find a Broadcaster with a callsign that matches {0}. "
                "Skipping...\n".format(callsign))
            continue
        except Broadcaster.MultipleObjectsReturned:
            self.stderr.write(
                "document's callsign, {0}, matches multiple broadcasters. "
                "Skipping...\n".format(callsign))
            continue

        try:
            pb_obj = PoliticalBuy(documentcloud_doc=orphan_obj)
            try:
                pb_obj.full_clean()
            except ValidationError as e:
                # Report validation problems as text; the save still goes
                # ahead.
                self.stderr.write(repr(e))
            pb_obj.save(user)
            # Add the broadcaster only after the save: the many-to-many
            # relation needs the instance to have a primary key.
            pb_obj.broadcasters.add(broadcaster)
        except Exception as e:
            self.stderr.write(repr(e))
            # Bare raise keeps the original traceback.
            raise
def make_ad_buy_from_pdf_file(pdf_file_pk):
    """Create a PoliticalBuy from the metadata of one PDF_File.

    Args:
        pdf_file_pk: primary key of the ``PDF_File`` to process.

    Returns:
        None if no ``PDF_File`` with that primary key exists, otherwise True
        after the ``PoliticalBuy`` has been saved.
    """
    try:
        pdf_file = PDF_File.objects.get(pk=pdf_file_pk)
    except PDF_File.DoesNotExist:
        return None

    def clipped(value, limit):
        # Falsy values (None, "") are stored as None; anything else is cut
        # to ``limit`` characters -- presumably the target field's maximum
        # length; verify against the PoliticalBuy model.
        return value[:limit] if value else None

    # The first user in the table is used as the acting user for save().
    auser = User.objects.all()[0]
    print("processing %s" % (pdf_file.__dict__,))

    pol_buy = PoliticalBuy()
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    # candidate_type is a method; call it once and reuse the result.
    pol_buy.candidate_type = clipped(pdf_file.candidate_type(), 31)
    pol_buy.fcc_folder_name = clipped(pdf_file.raw_name_guess, 255)
    pol_buy.nielsen_dma = pdf_file.nielsen_dma
    pol_buy.dma_id = pdf_file.dma_id
    pol_buy.community_state = clipped(pdf_file.community_state, 7)
    pol_buy.upload_time = pdf_file.upload_time
    # Both contract dates are initialised to the upload time.
    pol_buy.contract_start_date = pdf_file.upload_time
    pol_buy.contract_end_date = pdf_file.upload_time
    # Concatenate without str(): on Python 2, str() raises
    # UnicodeEncodeError for a unicode name with non-ASCII characters.
    pol_buy.advertiser_display_name = (
        (pdf_file.raw_name_guess or "") + " - " + pdf_file.file_name())
    pol_buy.broadcaster_callsign = clipped(pdf_file.callsign, 7)
    pol_buy.in_document_cloud = pdf_file.in_document_cloud
    # Save before touching the many-to-many relation below.
    pol_buy.save(auser)

    if pdf_file.facility_id:
        try:
            thisbroadcaster = Broadcaster.objects.get(
                facility_id=pdf_file.facility_id)
        except (Broadcaster.DoesNotExist,
                Broadcaster.MultipleObjectsReturned):
            # Best effort: without exactly one matching broadcaster the buy
            # is left unlinked and is_public is not set.
            pass
        else:
            pol_buy.broadcasters.add(thisbroadcaster)
            pol_buy.is_public = True
            pol_buy.save(auser)
    return True
def make_ad_buy_from_pdf_file(pdf_file):
    """Create a PoliticalBuy for ``pdf_file`` unless one already exists.

    Args:
        pdf_file: object exposing the FCC file metadata copied below, plus
            ``folder.broadcaster`` (which may be empty).

    Returns:
        False if a ``PoliticalBuy`` already references this file (nothing is
        created), True after a new one has been saved.
    """
    # exists() also covers the case where several buys already point at
    # this file, which a .get() lookup would turn into an exception.
    if PoliticalBuy.objects.filter(related_FCC_file__pk=pdf_file.pk).exists():
        print("Found buy")
        return False

    # The first user in the table is used as the acting user for save().
    auser = User.objects.all()[0]
    broadcaster = pdf_file.folder.broadcaster

    pol_buy = PoliticalBuy()
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    pol_buy.candidate_type = pdf_file.candidate_type()
    pol_buy.fcc_folder_name = pdf_file.raw_name_guess
    pol_buy.nielsen_dma = pdf_file.nielsen_dma
    pol_buy.dma_id = pdf_file.dma_id
    pol_buy.community_state = pdf_file.community_state
    pol_buy.upload_time = pdf_file.upload_time
    # Both contract dates are initialised to the upload time.
    pol_buy.contract_start_date = pdf_file.upload_time
    pol_buy.contract_end_date = pdf_file.upload_time
    # A missing name guess contributes an empty prefix instead of raising
    # TypeError on None + str.
    pol_buy.advertiser_display_name = (
        (pdf_file.raw_name_guess or "") + "-" + pdf_file.file_name())
    # The folder may have no broadcaster (see the guard below), so only read
    # its callsign when it is present.
    # NOTE(review): assumes broadcaster_callsign accepts None -- confirm
    # against the PoliticalBuy model.
    pol_buy.broadcaster_callsign = broadcaster.callsign if broadcaster else None
    pol_buy.in_document_cloud = pdf_file.in_document_cloud
    # Save before touching the many-to-many relation below.
    pol_buy.save(auser)

    if broadcaster:
        pol_buy.broadcasters.add(broadcaster)
        pol_buy.is_public = True
        pol_buy.save(auser)
    return True
def make_ad_buy_from_pdf_file(pdf_file_pk):
    """Build and save a PoliticalBuy from a PDF_File looked up by primary key.

    Args:
        pdf_file_pk: primary key of the ``PDF_File`` to process.

    Returns:
        None if no ``PDF_File`` with that primary key exists, otherwise True
        after the ``PoliticalBuy`` has been saved.
    """
    try:
        pdf_file = PDF_File.objects.get(pk=pdf_file_pk)
    except PDF_File.DoesNotExist:
        return None

    def truncate_or_none(text, max_len):
        # None and "" become None; other values are sliced to ``max_len``
        # characters -- presumably the destination field's size limit;
        # verify against the PoliticalBuy model.
        if not text:
            return None
        return text[:max_len]

    # The first user in the table is used as the acting user for save().
    auser = User.objects.all()[0]
    print("processing %s" % (pdf_file.__dict__,))

    # candidate_type is a method; evaluate it a single time.
    candidate_type = pdf_file.candidate_type()
    name_guess = pdf_file.raw_name_guess

    pol_buy = PoliticalBuy()
    pol_buy.is_FCC_doc = True
    pol_buy.related_FCC_file = pdf_file
    pol_buy.candidate_type = truncate_or_none(candidate_type, 31)
    pol_buy.fcc_folder_name = truncate_or_none(name_guess, 255)
    pol_buy.nielsen_dma = pdf_file.nielsen_dma
    pol_buy.dma_id = pdf_file.dma_id
    pol_buy.community_state = truncate_or_none(pdf_file.community_state, 7)
    pol_buy.upload_time = pdf_file.upload_time
    # Both contract dates are initialised to the upload time.
    pol_buy.contract_start_date = pdf_file.upload_time
    pol_buy.contract_end_date = pdf_file.upload_time
    # No str() around the name: on Python 2 it raises UnicodeEncodeError for
    # a unicode value containing non-ASCII characters.
    pol_buy.advertiser_display_name = (
        (name_guess or "") + " - " + pdf_file.file_name())
    pol_buy.broadcaster_callsign = truncate_or_none(pdf_file.callsign, 7)
    pol_buy.in_document_cloud = pdf_file.in_document_cloud
    # Save before touching the many-to-many relation below.
    pol_buy.save(auser)

    if pdf_file.facility_id:
        try:
            thisbroadcaster = Broadcaster.objects.get(
                facility_id=pdf_file.facility_id)
        except (Broadcaster.DoesNotExist,
                Broadcaster.MultipleObjectsReturned):
            # Best effort: with no unique match the buy stays unlinked and
            # is_public is not set.
            pass
        else:
            pol_buy.broadcasters.add(thisbroadcaster)
            pol_buy.is_public = True
            pol_buy.save(auser)
    return True