Code Example #1: handle() method that backs up locally scraped PDF files to S3
    def handle(self, *args, **options):

        my_logger = fcc_logger()
        my_logger.info("starting backup run...")

        # connect to S3 (boto 2) and reuse a single Key object for every upload
        conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        b = conn.get_bucket('politicaladsleuth-assets')
        k = Key(b)

        # only PDFs that have been downloaded locally but not yet pushed to S3
        pdfs_to_backup = PDF_File.objects.filter(
            local_file_path__isnull=False, is_backed_up=False).values('id')

        num_to_process = len(pdfs_to_backup)

        print "Processing %s files" % num_to_process
        count = 0

        for this_pdf_id in pdfs_to_backup:
            this_pdf = PDF_File.objects.get(pk=this_pdf_id['id'])

            if this_pdf.is_backed_up:
                print "already backed up!"
                continue

            count += 1
            if (count % 100 == 0):
                print "Processed %s" % count
            local_file_path = this_pdf.local_file_path
            full_file_path = SCRAPER_LOCAL_DOC_DIR + "/" + local_file_path
            #print "path is: %s" % full_file_path

            # local filenames flatten "/" into "%%"; restore them for the S3 key
            local_file_path = local_file_path.replace("%%", "/")
            s3_string = "media/fcc_backup/%s" % local_file_path
            #print "s3 destination is: %s" % s3_string

            k.key = s3_string
            try:
                result = k.set_contents_from_filename(full_file_path,
                                                      policy='public-read')
            except Exception:
                # log the failure and move on so one bad file doesn't abort the run
                tb = traceback.format_exc()
                message = "*BACKUP ERROR:* Error uploading %s\n%s" % (
                    local_file_path, tb)
                print message
                my_logger.warn(message)
                continue
            this_pdf.is_backed_up = True
            this_pdf.s3_full_url = s3_string
            this_pdf.save()
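
For context, Example #1 shows only the handle() method; the module-level imports and the Command class around it are not part of the listing. Below is a minimal sketch of that scaffolding, assuming the boto 2 S3 API and assuming the bare names AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and SCRAPER_LOCAL_DOC_DIR come from the Django settings module; the exact import paths are guesses, and the scraper.* imports mirror the style used in Examples #2 and #3 below.

import traceback

from django.core.management.base import BaseCommand
from django.conf import settings

# boto 2 S3 API, matching the S3Connection/Key usage above
from boto.s3.connection import S3Connection
from boto.s3.key import Key

from scraper.models import PDF_File
from scraper.local_log import fcc_logger

# assumed to be defined in settings; the snippet references them as bare names
AWS_ACCESS_KEY_ID = getattr(settings, 'AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = getattr(settings, 'AWS_SECRET_ACCESS_KEY')
SCRAPER_LOCAL_DOC_DIR = getattr(settings, 'SCRAPER_LOCAL_DOC_DIR')


class Command(BaseCommand):
    help = "Back up locally scraped PDF files to S3"

    # the handle() method shown in Example #1 goes here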
Code Example #2: management command that scrapes the FCC political-file API feed
""" This takes the place of the folder scraping routines that were built before there was an rss file available. """

from django.core.management.base import BaseCommand, CommandError
from django.conf import settings

from scraper.api_scraper import parse_api_feed
from scraper.models import PDF_File, StationData
from broadcasters.models import Broadcaster

FCC_SCRAPER_LOG_DIRECTORY = getattr(settings, 'FCC_SCRAPER_LOG')
from scraper.local_log import fcc_logger

my_logger = fcc_logger()
my_logger.info("starting fcc rss scrape...")


class Command(BaseCommand):
    def handle(self, *args, **options):
        political_files = None

        # optional positional args: a start and end date to bound the feed query
        if args:
            start_date = args[0]
            end_date = args[1]
            print "start_date=%s end_date=%s" % (start_date, end_date)
            political_files = parse_api_feed(start_date, end_date)
        else:
            political_files = parse_api_feed()

        for thisfile in political_files:
            if not thisfile:
                # if there's no details, skip this entry
                continue
Code Example #3: RSS-feed variant of the scraper, with a handle_file() helper that matches feed entries to Broadcaster records
""" This takes the place of the folder scraping routines that were built before there was an rss file available. """

from django.core.management.base import BaseCommand, CommandError
from django.conf import settings

from scraper.rss_scraper import parse_xml_from_text, get_rss_from_web, get_rss_from_file
from scraper.models import PDF_File, StationData
from broadcasters.models import Broadcaster

FCC_SCRAPER_LOG_DIRECTORY = getattr(settings, 'FCC_SCRAPER_LOG')
from scraper.local_log import fcc_logger

my_logger = fcc_logger()
my_logger.info("starting fcc rss scrape...")

def handle_file(thisfile):
    print thisfile
    # default the station metadata, then fill it in from the Broadcaster record if one exists
    [callsign, nielsen_dma, dma_id, community_state] = [None, None, None, None]
    try:
        thisbroadcaster = Broadcaster.objects.get(facility_id=thisfile['facility_id'])
        callsign = thisbroadcaster.callsign
        nielsen_dma = thisbroadcaster.nielsen_dma
        community_state = thisbroadcaster.community_state
        dma_id = thisbroadcaster.dma_id
    except Broadcaster.DoesNotExist:
        pass

    if not callsign:
        # no Broadcaster record matched, so fall back to the callsign reported in the feed
        callsign = thisfile['callsign']

    if thisfile['href']:
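
Example #3 lists only the module-level handle_file() helper and its imports; the Command class that drives it is not shown. A minimal sketch of how it could plug in, following the loop structure of Example #2; the signatures of get_rss_from_web() and parse_xml_from_text() are assumptions, as is the idea that the parsed feed yields the per-file dicts handle_file() expects.

class Command(BaseCommand):
    def handle(self, *args, **options):
        # assumed: get_rss_from_web() returns the raw feed text and
        # parse_xml_from_text() turns it into per-file dicts
        rss_text = get_rss_from_web()
        political_files = parse_xml_from_text(rss_text)

        for thisfile in political_files:
            if not thisfile:
                # skip empty entries, as in Example #2
                continue
            handle_file(thisfile)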