Ejemplo n.º 1
0
 def handle(self, *args, **options):
     """Process the cached filings for each day in a hard-coded date range.

     For every day from 2013-06-16 up to, but not including, 2013-06-18,
     the day's entry in ``filerange`` (keyed by a YYYYMMDD string) supplies
     the 'first' and 'last' filing numbers. Each number in that inclusive
     range that has a matching ".fec" file in FILECACHE_DIRECTORY is handed
     to ``process_filing_header`` with 07:00 on that day as an inexact
     filing time. Days without an entry and numbers without a file are
     reported and skipped.
     """
     first_day = date(2013, 6, 16)
     day_after_last = date(2013, 6, 18)

     for offset in range((day_after_last - first_day).days):
         current_day = first_day + timedelta(days=offset)
         datestring = current_day.strftime("%Y%m%d")
         # The exact filing time is unknown here, so 07:00 is used as a
         # stand-in and flagged as inexact in the call below.
         entry_time = datetime(
             current_day.year, current_day.month, current_day.day,
             hour=7, minute=0)
         print("datestring %s" % (datestring))

         try:
             filing_info = filerange[datestring]
         except KeyError:
             print("Missing data for %s" % datestring)
             continue

         first_number = int(filing_info['first'])
         last_number = int(filing_info['last'])
         for filenum in range(first_number, last_number + 1):
             # Some numbers are skipped; the assumption is that the files
             # being entered came from a zipfile, so a number with no
             # downloaded file is simply reported and ignored.
             cached_name = str(filenum) + ".fec"
             local_file_location = FILECACHE_DIRECTORY + "/" + cached_name
             if not path.isfile(local_file_location):
                 print("!! missing file %s" % filenum)
                 continue

             print("Processing %s" % (filenum))
             process_filing_header(
                 filenum,
                 fp=fp,
                 filing_time=entry_time,
                 filing_time_is_exact=False)
Ejemplo n.º 2
0
def get_file_list(filemin=0, list_length=100):
    """Collect filing numbers of cached files under FILECACHE_DIRECTORY.

    Walks FILECACHE_DIRECTORY recursively. A trailing ".fec" is removed
    from each file name and the remainder is read as an integer filing
    number; names that are not integers are skipped.

    Args:
        filemin: Filing numbers below this value are ignored.
        list_length: Maximum number of filing numbers to return.

    Returns:
        A list of at most ``list_length`` filing numbers, as strings, in
        the order ``os.walk`` yields the files.
    """
    selected = []
    if list_length <= 0:
        return selected

    suffix = ".fec"
    for _dirpath, _dirnames, filenames in os.walk(FILECACHE_DIRECTORY):
        for filename in filenames:
            # Strip the extension only when it is really a suffix.
            if filename.endswith(suffix):
                filingnum = filename[:-len(suffix)]
            else:
                filingnum = filename

            try:
                number = int(filingnum)
            except ValueError:
                # Stray non-filing file in the cache directory; ignore it
                # rather than aborting the whole listing.
                continue

            if number < filemin:
                continue

            selected.append(filingnum)
            if len(selected) >= list_length:
                # The list is full: stop walking the rest of the tree.
                return selected
    return selected

# Alternative input: file_list = get_file_list(767159, 1000)
# Time the header processing of some of the biggest files.
file_list = [838168, 824988, 840327, 821325, 798883, 804867, 827978, 754317]
start_time = time.time()
print(file_list)

count = 0
for count, filing_number in enumerate(file_list, 1):
    print("Processing #%s : %s" % (count, filing_number))
    process_filing_header(filing_number)

elapsed_time = time.time() - start_time
print("Elapsed time = %s seconds" % (elapsed_time))