def handle(self, *args, **options):
    """Process the headers of cached .fec files for a fixed range of days.

    Walks every day from 2013-06-16 up to (but not including) 2013-06-18.
    For each day, ``filerange`` is consulted (keyed by a ``YYYYMMDD`` string)
    for the 'first' and 'last' filing numbers of that day; days without an
    entry are reported and skipped.  Every filing number in the inclusive
    first..last range whose ``<number>.fec`` file exists under
    FILECACHE_DIRECTORY is passed to ``process_filing_header``; absent files
    are reported and ignored.

    ``args`` and ``options`` are accepted but not used.
    """
    first_day = date(2013, 6, 16)
    stop_day = date(2013, 6, 18)  # exclusive upper bound

    for day_offset in range((stop_day - first_day).days):
        current_day = first_day + timedelta(days=day_offset)
        datestring = current_day.strftime("%Y%m%d")
        # The exact filing time is unknown here, so every filing of the day
        # is stamped 07:00 and flagged as inexact further down.
        entry_time = datetime(
            current_day.year, current_day.month, current_day.day, hour=7, minute=0
        )
        print("datestring %s" % (datestring))

        try:
            filing_info = filerange[datestring]
        except KeyError:
            print("Missing data for %s" % datestring)
            continue

        lowest = int(filing_info['first'])
        highest = int(filing_info['last'])
        for filenum in range(lowest, highest + 1):
            # Filing numbers are not contiguous; only files that are already
            # present in the local cache (assumed to come from a zipfile) are
            # entered, the rest are just reported.
            local_file_location = "%s/%s.fec" % (FILECACHE_DIRECTORY, filenum)
            if not path.isfile(local_file_location):
                print("!! missing file %s" % filenum)
                continue

            print("Processing %s" % (filenum))
            process_filing_header(
                filenum,
                fp=fp,
                filing_time=entry_time,
                filing_time_is_exact=False,
            )
def get_file_list(filemin=0, list_length=100):
    """Return filing numbers of cached files found under FILECACHE_DIRECTORY.

    The directory tree is walked with ``os.walk``; each file name has the
    text ".fec" removed and the remainder is interpreted as an integer
    filing number.

    Args:
        filemin: smallest filing number to include; lower numbers are skipped.
        list_length: maximum number of entries to return.

    Returns:
        A list of at most ``list_length`` filing numbers, as strings, in the
        order ``os.walk`` yields them.
    """
    selected = []
    if list_length <= 0:
        return selected

    for _dirpath, _dirnames, filenames in os.walk(FILECACHE_DIRECTORY):
        for filename in filenames:
            filing_number = filename.replace(".fec", "")
            try:
                numeric_id = int(filing_number)
            except ValueError:
                # Stray files whose names are not filing numbers (editor
                # backups, OS metadata, ...) must not abort the listing.
                continue
            if numeric_id < filemin:
                continue
            selected.append(filing_number)
            if len(selected) >= list_length:
                # Stop immediately: a plain `break` would only leave the
                # inner loop and keep walking the rest of the tree.
                return selected
    return selected


# Alternative input: file_list = get_file_list(767159, 1000)
# some of the biggest files
file_list = [838168, 824988, 840327, 821325, 798883, 804867, 827978, 754317]

# Time how long it takes to process the header of every listed filing.
start_time = time.time()
print(file_list)
count = 0
for count, i in enumerate(file_list, 1):
    print("Processing #%s : %s" % (count, i))
    process_filing_header(i)
elapsed_time = time.time() - start_time
print("Elapsed time = %s seconds" % (elapsed_time))