def analyze(): c = Cube() print 'analyzing pickled cfda awards data...' print 'initalizing...' cfda_agency_map = {} for program in Program.objects.all(): cfda_agency_map[program.program_number] = program.agency_id print 'loading data...' f = open(DATA_DIR + 'cfda_awards.out.bin', 'rb') awards = pickle.load(f) f.close() print 'building cube...' for award_id in awards: award = awards[award_id] cfda = award['cfda'] if cfda_agency_map.has_key(cfda): agency = cfda_agency_map[cfda] fed_amount = award['fed_amount'] fiscal_year = award['fiscal_year'] reporting_lag = award['reporting_lag'] fiscal_year_lag = award['fiscal_year_lag'] # reporting lag of negative days converted to 0 reporting_lag = reporting_lag if reporting_lag > 0 else 0 # select only fy 2007-2009 inclusive if fiscal_year > 2009 or fiscal_year < 2007: continue # filter transactions that occur later than 180 days after the end of the fiscal year # this allows us to make a comparison between 2007/2008 and 2009. if fiscal_year_lag > 180: continue # add record to data cube c.add({'fy':fiscal_year, 'cfda':cfda, 'agency':agency}, {'days':reporting_lag, 'dollars':fed_amount}) awards = None print 'querying cfda aggregates...' result = c.query(groups=['cfda','fy']) print 'loading cfda results into db...' ProgramTimeliness.objects.all().delete() for cfda in result.values: program = Program.objects.get(program_number=cfda) for fy in range(2007, 2010): metric = ProgramTimeliness.objects.create(program=program, agency=program.agency, fiscal_year=fy, late_dollars=result.values[cfda].values[fy].get_data(aggregator=sum_dollars_45days_late), late_rows=result.values[cfda].values[fy].get_data(aggregator=count_records_45days_late), total_dollars=result.values[cfda].values[fy].get_data(aggregator=sum_dollars), total_rows=result.values[cfda].values[fy].get_data(aggregator=len), avg_lag_rows=result.values[cfda].values[fy].get_data(aggregator=avg_days_by_awards), avg_lag_dollars=result.values[cfda].values[fy].get_data(aggregator=avg_days_by_dollars)) metric.save() print 'querying agency aggregates...' result = c.query(groups=['agency','fy']) print 'loading agency results into db...' AgencyTimeliness.objects.all().delete() for agency_id in result.values: agency = Agency.objects.get(pk=agency_id) for fy in range(2007, 2010): metric = AgencyTimeliness.objects.create(agency=agency, fiscal_year=fy, late_dollars=result.values[agency_id].values[fy].get_data(aggregator=sum_dollars_45days_late), late_rows=result.values[agency_id].values[fy].get_data(aggregator=count_records_45days_late), total_dollars=result.values[agency_id].values[fy].get_data(aggregator=sum_dollars), total_rows=result.values[agency_id].values[fy].get_data(aggregator=len), avg_lag_rows=result.values[agency_id].values[fy].get_data(aggregator=avg_days_by_awards), avg_lag_dollars=result.values[agency_id].values[fy].get_data(aggregator=avg_days_by_dollars)) metric.save()
def analyzer_main(): print 'Loading data...' cfda_agency_map = dict( (p.program_number, p.agency_id) for p in Program.objects.all()) awards = unpickle(os.path.join(DATA_DIR, 'cfda_awards.out.bin')) print 'Building cube' c = Cube() for (idx, (award_id, award)) in enumerate(awards.iteritems()): # Simple progress ticker if idx % 1000 == 0: sys.stdout.write('.') sys.stdout.flush() cfda = award['cfda'] agency = cfda_agency_map.get(cfda, None) if agency: fed_amount = award['fed_amount'] fiscal_year = award['fiscal_year'] reporting_lag = award['reporting_lag'] fiscal_year_lag = award['fiscal_year_lag'] # select only fy 2007-2009 inclusive if fiscal_year not in FISCAL_YEARS: continue # We need to set an upper bound on the fiscal year lag in order to # make comparisons between fiscal years useful. if fiscal_year_lag > FISCAL_YEAR_LAG_THRESHOLD: continue # reporting lag of negative days converted to 0 reporting_lag = reporting_lag if reporting_lag > 0 else 0 # add record to data cube c.add({ 'fy': fiscal_year, 'cfda': cfda, 'agency': agency }, { 'days': reporting_lag, 'dollars': fed_amount }) awards = None print 'Querying cfda aggregates...' result = c.query(groups=['cfda', 'fy']) print 'Loading cfda results into db...' ProgramTimeliness.objects.all().delete() for (cfda, cfda_results) in result.values.iteritems(): program = Program.objects.get(program_number=cfda) for fy in FISCAL_YEARS: cfda_fy_results = cfda_results.values[fy] metric = ProgramTimeliness.objects.create( program=program, agency=program.agency, fiscal_year=fy, late_dollars=cfda_fy_results.get_data(sum_dollars_45days_late), late_rows=cfda_fy_results.get_data(count_records_45days_late), total_dollars=cfda_fy_results.get_data(sum_dollars), total_rows=cfda_fy_results.get_data(len), avg_lag_rows=cfda_fy_results.get_data(avg_days_by_awards), avg_lag_dollars=cfda_fy_results.get_data(avg_days_by_dollars)) if metric.total_dollars > 0: metric.late_pct = metric.late_dollars * 100 / metric.total_dollars metric.save() print 'Querying agency aggregates...' result = c.query(groups=['agency', 'fy']) print 'Loading agency results into db...' AgencyTimeliness.objects.all().delete() for (agency_id, agency_results) in result.values.iteritems(): agency = Agency.objects.get(pk=agency_id) for fy in FISCAL_YEARS: agency_fy_results = agency_results.values[fy] metric = AgencyTimeliness.objects.create( agency=agency, fiscal_year=fy, late_dollars=agency_fy_results.get_data( sum_dollars_45days_late), late_rows=agency_fy_results.get_data( count_records_45days_late), total_dollars=agency_fy_results.get_data(sum_dollars), total_rows=agency_fy_results.get_data(len), avg_lag_rows=agency_fy_results.get_data(avg_days_by_awards), avg_lag_dollars=agency_fy_results.get_data( avg_days_by_dollars)) if metric.total_dollars > 0: metric.late_pct = metric.late_dollars * 100 / metric.total_dollars metric.save()
def analyzer_main(): print 'Loading data...' cfda_agency_map = dict((p.program_number, p.agency_id) for p in Program.objects.all()) awards = unpickle(os.path.join(DATA_DIR, 'cfda_awards.out.bin')) print 'Building cube' c = Cube() for (idx, (award_id, award)) in enumerate(awards.iteritems()): # Simple progress ticker if idx % 1000 == 0: sys.stdout.write('.') sys.stdout.flush() cfda = award['cfda'] agency = cfda_agency_map.get(cfda, None) if agency: fed_amount = award['fed_amount'] fiscal_year = award['fiscal_year'] reporting_lag = award['reporting_lag'] fiscal_year_lag = award['fiscal_year_lag'] # select only fy 2007-2009 inclusive if fiscal_year not in FISCAL_YEARS: continue # We need to set an upper bound on the fiscal year lag in order to # make comparisons between fiscal years useful. if fiscal_year_lag > FISCAL_YEAR_LAG_THRESHOLD: continue # reporting lag of negative days converted to 0 reporting_lag = reporting_lag if reporting_lag > 0 else 0 # add record to data cube c.add({'fy':fiscal_year, 'cfda':cfda, 'agency':agency}, {'days':reporting_lag, 'dollars':fed_amount}) awards = None print 'Querying cfda aggregates...' result = c.query(groups=['cfda','fy']) print 'Loading cfda results into db...' ProgramTimeliness.objects.all().delete() for (cfda, cfda_results) in result.values.iteritems(): program = Program.objects.get(program_number=cfda) for fy in FISCAL_YEARS: cfda_fy_results = cfda_results.values[fy] metric = ProgramTimeliness.objects.create( program=program, agency=program.agency, fiscal_year=fy, late_dollars=cfda_fy_results.get_data(sum_dollars_45days_late), late_rows=cfda_fy_results.get_data(count_records_45days_late), total_dollars=cfda_fy_results.get_data(sum_dollars), total_rows=cfda_fy_results.get_data(len), avg_lag_rows=cfda_fy_results.get_data(avg_days_by_awards), avg_lag_dollars=cfda_fy_results.get_data(avg_days_by_dollars) ) if metric.total_dollars > 0: metric.late_pct = metric.late_dollars * 100 / metric.total_dollars metric.save() print 'Querying agency aggregates...' result = c.query(groups=['agency','fy']) print 'Loading agency results into db...' AgencyTimeliness.objects.all().delete() for (agency_id, agency_results) in result.values.iteritems(): agency = Agency.objects.get(pk=agency_id) for fy in FISCAL_YEARS: agency_fy_results = agency_results.values[fy] metric = AgencyTimeliness.objects.create( agency=agency, fiscal_year=fy, late_dollars=agency_fy_results.get_data(sum_dollars_45days_late), late_rows=agency_fy_results.get_data(count_records_45days_late), total_dollars=agency_fy_results.get_data(sum_dollars), total_rows=agency_fy_results.get_data(len), avg_lag_rows=agency_fy_results.get_data(avg_days_by_awards), avg_lag_dollars=agency_fy_results.get_data(avg_days_by_dollars) ) if metric.total_dollars > 0: metric.late_pct = metric.late_dollars * 100 / metric.total_dollars metric.save()