def import_programs(self, file): date = datetime.today() new_program_count = 0 f = open(file, 'rU') this_version = int(file[-9:-4]) #pull the date off of the programs csv file #updated kevin's regex to include 2010 funding re_funding = re.compile('FY ([0-1][0,6-9]{1,1})( est. | est | )[\$]([0-9,]+)') re_funding_type = re.compile('\((.*?)\)') re_exclude = re.compile('[sS]alaries') re_loan = re.compile('[lL]oan') re_guar = re.compile('[gG]uarantee') re_insur = re.compile('[iI]nsur') #regex to pull account numbers ONLY out of free text account = re.compile('[\d]{2}[-][\d]{4}[-][\d]{1}[-][\d]{1}[-][\d]{3}') re_writer = csv.writer(open('csv/regex_check.csv', 'w')) reader = csv.reader(f) reader.next() # skip headers while True: try: row = reader.next() except: break if not row: break if len(row) == 0 or len(row) < 10: continue program_number = row[1].strip() matching_programs = Program.objects.filter(program_number=program_number) if len(matching_programs)==0: matching_program = Program() new_program_count += 1 else: matching_program = matching_programs[0] try: agency = Agency.objects.get(code=int(program_number[:2])) matching_program.agency = agency except Exception,e: print "cfda program: %s, %s" % (program_number, e) for (i,s) in enumerate(self.FIELD_MAPPINGS): if s is None or i==2: continue elif s == 'obligations': # do obligations parsing try: clean_obs = smart_unicode(un.kill_gremlins(row[i])) matches = re_funding.findall(clean_obs) type_matches = re_funding_type.findall(clean_obs) edited = [] type_iter = iter(type_matches) if type_matches: curr_type = type_iter.next() else: curr_type = 'default' curr_year = '2000' for tuple in matches: year = '20' + tuple[0] if year < curr_year: try: curr_type = type_iter.next() except StopIteration: pass curr_year = year if len(re_exclude.findall(curr_type)) <= 0: obligation = tuple[2].replace(",", "") if len(re_guar.findall(curr_type)) > 0: type = 2 #guarantees and insurance had their own types but it was getting complicated so I collapsed them elif len(re_loan.findall(curr_type)) > 0: type = 2 elif len(re_insur.findall(curr_type)) > 0: type = 2 else: try: assist_types = matching_program.types_of_assistance.all() if assist_types[0].code == 6: type = 2 elif assist_types[0].code == 7: type = 2 elif assist_types[0].code == 5: type = 2 else: type = 1 except Exception: type = 1 matching_obligation = ProgramObligation.objects.filter(program=matching_program, fiscal_year=int(year), type=type) if len(matching_obligation) == 0 or matching_obligation[0].cfda_version <= this_version: try: #either it doesn't exist yet or this is a newer version of cfda if len(matching_obligation) == 0: matching_ob = ProgramObligation(program=matching_program, fiscal_year=int(year), type=type) else: matching_ob = matching_obligation[0] if not matching_ob.corrected: #if it's been corrected don't update it matching_ob.cfda_version = this_version if matching_ob in edited: #there are multiple line items for this type, year and program in the obligation text, so we add instead of replacing matching_ob = edited[edited.index(matching_ob)] matching_ob.obligation += int(obligation) else: matching_ob.obligation = int(obligation) edited.append(matching_ob) matching_ob.delta = (matching_ob.usaspending_obligation or 0) - (matching_ob.obligation or 0) try: matching_ob.weighted_delta = matching_ob.delta / matching_ob.obligation except: matching_ob.weighted_delta = 0 matching_ob.save() except Exception, e: print "in obs %s" % e except Exception, e: print "in obs exception %s" % e print "\n" elif s == 'types_of_assistance': # do extra assistance classifying test = '' try: asst_types = smart_unicode(un.kill_gremlins(row[i])).strip('.').split(';') for asst in asst_types: clean_asst = asst.lower().strip().replace("\n", "") for type_tuple in AssistanceType.CODE_OPTIONS: if clean_asst == type_tuple[1].lower(): matching_assistance_relations = matching_program.types_of_assistance.filter(code=type_tuple[0]) if len(matching_assistance_relations) == 0: #need to add matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0])) matching_program.save() test = 'match' elif len(type_tuple) > 2: for other_name in type_tuple[2]: if clean_asst == other_name.lower(): matching_assistance_relations = matching_program.types_of_assistance.filter(code=type_tuple[0]) if len(matching_assistance_relations) == 0: #need to add matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0])) matching_program.save() test = 'match' if test != 'match': print "Assistance type didn't match: %s" % asst test = '' except Exception, e: print str(e) + 'bla'
matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0])) matching_program.save() test = 'match' if test != 'match': print "Assistance type didn't match: %s" % asst test = '' except Exception, e: print str(e) + 'bla' elif s == 'account_identification': # do extra accounts parsing try: #account is a regex described above accts = account.findall(un.kill_gremlins(row[i])) for a in accts: matching_accounts = ProgramAccount.objects.filter(account_number=a) if len(matching_accounts) == 0: matching_account = ProgramAccount(account_number=a) matching_account.save() else: matching_account = matching_accounts[0] if matching_account not in matching_program.account_identification.all(): matching_program.account_identification.add(matching_account) matching_program.save() except Exception, e:
matching_ob.weighted_delta = str(matching_ob.weighted_delta) matching_ob.save() except Exception, e: print "in obs %s" % e except Exception, e: print "in obs exception %s" % e print "\n" elif s == 'types_of_assistance': # do extra assistance classifying test = '' try: asst_types = smart_unicode(un.kill_gremlins(row[i])).strip('.').split(';') for asst in asst_types: clean_asst = asst.lower().strip().replace("\n", "") for type_tuple in AssistanceType.CODE_OPTIONS: if clean_asst == type_tuple[1].lower(): matching_assistance_relations = matching_program.types_of_assistance.filter(code=type_tuple[0]) if len(matching_assistance_relations) == 0: #need to add matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0])) matching_program.save() test = 'match'
def import_programs(self, file): date = datetime.today() new_program_count = 0 f = open(file, 'rU') this_version = int( file[-9:-4]) #pull the date off of the programs csv file #updated kevin's regex to include 2010 funding re_funding = re.compile( 'FY ([20]*[0-1][0,1,6-9]{1,1})( est. | est | )[\$]([0-9,]+)') re_funding_type = re.compile('\((.*?)\)') re_exclude = re.compile('[sS]alaries') re_loan = re.compile('[lL]oan') re_guar = re.compile('[gG]uarantee') re_insur = re.compile('[iI]nsur') #regex to pull account numbers ONLY out of free text account = re.compile('[\d]{2}[-][\d]{4}[-][\d]{1}[-][\d]{1}[-][\d]{3}') re_writer = csv.writer(open('csv/regex_check.csv', 'w')) reader = csv.reader(f) reader.next() # skip headers while True: try: row = reader.next() except: break if not row: break if len(row) == 0 or len(row) < 10: continue program_number = row[1].strip() matching_programs = Program.objects.filter( program_number=program_number) if len(matching_programs) == 0: matching_program = Program() new_program_count += 1 else: matching_program = matching_programs[0] try: agency = Agency.objects.get(code=int(program_number[:2])) matching_program.agency = agency except Exception, e: print "cfda program: %s, %s" % (program_number, e) for (i, s) in enumerate(self.FIELD_MAPPINGS): if s is None or i == 2: continue elif s == 'obligations': # do obligations parsing try: clean_obs = smart_unicode(un.kill_gremlins(row[i])) matches = re_funding.findall(clean_obs) type_matches = re_funding_type.findall(clean_obs) edited = [] type_iter = iter(type_matches) if type_matches: curr_type = type_iter.next() else: curr_type = 'default' curr_year = '2000' for tuple in matches: if len(tuple[0]) == 2: year = '20' + tuple[0] if year < curr_year: try: curr_type = type_iter.next() except StopIteration: pass curr_year = year if len(re_exclude.findall(curr_type)) <= 0: obligation = tuple[2].replace(",", "") if len(re_guar.findall(curr_type)) > 0: type = 2 #guarantees and insurance had their own types but it was getting complicated so I collapsed them elif len(re_loan.findall(curr_type)) > 0: type = 2 elif len(re_insur.findall(curr_type)) > 0: type = 1 else: try: assist_types = matching_program.types_of_assistance.all( ) if assist_types[0].code == 6: type = 2 elif assist_types[0].code == 7: type = 2 elif assist_types[0].code == 5: type = 2 else: type = 1 except Exception: type = 1 matching_obligation = ProgramObligation.objects.filter( program=matching_program, fiscal_year=int(year), type=type) if len(matching_obligation ) == 0 or matching_obligation[ 0].cfda_version <= this_version: try: #either it doesn't exist yet or this is a newer version of cfda if len(matching_obligation) == 0: matching_ob = ProgramObligation( program=matching_program, fiscal_year=int(year), type=type) else: matching_ob = matching_obligation[ 0] if not matching_ob.corrected: #if it's been corrected don't update it matching_ob.cfda_version = this_version if matching_ob in edited: #there are multiple line items for this type, year and program in the obligation text, so we add instead of replacing matching_ob = edited[ edited.index(matching_ob)] matching_ob.obligation += int( obligation) else: matching_ob.obligation = int( obligation) edited.append(matching_ob) matching_ob.delta = ( matching_ob. usaspending_obligation or 0) - (matching_ob.obligation or 0) try: matching_ob.weighted_delta = float( matching_ob.delta) / float( matching_ob.obligation) except (ZeroDivisionError, DivisionByZero): if fabs(matching_ob.delta) > 0: matching_ob.weighted_delta = float( 1.0) else: matching_ob.weighted_delta = float( 0.0) except Exception, e: print "Generic exception: %s" % str( e) matching_ob.weighted_delta = float( 0.0) except: print "Untyped exception caught." matching_ob.weighted_delta = str( matching_ob.weighted_delta) matching_ob.save() except Exception, e: print "in obs %s" % e
matching_ob.weighted_delta = str( matching_ob.weighted_delta) matching_ob.save() except Exception, e: print "in obs %s" % e except Exception, e: print "in obs exception %s" % e print "\n" elif s == 'types_of_assistance': # do extra assistance classifying test = '' try: asst_types = smart_unicode(un.kill_gremlins( row[i])).strip('.').split(';') for asst in asst_types: clean_asst = asst.lower().strip().replace("\n", "") for type_tuple in AssistanceType.CODE_OPTIONS: if clean_asst == type_tuple[1].lower(): matching_assistance_relations = matching_program.types_of_assistance.filter( code=type_tuple[0]) if len(matching_assistance_relations) == 0: #need to add matching_program.types_of_assistance.add( AssistanceType.objects.get( code=type_tuple[0])) matching_program.save()
def import_programs(self, file_name): date = datetime.today() new_program_count = 0 new_programs = [] f = open(file_name, 'rU') this_version = int( file_name[-9:-4]) #pull the date off of the programs csv file reader = csv.reader(f) reader.next() # skip headers while True: try: row = reader.next() except: break if not row: break if len(row) == 0 or len(row) < 10: continue program_number = row[1].strip() program_title = row[0].strip() matching_programs = ProgramDescription.objects.filter( program_number=program_number) if len(matching_programs) == 0: matching_program = ProgramDescription() new_program_count += 1 new_programs.append("%s - %s" % (program_number, program_title)) print "new program: %s" % (program_number) else: matching_program = matching_programs[0] matching_program.agency = Agency.objects.get( cfda_code=program_number[:2]) for (i, s) in enumerate(self.FIELD_MAPPINGS): # try: prepared_string = smart_unicode(un.kill_gremlins(row[i]), errors='ignore') setattr(matching_program, s, prepared_string) if i == 1: #we have the program vitals, save so we can use as foreign key for other attributes matching_program.save() if i == 24: #print "parsing Obligation" self.parseObligations(prepared_string, matching_program, this_version) # except Exception, e: # print e # continue matching_program.save() f.close() mail_text = "CFDA programs added on %s\n" % datetime.now() for n in new_programs: mail_text += "%s\n" % n admins = [] for ad in settings.ADMINS: admins.append(ad[1]) if new_programs: send_mail("New CFDA Programs", mail_text, '*****@*****.**', admins, fail_silently=False) else: send_mail("No New CFDA Programs - Cron ran successfully", "", '*****@*****.**', admins, fail_silently=False) print "Run complete. \n%s new programs were added" % new_program_count