def algo15(db, ref):
    """Column P check: Female-headed household count must be a non-negative
    integer no greater than the Column N total household count.

    Returns (db, ref, message) where message lists the conflicting pairs.
    """
    cnt_col = etl.find_in_header(db, 'Total Number Households')
    fem_col = etl.find_in_header(db, 'Female headed households')
    # Pull both columns as strings (etl.xstr normalizes None etc.).
    cnt_vals = [etl.xstr(r[0].value)
                for r in db.iter_rows(cnt_col + "2:" +
                                      etl.find_last_value(db, cnt_col, 'c'))]
    fem_vals = [etl.xstr(r[0].value)
                for r in db.iter_rows(fem_col + "2:" +
                                      etl.find_last_value(db, fem_col, 'c'))]
    malformatted = []
    for pair in zip(cnt_vals, fem_vals):
        total, female = pair
        # Non-numeric values are flagged; short-circuit keeps int() safe,
        # then flag female counts exceeding the total.
        if not (total.isdigit() and female.isdigit()) or int(female) > int(total):
            malformatted.append(etl.xstr(pair))
    return db, ref, return_message(
        'Total #HH and Female #HH conflict or issue ', malformatted)
def algo16(db, ref):
    """Column Q check: Vulnerable Caste/Ethnicity household count must be a
    non-negative integer no greater than the Column N total household count.

    Returns (db, ref, message) where message lists the conflicting pairs.
    """
    total_col = etl.find_in_header(db, 'Total Number Households')
    vuln_col = etl.find_in_header(
        db, 'Vulnerable Caste / Ethnicity households ')
    # Read both columns top-to-bottom as strings.
    totals = [str(r[0].value)
              for r in db.iter_rows(total_col + "2:" +
                                    etl.find_last_value(db, total_col, 'c'))]
    vulns = [str(r[0].value)
             for r in db.iter_rows(vuln_col + "2:" +
                                   etl.find_last_value(db, vuln_col, 'c'))]
    malformatted = []
    for pair in zip(totals, vulns):
        total, vuln = pair
        # Flag non-numeric values; short-circuit keeps int() safe, then
        # flag vulnerable counts that exceed the total.
        if not (total.isdigit() and vuln.isdigit()) or int(vuln) > int(total):
            malformatted.append(str(pair))
    return db, ref, 'Total #HH and Vulnerable #HH conflict or issue \n' + ','.join(
        malformatted)
def algo10(db, ref):
    """Column J check: each (action type, action description) pair in the db
    must appear in the reference sheet.

    The reference sheet has an Action_Type column, and each action type names
    its own description column.  Returns (db, ref, message) listing pairs not
    found in the reference.
    """
    type_col = etl.find_in_header(db, 'Action type')
    desc_col = etl.find_in_header(db, 'Action description')
    ref_type_col = etl.find_in_header(ref, 'Action_Type')
    # Build the valid (type, description) pairs from the reference sheet.
    valid_pairs = []
    for type_row in ref.iter_rows(ref_type_col + "2:" +
                                  etl.find_last_value(ref, ref_type_col, 'c')):
        act = str(type_row[0].value)
        # Each action type labels its own description column in the reference.
        act_col = etl.find_in_header(ref, act)
        for desc_row in ref.iter_rows(act_col + "2:" +
                                      etl.find_last_value(ref, act_col, 'c')):
            valid_pairs.append((act, str(desc_row[0].value)))
    # Collect (type, description) pairs from the db; the two columns are
    # assumed adjacent so row[0]/row[1] line up.
    db_pairs = [(str(r[0].value), str(r[1].value))
                for r in db.iter_rows(type_col + "2:" +
                                      etl.find_last_value(db, desc_col, 'c'))]
    malformed_acts = [str(p) for p in db_pairs if p not in valid_pairs]
    return db, ref, 'Action descriptions not in action types:\n' + ','.join(
        malformed_acts)
def algo16(db, ref):
    """Column Q check: Vulnerable Caste/Ethnicity household count must be a
    non-negative integer no greater than the Column N total household count.

    Returns (db, ref, message) where message lists the conflicting pairs.
    """
    total_col = etl.find_in_header(db, 'Total Number Households')
    vuln_col = etl.find_in_header(db, 'Vulnerable Caste / Ethnicity households ')
    # Pull both columns as strings (etl.xstr normalizes None etc.).
    totals = [etl.xstr(r[0].value)
              for r in db.iter_rows(total_col + "2:" +
                                    etl.find_last_value(db, total_col, 'c'))]
    vulns = [etl.xstr(r[0].value)
             for r in db.iter_rows(vuln_col + "2:" +
                                   etl.find_last_value(db, vuln_col, 'c'))]
    malformatted = []
    for pair in zip(totals, vulns):
        total, vuln = pair
        # Flag non-numeric values; short-circuit keeps int() safe, then
        # flag vulnerable counts that exceed the total.
        if not (total.isdigit() and vuln.isdigit()) or int(vuln) > int(total):
            malformatted.append(etl.xstr(pair))
    return db, ref, return_message(
        'Total #HH and Vulnerable #HH conflict or issue ', malformatted)
def algo7(db, ref):
    """Column H check: each (district, VDC) pair in the db must match a pair
    in the reference admin tables.

    Returns (db, ref, message) listing db pairs not found in the reference.
    """
    dist_col = etl.find_in_header(db, 'District')
    vdc_col = etl.find_in_header(db, 'VDC / Municipalities')
    # NOTE(review): looked up but column 'E' is hard-coded below — confirm.
    ref_dist_col = etl.find_in_header(ref, 'Admin1_District')
    ref_vdc_col = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality')
    # Gather (district, vdc) pairs from the db (columns assumed adjacent).
    db_pairs = [(str(r[0].value), str(r[1].value))
                for r in db.iter_rows(dist_col + "2:" +
                                      etl.find_last_value(db, vdc_col, 'c'))]
    # Reference districts and VDCs are not adjacent columns, so collect the
    # districts first, then extend each tuple with its VDC by position.
    ref_pairs = [(str(r[0].value),)
                 for r in ref.iter_rows('E2:' + etl.find_last_value(ref, 'E', 'c'))]
    for idx, r in enumerate(ref.iter_rows(ref_vdc_col + "2:" +
                                          etl.find_last_value(ref, ref_vdc_col, 'c'))):
        ref_pairs[idx] += (str(r[0].value.encode("utf8")),)
    malformed_vdc = [str(p) for p in db_pairs if p not in ref_pairs]
    return db, ref, 'VDCs in incorrect districts:\n' + ','.join(malformed_vdc)
def algo10(db, ref):
    """Column J check: each (action type, action description) pair in the db
    must appear in the reference sheet.

    The reference sheet has an Action_Type column, and each action type names
    its own description column.  Returns (db, ref, message) listing pairs not
    found in the reference.
    """
    type_col = etl.find_in_header(db, 'Action type')
    desc_col = etl.find_in_header(db, 'Action description')
    ref_type_col = etl.find_in_header(ref, 'Action_Type')
    # Build the valid (type, description) pairs from the reference sheet.
    valid_pairs = []
    for type_row in ref.iter_rows(ref_type_col + "2:" +
                                  etl.find_last_value(ref, ref_type_col, 'c')):
        act = etl.xstr(type_row[0].value)
        # Each action type labels its own description column in the reference.
        act_col = etl.find_in_header(ref, act)
        for desc_row in ref.iter_rows(act_col + "2:" +
                                      etl.find_last_value(ref, act_col, 'c')):
            valid_pairs.append((act, etl.xstr(desc_row[0].value)))
    # Collect (type, description) pairs from the db; the two columns are
    # assumed adjacent so row[0]/row[1] line up.
    db_pairs = [(etl.xstr(r[0].value), etl.xstr(r[1].value))
                for r in db.iter_rows(type_col + "2:" +
                                      etl.find_last_value(db, desc_col, 'c'))]
    malformed_acts = [etl.xstr(p) for p in db_pairs if p not in valid_pairs]
    return db, ref, return_message('Action descriptions not in action types:',
                                   malformed_acts)
def algo5(db, ref):
    """If Local partner agency (col C) contains Implementing agency (col A)
    as a proper substring, strip the match plus all spaces, commas and dashes
    from col C in place.

    Returns (db, ref, message) listing the original col C values changed.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    lpa_col = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []
    for row in db.iter_rows(impl_col + "2:" +
                            etl.find_last_value(db, lpa_col, 'c')):
        impl = etl.xstr(row[0].value)
        lpa = etl.xstr(row[2].value)
        # Proper substring only — exact matches are handled elsewhere (algo4).
        if impl in lpa and impl != lpa:
            vals_changed.append(row[2].value)
            cleaned = row[2].value.replace(row[0].value, '')
            cleaned = cleaned.replace(' ', '').replace(',', '').replace('-', '')
            row[2].value = cleaned
    return db, ref, return_message('IA contained in LPA for the following:',
                                   vals_changed)
def algo2(db, ref):
    """Column B fix-up: when Sourcing Agency equals Implementing Agency,
    rewrite the sourcing cell as 'INTERNAL'.

    Assumes the two columns are adjacent.  Returns (db, ref, message) listing
    the original sourcing values that were replaced.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    src_col = etl.find_in_header(db, 'Sourcing Agency')
    vals_changed = []
    for row in db.iter_rows(impl_col + "2:" +
                            etl.find_last_value(db, src_col, 'c')):
        if row[0].value != row[1].value:
            continue
        vals_changed.append(str(row[1].value))
        row[1].value = 'INTERNAL'
    return db, ref, 'Sourcing Agencies set as INTERNAL:\n' + ','.join(
        vals_changed)
def algo12(db, ref):
    """Column M check: '# Items / # Man-hours / NPR' must be a non-negative
    integer.

    Non-numeric values are reported verbatim; blank cells are reported with
    their coordinate.  Returns (db, ref, message).
    """
    db_col_loc = etl.find_in_header(db, '# Items / # Man-hours / NPR')
    bad_vals = []
    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        # isdigit() rejects negatives, decimals and text — enforces int >= 0.
        if not etl.xstr(row[0].value).isdigit():
            if row[0].value is None:  # fixed: identity test, not '== None'
                bad_vals.append('Blank @ cell' + row[0].coordinate)
            else:
                bad_vals.append(etl.xstr(row[0].value))
    return db, ref, return_message('Invalid numbers for # Items / # Man-hours / NPR',
                                   bad_vals)
def algo2(db, ref):
    """Column B fix-up: when Sourcing Agency equals Implementing Agency,
    rewrite the sourcing cell as 'INTERNAL'.

    Assumes the two columns are adjacent.  Returns (db, ref, message) listing
    the original sourcing values that were replaced.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    src_col = etl.find_in_header(db, 'Sourcing Agency')
    vals_changed = []
    for row in db.iter_rows(impl_col + "2:" +
                            etl.find_last_value(db, src_col, 'c')):
        if row[0].value != row[1].value:
            continue
        vals_changed.append(etl.xstr(row[1].value))
        row[1].value = 'INTERNAL'
    return db, ref, return_message('Sourcing Agencies set as INTERNAL:',
                                   vals_changed)
def algo13(db, ref):
    """Column N check: 'Total Number Households' must be a non-negative
    integer.

    Non-numeric values are reported verbatim; blank cells are reported with
    their coordinate.  Returns (db, ref, message).
    """
    db_col_loc = etl.find_in_header(db, 'Total Number Households')
    bad_vals = []
    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        # isdigit() rejects negatives, decimals and text — enforces int >= 0.
        if not etl.xstr(row[0].value).isdigit():
            if row[0].value is None:  # fixed: identity test, not '== None'
                bad_vals.append('Blank@ ' + row[0].coordinate)
            else:
                bad_vals.append(etl.xstr(row[0].value))
    return db, ref, return_message('Invalid numbers for Total Number Households',
                                   bad_vals)
def algo14(db, ref):
    """Column O check: 'Average cost per households (NPR)' must be a
    non-negative integer.

    Non-numeric values (including decimals, which isdigit rejects) are
    reported verbatim; blank cells are reported with their coordinate.
    Returns (db, ref, message).
    """
    db_col_loc = etl.find_in_header(db, 'Average cost per households (NPR)')
    bad_vals = []
    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        # isdigit() rejects negatives, decimals and text — enforces int >= 0.
        if not str(row[0].value).isdigit():
            if row[0].value is None:  # fixed: identity test, not '== None'
                bad_vals.append('Blank@ ' + row[0].coordinate)
            else:
                bad_vals.append(str(row[0].value))
    return db, ref, 'Invalid numbers for Average cost per households (NPR)\n' + ','.join(
        bad_vals)
def algo13(db, ref):
    """Column N check: 'Total Number Households' must be a non-negative
    integer.

    Non-numeric values are reported verbatim; blank cells are reported with
    their coordinate.  Returns (db, ref, message).
    """
    db_col_loc = etl.find_in_header(db, 'Total Number Households')
    bad_vals = []
    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        # isdigit() rejects negatives, decimals and text — enforces int >= 0.
        if not etl.xstr(row[0].value).isdigit():
            if row[0].value is None:  # fixed: identity test, not '== None'
                bad_vals.append('Blank@ ' + row[0].coordinate)
            else:
                bad_vals.append(etl.xstr(row[0].value))
    return db, ref, return_message(
        'Invalid numbers for Total Number Households', bad_vals)
def algo7(db, ref):
    """Column H check: each (district, VDC) pair in the db must match a pair
    in the reference admin tables.

    Returns (db, ref, message) listing db pairs not found in the reference.
    """
    dist_col = etl.find_in_header(db, 'District')
    vdc_col = etl.find_in_header(db, 'VDC / Municipalities')
    # NOTE(review): looked up but column 'E' is hard-coded below — confirm.
    ref_dist_col = etl.find_in_header(ref, 'Admin1_District')
    ref_vdc_col = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality')
    # Gather (district, vdc) pairs from the db (columns assumed adjacent).
    db_pairs = [(etl.xstr(r[0].value), etl.xstr(r[1].value))
                for r in db.iter_rows(dist_col + "2:" +
                                      etl.find_last_value(db, vdc_col, 'c'))]
    # Reference districts and VDCs are not adjacent columns, so collect the
    # districts first, then extend each tuple with its VDC by position.
    ref_pairs = [(etl.xstr(r[0].value),)
                 for r in ref.iter_rows('E2:' + etl.find_last_value(ref, 'E', 'c'))]
    for idx, r in enumerate(ref.iter_rows(ref_vdc_col + "2:" +
                                          etl.find_last_value(ref, ref_vdc_col, 'c'))):
        ref_pairs[idx] += (etl.xstr(r[0].value.encode("utf8")),)
    malformed_vdc = [etl.xstr(p) for p in db_pairs if p not in ref_pairs]
    return db, ref, return_message('VDCs in incorrect districts:',
                                   malformed_vdc)
def algo4(db, ref):
    """If Local partner agency (col C) exactly equals Implementing agency
    (col A), clear col C in place.

    Returns (db, ref, message) listing the cleared values.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    lpa_col = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []
    for row in db.iter_rows(impl_col + "2:" +
                            etl.find_last_value(db, lpa_col, 'c')):
        if etl.xstr(row[0].value) != etl.xstr(row[2].value):
            continue
        vals_changed.append(etl.xstr(row[2].value))
        row[2].value = ''
    return db, ref, return_message(
        'Local Partner Agencies matched Implementing and were cleared',
        vals_changed)
def algo12(db, ref):
    """Column M check: '# Items / # Man-hours / NPR' must be a non-negative
    integer.

    Non-numeric values are reported verbatim; blank cells are reported with
    their coordinate.  Returns (db, ref, message).
    """
    db_col_loc = etl.find_in_header(db, '# Items / # Man-hours / NPR')
    bad_vals = []
    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        # isdigit() rejects negatives, decimals and text — enforces int >= 0.
        if not str(row[0].value).isdigit():
            if row[0].value is None:  # fixed: identity test, not '== None'
                bad_vals.append('Blank @ cell' + row[0].coordinate)
            else:
                bad_vals.append(str(row[0].value))
    return db, ref, 'Invalid numbers for # Items / # Man-hours / NPR\n' + ','.join(
        bad_vals)
def algo5(db, ref):
    """If Local partner agency (col C) contains Implementing agency (col A)
    as a proper substring, strip the match plus all spaces, commas and dashes
    from col C in place.

    Returns (db, ref, message) listing the original col C values changed.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    lpa_col = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []
    for row in db.iter_rows(impl_col + "2:" +
                            etl.find_last_value(db, lpa_col, 'c')):
        impl = str(row[0].value)
        lpa = str(row[2].value)
        # Proper substring only — exact matches are handled elsewhere (algo4).
        if impl in lpa and impl != lpa:
            vals_changed.append(row[2].value)
            cleaned = row[2].value.replace(row[0].value, '')
            cleaned = cleaned.replace(' ', '').replace(',', '').replace('-', '')
            row[2].value = cleaned
    return db, ref, 'IA contained in LPA for the following:\n' + ','.join(
        vals_changed)
def algo4(db, ref):
    """If Local partner agency (col C) exactly equals Implementing agency
    (col A), clear col C in place.

    Returns (db, ref, message) listing the cleared values.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    lpa_col = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []
    for row in db.iter_rows(impl_col + "2:" +
                            etl.find_last_value(db, lpa_col, 'c')):
        if str(row[0].value) != str(row[2].value):
            continue
        vals_changed.append(str(row[2].value))
        row[2].value = ''
    return db, ref, 'Local Partner Agencies matched Implementing and were cleared\n' + ','.join(
        vals_changed)
def algo19(db, ref):
    """Column T check: completion date must be >= 25/04/2015 and >= the
    start date in Column S.

    Unparseable dates (completion or start) are also flagged.  Returns
    (db, ref, message) listing offending dates with their cell coordinates.
    """
    comp_col_loc = etl.find_in_header(db, 'Completion Date\n (Actual or Planned)')
    start_col_loc = etl.find_in_header(db, 'Start date \n(Actual or Planned)')
    bad_date = []
    # Hoist the invariant cutoff (the 2015 earthquake date) out of the loop.
    cutoff = parse('4-25-2015')
    for row in db.iter_rows(comp_col_loc + "2:" +
                            etl.find_last_value(db, comp_col_loc, 'c')):
        cur = str(row[0].value)
        try:
            completion = parse(cur)
            start = parse(str(db[start_col_loc + str(row[0].row)].value))
            if completion < cutoff or completion < start:
                bad_date.append(cur + ' @ cell ' + row[0].coordinate)
        except Exception:
            # Either date failed to parse — treat the row as malformed.
            # (Fixed: py2-only 'except Exception, e:' bound an unused name.)
            bad_date.append(cur + ' @ cell ' + row[0].coordinate)
    # BUG FIX: the original built bad_date but never returned, yielding None
    # while every sibling algo returns the (db, ref, message) triple.
    return db, ref, 'Malformatted date\n' + ','.join(bad_date)
def algo18(db, ref):
    """Column S check: start date must be a parseable date >= 25/04/2015.

    Falls back to a plain 'Start date' header when the multi-line header is
    absent.  Returns (db, ref, message) listing offending dates with their
    cell coordinates.
    """
    date_col_loc = etl.find_in_header(db, 'Start date \n(Actual or Planned)')
    if date_col_loc is None:
        date_col_loc = etl.find_in_header(db, 'Start date')
    bad_date = []
    # Hoist the invariant cutoff (the 2015 earthquake date) out of the loop.
    cutoff = parse('4-25-2015')
    for row in db.iter_rows(date_col_loc + "2:" +
                            etl.find_last_value(db, date_col_loc, 'c')):
        cur = str(row[0].value)
        try:
            if parse(cur) < cutoff:
                bad_date.append(cur + ' @ cell ' + row[0].coordinate)
        except Exception:
            # BUG FIX: a bare 'except:' also swallowed KeyboardInterrupt and
            # SystemExit; only parse failures should mark the row malformed.
            bad_date.append(cur + ' @ cell ' + row[0].coordinate)
    return db, ref, 'Malformatted date\n' + ','.join(bad_date)
def test_find_last_value_column(self):
    """Scanning column Z of the reference sheet should end at cell Z11."""
    last_cell = etl.find_last_value(ref, 'Z', 'c')
    self.assertEqual(last_cell, 'Z11')
def test_find_last_value_row(self):
    """Scanning row 1 from column A of the reference sheet should end at Z1."""
    last_cell = etl.find_last_value(ref, 'A', 'r')
    self.assertEqual(last_cell, 'Z1')