def algo11(db, ref):
    """Column L check: db 'Targeting' values must appear under ref 'Target'.

    Uses etl.colvals_notincol to collect the db 'Targeting' values that are
    not found in the ref 'Target' column.

    Returns:
        A (db, ref, message) tuple. db and ref are handed back as received;
        message is a header line followed by the unmatched values joined
        with commas.
    """
    db_col_loc = etl.find_in_header(db, 'Targeting')
    ref_col_loc = etl.find_in_header(ref, 'Target')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
    # Header spelling corrected (it used to read "Referece").
    return db, ref, 'Targets not in Reference\n' + ','.join(missing_names)
def algo6(db, ref):
    """Column G check: db 'District' values must appear under ref 'Admin1_District'.

    Uses etl.colvals_notincol to collect the db 'District' values that are
    not found in the ref 'Admin1_District' column.

    Returns:
        A (db, ref, message) tuple; message is a header line followed by the
        unmatched districts joined with commas.
    """
    district_col = etl.find_in_header(db, 'District')
    admin1_col = etl.find_in_header(ref, 'Admin1_District')
    unmatched = etl.colvals_notincol(db, district_col, ref, admin1_col)
    report = 'Districts not in the reference:\n' + ','.join(unmatched)
    return db, ref, report
def algo17(db, ref):
    """Column R check: db 'Activity Status' values must appear under ref 'Status'.

    Uses etl.colvals_notincol to collect the db 'Activity Status' values that
    are not found in the ref 'Status' column, then formats them with
    return_message.

    Returns:
        A (db, ref, message) tuple, where message is whatever return_message
        produces for the header text and the unmatched values.
    """
    status_col = etl.find_in_header(db, 'Activity Status')
    ref_status_col = etl.find_in_header(ref, 'Status')
    unmatched = etl.colvals_notincol(db, status_col, ref, ref_status_col)
    report = return_message('Activity Status not in Reference', unmatched)
    return db, ref, report
def algo9(db, ref):
    """Column J check: db 'Action type' values must appear under ref 'Action_Type'.

    Uses etl.colvals_notincol to collect the db 'Action type' values that are
    not found in the ref 'Action_Type' column.

    Returns:
        A (db, ref, message) tuple; message is a header line followed by the
        unmatched action types joined with commas.
    """
    action_col = etl.find_in_header(db, 'Action type')
    ref_action_col = etl.find_in_header(ref, 'Action_Type')
    unmatched = etl.colvals_notincol(db, action_col, ref, ref_action_col)
    report = 'Incorrect Action types:\n' + ','.join(unmatched)
    return db, ref, report
def algo11(db, ref):
    """Column L check: db 'Targeting' values must appear under ref 'Target'.

    Uses etl.colvals_notincol to collect the db 'Targeting' values that are
    not found in the ref 'Target' column, then formats them with
    return_message.

    Returns:
        A (db, ref, message) tuple, where message is whatever return_message
        produces for the header text and the unmatched values.
    """
    db_col_loc = etl.find_in_header(db, 'Targeting')
    ref_col_loc = etl.find_in_header(ref, 'Target')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
    # Header spelling corrected (it used to read "Referece").
    return db, ref, return_message('Targets not in Reference', missing_names)
def algo1(db, ref):
    """Column A check: db 'Implementing agency' must appear in the reference.

    Uses etl.colvals_notincol to collect the db 'Implementing agency' values
    that are not found in the ref 'Implementing_Agency_Name' column; those
    are only noted in the message, nothing is rewritten here.

    Returns:
        A (db, ref, message) tuple, where message is whatever return_message
        produces for the header text and the unmatched agencies.
    """
    agency_col = etl.find_in_header(db, 'Implementing agency')
    ref_agency_col = etl.find_in_header(ref, 'Implementing_Agency_Name')
    unmatched = etl.colvals_notincol(db, agency_col, ref, ref_agency_col)
    report = return_message('Agencies not in the reference:', unmatched)
    return db, ref, report
def algo9(db, ref):
    """Column J check: db 'Action type' values must appear under ref 'Action_Type'.

    Uses etl.colvals_notincol to collect the db 'Action type' values that are
    not found in the ref 'Action_Type' column, then formats them with
    return_message.

    Returns:
        A (db, ref, message) tuple, where message is whatever return_message
        produces for the header text and the unmatched action types.
    """
    action_col = etl.find_in_header(db, 'Action type')
    ref_action_col = etl.find_in_header(ref, 'Action_Type')
    unmatched = etl.colvals_notincol(db, action_col, ref, ref_action_col)
    report = return_message('Incorrect Action types:', unmatched)
    return db, ref, report
def algo3(db, ref):
    """Column C check: db 'Local partner agency' must appear in the reference.

    Uses etl.colvals_notincol to collect the db 'Local partner agency' values
    that are not found in the ref 'Local_Partner_Agency' column.

    TODO: the outcome is still undecided -- the collected names are not
    reported or returned, so this function currently returns None. The
    original idea was to check unmatched names for misspellings
    (like '%agencyname%').
    """
    partner_col = etl.find_in_header(db, 'Local partner agency')
    ref_partner_col = etl.find_in_header(ref, 'Local_Partner_Agency')
    unmatched = etl.colvals_notincol(db, partner_col, ref, ref_partner_col)
def algo17(db, ref):
    """Column R check: db 'Activity Status' values must appear under ref 'Status'.

    Uses etl.colvals_notincol to collect the db 'Activity Status' values that
    are not found in the ref 'Status' column.

    Returns:
        A (db, ref, message) tuple. db and ref are handed back as received;
        message is a header line followed by the unmatched values joined
        with commas.
    """
    db_col_loc = etl.find_in_header(db, 'Activity Status')
    ref_col_loc = etl.find_in_header(ref, 'Status')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
    # Header spelling corrected (it used to read "Referece").
    return db, ref, 'Activity Status not in Reference\n' + ','.join(
        missing_names)
def algo1(db, ref):
    """Column A check: db 'Implementing agency' must appear in the reference.

    Uses etl.colvals_notincol to collect the db 'Implementing agency' values
    that are not found in the ref 'Implementing_Agency_Name' column; those
    are only noted in the message, nothing is rewritten here.

    Returns:
        A (db, ref, message) tuple; message is a header line followed by the
        unmatched agencies joined with commas.
    """
    agency_col = etl.find_in_header(db, 'Implementing agency')
    ref_agency_col = etl.find_in_header(ref, 'Implementing_Agency_Name')
    unmatched = etl.colvals_notincol(db, agency_col, ref, ref_agency_col)
    report = 'Agencies not in the reference:\n' + ','.join(unmatched)
    return db, ref, report
def algo3(db, ref):
    """Column C check: db 'Local partner agency' must appear in the reference.

    Uses etl.colvals_notincol to collect the db 'Local partner agency' values
    that are not found in the ref 'Local_Partner_Agency' column.

    TODO: the outcome is still undecided -- the collected names are not
    reported or returned, so this function currently returns None. The
    original idea was to check unmatched names for misspellings
    (like '%agencyname%').
    """
    partner_col = etl.find_in_header(db, 'Local partner agency')
    ref_partner_col = etl.find_in_header(ref, 'Local_Partner_Agency')
    unmatched = etl.colvals_notincol(db, partner_col, ref, ref_partner_col)
def match_vdcs():
    """Compare VDC names between the "cbs" and "new" sheets of a workbook.

    Loads the ward-level workbook from a hard-coded local path, reads the
    district / VDC / ward columns of both sheets (skipping the first row of
    each column), prints the number of distinct values reported by
    etl.colvals_notincol for cbs column B against new column N alongside the
    number of distinct cbs VDC values, and then attempts a
    process.extract lookup for each of those unmatched values.

    NOTE(review): the list comprehension passed to process.extract has an
    incomplete condition (``r[1] == `` with no right-hand side), so this
    function does not parse as written. The intended filter has to be
    supplied before this can run.
    """
    #notes...
    #1651 VDCs without exact match
    w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local")
    #w = etl.pull_wb("/Users/ewanog/Downloads/test.xlsx", "local")
    #from cbs: col 1 (dist), col 2 (vdc), col 3 (ward)
    #from new: H (dist), N (VDC), O (ward)
    cbs = w.get_sheet_by_name("cbs")
    new = w.get_sheet_by_name("new")
    # [1:] drops the first cell of each column -- presumably the header row.
    cbs_dist = etl.get_values(cbs.columns[0][1:])
    cbs_vdc = etl.get_values(cbs.columns[1][1:])
    cbs_ward = etl.get_values(cbs.columns[2][1:])
    new_dist = etl.get_values(new.columns[7][1:])
    new_vdc = etl.get_values(new.columns[13][1:])
    new_ward = etl.get_values(new.columns[14][1:])
    # NOTE(review): ns, cb_zip, cbs_ward and new_ward are not used below.
    ns = set(new_vdc)
    #see vdc matches
    # (district, vdc) pairs for each sheet.
    cb_zip = zip(cbs_dist, cbs_vdc)
    new_zip = zip(new_dist, new_vdc)
    print len(set(etl.colvals_notincol(cbs,'B',new,'N')))
    print len(set(cbs_vdc))
    for v in set(etl.colvals_notincol(cbs,'B',new,'N')):
        # NOTE(review): comparison is missing its right-hand operand.
        # `process` is presumably a fuzzy-matching helper -- confirm import.
        r = process.extract(v, [r for r in new_zip if r[1] == ])
        print r[0][1]


if __name__ == '__main__':
    # Hard-coded switch between the worksheet dump and match_vdcs().
    read = False
    # NOTE(review): original indentation was lost; `else` is assumed to pair
    # with `if read`.
    if read:
        # NOTE(review): `w` and `get_vals` are not defined in the visible
        # part of the file -- confirm they exist at module level.
        for ws in w.worksheets:
            if ws['A44'].value:
                r = get_vals(ws)
                print r[-1][0]
                #pr(r)
    else:
        match_vdcs()
def algo8(db, ref):
    """Column I check: flag malformed db 'Municipal Ward' values.

    A ward value is acceptable when it appears under ref 'Wards', or when it
    is a number or several numbers separated by commas. Values reported by
    etl.colvals_notincol as absent from the reference are screened
    character by character; any value containing something other than a
    digit, a space or a comma is reported.

    Returns:
        A (db, ref, message) tuple; message is a header line followed by the
        malformed values, each wrapped in parentheses and joined with commas.
    """
    db_col_loc = etl.find_in_header(db, 'Municipal Ward')
    ref_col_loc = etl.find_in_header(ref, 'Wards')
    # Start from everything not in the reference and trim from there.
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # Built once per call; it used to be rebuilt for every character tested.
    allowed = frozenset(string.digits + ' ' + ',')
    # any() stops at the first illegal character, so each value is reported
    # at most once.
    invalid = [
        '(' + v + ')'
        for v in missing_names
        if any(letter not in allowed for letter in v)
    ]
    return db, ref, 'Malformed wards: \n' + ','.join(invalid)
def algo8(db, ref):
    """Column I check: flag malformed db 'Municipal Ward' values.

    A ward value is acceptable when it appears under ref 'Wards', or when it
    is a number or several numbers separated by commas. Values reported by
    etl.colvals_notincol as absent from the reference are screened
    character by character; any value containing something other than a
    digit, a space or a comma is reported.

    Returns:
        A (db, ref, message) tuple, where message is whatever return_message
        produces for the header text and the malformed values (each wrapped
        in parentheses).
    """
    db_col_loc = etl.find_in_header(db, 'Municipal Ward')
    ref_col_loc = etl.find_in_header(ref, 'Wards')
    # Start from everything not in the reference and trim from there.
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # Built once per call; it used to be rebuilt for every character tested.
    allowed = frozenset(string.digits + ' ' + ',')
    # any() stops at the first illegal character, so each value is reported
    # at most once.
    invalid = [
        '(' + v + ')'
        for v in missing_names
        if any(letter not in allowed for letter in v)
    ]
    return db, ref, return_message('Malformed wards: ', invalid)
def test_colvals_notincol(self):
    """colvals_notincol on column A of db vs ref yields the three 'notincluded' values."""
    expected = ('notincluded1', 'notincluded2', 'notincluded3')
    actual = tuple(etl.colvals_notincol(db, 'A', ref, 'A'))
    self.assertEqual(actual, expected)
def algo6(db, ref):
    """Column G check: db 'District' values must appear under ref 'Admin1_District'.

    Uses etl.colvals_notincol to collect the db 'District' values that are
    not found in the ref 'Admin1_District' column, then formats them with
    return_message.

    Returns:
        A (db, ref, message) tuple, where message is whatever return_message
        produces for the header text and the unmatched districts.
    """
    district_col = etl.find_in_header(db, 'District')
    admin1_col = etl.find_in_header(ref, 'Admin1_District')
    unmatched = etl.colvals_notincol(db, district_col, ref, admin1_col)
    report = return_message('Districts not in the reference:', unmatched)
    return db, ref, report