def algo1(db, ref): #***Column A must be in Reference>ImplementingAgency if not: make a note db_col_loc = etl.find_in_header(db, 'Implementing agency') ref_col_loc = etl.find_in_header(ref,'Implementing_Agency_Name') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, return_message('Agencies not in the reference:' ,missing_names)
def algo11(db,ref): #Column L: must be in Reference>Target db_col_loc = etl.find_in_header(db, 'Targeting') ref_col_loc = etl.find_in_header(ref,'Target') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, return_message('Targets not in Referece' ,missing_names)
def algo10(db,ref): #column J: must be in reference>ActionDescription relative to Action_Type #There are 4 action types each with their own self labeled column db_type_loc = etl.find_in_header(db, 'Action type') db_desc_loc = etl.find_in_header(db, 'Action description') ref_col_loc = etl.find_in_header(ref,'Action_Type') #get ref action type tuples act_types_tups = [] #go through action types and go through respected cols and make tuples for row in ref.iter_rows(ref_col_loc + "2:" + etl.find_last_value(ref, ref_col_loc, 'c')): act = etl.xstr(row[0].value) act_col = etl.find_in_header(ref, act) for row in ref.iter_rows(act_col + "2:" + etl.find_last_value(ref, act_col, 'c')): act_types_tups.append((act, etl.xstr(row[0].value))) #get tuples of dist actions type_desc_tup = [] for row in db.iter_rows(db_type_loc + "2:" + etl.find_last_value(db, db_desc_loc, 'c')): type_desc_tup.append((etl.xstr(row[0].value),etl.xstr(row[1].value))) malformed_acts = [] #go through dist actions and see if they're in ref tuples for tup in type_desc_tup: if tup not in act_types_tups: malformed_acts.append(etl.xstr(tup)) return db, ref, return_message('Action descriptions not in action types:' ,malformed_acts)
def algo15(db, ref): #Column P: must be a number>=0 and <= Column N (more conditions to follow) db_cnt_loc = etl.find_in_header(db, 'Total Number Households') db_fem_loc = etl.find_in_header(db, 'Female headed households') #get count values cnt_vals = [] for row in db.iter_rows(db_cnt_loc + "2:" + etl.find_last_value(db, db_cnt_loc, 'c')): cnt_vals.append(etl.xstr(row[0].value)) #get female values fem_vals = [] for row in db.iter_rows(db_fem_loc + "2:" + etl.find_last_value(db, db_fem_loc, 'c')): fem_vals.append(etl.xstr(row[0].value)) malformatted = [] #zip and check values for vals in zip(cnt_vals, fem_vals): if not (vals[0].isdigit() and vals[1].isdigit()): malformatted.append(etl.xstr(vals)) elif int(vals[1]) > int(vals[0]): malformatted.append(etl.xstr(vals)) return db, ref, return_message( 'Total #HH and Female #HH conflict or issue ', malformatted)
def algo16(db, ref): #Column Q: must be a number>=0 and <= Column N (more conditions to follow) db_cnt_loc = etl.find_in_header(db, 'Total Number Households') db_vul_loc = etl.find_in_header( db, 'Vulnerable Caste / Ethnicity households ') #get count values cnt_vals = [] for row in db.iter_rows(db_cnt_loc + "2:" + etl.find_last_value(db, db_cnt_loc, 'c')): cnt_vals.append(str(row[0].value)) #get female values vul_vals = [] for row in db.iter_rows(db_vul_loc + "2:" + etl.find_last_value(db, db_vul_loc, 'c')): vul_vals.append(str(row[0].value)) malformatted = [] #zip and check values for vals in zip(cnt_vals, vul_vals): if not (vals[0].isdigit() and vals[1].isdigit()): malformatted.append(str(vals)) elif int(vals[1]) > int(vals[0]): malformatted.append(str(vals)) return db, ref, 'Total #HH and Vulnerable #HH conflict or issue \n' + ','.join( malformatted)
def algo9(db, ref): #column J: must be in reference>Type of Activity db_col_loc = etl.find_in_header(db, 'Action type') ref_col_loc = etl.find_in_header(ref, 'Action_Type') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, 'Incorrect Action types:\n' + ','.join(missing_names)
def algo20(db, ref): #Output if completion date < current date and activity status not like completed status_col_loc = column_index_from_string( etl.find_in_header(db, 'Activity Status')) - 1 comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1 bad_date = [] valid = True comp = "" for row in db.rows[1:]: try: comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \ month = int(etl.xstr(row[comp_col_loc+1].value)), \ year = int(etl.xstr(row[comp_col_loc+2].value))) except: valid = False if valid: try: if comp < datetime.now( ) and 'completed' not in row[status_col_loc].value.lower(): bad_date.append('%s row %i' % (str(comp), row[0].row)) except: bad_date.append('%s row %i' % (str(comp), row[0].row)) return db, ref, return_message( 'Bad date or Completion Date has passed for ', bad_date)
def algo19(db, ref): #Column T: must be a date>= 25/04/2015 and >=Column S start_col_loc = column_index_from_string( etl.find_in_header(db, 'DD - Start')) - 1 comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1 bad_date = [] start = "" comp = "" for row in db.rows[1:]: try: start = datetime(day = int(etl.xstr(row[start_col_loc].value)), \ month = int(etl.xstr(row[start_col_loc+1].value)), \ year = int(etl.xstr(row[start_col_loc+2].value))) comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \ month = int(etl.xstr(row[comp_col_loc+1].value)), \ year = int(etl.xstr(row[comp_col_loc+2].value))) if comp < start: bad_date.append(str(start) + ' @ row ' + str(row[0].row)) except: bad_date.append(str(start) + ' @ row ' + str(row[0].row)) return db, ref, return_message('Malformatted date or before Start Date', bad_date)
def algo10(db, ref): #column J: must be in reference>ActionDescription relative to Action_Type #There are 4 action types each with their own self labeled column db_type_loc = etl.find_in_header(db, 'Action type') db_desc_loc = etl.find_in_header(db, 'Action description') ref_col_loc = etl.find_in_header(ref, 'Action_Type') #get ref action type tuples act_types_tups = [] #go through action types and go through respected cols and make tuples for row in ref.iter_rows(ref_col_loc + "2:" + etl.find_last_value(ref, ref_col_loc, 'c')): act = str(row[0].value) act_col = etl.find_in_header(ref, act) for row in ref.iter_rows(act_col + "2:" + etl.find_last_value(ref, act_col, 'c')): act_types_tups.append((act, str(row[0].value))) #get tuples of dist actions type_desc_tup = [] for row in db.iter_rows(db_type_loc + "2:" + etl.find_last_value(db, db_desc_loc, 'c')): type_desc_tup.append((str(row[0].value), str(row[1].value))) malformed_acts = [] #go through dist actions and see if they're in ref tuples for tup in type_desc_tup: if tup not in act_types_tups: malformed_acts.append(str(tup)) return db, ref, 'Action descriptions not in action types:\n' + ','.join( malformed_acts)
def algo6(db, ref): #Column G: must be in Reference>District db_col_loc = etl.find_in_header(db, 'District') ref_col_loc = etl.find_in_header(ref, 'Admin1_District') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, 'Districts not in the reference:\n' + ','.join( missing_names)
def algo9(db,ref): #column J: must be in reference>Type of Activity db_col_loc = etl.find_in_header(db, 'Action type') ref_col_loc = etl.find_in_header(ref,'Action_Type') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, return_message('Incorrect Action types:' ,missing_names)
def algo11(db, ref): #Column L: must be in Reference>Target db_col_loc = etl.find_in_header(db, 'Targeting') ref_col_loc = etl.find_in_header(ref, 'Target') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, 'Targets not in Referece\n' + ','.join(missing_names)
def reach(self, db): """how is the vdc accessible""" #TODO: use index() method instead of dict dist_loc = column_index_from_string(etl.find_in_header(db, 'District')) - 1 vdc_loc = column_index_from_string( etl.find_in_header(db, 'VDC / Municipalities')) - 1 acc_look = column_index_from_string( etl.find_in_header(self.s_dict['acc'], 'DistrictVDC Concatenation')) - 1 acc_acc = column_index_from_string( etl.find_in_header(self.s_dict['acc'], 'NeKSAP ACCESS')) - 1 dist_vals = etl.get_values(db.columns[dist_loc][1:]) vdc_vals = etl.get_values(db.columns[vdc_loc][1:]) acc_look_vals = etl.get_values( self.s_dict['acc'].columns[acc_look][1:]) acc_acc_vals = etl.get_values(self.s_dict['acc'].columns[acc_acc][1:]) #make dict for access, concatenate dist and vdc acc_dict = dict(zip(acc_look_vals, acc_acc_vals)) d_v_conc = [dist_vals[i] + vdc_vals[i] for i in xrange(len(dist_vals))] ret = [] for v in d_v_conc: if v in acc_dict and v != '': ret.append(acc_dict[v]) else: ret.append('') return ret
def algo20(db,ref): #Output if completion date < current date and activity status not like completed status_col_loc = column_index_from_string(etl.find_in_header(db, 'Activity Status')) - 1 comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1 bad_date = [] valid = True comp = "" for row in db.rows[1:]: try: comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \ month = int(etl.xstr(row[comp_col_loc+1].value)), \ year = int(etl.xstr(row[comp_col_loc+2].value))) except: valid = False if valid: try: if comp < datetime.now() and 'completed' not in row[status_col_loc].value.lower(): bad_date.append('%s row %i' % (str(comp),row[0].row)) except: bad_date.append('%s row %i' % (str(comp),row[0].row)) return db, ref, return_message('Bad date or Completion Date has passed for ' ,bad_date)
def algo19(db,ref): #Column T: must be a date>= 25/04/2015 and >=Column S start_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - Start')) - 1 comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1 bad_date = [] start = "" comp = "" for row in db.rows[1:]: try: start = datetime(day = int(etl.xstr(row[start_col_loc].value)), \ month = int(etl.xstr(row[start_col_loc+1].value)), \ year = int(etl.xstr(row[start_col_loc+2].value))) comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \ month = int(etl.xstr(row[comp_col_loc+1].value)), \ year = int(etl.xstr(row[comp_col_loc+2].value))) if comp < start: bad_date.append(str(start) + ' @ row ' + str(row[0].row)) except: bad_date.append(str(start) + ' @ row ' + str(row[0].row)) return db, ref, return_message('Malformatted date or before Start Date', bad_date)
def algo7(db, ref): #Column H: must be in Reference>VDC - relative to districy #the district in which a vdc in the db is located must = the district in ref db_dist_loc = etl.find_in_header(db, 'District') db_vdc_loc = etl.find_in_header(db, 'VDC / Municipalities') ref_dist_loc = etl.find_in_header(ref, 'Admin1_District') ref_vdc_loc = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality') #create tuple of vdc, dist for db and ref db_tuple = [] for row in db.iter_rows(db_dist_loc + "2:" + etl.find_last_value(db, db_vdc_loc, 'c')): db_tuple.append((str(row[0].value), str(row[1].value))) ref_tuple = [] #do it just for dist, then for vdc as they're not neighbors for row in ref.iter_rows('E2:' + etl.find_last_value(ref, 'E', 'c')): ref_tuple.append((str(row[0].value), )) it = 0 for row in ref.iter_rows(ref_vdc_loc + "2:" + etl.find_last_value(ref, ref_vdc_loc, 'c')): ref_tuple[it] += (str(row[0].value.encode("utf8")), ) it += 1 #iterate through db_tuple and see if tuple is in ref malformed_vdc = [] for tup in db_tuple: if tup not in ref_tuple: malformed_vdc.append(str(tup)) return db, ref, 'VDCs in incorrect districts:\n' + ','.join(malformed_vdc)
def algo17(db,ref): #column R: must be in reference>Status db_col_loc = etl.find_in_header(db, 'Activity Status') ref_col_loc = etl.find_in_header(ref,'Status') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, return_message('Activity Status not in Reference' ,missing_names)
def algo16(db,ref): #Column Q: must be a number>=0 and <= Column N (more conditions to follow) db_cnt_loc = etl.find_in_header(db, 'Total Number Households') db_vul_loc = etl.find_in_header(db, 'Vulnerable Caste / Ethnicity households ') #get count values cnt_vals = [] for row in db.iter_rows(db_cnt_loc + "2:" + etl.find_last_value(db, db_cnt_loc, 'c')): cnt_vals.append(etl.xstr(row[0].value)) #get female values vul_vals = [] for row in db.iter_rows(db_vul_loc + "2:" + etl.find_last_value(db, db_vul_loc, 'c')): vul_vals.append(etl.xstr(row[0].value)) malformatted = [] #zip and check values for vals in zip(cnt_vals, vul_vals): if not (vals[0].isdigit() and vals[1].isdigit()): malformatted.append(etl.xstr(vals)) elif int(vals[1]) > int(vals[0]): malformatted.append(etl.xstr(vals)) return db, ref, return_message('Total #HH and Vulnerable #HH conflict or issue ' ,malformatted)
def algo3(db,ref): #* what return? change? #Column C: must be in Reference>LocalPartnerAgency #If not: Check if mispelling (like '%agencyname%) db_col_loc = etl.find_in_header(db, 'Local partner agency') ref_col_loc = etl.find_in_header(ref,'Local_Partner_Agency') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
def algo3(db, ref): #* what return? change? #Column C: must be in Reference>LocalPartnerAgency #If not: Check if mispelling (like '%agencyname%) db_col_loc = etl.find_in_header(db, 'Local partner agency') ref_col_loc = etl.find_in_header(ref, 'Local_Partner_Agency') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
def algo17(db, ref): #column R: must be in reference>Status db_col_loc = etl.find_in_header(db, 'Activity Status') ref_col_loc = etl.find_in_header(ref, 'Status') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, 'Activity Status not in Referece\n' + ','.join( missing_names)
def algo1(db, ref): #***Column A must be in Reference>ImplementingAgency if not: make a note db_col_loc = etl.find_in_header(db, 'Implementing agency') ref_col_loc = etl.find_in_header(ref, 'Implementing_Agency_Name') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, 'Agencies not in the reference:\n' + ','.join( missing_names)
def algo5(db,ref): #if Column C contains Column A: remove all spaces, commas, dashes and the matching substring in C db_col_loc_a = etl.find_in_header(db, 'Implementing agency') db_col_loc_c = etl.find_in_header(db, 'Local partner agency') vals_changed = [] for row in db.iter_rows(db_col_loc_a + "2:" + etl.find_last_value(db, db_col_loc_c, 'c')): if etl.xstr(row[0].value) in etl.xstr(row[2].value) and etl.xstr(row[0].value) != etl.xstr(row[2].value): vals_changed.append(row[2].value) row[2].value = row[2].value.replace(row[0].value,'').replace(' ','').replace(',','').replace('-','') return db, ref, return_message('IA contained in LPA for the following:' ,vals_changed)
def algo2(db,ref): #Column B: If == Implementing Agency set Column B=Internal #assumes they are next to each other db_col_loc_impl = etl.find_in_header(db, 'Implementing agency') db_col_loc_source = etl.find_in_header(db, 'Sourcing Agency') vals_changed = [] for row in db.iter_rows(db_col_loc_impl + "2:" + etl.find_last_value(db, db_col_loc_source, 'c')): if row[0].value == row[1].value: vals_changed.append(etl.xstr(row[1].value)) row[1].value = 'INTERNAL' return db, ref, return_message('Sourcing Agencies set as INTERNAL:' ,vals_changed)
def algo2(db, ref): #Column B: If == Implementing Agency set Column B=Internal #assumes they are next to each other db_col_loc_impl = etl.find_in_header(db, 'Implementing agency') db_col_loc_source = etl.find_in_header(db, 'Sourcing Agency') vals_changed = [] for row in db.iter_rows(db_col_loc_impl + "2:" + etl.find_last_value(db, db_col_loc_source, 'c')): if row[0].value == row[1].value: vals_changed.append(str(row[1].value)) row[1].value = 'INTERNAL' return db, ref, 'Sourcing Agencies set as INTERNAL:\n' + ','.join( vals_changed)
def algo4(db,ref): #if Column C == Column A #Clear column C db_col_loc_a = etl.find_in_header(db, 'Implementing agency') db_col_loc_c = etl.find_in_header(db, 'Local partner agency') vals_changed = [] for row in db.iter_rows(db_col_loc_a + "2:" + etl.find_last_value(db, db_col_loc_c, 'c')): if etl.xstr(row[0].value) == etl.xstr(row[2].value): vals_changed.append(etl.xstr(row[2].value)) row[2].value = '' return db, ref, return_message('Local Partner Agencies matched Implementing and were cleared' ,vals_changed)
def algo5(db, ref): #if Column C contains Column A: remove all spaces, commas, dashes and the matching substring in C db_col_loc_a = etl.find_in_header(db, 'Implementing agency') db_col_loc_c = etl.find_in_header(db, 'Local partner agency') vals_changed = [] for row in db.iter_rows(db_col_loc_a + "2:" + etl.find_last_value(db, db_col_loc_c, 'c')): if str(row[0].value) in str( row[2].value) and str(row[0].value) != str(row[2].value): vals_changed.append(row[2].value) row[2].value = row[2].value.replace(row[0].value, '').replace( ' ', '').replace(',', '').replace('-', '') return db, ref, 'IA contained in LPA for the following:\n' + ','.join( vals_changed)
def algo4(db, ref): #if Column C == Column A #Clear column C db_col_loc_a = etl.find_in_header(db, 'Implementing agency') db_col_loc_c = etl.find_in_header(db, 'Local partner agency') vals_changed = [] for row in db.iter_rows(db_col_loc_a + "2:" + etl.find_last_value(db, db_col_loc_c, 'c')): if str(row[0].value) == str(row[2].value): vals_changed.append(str(row[2].value)) row[2].value = '' return db, ref, 'Local Partner Agencies matched Implementing and were cleared\n' + ','.join( vals_changed)
def priority(self, db): """is it a priority district""" dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1 rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1 dvals = etl.get_values(db.columns[dloc][1:]) rvals = etl.get_values(self.s_dict['ref'].columns[rloc][1:]) ret = [] for v in dvals: if v in rvals and v != '': ret.append('TRUE') else: ret.append('FALSE') return ret
def algo19(db, ref): #Column T: must be a date>= 25/04/2015 and >=Column S comp_col_loc = etl.find_in_header(db, 'Completion Date\n (Actual or Planned)') start_col_loc = etl.find_in_header(db, 'Start date \n(Actual or Planned)') bad_date = [] for row in db.iter_rows(comp_col_loc + "2:" + etl.find_last_value(db, comp_col_loc, 'c')): cur = str(row[0].value) try: if parse(cur) < parse('4-25-2015') or parse(cur) < parse( str(db[start_col_loc + str(row[0].row)].value)): bad_date.append(cur + ' @ cell ' + row[0].coordinate) except Exception, e: bad_date.append(cur + ' @ cell ' + row[0].coordinate)
def algo8(db, ref): #Column I: must be in reference>Ward or a number or numbers separated by commas #take all into missing names and trim from there db_col_loc = etl.find_in_header(db, 'Municipal Ward') ref_col_loc = etl.find_in_header(ref, 'Wards') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) #trim missing_names to see if they contain any forbidden chars invalid = [] for v in missing_names: for letter in v: #if we've found an illegal letter if letter not in set(string.digits + ' ' + ','): invalid.append('(' + v + ')') break return db, ref, 'Malformed wards: \n' + ','.join(invalid)
def algo18(db, ref): #Column S: must be a date>= 25/04/2015 date_col_loc = etl.find_in_header(db, 'Start date \n(Actual or Planned)') if date_col_loc is None: date_col_loc = etl.find_in_header(db, 'Start date') bad_date = [] for row in db.iter_rows(date_col_loc + "2:" + etl.find_last_value(db, date_col_loc, 'c')): cur = str(row[0].value) try: if parse(cur) < parse('4-25-2015'): bad_date.append(cur + ' @ cell ' + row[0].coordinate) except: bad_date.append(cur + ' @ cell ' + row[0].coordinate) return db, ref, 'Malformatted date\n' + ','.join(bad_date)
def update(self, db): "last updated" dloc = column_index_from_string(etl.find_in_header(db, 'Last Update')) - 1 dvals = etl.get_values(db.columns[dloc][1:]) to_ret = [] for v in dvals: to_ret.append(v) return to_ret
def algo8(db,ref): #Column I: must be in reference>Ward or a number or numbers separated by commas #take all into missing names and trim from there db_col_loc = etl.find_in_header(db, 'Municipal Ward') ref_col_loc = etl.find_in_header(ref,'Wards') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) #trim missing_names to see if they contain any forbidden chars invalid = [] for v in missing_names: for letter in v: #if we've found an illegal letter if letter not in set(string.digits + ' ' + ','): invalid.append('(' + v + ')') break return db, ref, return_message('Malformed wards: ' ,invalid)
def act_cat(self, db): "UNOCHA cat" dloc = column_index_from_string( etl.find_in_header(db, 'Action description')) - 1 sc_rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'SC Categories')) - 1 un_rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'UN Categories')) - 1 dvals = etl.get_values(db.columns[dloc][1:]) sc_rloc_vals = etl.get_values(self.s_dict['ref'].columns[sc_rloc][1:]) un_rloc_vals = etl.get_values(self.s_dict['ref'].columns[un_rloc][1:]) ret = [] for v in dvals: if v in sc_rloc_vals and v != '': ret.append(un_rloc_vals[sc_rloc_vals.index(v)]) else: ret.append('') return ret
def dist_hlcit(self, db): "dist code" dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1 d_rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'Admin1_District')) - 1 code_rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'Admin1_P-Code')) - 1 dvals = etl.get_values(db.columns[dloc][1:]) d_rloc_vals = etl.get_values(self.s_dict['ref'].columns[d_rloc][1:]) code_rloc_vals = etl.get_values( self.s_dict['ref'].columns[code_rloc][1:]) ret = [] for v in dvals: if v in d_rloc_vals and v != '': ret.append(code_rloc_vals[d_rloc_vals.index(v)]) else: ret.append('') return ret
def hub(self, db): "what a hub a dist is in" dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1 pri_rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1 hub_rloc = column_index_from_string( etl.find_in_header(self.s_dict['ref'], 'Shelter Cluster Hubs')) - 1 dvals = etl.get_values(db.columns[dloc][1:]) pri_rloc_vals = etl.get_values( self.s_dict['ref'].columns[pri_rloc][1:]) hub_rloc_vals = etl.get_values( self.s_dict['ref'].columns[hub_rloc][1:]) ret = [] for v in dvals: if v in pri_rloc_vals and v != '': ret.append(hub_rloc_vals[pri_rloc_vals.index(v)]) else: ret.append('') return ret
def algo12(db,ref): #Column M: must be a number>=0 and (more conditions to follow) db_col_loc = etl.find_in_header(db, '# Items / # Man-hours / NPR') bad_vals = [] for row in db.iter_rows(db_col_loc + "2:" + etl.find_last_value(db, db_col_loc, 'c')): if not etl.xstr(row[0].value).isdigit(): if row[0].value == None: bad_vals.append('Blank @ cell' + row[0].coordinate) else: bad_vals.append(etl.xstr(row[0].value)) return db, ref, return_message('Invalid numbers for # Items / # Man-hours / NPR' ,bad_vals)
def algo13(db,ref): #Column N: must be a number>=0 and (more conditions to follow) db_col_loc = etl.find_in_header(db, 'Total Number Households') bad_vals = [] for row in db.iter_rows(db_col_loc + "2:" + etl.find_last_value(db, db_col_loc, 'c')): if not etl.xstr(row[0].value).isdigit(): if row[0].value == None: bad_vals.append('Blank@ ' + row[0].coordinate) else: bad_vals.append(etl.xstr(row[0].value)) return db, ref, return_message('Invalid numbers for Total Number Households' ,bad_vals)
def uid(self, db): "UID" ok_key = False if db.title == 'Distributions': vals = [ "Implementing agency", "Local partner agency", "District", "VDC / Municipalities", "Municipal Ward", "Action type", "Action description", "# Items / # Man-hours / NPR", "Total Number Households" ] ok_key = True elif db.title == 'Trainings': vals = [ "Implementing agency", "Local partner agency", "District", "VDC / Municipalities", "Municipal Ward", "Training Subject", "Audience", "IEC Materials Distributed", "Males" ] ok_key = True to_ret = [] mal_count = 0 print 'Starting UID' if ok_key: #pull vals into array key_vals = [] max_len = 0 for v in vals: val_arr = etl.get_values(db.columns[ column_index_from_string(etl.find_in_header(db, v)) - 1]) key_vals.append(val_arr) if len(val_arr) > max_len: max_len = len(val_arr) for i in xrange(1, max_len): cur_key = '' for k in xrange(len(key_vals)): try: cur_key += key_vals[k][i] except: pass to_ret.append(cur_key) return to_ret else: print 'ERROR: no suitable WB found for UID' return to_ret
def algo7(db,ref): #Column H: must be in Reference>VDC - relative to districy #the district in which a vdc in the db is located must = the district in ref db_dist_loc = etl.find_in_header(db, 'District') db_vdc_loc = etl.find_in_header(db, 'VDC / Municipalities') ref_dist_loc = etl.find_in_header(ref, 'Admin1_District') ref_vdc_loc = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality') #create tuple of vdc, dist for db and ref db_tuple = [] for row in db.iter_rows(db_dist_loc + "2:" + etl.find_last_value(db, db_vdc_loc, 'c')): db_tuple.append((etl.xstr(row[0].value), etl.xstr(row[1].value))) ref_tuple = [] #do it just for dist, then for vdc as they're not neighbors for row in ref.iter_rows('E2:' + etl.find_last_value(ref, 'E', 'c')): ref_tuple.append((etl.xstr(row[0].value),)) it = 0 for row in ref.iter_rows(ref_vdc_loc + "2:" + etl.find_last_value(ref, ref_vdc_loc, 'c')): ref_tuple[it]+=(etl.xstr(row[0].value.encode("utf8")),) it+=1 #iterate through db_tuple and see if tuple is in ref malformed_vdc = [] for tup in db_tuple: if tup not in ref_tuple: malformed_vdc.append(etl.xstr(tup)) return db, ref, return_message('VDCs in incorrect districts:' ,malformed_vdc)
def algo13(db, ref): #Column N: must be a number>=0 and (more conditions to follow) db_col_loc = etl.find_in_header(db, 'Total Number Households') bad_vals = [] for row in db.iter_rows(db_col_loc + "2:" + etl.find_last_value(db, db_col_loc, 'c')): if not etl.xstr(row[0].value).isdigit(): if row[0].value == None: bad_vals.append('Blank@ ' + row[0].coordinate) else: bad_vals.append(etl.xstr(row[0].value)) return db, ref, return_message( 'Invalid numbers for Total Number Households', bad_vals)
def algo12(db, ref): #Column M: must be a number>=0 and (more conditions to follow) db_col_loc = etl.find_in_header(db, '# Items / # Man-hours / NPR') bad_vals = [] for row in db.iter_rows(db_col_loc + "2:" + etl.find_last_value(db, db_col_loc, 'c')): if not str(row[0].value).isdigit(): if row[0].value == None: bad_vals.append('Blank @ cell' + row[0].coordinate) else: bad_vals.append(str(row[0].value)) return db, ref, 'Invalid numbers for # Items / # Man-hours / NPR\n' + ','.join( bad_vals)
def algo14(db, ref): #Column O: must be a number>=0 and (more conditions to follow) db_col_loc = etl.find_in_header(db, 'Average cost per households (NPR)') bad_vals = [] for row in db.iter_rows(db_col_loc + "2:" + etl.find_last_value(db, db_col_loc, 'c')): if not str(row[0].value).isdigit(): if row[0].value == None: bad_vals.append('Blank@ ' + row[0].coordinate) else: bad_vals.append(str(row[0].value)) return db, ref, 'Invalid numbers for Average cost per households (NPR)\n' + ','.join( bad_vals)
def algo18(db,ref): #Column S: must be a date>= 25/04/2015 date_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - Start')) - 1 cur = "" bad_date = [] for row in db.rows[1:]: try: cur = datetime(day = int(etl.xstr(row[date_col_loc].value)), \ month = int(etl.xstr(row[date_col_loc+1].value)), \ year = int(etl.xstr(row[date_col_loc+2].value))) if cur < parse('4-25-2015'): bad_date.append(str(cur) + ' @ row ' + str(row[0].row)) except : bad_date.append(str(cur) + ' @ row ' + str(row[0].row)) return db, ref, return_message('Malformatted date before EQ date', bad_date)
def algo18(db, ref): #Column S: must be a date>= 25/04/2015 date_col_loc = column_index_from_string( etl.find_in_header(db, 'DD - Start')) - 1 cur = "" bad_date = [] for row in db.rows[1:]: try: cur = datetime(day = int(etl.xstr(row[date_col_loc].value)), \ month = int(etl.xstr(row[date_col_loc+1].value)), \ year = int(etl.xstr(row[date_col_loc+2].value))) if cur < parse('4-25-2015'): bad_date.append(str(cur) + ' @ row ' + str(row[0].row)) except: bad_date.append(str(cur) + ' @ row ' + str(row[0].row)) return db, ref, return_message('Malformatted date before EQ date', bad_date)
def get_locs(ws): """find column headers in advance so we don't have to call each time""" ret = {} ret["priority"]=column_index_from_string(etl.find_in_header(ws,"Priority")) ret["access_method"]=column_index_from_string(etl.find_in_header(ws,"Hard to Reach Access Methods")) ret["hub"]=column_index_from_string(etl.find_in_header(ws,"Shelter Cluster Hub")) ret["as_of"]=column_index_from_string(etl.find_in_header(ws,"Last Update")) ret["dist_code"]=column_index_from_string(etl.find_in_header(ws,"District HLCIT Code")) ret["vdc_code"]=column_index_from_string(etl.find_in_header(ws,"VDC / Municipality HLCIT Code")) ret["act_cat"]=column_index_from_string(etl.find_in_header(ws,"UNOCHA Activity Categories")) ret["imp_agency"]=column_index_from_string(etl.find_in_header(ws,"Implementing agency")) ret["source_agency"]=column_index_from_string(etl.find_in_header(ws,"Sourcing Agency")) ret["local_partner"]=column_index_from_string(etl.find_in_header(ws,"Local partner agency")) ret["contact_name"]=column_index_from_string(etl.find_in_header(ws,"Local Contact Name")) ret["contact_email"]=column_index_from_string(etl.find_in_header(ws,"Local Contact Email")) ret["contact_phone"]=column_index_from_string(etl.find_in_header(ws,"Local Contact Phone #")) ret["district"]=column_index_from_string(etl.find_in_header(ws,"District")) ret["vdc"]=column_index_from_string(etl.find_in_header(ws,"VDC / Municipalities")) ret["ward"]=column_index_from_string(etl.find_in_header(ws,"Municipal Ward")) ret["act_type"]=column_index_from_string(etl.find_in_header(ws,"Action type")) ret["act_desc"]=column_index_from_string(etl.find_in_header(ws,"Action description")) ret["targeting"]=column_index_from_string(etl.find_in_header(ws,"Targeting")) ret["quantity"]=column_index_from_string(etl.find_in_header(ws,"# Items / # Man-hours / NPR")) ret["total_hh"]=column_index_from_string(etl.find_in_header(ws,"Total Number Households")) ret["avg_hh_cost"]=column_index_from_string(etl.find_in_header(ws,"Average cost per households (NPR)")) ret["fem_hh"]=column_index_from_string(etl.find_in_header(ws,"Female headed households")) ret["vuln_hh"]=column_index_from_string(etl.find_in_header(ws,"Vulnerable Caste / Ethnicity households")) ret["act_status"]=column_index_from_string(etl.find_in_header(ws,"Activity Status")) ret["start_dt"]=column_index_from_string(etl.find_in_header(ws,"Start date")) ret["comp_dt"]=column_index_from_string(etl.find_in_header(ws,"Completion Date")) ret["comments"]=column_index_from_string(etl.find_in_header(ws,"Additional Comments")) return ret
def test_find_in_header(self): self.assertEqual(etl.find_in_header(ref, 'TESTINGCOL'), 'J')
def algo6(db,ref): #Column G: must be in Reference>District db_col_loc = etl.find_in_header(db, 'District') ref_col_loc = etl.find_in_header(ref,'Admin1_District') missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc) return db, ref, return_message('Districts not in the reference:' ,missing_names)