def get_vals(ws): w = etl.pull_wb("/Users/ewanog/Downloads/ward.xlsx", "local") cur_y = 47 found = True vals = [] #dist = 'A' #vdc = 'F' ward = 'A' pop = 'D' hh = 'B' dist = ws['A44'].value.split()[2] # while found: vdc = ws['F'+str(cur_y-3)].value.split()[2] if ws['F'+str(cur_y-3)].value.split()[3][0]!='[': vdc+= ' ' + ws['F'+str(cur_y-3)].value.split()[3] i = 0 while isinstance(ws['A'+str(cur_y)].value,int): vals.append([dist, vdc, ws[ward+str(cur_y)].value,ws[pop+str(cur_y)].value,ws[hh+str(cur_y)].value]) cur_y+=1 cur_y+=7 if ws['A'+str(cur_y)].value != 1 and vdc != 'Bhusafeda': found = False else: vdc = ws['F'+str(cur_y-3)].value.split()[2] return vals
def match_vdcs(): #notes... #1651 VDCs without exact match w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local") #w = etl.pull_wb("/Users/ewanog/Downloads/test.xlsx", "local") #from cbs: col 1 (dist), col 2 (vdc), col 3 (ward) #from new: H (dist), N (VDC), O (ward) cbs = w.get_sheet_by_name("cbs") new = w.get_sheet_by_name("new") cbs_dist = etl.get_values(cbs.columns[0][1:]) cbs_vdc = etl.get_values(cbs.columns[1][1:]) cbs_ward = etl.get_values(cbs.columns[2][1:]) new_dist = etl.get_values(new.columns[7][1:]) new_vdc = etl.get_values(new.columns[13][1:]) new_ward = etl.get_values(new.columns[14][1:]) ns = set(new_vdc) #see vdc matches cb_zip = zip(cbs_dist, cbs_vdc) new_zip = zip(new_dist, new_vdc) print len(set(etl.colvals_notincol(cbs,'B',new,'N'))) print len(set(cbs_vdc)) for v in set(etl.colvals_notincol(cbs,'B',new,'N')): r = process.extract(v, [r for r in new_zip if r[1] == ]) print r[0][1] if __name__ == '__main__': read = False if read: for ws in w.worksheets: if ws['A44'].value: r = get_vals(ws) print r[-1][0] #pr(r) else: match_vdcs()
def insert_data(path): """iterate over each row and add to db""" path = "/Users/ewanog/tmp/simp.xlsx" ws = etl.pull_wb(path, 'local').get_sheet_by_name('Distributions') locs = get_locs(ws) conn = engine.connect() ins = Base.metadata.tables['distributions'].insert() c=0 for r in ws.rows[1:]: conn.execute( ins, priority=r[locs["priority"]-1].value, access_method=r[locs["access_method"]-1].value, hub=r[locs["hub"]-1].value, as_of=r[locs["as_of"]-1].value, dist_code=r[locs["dist_code"]-1].value, vdc_code=r[locs["vdc_code"]-1].value, act_cat=r[locs["act_cat"]-1].value, imp_agency=r[locs["imp_agency"]-1].value, source_agency=r[locs["source_agency"]-1].value, local_partner=r[locs["local_partner"]-1].value, contact_name=r[locs["contact_name"]-1].value, contact_email=r[locs["contact_email"]-1].value, contact_phone=r[locs["contact_phone"]-1].value, district=r[locs["district"]-1].value, vdc=r[locs["vdc"]-1].value, ward=r[locs["ward"]-1].value, act_type=r[locs["act_type"]-1].value, act_desc=r[locs["act_desc"]-1].value, targeting=r[locs["targeting"]-1].value, quantity=r[locs["quantity"]-1].value, total_hh=r[locs["total_hh"]-1].value, avg_hh_cost=r[locs["avg_hh_cost"]-1].value, fem_hh=r[locs["fem_hh"]-1].value, vuln_hh=r[locs["vuln_hh"]-1].value, act_status=r[locs["act_status"]-1].value, start_dt=r[locs["start_dt"]-1].value, comp_dt=r[locs["comp_dt"]-1].value, comments=r[locs["comments"]-1].value, pk=gen_pk(r, locs))
def insert_data(path): """iterate over each row and add to db""" path = "/Users/ewanog/tmp/simp.xlsx" ws = etl.pull_wb(path, 'local').get_sheet_by_name('Distributions') locs = get_locs(ws) conn = engine.connect() ins = Base.metadata.tables['distributions'].insert() c = 0 for r in ws.rows[1:]: conn.execute(ins, priority=r[locs["priority"] - 1].value, access_method=r[locs["access_method"] - 1].value, hub=r[locs["hub"] - 1].value, as_of=r[locs["as_of"] - 1].value, dist_code=r[locs["dist_code"] - 1].value, vdc_code=r[locs["vdc_code"] - 1].value, act_cat=r[locs["act_cat"] - 1].value, imp_agency=r[locs["imp_agency"] - 1].value, source_agency=r[locs["source_agency"] - 1].value, local_partner=r[locs["local_partner"] - 1].value, contact_name=r[locs["contact_name"] - 1].value, contact_email=r[locs["contact_email"] - 1].value, contact_phone=r[locs["contact_phone"] - 1].value, district=r[locs["district"] - 1].value, vdc=r[locs["vdc"] - 1].value, ward=r[locs["ward"] - 1].value, act_type=r[locs["act_type"] - 1].value, act_desc=r[locs["act_desc"] - 1].value, targeting=r[locs["targeting"] - 1].value, quantity=r[locs["quantity"] - 1].value, total_hh=r[locs["total_hh"] - 1].value, avg_hh_cost=r[locs["avg_hh_cost"] - 1].value, fem_hh=r[locs["fem_hh"] - 1].value, vuln_hh=r[locs["vuln_hh"] - 1].value, act_status=r[locs["act_status"] - 1].value, start_dt=r[locs["start_dt"] - 1].value, comp_dt=r[locs["comp_dt"] - 1].value, comments=r[locs["comments"] - 1].value, pk=gen_pk(r, locs))
import os import shutil import etl from openpyxl import Workbook files = ['/Users/ewanog/Downloads/tm/'+f for f in \ os.listdir('/Users/ewanog/Downloads/tm/') if '.xlsx' in f and '~' not in f and 'merge.xlsx' not in f] print files d = [] t = [] it = 0 for f in files: w = etl.pull_wb(f, 'local',True) d.append(etl.get_all_values_from_ws(w.get_sheet_by_name('Distributions'), 'r')) t.append(etl.get_all_values_from_ws(w.get_sheet_by_name('Training'), 'r')) w = Workbook() w.create_sheet(1,'Distributions') w.create_sheet(2,'Training') sum = 0 print 'Dist' s = w.get_sheet_by_name('Distributions') s.append(d[0][0]) for entry in d: print len(entry) - 1 sum += len(entry) - 1
import openpyxl import etl from datetime import datetime import csv s = etl.pull_wb("/Users/ewanog/Documents/tmp/date_test.xlsx", "local", True).active for r in s.rows[1:]: for v in r: rv = v.value if rv is not None: first = int(rv.split("/")[0]) second = int(rv.split("/")[1]) third = int(rv.split("/")[2]) if third in [15, 16, 17]: third = "20" + str(third) if second < 4 and first < 25: t = first first = second second = t v.value = str(first) + "/" + str(second) + "/" + str(third) try: v.value = etl.convert_date(v.value, "%d/%m/%Y", "%d/%m/%Y") except: try: v.value = etl.convert_date(v.value, "%d/%m/%y", "%d/%m/%Y") except: