def get_vals(ws):
    w = etl.pull_wb("/Users/ewanog/Downloads/ward.xlsx", "local")
    cur_y = 47
    found = True
    vals = []
    #dist = 'A'
    #vdc = 'F'
    ward = 'A'
    pop = 'D'
    hh = 'B'
    dist = ws['A44'].value.split()[2]
    #
    while found:
        vdc = ws['F'+str(cur_y-3)].value.split()[2]
        if ws['F'+str(cur_y-3)].value.split()[3][0]!='[':
            vdc+= ' ' + ws['F'+str(cur_y-3)].value.split()[3]

        i = 0
        while isinstance(ws['A'+str(cur_y)].value,int):
            vals.append([dist, vdc, ws[ward+str(cur_y)].value,ws[pop+str(cur_y)].value,ws[hh+str(cur_y)].value])
            cur_y+=1

        cur_y+=7
        if ws['A'+str(cur_y)].value != 1 and vdc != 'Bhusafeda':
            found = False
        else:
            vdc = ws['F'+str(cur_y-3)].value.split()[2]

    return vals
def match_vdcs():
    #notes...
    #1651 VDCs without exact match


    w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local")
    #w = etl.pull_wb("/Users/ewanog/Downloads/test.xlsx", "local")
    #from cbs: col 1 (dist), col 2 (vdc), col 3 (ward)
    #from new: H (dist), N (VDC), O (ward)
    cbs = w.get_sheet_by_name("cbs")
    new = w.get_sheet_by_name("new")
    cbs_dist = etl.get_values(cbs.columns[0][1:])
    cbs_vdc = etl.get_values(cbs.columns[1][1:])
    cbs_ward = etl.get_values(cbs.columns[2][1:])

    new_dist = etl.get_values(new.columns[7][1:])
    new_vdc = etl.get_values(new.columns[13][1:])
    new_ward = etl.get_values(new.columns[14][1:])
    ns = set(new_vdc)
    #see vdc matches

    cb_zip = zip(cbs_dist, cbs_vdc)
    new_zip = zip(new_dist, new_vdc)


    print len(set(etl.colvals_notincol(cbs,'B',new,'N')))
    print len(set(cbs_vdc))
    for v in set(etl.colvals_notincol(cbs,'B',new,'N')):
        r = process.extract(v, [r for r in new_zip if r[1] == ])
        print r[0][1]


if __name__ == '__main__':
    read = False
    if read:
        for ws in w.worksheets:
            if ws['A44'].value:
                r = get_vals(ws)
                print r[-1][0]
                #pr(r)
    else:
        match_vdcs()
def insert_data(path):
    """iterate over each row and add to db"""
    path = "/Users/ewanog/tmp/simp.xlsx"
    ws = etl.pull_wb(path, 'local').get_sheet_by_name('Distributions')
    locs = get_locs(ws)
    conn = engine.connect()
    ins = Base.metadata.tables['distributions'].insert()

    c=0
    for r in ws.rows[1:]:
        conn.execute(
        ins,
        priority=r[locs["priority"]-1].value,
        access_method=r[locs["access_method"]-1].value,
        hub=r[locs["hub"]-1].value,
        as_of=r[locs["as_of"]-1].value,
        dist_code=r[locs["dist_code"]-1].value,
        vdc_code=r[locs["vdc_code"]-1].value,
        act_cat=r[locs["act_cat"]-1].value,
        imp_agency=r[locs["imp_agency"]-1].value,
        source_agency=r[locs["source_agency"]-1].value,
        local_partner=r[locs["local_partner"]-1].value,
        contact_name=r[locs["contact_name"]-1].value,
        contact_email=r[locs["contact_email"]-1].value,
        contact_phone=r[locs["contact_phone"]-1].value,
        district=r[locs["district"]-1].value,
        vdc=r[locs["vdc"]-1].value,
        ward=r[locs["ward"]-1].value,
        act_type=r[locs["act_type"]-1].value,
        act_desc=r[locs["act_desc"]-1].value,
        targeting=r[locs["targeting"]-1].value,
        quantity=r[locs["quantity"]-1].value,
        total_hh=r[locs["total_hh"]-1].value,
        avg_hh_cost=r[locs["avg_hh_cost"]-1].value,
        fem_hh=r[locs["fem_hh"]-1].value,
        vuln_hh=r[locs["vuln_hh"]-1].value,
        act_status=r[locs["act_status"]-1].value,
        start_dt=r[locs["start_dt"]-1].value,
        comp_dt=r[locs["comp_dt"]-1].value,
        comments=r[locs["comments"]-1].value,
        pk=gen_pk(r, locs))
예제 #4
0
def insert_data(path):
    """iterate over each row and add to db"""
    path = "/Users/ewanog/tmp/simp.xlsx"
    ws = etl.pull_wb(path, 'local').get_sheet_by_name('Distributions')
    locs = get_locs(ws)
    conn = engine.connect()
    ins = Base.metadata.tables['distributions'].insert()

    c = 0
    for r in ws.rows[1:]:
        conn.execute(ins,
                     priority=r[locs["priority"] - 1].value,
                     access_method=r[locs["access_method"] - 1].value,
                     hub=r[locs["hub"] - 1].value,
                     as_of=r[locs["as_of"] - 1].value,
                     dist_code=r[locs["dist_code"] - 1].value,
                     vdc_code=r[locs["vdc_code"] - 1].value,
                     act_cat=r[locs["act_cat"] - 1].value,
                     imp_agency=r[locs["imp_agency"] - 1].value,
                     source_agency=r[locs["source_agency"] - 1].value,
                     local_partner=r[locs["local_partner"] - 1].value,
                     contact_name=r[locs["contact_name"] - 1].value,
                     contact_email=r[locs["contact_email"] - 1].value,
                     contact_phone=r[locs["contact_phone"] - 1].value,
                     district=r[locs["district"] - 1].value,
                     vdc=r[locs["vdc"] - 1].value,
                     ward=r[locs["ward"] - 1].value,
                     act_type=r[locs["act_type"] - 1].value,
                     act_desc=r[locs["act_desc"] - 1].value,
                     targeting=r[locs["targeting"] - 1].value,
                     quantity=r[locs["quantity"] - 1].value,
                     total_hh=r[locs["total_hh"] - 1].value,
                     avg_hh_cost=r[locs["avg_hh_cost"] - 1].value,
                     fem_hh=r[locs["fem_hh"] - 1].value,
                     vuln_hh=r[locs["vuln_hh"] - 1].value,
                     act_status=r[locs["act_status"] - 1].value,
                     start_dt=r[locs["start_dt"] - 1].value,
                     comp_dt=r[locs["comp_dt"] - 1].value,
                     comments=r[locs["comments"] - 1].value,
                     pk=gen_pk(r, locs))
import os
import shutil
import etl
from openpyxl import Workbook

files = ['/Users/ewanog/Downloads/tm/'+f for f in \
         os.listdir('/Users/ewanog/Downloads/tm/') if '.xlsx' in f and '~' not in f and 'merge.xlsx' not in f]

print files

d = []
t = []

it = 0
for f in files:
    w = etl.pull_wb(f, 'local',True)
    d.append(etl.get_all_values_from_ws(w.get_sheet_by_name('Distributions'), 'r'))
    t.append(etl.get_all_values_from_ws(w.get_sheet_by_name('Training'), 'r'))

w = Workbook()
w.create_sheet(1,'Distributions')
w.create_sheet(2,'Training')


sum = 0
print 'Dist'
s = w.get_sheet_by_name('Distributions')
s.append(d[0][0])
for entry in d:
    print len(entry) - 1
    sum += len(entry) - 1
예제 #6
0
import openpyxl
import etl
from datetime import datetime
import csv

s = etl.pull_wb("/Users/ewanog/Documents/tmp/date_test.xlsx", "local", True).active

for r in s.rows[1:]:
    for v in r:
        rv = v.value
        if rv is not None:
            first = int(rv.split("/")[0])
            second = int(rv.split("/")[1])
            third = int(rv.split("/")[2])

            if third in [15, 16, 17]:
                third = "20" + str(third)

            if second < 4 and first < 25:
                t = first
                first = second
                second = t

            v.value = str(first) + "/" + str(second) + "/" + str(third)

            try:
                v.value = etl.convert_date(v.value, "%d/%m/%Y", "%d/%m/%Y")
            except:
                try:
                    v.value = etl.convert_date(v.value, "%d/%m/%y", "%d/%m/%Y")
                except: