def reach(self, db):
        """how is the vdc accessible"""
        #TODO: use index() method instead of dict
        dist_loc = column_index_from_string(etl.find_in_header(db,
                                                               'District')) - 1
        vdc_loc = column_index_from_string(
            etl.find_in_header(db, 'VDC / Municipalities')) - 1
        acc_look = column_index_from_string(
            etl.find_in_header(self.s_dict['acc'],
                               'DistrictVDC Concatenation')) - 1
        acc_acc = column_index_from_string(
            etl.find_in_header(self.s_dict['acc'], 'NeKSAP ACCESS')) - 1

        dist_vals = etl.get_values(db.columns[dist_loc][1:])
        vdc_vals = etl.get_values(db.columns[vdc_loc][1:])
        acc_look_vals = etl.get_values(
            self.s_dict['acc'].columns[acc_look][1:])
        acc_acc_vals = etl.get_values(self.s_dict['acc'].columns[acc_acc][1:])

        #make dict for access, concatenate dist and vdc
        acc_dict = dict(zip(acc_look_vals, acc_acc_vals))
        d_v_conc = [dist_vals[i] + vdc_vals[i] for i in xrange(len(dist_vals))]

        ret = []
        for v in d_v_conc:
            if v in acc_dict and v != '':
                ret.append(acc_dict[v])
            else:
                ret.append('')

        return ret
    def priority(self, db):
        """is it a priority district"""
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        rvals = etl.get_values(self.s_dict['ref'].columns[rloc][1:])
        ret = []

        for v in dvals:
            if v in rvals and v != '':
                ret.append('TRUE')
            else:
                ret.append('FALSE')

        return ret
    def update(self, db):
        "last updated"
        dloc = column_index_from_string(etl.find_in_header(db,
                                                           'Last Update')) - 1
        dvals = etl.get_values(db.columns[dloc][1:])
        to_ret = []

        for v in dvals:
            to_ret.append(v)

        return to_ret
    def dist_hlcit(self, db):
        "dist code"
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        d_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Admin1_District')) - 1
        code_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Admin1_P-Code')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        d_rloc_vals = etl.get_values(self.s_dict['ref'].columns[d_rloc][1:])
        code_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[code_rloc][1:])
        ret = []

        for v in dvals:
            if v in d_rloc_vals and v != '':
                ret.append(code_rloc_vals[d_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret
    def act_cat(self, db):
        "UNOCHA cat"
        dloc = column_index_from_string(
            etl.find_in_header(db, 'Action description')) - 1
        sc_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'SC Categories')) - 1
        un_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'UN Categories')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        sc_rloc_vals = etl.get_values(self.s_dict['ref'].columns[sc_rloc][1:])
        un_rloc_vals = etl.get_values(self.s_dict['ref'].columns[un_rloc][1:])
        ret = []

        for v in dvals:
            if v in sc_rloc_vals and v != '':
                ret.append(un_rloc_vals[sc_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret
    def hub(self, db):
        "what a hub a dist is in"
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        pri_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1
        hub_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Shelter Cluster Hubs')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        pri_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[pri_rloc][1:])
        hub_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[hub_rloc][1:])
        ret = []

        for v in dvals:
            if v in pri_rloc_vals and v != '':
                ret.append(hub_rloc_vals[pri_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret
Beispiel #7
0
    def test_consolidate(self):
        #create historical db
        db = Workbook().active
        db.append(("Implementing agency", "Local partner agency" , "District", 
            "VDC / Municipalities", "Municipal Ward", "Action type", 
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households"))
        db.append(('val', 'key', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('dup1', 'dupkey1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('row2val', 'key1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('row3val', 'key2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('dup2', 'dupkey2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))

        #create two sheets to add
        wb1 = Workbook()
        wb1.create_sheet(2, 'Distributions')
        ws1 = wb1.get_sheet_by_name('Distributions')
        ws1.append(("Implementing agency", "Local partner agency" , "District",
            "VDC / Municipalities", "Municipal Ward", "Action type", 
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households"))
        ws1.append(('val', 'key', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws1.append(('dup1', 'dupkey1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws1.append(('notdupedws1', 'notdupedvalws1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))

        wb2 = Workbook()
        wb2.create_sheet(1, 'Distributions')
        ws2 = wb2.get_sheet_by_name('Distributions')
        ws2.title = 'Distributions'
        ws2.append(("Implementing agency", "Local partner agency" , "District", 
            "VDC / Municipalities", "Municipal Ward", "Action type", 
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households"))
        ws2.append(('val', 'key', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws2.append(('dup2', 'dupkey2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws2.append(('notdupedws2', 'notdupedvalws2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))

        #weird bug
        wb2.get_sheet_by_name('Distributions1').title = 'Distributions'
        print 'in test: ' + str(wb1.get_sheet_names())
        print 'in test2: ' + str(wb2.get_sheet_names())

        cons = etl.consolidate(db, (wb1, wb2), 'J')
        cons_sheet = cons.get_sheet_by_name('Consolidated')

        self.assertEqual(set(etl.get_values(cons_sheet.columns[9])), 
            (set(['None', 'valkeyNoneNoneNoneNoneNoneNoneNone','row2valkey1NoneNoneNoneNoneNoneNoneNone',
                'row3valkey2NoneNoneNoneNoneNoneNoneNone','dup1dupkey1NoneNoneNoneNoneNoneNoneNone',
                'notdupedws1notdupedvalws1NoneNoneNoneNoneNoneNoneNone','dup2dupkey2NoneNoneNoneNoneNoneNoneNone',
                'notdupedws2notdupedvalws2NoneNoneNoneNoneNoneNoneNone'])))
    def uid(self, db):
        "UID"
        ok_key = False
        if db.title == 'Distributions':
            vals = [
                "Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Action type",
                "Action description", "# Items / # Man-hours / NPR",
                "Total Number Households"
            ]
            ok_key = True

        elif db.title == 'Trainings':
            vals = [
                "Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Training Subject",
                "Audience", "IEC Materials Distributed", "Males"
            ]
            ok_key = True

        to_ret = []
        mal_count = 0

        print 'Starting UID'

        if ok_key:
            #pull vals into array
            key_vals = []
            max_len = 0
            for v in vals:
                val_arr = etl.get_values(db.columns[
                    column_index_from_string(etl.find_in_header(db, v)) - 1])
                key_vals.append(val_arr)
                if len(val_arr) > max_len:
                    max_len = len(val_arr)

            for i in xrange(1, max_len):
                cur_key = ''
                for k in xrange(len(key_vals)):
                    try:
                        cur_key += key_vals[k][i]
                    except:
                        pass
                to_ret.append(cur_key)

            return to_ret

        else:
            print 'ERROR: no suitable WB found for UID'
            return to_ret
def match_vdcs():
    #notes...
    #1651 VDCs without exact match


    w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local")
    #w = etl.pull_wb("/Users/ewanog/Downloads/test.xlsx", "local")
    #from cbs: col 1 (dist), col 2 (vdc), col 3 (ward)
    #from new: H (dist), N (VDC), O (ward)
    cbs = w.get_sheet_by_name("cbs")
    new = w.get_sheet_by_name("new")
    cbs_dist = etl.get_values(cbs.columns[0][1:])
    cbs_vdc = etl.get_values(cbs.columns[1][1:])
    cbs_ward = etl.get_values(cbs.columns[2][1:])

    new_dist = etl.get_values(new.columns[7][1:])
    new_vdc = etl.get_values(new.columns[13][1:])
    new_ward = etl.get_values(new.columns[14][1:])
    ns = set(new_vdc)
    #see vdc matches

    cb_zip = zip(cbs_dist, cbs_vdc)
    new_zip = zip(new_dist, new_vdc)


    print len(set(etl.colvals_notincol(cbs,'B',new,'N')))
    print len(set(cbs_vdc))
    for v in set(etl.colvals_notincol(cbs,'B',new,'N')):
        r = process.extract(v, [r for r in new_zip if r[1] == ])
        print r[0][1]


if __name__ == '__main__':
    read = False
    if read:
        for ws in w.worksheets:
            if ws['A44'].value:
                r = get_vals(ws)
                print r[-1][0]
                #pr(r)
    else:
        match_vdcs()
Beispiel #10
0
 def test_get_values(self):
     db = Workbook().active
     db.append(('val', 'key'))
     self.assertEqual(etl.get_values(db.rows[0]),['val','key'])
            try:
                v.value = etl.convert_date(v.value, "%d/%m/%Y", "%d/%m/%Y")
            except:
                try:
                    v.value = etl.convert_date(v.value, "%d/%m/%y", "%d/%m/%Y")
                except:
                    try:
                        v.value = etl.convert_date(v.value, "%m/%d/%y", "%d/%m/%Y")
                    except:
                        try:
                            v.value = etl.convert_date(v.value, "%m/%d/%Y", "%d/%m/%Y")
                        except:
                            "cant parse: " + v.value

with open("/Users/ewanog/Documents/tmp/dates.csv", "w") as f:
    csv_out = csv.writer(f, delimiter=",")
    csv_out.writerow(etl.get_values(s.rows[0]))
    for r in s.rows[1:]:
        out = []
        for v in r:
            try:
                if v.value is not None:
                    out.append(v.value)
            except Exception, e:
                print e

        csv_out.writerow(out)
        print str(out)

    f.close()