コード例 #1
0
    def reach(self, db):
        """how is the vdc accessible"""
        #TODO: use index() method instead of dict
        dist_loc = column_index_from_string(etl.find_in_header(db,
                                                               'District')) - 1
        vdc_loc = column_index_from_string(
            etl.find_in_header(db, 'VDC / Municipalities')) - 1
        acc_look = column_index_from_string(
            etl.find_in_header(self.s_dict['acc'],
                               'DistrictVDC Concatenation')) - 1
        acc_acc = column_index_from_string(
            etl.find_in_header(self.s_dict['acc'], 'NeKSAP ACCESS')) - 1

        dist_vals = etl.get_values(db.columns[dist_loc][1:])
        vdc_vals = etl.get_values(db.columns[vdc_loc][1:])
        acc_look_vals = etl.get_values(
            self.s_dict['acc'].columns[acc_look][1:])
        acc_acc_vals = etl.get_values(self.s_dict['acc'].columns[acc_acc][1:])

        #make dict for access, concatenate dist and vdc
        acc_dict = dict(zip(acc_look_vals, acc_acc_vals))
        d_v_conc = [dist_vals[i] + vdc_vals[i] for i in xrange(len(dist_vals))]

        ret = []
        for v in d_v_conc:
            if v in acc_dict and v != '':
                ret.append(acc_dict[v])
            else:
                ret.append('')

        return ret
コード例 #2
0
    def priority(self, db):
        """is it a priority district"""
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        rvals = etl.get_values(self.s_dict['ref'].columns[rloc][1:])
        ret = []

        for v in dvals:
            if v in rvals and v != '':
                ret.append('TRUE')
            else:
                ret.append('FALSE')

        return ret
コード例 #3
0
    def update(self, db):
        "last updated"
        dloc = column_index_from_string(etl.find_in_header(db,
                                                           'Last Update')) - 1
        dvals = etl.get_values(db.columns[dloc][1:])
        to_ret = []

        for v in dvals:
            to_ret.append(v)

        return to_ret
コード例 #4
0
    def dist_hlcit(self, db):
        "dist code"
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        d_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Admin1_District')) - 1
        code_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Admin1_P-Code')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        d_rloc_vals = etl.get_values(self.s_dict['ref'].columns[d_rloc][1:])
        code_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[code_rloc][1:])
        ret = []

        for v in dvals:
            if v in d_rloc_vals and v != '':
                ret.append(code_rloc_vals[d_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret
コード例 #5
0
    def act_cat(self, db):
        "UNOCHA cat"
        dloc = column_index_from_string(
            etl.find_in_header(db, 'Action description')) - 1
        sc_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'SC Categories')) - 1
        un_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'UN Categories')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        sc_rloc_vals = etl.get_values(self.s_dict['ref'].columns[sc_rloc][1:])
        un_rloc_vals = etl.get_values(self.s_dict['ref'].columns[un_rloc][1:])
        ret = []

        for v in dvals:
            if v in sc_rloc_vals and v != '':
                ret.append(un_rloc_vals[sc_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret
コード例 #6
0
    def hub(self, db):
        "what a hub a dist is in"
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        pri_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1
        hub_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Shelter Cluster Hubs')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        pri_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[pri_rloc][1:])
        hub_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[hub_rloc][1:])
        ret = []

        for v in dvals:
            if v in pri_rloc_vals and v != '':
                ret.append(hub_rloc_vals[pri_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret
コード例 #7
0
    def test_consolidate(self):
        #create historical db
        db = Workbook().active
        db.append(("Implementing agency", "Local partner agency" , "District", 
            "VDC / Municipalities", "Municipal Ward", "Action type", 
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households"))
        db.append(('val', 'key', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('dup1', 'dupkey1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('row2val', 'key1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('row3val', 'key2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        db.append(('dup2', 'dupkey2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))

        #create two sheets to add
        wb1 = Workbook()
        wb1.create_sheet(2, 'Distributions')
        ws1 = wb1.get_sheet_by_name('Distributions')
        ws1.append(("Implementing agency", "Local partner agency" , "District",
            "VDC / Municipalities", "Municipal Ward", "Action type", 
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households"))
        ws1.append(('val', 'key', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws1.append(('dup1', 'dupkey1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws1.append(('notdupedws1', 'notdupedvalws1', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))

        wb2 = Workbook()
        wb2.create_sheet(1, 'Distributions')
        ws2 = wb2.get_sheet_by_name('Distributions')
        ws2.title = 'Distributions'
        ws2.append(("Implementing agency", "Local partner agency" , "District", 
            "VDC / Municipalities", "Municipal Ward", "Action type", 
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households"))
        ws2.append(('val', 'key', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws2.append(('dup2', 'dupkey2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))
        ws2.append(('notdupedws2', 'notdupedvalws2', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'Last'))

        #weird bug
        wb2.get_sheet_by_name('Distributions1').title = 'Distributions'
        print 'in test: ' + str(wb1.get_sheet_names())
        print 'in test2: ' + str(wb2.get_sheet_names())

        cons = etl.consolidate(db, (wb1, wb2), 'J')
        cons_sheet = cons.get_sheet_by_name('Consolidated')

        self.assertEqual(set(etl.get_values(cons_sheet.columns[9])), 
            (set(['None', 'valkeyNoneNoneNoneNoneNoneNoneNone','row2valkey1NoneNoneNoneNoneNoneNoneNone',
                'row3valkey2NoneNoneNoneNoneNoneNoneNone','dup1dupkey1NoneNoneNoneNoneNoneNoneNone',
                'notdupedws1notdupedvalws1NoneNoneNoneNoneNoneNoneNone','dup2dupkey2NoneNoneNoneNoneNoneNoneNone',
                'notdupedws2notdupedvalws2NoneNoneNoneNoneNoneNoneNone'])))
コード例 #8
0
    def uid(self, db):
        "UID"
        ok_key = False
        if db.title == 'Distributions':
            vals = [
                "Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Action type",
                "Action description", "# Items / # Man-hours / NPR",
                "Total Number Households"
            ]
            ok_key = True

        elif db.title == 'Trainings':
            vals = [
                "Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Training Subject",
                "Audience", "IEC Materials Distributed", "Males"
            ]
            ok_key = True

        to_ret = []
        mal_count = 0

        print 'Starting UID'

        if ok_key:
            #pull vals into array
            key_vals = []
            max_len = 0
            for v in vals:
                val_arr = etl.get_values(db.columns[
                    column_index_from_string(etl.find_in_header(db, v)) - 1])
                key_vals.append(val_arr)
                if len(val_arr) > max_len:
                    max_len = len(val_arr)

            for i in xrange(1, max_len):
                cur_key = ''
                for k in xrange(len(key_vals)):
                    try:
                        cur_key += key_vals[k][i]
                    except:
                        pass
                to_ret.append(cur_key)

            return to_ret

        else:
            print 'ERROR: no suitable WB found for UID'
            return to_ret
コード例 #9
0
def match_vdcs():
    #notes...
    #1651 VDCs without exact match


    w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local")
    #w = etl.pull_wb("/Users/ewanog/Downloads/test.xlsx", "local")
    #from cbs: col 1 (dist), col 2 (vdc), col 3 (ward)
    #from new: H (dist), N (VDC), O (ward)
    cbs = w.get_sheet_by_name("cbs")
    new = w.get_sheet_by_name("new")
    cbs_dist = etl.get_values(cbs.columns[0][1:])
    cbs_vdc = etl.get_values(cbs.columns[1][1:])
    cbs_ward = etl.get_values(cbs.columns[2][1:])

    new_dist = etl.get_values(new.columns[7][1:])
    new_vdc = etl.get_values(new.columns[13][1:])
    new_ward = etl.get_values(new.columns[14][1:])
    ns = set(new_vdc)
    #see vdc matches

    cb_zip = zip(cbs_dist, cbs_vdc)
    new_zip = zip(new_dist, new_vdc)


    print len(set(etl.colvals_notincol(cbs,'B',new,'N')))
    print len(set(cbs_vdc))
    for v in set(etl.colvals_notincol(cbs,'B',new,'N')):
        r = process.extract(v, [r for r in new_zip if r[1] == ])
        print r[0][1]


if __name__ == '__main__':
    read = False
    if read:
        for ws in w.worksheets:
            if ws['A44'].value:
                r = get_vals(ws)
                print r[-1][0]
                #pr(r)
    else:
        match_vdcs()
コード例 #10
0
 def test_get_values(self):
     db = Workbook().active
     db.append(('val', 'key'))
     self.assertEqual(etl.get_values(db.rows[0]),['val','key'])
コード例 #11
0
            try:
                v.value = etl.convert_date(v.value, "%d/%m/%Y", "%d/%m/%Y")
            except:
                try:
                    v.value = etl.convert_date(v.value, "%d/%m/%y", "%d/%m/%Y")
                except:
                    try:
                        v.value = etl.convert_date(v.value, "%m/%d/%y", "%d/%m/%Y")
                    except:
                        try:
                            v.value = etl.convert_date(v.value, "%m/%d/%Y", "%d/%m/%Y")
                        except:
                            "cant parse: " + v.value

with open("/Users/ewanog/Documents/tmp/dates.csv", "w") as f:
    csv_out = csv.writer(f, delimiter=",")
    csv_out.writerow(etl.get_values(s.rows[0]))
    for r in s.rows[1:]:
        out = []
        for v in r:
            try:
                if v.value is not None:
                    out.append(v.value)
            except Exception, e:
                print e

        csv_out.writerow(out)
        print str(out)

    f.close()