Esempi in Python per xstr, esempi in Python per etl.xstr

Esempio n. 1

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo20(db,ref):
    """Flag rows whose end date has passed while the status is not 'completed'.

    The end date is assembled from three consecutive cells starting at the
    'DD - End' column (day, month, year).  Rows whose end date cannot be
    built are skipped.  Rows whose status cell cannot be inspected (for
    example because it is empty) are reported.

    Args:
        db: sheet under validation (presumably an openpyxl worksheet --
            TODO confirm against the caller).
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is whatever
        ``return_message`` builds from the flagged rows.
    """
    status_col_loc = column_index_from_string(etl.find_in_header(db, 'Activity Status')) - 1
    comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1

    bad_date = []

    for row in db.rows[1:]:
        # Validity is decided per row.  A single sheet-wide flag used to be
        # set to False on the first unparsable date and never reset, which
        # silently disabled the check for every following row.
        try:
            comp = datetime(day=int(etl.xstr(row[comp_col_loc].value)),
                            month=int(etl.xstr(row[comp_col_loc + 1].value)),
                            year=int(etl.xstr(row[comp_col_loc + 2].value)))
        except Exception:
            continue

        try:
            overdue = (comp < datetime.now() and
                       'completed' not in row[status_col_loc].value.lower())
        except Exception:
            # Status could not be read (e.g. empty cell): report the row.
            overdue = True

        if overdue:
            bad_date.append('%s row %i' % (str(comp), row[0].row))

    return db, ref, return_message('Bad date or Completion Date has passed for ' ,bad_date)

Esempio n. 2

0

Mostra file

def algo10(db, ref):
    """Report (action type, action description) pairs missing from ``ref``.

    The 'Action_Type' column of ``ref`` lists the action types; for each
    type, ``ref`` is searched for a column headed with that type's name and
    every value below that header is taken as an allowed description.

    Args:
        db: sheet under validation.  The code reads ``row[0]`` and ``row[1]``
            of the 'Action type'..'Action description' range, so the two
            columns are presumably adjacent -- TODO confirm.
        ref: reference sheet.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the pairs not found in ``ref``.
    """
    type_col = etl.find_in_header(db, 'Action type')
    desc_col = etl.find_in_header(db, 'Action description')
    ref_type_col = etl.find_in_header(ref, 'Action_Type')

    # Allowed (type, description) pairs according to the reference sheet.
    allowed_pairs = []
    type_range = ref_type_col + "2:" + etl.find_last_value(ref, ref_type_col, 'c')
    for type_cells in ref.iter_rows(type_range):
        action = etl.xstr(type_cells[0].value)
        action_col = etl.find_in_header(ref, action)
        desc_range = action_col + "2:" + etl.find_last_value(ref, action_col, 'c')
        allowed_pairs.extend(
            [(action, etl.xstr(desc_cells[0].value))
             for desc_cells in ref.iter_rows(desc_range)])

    # Pairs actually present in the sheet being checked.
    db_range = type_col + "2:" + etl.find_last_value(db, desc_col, 'c')
    found_pairs = [(etl.xstr(cells[0].value), etl.xstr(cells[1].value))
                   for cells in db.iter_rows(db_range)]

    malformed_acts = [etl.xstr(pair) for pair in found_pairs
                      if pair not in allowed_pairs]

    return db, ref, return_message('Action descriptions not in action types:',
                                   malformed_acts)

Esempio n. 3

0

Mostra file

def algo7(db, ref):
    """Report (district, VDC) pairs from ``db`` that do not appear in ``ref``.

    Reference pairs are built in two passes because the district and VDC
    columns of ``ref`` are not neighbours: first one 1-tuple per district
    cell, then the VDC from the same position is appended to each tuple.

    Args:
        db: sheet under validation.  ``row[0]``/``row[1]`` of the
            'District'..'VDC / Municipalities' range are read, so those
            columns are presumably adjacent -- TODO confirm.
        ref: reference sheet.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the pairs not found in ``ref``.
    """
    db_dist_loc = etl.find_in_header(db, 'District')
    db_vdc_loc = etl.find_in_header(db, 'VDC / Municipalities')
    # NOTE(review): looked up but never used -- the district range below is
    # hard-coded to column E.  Confirm whether this lookup should drive it.
    ref_dist_loc = etl.find_in_header(ref, 'Admin1_District')
    ref_vdc_loc = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality')

    # (district, vdc) pairs found in db
    db_tuple = []
    for row in db.iter_rows(db_dist_loc + "2:" +
                            etl.find_last_value(db, db_vdc_loc, 'c')):
        db_tuple.append((etl.xstr(row[0].value), etl.xstr(row[1].value)))

    ref_tuple = []
    # first pass: districts only
    for row in ref.iter_rows('E2:' + etl.find_last_value(ref, 'E', 'c')):
        ref_tuple.append((etl.xstr(row[0].value), ))

    # second pass: append the VDC found at the same position
    it = 0
    for row in ref.iter_rows(ref_vdc_loc + "2:" +
                             etl.find_last_value(ref, ref_vdc_loc, 'c')):
        vdc = row[0].value
        # Only text can be encoded.  Calling .encode() unconditionally raised
        # AttributeError on blank (None) or numeric cells.
        if hasattr(vdc, 'encode'):
            vdc = vdc.encode("utf8")
        ref_tuple[it] += (etl.xstr(vdc), )
        it += 1

    # every db pair must exist in the reference pairs
    malformed_vdc = []
    for tup in db_tuple:
        if tup not in ref_tuple:
            malformed_vdc.append(etl.xstr(tup))

    return db, ref, return_message('VDCs in incorrect districts:',
                                   malformed_vdc)

Esempio n. 4

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo16(db,ref):
    """Check vulnerable-household counts against total-household counts.

    A row is reported when either value is not made of digits only, or when
    the vulnerable count is greater than the total count.  The two columns
    are read independently and paired by position with ``zip``, so pairing
    stops at the shorter column.

    Args:
        db: sheet under validation.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the offending value pairs.
    """
    total_col = etl.find_in_header(db, 'Total Number Households')
    vulnerable_col = etl.find_in_header(db, 'Vulnerable Caste / Ethnicity households ')

    # total household counts
    total_range = total_col + "2:" + etl.find_last_value(db, total_col, 'c')
    totals = [etl.xstr(cells[0].value) for cells in db.iter_rows(total_range)]

    # vulnerable household counts
    vulnerable_range = vulnerable_col + "2:" + etl.find_last_value(db, vulnerable_col, 'c')
    vulnerable = [etl.xstr(cells[0].value) for cells in db.iter_rows(vulnerable_range)]

    malformatted = []
    for pair in zip(totals, vulnerable):
        total, vul = pair
        both_numeric = total.isdigit() and vul.isdigit()
        # int() is only reached when both strings are digits (short-circuit).
        if not both_numeric or int(vul) > int(total):
            malformatted.append(etl.xstr(pair))

    return db, ref, return_message('Total #HH and Vulnerable #HH conflict or issue ' ,malformatted)

Esempio n. 5

0

Mostra file

def algo16(db, ref):
    """Validate 'Vulnerable ... households' against 'Total Number Households'.

    Both columns are collected as strings and compared position by
    position.  A pair is reported if either entry is not all digits or if
    the vulnerable figure is larger than the total.

    Args:
        db: sheet under validation.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the offending value pairs.
    """
    db_cnt_loc = etl.find_in_header(db, 'Total Number Households')
    db_vul_loc = etl.find_in_header(
        db, 'Vulnerable Caste / Ethnicity households ')

    def column_strings(col):
        # All values below the header of ``col``, converted with etl.xstr.
        cell_range = col + "2:" + etl.find_last_value(db, col, 'c')
        collected = []
        for cells in db.iter_rows(cell_range):
            collected.append(etl.xstr(cells[0].value))
        return collected

    cnt_vals = column_strings(db_cnt_loc)
    vul_vals = column_strings(db_vul_loc)

    malformatted = []
    for vals in zip(cnt_vals, vul_vals):
        count_text, vulnerable_text = vals
        if count_text.isdigit() and vulnerable_text.isdigit():
            conflict = int(vulnerable_text) > int(count_text)
        else:
            # non-numeric entry on either side
            conflict = True
        if conflict:
            malformatted.append(etl.xstr(vals))

    return db, ref, return_message(
        'Total #HH and Vulnerable #HH conflict or issue ', malformatted)

Esempio n. 6

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo10(db,ref):
    """List db (action type, description) pairs that ``ref`` does not allow.

    ``ref`` holds an 'Action_Type' column; each type named there is also
    looked up as a column header in ``ref`` and the values below that header
    are the descriptions allowed for the type.

    Args:
        db: sheet under validation; ``row[0]`` and ``row[1]`` of the
            'Action type'..'Action description' range are compared, so the
            columns are presumably adjacent -- TODO confirm.
        ref: reference sheet.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the pairs not found in ``ref``.
    """
    db_type_loc = etl.find_in_header(db, 'Action type')
    db_desc_loc = etl.find_in_header(db, 'Action description')
    ref_col_loc = etl.find_in_header(ref,'Action_Type')

    def rows_below_header(sheet, first_col, last_col):
        # Rows from row 2 of first_col down to the last value of last_col.
        return sheet.iter_rows(first_col + "2:" +
                               etl.find_last_value(sheet, last_col, 'c'))

    # reference pairs: one per (type, allowed description)
    act_types_tups = []
    for type_cells in rows_below_header(ref, ref_col_loc, ref_col_loc):
        act = etl.xstr(type_cells[0].value)
        act_col = etl.find_in_header(ref, act)
        for desc_cells in rows_below_header(ref, act_col, act_col):
            act_types_tups.append((act, etl.xstr(desc_cells[0].value)))

    # pairs present in db
    type_desc_tup = []
    for cells in rows_below_header(db, db_type_loc, db_desc_loc):
        type_desc_tup.append((etl.xstr(cells[0].value), etl.xstr(cells[1].value)))

    # keep the db pairs that have no counterpart in the reference pairs
    malformed_acts = []
    for tup in type_desc_tup:
        if tup in act_types_tups:
            continue
        malformed_acts.append(etl.xstr(tup))

    return db, ref, return_message('Action descriptions not in action types:' ,malformed_acts)

Esempio n. 7

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo5(db,ref):
    """Strip the implementing agency's name out of the local partner cell.

    When the local partner text contains the implementing agency text (and
    is not identical to it), the agency text plus all spaces, commas and
    dashes are removed from the local partner cell in place.

    Args:
        db: sheet under validation; modified in place.  The code reads
            ``row[0]`` and ``row[2]`` of the 'Implementing agency'..
            'Local partner agency' range, so the partner column is
            presumably two columns to the right -- TODO confirm.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the original values of the changed cells.
    """
    db_col_loc_a = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_c = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_a + "2:" +
                            etl.find_last_value(db, db_col_loc_c, 'c')):
        agency = etl.xstr(row[0].value)
        partner = etl.xstr(row[2].value)

        # An empty agency is a substring of every string.  Without this
        # guard a blank agency cell either crashed on .replace(None, '') or
        # stripped punctuation from an unrelated partner name.
        if not agency:
            continue

        if agency in partner and agency != partner:
            vals_changed.append(row[2].value)
            # Work on the string forms so non-text cells cannot break
            # .replace().
            cleaned = partner.replace(agency, '')
            for junk in (' ', ',', '-'):
                cleaned = cleaned.replace(junk, '')
            row[2].value = cleaned

    return db, ref, return_message('IA contained in LPA for the following:' ,vals_changed)

Esempio n. 8

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo12(db,ref):
    """Report non-numeric entries in the '# Items / # Man-hours / NPR' column.

    A cell passes when its ``etl.xstr`` form satisfies ``isdigit()``.  Empty
    cells are reported with their coordinate, other failures with their
    value.

    Args:
        db: sheet under validation.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the rejected entries.
    """
    col = etl.find_in_header(db, '# Items / # Man-hours / NPR')
    cell_range = col + "2:" + etl.find_last_value(db, col, 'c')
    bad_vals = []

    for cells in db.iter_rows(cell_range):
        cell = cells[0]
        if etl.xstr(cell.value).isdigit():
            continue
        if cell.value is None:
            bad_vals.append('Blank @ cell' + cell.coordinate)
        else:
            bad_vals.append(etl.xstr(cell.value))

    return db, ref, return_message('Invalid numbers for # Items / # Man-hours / NPR' ,bad_vals)

Esempio n. 9

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo13(db,ref):
    """Report non-numeric entries in the 'Total Number Households' column.

    Each value is converted with ``etl.xstr`` and must satisfy
    ``isdigit()``.  Empty cells are reported by coordinate, anything else by
    its string value.

    Args:
        db: sheet under validation.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the rejected entries.
    """
    households_col = etl.find_in_header(db, 'Total Number Households')
    bad_vals = []

    households_range = (households_col + "2:" +
                        etl.find_last_value(db, households_col, 'c'))
    for cells in db.iter_rows(households_range):
        cell = cells[0]
        if not etl.xstr(cell.value).isdigit():
            entry = (('Blank@ ' + cell.coordinate) if cell.value is None
                     else etl.xstr(cell.value))
            bad_vals.append(entry)

    return db, ref, return_message('Invalid numbers for Total Number Households' ,bad_vals)

Esempio n. 10

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo4(db,ref):
    """Clear the local partner cell when it equals the implementing agency.

    Args:
        db: sheet under validation; modified in place.  The code compares
            ``row[0]`` with ``row[2]`` of the 'Implementing agency'..
            'Local partner agency' range, so the partner column is
            presumably two columns to the right -- TODO confirm.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the values that were cleared.
    """
    db_col_loc_a = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_c = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_a + "2:" +
                            etl.find_last_value(db, db_col_loc_c, 'c')):
        partner = etl.xstr(row[2].value)

        # Nothing to clear when the partner cell is already blank.  Without
        # this guard two blank cells compared equal, so blank rows were
        # reported as "cleared" and their cells overwritten with ''.
        if not partner:
            continue

        if etl.xstr(row[0].value) == partner:
            vals_changed.append(partner)
            row[2].value = ''

    return db, ref, return_message('Local Partner Agencies matched Implementing and were cleared' ,vals_changed)

Esempio n. 11

0

Mostra file

def algo14(db, ref):
    """Report non-numeric entries in 'Average cost per households (NPR)'.

    A value is accepted when its ``etl.xstr`` form satisfies ``isdigit()``.
    Empty cells are reported by coordinate, other rejects by value.

    Args:
        db: sheet under validation.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the rejected entries.
    """
    cost_col = etl.find_in_header(db, 'Average cost per households (NPR)')

    def describe(cell):
        # Text used to report a rejected cell.
        if cell.value is None:
            return 'Blank@ ' + cell.coordinate
        return etl.xstr(cell.value)

    cost_range = cost_col + "2:" + etl.find_last_value(db, cost_col, 'c')
    bad_vals = [describe(cells[0])
                for cells in db.iter_rows(cost_range)
                if not etl.xstr(cells[0].value).isdigit()]

    return db, ref, return_message(
        'Invalid numbers for Average cost per households (NPR)', bad_vals)

Esempio n. 12

0

Mostra file

def algo12(db, ref):
    """Collect invalid values from the '# Items / # Man-hours / NPR' column.

    Values whose ``etl.xstr`` form is not all digits are rejected.  A
    rejected empty cell is recorded by its coordinate, any other rejected
    cell by its string value.

    Args:
        db: sheet under validation.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the rejected entries.
    """
    quantity_col = etl.find_in_header(db, '# Items / # Man-hours / NPR')
    bad_vals = []

    last_cell = etl.find_last_value(db, quantity_col, 'c')
    for cells in db.iter_rows(quantity_col + "2:" + last_cell):
        cell = cells[0]
        text = etl.xstr(cell.value)
        if text.isdigit():
            continue
        if cell.value is None:
            bad_vals.append('Blank @ cell' + cell.coordinate)
            continue
        bad_vals.append(etl.xstr(cell.value))

    return db, ref, return_message(
        'Invalid numbers for # Items / # Man-hours / NPR', bad_vals)

Esempio n. 13

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo18(db,ref):
    """Report start dates that are unreadable or earlier than 25/04/2015.

    The start date is assembled from three consecutive cells starting at
    the 'DD - Start' column (day, month, year).

    Args:
        db: sheet under validation (presumably an openpyxl worksheet --
            TODO confirm against the caller).
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the flagged rows.
    """
    date_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - Start')) - 1

    # Loop-invariant threshold, parsed once instead of once per row.
    eq_date = parse('4-25-2015')
    bad_date = []

    for row in db.rows[1:]:
        # Reset for every row.  Previously a row that failed to parse was
        # reported with the date left over from the last good row.
        cur = ""
        try:
            cur = datetime(day=int(etl.xstr(row[date_col_loc].value)),
                           month=int(etl.xstr(row[date_col_loc + 1].value)),
                           year=int(etl.xstr(row[date_col_loc + 2].value)))

            if cur < eq_date:
                bad_date.append(str(cur) + ' @ row ' + str(row[0].row))
        except Exception:
            # Unreadable date: report the row (cur is '' unless it was built).
            bad_date.append(str(cur) + ' @ row ' + str(row[0].row))

    return db, ref, return_message('Malformatted date before EQ date', bad_date)

Esempio n. 14

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo19(db,ref):
    """Report rows whose end date is unreadable or earlier than the start date.

    Start and end dates are each assembled from three consecutive cells
    (day, month, year) beginning at the 'DD - Start' and 'DD - End' columns.
    Flagged rows are reported with the start date that was parsed for that
    row (empty when the start date itself could not be built).

    Args:
        db: sheet under validation (presumably an openpyxl worksheet --
            TODO confirm against the caller).
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the flagged rows.
    """
    start_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - Start')) - 1
    comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1

    bad_date = []

    for row in db.rows[1:]:
        # Reset for every row.  Previously a row that failed to parse was
        # reported with the start date left over from an earlier row.
        start = ""
        comp = ""
        try:
            start = datetime(day=int(etl.xstr(row[start_col_loc].value)),
                             month=int(etl.xstr(row[start_col_loc + 1].value)),
                             year=int(etl.xstr(row[start_col_loc + 2].value)))

            comp = datetime(day=int(etl.xstr(row[comp_col_loc].value)),
                            month=int(etl.xstr(row[comp_col_loc + 1].value)),
                            year=int(etl.xstr(row[comp_col_loc + 2].value)))

            if comp < start:
                bad_date.append(str(start) + ' @ row ' + str(row[0].row))

        except Exception:
            # Either date could not be built: report the row.
            bad_date.append(str(start) + ' @ row ' + str(row[0].row))

    return db, ref, return_message('Malformatted date or before Start Date', bad_date)

Esempio n. 15

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo2(db,ref):
    """Mark the sourcing agency as 'INTERNAL' when it equals the implementing agency.

    The code compares ``row[0]`` with ``row[1]`` of the
    'Implementing agency'..'Sourcing Agency' range, i.e. it assumes the two
    columns sit next to each other.

    Args:
        db: sheet under validation; modified in place.
        ref: reference sheet; returned untouched.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the sourcing values that were replaced.
    """
    impl_col = etl.find_in_header(db, 'Implementing agency')
    source_col = etl.find_in_header(db, 'Sourcing Agency')
    vals_changed = []

    cell_range = impl_col + "2:" + etl.find_last_value(db, source_col, 'c')
    for cells in db.iter_rows(cell_range):
        implementing, sourcing = cells[0], cells[1]
        # NOTE(review): two blank cells also compare equal here, so blank
        # rows are marked INTERNAL as well -- confirm that is intended.
        if implementing.value == sourcing.value:
            vals_changed.append(etl.xstr(sourcing.value))
            sourcing.value = 'INTERNAL'

    return db, ref, return_message('Sourcing Agencies set as INTERNAL:' ,vals_changed)

Esempio n. 16

0

Mostra file

File: clean.py Progetto: shelter-cluster-nepal-im/etl-extravaganza

def algo7(db,ref):
    """Report (district, VDC) pairs from ``db`` that are absent from ``ref``.

    The reference pairs are assembled in two passes since the district and
    VDC columns of ``ref`` are not adjacent: districts first (one 1-tuple
    per cell), then the VDC at the same position is appended to each tuple.

    Args:
        db: sheet under validation.  ``row[0]``/``row[1]`` of the
            'District'..'VDC / Municipalities' range are read, so those
            columns are presumably adjacent -- TODO confirm.
        ref: reference sheet.

    Returns:
        Tuple ``(db, ref, message)`` where ``message`` is built by
        ``return_message`` from the pairs not found in ``ref``.
    """
    db_dist_loc = etl.find_in_header(db, 'District')
    db_vdc_loc = etl.find_in_header(db, 'VDC / Municipalities')
    # NOTE(review): looked up but never used -- the district range below is
    # hard-coded to column E.  Confirm whether this lookup should drive it.
    ref_dist_loc = etl.find_in_header(ref, 'Admin1_District')
    ref_vdc_loc = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality')

    # (district, vdc) pairs found in db
    db_tuple = []
    for row in db.iter_rows(db_dist_loc + "2:" +
                            etl.find_last_value(db, db_vdc_loc, 'c')):
        db_tuple.append((etl.xstr(row[0].value), etl.xstr(row[1].value)))

    # first pass over ref: districts only
    ref_tuple = []
    for row in ref.iter_rows('E2:' +
                             etl.find_last_value(ref, 'E', 'c')):
        ref_tuple.append((etl.xstr(row[0].value),))

    # second pass over ref: append the VDC found at the same position
    it = 0
    for row in ref.iter_rows(ref_vdc_loc + "2:" +
                             etl.find_last_value(ref, ref_vdc_loc, 'c')):
        vdc = row[0].value
        # Only text can be encoded.  Calling .encode() unconditionally raised
        # AttributeError on blank (None) or numeric cells.
        if hasattr(vdc, 'encode'):
            vdc = vdc.encode("utf8")
        ref_tuple[it] += (etl.xstr(vdc),)
        it += 1

    # every db pair must exist in the reference pairs
    malformed_vdc = []
    for tup in db_tuple:
        if tup not in ref_tuple:
            malformed_vdc.append(etl.xstr(tup))

    return db, ref, return_message('VDCs in incorrect districts:' ,malformed_vdc)

Esempio n. 17

0

Mostra file

File: master_db_creation.py Progetto: eoglethorpe/nepal-earthquake

def gen_pk(r, locs):
    """Build a key string for row ``r`` by concatenating nine of its cells.

    Args:
        r: indexable row of cell objects exposing ``.value``.
        locs: mapping from field name to column position; one is subtracted
            from each position before indexing ``r``.

    Returns:
        The ``etl.xstr`` forms of the selected cell values joined, in the
        order listed below, with no separator.
    """
    key_fields = ("imp_agency", "local_partner", "district", "vdc", "ward",
                  "act_type", "act_desc", "quantity", "total_hh")
    parts = [etl.xstr(r[locs[field] - 1].value) for field in key_fields]
    return "".join(parts)