Python find_in_header Exemples, etl.find_in_header Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo1(db, ref):
    #***Column A must be in Reference>ImplementingAgency if not: make a note
    db_col_loc = etl.find_in_header(db, 'Implementing agency')
    ref_col_loc = etl.find_in_header(ref,'Implementing_Agency_Name')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, return_message('Agencies not in the reference:' ,missing_names)

Exemple #2

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo11(db,ref):
    #Column L: must be in Reference>Target
    db_col_loc = etl.find_in_header(db, 'Targeting')
    ref_col_loc = etl.find_in_header(ref,'Target')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, return_message('Targets not in Referece' ,missing_names)

Exemple #3

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo10(db,ref):
    #column J: must be in reference>ActionDescription relative to Action_Type
    #There are 4 action types each with their own self labeled column
    db_type_loc = etl.find_in_header(db, 'Action type')
    db_desc_loc = etl.find_in_header(db, 'Action description')
    ref_col_loc = etl.find_in_header(ref,'Action_Type')

    #get ref action type tuples
    act_types_tups = []

    #go through action types and go through respected cols and make tuples
    for row in ref.iter_rows(ref_col_loc + "2:" +
        etl.find_last_value(ref, ref_col_loc, 'c')):
            act = etl.xstr(row[0].value)
            act_col = etl.find_in_header(ref, act)
            for row in ref.iter_rows(act_col + "2:" +
                etl.find_last_value(ref, act_col, 'c')):
                    act_types_tups.append((act, etl.xstr(row[0].value)))

    #get tuples of dist actions
    type_desc_tup = []
    for row in db.iter_rows(db_type_loc + "2:" +
        etl.find_last_value(db, db_desc_loc, 'c')):
            type_desc_tup.append((etl.xstr(row[0].value),etl.xstr(row[1].value)))

    malformed_acts = []
    #go through dist actions and see if they're in ref tuples
    for tup in type_desc_tup:
        if tup not in act_types_tups:
            malformed_acts.append(etl.xstr(tup))

    return db, ref, return_message('Action descriptions not in action types:' ,malformed_acts)

Exemple #4

0

Afficher le fichier

def algo15(db, ref):
    #Column P: must be a number>=0 and <= Column N (more conditions to follow)
    db_cnt_loc = etl.find_in_header(db, 'Total Number Households')
    db_fem_loc = etl.find_in_header(db, 'Female headed households')

    #get count values
    cnt_vals = []
    for row in db.iter_rows(db_cnt_loc + "2:" +
                            etl.find_last_value(db, db_cnt_loc, 'c')):
        cnt_vals.append(etl.xstr(row[0].value))

    #get female values
    fem_vals = []
    for row in db.iter_rows(db_fem_loc + "2:" +
                            etl.find_last_value(db, db_fem_loc, 'c')):
        fem_vals.append(etl.xstr(row[0].value))

    malformatted = []
    #zip and check values
    for vals in zip(cnt_vals, fem_vals):
        if not (vals[0].isdigit() and vals[1].isdigit()):
            malformatted.append(etl.xstr(vals))
        elif int(vals[1]) > int(vals[0]):
            malformatted.append(etl.xstr(vals))

    return db, ref, return_message(
        'Total #HH and Female #HH conflict or issue ', malformatted)

Exemple #5

0

Afficher le fichier

def algo16(db, ref):
    #Column Q: must be a number>=0 and <= Column N (more conditions to follow)
    db_cnt_loc = etl.find_in_header(db, 'Total Number Households')
    db_vul_loc = etl.find_in_header(
        db, 'Vulnerable Caste / Ethnicity households ')

    #get count values
    cnt_vals = []
    for row in db.iter_rows(db_cnt_loc + "2:" +
                            etl.find_last_value(db, db_cnt_loc, 'c')):
        cnt_vals.append(str(row[0].value))

    #get female values
    vul_vals = []
    for row in db.iter_rows(db_vul_loc + "2:" +
                            etl.find_last_value(db, db_vul_loc, 'c')):
        vul_vals.append(str(row[0].value))

    malformatted = []
    #zip and check values
    for vals in zip(cnt_vals, vul_vals):
        if not (vals[0].isdigit() and vals[1].isdigit()):
            malformatted.append(str(vals))
        elif int(vals[1]) > int(vals[0]):
            malformatted.append(str(vals))

    return db, ref, 'Total #HH and Vulnerable #HH conflict or issue \n' + ','.join(
        malformatted)

Exemple #6

0

Afficher le fichier

def algo9(db, ref):
    #column J: must be in reference>Type of Activity
    db_col_loc = etl.find_in_header(db, 'Action type')
    ref_col_loc = etl.find_in_header(ref, 'Action_Type')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, 'Incorrect Action types:\n' + ','.join(missing_names)

Exemple #7

0

Afficher le fichier

def algo20(db, ref):
    #Output if completion date < current date and activity status not like completed
    status_col_loc = column_index_from_string(
        etl.find_in_header(db, 'Activity Status')) - 1
    comp_col_loc = column_index_from_string(etl.find_in_header(db,
                                                               'DD - End')) - 1

    bad_date = []
    valid = True
    comp = ""

    for row in db.rows[1:]:
        try:
            comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \
                month = int(etl.xstr(row[comp_col_loc+1].value)), \
                year = int(etl.xstr(row[comp_col_loc+2].value)))
        except:
            valid = False

        if valid:
            try:
                if comp < datetime.now(
                ) and 'completed' not in row[status_col_loc].value.lower():
                    bad_date.append('%s row %i' % (str(comp), row[0].row))
            except:
                bad_date.append('%s row %i' % (str(comp), row[0].row))

    return db, ref, return_message(
        'Bad date or Completion Date has passed for ', bad_date)

Exemple #8

0

Afficher le fichier

def algo19(db, ref):
    #Column T: must be a date>= 25/04/2015 and >=Column S
    start_col_loc = column_index_from_string(
        etl.find_in_header(db, 'DD - Start')) - 1
    comp_col_loc = column_index_from_string(etl.find_in_header(db,
                                                               'DD - End')) - 1

    bad_date = []
    start = ""
    comp = ""

    for row in db.rows[1:]:
        try:
            start = datetime(day = int(etl.xstr(row[start_col_loc].value)), \
                            month = int(etl.xstr(row[start_col_loc+1].value)), \
                            year = int(etl.xstr(row[start_col_loc+2].value)))

            comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \
                            month = int(etl.xstr(row[comp_col_loc+1].value)), \
                            year = int(etl.xstr(row[comp_col_loc+2].value)))

            if comp < start:
                bad_date.append(str(start) + ' @ row ' + str(row[0].row))

        except:
            bad_date.append(str(start) + ' @ row ' + str(row[0].row))

    return db, ref, return_message('Malformatted date or before Start Date',
                                   bad_date)

Exemple #9

0

Afficher le fichier

def algo10(db, ref):
    #column J: must be in reference>ActionDescription relative to Action_Type
    #There are 4 action types each with their own self labeled column
    db_type_loc = etl.find_in_header(db, 'Action type')
    db_desc_loc = etl.find_in_header(db, 'Action description')
    ref_col_loc = etl.find_in_header(ref, 'Action_Type')

    #get ref action type tuples
    act_types_tups = []

    #go through action types and go through respected cols and make tuples
    for row in ref.iter_rows(ref_col_loc + "2:" +
                             etl.find_last_value(ref, ref_col_loc, 'c')):
        act = str(row[0].value)
        act_col = etl.find_in_header(ref, act)
        for row in ref.iter_rows(act_col + "2:" +
                                 etl.find_last_value(ref, act_col, 'c')):
            act_types_tups.append((act, str(row[0].value)))

    #get tuples of dist actions
    type_desc_tup = []
    for row in db.iter_rows(db_type_loc + "2:" +
                            etl.find_last_value(db, db_desc_loc, 'c')):
        type_desc_tup.append((str(row[0].value), str(row[1].value)))

    malformed_acts = []
    #go through dist actions and see if they're in ref tuples
    for tup in type_desc_tup:
        if tup not in act_types_tups:
            malformed_acts.append(str(tup))

    return db, ref, 'Action descriptions not in action types:\n' + ','.join(
        malformed_acts)

Exemple #10

0

Afficher le fichier

def algo6(db, ref):
    #Column G: must be in Reference>District
    db_col_loc = etl.find_in_header(db, 'District')
    ref_col_loc = etl.find_in_header(ref, 'Admin1_District')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
    return db, ref, 'Districts not in the reference:\n' + ','.join(
        missing_names)

Exemple #11

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo9(db,ref):
    #column J: must be in reference>Type of Activity
    db_col_loc = etl.find_in_header(db, 'Action type')
    ref_col_loc = etl.find_in_header(ref,'Action_Type')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, return_message('Incorrect Action types:' ,missing_names)

Exemple #12

0

Afficher le fichier

def algo11(db, ref):
    #Column L: must be in Reference>Target
    db_col_loc = etl.find_in_header(db, 'Targeting')
    ref_col_loc = etl.find_in_header(ref, 'Target')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, 'Targets not in Referece\n' + ','.join(missing_names)

Exemple #13

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def reach(self, db):
        """how is the vdc accessible"""
        #TODO: use index() method instead of dict
        dist_loc = column_index_from_string(etl.find_in_header(db,
                                                               'District')) - 1
        vdc_loc = column_index_from_string(
            etl.find_in_header(db, 'VDC / Municipalities')) - 1
        acc_look = column_index_from_string(
            etl.find_in_header(self.s_dict['acc'],
                               'DistrictVDC Concatenation')) - 1
        acc_acc = column_index_from_string(
            etl.find_in_header(self.s_dict['acc'], 'NeKSAP ACCESS')) - 1

        dist_vals = etl.get_values(db.columns[dist_loc][1:])
        vdc_vals = etl.get_values(db.columns[vdc_loc][1:])
        acc_look_vals = etl.get_values(
            self.s_dict['acc'].columns[acc_look][1:])
        acc_acc_vals = etl.get_values(self.s_dict['acc'].columns[acc_acc][1:])

        #make dict for access, concatenate dist and vdc
        acc_dict = dict(zip(acc_look_vals, acc_acc_vals))
        d_v_conc = [dist_vals[i] + vdc_vals[i] for i in xrange(len(dist_vals))]

        ret = []
        for v in d_v_conc:
            if v in acc_dict and v != '':
                ret.append(acc_dict[v])
            else:
                ret.append('')

        return ret

Exemple #14

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo20(db,ref):
    #Output if completion date < current date and activity status not like completed
    status_col_loc = column_index_from_string(etl.find_in_header(db, 'Activity Status')) - 1
    comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1

    bad_date = []
    valid = True
    comp = ""

    for row in db.rows[1:]:
        try:
            comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \
                month = int(etl.xstr(row[comp_col_loc+1].value)), \
                year = int(etl.xstr(row[comp_col_loc+2].value)))
        except:
            valid = False

        if valid:
            try:
                if comp < datetime.now() and 'completed' not in row[status_col_loc].value.lower():
                    bad_date.append('%s row %i' % (str(comp),row[0].row))
            except:
                    bad_date.append('%s row %i' % (str(comp),row[0].row))

    return db, ref, return_message('Bad date or Completion Date has passed for ' ,bad_date)

Exemple #15

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo19(db,ref):
    #Column T: must be a date>= 25/04/2015 and >=Column S
    start_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - Start')) - 1
    comp_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - End')) - 1

    bad_date = []
    start = ""
    comp = ""

    for row in db.rows[1:]:
        try:
            start = datetime(day = int(etl.xstr(row[start_col_loc].value)), \
                            month = int(etl.xstr(row[start_col_loc+1].value)), \
                            year = int(etl.xstr(row[start_col_loc+2].value)))

            comp = datetime(day = int(etl.xstr(row[comp_col_loc].value)), \
                            month = int(etl.xstr(row[comp_col_loc+1].value)), \
                            year = int(etl.xstr(row[comp_col_loc+2].value)))

            if comp < start:
                bad_date.append(str(start) + ' @ row ' + str(row[0].row))

        except:
            bad_date.append(str(start) + ' @ row ' + str(row[0].row))

    return db, ref, return_message('Malformatted date or before Start Date', bad_date)

Exemple #16

0

Afficher le fichier

def algo7(db, ref):
    #Column H: must be in Reference>VDC - relative to districy
    #the district in which a vdc in the db is located must = the district in ref
    db_dist_loc = etl.find_in_header(db, 'District')
    db_vdc_loc = etl.find_in_header(db, 'VDC / Municipalities')
    ref_dist_loc = etl.find_in_header(ref, 'Admin1_District')
    ref_vdc_loc = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality')

    #create tuple of vdc, dist for db and ref
    db_tuple = []
    for row in db.iter_rows(db_dist_loc + "2:" +
                            etl.find_last_value(db, db_vdc_loc, 'c')):
        db_tuple.append((str(row[0].value), str(row[1].value)))

    ref_tuple = []
    #do it just for dist, then for vdc as they're not neighbors
    for row in ref.iter_rows('E2:' + etl.find_last_value(ref, 'E', 'c')):
        ref_tuple.append((str(row[0].value), ))

    it = 0
    for row in ref.iter_rows(ref_vdc_loc + "2:" +
                             etl.find_last_value(ref, ref_vdc_loc, 'c')):
        ref_tuple[it] += (str(row[0].value.encode("utf8")), )
        it += 1

    #iterate through db_tuple and see if tuple is in ref
    malformed_vdc = []
    for tup in db_tuple:
        if tup not in ref_tuple:
            malformed_vdc.append(str(tup))

    return db, ref, 'VDCs in incorrect districts:\n' + ','.join(malformed_vdc)

Exemple #17

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo17(db,ref):
    #column R: must be in reference>Status
    db_col_loc = etl.find_in_header(db, 'Activity Status')
    ref_col_loc = etl.find_in_header(ref,'Status')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, return_message('Activity Status not in Reference' ,missing_names)

Exemple #18

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo16(db,ref):
    #Column Q: must be a number>=0 and <= Column N (more conditions to follow)
    db_cnt_loc = etl.find_in_header(db, 'Total Number Households')
    db_vul_loc = etl.find_in_header(db, 'Vulnerable Caste / Ethnicity households ')


    #get count values
    cnt_vals = []
    for row in db.iter_rows(db_cnt_loc + "2:" +
        etl.find_last_value(db, db_cnt_loc, 'c')):
            cnt_vals.append(etl.xstr(row[0].value))

    #get female values
    vul_vals = []
    for row in db.iter_rows(db_vul_loc + "2:" +
        etl.find_last_value(db, db_vul_loc, 'c')):
            vul_vals.append(etl.xstr(row[0].value))

    malformatted = []
    #zip and check values
    for vals in zip(cnt_vals, vul_vals):
        if not (vals[0].isdigit() and vals[1].isdigit()):
            malformatted.append(etl.xstr(vals))
        elif int(vals[1]) > int(vals[0]):
            malformatted.append(etl.xstr(vals))

    return db, ref, return_message('Total #HH and Vulnerable #HH conflict or issue ' ,malformatted)

Exemple #19

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo3(db,ref):
    #* what return? change?
    #Column C: must be in Reference>LocalPartnerAgency
    #If not: Check if mispelling (like '%agencyname%)

    db_col_loc = etl.find_in_header(db, 'Local partner agency')
    ref_col_loc = etl.find_in_header(ref,'Local_Partner_Agency')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

Exemple #20

0

Afficher le fichier

def algo3(db, ref):
    #* what return? change?
    #Column C: must be in Reference>LocalPartnerAgency
    #If not: Check if mispelling (like '%agencyname%)

    db_col_loc = etl.find_in_header(db, 'Local partner agency')
    ref_col_loc = etl.find_in_header(ref, 'Local_Partner_Agency')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

Exemple #21

0

Afficher le fichier

def algo17(db, ref):
    #column R: must be in reference>Status
    db_col_loc = etl.find_in_header(db, 'Activity Status')
    ref_col_loc = etl.find_in_header(ref, 'Status')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, 'Activity Status not in Referece\n' + ','.join(
        missing_names)

Exemple #22

0

Afficher le fichier

def algo1(db, ref):
    #***Column A must be in Reference>ImplementingAgency if not: make a note
    db_col_loc = etl.find_in_header(db, 'Implementing agency')
    ref_col_loc = etl.find_in_header(ref, 'Implementing_Agency_Name')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    return db, ref, 'Agencies not in the reference:\n' + ','.join(
        missing_names)

Exemple #23

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo5(db,ref):
    #if Column C contains Column A: remove all spaces, commas, dashes and the matching substring in C
    db_col_loc_a = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_c = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_a + "2:" +
        etl.find_last_value(db, db_col_loc_c, 'c')):
            if etl.xstr(row[0].value) in etl.xstr(row[2].value) and etl.xstr(row[0].value) != etl.xstr(row[2].value):
                vals_changed.append(row[2].value)
                row[2].value = row[2].value.replace(row[0].value,'').replace(' ','').replace(',','').replace('-','')

    return db, ref, return_message('IA contained in LPA for the following:' ,vals_changed)

Exemple #24

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo2(db,ref):
    #Column B: If == Implementing Agency set Column B=Internal
    #assumes they are next to each other
    db_col_loc_impl = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_source = etl.find_in_header(db, 'Sourcing Agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_impl + "2:" +
        etl.find_last_value(db, db_col_loc_source, 'c')):
            if row[0].value == row[1].value:
                vals_changed.append(etl.xstr(row[1].value))
                row[1].value = 'INTERNAL'

    return db, ref, return_message('Sourcing Agencies set as INTERNAL:' ,vals_changed)

Exemple #25

0

Afficher le fichier

def algo2(db, ref):
    #Column B: If == Implementing Agency set Column B=Internal
    #assumes they are next to each other
    db_col_loc_impl = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_source = etl.find_in_header(db, 'Sourcing Agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_impl + "2:" +
                            etl.find_last_value(db, db_col_loc_source, 'c')):
        if row[0].value == row[1].value:
            vals_changed.append(str(row[1].value))
            row[1].value = 'INTERNAL'
    return db, ref, 'Sourcing Agencies set as INTERNAL:\n' + ','.join(
        vals_changed)

Exemple #26

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo4(db,ref):
    #if Column C == Column A
    #Clear column C

    db_col_loc_a = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_c = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_a + "2:" +
        etl.find_last_value(db, db_col_loc_c, 'c')):
            if etl.xstr(row[0].value) == etl.xstr(row[2].value):
                vals_changed.append(etl.xstr(row[2].value))
                row[2].value = ''

    return db, ref, return_message('Local Partner Agencies matched Implementing and were cleared' ,vals_changed)

Exemple #27

0

Afficher le fichier

def algo5(db, ref):
    #if Column C contains Column A: remove all spaces, commas, dashes and the matching substring in C
    db_col_loc_a = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_c = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_a + "2:" +
                            etl.find_last_value(db, db_col_loc_c, 'c')):
        if str(row[0].value) in str(
                row[2].value) and str(row[0].value) != str(row[2].value):
            vals_changed.append(row[2].value)
            row[2].value = row[2].value.replace(row[0].value, '').replace(
                ' ', '').replace(',', '').replace('-', '')

    return db, ref, 'IA contained in LPA for the following:\n' + ','.join(
        vals_changed)

Exemple #28

0

Afficher le fichier

def algo4(db, ref):
    #if Column C == Column A
    #Clear column C

    db_col_loc_a = etl.find_in_header(db, 'Implementing agency')
    db_col_loc_c = etl.find_in_header(db, 'Local partner agency')
    vals_changed = []

    for row in db.iter_rows(db_col_loc_a + "2:" +
                            etl.find_last_value(db, db_col_loc_c, 'c')):
        if str(row[0].value) == str(row[2].value):
            vals_changed.append(str(row[2].value))
            row[2].value = ''

    return db, ref, 'Local Partner Agencies matched Implementing and were cleared\n' + ','.join(
        vals_changed)

Exemple #29

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def priority(self, db):
        """is it a priority district"""
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        rvals = etl.get_values(self.s_dict['ref'].columns[rloc][1:])
        ret = []

        for v in dvals:
            if v in rvals and v != '':
                ret.append('TRUE')
            else:
                ret.append('FALSE')

        return ret

Exemple #30

0

Afficher le fichier

def algo19(db, ref):
    #Column T: must be a date>= 25/04/2015 and >=Column S
    comp_col_loc = etl.find_in_header(db,
                                      'Completion Date\n (Actual or Planned)')
    start_col_loc = etl.find_in_header(db, 'Start date \n(Actual or Planned)')

    bad_date = []

    for row in db.iter_rows(comp_col_loc + "2:" +
                            etl.find_last_value(db, comp_col_loc, 'c')):
        cur = str(row[0].value)
        try:
            if parse(cur) < parse('4-25-2015') or parse(cur) < parse(
                    str(db[start_col_loc + str(row[0].row)].value)):
                bad_date.append(cur + ' @ cell ' + row[0].coordinate)
        except Exception, e:
            bad_date.append(cur + ' @ cell ' + row[0].coordinate)

Exemple #31

0

Afficher le fichier

def algo8(db, ref):
    #Column I: must be in reference>Ward or a number or numbers separated by commas

    #take all into missing names and trim from there
    db_col_loc = etl.find_in_header(db, 'Municipal Ward')
    ref_col_loc = etl.find_in_header(ref, 'Wards')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    #trim missing_names to see if they contain any forbidden chars
    invalid = []

    for v in missing_names:
        for letter in v:
            #if we've found an illegal letter
            if letter not in set(string.digits + ' ' + ','):
                invalid.append('(' + v + ')')
                break

    return db, ref, 'Malformed wards: \n' + ','.join(invalid)

Exemple #32

0

Afficher le fichier

def algo18(db, ref):
    #Column S: must be a date>= 25/04/2015
    date_col_loc = etl.find_in_header(db, 'Start date \n(Actual or Planned)')

    if date_col_loc is None:
        date_col_loc = etl.find_in_header(db, 'Start date')

    bad_date = []

    for row in db.iter_rows(date_col_loc + "2:" +
                            etl.find_last_value(db, date_col_loc, 'c')):
        cur = str(row[0].value)
        try:
            if parse(cur) < parse('4-25-2015'):
                bad_date.append(cur + ' @ cell ' + row[0].coordinate)
        except:
            bad_date.append(cur + ' @ cell ' + row[0].coordinate)

    return db, ref, 'Malformatted date\n' + ','.join(bad_date)

Exemple #33

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def update(self, db):
        "last updated"
        dloc = column_index_from_string(etl.find_in_header(db,
                                                           'Last Update')) - 1
        dvals = etl.get_values(db.columns[dloc][1:])
        to_ret = []

        for v in dvals:
            to_ret.append(v)

        return to_ret

Exemple #34

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo8(db,ref):
    #Column I: must be in reference>Ward or a number or numbers separated by commas

    #take all into missing names and trim from there
    db_col_loc = etl.find_in_header(db, 'Municipal Ward')
    ref_col_loc = etl.find_in_header(ref,'Wards')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    #trim missing_names to see if they contain any forbidden chars
    invalid = []

    for v in missing_names:
        for letter in v:
            #if we've found an illegal letter
            if letter not in set(string.digits + ' ' + ','):
                invalid.append('(' + v + ')')
                break


    return db, ref, return_message('Malformed wards: ' ,invalid)

Exemple #35

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def act_cat(self, db):
        "UNOCHA cat"
        dloc = column_index_from_string(
            etl.find_in_header(db, 'Action description')) - 1
        sc_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'SC Categories')) - 1
        un_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'UN Categories')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        sc_rloc_vals = etl.get_values(self.s_dict['ref'].columns[sc_rloc][1:])
        un_rloc_vals = etl.get_values(self.s_dict['ref'].columns[un_rloc][1:])
        ret = []

        for v in dvals:
            if v in sc_rloc_vals and v != '':
                ret.append(un_rloc_vals[sc_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret

Exemple #36

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def dist_hlcit(self, db):
        "dist code"
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        d_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Admin1_District')) - 1
        code_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Admin1_P-Code')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        d_rloc_vals = etl.get_values(self.s_dict['ref'].columns[d_rloc][1:])
        code_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[code_rloc][1:])
        ret = []

        for v in dvals:
            if v in d_rloc_vals and v != '':
                ret.append(code_rloc_vals[d_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret

Exemple #37

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def hub(self, db):
        "what a hub a dist is in"
        dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
        pri_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1
        hub_rloc = column_index_from_string(
            etl.find_in_header(self.s_dict['ref'], 'Shelter Cluster Hubs')) - 1

        dvals = etl.get_values(db.columns[dloc][1:])
        pri_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[pri_rloc][1:])
        hub_rloc_vals = etl.get_values(
            self.s_dict['ref'].columns[hub_rloc][1:])
        ret = []

        for v in dvals:
            if v in pri_rloc_vals and v != '':
                ret.append(hub_rloc_vals[pri_rloc_vals.index(v)])
            else:
                ret.append('')

        return ret

Exemple #38

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo12(db,ref):
    #Column M: must be a number>=0 and (more conditions to follow)
    db_col_loc = etl.find_in_header(db, '# Items / # Man-hours / NPR')
    bad_vals = []

    for row in db.iter_rows(db_col_loc + "2:" +
        etl.find_last_value(db, db_col_loc, 'c')):
            if not etl.xstr(row[0].value).isdigit():
                if row[0].value == None:
                    bad_vals.append('Blank @ cell' + row[0].coordinate)
                else:
                    bad_vals.append(etl.xstr(row[0].value))

    return db, ref, return_message('Invalid numbers for # Items / # Man-hours / NPR' ,bad_vals)

Exemple #39

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo13(db,ref):
    #Column N: must be a number>=0 and (more conditions to follow)
    db_col_loc = etl.find_in_header(db, 'Total Number Households')
    bad_vals = []

    for row in db.iter_rows(db_col_loc + "2:" +
        etl.find_last_value(db, db_col_loc, 'c')):
            if not etl.xstr(row[0].value).isdigit():
                if row[0].value == None:
                    bad_vals.append('Blank@ ' + row[0].coordinate)
                else:
                    bad_vals.append(etl.xstr(row[0].value))

    return db, ref, return_message('Invalid numbers for Total Number Households' ,bad_vals)

Exemple #40

0

Afficher le fichier

Fichier : metadata.py Projet : shelter-cluster-nepal-im/etl-extravaganza

    def uid(self, db):
        "UID"
        ok_key = False
        if db.title == 'Distributions':
            vals = [
                "Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Action type",
                "Action description", "# Items / # Man-hours / NPR",
                "Total Number Households"
            ]
            ok_key = True

        elif db.title == 'Trainings':
            vals = [
                "Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Training Subject",
                "Audience", "IEC Materials Distributed", "Males"
            ]
            ok_key = True

        to_ret = []
        mal_count = 0

        print 'Starting UID'

        if ok_key:
            #pull vals into array
            key_vals = []
            max_len = 0
            for v in vals:
                val_arr = etl.get_values(db.columns[
                    column_index_from_string(etl.find_in_header(db, v)) - 1])
                key_vals.append(val_arr)
                if len(val_arr) > max_len:
                    max_len = len(val_arr)

            for i in xrange(1, max_len):
                cur_key = ''
                for k in xrange(len(key_vals)):
                    try:
                        cur_key += key_vals[k][i]
                    except:
                        pass
                to_ret.append(cur_key)

            return to_ret

        else:
            print 'ERROR: no suitable WB found for UID'
            return to_ret

Exemple #41

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo7(db,ref):
    #Column H: must be in Reference>VDC - relative to districy
    #the district in which a vdc in the db is located must = the district in ref
    db_dist_loc = etl.find_in_header(db, 'District')
    db_vdc_loc = etl.find_in_header(db, 'VDC / Municipalities')
    ref_dist_loc = etl.find_in_header(ref, 'Admin1_District')
    ref_vdc_loc = etl.find_in_header(ref, 'Admin2_OCHA_VDC-Municipality')

    #create tuple of vdc, dist for db and ref
    db_tuple = []
    for row in db.iter_rows(db_dist_loc + "2:" +
        etl.find_last_value(db, db_vdc_loc, 'c')):
            db_tuple.append((etl.xstr(row[0].value), etl.xstr(row[1].value)))

    ref_tuple = []
    #do it just for dist, then for vdc as they're not neighbors
    for row in ref.iter_rows('E2:' +
        etl.find_last_value(ref, 'E', 'c')):
            ref_tuple.append((etl.xstr(row[0].value),))




    it = 0
    for row in ref.iter_rows(ref_vdc_loc + "2:" +
        etl.find_last_value(ref, ref_vdc_loc, 'c')):
            ref_tuple[it]+=(etl.xstr(row[0].value.encode("utf8")),)
            it+=1

    #iterate through db_tuple and see if tuple is in ref
    malformed_vdc = []
    for tup in db_tuple:
        if tup not in ref_tuple:
            malformed_vdc.append(etl.xstr(tup))

    return db, ref, return_message('VDCs in incorrect districts:' ,malformed_vdc)

Exemple #42

0

Afficher le fichier

def algo13(db, ref):
    #Column N: must be a number>=0 and (more conditions to follow)
    db_col_loc = etl.find_in_header(db, 'Total Number Households')
    bad_vals = []

    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        if not etl.xstr(row[0].value).isdigit():
            if row[0].value == None:
                bad_vals.append('Blank@ ' + row[0].coordinate)
            else:
                bad_vals.append(etl.xstr(row[0].value))

    return db, ref, return_message(
        'Invalid numbers for Total Number Households', bad_vals)

Exemple #43

0

Afficher le fichier

def algo12(db, ref):
    #Column M: must be a number>=0 and (more conditions to follow)
    db_col_loc = etl.find_in_header(db, '# Items / # Man-hours / NPR')
    bad_vals = []

    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        if not str(row[0].value).isdigit():
            if row[0].value == None:
                bad_vals.append('Blank @ cell' + row[0].coordinate)
            else:
                bad_vals.append(str(row[0].value))

    return db, ref, 'Invalid numbers for # Items / # Man-hours / NPR\n' + ','.join(
        bad_vals)

Exemple #44

0

Afficher le fichier

def algo14(db, ref):
    #Column O: must be a number>=0 and (more conditions to follow)
    db_col_loc = etl.find_in_header(db, 'Average cost per households (NPR)')
    bad_vals = []

    for row in db.iter_rows(db_col_loc + "2:" +
                            etl.find_last_value(db, db_col_loc, 'c')):
        if not str(row[0].value).isdigit():
            if row[0].value == None:
                bad_vals.append('Blank@ ' + row[0].coordinate)
            else:
                bad_vals.append(str(row[0].value))

    return db, ref, 'Invalid numbers for Average cost per households (NPR)\n' + ','.join(
        bad_vals)

Exemple #45

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo18(db,ref):
    #Column S: must be a date>= 25/04/2015
    date_col_loc = column_index_from_string(etl.find_in_header(db, 'DD - Start')) - 1

    cur = ""
    bad_date = []

    for row in db.rows[1:]:
        try:
            cur = datetime(day = int(etl.xstr(row[date_col_loc].value)), \
                            month = int(etl.xstr(row[date_col_loc+1].value)), \
                            year = int(etl.xstr(row[date_col_loc+2].value)))

            if cur < parse('4-25-2015'):
                bad_date.append(str(cur) + ' @ row ' + str(row[0].row))
        except :
            bad_date.append(str(cur) + ' @ row ' + str(row[0].row))

    return db, ref, return_message('Malformatted date before EQ date', bad_date)

Exemple #46

0

Afficher le fichier

def algo18(db, ref):
    #Column S: must be a date>= 25/04/2015
    date_col_loc = column_index_from_string(
        etl.find_in_header(db, 'DD - Start')) - 1

    cur = ""
    bad_date = []

    for row in db.rows[1:]:
        try:
            cur = datetime(day = int(etl.xstr(row[date_col_loc].value)), \
                            month = int(etl.xstr(row[date_col_loc+1].value)), \
                            year = int(etl.xstr(row[date_col_loc+2].value)))

            if cur < parse('4-25-2015'):
                bad_date.append(str(cur) + ' @ row ' + str(row[0].row))
        except:
            bad_date.append(str(cur) + ' @ row ' + str(row[0].row))

    return db, ref, return_message('Malformatted date before EQ date',
                                   bad_date)

Exemple #47

0

Afficher le fichier

Fichier : master_db_creation.py Projet : eoglethorpe/nepal-earthquake

def get_locs(ws):
    """find column headers in advance so we don't have to call each time"""
    ret = {}
    ret["priority"]=column_index_from_string(etl.find_in_header(ws,"Priority"))
    ret["access_method"]=column_index_from_string(etl.find_in_header(ws,"Hard to Reach Access Methods"))
    ret["hub"]=column_index_from_string(etl.find_in_header(ws,"Shelter Cluster Hub"))
    ret["as_of"]=column_index_from_string(etl.find_in_header(ws,"Last Update"))
    ret["dist_code"]=column_index_from_string(etl.find_in_header(ws,"District HLCIT Code"))
    ret["vdc_code"]=column_index_from_string(etl.find_in_header(ws,"VDC / Municipality HLCIT Code"))
    ret["act_cat"]=column_index_from_string(etl.find_in_header(ws,"UNOCHA Activity Categories"))
    ret["imp_agency"]=column_index_from_string(etl.find_in_header(ws,"Implementing agency"))
    ret["source_agency"]=column_index_from_string(etl.find_in_header(ws,"Sourcing Agency"))
    ret["local_partner"]=column_index_from_string(etl.find_in_header(ws,"Local partner agency"))
    ret["contact_name"]=column_index_from_string(etl.find_in_header(ws,"Local Contact Name"))
    ret["contact_email"]=column_index_from_string(etl.find_in_header(ws,"Local Contact Email"))
    ret["contact_phone"]=column_index_from_string(etl.find_in_header(ws,"Local Contact Phone #"))
    ret["district"]=column_index_from_string(etl.find_in_header(ws,"District"))
    ret["vdc"]=column_index_from_string(etl.find_in_header(ws,"VDC / Municipalities"))
    ret["ward"]=column_index_from_string(etl.find_in_header(ws,"Municipal Ward"))
    ret["act_type"]=column_index_from_string(etl.find_in_header(ws,"Action type"))
    ret["act_desc"]=column_index_from_string(etl.find_in_header(ws,"Action description"))
    ret["targeting"]=column_index_from_string(etl.find_in_header(ws,"Targeting"))
    ret["quantity"]=column_index_from_string(etl.find_in_header(ws,"# Items / # Man-hours / NPR"))
    ret["total_hh"]=column_index_from_string(etl.find_in_header(ws,"Total Number Households"))
    ret["avg_hh_cost"]=column_index_from_string(etl.find_in_header(ws,"Average cost per households (NPR)"))
    ret["fem_hh"]=column_index_from_string(etl.find_in_header(ws,"Female headed households"))
    ret["vuln_hh"]=column_index_from_string(etl.find_in_header(ws,"Vulnerable Caste / Ethnicity households"))
    ret["act_status"]=column_index_from_string(etl.find_in_header(ws,"Activity Status"))
    ret["start_dt"]=column_index_from_string(etl.find_in_header(ws,"Start date"))
    ret["comp_dt"]=column_index_from_string(etl.find_in_header(ws,"Completion Date"))
    ret["comments"]=column_index_from_string(etl.find_in_header(ws,"Additional Comments"))
    return ret

Exemple #48

0

Afficher le fichier

 def test_find_in_header(self):
     self.assertEqual(etl.find_in_header(ref, 'TESTINGCOL'), 'J')

Exemple #49

0

Afficher le fichier

Fichier : clean.py Projet : shelter-cluster-nepal-im/etl-extravaganza

def algo6(db,ref):
    #Column G: must be in Reference>District
    db_col_loc = etl.find_in_header(db, 'District')
    ref_col_loc = etl.find_in_header(ref,'Admin1_District')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
    return db, ref, return_message('Districts not in the reference:' ,missing_names)