Exemples Python de CsvWriter.write (src.util.csv_writer.CsvWriter.write)

Exemple #1

0

Afficher le fichier

Fichier : test_csv_writer.py Projet : polofr/scrap-python-indian-gov

 def test_csv(self):
     """Write a small table mixing string and integer cells to ``test.csv``.

     The test only exercises ``CsvWriter.write``; it makes no assertion on
     the produced file.
     """
     rows = [
         ['a', 'b', 'c'],
         ['1', '3', '2'],
         ['a', 'b', 'c'],
         [1, 2, 3],
     ]
     CsvWriter.write('test.csv', rows)

Exemple #2

0

Afficher le fichier

Fichier : 1_bis_merge_all_reservation_files.py Projet : polofr/scrap-python-indian-gov

def main(argv):
    """Dump the village-id -> reservation mapping to ``Sampling.csv``.

    ``SamplingVillageIds.prepare_reservation()`` is called first; afterwards
    every entry of ``SamplingVillageIds.village_id_to_reservation`` becomes
    one row ``[village_id, values[0], values[1]]`` under the header
    ``villageid, gender, caste``.

    Args:
        argv: Command-line arguments (unused).
    """
    SamplingVillageIds.prepare_reservation()

    header = ['villageid', 'gender', 'caste']
    rows = [
        [village_id, reservation[0], reservation[1]]
        for village_id, reservation
        in SamplingVillageIds.village_id_to_reservation.items()
    ]

    CsvWriter.write(
        f'C:/Data_PoloFr/scrap-python-indian-gov/src/scripts_final_merge/csv_files_merged/Sampling.csv',
        [header] + rows)

Exemple #3

0

Afficher le fichier

def main(argv):
    """Normalise the header row of every corrected survey CSV, in place.

    For each prefix/suffix combination the file is read completely, the
    cells of its first row are lower-cased with ``'gps-'`` replaced by
    ``'gps'``, and the rows are written back to the same path through
    ``CsvWriter.write``.

    Args:
        argv: Command-line arguments (unused).

    Raises:
        Exception: If one of the expected files does not exist.
    """
    prefixes = (
        'Gram_Sevak_Survey_', 'Group_Survey_', 'Notable_Survey_',
        'Sarpanch_Survey_', 'Upa_Sarpanch_Survey_',
    )
    suffixes = ('1', '2', '2_bis', '3', '4')

    for file_prefix in prefixes:
        for file_suffix in suffixes:
            file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/src/scripts_final_merge/csv_files_corrected/{file_prefix}{file_suffix}.csv'
            if not os.path.isfile(file_path):
                raise Exception(f'{file_path} is not valid ')

            # Read everything before writing: the output path is the input path.
            with open(file_path, 'r', encoding='utf-8') as original:
                result_lines = list(csv.reader(original, delimiter=','))

            if result_lines:
                result_lines[0] = [
                    col.lower().replace('gps-', 'gps')
                    for col in result_lines[0]
                ]
            CsvWriter.write(file_path, result_lines)

Exemple #4

0

Afficher le fichier

def main(argv):
    """Back-fill missing village ids in the ``Sarpanch_Survey_*`` CSV files.

    Phase 1 builds an ``instanceid -> {district, villagename, villageid}``
    index from several already-identified files (first occurrence wins, rows
    without an instance id or village id are ignored).

    Phase 2 walks each ``Sarpanch_Survey_<suffix>.csv``; for every row whose
    village id cell is empty it looks the instance id up in the index and,
    when found, writes the village id into the village id column and the
    column right after it.  Unresolved rows are reported on stdout and passed
    to ``SamplingVillageIds.find_best_match``.  The resulting rows are written
    to the matching path under ``csv_files_corrected``.

    Args:
        argv: Command-line arguments (unused).

    Raises:
        Exception: If a survey file is missing, or wrapping any error raised
            while processing a survey file.
    """
    district_column_name = 'q6'
    villageid_column_name = 'villageid'
    villagename_column_name = 'q1'

    def locate_columns(header):
        # Resolved in the order district, village id, village name, instance id.
        return (
            Helper.find_column_position(header, district_column_name),
            Helper.find_column_position(header, villageid_column_name),
            Helper.find_column_position(header, villagename_column_name),
            Helper.find_column_position(header, 'instanceid'),
        )

    # Despite the name this is a dict keyed by instance id.
    instanceid_set = {}

    files_with_ids = (
        'C:/Data_PoloFr/scrap-python-indian-gov/src/scripts_final_merge/with_village_id/Sarpanch_Group_merged.csv',
        'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/ahmednagar/Sarpanch Survey_WIDE.csv',
        'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/ahmednagar/Sarpanch Survey_WIDE (1).csv',
        'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/Sarpanch_Survey_Merged_20210824.csv',
    )
    for file_with_ids in files_with_ids:
        with open(file_with_ids, 'r', encoding='utf-8') as original:
            reader = csv.reader(original, delimiter=',')
            header = next(reader, None)
            if header is None:
                # Empty file: nothing to index.
                continue
            (district_pos, villageid_pos,
             villagename_pos, instanceid_pos) = locate_columns(header)

            for row in reader:
                instanceid = row[instanceid_pos]
                if (not instanceid
                        or not row[villageid_pos]
                        or instanceid in instanceid_set):
                    continue
                instanceid_set[instanceid] = {
                    'district': row[district_pos].split('.0')[0],
                    'villagename': row[villagename_pos],
                    'villageid': row[villageid_pos].split('.0')[0]
                }

    SamplingVillageIds.prepare()

    for file_suffix in ('1', '2', '2_bis', '3', '4'):
        file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/src/scripts_final_merge/csv_files/Sarpanch_Survey_{file_suffix}.csv'
        if not os.path.isfile(file_path):
            raise Exception(f'{file_path} is not valid ')
        try:
            result_lines = []
            with open(file_path, 'r', encoding='utf-8') as original:
                reader = csv.reader(original, delimiter=',')
                header = next(reader, None)
                if header is not None:
                    result_lines.append(header)
                    (district_pos, villageid_pos,
                     villagename_pos, instanceid_pos) = locate_columns(header)

                # idx counts from 1 so that idx + 1 is the 1-based file line.
                for idx, row in enumerate(reader, start=1):
                    result_lines.append(row)
                    if row[villageid_pos]:
                        continue

                    instanceid = row[instanceid_pos]
                    district = row[district_pos]
                    villagename = row[villagename_pos]

                    result = instanceid_set.get(instanceid)
                    if result is None:
                        print(
                            f'Could not find a village id at line {idx + 1} in Sarpanch_Survey_{file_suffix}.csv for {instanceid} {district} {villagename}'
                        )
                        SamplingVillageIds.find_best_match(
                            villagename, district)
                        continue

                    expected_result = {
                        'district': district,
                        'villagename': villagename,
                        'villageid': result['villageid']
                    }
                    if result != expected_result:
                        print(
                            f'Found a village id for {instanceid} in Sarpanch_Survey_{file_suffix}.csv but {json.dumps(result)} vs {json.dumps(expected_result)}'
                        )
                    # row is the very list stored in result_lines, so this
                    # updates the output in place.
                    row[villageid_pos] = result['villageid']
                    row[villageid_pos + 1] = result['villageid']

                CsvWriter.write(
                    file_path.replace('csv_files', 'csv_files_corrected'),
                    result_lines)
        except Exception as exp:
            raise Exception(
                f'Failed for Sarpanch_Survey_{file_suffix}.csv : {str(exp)}')

Exemple #5

0

Afficher le fichier

Fichier : read_village_file.py Projet : polofr/scrap-python-indian-gov

def main(argv):
    """Match surveyed Sarpanch villages against the Maharashtra 2020 results.

    The results file is indexed by district (``PUNE`` / ``SOLAPUR``) and block
    name.  Each row of ``sarpanch.csv`` is then cleaned up, mapped to a block
    and district through numeric codes, and compared (Hamming distance via
    ``textdistance``) with every panchayat of that block.  When the best score
    exceeds 10 the four best candidates are printed and the choice is read
    with ``read_user_input()``.  The merged rows are written to
    ``merge_sarpanch.csv``.

    Returns silently when either input file is missing.

    Args:
        argv: Command-line arguments (unused).

    Raises:
        Exception: If a row carries an unknown block code.
    """
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/results_wide/results_MAHARASHTRA_2020.csv'
    if not os.path.isfile(file_path):
        return

    all_villages = {'PUNE': {}, 'SOLAPUR': {}}
    with open(file_path, 'r') as original:
        lines = csv.reader(original, delimiter=',')
        next(lines, None)  # header row
        for line in lines:
            district = line[3].strip().upper()
            if district not in all_villages:
                continue
            block_name = line[5].strip().upper()
            panchayat_name = line[7].strip().upper()
            panchayat_id = line[9].strip().upper()
            all_villages[district].setdefault(block_name, []).append({
                'name': panchayat_name,
                'id': panchayat_id,
                'line': line
            })

    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/villages/sarpanch.csv'
    if not os.path.isfile(file_path):
        return

    # Applied in this exact order to the upper-cased village name.
    name_replacements = (
        ('GRAMPANCHAYAT', ''),
        ('GRAMPANCHAYT', ''),
        (', AKKALKOT', ''),
        ('(BHOINJE)', ''),
        ('SAPATNE(BHO)', 'SAPATNE (BHOSE)'),
        ('GRAMPANCHYAT', ''),
        ('GRAMPANACHAYAT', ''),
        ('GRAM PANCHAYT', ''),
        ('GRAMAPANCHAYAT', ''),
    )
    block_names_by_code = {
        '1': 'MADHA',
        '2': 'AKKALKOT',
        '3': 'SOUTH SOLAPUR',
        '4': 'PANDHARPUR',
        '5': 'MOHOL',
        '6': 'BHOR',
        '7': 'BARAMATI',
        '8': 'DAUND',
        '9': 'MULSHI',
        '10': 'KHED',
    }
    # district code -> (district name, blocks accepted for that district)
    districts_by_code = {
        '1': ('SOLAPUR',
              ['MADHA', 'AKKALKOT', 'SOUTH SOLAPUR', 'PANDHARPUR', 'MOHOL']),
        '2': ('PUNE',
              ['BHOR', 'BARAMATI', 'DAUND', 'MULSHI', 'KHED']),
    }

    result_lines = []
    with open(file_path, 'r') as original:
        lines = csv.reader(original, delimiter=',')
        next(lines, None)  # header row
        for line in lines:
            village_id = line[0]

            village_name = line[5].upper()
            for old, new in name_replacements:
                village_name = village_name.replace(old, new)
            village_name = village_name.strip()

            block_name = block_names_by_code.get(line[7])
            if block_name is None:
                raise Exception(f'No block_name found for {line}')

            district_entry = districts_by_code.get(line[6])
            if district_entry is None:
                print(f'No district found for {line}')
                continue
            district_name, accepted_blocks = district_entry
            if block_name not in accepted_blocks:
                print(f'District and block mistmatch for {line}')
                continue

            cmp_results = sorted(
                [
                    {
                        'score': textdistance.hamming(village_name, village['name']),
                        'match': village['name'],
                        'id': village['id'],
                        'line': village['line']
                    }
                    for village in all_villages[district_name][block_name]
                ],
                key=lambda v: v['score'])
            print(f'{district_name} - {block_name} - {village_name} vs {cmp_results[0]["match"]} = {cmp_results[0]["score"]}')

            matched_line = cmp_results[0]['line']
            if cmp_results[0]['score'] > 10:
                for cmp_result in cmp_results[0:4]:
                    print('{:>2} {}'.format(cmp_result['score'], cmp_result['match']))
                print()
                # The answer is 1-based; answer 5 means "no match", anything
                # larger keeps the best candidate.
                selected_row = read_user_input() - 1
                if selected_row < 4:
                    matched_line = cmp_results[selected_row]['line']
                elif selected_row == 4:
                    matched_line = []
            result_lines.append([village_id, village_name] + matched_line)

        new_file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/villages/merge_sarpanch.csv'
        CsvWriter.write(new_file_path, result_lines)

Exemple #6

0

Afficher le fichier

def main(argv):
    """Match Haryana villages against the Haryana 2020 results file.

    The results file is indexed by district and block name (with ``' (PART)'``
    removed).  Each row of ``Haryana_new_incomplete.csv`` has its block name
    normalised, then its village name is compared (Hamming distance via
    ``textdistance``) with every panchayat of that block.  When the best score
    exceeds 10 the choice is read with ``read_user_input()`` and remembered
    per district/block/village so the same question is asked only once.  The
    merged rows are written to ``merge_sarpanch_haryana.csv``.

    Returns silently when either input file is missing.

    Args:
        argv: Command-line arguments (unused).

    Raises:
        Exception: If a row references a district or block that is absent
            from the results file.
    """
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/results_wide/results_HARYANA_2020.csv'
    if not os.path.isfile(file_path):
        return

    all_villages = {}
    chosen_matches = {}
    with open(file_path, 'r') as original:
        lines = csv.reader(original, delimiter=',')
        next(lines, None)  # header row
        for line in lines:
            district = line[3].strip().upper()
            block_name = line[5].strip().upper().replace(' (PART)', '')
            panchayat_name = line[7].strip().upper()
            panchayat_id = line[9].strip().upper()
            district_blocks = all_villages.setdefault(district, {})
            district_blocks.setdefault(block_name, []).append({
                'name': panchayat_name,
                'id': panchayat_id,
                'line': line
            })

    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/villages/Haryana_new_incomplete.csv'
    if not os.path.isfile(file_path):
        return

    # Applied in this exact order, before upper-casing the block name.
    block_replacements = (
        (' 1', '-I'),
        (' 2', '-II'),
        ('Bhattu', 'Bhattu Kalan'),
        ('Ballabhgarh', 'Ballabgarh'),
        ('Nissing', 'Nissing At Chirao'),
        ('Meham', 'Maham'),
        ('Lakhan', 'Lakhan Majra'),
        ('GHARAUNDA (PART)', 'GHARAUNDA'),
        ('Block Saha', 'Saha'),
        ('Block Naraingarh', 'Naraingarh'),
        ('Block Shahzadpur', 'Shahzadpur'),
        ('Block Barara', 'Barara'),
    )

    result_lines = []
    with open(file_path, 'r') as original:
        lines = csv.reader(original, delimiter=',')
        next(lines, None)  # header row
        for line in lines:
            village_id = line[0]
            village_name = line[1].upper().strip()
            district_name = line[12].upper().strip()

            block_name = line[13]
            for old, new in block_replacements:
                block_name = block_name.replace(old, new)
            block_name = block_name.upper().strip()
            block_name = block_name.replace('BLOCK ', f'{district_name}-')

            if all_villages.get(district_name) is None:
                raise Exception(f'Invalid district {district_name} for {line}')
            if all_villages[district_name].get(block_name) is None:
                raise Exception(f'Invalid {block_name} for {line}')

            cmp_results = sorted(
                [
                    {
                        'score': textdistance.hamming(village_name, village['name']),
                        'match': village['name'],
                        'id': village['id'],
                        'line': village['line']
                    }
                    for village in all_villages[district_name][block_name]
                ],
                key=lambda v: v['score'])
            print(
                f'{district_name} - {block_name} - {village_name} vs {cmp_results[0]["match"]} = {cmp_results[0]["score"]}'
            )

            matched_line = cmp_results[0]['line']
            if cmp_results[0]['score'] > 10:
                match_key = f'{district_name} - {block_name} - {village_name}'
                selected_row = chosen_matches.get(match_key)
                if selected_row is None:
                    for cmp_result in cmp_results[0:4]:
                        print('{:>2} {}'.format(cmp_result['score'],
                                                cmp_result['match']))
                    print()
                    # The answer is 1-based; it is cached for later rows.
                    selected_row = read_user_input() - 1
                    chosen_matches[match_key] = selected_row
                if selected_row < 4:
                    matched_line = cmp_results[selected_row]['line']
                elif selected_row == 4:
                    matched_line = []
            result_lines.append([village_id, village_name] + matched_line)

        new_file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/villages/merge_sarpanch_haryana.csv'
        CsvWriter.write(new_file_path, result_lines)

Exemple #7

0

Afficher le fichier

def main(argv):
    """Cross-reference the Pune rows of five survey CSVs and write a summary.

    Reads the Pune sampling file, then the Sarpanch, Upa-Sarpanch, Notable,
    Gram-Sevak and Group survey files.  For each survey the header cells at
    the hard-coded column indices are printed so the operator can check the
    indices, and every row whose district cell matches the Pune value is
    registered through ``set_village`` / ``set_village_name``.  Villages that
    pass the per-survey checks at the end and have a reservation entry are
    written to ``result_pune.csv``.

    Args:
        argv: Command-line arguments (unused).

    Raises:
        Exception: If any of the input files does not exist.
    """
    # Accumulators filled by helper functions defined outside this block.
    village_set = {}
    village_id_to_names = {}
    village_id_to_gan_sevac_sex = {}
    village_id_to_reservation = {}

    # --- Sampling file: feeds village_id_to_reservation -------------------
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/sampling/Sampling_PUNE.csv'
    if not os.path.isfile(file_path):
        raise Exception(f'Failed to find {file_path}')
    with open(file_path, 'r', encoding='utf-8') as original:
        lines = csv.reader(original, delimiter=',')
        skip_first = True
        for line in lines:
            if skip_first is True:
                # First row is the header.
                skip_first = False
                continue
            set_reservation_for_pune(village_id_to_reservation, line[1], line[0])

    # --- Sarpanch survey: district in column 22, compared to '2.0' --------
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/Sarpanch_Survey_Merged_20210824.csv'
    if not os.path.isfile(file_path):
        raise Exception(f'Failed to find {file_path}')
    with open(file_path, 'r', encoding='utf-8') as original:
        lines = csv.reader(original, delimiter=',')
        skip_first = True
        pune_villages = []
        for line in lines:
            if skip_first is True:
                skip_first = False
                # Echo the header cells so the hard-coded indices can be
                # checked by eye against the expected column names.
                print(f'For Sarpanch survey, district is q6 =?= {line[22]}')
                print(f'For Sarpanch survey, villageid =?= {line[16]}')
                print(f'For Sarpanch survey, village name is q1 =?= {line[21]}')
                continue
            if line[22] == '2.0':
                pune_villages.append(line)
                set_village(village_set, line[16], 'sarpanch')
                set_village_name(village_id_to_names, line[16], 'sarpanch', line[21])

        print(f'Found {len(pune_villages)} in sarpanch survey for Pune district')

    # --- Upa-Sarpanch survey: district in column 22, compared to '2' ------
    print('\n\n')
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/Upa_Sarpanch_Survey_Merged_20210824.csv'
    if not os.path.isfile(file_path):
        raise Exception(f'Failed to find {file_path}')
    with open(file_path, 'r', encoding='utf-8') as original:
        lines = csv.reader(original, delimiter=',')
        skip_first = True
        pune_villages = []
        for line in lines:
            if skip_first is True:
                skip_first = False
                print(f'For Upa_Sarpanch, district is q6 =?= {line[22]}')
                print(f'For Upa_Sarpanch survey, villageid =?= {line[16]}')
                print(f'For Upa_Sarpanch survey, village name is q1 =?= {line[21]}')
                continue
            if line[22] == '2':
                pune_villages.append(line)
                set_village(village_set, line[16], 'upa-sarpanch')
                set_village_name(village_id_to_names, line[16], 'upa-sarpanch', line[21])
        print(f'Found {len(pune_villages)} in Upa_Sarpanch survey for Pune district')

    # --- Notable survey: district in column 21, village name in column 20 -
    print('\n\n')
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/Notable_Survey_20201026.csv'
    if not os.path.isfile(file_path):
        raise Exception(f'Failed to find {file_path}')
    with open(file_path, 'r', encoding='utf-8') as original:
        lines = csv.reader(original, delimiter=',')
        skip_first = True
        pune_villages = []
        for line in lines:
            if skip_first is True:
                skip_first = False
                print(f'For Notable, assuming district is q6 =?= {line[21]}')
                print(f'For Notable survey, villageid =?= {line[16]}')
                print(f'For Notable survey, village name is q1 =?= {line[20]}')
                continue
            if line[21] == '2':
                pune_villages.append(line)
                set_village(village_set, line[16], 'notable')
                set_village_name(village_id_to_names, line[16], 'notable', line[20])
        print(f'Found {len(pune_villages)} in Notable survey for Pune district')

    # --- Gram-Sevak survey: also records column 50 via set_gran_sevac_gender
    print('\n\n')
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/Gram_Sevak_Survey_Merged_20210904.csv'
    if not os.path.isfile(file_path):
        raise Exception(f'Failed to find {file_path}')
    with open(file_path, 'r', encoding='utf-8') as original:
        lines = csv.reader(original, delimiter=',')
        skip_first = True
        pune_villages = []
        for line in lines:
            if skip_first is True:
                skip_first = False
                print(f'For Gram_Sevak, assuming district is q6 =?= {line[22]}')
                print(f'For Gram_Sevak survey, villageid =?= {line[16]}')
                print(f'For Gram_Sevak survey, village name is q1 =?= {line[21]}')
                print(f'For Gram_Sevak survey, sex is q15 =?= {line[50]}')
                continue
            if line[22] == '2':
                pune_villages.append(line)
                set_village(village_set, line[16], 'gram-sevak')
                set_village_name(village_id_to_names, line[16], 'gram-sevak', line[21])
                set_gran_sevac_gender(village_id_to_gan_sevac_sex, line[16], line[50])
        print(f'Found {len(pune_villages)} in Gram_Sevak survey for Pune district')

    # --- Group survey: district in column 26 ('2.0'), name in column 25 ---
    print('\n\n')
    file_path = f'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/Group_Survey_Merged_20210824.csv'
    if not os.path.isfile(file_path):
        raise Exception(f'Failed to find {file_path}')
    with open(file_path, 'r', encoding='utf-8') as original:
        lines = csv.reader(original, delimiter=',')
        skip_first = True
        pune_villages = []
        for line in lines:
            if skip_first is True:
                skip_first = False
                print(f'For Group, assuming district is q7 =?= {line[26]}')
                print(f'For Group survey, villageid =?= {line[16]}')
                print(f'For Group survey, village name is q5 =?= {line[25]}')
                continue
            if line[26] == '2.0':
                pune_villages.append(line)
                set_village(village_set, line[16], 'group')
                set_village_name(village_id_to_names, line[16], 'group', line[25])
        print(f'Found {len(pune_villages)} in Group survey for Pune district')

    print('\n\n')
    print('village ids')
    print(village_set.keys())
    print('village ids end')

    # Output table: header row first, one row per retained village after it.
    new_csv = []
    new_entry = ['village_id', 'reservation_sex', 'reservation_caste', 'gram_sevak_sex', 'village_name']
    new_csv.append(new_entry)

    villages_with_all = []
    for village_id, surveys in village_set.items():
        # NOTE(review): the layout of `surveys` is decided by set_village, which
        # is not visible here.  Judging by the messages below, slots 0-3 look
        # like per-survey counts for sarpanch, upa-sarpanch, gram-sevak and
        # group, and slot 4 is presumably the notable count -- confirm.
        if surveys[0] == 0:
            continue
        if surveys[0] != 1:
            print(f'Weird more than one survey for sarpanch {village_id} {village_id_to_names[village_id]}')
            continue
        if surveys[1] == 0:
            continue
        if surveys[1] != 1:
            print(f'Weird more than one survey for upa-sarpanch {village_id} {village_id_to_names[village_id]}')
            continue
        if surveys[2] == 0:
            continue
        if surveys[2] != 1:
            print(f'Weird more than one survey for gram-sevak {village_id} {village_id_to_names[village_id]}')
            continue
        if surveys[3] == 0:
            continue
        if surveys[3] != 1:
            print(f'Weird more than one survey for group {village_id} {village_id_to_names[village_id]}')
            continue
        if surveys[4] < 4:
            continue
        villages_with_all.append(village_id)
        # Villages without a reservation entry are listed in villages_with_all
        # but get no row in the output file.
        if village_id_to_reservation.get(village_id) is None:
            continue
        # NOTE(review): village_id_to_gan_sevac_sex is indexed directly; this
        # assumes set_gran_sevac_gender stored an entry for the id -- confirm.
        new_entry = [village_id, village_id_to_reservation[village_id][0], village_id_to_reservation[village_id][1], village_id_to_gan_sevac_sex[village_id], village_id_to_names[village_id][0]]
        new_csv.append(new_entry)

    print('villages with all surveys')
    print(villages_with_all)
    print('villages with all surveys end')

    new_file_path = 'C:/Data_PoloFr/scrap-python-indian-gov/csv_files/results/result_pune.csv'
    CsvWriter.write(new_file_path, new_csv)