def main():
    # Read the archival objects that still need dates/boxes and the top
    # containers created for them, then match the two sets by URI
    h1, c1 = u.opencsv(
        '/Users/amd243/Downloads/glad_aos_still_need_dates_boxes.csv')
    h2, c2 = u.opencsv('/Users/amd243/Desktop/glad_top_containers_created.csv')
    fileobject, csvoutfile = u.opencsvout(
        '/Users/amd243/Desktop/matched_top_containers.csv')
    match_ao_uris_w_tc_uris(c1, c2, fileobject, csvoutfile)
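# These scripts all lean on u.opencsv and u.opencsvout from the shared
# utilities module, which is not shown in this section (some call sites also
# call them with no arguments, presumably prompting for a path). A minimal
# sketch of what the helpers plausibly look like, inferred from how they are
# called here (header row plus reader; file handle plus writer); this is a
# reconstruction, not the module's actual code:

import csv

def opencsv(input_csv_path):
    '''Open a CSV for reading; return (header_row, reader).'''
    infile = open(input_csv_path, 'r', encoding='utf-8')
    reader = csv.reader(infile)
    header_row = next(reader)  # reader is left positioned at the first data row
    return header_row, reader

def opencsvout(output_csv_path):
    '''Open a CSV for writing; return (fileobject, writer).'''
    fileobject = open(output_csv_path, 'w', newline='', encoding='utf-8')
    csvoutfile = csv.writer(fileobject)
    return fileobject, csvoutfile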
def __init__(self):
    self.cnfg = u.get_config(cfg='/Users/aliciadetelich/as_tools_config.yml')
    self.header_row, self.csvfile = u.opencsv(self.cnfg['input_csv'])
    # Count the data rows (total lines minus the header), closing the
    # file handle when done
    with open(self.cnfg['input_csv']) as infile:
        self.rowcount = sum(1 for line in infile) - 1
    self.api_url = 'http://api.snaccooperative.org/'
    self.headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    self.q = {'command': 'read', 'sameas': 'lcnaf_uri'}
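# The request method that uses these attributes is not shown here. A
# hypothetical sketch of how they might be combined per CSV row; the payload
# shape (the base query plus a constellation ID drawn from the first column)
# is an assumption, not SNAC's documented contract:

import json
import requests

def process_rows(self):
    '''Yield each CSV row alongside the SNAC response for it (sketch).'''
    for row in self.csvfile:
        query = dict(self.q)                # base query, e.g. {'command': 'read', ...}
        query['constellationid'] = row[0]   # assumed: column 0 holds the SNAC ID
        response = requests.put(self.api_url, headers=self.headers,
                                data=json.dumps(query))
        response.raise_for_status()
        yield row, response.json()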
def main():
    dbconn = None
    #1: Get a list of distinct creators for each collection, write output to file
    #cfg_fp = input('Please enter path to config file: ')
    list_of_parent_ids = input('Please enter path to list of parent IDs: ')
    try:
        header_row, parent_id_list = u.opencsv(list_of_parent_ids)
        # Materialize the reader so the list can be reused
        parent_id_list = [row for row in parent_id_list]
        # TODO: set the configuration file here?
        dbconn = dbssh.DBConn()
        print('Running queries')
        creator_data = aspace_run.run_db_queries(
            dbconn, parent_id_list, queries.get_distinct_creators)
        composition_data = aspace_run.run_db_queries(
            dbconn, parent_id_list, queries.get_music_data)
        outfile_path = input('Please enter path to outfile: ')
        fileobject, csvoutfile = u.opencsvout(outfile_path)
        write_outfile(creator_data, csvoutfile)
        fileobject.close()
        #2: Review manually and remediate any issues with agent records or duplicate agents
        to_continue = input(
            'After reviewing file please enter CONTINUE to continue: ')
        if to_continue == 'CONTINUE':
            #3: Create a subseries for each agent record, saving the new URI
            agent_data = u.opencsvdict(outfile_path)
            # TODO: do the config here; utilities needs fixing first
            api_url, headers = u.login()
            print('Creating subseries')
            rows_w_uris = aspace_run.call_api(
                api_url, headers, agent_data,
                crud=c.create_data, json_data=jd.create_subseries)
            # TODO: call_api results end up in a list anyway; the current
            # return shape may make more sense for other implementations
            #4: Match new subseries URIs with all children
            combined_data = match_uris(composition_data, rows_w_uris)
            #5: Run data munging functions to get the appropriate positions
            enumerated_data = add_positions(combined_data)
            # Flatten the grouped data, as before, for the update call
            flattened_data = flatten_data(enumerated_data)
            #6: Use the update-parent action to move each child into place
            dirpath = u.setdirectory()
            aspace_run.call_api(api_url, headers, flattened_data,
                                dirpath=dirpath, crud=c.update_parent)
    except Exception:
        print('Error: ')
        print(traceback.format_exc())
    finally:
        # Guard against the connection never having been opened
        if dbconn is not None:
            dbconn.close_conn()
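# Steps 4 and 5 call match_uris and add_positions, which are defined
# elsewhere in the repo. Hedged sketches of what they might do, inferred from
# the surrounding flow; the column positions and row shapes are assumptions:

from collections import defaultdict

def match_uris(composition_data, rows_w_uris):
    '''Pair each child row with the URI of its new subseries (sketch).'''
    # Assumed: both row sets share a creator value in column 0, and the
    # new subseries URI sits in the last column of rows_w_uris
    uri_lookup = {row[0]: row[-1] for row in rows_w_uris}
    return [row + [uri_lookup[row[0]]]
            for row in composition_data if row[0] in uri_lookup]

def add_positions(combined_data):
    '''Group children by new parent and enumerate their positions (sketch).'''
    grouped = defaultdict(list)
    for row in combined_data:
        grouped[row[-1]].append(row)     # key on the new subseries URI
    for children in grouped.values():
        for position, child in enumerate(children):
            child.append(position)
    return grouped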
def main():
    mysql_instance = mysql.connector.connect(user='******', password='******',
                                             host='127.0.0.1', use_pure=True)
    db_cursor = mysql_instance.cursor()
    db_tables = database_tables()
    db_name = 'aspace_preservica_db'
    header_row, csvfile = u.opencsv(
        '/Users/aliciadetelich/Dropbox/git/mssa_digital_data_projects/database/db_data/tables_ingested/deliverable_unit_table_53615_2020-12-04-16-08_with_root.csv')
    try:
        # Rebuild the deliverable_unit table from the CSV export
        #initialize_database(db_cursor, db_name, db_tables)
        use_database(db_cursor, db_name)
        drop_table(db_cursor, 'deliverable_unit')
        create_table(db_cursor, 'deliverable_unit')
        update_database(db_cursor, 'deliverable_unit', csvfile)
    except Exception:
        print(traceback.format_exc())
    finally:
        mysql_instance.commit()
        db_cursor.close()
        mysql_instance.close()
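# Hypothetical sketches of the helpers called above; the real definitions,
# including the deliverable_unit column list, live elsewhere in the repo:

def use_database(db_cursor, db_name):
    '''Switch the connection to the named database (sketch).'''
    db_cursor.execute(f'USE {db_name}')

def drop_table(db_cursor, table_name):
    '''Drop the table if it already exists (sketch).'''
    db_cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

def update_database(db_cursor, table_name, csvfile):
    '''Bulk-insert the CSV rows; the five-column shape is an assumption.'''
    insert_stmt = f'INSERT INTO {table_name} VALUES (%s, %s, %s, %s, %s)'
    db_cursor.executemany(insert_stmt, [tuple(row) for row in csvfile])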
# csv_row['tc_uri']: The URI of the top container.
# csv_row['old_position']: The current position of the instance.
# csv_row['new_position']: The desired position of the instance.
#
# Returns:
#     dict: The JSON structure.
# '''
# new_instance_list = []
# for position, instance in enumerate(record_json['instances']):
#     if instance['sub_container']['top_container']['ref'] == csv_row['tc_uri']:
#
#
# record_json['instances'] = new_instance_list
# return record_json

h1, c1 = u.opencsv(
    '/Users/aliciadetelich/Desktop/glad_ao_multiple_instance_counts_out.csv')
csvlist = [row for row in c1]

# Group each top container (URI, indicator, current position) by the URI
# of its archival object
case_dict = defaultdict(list)
for row in csvlist:
    uri = row[0]
    tc_uri = row[2]
    tc_indicator = row[3]
    current_position = row[4]
    case_dict[uri].append([tc_uri, tc_indicator, current_position])

# Append each instance's enumerated position within its archival object
for key, value in case_dict.items():
    for position, v in enumerate(value):
        value[position].append(position)
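# The commented-out fragment above gestures at an instance-repositioning
# function that was never finished here. A hedged sketch of how it might be
# completed, moving the instance whose top container matches
# csv_row['tc_uri'] to the requested position; a reconstruction, not the
# repo's final version:

def reposition_instance(record_json, csv_row):
    '''Move the matching instance to csv_row['new_position'] (sketch).'''
    new_instance_list = list(record_json['instances'])
    for position, instance in enumerate(new_instance_list):
        if instance['sub_container']['top_container']['ref'] == csv_row['tc_uri']:
            matched = new_instance_list.pop(position)
            new_instance_list.insert(int(csv_row['new_position']), matched)
            break
    record_json['instances'] = new_instance_list
    return record_json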
def row_count(self):
    '''Returns a count of rows in a CSV file, minus the header row'''
    return sum(1 for row in u.opencsv(self.input_csv)[1])
def main():
    header_row, csvfile = u.opencsv()
    #fileobject, csvoutfile = u.opencsvout()
    #csvoutfile.writerow(['identifier', 'ratio', 'name'])
    matcher(csvfile)
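# matcher is not shown in this section; the commented-out header row
# ('identifier', 'ratio', 'name') suggests fuzzy string matching with a
# similarity ratio. A stand-in sketch using the standard library's difflib;
# the column positions and cutoff are assumptions:

from difflib import SequenceMatcher

def matcher(csvfile):
    '''Print likely duplicate names with their similarity ratios (sketch).'''
    rows = [row for row in csvfile]
    for i, row in enumerate(rows):
        for other in rows[i + 1:]:
            ratio = SequenceMatcher(None, row[1], other[1]).ratio()
            if ratio > 0.9:   # arbitrary cutoff for likely duplicates
                print(row[0], round(ratio, 3), other[1])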
#!/usr/bin/python3

from collections import Counter

from tqdm import tqdm

from utilities import utilities as u

# Count how many record links point at each agent URI (column 2) and
# append that count to every row
header_row, csvfile = u.opencsv()
fileobject, csvoutfile = u.opencsvout()
headers = header_row + ['count']
csvoutfile.writerow(headers)
record_links = [row for row in csvfile]
agent_uris = [row[2] for row in record_links]
agent_uri_count = Counter(agent_uris)
output = [row + [agent_uri_count[row[2]]]
          for row in tqdm(record_links)
          if row[2] in agent_uri_count]
csvoutfile.writerows(output)
fileobject.close()
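# For readers without the shared utilities package, the same counting pass
# using only the standard library; the file paths here are placeholders:

import csv
from collections import Counter

with open('record_links.csv', newline='') as infile:
    reader = csv.reader(infile)
    header_row = next(reader)
    record_links = [row for row in reader]

agent_uri_count = Counter(row[2] for row in record_links)

with open('record_links_with_counts.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(header_row + ['count'])
    writer.writerows(row + [agent_uri_count[row[2]]] for row in record_links)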