def main():
    # Read the archival objects that still need dates/boxes and the top
    # containers created for them, then match the two sets by URI
    h1, c1 = u.opencsv(
        '/Users/amd243/Downloads/glad_aos_still_need_dates_boxes.csv')
    h2, c2 = u.opencsv('/Users/amd243/Desktop/glad_top_containers_created.csv')
    fileobject, csvoutfile = u.opencsvout(
        '/Users/amd243/Desktop/matched_top_containers.csv')
    match_ao_uris_w_tc_uris(c1, c2, fileobject, csvoutfile)
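# These scripts all lean on u.opencsv and u.opencsvout from the shared
# utilities module, which is not shown in this section (some call sites also
# call them with no arguments, presumably prompting for a path). A minimal
# sketch of what the helpers plausibly look like, inferred from how they are
# called here (header row plus reader; file handle plus writer); this is a
# reconstruction, not the module's actual code:

import csv

def opencsv(input_csv_path):
    '''Open a CSV for reading; return (header_row, reader).'''
    infile = open(input_csv_path, 'r', encoding='utf-8')
    reader = csv.reader(infile)
    header_row = next(reader)  # reader is left positioned at the first data row
    return header_row, reader

def opencsvout(output_csv_path):
    '''Open a CSV for writing; return (fileobject, writer).'''
    fileobject = open(output_csv_path, 'w', newline='', encoding='utf-8')
    csvoutfile = csv.writer(fileobject)
    return fileobject, csvoutfile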
def __init__(self):
    self.cnfg = u.get_config(cfg='/Users/aliciadetelich/as_tools_config.yml')
    self.header_row, self.csvfile = u.opencsv(self.cnfg['input_csv'])
    # Count the data rows (total lines minus the header), closing the
    # file handle when done
    with open(self.cnfg['input_csv']) as infile:
        self.rowcount = sum(1 for line in infile) - 1
    self.api_url = 'http://api.snaccooperative.org/'
    self.headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    self.q = {'command': 'read', 'sameas': 'lcnaf_uri'}
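# The request method that uses these attributes is not shown here. A
# hypothetical sketch of how they might be combined per CSV row; the payload
# shape (the base query plus a constellation ID drawn from the first column)
# is an assumption, not SNAC's documented contract:

import json
import requests

def process_rows(self):
    '''Yield each CSV row alongside the SNAC response for it (sketch).'''
    for row in self.csvfile:
        query = dict(self.q)                # base query, e.g. {'command': 'read', ...}
        query['constellationid'] = row[0]   # assumed: column 0 holds the SNAC ID
        response = requests.put(self.api_url, headers=self.headers,
                                data=json.dumps(query))
        response.raise_for_status()
        yield row, response.json()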
def main():
    dbconn = None
    #1: Get a list of distinct creators for each collection, write output to file
    #cfg_fp = input('Please enter path to config file: ')
    list_of_parent_ids = input('Please enter path to list of parent IDs: ')
    try:
        header_row, parent_id_list = u.opencsv(list_of_parent_ids)
        # Materialize the reader so the list can be reused
        parent_id_list = [row for row in parent_id_list]
        # TODO: set the configuration file here?
        dbconn = dbssh.DBConn()
        print('Running queries')
        creator_data = aspace_run.run_db_queries(
            dbconn, parent_id_list, queries.get_distinct_creators)
        composition_data = aspace_run.run_db_queries(
            dbconn, parent_id_list, queries.get_music_data)
        outfile_path = input('Please enter path to outfile: ')
        fileobject, csvoutfile = u.opencsvout(outfile_path)
        write_outfile(creator_data, csvoutfile)
        fileobject.close()
        #2: Review manually and remediate any issues with agent records or duplicate agents
        to_continue = input(
            'After reviewing file please enter CONTINUE to continue: ')
        if to_continue == 'CONTINUE':
            #3: Create a subseries for each agent record, saving the new URI
            agent_data = u.opencsvdict(outfile_path)
            # TODO: do the config here; utilities needs fixing first
            api_url, headers = u.login()
            print('Creating subseries')
            rows_w_uris = aspace_run.call_api(
                api_url, headers, agent_data,
                crud=c.create_data, json_data=jd.create_subseries)
            # TODO: call_api results end up in a list anyway; the current
            # return shape may make more sense for other implementations
            #4: Match new subseries URIs with all children
            combined_data = match_uris(composition_data, rows_w_uris)
            #5: Run data munging functions to get the appropriate positions
            enumerated_data = add_positions(combined_data)
            # Flatten the grouped data, as before, for the update call
            flattened_data = flatten_data(enumerated_data)
            #6: Use the update-parent action to move each child into place
            dirpath = u.setdirectory()
            aspace_run.call_api(api_url, headers, flattened_data,
                                dirpath=dirpath, crud=c.update_parent)
    except Exception:
        print('Error: ')
        print(traceback.format_exc())
    finally:
        # Guard against the connection never having been opened
        if dbconn is not None:
            dbconn.close_conn()
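# Steps 4 and 5 call match_uris and add_positions, which are defined
# elsewhere in the repo. Hedged sketches of what they might do, inferred from
# the surrounding flow; the column positions and row shapes are assumptions:

from collections import defaultdict

def match_uris(composition_data, rows_w_uris):
    '''Pair each child row with the URI of its new subseries (sketch).'''
    # Assumed: both row sets share a creator value in column 0, and the
    # new subseries URI sits in the last column of rows_w_uris
    uri_lookup = {row[0]: row[-1] for row in rows_w_uris}
    return [row + [uri_lookup[row[0]]]
            for row in composition_data if row[0] in uri_lookup]

def add_positions(combined_data):
    '''Group children by new parent and enumerate their positions (sketch).'''
    grouped = defaultdict(list)
    for row in combined_data:
        grouped[row[-1]].append(row)     # key on the new subseries URI
    for children in grouped.values():
        for position, child in enumerate(children):
            child.append(position)
    return grouped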
def main():
    mysql_instance = mysql.connector.connect(user='******', password='******',
                                             host='127.0.0.1', use_pure=True)
    db_cursor = mysql_instance.cursor()
    db_tables = database_tables()
    db_name = 'aspace_preservica_db'
    header_row, csvfile = u.opencsv(
        '/Users/aliciadetelich/Dropbox/git/mssa_digital_data_projects/database/db_data/tables_ingested/deliverable_unit_table_53615_2020-12-04-16-08_with_root.csv')
    try:
        # Rebuild the deliverable_unit table from the CSV export
        #initialize_database(db_cursor, db_name, db_tables)
        use_database(db_cursor, db_name)
        drop_table(db_cursor, 'deliverable_unit')
        create_table(db_cursor, 'deliverable_unit')
        update_database(db_cursor, 'deliverable_unit', csvfile)
    except Exception:
        print(traceback.format_exc())
    finally:
        mysql_instance.commit()
        db_cursor.close()
        mysql_instance.close()
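# Hypothetical sketches of the helpers called above; the real definitions,
# including the deliverable_unit column list, live elsewhere in the repo:

def use_database(db_cursor, db_name):
    '''Switch the connection to the named database (sketch).'''
    db_cursor.execute(f'USE {db_name}')

def drop_table(db_cursor, table_name):
    '''Drop the table if it already exists (sketch).'''
    db_cursor.execute(f'DROP TABLE IF EXISTS {table_name}')

def update_database(db_cursor, table_name, csvfile):
    '''Bulk-insert the CSV rows; the five-column shape is an assumption.'''
    insert_stmt = f'INSERT INTO {table_name} VALUES (%s, %s, %s, %s, %s)'
    db_cursor.executemany(insert_stmt, [tuple(row) for row in csvfile])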
# csv_row['tc_uri']: The URI of the top container.
# csv_row['old_position']: The current position of the instance.
# csv_row['new_position']: The desired position of the instance.
#
# Returns:
#     dict: The JSON structure.
# '''
# new_instance_list = []
# for position, instance in enumerate(record_json['instances']):
#     if instance['sub_container']['top_container']['ref'] == csv_row['tc_uri']:
#
#
# record_json['instances'] = new_instance_list
# return record_json

h1, c1 = u.opencsv(
    '/Users/aliciadetelich/Desktop/glad_ao_multiple_instance_counts_out.csv')
csvlist = [row for row in c1]

# Group each top container (URI, indicator, current position) by the URI
# of its archival object
case_dict = defaultdict(list)
for row in csvlist:
    uri = row[0]
    tc_uri = row[2]
    tc_indicator = row[3]
    current_position = row[4]
    case_dict[uri].append([tc_uri, tc_indicator, current_position])

# Append each instance's enumerated position within its archival object
for key, value in case_dict.items():
    for position, v in enumerate(value):
        value[position].append(position)
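# The commented-out fragment above gestures at an instance-repositioning
# function that was never finished here. A hedged sketch of how it might be
# completed, moving the instance whose top container matches
# csv_row['tc_uri'] to the requested position; a reconstruction, not the
# repo's final version:

def reposition_instance(record_json, csv_row):
    '''Move the matching instance to csv_row['new_position'] (sketch).'''
    new_instance_list = list(record_json['instances'])
    for position, instance in enumerate(new_instance_list):
        if instance['sub_container']['top_container']['ref'] == csv_row['tc_uri']:
            matched = new_instance_list.pop(position)
            new_instance_list.insert(int(csv_row['new_position']), matched)
            break
    record_json['instances'] = new_instance_list
    return record_json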
def row_count(self):
    '''Returns a count of rows in a CSV file, minus the header row'''
    return sum(1 for row in u.opencsv(self.input_csv)[1])
def main():
    header_row, csvfile = u.opencsv()
    #fileobject, csvoutfile = u.opencsvout()
    #csvoutfile.writerow(['identifier', 'ratio', 'name'])
    matcher(csvfile)
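# matcher is not shown in this section; the commented-out header row
# ('identifier', 'ratio', 'name') suggests fuzzy string matching with a
# similarity ratio. A stand-in sketch using the standard library's difflib;
# the column positions and cutoff are assumptions:

from difflib import SequenceMatcher

def matcher(csvfile):
    '''Print likely duplicate names with their similarity ratios (sketch).'''
    rows = [row for row in csvfile]
    for i, row in enumerate(rows):
        for other in rows[i + 1:]:
            ratio = SequenceMatcher(None, row[1], other[1]).ratio()
            if ratio > 0.9:   # arbitrary cutoff for likely duplicates
                print(row[0], round(ratio, 3), other[1])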
#!/usr/bin/python3

from collections import Counter

from tqdm import tqdm

from utilities import utilities as u

# Count how many record links point at each agent URI (column 2) and
# append that count to every row
header_row, csvfile = u.opencsv()
fileobject, csvoutfile = u.opencsvout()
headers = header_row + ['count']
csvoutfile.writerow(headers)
record_links = [row for row in csvfile]
agent_uris = [row[2] for row in record_links]
agent_uri_count = Counter(agent_uris)
output = [row + [agent_uri_count[row[2]]]
          for row in tqdm(record_links)
          if row[2] in agent_uri_count]
csvoutfile.writerows(output)
fileobject.close()
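# For readers without the shared utilities package, the same counting pass
# using only the standard library; the file paths here are placeholders:

import csv
from collections import Counter

with open('record_links.csv', newline='') as infile:
    reader = csv.reader(infile)
    header_row = next(reader)
    record_links = [row for row in reader]

agent_uri_count = Counter(row[2] for row in record_links)

with open('record_links_with_counts.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(header_row + ['count'])
    writer.writerows(row + [agent_uri_count[row[2]]] for row in record_links)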