def create_clean_csv(csv_file_path):
    """Write a cleaned copy of *csv_file_path* into d3_csv/.

    The output file is named ``<original-stem>_<frequency>.csv`` where the
    frequency comes from ``clean_data.monthly_or_quarterly`` on the first
    data row.  The blank source column is renamed to ``date`` and every
    value is reduced to digits, ``.`` and ``,`` (the known data source
    embeds units/footnote markers in the numbers).

    Fixes over the previous version:
    - the output file was re-opened in 'w' mode for every row, truncating
      it so only the last row survived; it is now opened once;
    - the row index was shadowed by the column index, so the header was
      only written for single-column files; the header is now always written.

    NOTE(review): the digit filter also strips separators from the date
    column (e.g. '1947-01-01' -> '19470101') — preserved from the original
    behavior, confirm this is intended.
    """
    # Stem of the source file name, without its '.csv' extension.
    old_file_name = csv_file_path.split('/')[-1][:-4]
    with open(csv_file_path, 'r') as csv_file:
        rows = list(csv.DictReader(csv_file))
    if not rows:
        return  # nothing to clean; do not create an empty output file

    # Frequency is derived from the first row, as in the original code.
    new_file_name = (old_file_name + '_'
                     + clean_data.monthly_or_quarterly(rows[0]) + '.csv')
    print(new_file_name)
    # The unnamed source column becomes 'date' in the output header.
    columns = ['date' if column == '' else column for column in rows[0].keys()]

    # newline='' is required by the csv module to avoid blank lines on Windows.
    with open('d3_csv/' + new_file_name, 'w', newline='') as new_csv_file:
        csv_writer = csv.DictWriter(new_csv_file, fieldnames=columns)
        csv_writer.writeheader()
        for row in rows:
            if row.get('') is not None:
                row['date'] = row.pop('')
            for key, value in row.items():
                # Keep only digits and numeric punctuation.
                row[key] = ''.join(
                    char for char in value
                    if char.isdigit() or char in ('.', ','))
            csv_writer.writerow(row)
def load_to_postgres(csv_file_path):
    """Load a cleaned CSV into Postgres, one table per file.

    The table name is the file stem plus the series frequency detected
    from the first row.  The table is dropped and recreated on the first
    row; every row (including the first) is handed to ``populate_row``.
    A failed CREATE is printed and loading continues best-effort.
    """
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        file_name = csv_file_path.split('/')[-1].replace('.csv', '')
        for row_number, row in enumerate(reader):
            if row_number == 0:
                table_name = (file_name + '_'
                              + clean_data.monthly_or_quarterly(row))
                try:
                    # probably a bad way to format, but it's a known data source
                    # NOTE(review): table_name is interpolated straight into
                    # the SQL — tolerable only because it is derived from
                    # trusted local file names, never from user input.
                    pg_cursor.execute("""
                        DROP TABLE IF EXISTS {table_name};
                        CREATE TABLE {table_name} (
                            metric_date date not null,
                            metric_values jsonb default '[]'::JSON,
                            initial_release_date date not null,
                            final_release_date date not null
                        );
                    """.format(table_name=table_name))
                except Exception as e:
                    print(table_name + ' Error: ' + str(e))
            populate_row(table_name=table_name, row=row)
    print('Loaded file ', file_name)
def create_clean_csv(csv_file_path):
    """Produce a cleaned copy of *csv_file_path* under d3_csv/.

    Output name: ``<stem>_<frequency>.csv``, where the frequency comes
    from ``clean_data.monthly_or_quarterly`` applied to the first row.
    The blank header column is renamed ``date``; all cell values are
    filtered down to digits plus '.' and ','.

    Bug fixes relative to the previous version:
    - opening the output in 'w' mode inside the row loop truncated the
      file on every iteration, keeping only the final row — the file is
      now opened exactly once;
    - the inner column loop shadowed the row index, so the
      ``index == 0`` header check compared against the last *column*
      index and the header was written only for one-column files.

    NOTE(review): the character filter also removes '-' from date values
    ('1947-01-01' -> '19470101'); kept as-is from the original — verify.
    """
    # File stem without the trailing '.csv'.
    old_file_name = csv_file_path.split('/')[-1][:-4]
    with open(csv_file_path, 'r') as csv_file:
        all_rows = list(csv.DictReader(csv_file))
    if not all_rows:
        return  # empty input: nothing to write

    frequency = clean_data.monthly_or_quarterly(all_rows[0])
    new_file_name = '{}_{}.csv'.format(old_file_name, frequency)
    print(new_file_name)
    # Rename the unnamed column to 'date' in the output header.
    columns = ['date' if name == '' else name for name in all_rows[0].keys()]

    # newline='' keeps the csv module from emitting blank lines on Windows.
    with open('d3_csv/' + new_file_name, 'w', newline='') as new_csv_file:
        writer = csv.DictWriter(new_csv_file, fieldnames=columns)
        writer.writeheader()
        for row in all_rows:
            if row.get('') is not None:
                row['date'] = row.pop('')
            for key, value in row.items():
                # Strip everything but digits and numeric punctuation.
                row[key] = ''.join(
                    ch for ch in value if ch.isdigit() or ch in ('.', ','))
            writer.writerow(row)
def load_to_postgres(csv_file_path):
    """Load a cleaned CSV file into Postgres.

    On the first row, derives the table name (file stem + detected
    frequency) and creates the table from the CSV header via
    ``table_from_csv``; every row is then inserted with ``populate_row``.
    """
    with open(csv_file_path, 'r') as csv_file:
        file_name = csv_file_path.split('/')[-1].replace('.csv', '')
        reader = csv.DictReader(csv_file)
        for row_number, row in enumerate(reader):
            if row_number == 0:
                frequency = clean_data.monthly_or_quarterly(row)
                table_name = '_'.join([file_name, frequency])
                table_from_csv(table_name=table_name,
                               column_names=row.keys())
            populate_row(table_name=table_name, row=row)
    print('Loaded file ', file_name)
def load_to_postgres(csv_file_path):
    """Load a cleaned CSV file into a Postgres table.

    The target table is created once, from the first row's header
    (``table_from_csv``); each row is then handed to ``populate_row``.
    The table name combines the file stem with the series frequency
    detected by ``clean_data.monthly_or_quarterly``.
    """
    file_name = csv_file_path.split('/')[-1].replace('.csv', '')
    table_name = None  # sentinel: table not created yet
    with open(csv_file_path, 'r') as csv_file:
        for row in csv.DictReader(csv_file):
            if table_name is None:
                # First row: derive the table name and create the table.
                table_name = (file_name + '_'
                              + clean_data.monthly_or_quarterly(row))
                table_from_csv(table_name=table_name,
                               column_names=row.keys())
            populate_row(table_name=table_name, row=row)
    print('Loaded file ', file_name)