def renameAndZipFile(file_name):
    """Zip one piped csv file into data/zipped_csv_files.

    Runs the external ``zip`` command on
    ``data/piped_csv_files/<file_name>.csv`` and writes the archive to
    ``data/zipped_csv_files/<file_name>.csv.zip``. Failures are logged
    via ``le.logError`` instead of being raised to the caller.
    """
    try:
        # call() returns the child's exit status instead of raising on
        # failure, so a non-zero status must be promoted to an exception
        # explicitly -- otherwise a failed zip still printed "Successful".
        exit_code = call([
            'zip',
            'data/zipped_csv_files/{0}.csv.zip'.format(file_name),
            'data/piped_csv_files/{0}.csv'.format(file_name)
        ])
        if exit_code != 0:
            raise RuntimeError('zip exited with status {0}'.format(exit_code))
        print('Successful zip on {0}'.format(file_name))
    except Exception as e:
        le.logError(file_name, e)
        print('Failed zip on {0}'.format(file_name))
def renameAndZipFileMulti(file_name, num_files):
    """Zip a numbered series of piped csv files.

    For i in [0, num_files), zips
    ``data/piped_csv_files/<file_name>_<i>.csv`` into
    ``data/zipped_csv_files/<file_name>_<i>.csv.zip``. The first failure
    aborts the remaining files; errors are logged via ``le.logError``.
    """
    try:
        for i in range(num_files):
            # call() returns the child's exit status instead of raising,
            # so a non-zero status must be promoted to an exception --
            # otherwise a failed zip still printed "Successful".
            exit_code = call([
                'zip',
                'data/zipped_csv_files/{0}_{1}.csv.zip'.format(file_name, i),
                'data/piped_csv_files/{0}_{1}.csv'.format(file_name, i)
            ])
            if exit_code != 0:
                raise RuntimeError(
                    'zip exited with status {0}'.format(exit_code))
            print('Successful zip on {0}_{1}'.format(file_name, i))
    except Exception as e:
        le.logError(file_name, e)
        print('Failed zip on {0}'.format(file_name))
def xlsxToCsv(infile_path, outfile_path, sheet_name):
    """Transforms a single sheet from an xlsx document to csv format"""
    print("Transforming {} to {}".format(infile_path, outfile_path))
    try:
        # Read the requested sheet, then dump it as UTF-8 csv without
        # the pandas row index.
        sheet = pd.read_excel(infile_path, sheet_name, index_col=None)
        sheet.to_csv(outfile_path, encoding='utf-8', index=False)
        print('Successful xlsx "{}" to csv "{}"'.format(
            infile_path, outfile_path))
    except Exception as e:
        # Best-effort conversion: record the failure and keep going.
        le.logError(infile_path, e)
        print('Failed xlsx "{}" to csv "{}"'.format(infile_path, outfile_path))
def addPipeDelimiter(file_name):
    """Re-write an unpiped csv as a pipe-delimited, fully-quoted csv.

    Reads ``data/unpiped_csv_files/<file_name>.csv`` and writes
    ``data/piped_csv_files/<file_name>.csv`` using '|' as the delimiter
    with every field quoted. Failures are logged via ``le.logError``.
    """
    try:
        # newline='' is required when handing file objects to the csv
        # module; without it, fields containing newlines are mangled and
        # Windows output gains doubled \r characters.
        with open('data/unpiped_csv_files/{0}.csv'.format(file_name),
                  'r', encoding="utf-8", newline='') as infile, open(
                      'data/piped_csv_files/{0}.csv'.format(file_name),
                      'w', encoding="utf-8", newline='') as outfile:
            reader = csv.reader(infile)
            writer = csv.writer(outfile, delimiter='|', quoting=csv.QUOTE_ALL)
            writer.writerows(reader)
        print('Successful add pipe on {0}'.format(file_name))
    except Exception as e:
        le.logError(file_name, e)
        print('Failed add pipe on {0}'.format(file_name))
def xlsxToCsvMulti(infile_path, outfile_path, sheet_name, num_files):
    """Transforms a single sheet from an xlsx document to csv format
    split between multiple files. For use when file size is a
    restriction. (Looking at you phpMyAdmin)

    The sheet is split into ``num_files`` roughly-equal chunks; chunk i
    is written to ``<i>_<outfile_path>``. Failures are logged via
    ``le.logError``.
    """
    print("Transforming {} to {} files at {}".format(infile_path, num_files,
                                                     outfile_path))
    try:
        data_xls = pd.read_excel(infile_path, sheet_name, index_col=None)
        data_sets = np.array_split(data_xls, num_files)
        for i, data_set in enumerate(data_sets):
            data_set.to_csv('{}_{}'.format(i, outfile_path),
                            encoding='utf-8', index=False)
            print('Successful xlsx to csv on {}_{}'.format(i, outfile_path))
    except Exception as e:
        # Bug fix: the original handler logged the undefined name
        # ``file_name`` (a NameError that masked the real error) and
        # printed ``i``, which is unbound when the failure happens
        # before the loop starts (e.g. read_excel raising).
        le.logError(infile_path, e)
        print('Failed xlsx to csv on {}'.format(outfile_path))
def multiCsvToSingle(infile_list, outfile_path):
    """Takes in multiple csv files and combines them into one file

    Reads every path in ``infile_list`` with ``pd.read_csv``,
    concatenates the frames in order, and writes the result to
    ``outfile_path``. Failures are logged via ``le.logError``.
    """
    try:
        # Load in data
        print("Loading data")
        df_list = [pd.read_csv(file_name) for file_name in infile_list]
        # Combine it
        print("Combining data")
        df = pd.concat(df_list)
        # Write it to file
        print("Writing to file")
        df.to_csv(outfile_path, encoding='utf-8', index=False)
        # Bug fix: original printed the undefined name ``outfile_name``.
        print("Success converting to single csv: {}".format(outfile_path))
    except Exception as e:
        # Bug fix: original logged the undefined name ``file_list``.
        le.logError(infile_list, e)
        print('Failed. Writing to logs.')
from connection import Database from pandas import read_excel, read_sql, DataFrame import os from config_db import Config import datetime from send_mail import SendMail from ftplib import FTP from log_error import logError import pymssql db = Database(Config) log = logError() class ProcessingFileANS: def downloadFile(self): try: ftp = FTP(Config.ftp_host) ftp.login(user=Config.ftp_user, passwd=Config.ftp_password) listing = [] ftp.retrlines("LIST", listing.append) if len(listing) != 0: #File existing to download words = listing[0].split(None, 8) filename = words[-1].lstrip() # download the file to folder repository local_filename = os.path.join(Config.ftp_folder, filename) lf = open(local_filename, "wb") ftp.retrbinary("RETR " + filename, lf.write, 8 * 1024) ftp.delete(filename) #delete file lf.close()