def _bcp_to_csv_text(bcpdata):
    """Convert decoded BCP text into quoted, comma-separated CSV text.

    Fields in the BCP data are separated by '@**@' and records by '*@@*'.
    Every field is wrapped in double quotes; backslashes and double quotes
    already present in the data are escaped with a backslash.
    """
    lineterminator = '*@@*'
    delimiter = '@**@'
    quote = '"'
    newdelimiter = ','
    escapechar = '\\'
    newline = '\n'

    # Escape existing escape characters first so the escapes added for the
    # quotes on the next line are not doubled up.
    bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
    bcpdata = bcpdata.replace(quote, escapechar + quote)
    bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
    bcpdata = bcpdata.replace(lineterminator, quote + newline + quote)
    return quote + bcpdata + quote


def import_zip(zip_file):
    """Convert the .bcp files inside a downloaded zip archive to CSV files.

    For every key in the module-level ``cc_files`` mapping, the archive is
    searched for a member whose name ends in ``<key>.bcp``. The member is
    decoded as UTF-8 (undecodable bytes are replaced), converted to CSV text
    and handed to ``to_file`` together with the column headers stored in
    ``cc_files[key]``. A log entry is written with ``gen_log`` at the end.

    Args:
        zip_file: Path of the zip archive. It must contain
            ``ew_download/<folder>/data_raw``; ``<folder>`` is passed to
            ``to_file`` as the save folder.

    Raises:
        SystemExit: If the save folder cannot be extracted from ``zip_file``.

    A file that is missing from the archive is reported and skipped; the
    remaining files are still processed.
    """
    starttime = longtime()

    # The context manager guarantees the archive is closed, even when the
    # script exits early or a conversion fails part-way through.
    with zipfile.ZipFile(zip_file, 'r') as zf:
        print('Opened zip file: %s' % zip_file)

        try:
            savefolder = re.search(r'ew_download\/(.+?)\/data_raw', zip_file).group(
                1)  # Find the month the data relates to with regex
        except (AttributeError, TypeError):
            # AttributeError: the pattern did not match (re.search returned None).
            # TypeError: zip_file is not a string.
            print('>>> Path error, killing script <<<')
            # If can't find a month then end the script and poke the user to investigate
            raise SystemExit

        log_output_filename = []  # This captures all files saved for the log file
        archive_names = zf.namelist()  # Same for every file, so only list it once

        for filename in cc_files:
            check_filename = filename + '.bcp'
            csv_filename = filename + '.csv'

            # Check whether there is a matching file in the archive. This is
            # decided per file so that a missing file can never fall back to
            # the name found on a previous iteration.
            matches = [name for name in archive_names if name.endswith(check_filename)]
            if not matches:
                print('ERROR: Did not find %s in zip file' % check_filename)
                continue
            bcp_filename = matches[-1]  # When several members match, the last one is used

            try:
                bcpdata = zf.read(bcp_filename).decode('utf-8', errors="replace")
                extractpath = to_file(_bcp_to_csv_text(bcpdata),
                                      savefolder,
                                      csvfilename=csv_filename,
                                      col_headers=cc_files[filename])
            except KeyError:
                print('ERROR: Did not find %s in zip file' % bcp_filename)
                continue

            outputfilename = extractpath + csv_filename
            log_output_filename.append(outputfilename)  # Grab the name of each file for the log
            print('Converted: %s' % bcp_filename)

    #Logfile
    finishtime = longtime()  # Get ending time
    scriptname = os.path.basename(__file__)  # Get the current scriptname as a variable
    scriptpath = (os.path.dirname(os.path.realpath(__file__)))  # Get the absolute dir the script is in
    scriptdesc = 'Unzips the files downloaded by ew_download and writes them out as separate CSVs calling bcp.py to convert.'
    processedfiles = zip_file  # Get the input file details
    writtenfiles = log_output_filename  # Get list of created files
    gen_log(starttime, finishtime, scriptname, scriptpath, scriptdesc,
            processedfiles, writtenfiles)  # Pass info to log file generator
# Reads rows from an already-open input CSV, appends two columns derived from
# the deregistration reason in column 13, writes the result to outputfilepath
# and then records a log entry.
# NOTE(review): `inCSVfile` is bound by a statement that is not visible here -
# presumably an enclosing `with open(inputfilepath, ...) as inCSVfile:`.
# Confirm, and re-indent these lines to sit inside it if so.
reader = csv.reader(inCSVfile)
with open(outputfilepath, 'w', newline='', encoding='utf-8') as outCSVfile:
    writer = csv.writer(outCSVfile)
    fieldnames = next(reader)  # The first row of the input is used as the header
    writer.writerow(fieldnames + ['dereg_act', 'dereg_reason'])  # Header plus the two new column names
    for row in reader:
        if row[12] != 'null':  # Column 13 'M' is the deregistration reason
            # Reason is coded as a pair of Act and text reason.
            # NOTE(review): `row + codetuple` below only works if
            # reasoncoding() returns a list (list + tuple raises TypeError) -
            # confirm against its definition.
            codetuple = reasoncoding(row[12])
            print(codetuple)
        else:
            codetuple = ['.', '.']  # Placeholder values for rows whose reason is the string 'null'
        writer.writerow(row + codetuple)

# Log generator
finishtime = longtime()  # Get ending time
scriptname = os.path.basename(__file__)  # Get the current scriptname as a variable
scriptpath = (os.path.dirname(os.path.realpath(__file__)))  # Get the absolute dir the script is in
scriptdesc = 'This script takes deregistration field (column M "Deregistrationreasons") from nz_Organisations_y0_m0_p0_integrity.csv, which are free text, and extracts the reason and which part of legislation this relates to as two new columns.'
processedfiles = inputfilepath  # Get the input file details
writtenfiles = outputfilepath  # Get list of created files
# log_starttime is defined outside the visible source
gen_log(log_starttime, finishtime, scriptname, scriptpath, scriptdesc,
        processedfiles, writtenfiles)  # Pass info to log file generator
print('\nAll done!')
# Imports the nz_codederegistration module, prints a completion banner and
# then writes this script's own log entry.
# NOTE(review): the preceding context is not visible; the import and the four
# prints below may originally have been guarded by a condition (e.g. a check of
# deregistration_toggle) - confirm and re-indent if so.
import nz_codederegistration
print('nz_codederegistration.py - Successfully completed.')
print(' ')
print('---------------------------------------------')
print(' ')

# Log generator
finishtime = longtime()  # Get ending time
scriptname = os.path.basename(__file__)  # Get the current scriptname as a variable
scriptpath = scriptpath  # No-op self-assignment; scriptpath is set outside the visible source
scriptdesc = 'This script downloads the data from the API and acts as a master file which can call the other scripts to process the data once it is downloaded. This logfile only relates to the data download, any other script called will generate its own logfile. The download also creates an additional "download_log_[date].csv" log file which contains more information on the download.'
processedfiles = None  # No input files are recorded for this script
if download_toggle == True:
    writtenfiles = writtenfiles  # No-op: keeps the value assigned outside the visible source
else:
    writtenfiles = None  # No created files are logged when the download toggle is off
# Collect the toggle and sleep settings so they can be recorded in the log
settings_toggles = {
    'download_toggle': download_toggle,
    'regroup_toggle': regroup_toggle,
    'integrity_toggle': integrity_toggle,
    'codeareas_toggle': codeareas_toggle,
    'deregistration_toggle': deregistration_toggle,
    'downloadsleep': downloadsleep,
    'wranglesleep': wranglesleep
}
# The settings are passed as a string in an extra final argument
gen_log(log_starttime, finishtime, scriptname, scriptpath, scriptdesc,
        processedfiles, writtenfiles, str(settings_toggles))  # Pass info to log file generator
print('Everything all done!\n')