예제 #1
0
def _bcp_to_csv_text(bcpdata):
    """Convert decoded BCP text into fully quoted, comma-separated text.

    Fields in the input are separated by ``@**@`` and records by ``*@@*``.
    Every field in the result is wrapped in double quotes; backslashes and
    double quotes already present in the data are backslash-escaped.

    Args:
        bcpdata: The BCP file content as a ``str``.

    Returns:
        The converted text as a ``str``.
    """
    lineterminator = '*@@*'
    delimiter = '@**@'
    quote = '"'
    newdelimiter = ','
    escapechar = '\\'
    newline = '\n'
    # Order matters: escape the backslashes and quotes that are part of the
    # data *before* the BCP markers are turned into quote characters,
    # otherwise the quotes introduced below would be escaped as well.
    bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
    bcpdata = bcpdata.replace(quote, escapechar + quote)
    bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
    bcpdata = bcpdata.replace(lineterminator, quote + newline + quote)
    # Open the first field and close the last one.
    return quote + bcpdata + quote


def import_zip(zip_file):
    """Convert the ``.bcp`` members of a downloaded zip into CSV files.

    For every key in the module-level ``cc_files`` mapping, the archive is
    searched for a member whose name ends with ``<key>.bcp``. The member is
    decoded as UTF-8 (undecodable bytes are replaced), converted to CSV text
    and handed to ``to_file`` together with the column headers stored in
    ``cc_files[key]``. A log entry is written with ``gen_log`` at the end.

    Args:
        zip_file: Path of the zip file as a string. It must contain
            ``ew_download/<folder>/data_raw``; ``<folder>`` is passed to
            ``to_file`` as the save folder (per the original comment, this is
            the month the data relates to).

    Raises:
        SystemExit: If the save folder cannot be extracted from ``zip_file``.
    """
    starttime = longtime()
    log_output_filename = []  # This captures all files saved for the log file

    # The context manager guarantees the archive is closed, including on the
    # early exit below.
    with zipfile.ZipFile(zip_file, 'r') as zf:
        print('Opened zip file: %s' % zip_file)
        try:
            # AttributeError: no match (re.search returned None).
            # TypeError: zip_file is not a str.
            savefolder = re.search(r'ew_download\/(.+?)\/data_raw',
                                   zip_file).group(1)
        except (AttributeError, TypeError):
            # If the folder can't be found, end the script and poke the user
            # to investigate. Exit status is left unchanged from the previous
            # quit() call.
            print('>>> Path error, killing script <<<')
            raise SystemExit

        member_names = zf.namelist()  # Read the archive listing only once
        for filename in cc_files:
            check_filename = filename + '.bcp'
            csv_filename = filename + '.csv'

            # Match on the name suffix so members inside sub-folders or with
            # a prefix are found; when several match, the last one wins.
            matches = [
                name for name in member_names if name.endswith(check_filename)
            ]
            if not matches:
                # Report the missing member instead of reusing the member
                # found for a previous file (or failing with a NameError).
                print('ERROR: Did not find %s in zip file' % check_filename)
                continue
            bcp_filename = matches[-1]

            try:
                bcpdata = zf.read(bcp_filename).decode('utf-8',
                                                       errors="replace")
                extractpath = to_file(_bcp_to_csv_text(bcpdata),
                                      savefolder,
                                      csvfilename=csv_filename,
                                      col_headers=cc_files[filename])
            except KeyError:
                print('ERROR: Did not find %s in zip file' % bcp_filename)
                continue

            # Grab the name of each file for the log.
            # NOTE(review): assumes to_file returns the output directory with
            # a trailing separator - confirm.
            log_output_filename.append(extractpath + csv_filename)
            print('Converted: %s' % bcp_filename)

    # Logfile
    finishtime = longtime()  # Get ending time
    scriptname = os.path.basename(__file__)  # Name of the current script
    scriptpath = os.path.dirname(
        os.path.realpath(__file__))  # Absolute dir the script is in
    scriptdesc = 'Unzips the files downloaded by ew_download and writes them out as separate CSVs calling bcp.py to convert.'
    processedfiles = zip_file  # Get the input file details
    writtenfiles = log_output_filename  # Get list of created files
    gen_log(starttime, finishtime, scriptname, scriptpath, scriptdesc,
            processedfiles, writtenfiles)  # Pass info to log file generator
    # Copy the input CSV to the output CSV, appending two columns derived
    # from the deregistration-reason field of each row.
    # NOTE(review): `inCSVfile` is opened outside the visible code -
    # presumably by an enclosing `with open(...)` block; confirm.
    reader = csv.reader(inCSVfile)

    with open(outputfilepath, 'w', newline='', encoding='utf-8') as outCSVfile:

        writer = csv.writer(outCSVfile)
        # The first input row is the header; extend it with the new columns.
        fieldnames = next(reader)
        writer.writerow(fieldnames + ['dereg_act', 'dereg_reason'])

        for row in reader:
            if row[12] != 'null':  # Column 13 ('M') is the deregistration reason
                # The reason is coded as a pair: the Act and the text reason.
                # NOTE(review): `row + codetuple` below needs a list; a real
                # tuple from reasoncoding() would raise TypeError - confirm.
                codetuple = reasoncoding(row[12])
                print(codetuple)
            else:
                # No reason recorded: fill both new columns with '.'
                codetuple = ['.', '.']
            writer.writerow(row + codetuple)

# Log generator: collect the run details and hand them to gen_log().

# Ending time is taken first, before any other bookkeeping.
finishtime = longtime()

# Identify this script: its file name and the absolute directory it lives in.
scriptname = os.path.basename(__file__)
scriptpath = os.path.dirname(os.path.realpath(__file__))

# Description recorded in the log entry.
scriptdesc = 'This script takes deregistration field (column M "Deregistrationreasons") from nz_Organisations_y0_m0_p0_integrity.csv, which are free text, and extracts the reason and which part of legislation this relates to as two new columns.'

# Input that was read and output that was produced by this run.
processedfiles = inputfilepath
writtenfiles = outputfilepath

# Pass everything to the log file generator.
gen_log(
    log_starttime,
    finishtime,
    scriptname,
    scriptpath,
    scriptdesc,
    processedfiles,
    writtenfiles,
)

print('\nAll done!')
예제 #3
0
    # Importing the module executes its top-level code; the messages below
    # are printed once that import has returned.
    # NOTE(review): the statement guarding this indented block is outside the
    # visible code - presumably a settings toggle; confirm.
    import nz_codederegistration
    print('nz_codederegistration.py - Successfully completed.')
    # Visual separator between the outputs of consecutive steps.
    print('                                             ')
    print('---------------------------------------------')
    print('                                             ')

# Log generator: collect the run details and hand them to gen_log().
finishtime = longtime()  # Get ending time
scriptname = os.path.basename(__file__)  # File name of the current script
# `scriptpath` and `writtenfiles` are reused as set earlier in the script
# (outside this section); re-assigning them to themselves was a no-op.
scriptdesc = 'This script downloads the data from the API and acts as a master file which can call the other scripts to process the data once it is downloaded. This logfile only relates to the data download, any other script called will generate its own logfile. The download also creates an additional "download_log_[date].csv" log file which contains more information on the download.'
processedfiles = None  # No input files are recorded for this script
if not download_toggle:
    # The download step was switched off, so no created files are reported.
    # NOTE(review): assumes download_toggle is a plain bool - confirm.
    writtenfiles = None
# Snapshot of the run settings, stored in the log as a string.
settings_toggles = {
    'download_toggle': download_toggle,
    'regroup_toggle': regroup_toggle,
    'integrity_toggle': integrity_toggle,
    'codeareas_toggle': codeareas_toggle,
    'deregistration_toggle': deregistration_toggle,
    'downloadsleep': downloadsleep,
    'wranglesleep': wranglesleep,
}
gen_log(log_starttime, finishtime, scriptname,
        scriptpath, scriptdesc, processedfiles, writtenfiles,
        str(settings_toggles))  # Pass info to log file generator

print('Everything all done!\n')