def download(url, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Turn the wide CSV at ``url`` into long-form columns and pass them to ``dsave.save``.

    ``reqFields`` is upper-cased before use: item 0 is written out as the
    year, item 1 as the month, and items 2 onwards name the CSV columns to
    extract.  If item 2 is ``'ALL'``, every CSV column from the ninth one
    onwards is extracted instead.  Each extracted column name is split on
    ``'_'``; its first two parts fill ``col[5]`` and ``col[6]`` and the
    column's values fill ``col[7]``.  ``col[0]``..``col[4]`` are copied from
    the CSV once per extracted column; ``col[8]`` and ``col[9]`` hold the
    year and the month.

    ``logfile`` and ``errfile`` are not parameters; they are resolved from
    the enclosing module scope.  The process exits through ``sys.exit`` when
    a requested column is not present in the CSV.
    """
    wanted = [field.upper() for field in reqFields]
    year_value = wanted[0]
    month_value = wanted[1]
    take_everything = wanted[2] == 'ALL'

    # open url
    handle = openurl.openurl(url, logfile, errfile)

    raw_data = {name: [] for name in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    frame = pd.read_csv(handle, dtype='unicode')
    header = frame.columns.tolist()

    # 'ALL' can only be resolved once the CSV header is known.
    selected = header[8:] if take_everything else wanted[2:]

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')

    # The first five output columns are repeated unchanged for every
    # extracted column, so read them only once.
    carried = [frame.loc[:, col[pos]].tolist() for pos in range(5)]

    for name in selected:
        if name not in header:
            errfile.write(str(now.now()) + " Requested data " + str(name) + " don't match the csv file. Please check the file at: " + str(url) + " . End progress\n")
            logfile.write(str(now.now()) + ' error and end progress\n')
            sys.exit("Requested data " + str(name) + " don't match the excel file. Please check the file at: " + url)

        values = frame.loc[:, name].tolist()
        count = len(values)

        for pos in range(5):
            raw_data[col[pos]].extend(carried[pos])

        pieces = name.split('_')
        raw_data[col[5]].extend([pieces[0]] * count)
        raw_data[col[6]].extend([pieces[1]] * count)
        raw_data[col[7]].extend(values)

    total = len(raw_data[col[0]])
    raw_data[col[8]] = [year_value] * total
    raw_data[col[9]] = [month_value] * total
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract per-year values from an Excel sheet and pass them to ``dsave.save``.

    For every entry ``y`` of ``reqFields`` the header text ``"19 in " + y[2:]``
    is searched row by row.  A row qualifies when, for each requested year,
    that text occurs exactly four times; the fourth occurrence is the column
    read for that year.  Rows below the header whose first cell matches
    ``E`` followed by eight digits contribute one output record per year:
    cell 0, cell 2, the requested year and the value from the year's column.

    ``logfile`` and ``errfile`` are resolved from the enclosing module scope.
    The process exits through ``sys.exit`` when no qualifying header row is
    found.
    """
    years = reqFields

    # open url
    handle = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    workbook = pd.ExcelFile(handle)
    df = workbook.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    for row in range(df.shape[0]):
        yearCol = []
        for year in years:
            header_text = "19 in " + year[2:]
            hits = [c for c in range(df.shape[1]) if df.iloc[row, c] == header_text]
            if hits:
                restartIndex = row + 1
            if len(hits) == 4:
                yearCol.append(hits[3])

        if len(yearCol) == len(years):
            break

    if len(yearCol) != len(years):
        requested = str(years).strip('[]')
        errfile.write(str(now.now()) + " Requested data " + requested
                      + " don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + requested
                 + " don't match the excel file. Please check the file at: " + url)

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for row in range(restartIndex, df.shape[0]):
        if not re.match(r'E\d{8}$', str(df.iloc[row, 0])):
            continue
        # yearCol and years have equal length here, so they pair up one to one.
        for year, column in zip(years, yearCol):
            raw_data[col[0]].append(df.iloc[row, 0])
            raw_data[col[1]].append(df.iloc[row, 2])
            raw_data[col[2]].append(year)
            raw_data[col[3]].append(df.iloc[row, column])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
Esempio n. 3
0
def getapi(date, colFields, logfile, errfile):
    """Build the NM_18_1 data URL for the requested dates and columns.

    The latest available date is read from the first cell of a small CSV
    fetched from the same dataset.  Every entry of ``date`` is either the
    word ``latest`` (any case) or a ``YEAR-MONTH`` string, which is turned
    into ``latestMINUS<n>`` where ``n`` is its distance in months from the
    latest date (``latest`` when year and month both match).  Duplicates are
    removed and the terms are ordered with ``nkeys.natural_keys``.
    ``colFields`` is lower-cased and used as the ``select`` list.

    Returns the complete URL as a string.
    """
    probe_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_18_1.data.csv?date=latest&select=date"

    # open url
    handle = openurl.openurl(probe_url, logfile, errfile)

    # load this csv file
    frame = pd.read_csv(handle, dtype='unicode')

    # get the latest date
    latest_parts = frame.iloc[0, 0].split('-')
    logfile.write(str(now.now()) + ' get the latest date\n')
    print('get the latest date------')

    # get the required date string
    terms = []
    for requested in date:
        if requested.lower() == "latest":
            terms.append('latest')
            continue

        year_gap = int(latest_parts[0]) - int(requested.split('-')[0])
        month_gap = int(latest_parts[1]) - int(requested.split('-')[1])

        if year_gap == 0 and month_gap == 0:
            terms.append('latest')
        else:
            # With year_gap == 0 the product term vanishes, so one formula
            # serves both the same-year and the different-year case.
            terms.append('latestMINUS' + str(month_gap + 12 * year_gap))
    date_term = ','.join(sorted(set(terms), key=nkeys.natural_keys))

    # get the required selection string
    select_term = ','.join(field.lower() for field in colFields)

    # get the required API
    return ''.join([
        'https://www.nomisweb.co.uk/api/v01/dataset/',
        'NM_18_1.data.csv?',
        'geography=1946157199...1946157245&',
        'date=', date_term, '&',
        'age=MAKE|Aged%2016-24|1;2&duration=MAKE|Up%20to%206%20months|1...7,MAKE|Over%206%20months%20and%20up%20to%20a%20year|8;9,MAKE|Over%201%20year|10...16&sex=5,6&measures=20100,20206',
        '&select=', select_term,
    ])
Esempio n. 4
0
def download(url, outPath, keyCol, digitCheckCol, noDigitRemoveFields, logfile, errfile):
    """Load the CSV at ``url`` and pass it, with all of its columns, to ``dsave.save``.

    The CSV is read with every column as text (``dtype='unicode'``) and the
    resulting frame is handed over unchanged; ``outPath`` is forwarded as the
    destination name.
    """
    # open url
    handle = openurl.openurl(url, logfile, errfile)

    # load this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    frame = pd.read_csv(handle, dtype='unicode')
    all_columns = frame.columns.tolist()

    # save csv file
    dsave.save(frame, all_columns, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, reqInfo, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Flatten a CSV laid out as alternating code/value rows and pass it to ``dsave.save``.

    ``reqInfo`` supplies, in order, the year, month and sex written to
    ``col[2]``, ``col[3]`` and ``col[4]``.  The first data row is scanned for
    the first cell matching ``E`` followed by eight digits; from that column
    rightwards, every even-positioned row feeds ``col[5]`` and the row
    directly below it feeds ``col[6]``.  The first two cells of the
    even-positioned row are repeated into ``col[0]`` and ``col[1]``.

    ``logfile`` and ``errfile`` are resolved from the enclosing module scope.
    The process exits through ``sys.exit`` when the first data row holds no
    such code.
    """
    iYear = reqInfo[0]
    iMonth = reqInfo[1]
    iSex = reqInfo[2]

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {name: [] for name in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')
    n_rows, n_cols = df.shape

    # Find the first column whose first-row cell is an E-code.  A sentinel is
    # used because the loop index never reaches n_cols, so comparing it with
    # n_cols after the loop cannot detect a failed search.
    ecode_pattern = re.compile(r'E\d{8}$')
    first_code_col = None
    if n_rows > 0:
        for k in range(n_cols):
            if ecode_pattern.match(str(df.iloc[0, k])):
                first_code_col = k
                break

    if first_code_col is None:
        errfile.write(str(now.now()) + " Cannot find ecode in row " + str(2) + ". Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Cannot find ecode in row " + str(2) + ". Please check the file at: " + url)

    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(0, n_rows, 2):
        # Plain positional access: iloc[i, n] rather than iloc[i][n], whose
        # integer lookup on the row Series is deprecated in pandas.
        label = df.iloc[i, 0]
        if str(label):
            # NOTE(review): NaNs are dropped from the two rows independently,
            # so the lists may differ in length - confirm the input never has
            # gaps in only one of the rows.
            codes = df.iloc[i, first_code_col:].dropna().tolist()
            raw_data[col[5]].extend(codes)
            raw_data[col[6]].extend(df.iloc[i + 1, first_code_col:].dropna().tolist())
            raw_data[col[0]].extend([label] * len(codes))
            raw_data[col[1]].extend([df.iloc[i, 1]] * len(codes))

    total = len(raw_data[col[0]])
    raw_data[col[2]] = [iYear] * total
    raw_data[col[3]] = [iMonth] * total
    raw_data[col[4]] = [iSex] * total
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, keyCol, digitCheckCol, noDigitRemoveFields):
    """Select the ``reqFields`` columns of an Excel sheet and pass them to ``dsave.save``.

    The workbook at ``url`` is opened, ``sheet`` is parsed into a frame and
    only the columns named in ``reqFields`` are kept.  ``logfile`` and
    ``errfile`` are resolved from the enclosing module scope.
    """
    # open url
    handle = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + " excel file loading\n")
    print("excel file loading------")
    workbook = pd.ExcelFile(handle)
    frame = workbook.parse(sheet)

    # data reading
    logfile.write(str(now.now()) + " data reading\n")
    print("data reading------")
    selection = frame.loc[:, reqFields]

    # save csv file
    dsave.save(selection, reqFields, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract 'Percentage of persistent absentees (4)' values and pass them to ``dsave.save``.

    ``reqFields`` must contain exactly one header text.  The sheet is scanned
    for the first row in which that text occurs (as a substring) in exactly
    one cell; below that row, the first row holding the cell
    ``Percentage of persistent absentees (4)`` at or to the right of the
    matched column fixes the value column.  Every later row whose second
    cell matches ``E`` followed by eight digits yields one record: cell 1,
    cell 3, the year and the value.  The year is the text before the first
    comma of the cell in row 2, column 0 of the parsed sheet.

    ``logfile`` and ``errfile`` are resolved from the enclosing module scope.
    Every failure is written to both before the process exits through
    ``sys.exit``.
    """
    schoolReq = reqFields
    requested = str(schoolReq).strip("[]")

    def _abort(reason):
        # Single failure path so that every error reaches errfile and logfile.
        errfile.write(str(now.now()) + " " + reason + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + " error and end progress\n")
        sys.exit(reason + url)

    if len(schoolReq) != 1:
        _abort(
            "Requested data "
            + requested
            + " don't match the excel file. This code is only for extracting data from filed 'State-funded primary, secondary and special schools (5)' with 'Percentage of persistent absentees (4)'. Please check the file at: "
        )

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + " excel file loading\n")
    print("excel file loading------")
    df = pd.ExcelFile(socket).parse(sheet)
    n_rows, n_cols = df.shape

    iYear = (df.iloc[2, 0].split(","))[0]

    # indicator checking
    logfile.write(str(now.now()) + " indicator checking\n")
    print("indicator checking------")
    # Pre-set so that an empty sheet ends in the error message below rather
    # than in a NameError.
    numCol = []
    restartIndex = n_rows
    for i in range(n_rows):
        numCol = [
            j
            for wanted in schoolReq
            for j in range(n_cols)
            if str(wanted) in str(df.iloc[i, j])
        ]
        if numCol:
            restartIndex = i + 1
        if len(numCol) == len(schoolReq):
            break

    if len(numCol) != len(schoolReq):
        _abort(
            "Requested data "
            + requested
            + " don't match the excel file. Please check the file at: "
        )

    # Each requested header owns the columns up to the next header (or the
    # sheet edge); look for the value header inside each of those ranges.
    bounds = numCol + [n_cols]
    value_header = "Percentage of persistent absentees (4)"
    kk = []  # stays empty when the header match was on the last row
    for i in range(restartIndex, n_rows):
        kk = []
        for lo, hi in zip(bounds, bounds[1:]):
            for j in range(lo, hi):
                if df.iloc[i, j] == value_header:
                    kk.append(j)
                    restartIndex = i + 1
                    break

        if len(kk) == len(schoolReq):
            break

    if len(kk) != len(schoolReq):
        _abort(
            "Requested data "
            + requested
            + " in the field 'Percentage of persistent absentees (4)' don't match the excel file. Please check the file at: "
        )

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + " data reading\n")
    print("data reading------")
    ecode_pattern = re.compile(r"E\d{8}$")
    for i in range(restartIndex, n_rows):
        if not ecode_pattern.match(str(df.iloc[i, 1])):
            continue
        for k in kk:
            raw_data[col[0]].append(df.iloc[i, 1])
            raw_data[col[1]].append(df.iloc[i, 3])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(df.iloc[i, k])
    logfile.write(str(now.now()) + " data reading end\n")
    print("data reading end------")

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
Esempio n. 8
0
def main():
    """Run a command (or take a custom query) and offer fixes for its error.

    With ``-c`` / ``--custom`` the remaining arguments form the query.
    Otherwise the arguments are executed as a command: on success nothing
    more happens, on failure its stderr becomes the query, and if the
    executable cannot be found the resulting error text becomes the query.
    The query is passed to ``fetcher`` and the returned fixes are offered
    one at a time through a small text menu.
    """
    print(bcolors.BLUE + "Running Meseek v1.0.0" + bcolors.ENDC)

    args = sys.argv[1:]
    if not args:
        # Without this guard sys.argv[1] raises IndexError.
        print(bcolors.BLUE + "Nothing to run. Pass a command, or use -c/--custom followed by a query." + bcolors.ENDC)
        return

    if args[0] in ('--custom', '-c'):
        # NOTE(review): the words are concatenated without a separator, as
        # before - confirm this is what fetcher expects.
        query = ''.join(args[1:])
    else:
        try:
            process = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except FileNotFoundError as err:
            # Pass the error text on, not the exception class object.
            query = querycleaner(str(err))
        else:
            if process.returncode == 0:
                print(bcolors.BLUE + "Command executed with no issues." + bcolors.ENDC)
                return
            query = querycleaner(str(process.stderr))

    fixlist = fetcher(query)
    logging.info('user queried: %s', query)

    if len(fixlist) == 0:
        print(bcolors.BLUE + "No Fix available" + bcolors.ENDC)
        return

    fix_counter = 1
    while True:
        print("===" + query)
        print(bcolors.BLUE + "Please select an option:")
        print("1. Open Fix (" + str(fix_counter) + '/' + str(len(fixlist)) + ')')
        print("2. Share Fix with Meseek")
        print("0. Exit" + bcolors.ENDC)
        try:
            decision = int(input())
        except ValueError:
            decision = None  # non-numeric input counts as an invalid option

        if decision == 0:
            # "0. Exit" is a valid menu entry: leave without a warning.
            return

        if decision == 1:
            openurl(fixlist[fix_counter - 1])
            fix_counter += 1
            if fix_counter == len(fixlist) + 1:
                print(bcolors.BLUE + "Meseek ran out of solutions" + bcolors.ENDC)
                return
        elif decision == 2:
            # Sharing a fix is not implemented yet; only the attempt is logged.
            logging.warning('User tried to give the solution')
            return
        else:
            print(bcolors.BLUE + "Not a valid option. Exiting." + bcolors.ENDC)
            logging.warning('User tried to use invalid option.')
            return
Esempio n. 9
0
        tag = tagText.split('-')[1]
        sText = label.string
        s = sText.strip()
        data.append([tag, s])

    return data


def get_request(url, category, open_url):
    """Fetch ``url`` through ``open_url`` and pair the result with ``category``.

    Returns a two-element list: the value of ``open_url.get(url)`` followed
    by ``category`` itself (``category[0]`` is the category id).
    """
    fetched = open_url.get(url)
    return [fetched, category]


if __name__ == '__main__':
    # Script entry: make sure the category table is populated, either from
    # the database or by downloading and parsing the catalog page.
    sql_conn, sql_cursor = db.open()
    open_url = openurl()

    if db.category_is_empty(sql_cursor) or defines.ALWAYS_REINIT_CATEGORY:
        print("open catalog..")
        db.clear_category(sql_cursor)

        # Download the catalog page once; the same text is saved to
        # test.html and parsed, instead of requesting the page a second time.
        catalog = open_url.get(defines.CATALOG_HEAD_URL)

        with codecs.open("test.html", "w", encoding='utf8') as f:
            f.write(catalog)

        categories = parseCategories(catalog)
        for category in categories:
            db.set_category(sql_cursor, category)
    else:
        categories = db.get_categories(sql_cursor)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract 'All Apprenticeships' figures per requested year and pass them to ``dsave.save``.

    Each entry of ``reqFields`` is searched (as a substring) row by row; the
    first row in which the total number of matching cells equals the number
    of requested years is taken as the year header.  Below it, the first row
    holding an ``All Apprenticeships`` cell inside every year's column range
    fixes one value column per year.  Every later row whose second cell is
    set and is not ``Total``, and whose value cell is set, yields two
    records per year: ``Under 19`` from the value column and ``19-24`` from
    the column to its right.

    ``logfile`` and ``errfile`` are resolved from the enclosing module scope.
    The process exits through ``sys.exit`` when either header is not found.
    """
    yearReq = reqFields
    requested = str(yearReq).strip('[]')

    def _abort(reason):
        # Shared failure path: record the problem in both files, then stop.
        errfile.write(str(now.now()) + " " + reason + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(reason + url)

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    df = pd.ExcelFile(socket).parse(sheet)
    n_rows, n_cols = df.shape

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Pre-set so that an empty sheet ends in the error message below rather
    # than in a NameError.
    yearCol = []
    restartIndex = n_rows
    for i in range(n_rows):
        yearCol = [
            j
            for year in yearReq
            for j in range(n_cols)
            if str(year) in str(df.iloc[i, j])
        ]
        if yearCol:
            restartIndex = i + 1
        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        _abort("Requested data " + requested
               + " don't match the excel file. Please check the file at: ")

    # A year owns the columns from its header up to the next year's header
    # (or the sheet edge).
    bounds = yearCol + [n_cols]
    value_header = "All Apprenticeships"
    kk = []  # stays empty when the year header was on the last row
    for i in range(restartIndex, n_rows):
        kk = []
        for lo, hi in zip(bounds, bounds[1:]):
            for j in range(lo, hi):
                if df.iloc[i, j] == value_header:
                    kk.append(j)
                    restartIndex = i + 1
                    break

        if len(kk) == len(yearReq):
            break

    if len(kk) != len(yearReq):
        _abort("Requested data " + requested
               + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    age_bands = ["Under 19", "19-24"]
    for i in range(restartIndex, n_rows):
        area = df.iloc[i, 1]
        # kk and yearReq have equal length here, so they pair up one to one.
        for year, k in zip(yearReq, kk):
            if pd.notnull(area) and pd.notnull(df.iloc[i, k]) and area != "Total":
                for offset, band in enumerate(age_bands):
                    raw_data[col[0]].append(area)
                    raw_data[col[1]].append(year)
                    raw_data[col[2]].append(band)
                    raw_data[col[3]].append(df.iloc[i, k + offset])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol,
             noDigitRemoveFields):
    """Read 'All Apprenticeships' values for the requested years and save them.

    The sheet is scanned for the first row in which the entries of
    ``reqFields`` match (by substring) as many cells as there are requested
    years.  Below that row, the first row with an ``All Apprenticeships``
    cell in each year's column range determines the value column of each
    year.  For every following row with a non-null second cell other than
    ``Total`` and a non-null value cell, two records per year are produced:
    ``Under 19`` taken from the value column and ``19-24`` taken from the
    next column.  The records are handed to ``dsave.save``.

    ``logfile`` and ``errfile`` are resolved from the enclosing module scope.
    The process exits through ``sys.exit`` when a header cannot be located.
    """
    yearReq = reqFields
    wanted_text = str(yearReq).strip('[]')

    def _fail(text):
        # One place for the error/log bookkeeping used by both checks below.
        errfile.write(
            str(now.now()) + " " + text + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(text + url)

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    row_count = df.shape[0]
    col_count = df.shape[1]

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Defined up front: with an empty sheet the loop body never runs, and the
    # length check below must still be able to report the problem.
    yearCol = []
    restartIndex = row_count
    for i in range(row_count):
        yearCol = []
        for year in yearReq:
            for j in range(col_count):
                if str(year) in str(df.iloc[i, j]):
                    yearCol.append(j)
                    restartIndex = i + 1

        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        _fail("Requested data " + wanted_text +
              " don't match the excel file. Please check the file at: ")

    # Column ranges per year: [yearCol[n], yearCol[n + 1]), the last one
    # running to the edge of the sheet.
    range_ends = yearCol[1:] + [col_count]
    kk = []  # remains empty if no row follows the year header
    for i in range(restartIndex, row_count):
        kk = []
        for start, stop in zip(yearCol, range_ends):
            for j in range(start, stop):
                if df.iloc[i, j] == "All Apprenticeships":
                    kk.append(j)
                    restartIndex = i + 1
                    break

        if len(kk) == len(yearReq):
            break

    if len(kk) != len(yearReq):
        _fail(
            "Requested data " + wanted_text +
            " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: "
        )

    raw_data = {}
    for name in col:
        raw_data[name] = []

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, row_count):
        for ii, k in enumerate(kk):
            usable = (pd.notnull(df.iloc[i, 1])
                      and pd.notnull(df.iloc[i, k])
                      and df.iloc[i, 1] != "Total")
            if not usable:
                continue
            for ij, jj in enumerate(["Under 19", "19-24"]):
                raw_data[col[0]].append(df.iloc[i, 1])
                raw_data[col[1]].append(yearReq[ii])
                raw_data[col[2]].append(jj)
                raw_data[col[3]].append(df.iloc[i, k + ij])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields,
               outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract one indicator column from an Excel sheet and pass it to ``dsave.save``.

    ``reqFields`` must contain exactly one header value.  The first row in
    which exactly one cell equals that value is the header row; its column
    is the value column.  The year and quarter come from the file name: the
    text after the last ``_`` and before the following ``.`` in ``url`` is
    split into a four-character year and a month number, and the quarter is
    that month number divided by three.  For each later row whose index
    entry starts with ``E`` followed by eight digits, one record is written:
    the two leading index parts, year, quarter and the cell value.

    ``logfile`` and ``errfile`` are resolved from the enclosing module scope.
    The process exits through ``sys.exit`` on a wrong request size or a
    missing header.
    """
    homeReq = reqFields
    requested = str(homeReq).strip('[]')

    def _abort(reason):
        # Shared failure path: record the problem in both files, then stop.
        errfile.write(str(now.now()) + " " + reason + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(reason + url)

    if len(homeReq) != 1:
        _abort("Requested data " + requested
               + " don't match the excel file. This code is only for extracting data from filed 'e1b1a'. Please check the file at: ")

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    df = pd.ExcelFile(socket).parse(sheet)
    n_rows, n_cols = df.shape

    # find year and quarter
    period = url.split('_')[-1].split('.')[0]
    iYear = period[:4]
    iQuarter = str(int(int(period[4:]) / 3))

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Pre-set so that an empty sheet ends in the error message below rather
    # than in a NameError.
    numCol = []
    restartIndex = n_rows
    for i in range(n_rows):
        # iloc[i, j] is purely positional; iloc[i][j] would look j up as a
        # label on the row Series, which pandas only resolves positionally
        # through a deprecated fallback.
        numCol = [
            j
            for wanted in homeReq
            for j in range(n_cols)
            if df.iloc[i, j] == wanted
        ]
        if numCol:
            restartIndex = i + 1
        if len(numCol) == len(homeReq):
            break

    if len(numCol) != len(homeReq):
        _abort("Requested data " + requested
               + " don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    ecode_pattern = re.compile(r'E\d{8}$')
    for i in range(restartIndex, n_rows):
        # NOTE(review): index entries are subscripted, so the parsed sheet is
        # presumably expected to carry a multi-level index - confirm.
        key = df.index[i]
        if not ecode_pattern.match(str(key[0])):
            continue
        for k in numCol:
            raw_data[col[0]].append(key[0])
            raw_data[col[1]].append(key[1])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(iQuarter)
            raw_data[col[4]].append(df.iloc[i, k])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)