def download(url, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Download a wide CSV from ``url`` and save it with one value per row.

    ``reqFields`` is upper-cased and read as ``[year, month, field, ...]``.
    When the third element is ``'ALL'`` every CSV column from the ninth
    onwards is used as a field.  For each field:

    * the CSV columns named ``col[0]`` .. ``col[4]`` are repeated,
    * the first and second ``'_'``-separated parts of the field name are
      stored under ``col[5]`` and ``col[6]``,
    * the field's values are stored under ``col[7]``.

    The year and month are stored under ``col[8]`` and ``col[9]``.  The
    result is handed to ``dsave.save`` together with ``outPath``.

    Progress and errors go to the module-level ``logfile`` / ``errfile``.
    Exits through ``sys.exit`` when a requested field is not a CSV column.
    """
    requested = [x.upper() for x in reqFields]
    iYear = requested[0]
    iMonth = requested[1]
    want_all = requested[2] == 'ALL'

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {name: [] for name in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')
    cList = df.columns.tolist()
    reqs = cList[8:] if want_all else requested[2:]

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    id_names = [col[0], col[1], col[2], col[3], col[4]]
    id_values = [df.loc[:, name].tolist() for name in id_names]

    for req in reqs:
        if req not in cList:
            errfile.write(str(now.now()) + " Requested data " + str(req) +
                          " don't match the csv file. Please check the file at: " +
                          str(url) + " . End progress\n")
            logfile.write(str(now.now()) + ' error and end progress\n')
            # The source is a CSV; the message used to say "excel file".
            sys.exit("Requested data " + str(req) +
                     " don't match the csv file. Please check the file at: " + str(url))

        valueList = df.loc[:, req].tolist()
        parts = req.split('_')

        # extend() grows the lists in place instead of copying them for
        # every requested field.
        for name, values in zip(id_names, id_values):
            raw_data[name].extend(values)
        raw_data[col[5]].extend([parts[0]] * len(valueList))
        raw_data[col[6]].extend([parts[1]] * len(valueList))
        raw_data[col[7]].extend(valueList)

    raw_data[col[8]] = [iYear] * len(raw_data[col[0]])
    raw_data[col[9]] = [iMonth] * len(raw_data[col[0]])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract per-year values from an Excel sheet and save them via ``dsave``.

    For every year string in ``reqFields`` the header row is searched for
    cells equal to ``"19 in " + year[2:]``; the column of the fourth such
    cell is the one read for that year.  Rows after the header whose first
    cell matches ``E`` followed by eight digits contribute one record per
    year: first cell -> ``col[0]``, third cell -> ``col[1]``, the year ->
    ``col[2]`` and the year's value -> ``col[3]``.

    Progress and errors go to the module-level ``logfile`` / ``errfile``.
    Exits through ``sys.exit`` when no row provides a column for every
    requested year.
    """
    yearReq = reqFields

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Pre-initialised so that an empty sheet reaches the error report below
    # instead of failing with a NameError.
    yearCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        yearCol = []
        for year in yearReq:
            k_asked = "19 in " + year[2:]
            hits = [j for j in range(df.shape[1]) if df.iloc[i, j] == k_asked]
            if len(hits) == 4:
                yearCol.append(hits[3])
        if len(yearCol) == len(yearReq):
            restartIndex = i + 1  # data starts right below the header row
            break

    if len(yearCol) != len(yearReq):
        errfile.write(str(now.now()) + " Requested data " + str(yearReq).strip('[]') +
                      " don't match the excel file. Please check the file at: " +
                      str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip('[]') +
                 " don't match the excel file. Please check the file at: " + str(url))

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    ecode = re.compile(r'E\d{8}$')
    for i in range(restartIndex, df.shape[0]):
        if not ecode.match(str(df.iloc[i, 0])):
            continue
        # yearCol was filled in yearReq order, so the two lists pair up.
        for year, column in zip(yearReq, yearCol):
            raw_data[col[0]].append(df.iloc[i, 0])
            raw_data[col[1]].append(df.iloc[i, 2])
            raw_data[col[2]].append(year)
            raw_data[col[3]].append(df.iloc[i, column])

    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def getapi(date, colFields, logfile, errfile):
    """Return the NM_18_1 data URL for the requested dates and columns.

    The newest date is read from the first cell of a small probe CSV.  Each
    entry of ``date`` is either ``"latest"`` (any case) or a ``YYYY-MM``
    string, which is turned into a ``latestMINUS<n>`` token where ``n`` is
    the month distance to the newest date.  Tokens are de-duplicated, sorted
    with ``nkeys.natural_keys`` and joined with commas.  ``colFields`` is
    lower-cased and joined into the ``select`` parameter.
    """
    probe_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_18_1.data.csv?date=latest&select=date"

    # open url
    socket = openurl.openurl(probe_url, logfile, errfile)

    # load this csv file
    frame = pd.read_csv(socket, dtype='unicode')

    # get the latest date
    newest = frame.iloc[0, 0].split('-')
    logfile.write(str(now.now()) + ' get the latest date\n')
    print('get the latest date------')

    # get the required date string
    tokens = []
    for wanted in date:
        if wanted.lower() == "latest":
            tokens.append('latest')
            continue
        wanted_parts = wanted.split('-')
        year_gap = int(newest[0]) - int(wanted_parts[0])
        month_gap = int(newest[1]) - int(wanted_parts[1])
        if year_gap == 0 and month_gap == 0:
            tokens.append('latest')
        else:
            tokens.append('latestMINUS' + str(month_gap + 12 * year_gap))
    dateString = ','.join(sorted(set(tokens), key=nkeys.natural_keys))

    # get the required selection string
    colSelect = ','.join([name.lower() for name in colFields])

    # get the required API
    pieces = [
        'https://www.nomisweb.co.uk/api/v01/dataset/',
        'NM_18_1.data.csv?',
        'geography=1946157199...1946157245&',
        'date=' + dateString + '&',
        'age=MAKE|Aged%2016-24|1;2&duration=MAKE|Up%20to%206%20months|1...7,MAKE|Over%206%20months%20and%20up%20to%20a%20year|8;9,MAKE|Over%201%20year|10...16&sex=5,6&measures=20100,20206',
        '&select=' + colSelect,
    ]
    return ''.join(pieces)
def download(url, outPath, keyCol, digitCheckCol, noDigitRemoveFields, logfile, errfile):
    """Fetch the CSV at ``url`` and pass the whole table to ``dsave.save``.

    Every column is read as text (``dtype='unicode'``) and the table's own
    column names are used as the column list for saving to ``outPath``.
    """
    # open url
    stream = openurl.openurl(url, logfile, errfile)

    # load this csv file
    logfile.write('{!s} csv file loading\n'.format(now.now()))
    print('csv file loading------')
    table = pd.read_csv(stream, dtype='unicode')
    columns = table.columns.tolist()

    # save csv file
    dsave.save(table, columns, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, reqInfo, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Flatten a CSV that stores codes and values on alternating rows.

    ``reqInfo`` supplies ``[year, month, sex]``.  The first data row is
    scanned for the first cell matching ``E`` followed by eight digits; that
    column index marks where the paired data begins.  Rows are then read in
    pairs: the non-null cells of the even row from that column onwards go to
    ``col[5]`` and those of the following row to ``col[6]``, while the first
    two cells of the even row are repeated under ``col[0]`` and ``col[1]``.
    Year, month and sex fill ``col[2]``, ``col[3]`` and ``col[4]``.

    Progress and errors go to the module-level ``logfile`` / ``errfile``.
    Exits through ``sys.exit`` when no such code is found in the first row.
    """
    iYear = reqInfo[0]
    iMonth = reqInfo[1]
    iSex = reqInfo[2]

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {name: [] for name in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')

    # Locate the first column holding a code.  The old post-loop test
    # ``k == df.shape[1]`` could never be true, so a missing code was never
    # reported; track the hit explicitly instead.
    ecode = re.compile(r'E\d{8}$')
    first = None
    if df.shape[0] > 0:
        for k in range(df.shape[1]):
            if ecode.match(str(df.iloc[0, k])):
                first = k
                break

    if first is None:
        errfile.write(str(now.now()) + " Cannot find ecode in row " + str(2) +
                      ". Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Cannot find ecode in row " + str(2) +
                 ". Please check the file at: " + str(url))

    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    # Stop one row early so that an unpaired trailing row cannot index past
    # the end of the table.
    for i in range(0, df.shape[0] - 1, 2):
        # NOTE(review): str() of a missing cell is 'nan', so this test only
        # filters out genuinely empty strings - confirm that is intended.
        if str(df.iloc[i, 0]):
            codes = df.iloc[i, first:].dropna().tolist()
            values = df.iloc[i + 1, first:].dropna().tolist()
            raw_data[col[5]].extend(codes)
            raw_data[col[6]].extend(values)
            raw_data[col[0]].extend([df.iloc[i, 0]] * len(codes))
            raw_data[col[1]].extend([df.iloc[i, 1]] * len(codes))

    raw_data[col[2]] = [iYear] * len(raw_data[col[0]])
    raw_data[col[3]] = [iMonth] * len(raw_data[col[0]])
    raw_data[col[4]] = [iSex] * len(raw_data[col[0]])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, keyCol, digitCheckCol, noDigitRemoveFields):
    """Save the ``reqFields`` columns of one Excel sheet via ``dsave.save``.

    The workbook is opened from ``url``, ``sheet`` is parsed and the listed
    columns are selected by label before being written to ``outPath``.
    Progress goes to the module-level ``logfile``.
    """
    # open url
    stream = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write("{!s} excel file loading\n".format(now.now()))
    print("excel file loading------")
    workbook = pd.ExcelFile(stream)
    frame = workbook.parse(sheet)

    # data reading
    logfile.write("{!s} data reading\n".format(now.now()))
    print("data reading------")
    selected = frame.loc[:, reqFields]

    # save csv file
    dsave.save(selected, reqFields, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract the persistent-absentee percentage for one school group.

    ``reqFields`` must hold exactly one header text.  The sheet is scanned
    for the first row in which exactly one cell contains that text; below
    it, the first row with a cell equal to
    ``"Percentage of persistent absentees (4)"`` at or to the right of that
    column fixes the value column.  Every later row whose second cell
    matches ``E`` followed by eight digits yields a record: second cell ->
    ``col[0]``, fourth cell -> ``col[1]``, the year (text before the first
    comma in cell ``[2, 0]``) -> ``col[2]`` and the value -> ``col[3]``.

    Progress and errors go to the module-level ``logfile`` / ``errfile``.
    Exits through ``sys.exit`` when the request or the sheet does not match.
    """
    schoolReq = reqFields
    listed = str(schoolReq).strip("[]")

    def _abort(reason):
        # Every failure is recorded in both log files before exiting; the
        # value-column failure used to skip the log writes.
        errfile.write(str(now.now()) + " " + reason + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + " error and end progress\n")
        sys.exit(reason + str(url))

    if len(schoolReq) != 1:
        _abort("Requested data " + listed +
               " don't match the excel file. This code is only for extracting data from filed 'State-funded primary, secondary and special schools (5)' with 'Percentage of persistent absentees (4)'. Please check the file at: ")

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + " excel file loading\n")
    print("excel file loading------")
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    iYear = (df.iloc[2, 0].split(","))[0]

    # indicator checking
    logfile.write(str(now.now()) + " indicator checking\n")
    print("indicator checking------")
    # Pre-initialised so an empty sheet is reported instead of raising
    # NameError.
    numCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        numCol = [j
                  for wanted in schoolReq
                  for j in range(df.shape[1])
                  if str(wanted) in str(df.iloc[i, j])]
        if len(numCol) == len(schoolReq):
            restartIndex = i + 1
            break

    if len(numCol) != len(schoolReq):
        _abort("Requested data " + listed +
               " don't match the excel file. Please check the file at: ")

    # Each requested header owns the columns up to the next header (or the
    # right edge of the sheet); look for the value column inside that span.
    k_asked = "Percentage of persistent absentees (4)"
    spans = list(zip(numCol, numCol[1:] + [df.shape[1]]))
    kk = []  # stays empty when the header is the last row of the sheet
    for i in range(restartIndex, df.shape[0]):
        kk = []
        for start, stop in spans:
            for j in range(start, stop):
                if df.iloc[i, j] == k_asked:
                    kk.append(j)
                    break
        if len(kk) == len(schoolReq):
            restartIndex = i + 1
            break

    if len(kk) != len(schoolReq):
        _abort("Requested data " + listed +
               " in the field 'Percentage of persistent absentees (4)' don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + " data reading\n")
    print("data reading------")
    ecode = re.compile(r"E\d{8}$")
    for i in range(restartIndex, df.shape[0]):
        if not ecode.match(str(df.iloc[i, 1])):
            continue
        for k in kk:
            raw_data[col[0]].append(df.iloc[i, 1])
            raw_data[col[1]].append(df.iloc[i, 3])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(df.iloc[i, k])

    logfile.write(str(now.now()) + " data reading end\n")
    print("data reading end------")

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def main():
    """Run a command (or take a custom query) and offer fixes for its error.

    With ``-c`` / ``--custom`` the remaining arguments form the query.
    Otherwise the arguments are executed as a command: a zero exit status
    ends the program, a non-zero one turns the captured stderr into the
    query, and a missing executable turns the OS error text into the query.
    The cleaned query is passed to ``fetcher`` and the user steps through
    the returned fix URLs in an interactive menu.
    """
    print(bcolors.BLUE + "Running Meseek v1.0.0" + bcolors.ENDC)

    args = sys.argv[1:]
    usage = "Usage: meseek [-c|--custom <query>] | <command> [args...]"
    if not args:
        # Used to crash with IndexError on sys.argv[1].
        print(bcolors.BLUE + usage + bcolors.ENDC)
        return

    query = ''
    process = None
    if args[0] in ('--custom', '-c'):
        # NOTE(review): words are joined without a separator, as before -
        # confirm whether querycleaner expects spaces between them.
        query = ''.join(args[1:])
    else:
        try:
            process = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except FileNotFoundError as err:
            # Search for the error text; previously the exception *class*
            # itself was handed to querycleaner.
            query = str(err)

    if query != '':
        query = querycleaner(query)
    elif process is None:
        # Custom mode with an empty query: nothing to look up.
        print(bcolors.BLUE + usage + bcolors.ENDC)
        return
    elif process.returncode == 0:
        print(bcolors.BLUE + "Command executed with no issues." + bcolors.ENDC)
        return
    else:
        query = querycleaner(str(process.stderr))

    fixlist = fetcher(query)
    logging.info('user queried: %s', query)

    if not fixlist:
        print(bcolors.BLUE + "No Fix available" + bcolors.ENDC)
        return

    fix_counter = 1
    while True:
        print("===" + query)
        print(bcolors.BLUE + "Please select an option:")
        print("1. Open Fix (" + str(fix_counter) + '/' + str(len(fixlist)) + ')')
        print("2. Share Fix with Meseek")
        print("0. Exit" + bcolors.ENDC)
        try:
            decision = int(input())
        except ValueError:
            decision = None  # non-numeric input is handled as an invalid option

        if decision == 1:
            openurl(fixlist[fix_counter - 1])
            fix_counter += 1
            if fix_counter == len(fixlist) + 1:
                print(bcolors.BLUE + "Meseek ran out of solutions" + bcolors.ENDC)
                return
        elif decision == 2:
            # TODO: sharing a fix is not implemented yet.
            logging.warning('User tried to give the solution')
            return
        elif decision == 0:
            # "0. Exit" is a valid choice; it used to be reported as invalid.
            return
        else:
            print(bcolors.BLUE + "Not a valid option. Exiting." + bcolors.ENDC)
            logging.warning('User tried to use invalid option.')
            return
tag = tagText.split('-')[1] sText = label.string s = sText.strip() data.append([tag, s]) return data def get_request(url, category, open_url): #category[0] - category_id return [open_url.get(url), category] if __name__ == '__main__': sql_conn, sql_cursor = db.open() open_url = openurl() if db.category_is_empty(sql_cursor) or defines.ALWAYS_REINIT_CATEGORY: print("open catalog..") db.clear_category(sql_cursor) #categories = parseCategories(open('test.html')) catalog = open_url.get(defines.CATALOG_HEAD_URL) with codecs.open("test.html", "w", encoding='utf8') as f: f.write(catalog) categories = parseCategories(open_url.get(defines.CATALOG_HEAD_URL)) for category in categories: db.set_category(sql_cursor, category) else: categories = db.get_categories(sql_cursor)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract "All Apprenticeships" figures per requested year and age group.

    The sheet is scanned for the first row in which the cells containing the
    texts in ``reqFields`` number exactly ``len(reqFields)``.  Below it, the
    first row holding an ``"All Apprenticeships"`` cell inside every year's
    column span fixes one column per year.  For each later row whose second
    cell is neither null nor ``"Total"``, and for each year whose cell is
    not null, two records are written ("Under 19" from the year's column and
    "19-24" from the column to its right): second cell -> ``col[0]``, year
    -> ``col[1]``, age group -> ``col[2]``, value -> ``col[3]``.

    Progress and errors go to the module-level ``logfile`` / ``errfile``.
    Exits through ``sys.exit`` when the sheet does not match the request.
    """
    yearReq = reqFields
    listed = str(yearReq).strip('[]')

    def _abort(reason):
        # Record the failure in both log files, then stop the program.
        errfile.write(str(now.now()) + " " + reason + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(reason + str(url))

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Pre-initialised so an empty sheet is reported instead of raising
    # NameError.
    yearCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        yearCol = [j
                   for wanted in yearReq
                   for j in range(df.shape[1])
                   if str(wanted) in str(df.iloc[i, j])]
        if len(yearCol) == len(yearReq):
            restartIndex = i + 1
            break

    if len(yearCol) != len(yearReq):
        _abort("Requested data " + listed +
               " don't match the excel file. Please check the file at: ")

    # A year owns the columns from its header up to the next year's header
    # (or the right edge of the sheet).
    # NOTE(review): this assumes the years in reqFields appear left to right
    # in the sheet - confirm against the callers.
    k_asked = "All Apprenticeships"
    spans = list(zip(yearCol, yearCol[1:] + [df.shape[1]]))
    kk = []  # stays empty when the year header is the last row
    for i in range(restartIndex, df.shape[0]):
        kk = []
        for start, stop in spans:
            for j in range(start, stop):
                if df.iloc[i, j] == k_asked:
                    kk.append(j)
                    break
        if len(kk) == len(yearReq):
            restartIndex = i + 1
            break

    if len(kk) != len(yearReq):
        _abort("Requested data " + listed +
               " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    age_groups = ["Under 19", "19-24"]
    for i in range(restartIndex, df.shape[0]):
        label = df.iloc[i, 1]
        if not pd.notnull(label) or label == "Total":
            continue
        # zip keeps every year tied to its own column even when an earlier
        # year's cell is empty.
        for year, k in zip(yearReq, kk):
            if not pd.notnull(df.iloc[i, k]):
                continue
            for offset, group in enumerate(age_groups):
                raw_data[col[0]].append(label)
                raw_data[col[1]].append(year)
                raw_data[col[2]].append(group)
                raw_data[col[3]].append(df.iloc[i, k + offset])

    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Save apprenticeship figures for the requested years, split by age group.

    Steps:

    1. Find the first row where the number of cells containing one of the
       ``reqFields`` texts equals ``len(reqFields)``; those cells give one
       starting column per year.
    2. Below that row, find the first row with an ``"All Apprenticeships"``
       cell between each year's starting column and the next; those cells
       give the value column per year.
    3. For each later row whose second cell is not null and not ``"Total"``,
       and each year whose value cell is not null, store two records: the
       value column as "Under 19" and the next column as "19-24".

    Records are stored as second cell -> ``col[0]``, year -> ``col[1]``,
    age group -> ``col[2]``, value -> ``col[3]`` and passed to
    ``dsave.save``.  Progress and errors go to the module-level ``logfile``
    / ``errfile``; a mismatch between request and sheet ends the program
    through ``sys.exit``.
    """
    yearReq = reqFields

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    n_rows, n_cols = df.shape[0], df.shape[1]

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    yearCol = []      # defined up front: a sheet without rows skips the loop
    restartIndex = 0
    for i in range(n_rows):
        yearCol = []
        for k_asked in yearReq:
            for j in range(n_cols):
                if str(k_asked) in str(df.iloc[i, j]):
                    yearCol.append(j)
        if len(yearCol) == len(yearReq):
            restartIndex = i + 1
            break

    if len(yearCol) != len(yearReq):
        errfile.write(
            str(now.now()) + " Requested data " + str(yearReq).strip('[]') +
            " don't match the excel file. Please check the file at: " +
            str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip('[]') +
                 " don't match the excel file. Please check the file at: " + str(url))

    # Right-hand boundary of every year's column span.
    bounds = yearCol[1:] + [n_cols]
    kk = []           # defined up front: the header may be the last row
    for i in range(restartIndex, n_rows):
        kk = []
        for start, stop in zip(yearCol, bounds):
            for j in range(start, stop):
                if df.iloc[i, j] == "All Apprenticeships":
                    kk.append(j)
                    break
        if len(kk) == len(yearReq):
            restartIndex = i + 1
            break

    if len(kk) != len(yearReq):
        errfile.write(
            str(now.now()) + " Requested data " + str(yearReq).strip('[]') +
            " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " +
            str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(
            "Requested data " + str(yearReq).strip('[]') +
            " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " +
            str(url))

    raw_data = {}
    for name in col:
        raw_data[name] = []

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, n_rows):
        # Pair each year with its own value column so that a skipped year
        # cannot shift the labels of the following ones.
        for year, k in zip(yearReq, kk):
            usable = (pd.notnull(df.iloc[i, 1])
                      and pd.notnull(df.iloc[i, k])
                      and df.iloc[i, 1] != "Total")
            if usable:
                for shift, age in enumerate(["Under 19", "19-24"]):
                    raw_data[col[0]].append(df.iloc[i, 1])
                    raw_data[col[1]].append(year)
                    raw_data[col[2]].append(age)
                    raw_data[col[3]].append(df.iloc[i, k + shift])

    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract one quarterly indicator column from an Excel sheet.

    ``reqFields`` must hold exactly one header value.  Year and quarter come
    from the last ``'_'``-separated piece of ``url`` (text before the first
    ``'.'``): its first four characters are the year and the remainder,
    integer-divided by three, is the quarter.  The sheet is scanned for the
    first row with exactly one cell equal to the requested header.  For
    every later row whose first index level matches ``E`` followed by eight
    digits a record is stored: index level 0 -> ``col[0]``, index level 1 ->
    ``col[1]``, year -> ``col[2]``, quarter -> ``col[3]``, value ->
    ``col[4]``.

    Progress and errors go to the module-level ``logfile`` / ``errfile``.
    Exits through ``sys.exit`` when the request or the sheet does not match.
    """
    homeReq = reqFields
    listed = str(homeReq).strip('[]')

    def _abort(reason):
        # Record the failure in both log files, then stop the program.
        errfile.write(str(now.now()) + " " + reason + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(reason + str(url))

    if len(homeReq) != 1:
        _abort("Requested data " + listed +
               " don't match the excel file. This code is only for extracting data from filed 'e1b1a'. Please check the file at: ")

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # find year and quarter
    iYQ = url.split('_')[-1].split('.')[0]
    iYear = iYQ[:4]
    iQuarter = str(int(iYQ[4:]) // 3)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Pre-initialised so an empty sheet is reported instead of raising
    # NameError.
    numCol = []
    restartIndex = 0
    for i in range(df.shape[0]):
        # df.iloc[i, j] is an explicit positional lookup; the former chained
        # df.iloc[i][j] depended on how a row Series interprets an int key.
        numCol = [j
                  for wanted in homeReq
                  for j in range(df.shape[1])
                  if df.iloc[i, j] == wanted]
        if len(numCol) == len(homeReq):
            restartIndex = i + 1
            break

    if len(numCol) != len(homeReq):
        _abort("Requested data " + listed +
               " don't match the excel file. Please check the file at: ")

    raw_data = {name: [] for name in col}

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    ecode = re.compile(r'E\d{8}$')
    for i in range(restartIndex, df.shape[0]):
        # NOTE(review): indexing df.index[i] assumes the parsed sheet has a
        # multi-level index - confirm against the workbook layout.
        key = df.index[i]
        if not ecode.match(str(key[0])):
            continue
        for k in numCol:
            raw_data[col[0]].append(key[0])
            raw_data[col[1]].append(key[1])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(iQuarter)
            raw_data[col[4]].append(df.iloc[i, k])

    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)