def handle(self, *args, **kwargs):
    """Scrape every stale source of type 2 and keep its failure log current.

    Raises:
        CommandError: if SUPERFASTMATCH or DEFAULT_DOCTYPE is missing from
            the project settings.
    """
    required_settings = (
        ('SUPERFASTMATCH', 'You must configure SUPERFASTMATCH in your project settings.'),
        ('DEFAULT_DOCTYPE', 'You must specify a DEFAULT_DOCTYPE in your project settings.'),
    )
    for setting_name, complaint in required_settings:
        if not hasattr(settings, setting_name):
            raise CommandError(complaint)

    self.sfm = from_django_conf()

    for source in Source.objects.filter(source_type=2):
        try:
            if not source.is_stale():
                continue
            self.scrape_releases(source)
            source.last_retrieved = now()
            source.save()
            # A successful scrape closes every failure still open for
            # this source.
            still_open = SourceScrapeFailure.objects.filter(resolved__isnull=True, source=source)
            for open_failure in still_open:
                open_failure.resolved = now()
                open_failure.save()
        except SourceScrapeFailure as failure:
            # scrape_releases raises an unsaved failure record; persist it.
            failure.save()
        except Exception as e:
            failure = SourceScrapeFailure.objects.create(source=source, description=unicode(e))
def download(url, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Reshape a wide CSV of ``<SEX>_<AGE>`` columns into long form and save it.

    Args:
        url: location of the CSV file, opened through ``openurl.openurl``.
        reqFields: ``[year, month, field, ...]``; compared upper-cased. When
            the third entry is ``ALL`` every CSV column from the ninth onward
            is used.
        outPath: destination passed on to ``dsave.save``.
        col: ten output column names. ``col[0:5]`` must also be CSV column
            names (they are copied through); ``col[5:10]`` receive sex, age,
            value, year and month.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Exits through ``sys.exit`` when a requested field is not a CSV column.
    Relies on the module-level ``logfile`` and ``errfile`` handles.
    """
    reqReq = [x.upper() for x in reqFields]
    dName = outPath
    iYear = reqReq[0]
    iMonth = reqReq[1]

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {j: [] for j in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')
    cList = df.columns.tolist()

    reqs = cList[8:] if reqReq[2] == 'ALL' else reqReq[2:]

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    # The five pass-through columns are identical for every requested field,
    # so read them once.
    passthrough = [df.loc[:, col[n]].tolist() for n in range(5)]

    for req in reqs:
        if req not in cList:
            errfile.write(str(now.now()) + " Requested data " + str(req) + " don't match the csv file. Please check the file at: " + str(url) + " . End progress\n")
            logfile.write(str(now.now()) + ' error and end progress\n')
            # The source is a CSV, so the exit message now says so too.
            sys.exit("Requested data " + str(req) + " don't match the csv file. Please check the file at: " + url)

        valueList = df.loc[:, req].tolist()
        parts = req.split('_')
        for n in range(5):
            raw_data[col[n]].extend(passthrough[n])
        raw_data[col[5]].extend([parts[0]] * len(valueList))
        raw_data[col[6]].extend([parts[1]] * len(valueList))
        raw_data[col[7]].extend(valueList)

    raw_data[col[8]] = [iYear] * len(raw_data[col[0]])
    raw_data[col[9]] = [iMonth] * len(raw_data[col[0]])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
def standard(self):
    """Run the display loop: pick a page, show it, poll for signals, sleep.

    Leaves the loop when an ``InterruptStandardLoop`` signal arrives and
    terminates the process (after stopping the weather thread) on
    ``CleanExit``.
    """
    i = 0  # sleep cycles elapsed since the last page was shown
    num_cycles_left = 0  # cycles the current page should stay up
    the_page = None  # page to show on this iteration, if any
    while True:
        # The current page has had its time: choose the next one.
        if i >= num_cycles_left:
            # Between 12:00 and 12:19 the lunch page is shown instead of a
            # random one.
            if now.now().strftime("%H") == "12" and now.now().minute < 20:
                the_page = page.LunchPage()
            else:
                the_page = pageFactory.get_loaded_random()
        # A queued signal can override the choice above.
        try:
            signal = ThreadSignaller.queue.get_nowait()
            if isinstance(signal, ThreadSignaller.ShowPage):
                the_page = pageFactory.get_reloaded_page(signal.page_num)
            elif isinstance(signal, ThreadSignaller.ShowGreetingPage):
                the_page = get_greeting_page(signal.barcode)
            elif signal == ThreadSignaller.CleanExit:
                self.weather_thread.stop()
                sys.exit()
            elif signal == ThreadSignaller.InterruptStandardLoop:
                break
        except Queue.Empty:
            # Nothing queued; carry on with whatever was chosen above.
            pass
        if the_page:
            num_cycles_left = self._get_cycles_left(the_page.duration_sec)
            i = 0
            the_page.show()
            the_page = None
        # NOTE(review): the_page is reset to None right after being shown, so
        # this counter also advances on iterations that displayed a page.
        if not the_page:
            i += 1
        time.sleep(config.sleeping_time_ms / 1000.0)
def download(inPath, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Flatten two projection CSVs (female, then male) into one JSON file.

    Args:
        inPath: two CSV paths; ``inPath[0]`` is read as "female" and
            ``inPath[1]`` as "male". The text before the first ``-`` in the
            file name of ``inPath[0]`` is used as the base year.
        outPath: path of the JSON file to write.
        col: nine output field names. In order they receive: population id,
            description, third CSV column ("All ages" normalised to
            "All Ages"), gender, year header, first CSV column, second CSV
            column, value times 1000, and value divided by the last row's
            value of the same year column.
        keyCol, digitCheckCol, noDigitRemoveFields: accepted for signature
            parity with the other download functions; unused here.

    Relies on the module-level ``logfile`` handle.
    """
    dName = outPath
    genderArray = ["female", "male"]
    listinPath = inPath[0].split('/')
    iYear = listinPath[-1].split('-')[0]
    iPopID = "ONS-" + iYear + "-based-LAD-Subnational-Population-Projections"
    iPopdescription = "ONS projections (http://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationprojections/datasets/localauthoritiesinenglandtable2)"

    # operate this file
    raw_data = {j: [] for j in col}

    for i in range(2):
        inFile = inPath[i]

        # load files
        logfile.write(str(now.now()) + ' ' + inFile + ' file loading\n')
        print(inFile + ' file loading------')
        df = pd.read_csv(inFile, dtype='unicode')

        yearcol = df.columns.tolist()[3:]
        lenyearcol = len(yearcol)
        n_rows = df.shape[0]

        # DataFrame.ix was removed in pandas 1.0; every lookup here is
        # positional, so .iloc is the direct replacement. The three leading
        # columns do not depend on the year, so read them once.
        first_col = df.iloc[:, 0].tolist()
        second_col = df.iloc[:, 1].tolist()
        third_col = df.iloc[:, 2].tolist()

        for j in range(lenyearcol):
            values = df.iloc[:, j + 3].tolist()
            # Every value is divided by the last row of the same column.
            divisor = int(float(values[-1])) if values else 0
            raw_data[col[2]].extend(third_col)
            raw_data[col[4]].extend([yearcol[j]] * n_rows)
            raw_data[col[5]].extend(first_col)
            raw_data[col[6]].extend(second_col)
            raw_data[col[7]].extend(values)
            raw_data[col[8]].extend(int(float(x)) / divisor for x in values)

        raw_data[col[0]].extend([iPopID] * n_rows * lenyearcol)
        raw_data[col[1]].extend([iPopdescription] * n_rows * lenyearcol)
        raw_data[col[3]].extend([genderArray[i]] * n_rows * lenyearcol)

    raw_data[col[7]] = [int(float(v) * 1000) for v in raw_data[col[7]]]
    raw_data[col[2]] = [v.replace("All ages", "All Ages") for v in raw_data[col[2]]]

    # Wrap the records as [{"jsondata": [...]}] and label the single row.
    df1 = pd.DataFrame(raw_data)
    strings = df1.to_json(orient="records")
    jsonString = '[{"jsondata":' + strings + '}]'
    myJson = pd.read_json(jsonString)
    myJson.index = ['mydata']

    # save to file
    myJson.to_json(path_or_buf=dName, orient="index")
    logfile.write(str(now.now()) + ' has been extracted and saved as ' + str(dName) + '\n')
    print('Requested data has been extracted and saved as ' + dName)
    logfile.write(str(now.now()) + ' finished\n')
    print("finished")
def random(self):
    """Return a random greeting appropriate to the current hour.

    Before 12:00 the morning greetings are added to the general pool; after
    17:59 the evening ones are; otherwise only the general pool is used.
    """
    import now
    from random import choice

    if now.now().hour < 12:
        pool = self.greetings + self.morning
    elif now.now().hour > 17:
        pool = self.greetings + self.evening
    else:
        pool = self.greetings
    return choice(pool)
def get_greeting_page(barcode):
    """Build the name page shown when a card with *barcode* is scanned.

    When no card file exists under /home/pi/cards/ the barcode itself is
    shown on a small name page. Otherwise the owner's name is shown together
    with any morning points awarded to their house.
    """
    card_path = "/home/pi/cards/" + barcode
    from page import greetings

    if not isfile(card_path):
        return page.NamePage(barcode, large=False)

    name, house, twitter = points.get_name_house(card_path)
    message = "" if house else """Error finding your house. Please report to Scroggs."""
    shout_out = "" if twitter is None else greetings.random() + " @" + twitter + "! "

    hour_text = now.now().strftime("%H")
    card_contents = points.read_name_file(card_path)
    if points.should_add_morning_points(hour_text, house, card_contents, barcode):
        awarded = points.add_morning_points(hour_text, house, barcode, shout_out)
        message = str(awarded) + " points to " + house + "!"
    return page.NamePage(name, extra=message)
def download(url, reqInfo, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Turn a CSV of paired (ecode row, value row) lines into long form.

    Args:
        url: location of the CSV, opened through ``openurl.openurl``.
        reqInfo: ``[year, month, sex]`` stamped onto every output row.
        outPath: destination passed on to ``dsave.save``.
        col: seven output column names; ``col[0]``/``col[1]`` take the first
            two cells of each ecode row, ``col[2:5]`` year/month/sex,
            ``col[5]`` the ecodes and ``col[6]`` the values.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Exits through ``sys.exit`` when no cell of the first data row looks like
    an ecode (``E`` followed by eight digits). Relies on the module-level
    ``logfile`` and ``errfile`` handles.
    """
    reqReq = reqInfo
    dName = outPath
    iYear = reqReq[0]
    iMonth = reqReq[1]
    iSex = reqReq[2]

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    raw_data = {j: [] for j in col}

    # operate this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    df = pd.read_csv(socket, dtype='unicode')

    # Find the first column holding an ecode. The previous check compared
    # the loop variable with df.shape[1], which range() never yields, so a
    # missing ecode went unnoticed; for/else reports it reliably.
    for k in range(df.shape[1]):
        if re.match(r'E\d{8}$', str(df.iloc[0, k])):
            break
    else:
        errfile.write(str(now.now()) + " Cannot find ecode in row " + str(2) + ". Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Cannot find ecode in row " + str(2) + ". Please check the file at: " + url)

    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    # Rows come in pairs: row i carries the ecodes, row i + 1 the values.
    for i in range(0, df.shape[0], 2):
        # NOTE(review): str() of a missing cell is 'nan', which is truthy, so
        # this guard only skips genuinely empty strings - confirm intent.
        if str(df.iloc[i, 0]):
            eList = df.iloc[i, k:].dropna().tolist()
            raw_data[col[5]].extend(eList)
            raw_data[col[6]].extend(df.iloc[i + 1, k:].dropna().tolist())
            raw_data[col[0]].extend([df.iloc[i, 0]] * len(eList))
            raw_data[col[1]].extend([df.iloc[i, 1]] * len(eList))

    raw_data[col[2]] = [iYear] * len(raw_data[col[0]])
    raw_data[col[3]] = [iMonth] * len(raw_data[col[0]])
    raw_data[col[4]] = [iSex] * len(raw_data[col[0]])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
def is_stale(self, seconds=None):
    """Report whether this source is due for another scrape.

    Args:
        seconds: maximum age in seconds; a falsy value falls back to
            ``settings.SCRAPE_PERIOD``.

    Returns:
        True when the source has never been retrieved or was last retrieved
        more than the allowed number of seconds ago, otherwise False.
    """
    max_age = seconds or settings.SCRAPE_PERIOD
    if self.last_retrieved is None:
        return True
    elapsed = now() - self.last_retrieved
    return elapsed.total_seconds() > max_age
def __init__(self):
    """Prepare the lunch page, with an extra red banner on Fridays."""
    super(LunchPage, self).__init__("???")
    self.name = "Lunch"

    banner = colour_print(printer.text_to_ascii("Lunchtime!"))
    is_friday = now.now().strftime("%a") == "Fri"
    if is_friday:
        banner += "\n"
        banner += colour_print(printer.text_to_ascii("It's Fancy Friday!"), colours.Background.RED)
    self.content = banner
    self.loaded = True
def download(url, sheet, reqFields, outPath, keyCol, digitCheckCol, noDigitRemoveFields):
    """Copy the requested columns of one Excel sheet out through ``dsave.save``.

    Args:
        url: location of the workbook, opened through ``openurl.openurl``.
        sheet: sheet to parse.
        reqFields: column labels to keep; also used as the output columns.
        outPath: destination passed on to ``dsave.save``.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Relies on the module-level ``logfile`` and ``errfile`` handles.
    """
    def log(message):
        # Every log line is prefixed with the current timestamp.
        logfile.write(str(now.now()) + message)

    # open url
    handle = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    log(" excel file loading\n")
    print("excel file loading------")
    frame = pd.ExcelFile(handle).parse(sheet)

    # data reading
    log(" data reading\n")
    print("data reading------")
    selected = frame.loc[:, reqFields]

    # save csv file
    dsave.save(selected, reqFields, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract one value per ecode row and requested year from an Excel sheet.

    The header row is the first row in which, for every requested year, the
    label ``"19 in <year without its first two characters>"`` occurs exactly
    four times; the fourth occurrence marks that year's value column.

    Args:
        url: location of the workbook, opened through ``openurl.openurl``.
        sheet: sheet to parse.
        reqFields: requested years.
        outPath: destination passed on to ``dsave.save``.
        col: four output column names; they receive the cell in the first
            column (the ecode), the cell in the third column, the year and
            the value.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Exits through ``sys.exit`` when no row satisfies every requested year.
    Relies on the module-level ``logfile`` and ``errfile`` handles.
    """
    # open url
    handle = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    df = pd.ExcelFile(handle).parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    for row in range(df.shape[0]):
        yearCol = []
        for year in reqFields:
            wanted = "19 in " + year[2:]
            hits = [c for c in range(df.shape[1]) if df.iloc[row, c] == wanted]
            if hits:
                # Data starts on the row after the last row with any match.
                restartIndex = row + 1
            if len(hits) == 4:
                yearCol.append(hits[3])
        if len(yearCol) == len(reqFields):
            break

    if len(yearCol) != len(reqFields):
        requested = str(reqFields).strip('[]')
        errfile.write(str(now.now()) + " Requested data " + requested + " don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + requested + " don't match the excel file. Please check the file at: " + url)

    raw_data = {}
    for name in col:
        raw_data[name] = []

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for row in range(restartIndex, df.shape[0]):
        if not re.match(r'E\d{8}$', str(df.iloc[row, 0])):
            continue
        for year, value_column in zip(reqFields, yearCol):
            raw_data[col[0]].append(df.iloc[row, 0])
            raw_data[col[1]].append(df.iloc[row, 2])
            raw_data[col[2]].append(year)
            raw_data[col[3]].append(df.iloc[row, value_column])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def nodigit(data, check_field, remove_field, logfile):
    """Drop the rows whose values in *remove_field* are not numeric.

    Args:
        data: frame to check.
        check_field: columns handed to ``dvalid.checkdigit``; when empty the
            frame is returned untouched.
        remove_field: columns whose offending rows are dropped.
        logfile: open handle that receives progress lines.

    Returns:
        The frame, without the offending rows.
    """
    if not len(check_field) > 0:
        return data

    # check the no digit data
    logfile.write(str(now.now()) + ' check the no digit data\n')
    print('check the no digit data')
    offenders = dvalid.checkdigit(data, check_field, logfile)

    # drop the no digit data
    flagged = [row for name in remove_field for row in offenders[name]]
    unique_rows = list(set(flagged))
    if len(unique_rows) > 0:
        data = data.drop(data.index[unique_rows])
        # Reported row numbers are offset by two.
        listing = ','.join(str(row + 2) for row in unique_rows)
        print('------------------------------------')
        logfile.write(str(now.now()) + ' the dropped rows are: row ' + listing + '\n')
        print('the dropped rows are: row ' + listing)
    return data
def getapi(date, colFields, logfile, errfile):
    """Build the NM_18_1 query URL for the requested dates and columns.

    The API addresses dates relative to the newest one, so the newest date
    is fetched first and every requested ``YYYY-MM`` is converted into
    ``latest`` or ``latestMINUS<months>``.

    Args:
        date: iterable of ``"latest"`` (any case) or ``YYYY-MM`` strings.
        colFields: column names for the ``select`` parameter (lower-cased).
        logfile, errfile: open handles passed to ``openurl.openurl``.

    Returns:
        The complete request URL as a string.
    """
    probe_url = "https://www.nomisweb.co.uk/api/v01/dataset/NM_18_1.data.csv?date=latest&select=date"

    # open url and load this csv file
    probe = pd.read_csv(openurl.openurl(probe_url, logfile, errfile), dtype='unicode')

    # get the latest date
    latest_parts = probe.iloc[0, 0].split('-')
    logfile.write(str(now.now()) + ' get the latest date\n')
    print('get the latest date------')

    # get the required date string
    offsets = []
    for wanted in date:
        if wanted.lower() == "latest":
            offsets.append('latest')
            continue
        wanted_parts = wanted.split('-')
        years_back = int(latest_parts[0]) - int(wanted_parts[0])
        months_back = int(latest_parts[1]) - int(wanted_parts[1])
        if years_back == 0 and months_back == 0:
            offsets.append('latest')
        else:
            offsets.append('latestMINUS' + str(months_back + 12 * years_back))
    dateString = ','.join(sorted(set(offsets), key=nkeys.natural_keys))

    # get the required selection string
    colSelect = ','.join([name.lower() for name in colFields])

    # get the required API
    pieces = [
        'https://www.nomisweb.co.uk/api/v01/dataset/',
        'NM_18_1.data.csv?',
        'geography=1946157199...1946157245&',
        'date=' + dateString + '&',
        'age=MAKE|Aged%2016-24|1;2&duration=MAKE|Up%20to%206%20months|1...7,MAKE|Over%206%20months%20and%20up%20to%20a%20year|8;9,MAKE|Over%201%20year|10...16&sex=5,6&measures=20100,20206',
        '&select=' + colSelect,
    ]
    return ''.join(pieces)
def download(url, outPath, keyCol, digitCheckCol, noDigitRemoveFields, logfile, errfile):
    """Load a CSV from *url* and hand every column to ``dsave.save``.

    Args:
        url: location of the CSV, opened through ``openurl.openurl``.
        outPath: destination passed on to ``dsave.save``.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.
        logfile, errfile: open handles for progress and error lines.
    """
    # open url
    handle = openurl.openurl(url, logfile, errfile)

    # load this csv file
    logfile.write(str(now.now()) + ' csv file loading\n')
    print('csv file loading------')
    frame = pd.read_csv(handle, dtype='unicode')
    all_columns = frame.columns.tolist()

    # save csv file
    dsave.save(frame, all_columns, keyCol, digitCheckCol, noDigitRemoveFields, outPath, logfile)
def checkdigit(data, field, logfile):
    """Find the cells in the given columns that are not plain numbers.

    A cell counts as numeric when, after stripping whitespace, it is all
    digits or is exactly two digit runs separated by one dot.

    Args:
        data: frame supporting ``data.index`` and ``data[column][label]``.
        field: column names to inspect.
        logfile: open handle that receives one line per offending cell.

    Returns:
        Dict mapping each inspected column to the list of index labels whose
        cell is not numeric.
    """
    offenders = {name: [] for name in field}
    for name in field:
        for label in data.index.tolist():
            text = str(data[name][label]).strip()
            if text.isdigit():
                continue
            pieces = text.split('.')
            looks_decimal = len(pieces) == 2 and pieces[0].isdigit() and pieces[1].isdigit()
            if looks_decimal:
                continue
            offenders[name].append(label)
            # Reported row numbers are offset by two.
            print('------------------------------------')
            logfile.write(str(now.now()) + ' the value is not a digit number at: row ' + str(label + 2) + ', col ' + name + '\n')
            print('the value is not a digit number at: row ' + str(label + 2) + ', col ' + name)
    return offenders
def all_url(kind, rid):
    """Resolve room *rid* with the resolver function named after *kind*.

    Returns whatever the matching platform function returns, or None when
    *kind* is not one of the supported platform names.
    """
    # Ordered (name, thunk) pairs; the thunks keep the lookup of each
    # platform function lazy, so only the selected one is touched.
    resolvers = (
        ("bilibili", lambda: bilibili(rid)),
        ("chushou", lambda: chushou(rid)),
        ("douyin", lambda: douyin(rid)),
        ("douyu", lambda: douyu(rid)),
        ("egame", lambda: egame(rid)),
        ("huajiao", lambda: huajiao(rid)),
        ("huomao", lambda: huomao(rid)),
        ("huya", lambda: huya(rid)),
        ("iqiyi", lambda: iqiyi(rid)),
        ("kuaishou", lambda: kuaishou(rid)),
        ("kugou", lambda: kugou(rid)),
        ("longzhu", lambda: longzhu(rid)),
        ("now", lambda: now(rid)),
        ("pps", lambda: pps(rid)),
        ("v6cn", lambda: v6cn(rid)),
        ("wangyicc", lambda: wangyicc(rid)),
        ("xigua", lambda: xigua(rid)),
        ("yingke", lambda: yingke(rid)),
        ("yizhibo", lambda: yizhibo(rid)),
        ("yy", lambda: yy(rid)),
        ("zhanqi", lambda: zhanqi(rid)),
    )
    for platform, resolve in resolvers:
        if kind == platform:
            return resolve()
    return None
def scrape_releases(self, source):
    """Store every release in *source*'s feed and index its text.

    For each feed entry a ``Release`` is fetched or created, and non-empty
    bodies are submitted to superfastmatch. Entries without a link and
    entries linking to a PDF are skipped with a warning.

    Raises:
        SourceScrapeFailure: when superfastmatch raises or reports an
            unsuccessful result.
    """
    feed = source.fetch_feed()
    for entry in feed['entries']:
        link = entry.get('link')
        if not link:
            # Without a URL there is nothing to fetch; skip the entry rather
            # than abort the whole feed on an AttributeError.
            logging.warning("Skipping feed entry without a link: {0}".format(entry.get('title')))
            continue
        if link.lower().endswith(".pdf"):
            logging.warning("Skipping PDF link: {0}".format(link))
            continue

        title = entry.get('title')
        # Only date strings go through the parser: dateutil rejects datetime
        # objects, so the now() fallback must be used directly.
        raw_date = entry.get('published') or entry.get('updated') or entry.get('a10:updated')
        date = dateutil.parser.parse(raw_date) if raw_date else now()
        source_name = source.organization
        body = get_link_content(link)

        (release, created) = Release.objects.get_or_create(url=link, title=title, date=date, body=body, source=source)

        if body is None or len(body.strip()) == 0:
            continue

        try:
            result = self.sfm.add(doctype=source.doc_type or settings.DEFAULT_DOCTYPE,
                                  docid=release.id,
                                  text=body,
                                  defer=True,
                                  source=source_name,
                                  date=date,
                                  title=title,
                                  put=False)
        except superfastmatch.SuperFastMatchError as e:
            raise SourceScrapeFailure(source=source, description=unicode(e))

        if result['success'] != True:
            msg = 'Superfastmatch failure: {0}'.format(result.get('error', ''))
            # Pass source by keyword, as the other raise does.
            raise SourceScrapeFailure(source=source, description=msg)
def save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile):
    """Clean the extracted data and write it to *dName* as CSV.

    Steps: title-case the column names, strip cells (``dclean.stripcsv``),
    drop rows with non-numeric values (``dclean.nodigit``), drop duplicate
    rows keeping the last occurrence, and optionally add a ``pkey`` column
    computed by ``fpkey.fpkey`` from *keyCol*.

    Args:
        raw_data: anything ``pandas.DataFrame`` accepts.
        col: column names selecting and ordering the data.
        keyCol: columns feeding the primary key; empty for no key.
        digitCheckCol: columns checked for numeric content.
        noDigitRemoveFields: columns whose non-numeric rows are removed.
        dName: output CSV path.
        logfile: open handle that receives progress lines.
    """
    # write csv file
    logfile.write(str(now.now()) + ' writing to file\n')
    print('writing to file ' + dName)
    df = pd.DataFrame(raw_data, columns=col)
    df.columns = [x.title() for x in col]
    col = df.columns.tolist()

    # clean data--remove spaces
    logfile.write(str(now.now()) + ' data cleaning\n')
    print('data cleaning------')
    df = dclean.stripcsv(df, col)

    # remove the cell with no digit data
    check_field = [x.title() for x in digitCheckCol]
    remove_field = [x.title() for x in noDigitRemoveFields]
    df = dclean.nodigit(df, check_field, remove_field, logfile)

    # delete the duplicate data
    logfile.write(str(now.now()) + ' check and delete the duplicate data\n')
    print('check and delete the duplicate data------')
    # take_last=True was removed from pandas; keep='last' is its replacement.
    df = df.drop_duplicates(subset=col, keep='last')

    # create primary key by md5 for each row
    if len(keyCol) != 0:
        logfile.write(str(now.now()) + ' create primary key\n')
        print('create primary key------')
        keyCol = [x.title() for x in keyCol]
        df['pkey'] = fpkey.fpkey(df, keyCol)
        logfile.write(str(now.now()) + ' create primary key end\n')
        print('create primary key end------')
    else:
        print('no primary key------')

    # save to file
    df.to_csv(dName, index=False)
    logfile.write(str(now.now()) + ' has been extracted and saved as ' + str(dName) + '\n')
    print('Requested data has been extracted and saved as ' + dName)
    logfile.write(str(now.now()) + ' finished\n')
    print("finished")
def openurl(url, logfile, errfile):
    """Open *url* and return the response object.

    Any failure is written to *errfile* and *logfile* and the process is
    terminated through ``sys.exit``.

    Args:
        url: address to open with ``urllib.request.urlopen``.
        logfile: open handle for progress lines.
        errfile: open handle for error lines.
    """
    def abort(error_detail, exit_message):
        # Record the failure in both files, then stop the program.
        errfile.write(str(now.now()) + error_detail + ' . End progress\n')
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(exit_message)

    try:
        response = urllib.request.urlopen(url)
        logfile.write(str(now.now()) + ' open url\n')
        print('open url------')
    except urllib.error.HTTPError as e:
        abort(' file download HTTPError is ' + str(e.code),
              'file download HTTPError = ' + str(e.code))
    except urllib.error.URLError as e:
        abort(' file download URLError is ' + str(e.args),
              'file download URLError = ' + str(e.args))
    except Exception:
        print('file download error')
        import traceback
        abort(' generic exception: ' + str(traceback.format_exc()),
              'generic exception: ' + traceback.format_exc())
    return response
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract "Under 19" and "19-24" apprenticeship figures per requested year.

    The sheet is searched in two passes: first for a header row mentioning
    every requested year, then below it for a row holding an
    "All Apprenticeships" cell inside each year's column range. Data rows
    are read from the row after that.

    Args:
        url: location of the workbook, opened through ``openurl.openurl``.
        sheet: sheet to parse.
        reqFields: requested year labels.
        outPath: destination passed on to ``dsave.save``.
        col: four output column names; they receive the cell in the second
            column, the year, the age band and the value.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Exits through ``sys.exit`` when either search fails. Relies on the
    module-level ``logfile`` and ``errfile`` handles.
    """
    yearReq = reqFields
    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # Pass 1: find the row whose cells mention the requested years; yearCol
    # collects the column of every match (substring test on the cell text).
    for i in range(df.shape[0]):
        yearCol = []
        for k in yearReq:
            k_asked = k
            for j in range(df.shape[1]):
                if str(k_asked) in str(df.iloc[i, j]):
                    yearCol.append(j)
                    restartIndex = i + 1
        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        errfile.write(
            str(now.now()) + " Requested data " + str(yearReq).strip('[]') + " don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip('[]') + " don't match the excel file. Please check the file at: " + url)

    # Temporary sentinel so the last year's range extends to the final column.
    yearCol.append(df.shape[1])
    # Pass 2: below the year header, find the row with one
    # "All Apprenticeships" cell per year range; kk collects those columns.
    for i in range(restartIndex, df.shape[0]):
        kk = []
        k_asked = "All Apprenticeships"
        for k in range(len(yearCol) - 1):
            for j in range(yearCol[k], yearCol[k + 1]):
                if df.iloc[i, j] == k_asked:
                    kk.append(j)
                    restartIndex = i + 1
                    break
        if len(kk) == len(yearReq):
            break
    yearCol.pop()  # remove the sentinel again

    if len(kk) != len(yearReq):
        errfile.write(
            str(now.now()) + " Requested data " + str(yearReq).strip('[]') + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(
            "Requested data " + str(yearReq).strip('[]') + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " + url)

    raw_data = {}
    for j in col:
        raw_data[j] = []

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, df.shape[0]):
        ii = 0  # index of the year that column k belongs to
        for k in kk:
            # Skip rows without a name, without a value, or labelled "Total".
            if (pd.notnull(df.iloc[i, 1])) and (pd.notnull(
                    df.iloc[i, k])) and (df.iloc[i, 1] != "Total"):
                ij = 0  # column offset: 0 for "Under 19", 1 for "19-24"
                for jj in ["Under 19", "19-24"]:
                    raw_data[col[0]].append(df.iloc[i, 1])
                    raw_data[col[1]].append(yearReq[ii])
                    raw_data[col[2]].append(jj)
                    raw_data[col[3]].append(df.iloc[i, k + ij])
                    ij += 1
            ii += 1
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
'X-Requested-With': 'XMLHttpRequest' } res = requests.get(url, headers=headers) return res if __name__ == '__main__': with open("urllist.txt", 'r') as f: urllist = [i for i in f] while urllist: try: url = urllist.pop(0) res = get_res(url) html = etree.HTML(res.text) # 获取下页链接 imgurl = html.xpath("//div[@class='single-wallpaper']/img/@src") if imgurl: tmp_img = get_res(imgurl[0]) with open('4k/' + imgurl[0].split('/')[-1], 'wb') as im: im.write(tmp_img.content) print(now.now(), "写入了:", imgurl[0].split('/')[-1]) with open('urllist.txt', 'w') as p: for i in urllist: p.write(i + '\n') time.sleep(1) except Exception as e: with open('urllist.txt', 'w') as p: for i in urllist: p.write(i + '\n') print('出错了,休息3s继续') time.sleep(3)
def now(self):
    """Return the current time as reported by the ``now`` module's ``now()``."""
    current = now.now()
    return current
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract "Under 19" and "19-24" apprenticeship figures per requested year.

    Two header searches locate the data: first the row mentioning every
    requested year, then, below it, the row holding an "All Apprenticeships"
    cell inside each year's column range. Rows after that are read as data.

    Args:
        url: location of the workbook, opened through ``openurl.openurl``.
        sheet: sheet to parse.
        reqFields: requested year labels.
        outPath: destination passed on to ``dsave.save``.
        col: four output column names; they receive the cell in the second
            column, the year, the age band and the value.
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Exits through ``sys.exit`` when either search fails. Relies on the
    module-level ``logfile`` and ``errfile`` handles.
    """
    yearReq = reqFields
    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')
    # First search: a row whose cells contain the requested year labels
    # (substring match); yearCol records the matching columns.
    for i in range(df.shape[0]):
        yearCol = []
        for k in yearReq:
            k_asked = k
            for j in range(df.shape[1]):
                if str(k_asked) in str(df.iloc[i, j]):
                    yearCol.append(j)
                    restartIndex = i + 1
        if len(yearCol) == len(yearReq):
            break

    if len(yearCol) != len(yearReq):
        errfile.write(str(now.now()) + " Requested data " + str(yearReq).strip(
            '[]') + " don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip(
            '[]') + " don't match the excel file. Please check the file at: " + url)

    # Sentinel end column so the last year's range reaches the sheet edge.
    yearCol.append(df.shape[1])
    # Second search: the first "All Apprenticeships" cell within each year's
    # column range; kk records those columns.
    for i in range(restartIndex, df.shape[0]):
        kk = []
        k_asked = "All Apprenticeships"
        for k in range(len(yearCol)-1):
            for j in range(yearCol[k], yearCol[k+1]):
                if df.iloc[i, j] == k_asked:
                    kk.append(j)
                    restartIndex = i + 1
                    break
        if len(kk) == len(yearReq):
            break
    yearCol.pop()  # drop the sentinel

    if len(kk) != len(yearReq):
        errfile.write(str(now.now()) + " Requested data " + str(yearReq).strip(
            '[]') + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " + str(url) + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit("Requested data " + str(yearReq).strip(
            '[]') + " in the field 'All Apprenticeships' don't match the excel file. Please check the file at: " + url)

    raw_data = {}
    for j in col:
        raw_data[j] = []

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, df.shape[0]):
        ii = 0  # position of column k within kk, used to pick the year
        for k in kk:
            # Only rows with a name and a value, and not the "Total" row.
            if (pd.notnull(df.iloc[i, 1])) and (pd.notnull(df.iloc[i, k])) and (df.iloc[i, 1] != "Total"):
                ij = 0  # 0 reads column k ("Under 19"), 1 reads k + 1 ("19-24")
                for jj in ["Under 19", "19-24"]:
                    raw_data[col[0]].append(df.iloc[i, 1])
                    raw_data[col[1]].append(yearReq[ii])
                    raw_data[col[2]].append(jj)
                    raw_data[col[3]].append(df.iloc[i, k+ij])
                    ij += 1
            ii += 1
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
def download(url, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Flatten every sheet of an Excel workbook of age-band columns.

    In each sheet the header row is the first row containing a two-word
    cell that includes the word ``Aged`` (for example "Aged 10-14"). The
    age-band columns run from that cell up to, but excluding, the last
    column.

    Args:
        url: workbook location; the first four characters of its last path
            component are stored as the date column.
        outPath: destination passed on to ``dsave.save``.
        col: nine output column names. ``col[0:3]`` take the first three
            cells of each row, ``col[3]`` the sheet name, ``col[4]`` the
            fourth cell, ``col[5]`` the age band, ``col[6]`` the value,
            ``col[7]`` the date and ``col[8]`` the constant "HCC_SAPF_2015".
        keyCol, digitCheckCol, noDigitRemoveFields: forwarded to ``dsave.save``.

    Exits through ``sys.exit`` when a sheet has no age-band header. Relies
    on the module-level ``logfile`` and ``errfile`` handles.
    """
    dName = outPath
    pDate = url.split('/')[-1][:4]

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(url)
    sheets = xd.sheet_names

    raw_data = {j: [] for j in col}

    for sheet in sheets:
        df = xd.parse(sheet)

        logfile.write(str(now.now()) + ' for sheet ' + str(sheet) + '------\n')
        logfile.write(str(now.now()) + ' indicator checking\n')
        print('for sheet ' + str(sheet) + ' ------')
        print('indicator checking------')

        # indicator checking
        found = False
        for i in range(df.shape[0]):
            for j in range(df.shape[1]):
                words = str(df.iloc[i, j]).split()
                if 'Aged' in words and len(words) == 2:
                    found = True
                    break
            if found:
                ageReq = df.iloc[i, j:-1].tolist()
                restartIndex = i + 1
                break

        if not found:
            errfile.write(str(now.now()) + " The sheet " + str(sheet) + " has not required fields, such as 'Aged 10-14'. Please check the file at: " + str(url) + " . End progress\n")
            logfile.write(str(now.now()) + ' error and end progress\n')
            # The doubled "not not" is gone, matching the errfile wording.
            sys.exit("The sheet " + str(sheet) + " has not required fields, such as 'Aged 10-14'. Please check the file at: " + url)

        # data reading
        logfile.write(str(now.now()) + ' data reading\n')
        print('data reading------')
        bands = [k.split()[1] for k in ageReq]
        n_bands = len(ageReq)
        for i in range(restartIndex, df.shape[0]):
            # NOTE(review): str() of a missing cell is 'nan', which is truthy,
            # so this guard only skips genuinely empty strings.
            if str(df.iloc[i, 0]):
                raw_data[col[5]].extend(bands)
                raw_data[col[0]].extend([df.iloc[i, 0]] * n_bands)
                raw_data[col[1]].extend([df.iloc[i, 1]] * n_bands)
                raw_data[col[2]].extend([df.iloc[i, 2]] * n_bands)
                raw_data[col[4]].extend([df.iloc[i, 3]] * n_bands)
                raw_data[col[6]].extend(df.iloc[i, j:-1].tolist())
                # The sheet name is added per row so this column stays
                # aligned with the others even if a row is skipped.
                raw_data[col[3]].extend([sheet] * n_bands)

    raw_data[col[7]] = [pDate] * len(raw_data[col[0]])
    raw_data[col[8]] = ["HCC_SAPF_2015"] * len(raw_data[col[0]])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
# Script entry: either write a template config file and exit, or load the
# chosen config and open the log file named after its output path.
args = parser.parse_args()

if args.generateConfig:
    obj = {
        "url": "http://www.hscic.gov.uk/catalogue/PUB13365/gp-reg-patients-01-2014.csv",
        "outPath": "tempGpPatientsAge.csv",
        # Explicit alternative to "all":
        # "reqFields": ["2014", "01", "MALE_0-4", "MALE_5-9", "MALE_10-14", "FEMALE_0-4", "FEMALE_5-9", "FEMALE_10-14"]
        "reqFields": ["2014", "01", "all"],  # "all" means all "male" and "female" fields
        "colFields": ['GP_PRACTICE_CODE', 'POSTCODE', 'CCG_CODE', 'NHSE_AREA_TEAM_CODE', 'NHSE_REGION_CODE', 'Sex', 'Age', 'Value', 'Year', 'Month'],
        "primaryKeyCol": ['GP_PRACTICE_CODE', 'CCG_CODE', 'NHSE_AREA_TEAM_CODE', 'NHSE_REGION_CODE', 'Sex', 'Age', 'Year', 'Month'],  # [0, 2, 3, 4, 5, 6, 8, 9]
        "digitCheckCol": ['Value'],  # [7]
        "noDigitRemoveFields": []
    }

    logfile = open("log_tempGpPatientsAge.log", "w")
    logfile.write(str(now.now()) + ' start\n')

    errfile = open("err_tempGpPatientsAge.err", "w")

    with open("config_tempGpPatientsAge.json", "w") as outfile:
        json.dump(obj, outfile, indent=4)
        logfile.write(str(now.now()) + ' config file generated and end\n')

    # Close both handles explicitly so the log is flushed before exiting.
    errfile.close()
    logfile.close()
    sys.exit("config file generated")

if args.configFile is None:
    args.configFile = "config_tempGpPatientsAge.json"

with open(args.configFile) as json_file:
    oConfig = json.load(json_file)

# The log file is named after the output path up to its first dot.
logfile = open('log_' + oConfig["outPath"].split('.')[0] + '.log', "w")
# Script entry: either write a template config file and exit, or load the
# chosen config and open the log file named after its output path.
args = parser.parse_args()

if args.generateConfig:
    obj = {
        "url": "https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/417835/SFR10_2015_Local_authority_tables.xlsx",
        "outPath": "tempPerTru.csv",
        "sheet": "Table_11_1",
        "reqFields": ["State-funded primary, secondary and special schools (5)"],
        "colFields": ["ecode", "name", "year", "value"],
        "primaryKeyCol": ["ecode", "year"],  # [0, 2]
        "digitCheckCol": ["value"],  # [3]
        "noDigitRemoveFields": [],
    }

    logfile = open("log_tempPerTru.log", "w")
    logfile.write(str(now.now()) + " start\n")

    errfile = open("err_tempPerTru.err", "w")

    with open("config_tempPerTru.json", "w") as outfile:
        json.dump(obj, outfile, indent=4)
        logfile.write(str(now.now()) + " config file generated and end\n")

    # Close both handles explicitly so the log is flushed before exiting.
    errfile.close()
    logfile.close()
    sys.exit("config file generated")

if args.configFile is None:
    args.configFile = "config_tempPerTru.json"

with open(args.configFile) as json_file:
    oConfig = json.load(json_file)

# The log file is named after the output path up to its first dot.
logfile = open("log_" + oConfig["outPath"].split(".")[0] + ".log", "w")
def generate_content(self, debug=False):
    """Build the date banner and analogue clock face shown on this page.

    The clock face comes from ``clock.clock`` (a 2-D grid of truthy/falsy
    cells); the hour and minute hands are rasterised on top of it. Pairs of
    grid rows are then folded into one text row using Unicode half-block
    characters. The result is stored in ``self.content`` and the tag line in
    ``self.tagline``.

    ``debug`` is accepted for interface compatibility but is not used.
    """
    from clock import clock

    # Sample the time once so the hands, the weekday colour and the date
    # banner all describe the same instant.
    moment = now()
    current_minute = float(moment.strftime("%M"))
    current_hour = float(moment.strftime("%I"))
    current_weekday = moment.strftime("%a")

    background = self.colours.Background
    blink = self.colours.Style.BLINK
    weekday_backgrounds = {
        "Mon": blink + background.RED,
        "Tue": blink + background.YELLOW,
        "Wed": blink + background.CYAN,
        "Thu": blink + background.GREEN,
        "Fri": blink + background.MAGENTA,
        "Sat": blink + background.BLUE,
        "Sun": background.RED,
    }
    # "%a" is locale dependent; fall back to the plain red used for Sunday
    # rather than failing on an unrecognised abbreviation.
    bgcolor = weekday_backgrounds.get(current_weekday, background.RED)

    content = colour_print(
        printer.text_to_ascii(moment.strftime("%A %-d %b")),
        background=bgcolor,
        foreground=self.colours.Foreground.BLACK,
    )
    content += "\n"

    circle_radius = 19
    screen_radius = 19
    d = .3           # half-width of a hand, in grid cells
    num_points = 25  # samples taken along each hand

    minute = [[False] * len(row) for row in clock]
    hour = [[False] * len(row) for row in clock]

    def plot_hand(grid, angle, length_fraction):
        """Mark the cells of ``grid`` covered by a hand at ``angle``."""
        for step in range(num_points + 1):
            r = circle_radius * step * length_fraction / num_points
            tip_x = r * cos(angle)
            tip_y = r * sin(angle)
            # Stamp a small square around each sample to give the hand width.
            for dx in (-d, d):
                for dy in (-d, d):
                    column = screen_radius + int(floor(tip_x + .5 + dx))
                    row = screen_radius - int(floor(tip_y + .5 + dy))
                    grid[row][column] = True

    # The hour hand moves continuously between hour marks.
    current_hourtopointat = current_hour + current_minute / 60.
    plot_hand(hour, pi / 2 - current_hourtopointat * 2 * pi / 12, .5)
    plot_hand(minute, pi / 2 - current_minute * 2 * pi / 60, .8)

    # NOTE(review): assumes ``clock`` has at least `width` rows and columns.
    width = 2 * screen_radius + 1
    lit = [
        [bool(clock[y][x] or minute[y][x] or hour[y][x]) for x in range(width)]
        for y in range(width)
    ]
    # The row count is odd, so pad with a blank row to complete the last pair.
    lit.append([False] * width)

    # (upper cell lit, lower cell lit) -> character covering both cells.
    glyphs = {
        (False, False): " ",
        (True, True): u"\u2588",
        (True, False): u"\u2580",
        (False, True): u"\u2584",
    }
    margin = " " * (screen_radius + 1)
    lines = [
        margin + "".join(glyphs[pair] for pair in zip(lit[y], lit[y + 1]))
        for y in range(0, width, 2)
    ]
    content += "\n".join(lines)

    self.content = content
    self.tagline = "KLB Mean Time"
# Entry-point statements: register the config option, parse the command line,
# then either write a template config file and exit, or load the config and
# open the run log.
parser.add_argument("--configFile", "-c", help="path for config file")
args = parser.parse_args()

if args.generateConfig:
    obj = {
        # The template carries no "url" entry; this example is kept for reference:
        # "url": "https://www.nomisweb.co.uk/api/v01/dataset/NM_18_1.data.csv?geography=1946157199...1946157245&date=latest&age=MAKE|Aged%2016-24|1;2&duration=MAKE|Up%20to%206%20months|1...7,MAKE|Over%206%20months%20and%20up%20to%20a%20year|8;9,MAKE|Over%201%20year|10...16&sex=5,6&measures=20100,20206&select=geography_code,geography_name,sex_name,age_name,duration_name,measures_name,obs_value,date",
        "outPath": "tempYouthUnemployment.csv",
        "date": ["Latest", "2015-07", "2015-04", "2015-09", "2014-03", "2012-05"],
        "colFields": ["Geography_code", "geography_name", "sex_name", "Age_name", "duration_name",
                      "Measures_name", "Obs_value", "Date"],
        # Positions of the key columns within colFields: [0, 2, 3, 4, 5, 7]
        "primaryKeyCol": ["Geography_code", "sex_name", "Age_name", "duration_name", "Measures_name", "Date"],
        "digitCheckCol": ["Obs_value"],  # position in colFields: [6]
        "noDigitRemoveFields": ["Obs_value"],  # position in colFields: [6]
    }

    # The context managers flush and close both files before the process
    # exits. The error file is still created (empty), as before.
    with open("log_tempYouthUnemployment.log", "w") as logfile, \
            open("err_tempYouthUnemployment.err", "w") as errfile:
        logfile.write(str(now.now()) + ' start\n')
        with open("config_tempYouthUnemployment.json", "w") as outfile:
            json.dump(obj, outfile, indent=4)
        logfile.write(str(now.now()) + ' config file generated and end\n')
    sys.exit("config file generated")

# Fall back to the default config name when none was given on the command line.
if args.configFile is None:
    args.configFile = "config_tempYouthUnemployment.json"

with open(args.configFile) as json_file:
    oConfig = json.load(json_file)

# The log name is derived from the output path up to its first '.'.
# This handle stays open because the statements that follow write to it.
logfile = open('log_' + oConfig["outPath"].split('.')[0] + '.log', "w")
def generate_content(self):
    """Build the room-occupancy page and store it in ``self.content``.

    Downloads the room booking list as JSON, where each entry is
    ``[start, end, name]`` with date strings parsed by ``dateutil``. Bookings
    that have already ended are dropped. The page shows a coloured status
    banner ("Busy until ...", "Free until ..." or "Free all day") followed by
    the remaining bookings grouped by day.

    NOTE(review): the busy/free logic assumes the feed is ordered by start
    time; confirm against the data source.
    """
    import urllib2
    import now
    import json
    from dateutil import parser

    def friendly_date(date):
        """Return "Today", "Tomorrow" or a "weekday day-of-month" label."""
        today = datetime.today().date()
        if date.date() == today:
            return "Today"
        if date.date() == today + timedelta(days=1):
            return "Tomorrow"
        return date.strftime("%A %-d")

    content = colour_print(printer.text_to_ascii("Muirhead Room"))

    response = urllib2.urlopen("http://www.mscroggs.co.uk/room_list.json")
    try:
        events = json.load(response)
    finally:
        # Release the HTTP connection even if the payload is not valid JSON.
        response.close()

    # Compare as naive datetimes, matching the parsed booking times.
    current = now.now().replace(tzinfo=None)
    for e in events:
        e[0] = parser.parse(e[0])
        e[1] = parser.parse(e[1])
    events = [e for e in events if e[1] > current]

    occupied = any(e[0] < current and e[1] > current for e in events)

    foreground = self.colours.Foreground
    background = self.colours.Background
    colours_end = foreground.DEFAULT + background.DEFAULT

    if occupied:
        colours_start = background.RED + foreground.WHITE
        # Walk through back-to-back bookings to find when the room frees up.
        # The bound check stops cleanly when the chain runs to the last entry.
        idx = 0
        while idx + 1 < len(events) and events[idx + 1][0] == events[idx][1]:
            idx += 1
        message = "Busy until " + events[idx][1].strftime("%H:%M")
    else:
        colours_start = background.GREEN + foreground.BLACK
        if events:
            next_occupied = events[0][0]
        else:
            # No upcoming bookings: use a far-future placeholder.
            next_occupied = datetime.today() + timedelta(days=300)

        if next_occupied.date() != current.date():
            message = "Free all day"
        else:
            message = "Free until " + next_occupied.strftime("%H:%M")
            # Warn in yellow when the next booking starts within the hour.
            # This must compare full datetimes; a date difference is always
            # zero inside this same-day branch.
            if next_occupied - current <= timedelta(hours=1):
                colours_start = background.YELLOW + foreground.BLACK

    # Centre the message; the right side takes the odd column so the banner
    # spans the full screen width.
    padding = max(int(screen.WIDTH) - len(message), 0)
    left_banner = " " * (padding // 2)
    right_banner = " " * (padding - padding // 2)

    content += "\n\n"
    content += colours_start + left_banner + message + right_banner + colours_end + "\n"

    previous_date = None
    for event in events:
        start_time, end_time, name = event[0], event[1], event[2]
        # Emit a day heading whenever the booking's end date changes.
        if end_time.date() != previous_date:
            content += "\n " + foreground.GREEN
            content += friendly_date(end_time)
            content += foreground.DEFAULT + "\n"
        content += foreground.RED
        content += start_time.strftime("%H:%M") + "-" + end_time.strftime("%H:%M") + " "
        content += foreground.DEFAULT
        content += name
        content += "\n"
        previous_date = end_time.date()

    self.content = content
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract persistent-absentee percentages from an Excel sheet and save them.

    The workbook is fetched with ``openurl.openurl`` and ``sheet`` is parsed
    into a DataFrame. The function finds the header row containing the
    requested field, then the sub-header cell reading
    "Percentage of persistent absentees (4)" beneath it. It collects every
    later row whose second column looks like an area code ("E" followed by
    eight digits) and hands the result to ``dsave.save``.

    Args:
        url: Location of the Excel workbook.
        sheet: Sheet to parse.
        reqFields: Requested header fields; exactly one entry is supported.
        outPath: Output path forwarded to ``dsave.save``.
        col: Output column names; the first four are filled with code, name,
            year and value respectively.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded unchanged to
            ``dsave.save``.

    Raises:
        SystemExit: If ``reqFields`` does not hold exactly one entry, or the
            requested header or the percentage sub-header cannot be found.
            The failure is written to the error and log files first.
    """
    schoolReq = reqFields
    requested = str(schoolReq).strip("[]")

    def fail(message):
        """Record ``message`` in the error and log files, then exit with it."""
        errfile.write(str(now.now()) + " " + message + " . End progress\n")
        logfile.write(str(now.now()) + " error and end progress\n")
        sys.exit(message)

    if len(schoolReq) != 1:
        fail(
            "Requested data "
            + requested
            + " don't match the excel file. This code is only for extracting data from filed 'State-funded primary, secondary and special schools (5)' with 'Percentage of persistent absentees (4)'. Please check the file at: "
            + str(url)
        )

    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + " excel file loading\n")
    print("excel file loading------")
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    n_rows, n_cols = df.shape

    # The year is the text before the first comma in cell (row 2, column 0).
    iYear = (df.iloc[2, 0].split(","))[0]

    # indicator checking
    logfile.write(str(now.now()) + " indicator checking\n")
    print("indicator checking------")

    # Initialised up front so an empty sheet reports "don't match" instead of
    # raising NameError.
    numCol = []
    restartIndex = 0
    for i in range(n_rows):
        numCol = []
        for wanted in schoolReq:
            for j in range(n_cols):
                if str(wanted) in str(df.iloc[i, j]):
                    numCol.append(j)
                    restartIndex = i + 1
        if len(numCol) == len(schoolReq):
            break

    if len(numCol) != len(schoolReq):
        fail(
            "Requested data "
            + requested
            + " don't match the excel file. Please check the file at: "
            + str(url)
        )

    # Each requested header owns the columns from its own position up to the
    # next header's position; the sheet width closes the last span.
    bounds = numCol + [n_cols]
    k_asked = "Percentage of persistent absentees (4)"
    kk = []
    for i in range(restartIndex, n_rows):
        kk = []
        for k in range(len(bounds) - 1):
            for j in range(bounds[k], bounds[k + 1]):
                if df.iloc[i, j] == k_asked:
                    kk.append(j)
                    restartIndex = i + 1
                    break  # only the first match within a header's span
        if len(kk) == len(schoolReq):
            break

    if len(kk) != len(schoolReq):
        # Logged like the other failure paths.
        fail(
            "Requested data "
            + requested
            + " in the field 'Percentage of persistent absentees (4)' don't match the excel file. Please check the file at: "
            + str(url)
        )

    raw_data = {}
    for j in col:
        raw_data[j] = []

    # data reading
    logfile.write(str(now.now()) + " data reading\n")
    print("data reading------")
    for i in range(restartIndex, n_rows):
        # Keep only rows whose second column is "E" followed by 8 digits.
        if not re.match(r"E\d{8}$", str(df.iloc[i, 1])):
            continue
        for k in kk:
            raw_data[col[0]].append(df.iloc[i, 1])
            raw_data[col[1]].append(df.iloc[i, 3])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(df.iloc[i, k])
    logfile.write(str(now.now()) + " data reading end\n")
    print("data reading end------")

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)
def download(url, sheet, reqFields, outPath, col, keyCol, digitCheckCol, noDigitRemoveFields):
    """Extract one indicator column from an Excel sheet and save it as CSV.

    The workbook is fetched with ``openurl.openurl`` and ``sheet`` is parsed
    into a DataFrame. The function finds the row holding the requested field
    name, then collects every later row whose first index level looks like an
    area code ("E" followed by eight digits) and hands the result to
    ``dsave.save``. Year and quarter are taken from the last
    underscore-separated part of ``url``: four digits of year, then a month
    number that is divided by three.

    Args:
        url: Location of the Excel workbook; its final "_"-separated part,
            up to the first ".", must start with year and month digits.
        sheet: Sheet to parse.
        reqFields: Requested field names; exactly one entry is supported.
        outPath: Output path forwarded to ``dsave.save``.
        col: Output column names; the first five are filled with code, name,
            year, quarter and value respectively.
        keyCol, digitCheckCol, noDigitRemoveFields: Forwarded unchanged to
            ``dsave.save``.

    Raises:
        SystemExit: If ``reqFields`` does not hold exactly one entry or the
            requested field cannot be found. The failure is written to the
            error and log files first.
    """
    homeReq = reqFields
    requested = str(homeReq).strip('[]')

    def fail(message):
        """Record ``message`` in the error and log files, then exit with it."""
        errfile.write(str(now.now()) + " " + message + " . End progress\n")
        logfile.write(str(now.now()) + ' error and end progress\n')
        sys.exit(message)

    if len(homeReq) != 1:
        fail("Requested data " + requested
             + " don't match the excel file. This code is only for extracting data from filed 'e1b1a'. Please check the file at: "
             + str(url))

    dName = outPath

    # open url
    socket = openurl.openurl(url, logfile, errfile)

    # operate this excel file
    logfile.write(str(now.now()) + ' excel file loading\n')
    print('excel file loading------')
    xd = pd.ExcelFile(socket)
    df = xd.parse(sheet)
    n_rows, n_cols = df.shape

    # find year and quarter
    iYQ = url.split('_')[-1].split('.')[0]
    iYear = iYQ[:4]
    iQuarter = str(int(iYQ[4:]) // 3)

    # indicator checking
    logfile.write(str(now.now()) + ' indicator checking\n')
    print('indicator checking------')

    # Initialised up front so an empty sheet reports "don't match" instead of
    # raising NameError.
    numCol = []
    restartIndex = 0
    for i in range(n_rows):
        numCol = []
        for wanted in homeReq:
            for j in range(n_cols):
                # Purely positional lookup; `df.iloc[i][j]` would go through
                # label-based Series indexing for the column.
                if df.iloc[i, j] == wanted:
                    numCol.append(j)
                    restartIndex = i + 1
        if len(numCol) == len(homeReq):
            break

    if len(numCol) != len(homeReq):
        fail("Requested data " + requested
             + " don't match the excel file. Please check the file at: " + str(url))

    raw_data = {}
    for j in col:
        raw_data[j] = []

    # data reading
    logfile.write(str(now.now()) + ' data reading\n')
    print('data reading------')
    for i in range(restartIndex, n_rows):
        row_key = df.index[i]
        # Keep only rows whose first index level is "E" followed by 8 digits.
        if not re.match(r'E\d{8}$', str(row_key[0])):
            continue
        for k in numCol:
            raw_data[col[0]].append(row_key[0])
            raw_data[col[1]].append(row_key[1])
            raw_data[col[2]].append(iYear)
            raw_data[col[3]].append(iQuarter)
            raw_data[col[4]].append(df.iloc[i, k])
    logfile.write(str(now.now()) + ' data reading end\n')
    print('data reading end------')

    # save csv file
    dsave.save(raw_data, col, keyCol, digitCheckCol, noDigitRemoveFields, dName, logfile)