def GOOGLE_get_data(company):
    # Fetch Google News RSS results for the company and store extracted articles.
    google_news_rss_url = "https://news.google.com/news/?q=%s&output=rss" % company
    rss_feed = feedparser.parse(google_news_rss_url)
    content_list = list()
    for entry in rss_feed['entries']:
        title = entry['title']
        link = entry['link']
        try:
            news_page = urllib2.urlopen(link).read()
            extractor = Extractor(extractor='ArticleExtractor', html=news_page)
        except Exception:
            continue
        content = extractor.getText()
        now = datetime.datetime.now()
        content_list.append({"title": title,
                             "article": content,
                             "link": link,
                             "source": "GOOGLE",
                             "target": company,
                             "date": "%04d%02d%02d" % (now.year, now.month, now.day),
                             "hash": hashlib.sha224(title.encode("UTF-8")).hexdigest()})
    DBOperation.save_db(content_list)
def on_resume(self, id):
    self.lock.acquire()
    flag = 0
    try:
        index = find_request_by_id(id, self.pause_queue)
        request1 = self.pause_queue[int(index)]
        if len(self.serve_queue) < int(self.max_serve_num):
            # Serve queue has a free slot: rejoin it directly.
            self.serve_queue.append(request1)
            self.serve_time_list.append(0)
            client = g_conn_pool_dict[id]
            client.sendall(RUNNINGMSG.encode('utf-8'))
            room_dict[id].request_state = 'running'
            flag = 1
        else:
            # Serve queue is full: pick on the weakest request.
            locate = self.select_lowest_request()
            if request1.speed > self.serve_queue[locate].speed:
                self.swap_out(self.serve_queue[locate].id)
                self.serve_queue.append(request1)
                self.serve_time_list.append(0)
                client = g_conn_pool_dict[id]
                client.sendall(RUNNINGMSG.encode('utf-8'))
                room_dict[id].request_state = 'running'
                flag = 1
            else:
                # No lower-priority request to evict: enter the wait queue.
                self.wait_queue.append(request1)
                self.wait_time_list.append(0)
                client = g_conn_pool_dict[id]
                client.sendall(WAITINGMSG.encode('utf-8'))
                room_dict[id].request_state = 'waiting'
                flag = 2
        self.pause_queue.remove(request1)
        # Write the result to the log table.
        now_time = get_sys_time()
        if flag:
            state = 'running' if flag == 1 else 'waiting'
            sql = ("insert into log values('" + now_time + "','" + id + "','" + state
                   + "','" + str(request1.tem) + "','" + str(request1.speed) + "')")
            DB_Log = DBOperation.IMapper()
            DB_Log.update(sql)
            DB_Log.DB.close_conn()
    finally:
        self.lock.release()
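# on_resume and add_request (below) both depend on find_request_by_id, which is
# not shown in this section. The following is a hypothetical reconstruction,
# inferred only from the call sites (the return value is used as a queue index
# and is compared against FAILMSG); the real implementation may differ.
def find_request_by_id(id, queue):
    for index, request in enumerate(queue):
        if request.id == id:
            return index
    return FAILMSG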
def download_new_testcases():
    db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666')
    case_list = db_sc_cases.get(StaticUtils.case_table, 'name, doc_id, court, YEAR(DATE)',
                                'download=\'no\'')
    total = len(case_list)
    i = 0
    for case in case_list:
        case_name, case_doc_id, case_court, case_year = case
        print(case_name, case_doc_id, case_court, case_year)
        try:
            case_text = download_case(case_doc_id)
        except Exception as e:
            # Back off and skip this case; 'download' stays 'no' so it is retried later.
            print(e)
            db_sc_cases.commit()
            print("Sleep 2s ...")
            time.sleep(2)
            i += 1
            continue
        if case_text:
            verdict = VerdictAnalyser.VerdictAnalyser(case_text)
            print(f"{i}/{total} case {case_name} is downloaded.")
            db_sc_cases.update(StaticUtils.case_table, 'download', '\'yes\'',
                               f'doc_id=\'{case_doc_id}\'')
            db_sc_cases.update(StaticUtils.case_table, 'content', f'\'{verdict.content}\'',
                               f'doc_id=\'{case_doc_id}\'')
            db_sc_cases.commit()
        else:
            db_sc_cases.update(StaticUtils.case_table, 'download', '\'empty\'',
                               f'doc_id=\'{case_doc_id}\'')
            print(f"{i}/{total} case {case_name} is empty.")
        i += 1
    db_sc_cases.commit()
    db_sc_cases.close()
def check_out(self, client, addr, msg):
    # client.sendall("input target room id:".encode("utf-8"))
    # msg = client.recv(BUFSIZE).decode(encoding="utf8")
    print(addr, "client message 2:", msg)
    room_dict[str(msg[0])].check_out()
    spare_room_list.append(str(msg[0]))
    spare_room_list.sort()
    client.sendall((str(msg[0]) + "check out success!").encode("utf-8"))
    now_time = get_sys_time()
    Bill = get_bill()
    Bill.set_item(str(msg[0]))
    Bill.set_checkout_time(str(msg[0]), now_time)
    Bill.set_fee(str(msg[0]))
    room_bill = bill_dict[str(msg[0])]  # check-in time, check-out time, cost
    room_detail_list = DBOperation.get_replist().get_roomlist(str(msg[0]))  # room detail list from the DB
    msg = (str(msg[0]) + ',' + str(room_bill.check_in_time) + '^'
           + str(room_bill.check_out_time) + '^' + str(room_bill.cost) + ',')
    for index, item in enumerate(room_detail_list):
        detail = (str(item.start.split(' ')[1]) + '^' + str(item.end.split(' ')[1]) + '^'
                  + str(item.duration) + '^' + str(item.speed) + '^' + str(item.rate) + '^'
                  + str(item.cost))
        # Details are '|'-separated; the last entry carries no trailing separator.
        if index != len(room_detail_list) - 1:
            msg = msg + detail + '|'
        else:
            msg = msg + detail
    print(msg)
    signal = client.recv(BUFSIZE).decode(encoding="utf8").split(' ')
    print(signal)
    if signal[0] == BILLMSG:
        client.sendall(msg.encode("utf-8"))
    '''for x in bill_dict.keys():
def get_log(self, client, addr, msg):
    # client.sendall("input date: mm-dd".encode("utf-8"))
    # msg = client.recv(BUFSIZE).decode(encoding="utf8")
    print(addr, "client message:", msg[0])
    f = DBOperation.get_replist().get_file_report()
    print(f)
    if f == 'success':
        client.sendall(("daily log of %s output success" % msg[0]).encode("utf-8"))
def getAndConvertCostData(dateDetail):
    dbo = DBOperation.DBOperation()
    #rawData = dbo.customizedFetch(sqlCommand)
    #rawData = dbo.fetchAllData('cost')
    cmd = makeCommand(dateDetail)
    rawData = dbo.customizedFetch(cmd)
    (gridData, gridLabel) = decryptionList(rawData)
    return gridData, gridLabel
def Twitter_get_data(company):
    config = FYPsetting.TWITTER_CONFIG
    twitter = Twitter(auth=OAuth(config["access_key"], config["access_secret"],
                                 config["consumer_key"], config["consumer_secret"]))
    query = twitter.search.tweets(q=company, lang="en", result_type="recent",
                                  count="%d" % FYPsetting.QUERY_PAGE)
    urllist = list()
    content_list = list()
    for result in query["statuses"]:
        #print "@%s %s" % (result["user"]["screen_name"].encode("UTF-8"), result["text"].encode("UTF-8"))
        cur_text = result["text"].split(" ")
        # Pre-process a readable title: drop links, hashtags, and mentions.
        title_list = [value for value in result["text"].split(" ")
                      if not value.startswith("http") and not value.startswith("#")
                      and not value.startswith("@")]
        final_title = ' '.join(title_list)
        # Parse and extract the article behind the first fresh link.
        for word in cur_text:
            if word.startswith("http"):
                utf_word = word.encode('latin-1', 'ignore')
                if utf_word in urllist:
                    break
                urllist.append(utf_word)
                try:
                    extractor = Extractor(extractor='ArticleExtractor', url=utf_word)
                except Exception:
                    break
                content = extractor.getText()
                if content != "":
                    now = datetime.datetime.now()
                    content_list.append({"title": final_title,
                                         "article": content,
                                         "link": utf_word,
                                         "source": "TWITTER",
                                         "target": company,
                                         "date": "%04d%02d%02d" % (now.year, now.month, now.day),
                                         "hash": hashlib.sha224(result["text"].encode("UTF-8")).hexdigest()})
                break
    DBOperation.save_db(content_list)
def getStaff():
    sql = "select username, passwd from staff"
    DB_Log = DBOperation.IMapper()
    result = DB_Log.query(sql)
    staff_dict = dict()
    for x in result:
        user = x[0]
        pwd = x[1]
        staff_dict[user] = pwd
    DB_Log.DB.close_conn()
    return staff_dict
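# A minimal usage sketch for the dict returned by getStaff(). check_login is a
# hypothetical caller, not part of the original code, and assumes passwords
# are stored in plaintext, as the query above implies.
def check_login(username, password):
    staff = getStaff()
    return staff.get(username) == password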
def set_pause(self):
    ac.handle_pause(self.id)
    room = room_dict[self.id]
    tem = room.ac_tem
    speed = room.ac_speed
    now_time = get_sys_time()
    sql = ("insert into log values('" + now_time + "','" + self.id + "','pausing','"
           + str(tem) + "','" + str(speed) + "')")
    DB_Log = DBOperation.IMapper()
    DB_Log.update(sql)
    DB_Log.DB.close_conn()
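# The insert-into-log pattern above also appears in on_resume, swap_out, and
# add_request. Below is a deduplicating sketch, assuming only the IMapper API
# visible in this code (update(sql) and DB.close_conn()); the name write_log
# is hypothetical. If IMapper supports parameter binding, passing the values
# as parameters would also close the SQL-injection hole these queries share.
def write_log(room_id, state, tem, speed):
    now_time = get_sys_time()
    sql = ("insert into log values('" + now_time + "','" + room_id + "','" + state
           + "','" + str(tem) + "','" + str(speed) + "')")
    db_log = DBOperation.IMapper()
    db_log.update(sql)
    db_log.DB.close_conn()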
def main():
    db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666')
    t0 = time.time()
    cases = analyse_case(db_sc_cases)
    t1 = time.time()
    update_to_db(db_sc_cases, cases)
    t2 = time.time()
    db_sc_cases.commit()
    t3 = time.time()
    print(t1 - t0, t2 - t1, t3 - t2)
    db_sc_cases.close()
def NYT_get_data(company):
    raw_response_list = list()
    API_base_url = "http://api.nytimes.com/svc/search/v2/articlesearch.json?"
    config = FYPsetting.NYT_CONFIG
    now = datetime.datetime.now()
    past = now - datetime.timedelta(hours=72)
    now_str = "%04d%02d%02d" % (now.year, now.month, now.day)
    past_str = "%04d%02d%02d" % (past.year, past.month, past.day)
    for page in range(FYPsetting.QUERY_PAGE // 3):
        # 'begin_date' is the parameter name the Article Search API expects.
        url = "%sbegin_date=%s&sort=newest&page=%d&q=%s&api-key=%s" % (
            API_base_url, past_str, page, company, config["API_key"])
        response = requests.get(url).json()
        raw_response_list += response["response"]["docs"]
    content_list = list()
    for doc in raw_response_list:
        url = doc["web_url"]
        title = doc["headline"]["main"]
        #print title
        try:
            cj = cookielib.CookieJar()
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            html = opener.open(url).read()
            extractor = Extractor(extractor='ArticleExtractor', html=html)
        except Exception:
            continue
        content = extractor.getText()
        now = datetime.datetime.now()
        content_list.append({"title": title,
                             "article": content,
                             "link": url,
                             "source": "NYT",
                             "target": company,
                             "date": now_str,
                             "hash": hashlib.sha224(title.encode("UTF-8")).hexdigest()})
    DBOperation.save_db(content_list)
def NASDAQ_get_data(company_code):
    url = 'http://www.nasdaq.com/symbol/%s/news-headlines' % company_code
    conn = urllib2.urlopen(url)
    html = conn.read()
    soup = BeautifulSoup(html)
    content_div = soup.find("div", {'class': "news-headlines"})
    # No news found?
    if content_div is None:
        return
    links = content_div.findAll('a')
    content_list = list()
    for tag in links:
        if tag.parent.name != "span":
            continue
        link = tag.get('href', None)
        title = tag.contents[0]
        try:
            news_page = urllib2.urlopen(link).read()
            extractor = Extractor(extractor='ArticleExtractor', html=news_page)
        except Exception:
            continue
        content = extractor.getText()
        now = datetime.datetime.now()
        content_list.append({"title": title,
                             "article": content,
                             "link": link,
                             "source": "NASDAQ",
                             "target": company_code,
                             "date": "%04d%02d%02d" % (now.year, now.month, now.day),
                             "hash": hashlib.sha224(title.encode("UTF-8")).hexdigest()})
    DBOperation.save_db(content_list)
def getAllRawData(self):
    cleanedTimeList = []
    dbo = DBOperation.DBOperation()
    try:
        dbo.fetchAllData('cost')
        (gridData, gridLabel) = cmm.getAndConvertCostData(("", "", "",))
    except Exception:
        gridLabel = ""
    for item in gridLabel:
        #print "item : >%s<" % item
        cleanedTimeList.append(item[:item.rfind(' ')])
    cleanedTimeList = list(set(cleanedTimeList))
    #print "cleanedTimeList : ", cleanedTimeList
    return cleanedTimeList
def onAddCost(self, event):
    #value = self.textList[3].GetLabelText()
    value = self.textList[3].GetValue()
    comments = self.textList[5].GetLabelText()
    # Keep only the integer part if a decimal point was entered.
    if '.' in str(value):
        value = value[:str(value).rfind('.')]
    if not str(value).isdigit() or 0 == len(str(value)):
        self.textList[6].SetLabel("value should be numbers...")
    else:
        # Encrypt the fields before they are stored in the cost table.
        name = ed.enDecryption.encryption(self.costNameForDB)
        value = ed.enDecryption.encryption(value)
        comments = ed.enDecryption.encryption(comments)
        curTime = cmm.getTimeAndWeek()[0]
        insertValue = (name, value, comments, curTime)
        db = dbo.DBOperation()
        db.insertData('cost', insertValue)
        self.Destroy()
def InitStockDB(file):
    # logger = LoggerFactory.getLogger("InitStockDB")
    # loggerBasicData = LoggerFactory.getLogger("GetStockBasicData")
    stocklist = StockDataByTX.GetAllStockCode(file)
    mytime = "str_to_date('%s'," % time.strftime('%Y-%m-%d') + "'%Y-%m-%d')"
    dboper = DBOperation.DBOperation()
    counter = 0
    for stock in stocklist:
        code = stock['code']
        stockBasicData = StockDataByTX.GetStockBasicData(code)
        if stockBasicData is not None:
            LoggerFactory.info("InitStockDB", "Processing: %s" % stockBasicData['name'])
            if stockBasicData['mount'] == 0:
                status = 0
            else:
                status = 1
            sql = ("insert into stocks(code, codealias, name, industry, circulated, totalstock, "
                   "status, peg, lyr, mtime, area) "
                   "values('%s', '%s', '%s', '%s', %0.2f, %0.2f, %d, %0.2f, %0.2f, %s, '%s')"
                   % (stockBasicData['code'], stockBasicData['codealias'], stockBasicData['name'],
                      stock['industry'], stockBasicData['circulated_stock'],
                      stockBasicData['total_stock'], status, stockBasicData['peg'],
                      stockBasicData['lyr'], mytime, stock['area']))
            dboper.sqlExecute(sql)
            counter = counter + 1
    LoggerFactory.info("InitStockDB",
                       "Stock DB initialization has completed! There are %s stocks created in the database!" % str(counter))
def swap_out(self, id):
    # Note: add_request and on_resume call this while already holding self.lock,
    # so self.lock must be reentrant (e.g. threading.RLock) for this to not deadlock.
    self.lock.acquire()
    try:
        # Write the eviction to the log table.
        room = room_dict[id]
        now_time = get_sys_time()
        sql = ("insert into log values('" + now_time + "','" + id + "','swap out','"
               + str(room.ac_tem) + "','" + str(room.ac_speed) + "')")
        DB_Log = DBOperation.IMapper()
        DB_Log.update(sql)
        DB_Log.DB.close_conn()
        # Move the request from the serve queue to the wait queue.
        result = find_request_by_id(id, self.serve_queue)
        self.wait_queue.append(self.serve_queue[result])
        self.wait_time_list.append(0)
        self.serve_queue.remove(self.serve_queue[result])
        self.serve_time_list.remove(self.serve_time_list[result])
        client = g_conn_pool_dict[id]
        client.sendall(WAITINGMSG.encode('utf-8'))
        room_dict[id].request_state = 'waiting'
    finally:
        self.lock.release()
def download_case_list_by_upload_period(year, start_date, end_date):
    search_criteria = "案件类型:刑事案件,审判程序:一审,法院地域:四川省,裁判年份:{},文书类型:判决书,上传日期:{} TO {}".format(
        year, start_date, end_date)
    total_number = get_total_number(search_criteria)
    if int(total_number) == 0:
        return None
    max_page = int(total_number) // 20 if int(total_number) % 20 == 0 else (int(total_number) // 20) + 1
    cases = download_all_caselist(search_criteria, max_page)
    db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666')
    length = len(cases['name'])
    for i in range(length):
        data = dict()
        for key in cases:
            if key == 'procedure':
                data['trial'] = cases[key][i]
            else:
                data[key] = cases[key][i]
        data['download'] = 'no'
        data['upload_date'] = start_date
        doc_id = db_sc_cases.get(StaticUtils.case_table, 'doc_id',
                                 'doc_id=\'{}\''.format(data['doc_id']))
        if not doc_id:
            fields_list = ["name", "doc_id", "date", "case_id", "trial", "court",
                           "download", "upload_date"]
            values = ''
            for key in fields_list:
                # Convert to str if the value is an int; quote it otherwise.
                if isinstance(data[key], int):
                    part = str(data[key])
                else:
                    part = "\'" + data[key] + "\'"
                if values:
                    values = values + "," + part
                else:
                    values = "(" + part
            values = values + ")"
            fields = ','.join(fields_list)
            db_sc_cases.insert(CASE_TABLE, fields, values)
            # db_sc_cases.insert(StaticUtils.case_table, data)
    db_sc_cases.commit()
    db_sc_cases.close()
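# A more compact way to build the same VALUES clause, shown as a sketch under
# the same assumption that every non-int field is a string. build_values is a
# hypothetical helper, not part of the original code.
def build_values(data, fields_list):
    parts = [str(data[k]) if isinstance(data[k], int) else "'" + data[k] + "'"
             for k in fields_list]
    return "(" + ",".join(parts) + ")"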
def blackBoxEnter(self, event):
    command = self.blackBox.GetValue()
    command = command.strip()  # str.strip() returns a new string; the result must be kept
    if 'costDate' not in command:
        # Encrypt the quoted literal so it matches the encrypted column values.
        varOld = ""
        if 'like' in command or 'LIKE' in command:
            varOld = command[command.find('\'') + 1:command.rfind('%')]
        elif '=' in command:
            varOld = command[command.find('\'') + 1:command.rfind('\'')]
        varNew = ed.enDecryption.encryption(varOld)
        varNew = varNew.strip()
        command = command.replace(varOld, varNew)
    dbo = DBOperation.DBOperation()
    raw = dbo.customizedFetch(command)
    (data, label) = cmm.decryptionList(raw)
    total = cmm.calculatingTotalCost(data)
    self.totalBoxList[1].SetValue(str(total))
    table = basicTable(data, rowLabel=label, colLabel=("Name", "Money", "Comments"))
    self.grid.SetTable(table)
    self.__setGridAttributes()
    self.Refresh()
playing_style, player_skills, com_playing_styles = [], [], []
# Open the database connection.
db = pymysql.connect(host="localhost", user="******", password="******", db="pesdb", port=3306)
cur = db.cursor()
gi.getpageinfo(page_num, players_urlid, players_name)
players_num = [((page_num - 1) * len(players_urlid) + i + 1) for i in range(len(players_urlid))]
for i in range(len(players_urlid)):
    gi.getplayerinfo(players_urlid[i][2:], players_name[i], data_name, level_datas,
                     other_info, playing_style, player_skills, com_playing_styles)
    print(playing_style, '\n', player_skills, '\n', com_playing_styles)
    db_o.insert_baseinfo(cur, players_num[i], players_urlid[i], players_name[i], other_info,
                         playing_style, player_skills, com_playing_styles)
    #for table_name, leval_data in level_datas.items():
    #    db_o.insert_abilitys(cur, table_name, players_num[i], players_name[i], leval_data)
    print('--------------------------------------')
# Close the cursor and the database.
cur.close()
db.close()
if __name__ == '__main__':
    print('2013 Data Processing running...')
    start = time.time()
    # init global vars
    # test data path
    # DATA_PATH = '../test'
    # DATA_PATH = os.path.abspath(os.path.dirname(__file__))
    DATA_PATH = '../..'
    hh_folders = ['{:0>2d}'.format(i) for i in range(24)]
    dates = ['20130501']
    # init instance of Spatial Unit
    su = SpatialUnit.SpatialUnit(DATA_PATH + '/TaxiData/SpatialUnit/TAZ2010.shp')
    # init db operator
    db = DBOperation.OpenTSDB(host='192.168.61.251', port=4242)
    # unzip and data format transform
    # generate daily data
    if os.path.exists(DATA_PATH + '/TaxiData/2013/2013.dat'):
        os.remove(DATA_PATH + '/TaxiData/2013/2013.dat')
    fp_year = get_year_fp()
    # detect trajectories for cabs
    # cabs = {'cab_id': [ weight, from_unit ]}
    print('Detecting ODs...')
    num_lines = sum(1 for _ in fp_year)
    fp_year.seek(0)
    cabs = {}
    ods = []
    num_error_on = 0
    num_error_off = 0
    count = 0
LoggerFactory.error("DailyHisData", "股票代码信息获取失败.....") def InsertHisData(code, dboper): realtimeData = StockDataByTX.CollectRealTimeData(code) if realtimeData is not None: LoggerFactory.info("InsertHisData", "正在处理: %s" % realtimeData['code']) DBDataHandle.InsertHisData(dboper, realtimeData) else: LoggerFactory.error("InsertHisData", "股票: %s 的相关信息获取失败..." % code) if "__name == __main__": # week_day = datetime.date.today().weekday() # # if ( week_day == 5 or week_day == 6 ): # # LoggerFactory.info("HisStock", "Today is weekend, no historic data need to be collected.....") # # else: dboper = DBOperation.DBOperation() DailyHisData(dboper)
def set_item(self, id):
    if id not in bill_dict.keys():
        bill_dict[id] = DBOperation.bill()
def test_addRecord(self):
    self.OBperation.a
    self.assertEquals(DBOperation.addRecord())
import DBOperation
# The modules below are used in the main block; they are assumed to be sibling
# project modules, imported here so the script is self-contained.
import HttpHandler
import StringHelper
import RegularExpressionParser
import Config
import logging
import json
import io
import os
import sys
import math
from bs4 import BeautifulSoup

# Call functions here
try:
    if __name__ == '__main__':
        HttpRequest = HttpHandler.HttpHandler()
        ObjStringUtil = StringHelper.StringHelper()
        ObjDbOperations = DBOperation.DBOperation()
        objRegularExpressionParser = RegularExpressionParser.RegularExpressionParser()
        infoLavel = Config.Config.LogLevel
        logging.info('Completed configuring logger()!')
        isRedirection = True
        Cookies = ""
        Refer = ""
        ResponseCookie = ""
        redirectionURL = ""
        # Category & Subcategory
        lastURLS = [None] * 2
        if len(sys.argv) > 1 and sys.argv[1] == "continue":
            print("\nContinuing script from last URL\n")
            # Get last URL
            with open('lastURL.txt') as f:
import DBOperation

db = DBOperation.DBOperation()
db.OpenDB("./test.db")  # test the database

print(db.getPairs())
db.addPair(1, "a", "b", "c")
db.addPair(2, "a", "bsd", "csd")
print(db.getPairs())
db.delPair(1)
db.updatePair(2, "GoodWe 5kw ES", "INV", "GoodWe_es_v3")
print(db.getPairs())
def get_latest_upload_date():
    db_sc_cases = DBOperation.MyDatabase('127.0.0.1', 'root', '082666')
    result = db_sc_cases.get_max_record(StaticUtils.case_table, 'upload_date')
    db_sc_cases.close()
    return result[0]
def set_fee(self, id):
    # Get the total fee for this room.
    report_dict = DBOperation.get_replist().get_report()
    R = report_dict[id]
    bill_dict[id].cost = R.cost
def add_request(self, request):
    self.lock.acquire()
    isok = False
    try:
        # Drop any stale copies of this request from all three queues.
        result = find_request_by_id(request.id, self.serve_queue)
        if result != FAILMSG:
            self.serve_queue.remove(self.serve_queue[result])
            self.serve_time_list.remove(self.serve_time_list[result])
        result = find_request_by_id(request.id, self.wait_queue)
        if result != FAILMSG:
            self.wait_queue.remove(self.wait_queue[result])
            self.wait_time_list.remove(self.wait_time_list[result])
        result = find_request_by_id(request.id, self.pause_queue)
        if result != FAILMSG:
            self.pause_queue.remove(self.pause_queue[result])
        if len(self.serve_queue) < int(self.max_serve_num) and len(self.wait_queue) == 0:
            # Serve queue has a free slot and nothing is waiting: admit directly.
            self.serve_queue.append(request)
            self.serve_time_list.append(0)
            client = g_conn_pool_dict[request.id]
            client.sendall(RUNNINGMSG.encode('utf-8'))
            room_dict[request.id].request_state = 'running'
            isok = True
        else:
            # Serve queue has no free slot: admit selectively.
            if request.speed != LOW:
                # Not low fan speed: it may preempt its way into the serve queue.
                locate = self.select_lowest_request()
                if self.serve_queue[locate].speed < request.speed:
                    self.swap_out(self.serve_queue[locate].id)
                    self.serve_queue.append(request)
                    self.serve_time_list.append(0)
                    client = g_conn_pool_dict[request.id]
                    client.sendall(RUNNINGMSG.encode('utf-8'))
                    room_dict[request.id].request_state = 'running'
                    isok = True
                else:
                    # No weaker request to evict: go straight to the wait queue.
                    self.wait_queue.append(request)
                    self.wait_time_list.append(0)
                    client = g_conn_pool_dict[request.id]
                    client.sendall(WAITINGMSG.encode('utf-8'))
                    room_dict[request.id].request_state = 'waiting'
            elif request.speed == LOW:
                # Low fan speed: go straight to the wait queue.
                self.wait_queue.append(request)
                self.wait_time_list.append(0)
                client = g_conn_pool_dict[request.id]
                client.sendall(WAITINGMSG.encode('utf-8'))
                room_dict[request.id].request_state = 'waiting'
        # Write the result to the log table.
        now_time = get_sys_time()
        state = 'running' if isok else 'waiting'
        sql = ("insert into log values('" + now_time + "','" + request.id + "','" + state
               + "','" + str(request.tem) + "','" + str(request.speed) + "')")
        DB_Log = DBOperation.IMapper()
        DB_Log.update(sql)
        DB_Log.DB.close_conn()
    finally:
        self.lock.release()