def translate_category(self):
    """Translate every category title into English and persist it.

    Reads all rows from `category`, runs each `title` through
    ``self._translator`` and writes the result back to `title_en` in
    batches of ``self.BATCH_SIZE`` via ``MySQLHelper.update_many``.
    Errors are rolled back (when a transaction is open) and logged.
    """
    update_query = 'update `category` set `title_en`=%(title_en)s where `id`=%(id)s'
    conn = MySQLHelper.connection_inst()
    conn.connect()
    cnx = None
    try:
        cnx = conn.cursor()
        query = 'select `id`, `title` from `category` where id > 0'
        cnx.execute(query)
        translated = []  # type: List[Dict[str, str]]
        for _id, title in cnx.fetchall():
            translated.append({
                'id': _id,
                'title_en': self._translator.translate(title)
            })
            # Flush full batches so memory stays bounded on large tables.
            if len(translated) >= self.BATCH_SIZE:
                MySQLHelper.update_many(update_query, translated)
                translated = []
        # Flush the final partial batch.
        if translated:
            MySQLHelper.update_many(update_query, translated)
    except Exception as e:
        if conn.in_transaction:
            conn.rollback()
        logging.error(e)
    finally:
        # BUGFIX: the cursor was previously only dereferenced
        # (`cnx = None`), never closed -- it leaked on every call.
        if cnx is not None:
            cnx.close()
        conn.close()
def getAllContent(self):
    """Fetch board content for every configured url entry.

    For each entry, the infoIds already stored in tblJobInfo for that
    bbsId (within ``self.timeCondition``) are loaded into
    ``self.existInfoIdList`` so ``getContent`` can skip duplicates.
    ``self.urls`` may be a single dict or a list of dicts.
    """
    sqlHelper = MySQLHelper('101.1.16.50', 's559384db0', '995y455y', 's559384db0')
    # Normalize the single-dict form to a one-element list so both
    # shapes share one code path (the original duplicated the body).
    # Also replaces the `type(x) == list` anti-pattern with isinstance.
    if isinstance(self.urls, list):
        urlItems = self.urls
    else:
        urlItems = [self.urls]
    for urlItem in urlItems:
        # NOTE(review): string-built SQL; bbsId comes from local config
        # here, but a parameterized query would still be safer.
        sqlStr = ("select infoId from tblJobInfo where bbsId = "
                  + str(urlItem["bbsId"]) + " and " + self.timeCondition)
        temptInfoList = sqlHelper.queryAll(sqlStr)
        self.existInfoIdList = []
        for infoDic in temptInfoList:
            self.existInfoIdList.append(infoDic["infoId"])
        self.getContent(urlItem)
def main():
    # Copies (b.id, c.key_id) pairs for company 14 from tracker2 into
    # infback_Tmp, scanning b.id in 1M-row windows up to 123M rows.
    tracker2 = MySQLHelper(tracker2_host, tracker2_user, tracker2_pass, tracker2_db)
    countid = 0         # window lower bound (exclusive)
    countid1 = 1000000  # window upper bound (inclusive)
    while countid < 123000000:
        select = """SELECT b.id, c.key_id FROM tracker2.matchedVideoP2PItem AS b, tracker2.matchedVideo AS c WHERE b.company_id = 14 AND c.company_id = 14 AND b.matchedVideo_id = c.id and b.id > %s and b.id <= %s""" % (countid, countid1)
        countid += 1000000
        countid1 += 1000000
        print select
        select_result = tracker2.query_sql_cmd(select)
        print "fetched!"
        # Retry the target connection up to 10 times before inserting.
        retrytracker2 = 0
        while retrytracker2 < 10:
            try:
                print "retrytracker2:"
                print retrytracker2
                target = MySQLHelper(t_host, t_user, t_pass, t_db)
                print "start insert!"
                print datetime.datetime.now()
                break
            except Exception, e:
                print e
                retrytracker2 += 1
        #target = MySQLHelper(t_host,t_user,t_pass,t_db)
        # NOTE(review): if all 10 connection attempts fail, `target` is
        # unbound and the insert below raises NameError.
        # The .replace() calls strip the `L`/`u` suffixes that Python 2
        # long/unicode reprs add -- but they also strip literal 'L'/'u'
        # characters occurring inside the data itself; verify before reuse.
        insert_result = "insert into infback_Tmp " + " values " + str(select_result)[1:-1].replace("L", "").replace("u", "")
        target.insert_sql_cmd(insert_result)
        print "finished!"
        print datetime.datetime.now()
def Main_CrawlNovelData(pp): sql = 'select distinct novelurl from novelurl order by rundate desc limit %s offset %s' % ( 300, int(pp) * 300) a = MySQLHelper() NovelUrl = a.SqlFecthAll(sql) a.CloseCon() proxies = fetchproxies(40) for i in NovelUrl: try: header = get_header() proxy = random.choice(proxies) print i[0] CrawlNovelData(i[0], header, proxy) except Exception as e: print 'error' print e.message pass
def translate_property(self):
    """Translate every distinct property key into English and persist it.

    Non-null, non-empty `key` values are read from `property`, run
    through ``self._translator`` and written back to `key_en` in
    batches of ``self.BATCH_SIZE`` via ``MySQLHelper.update_many``.
    """
    query = 'select distinct `key` as `key` from `property`' \
            ' where `key` is not null and `key` <> \'\' and `value` is not null'
    keys = MySQLHelper.fetch_all(query)
    update_query = 'update `property` set `key_en`=%(key_en)s where `key`=%(key)s'
    translated = []  # type: List[Dict[str, str]]
    for k in keys:
        translated.append({
            'key': k[0],
            'key_en': self._translator.translate(k[0])
        })
        # BUGFIX: was `>`, which let batches grow to BATCH_SIZE + 1 and
        # disagreed with translate_category's `>=` flush condition.
        if len(translated) >= self.BATCH_SIZE:
            MySQLHelper.update_many(update_query, translated)
            translated = []
    # Flush the final partial batch.
    if translated:
        MySQLHelper.update_many(update_query, translated)
def main():
    """Demo round-trip against the local `pymysql` test database:
    insert one row into tb_stu, then print every row."""
    helper = MySQLHelper('127.0.0.1', 'root', '', 'pymysql')
    helper.connect()
    # Insert a single student record.
    helper.cud(sql='insert into `tb_stu`(`name`) values(%s)', params=["mary"])
    # Read everything back and echo id/name pairs.
    for row in helper.query(sql="select * from `tb_stu`"):
        print("id:%s,name:%s" % (row[0], row[1]))
    # Release the connection.
    helper.close()
class DataDaemon:
    """Periodic data gatherer: samples instantaneous demand from the
    Raven device and stores each reading via MySQLHelper.

    Each sample reschedules itself with ``threading.Timer`` every
    ``7 * SECOND``, so readings run on fresh timer threads.
    """

    def __init__(self, run):
        # run: identifier stored alongside every demand sample.
        self.__run = run
        self.__raven = Raven()      # hardware meter interface
        self.__db = MySQLHelper()   # storage backend
        self.__running = False      # gate checked before each sample

    def storeRavenData(self):
        """Take one demand reading, persist it, schedule the next one."""
        try:
            if not self.__raven.exists():
                self.__raven.refresh()
            if self.__running and self.__raven.exists():
                GET_DEMAND = {'Name': 'get_instantaneous_demand'}
                self.__raven.write(GET_DEMAND)
                time.sleep(.1)  # give the device time to answer
                XMLresponse = self.__raven.read()
                if (XMLresponse.tag != 'InstantaneousDemand'):
                    # Sentinel row marks an unexpected/failed reading.
                    data = (-1, -1, -1, self.__run)
                else:
                    attributes = list(XMLresponse)
                    attribute_list = [3, 4, 5]
                    # XML frag contains hex, convert to decimal
                    data = tuple(
                        int(attributes[i].text, 16) for i in attribute_list)
                    data += (self.__run, )
                print(data)
                self.__db.insertDemandData(data)
            # NOTE(review): rescheduled unconditionally so a stopped
            # daemon keeps an (idle) timer chain alive -- confirm intent.
            threading.Timer(7 * SECOND, self.storeRavenData).start()
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt. Note no reschedule happens
            # after a failure, so the sampling chain stops here.
            print(sys.exc_info(), file=sys.stderr)

    def start(self):
        """Begin the sampling loop."""
        self.__running = True
        self.storeRavenData()

    def stop(self):
        """Stop sampling; an already-scheduled timer may still fire."""
        self.__running = False
def getContact(self): contactList = [] sqlHelper = MySQLHelper('101.1.16.46','s559384db0','995y455y','s559384db0'); dbUserList = sqlHelper.queryAll("SELECT fakeId FROM tblUserInfo ") dbFakeIdList = [] for user in dbUserList: dbFakeIdList.append(user["fakeId"]) print dbFakeIdList print self.fakeIdList[0:2] for fakeId in self.fakeIdList: if fakeId not in dbFakeIdList: print fakeId self.pushWelcomeMsg(fakeId) userContact = self.getUserInfo(fakeId) encoding = chardet.detect(userContact)['encoding'] if userContact != 'utf-8': userContact = userContact.decode(encoding, 'replace').encode('utf-8') contactList.append(userContact) for value in contactList: insertValue = eval(value) del insertValue["Groups"] sqlHelper.insert("tblUserInfo", insertValue)
def NovelCrawl_Main(count): url = r'http://www.yousuu.com/booklist' proxies = fetchproxies() for i in range(count): try: header = get_header() proxy = random.choice(proxies) url = CrawlNovelList(url, header, proxy) except Exception as e: print 'error' print e.message pass sql = 'select distinct novellisturl from pagenovel ' a = MySQLHelper() NovelListUrl = a.SqlFecthAll(sql) a.CloseCon() print len(NovelListUrl) #proxies = fetchproxies() for i in NovelListUrl: try: header = get_header() proxy = random.choice(proxies) print i[0] CrawlNovel(i[0], header, proxy) except Exception as e: print 'error' print e.message pass a = MySQLHelper() sql = 'select distinct novelurl from novelurl' NovelUrl = a.SqlFecthAll(sql) a.CloseCon() print len(NovelUrl) #proxies = fetchproxies() for i in NovelUrl: try: header = get_header() proxy = random.choice(proxies) print i[0] CrawlNovelData(i[0], header, proxy) except Exception as e: print 'error' print e.message pass
def insetToDB():
    """Crawl job postings from the configured telnet-BBS and BYR boards
    and insert every parsed item into tblJobInfo.

    NOTE(review): the name keeps the original 'insetToDB' misspelling;
    renaming would break external callers.
    """
    jobInfo = []
    # Telnet-style boards: newsmth Career_Campus and lsxk Part_timeJob.
    telnetBbsContent = TelnetBBSContent([{
        "url": "http://www.newsmth.net/bbsdoc.php?board=Career_Campus",
        "contentUrl": "http://www.newsmth.net/bbscon.php?",
        "bbsId": 2,
        "flag": "docWriter"
    }, {
        "url": "http://www.lsxk.org/bbsdoc.php?board=Part_timeJob",
        "contentUrl": "http://www.lsxk.org/bbscon.php?",
        "bbsId": 6,
        "flag": "docWriter"
    }])
    telnetBbsContent.getAllContent()
    # BYR board is parsed with CSS selectors for anchors and timestamps.
    BYRbbsContent = BYRBBSParser({
        "url": "http://bbs.byr.cn/board/JobInfo?p=",
        "contentUrl": "http://bbs.byr.cn",
        "bbsId": 4,
        "selector": {
            "parseAnchor": ".title_9",
            "parserTimeStamp": ".title_10"
        }
    })
    BYRbbsContent.getAllContent()
    print "-----------------------*******-------------------------------"
    print "BYR: " + str(BYRbbsContent.finalInfo)
    print "Telnet: " + str(telnetBbsContent.finalInfo)
    print "SaveTime: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print "-----------------------*******-------------------------------"
    jobInfo.extend(telnetBbsContent.finalInfo)
    jobInfo.extend(BYRbbsContent.finalInfo)
    # NOTE(review): credentials are hard-coded here (and duplicated in
    # other helpers in this file) -- should move to configuration.
    sqlHelper = MySQLHelper('101.1.16.50', 's559384db0', '995y455y', 's559384db0');
    for value in jobInfo:
        print value
        sqlHelper.insert("tblJobInfo", value)
def getInfoContent(self):
    # Builds self.resultContent: an HTML digest of job postings saved
    # in roughly the last 7 hours, for the wechat auto-reply.
    # NOTE(review): duplicates AutoSendMsg.getInfoContent -- keep in sync.
    self.sqlHelper = MySQLHelper('101.1.16.50', 's559384db0', '995y455y', 's559384db0');
    # Window: 7 hours back to 1 hour ahead, truncated to the hour.
    startTimeStamp = datetime.now() + timedelta(hours=-7)
    endTimeStamp = datetime.now() + timedelta(hours=1)
    startTime = datetime(startTimeStamp.year, startTimeStamp.month, startTimeStamp.day, startTimeStamp.hour).strftime("%Y-%m-%d %H:%M:%S")
    endTime = datetime(endTimeStamp.year, endTimeStamp.month, endTimeStamp.day, endTimeStamp.hour).strftime("%Y-%m-%d %H:%M:%S")
    timeCondition = "saveTime between '" + startTime + "' and '" + endTime + "'"
    sqlStr = "select * from tblJobInfo where " + timeCondition + " order by id desc limit 24"
    print sqlStr
    temptInfoList = self.sqlHelper.queryAll(sqlStr)
    if len(temptInfoList) == 0:
        # Nothing new: flag so the caller skips sending a message.
        self.isNotSendMsg = True
        return
    resultContent = ""
    for index, value in enumerate(temptInfoList):
        # Only every second row is included -- presumably to cap the
        # digest length; TODO confirm.
        if index % 2 == 1:
            contentUrl = "http://campus4me.net/showMsgContent.php?msgId=" + value["id"]
            resultContent += "名称:" + value["title"] + "<a href=\"" + contentUrl + "\">[查看详情]</a>\n\n"
    self.resultContent = resultContent + "亲,记得回复‘h’显示更多帮助哦!!"
    print self.resultContent
class DataDaemon:
    """Single-threaded data gatherer: polls the Raven meter in an
    endless loop and stores each demand sample via MySQLHelper.

    (Per the original note, multithreading was intended but shelved due
    to issues; this variant sleeps 8 s between samples instead.)
    """

    def __init__(self, run):
        # run: external identifier for this gathering session.
        self.__run = run
        # Unique tag written with every row produced by this run.
        self.__hash = str(uuid.uuid4())
        self.__raven = Raven()     # hardware meter interface
        self.__db = MySQLHelper() # storage backend

    def _begin(self):
        """Record this run's (hash, run) pair before sampling starts."""
        self.__db.insertRunInfo([self.__hash, self.__run])

    def run(self):
        """Sample forever: roughly one reading every 8 seconds."""
        self._begin()
        # Sentinel row stored whenever no real reading is available.
        FAKE_DATA = (-1, -1, -1, self.__run, self.__hash)
        while True:
            data = FAKE_DATA
            try:
                if not self.__raven.exists():
                    print("searching...")
                    self.__raven.refresh()
                if self.__raven.exists():
                    GET_DEMAND = {'Name': 'get_instantaneous_demand'}
                    self.__raven.write(GET_DEMAND)
                    time.sleep(.1)  # give the device time to answer
                    XMLresponse = self.__raven.read()
                    if (XMLresponse.tag == 'InstantaneousDemand'):
                        attributes = list(XMLresponse)
                        attribute_list = [3, 4, 5]
                        # XML frag contains hex, convert to decimal
                        data = tuple(int(attributes[i].text, 16)
                                     for i in attribute_list)
                        data += (self.__run, self.__hash)
                # Stores the sentinel when no device/reading was found.
                print(data)
                self.__db.insertDemandData(data)
            except Exception:
                # BUGFIX: was a bare `except:` that also swallowed
                # SystemExit/KeyboardInterrupt. Reset the device handle
                # and store the sentinel row instead.
                # NOTE(review): refresh()/insertDemandData() here can
                # themselves raise and kill the loop -- confirm intent.
                self.__raven._raven = None
                self.__raven.refresh()
                data = FAKE_DATA
                self.__db.insertDemandData(data)
                print(sys.exc_info(), file=sys.stderr)
            time.sleep(8)
def __init__(self, run):
    # run: external identifier for this gathering session.
    self.__run = run
    # Unique hash tagging every row written during this run.
    self.__hash = str(uuid.uuid4())
    self.__raven = Raven()     # hardware meter interface
    self.__db = MySQLHelper() # storage backend
#cursor.execute("drop index meta_isp on infringingP2P_Tmp") cursor.execute("drop index meta_isp on allP2P_Tmp") pre_DASHBOARD.close() print "truncate finished!" '''tracker2 = MySQLHelper(tracker2_host,tracker2_user,tracker2_pass,tracker2_db) print datetime.datetime.now() infP2P_Tmp_select = """SELECT a.meta_id, a.isp_id, c.key_id AS protocol_hash, count(a.IP) AS infringingIPs FROM tracker2.takedownNoticeItemP2PDetail AS a, tracker2.matchedVideoP2PItem AS b, tracker2.matchedVideo AS c WHERE a.company_id = 14 AND b.company_id = 14 AND c.company_id = 14 AND a.matchedVideoP2PItem_id = b.id AND b.matchedVideo_id = c.id AND a.first_notice_send_time >= DATE_SUB('%s 08:00:00', INTERVAL +1 DAY) AND a.first_notice_send_time < DATE_SUB('%s 08:00:00', INTERVAL 0 DAY) GROUP BY a.meta_id, a.isp_id, c.key_id""" %(ETL_DTE,ETL_DTE) print infP2P_Tmp_select infP2P_result = tracker2.query_sql_cmd(infP2P_Tmp_select) print "inf fetched"''' retryVIADASHBOARD = 0 while retryVIADASHBOARD < 10: try: print "retryVIADASHBOARD:" print retryVIADASHBOARD VIACOM_DASHBOARD = MySQLHelper(DASHBOARD_host, DASHBOARD_user, DASHBOARD_pass, DASHBOARD_db) break except Exception, e: print e retryVIADASHBOARD += 1 '''infP2P_Tmp_insert = "insert into infringingP2P_Tmp " + " values " + str(infP2P_result)[1:-1].replace("L", "").replace("u","") VIACOM_DASHBOARD.insert_sql_cmd(infP2P_Tmp_insert) print datetime.datetime.now()''' retryTime_ware = 0 while retryTime_ware < 10: try: print "retryTime_ware:" print retryTime_ware p2pwarehouse = MySQLHelper(p2pwarehouse_host, p2pwarehouse_user, p2pwarehouse_pass, p2pwarehouse_db)
break except Exception,e: print e retryTime += 1 cursor.execute("truncate table allP2P_Tmp_150304;drop index meta_isp on allP2P_Tmp_150304") cursor.close() #DASHBOARD.commit() print "truncate finished!" retryTime_ware = 0 while retryTime_ware < 10: try: print "retryTime_ware:" print retryTime_ware p2pwarehouse = MySQLHelper(p2pwarehouse_host, p2pwarehouse_user, p2pwarehouse_pass, p2pwarehouse_db) break except Exception,e: print e retryTime_ware += 1 #sql_set_time_zone = """set time_zone = '-8:00';""" #db1.query_sql_cmd(sql_set_time_zone) #print sql_set_time_zone allP2P_Tmp_select = """SELECT a.trackingMeta_id AS meta_id, a.isp_id, a.protocol_hash, count(a.peer_ip_address) AS allIPs FROM infringmentSummary20150304 AS a WHERE a.created_at >= DATE_SUB('%s 08:00:00', INTERVAL +1 DAY) AND a.created_at < DATE_SUB('%s 08:00:00', INTERVAL 0 DAY) GROUP BY a.trackingMeta_id, a.isp_id, a.protocol_hash;""" %(ETL_DTE,ETL_DTE) print allP2P_Tmp_select allP2P_result = p2pwarehouse.query_sql_cmd(allP2P_Tmp_select) print "all fetched" #print len(Res) allP2P_Tmp_insert = "insert into allP2P_Tmp_150304 " + " values " + str(allP2P_result)[1:-1].replace("L", "").replace("u","") retryVIADASHBOARD = 0
print e retryTime += 1 cursor.execute("truncate table infringingP2P_Tmp") #cursor.execute("truncate table allP2P_Tmp") cursor.execute("drop index meta_isp on infringingP2P_Tmp") #cursor.execute("drop index meta_isp on allP2P_Tmp") pre_DASHBOARD.close() print "truncate finished!" retrytracker2 = 0 while retrytracker2 < 10: try: print "retrytracker2:" print retrytracker2 tracker2 = MySQLHelper(tracker2_host,tracker2_user,tracker2_pass,tracker2_db) break except Exception,e: print e retrytracker2 += 1 #tracker2 = MySQLHelper(tracker2_host,tracker2_user,tracker2_pass,tracker2_db) print datetime.datetime.now() infP2P_Tmp_select = """SELECT a.meta_id, a.isp_id, c.key_id AS protocol_hash, count(a.IP) AS infringingIPs FROM tracker2.takedownNoticeItemP2PDetail AS a, tracker2.matchedVideoP2PItem AS b, tracker2.matchedVideo AS c WHERE a.company_id = 14 AND b.company_id = 14 AND c.company_id = 14 AND a.matchedVideoP2PItem_id = b.id AND b.matchedVideo_id = c.id AND a.first_notice_send_time >= DATE_SUB('%s 08:00:00', INTERVAL +1 DAY) AND a.first_notice_send_time < DATE_SUB('%s 08:00:00', INTERVAL 0 DAY) GROUP BY a.meta_id, a.isp_id, c.key_id""" %(ETL_DTE,ETL_DTE) print infP2P_Tmp_select infP2P_result = tracker2.query_sql_cmd(infP2P_Tmp_select) print "inf fetched" retryVIADASHBOARD = 0 while retryVIADASHBOARD < 10: try: print "retryVIADASHBOARD:" print retryVIADASHBOARD
except Exception, e: print e retryTime += 1 cursor.execute("truncate table infringingP2P_Tmp") #cursor.execute("truncate table allP2P_Tmp") cursor.execute("drop index meta_isp on infringingP2P_Tmp") #cursor.execute("drop index meta_isp on allP2P_Tmp") pre_DASHBOARD.close() print "truncate finished!" retrytracker2 = 0 while retrytracker2 < 10: try: print "retrytracker2:" print retrytracker2 tracker2 = MySQLHelper(tracker2_host, tracker2_user, tracker2_pass, tracker2_db) break except Exception, e: print e retrytracker2 += 1 #tracker2 = MySQLHelper(tracker2_host,tracker2_user,tracker2_pass,tracker2_db) print datetime.datetime.now() infP2P_Tmp_select = """SELECT a.meta_id, a.isp_id, c.key_id AS protocol_hash, count(a.IP) AS infringingIPs FROM tracker2.takedownNoticeItemP2PDetail AS a, tracker2.matchedVideoP2PItem AS b, tracker2.matchedVideo AS c WHERE a.company_id = 14 AND b.company_id = 14 AND c.company_id = 14 AND a.matchedVideoP2PItem_id = b.id AND b.matchedVideo_id = c.id AND a.first_notice_send_time >= DATE_SUB('%s 08:00:00', INTERVAL +1 DAY) AND a.first_notice_send_time < DATE_SUB('%s 08:00:00', INTERVAL 0 DAY) GROUP BY a.meta_id, a.isp_id, c.key_id""" % ( ETL_DTE, ETL_DTE) print infP2P_Tmp_select infP2P_result = tracker2.query_sql_cmd(infP2P_Tmp_select) print "inf fetched" retryVIADASHBOARD = 0 while retryVIADASHBOARD < 10: try: print "retryVIADASHBOARD:"
#coding= utf-8 import re import time import cx_Oracle from MySQLHelper import MySQLHelper a = MySQLHelper() sql = 'select * from fun.noveldata order by rundate desc ' rawdata = a.SqlFecthAll(sql) a.CloseCon() conn = cx_Oracle.connect('system/Syy19930119@localhost:1521/orcl') cursor = conn.cursor() try: cursor.execute('drop table SYY_NOVELDATA') except: pass sql = """ CREATE TABLE system.SYY_NOVELDATA (\ RUNDATE DATE\ ,NOVELURL VARCHAR2(250)\ ,NOVELNAME VARCHAR2(250)\ ,NOVEL_ORGURL VARCHAR2(250)\ ,TAG_CATEGORY VARCHAR2(250)\ ,AUTHOR VARCHAR2(250)\ ,NOVELRANK VARCHAR2(250)\ ,WORLDCOUNT VARCHAR2(250)\ ,SECTIONCOUNT VARCHAR2(250)\
class AutoSendMsg():
    """Logs into the wechat MP backend and pushes the recent job-info
    digest to every known subscriber via the singlesend endpoint."""

    def __init__(self, userName, pwd):
        # Wechat MP account credentials consumed by WechartToken.
        self.userName = userName
        self.pwd = pwd
        # Set to True by getInfoContent when there is nothing to send.
        self.isNotSendMsg = False
        self.msgUrl = 'https://mp.weixin.qq.com/cgi-bin/singlesend?t=ajax-response&lang=zh_CN'

    def getRequestInfo(self):
        """Log in and capture the headers/cookies/token needed to post."""
        wecharInfo = WechartToken(self.userName, self.pwd)
        # Headers mimic a browser XHR so the MP backend accepts the post.
        self.headers = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Referer': 'https://mp.weixin.qq.com/cgi-bin/singlemsgpage',
            'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.63 Safari/537.31',
            'X-Requested-With': 'XMLHttpRequest'
        }
        self.cookies = dict(wecharInfo.cookie)
        self.token = wecharInfo.token

    def getInfoContent(self):
        """Build self.resultContent from tblJobInfo rows saved in
        roughly the last 7 hours; sets isNotSendMsg when none exist."""
        self.sqlHelper = MySQLHelper('101.1.16.50', 's559384db0', '995y455y', 's559384db0');
        # Window: 7 hours back to 1 hour ahead, truncated to the hour.
        startTimeStamp = datetime.now() + timedelta(hours=-7)
        endTimeStamp = datetime.now() + timedelta(hours=1)
        startTime = datetime(startTimeStamp.year, startTimeStamp.month, startTimeStamp.day, startTimeStamp.hour).strftime("%Y-%m-%d %H:%M:%S")
        endTime = datetime(endTimeStamp.year, endTimeStamp.month, endTimeStamp.day, endTimeStamp.hour).strftime("%Y-%m-%d %H:%M:%S")
        timeCondition = "saveTime between '" + startTime + "' and '" + endTime + "'"
        sqlStr = "select * from tblJobInfo where " + timeCondition + " order by id desc limit 24"
        print sqlStr
        temptInfoList = self.sqlHelper.queryAll(sqlStr)
        if len(temptInfoList) == 0:
            # Nothing new: flag so sendMsg skips sending.
            self.isNotSendMsg = True
            return
        resultContent = ""
        for index, value in enumerate(temptInfoList):
            # Only every second row is included -- presumably to cap
            # the digest length; TODO confirm.
            if index % 2 == 1:
                contentUrl = "http://campus4me.net/showMsgContent.php?msgId=" + value["id"]
                resultContent += "名称:" + value["title"] + "<a href=\"" + contentUrl + "\">[查看详情]</a>\n\n"
        self.resultContent = resultContent + "亲,记得回复‘h’显示更多帮助哦!!"
        print self.resultContent

    def sendMsg(self):
        """Send the digest to every fakeId in tblUserInfo, skipping the
        hard-coded id "31164395"."""
        sqlUserFakeId = "select fakeId from tblUserInfo"
        self.getInfoContent()
        print "isNotSendMsg: " + str(self.isNotSendMsg)
        if self.isNotSendMsg == True:
            return
        self.getRequestInfo()
        fakeIdList = self.sqlHelper.queryAll(sqlUserFakeId)
        sendMsg = {
            'type': 1,
            'error': False,
            'tofakeid': '',  # filled in per recipient below
            'token': self.token,
            'content': self.resultContent,
            'ajax': 1
        }
        print sendMsg
        sendedUser = []
        # NOTE(review): hasSendedUser is always empty here, so the
        # "not in hasSendedUser" test below never filters anything.
        hasSendedUser = []
        print len(hasSendedUser)
        for index, infoItem in enumerate(fakeIdList):
            if infoItem["fakeId"] != "31164395" and infoItem["fakeId"] not in hasSendedUser:
                #if infoItem["fakeId"] == "520181815":
                sendMsg.update({'tofakeid': infoItem["fakeId"]})
                sendedUser.append(infoItem["fakeId"])
                print sendedUser
                responseMsg = requests.post(self.msgUrl, data=sendMsg, headers=self.headers, cookies=self.cookies);
                print responseMsg.content
        print sendedUser
def __init__(self, run):
    # run: identifier stored alongside every demand sample.
    self.__run = run
    self.__raven = Raven()     # hardware meter interface
    self.__db = MySQLHelper() # storage backend
    # Gate checked by the sampling loop before taking a reading.
    self.__running = False