from databaseUtil import DatabaseUtil
from sessionUtil import SessionUtil
from logUtil import LogUtil
from dictUtil import DictUtil
import traceback
import json


def handleData(returnStr):
    """Parse the JSON text ``returnStr`` and return its 'date' and 'data1' values.

    Returns:
        A ``(dateList, dataList)`` tuple. An element is ``None`` when the
        corresponding key is absent from the parsed object.

    Raises:
        ValueError: if ``returnStr`` is not valid JSON.
    """
    jsonData = json.loads(returnStr)
    return jsonData.get('date'), jsonData.get('data1')


def storeData(dateStr, dataStr, conn, cur, wdzjPlatId):
    """Insert one (date, incomeRate, wdzjPlatId) row into platIncomeRate and commit.

    The values are bound as query parameters instead of being concatenated
    into the statement, so quotes inside them cannot change the SQL.

    Args:
        dateStr: value for the ``date`` column.
        dataStr: value for the ``incomeRate`` column.
        conn: connection on which ``commit()`` is called.
        cur: cursor used to execute the insert.
        wdzjPlatId: value for the ``wdzjPlatId`` column.
    """
    # NOTE(review): '%s' placeholders assume a MySQL-style DB-API driver
    # (e.g. pymysql / MySQLdb) behind DatabaseUtil.getConn() -- confirm.
    sql = 'insert into platIncomeRate (date,incomeRate,wdzjPlatId) values (%s,%s,%s)'
    cur.execute(sql, (dateStr, dataStr, wdzjPlatId))
    conn.commit()


conn, cur = DatabaseUtil().getConn()
session = SessionUtil()
logUtil = LogUtil("platIncomeRate.log")

# Collect the platform ids recorded for the hard-coded month.
cur.execute('select wdzjPlatId from platData where month="2017-06"')
data = cur.fetchall()
print(data)

mylist = []
for row in data:
    # Rows are read with .get(), i.e. they are expected to be dict-like.
    platId = str(row.get('wdzjPlatId'))
    mylist.append(platId)

# Parenthesised form prints the same text on Python 2 and Python 3.
print(mylist)

for i in mylist:
    # NOTE(review): the visible source stops after this assignment; the rest
    # of the loop body is not shown here.
    url = 'http://shuju.wdzj.com/plat-info-target.html'
planList=jsonData.get('data').get('plans') return planList def storeData(jsonOne,conn,cur,logUtil,loanId): amount=jsonOne.get('amount') earnInterest=jsonOne.get('earnInterest') expectedYearRate=jsonOne.get('expectedYearRate') fundsuserRate=jsonOne.get('fundsUseRate') planId=jsonOne.get('id') name=jsonOne.get('name') status=jsonOne.get('status') subpointCountActual=jsonOne.get('subpointCountActual') sql='insert into RRDXinPlanList (amount,earnInterest,expectedYearRate,fundsUseRate,planId,name,status,subpointCountActual) values ("'+amount+'","'+earnInterest+'","'+expectedYearRate+'","'+fundsuserRate+'","'+planId+'","'+name+'","'+status+'","'+subpointCountActual+'")' print(sql) logUtil.warning(loanId) cur.execute(sql) conn.commit() session=SessionUtil() conn,cur=DatabaseUtil().getConn() logUtil=LogUtil("uplanList.log") for i in range(1,73): url='https://www.renrendai.com/autoinvestplan/listPlan!listPlanJson.action?pageIndex='+str(i)+'&_='+str(int(time.time())) try: planList=handleData(session.getReq(url)) for j in range(len(planList)): dictObject=DictUtil(planList[j]) storeData(dictObject,conn,cur,logUtil,str(i)) except Exception,e: logUtil.warning(traceback.print_exc()) cur.close() conn.close()
if choice == "A" or choice == "a": objBluetooth = BluetoothID() print("Performing Inquiry") #Looking for nearby devices nearby_devices = discover_devices(lookup_names = True) print ("found %d devices" %len(nearby_devices)) #Print out all the nearby devices for name in nearby_devices: print( "%s -%s" % (name)) #Fetching the mac address registered in the database obj = DatabaseUtil() phone = obj.getDeviceID("Altis") #split tuple so we get just the mac addr (addr, ownder) = name objBluetooth.Verify(addr, phone) elif choice == "B" or choice == "b": """ Main entry point of the program """ obj = QRCodeScanner() arguments = obj.argumentParser() # initialize the camera
def __init__(self):
    """Create a DatabaseUtil instance and keep it on the object as ``dbUtil``."""
    # Initialize a database object
    self.dbUtil = DatabaseUtil()
class NewsCrawler:
    """Fetch news listings, derive keywords and a summary, and store each item.

    The class reads several names it does not define itself (``DatabaseUtil``,
    ``BeautifulSoup``, ``SnowNLP``, ``requests``, ``newsUrl``, ``globalParams``,
    ``headers``, ``Dummy``); they must be available at module level.
    """

    def __init__(self):
        # Database helper used by parseNewsDetail() to store parsed entities.
        self.dbUtil = DatabaseUtil()

    @staticmethod
    def _decodeBody(raw: bytes) -> str:
        """Decode an already-read response body as UTF-8, falling back to GBK.

        In the GBK fallback, undecodable bytes are replaced rather than
        raising, so one malformed byte does not abort the crawl.
        """
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            return raw.decode('gbk', errors='replace')

    def getNewsContent(self, url: str):
        """Return the non-empty paragraph texts of the article found at ``url``.

        The article container is looked up as ``div.article`` first and
        ``div#artibody`` second. An empty list is returned when neither exists
        or when it holds no non-blank ``<p>`` text. Errors raised by
        ``urlopen`` are not caught here.
        """
        content = []
        req = urllib.request.Request(url=url)
        # Read the body exactly once: a second read() on the same response
        # returns b'', which made the GBK fallback decode nothing.
        with urllib.request.urlopen(req, timeout=5) as response:
            raw = response.read()

        soup = BeautifulSoup(self._decodeBody(raw), 'lxml')
        article = soup.select_one('div.article')
        if not article:
            article = soup.select_one('div#artibody')
        if not article:
            # Neither container is present; there is nothing to extract.
            return content

        for para in article.select('p'):
            line = para.text.strip()
            if line:
                content.append(line)
        return content

    def parseNewsDetail(self, r: dict):
        """Build an entity from the listing record ``r`` and store it.

        ``r`` must provide 'title', 'url' and 'time'. The article text is
        downloaded, and keywords and a three-sentence summary are produced
        with SnowNLP (TextRank-based according to the original author's note).
        """
        entity = {}
        # News title
        entity['title'] = r['title']
        print(entity['title'])
        # News URL, used to fetch the full text for keywords and summary
        entity['url'] = r['url']
        print(entity['url'])
        # News time
        entity['time'] = int(r['time'])

        # Full text of the news item; joining an empty list yields ''.
        content = self.getNewsContent(entity['url'])
        contentConcat = '\n'.join(content)
        entity['content'] = contentConcat

        if contentConcat:
            s = SnowNLP(contentConcat)
            # Keywords are sometimes a single common character, so keep only
            # those with at least two characters that pass validKeywords().
            keywords = [
                x for x in s.keywords(10)
                if len(x) > 1 and self.validKeywords(x)
            ]
            entity['keywords'] = '|'.join(keywords)
            # Three-sentence summary
            entity['summary'] = '|'.join(s.summary(3))
            # NOTE(review): the insert is placed inside this branch, so items
            # without body text are not stored -- confirm against the
            # original layout.
            self.dbUtil.insert(entity)

    def getSinaRollNews(self, startPage, endPage):
        """Crawl listing pages ``startPage`` .. ``endPage - 1`` and process every item.

        ``endPage`` itself is excluded, as with ``range``.
        """
        for i in range(startPage, endPage):
            print("Page {}".format(i))
            params = globalParams.copy()
            params['page'] = str(i)
            allHtml = requests.get(newsUrl, params=params, headers=headers)
            pageHtml = allHtml.content.decode('gbk')
            # Keep everything from the first '{' and drop the final character.
            pageHtml = pageHtml[pageHtml.index('{'):-1]

            # NOTE(security): eval() executes text downloaded from the network.
            # It is left in place because the payload format is not visible
            # here; replace it with a real parser once the format is confirmed.
            data_str = eval(pageHtml, Dummy())
            # Round-trip through json to end up with plain JSON-compatible types.
            data_str = json.loads(json.dumps(data_str))

            for r in data_str['list']:
                # Parse each news item and write it to the database
                self.parseNewsDetail(r)
                # Pause briefly so the crawl rate stays low
                # NOTE(review): per-item / per-page placement of the two sleeps
                # is reconstructed -- confirm.
                time.sleep(random.random() * 6.0)
            time.sleep(10)

    def validKeywords(self, x: str):
        """Return False for keywords that are digits only or contain listed punctuation."""
        # Extracted keywords are sometimes punctuation or pure numbers.
        if x.isdigit():
            return False
        return not any(mark in x for mark in ('—', ':', '"', ',', '。'))
"""Script to be run by cronjob. Goes through each item in database and updates price."""
from time import gmtime, strftime
from webscraper import *
from databaseUtil import DatabaseUtil

if __name__ == "__main__":
    pathname = "/home/tma/host2/Users/tma/cs145new/mysite/mysite/script_log.txt"
    dbpath = "/home/tma/host2/Users/tma/cs145new/mysite/sqlite3db/mydb.db"
    # Timestamp written to the log; gmtime() makes it UTC despite the name.
    localtime = strftime("%Y-%m-%d %H:%M:%S", gmtime())

    # Open the log separately from the update work. Previously one IOError
    # handler covered the whole job, so any IOError raised while scraping or
    # updating reopened the log with "w" (truncating it) and lost the error.
    try:
        log = open(pathname, "a")
    except IOError:
        # Parenthesised print / "except ... as" work on Python 2.6+ and 3.
        print("Error: Missing log file...creating new log file")
        log = open(pathname, "w")

    try:
        myDB = DatabaseUtil(dbpath)
        # Go through every url in database and check for any changes
        urls = myDB.selectAllURLs()
        for url in urls:
            # parseUrl returns (name, id, price, store); the name is unused.
            _, productId, productPrice, storeName = parseUrl(url[0])
            myDB.update(productId, storeName, productPrice)
    except Exception as e:
        log.write("ERROR at " + localtime + "\n")
        log.write(str(e) + "\n")
    else:
        log.write("Successful cronjob at " + localtime + "\n")
    finally:
        log.close()