def getDistanceAndTimeByLngLat(origin, destination):
    """Query the AMap driving-direction endpoint and summarise the first path.

    Args:
        origin: Start coordinate, interpolated verbatim into the ``origin``
            query parameter.
        destination: End coordinate, interpolated verbatim into the
            ``destination`` query parameter.

    Returns:
        A ``(distance, time)`` tuple of display strings built from the first
        returned path, or ``(None, None)`` when the response status is 0 or
        the path list is empty.
    """
    requestUrl = unicode(
        'http://restapi.amap.com/v3/direction/driving?key={0}&origin={1}&destination={2}&strategy=9'
    ).format(getKey(), origin, destination)
    payload = json.loads(httpGet(requestUrl))

    # Status 0 is treated as a failed request.
    if int(payload['status']) == 0:
        writeWarningLog(
            unicode("【高德】驾车 路径规划失败, origin:{0},destination:{1},原因:{2}").format(
                origin, destination, payload))
        return (None, None)

    routes = payload['route']['paths']
    if len(routes) <= 0:
        writeWarningLog(
            unicode(
                "【高德】驾车 没有找到相关路径, origin:{0},destination:{1},原因:{2}").format(
                    origin, destination, payload))
        return (None, None)

    firstRoute = routes[0]
    # The raw distance is divided by 1000 and rounded to one decimal.
    distanceText = unicode("{0}公里").format(
        round(float(firstRoute['distance']) / 1000, 1))

    # The raw duration is divided by 60, then split into hours and minutes.
    totalMinutes = int(float(firstRoute['duration']) / 60)
    hours, minutes = divmod(totalMinutes, 60)
    if hours <= 0:
        durationText = unicode("{0}分").format(minutes)
    else:
        durationText = unicode("{0}小时{1}分").format(hours, minutes)
    return (distanceText, durationText)
def getStockInfo(code):
    """Fetch a quote snapshot for one stock code from the eastmoney endpoint.

    Args:
        code: Stock code string. Codes starting with "6" get suffix ``1``,
            all others get suffix ``2``.

    Returns:
        A dict of quote fields picked by position from the response's
        ``Value`` array (plus ``code``), or ``None`` when ``closePrice``
        parses to 0.
    """
    marketSuffix = 1 if code.startswith("6") else 2
    url = ('http://nuff.eastmoney.com/EM_Finance2015TradeInterface/JS.ashx?id='
           + code + str(marketSuffix)
           + '&_=149941' + str(random.randint(1000000, 9999999)))

    # Drop the first 9 characters and the last one before JSON parsing
    # (presumably a JSONP-style wrapper -- confirm against a live response).
    body = httpGet(url)[9:-1]
    writeLog(unicode("getStockInfo, url:{0}, resule:{1} ").format(url, body))
    values = json.loads(body)['Value']

    # Result key -> position in the ``Value`` array.
    positions = (
        ('limitUp', 23),
        ('limitDown', 24),
        ('avgPrice', 26),
        ('volume', 31),
        ('amount', 35),
        ('highPrice', 30),
        ('lowPrice', 32),
        ('openPrice', 28),
        ('closePrice', 25),
        ('changePercent', 29),
        ('changeAmount', 27),
        ('turnOverRatio', 37),
        ('QRR', 36),
        ('totalValue', 46),
        ('circulatedValue', 45),
        ('internalPan', 40),
        ('externalPan', 39),
    )
    quote = {}
    for fieldName, position in positions:
        quote[fieldName] = values[position]

    # A bare '-' placeholder is mapped to 0 for these two fields.
    for fieldName, position in (('PE', 38), ('PTB', 43)):
        raw = values[position]
        quote[fieldName] = 0 if raw == '-' else raw
    quote['code'] = code

    if float(quote['closePrice']) == 0:
        return None
    return quote
def getShAvgPe():
    """Scrape the value following ``RATIO_OF_PE`` on the sse.com.cn home page.

    Returns:
        The first decimal number found after ``RATIO_OF_PE`` on the same
        line, rounded to two places.

    Raises:
        AttributeError: If the marker or a decimal number is not found
            (``re.search`` returns ``None``).
    """
    html = httpGet("http://www.sse.com.cn/")
    # Take the rest of the line starting at the marker.
    ratioLine = re.search(r'RATIO_OF_PE.*', html).group()
    peText = re.search(r'\d+\.\d+', ratioLine).group()
    return round(float(peText), 2)
def getListData(code, page):
    """Fetch one page of ranked quote data from the dfcfw.com list endpoint.

    The endpoint is asked to answer as ``var quote_123={rank:[...],pages:N}``.
    The text after the first ``=`` is turned into JSON by quoting the two
    bare keys and then parsed.

    Args:
        code: Value for the ``cmd`` query parameter.
        page: Page number; converted with ``str`` for the URL.

    Returns:
        The parsed object, or ``None`` when the response contains no ``=``.
    """
    # NOTE(review): the request token is hard-coded in the URL.
    url = "http://nufm.dfcfw.com/EM_Finance2014NumericApplication/JS.aspx?type=CT&cmd=" + code + "&sty=FCOIATA&sortType=I&sortRule=1&page=" + str(
        page
    ) + "&pageSize=20&js=var%20quote_123%3d{rank:[(x)],pages:(pc)}&token=7bc05d0d4c3c22ef9fca8c2a912d779c&jsName=quote_123&_g=0.148125620" + str(
        random.randint(1000000, 9999999))
    res = httpGet(url).decode("utf-8")

    index = res.find("=")
    if index < 0:
        return None

    # Slice right after the '=' that was located instead of assuming the
    # prefix is exactly 14 characters long.
    res = res[index + 1:].replace("rank", "\"rank\"").replace(
        "pages", "\"pages\"")
    return json.loads(res)
def getSzAvgPe():
    """Scrape three rounded values from the szse.cn home page tables.

    Every table whose tag carries ``id="REPORTID_tab1"`` is collected. The
    first match is skipped and, for each following table, the 8th ``<td>``
    cell is parsed as a float.

    Returns:
        A three-element list, left at 0 for tables that were not found.
    """
    html = httpGet("http://www.szse.cn/")
    tablePattern = re.compile(
        r'<table.*?id=\"REPORTID_tab1\".*?>(.*?)</table>')
    cellPattern = re.compile(r'<td.*?>(.*?)</td>')

    # Slots: [main board, SME board, ChiNext]
    averages = [0, 0, 0]
    for slot, tableHtml in enumerate(tablePattern.findall(html)[1:]):
        if tableHtml is None:
            continue
        cells = cellPattern.findall(tableHtml)
        averages[slot] = round(float(cells[7]), 2)
    return averages
def getPEGByWC(code):
    """Scrape two right-aligned cells from an iwencai.com "<code> peg" search.

    Args:
        code: Stock code string appended to the search query.

    Returns:
        ``(peg, second)``. ``peg`` is 0 when the first cell contains ``--``,
        otherwise the text of the first ``<a>`` inside it. ``second`` is the
        raw content of the second cell.

    Raises:
        AttributeError: If the ``upright_table`` table is not in the page.
        IndexError: If fewer than two cells (or no anchor) are found.
    """
    url = "http://www.iwencai.com/stockpick/search?ts=1&f=1&qs=stockhome_topbar_click&w=" + code + "%20peg"
    raw = httpGet(url)
    html = raw.decode(getEncoding(raw))

    tableMatch = re.search(
        r'<table class=\"upright_table\">([\s\S]*)</table>', html)
    cells = re.findall(
        r'<div class=\"em alignRight alignRight\">([\s\S]*?)</div>',
        tableMatch.group())

    pegCell = cells[0]
    if '--' in pegCell:
        peg = 0
    else:
        peg = re.findall(r'<a[^>]*?>([\s\S]*?)</a>', pegCell)[0]
    return (peg, cells[1])
def getPE(code):
    """Fetch the name and PE field of a stock from the eastmoney endpoint.

    Args:
        code: Stock code string. Codes starting with "6" get suffix ``1``,
            all others get suffix ``2``.

    Returns:
        ``(name, pe)`` taken from positions 2 and 38 of the ``Value`` array,
        or ``None`` when the parsed payload or its ``Value`` is ``None``.
        ``pe`` is replaced by 0 whenever the raw text contains ``-``.
    """
    marketSuffix = "1" if code.startswith("6") else "2"
    url = "http://nuff.eastmoney.com/EM_Finance2015TradeInterface/JS.ashx?id=" + code + marketSuffix

    # Drop the first 9 characters and the last one before JSON parsing.
    body = httpGet(url).decode("utf-8")[9:-1]
    payload = json.loads(body)
    if payload is None or payload["Value"] is None:
        return None

    values = payload["Value"]
    name = values[2]
    peText = values[38]
    # NOTE(review): a substring test also zeroes negative values such as
    # "-12.3", not only the "-" placeholder -- confirm this is intended.
    if '-' in peText:
        return (name, 0)
    return (name, peText)
def getHouseListByPage(page):
    """Download one lianjia.com listing page and split it into items.

    Args:
        page: Page number; converted with ``str`` for the URL.

    Returns:
        ``(totalPages, houseInfoList)``. ``totalPages`` is the first
        ``"totalPage"`` value found in the page (as text) or 0 when absent.
        ``houseInfoList`` holds the inner HTML of every ``<li>`` inside the
        ``sellListContent`` list.

    Raises:
        AttributeError: If the ``sellListContent`` list is not in the page.
    """
    url = "https://tj.lianjia.com/ershoufang/pg" + str(page) + "co32sf1ba45ea10000ep131"

    # NOTE(review): a captured browser session cookie is hard-coded here.
    requestHeaders = {
        'Host': 'tj.lianjia.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Cookie': 'lianjia_uuid=dfcb3cfc-1367-44d0-ab71-a4371c024f14; _jzqy=1.1497172963.1497172963.1.jzqsr=baidu|jzqct=%E9%93%BE%E5%AE%B6.-; UM_distinctid=15c96766dee1d-02c0d24031f09b-57e1b3c-100200-15c96766def2d0; lianjia_token=2.001caa2ee5666b6ddd0d0707d40582481c; Hm_lvt_efa595b768cc9dc7d7f9823368e795f1=1497173127; select_city=120000; all-lj=6341ae6e32895385b04aae0cf3d794b0; _jzqx=1.1501484606.1501816686.2.jzqsr=tj%2Elianjia%2Ecom|jzqct=/ershoufang/co32sf1ep131/.jzqsr=captcha%2Elianjia%2Ecom|jzqct=/; _jzqckmp=1; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1501566186,1501632151,1501727320,1501816685; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1501817428; _smt_uid=593d0be2.3137f8c0; CNZZDATA1253477585=1433138464-1497169384-null%7C1501812237; CNZZDATA1254525948=1005085447-1497168099-null%7C1501812362; CNZZDATA1255633284=2082143268-1497171373-null%7C1501817197; CNZZDATA1255604082=911884105-1497168620-null%7C1501813499; _qzja=1.1430239548.1497172962788.1501727321514.1501816685601.1501817373141.1501817428734.0.0.0.86.20; _qzjb=1.1501816685601.5.0.0.0; _qzjc=1; _qzjto=5.1.0; _jzqa=1.3807096351994737700.1497172963.1501727322.1501816686.20; _jzqc=1; _jzqb=1.5.10.1501816686.1; _ga=GA1.2.457334799.1497172965; _gid=GA1.2.1962824039.1501816690; lianjia_ssid=11e953d7-b086-4fcf-a424-b43314c76cca',
    }
    html = httpGet(url, requestHeaders)

    listMatch = re.search(
        r'<ul class=\"sellListContent\".*?>(.*?)</ul>', html)
    houseInfoList = re.findall(
        r'<li[^>]*?>([\s\S]*?)</li>', listMatch.group())

    pageCounts = re.findall(r'\"totalPage\":(.*?),', html)
    totalPages = pageCounts[0] if len(pageCounts) > 0 else 0
    return (totalPages, houseInfoList)
def getGeoCodes(address):
    """Resolve an address through the AMap geocoding endpoint.

    Args:
        address: Address text, interpolated verbatim into the ``address``
            query parameter (the request is sent with ``batch=true``).

    Returns:
        A list with the ``location`` value of every returned geocode, or the
        tuple ``(None, None)`` when the response status is 0.
    """
    requestUrl = unicode(
        "http://restapi.amap.com/v3/geocode/geo?key={0}&address={1}&batch=true"
    ).format(getKey(), address)
    payload = json.loads(httpGet(requestUrl))

    # Status 0 is treated as a failed request.
    if int(payload['status']) == 0:
        writeWarningLog(
            unicode("【高德】解析地址失败, address:{0}, 原因:{1}").format(address, payload))
        return (None, None)

    return [entry['location'] for entry in payload['geocodes']]
def getTechParamter(code):
    """Scrape the technical-indicator page of a stock on stockstar.com.

    Args:
        code: Stock code string used to build the page URL.

    Returns:
        ``(stockTechInfo, stockTechStatus, forecastInfo)``: a
        ``StockTechInfo`` built from the value cells, a ``StockTechStatus``
        built from the status cells plus the bull/bear/not-sure counters,
        and the result of ``getGainForecast`` on that status object.

    Raises:
        AttributeError: If the ``listInfo`` block is not in the page.
    """
    # NOTE(review): this changes the interpreter-wide default encoding on
    # every call; kept because other code may rely on it.
    sys.setdefaultencoding('gbk')
    url = 'http://stock.quote.stockstar.com/tech_' + code + '.shtml'
    res = httpGet(url)

    pattern = re.compile(r'<div class=\"listInfo\">([\s\S]*)</table>')
    tableData = pattern.search(res)
    pattern = re.compile(r'<td.*?>(.*?)</td>')
    tdData = pattern.findall(tableData.group())

    dictData = {}
    dictStatus = {}
    writeLog(unicode("getTech: code: {0}, url:{1}").format(code, url))
    # Sliding window of three cells: key, value, status.
    # NOTE(review): the window advances one cell at a time, so value and
    # status cells are also stored as keys -- confirm whether a step of 3
    # was intended.
    for i in range(0, len(tdData) - 2):
        key = tdData[i].strip()
        data = tdData[i + 1]
        dictData[key] = 0 if '--' in data else data
        dictStatus[key] = unicode(tdData[i + 2])
    stockTechInfo = StockTechInfo(code, dictData)

    pattern = re.compile(r'<p class=\"lf\">(.*?)</p>')
    techStatus = pattern.search(res)
    bulls = 0
    bears = 0
    notsure = 0
    if techStatus is not None:
        numbers = re.findall(r'\d+', techStatus.group())
        # findall always returns a list, so check its length; the counters
        # keep their 0 defaults when the summary has fewer than 4 numbers.
        if len(numbers) >= 4:
            # The first number is skipped, as before.
            bulls = numbers[1]
            bears = numbers[2]
            notsure = numbers[3]

    stockTechStatus = StockTechStatus(code, dictStatus, bulls, bears, notsure)
    forecastInfo = getGainForecast(stockTechStatus)
    return (stockTechInfo, stockTechStatus, forecastInfo)
def getRankPeg(code):
    """Fetch the PEG ranking block for a stock from iwencai.com.

    Args:
        code: Stock code string placed in the ``codes`` query parameter.

    Returns:
        A list of at least three entries. Each parsed row becomes a
        ``"code:name:peg"`` string; missing rows are padded with ``'--'``.
        When the response's ``success`` flag is falsy the list is
        ``['--', '--', '--']``.
    """
    url = "http://www.iwencai.com/diag/block-detail?pid=1559&codes=" + code + "&codeType=stock&info=%7B%22view%22%3A%7B%22nolazy%22%3A1%2C%22parseArr%22%3A%7B%22_v%22%3A%22new%22%2C%22dateRange%22%3A%5B%2220170728%22%2C%2220170728%22%5D%2C%22staying%22%3A%5B%5D%2C%22queryCompare%22%3A%5B%5D%2C%22comparesOfIndex%22%3A%5B%5D%7D%2C%22asyncParams%22%3A%7B%22tid%22%3A659%7D%7D%7D"
    raw = httpGet(url)
    payload = json.loads(raw.decode(getEncoding(raw)))
    if not bool(payload['success']):
        return ['--', '--', '--']

    tableHtml = payload["data"]["data"]["tableTempl"]
    rowPattern = re.compile(r'<tr[^>]*?>([\s\S]*?)</tr>')
    cellPattern = re.compile(r'<div.*?>(.*?)</div>')
    anchorPattern = re.compile(r'<a.*?>(.*?)</a>')
    columnNames = ['rankNo', 'code', 'name', 'peg']

    result = []
    # Only the first four <tr> rows are inspected.
    for rowHtml in rowPattern.findall(tableHtml)[:4]:
        cells = cellPattern.findall(rowHtml)
        if len(cells) <= 0:
            continue
        record = {}
        # The last cell of each row is ignored.
        for position, cell in enumerate(cells[:-1]):
            if '<a' in cell:
                record[columnNames[position]] = anchorPattern.findall(cell)[0]
            else:
                record[columnNames[position]] = cell
        result.append(
            unicode("{0}:{1}:{2}").format(
                record.get('code'), record.get('name'), record.get('peg')))

    # Pad up to three entries.
    result.extend(['--'] * (3 - len(result)))
    return result
def getStockInfos(code, year):
    """Download one year of daily quotes for a stock from 10jqka.com.cn.

    Args:
        code: Stock code placed in the URL.
        year: Year placed in the URL.

    Returns:
        On success, a dict keyed by ``YYYY-MM-DD`` whose values are dicts
        with ``open``, ``high``, ``low``, ``close``, ``volume`` and
        ``amount``. ``[]`` when the response has no ``(`` or when the raised
        error text contains ``404``. ``None`` for any other error.
    """
    try:
        url = "http://d.10jqka.com.cn/v2/line/hs_{0}/01/{1}.js".format(
            code, year)
        text = httpGet(url).decode("utf-8")

        openParen = text.find("(")
        if openParen < 0:
            writeErrorLog(
                unicode("解析行情失败: code:{0}, year:{1}, res:{2}").format(
                    code, year, text))
            return []

        # Keep what sits between the first "(" and the final character.
        jsonText = text[openParen + 1:-1]
        writeLog(unicode("获取股票历史行情: code: {0}, year:{1}").format(code, year))

        quotes = {}
        # Records are ';'-separated; fields inside a record are ','-separated.
        for record in json.loads(jsonText)['data'].split(';'):
            fields = record.split(',')
            dayQuote = {
                'open': fields[1],
                'high': fields[2],
                'low': fields[3],
                'close': fields[4],
                'volume': fields[5],
                'amount': "{0}亿".format(round(
                    float(fields[6]) / 100000000, 1)),
            }
            day = datetime.strptime(fields[0], '%Y%m%d').strftime('%Y-%m-%d')
            quotes[day] = dayQuote
        return quotes
    except Exception as e:
        writeErrorLog(
            unicode("解析行情失败: code:{0}, year:{1}, e:{2}").format(
                code, year, str(e)))
        return [] if "404" in str(e) else None
def getListData(type, page): url = "http://q.10jqka.com.cn/index/index/board/" + type + "/field/syl/order/asc/page/" + str( page) + "/ajax/1/" res = httpGet( url, { "Accept-Encoding": "gzip, deflate, sdch", "Accept-Language": "zh-CN,zh;q=0.8" }) try: res = res.decode(getEncoding(res)) columnNames = [ 'number', 'code', 'name', 'price', 'changepercent', 'changeamount', 'changerate', 'turnoverratio', 'QRR', 'zf', 'amount', 'liutonggu', 'liutongshizhi', 'pe' ] listData = [] pattern = re.compile(r'<tr[^>]*?>([\s\S]*?)</tr>') trData = pattern.findall(res) for tr in trData: pattern = re.compile(r'<td.*?>(.*?)</td>') tdData = pattern.findall(tr) if len(tdData) <= 0: continue dict = {} for i in range(1, len(tdData) - 1): if i <= 2: pattern = re.compile(r'<a.*?>(.*?)</a>') aData = pattern.findall(tdData[i]) dict[columnNames[i]] = aData[0] else: dict[columnNames[i]] = tdData[i] listData.append(dict) return listData except Exception, e: print e, res
def getPEG(code):
    """Fetch valuation and growth comparison figures from eastmoney.com.

    The first row of the ``gzbj`` section supplies ``pm`` and ``peg``. The
    first row of the ``czxbj`` section supplies ``jbmgsyzzlfh`` and
    ``jbmgsyzzl1``..``jbmgsyzzl3``. A row is used only when its ``dm`` equals
    ``code``; otherwise a warning is logged and the defaults (0) are kept.

    Args:
        code: Stock code string. Codes starting with "6" are prefixed with
            ``sh``, all others with ``sz``.

    Returns:
        ``(rate, PEG, mixThree, e2017, e2018, e2019)``. Fields whose section
        is missing, ``None`` or empty are 0.
    """
    market = "sh" if code.startswith("6") else "sz"
    url = "http://emweb.securities.eastmoney.com/PC_HSF10/IndustryAnalysis/IndustryAnalysisAjax?code=" + market + code + "&icode=" + str(
        random.randint(100, 999))
    res = httpGet(url).decode("utf-8")
    jo = json.loads(res)
    # Debug dump of the parsed payload (kept from the original).
    print(jo)

    rate = 0
    PEG = 0
    valuationRows = None
    if jo is not None and jo["Result"]["gzbj"] is not None:
        valuationRows = jo["Result"]["gzbj"]["data"]
    # Skip an empty list as well as None, so [0] cannot raise IndexError;
    # this matches how the growth section below is treated.
    if valuationRows:
        firstValuation = valuationRows[0]
        if firstValuation["dm"] != code:
            writeWarningLog("估值比较的股票代码没找到,实际代码:" + code + ",当前代码:" +
                            firstValuation["dm"])
        else:
            rate = firstValuation["pm"]
            PEG = firstValuation["peg"]
            PEG = 0 if '--' in PEG else PEG

    growthRows = None
    if jo is not None and jo["Result"]["czxbj"] is not None:
        growthRows = jo["Result"]["czxbj"]["data"]
    if not growthRows:
        return (rate, PEG, 0, 0, 0, 0)

    e2017 = 0
    e2018 = 0
    e2019 = 0
    mixThree = 0
    firstGrowth = growthRows[0]
    if firstGrowth["dm"] != code:
        writeWarningLog("成长性比较的股票代码没找到,实际代码:" + code + ",当前代码:" +
                        firstGrowth["dm"])
    else:
        mixThree = firstGrowth["jbmgsyzzlfh"]
        e2017 = firstGrowth["jbmgsyzzl1"]
        e2018 = firstGrowth["jbmgsyzzl2"]
        e2019 = firstGrowth["jbmgsyzzl3"]
    return (rate, PEG, mixThree, e2017, e2018, e2019)
def _formatWalkingLeg(segment):
    """Return the walking summary line of a segment, or None if it has none."""
    walking = segment['walking']
    if walking is None or len(walking) <= 0:
        return None
    walkingDistance = round(float(walking['distance']) / 1000, 1)
    walkingTime = int(float(walking['duration']) / 60)
    return unicode("步行{0}公里:约{1}分钟").format(walkingDistance, walkingTime)


def _formatBusLeg(segment):
    """Return the summary line for the first bus line of a segment."""
    busline = segment['bus']['buslines'][0]
    totalStops = len(busline['via_stops']) + 1
    return unicode("({0}) {1} => {2} 共{3}站").format(
        busline['name'], busline['departure_stop']['name'],
        busline['arrival_stop']['name'], totalStops)


def getDirectionByLngLat(origin, destination):
    """Describe the first public-transit plan returned by the AMap endpoint.

    The first segment contributes its walking leg (when present) and its bus
    line, middle segments their bus line, and the last segment only its
    walking leg (when present).

    Args:
        origin: Start coordinate, interpolated verbatim into the URL.
        destination: End coordinate, interpolated verbatim into the URL.

    Returns:
        A list of display strings, or ``None`` when the status is 0, no
        transit plan or segment is returned, or any exception is raised
        while parsing (the traceback, URL and response are printed).
    """
    url = unicode(
        'http://restapi.amap.com/v3/direction/transit/integrated?key={0}&origin={1}&destination={2}&strategy=0&city=天津市'
    ).format(getKey(), origin, destination)
    res = httpGet(url)
    try:
        jo = json.loads(res)
        if int(jo['status']) == 0:
            writeWarningLog(
                unicode(
                    "【高德】公交 路径规划失败, origin:{0},destination:{1},原因:{2}").format(
                        origin, destination, jo))
            return None

        transits = jo['route']['transits']
        if len(transits) <= 0:
            writeWarningLog(
                unicode("【高德】公交规划 没有找到相关路径, origin:{0},destination:{1},原因:{2}"
                        ).format(origin, destination, jo))
            return None

        # Use the first planned route and its list of segments.
        segments = transits[0]['segments']
        if len(segments) <= 0:
            writeWarningLog(
                unicode("【高德】公交规划 没有找到路径信息, origin:{0},destination:{1},原因:{2}"
                        ).format(origin, destination, jo))
            return None

        result = []
        lastIndex = len(segments) - 1
        for i, segment in enumerate(segments):
            if i == 0:
                # A first segment without a walking leg no longer aborts the
                # whole route; the walking line is simply omitted.
                walkingLine = _formatWalkingLeg(segment)
                if walkingLine is not None:
                    result.append(walkingLine)
                result.append(_formatBusLeg(segment))
            elif i == lastIndex:
                walkingLine = _formatWalkingLeg(segment)
                if walkingLine is not None:
                    result.append(walkingLine)
            else:
                result.append(_formatBusLeg(segment))
        return result
    except Exception:
        traceback.print_exc()
        print(url)
        print(res)
        return None
def _quoteDirections(directions):
    """Join route lines into one double-quoted, newline-separated cell."""
    lines = directions or []
    return '\"' + ''.join(
        unicode('{0}\n').format(item) for item in lines) + '\"'


def handleHouseList(houseInfoList, geoCodeHome, geoCodeHerOffice,
                    geoCodeMyOffice, result, i):
    """Parse listing snippets, enrich them with routes and store CSV rows.

    For each listing the function extracts the fields from the snippet HTML,
    downloads the detail page for the area name, geocodes the complex, looks
    up a driving route from ``geoCodeHome`` and transit routes from the two
    office coordinates, then appends one GBK-encoded CSV row to ``result``
    and to ``houselist1.csv``. A listing that raises is skipped after its
    traceback is printed. The loop sleeps 2-5 seconds after every listing.

    Args:
        houseInfoList: Iterable of listing HTML snippets.
        geoCodeHome: Coordinate used as the driving-route origin.
        geoCodeHerOffice: Coordinate used as origin of the first transit route.
        geoCodeMyOffice: Coordinate used as origin of the second transit route.
        result: List that receives the encoded rows (mutated in place).
        i: Starting value of the progress counter printed per listing. The
            updated value is not returned.

    Returns:
        None.
    """
    houseInfoPattern = re.compile(r'<div class=\"houseInfo\">([\s\S]*?)</div>')
    anchorTextPattern = re.compile(r'<a[^>]*?>([\s\S]*?)</a>')
    positionPattern = re.compile(
        r'<div class=\"positionInfo\">([\s\S]*?)</div>')
    subwayPattern = re.compile(r'<span class=\"subway\">([\s\S]*?)</span>')
    totalPricePattern = re.compile(
        r'<div class=\"totalPrice\"><span>([\s\S]*?)</span>')
    unitPricePattern = re.compile(
        r'<div class=\"unitPrice\".*?><span>([\s\S]*?)</span>')
    followInfoPattern = re.compile(
        r'<div class=\"followInfo\"><span.*?></span>([\s\S]*?)</div>')
    linkPattern = re.compile(r'<a class=\"img \" href=\"(.*?)\"')
    areaDivPattern = re.compile(r'<div class="areaName">.*?</div>')
    areaAnchorPattern = re.compile(r'<a.*?>(.*?)</a>')

    # NOTE(review): a captured browser session cookie is hard-coded here.
    detailHeaders = {
        'Cookie': 'lianjia_uuid=dfcb3cfc-1367-44d0-ab71-a4371c024f14; _jzqy=1.1497172963.1497172963.1.jzqsr=baidu|jzqct=%E9%93%BE%E5%AE%B6.-; UM_distinctid=15c96766dee1d-02c0d24031f09b-57e1b3c-100200-15c96766def2d0; lianjia_token=2.001caa2ee5666b6ddd0d0707d40582481c; Hm_lvt_efa595b768cc9dc7d7f9823368e795f1=1497173127; select_city=120000; all-lj=6341ae6e32895385b04aae0cf3d794b0; _jzqx=1.1501484606.1501816686.2.jzqsr=tj%2Elianjia%2Ecom|jzqct=/ershoufang/co32sf1ep131/.jzqsr=captcha%2Elianjia%2Ecom|jzqct=/; _jzqckmp=1; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1501566186,1501632151,1501727320,1501816685; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1501817441; _smt_uid=593d0be2.3137f8c0; CNZZDATA1253477585=1433138464-1497169384-null%7C1501812237; CNZZDATA1254525948=1005085447-1497168099-null%7C1501812362; CNZZDATA1255633284=2082143268-1497171373-null%7C1501817197; CNZZDATA1255604082=911884105-1497168620-null%7C1501813499; _qzja=1.1430239548.1497172962788.1501727321514.1501816685601.1501817428734.1501817441713.0.0.0.87.20; _qzjb=1.1501816685601.6.0.0.0; _qzjc=1; _qzjto=6.1.0; _jzqa=1.3807096351994737700.1497172963.1501727322.1501816686.20; _jzqc=1; _jzqb=1.6.10.1501816686.1; _ga=GA1.2.457334799.1497172965; _gid=GA1.2.1962824039.1501816690; lianjia_ssid=11e953d7-b086-4fcf-a424-b43314c76cca',
        'Host': 'tj.lianjia.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
    }

    for houseInfo in houseInfoList:
        print(houseInfo)
        print(i)
        i = i + 1
        try:
            houseInfoDiv = houseInfoPattern.findall(houseInfo)[0]
            # Residential complex name.
            village = anchorTextPattern.findall(houseInfoDiv)[0].strip()
            # NOTE(review): "[span|a]" is a character class (one of the
            # characters s, p, a, n, |), not an alternation of tag names.
            # Left unchanged because the output depends on the page markup.
            other = re.subn(r'<[span|a][^>]*?>([\s\S]*?)</[span|a]>', '',
                            houseInfoDiv)[0]

            positionInfoDiv = positionPattern.findall(houseInfo)[0]
            # Street name.
            street = anchorTextPattern.findall(positionInfoDiv)[0].strip()

            # Subway info: keep the text between '线' and '站'.
            subwayInfo = subwayPattern.findall(houseInfo)
            subwayInfo = '' if len(subwayInfo) <= 0 else subwayInfo[0].strip()
            lineIndex = subwayInfo.find('线')
            stationIndex = subwayInfo.find('站')
            subway = ''
            if lineIndex >= 0 and stationIndex >= 0:
                # +3 presumably skips the 3-byte UTF-8 form of '线' in a
                # byte string -- confirm the snippet encoding.
                subway = subwayInfo[lineIndex + 3:stationIndex]

            # Total price.
            totalPrice = totalPricePattern.findall(houseInfo)[0]
            # Unit price with a fixed-width prefix and suffix trimmed.
            unitPrice = unitPricePattern.findall(houseInfo)[0][6:-10]
            # Publish time: third '/'-separated part of the follow info.
            dateInfo = followInfoPattern.findall(houseInfo)[0].split('/')[2]

            linkUrl = linkPattern.findall(houseInfo)[0]
            linkRes = httpGet(linkUrl, detailHeaders)
            areaDiv = areaDivPattern.search(linkRes)
            aData = areaAnchorPattern.findall(areaDiv.group())
            areaData = aData[0] if aData else ''

            # Coordinates of the complex.
            geoCodeVillage = getGeoCodes(u'天津市{0}'.format(village))[0]
            # Driving distance and time from home.
            distanceAndTime = getDistanceAndTimeByLngLat(
                geoCodeHome, geoCodeVillage)

            # A failed transit lookup returns None; write an empty quoted
            # cell instead of dropping the whole listing.
            babyDirectionStr = _quoteDirections(
                getDirectionByLngLat(geoCodeHerOffice, geoCodeVillage))
            meDirectionStr = _quoteDirections(
                getDirectionByLngLat(geoCodeMyOffice, geoCodeVillage))

            # Format and encode the row once, then store it in both places.
            row = unicode(
                "{0},{1},{10},{2},{3},{9},{4},{11},{5},{6},{7},{8}\n").format(
                    village, street, totalPrice, unitPrice,
                    distanceAndTime[0], other, subway, babyDirectionStr,
                    meDirectionStr, dateInfo, areaData,
                    distanceAndTime[1]).encode('gbk')
            result.append(row)
            saveFile("houselist1.csv", row, 'a')
        except Exception:
            traceback.print_exc()
        # Pause 2-5 seconds between listings.
        time.sleep(random.randint(2, 5))