def craw_by_date(date):
    """Download news bodies for the IDs listed for ``date`` into a text file.

    News IDs are read (one per line, stripped) from
    ``/data/zhli7/news/id_<date>.txt`` and requested from
    ``/api/subject/getNewsBody.json`` in comma-joined batches of 200.
    Output goes to ``news_<date>.csv`` in the current directory: one
    ``newsID$*$newsBody$*$newsURL`` line per returned item, followed by the
    JSON dump of the whole response.

    Args:
        date: Value substituted into the input and output file names.

    Raises:
        Any exception from file access, the API client or JSON parsing
        propagates unchanged to the caller.
    """
    batch_size = 200

    client = Client()
    # NOTE(review): the API token is hard-coded; consider loading it from
    # configuration instead of keeping it in source control.
    client.init(
        '0a10b42bf3c6488a9d2e97a8e5ba839633c0791dc130569b64de680209fe60ae')

    # Use a context manager so the ID file is closed deterministically.
    with open("/data/zhli7/news/id_{}.txt".format(date), 'r') as id_file:
        ids = [line.strip() for line in id_file]

    with open("news_{}.csv".format(date), 'w') as f:
        # Stepping by batch start always includes a trailing partial batch;
        # the previous ``c < len(ids) / slice_cnt`` bound silently dropped it
        # under Python 2 integer division.
        for start in range(0, len(ids), batch_size):
            id_str = ','.join(ids[start:start + batch_size])
            url2 = '/api/subject/getNewsBody.json?field=&newsID=' + id_str
            code, result = client.getData(url2)
            if code == 200:
                # ``json.loads`` no longer accepts ``encoding`` (removed in
                # Python 3.9); UTF-8 is the default on every version.
                json_result = json.loads(result)
                for value in json_result['data']:
                    f.write('$*$'.join([
                        str(value['newsID']),
                        value['newsBody'],
                        value['newsURL'],
                    ]) + "\n")
                # NOTE(review): the source's indentation was lost; the raw
                # dump and the ``break`` are read as running once per
                # successful response, i.e. processing stops after the first
                # batch that returns HTTP 200. Confirm this is intended.
                f.write(json.dumps(json_result))
                break
            else:
                print(code)
                print(result)
def GetMktConFutdJY(secID, exchangeCD, start_time, end_time):
    """Fetch ``getMktConFutdJY`` data for one security and save the raw reply.

    On HTTP 200 the response body is handed to ``WriteFile`` with the path
    ``./data/<exchangeCD>/<secID>/<start_time>_<end_time>.txt``. On any other
    status, the status code and the body are printed and nothing is written.

    Args:
        secID: Security ID, concatenated into the query string (must be str).
        exchangeCD: Exchange code, used only for the output directory.
        start_time: Value sent as ``startDate`` and used in the file name.
        end_time: Value sent as ``endDate`` and used in the file name.

    Raises:
        Any exception from the client or ``WriteFile`` propagates unchanged
        (the former ``except ...: raise e`` only truncated the traceback).
    """
    client = Client()
    # NOTE(review): the API token is hard-coded; consider loading it from
    # configuration.
    client.init(
        'c17b9229eaa7df5c1693a25d7225353b47918cbc222b7b9e693de9861f5c8ded')

    url = ('/api/finFutu/getMktConFutdJY.json?FIELD=&secID=' + secID
           + '&secIDSrc=&startDate=' + start_time
           + '&endDate=' + end_time
           + '&beginTime=&endTime=')
    code, result = client.getData(url)

    if code != 200:
        # ``print(x)`` with a single argument behaves identically on
        # Python 2 and Python 3.
        print(code)
        print(result)
        return

    file_name = ("./data/" + exchangeCD + "/" + secID + "/"
                 + start_time + "_" + end_time + ".txt")
    WriteFile(file_name, result)
def getequclient(ticker, secID, type):
    """Query ``/api/equity/getEqu.json`` and print the first returned record.

    ``client.getData`` returns a ``(status_code, body)`` pair where ``body``
    is JSON text. The decoded object carries the records as a list under
    ``'data'``; each key/value of the first record is printed on its own
    line. Keys seen in a sample response: primeOperating, ListSectorCD,
    exchangeCD, secID, secFullName, nonrestFloatShares, endDate, officeAddr,
    listDate, secShortName, TShEquity, equType, nonrestfloatA, listStatusCD,
    ListSector, partyID, totalShares, transCurrCD, exCountryCD, ticker,
    equTypeCD.

    Args:
        ticker: Ticker string, e.g. ``'000001'``.
        secID: Security ID string, e.g. ``'000001.XSHE'``.
        type: Value sent as ``equTypeCD`` (the name shadows the builtin but
            is kept for caller compatibility).

    Returns:
        None. This is a best-effort helper: any failure is reported on
        stdout instead of being raised.
    """
    try:
        client = Client()
        # NOTE(review): the API token is hard-coded; consider loading it
        # from configuration.
        client.init(
            'ae8820c8eb8ccd418dd8141b4c685d2d208c58a564a9fd2c22f8c95ac6a2ef23')
        url1 = ('/api/equity/getEqu.json?field=&listStatusCD=&secID=' + secID
                + '&ticker=' + ticker + '&equTypeCD=' + type)

        # The status code (first element) is not checked; only the body
        # (second element) is decoded.
        rawresult = client.getData(url1)
        data = json.loads(rawresult[1])

        # data['data'] is a list; only its first element is shown.
        first_record = data['data'][0]
        for key, value in first_record.items():
            print("%s %s" % (key, value))
    except Exception as e:
        # Keep the best-effort contract, but say what went wrong instead of
        # printing a bare 'error'.
        print('error: %r' % (e,))
def GetFutuCfCCXE(exchange):
    """Fetch ``getFutuCfCCXE`` records for one exchange and parse them.

    The request is sent with ``contractStatus=DE`` and the given exchange
    code. On HTTP 200 the response body is passed to ``ParserFutu`` and its
    result is returned; otherwise the status code and body are printed.

    Args:
        exchange: Exchange code, concatenated into the query string
            (must be str).

    Returns:
        Whatever ``ParserFutu`` returns on success, or ``None`` when the
        status is not 200.

    Raises:
        Any exception from the client or ``ParserFutu`` propagates unchanged.
    """
    client = Client()
    # NOTE(review): the API token is hard-coded; consider loading it from
    # configuration.
    client.init(
        'c17b9229eaa7df5c1693a25d7225353b47918cbc222b7b9e693de9861f5c8ded')

    url = ('/api/future/getFutuCfCCXE.json?field=&secID=&ticker=&exchangeCD='
           + exchange + '&varUniCode=&contractStatus=DE')
    code, result = client.getData(url)

    if code == 200:
        # Returned directly; the old local was named ``list`` and shadowed
        # the builtin.
        return ParserFutu(result)

    print(code)
    print(result)
    return None
def fetch(stock):
    """Return the ``industryName1`` column for one security.

    Queries ``/api/equity/getEquIndustry.json`` (``industryVersionCD=010303``)
    for ``stock``, keeps the rows whose ``isNew`` equals 1, indexes them by
    ``secID`` and returns the ``industryName1`` column.

    Args:
        stock: Security ID, concatenated into the query string (should be a
            str).

    Returns:
        A pandas Series of ``industryName1`` indexed by ``secID``, or
        ``None`` when the HTTP status is not 200, when ``retCode`` is not 1,
        or when any exception occurs (best-effort: failures are printed,
        never raised).
    """
    try:
        client = Client()
        # NOTE(review): the API token is hard-coded; consider loading it
        # from configuration.
        client.init(
            'b3914afefef661cda6be2a6f897ce2676bd0596bb98a62c2afc15ffafd0836aa')
        url1 = ('/api/equity/getEquIndustry.json'
                '?field=&industryVersionCD=010303&industry=&secID=' + stock)
        code, result = client.getData(url1)
        if code == 200:
            jsonObj = json.loads(result)
            if jsonObj['retCode'] == 1:
                df = pd.read_json(json.dumps(jsonObj['data']))
                df = df[df["isNew"] == 1]
                df = df.set_index(["secID"])
                return df["industryName1"]
        else:
            print(code)
            print(result)
    except Exception:
        # %-formatting works for any ``stock``; ``"Error:" + stock`` raised
        # TypeError from inside this handler whenever ``stock`` was not a
        # string, turning a reported failure into a crash.
        print("Error:%s" % (stock,))
    return None
def fetch(stock):
    """Look up ``industryName1`` for a security via ``getEquIndustry``.

    The response's ``data`` records are loaded into a DataFrame, filtered to
    rows where ``isNew == 1``, indexed by ``secID``, and the
    ``industryName1`` column is returned.

    Args:
        stock: Security ID appended to the request URL (should be a str).

    Returns:
        pandas Series (``industryName1`` indexed by ``secID``) on success.
        ``None`` if the HTTP status is not 200 (status and body are printed),
        if ``retCode`` is not 1, or if anything raises (an ``Error:<stock>``
        line is printed instead of propagating).
    """
    try:
        client = Client()
        # NOTE(review): the API token is hard-coded; consider loading it
        # from configuration.
        client.init(
            'b3914afefef661cda6be2a6f897ce2676bd0596bb98a62c2afc15ffafd0836aa')
        url1 = ('/api/equity/getEquIndustry.json'
                '?field=&industryVersionCD=010303&industry=&secID=' + stock)
        code, result = client.getData(url1)

        if code != 200:
            print(code)
            print(result)
            return None

        jsonObj = json.loads(result)
        if jsonObj['retCode'] != 1:
            return None

        df = pd.read_json(json.dumps(jsonObj['data']))
        current = df[df["isNew"] == 1].set_index(["secID"])
        return current["industryName1"]
    except Exception:
        # Format instead of concatenating: ``"Error:" + stock`` raised a
        # second TypeError inside the handler for non-string ``stock``.
        print("Error:%s" % (stock,))
        return None
def craw_by_code(code):
    """Download news bodies for the IDs listed for ``code`` into a text file.

    Skips the work when ``/data/zhli7/company_news/news_<code>.csv`` already
    exists. Otherwise news IDs are read (one per line, stripped) from
    ``/data/zhli7/code/code_<code>.txt`` and requested from
    ``/api/subject/getNewsBody.json`` in comma-joined batches of 300. For
    each HTTP 200 response, one ``newsID$*$newsBody$*$newsURL`` line is
    written per item (newlines in the body replaced by spaces), followed by
    the JSON dump of the response. Non-200 responses are printed.

    Args:
        code: Value substituted into the input and output file names.

    Returns:
        ``0`` when the target file already exists; ``None`` otherwise.

    Raises:
        Any exception from file access, the API client or JSON parsing
        propagates unchanged to the caller.
    """
    target_file = "/data/zhli7/company_news/news_{}.csv".format(code)
    if os.path.isfile(target_file):
        print(target_file, ' exsits, skip it')
        return 0

    batch_size = 300

    client = Client()
    # NOTE(review): the API token is hard-coded; consider loading it from
    # configuration.
    client.init(
        '97b197e1451722213caa5ff4a16d51dd868188e1d0d363bf673de596e7201297')

    # Use a context manager so the ID file is closed deterministically.
    with open("/data/zhli7/code/code_{}.txt".format(code), 'r') as id_file:
        ids = [line.strip() for line in id_file]

    with open(target_file, 'w') as f:
        # Stepping by batch start always includes a trailing partial batch;
        # the previous ``c < len(ids) / slice_cnt`` bound silently dropped it
        # under Python 2 integer division.
        for start in range(0, len(ids), batch_size):
            id_str = ','.join(ids[start:start + batch_size])
            url2 = '/api/subject/getNewsBody.json?field=&newsID=' + id_str
            status, result = client.getData(url2)
            if status == 200:
                # ``json.loads`` no longer accepts ``encoding`` (removed in
                # Python 3.9); UTF-8 is the default on every version.
                json_result = json.loads(result)
                for value in json_result['data']:
                    f.write('$*$'.join([
                        str(value['newsID']),
                        value['newsBody'].replace("\n", " "),
                        value['newsURL'],
                    ]) + "\n")
                # NOTE(review): the source's indentation was lost; the raw
                # dump is read as being written once per response, after its
                # item lines. Confirm this is intended.
                f.write(json.dumps(json_result))
            else:
                print(status)
                print(result)
last_update_date = None today = datetime.datetime.now().strftime("%Y-%m-%d") try: last_update_date = db.Get("update_time") except Exception, e: print e if read_cache and today == last_update_date: df = pd.read_csv("options_info.csv",dtype={"optID":str}) df = df.set_index(["optID"]) return df; global wmclient url1='/api/options/getOpt.json?field=&secID=&optID=&ticker=&varSecID=510050.XSHG&varticker=&contractStatus=' code, result = wmclient.getData(url1) if code==200: jsonObj = json.loads(result) if jsonObj['retCode'] == 1: data_bag = json.loads(json.dumps(jsonObj['data'])) df = pd.DataFrame(data_bag) df = df.set_index(["optID"]) df.to_csv("options_info.csv") db.Put("update_time",today) return df else: print(jsonObj) else: print(code) print(result)
# Export ticker, short name, listing date and delisting date for the
# equities returned by getEqu (equTypeCD=A, listStatusCD in L,S,DE,UN) to a
# CSV file indexed by ticker.
import json

from pandas import DataFrame

from dataapiclient import Client

client = Client()
# NOTE(review): the API token is hard-coded; consider loading it from
# configuration.
client.init('cae5c4acc4ad4ccb93a8aaac4b8adb04363feaa9852c34d14ddd2248613b09b3')

url = '/api/equity/getEqu.json?field=ticker,secShortName,listDate,delistDate&listStatusCD=L,S,DE,UN&secID=&ticker=&equTypeCD=A'
code, result = client.getData(url)

# Fail with a clear message instead of an opaque JSON/KeyError when the
# request itself was rejected; the status code used to be ignored.
if code != 200:
    raise RuntimeError(
        'getEqu request failed with status %s: %s' % (code, result))

# ``result`` is decoded before parsing (bytes payload assumed by .decode()).
j = json.loads(result.decode())
d = DataFrame(j['data'])
d = d.set_index('ticker')
d = d[['secShortName', 'listDate', 'delistDate']]
d.to_csv('data/ticker_and _day_of_(de)list_date.csv')
# file_object = open('themes.csv', 'w') # file_object.write(result) # file_object.close() # else: # print code # print result # # url3='/api/market/getMktEqud.csv?field=&beginDate=&endDate=&secID=&ticker=&tradeDate=20160328' # code, result = client.getData(url3) # if code==200: # file_object = open('mktequd20160328.csv', 'w') # file_object.write(result) # file_object.close() # else: # print code # print result url4='/api/market/getBarRTIntraDay.json?securityID=600050.XSHG&startTime=&endTime=&unit=1' # url4='/api/market/getBarRTIntraDayOneMinute.csv?time=11:20&exchangeCD=&unit=1' code, result = client.getData(url4) if code==200: file_object = open('oneminute20160330.csv', 'w') file_object.write(result) file_object.close() else: print code print result except Exception, e: traceback.print_exc() raise e
class Download(object):
    """Download daily market data through the data API and save it as text.

    Each stock's rows are converted to
    ``date,open,high,low,close,volume,value`` lines and saved under
    ``<self.name>/<stock>.txt``.

    NOTE(review): ``formatDate`` and ``save`` are called on ``self`` but are
    not defined in the visible part of this class — presumably they live
    elsewhere in the file; confirm.
    """

    def __init__(self):
        super(Download, self).__init__()
        # NOTE(review): the API token is hard-coded; consider loading it
        # from configuration.
        token = 'b6a5eded39d16731278aa4646ec96128304f0ccab0850bbdacebcdcada4dbb7f'
        self.client = Client()
        self.client.init(token)
        self.cfg = StockConfig()
        # 'Smaug' switches downHistory to the longer history (from 20050101);
        # any other name uses 20151201.
        self.name = 'Hobbit'

    def downHistory(self, stocks, stockType=0):
        """Download and save historical data for every code in ``stocks``.

        For each stock the type recorded in the config selects the endpoint.
        If that request answers ``retCode == -1``, the endpoints from index 1
        onward are tried in order and the first one answering
        ``retCode == 1`` is remembered, then passed to ``updateType`` at the
        end.

        Args:
            stocks: Iterable of codes, e.g. ``[600519, ..., 159915]``.
            stockType: Ignored (overwritten per stock); kept so existing
                callers keep working.
        """
        date = '20050101' if self.name == 'Smaug' else '20151201'

        # Indexed by stock type: 0 = equity, 1 = fund, 2 = index.
        urls = [
            '/api/market/getMktEqud.json?field=&beginDate=%s&endDate=&secID=&ticker=%s&isOpen=1',
            '/api/market/getMktFundd.json?field=&beginDate=%s&endDate=&secID=&ticker=%s&tradeDate=',
            '/api/market/getMktIdxd.json?field=&beginDate=%s&endDate=&indexID=&ticker=%s&tradeDate=',
        ]

        newStockTypes = []
        for _stock in stocks:
            stockType = self.getTypeByCode(_stock)
            ticker = self.initCode(_stock)
            result = self.down(urls[stockType] % (date, ticker))

            if result['retCode'] == -1:
                # The recorded type gave no data: probe the other endpoints.
                # NOTE(review): the probe starts at index 1, so endpoint 0 is
                # never retried — confirm this is intended.
                for candidate in range(1, len(urls)):
                    stockType = candidate
                    result = self.down(urls[stockType] % (date, ticker))
                    if result['retCode'] == 1:
                        newStockTypes.append({_stock: stockType})
                        break

            if stockType == SD.IDX:
                content = self.handleIdxAsStock(result['data'])
            else:
                content = self.handleAsStock(result['data'])
            self.save('%s/%s.txt' % (self.name, _stock), content)

        self.updateType(newStockTypes)

    def initCode(self, code):
        """Normalize a stock code to its 6-character ticker.

        An 8-character code has its 2-character prefix removed
        (``SH600000`` -> ``600000``); a 6-character code is returned as is.

        Raises:
            ValueError: If the code is neither 6 nor 8 characters long.
        """
        code = str(code)
        if len(code) == 8:
            return code[2:]
        if len(code) == 6:
            return code
        # Raising a plain string is not a valid exception (it fails with
        # TypeError); raise a real exception carrying the same message.
        raise ValueError('Change Code Error : ' + code)

    def getTypeByCode(self, code):
        """Return the stock type recorded for ``code``.

        Type values: -1 no record, 0 stock, 1 fund, 2 index. A missing
        record (-1) is treated as a stock (0).
        """
        result = self.cfg.getTypeByCode(code)
        return 0 if result == -1 else result

    def updateType(self, array):
        """Store newly discovered ``{stock: type}`` mappings in the config."""
        self.cfg.update(array)

    def down(self, url):
        """Request ``url`` and return the decoded JSON response.

        The URL is remembered in ``self.url`` so error reports can show it.

        Returns:
            The decoded response on HTTP 200; otherwise the status code and
            body are printed and ``None`` is returned.
        """
        # Local import so this class does not depend on a module-level
        # ``import json`` being present.
        import json

        self.url = url
        code, result = self.client.getData(url)
        if code == 200:
            # NOTE(review): this used ``eval(result)``, which executes
            # whatever the server sends and fails on JSON ``null``/``true``/
            # ``false``. The payload is JSON, so parse it as JSON.
            return json.loads(result)
        print(code)
        print(result)
        return None

    def handleAsStock(self, data):
        """Convert equity/fund rows to forward-adjusted CSV lines.

        Prices are multiplied by ``accumAdjFactor`` (forward adjustment
        factor) and rounded to 3 decimals; volume and turnover value are
        rounded to 2 decimals.
        """
        lines = []
        for _data in data:
            accumAdjFactor = _data['accumAdjFactor']  # forward adj. factor
            date = self.formatDate(_data['tradeDate'])
            openPrice = round(_data['openPrice'] * accumAdjFactor, 3)
            highestPrice = round(_data['highestPrice'] * accumAdjFactor, 3)
            lowestPrice = round(_data['lowestPrice'] * accumAdjFactor, 3)
            closePrice = round(_data['closePrice'] * accumAdjFactor, 3)
            turnoverVol = round(_data['turnoverVol'], 2)  # traded volume
            turnoverValue = round(_data['turnoverValue'], 2)  # traded value
            lines.append('%s,%s,%s,%s,%s,%s,%s\n' % (
                date, openPrice, highestPrice, lowestPrice, closePrice,
                turnoverVol, turnoverValue))
        # Join once instead of growing a string inside the loop.
        return ''.join(lines)

    def handleIdxAsStock(self, data):
        """Convert index rows to the same CSV line format as stocks.

        On a malformed row the current URL and the row are printed before
        the original exception is re-raised.
        """
        lines = []
        for _data in data:
            try:
                date = self.formatDate(_data['tradeDate'])
                openPrice = round(_data['openIndex'], 3)
                highestPrice = round(_data['highestIndex'], 3)
                lowestPrice = round(_data['lowestIndex'], 3)
                closePrice = round(_data['closeIndex'], 3)
                turnoverVol = round(_data['turnoverVol'], 2)  # traded volume
                turnoverValue = round(_data['turnoverValue'], 2)  # traded value
                lines.append('%s,%s,%s,%s,%s,%s,%s\n' % (
                    date, openPrice, highestPrice, lowestPrice, closePrice,
                    turnoverVol, turnoverValue))
            except Exception:
                print(self.url)
                print(_data)
                # Bare ``raise`` keeps the original traceback.
                raise
        return ''.join(lines)