def tasks(self, job):
    """Yield one crawl task per symbol for the Sina fund-stock-holder page.

    ``job`` is read as a mapping:

    * ``quant_data`` -- suffix of the ``quant_data.<name>`` service to use.
    * ``symbols`` -- passed to that service's ``grep_symbols``.
    * ``cache_path`` -- prefix of every task ``target``; it is also copied
      into the task ``info`` as ``job_cache_path``.

    Each yielded dict carries ``url``, ``target``, ``task_id`` and ``info``.
    """
    print('++++++++++++++++publicFundStockHolder++++++++++++++++++')
    service_name = 'quant_data.%s' % job.get('quant_data')
    client = serviced.get_service_client(service_name)
    url_template = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_FundStockHolder/stockid/%(symbol)s.phtml'

    for symbol in client.grep_symbols(job.get('symbols')):
        page_url = url_template % {'symbol': public_symbol(symbol)}
        # A fresh ``fields`` list is built for every task so that tasks never
        # share a mutable object.
        task_info = {
            'fields': [
                'symbol',
                'deadline',
                'fund_name',
                'fund_id',
                'hold_volume',
                'hold_percent',
                'hold_value',
                'hold_net_worth_percent',
            ],
            'symbol': symbol,
            'fund_stock_holder': True,
            'job_cache_path': job['cache_path'],
        }
        yield {
            'url': page_url,
            'target': '%s/%s' % (job['cache_path'], symbol),
            'task_id': '%s' % symbol,
            'info': task_info,
        }
def tasks(self, job):
    """Yield one crawl task per symbol for the Sina circulate-stock-holder page.

    ``job`` is read as a mapping: ``quant_data`` selects the
    ``quant_data.<name>`` service client, ``symbols`` is handed to that
    client's ``grep_symbols`` and ``cache_path`` prefixes each task target.

    Each yielded dict carries ``url``, ``target``, ``task_id`` and ``info``
    (the output field names plus the symbol).
    """
    print('++++++++++++++public++++++++++++++++++++++++')
    print(repr(job))

    service_name = 'quant_data.%s' % job.get('quant_data')
    client = serviced.get_service_client(service_name)
    url_template = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CirculateStockHolder/stockid/%(symbol)s.phtml'

    for symbol in client.grep_symbols(job.get('symbols')):
        page_url = url_template % {'symbol': public_symbol(symbol)}
        task_info = {
            'fields': [
                'deadline',
                'order',
                'holder_name',
                'hold_volume',
                'hold_percent',
                'stock_property',
            ],
            'symbol': symbol,
        }
        yield {
            'url': page_url,
            'target': '%s/%s' % (job['cache_path'], symbol),
            'task_id': '%s' % symbol,
            'info': task_info,
        }
def filter(self, request, response):
    """Turn the table found in ``response.content`` into one dict per row.

    ``request['info']`` must provide ``fields`` (at least nine output key
    names, used positionally) and ``qd`` (suffix of the ``quant_data.<name>``
    service whose ``normalize_symbol`` is applied to the second column).

    Rows without any ``td`` cell are skipped.
    """
    info = request['info']
    fields = info['fields']
    client = serviced.get_service_client('quant_data.%s' % info['qd'])

    table = get_table(response.content)
    for row in table.find('tr'):
        cells = pq(row)('td')
        if not cells:
            # No data cells in this row -- nothing to emit.
            continue

        def cell_html(index, cells=cells):
            # Inner HTML of the ``index``-th cell of the current row.
            return cells.eq(index).html()

        yield {
            fields[0]: datetime_utils.to_datetime(cell_html(0)),
            # Columns 1 and 2 take their text from the link inside the cell.
            fields[1]: client.normalize_symbol(cells.eq(1)('a').html()),
            fields[2]: cells.eq(2)('a').html(),
            fields[3]: float_normalize(cell_html(3)),
            fields[4]: volume_normalize(float_normalize(cell_html(4))),
            fields[5]: value_normalize(float_normalize(cell_html(5)), 'w'),
            fields[6]: str_normalize(cell_html(6)),
            fields[7]: str_normalize(cell_html(7)),
            fields[8]: str_normalize(cell_html(8)),
        }
def filter(self, request, response):
    """Parse either a fund list page or a single fund's holdings page.

    ``request['info']`` selects the mode through ``is_fund_list_page``:

    * truthy -- every ``tr`` under ``div[class="result_list"]`` (both in the
      visible ``tbody`` and inside any ``textarea`` of that div) yields
      ``{'t1': {...}}`` built from the links in the third and fourth cells.
    * falsy -- every ``tr`` after the first two of the first table inside
      ``div[class="part_g"]`` yields ``{'t2': {...}}``; ``info`` must then
      also provide ``qd``, ``fund_id`` and ``time``.

    ``info['fields']`` supplies the output key names positionally.
    Raises ``AssertionError`` when the expected container is not found.
    """
    info = request['info']
    fields = info['fields']

    def fund_link_record(row):
        # Key/value pair taken from the anchors in cells 2 and 3 of ``row``.
        row = pq(row)
        return {
            fields[0]: row('td').eq(2)('a').html(),
            fields[1]: row('td').eq(3)('a').html(),
        }

    if info['is_fund_list_page']:
        result_div = pq(response.content)('div[class="result_list"]')
        tbody = result_div('table')('tbody')
        assert tbody
        for row in tbody.find('tr'):
            yield {'t1': fund_link_record(row)}
        # Further rows are embedded as markup inside textarea elements.
        for area in result_div.find('textarea'):
            embedded = pq(pq(area).html())
            for row in embedded.find('tr'):
                yield {'t1': fund_link_record(row)}
        return

    client = serviced.get_service_client('quant_data.%s' % info['qd'])
    part = pq(response.content)('div[class="part_g"]')
    assert part
    table = part('table').eq(0)
    assert table
    # The first two rows are skipped (presumably header rows -- TODO confirm).
    for row in table.find('tr')[2::]:
        cells = pq(row)('td')
        yield {
            't2': {
                fields[0]: info['fund_id'],
                fields[1]: info['time'],
                fields[2]: num_normalize(cells.eq(0).html()),
                fields[3]: client.normalize_symbol(cells.eq(1).html()),
                fields[4]: cells.eq(2).html(),
                fields[5]: volume_normalize(float_normalize(cells.eq(3).html())),
                fields[6]: value_normalize(float_normalize(cells.eq(4).html()), 'w'),
                fields[7]: percent_normalize(float_normalize(cells.eq(5).html().replace('%', ''))),
                fields[8]: volume_normalize(float_normalize(cells.eq(6)('span').html())),
            }
        }
def filter(self, request, response):
    """Convert a JSON holdings response into one record per ``data`` item.

    ``request['info']`` must provide ``fields`` (at least 21 output key
    names, used positionally), ``time``, ``symbol`` and ``qd`` (suffix of
    the ``quant_data.<name>`` service used for symbol normalisation).

    ``response.content`` is decoded with ``json.loads``.  When the payload
    has no ``data`` key, or ``data`` is empty, a single empty dict is
    yielded; otherwise one dict per item is yielded.
    """
    info = request['info']
    fields = info['fields']
    time = info['time']
    symbol = info['symbol']
    qd = info['qd']

    data_json = json.loads(response.content)
    print(repr(data_json))

    # ``dict.has_key`` no longer exists on Python 3; ``get`` covers both the
    # "key missing" and the "value empty" case in one lookup.
    rows = data_json.get('data')
    if not rows:
        yield {}
        return

    # The service client is only needed when there is something to normalise.
    qd_client = serviced.get_service_client('quant_data.%s' % qd)
    # These two values are identical for every row, so compute them once.
    record_time = date_normalize(time, '%Y-%m-%d')
    record_symbol = qd_client.normalize_symbol(symbol)

    for item in rows:
        yield {
            'time': record_time,
            'symbol': record_symbol,
            fields[0]: item['id'],
            fields[1]: item['fund_name'],
            fields[2]: item['fund_short_name'],
            fields[3]: item['fund_id'],
            fields[4]: item['company_short_name'],
            fields[5]: item['advisor_id'],
            # NOTE(review): 'mangers_name' is the key as read from the
            # payload; confirm the spelling against the upstream API before
            # changing it.
            fields[6]: item['mangers_name'],
            fields[7]: item['managers_id'],
            fields[8]: volume_normalize(item['holding_num']),
            fields[9]: percent_normalize(item['holding_ratio']),
            fields[10]: volume_normalize(item['pre_holding_num']),
            fields[11]: percent_normalize(item['pre_holding_ratio']),
            fields[12]: volume_normalize(item['holding_num_change']),
            fields[13]: percent_normalize(item['holding_ratio_change']),
            fields[14]: item['max_holding_stock_name'],
            fields[15]: percent_normalize(item['max_holding_ratio']),
            fields[16]: qd_client.normalize_symbol(item['from_code']) if item['from_code'] else None,
            fields[17]: item['from_name'],
            fields[18]: item['company_name'],
            # NOTE(review): unlike 'from_code', this value is normalised
            # without an emptiness guard -- confirm it is always present.
            fields[19]: qd_client.normalize_symbol(item['max_holding_sec_code']),
            fields[20]: volume_normalize(item['max_holding']),
        }
def tasks(self, job):
    """Yield one chart-data crawl task per symbol.

    Keys read from ``job``:

    * ``quant_data`` -- service suffix, falls back to ``'futures'``.
    * ``symbols`` -- passed to the service's ``grep_symbols``.
    * ``period`` -- one of ``m1 m5 m30 day week month`` (default ``'m1'``).
    * ``count`` -- value of the ``n`` query parameter (default ``200``).
    * ``fields`` -- indicator names out of ``amount`` / ``volume``
      (default: both).
    * ``cache_path`` -- prefix of every task ``target``.

    Raises ``KeyError`` for an unknown field name, and for an unknown period
    as soon as the first symbol is processed.
    """
    print('++++++++++++++++++++++++++++++++++++++++++++++')
    print(repr(job))
    BREAK_POINT()

    service_name = 'quant_data.%s' % (job.get('quant_data') or 'futures')
    client = serviced.get_service_client(service_name)
    symbols = client.grep_symbols(job.get('symbols'))

    period = job.get('period', 'm1')
    count = job.get('count', 200)
    fields = job.get('fields', ['amount', 'volume'])

    # Translation tables from the job's vocabulary to the remote one.
    remote_period = {
        'm1': 'min1',
        'm5': 'min5',
        'm30': 'min30',
        'day': 'day',
        'week': 'week',
        'month': 'month',
    }
    remote_indicator = {'amount': 'AMOUNT', 'volume': 'VOL'}
    indicators = [remote_indicator[name] for name in fields]

    #NOTE! url params order is important!
    url_template = 'http://www.baring.cn:81/chartdata?callback=%(callback)s&symbol=%(symbol)s&zb=%(zb)s&zq=%(zq)s&n=%(n)s'

    for symbol in symbols:
        query = {
            'callback': 'X',
            'symbol': ytcj_symbol(symbol),
            'zq': remote_period[period],
            'n': count,
            'zb': '*'.join(indicators),
        }
        yield {
            'url': url_template % query,
            'target': '%s/%s/%s' % (job['cache_path'], period, symbol),
            'task_id': '%s:%s' % (period, symbol),
            'info': {
                'fields': ['time', 'open', 'high', 'low', 'close'] + fields,
                'symbol': symbol,
            },
        }
def normalize_value(self, key, value):
    """Expand a job parameter ``value`` into a list, depending on ``key``.

    * ``'symbol'`` -- sorted result of ``get_symbols(value)`` on the
      ``quant_data.stock`` service.
    * ``'period'`` with value ``'*'`` -- the eight known period names.
    * ``'date'`` -- a list of date strings; a string value is first split
      into a range by ``get_date_range``, a dict value supplies ``start``
      and ``end``.

    Every other combination is handed to ``get_list``.
    """
    if key == 'symbol':
        #TODO quant_data env switch
        #NOTE: sorted, is for `order by time,symbol` in realtime pipeline
        client = serviced.get_service_client('quant_data.stock')
        return sorted(client.get_symbols(value))

    if key == 'period' and value == '*':
        return 'm1 m5 m30 day week month quarter year'.split()

    if key == 'date':
        if is_str(value):
            first, last = get_date_range(value)
            return list(date_sequence(first, end_date=last, to_str=True))
        if isinstance(value, dict):
            days = date_sequence(
                value.get('start'),
                end_date=value.get('end'),
                to_str=True,
            )
            return list(days)

    return get_list(value)
def execute(self, job):
    """Build the crawler described by ``job`` and consume its feed.

    The crawler is created from ``job['crawler']`` and stored on
    ``self.crawler``.  With a truthy ``job['debug']`` it receives no crawler
    server; otherwise it receives the ``quant_crawler`` service client.

    The feed is printed as JSON, one record per line, when the job is in
    debug mode or has no ``output``; otherwise the series named ``output``
    is created in the tsdb and the feed is written to it.

    Returns ``None``.
    """
    debug = job.get('debug')
    crawler_server = None if debug else serviced.get_service_client('quant_crawler')

    self.crawler = create_obj_from_config(
        job['crawler'],
        kwargs={'crawler_server': crawler_server},
    )
    stream = self.crawler.feed(job)
    output = job.get('output')

    if not debug and output is not None:
        store = self.tsdb()
        store.create_series(output)
        store.write_series(output, None, stream, timeout=None)
        return

    # Iterating is what actually drives the generator.
    for record in stream:
        print(json_ext.dumps(record))
    return None
def tasks(self, job):
    """Yield crawl tasks for the Sina HK-stock rights pages.

    ``job['quant_data']`` selects the ``quant_data.<name>`` service and
    ``job['symbols']`` is passed to its ``grep_symbols``, but the result is
    currently not used: tasks are produced for a fixed list of three
    symbols.  ``job['cache_path']`` prefixes each task target.
    """
    client = serviced.get_service_client('quant_data.%s' % job.get('quant_data'))
    # Resolved but not consumed yet -- see the note on the loop below.
    symbols = client.grep_symbols(job.get('symbols'))

    url_template = 'http://stock.finance.sina.com.cn/hkstock/rights/%(symbol)s.html'

    #the list ['00005', '00857', '00700'] should change by symbols
    for symbol in ('00005', '00857', '00700'):
        task_info = {
            'fields1': [
                'symbol',
                'time',
                'holder_Chinese_name',
                'holder_name',
                'pre_hold_volume',
                'pre_hold_percent',
                'pre_hold_kind',
                'hold_volume',
                'hold_percent',
                'hold_kind',
                'stock_property',
            ],
            'fields2': [
                'symbol',
                'time',
                'volume',
                'highest_price',
                'lowest_price',
                'value',
                'average_price',
            ],
            'symbol': symbol,
        }
        yield {
            'url': url_template % {'symbol': symbol},
            'target': '%s/%s' % (job['cache_path'], symbol),
            'task_id': '%s' % symbol,
            'info': task_info,
        }
def tsdb(self):
    """Return the service client registered under ``quant_tsdb``."""
    client = serviced.get_service_client('quant_tsdb')
    return client