def tasks(self, job):
        """Yield one download task per symbol for the Sina FundStockHolder page.

        The symbol list comes from the ``quant_data.<job['quant_data']>``
        service client via ``grep_symbols(job.get('symbols'))``.

        Each yielded dict carries the page ``url``, the cache ``target``
        (``<job['cache_path']>/<symbol>``), a ``task_id`` equal to the symbol
        and an ``info`` dict with the output field names and job context.
        """
        print('++++++++++++++++publicFundStockHolder++++++++++++++++++')
        service_name = 'quant_data.%s' % job.get('quant_data')
        qd_client = serviced.get_service_client(service_name)

        url_template = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_FundStockHolder/stockid/%(symbol)s.phtml'
        field_names = ('symbol', 'deadline', 'fund_name', 'fund_id', 'hold_volume',
                       'hold_percent', 'hold_value', 'hold_net_worth_percent')

        for symbol in qd_client.grep_symbols(job.get('symbols')):
            # The URL uses the site-specific form of the symbol.
            page_url = url_template % {'symbol': public_symbol(symbol)}
            cache_path = job['cache_path']

            task = {}
            task['url'] = page_url
            task['target'] = '%s/%s' % (cache_path, symbol)
            task['task_id'] = '%s' % symbol
            task['info'] = {
                'fields': list(field_names),
                'symbol': symbol,
                'fund_stock_holder': True,
                'job_cache_path': cache_path,
            }
            yield task
    def tasks(self, job):
        """Yield one download task per symbol for the Sina CirculateStockHolder page.

        The symbol list comes from the ``quant_data.<job['quant_data']>``
        service client via ``grep_symbols(job.get('symbols'))``.

        Each yielded dict carries the page ``url``, the cache ``target``
        (``<job['cache_path']>/<symbol>``), a ``task_id`` equal to the symbol
        and an ``info`` dict with the output field names and the symbol.
        """
        print('++++++++++++++public++++++++++++++++++++++++')
        print(repr(job))
        service_name = 'quant_data.%s' % job.get('quant_data')
        client = serviced.get_service_client(service_name)

        url_template = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CirculateStockHolder/stockid/%(symbol)s.phtml'

        for symbol in client.grep_symbols(job.get('symbols')):
            # The URL uses the site-specific form of the symbol.
            page_url = url_template % {'symbol': public_symbol(symbol)}

            task = {}
            task['url'] = page_url
            task['target'] = '%s/%s' % (job['cache_path'], symbol)
            task['task_id'] = '%s' % symbol
            task['info'] = {
                'fields': ['deadline', 'order', 'holder_name', 'hold_volume',
                           'hold_percent', 'stock_property'],
                'symbol': symbol,
            }
            yield task
Exemple #3
0
    def filter(self, request, response):
        """Turn the HTML table in ``response.content`` into one dict per data row.

        ``request['info']`` must provide ``fields`` (at least nine names, used
        positionally as the keys of each record) and ``qd`` (suffix of the
        ``quant_data.*`` service used to normalize the symbol in column 1).
        Rows without any ``<td>`` cell are skipped.
        """
        info = request['info']
        fields = info['fields']
        qd_client = serviced.get_service_client('quant_data.%s' % info['qd'])

        table = get_table(response.content)

        for row in table.find('tr'):
            cells = pq(row)('td')

            # Rows with no <td> cells carry no data.
            if not cells:
                continue

            record = {
                fields[0]: datetime_utils.to_datetime(cells.eq(0).html()),
                fields[1]: qd_client.normalize_symbol(cells.eq(1)('a').html()),
                fields[2]: cells.eq(2)('a').html(),
                fields[3]: float_normalize(cells.eq(3).html()),
                fields[4]: volume_normalize(float_normalize(cells.eq(4).html())),
                fields[5]: value_normalize(float_normalize(cells.eq(5).html()), 'w'),
                fields[6]: str_normalize(cells.eq(6).html()),
                fields[7]: str_normalize(cells.eq(7).html()),
                fields[8]: str_normalize(cells.eq(8).html()),
            }
            yield record
Exemple #4
0
    def filter(self, request, response):
        """Parse either a fund list page or a single fund's holdings page.

        When ``info['is_fund_list_page']`` is truthy, every ``<tr>`` of the
        ``result_list`` table body, and every ``<tr>`` found in the HTML
        embedded in that div's ``<textarea>`` elements, yields
        ``{'t1': {fields[0]: ..., fields[1]: ...}}`` taken from the link text
        of columns 2 and 3.

        Otherwise the first table inside ``div.part_g`` is read, skipping its
        first two rows, and each remaining row yields ``{'t2': {...}}`` keyed
        by ``fields[0]`` .. ``fields[8]``.

        Raises ``AssertionError`` when the expected container elements are
        missing from the page.
        """
        info = request['info']
        fields = info['fields']

        if not info['is_fund_list_page']:
            qd_client = serviced.get_service_client('quant_data.%s' % info['qd'])

            part = pq(response.content)('div[class="part_g"]')
            assert part
            holdings = part('table').eq(0)
            assert holdings

            # The first two rows are skipped; only later rows are parsed.
            for row in holdings.find('tr')[2::]:
                cells = pq(row)('td')
                record = {
                    fields[0]: info['fund_id'],
                    fields[1]: info['time'],
                    fields[2]: num_normalize(cells.eq(0).html()),
                    fields[3]: qd_client.normalize_symbol(cells.eq(1).html()),
                    fields[4]: cells.eq(2).html(),
                    fields[5]: volume_normalize(float_normalize(cells.eq(3).html())),
                    fields[6]: value_normalize(float_normalize(cells.eq(4).html()), 'w'),
                    fields[7]: percent_normalize(float_normalize(cells.eq(5).html().replace('%', ''))),
                    fields[8]: volume_normalize(float_normalize(cells.eq(6)('span').html())),
                }
                yield {'t2': record}
            return

        def list_entry(row):
            # Build the 't1' record from the link text of columns 2 and 3.
            cells = pq(row)('td')
            return {'t1': {
                fields[0]: cells.eq(2)('a').html(),
                fields[1]: cells.eq(3)('a').html(),
            }}

        result_list = pq(response.content)('div[class="result_list"]')
        tbody = result_list('table')('tbody')
        assert tbody

        for row in tbody.find('tr'):
            yield list_entry(row)

        # Further rows are stored as HTML text inside <textarea> elements.
        for textarea in result_list.find('textarea'):
            embedded = pq(pq(textarea).html())
            for row in embedded.find('tr'):
                yield list_entry(row)
Exemple #5
0
    def filter(self, request, response):
        """Parse a JSON holder listing into one record per entry of ``data``.

        ``request['info']`` must provide ``fields`` (at least 21 names, used
        positionally as record keys), ``time``, ``symbol`` and ``qd`` (suffix
        of the ``quant_data.*`` service used to normalize symbols).

        Yields one dict per item of the response's ``data`` list. Each record
        also carries ``'time'`` and ``'symbol'`` taken from the request info.
        When ``data`` is absent or empty a single empty dict is yielded.
        """
        info = request['info']
        fields = info['fields']
        time = info['time']
        symbol = info['symbol']
        qd = info['qd']
        qd_client = serviced.get_service_client('quant_data.%s' % qd)

        text = response.content

        data_json = json.loads(text)
        print(repr(data_json))

        # `.get` replaces the deprecated `has_key` and folds the two
        # "no data" cases (missing key / empty value) into one branch.
        rows = data_json.get('data')
        if not rows:
            yield {}
            return

        # These values are identical for every row, so compute them once
        # instead of once per holder.
        record_time = date_normalize(time, '%Y-%m-%d')
        record_symbol = qd_client.normalize_symbol(symbol)

        for item in rows:
            d = {
                'time': record_time,
                'symbol': record_symbol,
                fields[0]: item['id'],
                fields[1]: item['fund_name'],
                fields[2]: item['fund_short_name'],
                fields[3]: item['fund_id'],
                fields[4]: item['company_short_name'],
                fields[5]: item['advisor_id'],
                # NOTE(review): 'mangers_name' looks like a typo next to
                # 'managers_id' -- confirm against the upstream payload
                # before changing it.
                fields[6]: item['mangers_name'],
                fields[7]: item['managers_id'],
                fields[8]: volume_normalize(item['holding_num']),
                fields[9]: percent_normalize(item['holding_ratio']),
                fields[10]: volume_normalize(item['pre_holding_num']),
                fields[11]: percent_normalize(item['pre_holding_ratio']),
                fields[12]: volume_normalize(item['holding_num_change']),
                fields[13]: percent_normalize(item['holding_ratio_change']),
                fields[14]: item['max_holding_stock_name'],
                fields[15]: percent_normalize(item['max_holding_ratio']),
                # from_code may be empty; only normalize a real code.
                fields[16]: qd_client.normalize_symbol(item['from_code']) if item['from_code'] else None,
                fields[17]: item['from_name'],
                fields[18]: item['company_name'],
                fields[19]: qd_client.normalize_symbol(item['max_holding_sec_code']),
                fields[20]: volume_normalize(item['max_holding']),
            }
            yield d
Exemple #6
0
    def tasks(self, job):
        """Yield one chart-data download task per symbol.

        Job keys read: ``quant_data`` (falls back to ``'futures'``),
        ``symbols``, ``period`` (default ``'m1'``), ``count`` (default 200),
        ``fields`` (default ``['amount', 'volume']``) and ``cache_path``.

        Raises ``KeyError`` for a field missing from the indicator map, or,
        once there is at least one symbol, for an unknown period.
        """
        print('++++++++++++++++++++++++++++++++++++++++++++++')
        print(repr(job))
        # NOTE(review): looks like a leftover debugging hook -- confirm
        # whether it is still wanted here.
        BREAK_POINT()
        source = job.get('quant_data') or 'futures'
        client = serviced.get_service_client('quant_data.%s' % source)
        symbols = client.grep_symbols(job.get('symbols'))
        period = job.get('period', 'm1')
        count = job.get('count', 200)
        fields = job.get('fields', ['amount', 'volume'])

        # Job period name -> value of the `zq` URL parameter.
        period_map = dict(
            m1='min1',
            m5='min5',
            m30='min30',
            day='day',
            week='week',
            month='month',
        )

        # Job field name -> indicator name used in the `zb` URL parameter.
        indicator_map = dict(amount='AMOUNT', volume='VOL')
        zb = '*'.join([indicator_map[name] for name in fields])

        #NOTE! url params order is important!
        url_template = 'http://www.baring.cn:81/chartdata?callback=%(callback)s&symbol=%(symbol)s&zb=%(zb)s&zq=%(zq)s&n=%(n)s'

        for symbol in symbols:
            query = {'callback': 'X'}
            query['symbol'] = ytcj_symbol(symbol)
            query['zq'] = period_map[period]
            query['n'] = count
            query['zb'] = zb

            task = {
                'url': url_template % query,
                'target': '%s/%s/%s' % (job['cache_path'], period, symbol),
                'task_id': '%s:%s' % (period, symbol),
                'info': {
                    'fields': ['time', 'open', 'high', 'low', 'close'] + fields,
                    'symbol': symbol,
                },
            }
            yield task
 def normalize_value(self, key, value):
     """Expand a job parameter into a list of concrete values.

     * ``symbol``: sorted result of ``get_symbols(value)`` from the
       ``quant_data.stock`` service.
     * ``period`` with value ``'*'``: the fixed list of period names.
     * ``date``: a string is expanded through ``get_date_range``; a dict is
       expanded from its ``start`` / ``end`` entries.

     Anything else is passed through ``get_list(value)``.
     """
     if key == 'symbol':
         #TODO quant_data env switch
         #NOTE: sorted, is for `order by time,symbol` in realtime pipeline
         stock_client = serviced.get_service_client('quant_data.stock')
         return sorted(stock_client.get_symbols(value))

     if key == 'period' and value == '*':
         return 'm1 m5 m30 day week month quarter year'.split()

     if key == 'date':
         if is_str(value):
             start, end = get_date_range(value)
             return list(date_sequence(start, end_date=end, to_str=True))
         if isinstance(value, dict):
             start, end = value.get('start'), value.get('end')
             return list(date_sequence(start, end_date=end, to_str=True))

     return get_list(value)
    def execute(self, job):
        """Run the crawler described by ``job`` and route its output.

        The crawler is built from ``job['crawler']`` and stored on
        ``self.crawler``. With ``job['debug']`` set it gets no crawler
        server; otherwise it gets the ``quant_crawler`` service client.

        In debug mode, or when the job has no ``output``, each item of the
        stream is printed as JSON. Otherwise the stream is written to the
        ``output`` series of the tsdb client. Always returns ``None``.
        """
        debug = job.get('debug')

        crawler_server = None if debug else serviced.get_service_client('quant_crawler')
        self.crawler = create_obj_from_config(
            job['crawler'], kwargs={'crawler_server': crawler_server})
        stream = self.crawler.feed(job)

        output = job.get('output')
        if not debug and output is not None:
            tsdb = self.tsdb()
            tsdb.create_series(output)
            tsdb.write_series(output, None, stream, timeout=None)
            return None

        # Iterating the stream is what drives the generator.
        for item in stream:
            print(json_ext.dumps(item))
        return None
    def tasks(self, job):
        """Yield one download task per symbol for the Sina HK "rights" page.

        The symbols are currently a fixed list; the result of
        ``grep_symbols`` is fetched but not used. Each task's ``info``
        carries two field-name lists, ``fields1`` and ``fields2``.
        """
        service_name = 'quant_data.%s' % job.get('quant_data')
        qd_client = serviced.get_service_client(service_name)
        # TODO: `symbols` is fetched but unused; the fixed list below should
        # eventually be derived from it.
        symbols = qd_client.grep_symbols(job.get('symbols'))

        url_template = 'http://stock.finance.sina.com.cn/hkstock/rights/%(symbol)s.html'

        #the list ['00005', '00857', '00700'] should change by symbols
        for symbol in ('00005', '00857', '00700'):
            task = {
                'url': url_template % {'symbol': symbol},
                'target': '%s/%s' % (job['cache_path'], symbol),
                'task_id': '%s' % symbol,
            }
            task['info'] = {
                'fields1': ['symbol', 'time', 'holder_Chinese_name', 'holder_name', 'pre_hold_volume', 'pre_hold_percent', 'pre_hold_kind', 'hold_volume', 'hold_percent', 'hold_kind', 'stock_property'],
                'fields2': ['symbol', 'time', 'volume', 'highest_price', 'lowest_price', 'value', 'average_price'],
                'symbol': symbol,
            }
            yield task
Exemple #10
0
 def tsdb(self):
     """Return the service client registered under the name 'quant_tsdb'."""
     client = serviced.get_service_client('quant_tsdb')
     return client