Example 1
def get_codes(delay=.0):  # 20200810: need delay 4s
    codes = []
    urls = [
        'http://app.finance.ifeng.com/list/stock.php?t=ha&f=symbol&o=asc',
        'http://app.finance.ifeng.com/list/stock.php?t=hs&f=symbol&o=asc',
        'http://app.finance.ifeng.com/list/stock.php?t=sa&f=symbol&o=asc',
        'http://app.finance.ifeng.com/list/stock.php?t=kcb&f=symbol&o=asc',
    ]

    lr = LRequest(delay=delay)

    try:
        for url in urls:  # urls holds plain URL strings, no second tuple member
            # logger.info('Load: %s' % url)
            lr.load(url, isdecode=True)
            while 1:
                for ele in lr.xpaths(
                        '//div[@class="tab01"]/table//td[1]/a')[:-1]:
                    code = ele.text.strip()
                    if code.isdigit():
                        codes.append(code)

                next_ele = lr.xpath(u'//a[contains(text(), "下一页")]')
                if next_ele is None:
                    break
                next_url = urljoin(url, next_ele.attrib['href'])
                # logger.info('Load: %s' % next_url)
                lr.load(next_url, isdecode=True)
    except:
        logger.error(traceback.format_exc())
    return codes
Example 2
def do(queue, string_proxy):
    # f (output file), check_captcha and Queue.Empty are assumed to be defined
    # at module level in the original script.
    lr = LRequest(string_proxy=string_proxy)
    while 1:
        try:
            # https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=sheets+silk
            category = queue.get(timeout=30)
            url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%%3Daps&field-keywords=%s' % urllib.quote_plus(
                category)

            lr.load(url)
            if check_captcha(lr):
                lr.load(url)
            ele = lr.xpath('//h2[@id="s-result-count"]')

            f.write('%s\t%s\n' % (category, ele.text.split(
                'result', 1)[0].split('of')[-1].strip().replace(',', '')))
            f.flush()
            print '%s\t%s' % (category, ele.text.split(
                'result', 1)[0].split('of')[-1].strip().replace(',', ''))

        except Empty:
            print 'empty'
            break
        except Exception as e:
            queue.put(category)
            print 'EEEEEEEEE %s' % e
Example 3
def get_new_stock_code(year=None):

    lr = LRequest()
    stock_codes = []

    if year is None:
        year = str(datetime.date.today().year)

    lr.load('http://quotes.money.163.com/data/ipo/shengou.html?reportdate=%s' %
            year)
    # lr.loads(BeautifulSoup(lr.body).prettify())

    for ele in lr.xpaths('//table[@id="plate_performance"]/tr/td[3]'):  # codes
        # print ele.text.strip()
        stock_codes.append(ele.text.strip())

    for ele in lr.xpaths(
            '//div[@class="fn_cm_pages"]//a[contains(@href, "page")]'
    )[:-1]:  # pages
        u = urljoin('http://quotes.money.163.com/data/ipo/shengou.html',
                    ele.attrib['href'])

        lr.load(u)
        lr.loads(BeautifulSoup(lr.body, 'lxml').prettify())

        for ce in lr.xpaths(
                '//table[@id="plate_performance"]/tr/td[3]'):  # codes
            # print ce.text.strip()
            stock_codes.append(ce.text.strip())

    return stock_codes
Example 4
class GoogleSearch(object):

    search_url = 'https://www.google.%(tld)s/search?q=%(query)s&hl=%(lang)s&filter=%(filter)d&num=%(num)d&start=%(start)s&btnG=Google+Search'

    def __init__(self, query, *args, **kwargs):

        self.query = query

        self._tld = kwargs.get('tld', 'com')
        self._filter = kwargs.get('filter', 0)
        self._lang = kwargs.get('lang', 'en')
        self._num = kwargs.get('num', 100)
        self._page = kwargs.get('page', 0)

        timeout = kwargs.get('timeout', 90)
        string_proxy = kwargs.get('string_proxy', None)

        self.lr = LRequest(timeout=timeout, string_proxy=string_proxy, handers=[GoogleHTTPErrorProcessor(), ])


    @property
    def page(self):
        return self._page

    @page.setter
    def page(self, value):
        self._page = value


    def _get_result(self):
        safe_url = self.search_url % {'query': urllib.quote_plus(self.query),
                            'start': self.page * self._num,
                            'num': self._num,
                            'tld' : self._tld,
                            'lang' : self._lang,
                            'filter': self._filter}

        print safe_url
        self.lr.load(safe_url)

        results = []
        i = 0
        for r in self.lr.xpaths('//li[@class="g"]'):  # xpaths returns the full node list
            i += 1
            result = {}
            result['title'] = ''.join(r.xpath('./div/h3//text()'))
            result['description'] = ''.join(r.xpath('./div//span[@class="st"]//text()'))
            result['url'] = ''.join(r.xpath('./div/h3/a/@href'))

            results.append(result)

        print i

        return results

    def get_result(self):

        return self._get_result()
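
A minimal usage sketch, assuming GoogleSearch is importable from the surrounding module (the module name below is hypothetical) and that Google still serves the markup the XPath expressions expect:

# hypothetical module name; GoogleSearch is the class defined above
from google_search import GoogleSearch

gs = GoogleSearch('site reliability engineering', tld='com', lang='en', num=10)
gs.page = 0
for r in gs.get_result():
    print('%s\n%s\n%s\n' % (r['title'], r['url'], r['description']))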
Example 5
def get_codes_sina(delay=.0):
    codes = []
    # url = 'http://vip.stock.finance.sina.com.cn/datacenter/hqstat.html#jdgd'
    url = '''http://money.finance.sina.com.cn/quotes_service/api/jsonp_v2.php/IO.XSRV2.CallbackList['ys65jC9HtVOEBgTh']/StatisticsService.getPeriodList?page=1&num=9999&sort=_5high&asc=0&node=adr_hk'''

    lr = LRequest(delay=delay)

    try:
        lr.load(url, isdecode=True)

        for s in json.loads(lr.body.split('](', 1)[-1][:-2]):
            codes.append(s['symbol'])
    except:
        logger.error(traceback.format_exc())

    return codes
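
The JSONP unwrapping above relies on the response ending with ");". A self-contained sketch of the same slicing on an illustrative payload (the sample string below is made up, shaped like the Sina response):

import json

body = """IO.XSRV2.CallbackList['ys65jC9HtVOEBgTh']([{"symbol": "hk00001"}, {"symbol": "hk00002"}]);"""

# drop everything up to and including "](", then strip the trailing ");"
payload = body.split('](', 1)[-1][:-2]
print([s['symbol'] for s in json.loads(payload)])  # ['hk00001', 'hk00002']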
Example 6
def do(queue, string_proxy):
    lr = LRequest(string_proxy=string_proxy)
    while 1:
        try:
            # https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=sheets+silk
            category = queue.get(timeout=30)
            url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%%3Daps&field-keywords=%s' % urllib.quote_plus(
                category)

            lr.load(url)
            if check_captcha(lr):
                lr.load(url)

            total_price = 0.0
            count = 0.0
            price_eles = lr.xpaths(
                '//span[contains(@class, "s-price a-text-bold")]')
            for price_ele in price_eles:  # $49.99
                price = price_ele.text.replace('$', '').replace(',', '').split(
                    '-', 1)[0].strip()
                try:
                    float(price)
                except:
                    pass
                else:
                    total_price += float(price)
                    count += 1
            ave_price = 0.0  # fall back to 0.0 when no prices were parsed
            if count > 0:
                ave_price = total_price / count

            ele = lr.xpath('//h2[@id="s-result-count"]')

            f.write('%s\t%s\t%.2f\n' % (category, ele.text.split(
                'result', 1)[0].split('of')[-1].strip().replace(
                    ',', ''), ave_price))
            f.flush()
            print '%s\t%s\t%.2f' % (category, ele.text.split(
                'result', 1)[0].split('of')[-1].strip().replace(',',
                                                                ''), ave_price)

        except Empty:
            print 'empty'
            break
        except Exception as e:
            traceback.print_exc()
            queue.put(category)
            print 'EEEEEEEEE %s' % e
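
The price cleanup above reduces a string such as "$1,299.99 - $1,499.99" to its lower bound before averaging. A self-contained sketch of that step (the sample prices are made up):

# illustrative price strings in the format noted in the comment above ($49.99)
for raw in ['$49.99', '$1,299.99 - $1,499.99', 'See price in cart']:
    price = raw.replace('$', '').replace(',', '').split('-', 1)[0].strip()
    try:
        print(float(price))  # 49.99, then 1299.99
    except ValueError:
        pass  # non-numeric strings are skipped, as in the loop above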
Example 7
def iter_name(string_proxy, queue):
    lr = LRequest(string_proxy)

    while 1:
        try:
            url, deep = queue.get(timeout=30)
            xp = '//ul[@id="zg_browseRoot"]/%s/li/a' % '/'.join(['ul' for i in range(deep)])
            # print xp
            lr.load(url.encode('utf-8'))

            next_deep = deep + 1
            for ele in lr.xpaths(xp):
                name = ele.text.strip()
                if name not in categories:
                    categories.add(name)
                    print name.encode('utf-8')
                    queue.put([ele.attrib['href'], next_deep])

        except Empty:
            print 'Empty'
Example 8
def get_all_codes():
    stock_code_url = 'http://quote.eastmoney.com/center/gridlist.html'  # 'http://quote.eastmoney.com/stocklist.html' # us: http://quote.eastmoney.com/usstocklist.html
    exchanges = ['ss', 'sz', 'hk']

    lr = LRequest()
    stock_codes = []

    lr.load(stock_code_url)

    # stock_eles = lr.xpath('//div[@id="quotesearch"]//li/a[@target="_blank"]')
    stock_exchange_eles = lr.xpaths('//div[@id="quotesearch"]/ul')

    for i, stock_exchange_ele in enumerate(stock_exchange_eles):
        stock_eles = stock_exchange_ele.xpath('./li/a[@target="_blank"]')
        for stock_ele in stock_eles:
            # code = stock_ele.get('href').rsplit('/', 1)[-1].split('.', 1)[0]
            if stock_ele.text:
                code = stock_ele.text.split('(', 1)[-1].split(')', 1)[0]

                stock_codes.append((exchanges[i], code))

    return stock_codes
Example 9
def iter_name(string_proxy, queue):
    lr = LRequest(string_proxy)

    while 1:
        try:
            url, deep = queue.get(timeout=30)
            xp = '//ul[@id="zg_browseRoot"]/%s/li/a' % '/'.join(['ul' for i in range(deep)])
            # logger.info(xp)
            lr.load(url)

            next_deep = deep + 1
            for ele in lr.xpaths(xp):
                name = ele.text.strip()
                if name not in categories:
                    categories.add(name)
                    logger.info(name)
                    queue.put([ele.attrib['href'], next_deep])
        # except KeyboardInterrupt:
        #     return
        except Empty:
            logger.info('Empty')
        except Exception:
            # Empty must be caught before Exception, otherwise this branch shadows it
            traceback.print_exc()
Example 10
class GsaCaptcha():

    lr = None
    ip = ''
    port = ''

    def __init__(self, ip='127.0.0.1', port='80'):
        self.ip = ip
        self.port = port

        self.lr = LRequest()

    def decode(self, file_path):
        try:

            form = ParseFile(StringIO.StringIO(gsa_form_str %
                                               (self.ip, self.port)),
                             base_uri='http://%s:%s' % (self.ip, self.port))[0]
            form.add_file(open(file_path, 'rb'), name='file')
            self.lr.load(form.click(), is_xpath=False)
            gsa_result = self.lr.body
            result = ''
            if gsa_result.find('<span id="captcha_result">') > -1:
                result = gsa_result.split(
                    '<span id="captcha_result">')[1].split('</span>')[0]

            return result
        except:
            raise

    def decode_stream(self, file_data):
        try:

            form = ParseFile(StringIO.StringIO(gsa_form_str %
                                               (self.ip, self.port)),
                             base_uri='http://%s:%s' % (self.ip, self.port))[0]
            form.add_file(StringIO.StringIO(file_data), name='file')
            self.lr.load(form.click(), is_xpath=False)
            result = ''
            gsa_result = self.lr.body
            if gsa_result.find('<span id="captcha_result">') > -1:
                result = gsa_result.split(
                    '<span id="captcha_result">')[1].split('</span>')[0]

            return result
        except:
            raise

    def decode_url(self, url):
        try:
            self.lr.load(url)

            form = ParseFile(StringIO.StringIO(gsa_form_str %
                                               (self.ip, self.port)),
                             base_uri='http://%s:%s' % (self.ip, self.port))[0]
            form.add_file(StringIO.StringIO(self.lr.body), name='file')
            self.lr.load(form.click(), is_xpath=False)
            result = ''
            gsa_result = self.lr.body
            if gsa_result.find('<span id="captcha_result">') > -1:
                result = gsa_result.split(
                    '<span id="captcha_result">')[1].split('</span>')[0]

            return result
        except:
            raise
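
A minimal usage sketch, assuming a GSA Captcha Breaker instance is listening on the host and port below (the endpoint, file path and URL are placeholders):

# placeholder endpoint; GsaCaptcha is the class defined above
captcha = GsaCaptcha(ip='127.0.0.1', port='8000')

print(captcha.decode('captcha.jpg'))                      # decode an image file on disk
print(captcha.decode_url('http://example.com/captcha'))   # fetch an image URL, then decode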
Example 11
class AmazonBase(object):

    CACHE_ROOT = ''
    CACHE_PAGES_ROOT = ''
    CACHE_IMAGES_ROOT = ''

    CACHE_EXPIRED_DAYS = 15

    captcha = None

    def __init__(self, **kwargs):

        self.lr = LRequest(string_proxy=kwargs.get('string_proxy', ''))

        self.captcha = GsaCaptcha(ip=kwargs.get('gsa_ip', '192.168.1.188'),
                                  port=kwargs.get('gsa_port', '8000'))

        self.CACHE_ROOT = config.AMAZON_CACHE_ROOT
        self.CACHE_PAGES_ROOT = kwargs.get(
            'cache_page', os.path.join(self.CACHE_ROOT, 'pages'))
        self.CACHE_IMAGES_ROOT = kwargs.get(
            'cache_image', os.path.join(self.CACHE_ROOT, 'images'))

        if not os.path.exists(self.CACHE_ROOT): os.makedirs(self.CACHE_ROOT)
        if not os.path.exists(self.CACHE_PAGES_ROOT):
            os.makedirs(self.CACHE_PAGES_ROOT)
        if not os.path.exists(self.CACHE_IMAGES_ROOT):
            os.makedirs(self.CACHE_IMAGES_ROOT)

        self.domain = kwargs.get('domain', 'amazon.com')

        self.CACHE_EXPIRED_DAYS = kwargs.get('cache_expired_days', 15)

    def load(self, url, is_xpath=True, is_decode=True):
        # logger.info('Load Url: %s' % url)
        url = urllib.parse.quote(url, safe='https:/')
        self.lr.load(url, is_xpath=is_xpath, is_decode=is_decode)
        if self.check_captcha():
            self.lr.load(url, is_xpath=is_xpath, is_decode=is_decode)

    def check_captcha(self):
        if self.captcha is not None:
            captcha_img_ele = self.lr.xpath(
                '//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]'
            )
            if captcha_img_ele is not None:
                while 1:
                    logger.info('Need Captcha')

                    try:
                        if captcha_img_ele is not None:
                            print('##### %s ' % captcha_img_ele.attrib['src'])
                            form = self.lr.get_forms()[0]
                            self.lr.load(captcha_img_ele.attrib['src'])
                            cap = self.captcha.decode_stream(self.lr.body)
                            logger.info('Captcha: %s' % cap)

                            form['field-keywords'] = cap
                            self.lr.load(form.click())
                        else:
                            return True

                        captcha_img_ele = self.lr.xpath(
                            '//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]'
                        )

                    except KeyboardInterrupt:
                        raise
                    except IndexError:
                        self.lr.load(self.lr.current_url)
                        captcha_img_ele = self.lr.xpath(
                            '//form[contains(@action, "Captcha")]//img[contains(@src, "captcha")]'
                        )
                        if captcha_img_ele is None:
                            return True
                    except:
                        # open(os.path.join('I:\\captcha_error_page', '%s.html' % time.time()), 'w').write(self.lr.body)
                        logger.error(traceback.format_exc())

            return False
        else:
            raise RuntimeError('No captcha server configured')

    def exists_cache(self, cache_name):
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0],
                                  cache_name[1], cache_name)
        return os.path.exists(cache_path)

    def remove_cache(self, cache_name):
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0],
                                  cache_name[1], cache_name)

        if os.path.exists(cache_path):
            try:
                os.remove(cache_path)
            except:
                pass

    def load_cache(self, cache_name):
        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0],
                                  cache_name[1], cache_name)

        if os.path.exists(cache_path):
            try:
                return pickle.loads(gzip.GzipFile(cache_path, 'rb').read())
            except:
                return {}

        return {}

    def save_cache(self, cache_name, data):
        _p = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0], cache_name[1])
        if not os.path.exists(_p): os.makedirs(_p)

        cache_path = os.path.join(self.CACHE_PAGES_ROOT, cache_name[0],
                                  cache_name[1], cache_name)

        gzip_file = gzip.open(cache_path, 'wb')
        gzip_file.write(pickle.dumps(data))
        gzip_file.close()

    def exists_image(self, name):
        image_path = os.path.join(self.CACHE_IMAGES_ROOT, name[0], name[1],
                                  name)
        return os.path.exists(image_path)

    def save_image(self, name, data):
        _p = os.path.join(self.CACHE_IMAGES_ROOT, name[0], name[1])
        if not os.path.exists(_p): os.makedirs(_p)

        image_path = os.path.join(self.CACHE_IMAGES_ROOT, name[0], name[1],
                                  name)
        open(image_path, 'wb').write(data)

    @staticmethod
    def wrapped_url(url):
        return url.split('/ref', 1)[0]

    @cache()
    @load_html
    @name
    @price
    @brand
    @merchant
    @sold_by
    @reviews
    @star
    @ranks_str
    @other_seller
    @weight_ounces
    def product_detail(self, asin, is_cache=True, **kwargs):

        return kwargs.get('product_info', {})

    @cache()
    @load_html
    @image_urls
    @image_data
    def product(self, asin, is_cache=True, **kwargs):
        return kwargs.get('product_info', {})
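
A usage sketch under the assumption that the module-level pieces referenced above (the cache/load_html/... decorators, config.AMAZON_CACHE_ROOT and GsaCaptcha) are importable; the proxy, GSA endpoint and ASIN below are placeholders, and the keys of the returned dict depend on those decorators:

# placeholder proxy, GSA endpoint and ASIN; AmazonBase is the class defined above
amazon = AmazonBase(string_proxy='http://127.0.0.1:8888',
                    gsa_ip='127.0.0.1', gsa_port='8000',
                    domain='amazon.com', cache_expired_days=7)

info = amazon.product_detail('B00EXAMPLE', is_cache=True)
print(info)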
Example 12
File: xxx.py Project: xtwxfxk/left5
# -*- coding: utf-8 -*-
__author__ = 'xtwxfxk'

import urllib.parse
from lutils.lrequest import LRequest

# url = 'https://www.amazon.com/Best-Sellers-Home-Kitchen/zgbs/home-garden/ref=zg_bs_nav_0'
url = 'https://www.amazon.com/Best-Sellers-Home-Kitchen-Décor-Products/zgbs/home-garden/1063278'
# url = urllib.parse.quote('https://www.amazon.com/Best-Sellers-Home-Kitchen-Décor-Products/zgbs/home-garden/1063278')
# url = urllib.parse.urlencode('https://www.amazon.com/Best-Sellers-Home-Kitchen-Décor-Products/zgbs/home-garden/1063278')
url = urllib.parse.quote(url, safe='https:/')
print(url)
lr = LRequest()

lr.load(url, is_decode=True)
eles = lr.xpaths('//ul[@id="zg_browseRoot"]/ul/ul/ul/li/a')

for ele in eles:
    print(ele.text.strip(), ele.attrib['href'])

# https://www.amazon.com/Best-Sellers-Home-Kitchen-D%C3%A9cor-Products/zgbs/home-garden/1063278/
Example 13
class LStockData():

    start_url = 'http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml'
    url_format = 'http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml?year=%s&jidu=%s'

    real_time_date_url = 'http://hq2fls.eastmoney.com/EM_Quote2010PictureApplication/Flash.aspx?Type=CR&ID=6035771&r=0.8572017126716673'

    def __init__(self,
                 delay=0.0,
                 cache=None,
                 debuglevel=0):  #, input, output, **kwargs):
        # threading.Thread.__init__(self)

        # self.input = input
        # self.output = output

        self.count = 0
        self.cache = cache
        self.debuglevel = debuglevel
        self.lr = LRequest(delay=delay)

    def _fetch_detail(self):
        details = []
        if self.lr.body.find('class="datatbl"') > -1:
            trs = self.lr.xpaths('//table[@class="datatbl"]//tr')[1:]
            for tr in trs:
                t = tr.xpath('./th[1]')[0].text.strip()
                price = tr.xpath('./td[1]')[0].text.strip()
                _price_change = tr.xpath('./td[2]')[0].text.strip()
                volume = tr.xpath('./td[3]')[0].text.strip()
                _turnover = tr.xpath('./td[4]')[0].text.strip()
                _nature = bytes(
                    ''.join(tr.xpath('./th[2]')[0].itertext()).strip(),
                    'ISO-8859-1').decode('gbk')

                if _nature == '卖盘':
                    nature = 'sell'
                elif _nature == '买盘':
                    nature = 'buy'
                elif _nature == '中性盘':
                    nature = 'neutral_plate'
                else:
                    nature = _nature

                price_change = '0.0'
                if _price_change != '--':
                    price_change = _price_change

                turnover = _turnover.replace(',', '')

                details.append({
                    'time': t,
                    'price': price,
                    'price_change': price_change,
                    'volume': volume,
                    'turnover': turnover,
                    'nature': nature,
                })
        return details

    def _check_delay(self):
        if (time.time() - self.t1) > 1800:
            logger.info('Wait 60 Sec..')
            time.sleep(60)
            self.t1 = time.time()

    # @try_request_count(wait_count=50)
    @try_except_response
    def load(self, url):

        return self.lr.load(url)

    def search_to_h5(self,
                     code,
                     save_path,
                     start_year=2007,
                     mode='a',
                     is_detail=True):
        h5file = tables.open_file(save_path, mode=mode)

        k_line_mins = [5, 15, 30, 60]

        end_year = datetime.date.today().year + 1
        self.t1 = time.time()
        try:

            if '/stock' not in h5file:
                stocks_group = h5file.create_group('/', 'stock',
                                                   'Stock Information')
            else:
                stocks_group = h5file.get_node('/stock')

            if '/stock/stocks' not in h5file:
                stock_table = h5file.create_table(stocks_group, 'stocks',
                                                  Stocks, "Stock Table")
            else:
                stock_table = h5file.get_node('/stock/stocks')
            stock = stock_table.row

            if '/stock/details' not in h5file:
                detail_table = h5file.create_table(stocks_group, 'details',
                                                   StockDetails,
                                                   "Stock Detail Table")
            else:
                detail_table = h5file.get_node('/stock/details')
            detail = detail_table.row

            ####################################
            #
            # 20200810 server disable
            #
            ####################################
            # if stock_table.nrows > 0:
            #     last_data = stock_table[-1]
            #     last_date = str(last_data[0]).split('_')[-1]
            #     last_date = '%s-%s-%s' % (last_date[0:4], last_date[4:6], last_date[6:8])
            #     start_year = last_date.split('-')[0]

            # else:
            #     last_date = '1990-01-01'
            #     last_year = '1990'

            #     url = self.start_url % code
            #     # logger.info('Load Url: %s' % url)
            #     self.load(url)

            #     _start_year = self.lr.xpaths('//select[@name="year"]/option')[-1].attrib['value'].strip()
            #     # if _start_year < '2007':
            #     #     _start_year = '2007'

            #     _start_year = int(_start_year)
            #     if start_year < _start_year:
            #         start_year = _start_year

            # t = datetime.datetime.strptime(last_date, '%Y-%m-%d')
            # quarter = pd.Timestamp(t).quarter
            # start_year = int(start_year)
            # for year in range(start_year, end_year):
            #     for quarter in range(quarter, 5):
            #         try:
            #             self._check_delay()
            #             _url = self.url_format % (code, year, quarter)
            #             # logger.info('Load: %s: %s' % (code, _url))

            #             # time.sleep(1) # random.randint(1, 5))
            #             self.load(_url)

            #             if self.lr.body.find('FundHoldSharesTable') > -1:
            #                 records = list(self.lr.xpaths('//table[@id="FundHoldSharesTable"]//tr')[2:])
            #                 records.reverse()

            #                 for record in records:
            #                     _date = record.xpath('./td[1]/div')[0].text.strip()
            #                     # _date = record.xpath('./td[1]/div[1]/text()')[0].strip()

            #                     detail_url = ''
            #                     if not _date:
            #                         _date = record.xpath('./td[1]/div/a')[0].text.strip()
            #                         detail_url = record.xpath('./td[1]/div/a')[0].attrib['href'].strip()

            #                     if _date <= last_date:
            #                         continue

            #                     _opening_price = record.xpath('./td[2]/div')[0].text.strip()
            #                     _highest_price = record.xpath('./td[3]/div')[0].text.strip()
            #                     _closing_price = record.xpath('./td[4]/div')[0].text.strip()
            #                     _floor_price = record.xpath('./td[5]/div')[0].text.strip()
            #                     _trading_volume = record.xpath('./td[6]/div')[0].text.strip()
            #                     _transaction_amount = record.xpath('./td[7]/div')[0].text.strip()

            #                     _id = '%s_%s' % (code, _date)
            #                     _date = _date.replace('-', '')

            #                     if is_detail:
            #                         details = []
            #                         if detail_url:

            #                             params = parse_qs(urlparse(detail_url).query, True)
            #                             detail_last_page = 'http://market.finance.sina.com.cn/transHis.php?date=%s&symbol=%s' % (params['date'][0], params['symbol'][0])

            #                             # time.sleep(1)
            #                             self.load(detail_last_page)
            #                             # logger.info('Load Detail: %s: %s' % (code, detail_down_url))

            #                             details.extend(self._fetch_detail())
            #                             if self.lr.body.find('var detailPages=') > -1:
            #                                 pages = json.loads(self.lr.body.split('var detailPages=', 1)[-1].split(';;')[0].replace("'", '"'))[1:]

            #                                 for page in pages:
            #                                     self._check_delay()
            #                                     # time.sleep(1) # random.randint(1, 5))
            #                                     detail_page = '%s&page=%s' % (detail_last_page, page[0])
            #                                     self.load(detail_page)

            #                                     details.extend(self._fetch_detail())

            #                         details.reverse()
            #                         for d in details:
            #                             # detail['id'] = _id
            #                             detail['date'] = _date
            #                             detail['time'] = d['time']
            #                             detail['price'] = d['price'] # d['price'].split(u'\u0000', 1)[0] if d['price'] else 0.0
            #                             detail['price_change'] = d['price_change']
            #                             detail['volume'] = d['volume']
            #                             detail['turnover'] = d['turnover']
            #                             detail['nature'] = d['nature']

            #                             detail.append()

            #                     # stock['id'] = _id
            #                     stock['date'] = _date
            #                     stock['open'] = _opening_price
            #                     stock['high'] = _highest_price
            #                     stock['close'] = _closing_price
            #                     stock['low'] = _floor_price
            #                     stock['volume'] = _trading_volume
            #                     stock['amount'] = _transaction_amount

            #                     stock.append()

            #                     h5file.flush()
            #         except:
            #             raise

            #     quarter = 1
            # # stock_table.flush()
            ###############################################

            h5file.flush()
        except:
            logger.error(traceback.format_exc())
            open('tmp/last.html', 'w').write(self.lr.body)
            raise
        finally:
            h5file.flush()
            h5file.close()

    def search_to_h5_k_line(self, code, save_path, start_year=2007, mode='a'):
        h5file = tables.open_file(save_path, mode=mode)
        # h5file = h5py.File(save_path, 'r+')

        k_line_mins = [5, 15, 30, 60]

        end_year = datetime.date.today().year + 1
        self.t1 = time.time()
        try:

            if '/stock' not in h5file:
                stocks_group = h5file.create_group('/', 'stock',
                                                   'Stock Information')
            else:
                stocks_group = h5file.get_node('/stock')

            ####################################
            #
            # new k line data 5m, 15m, 30m, 60m
            #
            ####################################
            kline_rows = {}
            for kmin in k_line_mins:
                if '/stock/kline%s' % kmin not in h5file:
                    kline_table = h5file.create_table(
                        stocks_group, 'kline%s' % kmin, StockKLines,
                        "Stock K line %sm Table" % kmin)
                else:
                    kline_table = h5file.get_node('/stock/kline%s' % kmin)
                kline_rows[kmin] = kline_table

            h5file.flush()

            # http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol=sz002095&scale=5&ma=no&datalen=1023
            for kmin in k_line_mins:
                k_line_url = 'http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol=%s&scale=%s&ma=no&datalen=1023' % (
                    code, kmin)
                try:
                    # logger.info('K line url: %s' % k_line_url)
                    kline_row = kline_rows[kmin].row

                    self.load(k_line_url)
                    if 'null' != self.lr.body.strip():
                        kline_datas = json.loads(self.lr.body)

                        last_data = None
                        if kline_rows[kmin].nrows > 0:
                            last_data = kline_rows[kmin][-1]

                        for kline_data in kline_datas:  # [{"day":"2020-08-07 15:00:00","open":"20.390","high":"20.390","low":"20.300","close":"20.300","volume":"54500"}, ...]
                            day = int(
                                datetime.datetime.strptime(
                                    kline_data['day'],
                                    '%Y-%m-%d %H:%M:%S').timestamp())

                            if last_data is None or last_data[0] < day:
                                kline_row['date'] = day
                                kline_row['open'] = kline_data['open']
                                kline_row['high'] = kline_data['high']
                                kline_row['close'] = kline_data['close']
                                kline_row['low'] = kline_data['low']
                                kline_row['volume'] = kline_data['volume']

                                kline_row.append()
                except:
                    logger.error('Error Url: %s' % k_line_url)
                    logger.error(traceback.format_exc())
                    open('tmp/last.html', 'w').write(self.lr.body)

            ############## end #################

            h5file.flush()
        except:
            logger.error(traceback.format_exc())
            open('tmp/last.html', 'w').write(self.lr.body)
            raise
        finally:
            h5file.flush()
            h5file.close()
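
A usage sketch for the k-line fetcher, assuming the PyTables row schemas (Stocks, StockDetails, StockKLines), logger and the try_except_response decorator from the surrounding module are available; the symbol format follows the getKLineData URL above (sz002095) and the output path is a placeholder:

# placeholder output path; the symbol format comes from the example URL above
data = LStockData(delay=1.0)
data.search_to_h5_k_line('sz002095', 'data/sz002095.h5', mode='a')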