Exemple #1
0
    def test_multiple_ctx(self):
        """Each MiniRacer instance keeps an isolated JS global scope."""
        contexts = [py_mini_racer.MiniRacer() for _ in range(3)]

        # Define the same variable name with a different value per context.
        for value, ctx in enumerate(contexts, start=1):
            ctx.eval('var x = %d' % value)

        # Every context must still see only its own binding of `x`.
        for expected, ctx in enumerate(contexts, start=1):
            self.assertEqual(ctx.eval('(x)'), expected)
Exemple #2
0
def stock_board_concept_name_ths() -> pd.DataFrame:
    """
    THS (10jqka) concept boards: name and detail-page link of every board.
    http://q.10jqka.com.cn/gn/detail/code/301558/
    :return: date, name, constituent count, URL and code of all concept boards
    :rtype: pandas.DataFrame
    """
    # ths.js defines the anti-scraping "v" cookie generator.  Build the JS
    # engine once and reuse it; the original re-created the engine and
    # re-evaluated the script on every loop iteration for no benefit.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)

    def _fetch(page):
        # The token is time based, so refresh it for each request.
        v_code = js_code.call('v')
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
            'Cookie': f'v={v_code}'
        }
        url = f"http://q.10jqka.com.cn/gn/index/field/addtime/order/desc/page/{page}/ajax/1/"
        return requests.get(url, headers=headers)

    # First request is only used to discover the total number of pages;
    # the pager text looks like "1/12".
    r = _fetch(1)
    soup = BeautifulSoup(r.text, "lxml")
    total_page = soup.find('span', attrs={'class': 'page_info'}).text.split('/')[1]
    big_df = pd.DataFrame()
    for page in tqdm(range(1, int(total_page) + 1), leave=False):
        r = _fetch(page)
        soup = BeautifulSoup(r.text, "lxml")
        # Collect the per-board detail URLs from the table rows; the dead
        # unused soup.find(...) statement of the original has been removed.
        url_list = []
        for item in soup.find('table', attrs={'class': 'm-table m-pager-table'}).find('tbody').find_all('tr'):
            inner_url = item.find_all("td")[1].find('a')['href']
            url_list.append(inner_url)
        temp_df = pd.read_html(r.text)[0]
        temp_df['网址'] = url_list
        big_df = big_df.append(temp_df, ignore_index=True)
    big_df = big_df[['日期', '概念名称', '成分股数量', '网址']]
    big_df['日期'] = pd.to_datetime(big_df['日期']).dt.date
    big_df['成分股数量'] = pd.to_numeric(big_df['成分股数量'])
    # The board code is the 7th path segment of the detail URL.
    big_df['代码'] = big_df['网址'].str.split("/", expand=True).iloc[:, 6]
    return big_df
Exemple #3
0
def stock_board_concept_name_ths() -> pd.DataFrame:
    """
    THS (10jqka) concept boards: name and detail-page link of every board.
    http://q.10jqka.com.cn/gn/detail/code/301558/
    :return: date, name, constituent count, URL and code of all concept boards
    :rtype: pandas.DataFrame
    """
    # ths.js only defines the "v" cookie generator; build the engine once.
    # Re-creating the engine and re-evaluating the script per page (as the
    # original did) was pure overhead.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)
    v_code = js_code.call("v")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
        "Cookie": f"v={v_code}",
    }
    url = "http://q.10jqka.com.cn/gn/index/field/addtime/order/desc/page/1/ajax/1/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    # Pager text looks like "1/12"; the denominator is the page count.
    total_page = soup.find("span", attrs={"class": "page_info"}).text.split("/")[1]
    big_df = pd.DataFrame()
    for page in tqdm(range(1, int(total_page) + 1), leave=False):
        url = f"http://q.10jqka.com.cn/gn/index/field/addtime/order/desc/page/{page}/ajax/1/"
        # The token is time based; refresh it for every request.
        v_code = js_code.call("v")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
            "Cookie": f"v={v_code}",
        }
        r = requests.get(url, headers=headers)
        soup = BeautifulSoup(r.text, "lxml")
        url_list = []
        for item in (
            soup.find("table", attrs={"class": "m-table m-pager-table"})
            .find("tbody")
            .find_all("tr")
        ):
            inner_url = item.find_all("td")[1].find("a")["href"]
            url_list.append(inner_url)
        temp_df = pd.read_html(r.text)[0]
        temp_df["网址"] = url_list
        big_df = big_df.append(temp_df, ignore_index=True)
    big_df = big_df[["日期", "概念名称", "成分股数量", "网址"]]
    big_df["日期"] = pd.to_datetime(big_df["日期"]).dt.date
    big_df["成分股数量"] = pd.to_numeric(big_df["成分股数量"])
    # The board code is the 7th path segment of the detail URL.
    big_df["代码"] = big_df["网址"].str.split("/", expand=True).iloc[:, 6]
    return big_df
Exemple #4
0
def stock_us_daily(symbol: str = "AAPL", adjust: str = "") -> pd.DataFrame:
    """
    Sina finance - U.S. stock daily history.
    http://finance.sina.com.cn/stock/usstock/sector.shtml
    Note: Sina's forward-adjust factor for CIEN is known to be wrong.
    :param symbol: ticker symbol; can be obtained from get_us_stock_name
    :type symbol: str
    :param adjust: "": raw unadjusted data; "qfq": forward-adjusted data;
        "qfq-factor": the forward-adjust factor and offset
    :type adjust: str
    :return: data selected by ``adjust``.  NOTE(review): any other value of
        ``adjust`` falls through every branch and implicitly returns None.
    :rtype: pandas.DataFrame
    """
    res = requests.get(f"https://finance.sina.com.cn/staticdata/us/{symbol}")
    js_code = py_mini_racer.MiniRacer()
    js_code.eval(zh_js_decode)
    dict_list = js_code.call(
        "d", res.text.split("=")[1].split(";")[0].replace('"', "")
    )  # run the JS decode routine on the payload embedded in the response
    data_df = pd.DataFrame(dict_list)
    data_df.index = pd.to_datetime(data_df["date"])
    del data_df["amount"]
    del data_df["date"]
    data_df = data_df.astype("float")
    res = requests.get(us_sina_stock_hist_qfq_url.format(symbol))
    # SECURITY NOTE(review): eval() of remote response text executes
    # arbitrary Python if the endpoint is compromised; json.loads on the
    # payload would be safer — confirm the payload is valid JSON first.
    qfq_factor_df = pd.DataFrame(eval(res.text.split("=")[1].split("\n")[0])["data"])
    qfq_factor_df.rename(columns={"c": "adjust", "d": "date", "f": "qfq_factor", }, inplace=True)
    qfq_factor_df.index = pd.to_datetime(qfq_factor_df["date"])
    del qfq_factor_df["date"]

    # Spread the adjust factors over a daily calendar (forward-filled below)
    # so every trade date picks up the factor in force at that time.
    temp_date_range = pd.date_range("1900-01-01", qfq_factor_df.index[0].isoformat())
    temp_df = pd.DataFrame(range(len(temp_date_range)), temp_date_range)
    new_range = pd.merge(
            temp_df, qfq_factor_df, left_index=True, right_index=True, how="left"
        )
    new_range = new_range.fillna(method="ffill")
    new_range = new_range.iloc[:, [1, 2]]

    if adjust == "qfq":
        if len(new_range) == 1:
            # Single factor row: align its date with the first trade date so
            # the merge below matches at least one row.
            new_range.index.values[0] = pd.to_datetime(str(data_df.index.date[0]))
        temp_df = pd.merge(
            data_df, new_range, left_index=True, right_index=True, how="left"
        )
        temp_df.fillna(method="ffill", inplace=True)
        temp_df.fillna(method="bfill", inplace=True)
        temp_df = temp_df.astype(float)
        # Sina's linear adjustment model: price * qfq_factor + adjust.
        temp_df["open"] = temp_df["open"] * temp_df["qfq_factor"] + temp_df["adjust"]
        temp_df["high"] = temp_df["high"] * temp_df["qfq_factor"] + temp_df["adjust"]
        temp_df["close"] = temp_df["close"] * temp_df["qfq_factor"] + temp_df["adjust"]
        temp_df["low"] = temp_df["low"] * temp_df["qfq_factor"] + temp_df["adjust"]
        temp_df = temp_df.apply(lambda x: round(x, 4))
        temp_df = temp_df.astype("float")
        # Drop the helper qfq_factor / adjust columns (last two).
        return temp_df.iloc[:, :-2]

    if adjust == "qfq-factor":
        return qfq_factor_df

    if adjust == "":
        return data_df
Exemple #5
0
def stock_zh_index_daily(symbol: str = "sh000922") -> pd.DataFrame:
    """
    Fetch daily OHLCV history of an index from Sina finance; aggressive
    scraping can get the IP banned.
    :param symbol: index code, e.g. sz399998
    :return: daily open/high/low/close/volume indexed by date
    :rtype: pandas.DataFrame
    """
    resp = requests.get(
        zh_sina_index_stock_hist_url.format(symbol), params={"d": "2020_2_4"}
    )
    decoder = py_mini_racer.MiniRacer()
    decoder.eval(hk_js_decode)
    # Strip the "var x = ...;" wrapper and quotes, then run the JS decoder.
    payload = resp.text.split("=")[1].split(";")[0].replace('"', "")
    records = decoder.call("d", payload)
    frame = pd.DataFrame(records)
    frame.index = pd.to_datetime(frame["date"])
    del frame["date"]
    return frame.astype("float")
Exemple #6
0
    def parse_item(self, response):
        """Extract the mp3 URL and title from an item page, yielding an item
        or raising DropItem when either is missing."""
        self.log('This is an item page! %s' % response.url)
        matched = re.search(r'#jquery_jplayer_1.*?this', response.text,
                            re.S | re.I)
        url = ''
        if matched:
            # Drop the jQuery wrapper lines and evaluate the remaining JS,
            # which computes the media URL.
            snippet = ''.join(matched.group().splitlines()[2:-1])
            url = py_mini_racer.MiniRacer().eval(snippet)

        title = response.css(".jp-title ul li ::text").extract_first()
        if title and url:
            item = IshuyinItem()
            item['title'] = title.strip().split('-')[0].strip()
            item['file_urls'] = [url]
            item['album'] = self.album
            item['picture'] = self.picture
            item['artist'] = self.artist
            yield item
        else:
            logging.debug("Parse Item error - Title:%s URL:%s", title, url)
            raise DropItem("Missing mp3 %s" % response.url)
def stock_rank_xzjp_ths() -> pd.DataFrame:
    """
    THS data center - technical stock picking - insurance-capital placarding
    (险资举牌).
    http://data.10jqka.com.cn/financial/xzjp/
    :return: placarding records
    :rtype: pandas.DataFrame
    """
    # ths.js defines the anti-scraping "v" cookie generator.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)
    v_code = js_code.call("v")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
        "Cookie": f"v={v_code}",
    }
    url = "http://data.10jqka.com.cn/ajax/xzjp/field/DECLAREDATE/order/desc/ajax/1/free/1/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        total_page = soup.find("span", attrs={"class": "page_info"}).text.split("/")[1]
    except AttributeError:
        # No pager rendered: only one page of results.
        total_page = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, int(total_page) + 1), leave=False):
        # The token is time based; refresh it for every request.
        v_code = js_code.call("v")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
            "Cookie": f"v={v_code}",
        }
        # BUG FIX: the page number was missing from the loop URL, so every
        # iteration re-fetched page 1 and the result held total_page copies
        # of the first page.
        url = f"http://data.10jqka.com.cn/ajax/xzjp/field/DECLAREDATE/order/desc/page/{page}/ajax/1/free/1/"
        r = requests.get(url, headers=headers)
        temp_df = pd.read_html(r.text, converters={"股票代码": str})[0]
        big_df = big_df.append(temp_df, ignore_index=True)
    big_df.columns = [
        '序号',
        '举牌公告日',
        '股票代码',
        '股票简称',
        '现价',
        '涨跌幅',
        '举牌方',
        '增持数量',
        '交易均价',
        '增持数量占总股本比例',
        '变动后持股总数',
        '变动后持股比例',
        '历史数据',
        ]
    big_df['涨跌幅'] = big_df['涨跌幅'].astype(str).str.zfill(6)
    big_df["增持数量占总股本比例"] = big_df["增持数量占总股本比例"].astype(str).str.strip("%")
    big_df["变动后持股比例"] = big_df["变动后持股比例"].astype(str).str.strip("%")
    big_df["涨跌幅"] = pd.to_numeric(big_df["涨跌幅"], errors='coerce')
    big_df["增持数量占总股本比例"] = pd.to_numeric(big_df["增持数量占总股本比例"])
    big_df["变动后持股比例"] = pd.to_numeric(big_df["变动后持股比例"])
    big_df["举牌公告日"] = pd.to_datetime(big_df["举牌公告日"]).dt.date
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    big_df["现价"] = pd.to_numeric(big_df["现价"])
    big_df["交易均价"] = pd.to_numeric(big_df["交易均价"])
    del big_df['历史数据']
    return big_df
Exemple #8
0
def stock_zh_a_cdr_daily(
    symbol: str = "sh689009", start_date: str = "19900101", end_date: str = "22201116"
) -> pd.DataFrame:
    """
    Sina finance - daily history of an A-share CDR; heavy scraping risks an
    IP ban.
    # TODO watch how price adjustment behaves for CDRs
    https://finance.sina.com.cn/realstock/company/sh689009/nc.shtml
    :param symbol: e.g. sh689009
    :type symbol: str
    :param start_date: start date, e.g. 20201103
    :type start_date: str
    :param end_date: end date, e.g. 20201103
    :type end_date: str
    :return: daily OHLC data for the requested window, rounded to 2 decimals
    :rtype: pandas.DataFrame
    """
    res = requests.get(zh_sina_a_stock_hist_url.format(symbol))
    js_code = py_mini_racer.MiniRacer()
    js_code.eval(hk_js_decode)
    # Run the JS decode routine on the payload embedded in the response.
    dict_list = js_code.call(
        "d", res.text.split("=")[1].split(";")[0].replace('"', "")
    )
    data_df = pd.DataFrame(dict_list)
    data_df.index = pd.to_datetime(data_df["date"])
    del data_df["date"]
    data_df = data_df.astype("float")
    # Copy the date-window slice: assigning into a bare slice view triggers
    # pandas' SettingWithCopyWarning and the writes may be silently dropped.
    temp_df = data_df[start_date:end_date].copy()
    for col in ("open", "high", "low", "close"):
        temp_df[col] = temp_df[col].round(2)
    return temp_df
Exemple #9
0
    def test_cannot_parse(self):
        # Evaluating syntactically invalid JS must raise JSParseException
        # whose message contains V8's "Unexpected end of input" error.
        context = py_mini_racer.MiniRacer()
        js_source = "var f = function("

        # six.assertRaisesRegex bridges the Py2/Py3 assertRaisesRegex rename.
        with six.assertRaisesRegex(self, py_mini_racer.JSParseException,
                                   '.*Unexpected end of input.*'):
            context.eval(js_source)
Exemple #10
0
def get_us_stock_name() -> pd.DataFrame:
    """
    U.S. stocks' English name, Chinese name and symbol.
    Use the symbol column as input to the history functions.
    http://finance.sina.com.cn/stock/usstock/sector.shtml
    :return: stock's english name, chinese name and symbol
    :rtype: pandas.DataFrame
    """
    big_df = pd.DataFrame()
    page_count = get_us_page_count()
    # The hash helper script is loop-invariant: create the JS engine and
    # evaluate it once, not once per page as the original did.
    js_code = py_mini_racer.MiniRacer()
    js_code.eval(js_hash_text)
    for page in tqdm(range(1, page_count + 1)):
        us_js_decode = "US_CategoryService.getList?page={}&num=20&sort=&asc=0&market=&id=".format(
            page)
        dict_list = js_code.call("d", us_js_decode)  # run the JS hash routine
        us_sina_stock_dict_payload.update({"page": "{}".format(page)})
        res = requests.get(us_sina_stock_list_url.format(dict_list),
                           params=us_sina_stock_dict_payload)
        # Strip the JSONP wrapper "(" ... ");" before parsing.
        data_json = json.loads(res.text[res.text.find("({") +
                                        1:res.text.rfind(");")])
        big_df = big_df.append(pd.DataFrame(data_json["data"]),
                               ignore_index=True)
    return big_df[["name", "cname", "symbol"]]
Exemple #11
0
def stock_zh_index_daily(symbol: str = "sh000922") -> pd.DataFrame:
    """
    Daily index history from Sina finance; aggressive scraping risks an
    IP ban.
    https://finance.sina.com.cn/realstock/company/sh000909/nc.shtml
    :param symbol: index code, e.g. sz399998
    :type symbol: str
    :return: daily OHLCV history
    :rtype: pandas.DataFrame
    """
    resp = requests.get(
        zh_sina_index_stock_hist_url.format(symbol), params={"d": "2020_2_4"}
    )
    engine = py_mini_racer.MiniRacer()
    engine.eval(hk_js_decode)
    # Strip the "var x = ...;" wrapper and quotes, then run the JS decoder.
    encoded = resp.text.split("=")[1].split(";")[0].replace('"', "")
    records = engine.call("d", encoded)
    temp_df = pd.DataFrame(records)
    temp_df["date"] = pd.to_datetime(temp_df["date"]).dt.date
    for column in ("open", "close", "high", "low", "volume"):
        temp_df[column] = pd.to_numeric(temp_df[column])
    return temp_df
Exemple #12
0
def bond_zh_hs_daily(symbol: str = "sh010107") -> pd.DataFrame:
    """
    Daily candlestick data for a SH/SZ exchange bond from Sina finance;
    heavy scraping risks an IP ban.
    http://vip.stock.finance.sina.com.cn/mkt/#hs_z
    :param symbol: bond code, e.g. sh010107
    :type symbol: str
    :return: daily OHLC data for the given bond
    :rtype: pandas.DataFrame
    """
    today_tag = datetime.datetime.now().strftime("%Y_%m_%d")
    resp = requests.get(zh_sina_bond_hs_hist_url.format(symbol, today_tag))
    engine = py_mini_racer.MiniRacer()
    engine.eval(hk_js_decode)
    # Strip the "var x = ...;" wrapper and quotes, then run the JS decoder.
    encoded = resp.text.split("=")[1].split(";")[0].replace('"', "")
    records = engine.call("d", encoded)
    data_df = pd.DataFrame(records)
    data_df["date"] = pd.to_datetime(data_df["date"]).dt.date
    for column in ("open", "high", "low", "close"):
        data_df[column] = pd.to_numeric(data_df[column])
    return data_df
Exemple #13
0
    def test_cannot_parse(self):
        """Broken JS source must raise JSParseException with the generic
        parse-error message."""
        ctx = py_mini_racer.MiniRacer()
        broken_js = "var f = function("
        expected = '.*Unknown JavaScript error during parse.*'
        with self.assertRaisesRegex(py_mini_racer.JSParseException, expected):
            ctx.eval(broken_js)
Exemple #14
0
    def test_cannot_parse(self):
        """Evaluating incomplete JS source must raise JSParseException."""
        ctx = py_mini_racer.MiniRacer()
        with self.assertRaises(py_mini_racer.JSParseException):
            ctx.eval("var f = function(")
def stock_rank_lxxd_ths() -> pd.DataFrame:
    """
    THS data center - technical stock picking - consecutive decline (连续下跌).
    http://data.10jqka.com.cn/rank/lxxd/
    :return: stocks with consecutive declining sessions
    :rtype: pandas.DataFrame
    """
    # ths.js defines the anti-scraping "v" cookie generator.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)
    v_code = js_code.call("v")
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
        "Cookie": f"v={v_code}",
    }
    # First request only discovers the page count (was an f-string with no
    # placeholder; now a plain literal).
    url = "http://data.10jqka.com.cn/rank/lxxd/field/lxts/order/desc/page/1/ajax/1/free/1/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        total_page = soup.find("span", attrs={
            "class": "page_info"
        }).text.split("/")[1]
    except AttributeError:
        # No pager rendered: only one page of results.
        total_page = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, int(total_page) + 1), leave=False):
        # The token is time based; refresh it for every request.
        v_code = js_code.call("v")
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
            "Cookie": f"v={v_code}",
        }
        url = f"http://data.10jqka.com.cn/rank/lxxd/field/lxts/order/desc/page/{page}/ajax/1/free/1/"
        r = requests.get(url, headers=headers)
        temp_df = pd.read_html(r.text, converters={"股票代码": str})[0]
        big_df = big_df.append(temp_df, ignore_index=True)
    # NOTE(review): column "连涨天数" (consecutive up days) on a decline
    # ranking page mirrors the site's own table header — verify upstream.
    big_df.columns = [
        "序号",
        "股票代码",
        "股票简称",
        "收盘价",
        "最高价",
        "最低价",
        "连涨天数",
        "连续涨跌幅",
        "累计换手率",
        "所属行业",
    ]
    big_df["连续涨跌幅"] = big_df["连续涨跌幅"].str.strip("%")
    big_df["累计换手率"] = big_df["累计换手率"].str.strip("%")
    big_df["连续涨跌幅"] = pd.to_numeric(big_df["连续涨跌幅"])
    big_df["累计换手率"] = pd.to_numeric(big_df["累计换手率"])
    big_df["收盘价"] = pd.to_numeric(big_df["收盘价"])
    big_df["最高价"] = pd.to_numeric(big_df["最高价"])
    big_df["最低价"] = pd.to_numeric(big_df["最低价"])
    big_df["连涨天数"] = pd.to_numeric(big_df["连涨天数"])
    return big_df
Exemple #16
0
    def _init(self):
        """Create the JS interpreter and load all configured source files."""
        self._intp = py_mini_racer.MiniRacer()
        self._time_cs = 0

        # Initial read: evaluate every JS source file into the interpreter.
        for path in self._js_filenames:
            with open(path, "r") as handle:
                self._intp.eval(handle.read())
Exemple #17
0
async def _get_us_stock_name_async(
        request_per_batch: int = 15) -> pd.DataFrame:
    """Fetch the full U.S. stock name list with batched async requests.

    Pages are requested in batches of up to ``request_per_batch``.  Pages
    whose request fails with a ConnectionError are left out of
    ``finished_pages`` and retried in a later batch.
    :param request_per_batch: maximum number of in-flight requests per batch
    :return: DataFrame with columns name, cname, symbol
    """
    count_per_page = 20
    big_df = pd.DataFrame()
    page_count = get_us_page_count(count_per_page)

    start = time.time()
    with tqdm(total=page_count) as pbar:
        all_pages = range(1, page_count + 1)
        finished_pages = []
        while len(finished_pages) < page_count:
            # Pages not yet fetched successfully (includes earlier failures).
            to_req_pages = [x for x in all_pages if x not in finished_pages]
            request_per_batch = min(request_per_batch, len(to_req_pages))
            tasks = {}
            for page in to_req_pages:
                if len(tasks) < request_per_batch:
                    # Batch not full yet: schedule this page's request.
                    us_js_decode = f"US_CategoryService.getList?page={page}&num={count_per_page}&sort=&asc=0&market=&id="
                    js_code = py_mini_racer.MiniRacer()
                    js_code.eval(js_hash_text)
                    dict_list = js_code.call("d", us_js_decode)  # run the JS hash routine
                    us_sina_stock_dict_payload.update(
                        {"page": "{}".format(page)})
                    tasks[page] = asyncio.create_task(
                        request(us_sina_stock_list_url.format(dict_list),
                                us_sina_stock_dict_payload))
                    continue

                # Batch is full: await the n in-flight requests before
                # scheduling more.
                for _, task in tasks.items():
                    await task
                n_failed_req = 0
                for page_no, task in tasks.items():
                    try:
                        res = task.result()
                        # Strip the JSONP wrapper "(" ... ");" before parsing.
                        data_json = json.loads(res[res.find("({") +
                                                   1:res.rfind(");")])
                        big_df = big_df.append(pd.DataFrame(data_json["data"]),
                                               ignore_index=True)
                        finished_pages.append(page_no)
                        pbar.update(1)
                    except requests.exceptions.ConnectionError as ident:
                        # Leave the page unfinished; it is retried in a
                        # later batch.
                        n_failed_req += 1
                        print(
                            f'{ident} page_no={page_no}, sleep for longer time and try in next batch'
                        )
                        pass
                tasks.clear()
                interval_time = 3
                if n_failed_req >= 1:
                    # wait longer for sina's anti-spider throttling
                    interval_time = 10
                time.sleep(interval_time)

    end = time.time()
    print('Cost time:', end - start)

    return big_df[["name", "cname", "symbol"]]
Exemple #18
0
    def test_exception_thrown(self):
        """Calling a JS function that throws must raise JSEvalException."""
        ctx = py_mini_racer.MiniRacer()
        # Define a function whose body always throws.
        ctx.eval("var f = function() {throw 'error'};")
        with self.assertRaises(py_mini_racer.JSEvalException):
            ctx.eval("f()")
def stock_rank_cxd_ths(symbol: str = "创月新低") -> pd.DataFrame:
    """
    THS data center - technical stock picking - new lows (创新低).
    http://data.10jqka.com.cn/rank/cxd/
    :param symbol: choice of {"创月新低", "半年新低", "一年新低", "历史新低"}
    :type symbol: str
    :return: stocks making new lows for the chosen horizon
    :rtype: pandas.DataFrame
    :raises KeyError: if ``symbol`` is not one of the supported choices
    """
    symbol_map = {
        "创月新低": "4",
        "半年新低": "3",
        "一年新低": "2",
        "历史新低": "1",
    }
    # ths.js defines the anti-scraping "v" cookie generator.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)
    v_code = js_code.call("v")
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
        "Cookie": f"v={v_code}",
    }
    url = f"http://data.10jqka.com.cn/rank/cxd/board/{symbol_map[symbol]}/field/stockcode/order/asc/page/1/ajax/1/free/1/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        total_page = soup.find("span", attrs={
            "class": "page_info"
        }).text.split("/")[1]
    except AttributeError:
        # No pager rendered: only one page of results.
        total_page = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, int(total_page) + 1), leave=False):
        # The token is time based; refresh it for every request.
        v_code = js_code.call("v")
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
            "Cookie": f"v={v_code}",
        }
        url = f"http://data.10jqka.com.cn/rank/cxd/board/{symbol_map[symbol]}/field/stockcode/order/asc/page/{page}/ajax/1/free/1/"
        r = requests.get(url, headers=headers)
        # Read codes as strings (consistent with the sibling THS functions)
        # so leading zeros survive parsing.
        temp_df = pd.read_html(r.text, converters={"股票代码": str})[0]
        big_df = big_df.append(temp_df, ignore_index=True)
    big_df.columns = [
        "序号", "股票代码", "股票简称", "涨跌幅", "换手率", "最新价", "前期低点", "前期低点日期"
    ]
    big_df["股票代码"] = big_df["股票代码"].astype(str).str.zfill(6)
    big_df["涨跌幅"] = big_df["涨跌幅"].str.strip("%")
    big_df["换手率"] = big_df["换手率"].str.strip("%")
    big_df["前期低点日期"] = pd.to_datetime(big_df["前期低点日期"]).dt.date
    big_df["涨跌幅"] = pd.to_numeric(big_df["涨跌幅"])
    big_df["换手率"] = pd.to_numeric(big_df["换手率"])
    big_df["最新价"] = pd.to_numeric(big_df["最新价"])
    big_df["前期低点"] = pd.to_numeric(big_df["前期低点"])
    return big_df
Exemple #20
0
def stock_dividents_cninfo(symbol: str = "600009") -> pd.DataFrame:
    """
    cninfo - per-stock historical dividends.
    http://webapi.cninfo.com.cn/#/company?companyid=600009
    :param symbol: stock code
    :type symbol: str
    :return: historical dividend records
    :rtype: pandas.DataFrame
    """
    url = "http://webapi.cninfo.com.cn/api/sysapi/p_sysapi1139"
    params = {'scode': symbol}
    # The API requires an "mcode" auth token derived from the current
    # unix timestamp by the site's JS helper.
    timestamp_str = str(int(time.time()))
    engine = py_mini_racer.MiniRacer()
    engine.eval(js_str)
    mcode = engine.call("mcode", timestamp_str)
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Cache-Control": "no-cache",
        "Content-Length": "0",
        "Host": "webapi.cninfo.com.cn",
        "mcode": mcode,
        "Origin": "http://webapi.cninfo.com.cn",
        "Pragma": "no-cache",
        "Proxy-Connection": "keep-alive",
        "Referer": "http://webapi.cninfo.com.cn/",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    resp = requests.post(url, params=params, headers=headers)
    result = pd.DataFrame(resp.json()["records"])
    result.columns = [
        "实施方案公告日期",
        "送股比例",
        "转增比例",
        "派息比例",
        "股权登记日",
        "除权日",
        "派息日",
        "股份到账日",
        "实施方案分红说明",
        "分红类型",
        "报告时间",
    ]
    result["实施方案公告日期"] = pd.to_datetime(result["实施方案公告日期"]).dt.date
    # Numeric ratio columns; missing values become NaN.
    for ratio_col in ("送股比例", "转增比例", "派息比例"):
        result[ratio_col] = pd.to_numeric(result[ratio_col], errors="coerce")
    # Event-date columns; missing values become NaT.
    for date_col in ("股权登记日", "除权日", "派息日"):
        result[date_col] = pd.to_datetime(result[date_col],
                                          errors="coerce").dt.date
    return result
Exemple #21
0
 def solveChallenge(self, text):
     """Rebuild the obfuscated challenge JS, evaluate it and decode the
     result via self._shiftBits."""
     # Split the script on braces/semicolons so the fragments can be
     # reassembled into a plain callable function definition.
     text = re.split("{|}|;", text)
     replaceFunction = "return message.replace(/./g, function(char, position) {"
     rebuilt = [
         text[1] + "{", text[2] + ";", replaceFunction, text[9] + ";})};",
         text[0]
     ]
     jsEngine = py_mini_racer.MiniRacer()
     solution = jsEngine.eval("".join(rebuilt))
     return self._shiftBits(solution)
Exemple #22
0
    def test_null_byte(self):
        """A string containing a NUL byte must round-trip through eval."""
        ctx = py_mini_racer.MiniRacer()
        expected = "\x00 my string!"

        # Build a JS program that declares the string and yields it back.
        script = "var str = \"" + expected + "\"; str;"
        self.assertEqual(ctx.eval(script), expected)
Exemple #23
0
    def solveChallenge(self, text):
        """Rebuild the obfuscated challenge JS, evaluate it and decode the
        result via self._shiftBits."""
        # Strip tabs and any non-ASCII characters before tokenising.
        cleaned = text.replace('\t', '', -1).encode('ascii', 'ignore').decode('utf-8')
        parts = re.split("{|}|;", cleaned)
        replace_fn = "return message.replace(/./g, function(char, position) {"
        # Stitch the fragments back into a callable function definition.
        script = "".join([
            parts[1] + "{",
            parts[2] + ";",
            replace_fn,
            parts[7] + ";})};",
            parts[0],
        ])
        solution = py_mini_racer.MiniRacer().eval(script)
        return self._shiftBits(solution)
Exemple #24
0
def find_dlbutton(html):
    """
    Locate the obfuscated "dlbutton" assignment inside the page's inline
    scripts, evaluate its JS expression and print the resulting URL.

    :param html: raw HTML of the download page
    """
    fullpage = BeautifulSoup(html, "html.parser")
    # (The original also did an unused fullpage.find("dlbutton") lookup;
    # removed as dead code.)
    for script in fullpage.find_all("script", type="text/javascript"):
        if "dlbutton" in script.text:
            for line in script.text.split(";"):
                if "dlbutton" in line:
                    # The right-hand side of the assignment is a JS
                    # expression that computes the real download URL.
                    expression = line.split("=")[1]
                    ctx = py_mini_racer.MiniRacer()
                    print(ctx.eval(expression))
Exemple #25
0
def covid_tracker():
    """Scrape covidtracker.5lab.co's embedded page state and export the
    case data."""
    js_ctx = py_mini_racer.MiniRacer()
    script_xpath = '/html/body/script[1]/text()'
    r = session.get('https://covidtracker.5lab.co/')
    if r.status_code != 200:
        return
    raw_script = r.html.xpath(script_xpath)
    # Rebuild the inline script into an executable function, then run it
    # to recover the page's state object.
    state_obj = js_ctx.execute(util.m_func(raw_script))
    Export(state_obj['state']['cases'], 'covid-tracker')
    print('Extract Covid-Tracker Successful')
Exemple #26
0
def execute_javascript(function, source, args):
    """Define ``function`` with body ``source`` in a fresh JS context, then
    call it with ``args`` (each passed as a quoted string literal), honouring
    the client's timeout and memory limits."""
    ctx = py_mini_racer.MiniRacer()
    ctx.eval('%s = function () { %s }' % (function, source))

    def quote(value):
        # Escape embedded double quotes so each argument survives as a
        # valid JS string literal.
        return '"%s"' % value.replace('"', '\\"')

    call_expr = '%s(%s)' % (function, ','.join(quote(a) for a in args))
    return ctx.eval(call_expr,
                    timeout=EvalClient.JS_TIMEOUT,
                    max_memory=EvalClient.JS_MEMORY)
Exemple #27
0
    def predict(self, X):
        """Run the exported JS scoring function over the feature vector X."""
        ctx = py_mini_racer.MiniRacer()
        # Load the generated scoring script, then invoke score([...]) on
        # the formatted feature values.
        ctx.eval(get_file_content(self.script_path))
        formatted = ",".join(map(utils.format_arg, X))
        return ctx.execute(f"score([{formatted}])")
Exemple #28
0
def stock_board_concept_cons_ths(symbol: str = "阿里巴巴概念") -> pd.DataFrame:
    """
    THS concept board constituents (成份股).
    http://q.10jqka.com.cn/gn/detail/code/301558/
    :param symbol: board name, e.g. "阿里巴巴概念"
    :type symbol: str
    :return: constituent stocks of the board
    :rtype: pandas.DataFrame
    :raises IndexError: if ``symbol`` is not a known concept board name
    """
    # Resolve the board name to its numeric code via the board list; the
    # code is the second-to-last path segment of the board's URL.
    stock_board_ths_map_df = stock_board_concept_name_ths()
    symbol = (
        stock_board_ths_map_df[stock_board_ths_map_df["概念名称"] == symbol]["网址"]
        .values[0]
        .split("/")[-2]
    )
    # ths.js defines the anti-scraping "v" cookie generator.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)
    v_code = js_code.call("v")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
        "Cookie": f"v={v_code}",
    }
    url = f"http://q.10jqka.com.cn/gn/detail/field/264648/order/desc/page/1/ajax/1/code/{symbol}"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        page_num = int(soup.find_all("a", attrs={"class": "changePage"})[-1]["page"])
    except IndexError:
        # No pager links: single page of constituents.
        page_num = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, page_num + 1), leave=False):
        # The token is time based; refresh it for every request.
        v_code = js_code.call("v")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
            "Cookie": f"v={v_code}",
        }
        url = f"http://q.10jqka.com.cn/gn/detail/field/264648/order/desc/page/{page}/ajax/1/code/{symbol}"
        r = requests.get(url, headers=headers)
        temp_df = pd.read_html(r.text)[0]
        big_df = big_df.append(temp_df, ignore_index=True)
    # Strip the "(%)" suffixes from the percentage column names.
    big_df.rename(
        {
            "涨跌幅(%)": "涨跌幅",
            "涨速(%)": "涨速",
            "换手(%)": "换手",
            "振幅(%)": "振幅",
        },
        inplace=True,
        axis=1,
    )
    del big_df["加自选"]
    big_df["代码"] = big_df["代码"].astype(str).str.zfill(6)
    return big_df
Exemple #29
0
def stock_board_industry_cons_ths(symbol: str = "半导体及元件") -> pd.DataFrame:
    """
    THS industry board constituents (成份股).
    http://q.10jqka.com.cn/thshy/detail/code/881121/
    :param symbol: board name, e.g. "半导体及元件"
    :type symbol: str
    :return: constituent stocks of the board
    :rtype: pandas.DataFrame
    :raises IndexError: if ``symbol`` is not a known industry board name
    """
    # Resolve the board name to its numeric code via the board list; the
    # code is the second-to-last path segment of the board's URL.
    stock_board_ths_map_df = stock_board_industry_name_ths()
    symbol = stock_board_ths_map_df[stock_board_ths_map_df['name'] ==
                                    symbol]['url'].values[0].split('/')[-2]
    # ths.js defines the anti-scraping "v" cookie generator.
    js_code = py_mini_racer.MiniRacer()
    js_content = _get_file_content_ths("ths.js")
    js_code.eval(js_content)
    v_code = js_code.call('v')
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        'Cookie': f'v={v_code}'
    }
    url = f'http://q.10jqka.com.cn/thshy/detail/field/199112/order/desc/page/1/ajax/1/code/{symbol}'
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    try:
        page_num = int(
            soup.find_all('a', attrs={'class': 'changePage'})[-1]['page'])
    except IndexError:
        # No pager links: single page of constituents.
        page_num = 1
    big_df = pd.DataFrame()
    for page in tqdm(range(1, page_num + 1)):
        # The token is time based; refresh it for every request.
        v_code = js_code.call('v')
        headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
            'Cookie': f'v={v_code}'
        }
        url = f'http://q.10jqka.com.cn/thshy/detail/field/199112/order/desc/page/{page}/ajax/1/code/{symbol}'
        r = requests.get(url, headers=headers)
        temp_df = pd.read_html(r.text)[0]
        big_df = big_df.append(temp_df, ignore_index=True)
    # Strip the "(%)" suffixes from the percentage column names.
    big_df.rename(
        {
            "涨跌幅(%)": "涨跌幅",
            "涨速(%)": "涨速",
            "换手(%)": "换手",
            "振幅(%)": "振幅",
        },
        inplace=True,
        axis=1)
    del big_df['加自选']
    big_df['代码'] = big_df['代码'].astype(str).str.zfill(6)
    return big_df
Exemple #30
0
 def prepare_js(self):
     """Build the embedded JS runtime described by the site's XML config."""
     logging.info('prepare js runtime')
     # Locate the <script> element; its tag name may be overridden by the
     # root element's "script" attribute.
     js_tag = self.etree.find('./{}'.format(self.etree.getroot().attrib.get(
         'script', 'script')))
     self.js = py_mini_racer.MiniRacer()
     # Load every required external library before the site's own code.
     for item in js_tag.findall('./require/item'):
         libcode = self.url.get(item.attrib['url'])
         logging.info('load script require: {}'.format(item.attrib['url']))
         self.js.eval(libcode)
     logging.info('load script')
     self.js.eval(js_tag.find('./code').text)
     self.js.eval('SiteD = {}')  # TODO: import the SiteD global variable