Example 1
    def record(self, entity, start, end, size, timestamps):
        for page in range(1, 5):
            resp = requests.get(self.category_stocks_url.format(page, entity.code))
            try:
                if resp.text == "null" or resp.text is None:
                    break
                category_jsons = demjson3.decode(resp.text)
                the_list = []
                for category in category_jsons:
                    stock_code = category["code"]
                    stock_id = china_stock_code_to_id(stock_code)
                    block_id = entity.id
                    the_list.append(
                        {
                            "id": "{}_{}".format(block_id, stock_id),
                            "entity_id": block_id,
                            "entity_type": "block",
                            "exchange": entity.exchange,
                            "code": entity.code,
                            "name": entity.name,
                            "timestamp": now_pd_timestamp(),
                            "stock_id": stock_id,
                            "stock_code": stock_code,
                            "stock_name": category["name"],
                        }
                    )
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

                self.logger.info("finish recording BlockStock:{},{}".format(entity.category, entity.name))

            except Exception as e:
                self.logger.error("error:,resp.text:", e, resp.text)
            self.sleep()
Example 2
def tokenizer_dict(text, text_cmd='', substring='', current_cmd={}):
    tokens = {'final': set(), 'new': set()}
    if len(text) < 6:
        return tokens
    if text[:1] + text[-1:] not in ['{}', '[]']:
        return tokens

    dct = None
    try:  # JSON
        dct = json.loads(text)
    except Exception:
        pass

    if dct is None:
        try:  # Python dict
            dct = ast.literal_eval(text)
        except Exception:
            pass

    if dct is None:
        try:  # JavaScript Object
            dct = demjson3.decode(text)
        except Exception:
            pass

    if dct is not None:
        dct_tokens = dict_keys_values(dct)
        values = list_str(dct_tokens['values'])
        tokens = {
            'final': set(list_str(dct_tokens['keys']) + values),
            'new': set(values)
        }
        return tokens

    return tokens
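
A minimal standalone sketch of the useful pattern above, the three-stage fallback (strict JSON, then a Python literal, then demjson3 for loose JavaScript-style objects), independent of the dict_keys_values/list_str helpers and assuming only that demjson3 is installed:

import ast
import json

import demjson3


def loose_decode(text):
    # Try strict JSON, then a Python literal, then demjson3 for JS-style syntax.
    for parse in (json.loads, ast.literal_eval, demjson3.decode):
        try:
            return parse(text)
        except Exception:
            continue
    return None


# Only the demjson3 stage is expected to accept unquoted keys and single quotes:
print(loose_decode("{key: 'value', n: 1}"))  # -> {'key': 'value', 'n': 1}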
Example 3
    def run(self):
        # Fetch the Shanghai (SSE) ETF list
        url = "http://query.sse.com.cn/commonQuery.do?sqlId=COMMON_SSE_ZQPZ_ETFLB_L_NEW"
        response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
        response_dict = demjson3.decode(response.text)

        df = pd.DataFrame(response_dict.get("result", []))
        self.persist_etf_list(df, exchange="sh")
        self.logger.info("沪市 ETF 列表抓取完成...")

        # Fetch the constituents of the Shanghai ETFs
        self.download_sh_etf_component(df)
        self.logger.info("沪市 ETF 成分股抓取完成...")

        # Fetch the Shenzhen (SZSE) ETF list
        url = "http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1945"
        response = requests.get(url)

        df = pd.read_excel(io.BytesIO(response.content), dtype=str)
        self.persist_etf_list(df, exchange="sz")
        self.logger.info("深市 ETF 列表抓取完成...")

        # Fetch the constituents of the Shenzhen ETFs
        self.download_sz_etf_component(df)
        self.logger.info("深市 ETF 成分股抓取完成...")
Example 4
def get_news(entity_id, ps=200, index=1):
    sec_id = to_em_sec_id(entity_id=entity_id)
    url = f"https://np-listapi.eastmoney.com/comm/wap/getListInfo?cb=callback&client=wap&type=1&mTypeAndCode={sec_id}&pageSize={ps}&pageIndex={index}&callback=jQuery1830017478247906740352_{now_timestamp() - 1}&_={now_timestamp()}"
    resp = requests.get(url)
    # {
    #     "Art_ShowTime": "2022-02-11 14:29:25",
    #     "Art_Image": "",
    #     "Art_MediaName": "每日经济新闻",
    #     "Art_Code": "202202112274017262",
    #     "Art_Title": "潍柴动力:巴拉德和锡里斯不纳入合并财务报表范围",
    #     "Art_SortStart": "1644560965017262",
    #     "Art_VideoCount": 0,
    #     "Art_OriginUrl": "http://finance.eastmoney.com/news/1354,202202112274017262.html",
    #     "Art_Url": "http://finance.eastmoney.com/a/202202112274017262.html",
    # }
    if resp.status_code == 200:
        json_text = resp.text[resp.text.index("(") + 1 : resp.text.rindex(")")]
        json_result = demjson3.decode(json_text)["data"]["list"]
        if json_result:
            json_result = [
                {
                    "id": f'{entity_id}_{item["Art_ShowTime"]}',
                    "entity_id": entity_id,
                    "timestamp": to_pd_timestamp(item["Art_ShowTime"]),
                    "news_title": item["Art_Title"],
                }
                for item in json_result
            ]
            next_data = get_news(entity_id=entity_id, ps=ps, index=index + 1)
            if next_data:
                return json_result + next_data
            else:
                return json_result
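
The endpoint above returns JSONP rather than plain JSON: the payload is wrapped in the jQuery callback named in the URL, which is why the text is sliced between the first "(" and the last ")" before being handed to demjson3. That unwrapping step in isolation, on a made-up response, might look like this:

import demjson3

jsonp = 'jQuery1830_123({"data": {"list": [{"Art_Title": "hello"}]}})'
payload = jsonp[jsonp.index("(") + 1 : jsonp.rindex(")")]
print(demjson3.decode(payload)["data"]["list"][0]["Art_Title"])  # -> hello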
Example 5
    @staticmethod
    def populate_sh_etf_type(df: pd.DataFrame):
        """
        Fill in the TYPE for each Shanghai ETF code in the listing data.
        :param df: ETF listing DataFrame
        :return: the listing DataFrame with the corresponding ETF TYPE added
        """
        query_url = (
            "http://query.sse.com.cn/infodisplay/queryETFNewAllInfo.do?"
            "isPagination=false&type={}&pageHelp.pageSize=25")

        type_df = pd.DataFrame()
        for etf_class in [1, 2]:
            url = query_url.format(etf_class)
            response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson3.decode(response.text)
            response_df = pd.DataFrame(response_dict.get("result", []))
            response_df = response_df[["fundid1", "etftype"]]

            type_df = pd.concat([type_df, response_df])

        result_df = df.copy()
        result_df = result_df.sort_values(by="FUND_ID").reset_index(drop=True)
        type_df = type_df.sort_values(by="fundid1").reset_index(drop=True)

        result_df["ETF_TYPE"] = type_df["etftype"]

        return result_df
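
Note that ETF_TYPE is attached purely by position: both frames are sorted by fund id and re-indexed, and the etftype column is copied across row by row. A key-based merge expresses the same intent more robustly; a small sketch with invented data:

import pandas as pd

df = pd.DataFrame({"FUND_ID": ["510050", "510300"], "FUND_NAME": ["50ETF", "300ETF"]})
type_df = pd.DataFrame({"fundid1": ["510300", "510050"], "etftype": ["2", "1"]})

result_df = df.merge(type_df, left_on="FUND_ID", right_on="fundid1", how="left")
result_df = result_df.rename(columns={"etftype": "ETF_TYPE"}).drop(columns=["fundid1"])
print(result_df[["FUND_ID", "ETF_TYPE"]])  # 510050 -> 1, 510300 -> 2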
Example 6
    def __init__(self,
                 mid: int = 0,
                 page: str = None,
                 fromImage=False,
                 offset: int = None,
                 limit: int = None):
        self.offset = offset
        self.limit = limit
        self.artistId = mid

        if page is not None:
            payload = None
            # detect if image count != 0
            if not fromImage:
                payload = demjson3.decode(page)
                if payload["error"]:
                    raise PixivException(
                        payload["message"],
                        errorCode=PixivException.OTHER_MEMBER_ERROR,
                        htmlPage=page)
                if payload["body"] is None:
                    raise PixivException(
                        "Missing body content, possible artist id doesn't exists.",
                        errorCode=PixivException.USER_ID_NOT_EXISTS,
                        htmlPage=page)
                self.ParseImages(payload["body"])
                self.ParseMangaList(payload["body"])
                self.ParseNovelList(payload["body"])
            else:
                payload = self.parseJs(page)
                self.isLastPage = True
                self.haveImages = True

            # parse artist info
            self.ParseInfo(payload, fromImage)
Example 7
    def record(self, entity, start, end, size, timestamps):
        # This url does not support pagination; if more rows are wanted than the default size, we can only request the maximum count
        if start is None or size > self.default_size:
            size = 8000

        param = {
            "security_item": entity,
            "level": self.level.value,
            "size": size
        }

        security_item = param["security_item"]
        size = param["size"]

        url = ChinaETFDayKdataRecorder.url.format(security_item.exchange,
                                                  security_item.code, size)

        response = requests.get(url)
        response_json = demjson3.decode(response.text)

        if response_json is None or len(response_json) == 0:
            return []

        df = pd.DataFrame(response_json)
        df.rename(columns={"day": "timestamp"}, inplace=True)
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df["name"] = security_item.name
        df["provider"] = "sina"
        df["level"] = param["level"]

        return df.to_dict(orient="records")
Example 8
    def fetch_cumulative_net_value(self, security_item, start,
                                   end) -> pd.DataFrame:
        query_url = (
            "http://api.fund.eastmoney.com/f10/lsjz?"
            "fundCode={}&pageIndex={}&pageSize=200&startDate={}&endDate={}")

        page = 1
        df = pd.DataFrame()
        while True:
            url = query_url.format(security_item.code, page,
                                   to_time_str(start), to_time_str(end))

            response = requests.get(url,
                                    headers=EASTMONEY_ETF_NET_VALUE_HEADER)
            response_json = demjson3.decode(response.text)
            response_df = pd.DataFrame(response_json["Data"]["LSJZList"])

            # Last page
            if response_df.empty:
                break

            response_df["FSRQ"] = pd.to_datetime(response_df["FSRQ"])
            response_df["JZZZL"] = pd.to_numeric(response_df["JZZZL"],
                                                 errors="coerce")
            response_df["LJJZ"] = pd.to_numeric(response_df["LJJZ"],
                                                errors="coerce")
            response_df = response_df.fillna(0)
            response_df.set_index("FSRQ", inplace=True, drop=True)

            df = pd.concat([df, response_df])
            page += 1

            self.sleep()

        return df
Example 9
    def record(self, entity, start, end, size, timestamps):
        json_results = []
        for timestamp in timestamps:
            timestamp_str = to_time_str(timestamp)
            url = self.url.format(timestamp_str)
            response = requests.get(url=url, headers=DEFAULT_SH_SUMMARY_HEADER)

            results = demjson3.decode(response.text[response.text.index("(") + 1 : response.text.index(")")])["result"]
            result = [result for result in results if result["productType"] == "1"]
            if result and len(result) == 1:
                result_json = result[0]
                # Some older data points are missing; default them to 0.0
                json_results.append(
                    {
                        "provider": "exchange",
                        "timestamp": timestamp,
                        "name": "上证指数",
                        "pe": to_float(result_json["profitRate"], 0.0),
                        "total_value": to_float(result_json["marketValue1"] + "亿", 0.0),
                        "total_tradable_vaule": to_float(result_json["negotiableValue1"] + "亿", 0.0),
                        "volume": to_float(result_json["trdVol1"] + "万", 0.0),
                        "turnover": to_float(result_json["trdAmt1"] + "亿", 0.0),
                        "turnover_rate": to_float(result_json["exchangeRate"], 0.0),
                    }
                )

                if len(json_results) > 30:
                    return json_results

        return json_results
Example 10
def get_exchange_data(interface, session=None) -> list:
    """
    This function can fetch 5min data for any PJM interface in the current day.
    Extracts load and timestamp data from html source then joins them together.
    """

    base_url = "http://www.pjm.com/Charts/InterfaceChart.aspx?open="
    url = base_url + exchange_mapping[interface]

    s = session or requests.Session()
    req = s.get(url)
    soup = BeautifulSoup(req.content, "html.parser")

    scripts = soup.find(
        "script",
        {
            "type": "text/javascript",
            "src": "/assets/js/Highcharts/HighCharts/highcharts.js",
        },
    )

    exchange_script = scripts.find_next_sibling("script")

    load_pattern = r"var load = (\[(.*)\])"
    load = re.search(load_pattern, str(exchange_script)).group(1)
    load_vals = demjson.decode(load)[0]

    # Occasionally load_vals contains a null at the end of the list which must be caught.
    actual_load = [float(val) for val in load_vals if val is not None]

    time_pattern = r"var timeArray = (\[(.*)\])"
    time_array = re.search(time_pattern, str(exchange_script)).group(1)
    time_vals = demjson.decode(time_array)

    flows = zip(actual_load, time_vals)

    arr_date = arrow.now("America/New_York").floor("day")

    converted_flows = []
    for flow in flows:
        arr_time = arrow.get(flow[1], "h:mm A")
        arr_dt = arr_date.replace(hour=arr_time.hour,
                                  minute=arr_time.minute).datetime
        converted_flow = (flow[0], arr_dt)
        converted_flows.append(converted_flow)

    return converted_flows
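
Both PJM helpers use the same trick: pull a JavaScript literal out of an inline script with a regex and let demjson parse it, since it is not strict JSON. A self-contained sketch of that step on an invented script fragment (using demjson3, the maintained fork):

import re

import demjson3

script = "var load = [1795.5, 1802.1, null]; var timeArray = ['12:00 AM', '12:05 AM', '12:10 AM'];"

load_vals = demjson3.decode(re.search(r"var load = (\[.*?\])", script).group(1))
time_vals = demjson3.decode(re.search(r"var timeArray = (\[.*?\])", script).group(1))

# Drop trailing nulls before pairing each load value with its timestamp.
flows = [(val, t) for val, t in zip(load_vals, time_vals) if val is not None]
print(flows)  # -> [(1795.5, '12:00 AM'), (1802.1, '12:05 AM')]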
Example 11
def extract_data(session=None) -> tuple:
    """
    Makes a request to the PJM data url.
    Finds timestamp of current data and converts into a useful form.
    Finds generation data inside script tag.

    :return: tuple of generation data and datetime.
    """

    s = session or requests.Session()
    req = s.get(url)
    soup = BeautifulSoup(req.content, 'html.parser')

    try:
        time_div = soup.find("div", id="asOfDate").text
    except AttributeError:
        raise LookupError('No data is available for US-PJM.')

    time_pattern = re.compile(r"""(\d{1,2}     #Hour can be 1/2 digits.
                                   :           #Separator.
                                   \d{2})\s    #Minutes must be 2 digits with a space after.
                                   (a.m.|p.m.) #Either am or pm allowed.""", re.X)

    latest_time = re.search(time_pattern, time_div)

    time_data = latest_time.group(1).split(":")
    am_or_pm = latest_time.group(2)
    hour = int(time_data[0])
    minute = int(time_data[1])

    # Time format used by PJM is slightly unusual and needs to be converted so arrow can use it.
    if am_or_pm == "p.m." and hour != 12:
        # Time needs to be in 24hr format
        hour += 12
    elif am_or_pm == "a.m." and hour == 12:
        # Midnight is 12 a.m.
        hour = 0

    arr_dt = arrow.now('America/New_York').replace(hour=hour, minute=minute)
    future_check = arrow.now('America/New_York')

    if arr_dt > future_check:
        # Generation mix lags 1-2hrs behind present.
        # This check prevents data near midnight being given the wrong date.
        arr_dt = arr_dt.shift(days=-1)

    dt = arr_dt.floor('minute').datetime

    generation_mix_div = soup.find("div", id="rtschartallfuelspjmGenFuelM_container")
    generation_mix_script = generation_mix_div.next_sibling

    pattern = r'series: \[(.*)\]'
    script_data = re.search(pattern, str(generation_mix_script)).group(1)

    # demjson is required because script data is javascript not valid json.
    raw_data = demjson.decode(script_data)
    data = raw_data["data"]

    return data, dt
Example 12
    def __init__(self, artist_id, page, tzInfo=None, dateFormat=None):
        self.posts = list()
        self.dateFormat = dateFormat
        self._tzInfo = tzInfo

        if page is not None:
            post_json = demjson3.decode(page)
            self.parse_artist(post_json["data"])
Example 13
    def get(self, request, *args, **kwargs):
        data_json = os.path.join(settings.BASE_DIR, 'tyadmin_api/menu.json')
        with open(data_json, encoding='utf-8') as fr:
            content = fr.read()
        import demjson3
        content = demjson3.decode(content)
        print(json.dumps(content, ensure_ascii=False))
        return JsonResponse({
            "data": content
        })
Example 14
    def parseJs(self, page):
        parsed = BeautifulSoup(page, features="html5lib")
        jss = parsed.find('meta', attrs={'id': 'meta-preload-data'})

        # cleanup
        parsed.decompose()
        del parsed

        if jss is None or len(jss["content"]) == 0:
            return None  # Possibly error page

        payload = demjson3.decode(jss["content"])
        return payload
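
parseJs works because Pixiv embeds the page payload as JSON inside a <meta id="meta-preload-data"> tag; the method only extracts that attribute and decodes it. A standalone sketch on a fabricated fragment (html.parser is used here instead of html5lib to avoid the extra dependency):

from bs4 import BeautifulSoup

import demjson3

html = '<html><head><meta id="meta-preload-data" content=\'{"user": {"1": {"name": "artist"}}}\'></head></html>'
jss = BeautifulSoup(html, "html.parser").find("meta", attrs={"id": "meta-preload-data"})
payload = demjson3.decode(jss["content"]) if jss is not None and jss.get("content") else None
print(payload)  # -> {'user': {'1': {'name': 'artist'}}}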
Example 15
    @classmethod
    def parseArtistIds(cls, page):
        ids = list()
        js = demjson3.decode(page)

        if "error" in js and js["error"]:
            raise PixivException("Error when requesting Fanbox", 9999, page)

        if "body" in js and js["body"] is not None:
            js_body = js["body"]
            if "supportingPlans" in js["body"]:
                js_body = js_body["supportingPlans"]
            for creator in js_body:
                ids.append(creator["user"]["userId"])
        return ids
Example 16
    def __init__(self, post_id, artist, page, tzInfo=None, dateFormat=None):
        self.imageUrls = list()
        self.imageResizedUrls = list()
        self.imageId = int(post_id)
        self._tzInfo = tzInfo
        self.dateFormat = dateFormat

        if page is not None:
            post_json = demjson3.decode(page)
            if artist is None:
                artist_id = post_json["data"]["item"]["user"]["id"]
                self.artist = SketchArtist(artist_id, page, tzInfo, dateFormat)
            else:
                self.artist = artist
            self.parse_post(post_json["data"]["item"])
Example 17
    def download_sh_etf_component(self, df: pd.DataFrame):
        """
        ETF_CLASS => 1. single-market ETF  2. cross-market ETF  3. cross-border ETF
                     5. bond ETF  6. gold ETF
        :param df: ETF listing DataFrame
        :return: None
        """
        query_url = (
            "http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?"
            "isPagination=false&type={}&etfClass={}")

        etf_df = df[(df["ETF_CLASS"] == "1") | (df["ETF_CLASS"] == "2")]
        etf_df = self.populate_sh_etf_type(etf_df)

        for _, etf in etf_df.iterrows():
            url = query_url.format(etf["ETF_TYPE"], etf["ETF_CLASS"])
            response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson3.decode(response.text)
            response_df = pd.DataFrame(response_dict.get("result", []))

            etf_code = etf["FUND_ID"]
            etf_id = f"etf_sh_{etf_code}"
            response_df = response_df[["instrumentId",
                                       "instrumentName"]].copy()
            response_df.rename(columns={
                "instrumentId": "stock_code",
                "instrumentName": "stock_name"
            },
                               inplace=True)

            response_df["entity_id"] = etf_id
            response_df["entity_type"] = "etf"
            response_df["exchange"] = "sh"
            response_df["code"] = etf_code
            response_df["name"] = etf["FUND_NAME"]
            response_df["timestamp"] = now_pd_timestamp()

            response_df["stock_id"] = response_df["stock_code"].apply(
                lambda code: china_stock_code_to_id(code))
            response_df["id"] = response_df["stock_id"].apply(
                lambda x: f"{etf_id}_{x}")

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
Example 18
    def parse_posts(self, page):
        post_json = demjson3.decode(page)

        links_root = post_json["_links"]
        if "next" in links_root:
            self.next_page = links_root["next"]["href"]
        else:
            self.next_page = None

        for item in post_json["data"]["items"]:
            post_id = item["id"]
            post = SketchPost(post_id, None, None, self._tzInfo,
                              self.dateFormat)
            post.parse_post(item)
            post.artist = self
            self.posts.append(post)
Example 19
    async def _current_data(self) -> Dict[str, Any]:
        """
        Retrieve the data from the printer.
        Throws ValueError if host does not support SyncThru
        """
        data = {"status": {"hrDeviceStatus": SyncthruState.OFFLINE.value}}
        if self.connection_mode in [ConnectionMode.AUTO, ConnectionMode.API]:
            url = "{}{}".format(self.url, ENDPOINT_API)

            try:
                async with self._session.get(url) as response:
                    res = demjson3.decode(await response.text(),
                                          strict=False)  # type: Dict[str, Any]
                    # if we get something back from this endpoint,
                    # we directly return it
                    return res
            except (aiohttp.ClientError, asyncio.TimeoutError):
                pass
            except demjson3.JSONDecodeError:
                # If no JSON data is provided but we want to only connect
                # in this mode, raise an Exception
                if self.connection_mode != ConnectionMode.AUTO:
                    raise SyncThruAPINotSupported(
                        "Invalid host, does not support SyncThru JSON API.")

        if self.connection_mode in [ConnectionMode.AUTO, ConnectionMode.HTML]:

            any_connection_successful = False
            for endpoint_url, parsers in ENDPOINT_HTML_PARSERS.items():
                html_url = "{}{}".format(self.url, endpoint_url)
                try:
                    async with self._session.get(html_url) as response:
                        html_res = await response.text()
                    any_connection_successful = True
                    for parser in parsers:
                        parser(data).feed(html_res)
                    # if successful, set device status to unknown
                except (aiohttp.ClientError, asyncio.TimeoutError):
                    pass
            if (any_connection_successful and data["status"]["hrDeviceStatus"]
                    == SyncthruState.OFFLINE.value):
                data["status"]["hrDeviceStatus"] = SyncthruState.UNKNOWN.value

        return data
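
strict=False matters here, presumably because some firmwares return JSON-ish text that the standard json module would reject; demjson3's non-strict mode tolerates it. A tiny illustration with an invented payload:

import json

import demjson3

raw = "{status: {hrDeviceStatus: 2}}"  # unquoted keys, not valid strict JSON

try:
    json.loads(raw)
except json.JSONDecodeError:
    print("stdlib json rejects it")

print(demjson3.decode(raw, strict=False))  # -> {'status': {'hrDeviceStatus': 2}}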
Example 20
def parse_resp(resp: Response, key=None):
    if resp.status_code != 200:
        raise Exception(f"code:{resp.status_code},msg:{resp.content}")
    # {
    #   "re": true,
    #   "message": "",
    #   "result": {}
    # }
    result = resp.text
    js_text = result[result.index("(") + 1:result.index(")")]

    ret = demjson3.decode(js_text)
    logger.info(f"ret:{ret}")
    data = ret.get("data")
    if data and key:
        result_value = data.get(key)
    else:
        result_value = data

    return ret["state"], result_value
Example 21
    def parsePosts(self, page) -> List[FanboxPost]:
        js = demjson3.decode(page)

        if "error" in js and js["error"]:
            raise PixivException(
                f"Error when requesting Fanbox artist: {self.artistId}", 9999,
                page)

        if js["body"] is not None:
            js_body = js["body"]

            posts = list()

            if "creator" in js_body:
                self.artistName = js_body["creator"]["user"]["name"]

            if "post" in js_body:
                # new api
                post_root = js_body["post"]
            else:
                # https://www.pixiv.net/ajax/fanbox/post?postId={0}
                # or old api
                post_root = js_body

            for jsPost in post_root["items"]:
                post_id = int(jsPost["id"])
                post = FanboxPost(post_id, self, jsPost, tzInfo=self._tzInfo)
                posts.append(post)
                # sanity check
                assert (self.artistId == int(jsPost["user"]["userId"])
                        ), "Different user id from constructor!"

            self.nextUrl = post_root["nextUrl"]
            if self.nextUrl is not None and len(self.nextUrl) > 0:
                self.hasNextPage = True

            return posts
Example 22
    def load_json_from_file(self, file_name):
        with open(file_name, 'r') as f:
            self.options = Dict(
                demjson.decode(f.read().encode("ascii", "ignore")))
        return self.options
Example 23
    def load_json(self, options_string):
        self.options = Dict(
            demjson.decode(options_string.encode("ascii", "ignore")))

        return self.options
Example 24
    def add_dataset(self, dataset_plot_cfg):
        self.options.data.datasets.append(
            Dict(demjson.decode(dataset_plot_cfg.encode("ascii", "ignore"))))
Example 25
    def get_page_info(self, page, **kwargs) -> Profile:
        result = {}
        desc = None

        try:
            about_url = f'/{page}/about/'
            logger.debug(f"Requesting page from: {about_url}")
            resp = self.get(about_url)
            desc = resp.html.find("meta[name='description']", first=True)
            result["about"] = resp.html.find(
                '#pages_msite_body_contents,div.aboutme', first=True).text
            cover_photo = resp.html.find(
                "#msite-pages-header-contents i.coverPhoto", first=True)
            if cover_photo:
                match = re.search(r"url\('(.+)'\)", cover_photo.attrs["style"])
                if match:
                    result["cover_photo"] = utils.decode_css_url(
                        match.groups()[0])
            profile_photo = resp.html.find("#msite-pages-header-contents img",
                                           first=True)
            if profile_photo:
                result["profile_photo"] = profile_photo.attrs["src"]
        except Exception as e:
            logger.error(e)
        try:
            url = f'/{page}/'
            logger.debug(f"Requesting page from: {url}")
            resp = self.get(url)
            desc = resp.html.find("meta[name='description']", first=True)
            ld_json = None
            try:
                ld_json = resp.html.find("script[type='application/ld+json']",
                                         first=True).text
            except Exception:
                logger.error("No ld+json element")
                url = f'/{page}/community'
                logger.debug(f"Requesting page from: {url}")
                try:
                    community_resp = self.get(url)
                    ld_json = community_resp.html.find(
                        "script[type='application/ld+json']", first=True).text
                except Exception:
                    logger.error("No ld+json element")
            if ld_json:
                meta = demjson.decode(ld_json)
                result.update(meta["author"])
                result["type"] = result.pop("@type")
                for interaction in meta.get("interactionStatistic", []):
                    if interaction[
                            "interactionType"] == "http://schema.org/FollowAction":
                        result["followers"] = interaction[
                            "userInteractionCount"]
            try:
                result["about"] = resp.html.find(
                    '#pages_msite_body_contents>div>div:nth-child(2)',
                    first=True).text
            except Exception as e:
                logger.error(e)
                result = self.get_profile(page)
            for elem in resp.html.find(
                    "div[data-sigil*='profile-intro-card-log']"):
                text = elem.text.split("\n")[0]
                if " Followers" in text:
                    result["followers"] = utils.convert_numeric_abbr(
                        text.replace(" Followers", ""))
                if text.startswith("Price Range"):
                    result["Price Range"] = text.split(" · ")[-1]
                link = elem.find("a[href]", first=True)
                if link:
                    link = link.attrs["href"]
                    if "active_ads" in link:
                        result["active_ads_link"] = link
                    if "maps.google.com" in link:
                        result["map_link"] = parse_qs(
                            urlparse(link).query).get("u")[0]
                        result["address"] = text
                    if link.startswith("tel:"):
                        result["phone"] = link.replace("tel:", "")
                    if link.startswith("mailto:"):
                        result["email"] = link.replace("mailto:", "")
            result["rating"] = resp.html.find(
                "div[data-nt='FB:TEXT4']")[1].text
        except Exception as e:
            logger.error(e)
        if desc:
            logger.debug(desc.attrs["content"])
            match = re.search(r'\..+?(\d[\d,.]+).+·', desc.attrs["content"])
            if match:
                result["likes"] = utils.parse_int(match.groups()[0])
            bits = desc.attrs["content"].split("·")
            if len(bits) == 3:
                result["people_talking_about_this"] = utils.parse_int(bits[1])
                result["checkins"] = utils.parse_int(bits[2])
        result["reviews"] = self.get_page_reviews(page, **kwargs)

        return result