Esempio n. 1
0
    def __init__(self, debug=False):
        """Set up the Bitkub endpoint, credentials and HTTP helper.

        The key and secret are read from the ``BITKUB_API_KEY`` and
        ``BITKUB_API_SECRET`` environment variables; each falls back to an
        empty string when unset.

        :param debug: stored on the instance and forwarded to ``Browser``.
        """
        env = os.environ
        self._api = 'https://api.bitkub.com/api'
        self._key = env.get('BITKUB_API_KEY', '')
        # The secret is kept as bytes.
        self._secret = env.get('BITKUB_API_SECRET', '').encode()

        self._debug = debug
        self._browser = Browser(debug=debug)
Esempio n. 2
0
    def __init__(self, debug=False):
        """Set up the Satang Pro endpoint, credentials and HTTP helper.

        The user id, key and secret are read from the ``SATANG_USER_ID``,
        ``SATANG_API_KEY`` and ``SATANG_API_SECRET`` environment variables;
        each falls back to an empty string when unset.

        :param debug: stored on the instance and forwarded to ``Browser``.
        """
        env = os.environ
        self._api = 'https://api.satang.pro/api'
        self._uid = env.get('SATANG_USER_ID', '')
        self._key = env.get('SATANG_API_KEY', '')
        # The secret is kept as UTF-8 bytes.
        self._secret = env.get('SATANG_API_SECRET', '').encode('utf-8')

        self._debug = debug
        self._browser = Browser(debug=debug)
Esempio n. 3
0
 def __init__(self, host):
     """Remember the forum url (host) and create the Browser object.

     If creating the Browser raises ``HTTPError``, the error is printed
     and the process exits with status 1.
     """
     self.host = host
     try:
         browser = Browser()
     except HTTPError as err:
         print(err)
         sys.exit(1)
     else:
         self.browser = browser
Esempio n. 4
0
    def setUp(self) -> None:
        """Open the browser on the Baidu home page and build the page objects."""
        driver = Browser("chrome", r".\tools\chromedriver.exe")
        self.driver = driver
        driver.open_browser("http://www.baidu.com")
        logging.info("打开浏览器")
        # Log the browser name and version reported by the driver wrapper.
        name, version = driver.browser_name, driver.browser_version
        logging.info(f"浏览器名称:{name},浏览器版本:{version}")

        self.homepage = HomePage(driver)
        self.newspage = NewsPage(driver)
Esempio n. 5
0
class Platform(abc.ABC):
    """Abstract base for a platform driven through a ``Browser``.

    Subclasses must implement ``login``, ``get_account_value`` and
    ``get_available_funds``. The other methods read values from the
    platform's ``Config`` or delegate to the helpers built in ``__init__``.
    """

    def __init__(self, platform):
        """Create the browser, configuration and e-mail helpers.

        :param platform: passed unchanged to ``Config``.
        """
        self.browser = Browser()
        self.config = Config(platform)
        self.email_alert = EmailAlert()
        self.By = By

    @abc.abstractmethod
    def login(self):
        """Log in to the platform (implemented by subclasses)."""

    @abc.abstractmethod
    def get_account_value(self):
        """Return the account value (implemented by subclasses)."""

    @abc.abstractmethod
    def get_available_funds(self):
        """Return the available funds (implemented by subclasses)."""

    def get_project_value(self, project_id):
        """Return the value of a project; this base implementation gives 0."""
        return 0

    def get_name(self):
        """Return the name of the concrete class."""
        return type(self).__name__

    def get_username(self):
        """Return ``config.username``."""
        cfg = self.config
        return cfg.username

    def get_password(self):
        """Return ``config.password``."""
        cfg = self.config
        return cfg.password

    def get_account(self):
        """Return ``config.account``."""
        cfg = self.config
        return cfg.account

    def get_currency(self):
        """Return ``config.currency``."""
        cfg = self.config
        return cfg.currency

    def has_projects(self):
        """Return whatever ``config.has_projects()`` reports."""
        cfg = self.config
        return cfg.has_projects()

    def get_projects(self):
        """Return ``config.projects``."""
        cfg = self.config
        return cfg.projects

    def send_alert_email(self, platform_name, message):
        """Forward an alert to the e-mail helper."""
        mailer = self.email_alert
        mailer.send_email_alert(platform_name, message)

    def quit(self):
        """Quit the browser."""
        browser = self.browser
        browser.quit()
Esempio n. 6
0
	def setUp(self):
		"""Open the login page from the 'ZPC' config, fill in the credentials and log in.

		The object returned by ``click_login_btn()`` is stored as ``self.main_page``.
		"""
		self.driver = Browser().get_browserdriver()
		page = LoginPage(self.driver)
		self.login_page = page
		cfg = Config().get('ZPC')
		self.config = cfg
		page.url = cfg.get('url')
		page.visit()
		page.wait(5)
		# Fill the user field first, then the password field.
		user_field = page.rec_user_input()
		page.set_value(element=user_field, text=cfg.get('user'))
		passwd_field = page.rec_passwd_input()
		page.set_value(element=passwd_field, text=cfg.get('pwd'))
		self.main_page = page.click_login_btn()
Esempio n. 7
0
class Baidu(unittest.TestCase):
    """Browser tests for the Baidu home page and its news page."""

    def setUp(self) -> None:
        """Open the browser on the Baidu home page and build the page objects."""
        driver = Browser("chrome", r".\tools\chromedriver.exe")
        self.driver = driver
        driver.open_browser("http://www.baidu.com")
        logging.info("打开浏览器")
        # Log the browser name and version reported by the driver wrapper.
        name, version = driver.browser_name, driver.browser_version
        logging.info(f"浏览器名称:{name},浏览器版本:{version}")

        self.homepage = HomePage(driver)
        self.newspage = NewsPage(driver)

    def tearDown(self) -> None:
        """Quit the browser after each test."""
        self.driver.quit()
        logging.info("关闭浏览器")

    def test_search(self):
        """Typing "selenium" in the Baidu search box finds selenium-related results."""
        logging.info("用例1:测试百度搜索框输入selenium能搜索出包含selenium相关的信息")
        home = self.homepage

        # Type the search term.
        home.input_box.send_keys("selenium")
        logging.info("输入搜索信息")

        # Click the search button and give the results time to load.
        home.search_button.click()
        logging.info("点击搜索按钮")
        time.sleep(2)

        # Check that a link whose text contains the term is present.
        self.assertIsNotNone(
            self.driver.find_element_by_partial_link_text("selenium"))

    def test_access_game_news(self):
        """The game section of the news page can be reached from the Baidu home page."""
        logging.info("用例2:测试通过百度首页能进入新闻界面的游戏专题")

        # Click the news link.
        self.homepage.news_link.click()
        logging.info("点击新闻链接")

        # Switch to the newly opened window.
        self.driver.switch_to_new_page()
        logging.info("切换窗口")

        # Click the game link.
        self.newspage.game_link.click()
        logging.info("点击游戏链接")

        # Verify the resulting url.
        self.assertEqual(self.driver.current_url, "http://news.baidu.com/game")
Esempio n. 8
0
def runCrawl(limitNum=0, queryList=None, is_all_comments=False):
    """Crawl Instagram posts for each query and store them under ``data/``.

    For every query the tag page (queries starting with ``#``) or the
    profile page is opened, post urls are collected while scrolling, and
    each post is saved to ``data/<query>/<post>/`` as ``raw.html``,
    ``info.txt`` and ``image.jpg``.

    :param limitNum: passed to ``browser.scrollPageToBottomUntilEnd``.
    :param queryList: iterable of hashtags (``"#tag"``) or account names;
        ``None`` means no queries.
    :param is_all_comments: when true, ``browser.expandComments()`` is
        called before the page source is read.
    """
    queries = [] if queryList is None else queryList
    browser = Browser("driver/chromedriver")
    try:
        for query in queries:
            if not query:
                # An empty query names neither a tag nor an account.
                continue
            browser.clearLink()
            makeDir("data")
            queryDir = "data/" + query
            makeDir(queryDir)
            if query.startswith("#"):
                mUrl = ("https://www.instagram.com/explore/tags/"
                        + query[1:] + "/?hl=en")
            else:
                mUrl = "https://www.instagram.com/" + query + "/?hl=en"
            browser.goToPage(mUrl)
            print("collecting url of " + query + "...")
            browser.scrollPageToBottomUntilEnd(browser.collectDpageUrl,
                                               limitNum)
            print("finish scoll collecting!")

            print("collecting data...")
            # De-duplicate the collected post urls.
            slist = list(set(browser.urlList))
            for url in tqdm(slist):
                dirName = url.split("/")[4]
                postDir = queryDir + "/" + dirName
                # skip if already crawled
                if not makeDir(postDir):
                    continue
                browser.goToPage(url)
                if is_all_comments:
                    browser.expandComments()
                cur = browser.getPageSource()
                writeToFile(postDir + "/raw.html", [cur])
                infoData = cur.split("<meta content=")[1].split(" ")
                # extract data
                lang = extractLang(cur)
                likes = extractLikes(infoData, lang)
                comments = extractComments(infoData, lang)
                caption = extractCaption(cur)
                dateTime = extractDateTime(cur)
                commentMessages = extractCommentsMessage(cur)
                writeToFile(postDir + "/info.txt", [
                    "likes: ", likes, "", "comments: ", comments, "",
                    "caption: ", caption, "", "commentMessages: ",
                    commentMessages, "", "dateTime: ", dateTime, ""
                ])
                # download image
                imageUrl = cur.split(
                    'meta property="og:image" content="')[1].split('"')[0]
                downloadImage(imageUrl, postDir + "/image.jpg")
                time.sleep(1)
            print("query " + query + " collecting finish")

        time.sleep(2)
    finally:
        # Always release the driver, even when crawling fails part-way.
        browser.driver.quit()
    print("FINISH!")
Esempio n. 9
0
class BITKUB:
    """
    Small client for the Bitkub REST API.

    Official Documentation for Bitkub APIs
    https://github.com/bitkub/bitkub-official-api-docs
    """
    def __init__(self, debug=False):
        """Read credentials from the environment and create the HTTP helper.

        :param debug: stored on the instance and forwarded to ``Browser``;
            when true, non-200 responses raise in ``_resp``.
        """
        self._api = 'https://api.bitkub.com/api'
        self._key = os.getenv('BITKUB_API_KEY', '')
        self._secret = os.getenv('BITKUB_API_SECRET', '').encode()

        self._debug = debug
        self._browser = Browser(debug=self._debug)

    ##########
    # --- public api ---
    def ticker(self, sym=''):
        """Get ticker information, for one symbol or for all when ``sym`` is empty."""
        query = f'?sym={sym.upper()}' if sym else ''
        url = self._api + f'/market/ticker{query}'
        return self._resp(self._browser.get(url=url))

    def get_bids_asks(self, sym='THB_BTC', lmt=10):
        """
        List open (bids/asks) orders.

        :return: {
         'asks': [[rate, amount], [174629, 0.00010107], ...],
         'bids': [[rate, amount], [174629, 0.00010107], ...]
        }
        """
        url = self._api + f'/market/depth?sym={sym.upper()}&lmt={lmt}'
        return self._resp(self._browser.get(url=url))

    def get_bids(self, sym='THB_BTC', lmt=10):
        """
        List open buy (bids) orders.

        :return: [[rate, volume, amount], [174629, 17.65, 0.00010107], ...]
        """
        url = self._api + f'/market/bids?sym={sym.upper()}&lmt={lmt}'
        return self._resp_order(self._resp(self._browser.get(url=url)))

    def get_asks(self, sym='THB_BTC', lmt=10):
        """
        List open sell (asks) orders.

        :return: [[rate, volume, amount], [175500, 928.14, 0.00528859], ...]
        """
        url = self._api + f'/market/asks?sym={sym.upper()}&lmt={lmt}'
        return self._resp_order(self._resp(self._browser.get(url=url)))

    ##########
    # --- private api ---
    def balance(self):
        """Get balances info."""
        return self._signed_post('/market/balances', {})

    def sell(self, **kwargs):
        """
        Create a sell order.

        :param kwargs: sym, amt, rat, typ
        """
        return self._signed_post('/market/place-ask',
                                 self._data_rules(**kwargs))

    def buy(self, **kwargs):
        """
        Create a buy order.

        :param kwargs: sym, amt, rat, typ
        """
        return self._signed_post('/market/place-bid',
                                 self._data_rules(**kwargs))

    ##########
    # utility
    def _signed_post(self, path, data):
        """POST ``data`` (timestamped and signed) to ``path`` and unwrap the reply."""
        payload = {
            'url': self._api + path,
            'headers': self._build_headers(),
            'data': self._build_sign(data)
        }
        return self._resp(self._browser.post(**payload))

    def _build_headers(self):
        """Return the JSON headers carrying the API key."""
        return {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'X-BTK-APIKEY': self._key,
        }

    def _build_sign(self, data):
        """Return the JSON body for ``data`` with ``ts`` and ``sig`` added.

        The signature is the HMAC-SHA256 hex digest of the compact JSON
        encoding of the payload including ``ts`` but before ``sig`` is
        added. ``data`` itself is left untouched.
        """
        signed = dict(data)  # work on a copy so the caller's dict is not mutated
        signed['ts'] = nonce()
        signed['sig'] = hmac.new(self._secret,
                                 self._json_encode(signed).encode(),
                                 hashlib.sha256).hexdigest()
        return self._json_encode(signed)

    @staticmethod
    def _data_rules(**kwargs):
        """Normalise order parameters; only the keys that were passed are returned."""
        params = {}
        if 'sym' in kwargs:  # symbol is upper case
            params['sym'] = kwargs['sym'].upper()
        if 'amt' in kwargs:  # 0.10000000 is invalid, 0.1 is ok
            params['amt'] = format_float(kwargs['amt'])
        if 'rat' in kwargs:  # (e.g 1000.00 is invalid, 1000 is ok)
            params['rat'] = format_float(kwargs['rat'])
        if 'typ' in kwargs:  # anything other than 'market' becomes 'limit'
            params['typ'] = ('market'
                             if kwargs['typ'] == 'market' else 'limit')
        return params

    def _resp(self, resp):
        """Unwrap an HTTP response.

        :return: ``result`` of the JSON body when its ``error`` field is 0,
            the whole JSON body for any other 200 response, and ``None``
            for a non-200 response when not in debug mode.
        :raises Exception: for a non-200 response in debug mode.
        """
        if resp.status_code == 200:
            body = resp.json()  # parse the body once
            if body.get('error') == 0:
                return body['result']
            return body
        if self._debug:  # catch error !?
            raise Exception(resp)
        return None

    @staticmethod
    def _resp_order(o):
        """Reduce rows of (idx, timestamp, volume, rate, amount) to [rate, volume, amount]."""
        return [[rate, volume, amount]
                for _idx, _timestamp, volume, rate, amount in o]

    @staticmethod
    def _json_encode(data):
        """Encode ``data`` as compact JSON with sorted keys."""
        return json.dumps(data, separators=(',', ':'), sort_keys=True)
Esempio n. 10
0
class PhpBB(object):
    """Class to interact with a phpBB forum through a ``Browser`` session."""

    delete_form_id = 'confirm'
    reply_url = 'posting.php?mode=reply&f={f}&t={t}'
    edit_url = 'posting.php?mode=edit&f={f}&p={p}'
    form_id = 'postform'
    private_mess_url = 'ucp.php?i=pm&mode=compose'
    # User id that the login checks treat as "not logged in".
    anonymous_user_id = 1
    # Offset step between two consecutive pages of a topic.
    posts_per_page = 10

    def __init__(self, host):
        """Init object with forum url (host) and Browser object."""
        self.host = host
        try:
            self.browser = Browser()
        except HTTPError as e:
            print(e)
            sys.exit(1)

    def __del__(self):
        """Close the session when the object is destroyed."""
        # The browser may be missing if __init__ did not complete.
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        try:
            browser.session.close()
        except HTTPError as e:
            # Exceptions cannot propagate out of __del__, so only report.
            print(e)

    def _has_user_session(self, user_id):
        """Tell whether ``user_id`` denotes a real (non-anonymous) user."""
        return user_id is not None and user_id != self.anonymous_user_id

    def is_logged(self):
        """Check if logged in.

        :return: True when the user-id cookie exists and is not the
            anonymous id, False otherwise.
        """
        u = self._get_user_id()
        if self._has_user_session(u):
            print(f"login OK : {str(u)}")
            return True
        print(f"login failed : {str(u)}")
        return False

    def is_logged_out(self):
        """Check if logged out.

        :return: True when no real user session remains, False when a
            user is still logged in.
        """
        u = self._get_user_id()
        if self._has_user_session(u):
            print(f"Still logged in : {str(u)}")
            return False
        print(f"Signed out : {str(u)}")
        return True

    def _get_user_id(self):
        """Return the user id from the matching cookie, or None if absent."""
        for cookie in self.browser.list_cookies():
            if re.search(cookie_u_pattern, cookie.name):
                return int(cookie.value)
        return None

    def _get_sid(self):
        """Return the session id from the matching cookie, or None if absent."""
        for cookie in self.browser.list_cookies():
            if re.search(cookie_sid_pattern, cookie.name):
                return cookie.value
        return None

    def login(self, username, password):
        """Log in phpBB forum.

        :return: result of ``is_logged()``, or False on ``HTTPError``.
        """
        try:
            forum_ucp = urljoin(self.host, ucp_url)
            # Start from the input fields of the login page, then fill in
            # the credentials.
            payload = self.browser.select_tag(forum_ucp, "input")
            payload['username'] = username
            payload['password'] = password
            time.sleep(1)
            self.browser.post(forum_ucp, params=login_mode, data=payload)
            return self.is_logged()
        except HTTPError as e:
            print(e)
            return False

    def logout(self):
        """Log out of phpBB forum.

        :return: result of ``is_logged_out()``, or False on ``HTTPError``.
        """
        try:
            forum_ucp = urljoin(self.host, ucp_url)
            params = {'mode': 'logout', 'sid': self._get_sid()}
            self.browser.post(forum_ucp, params=params)
            return self.is_logged_out()
        except HTTPError as e:
            print(e)
            return False

    def close(self):
        """Close request session (HTTP connection)."""
        try:
            self.browser.session.close()
        except HTTPError as e:
            print(e)
            sys.exit(1)

    def _get_post_text_area(self, url):
        """Return the text of the ``message`` textarea at ``url``.

        Errors are printed and None is returned in that case.
        """
        try:
            soup = self.browser.get_html(url)
            return soup.find("textarea", id="message").text
        except HTTPError as e:
            print(e)
        except AttributeError as e:
            # Raised when the page has no such textarea.
            print("Error in _get_post_text_area")
            print(e)
        return None

    def _form_target(self, form):
        """Return (absolute action url, values) for a fetched form."""
        return urljoin(self.host, form['action']), form['values']

    def _make_delete_confirm(self, url):
        """Build the url and payload that confirm a deletion."""
        form = self.browser.get_form(url, self.delete_form_id)
        form['values']['confirm'] = 'Oui'
        return self._form_target(form)

    def _make_reply_payload(self, url, message):
        """Build the url and payload that post ``message`` as a reply."""
        form = self.browser.get_form(url, self.form_id)
        values = form['values']
        values['message'] = message
        # The icon field is not sent; it may be absent from the form.
        values.pop('icon', None)
        values['post'] = 'Submit'
        return self._form_target(form)

    def _make_add_receiver_payload(self, url, receiver):
        """Build the url and payload that add ``receiver`` to a private message."""
        form = self.browser.get_form(url, self.form_id)
        values = form['values']
        values['username_list'] = receiver
        values['add_to'] = "Ajouter"
        values['addbbcode20'] = 100
        # The icon field is not sent; it may be absent from the form.
        values.pop('icon', None)
        return self._form_target(form)

    def _make_private_message_payload(self, url, subject, message):
        """Build the url and payload that send a private message."""
        form = self.browser.get_form(url, self.form_id)
        values = form['values']
        values['subject'] = subject
        values['message'] = message
        values['addbbcode20'] = 100
        # NOTE(review): the recipient id 8435 is hard-coded here and does
        # not depend on the receiver chosen by the caller - confirm.
        values['address_list[u][8435]'] = "to"
        values['icon'] = 0
        values['post'] = 'Envoyer'
        return self._form_target(form)

    def get_post_text(self, postid):
        """Get text of a post, print it and return it."""
        post = Post(postid, self)
        post.get_text()
        print(post.text)
        return post.text

    def get_post_editmode_content(self, forum, post):
        """Get text of a post as seen in edit mode."""
        url = urljoin(self.host, self.edit_url.format(f=forum, p=post))
        return self._get_post_text_area(url)

    def edit_post(self, forum, post, new_message):
        """Edit (modify) a message in a forum."""
        url = urljoin(self.host, self.edit_url.format(f=forum, p=post))
        try:
            form = self.browser.get_form(url, self.form_id)
            payload = form['values']
            payload['icon'] = 0
            payload['message'] = new_message
            payload['post'] = 'Submit'
            payload['topic_type'] = '0'

            # wait at least 2 seconds so phpBB let us post
            time.sleep(2)

            self.browser.session.post(url, data=payload)
        except HTTPError as e:
            print(f'\n>>> Error {e.code}: {e.msg}')

    def get_forum_topics(self, f):
        """Retrieve and print all topics in a forum.

        Used in list-forum.py, for example.
        """
        forum = Forum(f, self)
        forum.print_forum_title()
        forum.get_nb_topics()
        topics_list = forum.get_forum_topics()
        forum.print_topics()
        return topics_list

    def get_forum_view_topics(self, f):
        """Test get_forum_viewtopics()."""
        forum = Forum(f, self)
        forum.print_forum_title()
        return forum.get_forum_viewtopics()

    def _open_topic(self, viewtopic):
        """Create the Topic for ``viewtopic`` and print its summary line."""
        # topic executes get html, get nb message and get title on creation
        topic = Topic(self, viewtopic)
        topic.print40()
        return topic

    def _collect_topic_posts(self, topic, start, max_count, extract):
        """Walk the pages of ``topic`` and append their posts to its postlist.

        Pages are fetched from offset ``start`` in steps of
        ``posts_per_page`` until the offset reaches ``max_count`` (capped
        at ``topic.nb_messages``) or a page yields no post.

        :param extract: callable taking a ``Page`` and returning its posts.
        :return: ``topic.postlist``.
        """
        limit = min(max_count, topic.nb_messages)
        while start < limit:
            pageurl = topic.make_topic_page_url(start)
            page = Page(pageurl, self.browser.get_html(pageurl))
            pagelist = extract(page)
            if not pagelist:
                break
            topic.postlist.extend(pagelist)
            start += self.posts_per_page
        return topic.postlist

    def get_topic_posts(self, viewtopic, max_count):
        """Return the posts of a topic, up to ``max_count`` messages."""
        topic = self._open_topic(viewtopic)
        return self._collect_topic_posts(
            topic, 0, max_count, lambda page: page.get_page_posts())

    def get_topic_posts_with_url(self, viewtopic, txt, max_count):
        """Return the topic posts selected by ``get_page_posts_with_url(txt)``."""
        topic = self._open_topic(viewtopic)
        return self._collect_topic_posts(
            topic, 0, max_count,
            lambda page: page.get_page_posts_with_url(txt))

    def get_topic_posts_not_done(self, viewtopic, posts_done, max_count):
        """Return list of posts, not already done.

        Args:
            viewtopic (str): viewtopic url
            posts_done (int): number of posts already done
            max_count (int): max number

        Returns:
            tuple: number of messages in the topic, and the list of posts
            not processed yet

        """
        topic = self._open_topic(viewtopic)

        if topic.nb_messages > posts_done:
            print(f"{posts_done} sur {topic.nb_messages} "
                  f"messages déjà traités")
            # Resume right after the posts that were already handled.
            self._collect_topic_posts(
                topic, posts_done, max_count,
                lambda page: page.get_page_posts())
        else:
            print("already_done")
        return topic.nb_messages, topic.postlist

    def get_user_topic_posts(self, viewtopic, max_count):
        """Return the topic posts gathered with ``get_page_posts_with_user``."""
        return self.get_topic_posts_with_user(viewtopic, max_count)

    def delete_post(self, post):
        """Delete one message. Send proper request."""
        try:
            url_get = post.make_delete_req_url(self.host)
            print("delete : " + url_get)

            url_post, payload = self._make_delete_confirm(url_get)
            time.sleep(1)

            self.browser.session.post(url_post, data=payload)
        except HTTPError as e:
            print(e)
            # Format the id so a non-string id cannot raise here.
            print(f"HTTPError with post : {post.id}")
        except TypeError as e2:
            print(e2)

    def delete_post_list(self, post_list):
        """Delete multiple messages (in a list)."""
        for post in post_list:
            self.delete_post(post)

    def get_topic_posts_with_user(self, viewtopic, max_count):
        """Return the topic posts gathered with ``get_page_posts_with_user``."""
        topic = self._open_topic(viewtopic)
        return self._collect_topic_posts(
            topic, 0, max_count,
            lambda page: page.get_page_posts_with_user())

    def post_reply(self, forum, topic, message):
        """Send a reply."""
        url = urljoin(self.host, self.reply_url.format(f=forum, t=topic))

        urlrep, payload = self._make_reply_payload(url, message)
        print(urlrep)
        print(payload)
        time.sleep(2)
        self.browser.session.post(urlrep, data=payload)

    def send_private_message(self, receiver, subject, message):
        """Send private message."""
        url = urljoin(self.host, self.private_mess_url)
        # Both payloads are built from the compose form before posting.
        urlrep1, payload1 = self._make_add_receiver_payload(url, receiver)
        urlrep2, payload2 = self._make_private_message_payload(
            url, subject, message)
        time.sleep(2)
        # Add receiver
        self.browser.session.post(urlrep1, data=payload1)

        # Send message
        self.browser.session.post(urlrep2, data=payload2)
 def verify_page_title(self, title):
     """Assert that ``title`` is contained in the title reported by ``Browser``."""
     current_title = Browser.title()
     assert title in current_title
Esempio n. 12
0
class SATANG:
    """
    Small client for the Satang Pro REST API.

    Official Documentation for Satang Pro APIs
    https://docs.satang.pro
    """
    def __init__(self, debug=False):
        """Read credentials from the environment and create the HTTP helper.

        :param debug: stored on the instance and forwarded to ``Browser``;
            when true, non-200 responses raise in ``_resp``.
        """
        self._api = 'https://api.satang.pro/api'
        self._uid = os.getenv('SATANG_USER_ID', '')
        self._key = os.getenv('SATANG_API_KEY', '')
        self._secret = os.getenv('SATANG_API_SECRET', '').encode('utf-8')

        self._debug = debug
        self._browser = Browser(debug=self._debug)

    ##########
    # --- public api ---
    def get_bids_asks(self, sym='btc_thb'):
        """
        List open orders for a pair.

        :return: {
         'asks': [[rate, amount], [174629, 0.00010107], ...],
         'bids': [[rate, amount], [174629, 0.00010107], ...]
        }
        """
        payload = {'url': self._api + f'/orders/?pair={sym.lower()}'}
        return self._resp_order(self._resp(self._browser.get(**payload)))

    ##########
    # --- private api ---
    def user(self):
        """Fetch the user record for the configured user id."""
        # NOTE(review): the ':' before the id is sent literally; it looks
        # like route-template syntax copied into the url - confirm.
        payload = {
            'url': self._api + f'/users/:{self._uid}',
            'headers': self._build_headers()
        }
        return self._resp(self._browser.get(**payload))

    def buy(self, pair, price, amount, typ='limit'):
        """Create a buy order.

        :param typ: ``'limit'``; any other value places a market order.
        """
        return self._place_order('buy', pair, price, amount, typ)

    def sell(self, pair, price, amount, typ='limit'):
        """Create a sell order.

        :param typ: ``'limit'``; any other value places a market order.
        """
        return self._place_order('sell', pair, price, amount, typ)

    def _place_order(self, side, pair, price, amount, typ):
        """Build the order parameters for ``side`` and submit them."""
        data = {
            'pair': pair.lower(),
            'price': price,
            'amount': amount,
            'side': side,
            'type': ('limit' if typ == 'limit' else 'market'),
            'nonce': nonce()
        }
        return self._create_orders(**data)

    def _create_orders(self, **kwargs):
        """POST an order; the signature covers the encoded parameter string."""
        data = self._concatenate_params(**kwargs)
        payload = {
            'url': self._api + '/orders/',
            'headers': self._build_headers(data),
            'data': data
        }
        return self._resp(self._browser.post(**payload))

    ##########
    # utility
    def _build_headers(self, s=''):
        """Return the auth headers; ``Signature`` is the HMAC-SHA512 hex digest of ``s``."""
        signature = hmac.new(self._secret, s.encode('utf-8'),
                             hashlib.sha512).hexdigest()
        return {
            'Authorization': 'TDAX-API ' + self._key,
            'Signature': signature,
        }

    def _resp(self, resp):
        """Return the JSON body of a 200 response.

        :return: the parsed body, or ``None`` for a non-200 response when
            not in debug mode.
        :raises Exception: for a non-200 response in debug mode.
        """
        if resp.status_code == 200:
            return resp.json()
        if self._debug:  # catch error !?
            raise Exception(resp)
        return None

    @staticmethod
    def _resp_order(o):
        """Reduce the ``bid``/``ask`` entries of ``o`` to [price, amount] pairs."""
        return {
            'bids': [[row['price'], row['amount']] for row in o['bid']],
            'asks': [[row['price'], row['amount']] for row in o['ask']]
        }

    @staticmethod
    def _concatenate_params(**p):
        """Join the parameters as sorted ``key=value`` pairs separated by ``&``.

        Returns an empty string when no parameter is given.
        """
        pairs = ['{}={}'.format(key, value) for key, value in p.items()]
        return '&'.join(sorted(pairs))
Esempio n. 13
0
 def setPath(self):
     """
     Set the 'dirname' entry to the value returned by a new Browser (utils).
     """
     chooser = Browser()
     target = self.get('dirname')
     target.set(chooser.get())
 def find_elements(self, xpath):
     """Return what the ``Browser`` driver finds for the given xpath."""
     driver = Browser.get_driver()
     return driver.find_elements_by_xpath(xpath)
Esempio n. 15
0
class Bovespa(object):
    """
        Class responsible to manage the operations with the website of bovespa.
    """

    def __init__(self):
        """Create the browser helper and set the base url of the website."""
        self.__browser = Browser()
        self.bovespa_url_base = 'http://bvmf.bmfbovespa.com.br'

    @staticmethod
    def _files_from_period(files_list, last_update, current_date=None):
        """
            From all the files available in the website, selects only the ones between the range date
            ( last update, current date).

            Past years are covered by the yearly file, past months of the
            current year by the monthly file, and the current month by one
            daily file per day.
        :param files_list: Mapping of file name to file entry available on the Bovespa website
        :param last_update: Last time the update was made (datetime)
        :param current_date: Current time (datetime); defaults to today, evaluated at call time
        :return: list of files to be download
        """
        if current_date is None:
            current_date = dt.today()

        result = []
        while last_update.date() <= current_date.date():
            if last_update.year != current_date.year:
                name = 'COTAHIST_A{0}.ZIP'.format(last_update.year)
                # Continue from the first day of the following year.
                last_update = last_update.replace(
                    year=last_update.year + 1, month=1, day=1)
            elif last_update.month != current_date.month:
                name = 'COTAHIST_M{0:02d}{1}.ZIP'.format(
                    last_update.month, last_update.year)
                # Continue from the first day of the following month so no
                # daily file of the current month is skipped. The month is
                # below the current one here, so month + 1 is at most 12.
                last_update = last_update.replace(
                    month=last_update.month + 1, day=1)
            else:
                name = 'COTAHIST_D{0:02d}{1:02d}{2}.ZIP'.format(
                    last_update.day, last_update.month, last_update.year)
                last_update += td(days=1)

            file_available = files_list.get(name)
            if file_available:
                result.append(file_available)

        return result

    def _available_files(self):
        """
            Get all the file names available to download on the bovespa website.
        :return : List of available files in the website
        """
        url = self.bovespa_url_base + '/pt-br/cotacoes-historicas/FormSeriesHistoricasArq.asp'
        page = self.__browser.get_page(url)
        return bovespa_parser.parse_files_form(page)

    def select_files(self, start_dt, finish_dt=None):
        """
            Select the files to be download to update the database
        :type start_dt: datetime.datetime
        :type finish_dt: datetime.datetime
        :param finish_dt: end of the range; defaults to now, evaluated at call time
        :return: List of files to download based on the start and finish date
        """
        if finish_dt is None:
            finish_dt = dt.now()
        available_files = self._available_files()
        return self._files_from_period(available_files, start_dt, finish_dt)

    def download_file(self, file_name):
        """
            Download a file from bovespa historic website, and returns uncompressed.
        :type file_name: str
        :return: A TXT file.
        """
        url = self.bovespa_url_base + '/InstDados/SerHist/'
        downloaded_file = self.__browser.get_page(url + file_name)
        # NOTE(review): a zip archive is binary; if get_page returns bytes
        # on Python 3 this buffer would need to be a BytesIO - confirm.
        compressed_file = StringIO(downloaded_file)
        return uncompress_zipfile(compressed_file)
Esempio n. 16
0
 def __init__(self, platform):
     """Create the browser, configuration and e-mail helpers.

     :param platform: passed unchanged to ``Config``.
     """
     self.browser = Browser()
     self.config = Config(platform)
     self.email_alert = EmailAlert()
     # Keep a reference to ``By`` on the instance.
     self.By = By
Esempio n. 17
0
def runCrawl(limitNum=0, queryList=None, is_all_comments=False, userinfo=None):
    """Crawl Instagram posts for each query and store them under ``data/``.

    When ``userinfo`` is given, the login page is opened and
    ``browser.log_in(userinfo)`` is attempted first; the crawl is abandoned
    if it fails. For every query the tag page (queries starting with
    ``#``) or the profile page is opened, post urls are collected while
    scrolling, and each post is saved to ``data/<query>/<post>/`` as
    ``raw.html``, ``info.txt`` and ``image.jpg``.

    :param limitNum: passed to ``browser.scrollPageToBottomUntilEnd``.
    :param queryList: iterable of hashtags (``"#tag"``) or account names;
        ``None`` means no queries.
    :param is_all_comments: when true, ``browser.expandComments()`` is
        called before the page source is read.
    :param userinfo: credentials handed to ``browser.log_in``; empty or
        ``None`` means crawling without logging in.
    """
    queries = [] if queryList is None else queryList
    browser = Browser("driver/chromedriver")
    try:
        if userinfo:
            print('Start logging in')
            browser.goToPage(
                'https://www.instagram.com/accounts/login/?hl=en')
            if browser.log_in(userinfo):
                print('Success to log in')
            else:
                print('Fail to log in')
                return
        else:
            print('Continue Without logging in')

        for query in queries:
            if not query:
                # An empty query names neither a tag nor an account.
                continue
            browser.clearLink()
            makeDir("data")
            queryDir = "data/" + query
            makeDir(queryDir)
            if query.startswith("#"):
                mUrl = ("https://www.instagram.com/explore/tags/"
                        + query[1:] + "/?hl=en")
            else:
                mUrl = "https://www.instagram.com/" + query + "/?hl=en"
            browser.goToPage(mUrl)
            print("collecting url of " + query + "...")
            browser.scrollPageToBottomUntilEnd(browser.collectDpageUrl,
                                               limitNum)
            print("finish scoll collecting!")

            print("collecting data...")
            # De-duplicate the collected post urls.
            slist = list(set(browser.urlList))
            for url in tqdm(slist):
                dirName = url.split("/")[4]
                postDir = queryDir + "/" + dirName
                # skip if already crawled
                if not makeDir(postDir):
                    continue
                browser.goToPage(url)
                if is_all_comments:
                    browser.expandComments()
                cur = browser.getPageSource()
                writeToFile(postDir + "/raw.html", [cur])
                infoData = BeautifulSoup(cur, "lxml")
                imageData = infoData.find("img", class_="FFVAD")
                # extract data
                likes = extractLikes(infoData)
                comments_list = extractComments(infoData)
                comments = len(comments_list)
                caption = extractCaption(imageData)
                dateTime = extractDateTime(infoData)
                commentMessages = extractCommentsMessage(comments_list)
                writeToFile(postDir + "/info.txt", [
                    "likes: ", likes, "", "comments: ", comments, "",
                    "caption: ", caption, "", "commentMessages: ",
                    commentMessages, "", "dateTime: ", dateTime, ""
                ])
                # download image
                imageUrl = imageData.get("srcset")
                downloadImage(imageUrl, postDir + "/image.jpg")
                time.sleep(1)
            print("query " + query + " collecting finish")

        time.sleep(2)
    finally:
        # Always release the driver: after a failed login, on errors and
        # at the normal end of the crawl.
        browser.driver.quit()
    print("FINISH!")
 def go_to_url(self,url):
     """Navigate by delegating ``url`` to ``Browser.go_to``."""
     Browser.go_to(url)
 def __init__(self):
     """Store the driver returned by ``Browser.getDriver()`` on the instance."""
     self.driver = Browser.getDriver()
Esempio n. 20
0
 def __init__(self):
     """Create the browser helper and set the base url of the Bovespa website."""
     self.__browser = Browser()
     self.bovespa_url_base = 'http://bvmf.bmfbovespa.com.br'
def teardown_function(function):
    """Quit the shared ``Browser``; the ``function`` argument is not used."""
    Browser.quit()
 def wait_until_visibile(self, locator):
     """Block until ``visibility_of_element_located(locator)`` is satisfied."""
     waiter = Browser.wait()
     condition = EC.visibility_of_element_located(locator)
     waiter.until(condition)