Example #1
def take_screenshots():
    driver = Chrome()
    driver.get('http://127.0.0.1:7000')
    driver.set_window_size(1260, 800)
    driver.save_screenshot('main.png')

    driver.find_element_by_css_selector('.feed-post').click()
    time.sleep(5)  # Wait for MathJax to do its thing.
    driver.save_screenshot('post.png')
    driver.close()
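
The fixed time.sleep(5) above works but is brittle. A minimal alternative sketch using an explicit wait, assuming MathJax leaves elements with the class mjx-chtml in the rendered page (that class name is an assumption about this particular site, not something taken from the original project):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_mathjax(driver, timeout=10):
    # Block until at least one MathJax-rendered element shows up (hypothetical marker class).
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'mjx-chtml')))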
Example #2
def extract_citation_for_publication(link):
    """
    this function craws the list of articles from a given link. If it has next page, it will continue to it until there is none
    @param[in]      profile_url     the link of google scholar profile you want to crawl
    @return         the list of articles as a list where each entry is dictionary
    """
    browser=Browser('chromedriver.exe')
    citation={}
    # go to the citation view
    # as the page is rendered with JavaScript, we cannot get its content via urllib2;
    # instead we use Selenium to drive a real browser that renders the page
    # req=urllib2.Request(publication[k]['link'], headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:27.0) Gecko/20100101 Firefox/27.0'})
    # p=urllib2.urlopen(req)
    # sub_soup=BeautifulSoup(p.readlines()[0], 'html.parser')
    # s=sub_soup.find(id='gs_ccl')
    browser.get(link)
    while True:
        citation_root=browser.find_element_by_id('gs_ccl')
        citation_list=citation_root.find_elements_by_class_name('gs_r')
        for citation_item in citation_list:
            # title
            title=citation_item.find_element_by_class_name('gs_rt').text
            # try to get the downloading link, if there is one
            try:
                link=citation_item.find_element_by_id('gs_ggsW2')
                link=link.find_element_by_link_text(link.text).get_attribute('href')
            except:
                link=None
            # author
            author_line=citation_item.find_element_by_class_name('gs_a')
            author_name=author_line.text.split(', ')
            author={}
            # for each author, find their profile link if it exists
            for a in author_name:
                try:
                    print('.', end='')
                    # there is a google scholar profile for this author
                    item=author_line.find_element_by_link_text(a)
                    author[a]=item.get_attribute('href')
                except:
                    # there is no such profile
                    author[a]=None
            # we can also press the cite button to get the detailed citation information, skipped here
            citation[title]={'link':link, 'author': author}
        # go to next page, if there is one
        if not next_page(browser):
            break
    # close
    browser.close()
    return citation
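
The next_page helper called above is not shown with this example. A plausible sketch, under the assumption that the results page exposes a clickable "Next" pager link; the locator is a guess, not the original project's code:

from selenium.common.exceptions import NoSuchElementException

def next_page(browser):
    # Hypothetical pager: click "Next" if present and report whether we advanced.
    try:
        browser.find_element_by_link_text('Next').click()
        return True
    except NoSuchElementException:
        return False

Example #3 below relies on a similar next_page_new helper, presumably differing only in the locator it clicks.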
Example #3
def extract_publication(profile_url, verbose=verbose_citation_list):
    """
    this function crawl the publication list from the google scholar profile
    @param[in]      profile_url     the link of google scholar profile you want to crawl
    @param[in]      verbose         the level of information you want to scrawl. By default, we will scraw the detailed citation list for each of your publicaiton
    @return         the list of pulication as a list, where each entry is a dictionary
    """
    # scholar's artical list
    browser=Browser()
    browser.get(profile_url)
    publication={}
    while True:
        publication_list=browser.find_elements_by_class_name('gsc_a_tr')
        for publication_item in publication_list:
            title=publication_item.find_element_by_class_name('gsc_a_at').text
            print(title)
            author=publication_item.find_elements_by_class_name('gs_gray')[0].text.split(', ')
            vendor=publication_item.find_elements_by_class_name('gs_gray')[1].text
            try:
                citation=int(publication_item.find_element_by_class_name('gsc_a_ac').text)
                link=publication_item.find_element_by_class_name('gsc_a_ac').get_attribute('href')
            except:
                citation=0
                link=None
            try:
                year=int(publication_item.find_element_by_class_name('gsc_a_h').text)
            except:
                year=None
            """
            # to get citation for every paper, but will be detected as robot
            if citation>0 and verbose>=verbose_citation_list:
                print 'and its citation list',
                # to solve anti-crawl, but not work
                # time.sleep(2)
                cited_by=extract_citation_for_publication(link)
            else:
                cited_by=None

            print 'finished'
            publication[title]={'link':link,'author':author,'vendor':vendor,'citation':citation, 'cited by': cited_by, 'year':year}
            """
            publication[title]={'link':link,'author':author,'vendor':vendor,'citation':citation, 'cited by': citation, 'year':year}
        if not next_page_new(browser):
            break
    browser.close()
    return publication
Example #4
def extract_movies(max_page_num=5):
    browser = Browser()
    browser.get(URL)
    movies = {}
    while True:
        movie_list = browser.find_elements_by_class_name('item')
        for movie in movie_list:
            title = movie.find_element_by_tag_name("p").text.strip()
            rating = movie.find_element_by_tag_name("strong").text.strip()
            movies[title] = rating
        if max_page_num > 0:
            max_page_num -= 1
            if not have_more(browser):
                break
        else:
            break
    browser.close()
    return movies
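
The have_more helper is likewise undefined here. A minimal sketch, assuming the listing page has a pager control with class name next; treat the class name as a placeholder, not the original site's markup:

from selenium.common.exceptions import NoSuchElementException

def have_more(browser):
    # Hypothetical: advance to the next page of results if a "next" control exists.
    try:
        browser.find_element_by_class_name('next').click()
        return True
    except NoSuchElementException:
        return False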
Example #6
    def __init__(self):
        opts = Options()
        # opts.add_argument("--user-data-dir=~/.config/google-chrome")
        opts.add_argument("start-maximized")
        opts.add_argument("--headless")
        browser = Chrome(options=opts)
        browser.get(self.URL)

        try:
            play_button = WebDriverWait(browser, 10).until(
                expected_conditions.element_to_be_clickable(
                    (By.CLASS_NAME, self.PLAY_BUTTON_CLASS)))
            # Play button might not be in view, have to move to it to click it
            ActionChains(browser).move_to_element(play_button).perform()
            play_button.click()
            self.__load_game_data(browser.page_source)
        finally:
            browser.close()
Example #7
def scrape_company_data(companies):
    """Scrape data from Glassdoor on companies in list."""
    browser = Chrome()
    url = "https://www.glassdoor.com/Reviews/index.htm"
    browser.get(url)
    final_data = []
    sleep(23)

    for company in companies:
        try:
            sel = "input#KeywordSearch.keyword"
            search_bar = browser.find_element_by_css_selector(sel)

            search_bar.send_keys(company)
            search_bar.send_keys(Keys.ENTER)
            sleep(4)

            if len(browser.window_handles) > 1:
                tab_1, tab_2 = browser.window_handles
                browser.switch_to.window(tab_1)
                browser.close()
                browser.switch_to.window(tab_2)

            sel = "a.tightAll.h2"
            company_link = browser.find_element_by_css_selector(sel)
            company_link.click()
            sleep(4)

            sel = "a.eiCell.cell.reviews"
            reviews_link = browser.find_element_by_css_selector(sel)
            reviews_link.click()
            sleep(4)

            temp_stats = [company]
            temp_stats.append(get_stats(browser))
            final_data.append(temp_stats)

            browser.get(url)
        except Exception as e:
            print(e)
            url = "https://www.glassdoor.com/Reviews/index.htm"
            browser.get(url)

    return pd.DataFrame(final_data)
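
The get_stats helper is not included with this example. One hedged reading, assuming the reviews page exposes its headline numbers as elements matching a .ratingNum selector (an illustrative selector, not Glassdoor's real markup):

def get_stats(browser):
    # Hypothetical: collect the visible rating figures from the reviews page.
    return [el.text for el in browser.find_elements_by_css_selector('.ratingNum')]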
Example #8
def additionalCrawl3():
    # also crawl the public-disclosure data for each province
    target = 'http://www.csrc.gov.cn/pub/zjhpublic/'

    driver = Chrome(executable_path="/usr/local/bin/chromedriver",
                    options=CHROME_OPS)

    driver.get(target)

    page_source = driver.page_source
    bs = BeautifulSoup(
        page_source,
        'lxml',
    )

    tag_a = bs.find('center').find_all('a')
    all_province_link = []
    for a in tag_a:
        href = 'http://www.csrc.gov.cn' + a.get('href')
        all_province_link.append(href)

    for link in all_province_link:
        driver.get(link)
        driver.switch_to.frame("DataList")
        page_source = driver.page_source
        bs = BeautifulSoup(
            page_source,
            'lxml',
        )
        all_row = bs.find_all('div', class_="row")
        for row in all_row:
            res_map = {}
            href = row.find('a').get('href')
            title = row.find('a').get_text()
            date = row.find('li', class_="fbrq").get('title')
            res_map["title"] = title
            res_map["date"] = format_date(date)
            res_map[
                "url"] = 'http://www.csrc.gov.cn/pub/zjhpublic/' + href.replace(
                    '../', '')

            TARGET_LISTS.append(res_map)

    driver.close()
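
format_date is defined elsewhere in that project. A sketch of what it might do, assuming the title attribute holds a date like '2020年01月02日' (the input format is an assumption):

from datetime import datetime

def format_date(date_str):
    # Hypothetical normalizer: convert '2020年01月02日' to ISO '2020-01-02'.
    return datetime.strptime(date_str, '%Y年%m月%d日').strftime('%Y-%m-%d')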
Example #9
def Scrape_Harvard():
    #Set up MongoDB client
    db_client = MongoClient(
        'mongodb+srv://jonesca7:[email protected]/test?retryWrites=true&w=majority'
    )
    db = db_client.CourseList  #Create database
    collection = db.collection  #Create collection

    #Set up Chrome driver for web browsing
    webdriver = "chromedriver.exe"
    driver = Chrome(webdriver)

    url = "https://online-learning.harvard.edu/catalog/free"
    driver.get(url)

    pages = driver.find_elements_by_xpath(
        "//ul[@class='pager']/li[@class='pager-item']")
    num_pages = len(pages)

    course_list = []
    for page in range(num_pages + 1):
        url = "https://online-learning.harvard.edu/catalog/Free?page=" + str(
            page)
        driver.get(url)
        courses = driver.find_elements_by_xpath(
            "//ul[@class='course-grid']/li")
        for course in courses:
            course_title = course.find_element_by_class_name(
                "field-name-title-qs").text
            course_topic = course.find_element_by_class_name(
                "field-name-subject-area").text
            course_url = course.find_element_by_xpath(
                "div/div/div/h3/a").get_attribute("href")
            course_object = {
                "name": course_title,
                "topic": course_topic,
                "platform": "Harvard",
                "url": course_url
            }
            course_list.append(course_object)

    collection.insert_many(course_list)

    driver.close()
Example #10
def testDisplayEmailIn(system):
    driver = Chrome("chromedriver.exe")
    driver.get(index_url)
    driver.find_element_by_id('studentSelection').click()
    time.sleep(1)
    driver.find_element_by_id("email").send_keys(student_email)
    driver.find_element_by_id("password").send_keys(student_password)
    driver.find_element_by_id(system).click()
    driver.find_element_by_id("signInButton").click()
    time.sleep(4)
    alert = driver.switch_to.alert
    alert.accept()
    time.sleep(4)
    alert = driver.switch_to.alert
    assert re.match(r"You have \d unread emails?.",
                    alert.text), "wrong numUnread alert message"
    alert.accept()
    assert re.match(r"NUMBER OF UNREAD EMAILS: \d", driver.find_element_by_class_name(
        "numUnread").get_attribute("innerHTML")), "wrong numUnread message"
    email = driver.find_element_by_id("email0")
    assert email != None, "email0 not present"
    # check emails class names
    # we assume email0 is unread, email1 is unreadUrgent, email2 is read, email3 is readUrgent
    # unread email0
    assert driver.find_element_by_id("email0").get_attribute(
        "class") == "emailRow unreadRow", "wrong email className unread email"
    assert driver.find_element_by_id("emailTwoButtons0").get_attribute(
        "class") == "twoButtons unreadTwoButtons", "wrong className unread two buttons"
    # unreadUrgent email1
    assert driver.find_element_by_id("email1").get_attribute(
        "class") == "emailRow unreadUrgentRow", "wrong email className unreadUrgent email"
    assert driver.find_element_by_id("emailTwoButtons1").get_attribute(
        "class") == "twoButtons unreadUrgentTwoButtons", "wrong className unreadUrgent two buttons"
    # read email2
    assert driver.find_element_by_id("email2").get_attribute(
        "class") == "emailRow readRow", "wrong email className read email"
    assert driver.find_element_by_id("emailTwoButtons2").get_attribute(
        "class") == "twoButtons readTwoButtons", "wrong className read two buttons"
    # readUrgent email3
    assert driver.find_element_by_id("email3").get_attribute(
        "class") == "emailRow readUrgentRow", "wrong email className readUrgent email"
    assert driver.find_element_by_id("emailTwoButtons3").get_attribute(
        "class") == "twoButtons readUrgentTwoButtons", "wrong className readUrgent two buttons"
    driver.close()
Example #11
def iterate_through_results(driver: webdriver.Chrome) -> pd.DataFrame:
    """ Go through a given page's senators. """
    col_names = [
        "tx_date",
        "file_date",
        "last_name",
        "first_name",
        "order_type",
        "ticker",
        "asset_name",
        "tx_amount",
    ]
    all_txs = pd.DataFrame(columns=col_names)
    no_rows = 0
    n_links = 0
    for row in driver.find_elements_by_tag_name("tbody")[0].find_elements_by_tag_name(
        "tr"
    ):
        cols = row.find_elements_by_tag_name("td")
        first, last, report_type, date_received = (
            cols[0].get_attribute(INNER_TEXT),
            cols[1].get_attribute(INNER_TEXT),
            cols[3],
            cols[4].get_attribute(INNER_TEXT),
        )
        link = report_type.find_elements_by_tag_name("a")[0]
        click_on(driver, link)
        driver.switch_to.window(driver.window_handles[-1])
        txs = parse_page(driver)
        if len(txs) == 0:
            no_rows += 1
        driver.close()
        driver.switch_to.window(driver.window_handles[-1])
        all_txs = all_txs.append(
            txs.assign(file_date=date_received, last_name=last, first_name=first)
        )
        time.sleep(2)
        n_links += 1
    LOGGER.info(
        "{} out of {} pages returned no extractable transaction data".format(
            no_rows, n_links
        )
    )
    return all_txs
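
INNER_TEXT and click_on come from the surrounding module. Given that the loop switches to window_handles[-1] right after the call, one plausible sketch is that click_on opens the report link in a new window; both definitions below are assumptions for illustration:

INNER_TEXT = "innerText"  # the attribute name read from each table cell above

def click_on(driver, link):
    # Hypothetical: open the link in a new browser window so the results table
    # stays intact in the original window.
    driver.execute_script("window.open(arguments[0].href);", link)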
Example #12
def scrape_nba(url):

    tempstring = url.find("Season")
    tempstring = url[tempstring:tempstring + 14]
    tempstring = tempstring.replace('=', '')
    if url.find("team") > -1:
        tempstring = "C:/local/nba_stats/teams_" + tempstring.replace(
            '-', '_') + ".csv"
    else:
        tempstring = "C:/local/nba_stats/" + tempstring.replace('-',
                                                                '_') + ".csv"
    if os.path.exists(tempstring):
        return
    driver = Chrome(
        executable_path='C:/local/chromedriver_win32/chromedriver.exe')
    driver.get(url)
    driver.find_element_by_class_name('run-it').click()
    done = False
    time.sleep(10)
    more_results = driver.find_element_by_class_name('table-addrows__button')

    while not done:
        try:
            for x in range(32000):
                more_results.click()
                time.sleep(0.1)
            break
        except selenium.common.exceptions.StaleElementReferenceException:
            done = True
    tabletest = driver.find_element_by_class_name('nba-stat-table').text
    linecount = 0
    with open(tempstring, 'w') as csvfile:
        for line in tabletest.splitlines():
            tempstring = line.replace(' ', ',')
            tempstring = tempstring.replace('PLAYER', 'ID,FIRST,LAST')
            tempstring = tempstring.replace('MATCHUP',
                                            'PLAYER TEAM,HOME,OPPONENT')

            if linecount != 0:
                tempstring = str(linecount) + "," + tempstring
            csvfile.writelines(tempstring + "\n")
            linecount += 1

    driver.close()
Example #13
def recent_post_links(chrome_path, username, post_count=10):
    """
    With the input of an account page, scrape the 10 most recent posts urls
    Args:
    username: Instagram username
    post_count: default of 10, set as many or as few as you want
    Returns:
    A list with the unique url links for the most recent posts for the provided user
    """
    print("User " + username + " started:")
    start_time = time.time()
    url = "https://www.instagram.com/" + username + "/"
    options = Options()
    options.add_argument("--headless")
    # options.add_argument('--no-sandbox')
    options.add_argument("--disable-gpu")
    options.add_argument("--remote-debugging-port=9222")
    browser = Chrome(options=options, executable_path=chrome_path)
    browser.get(url)
    post = "https://www.instagram.com/p/"
    post_links = []
    while len(post_links) < post_count:
        links = [
            a.get_attribute("href")
            for a in browser.find_elements_by_tag_name("a")
        ]
        for link in links:
            if post in link and link not in post_links:
                post_links.append(link)

        print("\tPost " + str(len(post_links)) + " Processed")
        time_elaps = time.time() - start_time
        if time_elaps > (post_count / 12 * 20):
            print("Time out on reading in post details, some posts skipped")
            browser.close()
            return post_links[:post_count]
        scroll_down = "window.scrollTo(0, document.body.scrollHeight);"
        browser.execute_script(scroll_down)
        time.sleep(3)
    else:
        # browser.stop_client()
        browser.close()
        # os.system("taskkill /f /im chromedriver.exe /T")
        return post_links[:post_count]
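
A hypothetical call, for illustration only (the driver path and username are placeholders):

links = recent_post_links("chromedriver.exe", "instagram", post_count=5)
print(links)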
Example #14
def get_links(start_url):
    mfa_links = get_json_from_file('mfa_links.json')
    driver = Chrome(executable_path="C://Users//User/chromedriver.exe")

    # open page 1 and count pages
    driver.get(start_url + '1')
    time.sleep(5)
    try:
        num_pages = int(
            driver.find_elements_by_css_selector('div.paginates > ul > li')
            [-2].text)
    except IndexError:
        driver.get(start_url + '1')
        time.sleep(10)
        num_pages = int(
            driver.find_elements_by_css_selector('div.paginates > ul > li')
            [-2].text)

    # generate pages urls list
    pages = [start_url + str(i) for i in range(1, num_pages + 1)]

    # get links to texts from every page
    all_links = []
    n = 0
    for page in pages:
        print('Working with page', n, 'out of', num_pages)
        links = []
        while len(links) == 0:
            driver.get(page)
            time.sleep(3)
            links = [
                link.get_attribute('href') for link in
                driver.find_elements_by_css_selector('a.anons-title')
            ]
            print('Found', len(links), 'links on this page')
        all_links.extend(links)
        n += 1

    # save scraped data to file
    category = re.compile(r'/(\w+)\?').findall(start_url)[0]
    mfa_links[category] = all_links
    update_json(mfa_links, 'mfa_links.json')

    driver.close()
Example #15
    def run(self):
        chrome_options = Options()
        chrome_options.add_argument('--headless')  # headless browser; no window pops up
        driver = Chrome(options=chrome_options)

        driver.get(
            'https://newids.seu.edu.cn/authserver/login?service=http%3A%2F%2Fehall.seu.edu.cn%2Fqljfwapp2%2Fsys%2FlwReportEpidemicSeu%2Findex.do%3Ft_s%3D1594447890898%26amp_sec_version_%3D1%26gid_%3DSTZiVXZjRnhVSS9VNERWaFNNT1hXb2VNY3FHTHFVVHMwRC9jdTdhUlllcXVkZDNrKzNEV1ZxeHVwSEloRVQ4NHZFVzRDdHRTVlZ1dEIvczVvdzVpVGc9PQ%26EMAP_LANG%3Dzh%26THEME%3Dindigo%23%2FdailyReport'
        )
        driver.maximize_window()
        driver.find_element_by_id('username').send_keys(
            self.cfg.user_id)  # campus card number
        driver.find_element_by_id('password').send_keys(
            self.cfg.password)  # password
        driver.find_element_by_xpath(
            '//*[@class="auth_login_btn primary full_width"]').click()

        status = "打卡失败"
        try:
            WebDriverWait(driver, 30,
                          0.2).until(lambda x: x.find_element_by_xpath(
                              '//*[@class="bh-btn bh-btn-primary"]'))
            driver.find_element_by_xpath(
                '//*[@class="bh-btn bh-btn-primary"]').click()

            WebDriverWait(
                driver, 30,
                0.2).until(lambda x: x.find_element_by_name('DZ_JSDTCJTW'))
            driver.find_element_by_name('DZ_JSDTCJTW').send_keys('36.5')
            driver.find_element_by_id('save').click()

            WebDriverWait(
                driver, 30, 0.2
            ).until(lambda x: x.find_element_by_xpath(
                '//*[@class="bh-dialog-btn bh-bg-primary bh-color-primary-5"]')
                    )
            driver.find_element_by_xpath(
                '//*[@class="bh-dialog-btn bh-bg-primary bh-color-primary-5"]'
            ).click()
            status = "打卡成功"
        except:
            pass

        self.send_email(status)
        driver.close()
Example #16
class OpenBrowser:
    def __init__(self, flag):
        self.flag = flag

    def __enter__(self):
        if self.flag == 1:
            opt = FirefoxOptions()
            opt.headless = True
            self.browser = Firefox(options=opt)
        else:
            opt = ChromeOptions()
            opt.headless = True
            self.browser = Chrome(options=opt)
        return self.browser

    def __exit__(self, exc_type, exc_value, traceback):
        self.browser.close()
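
A hypothetical usage sketch of the context manager above (flag 1 selects headless Firefox, anything else headless Chrome):

with OpenBrowser(0) as browser:
    browser.get("https://example.com")
    print(browser.title)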
Example #17
def quick_win(parametr):
    driver = Chrome()
    driver.get("https://kirsorokin.github.io/tictactoe-angular-1.5/")
    table = driver.find_element_by_class_name("field")
    table_rows = table.find_elements_by_tag_name('tr')
    count_table_rows = len(table_rows)
    headers = table_rows[0]
    cells = headers.find_elements_by_tag_name('td')
    count_cells = 0
    for item in cells:
        count_cells += 1
    first_cell = random.randint(1, count_cells)
    first_row = random.randint(1, count_table_rows)
    end_cell = first_cell
    end_row = first_row
    if parametr == "row":
        for item in range(1, 6):
            if first_row > count_table_rows:
                end_row -= 1
                table.find_element_by_xpath('.//tbody/tr[%s]/td[%s]' % (end_row, first_cell)).click()
            else:
                table.find_element_by_xpath('.//tbody/tr[%s]/td[%s]' % (first_row, first_cell)).click()
                first_row +=1
    elif parametr == "cell":
        for item in range(1, 6):
            if first_cell > count_cells:
                end_cell -= 1
                table.find_element_by_xpath('.//tbody/tr[%s]/td[%s]' % (first_row, end_cell)).click()
            else:
                table.find_element_by_xpath('.//tbody/tr[%s]/td[%s]' % (first_row, first_cell)).click()
                first_cell +=1
    else:
        print("no parametr")
        return 0
    try:
        driver.switch_to.alert.accept()
    except Exception as e:
        print(e)
        return 0
    table = driver.find_element_by_class_name("field")
    assert(find_o(table.text)==1)
    driver.close()
    return 1
Example #18
def get_baidu_hot():
    option = ChromeOptions()
    option.add_argument('--headless')  # hide the browser window
    option.add_argument('--no-sandbox')
    browser = Chrome(options=option, executable_path="chromedriver.exe")

    url = "https://voice.baidu.com/act/virussearch/virussearch?from=osari_map&tab=0&infomore=1"
    browser.get(url)
    but = browser.find_element_by_css_selector(
        '#ptab-0 > div > div.VirusHot_1-5-6_32AY4F.VirusHot_1-5-6_2RnRvg > section > div'
    )  # the "load more" button
    but.click()
    time.sleep(1)
    c = browser.find_elements_by_xpath(
        '//*[@id="ptab-0"]/div/div[1]/section/a/div/span[2]')
    print(len(c))
    context = [i.text for i in c]
    browser.close()
    return context
Example #19
def getitem(name):
    ts = time()
    opts = Options()
    opts.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
    opts.add_argument("--headless")
    opts.add_argument("--disable-dev-shm-usage")
    opts.add_argument("--no-sandbox")
    driver = Chrome(ChromeDriverManager().install(), options=opts)

    driver.get("https://www.bing.com/")

    WebDriverWait(driver, 25).until(
        EC.presence_of_element_located(
            (By.XPATH, r"/html/body/div[3]/div[2]/div[2]/form/input[1]")))
    search = driver.find_element_by_xpath(
        r"/html/body/div[3]/div[2]/div[2]/form/input[1]")
    search.send_keys(name + " meme")
    search.submit()

    WebDriverWait(driver, 25).until(
        EC.presence_of_element_located(
            (By.XPATH, r"/html/body/header/nav/ul/li[2]/a")))
    driver.find_element_by_xpath(r"/html/body/header/nav/ul/li[2]/a").click()
    sleep(2)

    h = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        sleep(2)
        nh = driver.execute_script("return document.body.scrollHeight")
        if nh == h:
            break
        h = nh
    WebDriverWait(driver, 25).until(
        EC.presence_of_element_located((
            By.XPATH,
            r"/html/body/div[3]/div[5]/div[3]/div[1]/ul[1]/li[1]/div/div/a/div/img"
        )))
    content = driver.find_elements_by_class_name("mimg")
    image = choice(content).get_attribute("src")
    driver.close()
    return [image, str(time() - ts) + " s"]
Example #20
def main():
    option = ChromeOptions()
    # option.add_argument("--headless")  # hide the browser window
    option.add_argument("--no-sandbox")  # disable the sandbox on Linux
    browser = Chrome(options=option)  # launches the Chrome browser
    x = 0  # counts the downloaded images
    start = time.time()  # program start time
    url = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
    json_file = getJSONtext(url)
    for m in range(len(json_file['hero'])):
        heroId = json_file['hero'][m]['heroId']  # hero id
        name = json_file['hero'][m]['name']  # hero name
        hero_dir = create_folder(name)
        new_url = "https://lol.qq.com/data/info-defail.shtml?id=" + str(heroId)
        # print(new_url)
        browser.get(new_url)
        time.sleep(1)  # wait 1 second
        button = browser.find_element_by_xpath(
            '//*[@id="skinNAV"]/li[2]/a/img')
        button.click()
        time.sleep(1)  # wait 1 second

        img = browser.find_elements_by_xpath('//*[@id="skinBG"]/li/img')
        name = browser.find_elements_by_xpath('//*[@id="skinBG"]/li')
        for i in range(len(name)):
            # print(img[i].get_attribute("src"))
            # print(name[i].get_attribute("title"))
            try:
                picture = requests.get(
                    img[i].get_attribute("src")).content  # fetch the image bytes
                with open(
                        hero_dir + str(name[i].get_attribute("title")) +
                        '.jpg', 'wb') as f:  # save the image
                    f.write(picture)
                    x = x + 1
                    print("Downloading image #" + str(x))
            except:
                pass
        time.sleep(2)  # wait 2 seconds
    browser.close()
    end = time.time()  # program end time
    time_second = end - start  # elapsed time
    print("Downloaded " + str(x) + " images in " + str(time_second) + " seconds")
Example #21
def browser(config_browser, config_wait_time):
    # Initialize WebDriver
    if config_browser == 'chrome':
        driver = Chrome()
    elif config_browser == 'firefox':
        driver = Firefox()
    else:
        raise Exception(f'"{config_browser}" is not a supported browser')

    # Wait implicitly for elements to be ready before attempting interactions
    driver.implicitly_wait(config_wait_time)
    driver.maximize_window()

    # Return the driver object at the end of setup
    yield driver

    # For cleanup, quit the driver (quit also closes every window)
    driver.quit()
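
This generator is presumably registered as a pytest fixture (the @pytest.fixture decorator is not shown here). A hypothetical test consuming it by parameter name:

def test_homepage_title(browser):
    # pytest injects the yielded driver; the cleanup code runs after the test returns.
    browser.get("https://example.com")
    assert "Example" in browser.title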
Example #22
def pxi_Rtm_import():
    # set download directory path
    p = {
        'download.default_directory':
        r'C:\Users\dheer\Desktop\wrldc\RTM_BROWSER_AUTOMATION\Dumps\pxiRtmFile'
    }
    #add options to browser
    opts.add_experimental_option('prefs', p)

    browser = Chrome(options=opts)
    #maximize browser
    browser.maximize_window()
    # open the website "https://www.powerexindia.com/code/frontend/Reports/RTM/MarketVolumeProfileReport.html/"
    browser.get(
        'https://www.powerexindia.com/code/frontend/Reports/RTM/MarketVolumeProfileReport.html/'
    )

    # click on the datepicker select button
    previousDate = dt.datetime.today() - dt.timedelta(days=1)
    previousDateFormatted = previousDate.strftime(
        '%d-%m-%Y')  # format the date to ddmmyyyy
    # provide previous date
    browser.find_elements_by_name("DeliveryfromDate")[0].send_keys(
        previousDateFormatted)
    # provide previous date to "DeliverytoDate"
    browser.find_elements_by_name("DeliverytoDate")[0].send_keys(
        previousDateFormatted)
    button = browser.find_elements_by_name('submit')
    button[0].click()

    # click on different download csv option button class =  "dt-button buttons-csv buttons-html5"
    # csvDwnLd = browser.find_element_by_css_selector("dt-button buttons-csv buttons-html5")
    csvDwnLd = browser.find_elements_by_tag_name("span")

    srcFileLocation = r'C:\Users\dheer\Desktop\wrldc\RTM_BROWSER_AUTOMATION\Dumps\pxiRtmFile'
    destFileLocation = r'C:\Users\dheer\Desktop\wrldc\RTM_BROWSER_AUTOMATION\Dumps\pxiRtmFile\Archives'
    destFileName = "DASMVPReport_"
    moveFilesToArchive(srcFileLocation, destFileLocation, destFileName)
    csvDwnLd[11].click()
    print("pxi rtm fetch succesful")

    time.sleep(10)
    browser.close()
Example #23
def map_zc_to_rep(start_end_tuple):
    """Map Zip Codes to U.S. Representatives"""
    print(f"Process {os.getpid()}: ***map_zc_to_rep***")
    start = start_end_tuple[0]
    end = start_end_tuple[1]
    shared_state_zipcode_data_map = start_end_tuple[2]
    sns = STATE_NAMES[start:end]
    browser = Chrome(executable_path=CHROME_DRIVER_PATH)
    print(f"Process {os.getpid()}: States to evaluate: {sns}")
    for state in sns:
        print(f"Process {os.getpid()}: State: {state}")
        if len(shared_state_zipcode_data_map[state]) != 0:
            for index, zip_code_city_pair in enumerate(
                    shared_state_zipcode_data_map[state]):
                zip_code = zip_code_city_pair[0]
                browser.get(REP_URL)
                sleep(2.5)
                find_rep_input_field = browser.find_elements_by_css_selector(
                    '#Find_Rep_by_Zipcode')
                find_rep_input_field[0].send_keys(zip_code)
                find_rep_button = browser.find_elements_by_css_selector(
                    '.btn-success')
                find_rep_button[0].click()
                sleep(2.5)
                rep_page_anchor_tags = browser.find_elements_by_css_selector(
                    '.rep > a')
                reps = ""
                for anchor_tag in rep_page_anchor_tags:
                    if anchor_tag.text == '':
                        continue
                    print(
                        f"Process {os.getpid()} Representative: {anchor_tag.text}"
                    )
                    reps += anchor_tag.text + ", "
                    # Remove when not debugging
                    # break
                zip_code_city_pair.append(reps)
                shared_state_zipcode_data_map[state][
                    index] = zip_code_city_pair
                # Remove when not debugging
                # break
    print("DONE")
    browser.close()
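
The (start, end, shared_map) tuple the function unpacks suggests it is fanned out across worker processes. A hedged sketch of a matching driver script, assuming STATE_NAMES, CHROME_DRIVER_PATH, and REP_URL come from the same module and the chunk boundaries are arbitrary:

from multiprocessing import Manager, Pool

if __name__ == "__main__":
    manager = Manager()
    # state -> list of [zip_code, city] pairs; assumed to be populated by an
    # earlier step of the pipeline
    shared_map = manager.dict({s: [] for s in STATE_NAMES})
    chunks = [(0, 17, shared_map), (17, 34, shared_map), (34, 50, shared_map)]
    with Pool(len(chunks)) as pool:
        pool.map(map_zc_to_rep, chunks)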
Example #24
def main():
    webdriver = os.path.join(r"drive", "chromedriver")
    driver = Chrome(webdriver)

    url = "https://www.waytostay.com/paris-apartments/"
    driver.get(url)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    info = driver.find_elements_by_class_name("tile")
    prices = []
    details = []
    for j in range(len(info)):
        prices.append(
            driver.find_elements_by_class_name('price-person')[j].text)
        details.append(
            driver.find_elements_by_class_name('concise-details')[j].text)
    print(prices, details)
    driver.close()
    driver.quit()
Example #25
class TestNavigation(StaticLiveServerTestCase):
    """
        test Navigation from homepage
    """
    def setUp(self):
        self.browser = Chrome()
        self.browser.implicitly_wait(10)

    def tearDown(self):
        self.browser.close()

    def test_bad_address_returns_handler404(self):
        """
            Test bad address is caught by the handler and redirect to error page
        """
        print(inspect.currentframe().f_code.co_name)
        response = self.browser.get('%s%s' % (self.live_server_url, '/test'))
        # message = self.browser.find_element_by_tag_name('h1').text
        self.assertTemplateUsed(response, 'errors/errors.html')

    def test_click_mentions(self):
        """
            Test the click on mentions redirect to mentions page
        """
        print(inspect.currentframe().f_code.co_name)
        self.browser.get(self.live_server_url)
        user_url = self.live_server_url + reverse('home_app:mentions')
        element = self.browser.find_element_by_partial_link_text('mentions')
        actions = ActionChains(self.browser)
        actions.move_to_element(element)
        actions.click(element)
        actions.perform()
        self.assertEqual(self.browser.current_url, user_url)

    def test_click_icon_person_to_user(self):
        """
            Test click on the person image redirect to user page
        """
        print(inspect.currentframe().f_code.co_name)
        self.browser.get(self.live_server_url)
        user_url = self.live_server_url + reverse('user_app:user')
        self.browser.find_element(By.CSS_SELECTOR, ".nav-item img").click()
        self.assertEqual(self.browser.current_url, user_url)
Example #26
def get_md_event(conn, cur):
    company = "mcdonalds"
    driver = Chrome()
    driver.get(URL_2)

    xpath_front = "//*[@id=\"promotionList\"]/li["
    xpath_rear = "]"

    for i in range(1, 6):
        driver.implicitly_wait(100)
        xpath = xpath_front + str(i) + xpath_rear
        event = driver.find_element_by_xpath(xpath)

        image_loc = driver.find_element_by_xpath(
            "//*[@id=\"promotionList\"]/li[" + str(i) + "]/a/div[1]/img")
        image = image_loc.get_attribute('src')
        event.click()
        time.sleep(2)

        title_loc = driver.find_element_by_xpath(
            "//*[@id=\"container\"]/div[1]/div[1]/div[2]/div/div/div[1]/h2")
        title = title_loc.text
        title = title.replace('\n', '')

        date_loc = driver.find_element_by_xpath(
            "//*[@id=\"container\"]/div[1]/div[1]/div[2]/div/div/div[1]/span/em[1]"
        )
        date = date_loc.text
        date = date.replace('\n', '')
        date = date.replace('등록일 :', '')

        content_loc = driver.find_element_by_xpath(
            "//*[@id=\"container\"]/div[1]/div[1]/div[2]/div/div/article/div[1]/img"
        )
        content = content_loc.get_attribute('src')

        if insert_event_list(conn, cur, company, date, image, title, content):
            message = title + ' inserted in db' + '\n'
            print(message)

        driver.back()

    driver.close()
Example #27
    def update_page_source(self):
        url = self.build_search_url()

        driver = Chrome()
        driver.get(url)

        num_scrolls = 0
        try:

            while num_scrolls < self.scroll_max:
                driver.execute_script(random_js_scroll())
                self.page_source = driver.page_source
                random_sleep()
                num_scrolls += 1

        except Exception as e:
            l.WARN(e)

        driver.close()
Example #28
def parse_page(url: str, driver: webdriver.Chrome) -> None:
    print('Parsing ', url)

    try:
        driver.get(url)
    except TimeoutException:
        # Reload the browser in case of an unreachable page, skip to the next
        # page, and stop here so we don't keep using the closed driver
        driver.close()
        driver = create_driver()
        page_number = int(url[-3::])
        next_url = url.replace(str(page_number), str(page_number + 1))
        parse_page(next_url, driver)
        return

    comment_id_elements = driver.find_elements_by_xpath(
        "//*[contains(@id,'Comment_')]")
    comment_ids = [id.get_attribute('id') for id in comment_id_elements]

    for comment_id in comment_ids:
        user_id_element = driver.find_element_by_xpath(
            f'//*[@id="{comment_id}"]/div/div[2]/div[1]/span[1]/a[2]')
        time_element = driver.find_element_by_xpath(
            f'//*[@id="{comment_id}"]/div/div[2]/div[2]/span[1]/a/time')
        user_message = driver.find_element_by_xpath(
            f'//*[@id="{comment_id}"]/div/div[3]/div/div[1]')

        user_id = user_id_element.text
        time = time_element.get_attribute('title')
        comment = user_message.text

        mongo.insert(user_id, time, comment)

    try:
        next_page_element = driver.find_element_by_xpath(
            '//*[@id="PagerBefore"]/a[contains(@class, "Next Pager-nav")]')
        next_page_url = next_page_element.get_attribute('href')

        parse_page(next_page_url, driver)
    except NoSuchElementException:
        # In case of reaching the last page
        driver.close()
        sys.exit(0)
Example #29
def download_song(search_song,format='mp3_l',download=False):
    search_url = 'https://y.qq.com/portal/search.html#page=1&searchid=1&remoteplace=txt.yqq.top&t=song&w={}'.format(
        search_song)
    search_download_url_url = 'http://www.douqq.com/qqmusic/qqapi.php'

    chrome_options = ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    driver = Chrome('files/server/driver/chromedriver.exe', options=chrome_options)
    driver.get(search_url)
    driver.implicitly_wait(5)
    try:
        element = driver.find_element_by_xpath('//*[@id="song_box"]/div[2]/ul[2]/li[1]/div/div[2]/span/a')
        input_url = element.get_attribute('href')
    except:
        return False, 'Song title not found!'
    finally:
        driver.close()

    try :
        info_dict = loads(loads(post(search_download_url_url, data={'mid': input_url}).text).replace('\/', '/'))
    except:
        return False, 'Cracking failed; better buy the genuine version!'

    if 'mp3' in format:
        file_name = '.'.join([info_dict['songname'], 'mp3'])
    else:
        file_name = '.'.join([info_dict['songname'], format])

    download_url = info_dict[format]
    print('download_url',download_url)
    if download:
        FILE_PATH = 'files/music/'
        if not exists(FILE_PATH):
            mkdir(FILE_PATH)
        try :
            urlretrieve(download_url, FILE_PATH+file_name)
        except :
            return False, 'Seems we are not allowed to cheat; try another format!'
        return True, FILE_PATH+file_name
    else :
        return True, download_url
Example #30
def askUserDoesHeWant(offer_url, is_authenticated):
    print("If you want it, press Y, else N. For offer description I. For link L. To open it in your browser press O")
    user_key = readchar.readkey()
    if user_key == "y" or user_key == 'Y':
        if is_authenticated == 'Not logged in':
            print("You cannot send messages when you aren't logged in")
            print("Press B to come back to main tab and log in")
            user_key = readchar.readkey()
            if user_key == "B" or "b":
                mainTab(is_authenticated)
            askUserDoesHeWant(offer_url, is_authenticated)
        else:
            print("Input: Yes")
            offer_database = open("offerDatabase.txt", "a")
            offer_database.write(offer_url)
            offer_database.close()
            sendMessage(offer_url)
    elif user_key == 'n' or user_key == 'N':
        print("Input: No")
        offer_database = open("offerDatabase.txt", "a")
        offer_database.write(offer_url)
        offer_database.close()
    elif user_key == 'i' or user_key == 'I':
        print("------------------------------")
        print(additionalOfferInfo(offer_url))
        askUserDoesHeWant(offer_url, is_authenticated)
    elif user_key == 'l' or user_key == 'L':
        print(offer_url)
        askUserDoesHeWant(offer_url, is_authenticated)
    elif user_key == "o" or user_key == "O":
        additional_browser = Chrome()
        additional_browser.get(offer_url)
        print("Press enter when you're done")
        input()
        additional_browser.close()
        askUserDoesHeWant(offer_url, is_authenticated)
    elif user_key == readchar.key.CTRL_C:
        print("Bye")
        mainBrowser.quit()
        exit()
    else:
        askUserDoesHeWant(offer_url, is_authenticated)
Example #31
class InstaBot:
    BASE_URL = "https://www.instagram.com/"

    def __init__(self):
        self.driver = Chrome()
        self.wait = WebDriverWait(self.driver, 15)
        self.driver.maximize_window()
        self.driver.implicitly_wait(5)
        self.driver.get(self.BASE_URL)

    def login(self, username, password):
        USERNAME_FIELD = "//input[@type='text']"
        PASSWORD_FIELD = "//input[@type='password']"

        self.driver.find_element_by_xpath(USERNAME_FIELD).send_keys(username)
        self.driver.find_element_by_xpath(PASSWORD_FIELD).send_keys(password)
        self.driver.find_element_by_xpath(PASSWORD_FIELD).send_keys(Keys.ENTER)
        self.wait.until(EC.url_contains("accounts/"))

    def follow(self, target, quantity=10):
        target = target.strip().lower()
        account_url = self.BASE_URL + f"{target}" + '/'
        self.driver.get(account_url)
        self.wait.until(EC.title_contains(target))

        FOLLOWING_LINK = f"//a[@href='/{target}/following/']"
        self.driver.find_element_by_xpath(FOLLOWING_LINK).click()
        FOLLOW_BTNS = "//button[text()='Follow']"
        follow_btns = self.driver.find_elements_by_xpath(FOLLOW_BTNS)

        if len(follow_btns) == 0:
            popup = self.driver.find_element_by_class_name('isgrP')
            self.driver.execute_script(
                'arguments[0].scrollTop = arguments[0].scrollHeight', popup)
            follow_btns = self.driver.find_elements_by_xpath(FOLLOW_BTNS)

        for btn in follow_btns[:quantity + 1]:
            btn.click()
            sleep(1)

    def stop(self):
        self.driver.close()
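
A hypothetical driving script for the bot (the credentials and target account are placeholders):

bot = InstaBot()
bot.login("your_username", "your_password")
bot.follow("nasa", quantity=5)
bot.stop()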
Example #32
def utm():
    driver = Chrome()
    driver.implicitly_wait(5)
    driver.set_page_load_timeout(5)
    driver.get(
        "https://39.134.87.216:31943/pm/themes/default/pm/app/i2000_monitorView_pm.html?curMenuId=com.iemp.app.pm.monitorView&_=1545967221368#group_152734715982719"
    )
    # print(driver.page_source)
    usr = driver.find_element_by_xpath("//*[@id=\"username\"]")
    usr.send_keys("admin")
    pw = driver.find_element_by_xpath("//*[@id=\"password\"]")
    pw.send_keys("HuaWei12#$")

    input('Press Enter to continue...')
    # captcha = driver.find_element_by_xpath("//*[@id=\"validate\"]")
    # vc = input('输入网页上的验证码')
    # captcha.send_keys(vc)
    # captcha.send_keys(Keys.RETURN)
    # time.sleep(1)

    # action = ActionChains(driver)
    # action.send_keys(Keys.ESCAPE)
    # print(2)
    # try:
    #     action.perform()
    # except TimeoutException:
    #     print('time out')
    # # action.perform()
    # print(3)

    # button = driver.find_element_by_css_selector('#treeDiv_1_switch')
    # button.click()
    # print(driver.get_cookies())
    cookie = ''
    for item in driver.get_cookies():
        # print(item)
        if item['name'] == 'JSESSIONID':
            cookie = 'JSESSIONID=' + item['value']
    # print(cookie)
    driver.close()
    driver.quit()
    return cookie
Example #33
def get_search_result(url, data):
    opt = ChromeOptions()
    opt.headless = True
    browser = Chrome(options=opt)  # use Chrome; other browsers proved useless for this development
    browser.get(url)
    time1 = random.uniform(1, 2)
    time.sleep(time1)  # simulated pause, in seconds
    browser.find_element_by_id('kw').send_keys(data)  # simulate typing
    # time.sleep(random.uniform(1, 2))   # no pause seems to be needed here
    browser.find_element_by_id('su').click()  # simulate a click
    time2 = random.uniform(2, 5)
    time.sleep(time2)
    # html = browser.find_element_by_xpath("html").text  # surprisingly, this returns the text directly
    html = browser.execute_script(
        "return document.documentElement.outerHTML")  # returns the page's HTML source
    time3 = random.uniform(1, 3)
    time.sleep(time3)
    if len(data) >= 10:
        view_len = 10
    else:
        view_len = len(data)
    print('Query: ' + data[:view_len] + '\n'
          'Simulated load time: ' + str(round(time1, 2)) + 's\n'
          'Simulated search time: ' + str(round(time2, 2)) + 's\n'
          'Simulated viewing time: ' + str(round(time3, 2)) + 's\n')
    this_url = browser.current_url
    browser.close()
    reg = r'<div class="c-abstract">(.*?)</div><div class="f13">'  # in Baidu's HTML the abstract starts after &nbsp;-&nbsp;</span> and ends at </div><div class=
    result_first = [i for i in re.findall(reg, html) if i != '']
    result_final = []
    for i in result_first:
        reg = r'<span(.*?)</span>'
        try:
            del_text = re.findall(reg, i)[0]
            text_new = i.replace('<span',
                                 '').replace('</span>',
                                             '').replace(del_text, '')
        except:
            text_new = i.replace('<span', '').replace('</span>', '')
        result_final.append(text_new)
    result = result_final + [this_url] + [data]
    return result
Example #34
class TestSample(unittest.TestCase):
    def setUp(self):
        self.driver = Chrome("C://chromedriver.exe")
        self.driver.maximize_window()
        self.driver.implicitly_wait(40)
        self.driver.get("https://demo.actitime.com/")
        self.login = LoginPage(self.driver)
        self.home = HomePage(self.driver)
        self.user = UserPage(self.driver)

    def tearDown(self):
        self.driver.close()

    def test_invalid_login_TC13121(self):
        Data = json.load(open("./test/regression/login/UserStory123.json"))
        self.login.wait_for_login_page_to_load()
        self.login.get_username_textbox().send_keys(
            Data['TC12345']['Username1'])
        self.login.get_password_textbox().send_keys(
            Data['TC12345']['Password1'])
        self.login.get_login_button().click()
        actual_error_msg = self.login.get_login_error_msg().text
        expected_error_msg = "Username or Password is invalid. Please try again."
        assert actual_error_msg == expected_error_msg, "unexpected login error message"

    def test_Add_User(self):
        Data = json.load(open("./test/regression/login/UserStory123.json"))
        self.login.wait_for_login_page_to_load()
        self.login.get_username_textbox().send_keys(
            Data['TC12345']['Username'])
        self.login.get_password_textbox().send_keys(
            Data['TC12345']['Password'])
        self.login.get_login_button().click()
        self.home.get_users_button().click()
        self.user.get_add_user_button().click()
        self.user.wait_for_add_user_to_load()
        self.user.get_first_name_textbox().send_keys("Kushal")
        self.user.get_last_name_textbox().send_keys("R")
        self.user.get_email_textbox().send_keys("*****@*****.**")
        self.user.get_dropdown_list().click()
        self.user.get_department_dropdown().click()
        self.user.get_save_send_invitation_button().click()
Example #35
def extract_hongren(max_page_num=5):
    suffix = "hongren"
    # Normally it is enough to add the driver's directory to the PATH environment variable,
    # but for some reason that didn't work here, so the driver simply sits in the code directory
    browser = Browser('chromedriver.exe')
    browser.get(BASE_URL + suffix)
    items = {}
    while True:
        item_list = browser.find_elements_by_class_name('wall_item')
        for item in item_list:
            href = item.find_element(By.CSS_SELECTOR, ".pic_box.pic").get_attribute("href")
            desc = item.find_elements_by_class_name("desc")[0].text.strip()
            items[href] = desc
        if max_page_num > 0:
            max_page_num -= 1
            if not scroll_to_next(browser):
                break
        else:
            break
    browser.close()
    return items
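
scroll_to_next is not shown with this example. Since the wall layout suggests infinite scrolling, one plausible sketch is a scroll-and-compare helper; the two-second pause is an arbitrary choice:

import time

def scroll_to_next(browser):
    # Hypothetical: scroll to the bottom and report whether new content loaded.
    old_height = browser.execute_script("return document.body.scrollHeight")
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    new_height = browser.execute_script("return document.body.scrollHeight")
    return new_height > old_height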
Example #36
class AutomatorMixin(object):
    class UnexpectedSituation(Exception):
        pass

    data_property_class = None

    def __init__(self, steps, data_args=None):
        self.steps = steps
        self.data = self.data_property_class(*(data_args or []))

    def run(self):
        options = ChromeOptions()
        options.add_argument('--test-type')
        self.driver = Chrome(options=options)
        self.perform_steps()
        self.driver.close()

    def find_element(self, selector):
        LOG.info('finding selector "%s"' % selector)
        return self.driver.find_element_by_css_selector(selector)

    @property
    def action_method_lookup(self):
        return self.get_action_method_lookup()

    def get_action_method_lookup(self):
        return {
            'click': self.perform_click,
            'fill_form': self.perform_fill_form,
            'select_drop_down': self.perform_select_drop_down,
        }

    def get_css_selector(self, action):
        return action.get('css_selector')

    def get_action_value(self, action):
        if 'value' in action:
            value = action['value']
        elif 'property' in action:
            property_name = action['property']
            value = getattr(self.data, property_name)
        else:
            raise AutomatorMixin.UnexpectedSituation('Cannot find key "property" or "value"')

        return value

    def perform_steps(self):
        for step in self.steps:
            if 'url' in step:
                self.driver.get(step['url'])
            if 'actions' in step:
                self.perform_actions(step['actions'])

    def perform_actions(self, actions):
        for action in actions:
            action_method = self.action_method_lookup[action['type']]
            action_method(action)

    def perform_click(self, action):
        selector = self.get_css_selector(action)
        if selector:
            self.find_element(selector).click()
            return

        # Find by id.  This is needed when an id contains ".", as in KFC's survey.
        css_id = action['id_selector']
        LOG.info(css_id)
        self.driver.find_element_by_id(css_id).click()

    def perform_fill_form(self, action):
        selector = self.get_css_selector(action)
        value = self.get_action_value(action)

        self.find_element(selector).send_keys(value)

    def perform_select_drop_down(self, action):
        selector = self.get_css_selector(action)
        value = self.get_action_value(action)

        Select(self.find_element(selector)).select_by_value(value)
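
For reference, a hypothetical steps structure matching what perform_steps and perform_actions expect (the URL, selectors, and values are illustrative):

steps = [
    {'url': 'https://example.com/signup'},
    {'actions': [
        {'type': 'fill_form', 'css_selector': '#name', 'value': 'Jane'},
        {'type': 'select_drop_down', 'css_selector': '#country', 'value': 'US'},
        {'type': 'click', 'css_selector': 'button[type=submit]'},
    ]},
]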
Example #37
def start_callback():

    """
    Main loop of the scrape.
    """
    # The Instagram username of the profile we are downloading from; must be supplied.
    profile_username = E_username.get()
    # Output directory; initialized from the optional argument or a default later.
    output_directory = E_path.get()
    update_mode = True
    serialize = True
    latest_image = ''

    # The latest downloaded images will be the first in the directory.
    files = os.listdir(output_directory)
    if files:
        latest_image = files[0]

    # Start the browser
    driver = Chrome(executable_path='../bin/chromedriver')
    driver.get(insta_url + profile_username)

    # Find the number of posts on this Instagram profile
    post_count_tag_xpath = ('//*[@id="react-root"]/section/main/'
                            + 'article/header/div[2]/ul/li[1]/span/span')
    post_count_tag = driver.find_element_by_xpath(post_count_tag_xpath)
    post_count = int(post_count_tag.text.replace(',', ''))

    # If the target profile is private, then redirect to the login page
    login_tag_xpath = '//*[@id="react-root"]/section/main/article/div/p/a'
    try:
        login_tag = driver.find_element_by_xpath(login_tag_xpath)
        login_page_url = login_tag.get_attribute('href')
        driver.get(login_page_url)

        # Wait for the user to login
        while driver.current_url == login_page_url:
            sleep(1)

        # Return to the target profile from the homepage
        driver.get(insta_url + profile_username)
    except:
        pass

    # Click the 'Load More' element
    driver.find_element_by_class_name('_oidfu').click()

    # Load all the posts into the browser
    processed = 0
    while processed < post_count:
        # Load more content by scrolling to the bottom of the page
        driver.execute_script("window.scrollTo(0, 0);")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Download 4 rows of items (4 rows are loaded upon each scroll) and
        # remove them from view
        for _ in itertools.repeat(None, 4):
            urls = fetch_row_links(driver)
            delete_row(driver)
            for url in urls:

                # Exit if we've reached the latest image that was in the
                # directory before downloading. This means the directory has
                # everything beyond this point.
                if update_mode:
                    fname = file_name.search(url).group(0)
                    if fname in latest_image:
                        exit(0)

                download_from_url(url, output_directory,
                                  serialize, post_count-processed)
                processed += 1

    driver.close()
Example #38
    name_text_box = browser.find_element_by_class_name("paddingUnifier")
    name_text_box.send_keys(venmoInfo.payee_name)
    name_text_box.send_keys(Keys.ENTER)
    payment_box = browser.find_element_by_class_name("mainTextBox")
    time.sleep(1)
    payment_box.click()
    datetime_now = datetime.datetime.now()
    SendKeys.SendKeys(venmoInfo.amount + venmoInfo.description, with_spaces=True)
    # click the pay button
    pay_button = browser.find_element_by_id("onebox_pay_toggle")
    pay_button.click()
    name_text_box = browser.find_element_by_class_name("paddingUnifier")
    name_text_box.send_keys(venmoInfo.payee_name)

    # click the send button
    send_button = browser.find_element_by_id("onebox_send_button")
    send_button.click()

else:
    # click on the sign in link
    signin_link = browser.find_element_by_link_text("Sign in")
    signin_link.click()
    print("Couldn't find the cookie file, you will need two factor authorization and then cookie will be saved")
    # wait a while until the user fully signs in
    time.sleep(60)
    # Save the cookies
    pickle.dump(browser.get_cookies(), open("cookies.pkl", "wb"))

time.sleep(10)
browser.close()
Example #39
File: f.py  Project: skashem/Python
#click on ship to this address
#driver.find_element_by_xpath("//*[@id='button_ship_to']").click()

#Mouse hover to Place Order
Place_Order = Browser.find_element_by_xpath("//*[@id='placeOrderBtn']")
hover = ActionChains(Browser).move_to_element(Place_Order)
hover.perform()
#placing order
Browser.find_element_by_xpath("//*[@id='placeOrderBtn']").click()

time.sleep(5)
#Mouse hover to Place Order
Place_Order = Browser.find_element_by_xpath("//*[@id='placeOrderBtn']")
hover = ActionChains(Browser).move_to_element(Place_Order)
hover.perform()
#placing order
Browser.find_element_by_xpath("//*[@id='placeOrderBtn']").click()

time.sleep(13)

#log-out
Browser.find_element_by_xpath("//*[@id='userAccount']/a").click()
Browser.find_element_by_xpath("//*[@id='userAccount']/ul/li[10]/a").click()

#Close Browser
Browser.close()