def fetch_course_soups(course_list: List[Dict[str, str]], cookies: Dict[str, str]) -> Dict[str, BeautifulSoup]:
    course_soups: Dict[str, BeautifulSoup] = {}
    for course in course_list:
        course_name = course["title"]
        href = course["href"]
        queries = parse_qs(urlparse(href).query)
        assert "id" in queries
        course_id = queries["id"][0]
        # res = requests.get(f"https://letus.ed.tus.ac.jp/course/info.php?id={course_id}")
        # syllabus_soup = BeautifulSoup(res.text, "html.parser")
        res = requests.get(href, cookies=cookies)
        assert res.status_code == 200
        soup = BeautifulSoup(res.text, "html.parser")
        if __debug__:
            # cache the raw page for offline inspection
            page_cache_dir = Path(hydra.utils.get_original_cwd()) / ".cache" / "pages"
            os.makedirs(page_cache_dir, exist_ok=True)
            save_html(res, page_cache_dir / f"{course_name}.html", "w")
        course_soups[course_name] = soup
    return course_soups
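# The function above relies on a save_html(res, path, mode) helper that is not
# shown (other snippets in this collection call save_html variants with
# different signatures). A minimal sketch of what it presumably does here,
# assuming it simply dumps the response body to disk (hypothetical
# implementation; only the call signature is taken from the code above):
def save_html(res: requests.Response, path: Path, mode: str = "w") -> None:
    # Cache the fetched page so it can be inspected offline while debugging.
    with open(path, mode, encoding="utf-8") as f:
        f.write(res.text)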
def bp_preprocessor(self):
    html = read(self.url, self.local)
    # Save the article locally
    if self.save and not self.local:
        title = self.url.replace("https://", "")
        title = title.replace("/", ":")
        save_html(html, "html_pages/" + title)
    soup = BeautifulSoup(html, "html.parser")
    r = soup.find('div', {"class": "entry_content"})
    return r
def update_internal_server_state(self, previous_page):
    tree = html.fromstring(previous_page.content)
    forms = tree.xpath("//form[@id='FormBookingCreate']")
    if len(forms) and forms[0].attrib['action'].endswith('/4'):
        save_html(previous_page.content)
        self.log_step('✅ Step 0 and 3: Accepted conditions and RDV type chosen')
        return
    next_button = tree.xpath("//input[@name='nextButton']")
    if next_button[0].value != "Etape suivante":
        save_html(previous_page.content)
        raise Exception("Step 0: Dates not available :(")
    self.log_step('✅ Step 0: Accepted conditions')
    self.log_step('Step 3: Submitting form and implicitly choosing RDV type')
    page = self.post(f"{self.url_base}/3", {'nextButton': 'Etape suivante'})
    tree = html.fromstring(page.content)
    etape4_active = tree.xpath("//img[contains(@src, '/etape_4r.png')]")
    if not len(etape4_active):
        save_html(page.content)
        raise Exception("Step 3: Dates not available :(")
    self.log_step('✅ Step 3: Submitted')
    save_html(page.content)
def choose_first_available(self):
    self.log_step('Step 4: Choosing the first timeslot available')
    page = self.post(f"{self.url_base}/4",
                     {'nextButton': 'Première+plage+horaire+libre'})
    tree = html.fromstring(page.content)
    etape6_active = tree.xpath("//img[contains(@src, '/etape_6r.png')]")
    if not len(etape6_active):
        save_html(page.content)
        raise Exception("Step 4: Dates not available :(")
    save_html(page.content)
    date_time = tree.xpath("//*[@id='inner_Booking']/fieldset")[0].text_content()
    self.log_step('\n'.join(['✅ Step 4: Chosen date', f'```{date_time}```']))
def accept_conditions(self):
    self.logger.info('Step 0: Validating conditions')
    page = self.post(self.url_start, {
        'condition': 'on',
        'nextButton': 'Effectuer+une+demande+de+rendez-vous',
    })
    if not page:
        raise Exception('Conditions not accepted. Bad request')
    tree = html.fromstring(page.content)
    next_button = tree.xpath("//input[@name='nextButton']")
    if not len(next_button):
        save_html(page.content)
        raise Exception("Step 0: Next button not found")
    return page
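# accept_conditions, update_internal_server_state and choose_first_available
# are consecutive steps of the same booking flow. A hedged sketch of how a
# caller might chain them (`bot` is assumed to be an instance of the class
# defining these methods; retries and captcha handling are omitted):
def run_booking_flow(bot):
    page = bot.accept_conditions()           # step 0: validate conditions
    bot.update_internal_server_state(page)   # steps 0/3: choose the RDV type
    bot.choose_first_available()             # step 4: first free timeslot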
def load_all_courses(driver, html_name, num_load=None):
    utils.open_url(driver, host + '/library/search')
    # switch to Course tab
    for elem in driver.find_elements_by_xpath('//li[@class="tab-list__item"]'):
        if elem.text == 'Courses':
            elem.click()
    # define target scraping section
    course_section = driver.find_element_by_xpath('//div[@aria-selected="true"]')
    # expected number of courses
    ncourse_expect = int(
        course_section.find_element_by_xpath(
            './/*[@class="l-search__results-page-info"]').text.split()[1])
    nload = 0
    if num_load:
        nload_max = num_load
    else:
        # 25 courses per load, plus a small safety margin
        nload_max = (ncourse_expect // 25) + 3
    while nload < nload_max:
        courses = course_section.find_elements_by_xpath(
            './/li[@class="courses-list__item"]')
        ncourses = len(courses)
        utils.print_message('#load={}, ncourses={}'.format(nload, ncourses))
        nload += 1
        buttons = course_section.find_elements_by_xpath(
            './/a[@class="button button--outlined"]')
        if len(buttons) == 0:
            break
        buttons[0].click()
        utils.wait(3)
    # save html
    utils.save_html(driver, html_name)
    course_list = course_section.find_elements_by_xpath(
        './/li[@class="courses-list__item"]')
    utils.print_message('expect {} courses, loaded {}.'.format(
        ncourse_expect, len(course_list)))
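# A hedged usage sketch for load_all_courses (driver construction is
# illustrative; any login step the site requires is omitted):
def dump_courses(html_name='courses.html'):
    from selenium import webdriver
    driver = webdriver.Chrome()
    load_all_courses(driver, html_name)
    driver.quit()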
def req(self, method, url, max_retries=1, cookies=None, headers=None,
        data=None, first_attempt_with_proxy=True):
    headers = headers or {}  # avoid a mutable default argument
    proxy_url = self.get_next_proxy_url() if first_attempt_with_proxy else None
    last_response = None
    for attempt in range(max_retries):
        attempt_text = f'Attempt {attempt + 1}:'
        try:
            last_response = requests.request(
                method,
                url,
                cookies=cookies,
                headers={
                    **headers,
                    **HttpClient.DEFAULT_HEADERS,
                },
                data=data,
                timeout=HttpClient.REQ_TIMEOUT,
                proxies={'http': proxy_url, 'https': proxy_url} if proxy_url else None)
            if not last_response.ok:
                self.logger.warning(f"{attempt_text} Failed with status code {last_response.status_code}")
                if last_response.status_code == 403:
                    # delist the faulty proxy
                    self.logger.warning(f"{attempt_text} Delisting faulty proxy {proxy_url}")
                    self.proxies_queue.popleft()
                    self.proxies.remove(proxy_url)
                    with open(self.proxies_path, 'w') as f:
                        # persist the remaining proxies, one per line
                        f.write('\n'.join(self.proxies))
                elif last_response.status_code == 502:
                    self.logger.warning(f"{attempt_text} 502 - Bad Gateway")
                    time.sleep(3)
                else:
                    save_html(last_response.content)
                if attempt < max_retries - 1:
                    # set a new proxy for the next attempt
                    proxy_url = self.get_next_proxy_url()
            else:
                return last_response
        except Exception as ex:
            if type(ex) in [ReadTimeout, ProxyError]:
                self.logger.warning(f'{attempt_text} Timeout')
            else:
                self.logger.warning(f"{attempt_text} Failed with exception:")
                self.logger.exception(ex)
            time.sleep(attempt + 1)
    return last_response
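# Example use of req: fetch a URL with up to three attempts, rotating the
# proxy between failed attempts. `client` is assumed to be a configured
# HttpClient instance; the return value may be None if every attempt raised.
def fetch_with_retries(client, url):
    response = client.req('GET', url, max_retries=3)
    if response is not None and response.ok:
        return response.content
    return None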
def book_date(self, g_captcha_response, form_data):
    page = self.post(f"{self.url_base}/6", {
        'g-recaptcha-response': g_captcha_response,
        'nextButton': 'Etape+suivante'
    })
    tree = html.fromstring(page.content)
    next_button = tree.xpath("//input[@name='nextButton']")
    if not len(next_button):
        save_html(page.content)
        raise Exception('☠️ Step 6: Next button not found')
    if next_button[0].value != "Etape suivante":
        save_html(page.content)
        raise Exception("☠️ Step 6: Dates not available :(")
    self.log_step('✅ Step 6: Anticaptcha accepted')
    user_email = form_data['email']
    self.log_step(f'Step 8: Submitting form for `{user_email}`')
    page = self.post(f"{self.url_base}/8", {
        **form_data,
        'nextButton': 'Etape+suivante'
    })
    tree = html.fromstring(page.content)
    message_sent = tree.xpath(
        "//li[contains(text(), 'Vous disposez de') and contains(text(), 'minutes pour confirmer')]")
    if not len(message_sent):
        save_html(page.content)
        self.log_step('☠️ Step 8: Not submitted :(')
        raise Exception('☠️ Step 8: Message not sent')
    self.log_step(f'✅ Step 8: Submitted. Check email `{user_email}`')
def topics_up(self, topics_list, content=None):
    '''
    Randomly select a reply text and bump each topic in topics_list.
    '''
    if content is None:  # avoid a mutable default argument
        content = ['顶', '顶帖', '自己顶', 'waiting']  # "bump"-style reply texts
    if not self.auth.ck:
        logger.error('ck is invalid!')
        return False
    # For example --> topics_list = ['22836371', '98569169']
    for topics_id in topics_list:
        post_data = {
            "ck": self.auth.ck,
            "rv_comment": random.choice(content),
            "img": "(binary)",
            "start": "0",
            "submit_btn": "发送",  # "send"
        }
        url = DOUBAN_ADD_COMMENT.format(topic_id=topics_id)
        # call need_captcha once and reuse the result
        captcha_id, captcha_url = need_captcha(self.auth, url + "/?start=0")
        if captcha_id:
            logger.info("The captcha_image url address is %s" % captcha_url)
            vcode = input('图片上的验证码是:')  # prompt: "the captcha in the image is:"
            post_data["captcha-solution"] = vcode
            post_data["captcha-id"] = captcha_id
        r = self.auth.session.post(url, post_data,
                                   cookies=self.auth.session.cookies.get_dict())
        if DEBUG:
            save_html("topics_up.html", r.text)
        if r.status_code == 200:
            logger.info('Okay, already up ' + topics_id + ' topic')
        logger.info(r.status_code)
        logger.info(str(topics_list.index(topics_id) + 1).join(['Waiting for ', ' ...']))
        time.sleep(20)  # wait 20 seconds before bumping the next topic; adjust for a longer delay
    return True
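# topics_up assumes need_captcha(auth, url) returns a (captcha_id, captcha_url)
# pair with captcha_id falsy when no captcha is required. Example call, using
# the topic ids from the comment above (`client` is an instance of this class):
# client.topics_up(['22836371', '98569169'])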
def fetch_page(self, y, m, d):
    """
    Constructs an url and checks if it contains a page.
    :param y: year
    :param m: month
    :param d: day
    :return: a list of dicts { 'url': url, 'title': title, 'date': date object }
    """
    url = self.baseurl + "/" + dts(y) + "/" + dts(m) + "/" + dts(d) + "/"
    response = read(url, self.local)
    if response == "empty":
        return response
    logger.write_log("visiting: " + url)
    title = url.replace("https://", "")
    title = title.replace("/", ":")
    # Save the article locally
    if self.save and not self.local:
        save_html(response, "html_collection_pages/" + title)
    articles = self.fetch_articles(response)
    for article in articles:
        article['date'] = dt.date(y, m, d)
    return articles
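# A hedged driver sketch for fetch_page: walk every day of a month and collect
# the articles. "empty" is the sentinel fetch_page returns for days without a
# page; `scraper` is assumed to be an instance of the class defining it.
import calendar

def fetch_month(scraper, y, m):
    all_articles = []
    for d in range(1, calendar.monthrange(y, m)[1] + 1):
        result = scraper.fetch_page(y, m, d)
        if result != "empty":
            all_articles.extend(result)
    return all_articles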
def check_planning_dates(self, planning_id, planning_title):
    self.logger.debug(f'Step 1: Checking planning {planning_id}')
    page = self.post(f"{self.url_base}/1", {
        'planning': str(planning_id),
        'nextButton': 'Etape suivante',
    }, first_attempt_with_proxy=True)
    if not page:
        return page
    tree = html.fromstring(page.content)
    etape3_active = tree.xpath("//img[contains(@src, '/etape_3r.png')]")
    etape4_active = tree.xpath("//img[contains(@src, '/etape_4r.png')]")
    if len(etape3_active) or len(etape4_active):
        self.log_step(f'✅ Step 1: Dates available for "{planning_title}"')
        save_html(page.content)
        return page
    finish_button = tree.xpath("//input[@name='finishButton']")
    # guard against an empty result so the anomaly branch below is reachable
    if len(finish_button) and finish_button[0].value == "Terminer":
        self.logger.debug(f'Step 1: No dates for {planning_title}')
        return page
    self.log_step(f'❓ Step 1: Anomaly detected for {planning_title}. Dumping html.')
    save_html(page.content)
    return page
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import shutil

import utils

SOURCE_PATH = 'test/'
BUILD_PATH = 'build/'

if os.path.lexists(BUILD_PATH):
    shutil.rmtree(BUILD_PATH)
shutil.copytree(SOURCE_PATH, BUILD_PATH)

md_files = utils.get_files_tree(BUILD_PATH, 'md')
utils.save_html.setup(html_css='markdown.css')
for item in md_files:
    dest = item[:-2] + 'html'
    print(item, '==>', dest)
    with open(item, 'rb') as fp:
        model = utils.parse_md(fp)
    with open(dest, 'wb') as res_fp:
        utils.save_html(res_fp, model)
    os.remove(item)
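# utils.save_html is used above both as a callable and through a setup()
# attribute. A minimal sketch of that function-attribute pattern
# (hypothetical; only the two call sites above come from the script, and the
# rendering of the parsed model is elided to a str() call):
def save_html(fp, model):
    css = getattr(save_html, 'html_css', None)
    head = '<link rel="stylesheet" href="%s">' % css if css else ''
    fp.write(('<html><head>%s</head><body>%s</body></html>'
              % (head, model)).encode('utf-8'))

def setup(**options):
    for key, value in options.items():
        setattr(save_html, key, value)
save_html.setup = setup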
def check_once(self):
    unread_msgs = self.service.users().messages().list(
        userId='me',
        labelIds=['UNREAD', 'INBOX'],
        q='subject:"Demande de rendez-vous en attente de confirmation"'
    ).execute()
    if unread_msgs['resultSizeEstimate'] <= 0:
        self.logger.info('No new messages')
        return
    self.log_step(f'Step 9: Found {len(unread_msgs["messages"])} new messages')
    for msg in unread_msgs['messages']:
        try:
            msg_raw = self.service.users().messages().get(
                userId='me', id=msg['id'], format="raw",
                metadataHeaders=None).execute()
            msg = message_from_bytes(base64.urlsafe_b64decode(msg_raw['raw']))
            msg_payload = msg.get_payload()
            msg_html = html.fromstring(msg_payload)
            confirm_link = msg_html.xpath('//a[contains(@href, "booking/confirm")]')
            confirm_url = confirm_link[0].attrib['href']
            self.log_step(f'Step 10: Following confirmation url: `{confirm_url}`')
            page = requests.get(confirm_url, headers=HttpClient.DEFAULT_HEADERS)
            if not page:
                save_html(page.content)
                raise Exception(
                    f'☠️ Step 10: Url was not loaded. Status code: {page.status_code}')
            self.log_step(f'✅ Step 10: Confirmation page loaded `{confirm_url}`')
            tree = html.fromstring(page.content)
            confirm_form = tree.xpath("//*[@id='inner_Booking']/form")
            if not len(confirm_form):
                save_html(page.content)
                raise Exception('☠️ Confirmation form not found')
            confirm_url = f"{self.config.url}{confirm_form[0].attrib['action']}"
            confirm_btn = tree.xpath("//input[@name='createButton']")
            if not len(confirm_btn):
                save_html(page.content)
                raise Exception('☠️ Confirmation button not found')
            self.log_step(f'Step 11: Submitting confirmation to: `{confirm_url}`')
            page = requests.post(
                confirm_url,
                headers={
                    **HttpClient.DEFAULT_HEADERS,
                    'referer': confirm_url,
                    'origin': confirm_url
                },
                cookies=page.cookies,
                data={'createButton': confirm_btn[0].value})
            save_html(page.content)
            if not page:
                raise Exception(
                    f'☠️ Step 11: Confirmation not submitted. Status code: {page.status_code}')
            self.log_step('✅ RDV fully confirmed!')
        except Exception as ex:
            self.log_step(f'☠️ Exception: {ex}')
            self.logger.exception(ex)
def process_html(
    html: str,
    store_url: str,
    html_path: str,
    full_html_path: str,
    json_path: str,
    title: Optional[str] = None,
    image: Optional[str] = None,
) -> Optional[str]:
    try:
        soup = BeautifulSoup(html, "lxml")
        data = {"path": html_path}
        if image is None:
            # Let's see if the header has img metadata
            og_img = soup.find("meta", attrs={"property": "og:image"})
            twitter_img = soup.find("meta", attrs={"name": "twitter:image"})
            if og_img is None and twitter_img is None:
                # Header has no img metadata; fall back to a raw string search.
                og_img_idx = html.find("og:image")
                if og_img_idx != -1:
                    idx_s = og_img_idx + 11
                    idx_e = html.find('"', idx_s)
                    if idx_e == -1:
                        idx_e = html.find("'", idx_s)
                    image = html[idx_s:idx_e]
                else:
                    twitter_img_idx = html.find('name="twitter:image"')
                    if twitter_img_idx != -1:
                        # index from twitter_img_idx here; og_img_idx is -1 in this branch
                        idx_s = twitter_img_idx + 16
                        idx_e = html.find('"', idx_s)
                        if idx_e == -1:
                            idx_e = html.find("'", idx_s)
                        image = html[idx_s:idx_e]
                    else:
                        return "no_image_metadata"
            elif og_img is not None:
                image = og_img.attrs["content"]
            else:
                image = twitter_img.attrs["content"]
        data["image"] = image
        description = soup.find("meta", attrs={"property": "og:description"})
        if description is None:
            description = soup.find("meta", attrs={"name": "twitter:description"})
        if description is None:
            description = soup.find("meta", attrs={"name": "description"})
        if description is not None:
            data["description"] = description.attrs["content"].strip().replace("\n", " ")
        # soup.find(...) returns None for a missing tag, so guard each lookup
        # instead of chaining .attrs onto a possible None
        found_title = None
        og_title = soup.find("meta", attrs={"property": "og:title"})
        if og_title is not None:
            found_title = og_title.attrs.get("content")
        if found_title is None:
            twitter_title = soup.find("meta", attrs={"name": "twitter:title"})
            if twitter_title is not None:
                found_title = twitter_title.attrs.get("content")
        if found_title is None:
            title_tag = soup.find("title")
            if title_tag is not None:
                found_title = title_tag.text
        if found_title is None and title is None:
            return "no_title_metadata"
        if found_title is not None:
            title = found_title
        final_html_with_body = process_tree(html)
        title_arr = typed.List(title.split())
        extracted_text, unbranded_title = extract_all_text(final_html_with_body, title_arr)
        final_html = get_rid_of_the_body(final_html_with_body)
        data["title"] = unbranded_title if unbranded_title else title
        data["extracted_text"] = extracted_text
        data["store_url"] = store_url
        asyncio.run(save_json(json_path, data))
        asyncio.run(save_html(html_path, final_html))
        asyncio.run(save_html(full_html_path, html))
        print(f"Finished processing {html_path}.")
        return None
    except Exception:
        print(f"Failed at processing {html_path}.")
        return None
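# Illustrative driver for process_html (paths are hypothetical). A string
# return value ("no_image_metadata", "no_title_metadata") signals a
# recoverable skip, while None is returned both on success and on a caught
# processing failure, which the function itself logs via print:
def process_file(path: str, store_url: str) -> None:
    with open(path, encoding="utf-8") as f:
        raw_html = f.read()
    reason = process_html(
        html=raw_html,
        store_url=store_url,
        html_path=path + ".clean.html",
        full_html_path=path + ".full.html",
        json_path=path + ".json",
    )
    if reason is not None:
        print(f"Skipped {path}: {reason}")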
def main(_args):
    agent_type = _args.agent
    results_path = _args.results_path + f"_{agent_type}"
    results = []
    environment_settings = {}
    best_agent, best_eval_score = None, float('-inf')

    if agent_type == "reinforce":
        from scripts.agent_reinforce import REINFORCE
        AGENT = REINFORCE
        environment_settings = {
            'env_name': 'CartPole-v1',
            'gamma': 1.00,
            'max_minutes': 10,
            'max_episodes': 10000,
            'goal_mean_100_reward': 475
        }
        inner_wrappers = None
    elif agent_type == "vpg":
        from scripts.agent_vpg import VPG
        AGENT = VPG
        environment_settings = {
            'env_name': 'CartPole-v1',
            'gamma': 1.00,
            'max_minutes': 10,
            'max_episodes': 10000,
            'goal_mean_100_reward': 475
        }
        inner_wrappers = None
    elif agent_type == "sac":
        from scripts.agent_sac import SAC
        AGENT = SAC
        environment_settings = {
            'env_name': 'HalfCheetah-v2',
            'gamma': 0.99,
            'max_minutes': 300,
            'max_episodes': 10000,
            'goal_mean_100_reward': 2000
        }
        inner_wrappers = [utils.RenderUint8]
    else:
        raise NotImplementedError("Other agent types are not supported yet")

    utils.create_directory(results_path)
    for seed in SEEDS:
        env_name, gamma, max_minutes, \
            max_episodes, goal_mean_100_reward = environment_settings.values()
        agent = AGENT()
        make_env_fn, make_env_kargs = utils.get_make_env_fn(
            env_name=env_name, inner_wrappers=inner_wrappers)
        result, final_eval_score, training_time, wallclock_time = agent.train(
            make_env_fn, make_env_kargs, seed, gamma, max_minutes,
            max_episodes, goal_mean_100_reward)
        results.append(result)
        if final_eval_score > best_eval_score:
            best_eval_score = final_eval_score
            best_agent = agent
    results = np.array(results)  # dim: (num_seeds, max_episode, 5)
    _ = BEEP()

    # Agent Progression
    html_data, title = best_agent.demo_progression()
    utils.save_html(data=html_data,
                    path=os.path.join(results_path, f"{title}.html"))
    # Best Agent
    html_data, title = best_agent.demo_last()
    utils.save_html(data=html_data,
                    path=os.path.join(results_path, f"{title}.html"))

    # Extracting statistics
    """
    Over multiple seeds:
        total_steps, mean_100_reward, mean_100_eval_score,
        training_time, wall_clock_elapsed
    result is initialized with nan values and with fixed max episode length;
    so, mean/max/min will calculate until common episode length
    (min episode length for different seeds)
    """
    agent_max_t, agent_max_r, agent_max_s, \
        agent_max_sec, agent_max_rt = np.max(results, axis=0).T
    agent_min_t, agent_min_r, agent_min_s, \
        agent_min_sec, agent_min_rt = np.min(results, axis=0).T
    agent_mean_t, agent_mean_r, agent_mean_s, \
        agent_mean_sec, agent_mean_rt = np.mean(results, axis=0).T
    agent_x = np.arange(len(agent_mean_s))  # x axis values (episode numbers): 0, 1, 2, ...
    # Plot Statistics
    fig, axs = plt.subplots(5, 1, figsize=(20, 30), sharey=False, sharex=True)
    axs[0].plot(agent_max_r, 'b', linewidth=1)
    axs[0].plot(agent_min_r, 'b', linewidth=1)
    axs[0].plot(agent_mean_r, 'b', label=agent_type.upper(), linewidth=2)
    axs[0].fill_between(agent_x, agent_min_r, agent_max_r, facecolor='b', alpha=0.3)
    axs[1].plot(agent_max_s, 'b', linewidth=1)
    axs[1].plot(agent_min_s, 'b', linewidth=1)
    axs[1].plot(agent_mean_s, 'b', label=agent_type.upper(), linewidth=2)
    axs[1].fill_between(agent_x, agent_min_s, agent_max_s, facecolor='b', alpha=0.3)
    axs[2].plot(agent_max_t, 'b', linewidth=1)
    axs[2].plot(agent_min_t, 'b', linewidth=1)
    axs[2].plot(agent_mean_t, 'b', label=agent_type.upper(), linewidth=2)
    axs[2].fill_between(agent_x, agent_min_t, agent_max_t, facecolor='b', alpha=0.3)
    axs[3].plot(agent_max_sec, 'b', linewidth=1)
    axs[3].plot(agent_min_sec, 'b', linewidth=1)
    axs[3].plot(agent_mean_sec, 'b', label=agent_type.upper(), linewidth=2)
    axs[3].fill_between(agent_x, agent_min_sec, agent_max_sec, facecolor='b', alpha=0.3)
    axs[4].plot(agent_max_rt, 'b', linewidth=1)
    axs[4].plot(agent_min_rt, 'b', linewidth=1)
    axs[4].plot(agent_mean_rt, 'b', label=agent_type.upper(), linewidth=2)
    axs[4].fill_between(agent_x, agent_min_rt, agent_max_rt, facecolor='b', alpha=0.3)

    # ALL
    axs[0].set_title('Moving Avg Reward (Training)')
    axs[1].set_title('Moving Avg Reward (Evaluation)')
    axs[2].set_title('Total Steps')
    axs[3].set_title('Training Time')
    axs[4].set_title('Wall-clock Time')
    plt.xlabel('Episodes')
    axs[0].legend(loc='upper left')
    fig.savefig(os.path.join(results_path, f"{agent_type.upper()}_Statistics.png"))
    plt.show()

    # Saving Statistics
    statistics_dict = {
        "x": agent_x,
        "max_r": agent_max_r, "min_r": agent_min_r, "mean_r": agent_mean_r,
        "max_s": agent_max_s, "min_s": agent_min_s, "mean_s": agent_mean_s,
        "max_t": agent_max_t, "min_t": agent_min_t, "mean_t": agent_mean_t,
        "max_sec": agent_max_sec, "min_sec": agent_min_sec, "mean_sec": agent_mean_sec,
        "max_rt": agent_max_rt, "min_rt": agent_min_rt, "mean_rt": agent_mean_rt
    }
    utils.save_data(data=statistics_dict,
                    path=os.path.join(results_path, f"{agent_type.upper()}_Statistics_Dict"))
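# Hedged entry-point sketch: main() only reads `.agent` and `.results_path`
# from its argument, so an argparse namespace fits (flag names assumed):
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--agent", choices=["reinforce", "vpg", "sac"],
                        default="reinforce")
    parser.add_argument("--results_path", default="results")
    main(parser.parse_args())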
def login_get_cookies():
    cookies = utils.load_cookies()
    if cookies:
        session = requests.Session()
        session.headers['User-Agent'] = User_Agent
        session.cookies = requests.utils.cookiejar_from_dict(cookies)
        return session, cookies
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(executable_path='./chromedriver_win32/chromedriver.exe')
    url = 'https://accounts.douban.com/passport/login'
    while True:
        driver.get(url)
        link = driver.find_element_by_xpath(
            "//ul[@class='tab-start']/li[@class='account-tab-account']")
        link.click()
        name_input = driver.find_element_by_xpath(
            "//div[@class='account-form-field']/input[@id='username']")
        pass_input = driver.find_element_by_xpath(
            "//div[@class='account-form-field']/input[@id='password']")
        remember_input = driver.find_element_by_xpath(
            "//div[@class='account-form-ft']/p[@class='account-form-remember']/input[@id='account-form-remember']")
        login_button = driver.find_element_by_xpath(
            "//div[@class='account-form-field-submit ']/a[@class='btn btn-account']")
        name_input.clear()
        name_input.send_keys(username)
        pass_input.clear()
        pass_input.send_keys(password)
        remember_input.click()
        login_button.click()
        start_ts = time.time()
        print("start..", start_ts)
        try:
            # wait until the login buttons disappear, i.e. login has finished
            WebDriverWait(driver, 15).until_not(
                lambda x: x.find_element_by_xpath(
                    "//div[@class='account-form-field-submit ']/a[@class='btn btn-account btn-active']"
                ).is_displayed())
            WebDriverWait(driver, 15).until_not(
                lambda x: x.find_element_by_xpath(
                    "//div[@class='account-form-field-submit ']/a[@class='btn btn-account']"
                ).is_displayed())
        except Exception:
            import traceback
            print(traceback.format_exc())
            utils.save_html('exc_inex.html', driver.page_source)
            import os
            os._exit(-1)
        print('end..', time.time() - start_ts)
        driver.save_screenshot('submit.png')
        utils.save_html('index.html', driver.page_source)
        if u'douban' in driver.page_source:
            selenium_cookies = driver.get_cookies()
            print("selenium_cookies:", selenium_cookies)
            driver.close()
            break
        else:
            driver.close()
    # handle cookies
    session = requests.Session()
    session.headers['User-Agent'] = User_Agent
    for i in selenium_cookies:
        requests.utils.add_dict_to_cookiejar(session.cookies, {i['name']: i['value']})
    cookies = requests.utils.dict_from_cookiejar(session.cookies)
    utils.save_cookies(cookies)
    return session, cookies
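# Typical use of login_get_cookies: obtain an authenticated session once and
# reuse it for later requests (the URL is illustrative):
def fetch_logged_in_page(url='https://www.douban.com/mine/'):
    session, cookies = login_get_cookies()
    r = session.get(url)
    utils.save_html('mine.html', r.text)
    return r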