Example #1
0
def main():
    """Log in to dj.cs.ustc.edu.cn, list the todo events (待办事项) and try
    to commit every event that is not yet handled.

    Relies on module-level USERNAME / PASSWORD / DOMAIN and the imported
    requests / lxml etree names; prompts for credentials when unset.

    :return: 0 on completion.
    """
    # Check username and password
    global USERNAME, PASSWORD
    if not USERNAME:
        USERNAME = input('请输入学号:')
    if not PASSWORD:
        PASSWORD = input('请输入密码:')

    req = requests.Session()
    cookie_jar = RequestsCookieJar()
    login_payload = {'username': USERNAME, 'password': PASSWORD}
    url = 'http://dj.cs.ustc.edu.cn/admin/index/login.html'

    # Open Login. Redirects are disabled so the login cookies stay on this
    # response instead of being consumed by a redirect hop.
    print('正在登录: %s' % url)
    r = req.post(url, data=login_payload, allow_redirects=False)
    cookie_jar.update(r.cookies)

    # Now fetch the dashboard page
    url = 'http://dj.cs.ustc.edu.cn/admin/index/index.html'
    r = req.get(url, cookies=cookie_jar)

    # Locate the iframe link that holds the todo ('待办事项') list
    dashboard_page = etree.HTML(r.text)
    iframe_link_path = dashboard_page.xpath(
        "//*[@id='draggable']/div[2]/div[1]/dl[1]/dd[2]/a/@data-param")
    assert (len(iframe_link_path) == 1)
    iframe_link = DOMAIN + iframe_link_path[0]

    todo_events = []
    r = req.get(iframe_link, cookies=cookie_jar)
    assert (r.status_code == 200)
    events_page = etree.HTML(r.text)
    # Hoist the shared row locator; XPath row indices are 1-based.
    row_base = "//div[@class='bDiv']/table/tbody/tr[%d]"
    events = events_page.xpath("//div[@class='bDiv']/table/tbody/tr")
    for row, _ in enumerate(events, start=1):
        event_name = events_page.xpath(row_base % row + "/td[1]/text()")[0]
        event_status = events_page.xpath(
            row_base % row + "/td[5]/text()")[0].strip()
        event_link = events_page.xpath(row_base % row + "/td[6]/a/@href")[0]
        if event_status != '已办理':
            event_status = '\033[1;31m未办理\033[0m'  # red "not handled" marker
            todo_events.append((event_name, event_link))
        print('%s\t%s' % (event_name, event_status))

    print('=========================')
    for event_name, event_href in todo_events:
        sys.stdout.write('正在办理 %s' % event_name)
        event_full_link = DOMAIN + event_href
        r = req.get(event_full_link, cookies=cookie_jar)
        commit_page = etree.HTML(r.text)
        commit_path = commit_page.xpath("//div[@class='bot']/a[1]/@href")[0]
        commit_url = DOMAIN + commit_path
        r = req.get(commit_url, cookies=cookie_jar)
        # Conditional expression instead of the fragile `cond and a or b` idiom.
        print('成功' if r.status_code == 200 else '失败')

    return 0
Example #2
0
 def restore_session(self):
     """Restore this user's saved session cookies from the system keyring.

     Silently does nothing when no session was stored for the user; keyring
     and JSON failures are re-raised as PypiKeyringError.
     """
     try:
         stored = keyring.get_password(KEYRING_SESSION_NAME, self.username)
         if stored is None:
             # No session was saved for this user.
             return
         cookie_dict = json.loads(stored)
     except (KeyringError, JSONDecodeError) as exc:
         raise PypiKeyringError(f'{exc}')
     jar = RequestsCookieJar()
     jar.update(cookie_dict)
     self.session.cookies = jar
Example #3
0
def create_request(session, method, url, params=None, data=None, headers=None, cookies=None, files=None, auth=None):
    """Build a Request whose settings are merged with those of *session*.

    Per-call cookies are layered on top of the session's cookies; params,
    headers and auth fall back to the session values when not supplied.
    """
    # Normalise the per-call cookies into a real CookieJar.
    per_call = cookies or {}
    if not isinstance(per_call, cookielib.CookieJar):
        per_call = cookiejar_from_dict(per_call)

    # Session cookies first, then the per-call ones override on top.
    jar = RequestsCookieJar()
    jar.update(session.cookies)
    jar.update(per_call)

    merged_params = merge_setting_safe(params, session.params)
    merged_headers = merge_setting_safe(headers, session.headers, dict_class=CaseInsensitiveDict)
    merged_auth = merge_setting_safe(auth, session.auth)

    return Request(
        method=method.upper(),
        url=url,
        headers=merged_headers,
        files=files,
        data=data,
        params=merged_params,
        auth=merged_auth,
        cookies=jar,
    )
Example #4
0
    def from_dict(cls, response_dict):
        """
        Rebuild a Response wrapper from a dict.
        @param response_dict: the raw response.__dict__
        @return:
        """
        # Rehydrate the serialised cookies into a real RequestsCookieJar.
        jar = RequestsCookieJar()
        jar.update(other=response_dict["cookies"])
        response_dict["cookies"] = jar

        # timedelta(days, seconds, microseconds): "elapsed" was stored as
        # microseconds.
        response_dict["elapsed"] = datetime.timedelta(
            0, 0, response_dict["elapsed"])
        response_dict["connection"] = None
        response_dict["_content_consumed"] = True

        rebuilt = res()
        rebuilt.__dict__.update(response_dict)
        return cls(rebuilt)
Example #5
0
class LeaSession:
    """An authenticated LEA (Omnivox) session page.

    Keeps its own copy of the session cookies plus the raw and pyquery-parsed
    LEA home-page HTML.
    """

    def __init__(self, session, config: ConfigDict, lea_html: str):
        # Snapshot the session's cookies into a jar owned by this object.
        self.cfg = config
        self.cookies = RequestsCookieJar()
        self.cookies.update(session.cookies)
        self.lea_html = lea_html
        self.lea_html_query = pq(lea_html)

    def getAssignments(self):
        """Fetch the assignments page and print a per-course mapping.

        Follows the link with id "lienDTRV" from the LEA home page and builds
        {course name: {assignment title: cleaned description}}.
        """
        assignmentURL = self.lea_html_query('a[id="lienDTRV"]').attr("href")

        assignmentsPage = doRequest(
            self.cfg, self.cookies,
            requests.get(url=self.cfg["https_ovxUrl2"] + assignmentURL,
                         headers=self.cfg["headers"],
                         cookies=self.cookies,
                         allow_redirects=True))
        # Keep any cookies the assignments request set.
        self.cookies.update(assignmentsPage.cookies)

        # NOTE(review): this assignment is immediately shadowed by the same
        # call at the top of the loop below — it looks redundant.
        d = pq(assignmentsPage.text)
        assignmentDict = {}
        # Rows alternate between the classes "LigneListTrav1" and
        # "LigneListTrav2" (striped table), hence i in {1, 2}.
        for i in range(1, 3):
            d = pq(assignmentsPage.text)
            assignmentsHTML = d('tr[class="LigneListTrav' + str(i) + '"]')
            for tab in assignmentsHTML:
                d = pq(tab)
                name = d(
                    'a[class="RemTrav_Sommaire_NomCours"]')[0].text.strip()
                listAssignmentsOfClassHTML = d(
                    'a[class="RemTrav_Sommaire_ProchainsTravaux"]')
                listAssignmentsDescOfClassHTML = d(
                    'span[class="RemTrav_Sommaire_ProchainsTravauxDesc"]')
                listAssignmentsOfClass = {}
                # Pair each assignment link with the description at the same
                # index, stripping newlines/spaces and turning NBSP into ' '.
                for assignment in listAssignmentsOfClassHTML:
                    listAssignmentsOfClass[assignment.text.strip(
                    )] = listAssignmentsDescOfClassHTML[
                        listAssignmentsOfClassHTML.index(
                            assignment)].text.replace('\n', '').replace(
                                '\r', '').replace(' ',
                                                  '').replace('\xa0', ' ')
                assignmentDict[name] = listAssignmentsOfClass
        print(assignmentDict)
Example #6
0
    def prepare_request(self, request):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for
        transmission and returns it. The :class:`PreparedRequest` has settings
        merged from the :class:`Request <Request>` instance and those of the
        :class:`Session`.

        :param request: :class:`Request` instance to prepare with this
                        session's settings.
        :rtype: PreparedRequest
        """
        cookies = request.cookies or {}

        # Bootstrap CookieJar: a plain dict is converted to a real jar.
        if not isinstance(cookies, cookielib.CookieJar):
            cookies = cookiejar_from_dict(cookies)

        # Merge with session cookies; the later update wins, so
        # request-level cookies override the session's.
        merged_cookies = RequestsCookieJar()
        merged_cookies.update(self.cookies)
        merged_cookies.update(cookies)

        # Set environment's basic authentication if not explicitly set
        # (only when trust_env allows reading ~/.netrc).
        auth = request.auth
        if self.trust_env and not auth and not self.auth:
            auth = get_netrc_auth(request.url)

        p = PreparedRequest()
        p.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merge_setting(request.headers,
                                  self.headers,
                                  dict_class=CaseInsensitiveDict),
            params=merge_setting(request.params, self.params),
            auth=merge_setting(auth, self.auth),
            cookies=merged_cookies,
            hooks=merge_hooks(request.hooks, self.hooks),
        )
        return p
Example #7
0
def load_and_merge_cookie_jars(cookie_jar_paths):
    """Load every Mozilla-format cookie file in *cookie_jar_paths* and merge
    them into a single RequestsCookieJar.

    Files that are missing or unreadable are logged and skipped; a falsy
    path list yields an empty jar.
    """
    merged = RequestsCookieJar()
    if not cookie_jar_paths:
        return merged

    logging.debug("Attempting to load and merge the following cookie files: %s" % cookie_jar_paths)
    for path in cookie_jar_paths:
        if not os.path.isfile(path):
            continue
        try:
            file_jar = MozillaCookieJar(path)
            file_jar.load(ignore_expires=True, ignore_discard=True)
            merged.update(file_jar)
        except Exception as e:
            # Best-effort: a bad file should not abort the whole merge.
            logging.warning("Unable to load cookie file [%s]: %s" % (path, get_typed_exception(e)))

    # Do not preserve expire values from cookies with expires=0 from the file,
    # or requests will not use the cookie.
    for cookie in merged:
        if not cookie.expires:
            cookie.expires = None

    return merged
Example #8
0
    def prepare_request(self, request):
        """Build a :class:`PreparedRequest <PreparedRequest>` ready for
        transmission, combining the settings carried by *request* with the
        defaults stored on this :class:`Session`.

        :param request: :class:`Request` instance to prepare with this
                        session's settings.
        """
        # Normalise the request-level cookies into a real CookieJar.
        request_cookies = request.cookies or {}
        if not isinstance(request_cookies, cookielib.CookieJar):
            request_cookies = cookiejar_from_dict(request_cookies)

        # Session cookies first; request cookies layered on top win.
        jar = RequestsCookieJar()
        jar.update(self.cookies)
        jar.update(request_cookies)

        # Fall back to netrc-provided basic auth when nothing explicit is set.
        auth = request.auth
        if self.trust_env and not auth and not self.auth:
            auth = get_netrc_auth(request.url)

        prepared = PreparedRequest()
        prepared.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merge_setting(request.headers, self.headers,
                                  dict_class=CaseInsensitiveDict),
            params=merge_setting(request.params, self.params),
            auth=merge_setting(auth, self.auth),
            cookies=jar,
            hooks=merge_hooks(request.hooks, self.hooks),
        )
        return prepared
Example #9
0
def get_modis_by_requests():
    """Download one MODIS HDF granule from LAADS via NASA Earthdata login.

    Flow: scrape the CSRF token from the Earthdata home page, log in (the
    credentials here are redacted placeholders), resolve the real file URL
    from the archive page, then save the file to fb/modis_requests.hdf.
    """
    # Fetch the login page and extract the authenticity (CSRF) token
    resp = request('GET', 'https://urs.earthdata.nasa.gov/home')
    pt = re.compile(
        r'.*<input type="hidden" name="authenticity_token" value="(.*)" />.*')
    print(len(resp.text))
    token = pt.findall(resp.text)[0]
    print('token: ', token)

    # Log in and keep the resulting cookies
    jar = RequestsCookieJar()
    jar.update(resp.cookies)
    url = 'https://urs.earthdata.nasa.gov/login'
    forms = {
        'username': '******',
        'password': '******',
        'redirect_uri': '',
        'commit': 'Log+in',
        'client_id': '',
        'authenticity_token': token
    }
    resp = request('POST', url, data=forms, cookies=jar)
    jar.update(resp.cookies)
    print('cookie: ', resp.cookies.items())

    # Request the archive page and parse the actual file download URL
    url = 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/6/MOD13Q1/2019/321/MOD13Q1.A2019321.h00v08.006.2019337235356.hdf'
    resp = request('GET', url, cookies=jar)
    pu = re.compile(r'href="(https://ladsweb.modaps.eosdis.nasa.gov.*hdf)"')
    furl = pu.findall(resp.text)[0]
    # The href is HTML-escaped; restore literal ampersands.
    furl = furl.replace('&amp;', '&')
    print('furl: ', furl)

    # Download the file and save it to disk
    resp = request('GET', furl, cookies=jar)
    with open('fb/modis_requests.hdf', 'wb') as fp:
        fp.write(resp.content)
    print('OK')
Example #10
0
class LeaScheduleSelectionPage:
    """
    Represents the page to request schedules in LEA.
    """
    def __init__(self, session, schedule_reference: str):
        """
        Initializes a wrapper over the LEA schedule request page.

        :param session: The Omnivox session used to authenticate the LEA requests.
        :param schedule_reference: The schedule request reference.
        """
        # Keep our own copy of the session's cookies.
        self.cookies = RequestsCookieJar()
        self.cookies.update(session.cookies)
        self.schedule_reference = schedule_reference

        # Lazily populated by fetch().
        self._semesters: Tuple[OmnivoxSemester] = None
        self._schedule_cache: Dict[str, OmnivoxSemesterSchedule] = dict()
        self._schedule_request_url: str = None

    async def fetch(self):
        """
        Fetches the page, including the ID of the available semesters.
        :return: Nothing
        """
        # Hit the schedule reference on the Vanier domain to start the flow.
        schedule_page_response = requests.get(url=VANIER_DOMAIN +
                                              self.schedule_reference,
                                              headers=HEADER_UA,
                                              cookies=self.cookies)
        self.cookies.update(schedule_page_response.cookies)

        # The response body contains a JS redirect that loads the LEA session.
        body_redirect_location = get_js_redirect(
            pq(schedule_page_response.text)("body"))
        session_load_url = LEA_DOMAIN + "/" + body_redirect_location
        session_load_response = requests.get(url=session_load_url,
                                             headers=HEADER_UA,
                                             cookies=self.cookies)
        self.cookies.update(session_load_response.cookies)

        # With the LEA session loaded, fetch the schedule selection page.
        schedule_page_response = requests.get(url=LEA_DOMAIN +
                                              "/hrre/horaire.ovx",
                                              headers=HEADER_UA,
                                              cookies=self.cookies)

        # Each <option> of the AnSession select is one available semester;
        # the "selected" attribute marks the current one.
        semesters = []
        page_d = pq(schedule_page_response.text)
        for option in page_d("select[name='AnSession']").children("option"):
            option_d = pq(option)
            semesters.append(
                OmnivoxSemester(option_d.val(), option_d.text(),
                                option_d.attr("selected") is not None))

        self._semesters = tuple(semesters)
        self._schedule_request_url = LEA_DOMAIN + "/hrre/" + page_d(
            "form").attr("action")

    async def get_current_semester(self) -> Optional[OmnivoxSemester]:
        """
        Retrieves the ID of the current semester, if any.
        """
        if not self._semesters:
            await self.fetch()

        for semester in self._semesters:
            if semester.current:
                return semester
        return None

    async def get_all_semesters(self) -> Tuple[OmnivoxSemester]:
        """
        Retrieves the ID of all the available semesters.
        """
        if not self._semesters:
            await self.fetch()

        return tuple(self._semesters)

    async def get_schedule(self,
                           semester: OmnivoxSemester,
                           force=False) -> OmnivoxSemesterSchedule:
        """
        Gets and caches the schedule for the given semester.
        :param semester: The semester whose schedule is being requested.
        :param force: Whether to ignore the cache for the schedules.
        :return: An object representing the schedule for the requested semester.
        """
        if not self._semesters:
            await self.fetch()

        if not force:
            if semester.id in self._schedule_cache:
                return self._schedule_cache[semester.id]

        # Submit the schedule request form for the chosen semester.
        schedule_request_response = requests.post(
            url=self._schedule_request_url,
            headers=HEADER_UA,
            cookies=self.cookies,
            data={
                "AnSession": semester.id,
                "Confirm": "Obtain+my+schedule"
            })

        # Follow the JS redirect to the actual schedule page.
        body_redirect_location = LEA_DOMAIN + "/hrre/" + get_js_redirect(
            pq(schedule_request_response.text)("body"))
        schedule_page_response = requests.get(url=body_redirect_location,
                                              headers=HEADER_UA,
                                              cookies=self.cookies)

        # Parse the schedule page
        courses: List[OmnivoxSemesterScheduleCourse] = []
        schedule_grid: Dict[ScheduleDay,
                            List[OmnivoxSemesterScheduleGridClass]] = {
                                day: []
                                for day in ScheduleDay
                            }
        schedule_d = pq(schedule_page_response.text)

        # Check if there is no warning - if there is, there are no courses for this semester.
        if not schedule_d(".tbAvertissement"):
            # NOTE(review): the table indices (3, 11) and row offsets below
            # are tied to the exact page layout — confirm against live HTML.
            schedule_course_list_table = pq(
                schedule_d(".tbContenantPageLayout table table")[3])
            course_list_rows = schedule_course_list_table.children("tr")

            # Rows 0-2 are headers and the last row is a footer; the rest
            # are one course each.
            for i in range(3, len(course_list_rows) - 1):
                course_row = pq(course_list_rows[i])
                course_number = pq(course_row.children("td")[1])("span").text()
                course_section = pq(
                    course_row.children("td")[2])("span").text()
                course_title = pq(course_row.children("td")[3])("span").text()
                teacher = pq(course_row.children("td")[4])("a").text()

                courses.append(
                    OmnivoxSemesterScheduleCourse(number=course_number,
                                                  section=course_section,
                                                  title=course_title,
                                                  teacher=teacher))

            schedule_grid_table = pq(
                schedule_d(".tbContenantPageLayout table table")[11])
            schedule_grid_rows = schedule_grid_table.children("tr")

            # Walk the weekly grid: one row per time slot, one column per day
            # (column 0 is the time label, hence col_index starts at 1).
            for row_index in range(1, len(schedule_grid_rows)):
                time_slot = row_index - 1
                schedule_grid_cols = pq(
                    schedule_grid_rows[row_index]).children("td")
                col_index = 1
                for day_index in range(5):
                    # NOTE(review): this `continue` skips the day without
                    # advancing col_index — later days then read the same
                    # column; confirm this is intended.
                    if col_index == len(schedule_grid_cols):
                        continue
                    day = ScheduleDay(day_index)
                    # check if a class has started prior to this slot
                    past_classes = schedule_grid[day]
                    for past_class in past_classes:
                        # NOTE(review): this `continue` targets the inner
                        # past_class loop, not the day loop, so it never
                        # skips the slot — looks like a bug.
                        if past_class.time_slot_start <= time_slot < (
                                past_class.time_slot_start +
                                past_class.length):
                            continue

                    grid_cell = pq(schedule_grid_cols[col_index])
                    # Non-white cells are fillers, not class cells.
                    if grid_cell.attr("bgcolor") != "#ffffff":
                        col_index += 1
                        continue
                    # rowspan encodes how many consecutive slots the class spans.
                    class_length = int(grid_cell.attr("rowspan"))
                    schedule_class = OmnivoxSemesterScheduleGridClass(
                        grid_cell.text().split("\n")[0], day, time_slot,
                        class_length)
                    schedule_grid[day].append(schedule_class)
                    col_index += 1

        schedule = OmnivoxSemesterSchedule(
            semester=semester,
            courses=tuple(courses),
            grid=OmnivoxSemesterScheduleGrid(schedule_grid))
        self._schedule_cache[semester.id] = schedule
        return schedule
Example #11
0
class CookieManager:
    """Obtains and maintains CNKI session cookies, either as a single cached
    cookie or as a shared pool of (cookies, fetch_time) entries."""

    URL = "http://kns.cnki.net/kns/brief/default_result.aspx"
    INTERVAL = 900  # cookie lifetime: 15 minutes
    POOL = []  # shared cookie pool: list of (cookies, fetch_time) tuples

    def __init__(self, url=None, pool=True):
        """
        Set up headers/proxy manager and fetch the initial cookie(s).
        :param url: optional override of the URL cookies are requested from
        :param pool: whether to use the cookie pool (True or False)
        """
        if url:
            self.URL = url  # per-instance cookie request URL
        self.cookies = RequestsCookieJar()
        self.user_agent = UserAgent().random
        self.header = {
            "Host": "kns.cnki.net",
            "Connection": "keep-alive",
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip,deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "User-Agent": self.user_agent
        }
        self.proxymanager = ProxyManager(pool=False)
        self.timeout = 10
        self.pool = pool  # whether the cookie pool is in use
        # Fix: EXPIRES_TIME was only assigned inside set_cookie() on HTTP 200,
        # so a failing first request left it undefined (AttributeError later).
        self.EXPIRES_TIME = 0
        if self.pool:
            self.get_pool()
        else:
            self.set_cookie()

    @logger.log_decoratore
    def set_cookie(self):
        """
        Fetch cookies from self.URL, recording them and the fetch time.
        :return: (cookies, header, fetch_time) on success;
                 (cookies, 0) after exhausting all retries.
        """
        retry_time = 5  # number of attempts left
        # IP, PORT = self.proxymanager.get_proxy()
        # self.proxy = {
        #     'https': 'http://{}:{}'.format(IP, PORT)
        # }
        while True:
            try:
                print(1111111)
                print(self.URL)
                resp = requests.get(
                    self.URL,
                    headers=self.header,
                    # proxies=self.proxy,
                    cookies=self.cookies,
                    timeout=self.timeout)
                print(resp.status_code)
                if resp.status_code == 200:
                    self.EXPIRES_TIME = int(time.time())  # fetch timestamp
                    self.cookies.update(resp.cookies)
                    print('resp.cookies>>', resp.cookies)
                    print('self.cookies', self.cookies)
                return self.cookies, self.header, self.EXPIRES_TIME
            except Exception as e:
                retry_time -= 1
                if retry_time <= 0:
                    print('self.cookies>> {}...!'.format(self.cookies))
                    print(e)
                    # NOTE(review): shape differs from the success return —
                    # callers unpacking three values will fail on this path.
                    return self.cookies, 0
                time.sleep(0.1)

    @logger.log_decoratore
    def get_cookie(self):
        """
        Return a usable cookie. With pool=True a random pool entry is used
        (expired entries are evicted and replaced); otherwise the single
        cached cookie is refreshed when stale.
        :return: the cookie jar to use for the next request
        """
        now_time = int(time.time())
        if not self.pool:  # single-cookie mode
            cookie_expires = self.EXPIRES_TIME
            if cookie_expires + self.INTERVAL < now_time:  # stale → refetch
                self.set_cookie()
            return self.cookies
        else:  # pool mode
            while True:
                try:
                    cookies = random.choice(self.POOL)
                    cookie_expires = cookies[1]  # fetch time of this entry
                    if cookie_expires + self.INTERVAL < now_time:
                        self.cookie_remove(cookies)  # evict expired entry
                    else:
                        return cookies[0]
                except IndexError:
                    # Pool is empty — refill it and retry.
                    self.get_pool()

    def _add_cookie(self, i=None):
        """
        Fetch one cookie and append it to POOL.
        :param i: unused; defaulted so this works both as a threadpool
                  callback (which passes an index) and as a plain call.
        """
        self.set_cookie()
        item = (self.cookies, self.EXPIRES_TIME)
        self.POOL.append(item)

    def get_pool(self):
        """
        Fill POOL with 200 cookies using a 32-thread pool.
        """
        pool = threadpool.ThreadPool(32)
        req = threadpool.makeRequests(self._add_cookie, range(200))
        [pool.putRequest(i) for i in req]
        pool.wait()

    def cookie_remove(self, cookie):
        """
        Remove *cookie* from POOL and fetch one replacement.
        :param cookie: the (cookies, fetch_time) tuple to evict
        """
        self.POOL.remove(cookie)
        # Fixed: this call previously raised TypeError because _add_cookie
        # required a positional argument.
        self._add_cookie()
Example #12
0
class IdeaLoomIdeaSource(IdeaSource):
    """Idea source that imports ideas and idea links from a remote IdeaLoom
    server over its JSON /data API."""

    __tablename__ = 'idealoom_idea_source'
    id = Column(Integer, ForeignKey(IdeaSource.id), primary_key=True)
    # or use a token?
    username = Column(String())
    password = Column(String())
    # add credentials!
    use_local = False

    __mapper_args__ = {
        'polymorphic_identity': 'idealoom',
    }

    @reconstructor
    def init_on_load(self):
        super(IdeaLoomIdeaSource, self).init_on_load()
        # TODO: find a way to reuse Users when self.source_uri.startswith(self.global_url)
        # NOTE(review): login() later calls self.cookies.update(r.cookies);
        # stdlib http.cookiejar.CookieJar has no update() — presumably
        # CookieJar is an alias for requests' RequestsCookieJar here; verify
        # the module's imports.
        self.cookies = CookieJar()

    def class_from_data(self, json):
        # Resolve the '@type' discriminator to a mapped class, if present.
        typename = json.get('@type', None)
        if typename:
            return get_named_class(typename)

    def base_source_uri(self):
        # Root of the remote server's JSON data API.
        return urljoin(self.source_uri, '/data/')

    def process_data(self, data):
        """Pre-process one imported record; returns the (possibly modified)
        record, or None when the record should not be created locally."""
        dtype = data.get('@type', None)
        if dtype == 'RootIdea':
            # Map the remote root idea onto our own discussion's root idea.
            self.base_set_item(self.normalize_id(data['@id']), self.discussion.root_idea)
            return None
        if dtype == 'GenericIdeaNode':
            data['pub_state_name'] = self.target_state.name
        elif dtype == 'DirectedIdeaRelation':
            # Re-root links whose source was not imported.
            source_id = self.normalize_id(data['source'])
            if source_id not in self.instance_by_id:
                self.base_set_item(source_id, self.discussion.root_idea)
                self[source_id] = self.discussion.root_idea
        return data

    def external_id_to_uri(self, external_id):
        # Expand a 'local:' id into a full URI on the remote data API.
        if '//' in external_id:
            return external_id
        if external_id.startswith('local:'):
            return self.base_source_uri() + external_id[6:]
        return external_id  # as urn?

    def uri_to_external_id(self, uri):
        # Inverse of external_id_to_uri when use_local is enabled.
        base = self.base_source_uri()
        if uri.startswith(base) and self.use_local:
            uri = 'local:' + uri[len(base):]
        return uri

    def get_imported_from_in_data(self, data):
        return data.get('imported_from_url', None)

    def normalize_id(self, id):
        id = self.id_from_data(id)
        if not id:
            return
        if id.startswith('local:') and not self.use_local:
            return self.external_id_to_uri(id)
        return super(IdeaLoomIdeaSource, self).normalize_id(id)

    def login(self, admin_user_id=None):
        """Log in to the remote server; returns True on success, False when
        the server bounced us back to the login page."""
        login_url = urljoin(self.source_uri, '/login')
        r = requests.post(login_url, cookies=self.cookies, data={
            'identifier': self.username, 'password': self.password},
            allow_redirects=False)
        assert r.ok
        # A redirect back to /login means the credentials were rejected.
        if 'login' in r.headers['Location']:
            return False
        self.cookies.update(r.cookies)
        self._last_login = datetime.now()
        return True

    def read(self, admin_user_id=None):
        """Import ideas and idea links from the remote server, resolving
        referenced agents either locally or by fetching them."""
        admin_user_id = admin_user_id or self.discussion.creator_id
        super(IdeaLoomIdeaSource, self).read(admin_user_id)
        local_server = self.source_uri.startswith(urljoin(self.global_url, '/'))
        # NOTE(review): this repeats the super().read() call two lines above —
        # looks like a paste duplicate; confirm whether both are needed.
        super(IdeaLoomIdeaSource, self).read(admin_user_id)
        # Re-login if the last login is older than a day (or never happened).
        last_login = getattr(self, '_last_login', None)
        if not last_login or datetime.now() - last_login > timedelta(days=1):
            assert self.login(admin_user_id)
        r = requests.get(self.source_uri, cookies=self.cookies)
        assert r.ok
        ideas = r.json()
        self.read_json(ideas, admin_user_id, True)
        discussion_id = self.source_uri.split('/')[-2]
        link_uri = urljoin(
            self.source_uri,
            '/data/Conversation/%s/idea_links' % (discussion_id,))
        r = requests.get(link_uri, cookies=self.cookies)
        assert r.ok
        links = r.json()
        # Only keep links whose target was actually imported.
        link_subset = [
            l for l in links
            if self.normalize_id(l['target']) in self.instance_by_id]
        self.read_json(link_subset, admin_user_id)
        missing_oids = list(self.promises_by_target_id.keys())
        missing_classes = {oid.split('/')[-2] for oid in missing_oids}
        missing_classes.discard('Agent')
        assert not missing_classes, "Promises for unknown classes " + str(missing_classes)
        if local_server:
            # Same server: resolve missing agents directly from our database.
            for oid in missing_oids:
                loid = 'local:'+oid[len(self.global_url):]
                self.base_set_item(oid, AgentProfile.get_instance(loid))
        else:
            # Remote server: fetch each missing object individually.
            self.read_json([
                requests.get(oid, cookies=self.cookies).json()
                for oid in missing_oids], admin_user_id)

        self.db.flush()
        self.add_missing_links()

    def read_json(self, data, admin_user_id, apply_filter=False):
        """Import every object (dict with an '@id') found anywhere in *data*,
        which may be a JSON string or an already-parsed structure."""
        if isinstance(data, string_types):
            data = json.loads(data)

        def find_objects(j):
            # Recursively yield every dict carrying an '@id' key.
            if isinstance(j, list):
                for x in j:
                    for obj in find_objects(x):
                        yield obj
            elif isinstance(j, dict):
                jid = j.get('@id', None)
                if jid:
                    yield j
                for x in j.values():
                    for obj in find_objects(x):
                        yield obj

        self.read_data_gen(find_objects(data), admin_user_id, apply_filter)
Example #13
0
def main():
    """Log in through the USTC CAS portal and submit the daily health report.

    Uses module-level USERNAME / PASSWORD (prompting when unset) and prints
    the server-reported last report time on success.
    """
    # Check username and password
    global USERNAME, PASSWORD
    if not USERNAME: USERNAME = input('请输入学号:')
    if not PASSWORD: PASSWORD = input('请输入密码:')

    # Prepare for the session
    req = requests.Session()
    cookie_jar = RequestsCookieJar()
    login_payload = {
        'username': USERNAME,
        'password': PASSWORD,
        'service': 'https://weixine.ustc.edu.cn/2020/caslogin'
    }
    url = 'https://passport.ustc.edu.cn/login?service=https%3A%2F%2Fweixine.ustc.edu.cn%2F2020%2Fcaslogin'

    # Login start (redirects handled manually below to harvest cookies)
    print('Requesting for cookies from: %s' % url)
    r = req.post(url, data=login_payload, allow_redirects=False)

    # Follow the CAS redirect chain by hand, collecting cookies at each hop
    while r.status_code in range(300, 304):
        new_location = r.headers['Location']
        print('Redirecting to %s' % new_location)
        cookie_jar.update(r.cookies)
        r = req.get(new_location, allow_redirects=False)

    # Finally update my cookies with the last response's
    cookie_jar.update(r.cookies)

    # Extract the CSRF token from the report form for the later POST
    login_form_data = etree.HTML(r.text)
    token_line = login_form_data.xpath(
        "//*[@id='daliy-report']/form/input/@value")
    assert (len(token_line) == 1)
    token = token_line[0]

    # Close login request
    r.close()

    # Prepare for report request
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    }
    # NOTE(review): these keys look like mangled HTTP headers
    # ('Accept-Encoding', 'Sec-Fetch-Dest', ...) but are sent as URL query
    # parameters via params= — likely unintended; confirm before changing.
    param = {
        # 'Accept': 'text/html, application/xhtml+xml, application/xml; q=0.9, image/webp,image/apng, */*; q=0.8, application/signed-exchange; v=b3; q=0.9',
        'Accept - Encoding': 'gzip, deflate, br',
        'Accept - Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache - Control': 'max-age=0',
        'Content - Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://weixine.ustc.edu.cn',
        'Referer': 'https://weixine.ustc.edu.cn/2020/home',
        'Src - Fetch - Dest': 'document',
        'Src - Fetch - Mode': 'navigate',
        'Src - Fetch - Site': 'same-origin',
        'Src - Fetch - User': '******',
        'Upgrade - Insecure - Requests': '1'
    }
    report_payload = {
        '_token': token,  # CSRF token obtained above
        'now_address': '1',  # current location: mainland China
        'gps_now_address': '',  #
        'now_province': '340000',  # current province: Anhui
        'gps_province': '',  #
        'now_city': '340100',  # current city: Hefei
        'gps_city': '',  #
        'now_detail': '',  #
        'is_inschool': '6',  # on campus: West Campus
        'body_condition': '1',  # health status: normal
        'body_condition_detail': '',  #
        'now_status': '1',  # current status: on campus as normal
        'now_status_detail': '',  #
        'has_fever': '0',  # fever symptoms: none
        'last_touch_sars': '0',  # contact with patients: none
        'last_touch_sars_date': '',  #
        'last_touch_sars_detail': '',  #
        'last_touch_hubei': '0',  # contact with people from Hubei: none
        'last_touch_hubei_date': '',  #
        'last_touch_hubei_detail': '',  #
        'last_cross_hubei': '0',  # stayed in or passed through Hubei: none
        'last_cross_sars_date': '',  #
        'last_cross_sars_detail': '',  #
        'return_dest': '1',  # return destination: Hefei main campus
        'return_dest_detail': '',  #
        'other_detail': '',  # other remarks: (none)
    }
    print(report_payload)

    print('=======================')
    # NOTE(review): 'daliy_report' is spelled as the server's actual endpoint
    # path — do not "fix" the URL.
    r = req.post('https://weixine.ustc.edu.cn/2020/daliy_report',
                 cookies=cookie_jar,
                 data=report_payload,
                 headers=headers,
                 params=param,
                 allow_redirects=False,
                 timeout=50)

    # Follow any post-submit redirects, still collecting cookies
    while r.status_code in range(300, 304):
        new_location = r.headers['Location']
        print('Redirecting to %s' % new_location)
        cookie_jar.update(r.cookies)
        r = req.get(new_location, allow_redirects=False)

    print('Last status code: %d' % r.status_code)
    if (r.status_code == 200):
        # On success the page contains '上次上报时间' (last report time);
        # print that snippet as confirmation.
        ret_text = r.text
        last_report_info_pos = r.text.find('上次上报时间')
        print(ret_text[last_report_info_pos:(last_report_info_pos + 26)])
    r.close()
Example #14
0
class CustomsforgeClient:
    """
    Implements customsforge API for CDLCs. (Should be) thread-safe.

    To access the API, logging in is required. This is attempted exactly once for every API call that returns
    a redirect indicating lack of (or invalid) credentials. Cookies resulting from login can be stored to avoid
    this process in subsequent executions.
    """
    def __init__(self,
                 api_key: str,
                 batch_size: int = DEFAULT_BATCH_SIZE,
                 timeout: int = DEFAULT_TIMEOUT,
                 cookie_jar_file: Optional[str] = DEFAULT_COOKIE_FILE,
                 username: str = None,
                 password: str = None,
                 get_today: Callable[[], date] = date.today):
        self.__api_key = api_key
        # Invalid batch sizes / timeouts silently fall back to defaults
        # instead of raising.
        self.__batch_size = batch_size if Verify.batch_size(
            batch_size) else DEFAULT_BATCH_SIZE
        self.__timeout = max(0, timeout) or DEFAULT_TIMEOUT
        self.__cookie_jar_file = cookie_jar_file

        self.__username = username
        self.__password = password
        self.__login_rejected = False
        # Serializes login attempts so concurrent callers don't each re-log.
        self.__prevent_multiple_login_lock = RLock()

        self.__sessions = SessionFactory(unsafe=['ips_password'])
        self.__cookies = RequestsCookieJar()
        # Best-effort restore of cookies persisted by a previous run.
        self.__with_cookie_jar('rb',
                               lambda f: self.__cookies.update(pickle.load(f)))
        # no error, since cookie file probably doesn't exist; we'll try to write it later and log any error then

        # Injectable clock (defaults to date.today) — makes date math testable.
        self.__get_today = get_today

    def login(self, username: str = None, password: str = None) -> bool:
        """
        Tries to log in using given credentials. They are stored for future use (e.g. automatic re-log).

        If no credentials are passed into the method, tries to use already stored credentials, if any.

        In some cases it is possible to determine that login failed due to invalid credentials. In such cases
        this method will avoid logging in until new credentials are passed into it.

        :returns true if login succeeded, false otherwise
        """
        with self.__prevent_multiple_login_lock:
            if not self.__has_credentials(username, password):
                return False

            form = {
                'ips_username': self.__username,
                'ips_password': self.__password,
                'auth_key': self.__api_key,
                'rememberMe': '1',
                'referer': MAIN_PAGE,
            }
            # try_login=False prevents infinite recursion through __call.
            with self.__sessions.with_retry() as session:
                r = self.__call('login',
                                session.post,
                                LOGIN_API,
                                data=form,
                                cookies=None,
                                try_login=False)

            if not r:  # this indicates an error - repeated attempts may still succeed
                return False

            # A successful login redirects to MAIN_PAGE; anything else means
            # the credentials were rejected.
            if not r.is_redirect or not r.headers.get('Location',
                                                      '') == MAIN_PAGE:
                LOG.error('Login failed. Please check your credentials.')
                self.__login_rejected = True
                return False

            # Persist the fresh cookies for future executions (best-effort).
            self.__with_cookie_jar('wb',
                                   lambda f: pickle.dump(r.cookies, f),
                                   trying_to='update cookie jar')
            self.__cookies = r.cookies
            return True

    def ping(self) -> bool:
        """
        :returns true if a simple call to customsforge succeeded (including login), false otherwise
        """
        with self.__sessions.with_retry() as session:
            return self.__date_count(session=session) is not None

    def dates(self, since: date = None) -> Iterator[str]:
        """
        Generates all dates which had an updated CDLC, since the date given, in ascending order.
        If no date is given, starts at the beginning.

        The dates are returned in ISO format strings, intended to be used by other APIs.
        """
        with self.__sessions.with_retry() as session:
            yield from self.__dates(since, session)

    def cdlcs(self,
              since: date = None,
              since_exact: int = 0) -> Iterator[dict]:
        """
        Generates all CDLCs which are available in customsforge, since the date and/or exact time given.
        Exact time takes precedence over the date, unless it is 0 (or negative).
        If no date or exact time is given, starts at the beginning.

        CDLCs are returned in order of ascending last update time.
        CDLCs are returned as dicts containing information, such as artist, title, id, link, etc.
        Refer to To.cdlcs method for specifics.
        """
        since_exact = since_exact or 0
        # When an exact timestamp is given, derive a (generous) starting date
        # from it; exact filtering happens in the dropwhile below.
        since = self.__estimate_date(since_exact) if since_exact else since

        with self.__sessions.with_retry() as session:
            for d in self.__dates(since, session):
                lazy_cdlcs = self.__lazy_all(trying_to='find CDLCs',
                                             call=session.get,
                                             url=CDLC_BY_DATE_API,
                                             params={'filter': d},
                                             convert=To.cdlcs)

                # Skip CDLCs updated before the exact cut-off time.
                yield from dropwhile(
                    lambda c: c['snapshot_timestamp'] < since_exact,
                    lazy_cdlcs)

    def direct_link(self, cdlc_id: Union[str, int]) -> str:
        """
        :returns link to the direct download of the CDLC with given id, if such exists, empty string otherwise
        """
        url = DOWNLOAD_API.format(cdlc_id)

        with self.__sessions.with_retry() as session:
            r = self.__call('get direct link', session.get, url)

        # The API answers with a redirect whose Location is the download URL.
        return r.headers.get('Location', '') if r and r.is_redirect else ''

    def calculate_date_skip(self, since: date, date_count: int) -> int:
        """
        :returns how many dates can be skipped to arrive closer to expected date; this is usually a generous estimate,
        but can become outdated; therefore, only estimate right before calling for dates
        """
        passed_since = self.__get_today() - since
        # we subtract one to include the date, one to account for time passing, one to avoid timezone shenanigans
        skip_estimate = date_count - passed_since.days - 3
        return max(0, skip_estimate)

    def __has_credentials(self, username: str, password: str) -> bool:
        """
        Stores new credentials if given; returns whether usable (not yet
        rejected) credentials are available for a login attempt.
        """
        if username and password:
            # New credentials clear any previous rejection.
            self.__username = username
            self.__password = password
            self.__login_rejected = False

        if self.__login_rejected:
            LOG.debug(
                'Login rejected. Please provide new credentials to try again.')
            return False

        if not self.__username and not self.__password:
            LOG.error('No credentials have been provided.')
            self.__login_rejected = True
            return False

        return True

    def __dates(self, since: date, session: Session) -> Iterator[str]:
        """
        Yields ISO date strings from the dates API, starting at `since`
        (or the beginning when `since` is None).
        """
        since = since or EONS_AGO

        lazy_dates = self.__lazy_all(trying_to='find dates for CDLC updates',
                                     call=session.get,
                                     url=DATES_API,
                                     convert=To.dates,
                                     skip=self.__estimate_date_skip(
                                         since, session))

        # The skip above is only an estimate; drop any remaining dates that
        # are still before `since`.
        yield from dropwhile(lambda d: date.fromisoformat(d) < since,
                             lazy_dates)

    def __estimate_date_skip(self, since: date, session: Session) -> int:
        """
        Estimates how many leading dates can be skipped when querying the
        dates API; returns 0 whenever an estimate cannot or need not be made.
        """
        if since <= EONS_AGO:
            return 0

        date_count = self.__date_count(session)
        if not date_count:
            return 0

        return self.calculate_date_skip(since, date_count)

    def __estimate_date(self, epoch_seconds: int) -> date:
        # we subtract one day to account for timezone shenanigans
        return date.fromtimestamp(epoch_seconds) - timedelta(
            days=1) if epoch_seconds > 0 else EONS_AGO

    def __date_count(self, session: Session) -> Optional[int]:
        """
        :returns total number of dates known to the API, or None on error
        """
        # batch=1 keeps the probe request as small as possible.
        date_count = self.__lazy_all(trying_to='total count of dates',
                                     call=session.get,
                                     url=DATES_API,
                                     convert=To.date_count,
                                     batch=1)
        return next(date_count, None)

    def __call(self,
               trying_to: str,
               call: Callable[..., Response],
               url: str,
               try_login: bool = True,
               **kwargs) -> Optional[Response]:
        """
        Performs a single API call, transparently logging in (at most once,
        when try_login is True) if the server redirects to the login page.

        :returns the response, or None when the call or the re-login failed
        """
        kwargs.setdefault('cookies', self.__cookies)

        try:
            r = call(url=url,
                     timeout=self.__timeout,
                     allow_redirects=False,
                     **kwargs)
        except Exception as e:
            # debug_ex logs and returns None, signalling failure to callers.
            return debug_ex(e, trying_to, LOG)

        if not try_login or not r.is_redirect or not r.headers.get(
                'Location', '') == LOGIN_PAGE:
            return r

        if not self.login():
            LOG.debug('Cannot %s: automatic login to customsforge failed.',
                      trying_to)
            return None

        # Retry once with the fresh login cookies; try_login=False prevents
        # a second login attempt (and unbounded recursion).
        kwargs.pop('cookies', None)
        return self.__call(trying_to, call, url, try_login=False, **kwargs)

    def __lazy_all(self,
                   convert: Callable[[Any], Iterator[T]],
                   skip: int = 0,
                   batch: int = None,
                   **call_params) -> Iterator[T]:
        """
        Lazily pages through an API endpoint, converting each JSON batch and
        yielding its items; stops on error, empty response, or empty batch.
        """
        batch = batch if Verify.batch_size(batch) else self.__batch_size

        while True:
            # NOTE: mutates call_params['params'] in place between pages.
            params = call_params.setdefault('params', {})
            params['skip'] = skip
            params['take'] = batch

            r = self.__call(**call_params)
            if not r or not r.text:
                break

            try:
                it = convert(r.json())
                # Peek at the first item to detect an exhausted (empty) page.
                first = next(it, NON_EXISTENT)
                if first is NON_EXISTENT:
                    break

                yield first
                yield from it
            except Exception as e:
                trying_to = call_params['trying_to']
                return debug_ex(e, f'parse response of <{trying_to}> as JSON',
                                LOG)

            skip += batch

    def __with_cookie_jar(self,
                          options: str,
                          on_file: Callable[[IO], T] = identity,
                          trying_to: str = None) -> T:
        """
        Opens the configured cookie jar file with the given mode and applies
        on_file to it. Errors are logged only when trying_to is given.
        Implicitly returns None when no file is configured or opening fails.
        """
        if self.__cookie_jar_file:
            try:
                f = open(self.__cookie_jar_file, options)
            except Exception as e:
                if trying_to:
                    debug_ex(e, trying_to, LOG)
            else:
                with f:
                    return on_file(f)
Example #15
0
def main():
    """End-to-end SSO availability check.

    Connects to MySQL, visits the SSO entry page, logs in, probes every
    active domain concurrently, logs out, and prints a summary report.
    Prints progress/errors to stdout; logs the MySQL failure and returns
    early when the database is unreachable.
    """
    print('Start time: %s' % time.ctime())
    try:
        mysql_conn = MySQLdb.connect(
            host=config.mysql_host,
            port=config.mysql_port,
            user=config.mysql_user,
            passwd=config.mysql_passwd,
            db=config.mysql_db,
            charset=config.mysql_charset
        )
        mysql_conn.autocommit(True)
        mysql_cursor = mysql_conn.cursor(MySQLdb.cursors.DictCursor)
    except Exception:
        error_msg = 'Failed to connect to MySQL: {error_msg}'.format(error_msg=traceback.format_exc())
        logging.error(error_msg)
        # mysql_cursor is required further down; without it the rest of the
        # run would die with a NameError, so abort cleanly instead.
        return

    first_visit_status = login_status = logout_status = 0

    session = requests.Session()
    resp = None  # guard: resp was previously unbound if the first GET raised
    try:
        resp = session.get(url='', verify=True, timeout=5)
        first_visit_status = 1
    except Exception as e:
        print('First visit failed : %s' % e)

    post_data = {}
    if resp is not None:
        # Collect the hidden login-form fields from the first response.
        html = PyQuery(resp.text)
        input_list = html('')('')
        for item in input_list:
            if item.type == '':
                continue
            post_data[item.name] = item.value
        # Credentials are constant — set them once, not on every form field.
        post_data['username'] = ad_username
        post_data['password'] = ad_password

    try:
        resp = session.post(url='', verify=False, timeout=5, data=post_data)
        if resp.status_code in (200, 302, 307):
            login_status = 1
        else:
            print('Login failed ')
    except Exception as e:
        print('Login failed : %s' % e)

    if first_visit_status == 1 and login_status == 1:
        cookie_jar = RequestsCookieJar()
        cookie_jar.update(resp.cookies)

        mysql_cursor.execute('SELECT * FROM domain WHERE status=1')
        domains = mysql_cursor.fetchall()
        mysql_conn.close()
        # Probe all active domains concurrently with the logged-in cookies.
        gevent_pool = Pool(POOL_SIZE)
        gevent_pool.map(test_domain, [(d, cookie_jar) for d in domains])
        gevent_pool.join()

        logouturl = ''
        try:
            resp = session.get(url=logouturl, verify=True, timeout=5)
            if resp.status_code in (200, 302, 307):
                logout_status = 1
            else:
                print('Logout failed ')
        except Exception as e:
            # Fix: this previously did `result += ...` with `result` never
            # defined, raising NameError inside the handler.
            print(u'\tLogout ERROR {e}\n'.format(e=e))

    if logout_status == 1:
        # Stable, per-domain ordering for a readable report.
        results.sort(key=lambda x: (x['domain']))
        for result in results:
            print('%s' % result['domain'])
            print(result['result'].rstrip('\n'))
            print('')

        print('%s domain redirect to APSSO' % redirect2apsso_cnt)
        print('%s domain redirect to SSO' % redirect2sso_cnt)
        print('%s domain return 5xx' % len(error_5xx_domain))
        print(json.dumps(error_5xx_domain))
        print('%s domain return 4xx' % len(error_4xx_domain))
        print(json.dumps(error_4xx_domain))

        print('Finish time: %s' % time.ctime())
        print('-' * 20)
        print(' ')
Example #16
0
class Account:
    """A Bing/Microsoft account session.

    Runs the scripted Live-ID login flow (or reuses a pre-baked cookie
    string) and exposes get/post helpers that share this account's headers,
    cookies and optional proxies across requests.
    """

    # Class-level defaults taken from the shared config module `c`.
    # Instances get their own header copy in __init__ (see below).
    headers = c.headers
    data = None
    proxies = c.proxies

    def __init__(self, email, password=None, cookie=None):
        """
        :param email: account email, used as the login name
        :param password: account password; when None, `cookie` must hold a
                         raw cookie-header string to reuse an existing session
        :param cookie: raw cookie string (only used when password is None)
        """
        self.email = email
        self.cookies = RequestsCookieJar()
        # Fix: keep an instance-owned copy. request()/preLogin() write
        # Referer/Host into this dict; mutating the shared c.headers would
        # leak state across Account instances and into the config itself.
        self.headers = dict(c.headers)
        if password is None:
            # Cookie-based session: parse the raw cookie string into the jar.
            temp_cookie = SimpleCookie()
            temp_cookie.load(cookie)
            for key, morsel in temp_cookie.items():
                self.cookies[key] = morsel.value
            self.cookie = True
        else:
            self.password = password
            self.cookie = False

    def login(self, mobile=False, useProxy=False):
        """Run the full login flow (no-op when using a pre-baked cookie).

        :param mobile: switch to mobile headers after logging in
        :param useProxy: route HTTP calls through self.proxies
        """
        # Fresh instance-owned copy each login (fix: was aliasing c.headers).
        self.headers = dict(c.headers)
        if not self.cookie:
            postURL = self.preLogin(useProxy=useProxy)
            res = self.post(postURL, data=self.data, useProxy=useProxy)
            # Parse the intermediate HTML form and replay all of its fields.
            form = BeautifulSoup(res.text,
                                 "html.parser").findAll("form")[0]  # Get Form
            params = dict()
            for field in form:
                # Add each field to params
                params[field["name"]] = field["value"]
            self.headers["Host"] = c.host  # Set Host to Bing Server
            self.cookies.clear()
            res = self.post(form.get("action"), data=params, useProxy=useProxy)
        if mobile:
            # Copy for the same reason as above: never mutate shared config.
            self.headers = dict(c.mobileHeaders)

    def preLogin(self, useProxy=False):
        """Scrape the login page for the post URL and anti-forgery tokens.

        Populates self.data (via getAuthData) with PPFT/PPSX tokens and sets
        the timing cookies the login endpoint expects.

        :returns the URL the credentials must be POSTed to
        """
        res = self.get(c.hostURL, useProxy=useProxy)
        # Get Login URL
        index = res.text.index("WindowsLiveId")  # Find URL
        cutText = res.text[index + 16:]  # Cut Text at Start of URL
        loginURL = cutText[:cutText.index("\"")]  # Cut at End of URL
        # Unescape URL
        loginURL = bytes(loginURL, encoding="UTF-8").decode("unicode_escape")
        # Get Login Cookies
        self.headers["Host"] = c.loginHost  # Set Host to Login Server
        res = self.get(loginURL, useProxy=useProxy)
        self.data = self.getAuthData()
        self.cookies["CkTst"] = "G" + \
            str(int(time.time() * 1000))  # Add Time Cookie
        # Get Post URL
        index = res.text.index(c.loginPostURL)  # Find URL
        cutText = res.text[index:]  # Cut Text at Start of URL
        postURL = cutText[:cutText.index("\'")]  # Cut at End of URL
        # Get PPFT (anti-forgery token embedded in the login page)
        index = res.text.index("sFTTag")  # Find PPFT
        cutText = res.text[index:]  # Cut Text Near PPFT
        PPFT = cutText[cutText.index("value=") +
                       7:cutText.index("\"/>")]  # Cut PPFT
        self.data["PPFT"] = PPFT
        # Get PPSX
        index = res.text.index(",bH:\'")  # Find PPSX
        cutText = res.text[index + 4:]  # Cut Text at Start of PPSX
        PPSX = cutText[:cutText.index("\'")]  # Cut at End of PPSX
        self.data["PPSX"] = PPSX
        # Finish Up
        self.cookies["wlidperf"] = "FR=L&ST=" + \
            str(int(time.time() * 1000))  # Add Another Time Cookie
        return postURL

    def logout(self):
        """Drop the session cookies (unless they came from a cookie string)."""
        if not self.cookie:
            self.cookies.clear()

    def getAuthData(self):
        """Build the credential form payload expected by the login endpoint."""
        return {
            "login": self.email,
            "loginfmt": self.email,
            "passwd": self.password,
            "i13": "0",
            "type": "11",
            "LoginOptions": "3",
            "lrt": "",
            "ps": "2",
            "psRNGCDefaultType": "",
            "psRNGCEntropy": "",
            "psRNGCSLK": "",
            "canary": "",
            "ctx": "",
            "NewUser": "******",
            "FoundMSAs": "",
            "fspost": "0",
            "i21": "0",
            "i2": "1",
            "i17": "0",
            "i18":
            "__ConvergedLoginPaginatedStrings%7C1%2C__ConvergedLogin_PCore%7C1%2C",
            "i19": "2" + str(randint(0, 5000))
        }

    def request(self,
                method,
                URL,
                headers=USE_SELF,
                cookies=USE_SELF,
                params=None,
                data=None,
                proxies=USE_SELF,
                useProxy=False,
                setReferer=True,
                setCookies=True):
        """Issue an HTTP request sharing this account's headers/cookies.

        USE_SELF sentinels fall back to the instance attributes, allowing
        None to be passed explicitly when desired.
        """
        headers = self.headers if headers is USE_SELF else headers
        cookies = self.cookies if cookies is USE_SELF else cookies
        proxies = self.proxies if proxies is USE_SELF else proxies
        res = requests.request(method,
                               URL,
                               headers=headers,
                               cookies=cookies,
                               params=params,
                               data=data,
                               proxies=proxies if useProxy else None)
        if setReferer:
            # Carry the last URL forward so the flow looks browser-like.
            self.headers["Referer"] = URL
        if setCookies:
            self.cookies.update(res.cookies)
        return res

    def get(self,
            URL,
            headers=USE_SELF,
            cookies=USE_SELF,
            params=None,
            data=None,
            proxies=USE_SELF,
            useProxy=False,
            setReferer=True,
            setCookies=True):
        """GET convenience wrapper around request()."""
        return self.request('GET', URL, headers, cookies, params, data,
                            proxies, useProxy, setReferer, setCookies)

    def post(self,
             URL,
             headers=USE_SELF,
             cookies=USE_SELF,
             params=None,
             data=None,
             proxies=USE_SELF,
             useProxy=False,
             setReferer=True,
             setCookies=True):
        """POST convenience wrapper around request()."""
        return self.request('POST', URL, headers, cookies, params, data,
                            proxies, useProxy, setReferer, setCookies)
Example #17
0
class weibo_login(object):
    """Scripted Sina Weibo SSO login (ssologin.js v1.4.19 flow).

    Usable as a context manager: logs out on exit. Typical use is
    `with weibo_login() as w: w.run()`.
    """

    def __init__(self, username=13760398874, pwd='Yangfei123@'):
        """
        :param username: Weibo account (phone number); defaults preserve the
                         previous hard-coded behavior — pass your own account
        :param pwd: account password (same caveat as above)
        """
        self.cookies = RequestsCookieJar()
        self.pre_login_info = None  # prelogin.php response (nonce, pubkey, ...)
        self.sp = None
        # Fix: credentials are now parameters instead of being hard-coded.
        self.username = username
        self.pwd = pwd
        self.rsa2_password = None

        self.session = requests.session()
        self.session.headers = {
            'Accept':
            '*/*',
            'Accept-Encoding':
            'gzip, deflate, br',
            'Accept-Language':
            'zh-CN,zh;q=0.9,en;q=0.8',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
        }
        self.prelt = 0  # client-side prelogin latency, echoed back to the server

    def gen_rsa_sp(self):
        """Encrypt `servertime\\tnonce\\npwd` with the server's RSA key.

        :returns the hex-encoded rsa2 password blob (also cached on self)
        """
        rsapubkey = int(self.pre_login_info.get('pubkey'), 16)
        key = rsa.PublicKey(rsapubkey, 65537)  # 65537 is the fixed exponent
        message = '{}\t{}\n{}'.format(self.pre_login_info.get('servertime'),
                                      self.pre_login_info.get('nonce'),
                                      self.pwd).encode()
        password = rsa.encrypt(message, key)
        self.rsa2_password = binascii.b2a_hex(password)
        return self.rsa2_password

    def login(self):
        """POST the login form, then follow the two JS redirects to finish SSO.

        Requires pre_login() to have been called first (needs nonce/rsakv).
        """
        url1 = 'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.19)'
        # url1 = 'https://login.sina.com.cn/signup/signin.php'
        data = {
            # Fix: previously hard-coded b'13760398874', ignoring self.username.
            'su': b64encode(str(self.username).encode()),
            'entry': 'weibo',
            'geteway': 1,
            'from': None,
            'savestate': 7,
            'qrcode_flag': False,
            'useticket': 1,
            'pagerefer':
            'https://login.sina.com.cn/crossdomain2.php?action=login&r=https%3A%2F%2Fpassport.weibo.com%2Fwbsso%2Flogout%3Fr%3Dhttps%253A%252F%252Fweibo.com%26returntype%3D1',
            'vsnf': 1,
            'service': 'miniblog',
            'servertime': time.time(),
            'nonce': self.pre_login_info.get('nonce'),
            'pwencode': 'rsa2',
            'rsakv': self.pre_login_info.get('rsakv'),
            'sp': self.gen_rsa_sp(),
            'sr': '1920*1080',
            'encoding': 'UTF-8',
            'prelt': self.prelt,
            'url':
            'https://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype': 'META'
        }
        r1 = self.session.post(url1, data=data, timeout=10)
        r1.raise_for_status()
        self.cookies.update(r1.cookies)
        try:
            # The response body is GBK-encoded JS performing two consecutive
            # location.replace() redirects; follow them manually.
            url2 = re.search(r'location.replace\("(.*?)"\);',
                             r1.content.decode('gbk'), re.S).group(1)
            r2 = self.session.get(url=url2, timeout=10)
            r2.raise_for_status()
            self.cookies.update(r2.cookies)
            url3 = re.search(r'location.replace\(\'(.*?)\'\);',
                             r2.content.decode('gbk'), re.S).group(1)
            r2 = self.session.get(url=url3, timeout=10)
            r2.raise_for_status()
            self.cookies = self.session.cookies
        except Exception as e:
            logging.exception(e)

    def pre_login(self):
        """Fetch nonce/pubkey/rsakv from prelogin.php and measure latency."""
        try:
            pre_login_time = int(1000 * time.time())
            r = self.session.get(
                'https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.19)&_=1567405550547'
            )
            r.raise_for_status()
            # Response is JSONP: strip the callback wrapper before parsing.
            d = json.loads(
                r.text.lstrip('sinaSSOController.preloginCallBack(').rstrip(
                    ')'))
            self.pre_login_info = d
            # prelt = measured round-trip minus server-reported exec time.
            self.prelt = int(
                1000 * time.time()) - pre_login_time - d.get('exectime')
        except Exception as e:
            logging.exception(e)

    def logout(self):
        """Hit the SSO logout endpoint and pick up its cookies."""
        try:
            r = self.session.get(
                'https://login.sina.com.cn/sso/logout.php?entry=miniblog&r=https%3A%2F%2Fweibo.com',
                timeout=10)
            r.raise_for_status()
            self.cookies.update(r.cookies)
        except Exception as e:
            logging.exception(e)

    def init_cookie(self):
        """Visit weibo.com once to seed the session with baseline cookies."""
        try:
            r = self.session.get('https://weibo.com')
            r.raise_for_status()
        except Exception as e:
            logging.exception(e)

    def run(self):
        """Full flow: seed cookies, prelogin, then login."""
        self.init_cookie()
        self.pre_login()
        self.login()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always log out when leaving the context, even on error.
        self.logout()
Example #18
0
class RedisCookieManager:
    """
    Redis-backed cookie manager: fetches session cookies for requests and
    maintains a shared Redis list of JSON entries, each holding the cookie
    dict and its acquisition time, discarding entries older than INTERVAL.
    """
    URL = "http://kns.cnki.net/kns/brief/default_result.aspx"
    INTERVAL = 1500  # cookie lifetime in seconds (25 minutes)
    # Single list used by both producer (_add_cookie) and consumer
    # (get_cookie). Fix: the producer previously pushed to 'mala_cookies'
    # while the consumer popped 'cnki:cookies', so the list never filled.
    COOKIE_KEY = 'cnki:cookies'

    def __init__(self, url=None, num=10):
        """
        :param url: page used to acquire cookies; defaults to the class URL
        :param num: number of cookies to keep in Redis
        """
        # Fix: `self.URL = url` used to clobber the class default with None.
        self.URL = url or self.URL
        self.cookies = RequestsCookieJar()
        self.user_agent = UserAgent().chrome
        self.headers = {
            "Host": "kns.cnki.net",
            "Connection": "keep-alive",
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip,deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "User-Agent": self.user_agent
        }
        self.proxymanager = ProxyManager(pool=False)
        self.timeout = 20
        self.redis_host = 'localhost'
        self.redis_port = 6379
        self.num = num  # number of cookies stored in redis (default 10)
        self.EXPIRES_TIME = 0  # timestamp of the last successful fetch

    @logger.log_decoratore
    def set_cookie(self):
        """
        Fetch fresh cookies from self.URL, retrying up to 5 times.

        :return: (cookies, acquired_at); acquired_at is 0 when the fetch
                 never succeeded (the jar is then empty). Fix: the failure
                 path previously returned a bare jar instead of a pair,
                 breaking callers that unpack the result.
        """
        retry_time = 5  # attempts before giving up
        # IP, PORT = self.proxymanager.get_proxy()
        # self.proxy = {
        #     'https': 'http://{}:{}'.format(IP, PORT)
        # }
        while True:
            try:
                resp = requests.get(
                    self.URL,
                    headers=self.headers,
                    # proxies=self.proxy,
                    cookies=self.cookies,
                    timeout=self.timeout)
                if resp.status_code == 200:
                    self.cookies.update(resp.cookies)
                    self.EXPIRES_TIME = int(time.time())  # acquisition time
                    print('resp.cookies>>', resp.cookies)
                    print('self.cookies', self.cookies)
                return self.cookies, self.EXPIRES_TIME
            except Exception:
                retry_time -= 1
                if retry_time <= 0:
                    # Give up: hand back an empty jar so callers can detect it.
                    cookie = Response().cookies
                    print('cookie>> {}...!'.format(cookie))
                    return cookie, 0
                time.sleep(0.1)

    @logger.log_decoratore
    def get_cookie(self):
        """
        Pop one entry from the Redis cookie list, refilling the list when it
        is empty; expired entries are dropped, live ones are pushed back.

        :return: cookie dict of a non-expired entry
        """
        import json  # local import: json was not imported at module level

        while True:
            r = redis.Redis(host=self.redis_host, port=self.redis_port)
            res = r.blpop(self.COOKIE_KEY)
            if not res:
                self.create_redis()
                continue
            # res is a (key, value) pair; the value is our JSON payload.
            # Fix: the old code compared the raw bytes value to an int.
            entry = json.loads(res[1])
            if entry['time'] + self.INTERVAL < int(time.time()):
                continue  # expired: discard and pop the next entry
            r.rpush(self.COOKIE_KEY, res[1])  # recycle the live entry
            return entry['cookies']

    def create_redis(self):
        """
        Fill Redis with self.num cookies using a thread pool of workers.
        """
        pool = threadpool.ThreadPool(32)
        req = threadpool.makeRequests(self._add_cookie, list(range(self.num)))
        [pool.putRequest(i) for i in req]
        pool.wait()

    @logger.log_decoratore
    def _add_cookie(self, i):
        """
        Fetch one cookie and store it, with its acquisition time, as a JSON
        entry in the shared Redis list.

        :param i: worker index (unused; required by the thread-pool API)
        """
        import json  # local import: json was not imported at module level
        try:
            r = redis.Redis(host=self.redis_host, port=self.redis_port, db=0)
            # Fix: set_cookie returns a (cookies, time) pair; the old code
            # called .get_dict() on the tuple, which always raised.
            cookies, acquired = self.set_cookie()
            if cookies:
                print(type(cookies), cookies.get_dict())
                payload = json.dumps({'cookies': cookies.get_dict(),
                                      'time': acquired})
                r.rpush(self.COOKIE_KEY, payload)
        except Exception as e:
            # Best-effort: one failed worker must not kill the pool, but the
            # error is no longer swallowed silently.
            print('_add_cookie failed: {}'.format(e))
Example #19
0
            # 超出最大重试次数,把最后一个异常(肯定是重试异常或者空白页面异常)向上爆
            raise err

        self.logger.debug('[%s]<< %s' % (method.upper(), url))

        merged_cookies = RequestsCookieJar()

        if not isinstance(kwargs['cookies'], cookielib.CookieJar):
            kwargs['cookies'] = cookiejar_from_dict(
                kwargs['cookies'])

        # 先更新旧的cookies
        response.cookies.update(kwargs['cookies'])

        # 再更新新的cookies,顺序不能乱
        merged_cookies.update(response.cookies)

        response.cookies = merged_cookies
        return response

    def switch_proxy(self, old_proxy=None):
        # 加锁,一个爬虫只能有一个协程在切换代理
        self.logger.debug('Try to switch proxy from %s.', old_proxy)
        with self._proxy_lock:

            # 不是你叫我切换代理我就会帮你切的,除非是你现在在用的代理跟现在设置的一样,
            # 否则有可能是其他线程已经切换过代理

            if old_proxy and old_proxy not in self._crawler.proxies.values():
                return