def test_is_logged_in(self, requests_mock):
    """Verify that ``is_logged_in`` tracks the log-in / log-out cycle."""
    requests_mock.get(TEST_URL + '/Account/Login',
                      text=(HTML / 'login.html').read_text())
    requests_mock.post(
        TEST_URL + '/Account/Login',
        cookies={'.ASPXAUTH': 'XXX'},
        text=(HTML / 'home.html').read_text(),
    )
    requests_mock.get(
        TEST_URL + '/Account/LogOff',
        cookies={'.ASPXAUTH': None},
        text=(HTML / 'login.html').read_text(),
    )

    browser = StatefulBrowser()
    session = Session(TEST_URL, browser)
    assert not session.is_logged_in

    session.log_in('joe.bloggs', 'abc123')
    # ``requests-mock`` does not propagate mocked cookies into the session,
    # so mimic the server by setting the auth cookie on the browser directly.
    # https://github.com/jamielennox/requests-mock/issues/17
    browser.get_cookiejar().set_cookie(
        create_cookie(name='.ASPXAUTH', value='XXX'))
    assert session.is_logged_in

    session.log_out()
    # Same workaround as above, this time clearing the cookie.
    browser.get_cookiejar().set_cookie(
        create_cookie(name='.ASPXAUTH', value=None))
    assert not session.is_logged_in
def site_cookies():
    """Return a cookie jar preloaded with the site's fixed cookies."""
    preset = {
        '__cfduid': 'd38637fd5fcdd51ba17c23f4f34fd12be1566670847',
        'PHPSESSID': 'f3240botpio78qaqjqa75os9q7',
    }
    jar = cookies.RequestsCookieJar()
    for cookie_name, cookie_value in preset.items():
        jar.set_cookie(
            cookies.create_cookie(name=cookie_name, value=cookie_value))
    return jar
def generate_cookiejar(cookies):
    """Build a ``RequestsCookieJar`` from raw ``Set-Cookie``-style strings.

    :param cookies: mapping of domain -> newline-separated cookie strings,
        each line shaped like ``name=value; attr=val; secure``
    :return: a populated ``RequestsCookieJar``
    """
    jar = RequestsCookieJar()
    for domain, content in cookies.items():
        for line in content.split("\n"):
            name, value = None, None
            kwargs = {}
            for i, record in enumerate(line.split(";")):
                columns = record.split("=")
                if len(columns) == 2:
                    key = columns[0].strip()
                    value_ = columns[1].strip()
                    if i == 0:
                        # The first ``name=value`` pair is the cookie itself;
                        # later pairs are attributes.
                        name = key
                        value = value_
                        continue
                    if key == "expires":
                        # Try the full weekday name first, then the abbreviated form.
                        try:
                            dt = datetime.strptime(value_, "%A, %d-%b-%Y %H:%M:%S GMT").replace(tzinfo=pytz.UTC)
                            expire = unix_time_seconds(dt)
                        except ValueError:
                            try:
                                dt = datetime.strptime(value_, "%a, %d-%b-%Y %H:%M:%S GMT").replace(tzinfo=pytz.UTC)
                                expire = unix_time_seconds(dt)
                            except ValueError:
                                # BUG FIX: was ``logger.warniing`` which raised
                                # AttributeError instead of logging.
                                logger.warning("Can't parse datetime [{}]".format(value_))
                                expire = unix_time_seconds() + 100
                        kwargs[key] = expire
                        continue
                    kwargs[key] = value_
                    continue
                if len(columns) == 1:
                    # A bare attribute such as ``secure`` carries no value.
                    value_ = columns[0].strip()
                    kwargs["secure"] = value_ == "secure"
            jar.set_cookie(create_cookie(name, value, **kwargs))
    return jar
def login(self):
    """Log in to xHamster through its AJAX login endpoint.

    Primes the session by visiting the home page, plants an ``xsid`` stats
    cookie, then issues the login GET request.

    :return: True when the response contains ``login.stats``
    :raises AccountProblem: for any other response
    """
    session = self.http_settings.session
    proxy = self.http_settings.proxy
    # Visit the home page first so the site sets its baseline cookies.
    go_to_xhamster = session.get('http://www.xhamster.com', proxies=proxy)
    timestamp = generate_timestamp()
    get_stats = generate_stats()
    stats = "{res1}:{res2}".format(res1=get_stats[0], res2=get_stats[1])
    # NOTE(review): this pokes the private ``_cookies`` structure directly;
    # ``session.cookies.set_cookie(xsid)`` would be the supported API.
    xsid = create_cookie('xsid', stats)
    session.cookies._cookies['.xhamster.com']['/']['xsid'] = xsid
    url = "http://xhamster.com/ajax/login.php?act=login&ref=" \
          "http%3A%2F%2Fxhamster.com%2F&stats={stats}&username={username}" \
          "&password={password}&remember={remember}&_={timestamp}".format(
              stats=stats,
              username=self.username,
              password=self.password,
              remember="on" if self.remember_me else "off",
              timestamp=timestamp)
    # Mimic the browser's AJAX request headers.
    session.headers.update({
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "Accept:text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01",
        "Referer": "http://xhamster.com/login.php"
    })
    attempt_login = session.get(url, proxies=proxy)
    # NOTE(review): under Python 3 ``.content`` is bytes, so this membership
    # test would raise TypeError — this code appears to target Python 2.
    if 'login.stats' in attempt_login.content:
        return True
    else:
        raise AccountProblem('Unknown problem while login into xhamster')
def login(self, credentials):
    """
    Log in to THM.

    :param credentials: dict with ``username``/``password`` keys, OR a
        ``session`` key holding a ``connect.sid`` session cookie value
    :raises Exception: when the post-login check request does not succeed
    :return: None; sets ``self.authenticated`` on success
    """
    if 'session' in credentials:
        # Reuse an existing session cookie instead of posting credentials.
        cookie = create_cookie('connect.sid', credentials['session'],
                               domain='tryhackme.com')
        self.session.cookies.set_cookie(cookie)
    else:
        # Standard form login: fetch the CSRF token, then post the form.
        csrf_token = fetch_pattern(self.session, '/login', 'csrf-input')
        data = {
            'email': credentials['username'],
            'password': credentials['password'],
            '_csrf': csrf_token
        }
        http_post(self.session, '/login', data, res_format='')
    # Hit an authenticated endpoint to confirm the login actually worked.
    # (The previous ``try: ... except Exception as e: raise e`` wrapper was a
    # no-op and has been removed; errors propagate unchanged.)
    test_request = http_get(self.session, '/message/get-unseen')
    if not test_request['success']:
        raise Exception('Failed to authenticate')
    self.authenticated = True
def _onCookieAdded(self, cookie):
    """Qt cookie-store callback: capture the authentication cookie.

    When the configured auth cookie appears for the configured host, this
    persists it as a credential, mirrors it into the requests session and
    the cookie jar, and saves the jar to every configured jar path.

    :param cookie: a ``QNetworkCookie`` just added to the web engine store
    """
    cookie_str = str(cookie.toRawForm(QNetworkCookie.NameAndValueOnly), encoding='utf-8')
    cookie_name = str(cookie.name(), encoding='utf-8')
    cookie_val = str(cookie.value(), encoding='utf-8')
    # Only the configured auth cookie for the configured host is of interest.
    if (cookie_name == self.authn_cookie_name) and (cookie.domain() == self.config.get("host")):
        logging.trace("%s cookie added:\n\n%s\n\n" % (self.authn_cookie_name, cookie_str))
        self.credential["cookie"] = "%s=%s" % (self.authn_cookie_name, cookie_val)
        host = self.auth_url.host()
        cred_entry = dict()
        cred_entry[host] = self.credential
        if self.credential_file:
            # Merge this host's credential into the on-disk credential store.
            creds = read_credential(self.credential_file, create_default=True)
            creds.update(cred_entry)
            write_credential(self.credential_file, creds)
        self.token = cookie_val
        self._session.cookies.set(self.authn_cookie_name, cookie_val, domain=host, path='/')
        if self.cookie_jar is not None:
            self.cookie_jar.set_cookie(
                create_cookie(self.authn_cookie_name,
                              cookie_val,
                              domain=host,
                              path='/',
                              expires=0,
                              discard=False,
                              secure=True))
            # Save the jar to every configured path whose directory exists.
            for path in self.config.get("cookie_jars", DEFAULT_CONFIG["cookie_jars"]):
                path_dir = os.path.dirname(path)
                if os.path.isdir(path_dir):
                    logging.debug("Saving cookie jar to: %s" % path)
                    self.cookie_jar.save(path, ignore_discard=True, ignore_expires=True)
                else:
                    logging.debug("Cookie jar save path [%s] does not exist." % path_dir)
def cookies(self) -> RequestsCookieJar:
    """Materialise the stored cookie dicts as a ``RequestsCookieJar``."""
    jar = RequestsCookieJar()
    for name, attrs in self['cookies'].items():
        value = attrs.pop('value')
        jar.set_cookie(create_cookie(name, value, **attrs))
    jar.clear_expired_cookies()
    return jar
def cookies(self):
    """Build a ``RequestsCookieJar`` from the stored cookie dicts, dropping expired ones."""
    jar = RequestsCookieJar()
    for cookie_name, attributes in self['cookies'].items():
        cookie_value = attributes.pop('value')
        jar.set_cookie(create_cookie(cookie_name, cookie_value, **attributes))
    jar.clear_expired_cookies()
    return jar
def login(self):
    """Authenticate by seeding a CSRF token (header + cookie), then posting credentials."""
    token = uuid.uuid4().hex
    self.session.headers.update({'X-CSRFToken': token})
    self.session.cookies.set_cookie(
        create_cookie('csrftoken', token, domain=self.domain))
    payload = {'email': self.email, 'password': self.password}
    return self.make_request('/accounts/auth/api/login', 'post', data=payload)
def get_content(self, url: str, parameters: dict, session: Session) -> PyRSSWContent:
    """Fetch reddit content with the ``over18`` consent cookie pre-set."""
    consent = cookies.create_cookie(domain="reddit.com", name="over18", value="1")
    session.cookies.set_cookie(consent)
    return self.get_reddit_content(url, session, True)
def http_request(method, url, params=None, data=None, headers=None, cookies=None, timeout=30, ignore_errors=True):
    """Issue an asynchronous (Twisted) HTTP request and return a Deferred.

    :param method: HTTP verb, e.g. ``'GET'``
    :param url: request URL; ``params`` are merged into its query string
    :param params: optional query parameter dict
    :param data: optional request body; dicts are form-urlencoded
    :param headers: optional header dict
    :param cookies: a ``cookielib.CookieJar`` or a plain name->value dict
    :param timeout: connect timeout in seconds
    :param ignore_errors: when True, failures resolve to an empty ``Response``
        instead of propagating the error
    :return: a Deferred firing with a ``Response``
    """
    # BUG FIX: ``params={}``/``headers={}`` were mutable default arguments,
    # and the encode loops below mutate them in place — values encoded on one
    # call leaked into every later call made without those arguments.
    if params is None:
        params = {}
    if headers is None:
        headers = {}

    # Urlencode does not accept unicode, so convert to str first
    url = url.encode('utf-8') if isinstance(url, unicode) else url
    for k, v in params.items():
        params[k] = v.encode('utf-8') if isinstance(v, unicode) else v
    for k, v in headers.items():
        headers[k] = v.encode('utf-8') if isinstance(v, unicode) else v

    # Add any additional params to the url
    url_parts = list(urlparse.urlparse(url))
    query = dict(urlparse.parse_qsl(url_parts[4]))
    query.update(params)
    url_parts[4] = urllib.urlencode(query, doseq=True)
    url = urlparse.urlunparse(url_parts)

    # Handle cookies: accept a ready-made jar, or build one from a dict.
    if isinstance(cookies, cookielib.CookieJar):
        cookiejar = cookies
    else:
        cookiejar = cookielib.CookieJar()
        for name, value in (cookies or {}).iteritems():
            cookiejar.set_cookie(create_cookie(name=name, value=value))

    # Urlencode the data, if needed
    if isinstance(data, dict):
        data = urllib.urlencode(data)
        headers['Content-Type'] = 'application/x-www-form-urlencoded'

    agent = Agent(reactor, connectTimeout=timeout)
    cookie_agent = CookieAgent(agent, cookiejar)
    body = FileBodyProducer(StringIO(data)) if data else None
    d = cookie_agent.request(method, url,
                             Headers({k: [v] for k, v in headers.iteritems()}),
                             body)

    def handle_response(response, cookiejar):
        # Refuse multimedia payloads before buffering the body.
        if 'audio/mpeg' in response.headers.getRawHeaders('content-type')[-1]:
            # Don't download any multimedia files
            raise Exception('reponse contains a multimedia file')
        d = defer.Deferred()
        response.deliverBody(
            BodyReceiver(response.code,
                         dict(response.headers.getAllRawHeaders()),
                         cookiejar, d))
        return d

    def handle_error(error):
        if isinstance(error, _WrapperException):
            reason = ', '.join(error.reasons)
        else:
            reason = error.getErrorMessage()
        logger = logging.getLogger(__name__)
        logger.error('Failed to GET %s (reason: %s)', url, reason)
        # Swallow the failure and yield an empty response instead.
        return Response(0, {}, cookielib.CookieJar(), '')

    d.addCallback(handle_response, cookiejar)
    if ignore_errors:
        d.addErrback(handle_error)
    return d
def ListToSession(session, cookie_list):
    '''Install every cookie dict from ``cookie_list`` into ``session``.

    :param session: a requests session to receive the cookies
    :param cookie_list: iterable of dicts accepted by ``create_cookie(**d)``
    :return: the same session, for chaining
    '''
    for cookie_kwargs in cookie_list:
        session.cookies.set_cookie(create_cookie(**cookie_kwargs))
    return session
def read_cookies(self, filename='cookies.txt'):
    """Load cookies from a ``name=value=path`` per-line file into a jar.

    Blank or malformed lines are skipped.  The value may itself contain
    ``=``: the first field is the name, the last field is the path, and
    everything in between is the value.

    :param filename: path to the cookie file
    :return: a populated ``RequestsCookieJar``
    """
    jar = RequestsCookieJar()
    with open(filename, 'r') as f:
        for line in f:
            fields = line.strip().split('=')
            if len(fields) < 3:
                # BUG FIX: blank/short lines (e.g. a trailing newline)
                # previously raised IndexError.
                continue
            name, path = fields[0], fields[-1]
            value = '='.join(fields[1:-1])
            jar.set_cookie(create_cookie(name, value, path=path))
    return jar
def test_save_logs(self, requests_mock, tmp_path: Path):
    """Verify that ``save_logs`` writes one timestamped log file per request."""
    requests_mock.get(TEST_URL + '/Account/Login',
                      text=(HTML / 'login.html').read_text())
    requests_mock.post(
        TEST_URL + '/Account/Login',
        cookies={'.ASPXAUTH': 'XXX'},
        text=(HTML / 'home.html').read_text(),
    )
    requests_mock.get(
        TEST_URL + '/Account/LogOff',
        cookies={'.ASPXAUTH': None},
        text=(HTML / 'login.html').read_text(),
    )

    browser = StatefulBrowser()
    session = Session(TEST_URL, browser)
    session.log_in('joe.bloggs', 'abc123')
    # ``requests-mock`` doesn't propagate mocked cookies into the session;
    # work around it by setting the auth cookie on the browser directly.
    # https://github.com/jamielennox/requests-mock/issues/17
    browser.get_cookiejar().set_cookie(
        create_cookie(name='.ASPXAUTH', value='XXX'))
    session.log_out()
    # Same workaround, clearing the cookie this time.
    browser.get_cookiejar().set_cookie(
        create_cookie(name='.ASPXAUTH', value=None))

    session.save_logs(str(tmp_path))

    files = sorted(tmp_path.iterdir())
    assert len(files) == 3
    expected_suffixes = ('Z-login-0.txt', 'Z-home-0.txt', 'Z-logout-0.txt')
    for written, suffix in zip(files, expected_suffixes):
        assert written.name.endswith(suffix)
    with files[0].open() as log:
        assert isinstance(datetime.fromisoformat(next(log).strip()), datetime)
        assert next(log).startswith('GET ' + TEST_URL)
        assert next(log).startswith('200 None')
        assert next(log) == '\n'
    assert files[0].read_text().endswith((HTML / 'login.html').read_text())
def init_cookie_jar(cookie_file=None, cookie_t=None, cookie_y=None, cookie_euconsent=None):
    """Create a cookie jar, optionally backed by an LWP cookie file.

    :param cookie_file: path to an LWP cookie file; when given the jar is
        loaded from and saved back to this file
    :param cookie_t: value for the ``T`` cookie, if any
    :param cookie_y: value for the ``Y`` cookie, if any
    :param cookie_euconsent: value for the ``EuConsent`` cookie, if any
    :return: the populated cookie jar
    """
    cookie_jar = LWPCookieJar(
        cookie_file) if cookie_file else RequestsCookieJar()

    if cookie_file and os.path.exists(cookie_file):
        cookie_jar.load(ignore_discard=True)

    # BUG FIX: previously tested ``args.cookie_t`` (a module-level object)
    # instead of the ``cookie_t`` parameter, silently ignoring the argument.
    if cookie_t:
        cookie_jar.set_cookie(create_cookie('T', cookie_t))
    if cookie_y:
        cookie_jar.set_cookie(create_cookie('Y', cookie_y))
    if cookie_euconsent:
        cookie_jar.set_cookie(create_cookie('EuConsent', cookie_euconsent))

    if cookie_file:
        cookie_jar.save(ignore_discard=True)

    return cookie_jar
def set_session(self, session: Session):
    """Attach a per-client ``id`` cookie to ``session`` and remember it.

    The cookie value combines the client IP with a pseudo-random number.
    NOTE(review): ``random()`` is not cryptographically secure — consider
    the ``secrets`` module if these ids must be unguessable.

    :param session: the session to tag with the id cookie
    :return: None
    """
    token = str(random() * random() * 123456789)
    session_id = self.ip_addr + '-' + token
    session.cookies.set_cookie(cookies.create_cookie('id', session_id))
    self.session = session
def create_cookie_from_dict(d: dict) -> Cookie:
    """
    Turn a cookie dict into a ``requests.cookies.create_cookie`` call.

    The dict must contain ``name`` and ``value``; a truthy optional
    ``httponly`` entry is translated into the ``rest`` mapping that
    ``create_cookie`` expects.
    """
    kwargs = dict(d)
    name = kwargs.pop('name')
    value = kwargs.pop('value')
    # BUG FIX: ``kwargs.pop('httponly')`` raised KeyError when the key was
    # absent, and ``kwargs['rest']`` assumed a rest dict already existed.
    if kwargs.pop('httponly', False):
        kwargs.setdefault('rest', {})['HttpOnly'] = None
    return create_cookie(name, value, **kwargs)
def load_json_to_session(self, json_cookies, session=None):
    """Merge a nested ``{domain: {path: {name: value}}}`` mapping into a session's jar.

    :param json_cookies: nested cookie mapping
    :param session: target session; a fresh one is created when omitted
    :return: the session with the cookies installed
    """
    # BUG FIX: ``session=Session()`` was a mutable default argument — one
    # session object was shared (and polluted) across all default calls.
    if session is None:
        session = Session()
    # NOTE(review): this writes into the private ``_cookies`` structure
    # rather than going through ``session.cookies.set_cookie``.
    _cookies = session.cookies._cookies
    for domain, paths in json_cookies.items():
        domain_map = _cookies.setdefault(domain, {})
        for path, names in paths.items():
            path_map = domain_map.setdefault(path, {})
            for cookie_name, cookie_value in names.items():
                path_map[cookie_name] = create_cookie(
                    name=cookie_name, value=cookie_value, path=path)
    return session
def __init__(self, product_urls: list, profile: Profile):
    """Prepare a BestBuy checkout helper.

    Resolves SKUs from the given product URLs, builds an HTTP session with
    browser-like headers, and seeds it with cookies harvested from a
    short-lived Selenium Chrome visit (BestBuy sets validation cookies via
    JavaScript, so a plain HTTP session alone is not enough).

    :param product_urls: product page URLs to check out
    :param profile: checkout profile used later in the purchase flow
    """
    self._skus = self._get_skus_from_url(product_urls)
    self._checkout_profile = profile
    self._http_session = Session()
    self._http_session.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
    self._http_session.headers['X-CLIENT-ID'] = "browse"
    # fetch cookies using selenium and apply to HTTP Session
    # BestBuy has some sort of cookie validation endpoint that is embedded in a JS file
    self._driver = webdriver.Chrome(ChromeDriverManager().install())
    self._driver.get(self._base_url)
    for cookie in self._driver.get_cookies():
        a_cookie = cookies.create_cookie(domain=cookie["domain"],
                                         name=cookie["name"],
                                         value=cookie["value"])
        self._http_session.cookies.set_cookie(a_cookie)
    self._driver.close()
def _trans_cookies_from_dict_to_cookiejar(cls, source_dicts):
    """Convert browser-style cookie dicts into a ``cookielib.CookieJar``.

    Each dict's ``name``/``value`` become the cookie identity; a Selenium
    ``expiry`` field is renamed to the ``expires`` keyword understood by
    ``create_cookie``.  Remaining keys pass through unchanged.
    """
    jar = cookielib.CookieJar()
    for raw in source_dicts:
        # extract name and value.
        name = raw.pop('name', None)
        value = raw.pop('value', None)
        # change key name.
        expiry = raw.pop('expiry', None)
        if expiry:
            raw['expires'] = expiry
        jar.set_cookie(create_cookie(name, value, **raw))
    return jar
def _init_logined_session(self):
    """Wire up request injectors and build a web session carrying the token cookies."""
    self._session.injectors = [
        BeforeLoginInjector(),
        AddAccessKeyInjector(self._token)
    ]
    info = self._token.cookies_info
    patched = [
        patch_cookies(raw, domain)
        for domain in info['domains']
        for raw in info['cookies']
    ]
    self._web_session = Session()
    for cookie_kwargs in patched:
        self._web_session.cookies.set_cookie(create_cookie(**cookie_kwargs))
def assert_verify_login(user):
    """
    Use this function to make sure a test user is logged in on the server.
    Tests whether a user that authenticated with SAML can use the verify endpoint.
    :param user: a user object loaded from a csv file
    :return: (text, session) the text of the get verify response as an associative
        array and the authenticated session for subsequent requests
    """
    post_response = auth_requests.post_verify(
        user['wwuid'], user['full_name'], user['email'])
    assert_verify_response(post_response, user)
    # Carry the issued token forward in a cookie-bearing session.
    token = json.loads(post_response.text)['token']
    session = Session()
    session.cookies.set_cookie(cookies.create_cookie('token', token))
    get_response = auth_requests.get_verify(session)
    assert_verify_response(get_response, user)
    return json.loads(get_response.text), session
def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True, cookies_specified_kw=None):
    """Build or extend a cookie jar from a plain ``{name: value}`` dict.

    Adapted from ``requests.cookies.cookiejar_from_dict``; see that
    function for the base parameter semantics.  ``cookies_specified_kw``
    supplies extra keyword arguments (domain, path, ...) that are applied
    to every created cookie.
    """
    cookies_specified_kw = cookies_specified_kw or {}
    if cookiejar is None:
        cookiejar = RequestsCookieJar()
    if cookie_dict is None:
        return cookiejar
    existing_names = [cookie.name for cookie in cookiejar]
    for name, value in cookie_dict.items():
        if overwrite or name not in existing_names:
            # Forward the caller-specified cookie attributes.
            cookiejar.set_cookie(
                create_cookie(name, value, **cookies_specified_kw))
    return cookiejar
def update_cookies(self):
    """Copy the configured cookies from the Selenium driver into ``self.cookies``.

    Only cookies whose domain is a suffix of the current page's domain are
    copied, mirroring normal cookie-scoping behaviour.
    """
    domain = urlparse(self.driver.current_url).netloc
    for wanted_name in self.config.get("cookies", ()):
        cookie = self.driver.get_cookie(wanted_name)
        if not cookie:
            continue
        cookie_domain = cookie.get("domain")
        # BUG FIX: a cookie without a domain used to crash with
        # ``domain.endswith(None)`` (TypeError); such cookies are now copied
        # without the domain check.
        if cookie_domain and not domain.endswith(cookie_domain):
            continue
        self.cookies.set_cookie(
            create_cookie(cookie.get("name"), cookie.get("value")))
def build_requests_session(driver: WebDriver):
    """Clone a Selenium ``WebDriver``'s cookies into a ``requests`` session.

    :param driver: a live WebDriver whose cookies are copied over
    :return: a ``requests.Session`` carrying the same cookies and a default UA
    """
    import requests
    from requests.cookies import create_cookie

    s = requests.session()
    s.headers['User-Agent'] = default_user_agent
    for cookie in driver.get_cookies():
        # BUG FIX: session cookies carry no ``expiry`` key, and the
        # ``httpOnly``/``secure`` flags are optional — use ``.get`` with
        # safe defaults instead of raising KeyError.
        c = create_cookie(
            name=cookie['name'],
            value=cookie['value'],
            domain=cookie.get('domain', ''),
            path=cookie.get('path', '/'),
            expires=cookie.get('expiry'),
            secure=cookie.get('secure', False),
            rest={'HttpOnly': cookie.get('httpOnly', False)},
        )
        s.cookies.set_cookie(c)
    return s
def login(self, username, password):
    """Log in to fd.nl.

    :return: True when the post-login redirect lands back on the home page
    """
    # Warm up the session and pre-accept the cookie wall.
    self.session.get(BASE_URL)
    self.session.cookies.set_cookie(
        cookies.create_cookie(domain='fd.nl', name='cookieconsent', value='true'))

    # Locate the login form and resolve its submit URL.
    login_page = self.session.get(LOGIN_URL)
    doc = lxml.html.fromstring(login_page.content)
    form = doc.cssselect(".modal-content form")[0]
    post_url = parse.urljoin(LOGIN_URL, form.get("action"))

    # Submit credentials together with the form's hidden fields.
    post_data = parse_form(form)
    post_data.update({"username": username, "password": password})
    response = self.session.post(post_url, post_data,
                                 verify=False, allow_redirects=True)

    # Check if logging in worked :)
    return response.url == BASE_URL
def add_data(self, session):
    """Attach the column-selection cookie to ``session``."""
    from requests.cookies import create_cookie
    columns_cookie = create_cookie('COLUMNLIST', self.COLUMNS_COOKIE)
    session.cookies.set_cookie(columns_cookie)
def __establish(self) -> None:
    """Perform a clean (cookie-less) login on the mos.ru portal.

    Replays the browser's login choreography: analytics beacon, ACS login
    redirect, anti-bot JS/COORDS challenge, fingerprint cookies from the
    gosuslugi stats endpoint, then the password POST and its redirect.

    :raises SessionException: on any failure along the way
    """
    try:
        self.cookies.clear()
        session = super(Session, self)
        logger.debug(
            'попытка чистой авторизации (без сохраненных куки)...')
        # Fire the stats beacon the portal expects before login.
        resp = session.get('https://stats.mos.ru/eds.gif',
                           headers=self.__get_header(),
                           params={
                               "eventType": "home_page",
                               "eventDst": "stats",
                               "eventSrc": "mos.ru",
                               "eventObject": {
                                   "Главная": {
                                       "Поиск": "view"
                                   }
                               },
                               "eventTime": datetime.now().microsecond,
                               "mosId": None
                           })
        # Follow the ACS login entry point and pull the challenge script URL.
        resp = session.get(
            'https://www.mos.ru/api/acs/v1/login?back_url=https%3A%2F%2Fwww.mos.ru%2F',
            headers=self.__get_header())
        js = re.search(
            r'<script charset=\"utf-8\" src=\"(.+?)\"><\/script>',
            str(resp.content)).group(1)
        logger.debug(f'получили код {js}')
        resp = session.get(f'https://login.mos.ru{js}',
                           headers=self.__get_header())
        js = re.search(r'COORDS:\"/(.+?)\"', str(resp.content)).group(1)
        logger.debug(f'получили COORDS {js}')
        # Fetch the password form to obtain the CSRF token and client id.
        resp = session.get(
            'https://login.mos.ru/sps/login/methods/password',
            headers=self.__get_header({
                "referer": "https://login.mos.ru/sps/login/methods/password",
                "origin": "https://login.mos.ru",
                "content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
                "host": "login.mos.ru",
                "accept": "text/css,*/*;q=0.1",
            }))
        csrftokenw = re.search(
            r"meta name='csrf-token-value' content='(.+?)'\/>",
            resp.text).group(1)
        # NOTE(review): this pattern is an alternation, so group(1) only
        # matches on the ``(\d+)`` branch — verify it still extracts the cid.
        cid = re.search(r'session_promise|(\d+)|find', resp.text).group(1)
        # Fingerprinting endpoint: yields the ids used for the bot-detection cookies.
        resp = session.get(
            f'https://mstat.gosuslugi.ru/oxwdsq?cid={cid}',
            headers=self.__get_header({
                "referer": "https://login.mos.ru/sps/login/methods/password",
                "origin": "https://login.mos.ru",
                "sec-ch-ua": "\"Google Chrome\";v=\"87\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"87\"",
                "sec-ch-ua-mobile": "?0",
                "Referer": "https://login.mos.ru/",
                "accept": "text/css,*/*;q=0.1",
            }))
        id1, id2, id3 = re.search(
            r'window\.kfp\.jsonp_oxwdsq\(\{ "id":"(.+?)", "e":"(\d+?)", "t":"(\d+?)"',
            resp.text).groups()
        # Plant the two fingerprint cookies the login flow checks for.
        cookie = cookies.create_cookie(domain='.mos.ru',
                                       name='oyystart',
                                       value=f'{id1}_1')
        self.cookies.set_cookie(cookie)
        cookie = cookies.create_cookie(domain='.mos.ru',
                                       name='oxxfgh',
                                       value=f'{id1}#2#{id2}#{id3}#600000')
        self.cookies.set_cookie(cookie)
        session.post(f'https://login.mos.ru/{js}', headers=self.__get_header())
        logger.debug(f'переходим к аутентификации')
        # Submit credentials without following the redirect so we can
        # inspect the Location header ourselves.
        # NOTE(review): the header dict below lists "accept" twice; the
        # second value wins — confirm this is intentional.
        resp = session.post(
            url="https://login.mos.ru/sps/login/methods/password",
            headers=self.__get_header({
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "accept-Encoding": "gzip, deflate, br",
                "Connection": "keep-alive",
                "referer": "https://login.mos.ru/sps/login/methods/password",
                "origin": "https://login.mos.ru",
                "content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
                "host": "login.mos.ru",
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            }),
            data={
                "isDelayed": "false",
                "login": self.login,
                "password": self.password,
                "csrftokenw": csrftokenw,
                "alien": "false"
            },
            allow_redirects=False)
        # A missing Location header means the account is blocked.
        if resp.headers.get('location') == None:
            raise SessionException(
                'аутентификация провалилась, так как УЗ заблокирована')
        logger.debug(
            f"переходим на адрес, который вернул запрос аутентификации {resp.headers.get('location')}"
        )
        session.get(resp.headers.get('location'), headers=self.__get_header())
        if not self.authenticated():
            raise SessionException(
                'аутентификация прошла успешно но сессия осталась не валидной'
            )
        self.__refresh_date = datetime.now()
        logger.debug("авторизация на портале Москвы прошла успешно!")
    except BaseException as e:
        raise SessionException(
            f'ошибка авторизации на портале Москвы: {e}')
def phantomjs_download(preq, **kw):
    """Fetch ``preq`` through PhantomJS and adapt the result to a ``Response``.

    Runs the PhantomJS wrapper, converts any returned cookie dicts into a
    real ``CookieJar``, and maps failure modes to synthetic status codes
    (574 = PhantomJS returned nothing, 572 = PhantomJS timed out).
    """
    p = Phantomjs()
    p.prepare(preq, kw)
    resp = Response()
    try:
        data = p.run()
        if data:
            rdata = data['resp']
            # Normalise the body to UTF-8 bytes; ignore non-encodable content.
            try:
                rdata['content'] = rdata['content'].encode('utf8')
            except Exception:
                pass
            cookies = rdata.get('cookies')
            if cookies:
                # Rebuild the raw PhantomJS cookie dicts as a CookieJar.
                new_cookies = CookieJar()
                for cookie in cookies:
                    c = create_cookie(cookie['name'], cookie['value'],
                                      domain=cookie['domain'],
                                      path=cookie['path'],
                                      secure=cookie['secure'],
                                      expires=cookie['expiry'])
                    new_cookies.set_cookie(c)
                rdata['cookies'] = new_cookies
            resp = Response.from_dict(rdata)
            resp.meta = data['meta']
            # (A block of commented-out legacy per-field response assembly was
            # removed here; Response.from_dict supersedes it.)
        else:
            # PhantomJS produced no data at all: synthesise a 574 response.
            resp.url = preq.url
            resp.status_code = 574
            resp.reason = 'phantomjs:return None'
            resp._content = ''
    except PhantomjsTimeout:
        print('PhantomjsTimeout')
        # Map the timeout to a synthetic 572 response.
        resp.status_code = 572
        resp.reason = 'phantomjs:exception:timeout'
        resp._content = ''
    print('messages: %s' % ('\n---------\n'.join(p.messages)))
    return resp
def __init__(self,
             config: Mapping | None = None,
             config_file: str = "",
             debug: bool = False,
             http_adapter_kwargs: MutableMapping | None = None):
    """Initialize :class:`ArchiveSession <ArchiveSession>` object with config.

    :param config: A config dict used for initializing the
                   :class:`ArchiveSession <ArchiveSession>` object.

    :param config_file: Path to config file used for initializing the
                        :class:`ArchiveSession <ArchiveSession>` object.

    :param debug: Also attach the urllib3 logger to the file logger
                  (equally triggered when the module logger is at DEBUG).

    :param http_adapter_kwargs: Keyword arguments used to initialize the
                                :class:`requests.adapters.HTTPAdapter <HTTPAdapter>`
                                object.

    :returns: :class:`ArchiveSession` object.
    """
    super().__init__()
    http_adapter_kwargs = http_adapter_kwargs or {}
    debug = bool(debug)

    self.config = get_config(config, config_file)
    self.config_file = config_file
    # Install any configured cookies, defaulting to the archive.org domain.
    for ck, cv in self.config.get('cookies', {}).items():
        raw_cookie = f'{ck}={cv}'
        cookie_dict = parse_dict_cookies(raw_cookie)
        if not cookie_dict.get(ck):
            continue
        cookie = create_cookie(ck, cookie_dict[ck],
                               domain=cookie_dict.get('domain', '.archive.org'),
                               path=cookie_dict.get('path', '/'))
        self.cookies.set_cookie(cookie)

    self.secure: bool = self.config.get('general', {}).get('secure', True)
    self.host: str = self.config.get('general', {}).get('host', 'archive.org')
    if 'archive.org' not in self.host:
        self.host += '.archive.org'
    self.protocol = 'https:' if self.secure else 'http:'
    user_email = self.config.get('cookies', {}).get('logged-in-user')
    if user_email:
        # The cookie value may carry attributes and be URL-quoted.
        user_email = user_email.split(';')[0]
        user_email = unquote(user_email)
    self.user_email: str = user_email
    self.access_key: str = self.config.get('s3', {}).get('access')
    self.secret_key: str = self.config.get('s3', {}).get('secret')
    # NOTE(review): ``http_adapter_kwargs`` was already defaulted above,
    # so the ``or {}`` here is redundant.
    self.http_adapter_kwargs: MutableMapping = http_adapter_kwargs or {}

    self.headers = default_headers()
    self.headers.update({'User-Agent': self._get_user_agent_string()})
    self.headers.update({'Connection': 'close'})

    self.mount_http_adapter()

    logging_config = self.config.get('logging', {})
    if logging_config.get('level'):
        self.set_file_logger(
            logging_config.get('level', 'NOTSET'),
            logging_config.get('file', 'internetarchive.log'))
        if debug or (logger.level <= 10):
            self.set_file_logger(
                logging_config.get('level', 'NOTSET'),
                logging_config.get('file', 'internetarchive.log'),
                'urllib3')
def scrapeScene(filename, date, url):
    """Scrape scene metadata from ifeelmyself.com.

    Tries, in order: the scene URL itself; an ``fNNNN-NN`` artist/video id
    embedded in the filename (with pagination fallback); and finally a
    title search built from the filename.

    :param filename: the local scene filename
    :param date: scene date (currently unused by this lookup)
    :param url: optional scene URL; used directly when provided
    :return: extracted scene info, or ``[]`` when nothing matched
    """
    ret = []
    browser = StatefulBrowser(session=None)
    browser.open("https://ifeelmyself.com/public/main.php")
    # Suppress the tags pop-up so the page structure is stable for parsing.
    cookie_obj = create_cookie(name='tags_popup_shown', value='true',
                               domain='ifeelmyself.com')
    browser.session.cookies.set_cookie(cookie_obj)
    if url:
        debugPrint("Url found, using that to scrape")
        browser.open(url)
        response = browser.page
        table = response.find(
            class_=["blog_wide_news_tbl entry ppss-scene", "entry ppss-scene"])
        if table:
            ret = extract_info(table)
    else:
        debugPrint("Analyzing filename...")
        artist_id_match = re.search(r"(f\d{3,5})", filename, re.I)
        if artist_id_match:
            artist_id = artist_id_match.group(0)
            video_id = re.search(r"-(\d+)", filename, re.I).group(1)
            browser.open("https://ifeelmyself.com/public/main.php?page=search")
            browser.select_form()
            browser['keyword'] = artist_id
            browser['view_by'] = "news"
            browser.submit_selected()
            response = browser.page
            debugPrint("Searching for video_id")
            debugPrint(artist_id + "-" + video_id)
            tables = response.find_all(class_=[
                "blog_wide_news_tbl entry ppss-scene", "entry ppss-scene"
            ])
            for table in tables:
                img = str(table.find("img")['src'])
                debugPrint(f"Image:{str(img)}")
                if (f"/{video_id}/{artist_id}-" in img) and img.endswith(
                        ("vg.jpg", "hs.jpg")):
                    debugPrint("Found a single match video!")
                    # Extract data from this single result
                    ret = extract_info(table)
                    break
            else:
                # No hit on page one: walk the pagination offsets.
                sys.stderr.write("0 matches found! Checking offset")
                pages = int(
                    response.find_all("a", class_="pagging_nonsel")[-1].get_text())
                if pages:
                    for offset in range(10, pages * 10, 10):
                        browser.open(
                            "https://ifeelmyself.com/public/main.php?page=search_results&offset="
                            + str(offset))
                        response = browser.page
                        tables = response.find_all(class_=[
                            "blog_wide_news_tbl entry ppss-scene",
                            "entry ppss-scene"
                        ])
                        for table in tables:
                            img = str(table.find("img"))
                            debugPrint(f"Image:{img}")
                            if (f"/{video_id}/{artist_id}-" in img) and img.endswith(
                                    ("vg.jpg", "hs.jpg")):
                                ret = extract_info(table)
                                break
                        else:
                            sys.stderr.write("0 matches found!, check your filename")
        else:
            debugPrint("Name changed after downloading")
            filename = filename.lower()
            # BUG FIX: Python spells named groups ``(?P<name>...)``; the
            # previous ``(?<name>...)`` syntax raised re.error at runtime.
            extract_from_filename = re.match(
                r"^([0-9\.]{6,10})?(?P<title>.+)\s(?P<artist>\w+)(\.mp4)?$",
                filename)
            if extract_from_filename:
                title = extract_from_filename.group('title')
                if title:
                    title = title.lower().replace("ifeelmyself", "")
                    title = title.replace("-", "")
                    title = title.replace("by", "")
                    debugPrint(f"Title: {title}")
                    browser.open(
                        "https://ifeelmyself.com/public/main.php?page=search")
                    browser.select_form()
                    debugPrint("Searching..")
                    browser['keyword'] = title
                    browser['view_by'] = "news"
                    browser.submit_selected()
                    response = browser.page
                    # Obtaining and counting the results. Ideally only one result.
                    matches = response.find_all("a", href='javascript:;')
                    if len(matches) == 1:
                        debugPrint("Found a single match!")
                        table = response.find(class_=[
                            "blog_wide_news_tbl entry ppss-scene",
                            "entry ppss-scene"
                        ])
                    else:
                        if len(matches) == 0:
                            sys.stderr.write("0 matches found! Check filename")
                            # BUG FIX: emit valid empty JSON (was ``"{}}"``)
                            # and actually terminate — the bare ``exit`` name
                            # was a no-op, leaving ``table`` undefined below.
                            print("{}")
                            exit()
                        if len(matches) > 1:
                            debugPrint(
                                "Multiple videos found, maybe refine search term?")
                            index = [
                                i for i, s in enumerate(matches) if title in str(s)
                            ]
                            tables = response.find_all(class_=[
                                "blog_wide_news_tbl entry ppss-scene",
                                "entry ppss-scene"
                            ])
                            table = tables[0]  # Getting first
                    if table:
                        ret = extract_info(table)
            else:
                debugPrint("Not a supported filename")
                print("{}")
                # BUG FIX: call ``exit()``; the bare name did nothing.
                exit()
    return ret
def __init__(self, url, uid, pwd=None, cert_name=None, key=None, verify_ssl=False,
             appcenter_user=False, proxies=None, resubscribe=True, graceful=True,
             lifetime=0, subscription_refresh_time=60, cached_token=None,
             token_refresh_callback=None):
    """Create an APIC session.

    url (str) apic url such as https://1.2.3.4
    uid (str) apic username or certificate name
    pwd (str) apic password
    cert_name (str) certificate name for certificate-based authentication
    key (str) path to certificate file used for certificate-based authentication
        if a password is provided then it will be prefered over certficate
    verify_ssl (bool) verify ssl certificate for ssl connections
    appcenter_user (bool) set to true when using certificate authentication from ACI app
    proxies (dict) optional dict containing the proxies passed to request library
    resubscribe (bool) auto resubscribe on if subscription or login fails
        if false then subscription thread is closed on refresh failure
    graceful (bool) trigger graceful_resubscribe at 95% of maximum lifetime which
        acquires new login token and gracefully restarts subscriptions. During this
        time, there may be duplicate data on subscription but no data should be lost.
    lifetime (int) maximum lifetime of the session before triggering a new login or
        graceful resubscribe (if graceful is enabled). If set to 0, then lifetime is
        set based on maximumLifetimeSeconds at login. Else the minimum value of
        lifetime or maximumLifetimeSeconds is used.
    subscription_refresh_time (int) Seconds for each subscription before it needs to
        be refreshed. If the value is greater than default SUBSCRITION_REFRESH, then
        it is provided as an http parameter in the initial subscription setup. Note,
        this is only supported in 4.0 and above. Caller should verify APIC/nodes are
        running a supported version of code before extended the subscription refresh
        time to custom value.
    cached_token (bool) valid token to use for API requests. If provided, then that
        token is used for every API request and refresh/subscription is disabled.
    token_refresh_callback (func) function triggered on each token refresh. Must
        accept single argument which is this session object.
    """
    url = str(url)
    uid = str(uid)
    if pwd is not None:
        pwd = str(pwd)
    if key is not None:
        key = str(key)
    # Either a password or a full certificate pair must be supplied.
    if pwd is None and (key is None or cert_name is None):
        raise Exception("A password or cert_name and key are required")
    # Accept URLs with or without an explicit http/https scheme
    # (scheme defaults to https when omitted).
    r1 = re.search("^(?P<protocol>https?://)?(?P<hostname>.+$)", url.lower())
    if r1 is not None:
        self.hostname = r1.group("hostname")
        if r1.group("protocol") is None:
            self.api = "https://%s" % self.hostname
        else:
            self.api = "%s%s" % (r1.group("protocol"), self.hostname)
    else:
        raise Exception("invalid APIC url: %s" % url)
    self.uid = uid
    self.pwd = pwd
    self.cert_name = cert_name
    self.key = key
    self.appcenter_user = appcenter_user
    self.default_timeout = 120
    self.resubscribe = resubscribe
    self.graceful = graceful
    self.lifetime = lifetime
    self.subscription_refresh_time = subscription_refresh_time
    # limit subscription_refresh to min/max
    if self.subscription_refresh_time > Session.MAX_SUBSCRIPTION_REFRESH:
        self.subscription_refresh_time = Session.MAX_SUBSCRIPTION_REFRESH
    elif self.subscription_refresh_time < Session.MIN_SUBSCRIPTION_REFRESH:
        self.subscription_refresh_time = Session.MIN_SUBSCRIPTION_REFRESH
    # A password takes precedence over certificate-based auth; the private
    # key is loaded eagerly so a bad key fails fast here.
    if self.pwd is not None:
        self.cert_auth = False
    else:
        self.cert_auth = True
        try:
            with open(self.key, 'r') as f:
                self._x509Key = load_privatekey(FILETYPE_PEM, f.read())
        except Exception as e:
            logger.debug("Traceback:\n%s", traceback.format_exc())
            raise TypeError("Could not load private key(%s): %s" % (self.key, e))
    self.verify_ssl = verify_ssl
    self.session = None
    self.token = None
    self.login_timeout = 0
    self.login_lifetime = 0
    self.login_thread = None
    self.subscription_thread = None
    self._logged_in = False
    self._proxies = proxies
    # Disable the warnings for SSL
    if not self.verify_ssl:
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
    # cached mode when user provides a valid cached token: skip login and
    # carry the token as the APIC-cookie on a pre-built session.
    self._cached_mode = False
    if cached_token is not None:
        self._cached_mode = True
        self.token = cached_token
        self.session = requests.Session()
        self.session.cookies.set_cookie(
            create_cookie(name="APIC-cookie", value=cached_token))
    # support token refresh callbacks
    self.token_refresh_callback = token_refresh_callback
def __setitem__(self, name, value):
    """Dict-style assignment: ``jar[name] = value`` sets a cookie; ``= None`` deletes it."""
    if value is not None:
        self.set_cookie(create_cookie(name, value))
    else:
        remove_cookie_by_name(self, name)
def add_data(self, session):
    """Install the COLUMNLIST cookie describing the requested columns."""
    from requests.cookies import create_cookie
    cookie = create_cookie('COLUMNLIST', self.COLUMNS_COOKIE)
    session.cookies.set_cookie(cookie)
def from_json(self, data):
    """Rehydrate cookies from a list of ``create_cookie`` keyword dicts."""
    for cookie_kwargs in data:
        self.set_cookie(create_cookie(**cookie_kwargs))