Example 1
    def dict_2_cookiejar(d):
        cj = CookieJar()

        for c in d:
            ck = Cookie(
                name=c["name"],
                value=urllib.parse.unquote(c["value"]),
                domain=c["domain"],
                path=c["path"],
                secure=c["secure"],
                rest={"HttpOnly": c["httponly"]},
                version=0,
                port=None,
                port_specified=False,
                domain_specified=False,
                domain_initial_dot=False,
                path_specified=True,
                expires=None,
                discard=True,
                comment=None,
                comment_url=None,
                rfc2109=False,
            )
            cj.set_cookie(ck)

        return cj
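A minimal usage sketch for the helper above, assuming its imports come from the standard library and that each dict entry mirrors a browser-exported cookie with exactly these keys (an assumption):

    # Hypothetical driver code; the input shape is assumed, not documented.
    import urllib.parse
    from http.cookiejar import Cookie, CookieJar

    cookies = [{
        "name": "sessionid",
        "value": "abc%20123",   # percent-encoded; the helper unquotes it
        "domain": "example.com",
        "path": "/",
        "secure": False,
        "httponly": True,
    }]
    jar = dict_2_cookiejar(cookies)
    assert any(c.name == "sessionid" for c in jar)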
Example 2
File: red.py Project: zoek1/Buatman
def enviaPeticion(url, dominio, ruta, cookieDicc={"TESTID": "set"}):
    try:
        # Cookie container for the outgoing request.
        jar = CookieJar()
        # Build a Request object for the upcoming call.
        peticion = urllib.request.Request(url=url)

        # Use crearCookie to build each cookie and add it to the jar.
        for key, item in cookieDicc.items():
            jar.set_cookie(crearCookie(key, item, dominio, ruta))

        # print(crearCookie(key, item))

        jar.add_cookie_header(peticion)

        # Issue the request.
        edmundoDantes = urllib.request.build_opener()
        abreteSesamo = edmundoDantes.open(peticion)

        RiquezaYVenganza = verificacionAcceso(abreteSesamo)

        if RiquezaYVenganza:
            print("Busca tu propio Arbol")  # "Find your own tree"
        else:
            print("!(Busca tu propio arbol)")

        return RiquezaYVenganza
    except urllib.error.HTTPError as err:
        print("Pagina fuera de servicio")
        return "Pagina fuera de servicio"
Example 3
def _getCookies(headers):
    cj = CookieJar()
    cookies = headers.split(',')
    for cookie in cookies:
        attrs = cookie.split(';')
        cookieNameValue = attrs[0].strip().split('=')
        pathNameValue = attrs[1].strip().split('=')
        ck = Cookie(version=1, name=cookieNameValue[0],
                              value=cookieNameValue[1],
                              port=443,
                              port_specified=False,
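                              # swaDomain is assumed to be a module-level constant holding the target host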
                              domain=swaDomain,
                              domain_specified=True,
                              domain_initial_dot=False,
                              path=pathNameValue[1],
                              path_specified=True,
                              secure=True,
                              expires=None,
                              discard=True,
                              comment=None,
                              comment_url=None,
                              rest={'HttpOnly': None},
                              rfc2109=False)
        cj.set_cookie(ck)
    return cj
Example 4
 def __init__(self, filename, autoconnect=True, policy=None):
     experimental("Firefox3CookieJar is experimental code")
     CookieJar.__init__(self, policy)
     if filename is not None and not isinstance(filename, str):
         raise ValueError("filename must be string-like")
     self.filename = filename
     self._conn = None
     if autoconnect:
         self.connect()
Example 5
class BuiltinBrowser(BaseBrowser):
    def __init__(self, base_url=None):
        base_url = get_default_workflowy_url(base_url)
        super().__init__(base_url)
        self.cookie_jar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))

    def open(self, url, *, _raw=False, _query=None, **kwargs):
        full_url = urljoin(self.base_url, url)
        if _query is not None:
            full_url += "?" + urlencode(_query)
        
        data = urlencode(kwargs).encode()
        
        headers = {
            "Content-Type" : "application/x-www-form-urlencoded",
        }

        req = Request(full_url, data, headers)
        res = self.opener.open(req)

        with closing(res) as fp:
            content = fp.read()

        content = content.decode()

        if not _raw:
            # TODO: must not raise 404 error
            content = json.loads(content)

        return res, content

    def set_cookie(self, name, value):
        url = urlparse(self.base_url)
        cookie = Cookie(
            version=0,
            name=name,
            value=value,
            port=None,
            port_specified=False,
            domain=url.netloc,
            domain_specified=False,
            domain_initial_dot=False,
            path=url.path,
            path_specified=True,
            secure=False,
            expires=sys.maxsize,
            discard=False,
            comment=None,
            comment_url=None,
            rest={},
            rfc2109=False,
        )

        self.cookie_jar.set_cookie(cookie)
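A usage sketch (hypothetical values; assumes the surrounding module provides `get_default_workflowy_url` and the other imported helpers):

    browser = BuiltinBrowser('https://workflowy.com/')
    browser.set_cookie('sessionid', 'deadbeef')  # hypothetical session token

Since `expires=sys.maxsize` and `discard=False`, the cookie effectively never expires for the lifetime of the jar.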
Example 6
def cookiejar_from_dict(cookie_dict, cookiejar=None):
    """Returns a CookieJar from a key/value dictionary.

    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """
    if not isinstance(cookie_dict, CookieJar):
        if cookiejar is None:
            cookiejar = CookieJar()
        if cookie_dict is not None:
            for name in cookie_dict:
                cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
        return cookiejar
    else:
        return cookie_dict
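The `create_cookie` helper is not shown in this example; in requests it fills permissive defaults and returns an `http.cookiejar.Cookie`. A minimal sketch under that assumption (not requests' exact implementation):

    from http.cookiejar import Cookie

    def create_cookie(name, value, **kwargs):
        # Permissive defaults; any keyword argument overrides them.
        defaults = dict(
            version=0, name=name, value=value,
            port=None, port_specified=False,
            domain='', domain_specified=False, domain_initial_dot=False,
            path='/', path_specified=True,
            secure=False, expires=None, discard=True,
            comment=None, comment_url=None,
            rest={'HttpOnly': None}, rfc2109=False,
        )
        defaults.update(kwargs)
        return Cookie(**defaults)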
Example 7
 def _cookies_for_domain(self, domain, request):
     if not self._policy.domain_return_ok(domain, request):
         return []
     debug("Checking %s for cookies to return", domain)
     if self.delayload:
         self._delayload_domain(domain)
     return CookieJar._cookies_for_domain(self, domain, request)
Example 8
 def cookiesjar(self):
     """Returns cookie jar object
     """
     if not self._cookies_jar:
         self._cookies_jar = CookieJar()
         # add cookies from self._cookies
     return self._cookies_jar
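The `# add cookies from self._cookies` comment marks an elided step. A sketch of what it might look like, assuming `self._cookies` is a name-to-value mapping and a `create_cookie`-style helper like the one in Example 6 (both assumptions):

    for name, value in self._cookies.items():
        self._cookies_jar.set_cookie(create_cookie(name, value))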
Example 9
def cookiejar_from_dict(*cookie_dicts):
    """Returns a CookieJar from a key/value dictionary.

    :param cookie_dict: Dict of key/values to insert into CookieJar.
    """
    cookie_dicts = tuple((d for d in cookie_dicts if d))
    if len(cookie_dicts) == 1 and isinstance(cookie_dicts[0], CookieJar):
        return cookie_dicts[0]
    cookiejar = CookieJar()
    for cookie_dict in cookie_dicts:
        if isinstance(cookie_dict, CookieJar):
            for cookie in cookie_dict:
                cookiejar.set_cookie(cookie)
        else:
            for name in cookie_dict:
                cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
    return cookiejar
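Usage sketch (relying on a `create_cookie` helper as above); falsy arguments such as `None` are filtered out before merging:

    jar = cookiejar_from_dict({'a': '1'}, {'b': '2'}, None)
    assert sorted(c.name for c in jar) == ['a', 'b']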
Example 10
    def test_length(self):
        cookie_jar = CookieJar()
        policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
        cookie_jar.set_policy(policy)

        request = urllib.request.Request('http://example.com/')
        response = FakeResponse(
            [
                'Set-Cookie: k={0}'.format('a' * 400)
            ],
            'http://example.com/'
        )

        cookie_jar.extract_cookies(response, request)

        print(cookie_jar._cookies)

        self.assertTrue(cookie_jar._cookies['example.com']['/'].get('k'))

        request = urllib.request.Request('http://example.com/')
        response = FakeResponse(
            [
                'Set-Cookie: k2={0}'.format('a' * 5000)
            ],
            'http://example.com/'
        )

        cookie_jar.extract_cookies(response, request)

        self.assertFalse(cookie_jar._cookies['example.com']['/'].get('k2'))
Example 11
    def set_cookie(self, cookie):
        if cookie.discard:
            CookieJar.set_cookie(self, cookie)
            return

        def set_cookie(cur):
            # XXX
            # is this RFC 2965-correct?
            # could this do an UPDATE instead?
            row = self._row_from_cookie(cookie, cur)
            name, unused, domain, path = row[1:5]
            cur.execute("""\
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
                        (domain, path, name))
            cur.execute("""\
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", row)
        self._transaction(set_cookie)
Example 12
    def test_empty_value(self):
        cookie_jar = CookieJar()
        policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
        cookie_jar.set_policy(policy)

        request = urllib.request.Request('http://example.com/')
        response = FakeResponse(
            [
                'Set-Cookie: k'
            ],
            'http://example.com/'
        )

        cookie_jar.extract_cookies(response, request)

        print(cookie_jar._cookies)

        self.assertTrue(cookie_jar._cookies.get('example.com'))
Example 13
 def clear(self, domain=None, path=None, name=None):
     CookieJar.clear(self, domain, path, name)
     where_parts = []
     sql_params = []
     if domain is not None:
         where_parts.append("host = ?")
         sql_params.append(domain)
         if path is not None:
             where_parts.append("path = ?")
             sql_params.append(path)
             if name is not None:
                 where_parts.append("name = ?")
                 sql_params.append(name)
     where = " AND ".join(where_parts)
     if where:
         where = " WHERE " + where
     def clear(cur):
         cur.execute("DELETE FROM moz_cookies%s" % where,
                     tuple(sql_params))
     self._transaction(clear)
Example 14
 def __init__(self, username, password, bank=SWEDBANK):
     """ Set default stuff """
     self.data = ""
     self.pch = None
     self.authkey = None
     self.cj = CookieJar()
     self.profile = None
     self.account = None
     self.useragent = None
     self.bankid = None
     self.login(username, password, bank)
Example 15
 def _cookies_for_request(self, request):
     session_cookies = CookieJar._cookies_for_request(self, request)
     def get_cookies(cur):
         query = cur.execute("SELECT host from moz_cookies")
         domains = [row[0] for row in query.fetchall()]
         cookies = []
         for domain in domains:
             cookies += self._persistent_cookies_for_domain(domain,
                                                            request, cur)
         return cookies
     persistent_cookies = self._transaction(get_cookies)
     return session_cookies + persistent_cookies
Example 16
    def __init__(self):
        self.cookiejar = CookieJar()
        self._cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.form = None

        self.url = "http://0.0.0.0:8080/"
        self.path = "/"

        self.status = None
        self.data = None
        self._response = None
        self._forms = None
Example 17
    def test_ascii(self):
        # Differences with FakeResponse:
        # On Python 3, MIME encoded-word syntax is used
        # On Python 2, U backslash syntax is used but it's not decoded back.
        cookie_jar = CookieJar()
        policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
        cookie_jar.set_policy(policy)

        request = urllib.request.Request('http://example.com/')
        response = FakeResponse(
            [
                'Set-Cookie: k=\N{REPLACEMENT CHARACTER}'  # any non-ASCII value (the original byte was lost)
            ],
            'http://example.com/'
        )

        cookie_jar.extract_cookies(response, request)

        print(cookie_jar._cookies)

        self.assertFalse(cookie_jar._cookies.get('example.com'))
Example 18
    def _authenticate(self, username, password):
        '''
        Authenticate to Google:
        1 - make a GET request to the Login webpage so we can get the login form
        2 - make a POST request with email, password and login form input values
        '''
        # Make sure we get CSV results in English
        ck1 = Cookie(
            version=0, name='I4SUserLocale', value='en_US',
            port=None, port_specified=False,
            domain='.google.com', domain_specified=False, domain_initial_dot=False,
            path='', path_specified=False, secure=False, expires=None,
            discard=False, comment=None, comment_url=None, rest=None)
        # This cookie is now mandatory
        # Not sure what the value represents but too many queries from the same value
        # lead to a Quota Exceeded error.
        # random_six_char = ''.join(random.choice('0123456789abcdef') for n in xrange(6))
        ck2 = Cookie(
            version=0, name='PREF', value='0000',
            port=None, port_specified=False,
            domain='.google.com', domain_specified=False, domain_initial_dot=False,
            path='', path_specified=False, secure=False, expires=None,
            discard=False, comment=None, comment_url=None, rest=None)

        self.cj = CookieJar()
        self.cj.set_cookie(ck1)
        self.cj.set_cookie(ck2)
        if not self.proxy:
            self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj))
        else:
            proxy = urllib.request.ProxyHandler({'http': self.proxy,
                                                 'https': self.proxy})
            self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj), proxy)
        self.opener.addheaders = self.headers

        # Get all of the login form input values
        find_inputs = etree.XPath("//form[@id='gaia_loginform']//input")
        resp = self.opener.open(self.url_login)
        data = self.read_gzipped_response(resp).decode()

        try:
            xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True))
            for input in find_inputs(xmlTree):
                name = input.get('name')
                if name:
                    name = name.encode('utf8')
                    value = input.get('value', '').encode('utf8')
                    self.login_params[name] = value
        except:
            raise AuthFailedException(("Exception while parsing: %s\n" % traceback.format_exc()))

        self.login_params["Email".encode('utf8')] = username.encode('utf8')
        self.login_params["Passwd".encode('utf8')] = password.encode('utf8')

        params = urllib.parse.urlencode(self.login_params)
        auth_resp = self.opener.open(self.url_authenticate, params.encode())

        # Testing whether Authentication was a success
        # I noticed that a correct auth sets a few cookies
        if not self.is_authentication_successfull(auth_resp):
            raise AuthFailedException('Warning: Authentication failed for user %s' % username)
Example 19
    def __init__(self, config_name_name):
        cfg_file = open(config_name_name)
        self.config = config.Config(cfg_file)
        logging.info('Config file %s loaded!', config_name_name)

        self.cookie_jar = CookieJar()
        self.opener = build_opener(
            HTTPCookieProcessor(self.cookie_jar)
        )
        self.logged_ = False
        self.list_loaded_ = False
        self.api_url = 'http://' + self.config.api_domain
        self.shows_data = {}
        self.episodes_data = {}
        self.watched_data = {}
Example 20
 def __init__(self, **kwargs):
     """
     @param kwargs: Keyword arguments.
         - B{proxy} - An http proxy to be specified on requests.
              The proxy is defined as {protocol:proxy,}
                 - type: I{dict}
                 - default: {}
         - B{timeout} - Set the url open timeout (seconds).
                 - type: I{float}
                 - default: 90
     """
     Transport.__init__(self)
     Unskin(self.options).update(kwargs)
     self.cookiejar = CookieJar()
     self.proxy = {}
     self.urlopener = None
Example 21
 def __init__(self,
         host=None,
         apiurl='/w/api.php',
         timeout=100,
         srlimit=500,
         apfrom=None,
         aplimit=5000,
         bllimit=5000,
         aulimit=5000,
         aclimit=5000,
         rclimit=5000,
         lelimit=5000,
     ):
     if not host:
         raise Exception("host not defined")
     self.host = host
     self.apiurl = apiurl
     self.url = '%s%s' % (self.host, self.apiurl)
     self.format = 'json'
     self.cj = CookieJar()
     self.opener = urllib.request.build_opener(
         urllib.request.HTTPCookieProcessor(self.cj)
     )
     self.token = None
     self.defaults = {}
     self.defaults['srlimit'] = srlimit
     self.defaults['aplimit'] = aplimit
     self.defaults['aclimit'] = aclimit
     self.defaults['bllimit'] = bllimit
     self.defaults['rclimit'] = rclimit
     self.defaults['lelimit'] = lelimit
     self.srlimit = srlimit
     self.apfrom = apfrom
     self.aplimit = aplimit
     self.bllimit = bllimit
     self.aulimit = aulimit
     self.aclimit = aclimit
     self.rclimit = rclimit
     self.lelimit = lelimit
     self.search_info = {}
     self.aufinished = False
Example 22
    def test_domain_limit(self):
        cookie_jar = CookieJar()
        policy = DeFactoCookiePolicy(cookie_jar=cookie_jar)
        cookie_jar.set_policy(policy)

        request = urllib.request.Request('http://example.com/')

        for key in range(55):
            response = FakeResponse(
                [
                    'Set-Cookie: k{0}=a'.format(key)
                ],
                'http://example.com/'
            )

            cookie_jar.extract_cookies(response, request)

            if key < 50:
                self.assertTrue(
                    cookie_jar._cookies['example.com']['/']
                    .get('k{0}'.format(key))
                )
            else:
                self.assertFalse(
                    cookie_jar._cookies['example.com']['/']
                    .get('k{0}'.format(key))
                )

        response = FakeResponse(
            [
                'Set-Cookie: k3=b'
            ],
            'http://example.com/'
        )

        cookie_jar.extract_cookies(response, request)
        self.assertEqual(
            'b',
            cookie_jar._cookies['example.com']['/']['k3'].value
        )
Example 23
    def testCookieAdapters(self):
        jar = CookieJar(policy=None)  # DefaultCookiePolicy())

        # set a cookie
        res = Response()
        tstval = str(uuid.uuid4())
        res.set_cookie("a-cookie", tstval, domain="example.com")
        cookies = jar.make_cookies(filters.ResponseCookieAdapter(res), Request.blank("http://example.com"))
        for c in cookies:
            jar.set_cookie(c)

        self.assert_(len(jar), ("where's my cookies?"))
        self.assert_("a-cookie" in [c.name for c in jar], "seriously, where's my cookie")

        # now put the header on the request please
        request = Request.blank("http://example.com")
        self.assert_(".example.com" in jar._cookies.keys(), jar._cookies.keys())
        jar.add_cookie_header(filters.RequestCookieAdapter(request))
        self.assert_("Cookie" in request.headers, (str(request), "Y NO COOKIES?"))
Example 24
 def get_json_data(self, url):
     opener = build_opener(HTTPCookieProcessor(CookieJar()))
     opener.addheaders.append(('Cookie', self.cookie))
     response = opener.open(url)
     return json.load(response)
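This method relies only on the standard library; a sketch of the assumed imports:

    import json
    from http.cookiejar import CookieJar
    from urllib.request import build_opener, HTTPCookieProcessor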
Example 25
from index import application
import unittest
from webtest import TestApp, TestResponse
import sys
from http.cookiejar import CookieJar

test_app = TestApp(application, cookiejar=CookieJar())
test_response = TestResponse()

sys.path.append('../')


# os.environ['WEBTEST_TARGET_URL'] = 'http://localhost:8888'
# test_app.set_cookie('login', '2524')
# test_app.authorization()


class BlackBoxTest(unittest.TestCase):
    def setUp(self):
        test_app.post('/aluno_cadastro', dict(aluno_nome='eric', senha='idle'))
        test_app.post('/login', dict(usuario='eric', senha='idle'))

    def _fixaluno_turma(self):
        res = test_app.post('/aluno_cadastro', dict(aluno_nome='egg', senha='spam'))
        res2 = test_app.get('/turma_cadastro', dict(turma_nome='ni'))
        res3 = test_app.get('/turma_cadastro', dict(turma_nome='parrot'))
        # res4 = test_app.get('/cadastro_item' , dict(nome='burro quando foge',tipo='3',preco='5'))

    def _test_index(self):
        """ o indice desse servidor """
        res = test_app.get('/')
Example 26
class Browser(object):
    def __init__(self):
        self.cookiejar = CookieJar()
        self._cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.form = None

        self.url = "http://0.0.0.0:8080/"
        self.path = "/"

        self.status = None
        self.data = None
        self._response = None
        self._forms = None

    @property
    def text(self):
        return self.data.decode('utf-8')

    def reset(self):
        """Clears all cookies and history."""
        self.cookiejar.clear()

    def build_opener(self):
        """Builds the opener using (urllib2/urllib.request).build_opener.
        Subclasses can override this function to prodive custom openers.
        """
        return urllib_build_opener()

    def do_request(self, req):
        if DEBUG:
            print('requesting', req.get_method(), req.get_full_url())

        opener = self.build_opener()
        opener.add_handler(self._cookie_processor)
        try:
            self._response = opener.open(req)
        except HTTPError as e:
            self._response = e

        self.url = self._response.geturl()
        self.path = get_selector(Request(self.url))
        self.data = self._response.read()
        self.status = self._response.code
        self._forms = None
        self.form = None

        return self.get_response()

    def open(self, url, data=None, headers={}):
        """Opens the specified url."""
        url = urljoin(self.url, url)
        req = Request(url, data, headers)

        return self.do_request(req)

    def show(self):
        """Opens the current page in real web browser."""
        f = open('page.html', 'wb')  # self.data is bytes
        f.write(self.data)
        f.close()

        url = 'file://' + os.path.abspath('page.html')
        webbrowser.open(url)

    def get_response(self):
        """Returns a copy of the current response."""
        return addinfourl(BytesIO(self.data), self._response.info(), self._response.geturl())

    def get_soup(self):
        """Returns beautiful soup of the current document."""
        import BeautifulSoup
        return BeautifulSoup.BeautifulSoup(self.data)

    def get_text(self, e=None):
        """Returns content of e or the current document as plain text."""
        e = e or self.get_soup()
        return ''.join([htmlunquote(c) for c in e.recursiveChildGenerator()
                       if isinstance(c, text_type)])

    def _get_links(self):
        soup = self.get_soup()
        return [a for a in soup.findAll(name='a')]

    def get_links(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
        """Returns all links in the document."""
        return self._filter_links(self._get_links(),
            text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)

    def follow_link(self, link=None, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
        if link is None:
            links = self._filter_links(self.get_links(),
                text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
            link = links and links[0]

        if link:
            return self.open(link['href'])
        else:
            raise BrowserError("No link found")

    def find_link(self, text=None, text_regex=None, url=None, url_regex=None, predicate=None):
        links = self._filter_links(self.get_links(),
            text=text, text_regex=text_regex, url=url, url_regex=url_regex, predicate=predicate)
        return links and links[0] or None

    def _filter_links(self, links,
            text=None, text_regex=None,
            url=None, url_regex=None,
            predicate=None):
        predicates = []
        if text is not None:
            predicates.append(lambda link: link.string == text)
        if text_regex is not None:
            predicates.append(lambda link: re_compile(text_regex).search(link.string or ''))
        if url is not None:
            predicates.append(lambda link: link.get('href') == url)
        if url_regex is not None:
            predicates.append(lambda link: re_compile(url_regex).search(link.get('href', '')))
        if predicate:
            predicates.append(predicate)

        def f(link):
            for p in predicates:
                if not p(link):
                    return False
            return True

        return [link for link in links if f(link)]

    def get_forms(self):
        """Returns all forms in the current document.
        The returned form objects implement the ClientForm.HTMLForm interface.
        """
        if self._forms is None:
            import ClientForm
            self._forms = ClientForm.ParseResponse(self.get_response(), backwards_compat=False)
        return self._forms

    def select_form(self, name=None, predicate=None, index=0):
        """Selects the specified form."""
        forms = self.get_forms()

        if name is not None:
            forms = [f for f in forms if f.name == name]
        if predicate:
            forms = [f for f in forms if predicate(f)]

        if forms:
            self.form = forms[index]
            return self.form
        else:
            raise BrowserError("No form selected.")

    def submit(self, **kw):
        """submits the currently selected form."""
        if self.form is None:
            raise BrowserError("No form selected.")
        req = self.form.click(**kw)
        return self.do_request(req)

    def __getitem__(self, key):
        return self.form[key]

    def __setitem__(self, key, value):
        self.form[key] = value
Example 27
    def download_url(self, url, referrer=None, user_agent=None, verbose=False):
        """Download a URL (likely http or RSS) from the web and return its
           contents as a str. Allow for possible vagaries like cookies,
           redirection, compression etc.
        """
        if verbose:
            print("download_url", url, "referrer=", referrer, \
                                "user_agent", user_agent, file=sys.stderr)

        request = urllib.request.Request(url)

        # If we're after the single-page URL, we may need a referrer
        if referrer:
            if verbose:
                print("Adding referrer", referrer, file=sys.stderr)
            request.add_header('Referer', referrer)

        if not user_agent:
            user_agent = VersionString
        request.add_header('User-Agent', user_agent)
        if verbose:
            print("Using User-Agent of", user_agent, file=sys.stderr)

        # Allow for cookies in the request: some sites, notably nytimes.com,
        # degrade to an infinite redirect loop if cookies aren't enabled.
        cj = CookieJar()
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj))
        response = opener.open(request, timeout=100)
        # Lots of ways this can fail.
        # e.g. ValueError, "unknown url type"
        # or BadStatusLine: ''

        # At this point it would be lovely to check whether the
        # mime type is HTML or RSS. Unfortunately, all we have is a
        # httplib.HTTPMessage instance which is completely
        # undocumented (see http://bugs.python.org/issue3428).

        # It's not documented, but sometimes after urlopen
        # we can actually get a content type. If it's not
        # text/something, that's bad.
        ctype = response.headers['content-type']
        if ctype and ctype != '' and not ctype.startswith("text") \
           and not ctype.startswith("application/rss") \
           and not ctype.startswith("application/xml") \
           and not ctype.startswith("application/x-rss+xml") \
           and not ctype.startswith("application/atom+xml"):
            print(url, "isn't text -- content-type was", \
                ctype, ". Skipping.", file=sys.stderr)
            response.close()
            raise RuntimeError("Contents not text (%s)! %s" % (ctype, url))

        # Were we redirected? geturl() will tell us that.
        self.cur_url = response.geturl()

        # but sadly, that means we need another request object
        # to parse out the host and prefix:
        real_request = urllib.request.Request(self.cur_url)
        real_request.add_header('User-Agent', user_agent)

        # A few sites, like http://nymag.com, gzip their http.
        # urllib2 doesn't handle that automatically: we have to ask for it.
        # But some other sites, like the LA Monitor, return bad content
        # if you ask for gzip.
        if self.config.getboolean(self.feedname, 'allow_gzip'):
            request.add_header('Accept-encoding', 'gzip')

        # feed() is going to need to know the host, to rewrite urls.
        # So save host and prefix based on any redirects we've had:
        # feedmeparser will need them.
        self.host = real_request.host
        self.prefix = real_request.type + '://' + self.host + '/'

        # urllib2 unfortunately doesn't read unicode,
        # so try to figure out the current encoding:
        if not self.encoding:
            if verbose:
                print(
                    "download_url: self.encoding not set, getting it from headers",
                    file=sys.stderr)
            self.encoding = response.headers.get_content_charset()
            enctype = response.headers['content-type'].split('charset=')
            if len(enctype) > 1:
                self.encoding = enctype[-1]
            else:
                if verbose:
                    print("Defaulting to utf-8", file=sys.stderr)
                self.encoding = 'utf-8'
        if verbose:
            print("final encoding is", self.encoding, file=sys.stderr)

        # Is the URL gzipped? If so, we'll need to uncompress it.
        is_gzip = response.info().get('Content-Encoding') == 'gzip'

        # Read the content of the link:
        # This can die with socket.error, "connection reset by peer"
        # And it may not set html, so initialize it first:
        contents = None
        try:
            contents = response.read()
        # XXX Need to guard against IncompleteRead -- but what class owns it??
        #except httplib.IncompleteRead, e:
        #    print >>sys.stderr, "Ignoring IncompleteRead on", url
        except Exception as e:
            print("Unknown error from response.read()", url, file=sys.stderr)

        # contents can be undefined here. If so, no point in doing anything else.
        if not contents:
            print("Didn't read anything from response.read()", file=sys.stderr)
            response.close()
            raise NoContentError

        if is_gzip:
            buf = io.BytesIO(contents)
            f = gzip.GzipFile(fileobj=buf)
            contents = f.read()

        # No docs say I should close this. I can only assume.
        response.close()

        # response.read() returns bytes. Convert to str as soon as possible
        # so the rest of the program can work with str.
        return contents.decode(encoding=self.encoding)
Example 28
class MyShowsRu(object):
    """ work with api.myshows.ru """
    def __init__(self, config_name_name):
        cfg_file = open(config_name_name)
        self.config = config.Config(cfg_file)
        logging.info('Config file %s loaded!', config_name_name)

        self.cookie_jar = CookieJar()
        self.opener = build_opener(
            HTTPCookieProcessor(self.cookie_jar)
        )
        self.logged_ = False
        self.list_loaded_ = False
        self.api_url = 'http://' + self.config.api_domain
        self.shows_data = {}
        self.episodes_data = {}
        self.watched_data = {}

    def do_login(self):
        """ authorization """
        if self.logged_:
            return
        try:
            req_data = urllib.parse.urlencode({
                'login': self.config.login.name,
                'password': self.config.login.md5pass
            }).encode()
            logging.debug(
                'Login, url: %s%s, data: %s', self.api_url, self.config.url.login, req_data
            )
            request = Request(
                self.api_url + self.config.url.login, req_data
            )
            handle = self.opener.open(request)
            logging.debug('Login result: %s/%s', handle.headers, handle.read())
            self.cookie_jar.clear(
                self.config.api_domain, '/', 'SiteUser[login]'
            )
            self.cookie_jar.clear(
                self.config.api_domain, '/', 'SiteUser[password]'
            )
        except HTTPError as ex:
            if ex.code == 403:
                stderr.write('Bad login name or password!\n')
            else:
                stderr.write('Login error!\n')
            logging.debug('HTTP error #%s: %s\n', ex.code, ex.read())
            exit(1)
        except URLError as ex:
            stderr.write('Login error!\n')
            logging.debug('URLError - %s\n', ex.reason)
            exit(1)

        self.logged_ = True

    def load_shows(self):
        """ load user shows """
        if self.list_loaded_:
            return
        if not self.logged_:
            self.do_login()
        logging.debug('Login: %s%s', self.api_url, self.config.url.list_shows)
        request = Request(
            self.api_url + self.config.url.list_shows
        )
        handle = self.opener.open(request)
        self.shows_data = json.loads(handle.read())
        self.list_loaded_ = True

    def list_all_shows(self):
        """ list all user shows """
        self.load_shows()
        print()
        for show_id in sorted(
            self.shows_data, key=lambda show_id: self.shows_data[show_id]['title']
        ):
            next_show = self.shows_data[show_id]
            if next_show['watchedEpisodes'] <= 0:
                show_sign = '-'
            elif next_show['watchedEpisodes'] < next_show['totalEpisodes']:
                show_sign = '+'
            else:
                show_sign = ' '

            alias = self.alias_by_title(next_show['title'])
            if not alias:
                alias = '-'

            print('{0}{1}({7}): {2}/{3} ({4}%), rating = {5}({6})'.format(
                show_sign,
                tr_out(next_show['title']),
                # next_show['ruTitle'],
                next_show['watchedEpisodes'], next_show['totalEpisodes'],
                100 * next_show['watchedEpisodes'] / next_show['totalEpisodes'],
                next_show['rating'],
                next_show['watchStatus'][0],
                alias
            ))
        print()

    def list_show(self, alias):
        """ list user show by alias """
        re_m = re.match(r'^(.*\D)(\d{1,2}){0,1}$', alias)
        if not re_m:
            print('Bad format for list - "{0}"'.format(alias))
        else:
            season = -1
            if re_m.lastindex == 2:
                season = int(re_m.group(2))
            show_id = self.id_by_title(
                self.title_by_alias(re_m.group(1), no_exit=True)
            )
            epis = self.load_episodes(show_id)
            episodes = epis['episodes']
            list_map = {}
            for epi_id in episodes:
                next_episode = episodes[epi_id]
                if season == -1 or next_episode['seasonNumber'] == season:
                    list_map[
                        next_episode['seasonNumber'] * 1000
                        + next_episode['episodeNumber']
                    ] = next_episode

            watched = self.load_watched(show_id)
            current_season = -1
            for epi_num in sorted(list_map.keys()):
                next_episode = list_map[epi_num]
                next_season = next_episode['seasonNumber']
                if current_season != next_season:
                    current_season = next_season
                    print('{0} Season {1}:'.format(
                        tr_out(epis['title']), current_season
                    ))
                comment = ''
                epi_id = str(next_episode['id'])
                if epi_id in watched:
                    comment = 'watched ' + watched[epi_id]['watchDate']
                print('  "{0}" (s{1:02d}e{2:02d}) {3}'.format(
                    tr_out(next_episode['title']),
                    next_episode['seasonNumber'],
                    next_episode['episodeNumber'],
                    comment
                ))

    def list_shows(self, alias):
        """ list user shows """
        if alias == 'all':
            self.list_all_shows()
        else:
            self.list_show(alias)

    def title_by_alias(self, query, no_exit=False):
        """ return show id by alias """
        logging.debug('title_by_alias(%s)', query)
        alias = query.lower()
        if alias not in self.config.alias:
            logging.debug('Unknown alias - "%s"', alias)
            if no_exit:
                print('Cannot find alias "{0}", will try it as title!'.format(query))
                return query
            else:
                print('Unknown alias - {0}'.format(query))
                exit(1)
        else:
            logging.debug('title_by_alias(%s) = %s', query, self.config.alias[alias])
            return self.config.alias[alias]

    def alias_by_title(self, title):
        """ return show alias by title """
        logging.debug('alias_by_title(%s)', title)
        for alias, a_title in self.config.alias.items():
            if a_title == title:
                return alias

        return ''

    def id_by_title(self, title):
        """ return show id by title """
        logging.debug('id_by_title(%s)', title)
        if not self.list_loaded_:
            self.load_shows()

        for show_id in self.shows_data:
            next_show = self.shows_data[show_id]
            logging.debug('id_by_title(%s) = %s', next_show['title'], show_id)
            if next_show['title'] == title:
                logging.debug('Found id_by_title(%s) = %s', title, show_id)
                return show_id

        print('Unknown title - {0}'.format(title))
        exit(1)

    def load_episodes(self, show_id):
        """ load episode data by show id """
        if not self.logged_:
            self.do_login()
        if show_id not in self.episodes_data:
            logging.debug(
                'Load episodes: %s%s',
                self.api_url, self.config.url.list_episodes.format(show_id)
            )
            request = Request(
                self.api_url + self.config.url.list_episodes.format(show_id)
            )
            handle = self.opener.open(request)
            self.episodes_data[show_id] = json.loads(handle.read())

        return self.episodes_data[show_id]

    def load_watched(self, show_id):
        """ load watched data by show id """
        if not self.logged_:
            self.do_login()
        if show_id not in self.watched_data:
            logging.debug(
                'Load watched: %s%s',
                self.api_url, self.config.url.list_watched.format(show_id)
            )
            request = Request(
                self.api_url + self.config.url.list_watched.format(show_id)
            )
            handle = self.opener.open(request)
            self.watched_data[show_id] = json.loads(handle.read())

        return self.watched_data[show_id]

    def get_last_watched(self, show_id):
        """ return last watched episode id for show id """
        logging.debug('Searching last watched for show %s', show_id)
        episodes = self.load_episodes(show_id)['episodes']
        watched = self.load_watched(show_id)

        last_number = 0
        episode_id = None
        for epi_id in watched:
            logging.debug('Next watched id: %s', epi_id)
            next_episode = episodes[epi_id]
            logging.debug(
                'Trying next episode: %s - %s (id: %s/seq: %s)',
                next_episode['shortName'], next_episode['title'],
                next_episode['id'], next_episode['sequenceNumber']
            )
            if last_number < next_episode['sequenceNumber']:
                last_number = next_episode['sequenceNumber']
                episode_id = epi_id
                logging.debug(
                    'Saved next last episode: %s - %s (id: %s)',
                    next_episode['shortName'], next_episode['title'], episode_id
                )

        logging.debug('Found last watched %s', episode_id)

        return episode_id

    def show_last_watched_by_alias(self, alias):
        """ show last watched episode for alias """
        show_id = self.id_by_title(self.title_by_alias(alias, no_exit=True))
        epis = self.load_episodes(show_id)
        watched = self.load_watched(show_id)
        episode_id = self.get_last_watched(show_id)
        print()
        if episode_id is None:
            print('{0} is unwatched'.format(tr_out(epis['title'])))
        else:
            episode = epis['episodes'][episode_id]
            print('Last for {0} is s{1:02d}e{2:02d} ("{3}") at {4}'.format(
                tr_out(epis['title']),
                episode['seasonNumber'], episode['episodeNumber'],
                tr_out(episode['title']),
                watched[episode_id]['watchDate']
            ))
        print()

    def show_last_watched_by_date(self, alias):
        """ show last watched episode(s) for date """
        date_to = datetime.date.today()
        if alias == 'day':
            date_from = date_to + datetime.timedelta(days=-1)
        elif alias == 'week':
            date_from = date_to + datetime.timedelta(days=-7)
        elif alias == 'month':
            prev_month = date_to.replace(day=1) + datetime.timedelta(days=-1)
            date_from = date_to + datetime.timedelta(days=-prev_month.day)
        else:
            print('Unknown alias - {0}'.format(alias))
            exit(1)

        self.load_shows()
        print()
        print('Watched from {0} to {1}'.format(
            date_from.strftime('%Y-%m-%d'),
            date_to.strftime('%Y-%m-%d')
        ))
        print()
        re_c = re.compile(r'(\d{1,2})\.(\d{1,2})\.(\d{4})')
        count = 0
        for show_id in self.shows_data:
            next_show = self.shows_data[show_id]
            if next_show['watchedEpisodes'] <= 0:
                continue
            watched = self.load_watched(next_show['showId'])
            epis = None
            last_map = {}
            for epi_id in watched:
                next_episode = watched[epi_id]
                re_m = re_c.match(next_episode['watchDate'])
                if not re_m:
                    print('Warning: unknown date format - {0}'.format(
                        next_episode['watchDate']))
                    continue
                dtv = [int(s) for s in re_m.group(3, 2, 1)]
                epi_date = datetime.date(dtv[0], dtv[1], dtv[2])
                if date_from <= epi_date and epi_date <= date_to:
                    if not epis:
                        epis = self.load_episodes(show_id)
                    count += 1
                    if epi_id not in epis['episodes']:
                        print('Episode not found: {0}'.format(epi_id))
                        logging.debug('Episodes:')
                        logging.debug(epis)
                        continue
                    else:
                        episode = epis['episodes'][epi_id]
                        date_key = epi_date.toordinal() * 1000\
                            + episode['seasonNumber'] * 10\
                            + episode['episodeNumber']
                        last_map[date_key] = episode

            for date_key in sorted(last_map.keys()):
                episode = last_map[date_key]
                print('{0} s{1:02d}e{2:02d} "{3}" at {4}'.format(
                    tr_out(epis['title']),
                    episode['seasonNumber'], episode['episodeNumber'],
                    tr_out(episode['title']),
                    watched[str(episode['id'])]['watchDate']
                ))
        print()
        print('Total count: {0}'.format(count))
        print()

    def show_last_watched(self, query):
        """ show last watched episode(s) """
        alias = query.lower()
        if alias in ['day', 'week', 'month']:
            self.show_last_watched_by_date(alias)
        else:
            self.show_last_watched_by_alias(query)

    def get_first_unwatched(self, show_id):
        """ return first unwathced episode for show id """
        logging.debug('Searching first unwatched for show %s', show_id)
        episodes = self.load_episodes(show_id)['episodes']
        last_watched = self.get_last_watched(show_id)
        if last_watched is None:
            last_watched = 0
        else:
            logging.debug(
                'Last watched is: %s - %s (%s)',
                episodes[last_watched]['shortName'], episodes[last_watched]['title'],
                episodes[last_watched]['sequenceNumber']
            )
            last_watched = episodes[last_watched]['sequenceNumber']

        logging.debug('Last watched: %s', last_watched)

        episode_id = None
        first_unwatched = None
        for epi_id in episodes:
            next_episode = episodes[epi_id]
            logging.debug(
                'Trying next episode: %s - %s (%s)',
                next_episode['shortName'], next_episode['title'],
                next_episode['sequenceNumber']
            )
            if (
                (not first_unwatched
                 or first_unwatched > next_episode['sequenceNumber'])
                and last_watched < next_episode['sequenceNumber']
            ):
                first_unwatched = next_episode['sequenceNumber']
                episode_id = epi_id
                logging.debug(
                    'Saved next last unwatched: %s - %s (%s)',
                    next_episode['shortName'], next_episode['title'],
                    next_episode['sequenceNumber']
                )

        return episode_id

    def show_next_for_watch(self, alias):
        """ show next episode for watch for alias """
        show_id = self.id_by_title(self.title_by_alias(alias, no_exit=True))
        epis = self.load_episodes(show_id)
        episode_id = self.get_first_unwatched(show_id)
        if episode_id is None:
            print("\nCannot find first watch for {0}\n".format(tr_out(epis['title'])))
        else:
            episode = epis['episodes'][episode_id]
            print('\nFirst watch for {0} is s{1:02d}e{2:02d} ("{3}")\n'.format(
                tr_out(epis['title']),
                episode['seasonNumber'], episode['episodeNumber'],
                tr_out(episode['title']),
            ))

    def set_episode_check(self, alias, epi, check):
        """ set epi episode as watched """
        re_m = re.match(r's(\d{1,2})e(\d{1,2})', epi.lower())
        if not re_m:
            print('Bad format for check - "{0}"'.format(epi))
        else:
            season = int(re_m.group(1))
            episode = int(re_m.group(2))
            show_id = self.id_by_title(self.title_by_alias(alias, no_exit=True))
            epis = self.load_episodes(show_id)
            watched = self.load_watched(show_id)
            episodes = epis['episodes']
            for epi_id in episodes:
                next_episode = episodes[epi_id]
                if (
                    next_episode['seasonNumber'] == season
                    and next_episode['episodeNumber'] == episode
                ):
                    valid_op = False
                    old_date = ''
                    if check:
                        msg = 'checked'
                        if epi_id in watched:
                            old_date = watched[epi_id]['watchDate']
                        else:
                            url = self.config.url.check_episode.format(epi_id)
                            valid_op = True
                    else:
                        msg = 'unchecked'
                        if epi_id in watched:
                            url = self.config.url.uncheck_episode.format(epi_id)
                            valid_op = True

                    if not valid_op:
                        print()
                        print('Episode "{0}" (s{1:02d}e{2:02d}) of "{3}" already {4} {5}'\
                             .format(
                                 tr_out(next_episode['title']),
                                 next_episode['seasonNumber'],
                                 next_episode['episodeNumber'],
                                 tr_out(epis['title']),
                                 msg,
                                 old_date
                             ))
                    else:
                        logging.debug('Set checked: %s%s', self.api_url, url)
                        request = Request(self.api_url + url)
                        self.opener.open(request)
                        print()
                        print(
                            'Episode "{0}" (s{1:02d}e{2:02d}) of "{3}" set {4}'\
                            .format(
                                tr_out(next_episode['title']),
                                next_episode['seasonNumber'],
                                next_episode['episodeNumber'],
                                tr_out(epis['title']),
                                msg
                            ))
                    break

    def search_show(self, query):
        """ search show """
        if not self.logged_:
            self.do_login()
        req_data = urllib.parse.urlencode({
            'q': query,
        }).encode()
        logging.debug(
            'Search url/data: %s%s%s',
            self.api_url, self.config.url.search, req_data
        )
        request = Request(
            self.api_url + self.config.url.search, req_data
        )
        handle = self.opener.open(request)
        search_result = json.loads(handle.read())
        logging.debug('Search result: %s', search_result)
        return search_result

    def show_search_result(self, query):
        """ show search result """
        search_result = self.search_show(query)
        print()
        for show_id in search_result:
            show = search_result[show_id]
            print('"{1}", started: {2} (id={0})'.format(
                show_id, tr_out(show['title']), show['started']
            ))
        print()

    def set_show_status(self, alias, status, accurate):
        """ set show status """
        title = self.title_by_alias(alias, no_exit=True)

        search_result = self.search_show(title)
        for show_id in search_result:
            show = search_result[show_id]
            if accurate and show['title'] != alias:
                continue
            url = self.config.url.status.format(show['id'], status)
            logging.debug('Set show status: %s%s', self.api_url, url)
            request = Request(self.api_url + url)
            self.opener.open(request)
            print('Show "{0}" status set to {1}'.format(
                tr_out(show['title']), status
            ))
            print()
Example 29
    def __init__(self, parent, acc):
        self._acc = acc
        self._gmail_cookies_key = ''
        self._gmail_cookies = CookieJar()

        builder = Gtk.Builder()
        builder.set_translation_domain(PACKAGE_NAME)
        builder.add_from_file(get_data_file("account_widget.ui"))
        builder.connect_signals({
            "account_type_changed":
            self._on_cmb_account_type_changed,
            "entry_changed":
            self._on_entry_changed,
            "expander_folders_activate":
            self._on_expander_folders_activate,
            "password_info_icon_released":
            self._on_password_info_icon_released,
            "log_in":
            self._on_log_in,
            "gmail_label_add":
            self._on_gmail_label_add,
            "gmail_label_remove":
            self._on_gmail_label_remove,
            "gmail_label_edited":
            self._on_gmail_label_edited,
            "gmail_label_mode_edited":
            self._on_gmail_label_mode_edited,
            "gmail_labels_selection_changed":
            self._on_gmail_labels_selection_changed,
        })

        self._window = Gtk.Dialog(
            title=_('Mail Account'), parent=parent, use_header_bar=True,
            buttons=(Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
                     Gtk.STOCK_OK, Gtk.ResponseType.OK))

        self._window.set_default_response(Gtk.ResponseType.OK)
        self._window.set_default_size(400, 0)

        self._box = self._window.get_content_area()
        self._box.set_border_width(12)
        self._box.set_spacing(12)

        self._box.pack_start(builder.get_object("account_widget"), True, True,
                             0)

        self._cmb_account_type = builder.get_object("cmb_account_type")
        self._label_log_in = builder.get_object("label_log_in")
        self._label_gmail_labels = builder.get_object("label_gmail_labels")
        self._box_gmail_labels = builder.get_object("box_gmail_labels")
        self._button_log_in = builder.get_object("button_log_in")
        self._button_remove_gmail_label = builder.get_object(
            "button_remove_gmail_label")
        self._label_account_name = builder.get_object("label_account_name")
        self._entry_account_name = builder.get_object("entry_account_name")
        self._label_account_user = builder.get_object("label_account_user")
        self._entry_account_user = builder.get_object("entry_account_user")
        self._label_account_password = builder.get_object(
            "label_account_password")
        self._entry_account_password = builder.get_object(
            "entry_account_password")
        self._label_account_server = builder.get_object("label_account_server")
        self._entry_account_server = builder.get_object("entry_account_server")
        self._label_account_port = builder.get_object("label_account_port")
        self._entry_account_port = builder.get_object("entry_account_port")
        self._label_account_file_path = builder.get_object(
            "label_account_file_path")
        self._chooser_account_file_path = builder.get_object(
            "chooser_file_path")
        self._label_account_directory_path = builder.get_object(
            "label_account_directory_path")
        self._chooser_account_directory_path = builder.get_object(
            "chooser_directory_path")
        self._expander_folders = builder.get_object("expander_folders")
        self._overlay = builder.get_object("overlay")
        self._treeview_folders = builder.get_object("treeview_folders")
        self._liststore_folders = builder.get_object("liststore_folders")
        self._liststore_gmail_labels = builder.get_object(
            "liststore_gmail_labels")
        self._treeview_gmail_labels = builder.get_object(
            "treeview_gmail_labels")
        self._chk_account_push = builder.get_object("chk_account_push")
        self._chk_account_ssl = builder.get_object("chk_account_ssl")

        self._button_ok = self._window.get_widget_for_response(
            Gtk.ResponseType.OK)
        self._button_ok.set_sensitive(False)

        self._error_label = None
        self._folders_received = False
        self._selected_folder_count = 0
        self._pwd_info_icon = self._entry_account_password.get_icon_name(
            Gtk.EntryIconPosition.SECONDARY)

        self._entry_account_port.set_placeholder_text(_("optional"))

        renderer_folders_enabled = Gtk.CellRendererToggle()
        renderer_folders_enabled.connect("toggled", self._on_folder_toggled)
        column_folders_enabled = Gtk.TreeViewColumn(_('Enabled'),
                                                    renderer_folders_enabled)
        column_folders_enabled.add_attribute(renderer_folders_enabled,
                                             "active", 0)
        column_folders_enabled.set_alignment(0.5)
        self._treeview_folders.append_column(column_folders_enabled)

        renderer_folders_name = Gtk.CellRendererText()
        column_folders_name = Gtk.TreeViewColumn(_('Name'),
                                                 renderer_folders_name,
                                                 text=1)
        column_folders_name.set_cell_data_func(renderer_folders_name,
                                               folder_cell_data)
        self._treeview_folders.append_column(column_folders_name)
        self._on_gmail_labels_selection_changed(
            self._treeview_gmail_labels.get_selection())
Esempio n. 30
0
def s1_download(argument_list):
    '''Download a single Sentinel-1 product from Copernicus scihub.

    This function downloads S1 products from ESA's apihub.

    Args:
        argument_list: a list with 4 entries (this is used to enable parallel
                       execution)
                       argument_list[0] is the product's uuid
                       argument_list[1] is the local path for the download
                       argument_list[2] is the username of Copernicus' scihub
                       argument_list[3] is the password of Copernicus' scihub
    '''
    # unpack the arguments
    uuid = argument_list[0]
    filename = argument_list[1]
    uname = argument_list[2]
    pword = argument_list[3]

    # ask for username and password in case they were not given as input
    if not uname:
        logger.debug('If you do not have a Copernicus Scihub user'
                     ' account go to: https://scihub.copernicus.eu')
        uname = input(' Your Copernicus Scihub Username:')

    if not pword:
        pword = getpass.getpass(' Your Copernicus Scihub Password:')

    # construct the download url for the product
    url = ('https://scihub.copernicus.eu/apihub/odata/v1/'
           'Products(\'{}\')/$value'.format(uuid))

    # get first response for file size
    response = requests.get(url, stream=True, auth=(uname, pword))

    # check response
    if response.status_code == 401:
        raise ValueError(' ERROR: Username/Password are incorrect.')
    elif response.status_code == 404:
        logger.debug('Product %s missing from the archive, continuing.',
                     filename.split('/')[-1])
        return filename.split('/')[-1]
    elif response.status_code != 200:
        logger.debug(
            'ERROR: Something went wrong, will try again in 30 seconds.')
        response.raise_for_status()

    password_manager = urlreq.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, "https://scihub.copernicus.eu/apihub/",
                                  uname, pword)

    cookie_jar = CookieJar()
    opener = urlreq.build_opener(urlreq.HTTPBasicAuthHandler(password_manager),
                                 urlreq.HTTPCookieProcessor(cookie_jar))
    urlreq.install_opener(opener)
    # get download size
    total_length = int(response.headers.get('content-length', 0))

    # check if file is partially downloaded
    if os.path.exists(filename):
        first_byte = os.path.getsize(filename)
    else:
        first_byte = 0
    if first_byte >= total_length:
        return str('{}.downloaded'.format(filename))

    zip_test = 1
    while zip_test is not None and zip_test <= 10:
        logger.debug('INFO: Downloading scene to: {}'.format(filename))
        with TqdmUpTo(unit='B',
                      unit_scale=True,
                      miniters=1,
                      desc=url.split('/')[-1]) as t:
            filename, headers = urlreq.urlretrieve(url,
                                                   filename=filename,
                                                   reporthook=t.update_to)

        # zipFile check
        logger.debug(
            'INFO: Checking the zip archive of {} for inconsistency'.format(
                filename))
        zip_test = h.check_zipfile(filename)

        # if it did not pass the test, remove the file
        # in the while loop it will be downloaded again
        if zip_test is not None:
            logger.debug('INFO: {} did not pass the zip test. '
                         'Re-downloading the full scene.'.format(filename))
            os.remove(filename)
            first_byte = 0
        # otherwise we change the status to True
        else:
            logger.debug('INFO: {} passed the zip test.'.format(filename))
            with open(str('{}.downloaded'.format(filename)), 'w') as file:
                file.write('successfully downloaded \n')
    return str('{}.downloaded'.format(filename))
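
A side note on the resume logic above: first_byte is computed from any partial file, but urlretrieve always restarts from byte zero, so the value only serves to skip files that are already complete. Below is a hedged sketch of what a true ranged resume could look like with the requests library already used for the size check; resume_download is a hypothetical helper, not part of the original.

import requests

def resume_download(url, filename, first_byte, uname, pword):
    # ask the server for the remainder of the file only
    headers = {'Range': 'bytes={}-'.format(first_byte)}
    response = requests.get(url, stream=True, headers=headers,
                            auth=(uname, pword))
    response.raise_for_status()
    # append to the existing partial file instead of truncating it
    with open(filename, 'ab') as out_file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            out_file.write(chunk)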
Esempio n. 31
0
    def get(self, options, url):
        cj = CookieJar()
        match = re.search(".*video/([0-9]+)", url)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)

        video_id = match.group(1)
        if options.username and options.password:
            # bogus placeholder cookie so the jar is initialized
            cc = Cookie(None, 'asdf', None, '80', '80', 'www.kanal5play.se', None, None, '/', None, False, False, 'TestCookie', None, None, None)
            cj.set_cookie(cc)
            #get session cookie
            data = get_http_data("http://www.kanal5play.se/", cookiejar=cj)
            authurl = "https://kanal5swe.appspot.com/api/user/login?callback=jQuery171029989&email=%s&password=%s&_=136250" % (options.username, options.password)
            data = get_http_data(authurl)
            match = re.search("({.*})\);", data)
            jsondata = json.loads(match.group(1))
            if jsondata["success"] == False:
                log.error(jsondata["message"])
                sys.exit(2)
            authToken = jsondata["userData"]["auth"]
            cc = Cookie(version=0, name='authToken',
                          value=authToken,
                          port=None, port_specified=False,
                          domain='www.kanal5play.se',
                          domain_specified=True,
                          domain_initial_dot=True, path='/',
                          path_specified=True, secure=False,
                          expires=None, discard=True, comment=None,
                          comment_url=None, rest={'HttpOnly': None})
            cj.set_cookie(cc)

        format = "FLASH"
        if options.hls:
            format = "IPHONE"
        url = "http://www.kanal5play.se/api/getVideo?format=%s&videoId=%s" % (format, video_id)
        data = json.loads(get_http_data(url, cookiejar=cj))
        options.live = data["isLive"]
        if data["hasSubtitle"]:
            subtitle = "http://www.kanal5play.se/api/subtitles/%s" % video_id
        if options.hls:
            url = data["streams"][0]["source"]
            baseurl = url[0:url.rfind("/")]
            if data["streams"][0]["drmProtected"]:
                log.error("We cant download drm files for this site.")
                sys.exit(2)
            download_hls(options, url, baseurl)
        else:
            streambaseurl = data["streamBaseUrl"]
            streams = {}

            for i in data["streams"]:
                stream = {}
                stream["source"] = i["source"]
                streams[int(i["bitrate"])] = stream

            test = select_quality(options, streams)

            filename = test["source"]
            match = re.search("^(.*):", filename)
            options.other = "-W %s -y %s " % ("http://www.kanal5play.se/flash/K5StandardPlayer.swf", filename)
            download_rtmp(options, streambaseurl)
        if options.subtitle and data["hasSubtitle"]:
            if options.output != "-":
                data = get_http_data(subtitle, cookiejar=cj)
                subtitle_json(options, data)
Esempio n. 32
0
from urllib.request import Request, build_opener, HTTPCookieProcessor
from http.cookiejar import CookieJar
from urllib import parse

cookie_jar = CookieJar()
handler = HTTPCookieProcessor(cookiejar=cookie_jar)
opener = build_opener(handler)

url = 'http://www.renren.com/PLogin.do'
data = {'email': "*****@*****.**", 'password': '******'}
data = parse.urlencode(data).encode()
request = Request(url, data=data)
request.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')
response = opener.open(request)
print(response.read().decode())
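
The point of the HTTPCookieProcessor in this snippet: the Set-Cookie headers from the login POST end up in cookie_jar, so any later request made through the same opener carries the session automatically. A hedged follow-up sketch (the URL is illustrative):

# after the login above, the same opener replays the session cookies
profile = opener.open('http://www.renren.com/')  # hypothetical follow-up URL
print(profile.getcode())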
Esempio n. 33
0
def init_basicauth(config, config_mtime):
    """initialize urllib2 with the credentials for Basic Authentication"""
    def filterhdrs(meth, ishdr, *hdrs):
        # this is so ugly but httplib doesn't use
        # a logger object or such
        def new_method(self, *args, **kwargs):
            # check if this is a recursive call (note: we do not
            # have to care about thread safety)
            is_rec_call = getattr(self, '_orig_stdout', None) is not None
            try:
                if not is_rec_call:
                    self._orig_stdout = sys.stdout
                    sys.stdout = StringIO()
                meth(self, *args, **kwargs)
                hdr = sys.stdout.getvalue()
            finally:
                # restore original stdout
                if not is_rec_call:
                    sys.stdout = self._orig_stdout
                    del self._orig_stdout
            for i in hdrs:
                if ishdr:
                    hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr)
                else:
                    hdr = re.sub(i, '', hdr)
            sys.stdout.write(hdr)

        new_method.__name__ = meth.__name__
        return new_method

    if config['http_debug'] and not config['http_full_debug']:
        HTTPConnection.send = filterhdrs(HTTPConnection.send, True, 'Cookie',
                                         'Authorization')
        HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False,
                                        'header: Set-Cookie.*\n')

    if sys.version_info < (2, 6):
        # HTTPS proxy is not supported in old urllib2. It only leads to an error
        # or, at best, a warning.
        if 'https_proxy' in os.environ:
            del os.environ['https_proxy']
        if 'HTTPS_PROXY' in os.environ:
            del os.environ['HTTPS_PROXY']

    if config['http_debug']:
        # brute force
        def urllib2_debug_init(self, debuglevel=0):
            self._debuglevel = 1

        AbstractHTTPHandler.__init__ = urllib2_debug_init

    cookie_file = os.path.expanduser(config['cookiejar'])
    global cookiejar
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
        if int(round(config_mtime)) > int(os.stat(cookie_file).st_mtime):
            cookiejar.clear()
            cookiejar.save()
    except IOError:
        try:
            fd = os.open(cookie_file, os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         0o600)
            os.close(fd)
        except IOError:
            # hmm is any good reason why we should catch the IOError?
            #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file
            cookiejar = CookieJar()
Esempio n. 34
0
def download_tile(tile, url, pid, srtmv3, one, username, password):

    grass.debug("Download tile: %s" % tile, debug=1)
    output = tile + ".r.in.srtm.tmp." + str(pid)
    if srtmv3:
        if one:
            local_tile = str(tile) + ".SRTMGL1.hgt.zip"
        else:
            local_tile = str(tile) + ".SRTMGL3.hgt.zip"
    else:
        local_tile = str(tile) + ".hgt.zip"

    urllib2.urlcleanup()

    if srtmv3:
        remote_tile = str(url) + local_tile
        goturl = 1

        try:
            password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_manager.add_password(None,
                                          "https://urs.earthdata.nasa.gov",
                                          username, password)

            cookie_jar = CookieJar()

            opener = urllib2.build_opener(
                urllib2.HTTPBasicAuthHandler(password_manager),
                # urllib2.HTTPHandler(debuglevel=1),    # Uncomment these two lines to see
                # urllib2.HTTPSHandler(debuglevel=1),   # details of the requests/responses
                urllib2.HTTPCookieProcessor(cookie_jar),
            )
            urllib2.install_opener(opener)

            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)

            fo = open(local_tile, "w+b")
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
        except Exception:
            goturl = 0

        return goturl

    # SRTM subdirs: Africa, Australia, Eurasia, Islands, North_America, South_America
    for srtmdir in (
            "Africa",
            "Australia",
            "Eurasia",
            "Islands",
            "North_America",
            "South_America",
    ):
        remote_tile = str(url) + str(srtmdir) + "/" + local_tile
        goturl = 1

        try:
            request = urllib2.Request(remote_tile)
            response = urllib2.urlopen(request)
            fo = open(local_tile, "w+b")
            fo.write(response.read())
            fo.close()
            time.sleep(0.5)
            # does not work:
            # urllib.urlretrieve(remote_tile, local_tile, data = None)
        except Exception:
            goturl = 0

        if goturl == 1:
            return 1

    return 0
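
Both download branches above share the same read-then-write pattern; here is a hedged sketch of that step factored into a helper (fetch_tile is a hypothetical name), streaming with shutil.copyfileobj instead of reading the whole response into memory:

import shutil

def fetch_tile(remote_tile, local_tile):
    # stream the remote file to disk in chunks
    response = urllib2.urlopen(urllib2.Request(remote_tile))
    with open(local_tile, "w+b") as fo:
        shutil.copyfileobj(response, fo)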
Esempio n. 35
0
    def __init__(self):
        self.cookie_jar = CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))
Esempio n. 36
0
def get_cookie_jar():
    if not hasattr(get_cookie_jar, 'memo'):
        get_cookie_jar.memo = CookieJar()

    return get_cookie_jar.memo
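
A brief usage sketch (the opener wiring is assumed, it is not part of the original): because get_cookie_jar memoizes a single CookieJar on the function object, every opener built from it shares one session for the whole process.

from urllib.request import build_opener, HTTPCookieProcessor

opener = build_opener(HTTPCookieProcessor(get_cookie_jar()))
# cookies set by this response are replayed on any later request made
# through an opener built from the same memoized jar
response = opener.open('https://example.com/')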
Esempio n. 37
0
    def process_request(self, request, spider):
        cookie_jar = request.meta.setdefault('cookie_jar', CookieJar())
        cookie_jar.extract_cookies(request, request)

        request.meta['cookie_jar'] = cookie_jar
        request.meta['dont_merge_cookies'] = True
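
This middleware only stores cookies onto the request; a hedged sketch of the complementary hook it is usually paired with, assuming Scrapy's downloader-middleware API and scrapy.http.cookies.CookieJar (whose extract_cookies takes (response, request)):

    def process_response(self, request, response, spider):
        cookie_jar = request.meta.get('cookie_jar')
        if cookie_jar is not None:
            # harvest Set-Cookie headers from the response into this
            # request's private jar
            cookie_jar.extract_cookies(response, request)
        return response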
Esempio n. 38
0
class FoggyCam(object):
    """FoggyCam client class that performs capture operations."""

    nest_username = ''
    nest_password = ''

    nest_user_id = ''
    nest_access_token = ''
    nest_access_token_expiration = ''
    nest_current_user = None

    nest_session_url = 'https://home.nest.com/session'
    nest_user_url = 'https://home.nest.com/api/0.1/user/#USERID#/app_launch'
    nest_api_login_url = 'https://webapi.camera.home.nest.com/api/v1/login.login_nest'
    nest_image_url = 'https://nexusapi-us1.camera.home.nest.com/get_image?uuid=#CAMERAID#&width=#WIDTH#&cachebuster=#CBUSTER#'
    nest_verify_pin_url = 'https://home.nest.com/api/0.1/2fa/verify_pin'

    nest_user_request_payload = {
        "known_bucket_types": ["quartz"],
        "known_bucket_versions": []
    }

    nest_camera_array = []
    nest_camera_buffer_threshold = 50

    is_capturing = False
    cookie_jar = None
    merlin = None
    temp_dir_path = ''
    local_path = ''

    def __init__(self, username, password):
        self.nest_password = password
        self.nest_username = username
        self.cookie_jar = CookieJar()
        self.merlin = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self.cookie_jar))

        if not os.path.exists('_temp'):
            os.makedirs('_temp')

        self.local_path = os.path.dirname(os.path.abspath(__file__))
        self.temp_dir_path = os.path.join(self.local_path, '_temp')

        # It's important to try and load the cookies first to check
        # if we can avoid logging in.
        try:
            self.unpickle_cookies()

            utc_date = datetime.utcnow()
            utc_millis_str = str(int(utc_date.timestamp()) * 1000)
            self.initialize_twof_session(utc_millis_str)
        except Exception:
            print("Failed to re-use the cookies. Re-initializing session...")
            self.initialize_session()

        self.login()
        self.initialize_user()

    def unpickle_cookies(self):
        """Get local cookies and load them into the cookie jar."""

        print("Unpickling cookies...")
        with open("cookies.bin", 'rb') as f:
            pickled_cookies = pickle.load(f)

            for pickled_cookie in pickled_cookies:
                self.cookie_jar.set_cookie(pickled_cookie)

            cookie_data = dict(
                (cookie.name, cookie.value) for cookie in self.cookie_jar)

            self.nest_access_token = cookie_data["cztoken"]

    def pickle_cookies(self):
        """Store the cookies locally to reduce auth calls."""

        print("Pickling cookies...")
        pickle.dump([c for c in self.cookie_jar], open("cookies.bin", "wb"))

    def initialize_twof_session(self, time_token):
        """Creates the first session to get the access token and cookie, with 2FA enabled."""

        print("Intializing 2FA session...")

        target_url = self.nest_session_url + "?=_" + time_token
        print(target_url)

        try:
            request = urllib.request.Request(target_url)
            request.add_header('Authorization',
                               'Basic %s' % self.nest_access_token)

            response = self.merlin.open(request)
            session_data = response.read()

            session_json = json.loads(session_data)

            self.nest_access_token = session_json['access_token']
            self.nest_access_token_expiration = session_json['expires_in']
            self.nest_user_id = session_json['userid']

            self.pickle_cookies()
        except urllib.request.HTTPError as err:
            print(err)

    def initialize_session(self):
        """Creates the first session to get the access token and cookie."""

        print('INFO: Initializing session...')

        payload = {'email': self.nest_username, 'password': self.nest_password}
        binary_data = json.dumps(payload).encode('utf-8')

        request = urllib.request.Request(self.nest_session_url, binary_data)
        request.add_header('Content-Type', 'application/json')

        try:
            response = self.merlin.open(request)
            session_data = response.read()
            session_json = json.loads(session_data)

            self.nest_access_token = session_json['access_token']
            self.nest_access_token_expiration = session_json['expires_in']
            self.nest_user_id = session_json['userid']

            print('INFO: [PARSED] Captured authentication token:')
            print(self.nest_access_token)

            print('INFO: [PARSED] Captured expiration date for token:')
            print(self.nest_access_token_expiration)

            cookie_data = dict(
                (cookie.name, cookie.value) for cookie in self.cookie_jar)
            for cookie in cookie_data:
                print(cookie)

            print('INFO: [COOKIE] Captured authentication token:')
            print(cookie_data["cztoken"])
        except urllib.request.HTTPError as err:
            if err.code == 401:
                error_message = err.read()
                unauth_content = json.loads(error_message)

                if unauth_content["status"].lower() == "verification_pending":
                    print("Pending 2FA verification!")

                    two_factor_token = unauth_content["2fa_token"]
                    phone_truncated = unauth_content["truncated_phone_number"]

                    print("Enter PIN you just received on number ending with",
                          phone_truncated)
                    pin = input()

                    payload = {"pin": pin, "2fa_token": two_factor_token}
                    binary_data = json.dumps(payload).encode('utf-8')

                    request = urllib.request.Request(self.nest_verify_pin_url,
                                                     binary_data)
                    request.add_header('Content-Type', 'application/json')

                    try:
                        response = self.merlin.open(request)
                        pin_attempt = response.read()

                        parsed_pin_attempt = json.loads(pin_attempt)
                        if parsed_pin_attempt["status"].lower(
                        ) == "id_match_positive":
                            print("2FA verification successful.")

                            utc_date = datetime.utcnow()
                            utc_millis_str = str(
                                int(utc_date.timestamp()) * 1000)

                            print("Targetting new session with timestamp: ",
                                  utc_millis_str)

                            cookie_data = dict((cookie.name, cookie.value)
                                               for cookie in self.cookie_jar)

                            print(
                                'INFO: [COOKIE] Captured authentication token:'
                            )
                            print(cookie_data["cztoken"])

                            self.nest_access_token = parsed_pin_attempt[
                                'access_token']

                            self.initialize_twof_session(utc_millis_str)
                        else:
                            print("Could not verify. Exiting...")
                            exit()

                    except Exception:
                        traceback.print_exc()

                        print("Failed 2FA checks. Exiting...")
                        exit()

        print('INFO: Session initialization complete!')

    def login(self):
        """Performs user login to get the website_2 cookie."""

        print('INFO: Performing user login...')

        post_data = {'access_token': self.nest_access_token}
        post_data = urllib.parse.urlencode(post_data)
        binary_data = post_data.encode('utf-8')

        print("INFO: Auth post data")
        print(post_data)

        request = urllib.request.Request(self.nest_api_login_url,
                                         data=binary_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        response = self.merlin.open(request)
        session_data = response.read()

        print(session_data)

    def initialize_user(self):
        """Gets the assets belonging to Nest user."""

        print('INFO: Initializing current user...')

        user_url = self.nest_user_url.replace('#USERID#', self.nest_user_id)

        print('INFO: Requesting user data from:')
        print(user_url)

        binary_data = json.dumps(
            self.nest_user_request_payload).encode('utf-8')

        request = urllib.request.Request(user_url, binary_data)

        request.add_header('Content-Type', 'application/json')
        request.add_header('Authorization',
                           'Basic %s' % self.nest_access_token)

        response = self.merlin.open(request)

        response_data = response.read()

        print(response_data)

        user_object = json.loads(response_data)
        for bucket in user_object['updated_buckets']:
            bucket_id = bucket['object_key']
            if bucket_id.startswith('quartz.'):
                camera_id = bucket_id.replace('quartz.', '')
                print('INFO: Detected camera configuration.')
                print(bucket)
                print('INFO: Camera UUID:')
                print(camera_id)
                self.nest_camera_array.append(camera_id)

    def capture_images(self, config=None):
        """Starts the multi-threaded image capture process."""

        print('INFO: Capturing images...')

        self.is_capturing = True

        if not os.path.exists('capture'):
            os.makedirs('capture')

        self.nest_camera_buffer_threshold = config.threshold

        for camera in self.nest_camera_array:
            camera_path = ''
            video_path = ''

            # Determine whether the entries should be copied to a custom path
            # or not.
            if not config.path:
                camera_path = os.path.join(self.local_path, 'capture', camera,
                                           'images')
                video_path = os.path.join(self.local_path, 'capture', camera,
                                          'video')
            else:
                camera_path = os.path.join(config.path, 'capture', camera,
                                           'images')
                video_path = os.path.join(config.path, 'capture', camera,
                                          'video')

            # Provision the necessary folders for images and videos.
            if not os.path.exists(camera_path):
                os.makedirs(camera_path)

            if not os.path.exists(video_path):
                os.makedirs(video_path)

            image_thread = threading.Thread(target=self.perform_capture,
                                            args=(config, camera, camera_path,
                                                  video_path))
            image_thread.daemon = True
            image_thread.start()

        while True:
            time.sleep(1)

    def perform_capture(self,
                        config=None,
                        camera=None,
                        camera_path='',
                        video_path=''):
        """Captures images and generates the video from them."""

        camera_buffer = defaultdict(list)

        while self.is_capturing:
            file_id = str(uuid.uuid4().hex)

            utc_date = datetime.utcnow()
            utc_millis_str = str(int(utc_date.timestamp()) * 1000)

            print('Applied cache buster: ', utc_millis_str)

            image_url = self.nest_image_url.replace(
                '#CAMERAID#', camera).replace('#CBUSTER#',
                                              utc_millis_str).replace(
                                                  '#WIDTH#', str(config.width))

            request = urllib.request.Request(image_url)
            request.add_header('accept',
                               'image/webp,image/apng,image/*,*/*;q=0.9')
            request.add_header('accept-encoding', 'gzip, deflate, br')
            request.add_header(
                'user-agent',
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
            )
            request.add_header('referer', 'https://home.nest.com/')
            request.add_header('authority',
                               'nexusapi-us1.camera.home.nest.com')

            try:
                response = self.merlin.open(request)
                time.sleep(config.capture_delay)

                with open(camera_path + '/' + file_id + '.jpg',
                          'wb') as image_file:
                    for chunk in response:
                        image_file.write(chunk)

                # Check if we need to compile a video
                if config.produce_video:
                    camera_buffer_size = len(camera_buffer[camera])
                    print('[',
                          threading.current_thread().name,
                          '] INFO: Camera buffer size for ', camera, ': ',
                          camera_buffer_size)

                    if camera_buffer_size < self.nest_camera_buffer_threshold:
                        camera_buffer[camera].append(file_id)
                    else:
                        camera_image_folder = os.path.join(
                            self.local_path, camera_path)

                        # Build the batch of files that need to be made into a video.
                        file_declaration = ''
                        for buffer_entry in camera_buffer[camera]:
                            file_declaration = file_declaration + 'file \'' + camera_image_folder + '/' + buffer_entry + '.jpg\'\n'
                        concat_file_name = os.path.join(
                            self.temp_dir_path, camera + '.txt')

                        # Make sure that the content is decoded

                        with open(concat_file_name, 'w') as declaration_file:
                            declaration_file.write(file_declaration)

                        # Check if we have ffmpeg locally
                        use_terminal = False
                        ffmpeg_path = ''

                        if shutil.which("ffmpeg"):
                            ffmpeg_path = 'ffmpeg'
                            use_terminal = True
                        else:
                            ffmpeg_path = os.path.abspath(
                                os.path.join(os.path.dirname(__file__), '..',
                                             'tools', 'ffmpeg'))

                        if use_terminal or os.path.isfile(ffmpeg_path):
                            print('INFO: Found ffmpeg. Processing video!')
                            target_video_path = os.path.join(
                                video_path, file_id + '.mp4')
                            process = Popen([
                                ffmpeg_path, '-r',
                                str(config.frame_rate), '-f', 'concat',
                                '-safe', '0', '-i', concat_file_name,
                                '-vcodec', 'libx264', '-crf', '25', '-pix_fmt',
                                'yuv420p', target_video_path
                            ],
                                            stdout=PIPE,
                                            stderr=PIPE)
                            process.communicate()
                            os.remove(concat_file_name)
                            print('INFO: Video processing is complete!')

                            # Upload the video
                            storage_provider = AzureStorageProvider()

                            if bool(config.upload_to_azure):
                                print('INFO: Uploading to Azure Storage...')
                                target_blob = 'foggycam/' + camera + '/' + file_id + '.mp4'
                                storage_provider.upload_video(
                                    account_name=config.az_account_name,
                                    sas_token=config.az_sas_token,
                                    container='foggycam',
                                    blob=target_blob,
                                    path=target_video_path)
                                print('INFO: Upload complete.')

                            # If the user specified the need to remove images post-processing
                            # then clear the image folder from images in the buffer.
                            if config.clear_images:
                                for buffer_entry in camera_buffer[camera]:
                                    deletion_target = os.path.join(
                                        camera_path, buffer_entry + '.jpg')
                                    print('INFO: Deleting ' + deletion_target)
                                    os.remove(deletion_target)
                        else:
                            print(
                                'WARNING: No ffmpeg detected. Make sure the binary is in /tools.'
                            )

                        # Empty buffer, since we no longer need the file records that we're planning
                        # to compile in a video.
                        camera_buffer[camera] = []
            except urllib.request.HTTPError as err:
                if err.code == 403:
                    self.initialize_session()
                    self.login()
                    self.initialize_user()
            except Exception:
                print('ERROR: Could not download image from URL:')
                print(image_url)

                traceback.print_exc()
Esempio n. 39
0
# CookieJar has several subclasses: FileCookieJar, LWPCookieJar, MozillaCookieJar
# CookieJar manages cookies generated over HTTP: it stores them and attaches
# the appropriate cookies to outgoing HTTP requests
from http.cookiejar import CookieJar,LWPCookieJar
from urllib.request import Request,HTTPCookieProcessor,build_opener
from urllib.parse import urlencode

import ssl

ssl._create_default_https_context = ssl._create_unverified_context

# ---------------------------- obtaining cookies
# create an object that manages cookies
cookie_obj = CookieJar()
# create a cookie-aware handler, an instance of HTTPCookieProcessor
cookie_handler = HTTPCookieProcessor(cookie_obj)
# build_opener returns an opener like the one urlopen uses internally;
# urlopen only handles simple requests and does not support authentication,
# cookies, proxies or other complex operations
opener = build_opener(cookie_handler)

response = opener.open('http://www.neihanshequ.com')
print(response)

for cookie in cookie_obj:
    print('key:',cookie.name)
    print('value:',cookie.value)
# ---------------------------------- saving cookies
filename = 'neihan.txt'
# set the file the cookies will be saved to
cookie_obj = LWPCookieJar(filename=filename)
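
The snippet stops right after creating the LWPCookieJar; a hedged sketch of how the save/load round trip typically continues, using only the standard http.cookiejar API and the names defined above:

opener = build_opener(HTTPCookieProcessor(cookie_obj))
response = opener.open('http://www.neihanshequ.com')
# ignore_discard keeps session cookies that would otherwise be dropped
cookie_obj.save(ignore_discard=True, ignore_expires=True)

# later: reload the saved cookies into a fresh jar
cookie_obj = LWPCookieJar(filename)
cookie_obj.load(ignore_discard=True, ignore_expires=True)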
Esempio n. 40
0
class HttpTransport(Transport):
    """
    HTTP transport using urllib2.  Provides basic HTTP transport
    with support for cookies and proxies, but no authentication.
    """

    def __init__(self, **kwargs):
        """
        @param kwargs: Keyword arguments.
            - B{proxy} - An http proxy to be specified on requests.
                 The proxy is defined as {protocol:proxy,}
                    - type: I{dict}
                    - default: {}
            - B{timeout} - Set the url open timeout (seconds).
                    - type: I{float}
                    - default: 90
        """
        Transport.__init__(self)
        Unskin(self.options).update(kwargs)
        self.cookiejar = CookieJar()
        self.proxy = {}
        self.urlopener = None

    def open(self, request):
        try:
            url = request.url
            headers = request.headers
            log.debug('opening (%s)', url)
            u2request = u2.Request(url, None, headers)
            self.proxy = self.options.proxy
            return self.u2open(u2request)
        except HTTPError as e:
            raise TransportError(str(e), e.code, e.fp)

    def send(self, request):
        result = None
        url = request.url
        msg = request.message
        headers = request.headers
        try:
            u2request = u2.Request(url, msg, headers)
            self.addcookies(u2request)
            self.proxy = self.options.proxy
            request.headers.update(u2request.headers)
            log.debug('sending:\n%s', request)
            fp = self.u2open(u2request)
            self.getcookies(fp, u2request)
            result = Reply(200, fp.headers, fp.read())
            log.debug('received:\n%s', result)
        except HTTPError as e:
            if e.code in (202, 204):
                result = None
            else:
                raise TransportError(e.msg, e.code, e.fp)
        return result

    def addcookies(self, u2request):
        """
        Add cookies in the cookiejar to the request.
        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request
        """
        self.cookiejar.add_cookie_header(u2request)

    def getcookies(self, fp, u2request):
        """
        Add cookies in the request to the cookiejar.
        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request
        """
        self.cookiejar.extract_cookies(fp, u2request)

    def u2open(self, u2request):
        """
        Open a connection.
        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request
        @return: The opened file-like urllib2 object.
        @rtype: fp
        """
        tm = self.options.timeout
        opener = self.u2opener()
        if self.u2ver() < 2.6:
            socket.setdefaulttimeout(tm)
            return opener.open(u2request)
        else:
            return opener.open(u2request, timeout=tm)

    def u2opener(self):
        """
        Create a urllib opener.
        @return: An opener.
        @rtype: I{OpenerDirector}
        """
        if self.urlopener is None:
            return u2.build_opener(*self.u2handlers())
        else:
            return self.urlopener

    def u2handlers(self):
        """
        Get a collection of urllib handlers.
        @return: A list of handlers to be installed in the opener.
        @rtype: [Handler,...]
        """
        handlers = []
        handlers.append(u2.ProxyHandler(self.proxy))
        return handlers

    def u2ver(self):
        """
        Get the major/minor version of the urllib2 lib.
        @return: The urllib2 version.
        @rtype: float
        """
        try:
            part = u2.__version__.split('.', 1)
            n = float('.'.join(part))
            return n
        except Exception as e:
            log.exception(e)
            return 0

    def __deepcopy__(self, memo={}):
        clone = self.__class__()
        p = Unskin(self.options)
        cp = Unskin(clone.options)
        cp.update(p)
        return clone
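
Note the design choice above: cookies move in and out of the jar by hand (addcookies/getcookies) rather than through a urllib handler. A hedged subclass sketch of the alternative, letting urllib2's HTTPCookieProcessor manage the same jar (CookieProcessorTransport is a hypothetical name):

class CookieProcessorTransport(HttpTransport):
    def u2handlers(self):
        # extend the handler list so the opener itself tracks cookies
        handlers = HttpTransport.u2handlers(self)
        handlers.append(u2.HTTPCookieProcessor(self.cookiejar))
        return handlers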
Esempio n. 41
0
class QuartClient:
    http_connection_class: Type[TestHTTPConnectionProtocol]
    websocket_connection_class: Type[TestWebsocketConnectionProtocol]

    http_connection_class = TestHTTPConnection
    websocket_connection_class = TestWebsocketConnection

    def __init__(self, app: "Quart", use_cookies: bool = True) -> None:
        self.app = app
        self.cookie_jar: Optional[CookieJar]
        if use_cookies:
            self.cookie_jar = CookieJar()
        else:
            self.cookie_jar = None
        self.preserve_context = False
        self.push_promises: List[Tuple[str, Headers]] = []

    async def open(
        self,
        path: str,
        *,
        method: str = "GET",
        headers: Optional[Union[dict, Headers]] = None,
        data: Optional[AnyStr] = None,
        form: Optional[dict] = None,
        query_string: Optional[dict] = None,
        json: Any = sentinel,
        scheme: str = "http",
        follow_redirects: bool = False,
        root_path: str = "",
        http_version: str = "1.1",
    ) -> Response:
        self.push_promises = []
        response = await self._make_request(path, method, headers, data, form,
                                            query_string, json, scheme,
                                            root_path, http_version)
        if follow_redirects:
            while response.status_code >= 300 and response.status_code <= 399:
                # Most browsers respond to an HTTP 302 with a GET request to the new location,
                # despite what the HTTP spec says. HTTP 303 should always be responded to with
                # a GET request.
                if response.status_code == 302 or response.status_code == 303:
                    method = "GET"
                response = await self._make_request(
                    response.location,
                    method,
                    headers,
                    data,
                    form,
                    query_string,
                    json,
                    scheme,
                    root_path,
                    http_version,
                )
        if self.preserve_context:
            _request_ctx_stack.push(self.app._preserved_context)
        return response

    def request(
        self,
        path: str,
        *,
        method: str = "GET",
        headers: Optional[Union[dict, Headers]] = None,
        query_string: Optional[dict] = None,
        scheme: str = "http",
        root_path: str = "",
        http_version: str = "1.1",
    ) -> TestHTTPConnectionProtocol:
        headers, path, query_string_bytes = make_test_headers_path_and_query_string(
            self.app, path, headers, query_string)
        scope = self._build_scope("http", path, method, headers,
                                  query_string_bytes, scheme, root_path,
                                  http_version)
        return self.http_connection_class(
            self.app, scope, _preserve_context=self.preserve_context)

    def websocket(
        self,
        path: str,
        *,
        headers: Optional[Union[dict, Headers]] = None,
        query_string: Optional[dict] = None,
        scheme: str = "ws",
        subprotocols: Optional[List[str]] = None,
        root_path: str = "",
        http_version: str = "1.1",
    ) -> TestWebsocketConnectionProtocol:
        headers, path, query_string_bytes = make_test_headers_path_and_query_string(
            self.app, path, headers, query_string)
        scope = self._build_scope("websocket", path, "GET", headers,
                                  query_string_bytes, scheme, root_path,
                                  http_version)
        return self.websocket_connection_class(self.app, scope)

    async def delete(self, *args: Any, **kwargs: Any) -> Response:
        """Make a DELETE request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="DELETE", **kwargs)

    async def get(self, *args: Any, **kwargs: Any) -> Response:
        """Make a GET request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="GET", **kwargs)

    async def head(self, *args: Any, **kwargs: Any) -> Response:
        """Make a HEAD request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="HEAD", **kwargs)

    async def options(self, *args: Any, **kwargs: Any) -> Response:
        """Make a OPTIONS request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="OPTIONS", **kwargs)

    async def patch(self, *args: Any, **kwargs: Any) -> Response:
        """Make a PATCH request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="PATCH", **kwargs)

    async def post(self, *args: Any, **kwargs: Any) -> Response:
        """Make a POST request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="POST", **kwargs)

    async def put(self, *args: Any, **kwargs: Any) -> Response:
        """Make a PUT request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="PUT", **kwargs)

    async def trace(self, *args: Any, **kwargs: Any) -> Response:
        """Make a TRACE request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="TRACE", **kwargs)

    def set_cookie(
        self,
        server_name: str,
        key: str,
        value: str = "",
        max_age: Optional[Union[int, timedelta]] = None,
        expires: Optional[Union[int, float, datetime]] = None,
        path: str = "/",
        domain: Optional[str] = None,
        secure: bool = False,
        httponly: bool = False,
        samesite: Optional[str] = None,
        charset: str = "utf-8",
    ) -> None:
        """Set a cookie in the cookie jar.

        The arguments are the standard cookie morsels and this is a
        wrapper around the stdlib SimpleCookie code.
        """
        cookie = dump_cookie(  # type: ignore
            key,
            value=value,
            max_age=max_age,
            expires=expires,
            path=path,
            domain=domain,
            secure=secure,
            httponly=httponly,
            charset=charset,
            samesite=samesite,
        )
        self.cookie_jar.extract_cookies(
            _TestCookieJarResponse(Headers([("set-cookie", cookie)
                                            ])),  # type: ignore
            U2Request(f"http://{server_name}{path}"),
        )

    def delete_cookie(self,
                      server_name: str,
                      key: str,
                      path: str = "/",
                      domain: Optional[str] = None) -> None:
        """Delete a cookie (set to expire immediately)."""
        self.set_cookie(server_name,
                        key,
                        expires=0,
                        max_age=0,
                        path=path,
                        domain=domain)

    @asynccontextmanager
    async def session_transaction(
        self,
        path: str = "/",
        *,
        method: str = "GET",
        headers: Optional[Union[dict, Headers]] = None,
        query_string: Optional[dict] = None,
        scheme: str = "http",
        data: Optional[AnyStr] = None,
        form: Optional[dict] = None,
        json: Any = sentinel,
        root_path: str = "",
        http_version: str = "1.1",
    ) -> AsyncGenerator[Session, None]:
        if self.cookie_jar is None:
            raise RuntimeError(
                "Session transactions only make sense with cookies enabled.")

        if headers is None:
            headers = Headers()
        elif not isinstance(headers, Headers):
            headers = Headers(headers)
        for cookie in self.cookie_jar:
            headers.add("cookie", f"{cookie.name}={cookie.value}")

        original_request_ctx = _request_ctx_stack.top
        async with self.app.test_request_context(
                path,
                method=method,
                headers=headers,
                query_string=query_string,
                scheme=scheme,
                data=data,
                form=form,
                json=json,
                root_path=root_path,
                http_version=http_version,
        ) as ctx:
            session_interface = self.app.session_interface
            session = await session_interface.open_session(
                self.app, ctx.request)
            if session is None:
                raise RuntimeError(
                    "Error opening the sesion. Check the secret_key?")

            _request_ctx_stack.push(original_request_ctx)
            try:
                yield session
            finally:
                _request_ctx_stack.pop()

            response = self.app.response_class(b"")
            if not session_interface.is_null_session(session):
                await session_interface.save_session(self.app, session,
                                                     response)
            self.cookie_jar.extract_cookies(
                _TestCookieJarResponse(response.headers),  # type: ignore
                U2Request(ctx.request.url),
            )

    async def __aenter__(self) -> "QuartClient":
        if self.preserve_context:
            raise RuntimeError("Cannot nest client invocations")
        self.preserve_context = True
        return self

    async def __aexit__(self, exc_type: type, exc_value: BaseException,
                        tb: TracebackType) -> None:
        self.preserve_context = False

        while True:
            top = _request_ctx_stack.top

            if top is not None and top.preserved:
                await top.pop(None)
            else:
                break

    async def _make_request(
        self,
        path: str,
        method: str,
        headers: Optional[Union[dict, Headers]],
        data: Optional[AnyStr],
        form: Optional[dict],
        query_string: Optional[dict],
        json: Any,
        scheme: str,
        root_path: str,
        http_version: str,
    ) -> Response:
        headers, path, query_string_bytes = make_test_headers_path_and_query_string(
            self.app, path, headers, query_string)
        request_data, body_headers = make_test_body_with_headers(
            data, form, json, self.app)
        headers.update(**body_headers)  # type: ignore

        if self.cookie_jar is not None:
            for cookie in self.cookie_jar:
                headers.add("cookie", f"{cookie.name}={cookie.value}")

        scope = self._build_scope("http", path, method, headers,
                                  query_string_bytes, scheme, root_path,
                                  http_version)
        async with self.http_connection_class(
                self.app, scope,
                _preserve_context=self.preserve_context) as connection:
            await connection.send(request_data)
            await connection.send_complete()
        response = await connection.as_response()
        if self.cookie_jar is not None:
            self.cookie_jar.extract_cookies(
                _TestCookieJarResponse(response.headers),  # type: ignore
                U2Request(f"{scheme}://{headers['host']}{path}"),
            )
        self.push_promises.extend(connection.push_promises)
        return response

    @overload
    def _build_scope(
        self,
        type_: Literal["http"],
        path: str,
        method: str,
        headers: Headers,
        query_string: bytes,
        scheme: str,
        root_path: str,
        http_version: str,
    ) -> HTTPScope:
        ...

    @overload
    def _build_scope(
        self,
        type_: Literal["websocket"],
        path: str,
        method: str,
        headers: Headers,
        query_string: bytes,
        scheme: str,
        root_path: str,
        http_version: str,
    ) -> WebsocketScope:
        ...

    def _build_scope(
        self,
        type_: str,
        path: str,
        method: str,
        headers: Headers,
        query_string: bytes,
        scheme: str,
        root_path: str,
        http_version: str,
    ) -> Scope:
        scope = {
            "type": type_,
            "http_version": http_version,
            "asgi": {
                "spec_version": "2.1"
            },
            "method": method,
            "scheme": scheme,
            "path": path,
            "raw_path": path.encode("ascii"),
            "query_string": query_string,
            "root_path": root_path,
            "headers": encode_headers(headers),
            "extensions": {},
            "_quart._preserve_context": self.preserve_context,
        }
        if type_ == "http" and http_version in {"2", "3"}:
            scope["extensions"] = {"http.response.push": {}}
        elif type_ == "websocket":
            scope["extensions"] = {"websocket.http.response": {}}
        return cast(Scope, scope)
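
A brief usage sketch of this test client (the app and route are illustrative, not part of the original): Quart exposes the client via app.test_client(), and the coroutine methods above map one-to-one onto HTTP verbs.

import asyncio
from quart import Quart

app = Quart(__name__)

@app.route("/")
async def index():
    return "hello"

async def main():
    client = app.test_client()
    response = await client.get("/")
    # the shared cookie jar keeps session cookies across requests
    assert (await response.get_data()) == b"hello"

asyncio.run(main())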
Esempio n. 42
0
class QuartClient:
    """A Client bound to an app for testing.

    This should be used to make requests and receive responses from
    the app for testing purposes. This is best used via
    :attr:`~quart.app.Quart.test_client` method.
    """

    def __init__(self, app: "Quart", use_cookies: bool = True) -> None:
        self.cookie_jar: Optional[CookieJar]
        if use_cookies:
            self.cookie_jar = CookieJar()
        else:
            self.cookie_jar = None
        self.app = app
        self.push_promises: List[Tuple[str, Headers]] = []

    async def open(
        self,
        path: str,
        *,
        method: str = "GET",
        headers: Optional[Union[dict, Headers]] = None,
        data: Optional[AnyStr] = None,
        form: Optional[dict] = None,
        query_string: Optional[dict] = None,
        json: Any = sentinel,
        scheme: str = "http",
        follow_redirects: bool = False,
        root_path: str = "",
        http_version: str = "1.1",
    ) -> Response:
        """Open a request to the app associated with this client.

        Arguments:
            path:
                The path to request. If the query_string argument is not
                defined this argument will be partitioned on a '?' with the
                following part being considered the query_string.
            method:
                The method to make the request with, defaults to 'GET'.
            headers:
                Headers to include in the request.
            data:
                Raw data to send in the request body.
            form:
                Data to send form encoded in the request body.
            query_string:
                To send as a dictionary, alternatively the query_string can be
                determined from the path.
            json:
                Data to send json encoded in the request body.
            scheme:
                The scheme to use in the request, default http.
            follow_redirects:
                Whether or not a redirect response should be followed, defaults
                to False.

        Returns:
            The response from the app handling the request.
        """
        response = await self._make_request(
            path, method, headers, data, form, query_string, json, scheme, root_path, http_version
        )
        if follow_redirects:
            while response.status_code >= 300 and response.status_code <= 399:
                # Most browsers respond to an HTTP 302 with a GET request to the new location,
                # despite what the HTTP spec says. HTTP 303 should always be responded to with
                # a GET request.
                if response.status_code == 302 or response.status_code == 303:
                    method = "GET"
                response = await self._make_request(
                    response.location,
                    method,
                    headers,
                    data,
                    form,
                    query_string,
                    json,
                    scheme,
                    root_path,
                    http_version,
                )
        return response

    async def _make_request(
        self,
        path: str,
        method: str = "GET",
        headers: Optional[Union[dict, Headers]] = None,
        data: Optional[AnyStr] = None,
        form: Optional[dict] = None,
        query_string: Optional[dict] = None,
        json: Any = sentinel,
        scheme: str = "http",
        root_path: str = "",
        http_version: str = "1.1",
    ) -> Response:
        headers, path, query_string_bytes = make_test_headers_path_and_query_string(
            self.app, path, headers, query_string
        )

        request_data, body_headers = make_test_body_with_headers(data, form, json, self.app)
        # Replace with headers.update(**body_headers) when Werkzeug
        # supports https://github.com/pallets/werkzeug/pull/1687
        for key, value in body_headers.items():
            headers[key] = value

        if self.cookie_jar is not None:
            for cookie in self.cookie_jar:
                headers.add("cookie", f"{cookie.name}={cookie.value}")

        request = self.app.request_class(
            method,
            scheme,
            path,
            query_string_bytes,
            headers,
            root_path,
            http_version,
            send_push_promise=self._send_push_promise,
        )
        request.body.set_result(request_data)
        response = await self._handle_request(request)
        if self.cookie_jar is not None:
            self.cookie_jar.extract_cookies(
                _TestCookieJarResponse(response.headers),  # type: ignore
                U2Request(request.url),
            )
        return response

    async def _handle_request(self, request: Request) -> Response:
        return await asyncio.ensure_future(self.app.handle_request(request))

    async def _send_push_promise(self, path: str, headers: Headers) -> None:
        self.push_promises.append((path, headers))

    async def delete(self, *args: Any, **kwargs: Any) -> Response:
        """Make a DELETE request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="DELETE", **kwargs)

    async def get(self, *args: Any, **kwargs: Any) -> Response:
        """Make a GET request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="GET", **kwargs)

    async def head(self, *args: Any, **kwargs: Any) -> Response:
        """Make a HEAD request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="HEAD", **kwargs)

    async def options(self, *args: Any, **kwargs: Any) -> Response:
        """Make a OPTIONS request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="OPTIONS", **kwargs)

    async def patch(self, *args: Any, **kwargs: Any) -> Response:
        """Make a PATCH request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="PATCH", **kwargs)

    async def post(self, *args: Any, **kwargs: Any) -> Response:
        """Make a POST request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="POST", **kwargs)

    async def put(self, *args: Any, **kwargs: Any) -> Response:
        """Make a PUT request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="PUT", **kwargs)

    async def trace(self, *args: Any, **kwargs: Any) -> Response:
        """Make a TRACE request.

        See :meth:`~quart.testing.QuartClient.open` for argument
        details.
        """
        return await self.open(*args, method="TRACE", **kwargs)

    def set_cookie(
        self,
        server_name: str,
        key: str,
        value: str = "",
        max_age: Optional[Union[int, timedelta]] = None,
        expires: Optional[Union[int, float, datetime]] = None,
        path: str = "/",
        domain: Optional[str] = None,
        secure: bool = False,
        httponly: bool = False,
        samesite: Optional[str] = None,
        charset: str = "utf-8",
    ) -> None:
        """Set a cookie in the cookie jar.

        The arguments are the standard cookie morsels; this is a thin
        wrapper around Werkzeug's ``dump_cookie`` serializer.
        """
        cookie = dump_cookie(  # type: ignore
            key,
            value=value,
            max_age=max_age,
            expires=expires,
            path=path,
            domain=domain,
            secure=secure,
            httponly=httponly,
            charset=charset,
            samesite=samesite,
        )
        self.cookie_jar.extract_cookies(
            _TestCookieJarResponse(Headers([("set-cookie", cookie)])),  # type: ignore
            U2Request(f"http://{server_name}{path}"),
        )

    def delete_cookie(
        self, server_name: str, key: str, path: str = "/", domain: Optional[str] = None
    ) -> None:
        """Delete a cookie (set to expire immediately)."""
        self.set_cookie(server_name, key, expires=0, max_age=0, path=path, domain=domain)

    @asynccontextmanager
    async def websocket(
        self,
        path: str,
        *,
        headers: Optional[Union[dict, Headers]] = None,
        query_string: Optional[dict] = None,
        scheme: str = "ws",
        subprotocols: Optional[List[str]] = None,
        root_path: str = "",
        http_version: str = "1.1",
    ) -> AsyncGenerator[_TestingWebsocket, None]:
        headers, path, query_string_bytes = make_test_headers_path_and_query_string(
            self.app, path, headers, query_string
        )
        queue: asyncio.Queue = asyncio.Queue()
        websocket_client = _TestingWebsocket(queue)

        subprotocols = subprotocols or []
        websocket = self.app.websocket_class(
            path,
            query_string_bytes,
            scheme,
            headers,
            root_path,
            http_version,
            subprotocols,
            queue.get,
            websocket_client.local_queue.put,
            websocket_client.accept,
        )
        adapter = self.app.create_url_adapter(websocket)
        try:
            adapter.match()
        except WBadRequest:
            raise BadRequest()

        websocket_client.task = asyncio.ensure_future(self.app.handle_websocket(websocket))

        try:
            yield websocket_client
        finally:
            websocket_client.task.cancel()
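
A quick sketch of how the client above is typically driven in a test. The route and names are invented for illustration; only the `set_cookie()`/`open()` behaviour shown in the excerpt is assumed.

import asyncio
from quart import Quart, request

app = Quart(__name__)

@app.route("/whoami")
async def whoami():
    return request.cookies.get("user", "anonymous")

async def main():
    client = app.test_client()
    # set_cookie() stores the cookie in the client's jar; every
    # subsequent request replays it as a Cookie header.
    client.set_cookie("localhost", "user", "penguin")
    response = await client.get("/whoami")
    assert await response.get_data() == b"penguin"

asyncio.run(main())
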
    def set_api_key(self, api_key):
        if api_key == 'YOUR API KEY':
            api_key = None

        self.api_key = api_key
        self.jar = CookieJar()
Esempio n. 44
0
class Cookies(MutableMapping):
    """
    HTTP Cookies, as a mutable mapping.
    """

    def __init__(self, cookies: typing.Optional[CookieTypes] = None) -> None:
        if cookies is None or isinstance(cookies, dict):
            self.jar = CookieJar()
            if isinstance(cookies, dict):
                for key, value in cookies.items():
                    self.set(key, value)
        elif isinstance(cookies, list):
            self.jar = CookieJar()
            for key, value in cookies:
                self.set(key, value)
        elif isinstance(cookies, Cookies):
            self.jar = CookieJar()
            for cookie in cookies.jar:
                self.jar.set_cookie(cookie)
        else:
            self.jar = cookies

    def extract_cookies(self, response: Response) -> None:
        """
        Loads any cookies based on the response `Set-Cookie` headers.
        """
        urllib_response = self._CookieCompatResponse(response)
        urllib_request = self._CookieCompatRequest(response.request)

        self.jar.extract_cookies(urllib_response, urllib_request)  # type: ignore

    def set_cookie_header(self, request: Request) -> None:
        """
        Sets an appropriate 'Cookie:' HTTP header on the `Request`.
        """
        urllib_request = self._CookieCompatRequest(request)
        self.jar.add_cookie_header(urllib_request)

    def set(self, name: str, value: str, domain: str = "", path: str = "/") -> None:
        """
        Set a cookie value by name. May optionally include domain and path.
        """
        kwargs = {
            "version": 0,
            "name": name,
            "value": value,
            "port": None,
            "port_specified": False,
            "domain": domain,
            "domain_specified": bool(domain),
            "domain_initial_dot": domain.startswith("."),
            "path": path,
            "path_specified": bool(path),
            "secure": False,
            "expires": None,
            "discard": True,
            "comment": None,
            "comment_url": None,
            "rest": {"HttpOnly": None},
            "rfc2109": False,
        }
        cookie = Cookie(**kwargs)  # type: ignore
        self.jar.set_cookie(cookie)

    def get(  # type: ignore
        self,
        name: str,
        default: typing.Optional[str] = None,
        domain: typing.Optional[str] = None,
        path: typing.Optional[str] = None,
    ) -> typing.Optional[str]:
        """
        Get a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to retrieve.
        """
        value = None
        for cookie in self.jar:
            if cookie.name == name:
                if domain is None or cookie.domain == domain:
                    if path is None or cookie.path == path:
                        if value is not None:
                            message = f"Multiple cookies exist with name={name}"
                            raise CookieConflict(message)
                        value = cookie.value

        if value is None:
            return default
        return value

    def delete(
        self,
        name: str,
        domain: typing.Optional[str] = None,
        path: typing.Optional[str] = None,
    ) -> None:
        """
        Delete a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to delete.
        """
        if domain is not None and path is not None:
            return self.jar.clear(domain, path, name)

        remove = []
        for cookie in self.jar:
            if cookie.name == name:
                if domain is None or cookie.domain == domain:
                    if path is None or cookie.path == path:
                        remove.append(cookie)

        for cookie in remove:
            self.jar.clear(cookie.domain, cookie.path, cookie.name)

    def clear(self, domain: typing.Optional[str] = None, path: typing.Optional[str] = None) -> None:
        """
        Delete all cookies. Optionally include a domain and path in
        order to only delete a subset of all the cookies.
        """
        args = []
        if domain is not None:
            args.append(domain)
        if path is not None:
            assert domain is not None
            args.append(path)
        self.jar.clear(*args)

    def update(self, cookies: typing.Optional[CookieTypes] = None) -> None:  # type: ignore
        cookies = Cookies(cookies)
        for cookie in cookies.jar:
            self.jar.set_cookie(cookie)

    def __setitem__(self, name: str, value: str) -> None:
        return self.set(name, value)

    def __getitem__(self, name: str) -> str:
        value = self.get(name)
        if value is None:
            raise KeyError(name)
        return value

    def __delitem__(self, name: str) -> None:
        return self.delete(name)

    def __len__(self) -> int:
        return len(self.jar)

    def __iter__(self) -> typing.Iterator[str]:
        return (cookie.name for cookie in self.jar)

    def __bool__(self) -> bool:
        for _ in self.jar:
            return True
        return False

    class _CookieCompatRequest(urllib.request.Request):
        """
        Wraps a `Request` instance up in a compatibility interface suitable
        for use with `CookieJar` operations.
        """

        def __init__(self, request: Request) -> None:
            super().__init__(
                url=str(request.url),
                headers=dict(request.headers),
                method=request.method,
            )
            self.request = request

        def add_unredirected_header(self, key: str, value: str) -> None:
            super().add_unredirected_header(key, value)
            self.request.headers[key] = value

    class _CookieCompatResponse:
        """
        Wraps a `Response` instance up in a compatibility interface suitable
        for use with `CookieJar` operations.
        """

        def __init__(self, response: Response):
            self.response = response

        def info(self) -> email.message.Message:
            info = email.message.Message()
            for key, value in self.response.headers.multi_items():
                #  Note that setting `info[key]` here is an "append" operation,
                # not a "replace" operation.
                # https://docs.python.org/3/library/email.compat32-message.html#email.message.Message.__setitem__
                info[key] = value
            return info
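
Because `Cookies` implements `MutableMapping`, it can be driven like a plain dict while the `CookieJar` underneath keeps the full domain/path metadata. A small illustrative sketch, using only the behaviour defined above:

cookies = Cookies({"session": "abc123"})  # a plain dict is accepted
cookies.set("tracker", "xyz", domain=".example.com", path="/app")
cookies["theme"] = "dark"                 # __setitem__ delegates to set()

assert cookies["session"] == "abc123"     # __getitem__ delegates to get()
assert sorted(cookies) == ["session", "theme", "tracker"]

del cookies["theme"]                      # __delitem__ delegates to delete()
cookies.clear(domain=".example.com")      # drops only that domain's cookies
assert list(cookies) == ["session"]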
Esempio n. 45
0
class HttpTransport(Transport):
    """
    Basic HTTP transport implemented using urllib.request; it provides
    for cookies & proxies but no authentication.

    """

    def __init__(self, **kwargs):
        """
        @param kwargs: Keyword arguments.
            - B{proxy} - An HTTP proxy to be specified on requests.
                 The proxy is defined as {protocol:proxy,}
                    - type: I{dict}
                    - default: {}
            - B{timeout} - Set the URL open timeout (seconds).
                    - type: I{float}
                    - default: 90

        """
        Transport.__init__(self)
        Unskin(self.options).update(kwargs)
        self.cookiejar = CookieJar()
        self.proxy = {}
        self.urlopener = None

    def open(self, request):
        try:
            url = self.__get_request_url_for_urllib(request)
            log.debug('opening (%s)', url)
            u2request = urllib.request.Request(url)
            self.proxy = self.options.proxy
            return self.u2open(u2request)
        except urllib.error.HTTPError as e:
            raise TransportError(str(e), e.code, e.fp)

    def send(self, request):
        url = self.__get_request_url_for_urllib(request)
        msg = request.message
        headers = request.headers
        try:
            u2request = urllib.request.Request(url, msg, headers)
            self.addcookies(u2request)
            self.proxy = self.options.proxy
            request.headers.update(u2request.headers)
            log.debug('sending:\n%s', request)
            fp = self.u2open(u2request)
            self.getcookies(fp, u2request)
            headers = fp.headers
            if sys.version_info < (3, 0):
                headers = headers.dict
            reply = Reply(http.client.OK, headers, fp.read())
            log.debug('received:\n%s', reply)
            return reply
        except urllib.error.HTTPError as e:
            if e.code not in (http.client.ACCEPTED, http.client.NO_CONTENT):
                raise TransportError(e.msg, e.code, e.fp)

    def addcookies(self, u2request):
        """
        Add cookies in the cookiejar to the request.

        @param u2request: A urllib2 request.
        @rtype: u2request: urllib2.Request.

        """
        self.cookiejar.add_cookie_header(u2request)

    def getcookies(self, fp, u2request):
        """
        Add cookies from the response to the cookiejar.

        @param u2request: A urllib2 request.
        @rtype: u2request: urllib2.Request.

        """
        self.cookiejar.extract_cookies(fp, u2request)

    def u2open(self, u2request):
        """
        Open a connection.

        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request.
        @return: The opened file-like urllib2 object.
        @rtype: fp

        """
        tm = self.options.timeout
        url = self.u2opener()
        if (sys.version_info < (3, 0)) and (self.u2ver() < 2.6):
            socket.setdefaulttimeout(tm)
            return url.open(u2request)
        return url.open(u2request, timeout=tm)

    def u2opener(self):
        """
        Create a urllib opener.

        @return: An opener.
        @rtype: I{OpenerDirector}

        """
        if self.urlopener is None:
            return urllib.request.build_opener(*self.u2handlers())
        return self.urlopener

    def u2handlers(self):
        """
        Get a collection of urllib handlers.

        @return: A list of handlers to be installed in the opener.
        @rtype: [Handler,...]

        """
        return [urllib.request.ProxyHandler(self.proxy)]

    def u2ver(self):
        """
        Get the major/minor version of the urllib2 lib.

        @return: The urllib2 version.
        @rtype: float

        """
        try:
            part = urllib.request.__version__.split('.', 1)
            return float('.'.join(part))
        except Exception as e:
            log.exception(e)
            return 0

    def __deepcopy__(self, memo={}):
        clone = self.__class__()
        p = Unskin(self.options)
        cp = Unskin(clone.options)
        cp.update(p)
        return clone

    @staticmethod
    def __get_request_url_for_urllib(request):
        """
        Returns the given request's URL, properly encoded for use with urllib.

        We expect that the given request object already verified that the URL
        contains ASCII characters only and stored it as a native str value.

        urllib accepts URL information as a native str value and may break
        unexpectedly if given URL information in another format.

        Python 3.x http.client implementation must be given a unicode string
        and not a bytes object and the given string is internally converted to
        a bytes object using an explicitly specified ASCII encoding.

        Python 2.7 httplib implementation expects the URL passed to it to not
        be a unicode string. If it is, then passing it to the underlying
        httplib Request object will cause that object to forcefully convert all
        of its data to unicode, assuming that data contains ASCII data only and
        raising a UnicodeDecodeError exception if it does not (caused by simple
        unicode + string concatenation).

        Python 2.4 httplib implementation does not really care about this as it
        does not use the internal optimization present in the Python 2.7
        implementation causing all the requested data to be converted to
        unicode.

        """
        assert isinstance(request.url, str)
        return request.url
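
A minimal sketch of wiring the transport above into a client, assuming the suds library these snippets appear to come from; the WSDL URL and proxy address are placeholders:

from suds.client import Client

transport = HttpTransport(timeout=30, proxy={"http": "http://proxy.local:8080"})
client = Client("http://example.com/service?wsdl", transport=transport)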
Esempio n. 46
0
from bs4 import BeautifulSoup
import threading
import traceback
from utility.connections import ExtSSHConnection

import re

try:  # Python 2.x
    import urllib2
    from urllib import urlretrieve
    from cookielib import CookieJar
except ImportError:  # Python 3.x
    from urllib import request as urllib2
    from urllib.request import urlretrieve
    from http.cookiejar import CookieJar

from utility import BackgroundTask
from config import user_agent, XML_decoder, request_timeout

urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor(CookieJar())))

pac_list = ("google", "xhamster.com", "t66y.com")


def PAC_list(url: str) -> bool:
    return any((
        any((
            host.find(keyword) >= 0 for keyword in pac_list
        )) for host in re.findall("^(https://.*?/|http://.*?/)", url)
    ))
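
For reference, the matcher above returns True as soon as the URL's scheme-and-host prefix contains any keyword from pac_list:

assert PAC_list("https://www.google.com/search?q=x") is True
assert PAC_list("http://t66y.com/index.php") is True
assert PAC_list("https://example.org/page") is False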


class URLReadThread(threading.Thread):
    def __init__(self, URL=""):
        super(URLReadThread, self).__init__()
Esempio n. 47
0
    def _on_log_in(self, _widget):
        import gi
        gi.require_version('WebKit2', '4.0')
        from gi.repository import Gtk, WebKit2, Soup
        loop = GLib.MainLoop()
        window = Gtk.Window()
        window.set_title("Log in to GMail")
        window.set_default_size(800, 600)
        window.set_destroy_with_parent(True)
        window.set_transient_for(self._window)
        window.set_modal(True)
        scrolled_window = Gtk.ScrolledWindow()
        window.add(scrolled_window)
        webview = WebKit2.WebView()
        webview.load_uri(GMAIL_URL)
        scrolled_window.add(webview)
        window.connect("destroy", lambda _: loop.quit())
        got_to_inbox = False

        def _on_load_changed(_, load_event):
            if load_event != WebKit2.LoadEvent.FINISHED:
                return
            uri = webview.get_uri()
            nonlocal got_to_inbox
            if uri == "https://mail.google.com/mail/u/0/#inbox":
                got_to_inbox = True
                window.destroy()

        webview.connect("load-changed", _on_load_changed)
        window.show_all()
        loop.run()
        cookie_headers = None

        def _got_cookies(cookies, result, data):
            try:
                headers = []
                for cookie in cookies.get_cookies_finish(result):
                    headers.append(cookie.to_set_cookie_header())
                nonlocal cookie_headers
                cookie_headers = headers
            except Exception:
                logging.warning("getting cookies failed", exc_info=True)
            loop.quit()

        webview.get_context().get_cookie_manager().get_cookies(
            GMAIL_RSS_URL, None, _got_cookies, None)
        loop.run()
        window.destroy()
        if not cookie_headers:
            logging.warning("could not set cookies from login")
            return
        if not got_to_inbox:
            logging.warning("login dialog closed before login done")
            return
        cookie_jar = CookieJar()
        cookie_jar.extract_cookies(FakeHTTPResonse(cookie_headers),
                                   urllib.request.Request(GMAIL_RSS_URL))
        self._set_gmail_credentials(generate_gmail_cookies_key(), cookie_jar)

        self._on_entry_changed(None)
Esempio n. 48
0
import unittest
from selenium import webdriver
import time
from os import walk
from urllib.request import urlopen


f = []
mypath  =  r"C:\\Users\\lanka\\Desktop\\Lab\\DARPA\\Scrape_Web\\Updated_Downloads\\Journal of Business Research"
for (dirpath, dirnames, filenames) in walk(mypath):
    f.extend(filenames)
    break
# print(f[0])
print("downloaded folder has",len(f))

cj = CookieJar()
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/80.0.3987.132 Safari/537.36'}

pdf_urls = []
issue_urls_unrefined = []
issue_urls = []
start_vol = 62
end_vol = 105

for i in range(start_vol+1, end_vol+1): 
    # print(i)
    issue_urls_unrefined.append( "https://www.sciencedirect.com/journal/journal-of-business-research/vol/{0}/suppl/C".format(i) )

for ii in range(start_vol, end_vol+1): 
    # print(i)
    for j in range(1, 13):
Esempio n. 50
0
    def __init__(
        self,
        *,
        headers: Optional[dict] = None,
        method: str = "GET",
        name: str,
        off_cmd: str,
        off_data: Optional[Union[Mapping, str]] = None,
        on_cmd: str,
        on_data: Optional[Union[Mapping, str]] = None,
        state_cmd: Optional[str] = None,
        state_data: Optional[Union[Mapping, str]] = None,
        state_method: str = "GET",
        state_response_off: Optional[str] = None,
        state_response_on: Optional[str] = None,
        password: Optional[str] = None,
        port: int,
        user: Optional[str] = None,
    ) -> None:
        """Initialize a SimpleHTTPPlugin instance.

        Keyword Args:
            headers: Additional headers for both `on()` and `off()`
            method: HTTP method to be used for both `on()` and `off()`
            name: Name of the device
            off_cmd: URL to be called when turning device off
            off_data: Optional POST data to turn device off
            on_cmd: URL to be called when turning device on
            on_data: Optional POST data to turn device on
            state_cmd: URL to be called to determine device state
            state_data: Optional POST data to query device state
            state_method: HTTP method to be used for `get_state()`
            state_response_off: If this string is in the response to state_cmd,
                                the device is off.
            state_response_on: If this string is in the response to state_cmd,
                               the device is on.
            password: Password for HTTP authentication (basic or digest only)
            port: Port that this device will run on
            user: Username for HTTP authentication (basic or digest only)
        """
        self.method = method
        self.state_method = state_method
        self.headers = headers or {}

        self.on_cmd = on_cmd
        self.off_cmd = off_cmd
        self.state_cmd = state_cmd

        self.on_data = self._to_bytes(on_data)
        self.off_data = self._to_bytes(off_data)
        self.state_data = self._to_bytes(state_data)

        self.state_response_on = state_response_on
        self.state_response_off = state_response_off

        if user and password:
            manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
            manager.add_password(None, (on_cmd, off_cmd), user, password)

            jar = CookieJar()
            cookie_handler = urllib.request.HTTPCookieProcessor(jar)

            basic_handler = urllib.request.HTTPBasicAuthHandler(manager)
            digest_handler = urllib.request.HTTPDigestAuthHandler(manager)

            opener = urllib.request.build_opener(basic_handler, digest_handler,
                                                 cookie_handler)
            self.urlopen = opener.open
        else:
            self.urlopen = urllib.request.urlopen

        super().__init__(name=name, port=port)
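
An illustrative instantiation of the plugin above; all values here are invented, and passing user and password exercises the basic/digest opener branch:

plug = SimpleHTTPPlugin(
    name="desk lamp",
    port=12345,
    on_cmd="http://192.168.1.50/relay?state=on",
    off_cmd="http://192.168.1.50/relay?state=off",
    user="admin",       # with password, selects the auth-enabled opener
    password="hunter2",
)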
Esempio n. 51
0
def next_page_of_friends(screen_name, next_cursor_id=None):
    """
    Raises urllib.error.HTTPError if the user is private or their screen name has changed
    """
    request_url = f"https://mobile.twitter.com/{screen_name}/following"
    if next_cursor_id:
        request_url += f"?cursor={next_cursor_id}"

    cookie_jar = CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar))
    headers = [('Host', "twitter.com"),
               ('User-Agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"),
               ('Accept', "application/json, text/javascript, */*; q=0.01"),
               ('Accept-Language', "de,en-US;q=0.7,en;q=0.3"),
               ('X-Requested-With', "XMLHttpRequest"),
               ('Referer', request_url), ('Connection', "keep-alive")]
    opener.addheaders = headers
    #print(type(opener)) #> <class 'urllib.request.OpenerDirector'>

    try:
        response = opener.open(request_url)
        #print(type(response)) #> <class 'http.client.HTTPResponse'>
    except urllib.error.HTTPError as err:  # consider allowing error to bubble up and be handled at the worker level (friend_collector.py)
        if VERBOSE_SCRAPER:
            print("FRIENDS PAGE NOT FOUND:", screen_name.upper())
            breakpoint()
        return [], None

    response_body = response.read()
    #print(type(response_body)) #> bytes

    soup = BeautifulSoup(response_body.decode(), "html.parser")
    #print(type(soup)) #> <class 'bs4.BeautifulSoup'>
    #print(soup.prettify())

    #
    # <span class="count">262</span>
    #
    #friends_count = int(soup.find("span", "count").text)
    #print("FRIENDS COUNT (TOTAL / EXPECTED):", friends_count)

    #
    # <form action="/i/guest/follow/SCREEN_NAME_X" method="post">
    #   <span class="m2-auth-token">
    #     <input name="authenticity_token" type="hidden" value="..."/>
    #   </span>
    #   <span class="w-button-common w-button-follow">
    #     <input alt="..." src="https://ma.twimg.com/twitter-mobile/.../images/sprites/followplus.gif" type="image"/>
    #   </span>
    # </form>
    #
    forms = soup.find_all("form")
    substr = "/i/guest/follow/"
    friend_names = [
        f.attrs["action"].replace(substr, "") for f in forms
        if substr in f.attrs["action"]
    ]
    if VERBOSE_SCRAPER: print("FRIENDS PAGE:", len(friend_names))  #> 20

    try:
        #
        # <div class="w-button-more">
        #   <a href="/SCREEN_NAME/following?cursor=CURSOR_ID">...</a>
        # </div>
        #
        next_link = soup.find("div", "w-button-more").find("a")
        next_cursor_id = next_link.attrs["href"].split(
            "/following?cursor=")[-1]
        #print("NEXT CURSOR ID:", next_cursor_id)
    except AttributeError as err:
        # handle AttributeError: 'NoneType' object has no attribute 'find'
        # because the last page doesn't have a next page or corresponding "w-button-more"
        next_cursor_id = None

    return friend_names, next_cursor_id
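
Because each call returns one page of names plus a cursor for the next page, callers typically loop until the cursor comes back None. A sketch (the screen name is a placeholder):

def all_friends(screen_name):
    names, cursor = next_page_of_friends(screen_name)
    while cursor:
        page, cursor = next_page_of_friends(screen_name, cursor)
        names.extend(page)
    return names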
Esempio n. 52
0
    def create_new(self):
        self._error = None

        if not (EMAIL and PASSWORD):
            self._error = 'Incorrect login data'
            logger.error(self._error)
            return False

        login_data = {
            "act": "users",
            "type": "login",
            "mail": EMAIL,
            "pass": PASSWORD,
            "need_captcha": "",
            "captcha": "",
            "rem": 1
        }

        url = SITE_URL + '/ajaxik.php?'
        params = parse.urlencode(login_data).encode('utf-8')

        cjar = CookieJar()
        if proxy['enable']:
            opener = request.build_opener(
                request.ProxyHandler(proxy['proxy_urls']),
                request.HTTPCookieProcessor(cjar))
            logger.debug('proxy used')
        else:
            opener = request.build_opener(request.HTTPCookieProcessor(cjar))

        try:
            response = opener.open(url, params).read().decode('utf-8')
        except Exception as e:
            self._error = 'Connection failed'
            logger.error('%s %s', self._error, e)
            return False

        result = json.loads(response)

        if 'error' in result:
            self._error = 'Incorrect login data'

        elif 'need_captcha' in result:
            self._error = 'Captcha requested'

        else:
            for cookie in cjar:
                if cookie.name == 'lf_session':
                    self.time = datetime.now()
                    self.token = cookie.value

                    logger.info('%s %s', self.token, self.time)

                    return True

            else:
                self._error = 'Token problem'

        logger.error(self._error)

        return False
Esempio n. 53
0
class HttpTransport(Transport):
    """
    HTTP transport using urllib2. Provides a basic HTTP transport
    that supports cookies and proxies, but no authentication.
    """
    
    def __init__(self, **kwargs):
        """
        @param kwargs: Keyword arguments.
            - B{proxy} - An http proxy to be specified on requests.
                 The proxy is defined as {protocol:proxy,}
                    - type: I{dict}
                    - default: {}
            - B{timeout} - Set the url open timeout (seconds).
                    - type: I{float}
                    - default: 90
        """
        Transport.__init__(self)
        Unskin(self.options).update(kwargs)
        self.cookiejar = CookieJar()
        self.proxy = {}
        self.urlopener = None
        
    def open(self, request):
        try:
            url = request.url
            log.debug('opening (%s)', url)
            u2request = u2.Request(url)
            self.proxy = self.options.proxy
            return self.u2open(u2request)
        except HTTPError as e:
            raise TransportError(str(e), e.code, e.fp)

    def send(self, request):
        result = None
        url = request.url
        msg = request.message
        headers = request.headers
        try:
            u2request = u2.Request(url, msg, headers)
            self.addcookies(u2request)
            self.proxy = self.options.proxy
            request.headers.update(u2request.headers)
            log.debug('sending:\n%s', request)
            fp = self.u2open(u2request)
            self.getcookies(fp, u2request)
            #result = Reply(200, fp.headers.dict, fp.read())
            #print(str(fp))
            result = Reply(200, fp.headers, fp.read())
            log.debug('received:\n%s', result)
        except HTTPError as e:
            if e.code in (202,204):
                result = None
            else:
                raise TransportError(e.msg, e.code, e.fp)
        return result

    def addcookies(self, u2request):
        """
        Add cookies in the cookiejar to the request.
        @param u2request: A urllib2 request.
        @rtype: u2request: urllib2.Request.
        """
        self.cookiejar.add_cookie_header(u2request)
        
    def getcookies(self, fp, u2request):
        """
        Add cookies from the response to the cookiejar.
        @param u2request: A urllib2 request.
        @rtype: u2request: urllib2.Request.
        """
        self.cookiejar.extract_cookies(fp, u2request)
        
    def u2open(self, u2request):
        """
        Open a connection.
        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request.
        @return: The opened file-like urllib2 object.
        @rtype: fp
        """
        tm = self.options.timeout
        url = self.u2opener()
        if self.u2ver() < 2.6:
            socket.setdefaulttimeout(tm)
            return url.open(u2request)
        else:
            return url.open(u2request, timeout=tm)
            
    def u2opener(self):
        """
        Create a urllib opener.
        @return: An opener.
        @rtype: I{OpenerDirector}
        """
        if self.urlopener is None:
            return u2.build_opener(*self.u2handlers())
        else:
            return self.urlopener
        
    def u2handlers(self):
        """
        Get a collection of urllib handlers.
        @return: A list of handlers to be installed in the opener.
        @rtype: [Handler,...]
        """
        handlers = []
        handlers.append(u2.ProxyHandler(self.proxy))
        return handlers
            
    def u2ver(self):
        """
        Get the major/minor version of the urllib2 lib.
        @return: The urllib2 version.
        @rtype: float
        """
        try:
            part = u2.__version__.split('.', 1)
            n = float('.'.join(part))
            return n
        except Exception as e:
            log.exception(e)
            return 0
        
    def __deepcopy__(self, memo={}):
        clone = self.__class__()
        p = Unskin(self.options)
        cp = Unskin(clone.options)
        cp.update(p)
        return clone
Esempio n. 54
0
from urllib import request
from urllib import parse
# import requests
from http.cookiejar import CookieJar
# url="https://passport.csdn.net/login"
url = "https://www.csdn.net/"
# https://www.csdn.net/
# reqs=requests.get("https://www.csdn.net/")
# request.urlretrieve("https://www.lagou.com/","lagou.html")
# print(reqs.read()
# print(reqs.text)
cooks = CookieJar()
handler = request.HTTPCookieProcessor(cooks)
opener = request.build_opener(handler)

headers = {
    "cookie":
    "uuid_tt_dd=10_18967429480-1546066692684-819876; ADHOC_MEMBERSHIP_CLIENT_ID1.0=cd907735-4a72-d35a-9972-3475ee92a278; smidV2=20190314180351d753612bc456e4d739aac872786cff3400791083439258400; UN=qq_42852199; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_18967429480-1546066692684-819876!1788*1*PC_VC!5744*1*qq_42852199; dc_session_id=10_1556192991599.310188; _ga=GA1.2.843540842.1556673732; __gads=ID=218b0ee3cd9d1419:T=1582511085:S=ALNI_MaH-fKfl8vTsRc4j__N8vmMC3N-rw; firstDie=1; announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Fblog.csdn.net%252Fblogdevteam%252Farticle%252Fdetails%252F103603408%2522%252C%2522announcementCount%2522%253A0%252C%2522announcementExpire%2522%253A3600000%257D; SESSION=6bf07ad7-cc4f-4afe-9ae6-2374285446b4; TY_SESSION_ID=622931f8-1957-45d2-800c-6ee78b836d39; c-toolbar-writeguide=1; c_ref=https%3A//www.baidu.com/link%3Furl%3DK6Hwb4V7yz1uxBimTrM5drW1OyxqeaZfO_kHlvvUqRRI4kgo6jGvTIkF-FXv7bdMIo_jDru7lTNNpr2SjB3OR3h_k2O-1l3x5dlPgwsn2R7%26wd%3D%26eqid%3Db910503d0001398f000000065e7d66cf; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1585274953,1585275230,1585276200,1585276630; UserName=qq_42852199; UserInfo=1a127c512a7544458fe4a25dec7dd1e9; UserToken=1a127c512a7544458fe4a25dec7dd1e9; UserNick=lufeifana; AU=2BA; BT=1585277101610; p_uid=U000000; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1585277109; dc_tos=q7tzn8",
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
}
data = {
    # "username":'******',
    # "password":"******"
    'all': '18376098023',
    'pwd': '18376098023aA..'
}
# req=request.Request(url=url,data=parse.urlencode(data).encode('utf-8'),headers=headers)
req = request.Request(url=url, headers=headers)

# opener.open(req)
Esempio n. 55
0
class pyGoogleTrendsCsvDownloader(object):
    '''
    Google Trends Downloader.

    Recommended usage:

    from pyGoogleTrendsCsvDownloader import pyGoogleTrendsCsvDownloader
    r = pyGoogleTrendsCsvDownloader(username, password)
    r.get_csv_data(cat='0-958', geo='US-ME-500')

    '''
    def __init__(self, username, password, proxy=None):
        '''
        Provide login and password to be used to connect to Google Trends
        All immutable system variables are also defined here
        '''

        # The amount of time (in secs) that the script should wait before making a request.
        # This can be used to throttle the downloading speed to avoid hitting servers too hard.
        # It is further randomized.
        self.download_delay = 2

        self.service = "trendspro"
        self.url_service = "http://www.google.com/trends/"
        self.url_download = 'https://www.google.com/trends/trendsReport?'

        self.login_params = {}
        # These headers are necessary, otherwise Google will flag the request at your account level
        self.headers = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36'),
                        ("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
                        ("Accept-Language", "en-gb,en;q=0.8"),
                        ("Accept-Encoding", "gzip,deflate,sdch"),
                        ("referer", "https://www.google.com/trends/explore"),
                        ("pragma", "no-cache"),
                        ("cache-control", "no-cache"),
                        ]
        self.url_login = '******'+self.service+'&passive=1209600&continue='+self.url_service+'&followup='+self.url_service
        self.url_authenticate = 'https://accounts.google.com/accounts/ServiceLoginAuth'

        self.proxy = proxy
        self._authenticate(username, password)

    def _authenticate(self, username, password):
        '''
        Authenticate to Google:
        1 - make a GET request to the Login webpage so we can get the login form
        2 - make a POST request with email, password and login form input values
        '''
        # Make sure we get CSV results in English
        ck1 = Cookie(version=0, name='I4SUserLocale', value='en_US', port=None, port_specified=False, domain='.google.com', domain_specified=False,domain_initial_dot=False, path='', path_specified=False, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest=None)
        # This cookie is now mandatory
        # Not sure what the value represents but too many queries from the same value
        # lead to a Quota Exceeded error.
        # random_six_char = ''.join(random.choice('0123456789abcdef') for n in xrange(6))
        ck2 = Cookie(version=0, name='PREF', value='0000', port=None, port_specified=False, domain='.google.com', domain_specified=False,domain_initial_dot=False, path='', path_specified=False, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest=None)

        self.cj = CookieJar()
        self.cj.set_cookie(ck1)
        self.cj.set_cookie(ck2)
        if not self.proxy:
            self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj))
        else:
            proxy = urllib.request.ProxyHandler({'http': self.proxy,
                                                 'https': self.proxy})
            self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj), proxy)
        self.opener.addheaders = self.headers

        # Get all of the login form input values
        find_inputs = etree.XPath("//form[@id='gaia_loginform']//input")
        resp = self.opener.open(self.url_login)
        data = self.read_gzipped_response(resp).decode()

        try:
            xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True))
            for input in find_inputs(xmlTree):
                name = input.get('name')
                if name:
                    name = name.encode('utf8')
                    value = input.get('value', '').encode('utf8')
                    self.login_params[name] = value
        except Exception:
            raise AuthFailedException(("Exception while parsing: %s\n" % traceback.format_exc()))

        self.login_params["Email".encode('utf8')] = username.encode('utf8')
        self.login_params["Passwd".encode('utf8')] = password.encode('utf8')

        params = urllib.parse.urlencode(self.login_params)
        auth_resp = self.opener.open(self.url_authenticate, params.encode())

        # Testing whether Authentication was a success
        # I noticed that a correct auth sets a few cookies
        if not self.is_authentication_successfull(auth_resp):
            raise AuthFailedException('Warning: Authentication failed for user %s' % username)


    def is_authentication_successfull(self, response):
        '''
            Arbitrary way of us knowing whether the authentication succeeded or not:
            we look for a SSID cookie-set header value.
            I noticed that the 4 mandatory cookies were:
              - SID
              - SSID
              - HSID
              - PREF (but does not need to be set)
        '''
        if response:
            for h in response.headers._headers:
                if 'SSID' in h[1]:
                    return True

        return False

    def is_quota_exceeded(self, response):
        # TODO: double check that the check for the content-disposition
        # is correct
        if 'Content-Disposition' in [h[0] for h in response.headers._headers]:
            return False
        return True

    def read_gzipped_response(self, response):
        '''
            Since we are adding gzip to our http request Google can answer with gzipped data
            that needs uncompressing before handling.
            This method returns the text content of a Http response.
        '''
        if response.info().get('Content-Encoding') == 'gzip':
            f = gzip.decompress(response.read())
            content = f
        else:
            content = response.read()
        return content

    def get_csv_data(self, **kwargs):
        '''
        Download CSV reports
        '''
        time.sleep(self.download_delay)

        params = {
            'hl': 'en-us',
            'export': 1
        }
        params.update(kwargs)

        # Silly python with the urlencode method
        params = urllib.parse.urlencode(params).replace("+", "%20")
        response = self.opener.open(self.url_download + params)

        # Make sure quotas are not exceeded ;)
        if self.is_quota_exceeded(response):
            raise QuotaExceededException()

        return self.read_gzipped_response(response)
Esempio n. 56
0
# "http.cookiejar" module  is class of processing HTTP Cookie automatically.

# Create 
class http.cookiejar.CookieJar(plicy=None)


"""
Example
"""
# Ex1."The most common usage of http.cookiejar."
import http.cookiejar, urllib.request
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

# Ex2."Use Netscape, Mozilla or Lynx Cookie when open URL."
import os, http.cookiejar, urllib.request
cj = http.cookiejar.MozillaCookieJar()
cj.load(os.path.join(os.path.expanduser("~"),".netscape","cookies.txt"))
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
r = opener.open("http://example.com/")

# Ex3."Usage of DefaultCookiePolicy."
import urllib.request
from http.cookiejar import CookieJar, DefaultCookiePolicy
policy = DefaultCookiePolicy(
    rfc2965=True, strict_ns_domain=DefaultCookiePolicy.DomainStrict,
    blocked_domains=["ads.net", ".ads.net"])
cj = CookieJar(policy)
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
r = opener.open("http://example.com/")
Esempio n. 57
0
    def module_run(self, domains):
        base_url = 'https://www.bing.com/search'
        for domain in domains:
            self.heading(domain, level=0)
            base_query = 'domain:' + domain
            pattern = f'"b_algo"><h2><a href="(?:\\w*://)*(\\S+?)\\.{domain}[^"]*"'
            subs = []
            # control variables
            new = True
            page = 0
            nr = 50
            cookiejar = CookieJar()
            cookiejar.set_cookie(
                self.make_cookie('SRCHHPGUSR',
                                 f"NEWWND=0&NRSLT={nr}&SRCHLANG=&AS=1",
                                 '.bing.com'))
            # execute search engine queries and scrape results, storing subdomains in a list
            # loop until no new subdomains are found
            while new:
                content = None
                query = ''
                # build query based on the results of previous queries
                for sub in subs:
                    query += f" -domain:{sub}.{domain}"
                full_query = base_query + query
                url = f"{base_url}?first={page*nr}&q={urllib.parse.quote_plus(full_query)}"
                # bing errors out at > 2059 characters not including the protocol
                if len(url) > 2066:
                    url = url[:2066]
                self.verbose(f"URL: {url}")
                # send query to search engine
                resp = self.request(url, cookiejar=cookiejar)
                if resp.status_code != 200:
                    self.alert(
                        'Bing has encountered an error. Please submit an issue for debugging.'
                    )
                    break
                content = resp.text
                sites = re.findall(pattern, content)
                # create a unique list
                sites = list(set(sites))
                new = False
                # add subdomain to list if it doesn't already exist
                for site in sites:
                    if site not in subs:
                        subs.append(site)
                        new = True
                        host = f"{site}.{domain}"
                        self.insert_hosts(host)
                if not new:
                    # exit if all subdomains have been found
                    if '>Next</a>' not in content:
                        break
                    else:
                        page += 1
                        self.verbose(
                            f"No New Subdomains Found on the Current Page. Jumping to Result {(page*nr)+1}."
                        )
                        new = True
                # sleep script to avoid lock-out
                self.verbose('Sleeping to avoid lockout...')
                time.sleep(random.randint(5, 15))
Esempio n. 58
0
from base64 import b64encode
from http.cookiejar import CookieJar
from datetime import datetime, timezone, timedelta
import unittest
import unittest.mock
from functools import partial
from webtest import TestApp
from zstandard import ZstdCompressor  # type: ignore
from penguin_judge.api import app as _app, _kdf
from penguin_judge.models import (User, Environment, Contest, Problem,
                                  TestCase, Submission, JudgeResult, Token,
                                  JudgeStatus, configure, transaction)
from . import TEST_DB_URL

app = TestApp(_app, cookiejar=CookieJar())


class TestAPI(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        configure(**{'sqlalchemy.url': TEST_DB_URL}, drop_all=True)

    def setUp(self):
        from penguin_judge.main import _configure_app
        app.reset()
        _configure_app({})
        tables = (JudgeResult, Submission, TestCase, Problem, Contest,
                  Environment, Token, User)
        admin_token = bytes([i for i in range(32)])
        salt = b'penguin'
        passwd = _kdf('penguinpenguin', salt)
Esempio n. 59
0
from http.cookiejar import CookieJar
from urllib.request import ProxyHandler, HTTPCookieProcessor, build_opener, install_opener


def install_proxy_opener():
    global cookies
    cookies = CookieJar()
    proxy = ProxyHandler({})
    opener = build_opener(proxy, HTTPCookieProcessor(cookies))
    install_opener(opener)
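
Once installed, the opener applies to every plain urlopen call in the process, so responses feed the module-level jar automatically; e.g.:

from urllib.request import urlopen

install_proxy_opener()
html = urlopen("http://example.com/").read()   # Set-Cookie headers land in `cookies`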
Esempio n. 60
0
from urllib import request,parse
from http.cookiejar import CookieJar


header={
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}

cookiejar = CookieJar()
handler = request.HTTPCookieProcessor(cookiejar)
opener = request.build_opener(handler)
post_url = 'https://www.pplt.net/login.php?'
post_data = parse.urlencode({
    'username':'******',
    'password':'******'
})
req = request.Request(post_url,data=post_data.encode('utf-8'))
opener.open(req)

url='https://www.pplt.net/index.php'
rq=request.Request(url,headers=header)
resp=opener.open(rq)
print(resp.read().decode('gbk'))