Exemple #1
0
def get_cookies_from_ff(db_filename):
    """Read cookies from a Firefox cookies.sqlite database into a cookie jar.

    Parameters
    ----------
    db_filename : str
        Path to Firefox's cookies.sqlite file.

    Returns
    -------
    MozillaCookieJar
        Jar populated from the moz_cookies table.  Rows are serialized into
        an in-memory Netscape-format file and parsed back via _really_load.
    """
    con = sqlite.connect(db_filename)
    # WAL journal mode allows reading while Firefox may hold the DB open.
    con.execute("pragma journal_mode=WAL")
    cur = con.cursor()
    cur.execute(
            "select host, path, isSecure, expiry, name, value from moz_cookies"
            )
    # Fix: the original looped `while True` around fetchone() with a bare
    # `except: continue`, which retries forever on a persistent DB error and
    # swallows every exception.  Iterating the cursor reads all rows and lets
    # real errors propagate.  The 'chkSlider' filter from the original
    # ("dirty fix") is preserved.
    container = [row for row in cur if not row[4].startswith('chkSlider')]
    con.close()
    ftstr = ["FALSE", "TRUE"]
    s = StringIO()
    s.write("""\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file!  Do not edit.
""")
    for item in container:
        # Netscape format: host, include-subdomains flag, path, secure,
        # expiry, name, value (tab-separated).
        v = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
            item[0], ftstr[item[0].startswith('.')], item[1],
            ftstr[item[2]], item[3], item[4], item[5])
        s.write(v)
    s.seek(0)
    cookie_jar = MozillaCookieJar()
    # _really_load(file, filename, ignore_discard, ignore_expires)
    cookie_jar._really_load(s, '', True, True)
    return cookie_jar
Exemple #2
0
    def __init__(self, mobile, password=None, status='0',
                 cachefile='Fetion.cache', cookiesfile=''):
        '''Log in to Fetion and set the initial presence status.

        Presence/status codes:
        online: 400  hidden: 0  busy: 600  away: 100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        # Default the cookies file name to "<mobile>.cookies".
        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile

        cookiejar = MozillaCookieJar(filename=cookiesfile)
        # A brand-new cookie file needs the Netscape header or load() fails.
        if not os.path.isfile(cookiesfile):
            open(cookiesfile, 'w').write(MozillaCookieJar.header)

        cookiejar.load(filename=cookiesfile)

        cookie_processor = HTTPCookieProcessor(cookiejar)

        self.opener = build_opener(cookie_processor,
                                   HTTPHandler)
        self.mobile, self.password = mobile, password
        # Only re-login (and persist fresh cookies) when the saved session
        # is no longer alive.
        if not self.alive():
            self._login()
            cookiejar.save()

        self.changestatus(status)
Exemple #3
0
 def have_cookie_login(self):
     """Try logging in with the saved cookie file; fall back to a fresh login.

     Loads self.cookiesFile into a MozillaCookieJar, opens the user settings
     page and looks for the 'page-setting-user' marker.  When the marker is
     absent the cookies are treated as stale: the file is removed and
     have_not_cookie_login() is invoked instead.
     """
     print('Test cookies...')
     cookie = MozillaCookieJar()
     cookie.load(self.cookiesFile, ignore_discard=True, ignore_expires=True)
     self.build_opener(cookie, self.use_proxy)
     page = self.get_page_data(self.userSetUrl)
     if not search('page-setting-user', page):
         print('This cookies has been invalid.')
         remove(self.cookiesFile)
         self.have_not_cookie_login()
Exemple #4
0
class ScholarQuerier(object):

    """
    ScholarQuerier instances can conduct a search on Google Scholar
    with subsequent parsing of the resulting HTML content.  The
    articles found are collected in the articles member, a list of
    ScholarArticle instances.
    """

    # Default URLs for visiting and submitting Settings pane, as of 3/14
    GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \
        + 'sciifh=1&hl=en&as_sdt=0,5'

    SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \
        + 'q=' \
        + '&scisig=%(scisig)s' \
        + '&inststart=0' \
        + '&as_sdt=1,5' \
        + '&as_sdtp=' \
        + '&num=%(num)s' \
        + '&scis=%(scis)s' \
        + '%(scisf)s' \
        + '&hl=en&lang=all&instq=&inst=569367360547434339&save='

    # Older URLs:
    # ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on

    class Parser(ScholarArticleParser120726):
        """Article parser that forwards each parsed article to its querier."""

        def __init__(self, querier):
            ScholarArticleParser120726.__init__(self)
            self.querier = querier

        def handle_article(self, art):
            self.querier.add_article(art)

    def __init__(self):
        """Set up an empty article list, a cookie jar and the HTTP opener."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                               ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            # Fix: "except Exception, msg" is Python-2-only syntax and a
            # SyntaxError under Python 3; "as" is valid on both.
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar() # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None # Last settings object, if any
Exemple #5
0
    def __init__(self):
        """Set up an empty article list and query, and preload saved cookies."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                               ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            # Fix: "except Exception, msg" is Python-2-only syntax and a
            # SyntaxError under Python 3; "as" is valid on both.
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar() # Just to be safe
    def __init__(self):
        """Start with no articles or query; pull saved cookies from disk."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # Reuse cookies from a previous run when a jar file is configured.
        path = ScholarConf.COOKIE_JAR_FILE
        if path and os.path.exists(path):
            try:
                self.cjar.load(path, ignore_discard=True)
                ScholarUtils.log("info", "loaded cookies file")
            except Exception as msg:
                ScholarUtils.log("warn", "could not load cookies file: %s" % msg)
                self.cjar = MozillaCookieJar()  # reset to a clean jar

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None  # last settings object, if any
Exemple #7
0
  def __init__(
    self,
    cookiejar_path=None,
    cookiejar=None,
    token=None,
    categories=None
  ):
    """
    cookiejar: a MozillaCookieJar object

    token: a user token for submitting form data

    categories: package categories
    """

    # Fall back to the default on-disk location when no path was given.
    self.cookiejar_path = (
      cookiejar_path if cookiejar_path is not None
      else get_default_cookiejar_path()
    )

    if cookiejar is not None:
      self.cookiejar = cookiejar
    else:
      # No jar supplied: start fresh and pull cookies from disk.
      self.cookiejar = MozillaCookieJar()
      self.load_cookies()

    # TODO: find a way to pass a CA path to the opener
    # (urlopen accepts a capath argument, e.g. /etc/ssl/certs).
    self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
    self.token = token
    self.categories = categories

    self.rpc = AUR()
Exemple #8
0
class GPGAuthSessionWrapper(GPGAuthSession):
    """GPGAuthSession variant that persists its cookies in the work dir."""

    def __init__(self, gpg, server_url, user_fingerprint, verify, **kwargs):
        # Deliberately skip GPGAuthSession.__init__ (call its parent's
        # __init__ instead); the attributes it would set are assigned
        # manually below.
        super(GPGAuthSession, self).__init__(**kwargs)

        self.server_url = server_url.rstrip('/')
        self.auth_uri = '/auth'

        self.gpg = gpg
        self.user_specified_fingerprint = user_fingerprint
        self.verify = verify

        # Reload cookies saved by a previous run, if the file exists.
        self._cookie_filename = os.path.join(get_workdir(),
                                             'gpgauth_session_cookies')
        self.cookies = MozillaCookieJar(self._cookie_filename)
        try:
            self.cookies.load(ignore_discard=True)
        except FileNotFoundError:
            pass  # first run: no cookie file yet
Exemple #9
0
def get_senders(mac_cnt=1, extra_cookie=None, **kwargs):
    """Build one async sender per cookie jar, optionally seeding each jar
    with *extra_cookie*."""
    jars = [MozillaCookieJar() for _ in range(mac_cnt)]
    if extra_cookie is not None:
        for jar in jars:
            jar.set_cookie(extra_cookie)
    async_client = client_gen(httpclient.AsyncHTTPClient(), 40)
    return [async_client(jar, **kwargs) for jar in jars]
Exemple #10
0
    def __init__(self):
        """Create an empty querier and reuse saved cookies when available."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # Reload cookies from a previous run, if a jar file is configured.
        jar_file = ScholarConf.COOKIE_JAR_FILE
        if jar_file and os.path.exists(jar_file):
            try:
                self.cjar.load(jar_file, ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar()  # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None  # Last settings object, if any
Exemple #11
0
    async def async_init(self) -> None:
        """Lazily create an owned aiohttp session and attach file-backed cookies."""
        if not self.session:
            s = aiohttp.ClientSession()
            # We created the session, so we are responsible for closing it.
            self.__our_session = True
            self.session = s

            if self.cookiefile:
                # NOTE(review): aiohttp sessions normally manage their own
                # cookie jar type; assigning a MozillaCookieJar to s.cookies
                # looks suspicious — confirm aiohttp actually honours this
                # attribute before relying on it.
                s.cookies = MozillaCookieJar(self.cookiefile)
                if os.path.exists(self.cookiefile):
                    s.cookies.load()
Exemple #12
0
 def get_new_cookie(self):
     """Authenticate against URS with HTTP Basic auth and capture cookies.

     Returns
     -------
     bool
         True when a logged-in cookie was obtained and saved to
         self.cookie_jar_path; False otherwise (self.errorMsg explains why).
     """
     self.errorMsg = 'XXXX'
     # Start by prompting user to input their credentials
     requestPath = self.requestPath
     new_username = self.username
     new_password = self.password
     # Basic-auth header value: base64("user:pass"), decoded back to str.
     user_pass = base64.b64encode(
         bytes(new_username+":"+new_password, "utf-8"))
     user_pass = user_pass.decode("utf-8")
     # Authenticate against URS, grab all the cookies
     self.cookie_jar = MozillaCookieJar()
     opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                           HTTPHandler(), HTTPSHandler(**self.context))
     request = Request(requestPath,
                       headers={"Authorization": f"Basic {user_pass}"})
     # Watch out cookie rejection!
     try:
         _ = opener.open(request)
     except HTTPError as e:
         self.errorMsg = "\nError: problem obtaining a download cookie:" \
             f"{e.code}"
         # 401 means bad credentials: give up immediately.
         if e.code == 401:
             return False
     except URLError:
         self.errorMsg = "Error: Problem communicating with URS," \
             "unable to obtain cookie. Try cookie generation later."
         return False
     # Did we get a cookie?
     if self.check_cookie_is_logged_in(self.cookie_jar):
         # COOKIE SUCCESS!
         self.cookie_jar.save(self.cookie_jar_path)
         return True
     # if we aren't successful generating the cookie, nothing will work.
     # Stop here!
     self.errorMsg = "Error: Could not generate new cookie! " \
         "Please try Username and Password again."
     return False
    def __init__(self):
        """Initialize the article list, cookie jar and HTTP opener."""
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        cookie_path = ScholarConf.COOKIE_JAR_FILE
        have_jar_file = bool(cookie_path) and os.path.exists(cookie_path)
        # Pull in cookies saved by an earlier run, when present.
        if have_jar_file:
            try:
                self.cjar.load(cookie_path, ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn',
                                 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar()  # fall back to an empty jar

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None  # Last settings object, if any
Exemple #14
0
class Session(object):
    """urllib-based HTTP session with persistent Mozilla-format cookies."""

    def __init__(self, app_name, app_version, data_path, **kwargs):
        self.app_name = app_name
        self.app_version = app_version

        if not data_path or not os.path.isdir(data_path):
            raise Exception('invalid data_path: %s' % data_path)

        # Cookies persist in <data_path>/<COOKIES_FILENAME>; a missing or
        # unreadable file simply means we start with an empty jar.
        self.cookie_jar = MozillaCookieJar(os.path.join(data_path, default.COOKIES_FILENAME))
        try:
            self.cookie_jar.load()
        except EnvironmentError:
            pass

        self.opener = build_opener(
            HTTPRedirectHandler(),
            HTTPCookieProcessor(self.cookie_jar))

        super(Session, self).__init__(**kwargs)

    def open(self, request, default_charset=None):
        """Open *request* with identification headers added.

        Returns a codecs text reader when a charset can be determined
        (response header, else *default_charset*); otherwise the raw
        response object.
        """
        request.add_header('User-Agent', util.user_agent(self.app_name, self.app_version))

        system_string = json.dumps(util.system_info(self.app_name, self.app_version))
        request.add_header('X-Sputnik-System', system_string)

        r = self.opener.open(request)

        if hasattr(r.headers, 'get_content_charset'):  # py3
            charset = r.headers.get_content_charset() or default_charset
        elif hasattr(r.headers, 'getparam'):  # py2
            charset = r.headers.getparam('charset') or default_charset
        else:
            charset = default_charset

        if charset is None:
            return r
        return codecs.getreader(charset)(r)

    def __del__(self):
        # Best-effort persistence on teardown; hasattr guards against
        # __init__ having failed before cookie_jar was assigned.
        if hasattr(self, 'cookie_jar'):
            self.cookie_jar.save()
Exemple #15
0
    def _load_credentials(self) -> None:
        """Load credentials and set up internal auth request objects."""
        # Browser-based interactor handles macaroon discharges interactively.
        wbi = httpbakery.WebBrowserInteractor(open=visit_page_with_browser)
        self._cookiejar = MozillaCookieJar(self._cookiejar_filepath)
        self._client = httpbakery.Client(cookies=self._cookiejar, interaction_methods=[wbi])

        if os.path.exists(self._cookiejar_filepath):
            logger.debug("Loading credentials from file: %r", str(self._cookiejar_filepath))
            try:
                self._cookiejar.load()
            except Exception as err:
                # alert and continue processing (without having credentials, of course, the user
                # will be asked to authenticate)
                logger.warning("Failed to read credentials: %r", err)
        else:
            logger.debug("Credentials file not found: %r", str(self._cookiejar_filepath))

        # iterates the cookiejar (which is mutable, may change later) and get the cookies
        # for comparison after hitting the endpoint
        self._old_cookies = list(self._cookiejar)
Exemple #16
0
def _get_cookie_jar(cookie_path):
    """Necessary for urllib.request."""
    cj = MozillaCookieJar()
    if not cookie_path:
        return cj
    try:
        cj.load(Path(cookie_path).absolute(), ignore_expires=True)
    except Exception as e:
        logger.error(f"Failed to load cookie file {cookie_path}: {e}. \
Defaulting to empty cookie.")
    # logger.debug(f"Cookie jar: {cj}")

    # TODO Make sure the necessary youtube cookies are there, ie. LOGIN_INFO,
    # APISID, CONSENT, HSID, NID, PREF, SID, SIDCC, SSID, VISITOR_INFO1_LIVE,
    # __Secure-3PAPISID, __Secure-3PSID, __Secure-3PSIDCC, etc.
    # otherwise we risk silently losing data!
    for cookie in cj:
        if "youtube" in cookie.domain and cookie.is_expired:
            logger.warning(f"{cookie} is expired! Might want to renew it.")
    return cj
 def get_qr_scan_status(self, oauthKey):
     """Poll the QR-code login endpoint and report its status.

     Returns a (status, session) tuple: *session* is the requests.Session
     now carrying the login cookies when *status* is truthy, else None.
     """
     data = {'oauthKey': oauthKey}
     headers = {"Content-Type": "application/x-www-form-urlencoded"}
     session = requests.Session()
     # Cookies go into a Mozilla-format jar bound to cookie_path so they
     # can be persisted for later reuse.
     session.cookies = MozillaCookieJar(self.cookie_path)
     h = session.post(self.qr_login_info_url, headers=headers, data=data)
     status = h.json()['status']
     if status:
         return status, session
     else:
         return status, None
Exemple #18
0
def grab(url):
    """Fetch *url* with cookies from cookies.txt, then visit several OA pages."""
    jar = MozillaCookieJar()
    jar.load('cookies.txt', ignore_discard=True, ignore_expires=True)
    req = Request(url, headers=DEFAULT_HEADERS)
    opener = build_opener(HTTPCookieProcessor(jar))
    response = opener.open(req, timeout=DEFAULT_TIMEOUT)
    print(response.read().decode('utf8'))

    # First OA page is printed for inspection.
    result = opener.open('http://oa.epoint.com.cn')
    html = result.read().decode('utf-8')
    print(html)

    # The remaining pages are fetched (cookies keep the session warm) but
    # their content is not printed.
    for page_url in (
            'http://oa.epoint.com.cn/netoffice8/ZReport/Pages/Problem/Problem_Add.aspx',
            'http://oa2.epoint.com.cn/EpointCommunity/EpointCommunity/Home/Home.aspx'):
        result = opener.open(page_url)
        resu = result.read().decode('utf-8')
Exemple #19
0
 def __init__(self, vid_dir_path, cookies_file_path: str = None):
     """Describe a downloaded-video directory.

     vid_dir_path: directory whose entries are the video parts.
     cookies_file_path: optional Mozilla cookies.txt converted into a dict.
     """
     if cookies_file_path:
         # Bug fix: MozillaCookieJar(path) only records the filename — the
         # file must be load()ed, otherwise dict_from_cookiejar always saw
         # an empty jar and self.cookies was {}.
         jar = MozillaCookieJar(cookies_file_path)
         jar.load(ignore_discard=True, ignore_expires=True)
         self.cookies = requests.utils.dict_from_cookiejar(jar)
     else:
         self.cookies = None
     self.folder = vid_dir_path
     self.work_dir, self.id = os.path.split(os.path.realpath(vid_dir_path))
     self.part_list = os.listdir(vid_dir_path)
     self.part_sum = len(self.part_list)
     self._current_part = None
     self._current_meta = None
Exemple #20
0
def authentication(request, payload):
    """POST *payload* to the security-releases endpoint with macaroon cookies."""
    url = 'https://ubuntu.com/security/releases'

    client = httpbakery.Client(cookies=MozillaCookieJar(".login"))

    # Reuse a previous login when the cookie file already exists on disk.
    if os.path.exists(client.cookies.filename):
        client.cookies.load(ignore_discard=True)

    response = client.request(request, url=url, json=payload)
    # Persist (possibly refreshed) cookies for the next invocation.
    client.cookies.save(ignore_discard=True)
    print(response, response.text)
Exemple #21
0
    def __init__(self, *, baseurl=None, cookiefile=None, session=None):
        """Remember the base URL and wire a Mozilla cookie jar into the session."""
        self.baseurl = baseurl
        self._session = session

        sess = self.session
        if cookiefile:
            sess.cookies = MozillaCookieJar(cookiefile)
            # Only load when the file already exists; a fresh jar is fine
            # otherwise.
            if os.path.exists(cookiefile):
                sess.cookies.load()

        self._has_cookiefile = bool(cookiefile)
def get_cookies_in_cookiejar(host):
    """Export cookies and put them in a cookiejar.
    Return value: a cookiejar filled with cookies."""
    # based on http://www.guyrutenberg.com/2010/11/27/building-cookiejar-out-of-firefoxs-cookies-sqlite/
    cj = MozillaCookieJar()
    cookie_db = get_cookie_db_path(str(FIREFOX_DIR))
    conn = db.connect(cookie_db)
    cursor = conn.cursor()
    # Security fix: *host* used to be interpolated straight into the SQL
    # string (injection risk); it is now bound as a parameter.  The column
    # list (CONTENTS) is trusted module configuration, not user input.
    sql = "SELECT {c} FROM moz_cookies WHERE host LIKE ?".format(c=CONTENTS)
    cursor.execute(sql, ('%{0}%'.format(host),))

    for item in cursor.fetchall():
        # Cookie(version, name, value, port, port_specified, domain,
        #        domain_specified, domain_initial_dot, path, path_specified,
        #        secure, expires, discard, comment, comment_url, rest)
        c = Cookie(0, item[4], item[5], None, False, item[0],
                   item[0].startswith('.'), item[0].startswith('.'), item[1],
                   False, item[2], item[3], item[3] == "", None, None, {})
        cj.set_cookie(c)

    return cj
Exemple #23
0
def load_and_merge_cookie_jars(cookie_jar_paths):
    """Merge every readable Mozilla cookie file in *cookie_jar_paths* into a
    single RequestsCookieJar."""
    merged = RequestsCookieJar()
    if not cookie_jar_paths:
        return merged

    logging.debug("Attempting to load and merge the following cookie files: %s" % cookie_jar_paths)
    for path in cookie_jar_paths:
        if not os.path.isfile(path):
            continue
        try:
            file_jar = MozillaCookieJar(path)
            file_jar.load(ignore_expires=True, ignore_discard=True)
            merged.update(file_jar)
        except Exception as e:
            logging.warning("Unable to load cookie file [%s]: %s" % (path, get_typed_exception(e)))

    # Do not preserve expire values from cookies with expires=0 from the file, or requests will not use the cookie
    for cookie in merged:
        if not cookie.expires:
            cookie.expires = None

    return merged
Exemple #24
0
    def login(self, pixiv_id='614634238', password='******'):
        """Log in to pixiv via the web form and persist the session cookies.

        Scrapes the hidden post_key token from the login page, posts the
        credentials, and saves the cookie jar on success.  Exits the process
        (SystemExit) when the error-message list appears in the response.
        NOTE(review): the parameter defaults look like placeholder
        credentials — callers should always pass real ones.
        """
        cookie = MozillaCookieJar(self.cookiesFile)
        self.build_opener(cookie, self.use_proxy)
        print('Login...')

        # The form requires a one-time post_key scraped from the login page.
        page = self.get_page_data(self.loginUrl)
        pattern = compile('name="post_key"\s*value="(.*?)"')
        post_key = search(pattern, page).group(1)

        post_value = {
            'pixiv_id': pixiv_id,
            'password': password,
            'g_recaptcha_response': '',
            'post_key': post_key,
            'source': 'pc'
        }
        page = self.get_page_data(self.loginUrl, post_value)
        if search('error-msg-list', page):
            print('Login failed.')
            raise SystemExit(1)
        cookie.save(ignore_discard=True, ignore_expires=True)
Exemple #25
0
 def __init__(self, endpoint, persist_cookie, cookie_file, reauthenticate,
              login_params, authn_header=None, debug=False):
     """Cookie-backed session store for the API at *endpoint*.

     When persist_cookie is True, cookies live in *cookie_file*
     (DEFAULT_COOKIE_FILE when None) inside a directory created with
     owner-only permissions.
     """
     super(SessionStore, self).__init__()
     self.session_base_url = '{0}/api/session'.format(endpoint)
     self.reauthenticate = reauthenticate
     self.persist_cookie = persist_cookie
     self.login_params = login_params
     self.authn_header = authn_header
     self._debug = debug
     if persist_cookie:
         if cookie_file is None:
             cookie_file = DEFAULT_COOKIE_FILE
         cookie_dir = os.path.dirname(cookie_file)
         self.cookies = MozillaCookieJar(cookie_file)
         # Create the $HOME/.nuvla dir if it doesn't exist
         # (owner-only rwx permissions).
         if not os.path.isdir(cookie_dir):
             os.mkdir(cookie_dir, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
         # Load existing cookies if the cookies.txt exists
         if os.path.isfile(cookie_file):
             self.cookies.load(ignore_discard=True)
             self.cookies.clear_expired_cookies()
Exemple #26
0
class Session(Base):
    """HTTP session with persistent cookies and Sputnik identification headers."""

    def __init__(self, data_path, **kwargs):
        if not validation.is_data_path(data_path):
            raise Exception('invalid data_path: %s' % data_path)

        # Cookies live in <data_path>/<COOKIES_FILENAME>; a missing file
        # simply means we start with an empty jar.
        self.cookie_jar = MozillaCookieJar(
            os.path.join(data_path, default.COOKIES_FILENAME))
        try:
            self.cookie_jar.load()
        except EnvironmentError:
            pass

        self.opener = build_opener(HTTPRedirectHandler(),
                                   HTTPCookieProcessor(self.cookie_jar))

        super(Session, self).__init__(**kwargs)

    def open(self, request, default_charset=None):
        """Open *request* with UA/name/version headers added.

        Returns a codecs text reader when a charset can be determined,
        otherwise the raw response object.
        """
        request.add_header('User-Agent', self.s.user_agent())
        if self.s.name:
            request.add_header('X-Sputnik-Name', self.s.name)
        if self.s.version:
            request.add_header('X-Sputnik-Version', self.s.version)

        r = self.opener.open(request)

        if hasattr(r.headers, 'get_content_charset'):  # py3
            charset = r.headers.get_content_charset() or default_charset
        elif hasattr(r.headers, 'getparam'):  # py2
            charset = r.headers.getparam('charset') or default_charset
        else:
            charset = default_charset

        if charset is None:
            return r
        return codecs.getreader(charset)(r)

    def __del__(self):
        # Best-effort save on teardown; guard in case __init__ failed early.
        if hasattr(self, 'cookie_jar'):
            self.cookie_jar.save()
Exemple #27
0
    def __init__(self,
                 url,
                 session=None,
                 headers=None,
                 cookies=None,
                 cookies_file=None,
                 directory='.',
                 filename=None,
                 filestem=None,
                 filesuffix=None):
        """Prepare a download: session, headers, cookies and target directory.

        Fix: *headers* and *cookies* previously defaulted to shared mutable
        dicts ({}); they now default to None (treated as empty) — identical
        behavior without the shared-default-argument pitfall.
        """
        super().__init__()

        self.url = url

        if session:
            self.session = session
        else:
            self.session = requests.Session()

        self.session.headers.update(HEADERS)
        if headers:
            self.session.headers.update(headers)

        if cookies_file:
            # Cookies from a Mozilla-format file take precedence over the
            # cookies dict.
            jar = MozillaCookieJar(cookies_file)
            jar.load()
            self.session.cookies = jar
        elif cookies:
            for k, v in cookies.items():
                self.session.cookies.set(k, v)

        # Replace characters that are illegal in Windows paths.
        self.directory = Path(re.sub(r'[:|\s\*\?\\"]', '_', directory))
        if not self.directory.exists():
            self.directory.mkdir(parents=True)

        self._filename = filename
        self._filestem = filestem
        self._filesuffix = filesuffix

        self.response = self._get_response()
        self.response.raw.decode_content = True
Exemple #28
0
    def __init__(self):
        """Build the urllib opener: optional proxy, UA header, cookies/login."""
        # error message
        self.error = None

        # establish connection
        self.session = build_opener()

        # add proxy handler if needed
        if config['proxy']:
            if any(config['proxies'].values()):
                self.session.add_handler(ProxyHandler(config['proxies']))
                logger.debug("Proxy is set!")
            else:
                self.error = "Proxy enabled, but not set!"

        # change user-agent
        self.session.addheaders = [('User-Agent', config['ua'])]

        # load local cookies
        mcj = MozillaCookieJar()
        try:
            mcj.load(FILE_C, ignore_discard=True)
            # Presence of a 'uid' cookie is treated as a valid login session.
            if 'uid' in [cookie.name for cookie in mcj]:
                # if cookie.expires < int(time.time())
                logger.info("Local cookies is loaded")
                self.session.add_handler(HTTPCookieProcessor(mcj))
            else:
                logger.info("Local cookies expired or bad")
                logger.debug(f"That we have: {[cookie for cookie in mcj]}")
                mcj.clear()
                self.login(mcj)
        except FileNotFoundError:
            # No cookie file yet: log in from scratch.
            self.login(mcj)
Exemple #29
0
    def urlretrieve(url: str, filename: str, context: ssl.SSLContext, reporthook=None, cookies_path=None):
        """
        Download *url* to *filename* using an SSL context and optional cookies.

        original source:
        https://github.com/python/cpython/blob/
        21bee0bd71e1ad270274499f9f58194ebb52e236/Lib/urllib/request.py#L229

        Because urlopen also supports context,
        I decided to adapt the download function.

        reporthook, when given, is called as reporthook(blocknum, bs, size)
        once before the transfer and after every block.  cookies_path, when
        given, names a Mozilla cookie file whose cookies are attached to the
        request.  Returns (filename, headers); raises ContentTooShortError
        when fewer bytes than Content-Length arrive.
        """
        url_parsed = urlparse.urlparse(url)

        request = urllib.request.Request(url=url, headers=RequestHelper.stdHeader)
        if cookies_path is not None:
            cookie_jar = MozillaCookieJar(cookies_path)
            cookie_jar.load(ignore_discard=True, ignore_expires=True)
            cookie_jar.add_cookie_header(request)

        with contextlib.closing(urllib.request.urlopen(request, context=context)) as fp:
            headers = fp.info()

            # Just return the local path and the 'headers' for file://
            # URLs. No sense in performing a copy unless requested.
            if url_parsed.scheme == 'file' and not filename:
                return os.path.normpath(url_parsed.path), headers

            if not filename:
                raise RuntimeError('No filename specified!')

            tfp = open(filename, 'wb')

            with tfp:
                result = filename, headers

                # read overall
                read = 0

                # 8 KiB at a time (the original comment said 4kb)
                bs = 1024 * 8
                blocknum = 0

                # guess size from the Content-Length header (-1 when absent)
                size = int(headers.get('Content-Length', -1))

                if reporthook:
                    reporthook(blocknum, bs, size)

                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)

        # A short read with a known Content-Length means a truncated download.
        if size >= 0 and read < size:
            raise ContentTooShortError('retrieval incomplete: got only %i out of %i bytes' % (read, size), result)

        return result
Exemple #30
0
def save_cookies_to_file(cookies: "Cookies", path: Path):
    """Persist every cookie held by *cookies* to *path* in Netscape format."""
    target = MozillaCookieJar(path)
    for item in cookies.jar:
        target.set_cookie(item)
    # Make sure the parent directory exists before writing a new file.
    if not path.is_file():
        path.parent.mkdir(parents=True, exist_ok=True)
    target.save(ignore_discard=True)
Exemple #31
0
class Session(Base):
    """HTTP session with persistent cookies and Sputnik identification headers."""

    def __init__(self, data_path, **kwargs):
        if not validation.is_data_path(data_path):
            raise Exception('invalid data_path: %s' % data_path)

        # Cookies live in <data_path>/<COOKIES_FILENAME>; a missing file
        # simply means we start with an empty jar.
        self.cookie_jar = MozillaCookieJar(os.path.join(data_path, default.COOKIES_FILENAME))
        try:
            self.cookie_jar.load()
        except EnvironmentError:
            pass

        self.opener = build_opener(
            HTTPRedirectHandler(),
            HTTPCookieProcessor(self.cookie_jar))

        super(Session, self).__init__(**kwargs)

    def open(self, request, default_charset=None):
        """Open *request* with UA/name/version headers added.

        Returns a codecs text reader when a charset can be determined,
        otherwise the raw response object.
        """
        request.add_header('User-Agent', self.s.user_agent())
        if self.s.name:
            request.add_header('X-Sputnik-Name', self.s.name)
        if self.s.version:
            request.add_header('X-Sputnik-Version', self.s.version)

        r = self.opener.open(request)

        if hasattr(r.headers, 'get_content_charset'):  # py3
            charset = r.headers.get_content_charset() or default_charset
        elif hasattr(r.headers, 'getparam'):  # py2
            charset = r.headers.getparam('charset') or default_charset
        else:
            charset = default_charset

        if charset is None:
            return r
        return codecs.getreader(charset)(r)

    def __del__(self):
        # Best-effort save on teardown; guard in case __init__ failed early.
        if hasattr(self, 'cookie_jar'):
            self.cookie_jar.save()
Exemple #32
0
def test_authholder_credentials_save_reallysave(auth_holder):
    """Save really do save if cookies changed."""
    # create some fake cookies and persist them where the holder expects them
    fake_cookiejar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    fake_cookie = get_cookie()
    fake_cookiejar.set_cookie(fake_cookie)
    fake_cookiejar.save()

    # make auth holder to have those credentials loaded, and also load them ourselves for
    # later comparison
    auth_holder._load_credentials()
    with open(auth_holder._cookiejar_filepath, "rb") as fh:
        prv_file_content = fh.read()

    # set a different credential in the auth_holder (mimicking that the user
    # authenticated while doing the request)
    other_cookie = get_cookie(value="different")
    auth_holder._cookiejar.set_cookie(other_cookie)

    # call the tested method and ensure that file changed!
    auth_holder._save_credentials_if_changed()
    with open(auth_holder._cookiejar_filepath, "rb") as fh:
        new_file_content = fh.read()
    assert new_file_content != prv_file_content

    # call the tested method again, to verify that it was calling save on the cookiejar (and
    # not that the file changed as other side effect)
    with patch.object(auth_holder._cookiejar, "save") as mock:
        auth_holder._save_credentials_if_changed()
    assert mock.call_count == 1
Exemple #33
0
    def __init__(self,
                 mobile,
                 password=None,
                 status='0',
                 cachefile='Fetion.cache',
                 cookiesfile=''):
        """Log in to Fetion and set the initial presence status.

        Presence/status codes: online=400, hidden=0, busy=600, away=100.
        """
        if cachefile:
            self.cache = Cache(cachefile)

        # Default the cookies file name to "<mobile>.cookies".
        cookiesfile = cookiesfile or ('%s.cookies' % mobile)

        jar = MozillaCookieJar(filename=cookiesfile)
        # A brand-new cookie file needs the Netscape header or load() fails.
        if not os.path.isfile(cookiesfile):
            open(cookiesfile, 'w').write(MozillaCookieJar.header)
        jar.load(filename=cookiesfile)

        self.opener = build_opener(HTTPCookieProcessor(jar), HTTPHandler)
        self.mobile, self.password = mobile, password

        # Only re-login (and persist fresh cookies) when the session is dead.
        if not self.alive():
            self._login()
            jar.save()

        self.changestatus(status)
Exemple #34
0
def login():
    """Log the module-level `site` into the English Wikipedia API.

    Reuses cookies from /tmp/cookies.txt when present so repeated runs can
    skip the login round-trip.  Returns True when logged in, False when
    credentials are missing or the login fails.
    """
    global site
    if site and site.logged_in:
        logger.info('Already logged into API site')
        return True

    api_creds = get_credentials()
    if api_creds is None:
        logger.warning('Not creating API site object, no credentials')
        return False

    cookie_path = '/tmp/cookies.txt'
    cookie_jar = MozillaCookieJar(cookie_path)
    if os.path.exists(cookie_path):
        # Load cookies from file, including session cookies (expirydate=0)
        cookie_jar.load(ignore_discard=True, ignore_expires=True)
    logger.info('Loaded %d cookies', len(cookie_jar))

    connection = requests.Session()
    connection.cookies = cookie_jar

    site = mwclient.Site('en.wikipedia.org',
                         clients_useragent=_ua,
                         pool=connection)
    if not site.logged_in:
        try:
            logger.info('Logging into API site')
            site.login(api_creds['user'], api_creds['pass'])
            # Persist the fresh session cookies for the next run.
            logger.info('Saving cookies')
            cookie_jar.save(ignore_discard=True, ignore_expires=True)
        except mwclient.errors.LoginError:
            logger.exception('Exception logging into Wikipedia')
            return False
    return True
def test_authholder_credentials_load_file_present_ok(auth_holder):
    """Credentials are properly loaded and all internal objects setup ok."""
    # seed the on-disk jar with one known cookie
    seed_jar = MozillaCookieJar(auth_holder._cookiejar_filepath)
    seed_cookie = get_cookie()
    seed_jar.set_cookie(seed_cookie)
    seed_jar.save()

    auth_holder._load_credentials()

    # the cookie made the round trip through the file
    cookies = list(auth_holder._cookiejar)
    assert len(cookies) == 1
    # compare the value as no __eq__ in Cookie
    assert cookies[0].value == seed_cookie.value
    assert isinstance(auth_holder._cookiejar, MozillaCookieJar)
    assert auth_holder._old_cookies == list(auth_holder._cookiejar)

    # the bakery client was wired up with the same cookies and the
    # browser-based interaction method
    assert isinstance(auth_holder._client, httpbakery.Client)
    assert list(auth_holder._client.cookies)[0].value == seed_cookie.value
    (interaction,) = auth_holder._client._interaction_methods
    assert isinstance(interaction, httpbakery.WebBrowserInteractor)
    assert interaction._open_web_browser == visit_page_with_browser
Exemple #36
0
def get_urlopener_with_cookie(cookie):
    """Build a urllib3 PoolManager, optionally preloaded with cookies.

    `cookie` is a path to a Mozilla/Netscape cookie file, or None for a
    bare pool with default headers.
    """
    urllib3.disable_warnings()
    # pre-defined header
    headers = {
        'Accept-Encoding': 'gzip, deflate, br',
        'User-Agent': 'okhttp/3.10.0',
        'Connection': 'keep-alive'
    }
    if cookie is None:
        return urllib3.PoolManager()

    # flatten the cookie file into a single request header
    jar = MozillaCookieJar()
    jar.load(cookie)
    headers['Cookie'] = '; '.join(c.name + '=' + c.value for c in jar)
    return urllib3.PoolManager(headers=headers)
Exemple #37
0
def authentication(request, url, payload):
    """
    Authenticate with Macaroons in order to use Webteam API.

    `request` is the HTTP method, `url` the endpoint, `payload` the JSON
    body.  Cookies (macaroons) are persisted in ".login" between calls.

    Returns the httpbakery response object.
    """
    client = httpbakery.Client(cookies=MozillaCookieJar(".login"))

    if os.path.exists(client.cookies.filename):
        client.cookies.load(ignore_discard=True)

    response = client.request(request, url=url, json=payload)
    client.cookies.save(ignore_discard=True)
    print(response, response.text)
    # Return the response so callers can inspect it (the near-identical
    # sibling authentication() helper already does this).
    return response
    def __init__(self,
                 base_url=DEFAULT_BASE_URL,
                 cookie_path=DEFAULT_COOKIE_PATH):
        """Create a client session with a file-backed cookie jar.

        base_url: service root used by the request helpers.
        cookie_path: where cookies are persisted between runs.
        """
        self.base_url = base_url
        self.web = requests.session()
        self.cookie_path = cookie_path
        self.display_name_cache = {}
        # Always bind the jar to its file path so later no-argument
        # load()/save() calls work; the old code created the jar without
        # a filename when the file was missing, breaking later save().
        self.web.cookies = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.web.cookies.load()
        else:
            self.web.cookies.save()

        self.logger = logging.getLogger("AiClothClient")
        self.logger.setLevel(logging.INFO)
        handler = RotatingFileHandler('client.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.propagate = False
        self.logger.addHandler(handler)
Exemple #39
0
def authentication(method, url, payload):
    """
    Authenticate with Macaroons in order to use Webteam API.

    Macaroon cookies are cached in ~/.ubuntu.com.login between calls.
    Returns the httpbakery response.
    """
    jar_path = os.path.expanduser("~/.ubuntu.com.login")
    client = httpbakery.Client(cookies=MozillaCookieJar(jar_path))

    if os.path.exists(client.cookies.filename):
        client.cookies.load(ignore_discard=True)

    response = client.request(method, url=url, json=payload)
    client.cookies.save(ignore_discard=True)
    return response
Exemple #40
0
    def __init__(self, username, password, **kwargs):
        """
        Set up paths, credentials, and an HTTP session for the account.

        :param username: your username
        :type username: str
        :param password: your password
        :type password: str
        """
        # paths for cached cookies and downloaded data
        cwd = os.getcwd()
        self._path = cwd
        self._cookies_path = os.path.join(cwd, 'cookies_data')
        self._data_path = os.path.join(cwd, 'freenom_data')

        # account credentials
        self.username = username
        self.password = password

        # HTTP session with a browser-like identity
        self.headers = {
            'Host':
            'my.freenom.com',
            'Referer':
            'https://my.freenom.com/clientarea.php',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
        }
        self.session = requests.session()
        self.session.headers = self.headers
        self.token = ''

        # reuse cookies from a previous run when present on disk
        jar = MozillaCookieJar(filename=self._cookies_path)
        if os.path.isfile(self._cookies_path):
            jar.load(self._cookies_path,
                     ignore_discard=True,
                     ignore_expires=True)
        self.session.cookies = jar

        # option setup --dev
        self.timeout = kwargs.get('timeout', 22)
        self.saveHtml = kwargs.get('saveHtml', False)

        self._RequireData()
Exemple #41
0
 def setCookieJar(self,cookiejar):
     """Changes the CookieJar used to manage the session.

     Accepts either a CookieJar instance or a filename (str); a filename
     creates a MozillaCookieJar and loads it if the file already exists.
     Existing cookies will not be transferred.

     Returns: the old CookieJar (None if none was set)"""
     tmpcookies = getattr(self,"cookies",None)
     # isinstance() instead of type(...) == str: also accepts str
     # subclasses and is the idiomatic type check.
     if isinstance(cookiejar, str):
         self.cookies = MozillaCookieJar(cookiejar)
         if os.path.exists(cookiejar):
             self.cookies.load(ignore_discard=True)
     else:
         self.cookies = cookiejar
     self.opener = build_opener(HTTPCookieProcessor(self.cookies))
     return tmpcookies
Exemple #42
0
    def get_cookie(self):
        """Obtain a valid URS (Earthdata) cookie, reusing a saved jar if possible.

        Loads the jar at self.cookie_jar_path when present and validates it
        via check_cookie(); otherwise prompts the user for credentials until
        a valid cookie is obtained.  Always returns True.
        """
        if os.path.isfile(self.cookie_jar_path):
            self.cookie_jar = MozillaCookieJar()
            self.cookie_jar.load(self.cookie_jar_path)

            # make sure cookie is still valid
            if self.check_cookie():
                print(" > Re-using previous cookie jar.")
                return True
            else:
                print(" > Could not validate old cookie Jar")

        # We don't have a valid cookie, prompt user for creds.
        # NOTE(review): this span was corrupted in the original source
        # ('******'); reconstructed as two print statements.
        print("No existing URS cookie found, please enter Earthdata username & password:")
        print("(Credentials will not be stored, saved or logged anywhere)")

        # Keep trying 'till user gets the right U:P
        while self.check_cookie() is False:
            self.get_new_cookie()

        return True
Exemple #43
0
    def __init__(self, data_path, **kwargs):
        """Initialise the session with a cookie jar stored under data_path.

        Raises Exception when data_path fails project validation.
        """
        if not validation.is_data_path(data_path):
            raise Exception('invalid data_path: %s' % data_path)

        jar_file = os.path.join(data_path, default.COOKIES_FILENAME)
        self.cookie_jar = MozillaCookieJar(jar_file)
        try:
            self.cookie_jar.load()
        except EnvironmentError:
            # no saved jar yet (or unreadable): start empty
            pass

        self.opener = build_opener(
            HTTPRedirectHandler(),
            HTTPCookieProcessor(self.cookie_jar))

        super(Session, self).__init__(**kwargs)
Exemple #44
0
class ScholarQuerier(object):
    """
    Runs Google Scholar searches and parses the returned HTML.

    Articles discovered by a query accumulate in the ``articles``
    member, a list of ScholarArticle instances.
    """

    class Parser(ScholarArticleParser):
        """Adapter that forwards parsed articles back to the querier."""

        def __init__(self, querier):
            ScholarArticleParser.__init__(self)
            self.querier = querier

        def handle_article(self, art):
            self.querier.add_article(art)

    def __init__(self):
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # Restore any cookies persisted by a previous session:
        if ScholarConf.COOKIE_JAR_FILE and \
                os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar() # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None # Last settings object, if any

    def send_query(self, query):
        """
        Run *query* (a ScholarQuery instance) and parse the response.
        """
        self.clear_articles()
        self.query = query

        payload = self._get_http_response(url=query.get_url(),
                                          log_msg='dump of query response HTML',
                                          err_msg='results retrieval failed')
        if payload is None:
            return
        self.parse(payload)

    def parse(self, html):
        """
        Parse the provided HTML content for articles.
        """
        self.Parser(self).parse(html)

    def add_article(self, art):
        self.articles.append(art)

    def clear_articles(self):
        """Clears any existing articles stored from previous queries."""
        self.articles = []

    def _get_http_response(self, url, log_msg=None, err_msg=None):
        """
        Send an HTTP request for *url*; return the body, or None on failure.
        """
        if log_msg is None:
            log_msg = 'HTTP response data follow'
        if err_msg is None:
            err_msg = 'request failed'
        try:
            ScholarUtils.log('info', 'requesting %s' % unquote(url))

            request = Request(url=url,
                              headers={'User-Agent': ScholarConf.USER_AGENT})
            handle = self.opener.open(request)
            payload = handle.read()

            ScholarUtils.log('debug', log_msg)
            ScholarUtils.log('debug', '>>>>' + '-'*68)
            ScholarUtils.log('debug', 'url: %s' % handle.geturl())
            ScholarUtils.log('debug', 'result: %s' % handle.getcode())
            ScholarUtils.log('debug', 'headers:\n' + str(handle.info()))
            ScholarUtils.log('debug', 'data:\n' + payload.decode('utf-8')) # For Python 3
            ScholarUtils.log('debug', '<<<<' + '-'*68)

            return payload
        except Exception as err:
            ScholarUtils.log('info', err_msg + ': %s' % err)
            return None
Exemple #45
0
class ScholarQuerier(object):

    """
    ScholarQuerier instances can conduct a search on Google Scholar
    with subsequent parsing of the resulting HTML content.  The
    articles found are collected in the articles member, a list of
    ScholarArticle instances.
    """

    # Default URLs for visiting and submitting Settings pane, as of 3/14
    GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \
        + 'sciifh=1&hl=en&as_sdt=0,5'

    SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \
        + 'q=' \
        + '&scisig=%(scisig)s' \
        + '&inststart=0' \
        + '&as_sdt=1,5' \
        + '&as_sdtp=' \
        + '&num=%(num)s' \
        + '&scis=%(scis)s' \
        + '%(scisf)s' \
        + '&hl=en&lang=all&instq=&inst=569367360547434339&save='

    # Older URLs:
    # ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on

    class Parser(ScholarArticleParser120726):
        # Adapter: forwards every article the parser finds back to the
        # owning querier via handle_article().
        def __init__(self, querier):
            ScholarArticleParser120726.__init__(self)
            self.querier = querier

        def handle_article(self, art):
            self.querier.add_article(art)

    def __init__(self):
        self.articles = []
        self.query = None
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                               ignore_discard=True)
                ScholarUtils.log('info', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar() # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None # Last settings object, if any

    def apply_settings(self, settings):
        """
        Applies settings as provided by a ScholarSettings instance.
        """
        # No-op when settings are absent or unconfigured; report success.
        if settings is None or not settings.is_configured():
            return True

        self.settings = settings

        # This is a bit of work. We need to actually retrieve the
        # contents of the Settings pane HTML in order to extract
        # hidden fields before we can compose the query for updating
        # the settings.
        html = self._get_http_response(url=self.GET_SETTINGS_URL,
                                       log_msg='dump of settings form HTML',
                                       err_msg='requesting settings failed')
        if html is None:
            return False

        # Now parse the required stuff out of the form. We require the
        # "scisig" token to make the upload of our settings acceptable
        # to Google.
        # NOTE(review): BeautifulSoup is called without an explicit
        # parser argument; on bs4 this emits a warning and picks the
        # best installed parser — confirm intended.
        soup = BeautifulSoup(html)

        tag = soup.find(name='form', attrs={'id': 'gs_settings_form'})
        if tag is None:
            ScholarUtils.log('info', 'parsing settings failed: no form')
            return False

        tag = tag.find('input', attrs={'type':'hidden', 'name':'scisig'})
        if tag is None:
            ScholarUtils.log('info', 'parsing settings failed: scisig')
            return False

        urlargs = {'scisig': tag['value'],
                   'num': settings.per_page_results,
                   'scis': 'no',
                   'scisf': ''}

        # citform != 0 means citation export is enabled; pass the
        # chosen format through the scisf query parameter.
        if settings.citform != 0:
            urlargs['scis'] = 'yes'
            urlargs['scisf'] = '&scisf=%d' % settings.citform

        # NOTE(review): 'setttings' typo below is in a log message only.
        html = self._get_http_response(url=self.SET_SETTINGS_URL % urlargs,
                                       log_msg='dump of settings result HTML',
                                       err_msg='applying setttings failed')
        if html is None:
            return False

        ScholarUtils.log('info', 'settings applied')
        return True

    def send_query(self, query):
        """
        This method initiates a search query (a ScholarQuery instance)
        with subsequent parsing of the response.
        """
        self.clear_articles()
        self.query = query

        html = self._get_http_response(url=query.get_url(),
                                       log_msg='dump of query response HTML',
                                       err_msg='results retrieval failed')
        if html is None:
            return

        self.parse(html)

    def get_citation_data(self, article):
        """
        Given an article, retrieves citation link. Note, this requires that
        you adjusted the settings to tell Google Scholar to actually
        provide this information, *prior* to retrieving the article.
        """
        if article['url_citation'] is None:
            return False
        # Already fetched — nothing to do.
        if article.citation_data is not None:
            return True

        ScholarUtils.log('info', 'retrieving citation export data')
        data = self._get_http_response(url=article['url_citation'],
                                       log_msg='citation data response',
                                       err_msg='requesting citation data failed')
        if data is None:
            return False

        article.set_citation_data(data)
        return True

    def parse(self, html):
        """
        This method allows parsing of provided HTML content.
        """
        parser = self.Parser(self)
        parser.parse(html)

    def add_article(self, art):
        # Fetch citation data eagerly for each article as it is parsed.
        self.get_citation_data(art)
        self.articles.append(art)

    def clear_articles(self):
        """Clears any existing articles stored from previous queries."""
        self.articles = []

    def save_cookies(self):
        """
        This stores the latest cookies we're using to disk, for reuse in a
        later session.
        """
        if ScholarConf.COOKIE_JAR_FILE is None:
            return False
        try:
            self.cjar.save(ScholarConf.COOKIE_JAR_FILE,
                           ignore_discard=True)
            ScholarUtils.log('info', 'saved cookies file')
            return True
        except Exception as msg:
            ScholarUtils.log('warn', 'could not save cookies file: %s' % msg)
            return False

    def _get_http_response(self, url, log_msg=None, err_msg=None):
        """
        Helper method, sends HTTP request and returns response payload.
        """
        if log_msg is None:
            log_msg = 'HTTP response data follow'
        if err_msg is None:
            err_msg = 'request failed'
        try:
            ScholarUtils.log('info', 'requesting %s' % url)

            req = Request(url=url, headers={'User-Agent': ScholarConf.USER_AGENT})
            hdl = self.opener.open(req)
            html = hdl.read()

            ScholarUtils.log('debug', log_msg)
            ScholarUtils.log('debug', '>>>>' + '-'*68)
            ScholarUtils.log('debug', 'url: %s' % hdl.geturl())
            ScholarUtils.log('debug', 'result: %s' % hdl.getcode())
            ScholarUtils.log('debug', 'headers:\n' + str(hdl.info()))
            # NOTE(review): `html` is concatenated to a str here — this
            # path presumably assumes a Python 2 str body; on Python 3
            # read() returns bytes and this would raise — confirm.
            ScholarUtils.log('debug', 'data:\n' + html)
            ScholarUtils.log('debug', '<<<<' + '-'*68)

            return html
        except Exception as err:
            ScholarUtils.log('info', err_msg + ': %s' % err)
            return None
Exemple #46
0
class Aurploader(object):
  """
  A user object for interactive actions against the AUR.
  """

  def __init__(
    self,
    cookiejar_path=None,
    cookiejar=None,
    token=None,
    categories=None
  ):
    """
    cookiejar_path: where cookies are persisted (defaults to
    get_default_cookiejar_path())

    cookiejar: a MozillaCookieJar object

    token: a user token for submitting form data

    categories: package categories
    """

    if cookiejar_path is None:
      cookiejar_path = get_default_cookiejar_path()
    self.cookiejar_path = cookiejar_path

    if cookiejar is None:
      self.cookiejar = MozillaCookieJar()
      self.load_cookies()
    else:
      self.cookiejar = cookiejar

    # TODO
    # Find way to use this with URL opener. (urlopen accepts a capath arg)
    # CA_PATH = '/etc/ssl/certs'
    self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
    self.token = token
    self.categories = categories

    self.rpc = AUR()

  def get_info(self, pkgname):
    """
    Get package information from the RPC interface.

    Returns the first matching package dict, or None.
    """
    for pkg in self.rpc.info(pkgname):
      return pkg

  def parse_pkgsubmit(self):
    """
    Parse the pkgsubmit page.

    This will return package categories along with hidden inputs such as the
    the token. If the returned values are empty then the user is not currently
    logged in, so it doubles as a login check.
    """
    parser = pkgsubmitParser()
    with self.opener.open(PKGSUBMIT_URL) as f:
      parser.feed(f.read().decode())
    if parser.token:
      self.token = parser.token
    self.categories = parser.categories

  def login(self, user=None, passwd=None, login_file=None, remember_me=True):
    """
    Log in to the AUR.

    Credentials come from login_file when given, otherwise from the
    user/passwd arguments, otherwise from an interactive prompt.
    """
    # NOTE(review): the interactive-prompt span of this method was
    # corrupted in the original source ('******'); reconstructed below.
    if login_file is not None:
      user, passwd = load_login_file(login_file)

    if user is None or passwd is None:
      self.rpc.log("logging in to the AUR")

    if user is None:
      user = input('Username: ')
    if passwd is None:
      # Local import: only needed for interactive logins.
      from getpass import getpass
      passwd = getpass('Password: ')

    data = [
      ('user', user),
      ('passwd', passwd)
    ]

    if remember_me:
      data.append(('remember_me', '1'))

    data = urlencode(data).encode('UTF-8')

    with self.opener.open(LOGIN_URL, data) as f:
      pass

  # python3-AUR could be used to cache the data, but sometimes the data must be
  # fresh, such as when confirming the upload.
  def submit_package_form(
    self, pkginfo, action,
    confirm_delete=False, merge_into=None, comment=None, category=None,
    comment_id=None,
  ):
    """
    Submit a form to the AUR.

    pkginfo: RPC package dict (must contain 'ID' and 'Name')
    action: one of the do_* form actions, 'comment', 'do_ChangeCategory'
            or 'do_DeleteComment'
    comment_id: id of the comment to delete (do_DeleteComment only);
            new optional parameter — the original referenced an
            undefined name here.
    """
    ID = pkginfo['ID']
    url = AUR_URL + '/packages/{}/'.format(pkginfo['Name'])
    # Old form actions, converted to links with AUR 2.0 (Vote/UnVote/
    # Notify/UnNotify/Flag are now handled by do_package_action()).
    do_actions = {
      'do_Disown'   : 'Disown Packages',
      'do_Delete'   : 'Delete Packages',
      'do_Adopt'    : 'Adopt Packages',
    }
    if action in do_actions:
      url = AUR_URL + '/packages/'
      data = [
        ('IDs[{!s}]'.format(ID), '1'),
        ('ID', ID),
        ('token', self.token),
        (action, do_actions[action])
      ]
      if confirm_delete:
        data.append(('confirm_Delete', '1'))
      if merge_into:
        data.append(('merge_Into', merge_into))

    elif action == 'comment':
      if comment:
        data = (
          ('ID', ID),
          ('token', self.token),
          ('comment', comment)
        )
      else:
        raise AurploaderError("no comment submitted")

    elif action == 'do_ChangeCategory':
      if category:
        data = (
          ('action', 'do_ChangeCategory'),
          ('category_id', category),
          ('token', self.token)
        )
      else:
        raise AurploaderError("no category submitted for do_ChangeCategory")

    elif action == 'do_DeleteComment':
      # Fixed: the original tested `category` and used an undefined
      # `comment_id` name, with a copy-pasted error message.
      if comment_id is not None:
        data = (
          ('action', 'do_DeleteComment'),
          ('comment_id', comment_id),
          ('token', self.token),
          ('submit', '1')
        )
      else:
        raise AurploaderError("no comment_id submitted for do_DeleteComment")

    else:
      # Fixed: an unrecognized action previously fell through and raised
      # a NameError on `data` below.
      raise AurploaderError("unrecognized action ({})".format(action))

    data = urlencode(data).encode('UTF-8')
    with self.opener.open(url, data) as f:
      pass

  def do_package_action(self, pkginfo, action):
    """
    Perform one of the link-based package actions.

    Use submit_package_form() for form-based actions.
    """
    actions = PACKAGE_ACTIONS

    if action in actions:
      url = AUR_URL + '/packages/{}/{}'.format(pkginfo['Name'], action)
      with self.opener.open(url) as f:
        pass
    else:
      raise AurploaderError("unrecognized action ({})".format(action))

  def prompt_categories(self, name, default_category=None):
    """
    Prompt the user to select a category for the given package.

    Returns the chosen category id, or None if the user entered "x".
    """
    if not self.categories:
      raise AurploaderError("no categories")
    if default_category not in self.categories:
      default_category = None
    while True:
      print('Select category for {}'.format(name))
      for n in sorted(self.categories):
        print('  {:2d}) {}'.format(n, self.categories[n]))
      print('Enter "x" to skip this package.')
      if default_category:
        category = input('Category [{}]: '.format(default_category))
      else:
        category = input('Category: ')
      if category.lower() == 'x':
        return None
      elif not category and default_category:
        return default_category
      else:
        try:
          category = int(category)
          if category in self.categories:
            return category
        except ValueError:
          continue

  # Python has had an open request for multipart/form-data since 2008-06-30
  # http://bugs.python.org/issue3244

  # At the time of writing, the latest submitted code does not work and hacking
  # together something that does is just not worth it right now.
  def upload_pkg(self, fpath, category=None, auto_category=False, confirm=True):
    """
    Upload a package to the AUR (via curl, see note above).
    """
    fname = os.path.basename(fpath)
    pkginfo = None

    try:
      pkg, ext = fname.split('.src.', 1)
      name, ver, rel = pkg.rsplit('-', 2)
    except ValueError:
      raise AurploaderError('unexpected filename format: {}\nexpected <pkgname>-<pkgver>-<pkgrel>.src.<ext>'.format(fname))

    if category not in self.categories:
      category = None
    if category is None:
      # Default to the category currently recorded in the AUR, if any.
      pkginfo = self.get_info(name)
      if pkginfo:
        category = int(pkginfo['CategoryID'])

    if category is None or not auto_category:
      category = self.prompt_categories(name, default_category=category)

    # This is not an error. A user may abort the upload by entering "x" at the
    # category prompt.
    if category is None:
      return

    cmd = [
      '/usr/bin/curl',
      '-#',
      '-H', 'Expect:',
      '-b', self.cookiejar_path,
      '-c', self.cookiejar_path,
      '-F', 'category={}'.format(category),
      '-F', 'pfile=@{}'.format(fpath),
      '-F', 'pkgsubmit=1',
      '-F', 'token={}'.format(self.token)
    ]

    cmd.append(PKGSUBMIT_URL)

    # Hand the current cookies to curl, then reload whatever it wrote back.
    self.save_cookies()

    with open(os.devnull, 'w') as null:
      p = Popen(cmd, stdout=null)
      e = p.wait()
      if e != 0:
        raise AurploaderError("curl exited with non-zero status ({:d})".format(e))

    self.load_cookies()

    if confirm:
      # Bypass the RPC cache so we see the freshly uploaded version.
      expected = '{}-{}'.format(ver, rel)
      ttl = self.rpc.ttl
      self.rpc.ttl = 0
      try:
        pkginfo = self.get_info(name)
      finally:
        self.rpc.ttl = ttl
      if not pkginfo or pkginfo['Version'] != expected:
        raise AurploaderError('failed to confirm upload')

    return pkginfo

  def save_cookies(self, path=None):
    """
    Save cookie jar.
    """
    if path is None:
      path = self.cookiejar_path
    if path is None:
      raise AurploaderError('no cookiejar path given')
    # For Curl compatibility (not sure which one fails to comply with the standard.
    for cookie in self.cookiejar:
      if not cookie.expires:
        cookie.expires = 0
    self.cookiejar.save(path, ignore_discard=True, ignore_expires=True)

  def load_cookies(self, path=None):
    """
    Load cookie jar.  Missing or malformed files are silently ignored.
    """
    if path is None:
      path = self.cookiejar_path
    if path is None:
      raise AurploaderError('no cookiejar path given')
    try:
      # For Curl compatibility (not sure which one fails to comply with the standard.
      self.cookiejar.load(path, ignore_discard=True, ignore_expires=True)
      for cookie in self.cookiejar:
        if not cookie.expires:
          cookie.expires = None
    except LoadError:
      pass
    except IOError as e:
      if e.errno != errno.ENOENT:
        # bare raise preserves the original traceback
        raise

  def initialize(self, user=None, passwd=None, login_file=None, cookiejar_path=None):
    """
    Login if necessary and load categories and token.
    """
    self.load_cookies(cookiejar_path)
    self.parse_pkgsubmit()
    if not self.categories or not self.token:
      self.login(user=user, passwd=passwd, login_file=login_file)
      self.parse_pkgsubmit()
      if not self.categories or not self.token:
        raise AurploaderError('login appears to have failed\n')
      elif cookiejar_path:
        self.save_cookies(cookiejar_path)
Exemple #47
0
    def put(self, job_dict, args):
        """Validate a job dictionary, fill defaults from `args`, and queue it.

        job_dict: per-job settings (must contain 'url'); missing options
        are inherited from the command-line `args` namespace.
        Raises Exception with a descriptive message on any invalid field.
        """
        if not isinstance(job_dict, dict):
            raise Exception("Jobs must be submitted as dictionaries")

        # Make this a DotDict to make accessing keys cleaner
        job = DotDict(job_dict)

        # URL is the only thing required in each datum
        if not "url" in job:
            raise Exception("No url specified")

        # Add an http prefix onto our URL, if its not
        # explicitly defined as HTTP/HTTPS
        if job.url[:4] != "http":
            job.url = "http://" + job.url

        # Other options can be inherited from those specified
        # on the command line.  Do some sanity checking here, too

        # Set our method (GET, POST, etc)
        if not "method" in job:
            job.method = args.method

        # Read in our job delay (milliseconds -> seconds)...
        try:
            job.delay = (job.delay/1000.0
                if 'delay' in job else args.delay/1000.0)
        except ValueError:
            raise Exception("Delay must be an integer")

        # ... and set our query parameters
        job.params = {}
        job.orig_url = job.url
        if "?" in job.url:
            job.url, query_string = job.url.split("?", 1)
            job.params = parse_qs(query_string)

        # ... and our authentication (if any)
        # (fixed: a stray `job.auth = None` here used to clobber the
        # parsed credentials, disabling authentication entirely)
        if "auth" in job:
            job.auth = job.auth.split(":",1)
        elif args.auth:
            job.auth = args.auth.split(":",1)
        else:
            job.auth = None

        if "authtype" in job:
            job.authtype = job.authtype
        else:
            job.authtype = args.authtype

        if job.auth and len(job.auth) == 1:
            raise Exception("Credentials must be in username:password format")
        if job.authtype not in ("basic","digest"):
            raise Exception("Auth type must be one of: basic, digest")

        # ... and our job counter
        try:
            job.count = int(job.count) if 'count' in job else args.num
        except ValueError:
            raise Exception("Count must be an integer")

        # ... and cookies!
        try:
            cj = MozillaCookieJar()
            if "cookiejar" in job:
                cj.load(job.cookiejar)
                job.cookiejar = cj
            elif args.cookiejar:
                cj.load(args.cookiejar)
                job.cookiejar = cj
            else:
                job.cookiejar = None
        except Exception as e:
            raise Exception("Unable to load cookie jar: {}".format(e))

        # ... our insecure option
        if not "insecure" in job:
            job.insecure = args.insecure
        else:
            if not isinstance(job.insecure, bool):
                raise Exception("Insecure flag must be a boolean")

        # Fix up method case; RFCs 7230/1 state method is case sensitive,
        # but all current recognized methods are upper case, soooo...
        job.method = job.method.upper()

        # Now turn our list of header key:value pairs into
        # the dict that the requests module requires
        header_list = []

        # Coalesce headers from the command line and the job/url file, if any
        if "headers" in job:
            if not isinstance(job.headers, list):
                raise Exception("Headers must be in list form")
            header_list = job.headers + args.header
        else:
            header_list = args.header

        # Convert our list of colon-delimited k:v pairs to a dict
        header_dict = {}
        for kv in header_list:
            try:
                key, val = [s.strip() for s in kv.split(':')]
                header_dict[key.lower()] = val
            except Exception:
                raise Exception(
                    "'{}' header must be in 'key:value' format".format(kv)
                )

        # Set our user agent here, since it is a header too
        if not "user-agent" in header_dict:
            if "agent" in job:
                header_dict["user-agent"] = job.agent
            else:
                header_dict["user-agent"] = args.agent

        # Override the connection header if user has requests keep-alives
        # be disabled
        if args.nokeepalive:
            header_dict["connection"] = "close"

        # Overwrite the header list with the header dict for requests
        job.headers = header_dict

        # Set up POST file reads
        upload_files = (job.upload + args.upload
            if "upload" in job else args.upload)
        job.upload = []
        for file_data in upload_files:
            i = file_data.split(":", 2)
            if len(i) < 2:
                raise Exception("Upload files must be in "
                    "form_var:file_path[:content_type] format")
            # Fixed: tuple unpacking of `i` raised ValueError whenever an
            # optional content_type was supplied (3 elements).
            file_var, file_path = i[0], i[1]

            # Make sure our file exists (and close the probe handle —
            # the original leaked it)
            try:
                with open(file_path, "rb"):
                    pass
            except OSError:
                raise Exception(
                    "{} is not a readable file!".format(file_path)
                )

            # Now guess the mime type if we weren't provided one explicitly
            if len(i) == 3:
                mime_type = i[2]
            else:
                mime_type = (mimetypes.guess_type(file_path)[0]
                    or 'application/octet-stream')

            # Now stick the file data in our upload list
            job.upload.append((file_var, file_path, mime_type))

        # Override the method if we have multipart files to POST
        if job.upload:
            job.method = "POST"

        # Now insert the job into our work queue
        with self.lock:
            self.jobs.append(job)
            self.length += 1
class ScholarQuerier(object):

    """
    ScholarQuerier instances can conduct a search on Google Scholar
    with subsequent parsing of the resulting HTML content.  The
    articles found are collected in the articles member, a list of
    ScholarArticle instances.
    """

    # Default URLs for visiting and submitting Settings pane, as of 3/14
    GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + "/scholar_settings?" + "sciifh=1&hl=en&as_sdt=0,5"

    SET_SETTINGS_URL = (
        ScholarConf.SCHOLAR_SITE
        + "/scholar_setprefs?"
        + "q="
        + "&scisig=%(scisig)s"
        + "&inststart=0"
        + "&as_sdt=1,5"
        + "&as_sdtp="
        + "&num=%(num)s"
        + "&scis=%(scis)s"
        + "%(scisf)s"
        + "&hl=en&lang=all&instq=&inst=569367360547434339&save="
    )

    # Older URLs:
    # ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on

    class Parser(ScholarArticleParser120726):
        """Article parser that feeds parsed results back into its querier."""

        def __init__(self, querier):
            ScholarArticleParser120726.__init__(self)
            self.querier = querier

        def handle_num_results(self, num_results):
            # Record the reported total hit count on the active query, if any.
            if self.querier is not None and self.querier.query is not None:
                self.querier.query["num_results"] = num_results

        def handle_article(self, art):
            self.querier.add_article(art)

    def __init__(self):
        self.articles = []  # ScholarArticle results collected so far
        self.query = None   # last ScholarQuery sent, if any
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
                ScholarUtils.log("info", "loaded cookies file")
            except Exception as msg:
                ScholarUtils.log("warn", "could not load cookies file: %s" % msg)
                self.cjar = MozillaCookieJar()  # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None  # Last settings object, if any

    def apply_settings(self, settings):
        """
        Applies settings as provided by a ScholarSettings instance.

        Returns True on success (or when nothing needs doing), False on
        any retrieval or parsing failure.
        """
        if settings is None or not settings.is_configured():
            return True

        self.settings = settings

        # This is a bit of work. We need to actually retrieve the
        # contents of the Settings pane HTML in order to extract
        # hidden fields before we can compose the query for updating
        # the settings.
        html = self._get_http_response(
            url=self.GET_SETTINGS_URL, log_msg="dump of settings form HTML", err_msg="requesting settings failed"
        )
        if html is None:
            return False

        # Now parse the required stuff out of the form. We require the
        # "scisig" token to make the upload of our settings acceptable
        # to Google.
        # NOTE(review): no explicit parser is passed to BeautifulSoup here;
        # behavior depends on which parser bs4 picks — confirm intended.
        soup = BeautifulSoup(html)

        tag = soup.find(name="form", attrs={"id": "gs_settings_form"})
        if tag is None:
            ScholarUtils.log("info", "parsing settings failed: no form")
            return False

        tag = tag.find("input", attrs={"type": "hidden", "name": "scisig"})
        if tag is None:
            ScholarUtils.log("info", "parsing settings failed: scisig")
            return False

        urlargs = {"scisig": tag["value"], "num": settings.per_page_results, "scis": "no", "scisf": ""}

        if settings.citform != 0:
            urlargs["scis"] = "yes"
            urlargs["scisf"] = "&scisf=%d" % settings.citform

        html = self._get_http_response(
            url=self.SET_SETTINGS_URL % urlargs,
            log_msg="dump of settings result HTML",
            err_msg="applying settings failed",  # fixed typo: was "setttings"
        )
        if html is None:
            return False

        ScholarUtils.log("info", "settings applied")
        return True

    def send_query(self, query):
        """
        This method initiates a search query (a ScholarQuery instance)
        with subsequent parsing of the response.
        """
        self.clear_articles()
        self.query = query

        html = self._get_http_response(
            url=query.get_url(), log_msg="dump of query response HTML", err_msg="results retrieval failed"
        )
        if html is None:
            return

        self.parse(html)

    def get_citation_data(self, article):
        """
        Given an article, retrieves citation link. Note, this requires that
        you adjusted the settings to tell Google Scholar to actually
        provide this information, *prior* to retrieving the article.

        Returns True if citation data is present (or was fetched), False
        if the article has no citation URL or the fetch failed.
        """
        if article["url_citation"] is None:
            return False
        if article.citation_data is not None:
            return True

        ScholarUtils.log("info", "retrieving citation export data")
        data = self._get_http_response(
            url=article["url_citation"], log_msg="citation data response", err_msg="requesting citation data failed"
        )
        if data is None:
            return False

        article.set_citation_data(data)
        return True

    def parse(self, html):
        """
        This method allows parsing of provided HTML content.
        """
        parser = self.Parser(self)
        parser.parse(html)

    def add_article(self, art):
        """Fetch citation data for *art* and append it to the result list."""
        self.get_citation_data(art)
        self.articles.append(art)

    def clear_articles(self):
        """Clears any existing articles stored from previous queries."""
        self.articles = []

    def save_cookies(self):
        """
        This stores the latest cookies we're using to disk, for reuse in a
        later session.
        """
        if ScholarConf.COOKIE_JAR_FILE is None:
            return False
        try:
            self.cjar.save(ScholarConf.COOKIE_JAR_FILE, ignore_discard=True)
            ScholarUtils.log("info", "saved cookies file")
            return True
        except Exception as msg:
            ScholarUtils.log("warn", "could not save cookies file: %s" % msg)
            return False

    def _get_http_response(self, url, log_msg=None, err_msg=None):
        """
        Helper method, sends HTTP request and returns response payload,
        or None on any failure (which is logged via err_msg).
        """
        if log_msg is None:
            log_msg = "HTTP response data follow"
        if err_msg is None:
            err_msg = "request failed"
        try:
            ScholarUtils.log("info", "requesting %s" % unquote(url))

            req = Request(url=url, headers={"User-Agent": ScholarConf.USER_AGENT})
            hdl = self.opener.open(req)
            html = hdl.read()

            ScholarUtils.log("debug", log_msg)
            ScholarUtils.log("debug", ">>>>" + "-" * 68)
            ScholarUtils.log("debug", "url: %s" % hdl.geturl())
            ScholarUtils.log("debug", "result: %s" % hdl.getcode())
            ScholarUtils.log("debug", "headers:\n" + str(hdl.info()))
            ScholarUtils.log("debug", "data:\n" + html.decode("utf-8"))  # For Python 3
            ScholarUtils.log("debug", "<<<<" + "-" * 68)

            return html
        except Exception as err:
            ScholarUtils.log("info", err_msg + ": %s" % err)
            return None
Exemple #49
0
# Ping a set of Stack Exchange profile pages using the browser's cookies,
# so that the accounts register daily activity.
urls = [
    "http://crypto.stackexchange.com/users/593/b-con",
    "http://security.stackexchange.com/users/8857/b-con",
    "http://stackoverflow.com/users/1361836/b-con"
]

logging.basicConfig(filename="/tmp/site-ping.log",
                    datefmt="%m-%d %H:%M",
                    level=logging.DEBUG)

# Extract the cookies from Firefox. The script to do so is co-located.
path = os.path.dirname(os.path.realpath(__file__))
p = subprocess.call(path + "/extract-cookies.sh")

# Load the cookies.
cj = MozillaCookieJar("/tmp/firefox-cookies.txt")
try:
    cj.load()
except FileNotFoundError as ex:  # bug fix: was "FileNotFoundErr" (a NameError)
    logging.error(ex)
    quit(1)

# Use the cookies to visit each of the URLs. Build the opener once and
# reuse it for every request instead of rebuilding it per iteration.
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
for url in urls:
    response = opener.open(url)

    html = response.read().decode("utf-8")
    response.close()

    # The "votes" tab only appears on the user profile when you're logged in.
Exemple #50
0
class PowerSchool:
    """This class manages cookies for accessing PowerSchool, as well as
    providing facilities for retrieving pages."""
    def __init__(self,host=DEFAULT_HOST,cookiejar=None,debug=False):
        """Params:
        host: the protocol, hostname, and port (without a trailing slash)
            that is the root of the PowerSchool url.
        cookiejar: An http.cookiejar.CookieJar or subclass. If a FileCookieJar,
            cookies will be saved after every request.
        debug: sets verbose mode"""
        self.DEBUG = debug
        self.host = host
        self.setCookieJar(cookiejar)

    def setCookieJar(self,cookiejar):
        """Changes the CookieJar used to manage the session.
        Existing cookies will not be transferred.
        
        Params:
        cookiejar: a CookieJar instance, or a string path; a string is
            treated as a MozillaCookieJar file name (loaded if it exists).

        Returns: the old CookieJar"""
        tmpcookies = getattr(self,"cookies",None)
        if type(cookiejar) == str:
            self.cookies = MozillaCookieJar(cookiejar)
            if os.path.exists(cookiejar):
                self.cookies.load(ignore_discard=True)
        else:
            self.cookies = cookiejar
        # New opener so subsequent requests carry the new jar's cookies.
        self.opener = build_opener(HTTPCookieProcessor(self.cookies))
        return tmpcookies

    def _get_page(self,url,data=None):
        """Open *url* (POST if *data* dict given, else GET) and return the
        raw response object; persists cookies if the jar supports save()."""
        start = time.time()
        page = (self.opener.open(url,urlencode(data).encode()) if data else
                self.opener.open(url))
        if self.DEBUG:
            print("Request time: {}".format(time.time()-start))
        if hasattr(self.cookies,"save"):
            self.cookies.save(ignore_discard=True)
        return page

    def _read_page(self,url,data=None):
        """Fetch *url* and return its decoded body, caching it for
        _check_for_logout(); dumps the HTML to /tmp in debug mode."""
        self.__last_page = self._get_page(url,data).read().decode()
        if self.DEBUG:
            fd = open("/tmp/pschool-debug-temp.html","w")
            fd.write(self.__last_page)
            fd.close()
        return self.__last_page
        
    def _get_url(self,url):
        """Join *url* onto the configured host, normalizing the slash."""
        return self.host + (url if url.startswith("/") else "/"+url)

    def _check_for_logout(self):
        """Raise LoggedOut if the last fetched page is the sign-in screen."""
        if self.__last_page.find("Student and Parent Sign In") > -1:
            raise LoggedOut()

    def login(self,username,password):
        """Login to a PowerSchool session using the supplied credentials.

        Raises InvalidCredentials if the server bounces us back to the
        sign-in page after submitting the form."""
        # Scrape the hidden form fields from the public login page.
        data = self._read_page(self._get_url("/public/"))
        form = dict(re.findall(r'<input .+?name="(.+?)".+?value="(.*?)".+?>',
                               data, re.MULTILINE|re.IGNORECASE))
        form["account"] = username
        form["ldappassword"] = password
        pskey = form["contextData"].encode()
        password = password.encode()
    
        # PowerSchool's challenge-response: HMAC the base64'd MD5 of the
        # password (and the lowercased raw password) with the contextData key.
        b64pw = b64encode(md5(password).digest()).decode().rstrip("=")
        form["pw"] = hmac.new(pskey,b64pw.encode()).hexdigest()
        form["dbpw"] = hmac.new(pskey,password.lower()).hexdigest()

        self._read_page(self._get_url("/guardian/home.html"),form)
        try:
            self._check_for_logout()
        except LoggedOut:
            raise InvalidCredentials

    def get(self,page="Main",args=(),**kwargs):
        """Retrieves data for and constructs the supplied Page class.

        *page* may be a Page class or the name of one in the pages module;
        raises TypeError for an unknown page name."""
        if type(page) == str:
            page = getattr(pages,page,None)
            if not page:
                raise TypeError("Invalid page")
        data = self._read_page(self._get_url(page.get_url(*args,**kwargs)))
        self._check_for_logout()
        return page(data,self,(args,kwargs))
Exemple #51
0
class BaseClient(object):
    """
    Base class for working with a remote JSON API.

    Requests are POSTed as an urlencoded 'jsonData' parameter; responses
    may arrive gzip- or deflate-compressed and are transparently
    decompressed by get_result().
    """

    username = None
    password = None
    url      = 'http://localhost:8000/api/'
    headers  = {
        "Content-type": "application/json",
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
    }
    timeout  = 10000
    cookiejar = None
    print_info = False
    code_page = 'utf-8'
    use_basic_auth = False


    def __init__(self, cookie_filename=None, **kwargs):
        """Apply arbitrary attribute overrides; optionally bind a cookie file."""
        for key, val in kwargs.items():
            setattr(self, key, val)

        if cookie_filename:
            self.set_cookiejar(cookie_filename)

    def set_cookiejar(self, name):
        """Attach a MozillaCookieJar stored at *name*, creating the file
        on disk if it does not exist yet."""
        self.cookiejar = MozillaCookieJar(name)
        try:
            self.cookiejar.load()
        except IOError:
            # No cookie file yet: write an empty one so later loads succeed.
            self.cookiejar.save()

    def get_request(self, data):
        """
        Return a new Request object carrying *data* as the 'jsonData'
        POST parameter, with optional HTTP basic auth headers.
        """
        params = urlencode({'jsonData': data})
        params = params.encode('ascii')

        headers = {}
        headers.update(self.headers)
        if self.use_basic_auth and self.username and self.password:
            s = '%s:%s' % (self.username, self.password)
            if six.PY3:
                b = bytes(s, 'utf-8')
            else:
                b = bytes(s.encode('utf-8'))

            headers['Authorization'] = b'Basic ' + base64.b64encode(b)

        request = Request(url=self.url, data=params, headers=headers)

        return request

    def get_opener(self):
        """
        Return a new URL opener with the required processors attached
        (a cookie processor when a jar is configured).
        """
        args = ()

        if self.cookiejar is not None:  # idiom fix: was "not ... is None"
            cookiehand = HTTPCookieProcessor(self.cookiejar)
            args += (cookiehand,)

        return build_opener(*args)


    def get_response(self, request):
        """
        Open *request*, persist cookies, and return the response object.
        Network failures propagate as IOError/OSError.
        """
        opener = self.get_opener()

        # The original wrapped this in "except IOError as e: raise e",
        # which is a no-op; let the exception propagate directly.
        response = opener.open(request, timeout=self.timeout)

        if self.cookiejar is not None:  # idiom fix: was "not ... is None"
            self.cookiejar.save()

        return response

    def get_result(self, data):
        """
        Send *data* to the API and return the (decompressed) raw response
        bytes.
        """
        if self.print_info:
            print('Kwargs: %s' % data.get('kwargs', {}))

        jsondata = json.dumps(data)

        request = self.get_request(jsondata)

        response = self.get_response(request)
        info = response.info()
        encoding = info.get('Content-encoding', None)

        if self.print_info:
            print('Status: %s' % response.code)
            print(info)

        data = response.read()

        # zlib windowBits usage is described at
        # http://www.zlib.net/manual.html#Advanced; in short:
        # RFC 1950 ZLIB (also DEFLATE): wbits 8 to 15
        # RFC 1951 raw DEFLATE: wbits -8 to -15
        # RFC 1952 GZIP: wbits 8 to 15, incremented by 16
        if encoding == 'deflate':
            try:
                return zlib.decompress(data)
            except zlib.error:
                # Some servers send raw DEFLATE without the zlib header.
                return zlib.decompress(data, -zlib.MAX_WBITS)
        elif encoding == 'gzip':
            return zlib.decompress(data, zlib.MAX_WBITS | 16)
        else:
            return data

    def json_loads(self, data):
        """
        Decode *data* (bytes) using the configured code page and parse it
        as JSON.
        """
        data = data.decode(self.code_page)
        data = json.loads(data)

        return data

    def prepare_data(self, data):
        """
        Hook intended for overriding in subclasses.
        The default just attaches the credentials (unless basic auth
        already carries them).
        """
        if self.username and not self.use_basic_auth:
            data['username'] = self.username
            data['password'] = self.password

        return data

    def clean(self, data):
        """
        Parse the raw response and unwrap its 'data' payload, raising
        RemoteAPIError for any non-200 status.
        """
        data = self.json_loads(data)

        if data is None:
            return data

        status = data.get('status', None)

        if status != 200:
            msg = data.get('message', None)
            if msg:
                if six.PY3:
                    error = '%s - %s' % (status, msg)
                else:
                    error = b'%s - %s' % (status, msg.encode(self.code_page))
            else:
                error = data

            raise RemoteAPIError(error)

        return data['data']

    def method(self, method, **kwargs):
        """
        Call API *method* with keyword arguments and return the cleaned
        payload.
        """
        data = {'method': method, 'kwargs': kwargs}
        data = self.prepare_data(data)
        data = self.get_result(data)
        data = self.clean(data)
        return data
Exemple #52
0
 def set_cookiejar(self, name):
     """Attach a MozillaCookieJar backed by file *name*, loading existing
     cookies or creating the file when none exists yet."""
     jar = MozillaCookieJar(name)
     self.cookiejar = jar
     try:
         jar.load()
     except IOError:
         # No readable cookie file yet: write an empty one.
         jar.save()
Exemple #53
0
class ScholarQuerier(object):

    """
    ScholarQuerier instances can conduct a search on Google Scholar
    with subsequent parsing of the resulting HTML content.  The
    articles found are collected in the articles member, a list of
    ScholarArticle instances.
    """
    # Template for the advanced-search query URL; filled in by send_query().
    SCHOLAR_QUERY_URL = ScholarConf.SCHOLAR_SITE + '/scholar?' \
        + 'as_q=%(words)s' \
        + '&as_epq=%(phrase)s' \
        + '&as_oq=%(words_some)s' \
        + '&as_eq=%(words_none)s' \
        + '&as_occt=%(scope)s' \
        + '&as_sauthors=%(authors)s' \
        + '&as_publication=%(pub)s' \
        + '&as_ylo=%(ylo)s' \
        + '&as_yhi=%(yhi)s' \
        + '&btnG=&hl=en&as_sdt=0,5&num=%(num)s'

    # Default URLs for visiting and submitting Settings pane, as of 3/14
    GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \
        + 'sciifh=1&hl=en&as_sdt=0,5'

    SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \
        + 'q=' \
        + '&scisig=%(scisig)s' \
        + '&inststart=0' \
        + '&as_sdt=1,5' \
        + '&as_sdtp=' \
        + '&num=%(num)s' \
        + '&scis=%(scis)s' \
        + '%(scisf)s' \
        + '&hl=en&lang=all&instq=&inst=569367360547434339&save='

    # Older URLs:
    # ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on

    class Parser(ScholarArticleParser120726):
        """Article parser that feeds parsed results back into its querier."""

        def __init__(self, querier):
            ScholarArticleParser120726.__init__(self)
            self.querier = querier

        def handle_article(self, art):
            # Callback invoked by the parser for every article found.
            self.querier.add_article(art)

    def __init__(self):
        self.articles = []  # ScholarArticle results collected so far
        self.query = None   # last ScholarQuery sent, if any
        self.cjar = MozillaCookieJar()

        # If we have a cookie file, load it:
        if ScholarConf.COOKIE_JAR_FILE and \
           os.path.exists(ScholarConf.COOKIE_JAR_FILE):
            try:
                self.cjar.load(ScholarConf.COOKIE_JAR_FILE,
                               ignore_discard=True)
                ScholarUtils.log('debug', 'loaded cookies file')
            except Exception as msg:
                ScholarUtils.log('warn', 'could not load cookies file: %s' % msg)
                self.cjar = MozillaCookieJar() # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))
        self.settings = None # Last settings object, if any

    def apply_settings(self, settings):
        """
        Applies settings as provided by a ScholarSettings instance.

        Returns True on success (or when nothing needs doing), False on
        any retrieval or parsing failure.
        """
        if settings is None or not settings.is_configured():
            return True

        self.settings = settings

        # This is a bit of work. We need to actually retrieve the
        # contents of the Settings pane HTML in order to extract
        # hidden fields before we can compose the query for updating
        # the settings.
        try:
            req = Request(url=self.GET_SETTINGS_URL,
                          headers={'User-Agent': ScholarConf.USER_AGENT})
            hdl = self.opener.open(req)
            html = hdl.read()
        except Exception as err:
            ScholarUtils.log('debug', 'requesting settings failed: %s' % err)
            return False

        # Now parse the required stuff out of the form. We require the
        # "scisig" token to make the upload of our settings acceptable
        # to Google.
        # NOTE(review): no explicit parser is passed to BeautifulSoup here;
        # behavior depends on which parser bs4 picks — confirm intended.
        soup = BeautifulSoup(html)

        tag = soup.find(name='form', attrs={'id': 'gs_settings_form'})
        if tag is None:
            ScholarUtils.log('debug', 'parsing settings failed: no form')
            return False

        tag = tag.find('input', attrs={'type':'hidden', 'name':'scisig'})
        if tag is None:
            ScholarUtils.log('debug', 'parsing settings failed: scisig')
            return False

        urlargs = {'scisig': tag['value'],
                   'num': settings.per_page_results,
                   'scis': 'no',
                   'scisf': '' }

        if settings.citform != 0:
            urlargs['scis'] = 'yes'
            urlargs['scisf'] = '&scisf=%d' % settings.citform

        try:
            req = Request(url=self.SET_SETTINGS_URL % urlargs,
                          headers={'User-Agent': ScholarConf.USER_AGENT})
            hdl = self.opener.open(req)
        except Exception as err:
            ScholarUtils.log('debug', 'applying settings failed: %s' % err)
            return False

        ScholarUtils.log('debug', 'settings applied')
        return True

    def send_query(self, query, scholar_url=None):
        """
        This method initiates a search query (a ScholarQuery instance)
        with subsequent parsing of the response.

        scholar_url: optional URL template overriding SCHOLAR_QUERY_URL.
        """
        self.clear_articles()
        self.query = query

        url = scholar_url if scholar_url else self.SCHOLAR_QUERY_URL

        urlargs = {'words': query.words or '',
                   'words_some': query.words_some or '',
                   'words_none': query.words_none or '',
                   'phrase': query.phrase or '',
                   'scope': 'title' if query.scope_title else 'any',
                   'authors': query.author or '',
                   'pub': query.pub or '',
                   'ylo': query.timeframe[0] or '',
                   'yhi': query.timeframe[1] or '',
                   'num': query.num_results or ScholarConf.MAX_PAGE_RESULTS }

        # Make sure we urlencode all this stuff correctly:
        for key, val in urlargs.items():
            urlargs[key] = quote(str(val))

        url = url % urlargs

        ScholarUtils.log('debug', 'query url: %s' % url)
        req = Request(url=url, headers={'User-Agent': ScholarConf.USER_AGENT})
        hdl = self.opener.open(req)
        html = hdl.read()

        self.parse(html)

    def get_citation_data(self, article):
        """
        Given an article, retrieves citation link. Note, this requires that
        you adjusted the settings to tell Google Scholar to actually
        provide this information, *prior* to retrieving the article.

        Returns True if citation data is present (or was fetched), False
        if the article has no citation link or the fetch failed.
        """
        if article['citlink'] is None:
            return False
        if article.citation_data is not None:
            return True
        try:
            ScholarUtils.log('debug', 'retrieving citation export from %s' \
                             % article['citlink'])
            req = Request(url=article['citlink'],
                          headers={'User-Agent': ScholarConf.USER_AGENT})
            hdl = self.opener.open(req)
            data = hdl.read()
            article.set_citation_data(data)
        except Exception as err:
            ScholarUtils.log('debug', 'requesting citation failed: %s' % err)
            return False

        return True

    def parse(self, html):
        """
        This method allows parsing of provided HTML content.
        """
        parser = self.Parser(self)
        parser.parse(html)

    def add_article(self, art):
        """Fetch citation data for *art* and append it to the result list."""
        self.get_citation_data(art)
        self.articles.append(art)

    def clear_articles(self):
        """Clears any existing articles stored from previous queries."""
        self.articles = []

    def save_cookies(self):
        """
        This stores the latest cookies we're using to disk, for reuse in a
        later session.
        """
        if ScholarConf.COOKIE_JAR_FILE is None:
            return False
        try:
            self.cjar.save(ScholarConf.COOKIE_JAR_FILE,
                           ignore_discard=True)
            ScholarUtils.log('debug', 'saved cookies file')
            return True
        except Exception as msg:
            ScholarUtils.log('warn', 'could not save cookies file: %s' % msg)
            return False