def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Log in to Fetion and set the initial presence status.

    Presence status codes: online:400 invisible:0 busy:600 away:100

    :param mobile: mobile phone number used as the account name
    :param password: account password (may be None if cookies are still valid)
    :param status: presence status code to set after login
    :param cachefile: path of the local cache file ('' disables caching)
    :param cookiesfile: cookies file path; defaults to '<mobile>.cookies'
    '''
    # Optional on-disk cache of account data.
    if cachefile:
        self.cache = Cache(cachefile)
    # Default cookies file is derived from the mobile number.
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile
    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Seed the cookie file with the Netscape header if it doesn't exist yet,
    # so cookiejar.load() below doesn't fail on a missing file.
    # (Fix: use a context manager instead of leaking the file handle.)
    if not os.path.isfile(cookiesfile):
        with open(cookiesfile, 'w') as f:
            f.write(MozillaCookieJar.header)
    cookiejar.load(filename=cookiesfile)
    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password
    # Re-login only when the saved cookie session is no longer alive,
    # then persist the freshly obtained cookies.
    if not self.alive():
        self._login()
        cookiejar.save()
    self.changestatus(status)
class RDWorker:
    """
    Worker class to perform Real-Debrid related actions:
    - format login info so they can be used by Real-Debrid
    - login
    - unrestricting links
    - keeping cookies
    """
    _endpoint = 'http://www.real-debrid.com/ajax/%s'

    def __init__(self, cookie_file):
        # Path of the Netscape-format cookie file used to persist the session.
        self._cookie_file = cookie_file
        self.cookies = MozillaCookieJar(self._cookie_file)

    def login(self, username, password_hash):
        """
        Log into Real-Debrid. password_hash must be a MD5-hash of the password string.
        :param username: account name
        :param password_hash: MD5 hex digest of the password
        :return: None (returns early when a still-valid 'auth' cookie exists)
        :raise LoginError: when Real-Debrid rejects the credentials
        :raise Exception: when the HTTP request itself fails
        """
        if path.isfile(self._cookie_file):
            self.cookies.load(self._cookie_file)
            for cookie in self.cookies:
                if cookie.name == 'auth' and not cookie.is_expired():
                    return  # no need for a new cookie

        # Request a new cookie if no valid cookie is found or if it's expired.
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        # Fix: keep the try narrow so a LoginError raised below is NOT
        # swallowed and re-wrapped as a generic Exception.
        try:
            response = opener.open(self._endpoint % 'login.php?%s' % urlencode({'user': username, 'pass': password_hash}))
            resp = load(response)
            opener.close()
        except Exception as e:
            raise Exception('Login failed: %s' % str(e))

        if resp['error'] == 0:
            self.cookies.save(self._cookie_file)
        else:
            raise LoginError(resp['message'].encode('utf-8'), resp['error'])

    def unrestrict(self, link, password=''):
        """
        Unrestrict a download URL. Returns tuple of the unrestricted URL and the filename.
        :param link: url to unrestrict
        :param password: password to use for the unrestriction
        :return: (download_url, sanitized_filename)
        :raise UnrestrictionError: when Real-Debrid reports an error code
        """
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        response = opener.open(self._endpoint % 'unrestrict.php?%s' % urlencode({'link': link, 'password': password}))
        resp = load(response)
        opener.close()

        if resp['error'] == 0:
            info = resp['generated_links'][0]
            # info[2] is the direct URL, info[0] the display name; '/' would
            # break local paths, so replace it.
            return info[2], info[0].replace('/', '_')
        else:
            raise UnrestrictionError(resp['message'].encode('utf-8'), resp['error'])
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Log in to Fetion and set the initial presence status.

    Presence status codes: online:400 invisible:0 busy:600 away:100
    '''
    if cachefile:
        self.cache = Cache(cachefile)
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile
    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Ensure the cookie file exists with a valid Netscape header so that
    # cookiejar.load() below succeeds.
    # Fix: the original closed `f` in a `finally` block, so if the fallback
    # open() itself raised, `f` was unbound and the finally clause crashed
    # with a NameError; the except branch now manages its own handle.
    try:
        f = open(cookiesfile)
    except IOError:
        with open(cookiesfile, 'w') as f:
            f.write(MozillaCookieJar.header)
    else:
        f.close()
    cookiejar.load(filename=cookiesfile)
    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password
    # Re-login only when the saved session is dead; persist cookies on success.
    if not self.alive():
        if self._login():
            cookiejar.save()
    self.changestatus(status)
class WebBrowser(object):
    '''Keeps cookies in memory, emulating a web browser.
    Note: does not currently execute javascript.'''

    def __init__(self, uAgent=None, headers=None):
        '''uAgent is the user-agent string to present; a Chrome/Mac UA is used by default.'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'
        # Single opener shared by all requests so cookies persist between calls.
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        # Applies process-wide: every socket created afterwards uses this timeout.
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        # Class-body decorator: wraps a fetch-like method so its HTTP response
        # is parsed into an lxml HTML tree before being returned.
        return lambda *a, **k: etree.parse(f(*a, **k), parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''Fetch the web page at `url` (returns an lxml tree via the @newtree decorator).
        To send POST data, pass it already url-encoded (or as a dict) via `data`.'''
        if headers:
            self.opener.addheaders = headers
        # Dicts (or anything non-string) get url-encoded; strings pass through as-is.
        if not (data == None or type(data) == str):
            data = urllib.urlencode(data)
        if method == 'POST':
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise Exception
        return self.last_seen

    def geturl(self):
        # URL of the last response (after any redirects).
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''Save the in-memory cookies to disk; `path` is the file path.'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''Load cookies from disk into memory; `path` is the file path.'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        # Python 2 print statement: dumps name/value pairs of all held cookies.
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
def GetWithCookie( url, cookie_name, data = '', retry = 3):
    """Fetch `url` using cookies persisted under PATH_TMP/cookie_name.

    POSTs `data` when non-empty (and saves any newly issued cookies);
    plain GET otherwise. Retries up to `retry` times on any failure,
    returning False when all attempts are exhausted.
    """
    global PATH_TMP, ACGINDEX_UA
    try:
        cj = MozillaCookieJar( PATH_TMP + cookie_name )
        try :
            cj.load( PATH_TMP + cookie_name )
        except:
            pass # no cookie saved yet - nothing to load
        ckproc = urllib2.HTTPCookieProcessor( cj )
        AmagamiSS = urllib2.build_opener( ckproc )
        AmagamiSS.addheaders = [ ACGINDEX_UA ]
        if data != '':
            request = urllib2.Request( url = url, data = data )
            res = AmagamiSS.open( request )
            cj.save() # only save newly obtained cookies on POST
        else:
            res = AmagamiSS.open( url )
        return Haruka.GetContent( res )
    except:
        # Up to 3 reconnection attempts; give up (return False) after 3 timeouts.
        if retry > 0 :
            return Haruka.GetWithCookie( url, cookie_name, data , retry-1 )
        else:
            return False
def LIVE(url, relogin=False):
    # Resolve the moja.markiza.sk live stream for Kodi. Logs in with the
    # configured account when no valid cookies exist, then follows the chain:
    # page -> videoarchiv iframe -> media.cms embed -> hls url.
    # `relogin=True` forces a fresh login (also used as loop protection).
    if not (settings['username'] and settings['password']):
        xbmcgui.Dialog().ok('Chyba', 'Nastavte prosím moja.markiza.sk konto', '', '')
        xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xbmcgui.ListItem())
        raise RuntimeError
    cj = MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    if not relogin:
        try:
            cj.load(cookiepath)
        except IOError:
            # No saved cookies yet -> fall through to a fresh login.
            relogin = True
    if relogin:
        # Scrape the CSRF token from the login form, then POST credentials.
        response = opener.open(loginurl).read()
        token = re.search(r'name=\"_token_\" value=\"(\S+?)\">', response).group(1)
        logindata = urllib.urlencode({
            'email': settings['username'],
            'password': settings['password'],
            '_token_': token,
            '_do': 'content1-loginForm-form-submit'
        }) + '&login=Prihl%C3%A1si%C5%A5+sa'
        opener.open(loginurl, logindata)
        log('Saving cookies')
        cj.save(cookiepath)
    response = opener.open(url).read()
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(
        1)  #https://videoarchiv.markiza.sk/api/v1/user/live
    # NOTE(review): this replace is a no-op as written; it likely originally
    # decoded '&amp;' entities to '&' and was garbled - confirm against VCS.
    link = link.replace('&', '&')
    response = opener.open(link).read()
    if '<iframe src=\"' not in response:  #handle expired cookies
        if relogin:
            # Already retried with a fresh login -> credentials are wrong.
            xbmcgui.Dialog().ok('Chyba', 'Skontrolujte prihlasovacie údaje', '', '')
            raise RuntimeError  # loop protection
        else:
            LIVE(url, relogin=True)
        return
    opener.addheaders = [('Referer', link)]
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(1)  #https://media.cms.markiza.sk/embed/
    response = opener.open(link).read()
    if '<title>Error</title>' in response:
        error = re.search('<h2 class="e-title">(.*?)</h2>', response).group(
            1)  #Video nie je dostupné vo vašej krajine
        xbmcgui.Dialog().ok('Chyba', error, '', '')
        raise RuntimeError
    link = re.search(r'\"hls\": \"(\S+?)\"', response).group(
        1)  #https://h1-s6.c.markiza.sk/hls/markiza-sd-master.m3u8
    response = opener.open(link).read()
    # Kodi convention: append '|Cookie=...' header block to the stream URL.
    cookies = '|Cookie='
    for cookie in cj:
        cookies += cookie.name + '=' + cookie.value + ';'
    cookies = cookies[:-1]
    play_item = xbmcgui.ListItem(path=link + cookies)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=play_item)
def main(*args):
    """Authenticate to the Shibboleth-protected service-provider URL given on the command line.

    Loads any saved cookies from the store directory, drives the Shibboleth
    login flow, and persists the resulting cookies on success.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] URL")
    parser.add_option("-u", "--username", help="the username to login as.")
    parser.add_option("-d", "--storedir", dest="store_dir",
                      help="the directory to store the certificate/key and \
config file",
                      metavar="DIR",
                      default=path.join(homedir, ".shibboleth"))
    parser.add_option("-i", "--idp", help="unique ID of the IdP used to log in")
    parser.add_option('-v', '--verbose', dest='verbose', action='count',
                      help="Increase verbosity (specify multiple times for more)")
    opts, args = parser.parse_args()

    # Translate the -v count into a log level; WARNING is the default.
    log_level = logging.WARNING
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level)

    # No URL supplied: show usage and bail out.
    if not args:
        parser.print_help()
        return

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    target_sp = args[0]
    idp = Idp(opts.idp)
    creds = CredentialManager()
    if opts.username:
        creds.username = opts.username

    # Reuse previously saved cookies when present.
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    jar = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        jar.load()

    shibboleth = Shibboleth(idp, creds, jar)
    shibboleth.openurl(target_sp)
    print("Successfully authenticated to %s" % target_sp)
    jar.save()
def main(*args):
    # Log out of every Shibboleth session recorded in the saved cookie jar:
    # collects the domains of _shibsession_/_shibstate_ cookies and hits each
    # domain's /Shibboleth.sso/Logout endpoint.
    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-d", "--storedir", dest="store_dir",
                    help="the directory to store the certificate/key and \
config file",
                    metavar="DIR",
                    default=path.join(homedir, ".shibboleth"))
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")
    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all it not well.
    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable
    # default format.
    logging.basicConfig(level=log_level)

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    # NOTE(review): `sp` is assigned but never used below - looks vestigial.
    if args:
        sp = args[0]

    # if the cookies file exists load it
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    # One logout URL per distinct domain holding a Shibboleth session cookie.
    logout_urls = []
    for cookie in cj:
        if cookie.name.startswith('_shibsession_') or \
           cookie.name.startswith('_shibstate_'):
            logout_urls.append(
                "https://%s/Shibboleth.sso/Logout" % cookie.domain)

    logout_urls = list(set(logout_urls))

    opener = urllib2.build_opener(HTTPCookieProcessor(cookiejar=cj))
    for url in logout_urls:
        request = urllib2.Request(url)
        log.debug("GET: %s" % request.get_full_url())
        response = opener.open(request)

    # Persist the jar so the cleared sessions are reflected on disk.
    cj.save()
def save_cookies_Moz(url):
    """Fetch `url` and persist the cookies it sets to disk in Mozilla/Netscape format.

    The jar is written to 'cookies_Moz.txt' in the current directory.
    """
    # A MozillaCookieJar bound to the target file collects cookies during the request.
    filename = 'cookies_Moz.txt'
    jar = MozillaCookieJar(filename)
    opener = build_opener(HTTPCookieProcessor(jar))
    opener.open(url)
    # Both flags must be True, otherwise session/expired cookies are dropped
    # and the write effectively fails.
    jar.save(ignore_discard=True, ignore_expires=True)
def Get( url, data = '', refer = 'http://www.pixiv.net/', retry = 3 ):
    """Fetch `url` with pixiv cookies loaded from ABS_PATH/pixiv.cookie.txt.

    POSTs `data` when non-empty (saving any new cookies); plain GET otherwise.
    `refer` is sent as the Referer header. Retries up to `retry` times,
    returning False after all attempts fail.
    """
    global ABS_PATH
    cj = MozillaCookieJar( ABS_PATH + 'pixiv.cookie.txt' )
    try :
        cj.load( ABS_PATH + 'pixiv.cookie.txt' )
    except:
        pass # no cookie saved yet - nothing to load
    ckproc = urllib2.HTTPCookieProcessor( cj )
    opener = urllib2.build_opener( ckproc )
    opener.addheaders = [
        ('Accept', '*/*'),
        ('Accept-Language', 'zh-CN,zh;q=0.8'),
        ('Accept-Charset', 'UTF-8,*;q=0.5'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31'),
        ('Referer', refer)
    ]
    # Prevent weibo from serving the English site to overseas visitors.
    if 'weibo.com' in url:
        opener.addheaders = [('Cookie', 'lang=zh-cn; SUB=Af3TZPWScES9bnItTjr2Ahd5zd6Niw2rzxab0hB4mX3uLwL2MikEk1FZIrAi5RvgAfCWhPyBL4jbuHRggucLT4hUQowTTAZ0ta7TYSBaNttSmZr6c7UIFYgtxRirRyJ6Ww%3D%3D; UV5PAGE=usr512_114; UV5=usrmdins311164')]
    debug('Network: url - ' + url)
    try:
        # Send the request.
        if data != '':
            debug('Network: post')
            debug(data)
            request = urllib2.Request( url = url, data = data )
            res = opener.open( request, timeout = 15 )
            cj.save() # only save newly obtained cookies on POST
        else:
            debug('Network: get')
            res = opener.open( url, timeout = 15 )
        debug('Network: Status Code - ' + str(res.getcode()))
        return GetContent( res )
    except Exception, e:
        # Auto-retry: at most 3 attempts per image.
        if retry > 0:
            return Get( url, data, refer, retry-1 )
        else:
            log(e, 'Error: unable to get %s' % url)
            return False
def get_new_cookie(new_username,new_password,cookie_jar_path):
    # Authenticate against NASA URS (Earthdata) with HTTP Basic auth and, on
    # success, save the resulting cookie jar to cookie_jar_path.
    # Returns the jar on success, False on a 401, and exits the process on
    # any other failure.
    # Build URS4 Cookie request
    auth_cookie_url = asf_urs4['url'] + '?client_id=' + asf_urs4['client'] + '&redirect_uri=' + asf_urs4['redir'] + '&response_type=code&state='
    try:
        #python2
        user_pass = base64.b64encode (bytes(new_username+":"+new_password))
    except TypeError:
        #python3
        user_pass = base64.b64encode (bytes(new_username+":"+new_password, "utf-8"))
        user_pass = user_pass.decode("utf-8")

    # Authenticate against URS, grab all the cookies
    cookie_jar = MozillaCookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie_jar), HTTPHandler(), HTTPSHandler())
    request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)})

    # Watch out cookie rejection!
    try:
        response = opener.open(request)
    except HTTPError as e:
        if e.code == 401:
            print (" > Username and Password combo was not successful. Please try again.")
            return False
        else:
            # If an error happens here, the user most likely has not confirmed EULA.
            print ("\nIMPORTANT: There was an error obtaining a download cookie!")
            print ("Your user appears to lack permission to download data from the ASF Datapool.")
            print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
            exit(-1)
    except URLError as e:
        print ("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ")
        print ("Try cookie generation later.")
        exit(-1)

    # Did we get a cookie?
    if check_cookie_is_logged_in(cookie_jar):
        #COOKIE SUCCESS!
        print('Saving cookie jar file')
        cookie_jar.save(cookie_jar_path)
        return cookie_jar

    # if we aren't successful generating the cookie, nothing will work. Stop here!
    print ("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.")
    print ("Response was {0}.".format(response.getcode()))
    print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
    exit(-1)
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Log in to Fetion and set the initial presence status.

    Presence status codes: online:400 invisible:0 busy:600 away:100
    '''
    # Optional on-disk cache of account data.
    if cachefile:
        self.cache = Cache(cachefile)
    # Default cookies file is derived from the mobile number.
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile
    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Ensure the cookie file exists with a valid Netscape header so that
    # cookiejar.load() below succeeds on first run.
    try:
        f = open(cookiesfile)
    except IOError:
        f = open(cookiesfile, 'w')
        f.write(MozillaCookieJar.header)
    finally:
        f.close()
    cookiejar.load(filename=cookiesfile)
    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password
    # Re-login only when the saved cookie session is no longer alive;
    # persist the new cookies only if the login succeeded.
    if not self.alive():
        if self._login():
            cookiejar.save()
    self.changestatus(status)
class CookieWay:
    """Bridge a Mozilla/Netscape cookie jar between requests sessions and selenium drivers."""

    def __init__(self):
        # Shared in-memory jar; load()/save() persist it in Netscape format.
        self.cookiejar = MozillaCookieJar()

    def load(self, file="cookie.txt"):
        """Load cookies from disk into the jar."""
        self.cookiejar.load(file, ignore_discard=True, ignore_expires=True)

    def save(self, file="cookie.txt"):
        """Save the jar to disk."""
        self.cookiejar.save(file, ignore_discard=True, ignore_expires=True)

    def torequestscj(self, s):
        """Copy every jar cookie into the requests session `s`."""
        for item in self.cookiejar:
            cookiesobject = requests.cookies.create_cookie(domain=item.domain, name=item.name, value=item.value)
            s.cookies.set_cookie(cookiesobject)

    def toseleniumcj(self, driver):
        """Copy jar cookies into the selenium `driver`.

        Selenium only accepts cookies for the currently loaded origin, so the
        driver first visits each distinct domain before adding its cookies.
        """
        domains = []
        for item in self.cookiejar:
            if item.domain not in domains:
                domains.append(item.domain)
        # Strip a leading dot so the domain is navigable as a URL.
        for i in range(len(domains)):
            if domains[i][0:1] == ".":
                domains[i] = domains[i][1:]
        domains = list(set(domains))
        for item in domains:
            driver.get("https://" + item)
            for item2 in self.cookiejar:
                if item2.domain == item or item2.domain == "." + item:
                    cookie_dict = {
                        'domain': item2.domain,
                        'name': item2.name,
                        'value': item2.value,
                        'secure': item2.secure
                    }
                    if item2.path_specified:
                        cookie_dict['path'] = item2.path
                    driver.add_cookie(cookie_dict)

    def sele2resq(self, driver, s):
        """Copy cookies selenium -> requests via the jar."""
        self.selcj_cj(driver)
        self.torequestscj(s)

    def resq2sele(self, s, driver):
        """Copy cookies requests -> selenium via the jar."""
        self.reqcj_cj(s)
        self.toseleniumcj(driver)

    def selcj_cj(self, driver):
        """Import the selenium driver's cookies into the jar."""
        cookie = driver.get_cookies()
        for s_cookie in cookie:
            self.cookiejar.set_cookie(
                Cookie(
                    version=0,
                    name=s_cookie['name'],
                    value=s_cookie['value'],
                    port='80',
                    port_specified=False,
                    domain=s_cookie['domain'],
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=s_cookie['path'],
                    path_specified=True,
                    secure=s_cookie['secure'],
                    # Fix: expires must be an int; a string breaks
                    # Cookie.is_expired() comparisons. TODO: could use the
                    # real s_cookie['expiry'] when present.
                    expires=2069592763,
                    discard=False,
                    comment=None,
                    comment_url=None,
                    # Fix: rest=None makes has_nonstandard_attr() and
                    # save() paths crash; Cookie expects a dict here.
                    rest={},
                    rfc2109=False))

    def reqcj_cj(self, s):
        """Import the requests session's cookies into the jar."""
        for s_cookie in s.cookies:
            self.cookiejar.set_cookie(
                Cookie(
                    version=0,
                    name=s_cookie.name,
                    value=s_cookie.value,
                    port='80',
                    port_specified=False,
                    domain=s_cookie.domain,
                    domain_specified=True,
                    domain_initial_dot=False,
                    path="/",
                    path_specified=True,
                    secure=True,
                    expires=2069592763,  # fix: int, not str (see selcj_cj)
                    discard=False,
                    comment=None,
                    comment_url=None,
                    rest={},  # fix: dict, not None (see selcj_cj)
                    rfc2109=False))
def get_url(url, config, additional_headers=None, additional_query_string=None, post_data=None, fail_silent=False, no_cache=False,
            return_json_errors=None, return_final_url=False, cookie_file=None):
    """Fetch `url` with gzip, ETag caching, optional cookies and proxy support.

    :param url: target URL
    :param config: dict providing at least 'USER_AGENT' (and optional 'http_headers')
    :param additional_headers: extra headers merged over the defaults
    :param additional_query_string: dict appended to the URL's query string
    :param post_data: dict or urlencoded string; presence switches to POST
    :param fail_silent: on failure, return quietly instead of notifying + exiting
    :param no_cache: skip the ETag cache entirely
    :param return_json_errors: error codes to pass back as {'json_errors': [...]}
    :param return_final_url: when True, return (final_url, content)
    :param cookie_file: Netscape cookie file to load/save for this request
    :return: response content (or tuple with final url)
    """
    # Fix: avoid a mutable default argument; [] remains the effective default.
    if return_json_errors is None:
        return_json_errors = []
    response_content = ''
    # Cache key covers url + headers + query + body so variants don't collide.
    request_hash = sha512((url + dumps(additional_headers) + dumps(additional_query_string) + dumps(post_data)).encode('utf-8')).hexdigest()
    final_url = url
    if xbmc_helper().get_bool_setting('debug_requests') is True:
        xbmc_helper().log_debug(
            'get_url - url: {} headers {} query {} post {} no_cache {} silent {} request_hash {} return_json_errors {}, cookie_file', url,
            additional_headers, additional_query_string, post_data, no_cache, fail_silent, request_hash, return_json_errors, cookie_file)

    if no_cache is True:
        etags_data = None
    else:
        etags_data = get_etags_data(request_hash)

    try:
        headers = {
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': config['USER_AGENT'],
            'Accept': '*/*',
        }
        if additional_headers is not None:
            headers.update(additional_headers)

        if config.get('http_headers', None) is not None:
            headers.update(config.get('http_headers', []))

        # Conditional GET: let the server answer 304 when our copy is current.
        if etags_data is not None:
            headers.update({'If-None-Match': etags_data['etag']})

        if additional_query_string is not None:
            _url = compat._format('{}{}{}', url, '?' if url.find('?') == -1 else '&', urlencode(additional_query_string))
            url = _url

        if isinstance(post_data, dict):
            post_data = urlencode(post_data)

        cookie_processor = None
        cookie_jar = None
        if cookie_file is not None:
            cookie_jar = MozillaCookieJar(cookie_file)
            try:
                cookie_jar.load()
            # Fix: bind the caught instance - the original logged
            # LoadError.strerror (a class attribute descriptor), not the
            # actual error message.
            except LoadError as load_error:
                xbmc_helper().log_debug('Failed to load from cookiefile {} with error {} - new session?', cookie_file, str(load_error))
            cookie_processor = HTTPCookieProcessor(cookie_jar)

        if xbmc_helper().get_bool_setting('use_https_proxy') is True and xbmc_helper().get_text_setting(
                'https_proxy_host') != '' and xbmc_helper().get_int_setting('https_proxy_port') != 0:
            proxy_uri = compat._format('{}:{}', xbmc_helper().get_text_setting('https_proxy_host'),
                                       xbmc_helper().get_text_setting('https_proxy_port'))
            xbmc_helper().log_debug('Using proxy uri {}', proxy_uri)
            prxy_handler = ProxyHandler({
                'http': proxy_uri,
                'https': proxy_uri,
            })
            if cookie_processor is None:
                install_opener(build_opener(prxy_handler))
            else:
                install_opener(build_opener(prxy_handler, cookie_processor))
        elif cookie_processor is not None:
            install_opener(build_opener(cookie_processor))

        if post_data is not None:
            request = Request(url, data=post_data.encode('utf-8'), headers=headers)
        else:
            request = Request(url, headers=headers)

        response = urlopen(request, timeout=40)
        if response.info().get('Content-Encoding') == 'gzip':
            response_content = compat._decode(GzipFile(fileobj=BytesIO(response.read())).read())
        else:
            response_content = compat._decode(response.read())

        if cookie_jar is not None:
            cookie_jar.save()

        final_url = response.geturl()
        _etag = response.info().get('etag', None)
        if no_cache is False and _etag is not None:
            set_etags_data(request_hash, _etag, response_content)

    except HTTPError as http_error:
        # 304 Not Modified -> serve the cached body.
        if http_error.code == 304 and etags_data.get('data', None) is not None:
            response_content = etags_data.get('data')
        else:
            try:
                if http_error.info().get('Content-Encoding') == 'gzip':
                    error_body = compat._decode(GzipFile(fileobj=BytesIO(http_error.read())).read())
                else:
                    error_body = compat._decode(http_error.read())
                xbmc_helper().log_debug('HTTP ERROR: {}', error_body)
                json_errors = loads(error_body)
                xbmc_helper().log_debug('JSON ERRORS: {}', json_errors)
                has_decoded_error = False
                # Normalize the two observed error shapes into {'errors': [...]}.
                if isinstance(json_errors, dict) and 'errors' not in json_errors.keys() and 'code' in json_errors.keys():
                    json_errors = {'errors': [json_errors]}
                elif isinstance(json_errors, list) and len(json_errors) == 1 and isinstance(json_errors[0], dict):
                    json_errors = {'errors': json_errors}

                err_str = str(http_error.code)
                return_errors = []
                if isinstance(json_errors, dict):
                    for error in json_errors.get('errors', []):
                        if 'msg' in error.keys():
                            err_str = compat._format('{}|{}', err_str, error.get('msg'))
                            has_decoded_error = True
                        if 'code' in error.keys() and error['code'] in return_json_errors:
                            return_errors.append(error['code'])
                            has_decoded_error = True

                xbmc_helper().log_debug('return_json_errors {}', return_errors)

                # Whitelisted error codes are handed back to the caller as JSON.
                if len(return_errors) > 0:
                    response_content = dumps({'json_errors': return_errors})
                elif has_decoded_error is True:
                    xbmc_helper().notification(
                        'Error',
                        err_str,
                    )
                    exit(0)
            except Exception:
                # Error body wasn't decodable JSON - surface the original HTTP error.
                raise http_error

    except Exception as e:
        xbmc_helper().log_error('Failed to load url: {} headers {} post_data {} - Exception: {}', url, headers, post_data, e)
        if fail_silent is True:
            pass
        else:
            xbmc_helper().notification(compat._format(xbmc_helper().translation('ERROR'), 'URL Access'),
                                       compat._format(xbmc_helper().translation('MSG_NO_ACCESS_TO_URL'), str(url)))
            exit(0)

    if return_final_url:
        return final_url, response_content

    return response_content
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20200110T101421_20200110T101446_019753_025598_C902-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191229T101421_20191229T101446_019578_025007_DB2A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191217T101422_20191217T101447_019403_024A73_D2A9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191205T101422_20191205T101447_019228_0244DD_9778-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191123T101423_20191123T101448_019053_023F55_95B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191111T101423_20191111T101448_018878_0239B4_3FCF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191030T101423_20191030T101448_018703_02340F_3D8D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191018T101423_20191018T101448_018528_022E97_0AEB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191006T101423_20191006T101448_018353_022937_B959-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190912T101422_20190912T101447_018003_021E50_B3FB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190831T101421_20190831T101446_017828_0218D8_1ADE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190819T101421_20190819T101446_017653_021365_B751-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190807T101420_20190807T101445_017478_020DEF_A757-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101514_20190801T101539_028374_0334DB_E6C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101449_20190801T101514_028374_0334DB_6CA1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190726T101419_20190726T101444_017303_0208A8_2D9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190714T101419_20190714T101444_017128_020394_A8B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190702T101418_20190702T101443_016953_01FE6B_BE7D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190620T101417_20190620T101442_016778_01F93E_D609-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190608T101416_20190608T101441_016603_01F407_282F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190527T101416_20190527T101441_016428_01EECF_79D2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190515T101415_20190515T101440_016253_01E971_7A00-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190503T101415_20190503T101440_016078_01E3E6_D149-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190421T101414_20190421T101439_015903_01DE0C_E919-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190409T101414_20190409T101439_015728_01D843_E7B3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190328T101413_20190328T101438_015553_01D27A_7404-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190316T101413_20190316T101438_015378_01CCBE_781F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190304T101413_20190304T101438_015203_01C713_17EF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190220T101413_20190220T101438_015028_01C151_EA49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190208T101413_20190208T101438_014853_01BB8C_940D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190127T101414_20190127T101439_014678_01B5D2_3B0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190115T101414_20190115T101439_014503_01B03A_4439-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190103T101414_20190103T101439_014328_01AA92_7D9B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181222T101415_20181222T101440_014153_01A4CF_3F05-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181210T101415_20181210T101440_013978_019F03_1C29-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181128T101416_20181128T101441_013803_01995A_6DD3-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181116T101416_20181116T101441_013628_0193C1_FE12-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181104T101416_20181104T101441_013453_018E4D_0014-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101420_20181023T101445_013278_0188CC_5952-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101355_20181023T101420_013278_0188CC_0FA6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181011T101417_20181011T101442_013103_01835D_D0A0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180929T101416_20180929T101441_012928_017E0F_226F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180917T101416_20180917T101441_012753_0178B3_B66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180905T101415_20180905T101440_012578_017358_3259-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180824T101415_20180824T101440_012403_016DE5_85C3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180812T101414_20180812T101439_012228_01687D_BCA9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180731T101414_20180731T101439_012053_01631A_ADBC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180719T101413_20180719T101438_011878_015DD1_3E69-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180707T101412_20180707T101437_011703_015872_5055-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180625T101411_20180625T101436_011528_015300_5709-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180613T101411_20180613T101436_011353_014D8E_1799-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180601T101410_20180601T101435_011178_014821_B178-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180520T101409_20180520T101434_011003_014273_5667-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180508T101408_20180508T101433_010828_013CCB_18C3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180426T101408_20180426T101433_010653_013720_457C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180414T101407_20180414T101432_010478_01318E_FB0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180402T101407_20180402T101432_010303_012BEA_9E94-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180321T101406_20180321T101431_010128_012640_6D69-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180309T101406_20180309T101431_009953_01208C_4F7B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180225T101406_20180225T101431_009778_011AAD_2181-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180213T101407_20180213T101432_009603_0114ED_D868-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180201T101407_20180201T101432_009428_010F21_C8FA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180120T101407_20180120T101432_009253_010968_4DBE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180108T101408_20180108T101433_009078_0103B1_EB1D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171227T101408_20171227T101433_008903_00FDFF_A4F1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171215T101409_20171215T101434_008728_00F863_906F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171203T101409_20171203T101434_008553_00F2D6_B8D7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171121T101409_20171121T101434_008378_00ED57_D6D0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171028T101410_20171028T101435_008028_00E2F3_6DFC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171016T101410_20171016T101435_007853_00DDE1_829D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171004T101409_20171004T101434_007678_00D8F4_5C9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170922T101409_20170922T101434_007503_00D3F7_9FEC-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170910T101409_20170910T101434_007328_00CED7_2D8E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170829T101408_20170829T101433_007153_00C9B8_96C9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170817T101408_20170817T101433_006978_00C4A9_5D92-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170805T101407_20170805T101432_006803_00BF8B_4F73-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170724T101407_20170724T101432_006628_00BA88_2017-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170712T101406_20170712T101431_006453_00B58B_7674-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170630T101405_20170630T101430_006278_00B098_CAC7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170618T101404_20170618T101429_006103_00AB89_7D52-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170606T101404_20170606T101429_005928_00A666_6411-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170525T101403_20170525T101428_005753_00A14F_A827-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170513T101402_20170513T101427_005578_009C52_4E38-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170501T101402_20170501T101427_005403_009788_B5E9-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170419T101401_20170419T101426_005228_009270_5637-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170407T101401_20170407T101426_005053_008D67_BB68-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170326T101400_20170326T101425_004878_008859_36E8-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170314T101400_20170314T101425_004703_008359_7A42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170302T101400_20170302T101425_004528_007E2B_F8A2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170206T101400_20170206T101425_004178_0073C4_69B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170113T101401_20170113T101426_003828_00695B_0B49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20161220T101403_20161220T101428_003478_005F1B_E6DD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161126T101403_20161126T101428_003128_005520_5BBB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161102T101404_20161102T101429_002778_004B45_F931-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161009T101404_20161009T101429_002428_00419A_2FD0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20160927T101404_20160927T101429_002253_003CAB_BC6E-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101423_20160909T101448_012974_01487C_40C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101448_20160909T101513_012974_01487C_E55B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160816T101434_20160816T101503_012624_013CE1_AA51-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160723T101444_20160723T101513_012274_013152_9A67-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101426_20160629T101455_011924_0125E4_7F71-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101455_20160629T101520_011924_0125E4_D66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160605T101440_20160605T101505_011574_011AE7_0C49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160512T101439_20160512T101504_011224_010F91_FCE1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160418T101435_20160418T101500_010874_01048F_89B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160325T101434_20160325T101459_010524_00FA2B_4EAD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160301T101434_20160301T101459_010174_00F035_3B54-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101440_20160206T101505_009824_00E617_C31E-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101415_20160206T101440_009824_00E617_D79A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160113T101434_20160113T101459_009474_00DBEE_5DBA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151220T101435_20151220T101500_009124_00D1EE_CFED-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151102T101442_20151102T101507_008424_00BE79_E2E7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151009T101442_20151009T101507_008074_00B50C_12FD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150915T101441_20150915T101506_007724_00ABB3_226C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150822T101441_20150822T101506_007374_00A234_599D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150729T101439_20150729T101504_007024_0098B2_E48E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150705T101438_20150705T101503_006674_008EB2_3496-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101442_20150518T101507_005974_007B3F_EFEC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101417_20150518T101442_005974_007B3F_DF42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150424T101426_20150424T101451_005624_00734A_AD5A-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101444_20150331T101509_005274_006AB8_213D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101419_20150331T101444_005274_006AB8_EBF3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101444_20150307T101509_004924_006269_7919-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101419_20150307T101444_004924_006269_9089-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101443_20150211T101508_004574_005A0A_8FEE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101418_20150211T101443_004574_005A0A_4D0C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101444_20150118T101509_004224_00522A_42D5-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101419_20150118T101444_004224_00522A_26FE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101439_20141225T101504_003874_004A4B_D1FC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101414_20141225T101439_003874_004A4B_367E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101440_20141201T101505_003524_004254_556F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101415_20141201T101440_003524_004254_5C25-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101441_20141107T101506_003174_003A78_745C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101416_20141107T101441_003174_003A78_5D83-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101419_20141014T101444_002824_0032F1_B89E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101444_20141014T101509_002824_0032F1_B54D-PREDORB-10m-power-filt-rtc-gamma.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." 
.format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar 
self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! 
try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! 
return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. 
" ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. 
") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. 
sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. \n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." 
% (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( 
"--------------------------------------------------------------------------------" )
class Bilibili():
    """Bilibili client: login, category browsing, play-URL resolution.

    Python 2 code: relies on urllib/urllib2, dict.has_key and str-based md5.
    Cookies persist in a MozillaCookieJar next to this module; a logged-in
    session is detected via the 'DedeUserID' cookie.
    """
    name = u'哔哩哔哩 (Bilibili)'

    api_url = 'http://interface.bilibili.com/playurl?'
    bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
    SEC1 = '94aba54af9065f71de72f5508f1cd42e'
    SEC2 = '9b288147e5474dd2aa67085f716c560d'
    supported_stream_profile = [u'流畅', u'高清', u'超清']
    stream_types = [{
        'id': 'hdflv'
    }, {
        'id': 'flv'
    }, {
        'id': 'hdmp4'
    }, {
        'id': 'mp4'
    }, {
        'id': 'live'
    }, {
        'id': 'vc'
    }]
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET, width=720,
                 height=480):
        """Load the cookie jar, detect login state and install the opener."""
        self.defaultHeader = {'Referer': 'http://www.bilibili.com'}
        #self.defaultHeader = {}
        self.appkey = appkey
        self.appsecret = appsecret
        self.WIDTH = width
        self.HEIGHT = height
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            # 'DedeUserID' cookie carries the member id when logged in
            key = None
            for ck in self.cj:
                if ck.name == 'DedeUserID':
                    key = ck.value
                    break
            if key is not None:
                self.is_login = True
                self.mid = str(key)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)
        # drop any stale subtitle file from a previous run (best effort)
        try:
            os.remove(self._get_tmp_dir() + '/tmp.ass')
        except:
            pass

    def _get_tmp_dir(self):
        """Return the system temp dir, or '' if it cannot be determined."""
        try:
            return tempfile.gettempdir()
        except:
            return ''

    def get_captcha(self, path=None):
        """Fetch a login captcha image and write it to *path*; return path."""
        # first request only primes the 'sid' session cookie if it is missing
        key = None
        for ck in self.cj:
            if ck.name == 'sid':
                key = ck.value
                break
        if key is None:
            get_html(
                LOGIN_CAPTCHA_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = get_html(
            LOGIN_CAPTCHA_URL.format(random()),
            decoded=False,
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        """RSA-encrypt hash+password per the passport API, return urlquoted b64."""
        import rsa
        result = loads(
            get_html(
                LOGIN_HASH_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build a signed query string: sorted params + md5(query+appsecret)."""
        params['appkey'] = self.appkey
        data = ''
        keys = params.keys()
        # must sorted. urllib.urlencode(params) doesn't work
        keys.sort()
        for key in keys:
            data += '{}={}&'.format(key, urllib.quote(str(params[key])))
        data = data[:-1]  # remove last '&'
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        """Scrape the category tree from the homepage into a dict of nodes."""
        # FIX: every node must carry a 'subs' list — the original omitted it
        # and node['subs'].append(...) below raised KeyError.
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(get_html(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)
        #Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }
        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(get_html(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        """Return [{tid: node}, …] for *tid* and its direct sub-categories."""
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url']}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        """Return the display title for a category id."""
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        """Return the supported sort orders."""
        return ORDER

    def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20):
        """List videos of a category, optionally filtered by tag id."""
        if tag == 0:
            url = LIST_BY_ALL.format(tid, pagesize, page)
        else:
            url = LIST_BY_TAG.format(tag, tid, pagesize, page)
        results = loads(get_html(url))
        return results

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=20):
        """Return (videos, total_pages) for a category listing."""
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = []
        # the API returns a dict keyed by stringified index, not a list
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                continue
        return results, result['pages']

    def get_my_info(self):
        """Return the logged-in user's profile data, or [] when logged out."""
        if self.is_login == False:
            return []
        result = loads(get_html(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=20):
        """Return (followed bangumi, total_pages); [] when logged out."""
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        """Return season detail; unwraps a JSONP response if necessary."""
        url = BANGUMI_SEASON_URL.format(season_id)
        result = get_html(url, headers=self.defaultHeader)
        if result[0] != '{':
            # strip JSONP wrapper: callback( ... );
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=20):
        """Return (history entries, guessed total pages); [] when logged out."""
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        # API exposes no total count: assume one more page while full pages return
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=20):
        """Return (feed entries, total_pages); [] when logged out."""
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = loads(get_html(url, headers=self.defaultHeader))
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=20):
        """Return the list of followed users; [] when logged out."""
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url))
        return result['data']['list']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=20):
        """Return (videos, total_pages) uploaded by user *mid*."""
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        """Return the channel list of user *mid*; [] when logged out."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav_box(self):
        """Return the user's favourite folders; [] when logged out."""
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=20):
        """Return (videos, total_pages) inside favourite folder *fav_box*."""
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        """Log in; return (True, '') on success or (False, error message)."""
        #utils.get_html('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = get_html(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })
        # success is signalled by the DedeUserID cookie appearing in the jar
        key = None
        for ck in self.cj:
            if ck.name == 'DedeUserID':
                key = ck.value
                break
        if key is None:
            return False, LOGIN_ERROR_MAP[loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(key)
        return True, ''

    def logout(self):
        """Clear and persist the cookie jar, dropping login state."""
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20):
        """Return (page results, total_pages), recursively fetching up to
        *pagesize* further pages."""
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = [result]
        # pagesize doubles as the remaining recursion budget
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(
                aid, int(page) + 1, fav, pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        """Return the raw page-list JSON for *aid*, or {} on any failure."""
        url = AV_URL.format(aid)
        try:
            page = get_html(url)
            result = loads(page)
        except:
            result = {}
        return result

    # Use niconvert to generate an ASS subtitle file from the danmaku stream
    def parse_subtitle(self, cid):
        """Write danmaku for *cid* to <tmpdir>/tmp.ass; return its basename
        or '' when the comment site cannot be parsed."""
        page_full_url = COMMENT_URL.format(cid)
        website = create_website(page_full_url)
        if website is None:
            return ''
        else:
            text = website.ass_subtitles_text(
                font_name=u'黑体',
                font_size=24,
                resolution='%d:%d' % (self.WIDTH, self.HEIGHT),
                line_count=12,
                bottom_margin=0,
                tune_seconds=0)
            f = open(self._get_tmp_dir() + '/tmp.ass', 'w')
            f.write(text.encode('utf8'))
            f.close()
            return 'tmp.ass'

    def get_video_urls(self, cid):
        """Resolve the playable URL list for *cid* via the signed interface."""
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = parseString(get_html(url))
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            # FIX: test the stream URL (u), not the API url, and keep the
            # rewritten value — the original discarded re.sub's result.
            if re.match(r'.*\.qqvideo\.tc\.qq\.com', u):
                u = re.sub(r'.*\.qqvideo\.tc', 'http://vsrc.store', u)
            urls.append(u)
            #urls.append(u + '|Referer={}'.format(urllib.quote('https://www.bilibili.com/')))
        return urls

    def add_history(self, aid, cid):
        """Record a playback history entry for (aid, cid)."""
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        get_html(url)

    def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs):
        """Call the (bangumi) playurl API with a signed query; return raw XML."""
        ts = str(int(time.time()))
        if not bangumi:
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(
                cid, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest()
            api_url = self.api_url + params_str + '&sign=' + chksum
        else:
            mod = 'movie' if bangumi_movie else 'bangumi'
            params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format(
                cid, mod, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest()
            api_url = self.bangumi_api_url + params_str + '&sign=' + chksum
        return get_html(api_url)

    def download_by_vid(self, cid, bangumi, **kwargs):
        """Return quoted stream URLs (with Referer hint) for *cid*.

        NOTE(review): `quality` is computed but `level` is what is passed to
        api_req — kept as-is since callers may rely on the current behavior.
        """
        stream_id = kwargs.get('stream_id')
        if stream_id and stream_id in self.fmt2qlt:
            quality = stream_id
        else:
            quality = 'hdflv' if bangumi else 'flv'
        level = kwargs.get('level', 0)
        xml = self.api_req(cid, level, bangumi, **kwargs)
        doc = parseString(xml)
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            #urls.append(u)
            urls.append(
                urllib.quote_plus(u + '|Referer=https://www.bilibili.com'))
        return urls

    def entry(self, **kwargs):
        """Dispatch a normal video page to the right downloader backend."""
        # tencent player
        tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"',
                                 self.page)
        if tc_flashvars:
            tc_flashvars = tc_flashvars.group(1)
        if tc_flashvars is not None:
            self.out = True
            return qq_download_by_vid(tc_flashvars,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])
        # FIX: guard the match — the original called .group(1) directly, so a
        # page without "cid=" raised AttributeError and the else branch below
        # (the flashvars fallback) was unreachable.
        cid_match = re.search(r'cid=(\d+)', self.page)
        cid = cid_match.group(1) if cid_match is not None else None
        if cid is not None:
            return self.download_by_vid(cid, False, **kwargs)
        else:
            # flashvars?
            flashvars = re.search(r'flashvars="([^"]+)"', self.page).group(1)
            if flashvars is None:
                raise Exception('Unsupported page {}'.format(self.url))
            param = flashvars.split('&')[0]
            t, cid = param.split('=')
            t = t.strip()
            cid = cid.strip()
            if t == 'vid':
                sina_download_by_vid(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            elif t == 'ykid':
                youku_download_by_vid(cid,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])
            elif t == 'uid':
                tudou_download_by_id(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            else:
                raise NotImplementedError(
                    'Unknown flashvars {}'.format(flashvars))
            return

    def movie_entry(self, **kwargs):
        """Handle a bangumi movie page: resolve aid, then download by cid."""
        patt = r"var\s*aid\s*=\s*'(\d+)'"
        aid = re.search(patt, self.page).group(1)
        page_list = loads(
            get_html(
                'http://www.bilibili.com/widget/getPageList?aid={}'.format(
                    aid)))
        # better ideas for bangumi_movie titles?
        self.title = page_list[0]['pagename']
        return self.download_by_vid(page_list[0]['cid'],
                                    True,
                                    bangumi_movie=True,
                                    **kwargs)

    def get_video_from_url(self, url, **kwargs):
        """Resolve *url* (following redirects) and dispatch by host/section."""
        self.url = url_locations(url)
        frag = urlparse(self.url).fragment
        # http://www.bilibili.com/video/av3141144/index_2.html#page=3
        if frag:
            hit = re.search(r'page=(\d+)', frag)
            if hit is not None:
                page = hit.group(1)
                av_id = re.search(r'av(\d+)', self.url).group(1)
                self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(
                    av_id, page)
        self.page = get_html(self.url)
        if 'bangumi.bilibili.com/movie' in self.url:
            return self.movie_entry(**kwargs)
        elif 'bangumi.bilibili.com' in self.url:
            return self.bangumi_entry(**kwargs)
        elif 'live.bilibili.com' in self.url:
            return self.live_entry(**kwargs)
        elif 'vc.bilibili.com' in self.url:
            return self.vc_entry(**kwargs)
        else:
            return self.entry(**kwargs)

    def bangumi_entry(self, **kwargs):
        # not implemented in this build
        pass

    def live_entry(self, **kwargs):
        # not implemented in this build
        pass

    def vc_entry(self, **kwargs):
        # not implemented in this build
        pass
class Session(requests.Session): """ Session for making API requests and interacting with the filesystem """ def __init__(self): super(Session, self).__init__() self.trust_env = False cookie_file = os.path.expanduser('~/.danabox/cookies.txt') cookie_dir = os.path.dirname(cookie_file) self.cookies = MozillaCookieJar(cookie_file) # Create the $HOME/.danabox dir if it doesn't exist if not os.path.isdir(cookie_dir): os.mkdir(cookie_dir, 0700) # Load existing cookies if the cookies.txt exists if os.path.isfile(cookie_file): self.cookies.load() self.cookies.clear_expired_cookies() def clear(self): """Clear cookies""" try: self.cookies.clear() self.cookies.save() except KeyError: pass def git_root(self): """ Return the absolute path from the git repository root If no git repository exists, raise an EnvironmentError """ try: git_root = subprocess.check_output( ['git', 'rev-parse', '--show-toplevel'], stderr=subprocess.PIPE).strip('\n') except subprocess.CalledProcessError: raise EnvironmentError('Current directory is not a git repository') return git_root def get_app(self): """ Return the application name for the current directory The application is determined by parsing `git remote -v` output for the origin remote. Because Danabox only allows deployment of public Github repos we can create unique app names from a combination of the Github user's name and the repo name. Eg; '[email protected]:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra' If no application is found, raise an EnvironmentError. 
""" git_root = self.git_root() remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root) if remotes is None: raise EnvironmentError('No git remotes found.') for remote in remotes.splitlines(): if 'github.com' in remote: url = remote.split()[1] break if url is None: raise EnvironmentError('No Github remotes found.') pieces = url.split('/') owner = pieces[-2].split(':')[-1] repo = pieces[-1].replace('.git', '') app_raw = owner + '/' + repo app_name = app_raw.replace('-', '--').replace('/', '-') return app_name app = property(get_app) def request(self, *args, **kwargs): """ Issue an HTTP request with proper cookie handling including `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>` """ for cookie in self.cookies: if cookie.name == 'csrftoken': if 'headers' in kwargs: kwargs['headers']['X-CSRFToken'] = cookie.value else: kwargs['headers'] = {'X-CSRFToken': cookie.value} break response = super(Session, self).request(*args, **kwargs) self.cookies.save() return response
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Persist the jar to disk unless it is a non-persistent jar.

    Jars whose ``mode`` is ``"discard"`` or ``"session"`` are intentionally
    never written out; any other mode delegates to ``MozillaCookieJar.save``.
    """
    if self.mode in ("discard", "session"):
        return None
    return MozillaCookieJar.save(self, filename, ignore_discard,
                                 ignore_expires)
from urllib2 import HTTPCookieProcessor,build_opener from cookielib import CookieJar,MozillaCookieJar from redis_test import Redis # 1. build a cookie with file name # 2. create a cookie handler # 3. build a opener fileName = 'cookie.txt' cookie = MozillaCookieJar(fileName) handler = HTTPCookieProcessor(cookie) opener = build_opener(handler) response = opener.open("http://www.baidu.com") for item in cookie: print 'Name = ' + item.name print 'Value = ' + item.value cookie.save(ignore_discard=True,ignore_expires=True)
class bulk_downloader:
    """Download Sentinel-1 GRD_HD products from the ASF datapool.

    Handles Earthdata URS4 cookie acquisition/validation, size-checked
    downloads with redirect/re-auth handling, and a success/failure summary
    that is written back to a database table.
    """

    def __init__(self, id, username, password, table_name):
        # List of files to download (one product URL, chosen by sensor prefix)
        if id[:3] == 'S1A':
            self.files = [
                'https://datapool.asf.alaska.edu/GRD_HD/SA/{}.zip'.format(id)
            ]
        elif id[:3] == 'S1B':
            self.files = [
                'https://datapool.asf.alaska.edu/GRD_HD/SB/{}.zip'.format(id)
            ]
        else:
            # unknown platform prefix: log and leave the object half-built
            print('no identified sensor: {}'.format(id))
            logging.error('sensor not identified: {}'.format(id))
            return
        self.username = username
        self.password = password
        self.table_name = table_name
        self.save_to = os.getenv('IMAGES_PATH')
        # Local stash of cookies so we don't always have to ask
        # NOTE(review): abspath('__file__') resolves the literal string
        # "__file__" against the CWD, not this module's path — confirm intent.
        self.cookie_jar_path = os.path.join(
            os.path.dirname(os.path.abspath('__file__')),
            '.bulk_download_cookiejar.txt')
        self.cookie_jar = None
        self.asf_urs4 = {
            'url': 'https://urs.earthdata.nasa.gov/oauth/authorize',
            'client': 'BO_n7nTIlMljdvU6kRRB3g',
            'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'
        }
        # Make sure we can write it our current directory
        if os.access(os.getcwd(), os.W_OK) is False:
            print(
                'WARNING: Cannot write to current path! Check permissions for {0}'
                .format(os.getcwd()))
            exit(-1)
        # For SSL
        self.context = {}
        # Make sure cookie_jar is good to go!
        self.get_cookie()
        # summary
        self.total_bytes = 0
        self.total_time = 0
        self.cnt = 0
        self.success = []
        self.failed = []
        self.skipped = []

    # Get and validate a cookie
    def get_cookie(self):
        """Reuse a valid on-disk cookie jar or obtain a fresh URS cookie."""
        # remove the cookie_jar_path file if its older than a day
        # NOTE(review): cookie_creation_date()/save_cookie_creation_date() are
        # defined elsewhere in this file — not visible in this chunk.
        date_created = cookie_creation_date()
        if date_created:
            dx = datetime.now() - date_created
            hour = dx.total_seconds() / (3600)
            if hour > 10:
                print('cookie greater than 10 hours so removing it')
                os.remove(self.cookie_jar_path)
        if os.path.isfile(self.cookie_jar_path):
            self.cookie_jar = MozillaCookieJar()
            self.cookie_jar.load(self.cookie_jar_path)
            # make sure cookie is still valid
            if self.check_cookie():
                print(' > Re-using previous cookie jar.')
                return True
            else:
                print(' > Could not validate old cookie Jar')
        # We don't have a valid cookie, prompt user or creds
        print('No existing URS cookie found, creating one')
        print('(Credentials will not be stored, saved or logged anywhere)')
        # Keep trying 'till user gets the right U:P
        while self.check_cookie() is False:
            self.get_new_cookie()
        return True

    # Validate cookie before we begin
    def check_cookie(self):
        """HEAD the URS profile page to confirm the cookie still authenticates."""
        if self.cookie_jar is None:
            print(' > Cookiejar is bunk: {0}'.format(self.cookie_jar))
            return False
        # File we know is valid, used to validate cookie
        file_check = 'https://urs.earthdata.nasa.gov/profile'
        # Apply custom Redirect Hanlder
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        install_opener(opener)
        # Attempt a HEAD request
        request = Request(file_check)
        request.get_method = lambda: 'HEAD'
        try:
            print(' > attempting to download {0}'.format(file_check))
            response = urlopen(request, timeout=30)
            resp_code = response.getcode()
            # Make sure we're logged in
            if not self.check_cookie_is_logged_in(self.cookie_jar):
                return False
            # Save cookiejar
            self.cookie_jar.save(self.cookie_jar_path)
        except HTTPError:
            # If we ge this error, again, it likely means the user has not agreed to current EULA
            print('\nIMPORTANT: ')
            print(
                'Your user appears to lack permissions to download data from the ASF Datapool.'
            )
            print(
                '\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov'
            )
            exit(-1)
        # This return codes indicate the USER has not been approved to download the data
        if resp_code in (300, 301, 302, 303):
            try:
                redir_url = response.info().getheader('Location')
            except AttributeError:
                redir_url = response.getheader('Location')
            # Funky Test env:
            if ('vertex-retired.daac.asf.alaska.edu' in redir_url
                    and 'test' in self.asf_urs4['redir']):
                print("Cough, cough. It's dusty in this test env!")
                return True
            print('Redirect ({0}) occurred, invalid cookie value!'.format(
                resp_code))
            return False
        # These are successes!
        if resp_code in (200, 307):
            return True
        return False

    def get_new_cookie(self):
        """Authenticate against URS4 with Basic auth and save the new cookie."""
        # Start by prompting user to input their credentials
        new_username = self.username
        new_password = self.password
        # Build URS4 Cookie request
        auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \
            self.asf_urs4['redir'] + '&response_type=code&state='
        try:
            # python2
            user_pass = base64.b64encode(
                bytes(new_username + ':' + new_password))
        except TypeError:
            # python3
            user_pass = base64.b64encode(
                bytes(new_username + ':' + new_password, 'utf-8'))
            user_pass = user_pass.decode('utf-8')
        # Authenticate against URS, grab all the cookies
        self.cookie_jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        request = Request(
            auth_cookie_url,
            headers={"Authorization": "Basic {0}".format(user_pass)})
        # Watch out cookie rejection!
        try:
            response = opener.open(request)
        except HTTPError as e:
            if e.code == 401:
                print(
                    " > Username and Password combo was not successful. Please try again."
                )
                return False
            else:
                # If an error happens here, the user most likely has not confirmed EULA.
                print(
                    "\nIMPORTANT: There was an error obtaining a download cookie!"
                )
                print(
                    "Your user appears to lack permission to download data from the ASF Datapool."
                )
                print(
                    "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
                )
                exit(-1)
        except URLError as e:
            print(
                "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. "
            )
            print("Try cookie generation later.")
            exit(-1)
        # Did we get a cookie?
        if self.check_cookie_is_logged_in(self.cookie_jar):
            # COOKIE SUCCESS!
            print('cookie saved')
            self.cookie_jar.save(self.cookie_jar_path)
            save_cookie_creation_date()
            return True
        # if we aren't successful generating the cookie, nothing will work. Stop here!
        print(
            "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again."
        )
        print("Response was {0}.".format(response.getcode()))
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
        )
        exit(-1)

    # make sure we're logged into URS
    def check_cookie_is_logged_in(self, cj):
        """Return True when the jar carries the URS logged-in marker cookie."""
        for cookie in cj:
            if cookie.name == 'urs_user_already_logged':
                # Only get this cookie if we logged in successfully!
                return True
        return False

    # Download the file
    def download_file_with_cookiejar(self,
                                     url,
                                     file_count,
                                     total,
                                     recursion=False):
        """Download *url*, re-authenticating through URS once if redirected.

        Returns (actual_size, reported_size); (None, None) when the file is
        already complete locally; (False, None) on error.
        """
        # see if we've already download this file and if it is that it is the correct size
        download_file = os.path.basename(url).split('?')[0]
        if os.path.isfile(os.path.join(self.save_to, download_file)):
            try:
                request = Request(url)
                request.get_method = lambda: 'HEAD'
                response = urlopen(request, timeout=30)
                remote_size = self.get_total_size(response)
                # Check that we were able to derive a size.
                if remote_size:
                    local_size = os.path.getsize(
                        os.path.join(self.save_to, download_file))
                    if remote_size < (local_size +
                                      (local_size * .01)) and remote_size > (
                                          local_size - (local_size * .01)):
                        print(
                            " > Download file {0} exists! \n > Skipping download of {1}. "
                            .format(download_file, url))
                        return None, None
                    # partial file size wasn't full file size, lets blow away the chunk and start again
                    print(
                        " > Found {0} but it wasn't fully downloaded. Removing file and downloading again."
                        .format(download_file))
                    os.remove(os.path.join(self.save_to, download_file))
            except ssl.CertificateError as e:
                print(" > ERROR: {0}".format(e))
                print(
                    " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
                )
                return False, None
            except HTTPError as e:
                if e.code == 401:
                    print(
                        " > IMPORTANT: Your user may not have permission to download this type of data!"
                    )
                else:
                    print(" > Unknown Error, Could not get file HEAD: {0}".
                          format(e))
            except URLError as e:
                print("URL Error (from HEAD): {0}, {1}".format(e.reason, url))
                if "ssl.c" in "{0}".format(e.reason):
                    print(
                        "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                    )
                    return False, None
        # attempt https connection
        try:
            request = Request(url)
            response = urlopen(request, timeout=30)
            # Watch for redirect
            if response.geturl() != url:
                # See if we were redirect BACK to URS for re-auth.
                if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(
                ):
                    if recursion:
                        print(
                            " > Entering seemingly endless auth loop. Aborting. "
                        )
                        return False, None
                    # make this easier. If there is no app_type=401, add it
                    new_auth_url = response.geturl()
                    if "app_type" not in new_auth_url:
                        new_auth_url += "&app_type=401"
                    print(
                        " > While attempting to download {0}....".format(url))
                    print(" > Need to obtain new cookie from {0}".format(
                        new_auth_url))
                    old_cookies = [cookie.name for cookie in self.cookie_jar]
                    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                                          HTTPHandler(),
                                          HTTPSHandler(**self.context))
                    request = Request(new_auth_url)
                    try:
                        response = opener.open(request)
                        for cookie in self.cookie_jar:
                            if cookie.name not in old_cookies:
                                print(" > Saved new cookie: {0}".format(
                                    cookie.name))
                                # A little hack to save session cookies
                                if cookie.discard:
                                    cookie.expires = int(
                                        time.time()) + 60 * 60 * 24 * 30
                                    print(
                                        " > Saving session Cookie that should have been discarded! "
                                    )
                        self.cookie_jar.save(self.cookie_jar_path,
                                             ignore_discard=True,
                                             ignore_expires=True)
                    except HTTPError as e:
                        print("HTTP Error: {0}, {1}".format(e.code, url))
                        return False, None
                    # Okay, now we have more cookies! Lets try again, recursively!
                    print(" > Attempting download again with new cookies!")
                    return self.download_file_with_cookiejar(url,
                                                             file_count,
                                                             total,
                                                             recursion=True)
                print(
                    " > 'Temporary' Redirect download @ Remote archive:\n > {0}"
                    .format(response.geturl()))
            # seems to be working
            print("({0}/{1}) Downloading {2}".format(file_count, total, url))
            # Open our local file for writing and build status bar
            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
            self.chunk_read(response, tf, report_hook=self.chunk_report)
            # Reset download status
            sys.stdout.write('\n')
            tempfile_name = tf.name
            tf.close()
        # handle errors
        except HTTPError as e:
            print("HTTP Error: {0}, {1}".format(e.code, url))
            if e.code == 401:
                print(
                    " > IMPORTANT: Your user does not have permission to download this type of data!"
                )
            if e.code == 403:
                print(" > Got a 403 Error trying to download this file. ")
                print(
                    " > You MAY need to log in this app and agree to a EULA. ")
            return False, None
        except URLError as e:
            print("URL Error (from GET): {0}, {1}, {2}".format(
                e, e.reason, url))
            if "ssl.c" in "{0}".format(e.reason):
                print(
                    "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                )
            return False, None
        except ssl.CertificateError as e:
            print(" > ERROR: {0}".format(e))
            print(
                " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
            )
            return False, None
        # Return the file size
        shutil.copy(tempfile_name, os.path.join(self.save_to, download_file))
        os.remove(tempfile_name)
        file_size = self.get_total_size(response)
        actual_size = os.path.getsize(os.path.join(self.save_to,
                                                   download_file))
        if file_size is None:
            # We were unable to calculate file size.
            file_size = actual_size
        return actual_size, file_size

    # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_report(self, bytes_so_far, file_size):
        """Progress hook: write an in-place progress line to stdout."""
        if file_size is not None:
            percent = float(bytes_so_far) / file_size
            percent = round(percent * 100, 2)
            sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                             (bytes_so_far, file_size, percent))
        else:
            # We couldn't figure out the size.
            sys.stdout.write(" > Downloaded %d of unknown Size\r" %
                             (bytes_so_far))

    # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_read(self,
                   response,
                   local_file,
                   chunk_size=8192,
                   report_hook=None):
        """Stream *response* into *local_file* in chunks; return bytes read."""
        file_size = self.get_total_size(response)
        bytes_so_far = 0
        while 1:
            try:
                chunk = response.read(chunk_size)
            except:
                sys.stdout.write("\n > There was an error reading data. \n")
                break
            try:
                local_file.write(chunk)
            except TypeError:
                # text-mode file handed in: decode bytes before writing
                local_file.write(chunk.decode(local_file.encoding))
            bytes_so_far += len(chunk)
            if not chunk:
                break
            if report_hook:
                report_hook(bytes_so_far, file_size)
        return bytes_so_far

    def get_total_size(self, response):
        """Return the Content-Length of *response* as int, or None."""
        try:
            # python2 response object
            file_size = response.info().getheader('Content-Length').strip()
        except AttributeError:
            try:
                # python3 response object
                file_size = response.getheader('Content-Length').strip()
            except AttributeError:
                print("> Problem getting size")
                return None
        return int(file_size)

    # Download all the files in the list
    def download_files(self):
        """Download every URL in self.files, recording per-file outcome."""
        for file_name in self.files:
            # make sure we haven't ctrl+c'd or some other abort trap
            # NOTE(review): `abort` is presumably a module-level flag — confirm.
            if abort == True:
                raise SystemExit
            # download counter
            self.cnt += 1
            # set a timer
            start = time.time()
            # run download
            size, total_size = self.download_file_with_cookiejar(
                file_name, self.cnt, len(self.files))
            # calculte rate
            end = time.time()
            # stats:
            if size is None:
                self.skipped.append(file_name)
            # Check to see that the download didn't error and is the correct size
            elif size is not False and (total_size < (size + (size * .01))
                                        and total_size >
                                        (size - (size * .01))):
                # Download was good!
                elapsed = end - start
                elapsed = 1.0 if elapsed < 1 else elapsed
                rate = (size / 1024**2) / elapsed
                print(
                    "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec"
                    .format(size, elapsed, rate))
                # add up metrics
                self.total_bytes += size
                self.total_time += elapsed
                self.success.append({'file': file_name, 'size': size})
            else:
                print("There was a problem downloading {0}".format(file_name))
                self.failed.append(file_name)

    def print_summary(self, rid):
        """Print the download report and flag row *rid* in the DB table."""
        # Print summary:
        print("\n\nDownload Summary ")
        print(
            "--------------------------------------------------------------------------------"
        )
        print(" Successes: {0} files, {1} bytes ".format(
            len(self.success), self.total_bytes))
        for success_file in self.success:
            print(" - {0} {1:.2f}MB".format(
                success_file['file'], (success_file['size'] / 1024.0**2)))
        if len(self.failed) > 0:
            print(" Failures: {0} files".format(len(self.failed)))
            for failed_file in self.failed:
                print(" - {0}".format(failed_file))
        if len(self.skipped) > 0:
            print(" Skipped: {0} files".format(len(self.skipped)))
            for skipped_file in self.skipped:
                print(" - {0}".format(skipped_file))
        if len(self.success) > 0:
            print(" Average Rate: {0:.2f}MB/sec".format(
                (self.total_bytes / 1024.0**2) / self.total_time))
        print(
            "--------------------------------------------------------------------------------\n\n"
        )
        # since we are downloading one file at a time!
        if len(self.success) > 0:
            try:
                # connect_to_db/close_connection are defined elsewhere in this file
                conn, cur = connect_to_db()
                cur.execute(
                    "UPDATE {} SET downloaded=TRUE WHERE rid={}".format(
                        self.table_name, rid))
                conn.commit()
                close_connection(conn, cur)
            except Exception as e:
                print('error inserting into db because {}'.format(e))
                logging.error(e)
        # ideally should not end up here but anyway
        if len(self.skipped) > 0:
            try:
                conn, cur = connect_to_db()
                cur.execute(
                    "UPDATE {} SET downloaded=FALSE WHERE rid={}".format(
                        self.table_name, rid))
                conn.commit()
                close_connection(conn, cur)
            except Exception as e:
                print('error inserting into db because {}'.format(e))
                logging.error(e)
class Bilibili():
    """Client for the Bilibili web API (Python 2 code).

    Persists the login session in a Mozilla-format cookie file stored next
    to this module and installs a global urllib2 opener carrying that jar,
    so subsequent requests made through urllib2/utils are authenticated.
    """

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        # appkey/appsecret are used to sign API requests (see api_sign()).
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        # Cookie file lives beside this source file.
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            # Presence of the 'DedeUserID' cookie marks a live login session.
            if requests.utils.dict_from_cookiejar(
                    self.cj).has_key('DedeUserID'):
                self.is_login = True
                # mid = this account's numeric member id, kept as str.
                self.mid = str(
                    requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        # Route all urllib2 traffic through the shared cookie jar.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        """Download a login captcha image and return the file path it was saved to."""
        # First request primes the 'sid' session cookie when it is missing.
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'):
            utils.get_page_content(
                LOGIN_CAPTCHA_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(
            LOGIN_CAPTCHA_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path == None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        """Encrypt a plaintext password the way the login endpoint expects.

        The server supplies a salt ('hash') and an RSA public key; the salted
        password is RSA-encrypted, base64-encoded, then URL-quoted.
        """
        import rsa
        result = json.loads(
            utils.get_page_content(
                LOGIN_HASH_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build a signed query string: sorted params plus an md5 'sign' field."""
        params['appkey'] = self.appkey
        data = ""
        # Keys must be concatenated in sorted order for a stable signature.
        keys = params.keys()
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        # Without a secret there is nothing to sign with.
        if self.appsecret == None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        """Scrape the category tree from the home page.

        Returns a dict keyed by tid; each node has title, url and child tids
        under 'subs'. tid '0' is the synthetic root.
        """
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        # Top-level categories come from the home-page nav items.
        for item in result:
            # Items with extra CSS classes are not plain category entries.
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)
        # Fix-up: make sure TV-series (11) and movie (23) always exist,
        # since they live on separate bangumi pages.
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }
        # Second pass: scrape each top-level category page for subcategories.
        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            # result[0] is skipped; presumably a header item -- see markup.
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                # Protocol-relative links get an explicit scheme.
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        """Return the static category subtree rooted at tid (from CATEGORY)."""
        items = [{
            tid: {
                'title': '全部',
                'url': CATEGORY[tid]['url'],
                'subs': []
            }
        }]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        """Map a category id (int or str) to its display title."""
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        """Return the available sort orders."""
        return ORDER

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=10):
        """List videos in a category; returns (results, total_pages)."""
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        # The API returns a dict keyed '0'..'n-1'; stop at the first gap.
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        """Return the logged-in user's profile data ([] when not logged in)."""
        if self.is_login == False:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        """Bangumi series the user follows; returns (results, pages)."""
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        """Fetch season detail; unwraps a JSONP-wrapped response when present."""
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
            # Response is JSONP: strip the 'callback(' ... ');' wrapper.
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        """Watch history; returns (entries, total_page).

        The API exposes no total count, so assume one more page exists
        whenever a full page came back.
        """
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        """Feed of new videos from followed uploaders; returns (feeds, pages)."""
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        # Ceiling division turns the item count into a page count.
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        """Accounts this user follows; returns (list, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        """Videos uploaded by a followed account; returns (data, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        """Channels belonging to a followed account."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        """Videos inside one channel; returns (list, total)."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        """The user's favourite folders."""
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        """Videos in one favourite folder; returns (vlist, pages)."""
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        """Log in; returns (True, '') on success or (False, error_message).

        Success is detected by the appearance of the 'DedeUserID' cookie in
        the jar, not by the HTTP response body.
        """
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = utils.get_page_content(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })
        if not requests.utils.dict_from_cookiejar(
                self.cj).has_key('DedeUserID'):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(
            requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        """Drop all cookies and persist the now-empty jar."""
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        """Collect up to pagesize pages of an av's part list, recursively.

        Returns (list_of_page_results, total_pages).
        """
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        # Walk following pages recursively; pagesize acts as the budget.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(
                aid, int(page) + 1, fav, pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        """Raw part list for one av id."""
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        """Resolve playable URLs for a cid via the signed interface API."""
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [
            durl.getElementsByTagName('url')[0].firstChild.nodeValue
            for durl in doc.getElementsByTagName('durl')
        ]
        # Rewrite qqvideo CDN hosts to a mirror host.
        urls = [
            url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(
                r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url)
            for url in urls
        ]
        return urls

    def add_history(self, aid, cid):
        """Report a view so it shows up in watch history (fire-and-forget)."""
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
class Bilibili():
    """Client for the Bilibili web API (Python 2 code).

    NOTE(review): this is a second, token-identical definition of the
    Bilibili class that also appears earlier in this file; at import time
    this later definition shadows the earlier one -- confirm whether the
    duplication is intentional.

    Persists the login session in a Mozilla-format cookie file stored next
    to this module and installs a global urllib2 opener carrying that jar.
    """

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        # appkey/appsecret are used to sign API requests (see api_sign()).
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        # Cookie file lives beside this source file.
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            # Presence of the 'DedeUserID' cookie marks a live login session.
            if requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'):
                self.is_login = True
                self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        # Route all urllib2 traffic through the shared cookie jar.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        """Download a login captcha image and return the file path it was saved to."""
        # First request primes the 'sid' session cookie when it is missing.
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'):
            utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()),
                                   headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()),
                                        headers={'Referer': 'https://passport.bilibili.com/login'})
        if path == None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        """Encrypt a plaintext password the way the login endpoint expects.

        The server supplies a salt ('hash') and an RSA public key; the salted
        password is RSA-encrypted, base64-encoded, then URL-quoted.
        """
        import rsa
        result = json.loads(utils.get_page_content(
            LOGIN_HASH_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build a signed query string: sorted params plus an md5 'sign' field."""
        params['appkey'] = self.appkey
        data = ""
        # Keys must be concatenated in sorted order for a stable signature.
        keys = params.keys()
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        # Without a secret there is nothing to sign with.
        if self.appsecret == None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        """Scrape the category tree from the home page.

        Returns a dict keyed by tid; each node has title, url and child tids
        under 'subs'. tid '0' is the synthetic root.
        """
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        # Top-level categories come from the home-page nav items.
        for item in result:
            # Items with extra CSS classes are not plain category entries.
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)
        # Fix-up: make sure TV-series (11) and movie (23) always exist,
        # since they live on separate bangumi pages.
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {'title': u'电视剧',
                               'url': 'http://bangumi.bilibili.com/tv/',
                               'subs': []}
        category_dict['23'] = {'title': u'电影',
                               'url': 'http://bangumi.bilibili.com/movie/',
                               'subs': []}
        # Second pass: scrape each top-level category page for subcategories.
        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            # result[0] is skipped; presumably a header item -- see markup.
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                # Protocol-relative links get an explicit scheme.
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        """Return the static category subtree rooted at tid (from CATEGORY)."""
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url'], 'subs': []}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        """Map a category id (int or str) to its display title."""
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        """Return the available sort orders."""
        return ORDER

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=10):
        """List videos in a category; returns (results, total_pages)."""
        params = {'tid': tid, 'order': order, 'days': days, 'page': page,
                  'pagesize': pagesize}
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        # The API returns a dict keyed '0'..'n-1'; stop at the first gap.
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        """Return the logged-in user's profile data ([] when not logged in)."""
        if self.is_login == False:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        """Bangumi series the user follows; returns (results, pages)."""
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        """Fetch season detail; unwraps a JSONP-wrapped response when present."""
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
            # Response is JSONP: strip the 'callback(' ... ');' wrapper.
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        """Watch history; returns (entries, total_page).

        The API exposes no total count, so assume one more page exists
        whenever a full page came back.
        """
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        """Feed of new videos from followed uploaders; returns (feeds, pages)."""
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        # Ceiling division turns the item count into a page count.
        total_page = int((result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        """Accounts this user follows; returns (list, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        """Videos uploaded by a followed account; returns (data, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        """Channels belonging to a followed account."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        """Videos inside one channel; returns (list, total)."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        """The user's favourite folders."""
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        """Videos in one favourite folder; returns (vlist, pages)."""
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        """Log in; returns (True, '') on success or (False, error_message).

        Success is detected by the appearance of the 'DedeUserID' cookie in
        the jar, not by the HTTP response body.
        """
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(captcha, userid, pwd)
        result = utils.get_page_content(LOGIN_URL, data,
                                        {'Origin': 'https://passport.bilibili.com',
                                         'Referer': 'https://passport.bilibili.com/login'})
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        """Drop all cookies and persist the now-empty jar."""
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        """Collect up to pagesize pages of an av's part list, recursively.

        Returns (list_of_page_results, total_pages).
        """
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        # Walk following pages recursively; pagesize acts as the budget.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid, int(page) + 1, fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        """Raw part list for one av id."""
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        """Resolve playable URLs for a cid via the signed interface API."""
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue
                for durl in doc.getElementsByTagName('durl')]
        # Rewrite qqvideo CDN hosts to a mirror host.
        urls = [url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url)
                else re.sub(r'.*\.qqvideo\.tc\.qq\.com',
                            'http://vsrc.store.qq.com', url)
                for url in urls]
        return urls

    def add_history(self, aid, cid):
        """Report a view so it shows up in watch history (fire-and-forget)."""
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
class LSession():
    """Thin urllib2 session wrapper with cookies, proxy and retries (Python 2).

    Keeps cookies in an in-memory CookieJar (no cookiefile) or an on-disk
    MozillaCookieJar (cookiefile given), retries failed opens, and sends the
    previous response's URL as the Referer of the next request.
    """

    def __init__(self, cookiefile=None, proxy=None, timeout=10, retime=30,
                 sleept=3):
        # retime: retry budget per request; sleept: seconds between retries.
        self.timeout = timeout
        self.retime = retime
        self.sleept = sleept
        #proxy '1.234.77.96:80'
        if cookiefile == None:
            # No file given: cookies live only in memory for this session.
            self.cookiejar = CookieJar()
        else:
            self.cookiejar = MozillaCookieJar(filename=cookiefile)
            #self.cookiejar =cookielib.LWPCookieJar(filename=cookiefile)
            if not os.path.isfile(cookiefile):
                # Seed a fresh file with the Mozilla header so load() accepts it.
                open(cookiefile, 'w').write(MozillaCookieJar.header)
                #open(cookiefile, 'w').write('#abc\n')
                pass
            self.cookiejar.load(filename=cookiefile, ignore_discard=True)
            #print "ck:",self.cookiejar
        self.cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.opener = build_opener(urllib2.HTTPRedirectHandler(),
                                   self.cookie_processor)
        if proxy:
            self.opener.add_handler(ProxyHandler({"http": proxy}))
        #for posting a file
        try:
            import MultipartPostHandler  #for posting a file,need installed
            self.opener.add_handler(MultipartPostHandler.MultipartPostHandler())
        except NameError as e:
            # NOTE(review): a missing module raises ImportError, not NameError
            # -- confirm which failure this was meant to swallow.
            print e
        self.response = None
        self.request = None
        # Queue of (key, value) headers applied to the next request only.
        self.header = []

    def add_header(self, k, v):
        """Queue a header to be attached to the next built request."""
        self.header.append((k, v))

    def build_request(self, url, params=None):
        """Create a Request; a non-None params body makes it a POST."""
        self.request = Request(url, params)
        # Use the previous response's URL as Referer, when there was one.
        if not self.response is None:
            self.request.add_header('Referer', self.url())
        #self.request.add_header('User-Agent',
        #    'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 \
        #    (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25')
        #NokiaE63/UC Browser7.9.0.102/28/355/UCWEB
        #self.request.add_header('User-Agent','NokiaE63/UC Browser7.9.0.102/28/355/UCWEB')
        self.request.add_header(
            'User-Agent',
            'Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15')
        # Drain the one-shot header queue into this request.
        while self.header:
            _k, _v = self.header.pop()
            self.request.add_header(_k, _v)
        #Mobile/7B405
        #self.request.add_header('User-Agent','Mobile/7B405')
        return self.request

    def __del__(self):
        # Best-effort cookie persistence when the session is collected.
        self.save_cookie()

    def urlopen(self, req):
        """Open req, retrying up to self.retime times.

        Returns the response, or None implicitly when every attempt fails.
        """
        retime = self.retime
        while retime > 0:
            try:
                return self.opener.open(req, timeout=self.timeout)
            except Exception as e:
                retime -= 1
                traceback.print_exc(file=sys.stdout)
                print 'Wait and retry...%d' % (self.retime - retime)
                sleep(self.sleept)

    def savefile(self, filename, url):
        """Stream url to filename in 50 KiB chunks."""
        self.response = self.urlopen(self.build_request(url))
        CHUNK = 50 * 1024
        with open(filename, 'wb') as fp:
            while True:
                chunk = self.response.read(CHUNK)
                if not chunk:
                    break
                fp.write(chunk)

    def post(self, url, post_data):
        """POST a dict as form-encoded data; returns the response."""
        self.response = self.urlopen(self.build_request(url, urlencode(post_data)))
        return self.response

    def post_raw(self, url, post_data):
        """POST a pre-encoded body as-is; returns the response."""
        self.response = self.urlopen(self.build_request(url, post_data))
        return self.response

    def post_file(self, url, params):
        """POST multipart data (relies on MultipartPostHandler being installed)."""
        self.response = self.urlopen(self.build_request(url, params))
        return self.response

    def get(self, url):
        """Plain GET; returns the response."""
        self.response = self.urlopen(self.build_request(url))
        #import urllib
        #print urllib.urlopen('http://mrozekma.com/302test.php').geturl()
        # import requests
        # r=requests.get(url)
        # print r.content
        return self.response

    def text(self, dec='gbk', enc='utf'):
        """Body of the last response, transcoded from dec to enc."""
        return self.response.read().decode(dec).encode(enc)

    def url(self):
        """URL of the last response (after any redirects)."""
        return self.response.url

    def logout(self):
        """Forget all cookies held by this session."""
        self.cookiejar.clear()

    def Verify_proxy(self):
        # Placeholder; never implemented.
        pass

    def show_cookie(self):
        """Dump every cookie to stdout (debug helper)."""
        #print self.cookiejar
        for i in self.cookiejar:
            print i

    def save_cookie(self):
        """Persist cookies; failures (e.g. plain CookieJar without save()) are logged."""
        # if hasattr(self.cookiejar,'save'):#in case non cookiejar
        #     self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        try:
            self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        except Exception as e:
            traceback.print_exc(file=sys.stdout)
def save(self, filename=None, ignore_discard=False, ignore_expires=False): if self.mode not in ("discard", "session"): return MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041209_20200626T041234_033183_03D816_7063.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041144_20200626T041209_033183_03D816_0D5E.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041208_20200614T041233_033008_03D2C4_2DC4.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041143_20200614T041208_033008_03D2C4_584D.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041208_20200602T041233_032833_03CD92_6A43.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041143_20200602T041208_032833_03CD92_5A25.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! 
Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." .format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! 
self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. 
print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. 
" .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. 
If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. 
") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. 
\n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." 
% (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( 
"--------------------------------------------------------------------------------" )
if options.pretty: # Strip out extra whitespace, so we can have maximum prettiness. for el in user.iter(): if el.text and not el.text.strip(): el.text = None if el.tail and not el.tail.strip(): el.tail = None print ET.tostring(user, pretty_print=True, encoding="utf-8") else: print ET.tostring(user, encoding="utf-8") # FIXME: Add locking to cookiejar, so concurrent instances don't clobber the cookie file. if options.cookiejar: try: cookiejar.save(ignore_discard=True) except IOError, e: print >> sys.stderr, "Error while saving cookie jar: %s: %s" % (options.cookiejar, e) except UsageError, e: print >> sys.stderr, e return 64 # EX_USAGE except ConfigParser.ParsingError, e: print >> sys.stderr, e return 78 # EX_CONFIG except NotImplementedError, e: print >> sys.stderr, e return 69 # EX_UNAVAILABLE
class Json_RPC(object):
    """Minimal JSON-RPC-over-HTTP client that keeps its cookies in a
    Mozilla-format cookie jar so sessions can be saved and restored."""

    def __init__(self):
        # Mozilla-format jar so load_cookie/save_cookie can persist sessions.
        self.cookie_jar = MozillaCookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookie_jar),
        )

    def load_cookie(self, filename):
        """Load cookies (session cookies included) from `filename`."""
        self.cookie_jar.load(filename, ignore_discard=True)

    def save_cookie(self, filename):
        """Persist cookies (session cookies included) to `filename`."""
        self.cookie_jar.save(filename, ignore_discard=True)

    def json_rpc(self, url, method="GET", **kwargs):
        """Perform an HTTP RPC and return the decoded JSON payload.

        Unwraps JSONP of the form try{callback({...});}catch(e){} and then
        extracts the first embedded dict or list before decoding.
        See also: http_rpc.
        """
        raw = self.http_rpc(url, method, **kwargs)
        # Strip the try{...}catch(e){} JSONP wrapper, keeping its body.
        raw = sub(r'try{(.*)}catch\(.*\){.*};?', r'\1', raw)
        # Keep only the embedded JSON object (or, failing that, array).
        found = search(r'{.+}', raw) or search(r'\[.+\]', raw)
        return loads(found.group())

    def http_rpc(self, url, method="GET", **kwargs):
        """Perform a raw HTTP RPC and return the response body.

        url     base url to rpc
        method  'GET' or 'POST'
        query   query string passed as a dict
        data    post data passed as a dict
        file    post files passed as a list of 3-tuples (key, filename, data);
                this switches the request to multipart/form-data
        """
        opts = Entity(kwargs)
        if method not in ['GET', 'POST']:
            raise RPCError("Method not in GET or POST")
        if opts.query:
            url += "?" + urlencode(opts.query)
        if method == 'GET':
            req = Request(url)
        elif opts.file:
            # Multipart upload: regular fields plus (key, filename, data) triples.
            content_type, body = multipart_encode(opts.data, opts.file)
            req = Request(url, body)
            req.add_header('Content-Type', content_type)
        elif opts.data:
            req = Request(url, urlencode(opts.data))
        else:
            raise RPCError("POST with no data")
        req.add_header('User-Agent', "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:8.0) Gecko/20100101 Firefox/8.0")
        req.add_header('Accept-Charset', "UTF-8")
        resp = self.opener.open(req)
        payload = resp.read()
        resp.close()
        # FIXME: ugly workaround for Tencent servers that prepend a UTF-8 BOM.
        if payload.startswith('\xef\xbb\xbf'):
            payload = payload[3:]
        return payload
class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem

    Cookies are persisted to ~/.deis/cookies.txt and a Django CSRF token
    header is injected on every request.
    """

    def __init__(self):
        super(Session, self).__init__()
        # Don't pick up proxy/auth settings from the environment.
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.deis/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.deis dir (user-private) if it doesn't exist
        if not os.path.isdir(cookie_dir):
            # fix: 0700 is a syntax error on Python 3; 0o700 works on 2.6+ too
            os.mkdir(cookie_dir, 0o700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def git_root(self):
        """
        Return the absolute path from the git repository root

        If no git repository exists, raise an EnvironmentError
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_formation(self):
        """
        Return the formation name for the current directory

        The formation is determined by parsing `git remote -v` output.
        If no formation is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        # try to match a deis remote
        remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root)
        # fix: re.match only anchors at the very start of the string, so the
        # MULTILINE flag was dead and a deis remote on any line but the first
        # was never found; re.search lets '^' match at each line start.
        m = re.search(r'^deis\W+(?P<url>\S+)\W+\(', remotes, re.MULTILINE)
        if not m:
            raise EnvironmentError(
                'Could not find deis remote in `git remote -v`')
        url = m.groupdict()['url']
        # fix: escape the dot so the optional suffix matches only ".git"
        m = re.match(r'\S+:(?P<formation>[a-z0-9-]+)(\.git)?', url)
        if not m:
            raise EnvironmentError("Could not parse: {url}".format(**locals()))
        return m.groupdict()['formation']

    formation = property(get_formation)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling including
        `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`
        """
        # Mirror the csrftoken cookie into the X-CSRFToken header Django expects.
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        # Persist any cookies the server set (e.g. auth/session).
        self.cookies.save()
        return response
class AOJClient(object): def __init__(self, cookie_file_path='aoj-cookie.txt'): self.cookie_file_path = cookie_file_path self.cookiejar = MozillaCookieJar() if os.path.isfile(cookie_file_path): self.cookiejar.load(cookie_file_path) self.opener = urllib2.build_opener( urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(self.cookiejar)) def get_csrf_token(self, url): request = urllib2.Request(url=url) response = self.opener.open(request) data = response.read() return REGEXP_CSRF.findall(data)[0] def refresh_session(self): print 'Not Logged In!' context = {'csrfmiddlewaretoken': self.get_csrf_token(LOGIN_URL), 'username': raw_input('Username: '******'password': getpass.getpass('Password: '******'accounts/login/', data=urllib.urlencode(context)) self.opener.open(request) self.cookiejar.save(self.cookie_file_path) def check_problem_exist(self, problem_name): try: request = urllib2.Request(url=PROB_PREFIX+'read/'+problem_name) response = self.opener.open(request) except urllib2.HTTPError as err: if err.code == 404: # Not Found raise AOJProblemNotExist else: raise def detect_language(self, source_file): if '.' in source_file: selected_language = source_file[source_file.rfind('.')+1:] else: selected_language = '' while selected_language not in LANGUAGES: selected_language = raw_input('Please select your langauge: (' + '/'.join(LANGUAGES) + ') ? 
').strip().lower() return selected_language def submit(self, submission): self.check_problem_exist(submission.problem) context = {} context['language'] = self.detect_language(submission.source) context['csrfmiddlewaretoken'] = self.get_csrf_token(url=PROB_PREFIX+'submit/'+submission.problem) try: with open(submission.source) as f: context['source'] = f.read() except IOError: raise AOJFileNotExist() def try_submit(first=True): if not first: self.refresh_session() request = urllib2.Request(url=PROB_PREFIX+'submit/'+submission.problem, data=urllib.urlencode(context)) response = self.opener.open(request) if not response.geturl().lower().startswith(LOGIN_URL): print 'Submission Complete!' return try_submit(first=False) try_submit() def get_submission_list(self, problem_name): self.check_problem_exist(problem_name) request = urllib2.Request(url=SITE_PREFIX+'judge/submission/recent/?problem='+problem_name) response = self.opener.open(request) try: import lxml.html except ImportError: print 'lxml library is needed for parsing HTML' return html = lxml.html.fromstring(unicode(response.read().decode('utf8'))) context = {} fields = ('id', 'problem', 'user', 'language', 'length', 'state', 'stats', 'submitted_on') length = {'id': 9, 'problem': 15, 'user': 15, 'language': 5, 'length': 7, 'state': 15, 'stats': 7, 'submitted_on': 15} template = u'%(id)s %(problem)s %(user)s %(language)s %(length)s %(state)s %(stats)s %(submitted_on)s' def width(string): return sum(1+(unicodedata.east_asian_width(c) in 'WF') for c in string) for tr in html.cssselect('table.submission_list tr'): for field in fields: element = tr.find_class(field) if element: context[field] = unicode(element[0].text_content().strip()) else: context[field] = u'' context[field] = ' ' * (length[field] - width(context[field])) + context[field] print template % context