Example 1
class RDWorker:
    """
    Worker class to perform Real-Debrid related actions:
    - format login info so it can be used by Real-Debrid
    - log in
    - unrestrict links
    - keep cookies
    """

    _endpoint = 'http://www.real-debrid.com/ajax/%s'

    def __init__(self, cookie_file):
        self._cookie_file = cookie_file
        self.cookies = MozillaCookieJar(self._cookie_file)

    def login(self, username, password_hash):
        """
        Log into Real-Debrid. password_hash must be an MD5 hash of the password string.
        :param username:
        :param password_hash:
        :return: :raise:
        """
        if path.isfile(self._cookie_file):
            self.cookies.load(self._cookie_file)

            for cookie in self.cookies:
                if cookie.name == 'auth' and not cookie.is_expired():
                    return  # no need for a new cookie

        # request a new cookie if no valid cookie is found or if it's expired
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        try:
            response = opener.open(self._endpoint % 'login.php?%s' % urlencode({'user': username, 'pass': password_hash}))
            resp = load(response)
            opener.close()

            if resp['error'] == 0:
                self.cookies.save(self._cookie_file)
            else:
                raise LoginError(resp['message'].encode('utf-8'), resp['error'])
        except Exception as e:
            raise Exception('Login failed: %s' % str(e))

    def unrestrict(self, link, password=''):
        """
        Unrestrict a download URL. Returns a tuple of the unrestricted URL and the filename.
        :param link: url to unrestrict
        :param password: password to use for the unrestriction
        :return: :raise:
        """
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        response = opener.open(self._endpoint % 'unrestrict.php?%s' % urlencode({'link': link, 'password': password}))
        resp = load(response)
        opener.close()

        if resp['error'] == 0:
            info = resp['generated_links'][0]
            return info[2], info[0].replace('/', '_')
        else:
            raise UnrestrictionError(resp['message'].encode('utf-8'), resp['error'])
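A minimal usage sketch for the class above (the cookie file name, credentials, and hoster link are placeholders; LoginError and UnrestrictionError are the exception classes the snippet itself raises):

from hashlib import md5

worker = RDWorker('rd_cookies.txt')
try:
    worker.login('my_username', md5('my_password'.encode('utf-8')).hexdigest())
    unrestricted_url, filename = worker.unrestrict('http://example.com/some/hoster/link')
    print(unrestricted_url, filename)
except (LoginError, UnrestrictionError) as err:
    print('Real-Debrid error: %s' % err)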
Example 2
    def __init__(self, mobile, password=None, status='0',
        cachefile='Fetion.cache', cookiesfile=''):
        '''Login status:
        Online: 400  Invisible: 0  Busy: 600  Away: 100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile
            
        # try:
        #     with open(cookiesfile, 'rb') as f:
        #         cookie_processor = load(f)
        # except:
        #     cookie_processor = HTTPCookieProcessor(CookieJar())
        cookiejar = MozillaCookieJar(filename=cookiesfile)
        try:
            f = open(cookiesfile)
        except IOError:
            f = open(cookiesfile, 'w')
            f.write(MozillaCookieJar.header)
        finally:
            f.close()
        cookiejar.load(filename=cookiesfile)
        cookie_processor = HTTPCookieProcessor(cookiejar)
        self.opener = build_opener(cookie_processor,
            HTTPHandler)
        self.mobile, self.password = mobile, password
        if not self.alive():
            if self._login(): cookiejar.save()

        #dump(cookie_processor, open(cookiesfile, 'wb'))        
        self.changestatus(status)
Example 3
    def __init__(self, mobile, password=None, status='0',
        cachefile='Fetion.cache', cookiesfile=''):
        '''Login status:
        Online: 400  Invisible: 0  Busy: 600  Away: 100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile

        cookiejar = MozillaCookieJar(filename=cookiesfile)
        if not os.path.isfile(cookiesfile):
            open(cookiesfile, 'w').write(MozillaCookieJar.header)

        cookiejar.load(filename=cookiesfile)

        cookie_processor = HTTPCookieProcessor(cookiejar)

        self.opener = build_opener(cookie_processor,
            HTTPHandler)
        self.mobile, self.password = mobile, password
        if not self.alive():
            self._login()
            cookiejar.save()

        self.changestatus(status)
Example 4
    def _get_cookie_headers(cls):
        jar = MozillaCookieJar(config.netflix.cookies_path)
        jar.load()
        cookies = []
        for line in jar:
            cookies.append('='.join((line.name, line.value)))
        return cookies
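The helper above returns each cookie as a "name=value" string; a typical follow-up, sketched here with made-up cookie values, is to fold the list into a single Cookie header for a hand-built request:

cookie_headers = ['NetflixId=abc123', 'SecureNetflixId=def456']  # e.g. the list returned by _get_cookie_headers()
headers = {'Cookie': '; '.join(cookie_headers)}
# headers == {'Cookie': 'NetflixId=abc123; SecureNetflixId=def456'}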
Example 5
	def GetWithCookie( url, cookie_name, data = '', retry = 3):
		global PATH_TMP, ACGINDEX_UA

		try:
			cj = MozillaCookieJar( PATH_TMP + cookie_name )

			try :
				cj.load( PATH_TMP + cookie_name )
			except:
				pass # no cookie yet, nothing to load

			ckproc = urllib2.HTTPCookieProcessor( cj )

			AmagamiSS = urllib2.build_opener( ckproc )
			AmagamiSS.addheaders = [ ACGINDEX_UA ]

			if data != '':
				request = urllib2.Request( url = url, data = data )
				res = AmagamiSS.open( request )
				cj.save() # only save the newly obtained cookie when POSTing
			else:
				res = AmagamiSS.open( url )

			return Haruka.GetContent( res )

		except:
			# allow up to 3 reconnect attempts; give up after 3 timeouts
			if retry > 0 : 
				return Haruka.GetWithCookie( url, cookie_name, data , retry-1 )
			else:
				return False
Example 6
    def __init__(self,
                 mobile,
                 password=None,
                 status='0',
                 cachefile='Fetion.cache',
                 cookiesfile=''):
        '''Login status:
        Online: 400  Invisible: 0  Busy: 600  Away: 100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile

        cookiejar = MozillaCookieJar(filename=cookiesfile)
        if not os.path.isfile(cookiesfile):
            open(cookiesfile, 'w').write(MozillaCookieJar.header)

        cookiejar.load(filename=cookiesfile)

        cookie_processor = HTTPCookieProcessor(cookiejar)

        self.opener = build_opener(cookie_processor, HTTPHandler)
        self.mobile, self.password = mobile, password
        if not self.alive():
            self._login()
            cookiejar.save()

        self.changestatus(status)
Example 7
class WebBrowser(object):
    '''keeps cookies in memory, emulating a browser
       *currently does not execute javascript'''
    def __init__(self, uAgent=None, headers=None):
        '''uAgent is the user agent'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        # self.session = requests.Session()
        # self.session.headers.update({ 'User-Agent': uAgent })
        # self.session.max_redirects = 20
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        return lambda *a, **k: etree.parse(f(*a, **k),
                                           parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''fetches the data of a web page given in url;
           to send data via POST, pass it already encoded in data'''
        if headers:
            self.opener.addheaders = headers

        if not (data == None or type(data) == str):
            data = urllib.urlencode(data)

        if method == 'POST':
            # self.last_seen = self.session.post(url, data=data)
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            #self.last_seen = self.session.get(url + '?' + data)
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise Exception
        return self.last_seen

    def geturl(self):
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''saves the in-memory cookies to disk'''
        '''path is the cookie file path'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''loads cookies from disk into memory'''
        '''path is the cookie file path'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
Example 8
def LIVE(url, relogin=False):
    if not (settings['username'] and settings['password']):
        xbmcgui.Dialog().ok('Chyba', 'Nastavte prosím moja.markiza.sk konto',
                            '', '')
        xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xbmcgui.ListItem())
        raise RuntimeError
    cj = MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    if not relogin:
        try:
            cj.load(cookiepath)
        except IOError:
            relogin = True
    if relogin:
        response = opener.open(loginurl).read()
        token = re.search(r'name=\"_token_\" value=\"(\S+?)\">',
                          response).group(1)
        logindata = urllib.urlencode({
            'email': settings['username'],
            'password': settings['password'],
            '_token_': token,
            '_do': 'content1-loginForm-form-submit'
        }) + '&login=Prihl%C3%A1si%C5%A5+sa'
        opener.open(loginurl, logindata)
        log('Saving cookies')
        cj.save(cookiepath)

    response = opener.open(url).read()
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(
        1)  #https://videoarchiv.markiza.sk/api/v1/user/live
    link = link.replace('&amp;', '&')
    response = opener.open(link).read()
    if '<iframe src=\"' not in response:  #handle expired cookies
        if relogin:
            xbmcgui.Dialog().ok('Chyba', 'Skontrolujte prihlasovacie údaje',
                                '', '')
            raise RuntimeError  # loop protection
        else:
            LIVE(url, relogin=True)
            return
    opener.addheaders = [('Referer', link)]
    link = re.search(r'<iframe src=\"(\S+?)\"',
                     response).group(1)  #https://media.cms.markiza.sk/embed/
    response = opener.open(link).read()
    if '<title>Error</title>' in response:
        error = re.search('<h2 class="e-title">(.*?)</h2>', response).group(
            1)  #Video nie je dostupné vo vašej krajine
        xbmcgui.Dialog().ok('Chyba', error, '', '')
        raise RuntimeError
    link = re.search(r'\"hls\": \"(\S+?)\"', response).group(
        1)  #https://h1-s6.c.markiza.sk/hls/markiza-sd-master.m3u8
    response = opener.open(link).read()

    cookies = '|Cookie='
    for cookie in cj:
        cookies += cookie.name + '=' + cookie.value + ';'
    cookies = cookies[:-1]
    play_item = xbmcgui.ListItem(path=link + cookies)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=play_item)
Example 9
class WebBrowser(object):
    '''keeps cookies in memory, emulating a browser
       *currently does not execute javascript'''
    def __init__(self, uAgent=None, headers=None):
        '''uAgent is the user agent'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        # self.session = requests.Session()
        # self.session.headers.update({ 'User-Agent': uAgent })
        # self.session.max_redirects = 20
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        return lambda *a, **k: etree.parse(f(*a, **k), parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''fetches the data of a web page given in url;
           to send data via POST, pass it already encoded in data'''
        if headers:
            self.opener.addheaders = headers

        if not (data == None or type(data) == str):
            data = urllib.urlencode(data)

        if method == 'POST':
            # self.last_seen = self.session.post(url, data=data)
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            #self.last_seen = self.session.get(url + '?' + data)
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise Exception
        return self.last_seen

    def geturl(self):
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''saves the in-memory cookies to disk'''
        '''path is the cookie file path'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''loads cookies from disk into memory'''
        '''path is the cookie file path'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
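A minimal usage sketch for the WebBrowser class above (the URL, query data, and cookie file name are placeholders); fetch() returns an lxml tree thanks to the newtree decorator, so it can be queried with XPath:

browser = WebBrowser()
tree = browser.fetch('http://example.com/search', data={'q': 'python'}, method='GET')
print(browser.geturl())
for href in tree.xpath('//a/@href'):
    print(href)
browser.save_cookies('session_cookies.txt')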
Example 10
def main(*args):

    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-u", "--username",
                    help="the username to login as.")
    optp.add_option("-d", "--storedir", dest="store_dir",
                     help="the directory to store the certificate/key and \
                     config file",
                     metavar="DIR",
                     default=path.join(homedir, ".shibboleth"))
    optp.add_option("-i", "--idp",
                    help="unique ID of the IdP used to log in")
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")
    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all is not well.

    log_level = logging.WARNING # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable default format.
    logging.basicConfig(level=log_level)

    if not args:
        optp.print_help()
        return

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    sp = args[0]


    idp = Idp(opts.idp)
    c = CredentialManager()
    if opts.username:
        c.username = opts.username

    # if the cookies file exists load it
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    shibboleth = Shibboleth(idp, c, cj)
    shibboleth.openurl(sp)
    print("Successfully authenticated to %s" % sp)

    cj.save()
Example 11
def main(*args):

    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-d", "--storedir", dest="store_dir",
                     help="the directory to store the certificate/key and \
                     config file",
                     metavar="DIR",
                     default=path.join(homedir, ".shibboleth"))
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
            help="Increase verbosity (specify multiple times for more)")
    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all is not well.

    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable
    # default format.
    logging.basicConfig(level=log_level)

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    if args:
        sp = args[0]

    # if the cookies file exists load it
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    logout_urls = []
    for cookie in cj:
        if cookie.name.startswith('_shibsession_') or \
               cookie.name.startswith('_shibstate_'):
            logout_urls.append(
                "https://%s/Shibboleth.sso/Logout" % cookie.domain)

    logout_urls = list(set(logout_urls))

    opener = urllib2.build_opener(HTTPCookieProcessor(cookiejar=cj))
    for url in logout_urls:
        request = urllib2.Request(url)
        log.debug("GET: %s" % request.get_full_url())
        response = opener.open(request)

    cj.save()
Example 12
def load_cookies3():
    """ 加载 cookie:cookies.txt -> load()  —— MozillaCookieJar格式
    """
    save_to_txt()
    cj = MozillaCookieJar()
    cj.load('localCookiesMoz.txt', ignore_discard=True,
            ignore_expires=True)  # 这里必须将参数置为True,否则登录失败
    for index, cookie in enumerate(cj):  # 显示cookies
        print('[', index, ']', cookie)
    return cj
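A short sketch of how the jar returned by load_cookies3() can then be attached to an opener so the saved session is reused (the target URL is a placeholder; Python 3's urllib.request is assumed to match the snippet's print() calls):

from urllib.request import build_opener, HTTPCookieProcessor

cj = load_cookies3()
opener = build_opener(HTTPCookieProcessor(cj))
response = opener.open('https://example.com/protected/page')
print(response.getcode())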
Example 13
def Get( url, data = '', refer = 'http://www.pixiv.net/', retry = 3 ):
    global ABS_PATH

    cj = MozillaCookieJar( ABS_PATH + 'pixiv.cookie.txt' )

    try :
        cj.load( ABS_PATH + 'pixiv.cookie.txt' )
    except:
        pass # no cookie yet, nothing to load

    ckproc = urllib2.HTTPCookieProcessor( cj )

    opener = urllib2.build_opener( ckproc )
    opener.addheaders = [
        ('Accept', '*/*'),
        ('Accept-Language', 'zh-CN,zh;q=0.8'),
        ('Accept-Charset', 'UTF-8,*;q=0.5'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31'),
        ('Referer', refer)
    ]

    # keep weibo from switching to the English version when accessed from overseas
    if 'weibo.com' in url:
        opener.addheaders = [('Cookie', 'lang=zh-cn; SUB=Af3TZPWScES9bnItTjr2Ahd5zd6Niw2rzxab0hB4mX3uLwL2MikEk1FZIrAi5RvgAfCWhPyBL4jbuHRggucLT4hUQowTTAZ0ta7TYSBaNttSmZr6c7UIFYgtxRirRyJ6Ww%3D%3D; UV5PAGE=usr512_114; UV5=usrmdins311164')]

    debug('Network: url - ' + url)

    try:
        # send the request
        if data != '':
            debug('Network: post')
            debug(data)
            request = urllib2.Request( url = url, data = data )
            res = opener.open( request, timeout = 15 )
            cj.save() # only save the newly obtained cookie when POSTing
        else:
            debug('Network: get')
            res = opener.open( url, timeout = 15 )

        debug('Network: Status Code - ' + str(res.getcode()))

        return GetContent( res )

    except Exception, e:
        # retry automatically, at most 3 times per image
        if retry > 0:
            return Get( url, data, refer, retry-1 )
        else:
            log(e, 'Error: unable to get %s' % url)
            return False
Example 14
class NRK:
    def __init__(self):
        policy = DefaultCookiePolicy(
            rfc2965=True, strict_ns_domain=DefaultCookiePolicy.DomainStrict)
        self.cj = MozillaCookieJar(".cookies", policy)

        try:
            self.cj.load()
        except IOError, e:
            if e.errno != 2:
                raise e
            # else: Ignore "File not found"    
        self.opener = build_opener(HTTPCookieProcessor(self.cj))
        self.init()
        #self.login()
        self.setspeed()
Example 15
    def login_test(self, provider):
        with self.app.test_request_context('https://localhost.admin.eutaxia.eu:5000/login',
                                           base_url='https://localhost.admin.eutaxia.eu:5000/'):
            resp = oauth.authorize(provider)
            assert resp.status_code == 302
            location = resp.headers['Location']
            session_data = dict(flask.session)

        cj = MozillaCookieJar(os.path.join(os.path.dirname(__file__), 'cookies.%s.txt' % provider))
        cj.load()

        class NoRedirectHandler(HTTPRedirectHandler):

            def redirect_request(self, req, fp, code, msg, hdrs, newurl):
                if newurl.startswith('https://localhost.admin.eutaxia.eu:5000/login/%s' % provider):
                    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
                return HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl)

        opener = build_opener(HTTPCookieProcessor(cj),
                              NoRedirectHandler())

        try:
            res = opener.open(location)
        except HTTPError as err:
            assert err.code == 302
            url = err.hdrs['Location']
            assert url.startswith('https://localhost.admin.eutaxia.eu:5000/login/%s' % provider)
        else:
            if provider == 'windowslive':
                # Unfortunately we can't configure Windows Live to accept two separate
                # redirect URLs
                return
            else:
                assert False, 'Wrong redirect'

        with self.app.test_client() as c:
            with c.session_transaction() as session:
                session.update(session_data)

            query_string = urlparse(url).query
            resp = c.get('/login/%s' % provider, query_string=query_string)
            assert resp.status_code == 666
Example 16
def check_kilnauth_token(ui, url):
    cookiepath = _get_path('hgcookies')
    if (not os.path.exists(cookiepath)) or (not os.path.isdir(cookiepath)):
        return ''
    cookiepath = os.path.join(cookiepath, md5(get_username(get_dest(ui))).hexdigest())

    try:
        if not os.path.exists(cookiepath):
            return ''
        cj = MozillaCookieJar(cookiepath)
    except IOError:
        return ''

    domain = get_domain(url)

    cj.load(ignore_discard=True, ignore_expires=True)
    for cookie in cj:
        if domain == cookie.domain:
            if cookie.name == 'fbToken':
                return cookie.value
Example 17
def get_cookie(uname,pword,cookie_jar_path):
    print(cookie_jar_path)
    if os.path.isfile(cookie_jar_path):
        cookie_jar = MozillaCookieJar()
        cookie_jar.load(cookie_jar_path)

        # make sure cookie is still valid
        print('*******************  FIRST COOKIE Check  ******************')
        if check_cookie(cookie_jar):
            print(" > Re-using previous cookie jar.")
            print('*******************  END FIRST COOKIE Check  ******************')
            return cookie_jar
        else:
            print(" > Could not validate old cookie Jar")
            cookie_jar = get_new_cookie(uname, pword, cookie_jar_path)
            check_cookie(cookie_jar)
    else:
        print('Could not find existing cookie jar -- Creating a new one')
        cookie_jar = get_new_cookie(uname,pword,cookie_jar_path)

    return cookie_jar
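A brief sketch of how the jar returned by get_cookie() might be installed for subsequent requests (Python 3's urllib.request is assumed; the credentials, cookie path, and download URL are placeholders):

from urllib.request import build_opener, install_opener, HTTPCookieProcessor, urlopen

cookie_jar = get_cookie('my_username', 'my_password', '/tmp/cookies.txt')
install_opener(build_opener(HTTPCookieProcessor(cookie_jar)))
data = urlopen('https://example.com/protected/file').read()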
Example 18
def check_kilnauth_token(ui, url):
    cookiepath = _get_path('hgcookies')
    if (not os.path.exists(cookiepath)) or (not os.path.isdir(cookiepath)):
        return ''
    cookiepath = os.path.join(cookiepath,
                              md5(get_username(get_dest(ui))).hexdigest())

    try:
        if not os.path.exists(cookiepath):
            return ''
        cj = MozillaCookieJar(cookiepath)
    except IOError:
        return ''

    domain = get_domain(url)

    cj.load(ignore_discard=True, ignore_expires=True)
    for cookie in cj:
        if domain == cookie.domain:
            if cookie.name == 'fbToken':
                return cookie.value
Example 19
    def __init__(self,
                 mobile,
                 password=None,
                 status='0',
                 cachefile='Fetion.cache',
                 cookiesfile=''):
        '''Login status:
        Online: 400  Invisible: 0  Busy: 600  Away: 100
        '''
        if cachefile:
            self.cache = Cache(cachefile)

        if not cookiesfile:
            cookiesfile = '%s.cookies' % mobile

        # try:
        # with open(cookiesfile, 'rb') as f:
        # cookie_processor = load(f)
        # except:
        # cookie_processor = HTTPCookieProcessor(CookieJar())
        cookiejar = MozillaCookieJar(filename=cookiesfile)
        try:
            f = open(cookiesfile)
        except IOError:
            f = open(cookiesfile, 'w')
            f.write(MozillaCookieJar.header)
        finally:
            f.close()
        cookiejar.load(filename=cookiesfile)
        cookie_processor = HTTPCookieProcessor(cookiejar)
        self.opener = build_opener(cookie_processor, HTTPHandler)
        self.mobile, self.password = mobile, password
        if not self.alive():
            if self._login(): cookiejar.save()

        #dump(cookie_processor, open(cookiesfile, 'wb'))
        self.changestatus(status)
Example 20
class bulk_downloader:
    def __init__(self):
        # List of files to download
        self.files = [
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20200110T101421_20200110T101446_019753_025598_C902-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191229T101421_20191229T101446_019578_025007_DB2A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191217T101422_20191217T101447_019403_024A73_D2A9-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191205T101422_20191205T101447_019228_0244DD_9778-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191123T101423_20191123T101448_019053_023F55_95B6-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191111T101423_20191111T101448_018878_0239B4_3FCF-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191030T101423_20191030T101448_018703_02340F_3D8D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191018T101423_20191018T101448_018528_022E97_0AEB-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191006T101423_20191006T101448_018353_022937_B959-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190912T101422_20190912T101447_018003_021E50_B3FB-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190831T101421_20190831T101446_017828_0218D8_1ADE-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190819T101421_20190819T101446_017653_021365_B751-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190807T101420_20190807T101445_017478_020DEF_A757-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101514_20190801T101539_028374_0334DB_E6C0-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101449_20190801T101514_028374_0334DB_6CA1-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190726T101419_20190726T101444_017303_0208A8_2D9C-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190714T101419_20190714T101444_017128_020394_A8B6-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190702T101418_20190702T101443_016953_01FE6B_BE7D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190620T101417_20190620T101442_016778_01F93E_D609-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190608T101416_20190608T101441_016603_01F407_282F-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190527T101416_20190527T101441_016428_01EECF_79D2-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190515T101415_20190515T101440_016253_01E971_7A00-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190503T101415_20190503T101440_016078_01E3E6_D149-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190421T101414_20190421T101439_015903_01DE0C_E919-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190409T101414_20190409T101439_015728_01D843_E7B3-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190328T101413_20190328T101438_015553_01D27A_7404-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190316T101413_20190316T101438_015378_01CCBE_781F-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190304T101413_20190304T101438_015203_01C713_17EF-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190220T101413_20190220T101438_015028_01C151_EA49-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190208T101413_20190208T101438_014853_01BB8C_940D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190127T101414_20190127T101439_014678_01B5D2_3B0A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190115T101414_20190115T101439_014503_01B03A_4439-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190103T101414_20190103T101439_014328_01AA92_7D9B-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181222T101415_20181222T101440_014153_01A4CF_3F05-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181210T101415_20181210T101440_013978_019F03_1C29-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181128T101416_20181128T101441_013803_01995A_6DD3-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181116T101416_20181116T101441_013628_0193C1_FE12-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181104T101416_20181104T101441_013453_018E4D_0014-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101420_20181023T101445_013278_0188CC_5952-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101355_20181023T101420_013278_0188CC_0FA6-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181011T101417_20181011T101442_013103_01835D_D0A0-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180929T101416_20180929T101441_012928_017E0F_226F-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180917T101416_20180917T101441_012753_0178B3_B66A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180905T101415_20180905T101440_012578_017358_3259-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180824T101415_20180824T101440_012403_016DE5_85C3-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180812T101414_20180812T101439_012228_01687D_BCA9-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180731T101414_20180731T101439_012053_01631A_ADBC-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180719T101413_20180719T101438_011878_015DD1_3E69-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180707T101412_20180707T101437_011703_015872_5055-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180625T101411_20180625T101436_011528_015300_5709-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180613T101411_20180613T101436_011353_014D8E_1799-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180601T101410_20180601T101435_011178_014821_B178-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180520T101409_20180520T101434_011003_014273_5667-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180508T101408_20180508T101433_010828_013CCB_18C3-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180426T101408_20180426T101433_010653_013720_457C-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180414T101407_20180414T101432_010478_01318E_FB0A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180402T101407_20180402T101432_010303_012BEA_9E94-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180321T101406_20180321T101431_010128_012640_6D69-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180309T101406_20180309T101431_009953_01208C_4F7B-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180225T101406_20180225T101431_009778_011AAD_2181-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180213T101407_20180213T101432_009603_0114ED_D868-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180201T101407_20180201T101432_009428_010F21_C8FA-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180120T101407_20180120T101432_009253_010968_4DBE-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180108T101408_20180108T101433_009078_0103B1_EB1D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171227T101408_20171227T101433_008903_00FDFF_A4F1-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171215T101409_20171215T101434_008728_00F863_906F-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171203T101409_20171203T101434_008553_00F2D6_B8D7-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171121T101409_20171121T101434_008378_00ED57_D6D0-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171028T101410_20171028T101435_008028_00E2F3_6DFC-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171016T101410_20171016T101435_007853_00DDE1_829D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171004T101409_20171004T101434_007678_00D8F4_5C9C-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170922T101409_20170922T101434_007503_00D3F7_9FEC-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170910T101409_20170910T101434_007328_00CED7_2D8E-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170829T101408_20170829T101433_007153_00C9B8_96C9-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170817T101408_20170817T101433_006978_00C4A9_5D92-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170805T101407_20170805T101432_006803_00BF8B_4F73-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170724T101407_20170724T101432_006628_00BA88_2017-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170712T101406_20170712T101431_006453_00B58B_7674-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170630T101405_20170630T101430_006278_00B098_CAC7-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170618T101404_20170618T101429_006103_00AB89_7D52-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170606T101404_20170606T101429_005928_00A666_6411-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170525T101403_20170525T101428_005753_00A14F_A827-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170513T101402_20170513T101427_005578_009C52_4E38-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170501T101402_20170501T101427_005403_009788_B5E9-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170419T101401_20170419T101426_005228_009270_5637-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170407T101401_20170407T101426_005053_008D67_BB68-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170326T101400_20170326T101425_004878_008859_36E8-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170314T101400_20170314T101425_004703_008359_7A42-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170302T101400_20170302T101425_004528_007E2B_F8A2-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170206T101400_20170206T101425_004178_0073C4_69B1-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170113T101401_20170113T101426_003828_00695B_0B49-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20161220T101403_20161220T101428_003478_005F1B_E6DD-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161126T101403_20161126T101428_003128_005520_5BBB-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161102T101404_20161102T101429_002778_004B45_F931-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161009T101404_20161009T101429_002428_00419A_2FD0-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20160927T101404_20160927T101429_002253_003CAB_BC6E-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101423_20160909T101448_012974_01487C_40C0-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101448_20160909T101513_012974_01487C_E55B-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160816T101434_20160816T101503_012624_013CE1_AA51-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160723T101444_20160723T101513_012274_013152_9A67-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101426_20160629T101455_011924_0125E4_7F71-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101455_20160629T101520_011924_0125E4_D66A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160605T101440_20160605T101505_011574_011AE7_0C49-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160512T101439_20160512T101504_011224_010F91_FCE1-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160418T101435_20160418T101500_010874_01048F_89B1-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160325T101434_20160325T101459_010524_00FA2B_4EAD-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160301T101434_20160301T101459_010174_00F035_3B54-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101440_20160206T101505_009824_00E617_C31E-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101415_20160206T101440_009824_00E617_D79A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160113T101434_20160113T101459_009474_00DBEE_5DBA-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151220T101435_20151220T101500_009124_00D1EE_CFED-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151102T101442_20151102T101507_008424_00BE79_E2E7-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151009T101442_20151009T101507_008074_00B50C_12FD-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150915T101441_20150915T101506_007724_00ABB3_226C-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150822T101441_20150822T101506_007374_00A234_599D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150729T101439_20150729T101504_007024_0098B2_E48E-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150705T101438_20150705T101503_006674_008EB2_3496-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101442_20150518T101507_005974_007B3F_EFEC-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101417_20150518T101442_005974_007B3F_DF42-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150424T101426_20150424T101451_005624_00734A_AD5A-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101444_20150331T101509_005274_006AB8_213D-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101419_20150331T101444_005274_006AB8_EBF3-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101444_20150307T101509_004924_006269_7919-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101419_20150307T101444_004924_006269_9089-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101443_20150211T101508_004574_005A0A_8FEE-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101418_20150211T101443_004574_005A0A_4D0C-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101444_20150118T101509_004224_00522A_42D5-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101419_20150118T101444_004224_00522A_26FE-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101439_20141225T101504_003874_004A4B_D1FC-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101414_20141225T101439_003874_004A4B_367E-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101440_20141201T101505_003524_004254_556F-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101415_20141201T101440_003524_004254_5C25-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101441_20141107T101506_003174_003A78_745C-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101416_20141107T101441_003174_003A78_5D83-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101419_20141014T101444_002824_0032F1_B89E-PREDORB-10m-power-filt-rtc-gamma.zip",
            "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101444_20141014T101509_002824_0032F1_B54D-PREDORB-10m-power-filt-rtc-gamma.zip"
        ]

        # Local stash of cookies so we don't always have to ask
        self.cookie_jar_path = os.path.join(os.path.expanduser('~'),
                                            ".bulk_download_cookiejar.txt")
        self.cookie_jar = None

        self.asf_urs4 = {
            'url': 'https://urs.earthdata.nasa.gov/oauth/authorize',
            'client': 'BO_n7nTIlMljdvU6kRRB3g',
            'redir': 'https://auth.asf.alaska.edu/login'
        }

        # Make sure we can write to our current directory
        if os.access(os.getcwd(), os.W_OK) is False:
            print(
                "WARNING: Cannot write to current path! Check permissions for {0}"
                .format(os.getcwd()))
            exit(-1)

        # For SSL
        self.context = {}

        # Check if user handed in a Metalink or CSV:
        if len(sys.argv) > 0:
            download_files = []
            input_files = []
            for arg in sys.argv[1:]:
                if arg == '--insecure':
                    try:
                        ctx = ssl.create_default_context()
                        ctx.check_hostname = False
                        ctx.verify_mode = ssl.CERT_NONE
                        self.context['context'] = ctx
                    except AttributeError:
                        # Python 2.6 won't complain about SSL Validation
                        pass

                elif arg.endswith('.metalink') or arg.endswith('.csv'):
                    if os.path.isfile(arg):
                        input_files.append(arg)
                        if arg.endswith('.metalink'):
                            new_files = self.process_metalink(arg)
                        else:
                            new_files = self.process_csv(arg)
                        if new_files is not None:
                            for file_url in (new_files):
                                download_files.append(file_url)
                    else:
                        print(
                            " > I cannot find the input file you specified: {0}"
                            .format(arg))
                else:
                    print(
                        " > Command line argument '{0}' makes no sense, ignoring."
                        .format(arg))

            if len(input_files) > 0:
                if len(download_files) > 0:
                    print(" > Processing {0} downloads from {1} input files. ".
                          format(len(download_files), len(input_files)))
                    self.files = download_files
                else:
                    print(
                        " > I see you asked me to download files from {0} input files, but they had no downloads!"
                        .format(len(input_files)))
                    print(" > I'm super confused and exiting.")
                    exit(-1)

        # Make sure cookie_jar is good to go!
        self.get_cookie()

        # summary
        self.total_bytes = 0
        self.total_time = 0
        self.cnt = 0
        self.success = []
        self.failed = []
        self.skipped = []

    # Get and validate a cookie
    def get_cookie(self):
        if os.path.isfile(self.cookie_jar_path):
            self.cookie_jar = MozillaCookieJar()
            self.cookie_jar.load(self.cookie_jar_path)

            # make sure cookie is still valid
            if self.check_cookie():
                print(" > Re-using previous cookie jar.")
                return True
            else:
                print(" > Could not validate old cookie Jar")

        # We don't have a valid cookie; prompt the user for credentials
        print("No existing URS cookie found, please enter Earthdata username & password:")
        print("(Credentials will not be stored, saved or logged anywhere)")

        # Keep trying 'till user gets the right U:P
        while self.check_cookie() is False:
            self.get_new_cookie()

        return True

    # Validate cookie before we begin
    def check_cookie(self):

        if self.cookie_jar is None:
            print(" > Cookiejar is bunk: {0}".format(self.cookie_jar))
            return False

        # File we know is valid, used to validate cookie
        file_check = 'https://urs.earthdata.nasa.gov/profile'

        # Apply custom Redirect Handler
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        install_opener(opener)

        # Attempt a HEAD request
        request = Request(file_check)
        request.get_method = lambda: 'HEAD'
        try:
            print(" > attempting to download {0}".format(file_check))
            response = urlopen(request, timeout=30)
            resp_code = response.getcode()
            # Make sure we're logged in
            if not self.check_cookie_is_logged_in(self.cookie_jar):
                return False

            # Save cookiejar
            self.cookie_jar.save(self.cookie_jar_path)

        except HTTPError:
            # If we get this error, again, it likely means the user has not agreed to the current EULA
            print("\nIMPORTANT: ")
            print(
                "Your user appears to lack permissions to download data from the ASF Datapool."
            )
            print(
                "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
            )
            exit(-1)

        # This return codes indicate the USER has not been approved to download the data
        if resp_code in (300, 301, 302, 303):
            try:
                redir_url = response.info().getheader('Location')
            except AttributeError:
                redir_url = response.getheader('Location')

            #Funky Test env:
            if ("vertex-retired.daac.asf.alaska.edu" in redir_url
                    and "test" in self.asf_urs4['redir']):
                print("Cough, cough. It's dusty in this test env!")
                return True

            print("Redirect ({0}) occured, invalid cookie value!".format(
                resp_code))
            return False

        # These are successes!
        if resp_code in (200, 307):
            return True

        return False

    def get_new_cookie(self):
        # Start by prompting user to input their credentials

        # Another Python2/3 workaround
        try:
            new_username = raw_input("Username: ")
        except NameError:
            new_username = input("Username: ")
        new_password = getpass.getpass(prompt="Password (will not be displayed): ")

        # Build URS4 Cookie request
        auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[
            'client'] + '&redirect_uri=' + self.asf_urs4[
                'redir'] + '&response_type=code&state='

        try:
            #python2
            user_pass = base64.b64encode(
                bytes(new_username + ":" + new_password))
        except TypeError:
            #python3
            user_pass = base64.b64encode(
                bytes(new_username + ":" + new_password, "utf-8"))
            user_pass = user_pass.decode("utf-8")

        # Authenticate against URS, grab all the cookies
        self.cookie_jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        request = Request(
            auth_cookie_url,
            headers={"Authorization": "Basic {0}".format(user_pass)})

        # Watch out for cookie rejection!
        try:
            response = opener.open(request)
        except HTTPError as e:
            if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[
                    "WWW-Authenticate"]:
                print(
                    " > Username and Password combo was not successful. Please try again."
                )
                return False
            else:
                # If an error happens here, the user most likely has not confirmed EULA.
                print(
                    "\nIMPORTANT: There was an error obtaining a download cookie!"
                )
                print(
                    "Your user appears to lack permission to download data from the ASF Datapool."
                )
                print(
                    "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
                )
                exit(-1)
        except URLError as e:
            print(
                "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. "
            )
            print("Try cookie generation later.")
            exit(-1)

        # Did we get a cookie?
        if self.check_cookie_is_logged_in(self.cookie_jar):
            #COOKIE SUCCESS!
            self.cookie_jar.save(self.cookie_jar_path)
            return True

        # if we aren't successful generating the cookie, nothing will work. Stop here!
        print(
            "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again."
        )
        print("Response was {0}.".format(response.getcode()))
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
        )
        exit(-1)

    # make sure we're logged into URS
    def check_cookie_is_logged_in(self, cj):
        for cookie in cj:
            if cookie.name == 'urs_user_already_logged':
                # Only get this cookie if we logged in successfully!
                return True

        return False

    # Download the file
    def download_file_with_cookiejar(self,
                                     url,
                                     file_count,
                                     total,
                                     recursion=False):
        # see if we've already downloaded this file and, if so, whether it is the correct size
        download_file = os.path.basename(url).split('?')[0]
        if os.path.isfile(download_file):
            try:
                request = Request(url)
                request.get_method = lambda: 'HEAD'
                response = urlopen(request, timeout=30)
                remote_size = self.get_total_size(response)
                # Check that we were able to derive a size.
                if remote_size:
                    local_size = os.path.getsize(download_file)
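                    # Treat the local copy as complete if its size is within ~1% of the remote size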
                    if remote_size < (local_size +
                                      (local_size * .01)) and remote_size > (
                                          local_size - (local_size * .01)):
                        print(
                            " > Download file {0} exists! \n > Skipping download of {1}. "
                            .format(download_file, url))
                        return None, None
                    # Partial file size wasn't the full file size; let's blow away the chunk and start again
                    print(
                        " > Found {0} but it wasn't fully downloaded. Removing file and downloading again."
                        .format(download_file))
                    os.remove(download_file)

            except ssl.CertificateError as e:
                print(" > ERROR: {0}".format(e))
                print(
                    " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
                )
                return False, None

            except HTTPError as e:
                if e.code == 401:
                    print(
                        " > IMPORTANT: Your user may not have permission to download this type of data!"
                    )
                else:
                    print(" > Unknown Error, Could not get file HEAD: {0}".
                          format(e))

            except URLError as e:
                print("URL Error (from HEAD): {0}, {1}".format(e.reason, url))
                if "ssl.c" in "{0}".format(e.reason):
                    print(
                        "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                    )
                return False, None

        # attempt https connection
        try:
            request = Request(url)
            response = urlopen(request, timeout=30)

            # Watch for redirect
            if response.geturl() != url:

                # See if we were redirected BACK to URS for re-auth.
                if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(
                ):

                    if recursion:
                        print(
                            " > Entering seemingly endless auth loop. Aborting. "
                        )
                        return False, None

                    # make this easier. If there is no app_type=401, add it
                    new_auth_url = response.geturl()
                    if "app_type" not in new_auth_url:
                        new_auth_url += "&app_type=401"

                    print(
                        " > While attempting to download {0}....".format(url))
                    print(" > Need to obtain new cookie from {0}".format(
                        new_auth_url))
                    old_cookies = [cookie.name for cookie in self.cookie_jar]
                    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                                          HTTPHandler(),
                                          HTTPSHandler(**self.context))
                    request = Request(new_auth_url)
                    try:
                        response = opener.open(request)
                        for cookie in self.cookie_jar:
                            if cookie.name not in old_cookies:
                                print(" > Saved new cookie: {0}".format(
                                    cookie.name))

                                # A little hack to save session cookies
                                if cookie.discard:
                                    cookie.expires = int(
                                        time.time()) + 60 * 60 * 24 * 30
                                    print(
                                        " > Saving session Cookie that should have been discarded! "
                                    )

                        self.cookie_jar.save(self.cookie_jar_path,
                                             ignore_discard=True,
                                             ignore_expires=True)
                    except HTTPError as e:
                        print("HTTP Error: {0}, {1}".format(e.code, url))
                        return False, None

                    # Okay, now we have more cookies! Let's try again, recursively!
                    print(" > Attempting download again with new cookies!")
                    return self.download_file_with_cookiejar(url,
                                                             file_count,
                                                             total,
                                                             recursion=True)

                print(
                    " > 'Temporary' Redirect download @ Remote archive:\n > {0}"
                    .format(response.geturl()))

            # seems to be working
            print("({0}/{1}) Downloading {2}".format(file_count, total, url))

            # Open our local file for writing and build status bar
            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
            self.chunk_read(response, tf, report_hook=self.chunk_report)

            # Reset download status
            sys.stdout.write('\n')

            tempfile_name = tf.name
            tf.close()

        #handle errors
        except HTTPError as e:
            print("HTTP Error: {0}, {1}".format(e.code, url))

            if e.code == 401:
                print(
                    " > IMPORTANT: Your user does not have permission to download this type of data!"
                )

            if e.code == 403:
                print(" > Got a 403 Error trying to download this file.  ")
                print(
                    " > You MAY need to log in this app and agree to a EULA. ")

            return False, None

        except URLError as e:
            print("URL Error (from GET): {0}, {1}, {2}".format(
                e, e.reason, url))
            if "ssl.c" in "{0}".format(e.reason):
                print(
                    "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                )
            return False, None

        except socket.timeout as e:
            print(" > timeout requesting: {0}; {1}".format(url, e))
            return False, None

        except ssl.CertificateError as e:
            print(" > ERROR: {0}".format(e))
            print(
                " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
            )
            return False, None

        # Return the file size
        shutil.copy(tempfile_name, download_file)
        os.remove(tempfile_name)
        file_size = self.get_total_size(response)
        actual_size = os.path.getsize(download_file)
        if file_size is None:
            # We were unable to calculate file size.
            file_size = actual_size
        return actual_size, file_size

    def get_redirect_url_from_error(self, error):
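        # Scrape the 'redir_link' anchor out of the returned error page to recover the redirect target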
        find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"")
        print("error file was: {}".format(error))
        redirect_url = find_redirect.search(error)
        if redirect_url:
            print("Found: {0}".format(redirect_url.group(0)))
            return (redirect_url.group(0))

        return None

    #  chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_report(self, bytes_so_far, file_size):
        if file_size is not None:
            percent = float(bytes_so_far) / file_size
            percent = round(percent * 100, 2)
            sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                             (bytes_so_far, file_size, percent))
        else:
            # We couldn't figure out the size.
            sys.stdout.write(" > Downloaded %d of unknown Size\r" %
                             (bytes_so_far))

    #  chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_read(self,
                   response,
                   local_file,
                   chunk_size=8192,
                   report_hook=None):
        file_size = self.get_total_size(response)
        bytes_so_far = 0

        while 1:
            try:
                chunk = response.read(chunk_size)
            except:
                sys.stdout.write("\n > There was an error reading data. \n")
                break

            try:
                local_file.write(chunk)
            except TypeError:
                local_file.write(chunk.decode(local_file.encoding))
            bytes_so_far += len(chunk)

            if not chunk:
                break

            if report_hook:
                report_hook(bytes_so_far, file_size)

        return bytes_so_far

    def get_total_size(self, response):
        try:
            file_size = response.info().getheader('Content-Length').strip()
        except AttributeError:
            try:
                file_size = response.getheader('Content-Length').strip()
            except AttributeError:
                print("> Problem getting size")
                return None

        return int(file_size)

    # Get download urls from a metalink file
    def process_metalink(self, ml_file):
        print("Processing metalink file: {0}".format(ml_file))
        with open(ml_file, 'r') as ml:
            xml = ml.read()

        # Hack to remove annoying namespace
        it = ET.iterparse(StringIO(xml))
        for _, el in it:
            if '}' in el.tag:
                el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
        root = it.root

        dl_urls = []
        ml_files = root.find('files')
        for dl in ml_files:
            dl_urls.append(dl.find('resources').find('url').text)

        if len(dl_urls) > 0:
            return dl_urls
        else:
            return None

    # Get download urls from a csv file
    def process_csv(self, csv_file):
        print("Processing csv file: {0}".format(csv_file))

        dl_urls = []
        with open(csv_file, 'r') as csvf:
            try:
                csvr = csv.DictReader(csvf)
                for row in csvr:
                    dl_urls.append(row['URL'])
            except csv.Error as e:
                print(
                    "WARNING: Could not parse file %s, line %d: %s. Skipping."
                    % (csv_file, csvr.line_num, e))
                return None
            except KeyError as e:
                print(
                    "WARNING: Could not find URL column in file %s. Skipping."
                    % (csv_file))

        if len(dl_urls) > 0:
            return dl_urls
        else:
            return None

    # Download all the files in the list
    def download_files(self):
        for file_name in self.files:

            # make sure we haven't ctrl+c'd or some other abort trap
            if abort == True:
                raise SystemExit

            # download counter
            self.cnt += 1

            # set a timer
            start = time.time()

            # run download
            size, total_size = self.download_file_with_cookiejar(
                file_name, self.cnt, len(self.files))

            # calculate rate
            end = time.time()

            # stats:
            if size is None:
                self.skipped.append(file_name)
            # Check to see that the download didn't error and is the correct size
            elif size is not False and (total_size <
                                        (size + (size * .01)) and total_size >
                                        (size - (size * .01))):
                # Download was good!
                elapsed = end - start
                elapsed = 1.0 if elapsed < 1 else elapsed
                rate = (size / 1024**2) / elapsed

                print(
                    "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec"
                    .format(size, elapsed, rate))

                # add up metrics
                self.total_bytes += size
                self.total_time += elapsed
                self.success.append({'file': file_name, 'size': size})

            else:
                print("There was a problem downloading {0}".format(file_name))
                self.failed.append(file_name)

    def print_summary(self):
        # Print summary:
        print("\n\nDownload Summary ")
        print(
            "--------------------------------------------------------------------------------"
        )
        print("  Successes: {0} files, {1} bytes ".format(
            len(self.success), self.total_bytes))
        for success_file in self.success:
            print("           - {0}  {1:.2f}MB".format(
                success_file['file'], (success_file['size'] / 1024.0**2)))
        if len(self.failed) > 0:
            print("  Failures: {0} files".format(len(self.failed)))
            for failed_file in self.failed:
                print("          - {0}".format(failed_file))
        if len(self.skipped) > 0:
            print("  Skipped: {0} files".format(len(self.skipped)))
            for skipped_file in self.skipped:
                print("          - {0}".format(skipped_file))
        if len(self.success) > 0:
            print("  Average Rate: {0:.2f}MB/sec".format(
                (self.total_bytes / 1024.0**2) / self.total_time))
        print(
            "--------------------------------------------------------------------------------"
        )
Esempio n. 21
0
class Bilibili():
    name = u'哔哩哔哩 (Bilibili)'

    api_url = 'http://interface.bilibili.com/playurl?'
    bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
    SEC1 = '94aba54af9065f71de72f5508f1cd42e'
    SEC2 = '9b288147e5474dd2aa67085f716c560d'
    supported_stream_profile = [u'流畅', u'高清', u'超清']
    stream_types = [{
        'id': 'hdflv'
    }, {
        'id': 'flv'
    }, {
        'id': 'hdmp4'
    }, {
        'id': 'mp4'
    }, {
        'id': 'live'
    }, {
        'id': 'vc'
    }]
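    # Map stream format ids to numeric quality levels (hdflv is the highest)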
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)

    def __init__(self,
                 appkey=APPKEY,
                 appsecret=APPSECRET,
                 width=720,
                 height=480):
        self.defaultHeader = {'Referer': 'http://www.bilibili.com'}
        #self.defaultHeader = {}
        self.appkey = appkey
        self.appsecret = appsecret
        self.WIDTH = width
        self.HEIGHT = height
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            key = None
            for ck in self.cj:
                if ck.name == 'DedeUserID':
                    key = ck.value
                    break
            if key is not None:
                self.is_login = True
                self.mid = str(key)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

        try:
            os.remove(self._get_tmp_dir() + '/tmp.ass')
        except:
            pass

    def _get_tmp_dir(self):
        try:
            return tempfile.gettempdir()
        except:
            return ''

    def get_captcha(self, path=None):
        key = None
        for ck in self.cj:
            if ck.name == 'sid':
                key = ck.value
                break

        if key is None:
            get_html(
                LOGIN_CAPTCHA_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = get_html(
            LOGIN_CAPTCHA_URL.format(random()),
            decoded=False,
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        import rsa
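        # Fetch the login hash and RSA public key, prepend the hash to the password,
        # then RSA-encrypt, base64-encode, and URL-quote the result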
        result = loads(
            get_html(
                LOGIN_HASH_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
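        # Build a query string from the params sorted by key and append sign=md5(query + appsecret)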
        params['appkey'] = self.appkey
        data = ''
        keys = params.keys()
        # must be sorted; urllib.urlencode(params) doesn't work
        keys.sort()
        for key in keys:
            data += '{}={}&'.format(key, urllib.quote(str(params[key])))

        data = data[:-1]  # remove last '&'
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(get_html(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        #Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(get_html(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url']}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20):
        if tag == 0:
            url = LIST_BY_ALL.format(tid, pagesize, page)
        else:
            url = LIST_BY_TAG.format(tag, tid, pagesize, page)

        results = loads(get_html(url))
        return results

    def get_category_list(self,
                          tid=0,
                          order='default',
                          days=30,
                          page=1,
                          pagesize=20):
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))

        result = loads(get_html(url, headers=self.defaultHeader))
        results = []
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                continue
        return results, result['pages']

    def get_my_info(self):
        if self.is_login == False:
            return []
        result = loads(get_html(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=20):
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = get_html(url, headers=self.defaultHeader)
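        # The endpoint may reply with a JSONP callback; strip the wrapper to get plain JSON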
        if result[0] != '{':
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=20):
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=20):
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = loads(get_html(url, headers=self.defaultHeader))
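        # Ceiling division: total pages = ceil(count / pagesize)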
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=20):
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url))
        return result['data']['list']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=20):
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav_box(self):
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=20):
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        #utils.get_html('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = get_html(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })

        key = None
        for ck in self.cj:
            if ck.name == 'DedeUserID':
                key = ck.value
                break

        if key is None:
            return False, LOGIN_ERROR_MAP[loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(key)
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = [result]
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid,
                                               int(page) + 1,
                                               fav,
                                               pagesize=pagesize - 1)[0]

        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        try:
            page = get_html(url)
            result = loads(page)
        except:
            result = {}
        return result

    # Use niconvert to generate the danmaku (bullet comment) ASS subtitle file
    def parse_subtitle(self, cid):
        page_full_url = COMMENT_URL.format(cid)
        website = create_website(page_full_url)
        if website is None:
            return ''
        else:
            text = website.ass_subtitles_text(font_name=u'黑体',
                                              font_size=24,
                                              resolution='%d:%d' %
                                              (self.WIDTH, self.HEIGHT),
                                              line_count=12,
                                              bottom_margin=0,
                                              tune_seconds=0)
            f = open(self._get_tmp_dir() + '/tmp.ass', 'w')
            f.write(text.encode('utf8'))
            f.close()
            return 'tmp.ass'

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = parseString(get_html(url))
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            if re.match(r'.*\.qqvideo\.tc\.qq\.com', u):
                u = re.sub(r'.*\.qqvideo\.tc', 'http://vsrc.store', u)
            urls.append(u)
            #urls.append(u + '|Referer={}'.format(urllib.quote('https://www.bilibili.com/')))

        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        get_html(url)

    def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs):
        ts = str(int(time.time()))
        if not bangumi:
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(
                cid, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest()
            api_url = self.api_url + params_str + '&sign=' + chksum
        else:
            mod = 'movie' if bangumi_movie else 'bangumi'
            params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format(
                cid, mod, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest()
            api_url = self.bangumi_api_url + params_str + '&sign=' + chksum

        return get_html(api_url)

    def download_by_vid(self, cid, bangumi, **kwargs):
        stream_id = kwargs.get('stream_id')
        if stream_id and stream_id in self.fmt2qlt:
            quality = stream_id
        else:
            quality = 'hdflv' if bangumi else 'flv'

        level = kwargs.get('level', 0)
        xml = self.api_req(cid, level, bangumi, **kwargs)
        doc = parseString(xml)
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            #urls.append(u)
            urls.append(
                urllib.quote_plus(u + '|Referer=https://www.bilibili.com'))

        return urls

    def entry(self, **kwargs):
        # tencent player
        tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"',
                                 self.page)
        if tc_flashvars:
            tc_flashvars = tc_flashvars.group(1)
        if tc_flashvars is not None:
            self.out = True
            return qq_download_by_vid(tc_flashvars,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])

        hit = re.search(r'cid=(\d+)', self.page)
        cid = hit.group(1) if hit else None
        if cid is not None:
            return self.download_by_vid(cid, False, **kwargs)
        else:
            # flashvars?
            hit = re.search(r'flashvars="([^"]+)"', self.page)
            if hit is None:
                raise Exception('Unsupported page {}'.format(self.url))
            flashvars = hit.group(1)
            param = flashvars.split('&')[0]
            t, cid = param.split('=')
            t = t.strip()
            cid = cid.strip()
            if t == 'vid':
                sina_download_by_vid(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            elif t == 'ykid':
                youku_download_by_vid(cid,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])
            elif t == 'uid':
                tudou_download_by_id(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            else:
                raise NotImplementedError(
                    'Unknown flashvars {}'.format(flashvars))
            return

    def movie_entry(self, **kwargs):
        patt = r"var\s*aid\s*=\s*'(\d+)'"
        aid = re.search(patt, self.page).group(1)
        page_list = loads(
            get_html(
                'http://www.bilibili.com/widget/getPageList?aid={}'.format(
                    aid)))
        # better ideas for bangumi_movie titles?
        self.title = page_list[0]['pagename']
        return self.download_by_vid(page_list[0]['cid'],
                                    True,
                                    bangumi_movie=True,
                                    **kwargs)

    def get_video_from_url(self, url, **kwargs):
        self.url = url_locations(url)
        frag = urlparse(self.url).fragment
        # http://www.bilibili.com/video/av3141144/index_2.html#page=3
        if frag:
            hit = re.search(r'page=(\d+)', frag)
            if hit is not None:
                page = hit.group(1)
                av_id = re.search(r'av(\d+)', self.url).group(1)
                self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(
                    av_id, page)
        self.page = get_html(self.url)

        if 'bangumi.bilibili.com/movie' in self.url:
            return self.movie_entry(**kwargs)
        elif 'bangumi.bilibili.com' in self.url:
            return self.bangumi_entry(**kwargs)
        elif 'live.bilibili.com' in self.url:
            return self.live_entry(**kwargs)
        elif 'vc.bilibili.com' in self.url:
            return self.vc_entry(**kwargs)
        else:
            return self.entry(**kwargs)

    def bangumi_entry(self, **kwargs):
        pass

    def live_entry(self, **kwargs):
        pass

    def vc_entry(self, **kwargs):
        pass
Esempio n. 22
0
class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem
    """

    def __init__(self):
        super(Session, self).__init__()
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.danabox/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.danabox dir if it doesn't exist
        if not os.path.isdir(cookie_dir):
            os.mkdir(cookie_dir, 0o700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def clear(self):
        """Clear cookies"""
        try:
            self.cookies.clear()
            self.cookies.save()
        except KeyError:
            pass

    def git_root(self):
        """
        Return the absolute path from the git repository root

        If no git repository exists, raise an EnvironmentError
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_app(self):
        """
        Return the application name for the current directory

        The application is determined by parsing `git remote -v` output for the origin remote.

        Because Danabox only allows deployment of public Github repos we can create unique app
        names from a combination of the Github user's name and the repo name. Eg;
        'git@github.com:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra'

        If no application is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root)
        if not remotes:
            raise EnvironmentError('No git remotes found.')
        url = None
        for remote in remotes.splitlines():
            if 'github.com' in remote:
                url = remote.split()[1]
                break
        if url is None:
            raise EnvironmentError('No Github remotes found.')
        pieces = url.split('/')
        owner = pieces[-2].split(':')[-1]
        repo = pieces[-1].replace('.git', '')
        app_raw = owner + '/' + repo
        app_name = app_raw.replace('-', '--').replace('/', '-')
        return app_name

    app = property(get_app)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling including
        `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`
        """
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        self.cookies.save()
        return response
Esempio n. 23
0
File: cli.py Progetto: nrao/mynrao
        # way it won't abort unless the user has configured it.
        https_handler = urllib2.HTTPSHandler
        if options.ca_certs:
            from caslib.validating_https import ValidatingHTTPSConnection

            class HTTPSConnection(ValidatingHTTPSConnection):
                ca_certs = options.ca_certs

            https_handler = HTTPSConnection.HTTPSHandler

        opener = urllib2.build_opener(https_handler)

        if options.cookiejar:
            cookiejar = MozillaCookieJar(os.path.expanduser(options.cookiejar))
            try:
                cookiejar.load(ignore_discard=True)
            except IOError:
                pass
            opener.add_handler(urllib2.HTTPCookieProcessor(cookiejar=cookiejar))

        if not options.verbose:
            logging.basicConfig(level=logging.WARNING)
        elif options.verbose == 1:
            logging.basicConfig(level=logging.INFO)
        else:
            logging.basicConfig(level=logging.DEBUG)
        userdb = NRAOUserDB(options.location, options.username, options.password, opener)

        for key in args:
            if options.query_by == DATABASE_ID:
                user = userdb.get_user_data(database_id=key)
Esempio n. 24
0
class Bilibili():
    def __init__(self, appkey = APPKEY, appsecret = APPSECRET):
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            if requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'):
                self.is_login = True
                self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path = None):
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'):
            utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()),
                                   headers = {'Referer':'https://passport.bilibili.com/login'})
        result = utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()),
                                        headers = {'Referer':'https://passport.bilibili.com/login'})
        if path == None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        import rsa
        result = json.loads(utils.get_page_content(LOGIN_HASH_URL.format(random.random()),
                                                   headers={'Referer':'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        params['appkey']=self.appkey
        data = ""
        keys = params.keys()
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        if self.appsecret == None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs':[]}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url), "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs':[]}
            node['subs'].append(tid)

        #Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {'title': u'电视剧', 'url': 'http://bangumi.bilibili.com/tv/', 'subs': []}
        category_dict['23'] = {'title': u'电影', 'url': 'http://bangumi.bilibili.com/movie/', 'subs': []}

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url), "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs':[]}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid = '0'):
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url'], 'subs': []}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_list(self, tid = 0, order = 'default', days = 30, page = 1, pagesize = 10):
        params = {'tid': tid, 'order': order, 'days': days, 'page': page, 'pagesize': pagesize}
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        if self.is_login == False:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        total_page = int((result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid = 0, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page = 1, pagesize = 10):
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(captcha, userid, pwd)
        result = utils.get_page_content(LOGIN_URL, data,
                                        {'Origin':'https://passport.bilibili.com',
                                         'Referer':'https://passport.bilibili.com/login'})
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page = 1, fav = 0, pagesize = 10):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid, int(page) + 1, fav, pagesize = pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')]
        urls = [url
                if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url)
                else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url)
                for url in urls]
        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
Esempio n. 25
0
class CookieWay:
    def __init__(self):
        self.cookiejar = MozillaCookieJar()

    def load(self, file="cookie.txt"):
        self.cookiejar.load(file, ignore_discard=True, ignore_expires=True)

    def save(self, file="cookie.txt"):
        self.cookiejar.save(file, ignore_discard=True, ignore_expires=True)

    def torequestscj(self, s):
        for item in self.cookiejar:
            cookiesobject = requests.cookies.create_cookie(domain=item.domain,
                                                           name=item.name,
                                                           value=item.value)
            s.cookies.set_cookie(cookiesobject)

    def toseleniumcj(self, driver):
        domains = []
        for item in self.cookiejar:
            if item.domain not in domains:
                domains.append(item.domain)
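        # Strip leading dots and de-duplicate so each cookie domain is visited exactly once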
        for i in range(len(domains)):
            if domains[i][0:1] == ".":
                domains[i] = domains[i][1:]
        domains = list(set(domains))
        for item in domains:
            driver.get("https://" + item)
            for item2 in self.cookiejar:
                if item2.domain == item or item2.domain == "." + item:
                    cookie_dict = {
                        'domain': item2.domain,
                        'name': item2.name,
                        'value': item2.value,
                        'secure': item2.secure
                    }
                    if item2.path_specified:
                        cookie_dict['path'] = item2.path
                    driver.add_cookie(cookie_dict)

    def sele2resq(self, driver, s):
        self.selcj_cj(driver)
        self.torequestscj(s)

    def resq2sele(self, s, driver):
        self.reqcj_cj(s)
        self.toseleniumcj(driver)

    def selcj_cj(self, driver):
        cookie = driver.get_cookies()
        for s_cookie in cookie:
            self.cookiejar.set_cookie(
                Cookie(
                    version=0,
                    name=s_cookie['name'],
                    value=s_cookie['value'],
                    port='80',
                    port_specified=False,
                    domain=s_cookie['domain'],
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=s_cookie['path'],
                    path_specified=True,
                    secure=s_cookie['secure'],
                    expires="2069592763",  # s_cookie['expiry']
                    discard=False,
                    comment=None,
                    comment_url=None,
                    rest=None,
                    rfc2109=False))

    def reqcj_cj(self, s):
        for s_cookie in s.cookies:
            self.cookiejar.set_cookie(
                Cookie(
                    version=0,
                    name=s_cookie.name,
                    value=s_cookie.value,
                    port='80',
                    port_specified=False,
                    domain=s_cookie.domain,
                    domain_specified=True,
                    domain_initial_dot=False,
                    path="/",
                    path_specified=True,
                    secure=True,
                    expires="2069592763",  # s_cookie['expiry']
                    discard=False,
                    comment=None,
                    comment_url=None,
                    rest=None,
                    rfc2109=False))
Esempio n. 26
0
class HttpScan(DummyScan):

    def __init__(self, args):
        super(HttpScan, self).__init__(args)
        self.session = requesocks.session()

        adapters.DEFAULT_RETRIES = self.args.max_retries
        self.tor = None
        if self.args.tor:
            self.out.log("Enabling TOR")
            self.tor = Torify()
            self.session.proxies = {'http': 'socks5://127.0.0.1:9050',
                                    'https': 'socks5://127.0.0.1:9050'}
            if self.args.check_tor:
                # Check TOR
                self.out.log("Checking IP via TOR")
                rip, tip = self.tor.check_ip(verbose=True)
                if tip is None:
                    self.out.log('TOR is not working properly!', logging.ERROR)
                    exit(-1)

        if self.args.cookies is not None:
            if path.exists(self.args.cookies) and path.isfile(self.args.cookies):
                self.cookies = MozillaCookieJar(self.args.cookies)
                self.cookies.load()
            else:
                # self.out.log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR)
                self.cookies = Cookies.from_request(self.args.cookies)
        else:
            self.cookies = None

        self.ua = UserAgent() if self.args.user_agent is None else self.args.user_agent

    def filter(self, response):
        if response is None:
            return False

        # Filter responses and save responses that are matching ignore, allow rules
        if (self.args.allow is None and self.args.ignore is None) or \
                (self.args.allow is not None and response.status_code in self.args.allow) or \
                (self.args.ignore is not None and response.status_code not in self.args.ignore):
            # TODO: add regex search
            return True

        return False

    def scan_url(self, url):
        # TODO: add options
        r = None
        ex = None
        try:
            r = self.session.get(url)
        except Exception as e:
            ex = e
        finally:
            self.cb_response(url, r, ex)
        return r, ex

    def scan_host(self, host, urls):
        res = []

        for u in urls:
            url = get_full_url(host, u)
            r, ex = self.scan_url(url)
            self.out.logger.write_response(url, r, ex)
            if self.filter(r):
                self.out.write(url, r, ex)
                res.append((url, r, ex))
        return res

    def cb_scan_done(self, future):
        pass

    def cb_response(self, url, reponse, exception):
        pass
class SimpleCrawler:

    USER_AGENT = 'SimpleCrawler/0.1'
    HEADERS = {
        'User-Agent': USER_AGENT,
        'Accept-Encoding': 'gzip',
        'Connection': 'keep-alive'
    }
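    # Splits a Content-Type header into the MIME type and an optional charset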
    CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I)

    def __init__(self,
                 starturl,
                 index_html='',
                 maxlevel=1,
                 cookie_file=None,
                 acldb=None,
                 urldb=None,
                 default_charset=None,
                 delay=0,
                 timeout=300,
                 debug=0):
        (proto, self.hostport, _x, _y, _z) = urlsplit(starturl)
        # assert proto == 'http'
        #Thread.__init__(self)
        self.debug = debug
        self.index_html = index_html
        if cookie_file:
            self.cookiejar = MozillaCookieJar(cookie_file)
            self.cookiejar.load()
        else:
            self.cookiejar = None
        self.robotstxt = RobotFileParser()
        self.robotstxt.set_url(urljoin(starturl, '/robots.txt'))
        # self.robotstxt.read()
        self.conn = None
        self.urldb = urldb
        self.acldb = acldb
        self.curlevel = 0
        self.delay = delay
        self.timeout = timeout
        self.default_charset = default_charset
        if starturl.endswith('/'):
            starturl += self.index_html
        self.urls = [(starturl, maxlevel)]
        self.crawled = {}  # 1:injected, 2:crawled
        return

    def accept_url(self, url):
        if url.endswith('/'):
            url += self.index_html
        if self.acldb and not self.acldb.allowed(url):
            return None
        return url

    def inject_url(self, url):
        if (not self.curlevel) or (not url) or (url in self.crawled):
            return False
        if not self.robotstxt.can_fetch(self.USER_AGENT, url):
            if self.debug:
                print >> stderr, 'DISALLOW: %r' % url
            return None
        if self.debug:
            print >> stderr, 'INJECT: %r' % url
        self.crawled[url] = 1
        self.urls.append((url, self.curlevel - 1))
        return True

    def get1(self, url, maxretry=5, maxredirect=5):
        if self.debug:
            print >> stderr, 'GET: %r' % url
        # loop
        for rtry in range(maxredirect):
            # forge urllib2.Request object.
            req = Request(url)
            # add cookie headers if necessary.
            if self.cookiejar:
                self.cookiejar.add_cookie_header(req)
                headers = req.unredirected_hdrs
                headers.update(self.HEADERS)
            else:
                headers = self.HEADERS
            # get response.
            for ctry in range(maxretry):
                try:
                    if not self.conn:
                        print >> stderr, 'Making connection: %r...' % (
                            self.hostport, )
                        self.conn = HTTPConnection(self.hostport)
                    self.conn.request('GET',
                                      req.get_selector().replace(' ', ''), '',
                                      headers)
                    # self.conn.sock.settimeout(self.timeout)
                    resp = self.conn.getresponse()
                    break
                except BadStatusLine, x:
                    # connection closed unexpectedly
                    print >> stderr, 'Connection closed unexpectedly.'
                    # it restarts the connection...
                    self.conn.close()
                    self.conn = None
                except socket.error, x:
                    # connection closed unexpectedly
                    print >> stderr, 'Socket error:', x
                    self.conn.close()
                    self.conn = None
            else:
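A minimal sketch, under Python 3 imports, of the robots.txt gate that inject_url() applies in the crawler above; the start URL and user-agent string are examples only.

from urllib.parse import urljoin
from urllib.robotparser import RobotFileParser

start_url = 'http://www.example.com/'            # example only
rp = RobotFileParser()
rp.set_url(urljoin(start_url, '/robots.txt'))
rp.read()                                        # fetch and parse robots.txt
if rp.can_fetch('SimpleCrawler/0.1', start_url):
    print('allowed to crawl', start_url)
else:
    print('disallowed by robots.txt:', start_url)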
Esempio n. 28
0
class Bilibili():
    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            if requests.utils.dict_from_cookiejar(
                    self.cj).has_key('DedeUserID'):
                self.is_login = True
                self.mid = str(
                    requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'):
            utils.get_page_content(
                LOGIN_CAPTCHA_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(
            LOGIN_CAPTCHA_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path == None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        import rsa
        result = json.loads(
            utils.get_page_content(
                LOGIN_HASH_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        params['appkey'] = self.appkey
        data = ""
        keys = params.keys()
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        if self.appsecret == None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        #Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{
            tid: {
                'title': '全部',
                'url': CATEGORY[tid]['url'],
                'subs': []
            }
        }]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_list(self,
                          tid=0,
                          order='default',
                          days=30,
                          page=1,
                          pagesize=10):
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        if self.is_login == False:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = utils.get_page_content(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })
        if not requests.utils.dict_from_cookiejar(
                self.cj).has_key('DedeUserID'):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(
            requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid,
                                               int(page) + 1,
                                               fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [
            durl.getElementsByTagName('url')[0].firstChild.nodeValue
            for durl in doc.getElementsByTagName('durl')
        ]
        urls = [
            url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(
                r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url)
            for url in urls
        ]
        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
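A small Python 3 sketch of the login-state check that Bilibili.__init__ performs above: load the saved .cookie file and treat the presence of the DedeUserID cookie as a logged-in session (the cookie-file path here is an example, not the class's actual location logic).

import os
import requests
from http.cookiejar import MozillaCookieJar

cookie_path = os.path.join(os.getcwd(), '.cookie')     # example path
cj = MozillaCookieJar(cookie_path)
is_login, mid = False, None
if os.path.isfile(cookie_path):
    cj.load()
    cookies = requests.utils.dict_from_cookiejar(cj)
    if 'DedeUserID' in cookies:        # same cookie name the class checks for
        is_login = True
        mid = str(cookies['DedeUserID'])
print(is_login, mid)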
Esempio n. 29
0
class BasisRetr:
	"""The main entry points, once a BasisRetr object has been created, are: 1) GetDayData()-- download metrics, activity, sleep data for a single day from the basis website and save it, 2) GetActivityCsvForMonth()-- download activity summaries for an entire month, and 3) GetSleepCsvForMonth()--download sleep summaries for an entire month."""
	LOGIN_URL = 'https://app.mybasis.com/login'
	UID_URL = 'https://app.mybasis.com/api/v1/user/me.json'
	METRICS_URL = 'https://app.mybasis.com/api/v1/chart/{userid}.json?interval=60&units=s&start_date={date}&start_offset=0&end_offset=0&summary=true&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true'
	ACTIVITIES_URL ='https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep'
	SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep'
	SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events'

	DATE_FORMAT = "%04d-%02d-%02d"
	
	# save-to filename.  date is prefix, format is suffix
	MO_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_activities_summary.csv"
	MO_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_sleep_summary.csv"
	# day sleep and activity filenames (for month summaries)
	DAY_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_activities.json"
	DAY_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_sleep.json"
	DAY_JSON_FNAME_TEMPLATE = "{date}_basis_{typ}.json"
	METRICS_FNAME_TEMPLATE = "{date}_basis_metrics.{ext}"
	SLEEP_FNAME_TEMPLATE= "{date}_basis_sleep.{format}"
	
	def __init__(self, loadconfig = None):
		# create config info
		self.cfg = Config(cfg_items = CFG_ITEMS)
		if loadconfig:
			self.cfg.Load()
		else:
			# if config file doesn't exist, save the defaults loaded above
			self.cfg.Save() #saves 
		# url opener for website retrieves
		opener = urllib2.build_opener()
		self.cj = MozillaCookieJar(self.cfg.cookie_filename)#BasisRetr.COOKIE_FILENAME)
		self.session_cookie = None
		if os.path.exists(self.cfg.cookie_filename):#BasisRetr.COOKIE_FILENAME):
			self.cj.load()
			self.CheckSessionCookie() # set session cookie if it exists and hasn't expired
		# need to use build_opener to submit cookies and post form data
		self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))

	def GetDayData(self, yr, mo, day, typ, save_csv, override_cache = False, act_metr= True):
		"""Main entry method for getting a day's worth of data, formatting, then saving it.  typ is the type of data: metrics, activities, or sleep.  Data is always saved in json format, but if save_csv is True, save to csv as well as json. override_cache ignores any already downloaded json.  act_metr, if True, saves sleep and activity state along with metrics."""
		date = BasisRetr.DATE_FORMAT % (yr, mo, day)
		ydate = self.GetYesterdayDateAsString(yr, mo, day)
		
		self.Status("Checking Login")
		self.CheckLogin() # ensure we're logged in
		self.Status("getting {} for {}".format(typ,date))
		# figure out which data to get
		data = None
		# filename 
		cfname = "{date}_basis_{typ}.csv".format(date=date, typ=typ)
				
		# if needed, download json data from website and save to file
		if typ == 'metrics':
			mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache)
			### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD
			if type(mjdata) == str or mjdata == None: # simple error checking
				self.Status('OnGetDayData: Metrics json conversion failed.')
				print mjdata[:500]
				return
			# also load up activities
		if typ == 'activities' or act_metr:
			ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache)
			if type(ajdata) == str or ajdata == None: # simple error checking
				self.Status('OnGetDayData: Activities json conversion failed.')
				print ajdata[:500]
				return
		if typ == 'sleep' or act_metr:
			sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache)
			if type(sjdata) == str or sjdata == None: # simple error checking
				self.Status('OnGetDayData: Sleep json conversion failed.')
				print sjdata[:500]
				return
			if act_metr: # add yesterday's sleep data
				sjdata2= self.RetrieveJsonOrCached(ydate, 'sleep')
		
		# Next, turn the list of python objects into a csv file.
		# If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp.
		cdata = None
		if save_csv:
			if typ == 'activities' or act_metr:
				act_list = self.JsonActivitiesToList(ajdata)
				cdata = self.CreateCSVFromList(self.cfg.csv_activity_colnames, act_list)
			if typ == 'sleep' or act_metr:
				sleep_evts_list = self.JsonSleepEventsToList(sjdata)
				cdata = self.CreateCSVFromList(self.cfg.csv_sleep_evt_colnames, sleep_evts_list)
				if act_metr:
					# prepend yesterday's sleep events as they may start before midnight.
					sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2)
			if typ == 'metrics':
				metrics_list = self.JsonMetricsToList(mjdata)
				if act_metr: # add activities to metrics               
					self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list)
					header = self.cfg.csv_metrics_colnames + self.cfg.csv_activity_type_colnames
				else:
					header = self.cfg.csv_metrics_colnames
				cdata = self.CreateCSVFromList(header, metrics_list)
		
		# If we were able to make a csv file, save it.
		if cdata:
			fpath = os.path.join(os.path.abspath(self.cfg.savedir), cfname)
			self.SaveData(cdata, fpath)
			self.Status("Saved "+typ+" csv file at "+fpath)

	def CheckLogin(self):
		# the test below gives HTTP Error 401: Unauthorized if don't get cookie each time
		# I wonder if cookielib::FileCookieJar might do the right thing
		# i.e., save all cookies, not just session cookie.
		if not self.cfg.userid or not self.cfg.session_token:
			self.Login()

	def Login(self, login = None, passwd = None):
		"""Log in to basis website to get session (access) token via cookie. Don't need to pass in loginid and password if want to use stored info."""
		if login:
			self.cfg.loginid = login
		if passwd:
			self.cfg.passwd = passwd

		form_data = {'next': 'https://app.mybasis.com',
			'submit': 'Login',
			'username': self.cfg.loginid,
			'password': self.cfg.passwd}
		enc_form_data = urllib.urlencode(form_data)
		f = self.opener.open(BasisRetr.LOGIN_URL, enc_form_data)

		content = f.read()
		#$ do we need to close f?
		m = re.search('error_string\s*=\s*"(.+)"', content, re.MULTILINE)
		if m:
			raise Exception(m.group(1))
		
		self.CheckSessionCookie()
		
		# make sure we got the access token
		if not self.cfg.session_token:
			self.Status("Didn't find an access token in:"+["({}={}), ".format(c.name.c.value) for c in self.cj])
		else:
			self.Status("Logged in, Got Access Token = "+self.cfg.session_token)
		
	def CheckSessionCookie(self):
		for cookie in self.cj:
			if cookie.name == 'access_token':
				self.cfg.session_token = cookie.value

	def GetUserID(self):
		"""Retrieve the long hex string that uniquely identifies a user from the Basis website."""
		if not self.cfg.session_token:
			raise Exception('no token', 'no access token found-may be internet connectivity or bad login info.')
		self.opener.addheaders = [('X-Basis-Authorization', "OAuth "+self.cfg.session_token)]
		f = self.opener.open(BasisRetr.UID_URL)
		content = f.read()
		jresult = json.loads(content)
		self.cfg.userid= None
		if 'id' in jresult:
			self.cfg.userid = jresult['id']

	def GetYesterdayDateAsString(self, yr, mo, day):
		"""Need yesterday's date to get sleep events for a given calendar day. This is because sleep events, as downloaded from the Basis Website, start from the prior evening, when you actually went to sleep."""
		
		tday, tmo, tyr = day-1, mo, yr
		
		if tday <1: # previous month
			tmo -= 1
			if tmo < 1: # previous year
				tyr -= 1
				tmo = 12
			# once we adjusted the month, find the last day of that month 
			tday = calendar.monthrange(tyr, tmo)[1]
		tdate = BasisRetr.DATE_FORMAT % (tyr, tmo, tday)
		return tdate

	def RetrieveMetricsJsonForDay(self, date):
		# Need userid in order to get metrics
		if not self.cfg.userid:
			self.Status("BasisRetr::GetMetrics: No userid available; getting from website.")
			self.GetUserID()
			self.Status("Retrieved userid from website.")
		# Form the URL
		url = BasisRetr.METRICS_URL.format(date=date,userid=self.cfg.userid)
		return self.GetJsonData(url)
		
	def RetrieveActivitiesJsonForDay(self, date):
		url = BasisRetr.ACTIVITIES_URL.format(date = date)
		return self.GetJsonData(url)

	def RetrieveSleepSummaryJsonForDay(self, date):
		url = BasisRetr.SLEEP_URL.format(date=date)
		return self.GetJsonData(url)

	def RetrieveSleepEventsJsonForDay(self,date):
		url = BasisRetr.SLEEP_EVENTS_URL.format(date=date)
		return self.GetJsonData(url)

	def GetJsonStorageDir(self):
		"""Allow json storage dir to be absolute or relative (to csv dir) path."""
		if os.path.isabs(self.cfg.jsondir):
			return self.cfg.jsondir
		else:
			return os.path.join(os.path.abspath(self.cfg.savedir), self.cfg.jsondir)

	def RetrieveJsonOrCached(self, date, typ, user_override_cache = None):
		"""If json file exists in json dir, then just read that.  Otherwise, download from basis website.  If override_cache is set, always download from website."""
		fname = BasisRetr.DAY_JSON_FNAME_TEMPLATE.format(date=date, typ=typ)
		fpath = os.path.join(self.GetJsonStorageDir(), fname)
		# don't use cache if the saved data is very recent-- what's saved may have been before the end of the day.
		if os.path.isfile(fpath):
			# these calculations are in seconds since epoch
			days_prev = 3600*24*self.cfg.nocache_days
			last_mod_time = os.path.getmtime(fpath)
			target_time = time.mktime(datetime.datetime.strptime(date, "%Y-%m-%d").timetuple())
			force_override_cache = last_mod_time - target_time < days_prev
		# if file exists and we've said via UI, "don't override the cache", then read json from cache
		if os.path.isfile(fpath) and not user_override_cache and not force_override_cache:
			with open(fpath, "r") as f:
				data = f.read()
				jdata = json.loads(data)
				
		else: # retrieve data from website
			if typ == 'metrics':
				jdata = self.RetrieveMetricsJsonForDay(date)
			elif typ == 'activities':
				jdata = self.RetrieveActivitiesJsonForDay(date)
			elif typ == 'sleep':
				jdata = self.RetrieveSleepEventsJsonForDay(date)
			elif typ == 'sleep_summary':
				jdata = self.RetrieveSleepSummaryJsonForDay(date)
			#json_path = os.path.join(self.GetJsonStorageDir(), fname)
			# make sure directory exists
			if not os.path.isdir(self.GetJsonStorageDir()):
				os.makedirs(self.GetJsonStorageDir())
			self.SaveData(json.dumps(jdata), fpath)
		return jdata
	
		
	def GetJsonData(self, url):
		if DEBUG:
			print url
		if True:
			try:			
				f = self.opener.open(url)
				jresult= json.loads(f.read())
			except urllib2.HTTPError as e:
				reason = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code]
				jresult = {'code': e.code, 'error':reason, 'url':url} 
		
		# callback (if available) to UI manager to ensure it doesn't freeze
		if hasattr(self, 'FreezePrevention'):
			self.FreezePrevention()
		if 'code' in jresult and jresult['code'] == 401: # unauthorized.  try logging in
			self.Status("Auth error, Logging in for new session token.")
			self.Login()
			try:	# try again
				f = self.opener.open(url)
				jresult= json.loads(f.read())
			except urllib2.HTTPError as e:
				reason = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code]
				jresult = {'code': e.code, 'error':reason, 'url':url} 
		return jresult

	def SaveData(self, data, fpath):
		try:
			fh = file(os.path.abspath(fpath), "w")
			fh.write(data)
		except IOError, v:
			self.Status("problem saving file to:"+fpath+"\n--Error: "+`v`)
		try: # if problem is on open, then fh doesn't exist.
			fh.close()
		except:
			pass
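A hedged sketch of the Login()/CheckSessionCookie() flow above, rewritten with Python 3 imports: an opener built around the jar performs the login POST, the access_token cookie is read back out of the jar, and the jar is saved so the next run can reuse it. The cookie-file name and the placeholder credentials are assumptions for illustration.

from http.cookiejar import MozillaCookieJar
from urllib.parse import urlencode
from urllib.request import build_opener, HTTPCookieProcessor

cj = MozillaCookieJar('basis_cookies.txt')        # file name is an example
opener = build_opener(HTTPCookieProcessor(cj))

form = urlencode({'next': 'https://app.mybasis.com',
                  'submit': 'Login',
                  'username': 'user@example.com',  # placeholder credentials
                  'password': 'secret'}).encode('ascii')
opener.open('https://app.mybasis.com/login', form)

# Pull the session token back out of the jar, as CheckSessionCookie() does.
session_token = None
for cookie in cj:
    if cookie.name == 'access_token':
        session_token = cookie.value

cj.save(ignore_discard=True)                      # persist for the next run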
Esempio n. 30
0
class HttpScanner(object):
    def __init__(self, args):
        """
        Initialise HTTP scanner
        :param args:
        :return:
        """
        self.args = args
        self.output = HttpScannerOutput(args)
        self._init_scan_options()

        # Reading files
        self.output.write_log("Reading files and deduplicating.", logging.INFO)
        self.hosts = self._file_to_list(args.hosts)
        self.urls = self._file_to_list(args.urls)

        #
        self._calc_urls()
        out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count)
        if self.args.ports is not None:
            out += ' %i ports' % len(self.args.ports)
        self.output.print_and_log(out)

        if self.args.ports is not None and not self.args.syn:
            new_hosts = []
            for host in self.hosts:
                for port in self.args.ports:
                    # print(host, port)
                    new_hosts.append(helper.generate_url(host, port))
            self.hosts = new_hosts

        #
        self._calc_urls()
        self.output.print_and_log('%i full urls to scan' % self.full_urls_count)

        # Queue and workers
        self.hosts_queue = JoinableQueue()
        self.workers = []

    def _file_to_list(self, filename, dedup=True):
        """
        Get list from file
        :param filename: file to read
        :return: list of lines
        """
        if not path.exists(filename) or not path.isfile(filename):
            self.output.print_and_log('File %s not found!' % filename, logging.ERROR)
            exit(-1)

        # Preparing lines list
        lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n'))
        if len(lines) == 0:
            self.output.print_and_log('File %s is empty!' % filename, logging.ERROR)
            exit(-1)

        return helper.deduplicate(lines) if dedup else lines

    def _init_scan_options(self):
        # Session
        self.session = session()
        self.session.timeout = self.args.timeout
        self.session.verify = False

        # TODO: debug and check
        # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries))
        # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries))
        # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
        # Max retries
        adapters.DEFAULT_RETRIES = self.args.max_retries

        # TOR
        if self.args.tor:
            self.output.write_log("TOR usage detected. Making some checks.")
            self.session.proxies = {
                'http': 'socks5://127.0.0.1:9050',
                'https': 'socks5://127.0.0.1:9050'
            }

            url = 'http://ifconfig.me/ip'
            real_ip, tor_ip = None, None

            # Get real IP address
            try:
                real_ip = get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log("Couldn't get real IP address. Check yout internet connection.",
                                          logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Get TOR IP address
            try:
                tor_ip = self.session.get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log("TOR socks proxy doesn't seem to be working.", logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Show IP addresses
            self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip))
            if real_ip == tor_ip:
                self.output.print_and_log("TOR doesn't work! Stop to be secure.", logging.ERROR)
                exit(-1)

        # Proxy
        if self.args.proxy is not None:
            self.session.proxies = {"https": self.args.proxy,
                                    "http": self.args.proxy}

        # Auth
        if self.args.auth is not None:
            items = self.args.auth.split(':')
            self.session.auth = (items[0], items[1])

        # Cookies
        self.cookies = {}
        if self.args.cookies is not None:
            self.cookies = Cookies.from_request(self.args.cookies)

        # Cookies from file
        if self.args.load_cookies is not None:
            if not path.exists(self.args.load_cookies) or not path.isfile(self.args.load_cookies):
                self.output.print_and_log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR)
                exit(-1)

            self.cookies = MozillaCookieJar(self.args.load_cookies)
            self.cookies.load()

        self.session.cookies = self.cookies

        # User-Agent
        self.ua = UserAgent() if self.args.random_agent else None

    def worker(self, worker_id):
        self.output.write_log('Worker %i started.' % worker_id)
        while not self.hosts_queue.empty():
            host = self.hosts_queue.get()
            try:
                self.scan_host(worker_id, host)
            finally:
                self.output.write_log('Worker %i finished.' % worker_id)
                self.hosts_queue.task_done()

    def _head_available(self, host):
        """
        Determine whether HEAD requests are allowed
        :param host:
        :return:
        """
        # Trying to use OPTIONS request
        try:
            response = self.session.options(host, headers=self._fill_headers())
            o = response.headers['allow'] if 'allow' in response.headers else None
            if o is not None and o.find('HEAD') != -1:
                return True
        except:
            # TODO: fix
            pass

        try:
            return False if self.session.head(host, headers=self._fill_headers()).status_code == 405 else True
        except:
            # TODO: fix
            return False

    def scan_host(self, worker_id, host):
        # check if resolvable
        ip = helper.url_to_ip(host)
        if ip is None:
            self.output.write_log('Could not resolve %s  Skipping...' % host, logging.WARNING)
            self.output.urls_scanned += len(self.urls)
            return

        # Check for HEAD
        host_url = helper.host_to_url(host)
        head_available = False
        if self.args.head:
            head_available = self._head_available(host)
            if head_available:
                self.output.write_log('HEAD is supported for %s' % host)

        errors_count, urls_scanned = 0, 0
        for url in self.urls:
            full_url = urljoin(host_url, url)
            r = self.scan_url(full_url, head_available)
            urls_scanned += 1
            self.output.urls_scanned += 1

            # Output
            r['worker'] = worker_id
            self.output.write(**r)
            if r['exception'] is not None:
                errors_count += 1

            # Skip host on errors
            if self.args.skip is not None and errors_count == self.args.skip:
                self.output.write_log('Errors limit reached on %s Skipping other urls.' % host, logging.WARNING)
                self.output.urls_scanned += len(self.urls) - urls_scanned
                break

        # cookies bugfix?
        self.session.cookies.clear()

    def _fill_headers(self):
        # Fill UserAgent in headers
        headers = {}
        if self.args.user_agent is not None:
            headers['User-agent'] = self.args.user_agent
        elif self.args.random_agent:
            headers['User-agent'] = self.ua.random

        # Fill Referer in headers
        if self.args.referer is not None:
            headers['Referer'] = self.args.referer

        return headers

    def _parse_response(self, url, response, exception):
        res = {'url': url,
               'response': response,
               'exception': exception}

        if response is None or exception is not None:
            res.update({
                'status': -1,
                'length': -1,
            })
            return res

        try:
            length = int(response.headers['content-length']) if 'content-length' in response.headers else len(
                response.text)
        except Exception as exception:
            self.output.write_log(
                "Exception while getting content length for URL: %s Exception: %s" % (url, str(exception)),
                logging.ERROR)
            length = 0

        res.update({
            'status': response.status_code,
            'length': length,
        })
        return res

    def scan_url(self, url, use_head=False):
        self.output.write_log('Scanning %s' % url, logging.DEBUG)

        # Query URL and handle exceptions
        response, exception = None, None
        method = 'HEAD' if use_head else 'GET'
        try:
            # TODO: add support for user:password in URL
            response = self.session.request(method, url, headers=self._fill_headers(),
                                            allow_redirects=self.args.allow_redirects)
        except ConnectionError as ex:
            self.output.write_log('Connection error while querying %s' % url, logging.ERROR)
            exception = ex
        except HTTPError as ex:
            self.output.write_log('HTTP error while querying %s' % url, logging.ERROR)
            exception = ex
        except Timeout as ex:
            self.output.write_log('Timeout while querying %s' % url, logging.ERROR)
            exception = ex
        except TooManyRedirects as ex:
            self.output.write_log('Too many redirects while querying %s' % url, logging.ERROR)
            exception = ex
        except Exception as ex:
            self.output.write_log('Unknown exception while querying %s' % url, logging.ERROR)
            exception = ex


        # print('cookies: %s' % self.cookies)
        print('session.cookies: %s' % self.session.cookies)
        # self.session.cookies = self.cookies

        return self._parse_response(url, response, exception)

    def signal_handler(self):
        """
        Signal handler
        :return:
        """
        # TODO: add saving status via pickle
        self.output.print_and_log('Signal caught. Stopping...', logging.WARNING)
        self.stop()
        exit(signal.SIGINT)

    def _calc_urls(self):
        # Calculations
        self.urls_count = len(self.urls)
        self.hosts_count = len(self.hosts)
        self.full_urls_count = len(self.urls) * len(self.hosts)
        self.output.args.urls_count = self.full_urls_count

    def start(self):
        """
        Start multithreaded scan
        :return:
        """
        # Set signal handler
        gevent.signal(signal.SIGTERM, self.signal_handler)
        gevent.signal(signal.SIGINT, self.signal_handler)
        gevent.signal(signal.SIGQUIT, self.signal_handler)

        # ICMP scan
        if self.args.icmp:
            if geteuid() != 0:
                self.output.print_and_log('To use ICMP scan option you must run as root. Skipping ICMP scan', logging.WARNING)
            else:
                self.output.print_and_log('Starting ICMP scan.')
                self.hosts = helper.icmp_scan(self.hosts, self.args.timeout)
                self._calc_urls()
                self.output.print_and_log('After ICMP scan %i hosts %i urls loaded, %i urls to scan' %
                                          (self.hosts_count, self.urls_count, self.full_urls_count))

        # SYN scan
        if self.args.syn:
            if self.args.tor or self.args.proxy is not None:
                self.output.print_and_log('SYN scan via tor or proxy is impossible!', logging.WARNING)
                self.output.print_and_log('Stopping to prevent deanonymization!', logging.WARNING)
                exit(-1)

            if geteuid() != 0:
                self.output.print_and_log('To use SYN scan option you must run as root. Skipping SYN scan', logging.WARNING)
            else:
                self.output.print_and_log('Starting SYN scan.')
                self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout)
                self._calc_urls()
                self.output.print_and_log('After SYN scan %i hosts %i urls loaded, %i urls to scan' %
                                          (self.hosts_count, self.urls_count, self.full_urls_count))

        # Check threads count vs hosts count
        if self.args.threads > self.hosts_count:
            self.output.write_log('Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING)
            threads_count = self.hosts_count
        else:
            threads_count = self.args.threads

        # Output urls count
        self.output.args.urls_count = self.full_urls_count

        # Start workers
        self.workers = [spawn(self.worker, i) for i in range(threads_count)]

        # Fill and join queue
        [self.hosts_queue.put(host) for host in self.hosts]
        self.hosts_queue.join()

    def stop(self):
        """
        Stop scan
        :return:
        """
        # TODO: stop correctly
        gevent.killall(self.workers)
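A minimal sketch of the two cookie sources handled by _init_scan_options() above, assuming Python 3 and requests: cookies either come from a cookies.txt file loaded into a MozillaCookieJar or from a raw "name=value; ..." header string. The helper name and the inline parsing stand in for the scanner's Cookies.from_request and are only illustrative.

from os import path
from http.cookiejar import MozillaCookieJar
import requests

def attach_cookies(session, cookie_string=None, cookie_file=None):
    # Prefer a cookies.txt file when it exists; otherwise parse the raw string.
    if cookie_file is not None and path.isfile(cookie_file):
        jar = MozillaCookieJar(cookie_file)
        jar.load()
        session.cookies = jar
    elif cookie_string is not None:
        pairs = dict(item.strip().split('=', 1) for item in cookie_string.split(';'))
        requests.utils.add_dict_to_cookiejar(session.cookies, pairs)
    return session

# Usage sketch:
# session = attach_cookies(requests.Session(), cookie_string='sessionid=abc123')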
Esempio n. 31
0
class HttpScanner(object):
    def __init__(self, args):
        """
        Initialise HTTP scanner
        :param args:
        :return:
        """
        self.args = args
        self.output = HttpScannerOutput(args)
        self._init_scan_options()

        # Reading files
        self.output.write_log("Reading files and deduplicating.", logging.INFO)
        self.hosts = self._file_to_list(args.hosts)
        self.urls = self._file_to_list(args.urls)

        #
        self._calc_urls()
        out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count)
        if self.args.ports is not None:
            out += ' %i ports' % len(self.args.ports)
        self.output.print_and_log(out)

        if self.args.ports is not None and not self.args.syn:
            new_hosts = []
            for host in self.hosts:
                for port in self.args.ports:
                    # print(host, port)
                    new_hosts.append(helper.generate_url(host, port))
            self.hosts = new_hosts

        #
        self._calc_urls()
        self.output.print_and_log('%i full urls to scan' %
                                  self.full_urls_count)

        # Queue and workers
        self.hosts_queue = JoinableQueue()
        self.workers = []

    def _file_to_list(self, filename, dedup=True):
        """
        Get list from file
        :param filename: file to read
        :return: list of lines
        """
        if not path.exists(filename) or not path.isfile(filename):
            self.output.print_and_log('File %s not found!' % filename,
                                      logging.ERROR)
            exit(-1)

        # Preparing lines list
        lines = filter(lambda line: line is not None and len(line) > 0,
                       open(filename).read().split('\n'))
        if len(lines) == 0:
            self.output.print_and_log('File %s is empty!' % filename,
                                      logging.ERROR)
            exit(-1)

        return helper.deduplicate(lines) if dedup else lines

    def _init_scan_options(self):
        # Session
        self.session = session()
        self.session.timeout = self.args.timeout
        self.session.verify = False

        # TODO: debug and check
        # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries))
        # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries))
        # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
        # Max retries
        adapters.DEFAULT_RETRIES = self.args.max_retries

        # TOR
        if self.args.tor:
            self.output.write_log("TOR usage detected. Making some checks.")
            self.session.proxies = {
                'http': 'socks5://127.0.0.1:9050',
                'https': 'socks5://127.0.0.1:9050'
            }

            url = 'http://ifconfig.me/ip'
            real_ip, tor_ip = None, None

            # Get real IP address
            try:
                real_ip = get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log(
                    "Couldn't get real IP address. Check yout internet connection.",
                    logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Get TOR IP address
            try:
                tor_ip = self.session.get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log(
                    "TOR socks proxy doesn't seem to be working.",
                    logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Show IP addresses
            self.output.print_and_log('Real IP: %s TOR IP: %s' %
                                      (real_ip, tor_ip))
            if real_ip == tor_ip:
                self.output.print_and_log(
                    "TOR doesn't work! Stop to be secure.", logging.ERROR)
                exit(-1)

        # Proxy
        if self.args.proxy is not None:
            self.session.proxies = {
                "https": self.args.proxy,
                "http": self.args.proxy
            }

        # Auth
        if self.args.auth is not None:
            items = self.args.auth.split(':')
            self.session.auth = (items[0], items[1])

        # Cookies
        self.cookies = {}
        if self.args.cookies is not None:
            self.cookies = Cookies.from_request(self.args.cookies)

        # Cookies from file
        if self.args.load_cookies is not None:
            if not path.exists(self.args.load_cookies) or not path.isfile(
                    self.args.load_cookies):
                self.output.print_and_log(
                    'Could not find cookie file: %s' % self.args.load_cookies,
                    logging.ERROR)
                exit(-1)

            self.cookies = MozillaCookieJar(self.args.load_cookies)
            self.cookies.load()

        self.session.cookies = self.cookies

        # User-Agent
        self.ua = UserAgent() if self.args.random_agent else None

    def worker(self, worker_id):
        self.output.write_log('Worker %i started.' % worker_id)
        while not self.hosts_queue.empty():
            host = self.hosts_queue.get()
            try:
                self.scan_host(worker_id, host)
            finally:
                self.output.write_log('Worker %i finished.' % worker_id)
                self.hosts_queue.task_done()

    def _head_available(self, host):
        """
        Determine whether HEAD requests are allowed
        :param host:
        :return:
        """
        # Trying to use OPTIONS request
        try:
            response = self.session.options(host, headers=self._fill_headers())
            o = response.headers[
                'allow'] if 'allow' in response.headers else None
            if o is not None and o.find('HEAD') != -1:
                return True
        except:
            # TODO: fix
            pass

        try:
            return False if self.session.head(
                host,
                headers=self._fill_headers()).status_code == 405 else True
        except:
            # TODO: fix
            return False

    def scan_host(self, worker_id, host):
        # check if resolvable
        ip = helper.url_to_ip(host)
        if ip is None:
            self.output.write_log('Could not resolve %s  Skipping...' % host,
                                  logging.WARNING)
            self.output.urls_scanned += len(self.urls)
            return

        # Check for HEAD
        host_url = helper.host_to_url(host)
        head_available = False
        if self.args.head:
            head_available = self._head_available(host)
            if head_available:
                self.output.write_log('HEAD is supported for %s' % host)

        errors_count, urls_scanned = 0, 0
        for url in self.urls:
            full_url = urljoin(host_url, url)
            r = self.scan_url(full_url, head_available)
            urls_scanned += 1
            self.output.urls_scanned += 1

            # Output
            r['worker'] = worker_id
            self.output.write(**r)
            if r['exception'] is not None:
                errors_count += 1

            # Skip host on errors
            if self.args.skip is not None and errors_count == self.args.skip:
                self.output.write_log(
                    'Errors limit reached on %s Skipping other urls.' % host,
                    logging.WARNING)
                self.output.urls_scanned += len(self.urls) - urls_scanned
                break

        # cookies bugfix?
        self.session.cookies.clear()

    def _fill_headers(self):
        # Fill UserAgent in headers
        headers = {}
        if self.args.user_agent is not None:
            headers['User-agent'] = self.args.user_agent
        elif self.args.random_agent:
            headers['User-agent'] = self.ua.random

        # Fill Referer in headers
        if self.args.referer is not None:
            headers['Referer'] = self.args.referer

        return headers

    def _parse_response(self, url, response, exception):
        res = {'url': url, 'response': response, 'exception': exception}

        if response is None or exception is not None:
            res.update({
                'status': -1,
                'length': -1,
            })
            return res

        try:
            length = int(response.headers['content-length']
                         ) if 'content-length' in response.headers else len(
                             response.text)
        except Exception as exception:
            self.output.write_log(
                "Exception while getting content length for URL: %s Exception: %s"
                % (url, str(exception)), logging.ERROR)
            length = 0

        res.update({
            'status': response.status_code,
            'length': length,
        })
        return res

    def scan_url(self, url, use_head=False):
        self.output.write_log('Scanning %s' % url, logging.DEBUG)

        # Query URL and handle exceptions
        response, exception = None, None
        method = 'HEAD' if use_head else 'GET'
        try:
            # TODO: add support for user:password in URL
            response = self.session.request(
                method,
                url,
                headers=self._fill_headers(),
                allow_redirects=self.args.allow_redirects)
        except ConnectionError as ex:
            self.output.write_log('Connection error while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except HTTPError as ex:
            self.output.write_log('HTTP error while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except Timeout as ex:
            self.output.write_log('Timeout while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except TooManyRedirects as ex:
            self.output.write_log('Too many redirects while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except Exception as ex:
            self.output.write_log('Unknown exception while querying %s' % url,
                                  logging.ERROR)
            exception = ex

        # print('cookies: %s' % self.cookies)
        print('session.cookies: %s' % self.session.cookies)
        # self.session.cookies = self.cookies

        return self._parse_response(url, response, exception)

    def signal_handler(self):
        """
        Signal handler
        :return:
        """
        # TODO: add saving status via pickle
        self.output.print_and_log('Signal caught. Stopping...',
                                  logging.WARNING)
        self.stop()
        exit(signal.SIGINT)

    def _calc_urls(self):
        # Calculations
        self.urls_count = len(self.urls)
        self.hosts_count = len(self.hosts)
        self.full_urls_count = len(self.urls) * len(self.hosts)
        self.output.args.urls_count = self.full_urls_count

    def start(self):
        """
        Start multithreaded scan
        :return:
        """
        # Set signal handler
        gevent.signal(signal.SIGTERM, self.signal_handler)
        gevent.signal(signal.SIGINT, self.signal_handler)
        gevent.signal(signal.SIGQUIT, self.signal_handler)

        # ICMP scan
        if self.args.icmp:
            if geteuid() != 0:
                self.output.print_and_log(
                    'To use ICMP scan option you must run as root. Skipping ICMP scan',
                    logging.WARNING)
            else:
                self.output.print_and_log('Starting ICMP scan.')
                self.hosts = helper.icmp_scan(self.hosts, self.args.timeout)
                self._calc_urls()
                self.output.print_and_log(
                    'After ICMP scan %i hosts %i urls loaded, %i urls to scan'
                    %
                    (self.hosts_count, self.urls_count, self.full_urls_count))

        # SYN scan
        if self.args.syn:
            if self.args.tor or self.args.proxy is not None:
                self.output.print_and_log(
                    'SYN scan via tor or proxy is impossible!',
                    logging.WARNING)
                self.output.print_and_log(
                    'Stopping to prevent deanonymization!', logging.WARNING)
                exit(-1)

            if geteuid() != 0:
                self.output.print_and_log(
                    'To use SYN scan option you must run as root. Skipping SYN scan',
                    logging.WARNING)
            else:
                self.output.print_and_log('Starting SYN scan.')
                self.hosts = helper.syn_scan(self.hosts, self.args.ports,
                                             self.args.timeout)
                self._calc_urls()
                self.output.print_and_log(
                    'After SYN scan %i hosts %i urls loaded, %i urls to scan' %
                    (self.hosts_count, self.urls_count, self.full_urls_count))

        # Check threads count vs hosts count
        if self.args.threads > self.hosts_count:
            self.output.write_log(
                'Too many threads! Fixing threads count to %i' %
                self.hosts_count, logging.WARNING)
            threads_count = self.hosts_count
        else:
            threads_count = self.args.threads

        # Output urls count
        self.output.args.urls_count = self.full_urls_count

        # Start workers
        self.workers = [spawn(self.worker, i) for i in range(threads_count)]

        # Fill and join queue
        [self.hosts_queue.put(host) for host in self.hosts]
        self.hosts_queue.join()

    def stop(self):
        """
        Stop scan
        :return:
        """
        # TODO: stop correctly
        gevent.killall(self.workers)
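The TOR check in _init_scan_options() above boils down to comparing the public IP seen directly with the IP seen through the SOCKS proxy. A hedged sketch follows; it needs the requests[socks] extra installed, and the echo-service URL and proxy address are taken from the code above purely as examples.

import requests

def tor_works(ip_url='http://ifconfig.me/ip', socks='socks5://127.0.0.1:9050'):
    # Fetch the public IP once directly and once through the TOR SOCKS proxy;
    # if both answers match, traffic is not actually being routed through TOR.
    real_ip = requests.get(ip_url, timeout=10).text.strip()
    proxies = {'http': socks, 'https': socks}
    tor_ip = requests.get(ip_url, proxies=proxies, timeout=10).text.strip()
    return tor_ip != real_ip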
Esempio n. 32
0
class SimpleCrawler:

  USER_AGENT = 'SimpleCrawler/0.1'
  HEADERS = {
    'User-Agent': USER_AGENT,
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive'
    }
  CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I)
  
  def __init__(self, starturl, index_html='', maxlevel=1,
               cookie_file=None, acldb=None, urldb=None, default_charset=None,
               delay=0, timeout=300, debug=0):
    (proto, self.hostport, _x, _y, _z) = urlsplit(starturl)
    assert proto == 'http'
    #Thread.__init__(self)
    self.debug = debug
    self.index_html = index_html
    if cookie_file:
      self.cookiejar = MozillaCookieJar(cookie_file)
      self.cookiejar.load()
    else:
      self.cookiejar = None
    self.robotstxt = RobotFileParser()
    self.robotstxt.set_url(urljoin(starturl, '/robots.txt'))
    self.robotstxt.read()
    self.conn = None
    self.urldb = urldb
    self.acldb = acldb
    self.curlevel = 0
    self.delay = delay
    self.timeout = timeout
    self.default_charset = default_charset
    if starturl.endswith('/'):
      starturl += self.index_html
    self.urls = [(starturl, maxlevel)]
    self.crawled = {}                   # 1:injected, 2:crawled
    return

  def accept_url(self, url):
    if url.endswith('/'):
      url += self.index_html
    if self.acldb and not self.acldb.allowed(url):
      return None
    return url
  
  def inject_url(self, url):
    if (not self.curlevel) or (not url) or (url in self.crawled): return False
    if not self.robotstxt.can_fetch(self.USER_AGENT, url):
      if self.debug:
        print >>stderr, 'DISALLOW: %r' % url
      return None
    if self.debug:
      print >>stderr, 'INJECT: %r' % url
    self.crawled[url] = 1
    self.urls.append((url, self.curlevel-1))
    return True

  def get1(self, url, maxretry=3, maxredirect=3):
    if self.debug:
      print >>stderr, 'GET: %r' % url
    # loop
    for rtry in range(maxredirect):
      # forge urllib2.Request object.
      req = Request(url)
      # add cookie headers if necessary.
      if self.cookiejar:
        self.cookiejar.add_cookie_header(req)
        headers = req.unredirected_hdrs
        headers.update(self.HEADERS)
      else:
        headers = self.HEADERS
      # get response.
      for ctry in range(maxretry):
        try:
          if not self.conn:
            print >>stderr, 'Making connection: %r...' % (self.hostport,)
            self.conn = HTTPConnection(self.hostport)
          self.conn.request('GET', req.get_selector().replace(' ',''), '', headers)
          self.conn.sock.settimeout(self.timeout)
          resp = self.conn.getresponse()
          break
        except BadStatusLine, x:
          # connection closed unexpectedly
          print >>stderr, 'Connection closed unexpectedly.'
          # it restarts the connection...
          self.conn.close()
          self.conn = None
        except socket.error, x:
          # connection closed unexpectedly
          print >>stderr, 'Socket error:', x
          self.conn.close()
          self.conn = None
      else:
Esempio n. 33
0
class querier(object):
    proxy = None

    def _get_page(self, pagerequest):
        """Return the data for a page on scholar.google.com"""
        # Note that we include a sleep to avoid overloading the scholar server
        time.sleep(max((5, random.uniform(0, 5))))
        # resp = _SESSION.get(pagerequest, headers=_HEADERS, cookies=_COOKIES)
        req = Request(url=pagerequest, headers=_HEADERS)
        if self.proxy:
            r = self.proxy.get(pagerequest, options={})

            while not r['body']:
                # Error on request through the proxy
                print("Error while communicating, trying again ...")
                r = self.proxy.get(pagerequest, options={})

            return r['body']
        hdl = urlopen(req)

        return hdl.read()
        # if html.status_code == 200:
        #     return resp.text
        # else:
        #     raise Exception('Error: {0} {1}'.format(html.status_code, html.reason))

    def _get_soup(self, pagerequest):
        """Return the BeautifulSoup for a page on scholar.google.com"""
        html = self._get_page(pagerequest)
        return BeautifulSoup(html, 'html.parser')

    def _search_scholar_soup(self, soup):
        """Generator that returns Publication objects from the search page"""
        while True:
            for row in soup.find_all('div', 'gs_r'):
                # Small fix: leave out the first entry, which is the article being searched
                if row.find('div', class_='gs_ri') is None:
                    continue
                yield Publication(row, self, 'scholar')
            if soup.find(class_='gs_ico gs_ico_nav_next'):
                url = soup.find(class_='gs_ico gs_ico_nav_next').parent['href']
                soup = self._get_soup(_HOST + url)
            else:
                break

    def _search_citation_soup(self, soup):
        """Generator that returns Author objects from the author search page"""
        while True:
            for row in soup.find_all('div', 'gsc_1usr'):
                yield Author(row)
            next_button = soup.find(
                class_=
                'gs_btnPR gs_in_ib gs_btn_half gs_btn_lsb gs_btn_srt gsc_pgn_pnx'
            )
            if next_button and 'disabled' not in next_button.attrs:
                url = next_button['onclick'][17:-1]
                url = codecs.getdecoder("unicode_escape")(url)[0]
                soup = self._get_soup(_HOST + url)
            else:
                break

    def search_pubs_query(self, query, years=None):
        """Search by scholar query and return a generator of Publication objects"""
        url = _PUBSEARCH.format(requests.utils.quote(query.encode('utf8')))
        if years is not None:
            if isinstance(years, list):
                url += '&as_ylo=' + str(min(years)) + '&as_yhi=' + str(
                    max(years))
            if isinstance(years, int):
                url += '&as_ylo=' + str(years) + '&as_yhi=' + str(years)

        soup = self._get_soup(_HOST + url)

        return self._search_scholar_soup(soup)

    def search_author(self, name):
        """Search by author name and return a generator of Author objects"""
        url = _AUTHSEARCH.format(requests.utils.quote(name))
        soup = self._get_soup(_HOST + url)
        return self._search_citation_soup(soup)

    def search_keyword(self, keyword):
        """Search by keyword and return a generator of Author objects"""
        url = _KEYWORDSEARCH.format(requests.utils.quote(keyword))
        soup = self._get_soup(_HOST + url)
        return self._search_citation_soup(soup)

    def search_pubs_custom_url(self, url):
        """Search by custom URL and return a generator of Publication objects
        URL should be of the form '/scholar?q=...'"""
        soup = self._get_soup(_HOST + url)
        return self._search_scholar_soup(soup)

    def search_author_custom_url(self, url):
        """Search by custom URL and return a generator of Publication objects
        URL should be of the form '/citation?q=...'"""
        soup = self._get_soup(_HOST + url)
        return self._search_citation_soup(soup)

    def save_cookies(self):
        """
        This stores the latest cookies we're using to disk, for reuse in a
        later session.
        """
        try:
            self.cjar.save(self.cFile, ignore_discard=True)
            return True
        except Exception as msg:
            return False

    def set_proxy(self, token):
        from proxycrawl import ProxyCrawlAPI

        self.proxy = ProxyCrawlAPI({'token': token})

    def reset_proxy(self):
        self.proxy = None

    def __init__(self, cookie_file=''):
        self.cjar = MozillaCookieJar()
        self.cFile = cookie_file

        # If we have a cookie file, load it:
        if self.cFile and exists(self.cFile):
            try:
                self.cjar.load(self.cFile, ignore_discard=True)
            except Exception as msg:
                self.cjar = MozillaCookieJar()  # Just to be safe

        self.opener = build_opener(HTTPCookieProcessor(self.cjar))

    def __del__(self):
        self.save_cookies()
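
A minimal sketch of the same cookie-persistence pattern used by querier above: load a Netscape-format cookies file if it exists, fall back to an empty jar on failure, and save again at the end. The file name and URL below are placeholders, not part of the original snippet.

from os.path import exists
from http.cookiejar import MozillaCookieJar
from urllib.request import build_opener, HTTPCookieProcessor

COOKIE_FILE = 'scholar_cookies.txt'  # placeholder path

jar = MozillaCookieJar()
if exists(COOKIE_FILE):
    try:
        jar.load(COOKIE_FILE, ignore_discard=True)
    except Exception:
        jar = MozillaCookieJar()  # unreadable file: start over with an empty jar

opener = build_opener(HTTPCookieProcessor(jar))
response = opener.open('https://example.org/')  # placeholder URL
print(response.getcode())

# Persist whatever cookies the server set, including session cookies.
jar.save(COOKIE_FILE, ignore_discard=True)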
Esempio n. 34
0
def get_url(url,
            config,
            additional_headers=None,
            additional_query_string=None,
            post_data=None,
            fail_silent=False,
            no_cache=False,
            return_json_errors=[],
            return_final_url=False,
            cookie_file=None):

	response_content = ''
	request_hash = sha512(
	        (url + dumps(additional_headers) + dumps(additional_query_string) + dumps(post_data)).encode('utf-8')).hexdigest()

	final_url = url

	if xbmc_helper().get_bool_setting('debug_requests') is True:
		xbmc_helper().log_debug(
		        'get_url - url: {} headers {} query {} post {} no_cache {} silent {} request_hash {} return_json_errors {} cookie_file {}',
		        url, additional_headers, additional_query_string, post_data, no_cache, fail_silent, request_hash, return_json_errors,
		        cookie_file)

	if no_cache is True:
		etags_data = None
	else:
		etags_data = get_etags_data(request_hash)

	try:

		headers = {
		        'Accept-Encoding': 'gzip, deflate',
		        'User-Agent': config['USER_AGENT'],
		        'Accept': '*/*',
		}

		if additional_headers is not None:
			headers.update(additional_headers)

		if config.get('http_headers', None) is not None:
			headers.update(config.get('http_headers', []))

		if etags_data is not None:
			headers.update({'If-None-Match': etags_data['etag']})

		if additional_query_string is not None:
			_url = compat._format('{}{}{}', url, '?' if url.find('?') == -1 else '&', urlencode(additional_query_string))
			url = _url
		if isinstance(post_data, dict):
			post_data = urlencode(post_data)

		cookie_processor = None
		cookie_jar = None
		if cookie_file is not None:
			cookie_jar = MozillaCookieJar(cookie_file)
			try:
				cookie_jar.load()
			except LoadError as load_error:
				xbmc_helper().log_debug('Failed to load from cookiefile {} with error {} - new session?', cookie_file, load_error)
			cookie_processor = HTTPCookieProcessor(cookie_jar)

		if xbmc_helper().get_bool_setting('use_https_proxy') is True and xbmc_helper().get_text_setting(
		        'https_proxy_host') != '' and xbmc_helper().get_int_setting('https_proxy_port') != 0:

			proxy_uri = compat._format('{}:{}',
			                           xbmc_helper().get_text_setting('https_proxy_host'),
			                           xbmc_helper().get_text_setting('https_proxy_port'))

			xbmc_helper().log_debug('Using proxy uri {}', proxy_uri)
			prxy_handler = ProxyHandler({
			        'http': proxy_uri,
			        'https': proxy_uri,
			})
			if cookie_processor is None:
				install_opener(build_opener(prxy_handler))
			else:
				install_opener(build_opener(prxy_handler, cookie_processor))

		elif cookie_processor is not None:
			install_opener(build_opener(cookie_processor))

		if post_data is not None:
			request = Request(url, data=post_data.encode('utf-8'), headers=headers)
		else:
			request = Request(url, headers=headers)

		response = urlopen(request, timeout=40)

		if response.info().get('Content-Encoding') == 'gzip':
			response_content = compat._decode(GzipFile(fileobj=BytesIO(response.read())).read())
		else:
			response_content = compat._decode(response.read())

		if cookie_jar is not None:
			cookie_jar.save()

		final_url = response.geturl()
		_etag = response.info().get('etag', None)
		if no_cache is False and _etag is not None:
			set_etags_data(request_hash, _etag, response_content)

	except HTTPError as http_error:

		if http_error.code == 304 and etags_data.get('data', None) is not None:
			response_content = etags_data.get('data')
		else:
			try:
				if http_error.info().get('Content-Encoding') == 'gzip':
					error_body = compat._decode(GzipFile(fileobj=BytesIO(http_error.read())).read())
				else:
					error_body = compat._decode(http_error.read())

				xbmc_helper().log_debug('HTTP ERROR: {}', error_body)
				json_errors = loads(error_body)
				xbmc_helper().log_debug('JSON ERRORS: {}', json_errors)

				has_decoded_error = False
				if isinstance(json_errors, dict) and 'errors' not in json_errors.keys() and 'code' in json_errors.keys():
					json_errors = {'errors': [json_errors]}
				elif isinstance(json_errors, list) and len(json_errors) == 1 and isinstance(json_errors[0], dict):
					json_errors = {'errors': json_errors}
				err_str = str(http_error.code)
				return_errors = []

				if isinstance(json_errors, dict):
					for error in json_errors.get('errors', []):
						if 'msg' in error.keys():
							err_str = compat._format('{}|{}', err_str, error.get('msg'))
							has_decoded_error = True
						if 'code' in error.keys() and error['code'] in return_json_errors:
							return_errors.append(error['code'])
							has_decoded_error = True

				xbmc_helper().log_debug('return_json_errors {}', return_errors)

				if len(return_errors) > 0:
					response_content = dumps({'json_errors': return_errors})

				elif has_decoded_error is True:
					xbmc_helper().notification(
					        'Error',
					        err_str,
					)
					exit(0)

			except Exception:
				raise http_error

	except Exception as e:
		xbmc_helper().log_error('Failed to load url: {} headers {} post_data {} - Exception: {}', url, headers, post_data, e)

		if fail_silent is True:
			pass
		else:
			xbmc_helper().notification(compat._format(xbmc_helper().translation('ERROR'), 'URL Access'),
			                           compat._format(xbmc_helper().translation('MSG_NO_ACCESS_TO_URL'), str(url)))
			exit(0)

	if return_final_url:
		return final_url, response_content

	return response_content
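
The helper above combines conditional requests (If-None-Match / HTTP 304) with gzip decoding. Below is a stripped-down sketch of that caching pattern without the Kodi helpers; the in-memory cache and the fetch() name are illustrative, not from the original add-on.

from gzip import GzipFile
from io import BytesIO
from urllib.error import HTTPError
from urllib.request import Request, urlopen

etag_cache = {}  # placeholder cache: url -> (etag, body)

def fetch(url):
    headers = {'Accept-Encoding': 'gzip', 'Accept': '*/*'}
    cached = etag_cache.get(url)
    if cached:
        headers['If-None-Match'] = cached[0]
    try:
        response = urlopen(Request(url, headers=headers), timeout=40)
    except HTTPError as http_error:
        if http_error.code == 304 and cached:
            return cached[1]  # not modified: serve the cached body
        raise
    raw = response.read()
    if response.headers.get('Content-Encoding') == 'gzip':
        body = GzipFile(fileobj=BytesIO(raw)).read().decode('utf-8')
    else:
        body = raw.decode('utf-8')
    etag = response.headers.get('ETag')
    if etag is not None:
        etag_cache[url] = (etag, body)
    return body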
Esempio n. 35
0
class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem
    """

    def __init__(self):
        super(Session, self).__init__()
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.deis/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.deis dir if it doesn't exist
        if not os.path.isdir(cookie_dir):
            os.mkdir(cookie_dir, 0700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def git_root(self):
        """
        Return the absolute path from the git repository root

        If no git repository exists, raise an EnvironmentError
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_formation(self):
        """
        Return the formation name for the current directory

        The formation is determined by parsing `git remote -v` output.
        If no formation is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        # try to match a deis remote
        remotes = subprocess.check_output(['git', 'remote', '-v'],
                                          cwd=git_root)
        m = re.match(r'^deis\W+(?P<url>\S+)\W+\(', remotes, re.MULTILINE)
        if not m:
            raise EnvironmentError(
                'Could not find deis remote in `git remote -v`')
        url = m.groupdict()['url']
        m = re.match('\S+:(?P<formation>[a-z0-9-]+)(.git)?', url)
        if not m:
            raise EnvironmentError("Could not parse: {url}".format(**locals()))
        return m.groupdict()['formation']

    formation = property(get_formation)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling including
        `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`
        """
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        self.cookies.save()
        return response
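
The request() override above is the important part: it mirrors Django's csrftoken cookie into the X-CSRFToken header before each call. A trimmed sketch of just that behaviour, assuming requests is available; the class name and placeholder URL are illustrative.

import requests

class CSRFSession(requests.Session):
    """Copy a Django 'csrftoken' cookie into the X-CSRFToken header."""

    def request(self, method, url, **kwargs):
        token = self.cookies.get('csrftoken')
        if token:
            kwargs.setdefault('headers', {})['X-CSRFToken'] = token
        return super().request(method, url, **kwargs)

# session = CSRFSession()
# session.post('https://deis.example.org/api/', data={'key': 'value'})  # placeholder URL

Esempio n. 36
0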
class bulk_downloader:
    def __init__(self, id, username, password, table_name):
        # List of files to download
        if id[:3] == 'S1A':
            self.files = [
                'https://datapool.asf.alaska.edu/GRD_HD/SA/{}.zip'.format(id)
            ]
        elif id[:3] == 'S1B':
            self.files = [
                'https://datapool.asf.alaska.edu/GRD_HD/SB/{}.zip'.format(id)
            ]
        else:
            print('no identified sensor: {}'.format(id))
            logging.error('sensor not identified: {}'.format(id))
            return

        self.username = username
        self.password = password
        self.table_name = table_name
        self.save_to = os.getenv('IMAGES_PATH')

        # Local stash of cookies so we don't always have to ask
        self.cookie_jar_path = os.path.join(
            os.path.dirname(os.path.abspath('__file__')),
            '.bulk_download_cookiejar.txt')
        self.cookie_jar = None

        self.asf_urs4 = {
            'url': 'https://urs.earthdata.nasa.gov/oauth/authorize',
            'client': 'BO_n7nTIlMljdvU6kRRB3g',
            'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request' \
        }

        # Make sure we can write to our current directory
        if os.access(os.getcwd(), os.W_OK) is False:
            print(
                'WARNING: Cannot write to current path! Check permissions for {0}'
                .format(os.getcwd()))
            exit(-1)

        # For SSL
        self.context = {}

        # Make sure cookie_jar is good to go!
        self.get_cookie()

        # summary
        self.total_bytes = 0
        self.total_time = 0
        self.cnt = 0
        self.success = []
        self.failed = []
        self.skipped = []

    # Get and validate a cookie
    def get_cookie(self):

        # remove the cookie_jar_path file if it's more than 10 hours old
        date_created = cookie_creation_date()
        if date_created:
            dx = datetime.now() - date_created
            hour = dx.total_seconds() / (3600)
            if hour > 10:
                print('cookie greater than 10 hours so removing it')
                os.remove(self.cookie_jar_path)

        if os.path.isfile(self.cookie_jar_path):
            self.cookie_jar = MozillaCookieJar()
            self.cookie_jar.load(self.cookie_jar_path)

            # make sure cookie is still valid
            if self.check_cookie():
                print(' > Re-using previous cookie jar.')
                return True
            else:
                print(' > Could not validate old cookie Jar')

        # We don't have a valid cookie, prompt the user for creds
        print('No existing URS cookie found, creating one')
        print('(Credentials will not be stored, saved or logged anywhere)')

        # Keep trying 'till user gets the right U:P
        while self.check_cookie() is False:
            self.get_new_cookie()

        return True

    # Validate cookie before we begin
    def check_cookie(self):

        if self.cookie_jar is None:
            print(' > Cookiejar is bunk: {0}'.format(self.cookie_jar))
            return False

        # File we know is valid, used to validate cookie
        file_check = 'https://urs.earthdata.nasa.gov/profile'

        # Apply custom Redirect Handler
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        install_opener(opener)

        # Attempt a HEAD request
        request = Request(file_check)
        request.get_method = lambda: 'HEAD'
        try:
            print(' > attempting to download {0}'.format(file_check))
            response = urlopen(request, timeout=30)
            resp_code = response.getcode()
            # Make sure we're logged in
            if not self.check_cookie_is_logged_in(self.cookie_jar):
                return False

            # Save cookiejar
            self.cookie_jar.save(self.cookie_jar_path)

        except HTTPError:
            # If we get this error, again, it likely means the user has not agreed to the current EULA
            print('\nIMPORTANT: ')
            print(
                'Your user appears to lack permissions to download data from the ASF Datapool.'
            )
            print(
                '\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov'
            )
            exit(-1)

        # These return codes indicate the USER has not been approved to download the data
        if resp_code in (300, 301, 302, 303):
            try:
                redir_url = response.info().getheader('Location')
            except AttributeError:
                redir_url = response.getheader('Location')

            # Funky Test env:
            if ('vertex-retired.daac.asf.alaska.edu' in redir_url
                    and 'test' in self.asf_urs4['redir']):
                print("Cough, cough. It's dusty in this test env!")
                return True

            print('Redirect ({0}) occurred, invalid cookie value!'.format(
                resp_code))
            return False

        # These are successes!
        if resp_code in (200, 307):
            return True

        return False

    def get_new_cookie(self):
        # Use the credentials supplied to the constructor

        new_username = self.username
        new_password = self.password

        # Build URS4 Cookie request
        auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \
                          self.asf_urs4['redir'] + '&response_type=code&state='

        try:
            # python2
            user_pass = base64.b64encode(
                bytes(new_username + ':' + new_password))
        except TypeError:
            # python3
            user_pass = base64.b64encode(
                bytes(new_username + ':' + new_password, 'utf-8'))
            user_pass = user_pass.decode('utf-8')

        # Authenticate against URS, grab all the cookies
        self.cookie_jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        request = Request(
            auth_cookie_url,
            headers={"Authorization": "Basic {0}".format(user_pass)})

        # Watch out for cookie rejection!
        try:
            response = opener.open(request)
        except HTTPError as e:
            if e.code == 401:
                print(
                    " > Username and Password combo was not successful. Please try again."
                )
                return False
            else:
                # If an error happens here, the user most likely has not confirmed EULA.
                print(
                    "\nIMPORTANT: There was an error obtaining a download cookie!"
                )
                print(
                    "Your user appears to lack permission to download data from the ASF Datapool."
                )
                print(
                    "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
                )
                exit(-1)
        except URLError as e:
            print(
                "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. "
            )
            print("Try cookie generation later.")
            exit(-1)

        # Did we get a cookie?
        if self.check_cookie_is_logged_in(self.cookie_jar):
            # COOKIE SUCCESS!
            print('cookie saved')
            self.cookie_jar.save(self.cookie_jar_path)
            save_cookie_creation_date()
            return True

        # if we aren't successful generating the cookie, nothing will work. Stop here!
        print(
            "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again."
        )
        print("Response was {0}.".format(response.getcode()))
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
        )
        exit(-1)

    # make sure we're logged into URS
    def check_cookie_is_logged_in(self, cj):
        for cookie in cj:
            if cookie.name == 'urs_user_already_logged':
                # Only get this cookie if we logged in successfully!
                return True
        return False

    # Download the file
    def download_file_with_cookiejar(self,
                                     url,
                                     file_count,
                                     total,
                                     recursion=False):
        # see if we've already downloaded this file and that it is the correct size
        download_file = os.path.basename(url).split('?')[0]
        if os.path.isfile(os.path.join(self.save_to, download_file)):
            try:
                request = Request(url)
                request.get_method = lambda: 'HEAD'
                response = urlopen(request, timeout=30)
                remote_size = self.get_total_size(response)
                # Check that we were able to derive a size.
                if remote_size:
                    local_size = os.path.getsize(
                        os.path.join(self.save_to, download_file))
                    if remote_size < (local_size +
                                      (local_size * .01)) and remote_size > (
                                          local_size - (local_size * .01)):
                        print(
                            " > Download file {0} exists! \n > Skipping download of {1}. "
                            .format(download_file, url))
                        return None, None
                    # partial file size wasn't the full file size, let's blow away the chunk and start again
                    print(
                        " > Found {0} but it wasn't fully downloaded. Removing file and downloading again."
                        .format(download_file))
                    os.remove(os.path.join(self.save_to, download_file))

            except ssl.CertificateError as e:
                print(" > ERROR: {0}".format(e))
                print(
                    " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
                )
                return False, None

            except HTTPError as e:
                if e.code == 401:
                    print(
                        " > IMPORTANT: Your user may not have permission to download this type of data!"
                    )
                else:
                    print(" > Unknown Error, Could not get file HEAD: {0}".
                          format(e))

            except URLError as e:
                print("URL Error (from HEAD): {0}, {1}".format(e.reason, url))
                if "ssl.c" in "{0}".format(e.reason):
                    print(
                        "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                    )
                return False, None

        # attempt https connection
        try:
            request = Request(url)
            response = urlopen(request, timeout=30)

            # Watch for redirect
            if response.geturl() != url:

                # See if we were redirected BACK to URS for re-auth.
                if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(
                ):

                    if recursion:
                        print(
                            " > Entering seemingly endless auth loop. Aborting. "
                        )
                        return False, None

                    # make this easier. If there is no app_type=401, add it
                    new_auth_url = response.geturl()
                    if "app_type" not in new_auth_url:
                        new_auth_url += "&app_type=401"

                    print(
                        " > While attempting to download {0}....".format(url))
                    print(" > Need to obtain new cookie from {0}".format(
                        new_auth_url))
                    old_cookies = [cookie.name for cookie in self.cookie_jar]
                    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                                          HTTPHandler(),
                                          HTTPSHandler(**self.context))
                    request = Request(new_auth_url)
                    try:
                        response = opener.open(request)
                        for cookie in self.cookie_jar:
                            if cookie.name not in old_cookies:
                                print(" > Saved new cookie: {0}".format(
                                    cookie.name))

                                # A little hack to save session cookies
                                if cookie.discard:
                                    cookie.expires = int(
                                        time.time()) + 60 * 60 * 24 * 30
                                    print(
                                        " > Saving session Cookie that should have been discarded! "
                                    )

                        self.cookie_jar.save(self.cookie_jar_path,
                                             ignore_discard=True,
                                             ignore_expires=True)
                    except HTTPError as e:
                        print("HTTP Error: {0}, {1}".format(e.code, url))
                        return False, None

                    # Okay, now we have more cookies! Let's try again, recursively!
                    print(" > Attempting download again with new cookies!")
                    return self.download_file_with_cookiejar(url,
                                                             file_count,
                                                             total,
                                                             recursion=True)

                print(
                    " > 'Temporary' Redirect download @ Remote archive:\n > {0}"
                    .format(response.geturl()))

            # seems to be working
            print("({0}/{1}) Downloading {2}".format(file_count, total, url))

            # Open our local file for writing and build status bar
            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
            self.chunk_read(response, tf, report_hook=self.chunk_report)

            # Reset download status
            sys.stdout.write('\n')

            tempfile_name = tf.name
            tf.close()

        # handle errors
        except HTTPError as e:
            print("HTTP Error: {0}, {1}".format(e.code, url))

            if e.code == 401:
                print(
                    " > IMPORTANT: Your user does not have permission to download this type of data!"
                )

            if e.code == 403:
                print(" > Got a 403 Error trying to download this file.  ")
                print(
                    " > You MAY need to log in this app and agree to a EULA. ")

            return False, None

        except URLError as e:
            print("URL Error (from GET): {0}, {1}, {2}".format(
                e, e.reason, url))
            if "ssl.c" in "{0}".format(e.reason):
                print(
                    "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                )
            return False, None

        except ssl.CertificateError as e:
            print(" > ERROR: {0}".format(e))
            print(
                " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
            )
            return False, None

        # Return the file size
        shutil.copy(tempfile_name, os.path.join(self.save_to, download_file))
        os.remove(tempfile_name)
        file_size = self.get_total_size(response)
        actual_size = os.path.getsize(os.path.join(self.save_to,
                                                   download_file))
        if file_size is None:
            # We were unable to calculate file size.
            file_size = actual_size
        return actual_size, file_size

    #  chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_report(self, bytes_so_far, file_size):
        if file_size is not None:
            percent = float(bytes_so_far) / file_size
            percent = round(percent * 100, 2)
            sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                             (bytes_so_far, file_size, percent))
        else:
            # We couldn't figure out the size.
            sys.stdout.write(" > Downloaded %d of unknown Size\r" %
                             (bytes_so_far))

    #  chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_read(self,
                   response,
                   local_file,
                   chunk_size=8192,
                   report_hook=None):
        file_size = self.get_total_size(response)
        bytes_so_far = 0

        while 1:
            try:
                chunk = response.read(chunk_size)
            except:
                sys.stdout.write("\n > There was an error reading data. \n")
                break

            try:
                local_file.write(chunk)
            except TypeError:
                local_file.write(chunk.decode(local_file.encoding))
            bytes_so_far += len(chunk)

            if not chunk:
                break

            if report_hook:
                report_hook(bytes_so_far, file_size)

        return bytes_so_far

    def get_total_size(self, response):
        try:
            file_size = response.info().getheader('Content-Length').strip()
        except AttributeError:
            try:
                file_size = response.getheader('Content-Length').strip()
            except AttributeError:
                print("> Problem getting size")
                return None

        return int(file_size)

    # Download all the files in the list
    def download_files(self):
        for file_name in self.files:

            # make sure we haven't ctrl+c'd or some other abort trap
            if abort == True:
                raise SystemExit

            # download counter
            self.cnt += 1

            # set a timer
            start = time.time()

            # run download
            size, total_size = self.download_file_with_cookiejar(
                file_name, self.cnt, len(self.files))

            # calculate rate
            end = time.time()

            # stats:
            if size is None:
                self.skipped.append(file_name)
            # Check to see that the download didn't error and is the correct size
            elif size is not False and (total_size <
                                        (size + (size * .01)) and total_size >
                                        (size - (size * .01))):
                # Download was good!
                elapsed = end - start
                elapsed = 1.0 if elapsed < 1 else elapsed
                rate = (size / 1024**2) / elapsed

                print(
                    "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec"
                    .format(size, elapsed, rate))

                # add up metrics
                self.total_bytes += size
                self.total_time += elapsed
                self.success.append({'file': file_name, 'size': size})

            else:
                print("There was a problem downloading {0}".format(file_name))
                self.failed.append(file_name)

    def print_summary(self, rid):
        # Print summary:
        print("\n\nDownload Summary ")
        print(
            "--------------------------------------------------------------------------------"
        )
        print("  Successes: {0} files, {1} bytes ".format(
            len(self.success), self.total_bytes))
        for success_file in self.success:
            print("           - {0}  {1:.2f}MB".format(
                success_file['file'], (success_file['size'] / 1024.0**2)))
        if len(self.failed) > 0:
            print("  Failures: {0} files".format(len(self.failed)))
            for failed_file in self.failed:
                print("          - {0}".format(failed_file))
        if len(self.skipped) > 0:
            print("  Skipped: {0} files".format(len(self.skipped)))
            for skipped_file in self.skipped:
                print("          - {0}".format(skipped_file))
        if len(self.success) > 0:
            print("  Average Rate: {0:.2f}MB/sec".format(
                (self.total_bytes / 1024.0**2) / self.total_time))
        print(
            "--------------------------------------------------------------------------------\n\n"
        )

        # since we are downloading one file at a time!
        if len(self.success) > 0:
            try:
                conn, cur = connect_to_db()
                cur.execute(
                    "UPDATE {} SET downloaded=TRUE WHERE rid={}".format(
                        self.table_name, rid))
                conn.commit()
                close_connection(conn, cur)
            except Exception as e:
                print('error inserting into db because {}'.format(e))
                logging.error(e)

        # ideally should not end up here but anyway
        if len(self.skipped) > 0:
            try:
                conn, cur = connect_to_db()
                cur.execute(
                    "UPDATE {} SET downloaded=FALSE WHERE rid={}".format(
                        self.table_name, rid))
                conn.commit()
                close_connection(conn, cur)
            except Exception as e:
                print('error inserting into db because {}'.format(e))
                logging.error(e)
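
download_file_with_cookiejar above decides whether to skip a file by comparing the local size against the remote Content-Length with a roughly one-percent tolerance. A compact sketch of that check on its own; the helper names are illustrative, not from the original script.

import os
from urllib.request import Request, urlopen

def remote_size(url, timeout=30):
    """Issue a HEAD request and return Content-Length as an int, or None."""
    request = Request(url)
    request.get_method = lambda: 'HEAD'
    response = urlopen(request, timeout=timeout)
    length = response.headers.get('Content-Length')
    return int(length) if length else None

def already_downloaded(path, url, tolerance=0.01):
    """True when the local file is within the tolerance of the remote size."""
    if not os.path.isfile(path):
        return False
    remote = remote_size(url)
    if remote is None:
        return False
    local = os.path.getsize(path)
    return abs(remote - local) <= local * tolerance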
Esempio n. 37
0
 def load_cookies_from_mozilla(self, filename):
     ns_cookiejar = MozillaCookieJar()
     ns_cookiejar.load(filename, ignore_discard=True, ignore_expires=True)
     return ns_cookiejar
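
The method above only builds the jar. A short sketch of handing such a jar to a requests session, in the same spirit as Esempio n. 35; the file name and URL are placeholders.

import requests
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar()
jar.load('cookies.txt', ignore_discard=True, ignore_expires=True)  # placeholder file

session = requests.Session()
session.cookies = jar  # as in Esempio n. 35, a cookielib-style jar can be attached directly
response = session.get('https://example.org/')  # placeholder URL
print(response.status_code)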
Esempio n. 38
0
class LSession():
    def __init__(self,cookiefile = None, proxy = None, timeout = 10, retime = 30,sleept = 3):
        self.timeout=timeout
        self.retime=retime
        self.sleept=sleept
        #proxy '1.234.77.96:80'
        if cookiefile == None:
            self.cookiejar = CookieJar()
        else:
            self.cookiejar = MozillaCookieJar(filename=cookiefile)
            #self.cookiejar =cookielib.LWPCookieJar(filename=cookiefile)
            if not os.path.isfile(cookiefile):
                open(cookiefile, 'w').write(MozillaCookieJar.header)
                #open(cookiefile, 'w').write('#abc\n')
                pass
            self.cookiejar.load(filename=cookiefile,ignore_discard=True)
            #print "ck:",self.cookiejar 
        self.cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.opener=build_opener(urllib2.HTTPRedirectHandler(),self.cookie_processor)
        if proxy : self.opener.add_handler(ProxyHandler({"http" : proxy}))
        #for posting a file
        try:
            import MultipartPostHandler #for posting a file,need installed
            self.opener.add_handler(MultipartPostHandler.MultipartPostHandler())
        except NameError as e:print e
            
        self.response=None
        self.request=None
        self.header=[]
    def add_header(self,k,v) : self.header.append((k,v))

    def build_request(self,url,params=None):
        self.request=Request(url,params)
        if not self.response is None:self.request.add_header('Referer',self.url())
        #self.request.add_header('User-Agent',
        #                        'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 \
        #                        (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25')
        #NokiaE63/UC Browser7.9.0.102/28/355/UCWEB
        #self.request.add_header('User-Agent','NokiaE63/UC Browser7.9.0.102/28/355/UCWEB')
        self.request.add_header('User-Agent','Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15')
        while  self.header :
            _k,_v = self.header.pop()
            self.request.add_header(_k,_v)
        #Mobile/7B405
        #self.request.add_header('User-Agent','Mobile/7B405')
        return self.request

    def __del__(self) : self.save_cookie()

    def urlopen(self,req):
        retime=self.retime
        while retime > 0:
            try:
                return self.opener.open(req,timeout=self.timeout)
            except Exception as e:
                retime -= 1
                traceback.print_exc(file=sys.stdout)
                print 'Wait and retry...%d'%(self.retime-retime)
                sleep(self.sleept)

    def savefile(self,filename,url):
        self.response=self.urlopen(self.build_request(url))
        CHUNK = 50 * 1024
        with open(filename, 'wb') as fp:
            while True:
                chunk = self.response.read(CHUNK)
                if not chunk: break
                fp.write(chunk)
    def post(self,url,post_data):
        self.response=self.urlopen(self.build_request(url,urlencode(post_data)))
        return  self.response
    def post_raw(self,url,post_data):
        self.response=self.urlopen(self.build_request(url,post_data))
        return  self.response

    def post_file(self,url,params):
        self.response=self.urlopen(self.build_request(url, params))
        return  self.response
    def get(self,url):
        self.response=self.urlopen(self.build_request(url))
        #import urllib
        #print  urllib.urlopen('http://mrozekma.com/302test.php').geturl()
        # import requests
        # r=requests.get(url)
        # print r.content
        return  self.response
    def text(self,dec='gbk',enc='utf') :
        return self.response.read().decode(dec).encode(enc)
    def url(self) : return self.response.url
    def logout(self) : self.cookiejar.clear()
    def Verify_proxy(self) :
        pass
    def show_cookie(self):
        #print self.cookiejar
        for i in self.cookiejar:
            print i
    def save_cookie(self):
        # if  hasattr(self.cookiejar,'save'):#in case non cookiejar
        #     self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        try: 
            self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        except Exception as e: 
            traceback.print_exc(file=sys.stdout)
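
LSession.urlopen above retries a request a fixed number of times with a pause between attempts. A minimal Python 3 sketch of that retry loop; the function name is illustrative and the defaults mirror the retime/sleept/timeout values used above.

import time
from urllib.request import Request, urlopen

def open_with_retries(url, retries=30, delay=3, timeout=10):
    """Retry a request, sleeping between attempts, and re-raise the last error."""
    last_error = None
    for _ in range(retries):
        try:
            return urlopen(Request(url), timeout=timeout)
        except OSError as e:  # URLError and socket timeouts are OSError subclasses
            last_error = e
            time.sleep(delay)
    raise last_error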
Esempio n. 39
0
def gen_login_cookie():
    cookie = MozillaCookieJar()
    cookie.load('cookies.txt', ignore_discard=True, ignore_expires=True)
    return cookie
Esempio n. 40
0
class bulk_downloader:
    def __init__(self):
        # List of files to download
        self.files = [
            "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041209_20200626T041234_033183_03D816_7063.zip",
            "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041144_20200626T041209_033183_03D816_0D5E.zip",
            "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041208_20200614T041233_033008_03D2C4_2DC4.zip",
            "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041143_20200614T041208_033008_03D2C4_584D.zip",
            "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041208_20200602T041233_032833_03CD92_6A43.zip",
            "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041143_20200602T041208_032833_03CD92_5A25.zip"
        ]

        # Local stash of cookies so we don't always have to ask
        self.cookie_jar_path = os.path.join(os.path.expanduser('~'),
                                            ".bulk_download_cookiejar.txt")
        self.cookie_jar = None

        self.asf_urs4 = {
            'url': 'https://urs.earthdata.nasa.gov/oauth/authorize',
            'client': 'BO_n7nTIlMljdvU6kRRB3g',
            'redir': 'https://auth.asf.alaska.edu/login'
        }

        # Make sure we can write to our current directory
        if os.access(os.getcwd(), os.W_OK) is False:
            print(
                "WARNING: Cannot write to current path! Check permissions for {0}"
                .format(os.getcwd()))
            exit(-1)

        # For SSL
        self.context = {}

        # Check if user handed in a Metalink or CSV:
        if len(sys.argv) > 0:
            download_files = []
            input_files = []
            for arg in sys.argv[1:]:
                if arg == '--insecure':
                    try:
                        ctx = ssl.create_default_context()
                        ctx.check_hostname = False
                        ctx.verify_mode = ssl.CERT_NONE
                        self.context['context'] = ctx
                    except AttributeError:
                        # Python 2.6 won't complain about SSL Validation
                        pass

                elif arg.endswith('.metalink') or arg.endswith('.csv'):
                    if os.path.isfile(arg):
                        input_files.append(arg)
                        if arg.endswith('.metalink'):
                            new_files = self.process_metalink(arg)
                        else:
                            new_files = self.process_csv(arg)
                        if new_files is not None:
                            for file_url in (new_files):
                                download_files.append(file_url)
                    else:
                        print(
                            " > I cannot find the input file you specified: {0}"
                            .format(arg))
                else:
                    print(
                        " > Command line argument '{0}' makes no sense, ignoring."
                        .format(arg))

            if len(input_files) > 0:
                if len(download_files) > 0:
                    print(" > Processing {0} downloads from {1} input files. ".
                          format(len(download_files), len(input_files)))
                    self.files = download_files
                else:
                    print(
                        " > I see you asked me to download files from {0} input files, but they had no downloads!"
                        .format(len(input_files)))
                    print(" > I'm super confused and exiting.")
                    exit(-1)

        # Make sure cookie_jar is good to go!
        self.get_cookie()

        # summary
        self.total_bytes = 0
        self.total_time = 0
        self.cnt = 0
        self.success = []
        self.failed = []
        self.skipped = []

    # Get and validate a cookie
    def get_cookie(self):
        if os.path.isfile(self.cookie_jar_path):
            self.cookie_jar = MozillaCookieJar()
            self.cookie_jar.load(self.cookie_jar_path)

            # make sure cookie is still valid
            if self.check_cookie():
                print(" > Re-using previous cookie jar.")
                return True
            else:
                print(" > Could not validate old cookie Jar")

        # We don't have a valid cookie, prompt the user for creds
        print("No existing URS cookie found, please enter Earthdata username & password:")
        print("(Credentials will not be stored, saved or logged anywhere)")
        # Keep trying 'till user gets the right U:P
        while self.check_cookie() is False:
            self.get_new_cookie()

        return True

    # Validate cookie before we begin
    def check_cookie(self):

        if self.cookie_jar is None:
            print(" > Cookiejar is bunk: {0}".format(self.cookie_jar))
            return False

        # File we know is valid, used to validate cookie
        file_check = 'https://urs.earthdata.nasa.gov/profile'

        # Apply custom Redirect Handler
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        install_opener(opener)

        # Attempt a HEAD request
        request = Request(file_check)
        request.get_method = lambda: 'HEAD'
        try:
            print(" > attempting to download {0}".format(file_check))
            response = urlopen(request, timeout=30)
            resp_code = response.getcode()
            # Make sure we're logged in
            if not self.check_cookie_is_logged_in(self.cookie_jar):
                return False

            # Save cookiejar
            self.cookie_jar.save(self.cookie_jar_path)

        except HTTPError:
            # If we get this error, again, it likely means the user has not agreed to the current EULA
            print("\nIMPORTANT: ")
            print(
                "Your user appears to lack permissions to download data from the ASF Datapool."
            )
            print(
                "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
            )
            exit(-1)

        # These return codes indicate the USER has not been approved to download the data
        if resp_code in (300, 301, 302, 303):
            try:
                redir_url = response.info().getheader('Location')
            except AttributeError:
                redir_url = response.getheader('Location')

            #Funky Test env:
            if ("vertex-retired.daac.asf.alaska.edu" in redir_url
                    and "test" in self.asf_urs4['redir']):
                print("Cough, cough. It's dusty in this test env!")
                return True

            print("Redirect ({0}) occured, invalid cookie value!".format(
                resp_code))
            return False

        # These are successes!
        if resp_code in (200, 307):
            return True

        return False

    def get_new_cookie(self):
        # Start by prompting user to input their credentials

        # Another Python2/3 workaround
        try:
            new_username = raw_input("Username: ")
        except NameError:
            new_username = input("Username: ")
        new_password = getpass.getpass(prompt="Password (will not be displayed): ")

        # Build URS4 Cookie request
        auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[
            'client'] + '&redirect_uri=' + self.asf_urs4[
                'redir'] + '&response_type=code&state='

        try:
            #python2
            user_pass = base64.b64encode(
                bytes(new_username + ":" + new_password))
        except TypeError:
            #python3
            user_pass = base64.b64encode(
                bytes(new_username + ":" + new_password, "utf-8"))
            user_pass = user_pass.decode("utf-8")

        # Authenticate against URS, grab all the cookies
        self.cookie_jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        request = Request(
            auth_cookie_url,
            headers={"Authorization": "Basic {0}".format(user_pass)})

        # Watch out for cookie rejection!
        try:
            response = opener.open(request)
        except HTTPError as e:
            if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[
                    "WWW-Authenticate"]:
                print(
                    " > Username and Password combo was not successful. Please try again."
                )
                return False
            else:
                # If an error happens here, the user most likely has not confirmed EULA.
                print(
                    "\nIMPORTANT: There was an error obtaining a download cookie!"
                )
                print(
                    "Your user appears to lack permission to download data from the ASF Datapool."
                )
                print(
                    "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
                )
                exit(-1)
        except URLError as e:
            print(
                "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. "
            )
            print("Try cookie generation later.")
            exit(-1)

        # Did we get a cookie?
        if self.check_cookie_is_logged_in(self.cookie_jar):
            #COOKIE SUCCESS!
            self.cookie_jar.save(self.cookie_jar_path)
            return True

        # if we aren't successful generating the cookie, nothing will work. Stop here!
        print(
            "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again."
        )
        print("Response was {0}.".format(response.getcode()))
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
        )
        exit(-1)

    # make sure we're logged into URS
    def check_cookie_is_logged_in(self, cj):
        for cookie in cj:
            if cookie.name == 'urs_user_already_logged':
                # Only get this cookie if we logged in successfully!
                return True

        return False

    # Download the file
    def download_file_with_cookiejar(self,
                                     url,
                                     file_count,
                                     total,
                                     recursion=False):
        # see if we've already downloaded this file and that it is the correct size
        download_file = os.path.basename(url).split('?')[0]
        if os.path.isfile(download_file):
            try:
                request = Request(url)
                request.get_method = lambda: 'HEAD'
                response = urlopen(request, timeout=30)
                remote_size = self.get_total_size(response)
                # Check that we were able to derive a size.
                if remote_size:
                    local_size = os.path.getsize(download_file)
                    if remote_size < (local_size +
                                      (local_size * .01)) and remote_size > (
                                          local_size - (local_size * .01)):
                        print(
                            " > Download file {0} exists! \n > Skipping download of {1}. "
                            .format(download_file, url))
                        return None, None
                    #partial file size wasn't the full file size, let's blow away the chunk and start again
                    print(
                        " > Found {0} but it wasn't fully downloaded. Removing file and downloading again."
                        .format(download_file))
                    os.remove(download_file)

            except ssl.CertificateError as e:
                print(" > ERROR: {0}".format(e))
                print(
                    " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
                )
                return False, None

            except HTTPError as e:
                if e.code == 401:
                    print(
                        " > IMPORTANT: Your user may not have permission to download this type of data!"
                    )
                else:
                    print(" > Unknown Error, Could not get file HEAD: {0}".
                          format(e))

            except URLError as e:
                print("URL Error (from HEAD): {0}, {1}".format(e.reason, url))
                if "ssl.c" in "{0}".format(e.reason):
                    print(
                        "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                    )
                return False, None

        # attempt https connection
        try:
            request = Request(url)
            response = urlopen(request, timeout=30)

            # Watch for redirect
            if response.geturl() != url:

                # See if we were redirected BACK to URS for re-auth.
                if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(
                ):

                    if recursion:
                        print(
                            " > Entering seemingly endless auth loop. Aborting. "
                        )
                        return False, None

                    # make this easier. If there is no app_type=401, add it
                    new_auth_url = response.geturl()
                    if "app_type" not in new_auth_url:
                        new_auth_url += "&app_type=401"

                    print(
                        " > While attempting to download {0}....".format(url))
                    print(" > Need to obtain new cookie from {0}".format(
                        new_auth_url))
                    old_cookies = [cookie.name for cookie in self.cookie_jar]
                    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                                          HTTPHandler(),
                                          HTTPSHandler(**self.context))
                    request = Request(new_auth_url)
                    try:
                        response = opener.open(request)
                        for cookie in self.cookie_jar:
                            if cookie.name not in old_cookies:
                                print(" > Saved new cookie: {0}".format(
                                    cookie.name))

                                # A little hack to save session cookies
                                if cookie.discard:
                                    cookie.expires = int(
                                        time.time()) + 60 * 60 * 24 * 30
                                    print(
                                        " > Saving session Cookie that should have been discarded! "
                                    )

                        self.cookie_jar.save(self.cookie_jar_path,
                                             ignore_discard=True,
                                             ignore_expires=True)
                    except HTTPError as e:
                        print("HTTP Error: {0}, {1}".format(e.code, url))
                        return False, None

                    # Okay, now we have more cookies! Let's try again, recursively!
                    print(" > Attempting download again with new cookies!")
                    return self.download_file_with_cookiejar(url,
                                                             file_count,
                                                             total,
                                                             recursion=True)

                print(
                    " > 'Temporary' Redirect download @ Remote archive:\n > {0}"
                    .format(response.geturl()))

            # seems to be working
            print("({0}/{1}) Downloading {2}".format(file_count, total, url))

            # Open our local file for writing and build status bar
            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
            self.chunk_read(response, tf, report_hook=self.chunk_report)

            # Reset download status
            sys.stdout.write('\n')

            tempfile_name = tf.name
            tf.close()

        # handle errors
        except HTTPError as e:
            print("HTTP Error: {0}, {1}".format(e.code, url))

            if e.code == 401:
                print(
                    " > IMPORTANT: Your user does not have permission to download this type of data!"
                )

            if e.code == 403:
                print(" > Got a 403 Error trying to download this file.  ")
                print(
                    " > You MAY need to log in this app and agree to a EULA. ")

            return False, None

        except URLError as e:
            print("URL Error (from GET): {0}, {1}, {2}".format(
                e, e.reason, url))
            if "ssl.c" in "{0}".format(e.reason):
                print(
                    "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                )
            return False, None

        except socket.timeout as e:
            print(" > timeout requesting: {0}; {1}".format(url, e))
            return False, None

        except ssl.CertificateError as e:
            print(" > ERROR: {0}".format(e))
            print(
                " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
            )
            return False, None

        # Return the file size
        shutil.copy(tempfile_name, download_file)
        os.remove(tempfile_name)
        file_size = self.get_total_size(response)
        actual_size = os.path.getsize(download_file)
        if file_size is None:
            # We were unable to calculate file size.
            file_size = actual_size
        return actual_size, file_size

    def get_redirect_url_from_error(self, error):
        find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"")
        print("error file was: {}".format(error))
        redirect_url = find_redirect.search(error)
        if redirect_url:
            print("Found: {0}".format(redirect_url.group(0)))
            return (redirect_url.group(0))

        return None

    #  chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_report(self, bytes_so_far, file_size):
        if file_size is not None:
            percent = float(bytes_so_far) / file_size
            percent = round(percent * 100, 2)
            sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                             (bytes_so_far, file_size, percent))
        else:
            # We couldn't figure out the size.
            sys.stdout.write(" > Downloaded %d of unknown Size\r" %
                             (bytes_so_far))

    #  chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_read(self,
                   response,
                   local_file,
                   chunk_size=8192,
                   report_hook=None):
        file_size = self.get_total_size(response)
        bytes_so_far = 0

        while 1:
            try:
                chunk = response.read(chunk_size)
            except Exception:
                sys.stdout.write("\n > There was an error reading data. \n")
                break

            try:
                local_file.write(chunk)
            except TypeError:
                local_file.write(chunk.decode(local_file.encoding))
            bytes_so_far += len(chunk)

            if not chunk:
                break

            if report_hook:
                report_hook(bytes_so_far, file_size)

        return bytes_so_far

    def get_total_size(self, response):
        try:
            file_size = response.info().getheader('Content-Length').strip()
        except AttributeError:
            try:
                file_size = response.getheader('Content-Length').strip()
            except AttributeError:
                print("> Problem getting size")
                return None

        return int(file_size)

    # Get download urls from a metalink file
    def process_metalink(self, ml_file):
        print("Processing metalink file: {0}".format(ml_file))
        with open(ml_file, 'r') as ml:
            xml = ml.read()

        # Hack to remove annoying namespace
        it = ET.iterparse(StringIO(xml))
        for _, el in it:
            if '}' in el.tag:
                el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
        root = it.root

        dl_urls = []
        ml_files = root.find('files')
        for dl in ml_files:
            dl_urls.append(dl.find('resources').find('url').text)

        if len(dl_urls) > 0:
            return dl_urls
        else:
            return None

    # Get download urls from a csv file
    def process_csv(self, csv_file):
        print("Processing csv file: {0}".format(csv_file))

        dl_urls = []
        with open(csv_file, 'r') as csvf:
            try:
                csvr = csv.DictReader(csvf)
                for row in csvr:
                    dl_urls.append(row['URL'])
            except csv.Error as e:
                print(
                    "WARNING: Could not parse file %s, line %d: %s. Skipping."
                    % (csv_file, csvr.line_num, e))
                return None
            except KeyError as e:
                print(
                    "WARNING: Could not find URL column in file %s. Skipping."
                    % (csv_file))

        if len(dl_urls) > 0:
            return dl_urls
        else:
            return None

    # Download all the files in the list
    def download_files(self):
        for file_name in self.files:

            # make sure we haven't ctrl+c'd or some other abort trap
            if abort:
                raise SystemExit

            # download counter
            self.cnt += 1

            # set a timer
            start = time.time()

            # run download
            size, total_size = self.download_file_with_cookiejar(
                file_name, self.cnt, len(self.files))

            # calculate rate
            end = time.time()

            # stats:
            if size is None:
                self.skipped.append(file_name)
            # Check to see that the download didn't error and is the correct size
            elif size is not False and (total_size <
                                        (size + (size * .01)) and total_size >
                                        (size - (size * .01))):
                # Download was good!
                elapsed = end - start
                elapsed = 1.0 if elapsed < 1 else elapsed
                rate = (size / 1024**2) / elapsed

                print(
                    "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec"
                    .format(size, elapsed, rate))

                # add up metrics
                self.total_bytes += size
                self.total_time += elapsed
                self.success.append({'file': file_name, 'size': size})

            else:
                print("There was a problem downloading {0}".format(file_name))
                self.failed.append(file_name)

    def print_summary(self):
        # Print summary:
        print("\n\nDownload Summary ")
        print(
            "--------------------------------------------------------------------------------"
        )
        print("  Successes: {0} files, {1} bytes ".format(
            len(self.success), self.total_bytes))
        for success_file in self.success:
            print("           - {0}  {1:.2f}MB".format(
                success_file['file'], (success_file['size'] / 1024.0**2)))
        if len(self.failed) > 0:
            print("  Failures: {0} files".format(len(self.failed)))
            for failed_file in self.failed:
                print("          - {0}".format(failed_file))
        if len(self.skipped) > 0:
            print("  Skipped: {0} files".format(len(self.skipped)))
            for skipped_file in self.skipped:
                print("          - {0}".format(skipped_file))
        if len(self.success) > 0:
            print("  Average Rate: {0:.2f}MB/sec".format(
                (self.total_bytes / 1024.0**2) / self.total_time))
        print(
            "--------------------------------------------------------------------------------"
        )
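The downloader above keeps its Earthdata session alive by stamping an expiry onto session cookies before writing the jar with ignore_discard/ignore_expires. A minimal Python 3 sketch of just that trick, separate from the original script (the cookie file name and the 30-day lifetime are illustrative assumptions):

import time
from http.cookiejar import MozillaCookieJar

def save_with_session_cookies(jar, lifetime_days=30):
    # Session cookies carry discard=True and would normally be dropped by save();
    # give them an explicit expiry so they survive in the Mozilla-format file.
    for cookie in jar:
        if cookie.discard:
            cookie.expires = int(time.time()) + lifetime_days * 24 * 60 * 60
    jar.save(ignore_discard=True, ignore_expires=True)

jar = MozillaCookieJar("urs_cookies.txt")  # placeholder file name
# ... log in through an opener built with HTTPCookieProcessor(jar), then:
save_with_session_cookies(jar)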
Example No. 41
0
class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem
    """
    def __init__(self):
        super(Session, self).__init__()
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.danabox/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.danabox dir if it doesn't exist
        if not os.path.isdir(cookie_dir):
            os.mkdir(cookie_dir, 0o700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def clear(self):
        """Clear cookies"""
        try:
            self.cookies.clear()
            self.cookies.save()
        except KeyError:
            pass

    def git_root(self):
        """
        Return the absolute path from the git repository root

        If no git repository exists, raise an EnvironmentError
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_app(self):
        """
        Return the application name for the current directory

        The application name is determined by parsing `git remote -v` output for a GitHub remote.

        Because Danabox only allows deployment of public GitHub repos, we can create unique app
        names from a combination of the GitHub user's name and the repo name, e.g.
        'git@github.com:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra'

        If no application is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        remotes = subprocess.check_output(['git', 'remote', '-v'],
                                          cwd=git_root)
        if remotes is None:
            raise EnvironmentError('No git remotes found.')
        url = None
        for remote in remotes.splitlines():
            if 'github.com' in remote:
                url = remote.split()[1]
                break
        if url is None:
            raise EnvironmentError('No Github remotes found.')
        pieces = url.split('/')
        owner = pieces[-2].split(':')[-1]
        repo = pieces[-1].replace('.git', '')
        app_raw = owner + '/' + repo
        app_name = app_raw.replace('-', '--').replace('/', '-')
        return app_name

    app = property(get_app)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling including
        `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`
        """
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        self.cookies.save()
        return response
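The request() override above mirrors Django's csrftoken cookie into an X-CSRFToken header on every call. A small stand-alone sketch of the same pattern (the endpoint is a placeholder, and the server is assumed to set a Django-style csrftoken cookie):

import requests

def request_with_csrf(session, method, url, **kwargs):
    # Copy the CSRF cookie value into the header Django's middleware checks.
    for cookie in session.cookies:
        if cookie.name == 'csrftoken':
            kwargs.setdefault('headers', {})['X-CSRFToken'] = cookie.value
            break
    return session.request(method, url, **kwargs)

# Usage (hypothetical endpoint):
# session = requests.Session()
# request_with_csrf(session, 'POST', 'https://example.com/api/apps/', json={})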
Example No. 42
0
class Json_RPC(object):
    def __init__(self):
        #self.cookie_jar=CookieJar()
        self.cookie_jar=MozillaCookieJar()
        self.opener=urllib2.build_opener(
                urllib2.HTTPCookieProcessor(self.cookie_jar),
                #urllib2.HTTPHandler(debuglevel=1),
                #urllib2.HTTPSHandler(debuglevel=1),
                )

    def load_cookie(self,filename):
        ''' Load Cookie from file '''
        self.cookie_jar.load(filename,ignore_discard=True)

    def save_cookie(self,filename):
        ''' Save Cookie to file '''
        self.cookie_jar.save(filename,ignore_discard=True)

    def json_rpc(self,url,method="GET",**kwargs):
        '''
        Perform a JSON RPC to url and return a Python-native result.

        Extracts a dict or list from the response body.

        Example:
        try{callback({'result':0,'data':[]});}catch(e){}
        will be transcoded to
        {"result":0,"data":[]}

        See also: http_rpc

        '''
        ret=self.http_rpc(url,method,**kwargs)
        ret=sub(r'try{(.*)}catch\(.*\){.*};?',r'\1',ret)
        ret=(search(r'{.+}',ret) or search(r'\[.+\]',ret)).group()
        #ret=sub(r"'",r'"',ret)
        ret=loads(ret)
        return ret

    def http_rpc(self,url,method="GET",**kwargs):
        '''
        Perform an HTTP RPC to url and return the raw result

        url          base url to rpc
        method       'GET' or 'POST'
        query        query string, passed as a dict
        data         POST data, passed as a dict
        file         POST files, passed as a list of 3-tuples: key, filename, data
                     (this forces multipart/form-data encoding)
        
        '''
        kwe=Entity(kwargs)

        if method not in ['GET','POST']:
            raise RPCError("Method not in GET or POST")

        if kwe.query:
            url+="?"+urlencode(kwe.query)

        if method=='GET':
            request=Request(url)
        elif kwe.file:
            content_type,data=multipart_encode(kwe.data,kwe.file)
            request=Request(url,data)
            request.add_header('Content-Type', content_type)
        elif kwe.data:
            data=urlencode(kwe.data)
            request=Request(url,data)
        else:
            raise RPCError("POST with no data")

        request.add_header('User-Agent',
            "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:8.0) Gecko/20100101 Firefox/8.0"
            )
        request.add_header('Accept-Charset',"UTF-8")

        response=self.opener.open(request)
        ret=response.read()
        response.close()

        #print "\033[33m"+str(self.cookie_jar)+"\033[0m"

        # FIXME: ugly hack to strip the UTF-8 BOM that Tencent servers prepend as a charset indicator
        if ret.startswith('\xef\xbb\xbf'):
            ret=ret[3:]

        return ret
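The json_rpc() method above strips a JSONP-style try{callback(...)}catch(e){} wrapper before parsing. The same unwrapping step, isolated as a small sketch:

from json import loads
from re import search, sub

def unwrap_jsonp(raw):
    # Drop the try{...}catch(e){} wrapper, then isolate the JSON object or array.
    body = sub(r'try{(.*)}catch\(.*\){.*};?', r'\1', raw)
    match = search(r'{.+}', body) or search(r'\[.+\]', body)
    return loads(match.group())

# unwrap_jsonp('try{callback({"result":0,"data":[]});}catch(e){}')
# -> {'result': 0, 'data': []}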
Example No. 43
0
    c = {}
    for v in args:
        if type(v) == type({}):
            c.update(v) 
    return c

user_agent = {'User-agent': "Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36" }
cookie_file = "/dev/shm/urllib2_cookies.txt"

policy = DefaultCookiePolicy()

global _cookieJar

_cookieJar = MozillaCookieJar(cookie_file, policy=policy)
if os.path.exists(cookie_file):
    _cookieJar.load()

DEBUG_LEVEL = 2

_http = urllib2.HTTPHandler()
_http.set_http_debuglevel(DEBUG_LEVEL)

_https = urllib2.HTTPSHandler()
_https.set_http_debuglevel(DEBUG_LEVEL)

_cookies = urllib2.HTTPCookieProcessor( _cookieJar )

urllib2.install_opener(urllib2.build_opener(_http, _https, _cookies))

ajax_header = {"X-Requested-With": "XMLHttpRequest" }
json_header = {"Accept": "application/json, text/javascript, */*; q=0.01"}
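The module-level setup above targets Python 2 (urllib2). A rough Python 3 equivalent with the same goal, a process-wide opener that persists cookies in Mozilla format (the cookie path is a placeholder):

import os
from http.cookiejar import DefaultCookiePolicy, MozillaCookieJar
from urllib.request import HTTPCookieProcessor, build_opener, install_opener, urlopen

cookie_file = "/tmp/urllib_cookies.txt"  # placeholder path
cookie_jar = MozillaCookieJar(cookie_file, policy=DefaultCookiePolicy())
if os.path.exists(cookie_file):
    cookie_jar.load()

install_opener(build_opener(HTTPCookieProcessor(cookie_jar)))

# Every urlopen() call now records cookies into the jar:
# urlopen("https://example.com/").read()
# cookie_jar.save(ignore_discard=True)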
Example No. 44
0
class BasisRetr:
	"""The main entry points, once a BasisRetr object has been created, are: 
	1) GetDayData()-- download metrics, activity, sleep data for a single day from the basis website and save it
	2) GetActivityCsvForMonth()-- download activity summaries for an entire month
	3) GetSleepCsvForMonth()--download sleep summaries for an entire month."""
	LOGIN_URL = 'https://app.mybasis.com/login'
	METRICS_URL = 'https://app.mybasis.com/api/v1/metricsday/me?day={date}&padding=0&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true'
	ACTIVITIES_URL ='https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep'
	SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep'
	SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events'

	def __init__(self, loadconfig = None):
		# create config info
		self.app_state = Config(defaults=DEFAULTS, fpath=STATE_FILEPATH)
		self.has_error = False
		if loadconfig:
			self.app_state.Load()
		else:
			# if config file doesn't exist, save the defaults loaded above
			self.app_state.Save() #saves 
	
		self.CFG= Config2()
		err_text = self.CFG.Parse(CFG_FILEPATH)
		if err_text:
			print 'Config file read error: '+err_text
		# url opener for website retrieves
		self.cj = MozillaCookieJar(self.CFG.cookie_filename)
		self.session_cookie = None
		if os.path.exists(self.CFG.cookie_filename):
			self.cj.load()
			self.CheckSessionCookie() # set session cookie if it exists and hasn't expired
		# need to use build_opener to submit cookies and post form data
		self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))

	def GetDayData(self, yr, mo, day, typ, save_csv, override_cache = False, act_metr= True):
		"""Main entry method for getting a day's worth of data, formatting, then saving it.  typ is the type of data: metrics, activities, or sleep.  Data is always saved in json format, but if save_csv is True, save to csv as well as json. override_cache ignores any already downloaded json.  act_metr, if True, saves sleep and activity state along with metrics."""
		date = self.YrMoDyToString(yr, mo, day)
		# Need yesterday's date to get sleep events for a given calendar day. This is because sleep events, as downloaded from the Basis Website, start from the prior evening, when you actually went to sleep.
		ydate = self.YrMoDyToString(*self.GetYesterday(yr, mo, day))
		
		self.Status("Checking Login")
		if not self.CheckLogin(): # ensure we're logged in
			return False
			
		self.Status("getting {} for {}".format(typ,date))
		# figure out which data to get
		data = None
		
		# automatically override cache if day is incomplete (doesn't have 24 hrs of data)
		if not self.DayMetricsJsonIsComplete(date):
			override_cache = True
	
		# if needed, download json data from website and save to file
		if typ == 'metrics':
			mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache)
			if not mjdata: # there was an error
				return False

			### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD
			if type(mjdata) == str or mjdata == None: # simple error checking
				self.Status('OnGetDayData: Metrics json conversion failed.')
				return False
			# also load up activities
		if typ == 'activities' or act_metr:
			ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache)
			if type(ajdata) == str or ajdata == None: # simple error checking
				self.Status('OnGetDayData: Activities json conversion failed.')
				return False
		if typ == 'sleep' or act_metr:
			sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache)
			if type(sjdata) == str or sjdata == None: # simple error checking
				self.Status('OnGetDayData: Sleep json conversion failed.')
				return False
			if act_metr: # add yesterday's sleep data
				sjdata2= self.RetrieveJsonOrCached(ydate, 'sleep')
		
		# Next, turn the list of python objects into a csv file.
		# If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp.
		cdata = None
		if save_csv:
			if typ == 'activities' or act_metr:
				act_list = self.JsonActivitiesToList(ajdata)
				cdata = self.CreateCSVFromList(self.CFG.csv.activity_colnames, act_list)
			if typ == 'sleep' or act_metr:
				sleep_evts_list = self.JsonSleepEventsToList(sjdata)
				cdata = self.CreateCSVFromList(self.CFG.csv.sleep_evt_colnames, sleep_evts_list)
				if act_metr:
					# prepend yesterday's sleep events as they may start before midnight.
					sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2)
			if typ == 'metrics':
				if u'error' in mjdata:
					err = mjdata[u'error']
					self.Status("HTTP response error ({}, # {}): {}".format(err[0],mjdata[u'code'], err[1]))
					return False
				metrics_list = self.JsonMetricsToList(mjdata)
				if act_metr: # add activities to metrics               
					self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list)
					header = self.CFG.csv.metrics_colnames + self.CFG.csv.activity_type_colnames
				else:
					header = self.CFG.csv.metrics_colnames
				cdata = self.CreateCSVFromList(header, metrics_list)
		
		# If we were able to make a csv file, save it.
		if cdata:
			fpath = self.PathForFile(date, typ, 'csv')
			fname = os.path.split(fpath)[1]
			self.SaveData(cdata, fpath)
			self.Status("Saved "+typ+" csv file "+fname)
		return True # success

	def PathForFile(self, dt, typ, fmt):
		cfname = self.CFG.day_fname_template.format(date=dt, typ=typ, fmt=fmt)
		folder = self.app_state.savedir if fmt =='csv' else self.GetJsonStorageDir()
		fpath = os.path.join(os.path.abspath(folder), cfname)
		return fpath

	##
	##	TODO: How deal with sync before you registered with Basis?
	##
	def Sync(self, do_csv, override_cache, act_metr=True, callback = None):
		"""Secondary entry point. Catch up to current day. Downloads any missing or incomplete days, going back self.app_state.sync days."""
		# download what we have for today.  It won't be complete, but you can at least get the data.
		today = datetime.date.today()
		yr, mo, dy = today.year, today.month, today.day
		file_count = 0 # tally # of files actually changed
		if not self.CheckLogin(): # make sure we're logged in correctly before starting
			return
		for days in range(self.CFG.sync_days):
			# see if files already exists
			dt = self.YrMoDyToString(yr, mo, dy)
			self.Status('Sync: checking '+dt)
			fpath = self.PathForFile(dt, 'metrics', 'csv')
			# if file doesn't exist, then found = false, and/or break
			if not os.path.isfile(fpath) or not self.DayMetricsJsonIsComplete(dt): # download day.
				# if override_cache is True, then will always re-download all days.  Don't let that happen.
				if not self.GetDayData(yr, mo, dy, 'metrics', do_csv, override_cache = False, act_metr = act_metr):
					return # quit if problem
				file_count += 1
				if callable(callback): # callback (if available) to UI manager to prevent freeze
					callback(yr, mo, dy) # allow
			# loop change: yesterday.
			yr, mo, dy = self.GetYesterday(yr, mo, dy)
		# Done. Let user know.
		self.Status('Sync done; {} files updated'.format(file_count if file_count > 0 else 'no'))

	def CheckLogin(self):
		"""Check to see if Login is needed; if so, then log in. """
		elapsed_hr = (time.time() - self.app_state.login_timestamp)/3600
		if self.CheckSessionCookie() and self.app_state.session_token and elapsed_hr < self.CFG.login_timeout_hrs:
			success = True
		else:
			try:
				self.Login()
				success = True
			except Exception, v:
				self.Status('Login difficulty: '+`v[0]`)
				success= False
		if success:
			self.app_state.login_timestamp = time.time()
		return success
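CheckSessionCookie() is called above but not included in this example. A hedged sketch of what such a check against the MozillaCookieJar might look like (the cookie name 'access_token' is purely illustrative):

def has_valid_cookie(jar, name='access_token'):
    # Return the cookie's value if a non-expired cookie with that name is present.
    for cookie in jar:
        if cookie.name == name and not cookie.is_expired():
            return cookie.value
    return None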
Example No. 45
0
class AOJClient(object):
    def __init__(self, cookie_file_path='aoj-cookie.txt'):
        self.cookie_file_path = cookie_file_path
        self.cookiejar = MozillaCookieJar()
        if os.path.isfile(cookie_file_path):
            self.cookiejar.load(cookie_file_path)

        self.opener = urllib2.build_opener(
                urllib2.HTTPRedirectHandler(),
                urllib2.HTTPHandler(),
                urllib2.HTTPSHandler(),
                urllib2.HTTPCookieProcessor(self.cookiejar))

    def get_csrf_token(self, url):
        request = urllib2.Request(url=url)
        response = self.opener.open(request)
        data = response.read()
        return REGEXP_CSRF.findall(data)[0]

    def refresh_session(self):
        print 'Not Logged In!'
        context = {'csrfmiddlewaretoken': self.get_csrf_token(LOGIN_URL),
                   'username': raw_input('Username: '),
                   'password': getpass.getpass('Password: ')}
        request = urllib2.Request(url=SITE_PREFIX+'accounts/login/',
                                  data=urllib.urlencode(context))
        self.opener.open(request)
        self.cookiejar.save(self.cookie_file_path)

    def check_problem_exist(self, problem_name):
        try:
            request = urllib2.Request(url=PROB_PREFIX+'read/'+problem_name)
            response = self.opener.open(request)
        except urllib2.HTTPError as err:
            if err.code == 404: # Not Found
                raise AOJProblemNotExist
            else:
                raise

    def detect_language(self, source_file):
        if '.' in source_file:
            selected_language = source_file[source_file.rfind('.')+1:]
        else:
            selected_language = ''
        
        while selected_language not in LANGUAGES:
            selected_language = raw_input('Please select your language: (' + '/'.join(LANGUAGES) + ') ? ').strip().lower()

        return selected_language

    def submit(self, submission):
        self.check_problem_exist(submission.problem)
        context = {}
        context['language'] = self.detect_language(submission.source)
        context['csrfmiddlewaretoken'] = self.get_csrf_token(url=PROB_PREFIX+'submit/'+submission.problem)

        try:
            with open(submission.source) as f:
                context['source'] = f.read()
        except IOError:
            raise AOJFileNotExist()

        def try_submit(first=True):
            if not first:
                self.refresh_session()
            request = urllib2.Request(url=PROB_PREFIX+'submit/'+submission.problem,
                                  data=urllib.urlencode(context))
            response = self.opener.open(request)

            if not response.geturl().lower().startswith(LOGIN_URL):
                print 'Submission Complete!'
                return
            try_submit(first=False)
        try_submit()

    def get_submission_list(self, problem_name):
        self.check_problem_exist(problem_name)
        request = urllib2.Request(url=SITE_PREFIX+'judge/submission/recent/?problem='+problem_name)
        response = self.opener.open(request)

        try:
            import lxml.html
        except ImportError:
            print 'lxml library is needed for parsing HTML'
            return

        html = lxml.html.fromstring(unicode(response.read().decode('utf8')))
        context = {}
        fields = ('id', 'problem', 'user', 'language', 'length', 'state', 'stats', 'submitted_on')
        length = {'id': 9, 'problem': 15, 'user': 15, 'language': 5, 'length': 7, 'state': 15, 'stats': 7, 'submitted_on': 15}
        template = u'%(id)s %(problem)s %(user)s %(language)s %(length)s %(state)s %(stats)s %(submitted_on)s'

        def width(string):
            return sum(1+(unicodedata.east_asian_width(c) in 'WF') for c in string)

        for tr in html.cssselect('table.submission_list tr'):
            for field in fields:
                element = tr.find_class(field)
                if element:
                    context[field] = unicode(element[0].text_content().strip())
                else:
                    context[field] = u''
                context[field] = ' ' * (length[field] - width(context[field])) + context[field]
            print template % context
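get_submission_list() right-aligns each column by display width so that CJK characters, which occupy two terminal cells, do not break the table layout. The same helper isolated as a short sketch:

import unicodedata

def display_width(text):
    # Wide ('W') and fullwidth ('F') characters count as two columns.
    return sum(1 + (unicodedata.east_asian_width(ch) in 'WF') for ch in text)

def pad_left(text, columns):
    return ' ' * max(columns - display_width(text), 0) + text

# pad_left(u'예제', 8) -> '    예제'  (4 spaces + two double-width characters)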