def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Log in to Fetion and set the initial presence status.

    Presence status codes: online:400 invisible:0 busy:600 away:100

    :param mobile: mobile phone number used as the account name
    :param password: account password (may be None if cookies are still valid)
    :param status: presence status code to set after login
    :param cachefile: path of the local cache file ('' disables caching)
    :param cookiesfile: cookies file path; defaults to '<mobile>.cookies'
    '''
    # Optional on-disk cache of account data.
    if cachefile:
        self.cache = Cache(cachefile)
    # Default cookies file is derived from the mobile number.
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile
    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Seed the cookie file with the Netscape header if it doesn't exist yet,
    # so cookiejar.load() below doesn't fail on a missing file.
    # (Fix: use a context manager instead of leaking the file handle.)
    if not os.path.isfile(cookiesfile):
        with open(cookiesfile, 'w') as f:
            f.write(MozillaCookieJar.header)
    cookiejar.load(filename=cookiesfile)
    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password
    # Re-login only when the saved cookie session is no longer alive,
    # then persist the freshly obtained cookies.
    if not self.alive():
        self._login()
        cookiejar.save()
    self.changestatus(status)
class RDWorker:
    """
    Worker class to perform Real-Debrid related actions:
    - format login info so they can be used by Real-Debrid
    - login
    - unrestricting links
    - keeping cookies
    """
    _endpoint = 'http://www.real-debrid.com/ajax/%s'

    def __init__(self, cookie_file):
        # Path of the Netscape-format cookie file used to persist the session.
        self._cookie_file = cookie_file
        self.cookies = MozillaCookieJar(self._cookie_file)

    def login(self, username, password_hash):
        """
        Log into Real-Debrid. password_hash must be a MD5-hash of the password string.
        :param username: account name
        :param password_hash: MD5 hex digest of the password
        :return: None (returns early when a still-valid 'auth' cookie exists)
        :raise LoginError: when Real-Debrid rejects the credentials
        :raise Exception: when the HTTP request itself fails
        """
        if path.isfile(self._cookie_file):
            self.cookies.load(self._cookie_file)
            for cookie in self.cookies:
                if cookie.name == 'auth' and not cookie.is_expired():
                    return  # no need for a new cookie

        # Request a new cookie if no valid cookie is found or if it's expired.
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        # Fix: keep the try narrow so a LoginError raised below is NOT
        # swallowed and re-wrapped as a generic Exception.
        try:
            response = opener.open(self._endpoint % 'login.php?%s' % urlencode({'user': username, 'pass': password_hash}))
            resp = load(response)
            opener.close()
        except Exception as e:
            raise Exception('Login failed: %s' % str(e))

        if resp['error'] == 0:
            self.cookies.save(self._cookie_file)
        else:
            raise LoginError(resp['message'].encode('utf-8'), resp['error'])

    def unrestrict(self, link, password=''):
        """
        Unrestrict a download URL. Returns tuple of the unrestricted URL and the filename.
        :param link: url to unrestrict
        :param password: password to use for the unrestriction
        :return: (download_url, sanitized_filename)
        :raise UnrestrictionError: when Real-Debrid reports an error code
        """
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        response = opener.open(self._endpoint % 'unrestrict.php?%s' % urlencode({'link': link, 'password': password}))
        resp = load(response)
        opener.close()

        if resp['error'] == 0:
            info = resp['generated_links'][0]
            # info[2] is the direct URL, info[0] the display name; '/' would
            # break local paths, so replace it.
            return info[2], info[0].replace('/', '_')
        else:
            raise UnrestrictionError(resp['message'].encode('utf-8'), resp['error'])
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Log in to Fetion and set the initial presence status.

    Presence status codes: online:400 invisible:0 busy:600 away:100
    '''
    if cachefile:
        self.cache = Cache(cachefile)
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile
    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Ensure the cookie file exists with a valid Netscape header so that
    # cookiejar.load() below succeeds.
    # Fix: the original closed `f` in a `finally` block, so if the fallback
    # open() itself raised, `f` was unbound and the finally clause crashed
    # with a NameError; the except branch now manages its own handle.
    try:
        f = open(cookiesfile)
    except IOError:
        with open(cookiesfile, 'w') as f:
            f.write(MozillaCookieJar.header)
    else:
        f.close()
    cookiejar.load(filename=cookiesfile)
    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password
    # Re-login only when the saved session is dead; persist cookies on success.
    if not self.alive():
        if self._login():
            cookiejar.save()
    self.changestatus(status)
class WebBrowser(object):
    '''Keeps cookies in memory, emulating a web browser.
    Note: does not currently execute javascript.'''

    def __init__(self, uAgent=None, headers=None):
        '''uAgent is the user-agent string to present; a Chrome/Mac UA is used by default.'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'
        # Single opener shared by all requests so cookies persist between calls.
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        # Applies process-wide: every socket created afterwards uses this timeout.
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        # Class-body decorator: wraps a fetch-like method so its HTTP response
        # is parsed into an lxml HTML tree before being returned.
        return lambda *a, **k: etree.parse(f(*a, **k), parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''Fetch the web page at `url` (returns an lxml tree via the @newtree decorator).
        To send POST data, pass it already url-encoded (or as a dict) via `data`.'''
        if headers:
            self.opener.addheaders = headers
        # Dicts (or anything non-string) get url-encoded; strings pass through as-is.
        if not (data == None or type(data) == str):
            data = urllib.urlencode(data)
        if method == 'POST':
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise Exception
        return self.last_seen

    def geturl(self):
        # URL of the last response (after any redirects).
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''Save the in-memory cookies to disk; `path` is the file path.'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''Load cookies from disk into memory; `path` is the file path.'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        # Python 2 print statement: dumps name/value pairs of all held cookies.
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
def GetWithCookie( url, cookie_name, data = '', retry = 3):
    """Fetch `url` using cookies persisted under PATH_TMP/cookie_name.

    POSTs `data` when non-empty (and saves any newly issued cookies);
    plain GET otherwise. Retries up to `retry` times on any failure,
    returning False when all attempts are exhausted.
    """
    global PATH_TMP, ACGINDEX_UA
    try:
        cj = MozillaCookieJar( PATH_TMP + cookie_name )
        try :
            cj.load( PATH_TMP + cookie_name )
        except:
            pass # no cookie saved yet - nothing to load
        ckproc = urllib2.HTTPCookieProcessor( cj )
        AmagamiSS = urllib2.build_opener( ckproc )
        AmagamiSS.addheaders = [ ACGINDEX_UA ]
        if data != '':
            request = urllib2.Request( url = url, data = data )
            res = AmagamiSS.open( request )
            cj.save() # only save newly obtained cookies on POST
        else:
            res = AmagamiSS.open( url )
        return Haruka.GetContent( res )
    except:
        # Up to 3 reconnection attempts; give up (return False) after 3 timeouts.
        if retry > 0 :
            return Haruka.GetWithCookie( url, cookie_name, data , retry-1 )
        else:
            return False
def LIVE(url, relogin=False):
    # Resolve the moja.markiza.sk live stream for Kodi. Logs in with the
    # configured account when no valid cookies exist, then follows the chain:
    # page -> videoarchiv iframe -> media.cms embed -> hls url.
    # `relogin=True` forces a fresh login (also used as loop protection).
    if not (settings['username'] and settings['password']):
        xbmcgui.Dialog().ok('Chyba', 'Nastavte prosím moja.markiza.sk konto', '', '')
        xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xbmcgui.ListItem())
        raise RuntimeError
    cj = MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    if not relogin:
        try:
            cj.load(cookiepath)
        except IOError:
            # No saved cookies yet -> fall through to a fresh login.
            relogin = True
    if relogin:
        # Scrape the CSRF token from the login form, then POST credentials.
        response = opener.open(loginurl).read()
        token = re.search(r'name=\"_token_\" value=\"(\S+?)\">', response).group(1)
        logindata = urllib.urlencode({
            'email': settings['username'],
            'password': settings['password'],
            '_token_': token,
            '_do': 'content1-loginForm-form-submit'
        }) + '&login=Prihl%C3%A1si%C5%A5+sa'
        opener.open(loginurl, logindata)
        log('Saving cookies')
        cj.save(cookiepath)
    response = opener.open(url).read()
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(
        1)  #https://videoarchiv.markiza.sk/api/v1/user/live
    # NOTE(review): this replace is a no-op as written; it likely originally
    # decoded '&amp;' entities to '&' and was garbled - confirm against VCS.
    link = link.replace('&', '&')
    response = opener.open(link).read()
    if '<iframe src=\"' not in response:  #handle expired cookies
        if relogin:
            # Already retried with a fresh login -> credentials are wrong.
            xbmcgui.Dialog().ok('Chyba', 'Skontrolujte prihlasovacie údaje', '', '')
            raise RuntimeError  # loop protection
        else:
            LIVE(url, relogin=True)
        return
    opener.addheaders = [('Referer', link)]
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(1)  #https://media.cms.markiza.sk/embed/
    response = opener.open(link).read()
    if '<title>Error</title>' in response:
        error = re.search('<h2 class="e-title">(.*?)</h2>', response).group(
            1)  #Video nie je dostupné vo vašej krajine
        xbmcgui.Dialog().ok('Chyba', error, '', '')
        raise RuntimeError
    link = re.search(r'\"hls\": \"(\S+?)\"', response).group(
        1)  #https://h1-s6.c.markiza.sk/hls/markiza-sd-master.m3u8
    response = opener.open(link).read()
    # Kodi convention: append '|Cookie=...' header block to the stream URL.
    cookies = '|Cookie='
    for cookie in cj:
        cookies += cookie.name + '=' + cookie.value + ';'
    cookies = cookies[:-1]
    play_item = xbmcgui.ListItem(path=link + cookies)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=play_item)
def main(*args):
    """Authenticate to the Shibboleth-protected service-provider URL given on the command line.

    Loads any saved cookies from the store directory, drives the Shibboleth
    login flow, and persists the resulting cookies on success.
    """
    parser = optparse.OptionParser(usage="usage: %prog [options] URL")
    parser.add_option("-u", "--username", help="the username to login as.")
    parser.add_option("-d", "--storedir", dest="store_dir",
                      help="the directory to store the certificate/key and \
config file",
                      metavar="DIR",
                      default=path.join(homedir, ".shibboleth"))
    parser.add_option("-i", "--idp", help="unique ID of the IdP used to log in")
    parser.add_option('-v', '--verbose', dest='verbose', action='count',
                      help="Increase verbosity (specify multiple times for more)")
    opts, args = parser.parse_args()

    # Translate the -v count into a log level; WARNING is the default.
    log_level = logging.WARNING
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level)

    # No URL supplied: show usage and bail out.
    if not args:
        parser.print_help()
        return

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    target_sp = args[0]
    idp = Idp(opts.idp)
    creds = CredentialManager()
    if opts.username:
        creds.username = opts.username

    # Reuse previously saved cookies when present.
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    jar = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        jar.load()

    shibboleth = Shibboleth(idp, creds, jar)
    shibboleth.openurl(target_sp)
    print("Successfully authenticated to %s" % target_sp)
    jar.save()
def main(*args):
    # Log out of every Shibboleth session recorded in the saved cookie jar:
    # collects the domains of _shibsession_/_shibstate_ cookies and hits each
    # domain's /Shibboleth.sso/Logout endpoint.
    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-d", "--storedir", dest="store_dir",
                    help="the directory to store the certificate/key and \
config file",
                    metavar="DIR",
                    default=path.join(homedir, ".shibboleth"))
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")
    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all it not well.
    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable
    # default format.
    logging.basicConfig(level=log_level)

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    # NOTE(review): `sp` is assigned but never used below - looks vestigial.
    if args:
        sp = args[0]

    # if the cookies file exists load it
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    # One logout URL per distinct domain holding a Shibboleth session cookie.
    logout_urls = []
    for cookie in cj:
        if cookie.name.startswith('_shibsession_') or \
           cookie.name.startswith('_shibstate_'):
            logout_urls.append(
                "https://%s/Shibboleth.sso/Logout" % cookie.domain)

    logout_urls = list(set(logout_urls))

    opener = urllib2.build_opener(HTTPCookieProcessor(cookiejar=cj))
    for url in logout_urls:
        request = urllib2.Request(url)
        log.debug("GET: %s" % request.get_full_url())
        response = opener.open(request)

    # Persist the jar so the cleared sessions are reflected on disk.
    cj.save()
def save_cookies_Moz(url):
    """Fetch `url` and persist the cookies it sets to disk in Mozilla/Netscape format.

    The jar is written to 'cookies_Moz.txt' in the current directory.
    """
    # A MozillaCookieJar bound to the target file collects cookies during the request.
    filename = 'cookies_Moz.txt'
    jar = MozillaCookieJar(filename)
    opener = build_opener(HTTPCookieProcessor(jar))
    opener.open(url)
    # Both flags must be True, otherwise session/expired cookies are dropped
    # and the write effectively fails.
    jar.save(ignore_discard=True, ignore_expires=True)
def Get( url, data = '', refer = 'http://www.pixiv.net/', retry = 3 ):
    """Fetch `url` with pixiv cookies loaded from ABS_PATH/pixiv.cookie.txt.

    POSTs `data` when non-empty (saving any new cookies); plain GET otherwise.
    `refer` is sent as the Referer header. Retries up to `retry` times,
    returning False after all attempts fail.
    """
    global ABS_PATH
    cj = MozillaCookieJar( ABS_PATH + 'pixiv.cookie.txt' )
    try :
        cj.load( ABS_PATH + 'pixiv.cookie.txt' )
    except:
        pass # no cookie saved yet - nothing to load
    ckproc = urllib2.HTTPCookieProcessor( cj )
    opener = urllib2.build_opener( ckproc )
    opener.addheaders = [
        ('Accept', '*/*'),
        ('Accept-Language', 'zh-CN,zh;q=0.8'),
        ('Accept-Charset', 'UTF-8,*;q=0.5'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31'),
        ('Referer', refer)
    ]
    # Prevent weibo from serving the English site to overseas visitors.
    if 'weibo.com' in url:
        opener.addheaders = [('Cookie', 'lang=zh-cn; SUB=Af3TZPWScES9bnItTjr2Ahd5zd6Niw2rzxab0hB4mX3uLwL2MikEk1FZIrAi5RvgAfCWhPyBL4jbuHRggucLT4hUQowTTAZ0ta7TYSBaNttSmZr6c7UIFYgtxRirRyJ6Ww%3D%3D; UV5PAGE=usr512_114; UV5=usrmdins311164')]
    debug('Network: url - ' + url)
    try:
        # Send the request.
        if data != '':
            debug('Network: post')
            debug(data)
            request = urllib2.Request( url = url, data = data )
            res = opener.open( request, timeout = 15 )
            cj.save() # only save newly obtained cookies on POST
        else:
            debug('Network: get')
            res = opener.open( url, timeout = 15 )
        debug('Network: Status Code - ' + str(res.getcode()))
        return GetContent( res )
    except Exception, e:
        # Auto-retry: at most 3 attempts per image.
        if retry > 0:
            return Get( url, data, refer, retry-1 )
        else:
            log(e, 'Error: unable to get %s' % url)
            return False
def get_new_cookie(new_username,new_password,cookie_jar_path):
    # Authenticate against NASA URS (Earthdata) with HTTP Basic auth and, on
    # success, save the resulting cookie jar to cookie_jar_path.
    # Returns the jar on success, False on a 401, and exits the process on
    # any other failure.
    # Build URS4 Cookie request
    auth_cookie_url = asf_urs4['url'] + '?client_id=' + asf_urs4['client'] + '&redirect_uri=' + asf_urs4['redir'] + '&response_type=code&state='
    try:
        #python2
        user_pass = base64.b64encode (bytes(new_username+":"+new_password))
    except TypeError:
        #python3
        user_pass = base64.b64encode (bytes(new_username+":"+new_password, "utf-8"))
        user_pass = user_pass.decode("utf-8")

    # Authenticate against URS, grab all the cookies
    cookie_jar = MozillaCookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie_jar), HTTPHandler(), HTTPSHandler())
    request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)})

    # Watch out cookie rejection!
    try:
        response = opener.open(request)
    except HTTPError as e:
        if e.code == 401:
            print (" > Username and Password combo was not successful. Please try again.")
            return False
        else:
            # If an error happens here, the user most likely has not confirmed EULA.
            print ("\nIMPORTANT: There was an error obtaining a download cookie!")
            print ("Your user appears to lack permission to download data from the ASF Datapool.")
            print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
            exit(-1)
    except URLError as e:
        print ("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ")
        print ("Try cookie generation later.")
        exit(-1)

    # Did we get a cookie?
    if check_cookie_is_logged_in(cookie_jar):
        #COOKIE SUCCESS!
        print('Saving cookie jar file')
        cookie_jar.save(cookie_jar_path)
        return cookie_jar

    # if we aren't successful generating the cookie, nothing will work. Stop here!
    print ("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.")
    print ("Response was {0}.".format(response.getcode()))
    print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov")
    exit(-1)
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Log in to Fetion and set the initial presence status.

    Presence status codes: online:400 invisible:0 busy:600 away:100
    '''
    # Optional on-disk cache of account data.
    if cachefile:
        self.cache = Cache(cachefile)
    # Default cookies file is derived from the mobile number.
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile
    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Ensure the cookie file exists with a valid Netscape header so that
    # cookiejar.load() below succeeds on first run.
    try:
        f = open(cookiesfile)
    except IOError:
        f = open(cookiesfile, 'w')
        f.write(MozillaCookieJar.header)
    finally:
        f.close()
    cookiejar.load(filename=cookiesfile)
    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password
    # Re-login only when the saved cookie session is no longer alive;
    # persist the new cookies only if the login succeeded.
    if not self.alive():
        if self._login():
            cookiejar.save()
    self.changestatus(status)
class CookieWay:
    """Bridge a Mozilla/Netscape cookie jar between requests sessions and selenium drivers."""

    def __init__(self):
        # Shared in-memory jar; load()/save() persist it in Netscape format.
        self.cookiejar = MozillaCookieJar()

    def load(self, file="cookie.txt"):
        """Load cookies from disk into the jar."""
        self.cookiejar.load(file, ignore_discard=True, ignore_expires=True)

    def save(self, file="cookie.txt"):
        """Save the jar to disk."""
        self.cookiejar.save(file, ignore_discard=True, ignore_expires=True)

    def torequestscj(self, s):
        """Copy every jar cookie into the requests session `s`."""
        for item in self.cookiejar:
            cookiesobject = requests.cookies.create_cookie(domain=item.domain, name=item.name, value=item.value)
            s.cookies.set_cookie(cookiesobject)

    def toseleniumcj(self, driver):
        """Copy jar cookies into the selenium `driver`.

        Selenium only accepts cookies for the currently loaded origin, so the
        driver first visits each distinct domain before adding its cookies.
        """
        domains = []
        for item in self.cookiejar:
            if item.domain not in domains:
                domains.append(item.domain)
        # Strip a leading dot so the domain is navigable as a URL.
        for i in range(len(domains)):
            if domains[i][0:1] == ".":
                domains[i] = domains[i][1:]
        domains = list(set(domains))
        for item in domains:
            driver.get("https://" + item)
            for item2 in self.cookiejar:
                if item2.domain == item or item2.domain == "." + item:
                    cookie_dict = {
                        'domain': item2.domain,
                        'name': item2.name,
                        'value': item2.value,
                        'secure': item2.secure
                    }
                    if item2.path_specified:
                        cookie_dict['path'] = item2.path
                    driver.add_cookie(cookie_dict)

    def sele2resq(self, driver, s):
        """Copy cookies selenium -> requests via the jar."""
        self.selcj_cj(driver)
        self.torequestscj(s)

    def resq2sele(self, s, driver):
        """Copy cookies requests -> selenium via the jar."""
        self.reqcj_cj(s)
        self.toseleniumcj(driver)

    def selcj_cj(self, driver):
        """Import the selenium driver's cookies into the jar."""
        cookie = driver.get_cookies()
        for s_cookie in cookie:
            self.cookiejar.set_cookie(
                Cookie(
                    version=0,
                    name=s_cookie['name'],
                    value=s_cookie['value'],
                    port='80',
                    port_specified=False,
                    domain=s_cookie['domain'],
                    domain_specified=True,
                    domain_initial_dot=False,
                    path=s_cookie['path'],
                    path_specified=True,
                    secure=s_cookie['secure'],
                    # Fix: expires must be an int; a string breaks
                    # Cookie.is_expired() comparisons. TODO: could use the
                    # real s_cookie['expiry'] when present.
                    expires=2069592763,
                    discard=False,
                    comment=None,
                    comment_url=None,
                    # Fix: rest=None makes has_nonstandard_attr() and
                    # save() paths crash; Cookie expects a dict here.
                    rest={},
                    rfc2109=False))

    def reqcj_cj(self, s):
        """Import the requests session's cookies into the jar."""
        for s_cookie in s.cookies:
            self.cookiejar.set_cookie(
                Cookie(
                    version=0,
                    name=s_cookie.name,
                    value=s_cookie.value,
                    port='80',
                    port_specified=False,
                    domain=s_cookie.domain,
                    domain_specified=True,
                    domain_initial_dot=False,
                    path="/",
                    path_specified=True,
                    secure=True,
                    expires=2069592763,  # fix: int, not str (see selcj_cj)
                    discard=False,
                    comment=None,
                    comment_url=None,
                    rest={},  # fix: dict, not None (see selcj_cj)
                    rfc2109=False))
def get_url(url, config, additional_headers=None, additional_query_string=None, post_data=None, fail_silent=False, no_cache=False,
            return_json_errors=None, return_final_url=False, cookie_file=None):
    """Fetch `url` with gzip, ETag caching, optional cookies and proxy support.

    :param url: target URL
    :param config: dict providing at least 'USER_AGENT' (and optional 'http_headers')
    :param additional_headers: extra headers merged over the defaults
    :param additional_query_string: dict appended to the URL's query string
    :param post_data: dict or urlencoded string; presence switches to POST
    :param fail_silent: on failure, return quietly instead of notifying + exiting
    :param no_cache: skip the ETag cache entirely
    :param return_json_errors: error codes to pass back as {'json_errors': [...]}
    :param return_final_url: when True, return (final_url, content)
    :param cookie_file: Netscape cookie file to load/save for this request
    :return: response content (or tuple with final url)
    """
    # Fix: avoid a mutable default argument; [] remains the effective default.
    if return_json_errors is None:
        return_json_errors = []
    response_content = ''
    # Cache key covers url + headers + query + body so variants don't collide.
    request_hash = sha512((url + dumps(additional_headers) + dumps(additional_query_string) + dumps(post_data)).encode('utf-8')).hexdigest()
    final_url = url
    if xbmc_helper().get_bool_setting('debug_requests') is True:
        xbmc_helper().log_debug(
            'get_url - url: {} headers {} query {} post {} no_cache {} silent {} request_hash {} return_json_errors {}, cookie_file', url,
            additional_headers, additional_query_string, post_data, no_cache, fail_silent, request_hash, return_json_errors, cookie_file)

    if no_cache is True:
        etags_data = None
    else:
        etags_data = get_etags_data(request_hash)

    try:
        headers = {
            'Accept-Encoding': 'gzip, deflate',
            'User-Agent': config['USER_AGENT'],
            'Accept': '*/*',
        }
        if additional_headers is not None:
            headers.update(additional_headers)

        if config.get('http_headers', None) is not None:
            headers.update(config.get('http_headers', []))

        # Conditional GET: let the server answer 304 when our copy is current.
        if etags_data is not None:
            headers.update({'If-None-Match': etags_data['etag']})

        if additional_query_string is not None:
            _url = compat._format('{}{}{}', url, '?' if url.find('?') == -1 else '&', urlencode(additional_query_string))
            url = _url

        if isinstance(post_data, dict):
            post_data = urlencode(post_data)

        cookie_processor = None
        cookie_jar = None
        if cookie_file is not None:
            cookie_jar = MozillaCookieJar(cookie_file)
            try:
                cookie_jar.load()
            # Fix: bind the caught instance - the original logged
            # LoadError.strerror (a class attribute descriptor), not the
            # actual error message.
            except LoadError as load_error:
                xbmc_helper().log_debug('Failed to load from cookiefile {} with error {} - new session?', cookie_file, str(load_error))
            cookie_processor = HTTPCookieProcessor(cookie_jar)

        if xbmc_helper().get_bool_setting('use_https_proxy') is True and xbmc_helper().get_text_setting(
                'https_proxy_host') != '' and xbmc_helper().get_int_setting('https_proxy_port') != 0:
            proxy_uri = compat._format('{}:{}', xbmc_helper().get_text_setting('https_proxy_host'),
                                       xbmc_helper().get_text_setting('https_proxy_port'))
            xbmc_helper().log_debug('Using proxy uri {}', proxy_uri)
            prxy_handler = ProxyHandler({
                'http': proxy_uri,
                'https': proxy_uri,
            })
            if cookie_processor is None:
                install_opener(build_opener(prxy_handler))
            else:
                install_opener(build_opener(prxy_handler, cookie_processor))
        elif cookie_processor is not None:
            install_opener(build_opener(cookie_processor))

        if post_data is not None:
            request = Request(url, data=post_data.encode('utf-8'), headers=headers)
        else:
            request = Request(url, headers=headers)

        response = urlopen(request, timeout=40)
        if response.info().get('Content-Encoding') == 'gzip':
            response_content = compat._decode(GzipFile(fileobj=BytesIO(response.read())).read())
        else:
            response_content = compat._decode(response.read())

        if cookie_jar is not None:
            cookie_jar.save()

        final_url = response.geturl()
        _etag = response.info().get('etag', None)
        if no_cache is False and _etag is not None:
            set_etags_data(request_hash, _etag, response_content)

    except HTTPError as http_error:
        # 304 Not Modified -> serve the cached body.
        if http_error.code == 304 and etags_data.get('data', None) is not None:
            response_content = etags_data.get('data')
        else:
            try:
                if http_error.info().get('Content-Encoding') == 'gzip':
                    error_body = compat._decode(GzipFile(fileobj=BytesIO(http_error.read())).read())
                else:
                    error_body = compat._decode(http_error.read())
                xbmc_helper().log_debug('HTTP ERROR: {}', error_body)
                json_errors = loads(error_body)
                xbmc_helper().log_debug('JSON ERRORS: {}', json_errors)
                has_decoded_error = False
                # Normalize the two observed error shapes into {'errors': [...]}.
                if isinstance(json_errors, dict) and 'errors' not in json_errors.keys() and 'code' in json_errors.keys():
                    json_errors = {'errors': [json_errors]}
                elif isinstance(json_errors, list) and len(json_errors) == 1 and isinstance(json_errors[0], dict):
                    json_errors = {'errors': json_errors}

                err_str = str(http_error.code)
                return_errors = []
                if isinstance(json_errors, dict):
                    for error in json_errors.get('errors', []):
                        if 'msg' in error.keys():
                            err_str = compat._format('{}|{}', err_str, error.get('msg'))
                            has_decoded_error = True
                        if 'code' in error.keys() and error['code'] in return_json_errors:
                            return_errors.append(error['code'])
                            has_decoded_error = True

                xbmc_helper().log_debug('return_json_errors {}', return_errors)

                # Whitelisted error codes are handed back to the caller as JSON.
                if len(return_errors) > 0:
                    response_content = dumps({'json_errors': return_errors})
                elif has_decoded_error is True:
                    xbmc_helper().notification(
                        'Error',
                        err_str,
                    )
                    exit(0)
            except Exception:
                # Error body wasn't decodable JSON - surface the original HTTP error.
                raise http_error

    except Exception as e:
        xbmc_helper().log_error('Failed to load url: {} headers {} post_data {} - Exception: {}', url, headers, post_data, e)
        if fail_silent is True:
            pass
        else:
            xbmc_helper().notification(compat._format(xbmc_helper().translation('ERROR'), 'URL Access'),
                                       compat._format(xbmc_helper().translation('MSG_NO_ACCESS_TO_URL'), str(url)))
            exit(0)

    if return_final_url:
        return final_url, response_content

    return response_content
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20200110T101421_20200110T101446_019753_025598_C902-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191229T101421_20191229T101446_019578_025007_DB2A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191217T101422_20191217T101447_019403_024A73_D2A9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191205T101422_20191205T101447_019228_0244DD_9778-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191123T101423_20191123T101448_019053_023F55_95B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191111T101423_20191111T101448_018878_0239B4_3FCF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191030T101423_20191030T101448_018703_02340F_3D8D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191018T101423_20191018T101448_018528_022E97_0AEB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191006T101423_20191006T101448_018353_022937_B959-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190912T101422_20190912T101447_018003_021E50_B3FB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190831T101421_20190831T101446_017828_0218D8_1ADE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190819T101421_20190819T101446_017653_021365_B751-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190807T101420_20190807T101445_017478_020DEF_A757-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101514_20190801T101539_028374_0334DB_E6C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101449_20190801T101514_028374_0334DB_6CA1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190726T101419_20190726T101444_017303_0208A8_2D9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190714T101419_20190714T101444_017128_020394_A8B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190702T101418_20190702T101443_016953_01FE6B_BE7D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190620T101417_20190620T101442_016778_01F93E_D609-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190608T101416_20190608T101441_016603_01F407_282F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190527T101416_20190527T101441_016428_01EECF_79D2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190515T101415_20190515T101440_016253_01E971_7A00-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190503T101415_20190503T101440_016078_01E3E6_D149-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190421T101414_20190421T101439_015903_01DE0C_E919-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190409T101414_20190409T101439_015728_01D843_E7B3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190328T101413_20190328T101438_015553_01D27A_7404-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190316T101413_20190316T101438_015378_01CCBE_781F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190304T101413_20190304T101438_015203_01C713_17EF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190220T101413_20190220T101438_015028_01C151_EA49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190208T101413_20190208T101438_014853_01BB8C_940D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190127T101414_20190127T101439_014678_01B5D2_3B0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190115T101414_20190115T101439_014503_01B03A_4439-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190103T101414_20190103T101439_014328_01AA92_7D9B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181222T101415_20181222T101440_014153_01A4CF_3F05-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181210T101415_20181210T101440_013978_019F03_1C29-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181128T101416_20181128T101441_013803_01995A_6DD3-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181116T101416_20181116T101441_013628_0193C1_FE12-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181104T101416_20181104T101441_013453_018E4D_0014-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101420_20181023T101445_013278_0188CC_5952-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101355_20181023T101420_013278_0188CC_0FA6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181011T101417_20181011T101442_013103_01835D_D0A0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180929T101416_20180929T101441_012928_017E0F_226F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180917T101416_20180917T101441_012753_0178B3_B66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180905T101415_20180905T101440_012578_017358_3259-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180824T101415_20180824T101440_012403_016DE5_85C3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180812T101414_20180812T101439_012228_01687D_BCA9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180731T101414_20180731T101439_012053_01631A_ADBC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180719T101413_20180719T101438_011878_015DD1_3E69-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180707T101412_20180707T101437_011703_015872_5055-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180625T101411_20180625T101436_011528_015300_5709-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180613T101411_20180613T101436_011353_014D8E_1799-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180601T101410_20180601T101435_011178_014821_B178-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180520T101409_20180520T101434_011003_014273_5667-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180508T101408_20180508T101433_010828_013CCB_18C3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180426T101408_20180426T101433_010653_013720_457C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180414T101407_20180414T101432_010478_01318E_FB0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180402T101407_20180402T101432_010303_012BEA_9E94-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180321T101406_20180321T101431_010128_012640_6D69-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180309T101406_20180309T101431_009953_01208C_4F7B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180225T101406_20180225T101431_009778_011AAD_2181-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180213T101407_20180213T101432_009603_0114ED_D868-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180201T101407_20180201T101432_009428_010F21_C8FA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180120T101407_20180120T101432_009253_010968_4DBE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180108T101408_20180108T101433_009078_0103B1_EB1D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171227T101408_20171227T101433_008903_00FDFF_A4F1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171215T101409_20171215T101434_008728_00F863_906F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171203T101409_20171203T101434_008553_00F2D6_B8D7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171121T101409_20171121T101434_008378_00ED57_D6D0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171028T101410_20171028T101435_008028_00E2F3_6DFC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171016T101410_20171016T101435_007853_00DDE1_829D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171004T101409_20171004T101434_007678_00D8F4_5C9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170922T101409_20170922T101434_007503_00D3F7_9FEC-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170910T101409_20170910T101434_007328_00CED7_2D8E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170829T101408_20170829T101433_007153_00C9B8_96C9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170817T101408_20170817T101433_006978_00C4A9_5D92-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170805T101407_20170805T101432_006803_00BF8B_4F73-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170724T101407_20170724T101432_006628_00BA88_2017-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170712T101406_20170712T101431_006453_00B58B_7674-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170630T101405_20170630T101430_006278_00B098_CAC7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170618T101404_20170618T101429_006103_00AB89_7D52-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170606T101404_20170606T101429_005928_00A666_6411-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170525T101403_20170525T101428_005753_00A14F_A827-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170513T101402_20170513T101427_005578_009C52_4E38-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170501T101402_20170501T101427_005403_009788_B5E9-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170419T101401_20170419T101426_005228_009270_5637-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170407T101401_20170407T101426_005053_008D67_BB68-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170326T101400_20170326T101425_004878_008859_36E8-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170314T101400_20170314T101425_004703_008359_7A42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170302T101400_20170302T101425_004528_007E2B_F8A2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170206T101400_20170206T101425_004178_0073C4_69B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170113T101401_20170113T101426_003828_00695B_0B49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20161220T101403_20161220T101428_003478_005F1B_E6DD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161126T101403_20161126T101428_003128_005520_5BBB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161102T101404_20161102T101429_002778_004B45_F931-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161009T101404_20161009T101429_002428_00419A_2FD0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20160927T101404_20160927T101429_002253_003CAB_BC6E-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101423_20160909T101448_012974_01487C_40C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101448_20160909T101513_012974_01487C_E55B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160816T101434_20160816T101503_012624_013CE1_AA51-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160723T101444_20160723T101513_012274_013152_9A67-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101426_20160629T101455_011924_0125E4_7F71-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101455_20160629T101520_011924_0125E4_D66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160605T101440_20160605T101505_011574_011AE7_0C49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160512T101439_20160512T101504_011224_010F91_FCE1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160418T101435_20160418T101500_010874_01048F_89B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160325T101434_20160325T101459_010524_00FA2B_4EAD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160301T101434_20160301T101459_010174_00F035_3B54-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101440_20160206T101505_009824_00E617_C31E-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101415_20160206T101440_009824_00E617_D79A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160113T101434_20160113T101459_009474_00DBEE_5DBA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151220T101435_20151220T101500_009124_00D1EE_CFED-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151102T101442_20151102T101507_008424_00BE79_E2E7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151009T101442_20151009T101507_008074_00B50C_12FD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150915T101441_20150915T101506_007724_00ABB3_226C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150822T101441_20150822T101506_007374_00A234_599D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150729T101439_20150729T101504_007024_0098B2_E48E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150705T101438_20150705T101503_006674_008EB2_3496-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101442_20150518T101507_005974_007B3F_EFEC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101417_20150518T101442_005974_007B3F_DF42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150424T101426_20150424T101451_005624_00734A_AD5A-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101444_20150331T101509_005274_006AB8_213D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101419_20150331T101444_005274_006AB8_EBF3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101444_20150307T101509_004924_006269_7919-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101419_20150307T101444_004924_006269_9089-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101443_20150211T101508_004574_005A0A_8FEE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101418_20150211T101443_004574_005A0A_4D0C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101444_20150118T101509_004224_00522A_42D5-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101419_20150118T101444_004224_00522A_26FE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101439_20141225T101504_003874_004A4B_D1FC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101414_20141225T101439_003874_004A4B_367E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101440_20141201T101505_003524_004254_556F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101415_20141201T101440_003524_004254_5C25-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101441_20141107T101506_003174_003A78_745C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101416_20141107T101441_003174_003A78_5D83-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101419_20141014T101444_002824_0032F1_B89E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101444_20141014T101509_002824_0032F1_B54D-PREDORB-10m-power-filt-rtc-gamma.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." 
.format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar 
self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! 
try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! 
return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. 
" ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. 
") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. 
sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. \n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." 
% (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( 
"--------------------------------------------------------------------------------" )
class Bilibili():
    """Bilibili client: login, category browsing, play-URL resolution.

    Python 2 code: relies on urllib/urllib2, dict.has_key and str-based md5.
    Cookies persist in a MozillaCookieJar next to this module; a logged-in
    session is detected via the 'DedeUserID' cookie.
    """
    name = u'哔哩哔哩 (Bilibili)'

    api_url = 'http://interface.bilibili.com/playurl?'
    bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
    SEC1 = '94aba54af9065f71de72f5508f1cd42e'
    SEC2 = '9b288147e5474dd2aa67085f716c560d'
    supported_stream_profile = [u'流畅', u'高清', u'超清']
    stream_types = [{
        'id': 'hdflv'
    }, {
        'id': 'flv'
    }, {
        'id': 'hdmp4'
    }, {
        'id': 'mp4'
    }, {
        'id': 'live'
    }, {
        'id': 'vc'
    }]
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET, width=720,
                 height=480):
        """Load the cookie jar, detect login state and install the opener."""
        self.defaultHeader = {'Referer': 'http://www.bilibili.com'}
        #self.defaultHeader = {}
        self.appkey = appkey
        self.appsecret = appsecret
        self.WIDTH = width
        self.HEIGHT = height
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            # 'DedeUserID' cookie carries the member id when logged in
            key = None
            for ck in self.cj:
                if ck.name == 'DedeUserID':
                    key = ck.value
                    break
            if key is not None:
                self.is_login = True
                self.mid = str(key)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)
        # drop any stale subtitle file from a previous run (best effort)
        try:
            os.remove(self._get_tmp_dir() + '/tmp.ass')
        except:
            pass

    def _get_tmp_dir(self):
        """Return the system temp dir, or '' if it cannot be determined."""
        try:
            return tempfile.gettempdir()
        except:
            return ''

    def get_captcha(self, path=None):
        """Fetch a login captcha image and write it to *path*; return path."""
        # first request only primes the 'sid' session cookie if it is missing
        key = None
        for ck in self.cj:
            if ck.name == 'sid':
                key = ck.value
                break
        if key is None:
            get_html(
                LOGIN_CAPTCHA_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = get_html(
            LOGIN_CAPTCHA_URL.format(random()),
            decoded=False,
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        """RSA-encrypt hash+password per the passport API, return urlquoted b64."""
        import rsa
        result = loads(
            get_html(
                LOGIN_HASH_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build a signed query string: sorted params + md5(query+appsecret)."""
        params['appkey'] = self.appkey
        data = ''
        keys = params.keys()
        # must sorted. urllib.urlencode(params) doesn't work
        keys.sort()
        for key in keys:
            data += '{}={}&'.format(key, urllib.quote(str(params[key])))
        data = data[:-1]  # remove last '&'
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        """Scrape the category tree from the homepage into a dict of nodes."""
        # FIX: every node must carry a 'subs' list — the original omitted it
        # and node['subs'].append(...) below raised KeyError.
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(get_html(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)
        #Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }
        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(get_html(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        """Return [{tid: node}, …] for *tid* and its direct sub-categories."""
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url']}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        """Return the display title for a category id."""
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        """Return the supported sort orders."""
        return ORDER

    def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20):
        """List videos of a category, optionally filtered by tag id."""
        if tag == 0:
            url = LIST_BY_ALL.format(tid, pagesize, page)
        else:
            url = LIST_BY_TAG.format(tag, tid, pagesize, page)
        results = loads(get_html(url))
        return results

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=20):
        """Return (videos, total_pages) for a category listing."""
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = []
        # the API returns a dict keyed by stringified index, not a list
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                continue
        return results, result['pages']

    def get_my_info(self):
        """Return the logged-in user's profile data, or [] when logged out."""
        if self.is_login == False:
            return []
        result = loads(get_html(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=20):
        """Return (followed bangumi, total_pages); [] when logged out."""
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        """Return season detail; unwraps a JSONP response if necessary."""
        url = BANGUMI_SEASON_URL.format(season_id)
        result = get_html(url, headers=self.defaultHeader)
        if result[0] != '{':
            # strip JSONP wrapper: callback( ... );
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=20):
        """Return (history entries, guessed total pages); [] when logged out."""
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        # API exposes no total count: assume one more page while full pages return
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=20):
        """Return (feed entries, total_pages); [] when logged out."""
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = loads(get_html(url, headers=self.defaultHeader))
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=20):
        """Return the list of followed users; [] when logged out."""
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url))
        return result['data']['list']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=20):
        """Return (videos, total_pages) uploaded by user *mid*."""
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        """Return the channel list of user *mid*; [] when logged out."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav_box(self):
        """Return the user's favourite folders; [] when logged out."""
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=20):
        """Return (videos, total_pages) inside favourite folder *fav_box*."""
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        """Log in; return (True, '') on success or (False, error message)."""
        #utils.get_html('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = get_html(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })
        # success is signalled by the DedeUserID cookie appearing in the jar
        key = None
        for ck in self.cj:
            if ck.name == 'DedeUserID':
                key = ck.value
                break
        if key is None:
            return False, LOGIN_ERROR_MAP[loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(key)
        return True, ''

    def logout(self):
        """Clear and persist the cookie jar, dropping login state."""
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20):
        """Return (page results, total_pages), recursively fetching up to
        *pagesize* further pages."""
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = [result]
        # pagesize doubles as the remaining recursion budget
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(
                aid, int(page) + 1, fav, pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        """Return the raw page-list JSON for *aid*, or {} on any failure."""
        url = AV_URL.format(aid)
        try:
            page = get_html(url)
            result = loads(page)
        except:
            result = {}
        return result

    # Use niconvert to generate an ASS subtitle file from the danmaku stream
    def parse_subtitle(self, cid):
        """Write danmaku for *cid* to <tmpdir>/tmp.ass; return its basename
        or '' when the comment site cannot be parsed."""
        page_full_url = COMMENT_URL.format(cid)
        website = create_website(page_full_url)
        if website is None:
            return ''
        else:
            text = website.ass_subtitles_text(
                font_name=u'黑体',
                font_size=24,
                resolution='%d:%d' % (self.WIDTH, self.HEIGHT),
                line_count=12,
                bottom_margin=0,
                tune_seconds=0)
            f = open(self._get_tmp_dir() + '/tmp.ass', 'w')
            f.write(text.encode('utf8'))
            f.close()
            return 'tmp.ass'

    def get_video_urls(self, cid):
        """Resolve the playable URL list for *cid* via the signed interface."""
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = parseString(get_html(url))
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            # FIX: test the stream URL (u), not the API url, and keep the
            # rewritten value — the original discarded re.sub's result.
            if re.match(r'.*\.qqvideo\.tc\.qq\.com', u):
                u = re.sub(r'.*\.qqvideo\.tc', 'http://vsrc.store', u)
            urls.append(u)
            #urls.append(u + '|Referer={}'.format(urllib.quote('https://www.bilibili.com/')))
        return urls

    def add_history(self, aid, cid):
        """Record a playback history entry for (aid, cid)."""
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        get_html(url)

    def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs):
        """Call the (bangumi) playurl API with a signed query; return raw XML."""
        ts = str(int(time.time()))
        if not bangumi:
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(
                cid, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest()
            api_url = self.api_url + params_str + '&sign=' + chksum
        else:
            mod = 'movie' if bangumi_movie else 'bangumi'
            params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format(
                cid, mod, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest()
            api_url = self.bangumi_api_url + params_str + '&sign=' + chksum
        return get_html(api_url)

    def download_by_vid(self, cid, bangumi, **kwargs):
        """Return quoted stream URLs (with Referer hint) for *cid*.

        NOTE(review): `quality` is computed but `level` is what is passed to
        api_req — kept as-is since callers may rely on the current behavior.
        """
        stream_id = kwargs.get('stream_id')
        if stream_id and stream_id in self.fmt2qlt:
            quality = stream_id
        else:
            quality = 'hdflv' if bangumi else 'flv'
        level = kwargs.get('level', 0)
        xml = self.api_req(cid, level, bangumi, **kwargs)
        doc = parseString(xml)
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            #urls.append(u)
            urls.append(
                urllib.quote_plus(u + '|Referer=https://www.bilibili.com'))
        return urls

    def entry(self, **kwargs):
        """Dispatch a normal video page to the right downloader backend."""
        # tencent player
        tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"',
                                 self.page)
        if tc_flashvars:
            tc_flashvars = tc_flashvars.group(1)
        if tc_flashvars is not None:
            self.out = True
            return qq_download_by_vid(tc_flashvars,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])
        # FIX: guard the match — the original called .group(1) directly, so a
        # page without "cid=" raised AttributeError and the else branch below
        # (the flashvars fallback) was unreachable.
        cid_match = re.search(r'cid=(\d+)', self.page)
        cid = cid_match.group(1) if cid_match is not None else None
        if cid is not None:
            return self.download_by_vid(cid, False, **kwargs)
        else:
            # flashvars?
            flashvars = re.search(r'flashvars="([^"]+)"', self.page).group(1)
            if flashvars is None:
                raise Exception('Unsupported page {}'.format(self.url))
            param = flashvars.split('&')[0]
            t, cid = param.split('=')
            t = t.strip()
            cid = cid.strip()
            if t == 'vid':
                sina_download_by_vid(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            elif t == 'ykid':
                youku_download_by_vid(cid,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])
            elif t == 'uid':
                tudou_download_by_id(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            else:
                raise NotImplementedError(
                    'Unknown flashvars {}'.format(flashvars))
            return

    def movie_entry(self, **kwargs):
        """Handle a bangumi movie page: resolve aid, then download by cid."""
        patt = r"var\s*aid\s*=\s*'(\d+)'"
        aid = re.search(patt, self.page).group(1)
        page_list = loads(
            get_html(
                'http://www.bilibili.com/widget/getPageList?aid={}'.format(
                    aid)))
        # better ideas for bangumi_movie titles?
        self.title = page_list[0]['pagename']
        return self.download_by_vid(page_list[0]['cid'],
                                    True,
                                    bangumi_movie=True,
                                    **kwargs)

    def get_video_from_url(self, url, **kwargs):
        """Resolve *url* (following redirects) and dispatch by host/section."""
        self.url = url_locations(url)
        frag = urlparse(self.url).fragment
        # http://www.bilibili.com/video/av3141144/index_2.html#page=3
        if frag:
            hit = re.search(r'page=(\d+)', frag)
            if hit is not None:
                page = hit.group(1)
                av_id = re.search(r'av(\d+)', self.url).group(1)
                self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(
                    av_id, page)
        self.page = get_html(self.url)
        if 'bangumi.bilibili.com/movie' in self.url:
            return self.movie_entry(**kwargs)
        elif 'bangumi.bilibili.com' in self.url:
            return self.bangumi_entry(**kwargs)
        elif 'live.bilibili.com' in self.url:
            return self.live_entry(**kwargs)
        elif 'vc.bilibili.com' in self.url:
            return self.vc_entry(**kwargs)
        else:
            return self.entry(**kwargs)

    def bangumi_entry(self, **kwargs):
        # not implemented in this build
        pass

    def live_entry(self, **kwargs):
        # not implemented in this build
        pass

    def vc_entry(self, **kwargs):
        # not implemented in this build
        pass
class Session(requests.Session): """ Session for making API requests and interacting with the filesystem """ def __init__(self): super(Session, self).__init__() self.trust_env = False cookie_file = os.path.expanduser('~/.danabox/cookies.txt') cookie_dir = os.path.dirname(cookie_file) self.cookies = MozillaCookieJar(cookie_file) # Create the $HOME/.danabox dir if it doesn't exist if not os.path.isdir(cookie_dir): os.mkdir(cookie_dir, 0700) # Load existing cookies if the cookies.txt exists if os.path.isfile(cookie_file): self.cookies.load() self.cookies.clear_expired_cookies() def clear(self): """Clear cookies""" try: self.cookies.clear() self.cookies.save() except KeyError: pass def git_root(self): """ Return the absolute path from the git repository root If no git repository exists, raise an EnvironmentError """ try: git_root = subprocess.check_output( ['git', 'rev-parse', '--show-toplevel'], stderr=subprocess.PIPE).strip('\n') except subprocess.CalledProcessError: raise EnvironmentError('Current directory is not a git repository') return git_root def get_app(self): """ Return the application name for the current directory The application is determined by parsing `git remote -v` output for the origin remote. Because Danabox only allows deployment of public Github repos we can create unique app names from a combination of the Github user's name and the repo name. Eg; '[email protected]:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra' If no application is found, raise an EnvironmentError. 
""" git_root = self.git_root() remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root) if remotes is None: raise EnvironmentError('No git remotes found.') for remote in remotes.splitlines(): if 'github.com' in remote: url = remote.split()[1] break if url is None: raise EnvironmentError('No Github remotes found.') pieces = url.split('/') owner = pieces[-2].split(':')[-1] repo = pieces[-1].replace('.git', '') app_raw = owner + '/' + repo app_name = app_raw.replace('-', '--').replace('/', '-') return app_name app = property(get_app) def request(self, *args, **kwargs): """ Issue an HTTP request with proper cookie handling including `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>` """ for cookie in self.cookies: if cookie.name == 'csrftoken': if 'headers' in kwargs: kwargs['headers']['X-CSRFToken'] = cookie.value else: kwargs['headers'] = {'X-CSRFToken': cookie.value} break response = super(Session, self).request(*args, **kwargs) self.cookies.save() return response
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Persist the jar to disk unless it is a non-persistent jar.

    Jars whose ``mode`` is ``"discard"`` or ``"session"`` are intentionally
    never written out; any other mode delegates to ``MozillaCookieJar.save``.
    """
    if self.mode in ("discard", "session"):
        return None
    return MozillaCookieJar.save(self, filename, ignore_discard,
                                 ignore_expires)
from urllib2 import HTTPCookieProcessor,build_opener from cookielib import CookieJar,MozillaCookieJar from redis_test import Redis # 1. build a cookie with file name # 2. create a cookie handler # 3. build a opener fileName = 'cookie.txt' cookie = MozillaCookieJar(fileName) handler = HTTPCookieProcessor(cookie) opener = build_opener(handler) response = opener.open("http://www.baidu.com") for item in cookie: print 'Name = ' + item.name print 'Value = ' + item.value cookie.save(ignore_discard=True,ignore_expires=True)
class bulk_downloader:
    """Download Sentinel-1 GRD_HD products from the ASF datapool.

    Handles Earthdata URS4 cookie acquisition/validation, size-checked
    downloads with redirect/re-auth handling, and a success/failure summary
    that is written back to a database table.
    """

    def __init__(self, id, username, password, table_name):
        # List of files to download (one product URL, chosen by sensor prefix)
        if id[:3] == 'S1A':
            self.files = [
                'https://datapool.asf.alaska.edu/GRD_HD/SA/{}.zip'.format(id)
            ]
        elif id[:3] == 'S1B':
            self.files = [
                'https://datapool.asf.alaska.edu/GRD_HD/SB/{}.zip'.format(id)
            ]
        else:
            # unknown platform prefix: log and leave the object half-built
            print('no identified sensor: {}'.format(id))
            logging.error('sensor not identified: {}'.format(id))
            return
        self.username = username
        self.password = password
        self.table_name = table_name
        self.save_to = os.getenv('IMAGES_PATH')
        # Local stash of cookies so we don't always have to ask
        # NOTE(review): abspath('__file__') resolves the literal string
        # "__file__" against the CWD, not this module's path — confirm intent.
        self.cookie_jar_path = os.path.join(
            os.path.dirname(os.path.abspath('__file__')),
            '.bulk_download_cookiejar.txt')
        self.cookie_jar = None
        self.asf_urs4 = {
            'url': 'https://urs.earthdata.nasa.gov/oauth/authorize',
            'client': 'BO_n7nTIlMljdvU6kRRB3g',
            'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'
        }
        # Make sure we can write it our current directory
        if os.access(os.getcwd(), os.W_OK) is False:
            print(
                'WARNING: Cannot write to current path! Check permissions for {0}'
                .format(os.getcwd()))
            exit(-1)
        # For SSL
        self.context = {}
        # Make sure cookie_jar is good to go!
        self.get_cookie()
        # summary
        self.total_bytes = 0
        self.total_time = 0
        self.cnt = 0
        self.success = []
        self.failed = []
        self.skipped = []

    # Get and validate a cookie
    def get_cookie(self):
        """Reuse a valid on-disk cookie jar or obtain a fresh URS cookie."""
        # remove the cookie_jar_path file if its older than a day
        # NOTE(review): cookie_creation_date()/save_cookie_creation_date() are
        # defined elsewhere in this file — not visible in this chunk.
        date_created = cookie_creation_date()
        if date_created:
            dx = datetime.now() - date_created
            hour = dx.total_seconds() / (3600)
            if hour > 10:
                print('cookie greater than 10 hours so removing it')
                os.remove(self.cookie_jar_path)
        if os.path.isfile(self.cookie_jar_path):
            self.cookie_jar = MozillaCookieJar()
            self.cookie_jar.load(self.cookie_jar_path)
            # make sure cookie is still valid
            if self.check_cookie():
                print(' > Re-using previous cookie jar.')
                return True
            else:
                print(' > Could not validate old cookie Jar')
        # We don't have a valid cookie, prompt user or creds
        print('No existing URS cookie found, creating one')
        print('(Credentials will not be stored, saved or logged anywhere)')
        # Keep trying 'till user gets the right U:P
        while self.check_cookie() is False:
            self.get_new_cookie()
        return True

    # Validate cookie before we begin
    def check_cookie(self):
        """HEAD the URS profile page to confirm the cookie still authenticates."""
        if self.cookie_jar is None:
            print(' > Cookiejar is bunk: {0}'.format(self.cookie_jar))
            return False
        # File we know is valid, used to validate cookie
        file_check = 'https://urs.earthdata.nasa.gov/profile'
        # Apply custom Redirect Hanlder
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        install_opener(opener)
        # Attempt a HEAD request
        request = Request(file_check)
        request.get_method = lambda: 'HEAD'
        try:
            print(' > attempting to download {0}'.format(file_check))
            response = urlopen(request, timeout=30)
            resp_code = response.getcode()
            # Make sure we're logged in
            if not self.check_cookie_is_logged_in(self.cookie_jar):
                return False
            # Save cookiejar
            self.cookie_jar.save(self.cookie_jar_path)
        except HTTPError:
            # If we ge this error, again, it likely means the user has not agreed to current EULA
            print('\nIMPORTANT: ')
            print(
                'Your user appears to lack permissions to download data from the ASF Datapool.'
            )
            print(
                '\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov'
            )
            exit(-1)
        # This return codes indicate the USER has not been approved to download the data
        if resp_code in (300, 301, 302, 303):
            try:
                redir_url = response.info().getheader('Location')
            except AttributeError:
                redir_url = response.getheader('Location')
            # Funky Test env:
            if ('vertex-retired.daac.asf.alaska.edu' in redir_url
                    and 'test' in self.asf_urs4['redir']):
                print("Cough, cough. It's dusty in this test env!")
                return True
            print('Redirect ({0}) occurred, invalid cookie value!'.format(
                resp_code))
            return False
        # These are successes!
        if resp_code in (200, 307):
            return True
        return False

    def get_new_cookie(self):
        """Authenticate against URS4 with Basic auth and save the new cookie."""
        # Start by prompting user to input their credentials
        new_username = self.username
        new_password = self.password
        # Build URS4 Cookie request
        auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \
            self.asf_urs4['redir'] + '&response_type=code&state='
        try:
            # python2
            user_pass = base64.b64encode(
                bytes(new_username + ':' + new_password))
        except TypeError:
            # python3
            user_pass = base64.b64encode(
                bytes(new_username + ':' + new_password, 'utf-8'))
            user_pass = user_pass.decode('utf-8')
        # Authenticate against URS, grab all the cookies
        self.cookie_jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                              HTTPHandler(), HTTPSHandler(**self.context))
        request = Request(
            auth_cookie_url,
            headers={"Authorization": "Basic {0}".format(user_pass)})
        # Watch out cookie rejection!
        try:
            response = opener.open(request)
        except HTTPError as e:
            if e.code == 401:
                print(
                    " > Username and Password combo was not successful. Please try again."
                )
                return False
            else:
                # If an error happens here, the user most likely has not confirmed EULA.
                print(
                    "\nIMPORTANT: There was an error obtaining a download cookie!"
                )
                print(
                    "Your user appears to lack permission to download data from the ASF Datapool."
                )
                print(
                    "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
                )
                exit(-1)
        except URLError as e:
            print(
                "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. "
            )
            print("Try cookie generation later.")
            exit(-1)
        # Did we get a cookie?
        if self.check_cookie_is_logged_in(self.cookie_jar):
            # COOKIE SUCCESS!
            print('cookie saved')
            self.cookie_jar.save(self.cookie_jar_path)
            save_cookie_creation_date()
            return True
        # if we aren't successful generating the cookie, nothing will work. Stop here!
        print(
            "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again."
        )
        print("Response was {0}.".format(response.getcode()))
        print(
            "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov"
        )
        exit(-1)

    # make sure we're logged into URS
    def check_cookie_is_logged_in(self, cj):
        """Return True when the jar carries the URS logged-in marker cookie."""
        for cookie in cj:
            if cookie.name == 'urs_user_already_logged':
                # Only get this cookie if we logged in successfully!
                return True
        return False

    # Download the file
    def download_file_with_cookiejar(self,
                                     url,
                                     file_count,
                                     total,
                                     recursion=False):
        """Download *url*, re-authenticating through URS once if redirected.

        Returns (actual_size, reported_size); (None, None) when the file is
        already complete locally; (False, None) on error.
        """
        # see if we've already download this file and if it is that it is the correct size
        download_file = os.path.basename(url).split('?')[0]
        if os.path.isfile(os.path.join(self.save_to, download_file)):
            try:
                request = Request(url)
                request.get_method = lambda: 'HEAD'
                response = urlopen(request, timeout=30)
                remote_size = self.get_total_size(response)
                # Check that we were able to derive a size.
                if remote_size:
                    local_size = os.path.getsize(
                        os.path.join(self.save_to, download_file))
                    if remote_size < (local_size +
                                      (local_size * .01)) and remote_size > (
                                          local_size - (local_size * .01)):
                        print(
                            " > Download file {0} exists! \n > Skipping download of {1}. "
                            .format(download_file, url))
                        return None, None
                    # partial file size wasn't full file size, lets blow away the chunk and start again
                    print(
                        " > Found {0} but it wasn't fully downloaded. Removing file and downloading again."
                        .format(download_file))
                    os.remove(os.path.join(self.save_to, download_file))
            except ssl.CertificateError as e:
                print(" > ERROR: {0}".format(e))
                print(
                    " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
                )
                return False, None
            except HTTPError as e:
                if e.code == 401:
                    print(
                        " > IMPORTANT: Your user may not have permission to download this type of data!"
                    )
                else:
                    print(" > Unknown Error, Could not get file HEAD: {0}".
                          format(e))
            except URLError as e:
                print("URL Error (from HEAD): {0}, {1}".format(e.reason, url))
                if "ssl.c" in "{0}".format(e.reason):
                    print(
                        "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                    )
                    return False, None
        # attempt https connection
        try:
            request = Request(url)
            response = urlopen(request, timeout=30)
            # Watch for redirect
            if response.geturl() != url:
                # See if we were redirect BACK to URS for re-auth.
                if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(
                ):
                    if recursion:
                        print(
                            " > Entering seemingly endless auth loop. Aborting. "
                        )
                        return False, None
                    # make this easier. If there is no app_type=401, add it
                    new_auth_url = response.geturl()
                    if "app_type" not in new_auth_url:
                        new_auth_url += "&app_type=401"
                    print(
                        " > While attempting to download {0}....".format(url))
                    print(" > Need to obtain new cookie from {0}".format(
                        new_auth_url))
                    old_cookies = [cookie.name for cookie in self.cookie_jar]
                    opener = build_opener(HTTPCookieProcessor(self.cookie_jar),
                                          HTTPHandler(),
                                          HTTPSHandler(**self.context))
                    request = Request(new_auth_url)
                    try:
                        response = opener.open(request)
                        for cookie in self.cookie_jar:
                            if cookie.name not in old_cookies:
                                print(" > Saved new cookie: {0}".format(
                                    cookie.name))
                                # A little hack to save session cookies
                                if cookie.discard:
                                    cookie.expires = int(
                                        time.time()) + 60 * 60 * 24 * 30
                                    print(
                                        " > Saving session Cookie that should have been discarded! "
                                    )
                        self.cookie_jar.save(self.cookie_jar_path,
                                             ignore_discard=True,
                                             ignore_expires=True)
                    except HTTPError as e:
                        print("HTTP Error: {0}, {1}".format(e.code, url))
                        return False, None
                    # Okay, now we have more cookies! Lets try again, recursively!
                    print(" > Attempting download again with new cookies!")
                    return self.download_file_with_cookiejar(url,
                                                             file_count,
                                                             total,
                                                             recursion=True)
                print(
                    " > 'Temporary' Redirect download @ Remote archive:\n > {0}"
                    .format(response.geturl()))
            # seems to be working
            print("({0}/{1}) Downloading {2}".format(file_count, total, url))
            # Open our local file for writing and build status bar
            tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.')
            self.chunk_read(response, tf, report_hook=self.chunk_report)
            # Reset download status
            sys.stdout.write('\n')
            tempfile_name = tf.name
            tf.close()
        # handle errors
        except HTTPError as e:
            print("HTTP Error: {0}, {1}".format(e.code, url))
            if e.code == 401:
                print(
                    " > IMPORTANT: Your user does not have permission to download this type of data!"
                )
            if e.code == 403:
                print(" > Got a 403 Error trying to download this file. ")
                print(
                    " > You MAY need to log in this app and agree to a EULA. ")
            return False, None
        except URLError as e:
            print("URL Error (from GET): {0}, {1}, {2}".format(
                e, e.reason, url))
            if "ssl.c" in "{0}".format(e.reason):
                print(
                    "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error."
                )
            return False, None
        except ssl.CertificateError as e:
            print(" > ERROR: {0}".format(e))
            print(
                " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag"
            )
            return False, None
        # Return the file size
        shutil.copy(tempfile_name, os.path.join(self.save_to, download_file))
        os.remove(tempfile_name)
        file_size = self.get_total_size(response)
        actual_size = os.path.getsize(os.path.join(self.save_to,
                                                   download_file))
        if file_size is None:
            # We were unable to calculate file size.
            file_size = actual_size
        return actual_size, file_size

    # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_report(self, bytes_so_far, file_size):
        """Progress hook: write an in-place progress line to stdout."""
        if file_size is not None:
            percent = float(bytes_so_far) / file_size
            percent = round(percent * 100, 2)
            sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                             (bytes_so_far, file_size, percent))
        else:
            # We couldn't figure out the size.
            sys.stdout.write(" > Downloaded %d of unknown Size\r" %
                             (bytes_so_far))

    # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    def chunk_read(self,
                   response,
                   local_file,
                   chunk_size=8192,
                   report_hook=None):
        """Stream *response* into *local_file* in chunks; return bytes read."""
        file_size = self.get_total_size(response)
        bytes_so_far = 0
        while 1:
            try:
                chunk = response.read(chunk_size)
            except:
                sys.stdout.write("\n > There was an error reading data. \n")
                break
            try:
                local_file.write(chunk)
            except TypeError:
                # text-mode file handed in: decode bytes before writing
                local_file.write(chunk.decode(local_file.encoding))
            bytes_so_far += len(chunk)
            if not chunk:
                break
            if report_hook:
                report_hook(bytes_so_far, file_size)
        return bytes_so_far

    def get_total_size(self, response):
        """Return the Content-Length of *response* as int, or None."""
        try:
            # python2 response object
            file_size = response.info().getheader('Content-Length').strip()
        except AttributeError:
            try:
                # python3 response object
                file_size = response.getheader('Content-Length').strip()
            except AttributeError:
                print("> Problem getting size")
                return None
        return int(file_size)

    # Download all the files in the list
    def download_files(self):
        """Download every URL in self.files, recording per-file outcome."""
        for file_name in self.files:
            # make sure we haven't ctrl+c'd or some other abort trap
            # NOTE(review): `abort` is presumably a module-level flag — confirm.
            if abort == True:
                raise SystemExit
            # download counter
            self.cnt += 1
            # set a timer
            start = time.time()
            # run download
            size, total_size = self.download_file_with_cookiejar(
                file_name, self.cnt, len(self.files))
            # calculte rate
            end = time.time()
            # stats:
            if size is None:
                self.skipped.append(file_name)
            # Check to see that the download didn't error and is the correct size
            elif size is not False and (total_size < (size + (size * .01))
                                        and total_size >
                                        (size - (size * .01))):
                # Download was good!
                elapsed = end - start
                elapsed = 1.0 if elapsed < 1 else elapsed
                rate = (size / 1024**2) / elapsed
                print(
                    "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec"
                    .format(size, elapsed, rate))
                # add up metrics
                self.total_bytes += size
                self.total_time += elapsed
                self.success.append({'file': file_name, 'size': size})
            else:
                print("There was a problem downloading {0}".format(file_name))
                self.failed.append(file_name)

    def print_summary(self, rid):
        """Print the download report and flag row *rid* in the DB table."""
        # Print summary:
        print("\n\nDownload Summary ")
        print(
            "--------------------------------------------------------------------------------"
        )
        print(" Successes: {0} files, {1} bytes ".format(
            len(self.success), self.total_bytes))
        for success_file in self.success:
            print(" - {0} {1:.2f}MB".format(
                success_file['file'], (success_file['size'] / 1024.0**2)))
        if len(self.failed) > 0:
            print(" Failures: {0} files".format(len(self.failed)))
            for failed_file in self.failed:
                print(" - {0}".format(failed_file))
        if len(self.skipped) > 0:
            print(" Skipped: {0} files".format(len(self.skipped)))
            for skipped_file in self.skipped:
                print(" - {0}".format(skipped_file))
        if len(self.success) > 0:
            print(" Average Rate: {0:.2f}MB/sec".format(
                (self.total_bytes / 1024.0**2) / self.total_time))
        print(
            "--------------------------------------------------------------------------------\n\n"
        )
        # since we are downloading one file at a time!
        if len(self.success) > 0:
            try:
                # connect_to_db/close_connection are defined elsewhere in this file
                conn, cur = connect_to_db()
                cur.execute(
                    "UPDATE {} SET downloaded=TRUE WHERE rid={}".format(
                        self.table_name, rid))
                conn.commit()
                close_connection(conn, cur)
            except Exception as e:
                print('error inserting into db because {}'.format(e))
                logging.error(e)
        # ideally should not end up here but anyway
        if len(self.skipped) > 0:
            try:
                conn, cur = connect_to_db()
                cur.execute(
                    "UPDATE {} SET downloaded=FALSE WHERE rid={}".format(
                        self.table_name, rid))
                conn.commit()
                close_connection(conn, cur)
            except Exception as e:
                print('error inserting into db because {}'.format(e))
                logging.error(e)
class Bilibili():
    """Client for the Bilibili web API (Python 2 code).

    Persists the login session in a Mozilla-format cookie file stored next
    to this module and installs a global urllib2 opener carrying that jar,
    so subsequent requests made through urllib2/utils are authenticated.
    """

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        # appkey/appsecret are used to sign API requests (see api_sign()).
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        # Cookie file lives beside this source file.
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            # Presence of the 'DedeUserID' cookie marks a live login session.
            if requests.utils.dict_from_cookiejar(
                    self.cj).has_key('DedeUserID'):
                self.is_login = True
                # mid = this account's numeric member id, kept as str.
                self.mid = str(
                    requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        # Route all urllib2 traffic through the shared cookie jar.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        """Download a login captcha image and return the file path it was saved to."""
        # First request primes the 'sid' session cookie when it is missing.
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'):
            utils.get_page_content(
                LOGIN_CAPTCHA_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(
            LOGIN_CAPTCHA_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path == None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        """Encrypt a plaintext password the way the login endpoint expects.

        The server supplies a salt ('hash') and an RSA public key; the salted
        password is RSA-encrypted, base64-encoded, then URL-quoted.
        """
        import rsa
        result = json.loads(
            utils.get_page_content(
                LOGIN_HASH_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build a signed query string: sorted params plus an md5 'sign' field."""
        params['appkey'] = self.appkey
        data = ""
        # Keys must be concatenated in sorted order for a stable signature.
        keys = params.keys()
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        # Without a secret there is nothing to sign with.
        if self.appsecret == None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        """Scrape the category tree from the home page.

        Returns a dict keyed by tid; each node has title, url and child tids
        under 'subs'. tid '0' is the synthetic root.
        """
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        # Top-level categories come from the home-page nav items.
        for item in result:
            # Items with extra CSS classes are not plain category entries.
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)
        # Fix-up: make sure TV-series (11) and movie (23) always exist,
        # since they live on separate bangumi pages.
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }
        # Second pass: scrape each top-level category page for subcategories.
        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            # result[0] is skipped; presumably a header item -- see markup.
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                # Protocol-relative links get an explicit scheme.
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        """Return the static category subtree rooted at tid (from CATEGORY)."""
        items = [{
            tid: {
                'title': '全部',
                'url': CATEGORY[tid]['url'],
                'subs': []
            }
        }]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        """Map a category id (int or str) to its display title."""
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        """Return the available sort orders."""
        return ORDER

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=10):
        """List videos in a category; returns (results, total_pages)."""
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        # The API returns a dict keyed '0'..'n-1'; stop at the first gap.
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        """Return the logged-in user's profile data ([] when not logged in)."""
        if self.is_login == False:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        """Bangumi series the user follows; returns (results, pages)."""
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        """Fetch season detail; unwraps a JSONP-wrapped response when present."""
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
            # Response is JSONP: strip the 'callback(' ... ');' wrapper.
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        """Watch history; returns (entries, total_page).

        The API exposes no total count, so assume one more page exists
        whenever a full page came back.
        """
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        """Feed of new videos from followed uploaders; returns (feeds, pages)."""
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        # Ceiling division turns the item count into a page count.
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        """Accounts this user follows; returns (list, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        """Videos uploaded by a followed account; returns (data, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        """Channels belonging to a followed account."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        """Videos inside one channel; returns (list, total)."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        """The user's favourite folders."""
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        """Videos in one favourite folder; returns (vlist, pages)."""
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        """Log in; returns (True, '') on success or (False, error_message).

        Success is detected by the appearance of the 'DedeUserID' cookie in
        the jar, not by the HTTP response body.
        """
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = utils.get_page_content(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })
        if not requests.utils.dict_from_cookiejar(
                self.cj).has_key('DedeUserID'):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(
            requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        """Drop all cookies and persist the now-empty jar."""
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        """Collect up to pagesize pages of an av's part list, recursively.

        Returns (list_of_page_results, total_pages).
        """
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        # Walk following pages recursively; pagesize acts as the budget.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(
                aid, int(page) + 1, fav, pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        """Raw part list for one av id."""
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        """Resolve playable URLs for a cid via the signed interface API."""
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [
            durl.getElementsByTagName('url')[0].firstChild.nodeValue
            for durl in doc.getElementsByTagName('durl')
        ]
        # Rewrite qqvideo CDN hosts to a mirror host.
        urls = [
            url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(
                r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url)
            for url in urls
        ]
        return urls

    def add_history(self, aid, cid):
        """Report a view so it shows up in watch history (fire-and-forget)."""
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
class Bilibili():
    """Client for the Bilibili web API (Python 2 code).

    NOTE(review): this is a second, token-identical definition of the
    Bilibili class that also appears earlier in this file; at import time
    this later definition shadows the earlier one -- confirm whether the
    duplication is intentional.

    Persists the login session in a Mozilla-format cookie file stored next
    to this module and installs a global urllib2 opener carrying that jar.
    """

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        # appkey/appsecret are used to sign API requests (see api_sign()).
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        # Cookie file lives beside this source file.
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            # Presence of the 'DedeUserID' cookie marks a live login session.
            if requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'):
                self.is_login = True
                self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        # Route all urllib2 traffic through the shared cookie jar.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        """Download a login captcha image and return the file path it was saved to."""
        # First request primes the 'sid' session cookie when it is missing.
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'):
            utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()),
                                   headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()),
                                        headers={'Referer': 'https://passport.bilibili.com/login'})
        if path == None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        """Encrypt a plaintext password the way the login endpoint expects.

        The server supplies a salt ('hash') and an RSA public key; the salted
        password is RSA-encrypted, base64-encoded, then URL-quoted.
        """
        import rsa
        result = json.loads(utils.get_page_content(
            LOGIN_HASH_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        """Build a signed query string: sorted params plus an md5 'sign' field."""
        params['appkey'] = self.appkey
        data = ""
        # Keys must be concatenated in sorted order for a stable signature.
        keys = params.keys()
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        # Without a secret there is nothing to sign with.
        if self.appsecret == None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        """Scrape the category tree from the home page.

        Returns a dict keyed by tid; each node has title, url and child tids
        under 'subs'. tid '0' is the synthetic root.
        """
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        # Top-level categories come from the home-page nav items.
        for item in result:
            # Items with extra CSS classes are not plain category entries.
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)
        # Fix-up: make sure TV-series (11) and movie (23) always exist,
        # since they live on separate bangumi pages.
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {'title': u'电视剧',
                               'url': 'http://bangumi.bilibili.com/tv/',
                               'subs': []}
        category_dict['23'] = {'title': u'电影',
                               'url': 'http://bangumi.bilibili.com/movie/',
                               'subs': []}
        # Second pass: scrape each top-level category page for subcategories.
        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            # result[0] is skipped; presumably a header item -- see markup.
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                # Protocol-relative links get an explicit scheme.
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        """Return the static category subtree rooted at tid (from CATEGORY)."""
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url'], 'subs': []}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        """Map a category id (int or str) to its display title."""
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        """Return the available sort orders."""
        return ORDER

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=10):
        """List videos in a category; returns (results, total_pages)."""
        params = {'tid': tid, 'order': order, 'days': days, 'page': page,
                  'pagesize': pagesize}
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        # The API returns a dict keyed '0'..'n-1'; stop at the first gap.
        for i in range(pagesize):
            if result['list'].has_key(str(i)):
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        """Return the logged-in user's profile data ([] when not logged in)."""
        if self.is_login == False:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        """Bangumi series the user follows; returns (results, pages)."""
        if self.is_login == False:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        """Fetch season detail; unwraps a JSONP-wrapped response when present."""
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
            # Response is JSONP: strip the 'callback(' ... ');' wrapper.
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        """Watch history; returns (entries, total_page).

        The API exposes no total count, so assume one more page exists
        whenever a full page came back.
        """
        if self.is_login == False:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        """Feed of new videos from followed uploaders; returns (feeds, pages)."""
        if self.is_login == False:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        # Ceiling division turns the item count into a page count.
        total_page = int((result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        """Accounts this user follows; returns (list, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        """Videos uploaded by a followed account; returns (data, pages)."""
        if self.is_login == False:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        """Channels belonging to a followed account."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        """Videos inside one channel; returns (list, total)."""
        if self.is_login == False:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        """The user's favourite folders."""
        if self.is_login == False:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        """Videos in one favourite folder; returns (vlist, pages)."""
        if self.is_login == False:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        """Log in; returns (True, '') on success or (False, error_message).

        Success is detected by the appearance of the 'DedeUserID' cookie in
        the jar, not by the HTTP response body.
        """
        #utils.get_page_content('http://www.bilibili.com')
        if self.is_login == True:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(captcha, userid, pwd)
        result = utils.get_page_content(LOGIN_URL, data,
                                        {'Origin': 'https://passport.bilibili.com',
                                         'Referer': 'https://passport.bilibili.com/login'})
        if not requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        """Drop all cookies and persist the now-empty jar."""
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        """Collect up to pagesize pages of an av's part list, recursively.

        Returns (list_of_page_results, total_pages).
        """
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        # Walk following pages recursively; pagesize acts as the budget.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid, int(page) + 1, fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        """Raw part list for one av id."""
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        """Resolve playable URLs for a cid via the signed interface API."""
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue
                for durl in doc.getElementsByTagName('durl')]
        # Rewrite qqvideo CDN hosts to a mirror host.
        urls = [url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url)
                else re.sub(r'.*\.qqvideo\.tc\.qq\.com',
                            'http://vsrc.store.qq.com', url)
                for url in urls]
        return urls

    def add_history(self, aid, cid):
        """Report a view so it shows up in watch history (fire-and-forget)."""
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
class LSession():
    """Thin urllib2 session wrapper with cookies, proxy and retries (Python 2).

    Keeps cookies in an in-memory CookieJar (no cookiefile) or an on-disk
    MozillaCookieJar (cookiefile given), retries failed opens, and sends the
    previous response's URL as the Referer of the next request.
    """

    def __init__(self, cookiefile=None, proxy=None, timeout=10, retime=30,
                 sleept=3):
        # retime: retry budget per request; sleept: seconds between retries.
        self.timeout = timeout
        self.retime = retime
        self.sleept = sleept
        #proxy '1.234.77.96:80'
        if cookiefile == None:
            # No file given: cookies live only in memory for this session.
            self.cookiejar = CookieJar()
        else:
            self.cookiejar = MozillaCookieJar(filename=cookiefile)
            #self.cookiejar =cookielib.LWPCookieJar(filename=cookiefile)
            if not os.path.isfile(cookiefile):
                # Seed a fresh file with the Mozilla header so load() accepts it.
                open(cookiefile, 'w').write(MozillaCookieJar.header)
                #open(cookiefile, 'w').write('#abc\n')
                pass
            self.cookiejar.load(filename=cookiefile, ignore_discard=True)
            #print "ck:",self.cookiejar
        self.cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.opener = build_opener(urllib2.HTTPRedirectHandler(),
                                   self.cookie_processor)
        if proxy:
            self.opener.add_handler(ProxyHandler({"http": proxy}))
        #for posting a file
        try:
            import MultipartPostHandler  #for posting a file,need installed
            self.opener.add_handler(MultipartPostHandler.MultipartPostHandler())
        except NameError as e:
            # NOTE(review): a missing module raises ImportError, not NameError
            # -- confirm which failure this was meant to swallow.
            print e
        self.response = None
        self.request = None
        # Queue of (key, value) headers applied to the next request only.
        self.header = []

    def add_header(self, k, v):
        """Queue a header to be attached to the next built request."""
        self.header.append((k, v))

    def build_request(self, url, params=None):
        """Create a Request; a non-None params body makes it a POST."""
        self.request = Request(url, params)
        # Use the previous response's URL as Referer, when there was one.
        if not self.response is None:
            self.request.add_header('Referer', self.url())
        #self.request.add_header('User-Agent',
        #    'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 \
        #    (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25')
        #NokiaE63/UC Browser7.9.0.102/28/355/UCWEB
        #self.request.add_header('User-Agent','NokiaE63/UC Browser7.9.0.102/28/355/UCWEB')
        self.request.add_header(
            'User-Agent',
            'Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15')
        # Drain the one-shot header queue into this request.
        while self.header:
            _k, _v = self.header.pop()
            self.request.add_header(_k, _v)
        #Mobile/7B405
        #self.request.add_header('User-Agent','Mobile/7B405')
        return self.request

    def __del__(self):
        # Best-effort cookie persistence when the session is collected.
        self.save_cookie()

    def urlopen(self, req):
        """Open req, retrying up to self.retime times.

        Returns the response, or None implicitly when every attempt fails.
        """
        retime = self.retime
        while retime > 0:
            try:
                return self.opener.open(req, timeout=self.timeout)
            except Exception as e:
                retime -= 1
                traceback.print_exc(file=sys.stdout)
                print 'Wait and retry...%d' % (self.retime - retime)
                sleep(self.sleept)

    def savefile(self, filename, url):
        """Stream url to filename in 50 KiB chunks."""
        self.response = self.urlopen(self.build_request(url))
        CHUNK = 50 * 1024
        with open(filename, 'wb') as fp:
            while True:
                chunk = self.response.read(CHUNK)
                if not chunk:
                    break
                fp.write(chunk)

    def post(self, url, post_data):
        """POST a dict as form-encoded data; returns the response."""
        self.response = self.urlopen(self.build_request(url, urlencode(post_data)))
        return self.response

    def post_raw(self, url, post_data):
        """POST a pre-encoded body as-is; returns the response."""
        self.response = self.urlopen(self.build_request(url, post_data))
        return self.response

    def post_file(self, url, params):
        """POST multipart data (relies on MultipartPostHandler being installed)."""
        self.response = self.urlopen(self.build_request(url, params))
        return self.response

    def get(self, url):
        """Plain GET; returns the response."""
        self.response = self.urlopen(self.build_request(url))
        #import urllib
        #print urllib.urlopen('http://mrozekma.com/302test.php').geturl()
        # import requests
        # r=requests.get(url)
        # print r.content
        return self.response

    def text(self, dec='gbk', enc='utf'):
        """Body of the last response, transcoded from dec to enc."""
        return self.response.read().decode(dec).encode(enc)

    def url(self):
        """URL of the last response (after any redirects)."""
        return self.response.url

    def logout(self):
        """Forget all cookies held by this session."""
        self.cookiejar.clear()

    def Verify_proxy(self):
        # Placeholder; never implemented.
        pass

    def show_cookie(self):
        """Dump every cookie to stdout (debug helper)."""
        #print self.cookiejar
        for i in self.cookiejar:
            print i

    def save_cookie(self):
        """Persist cookies; failures (e.g. plain CookieJar without save()) are logged."""
        # if hasattr(self.cookiejar,'save'):#in case non cookiejar
        #     self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        try:
            self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        except Exception as e:
            traceback.print_exc(file=sys.stdout)
def save(self, filename=None, ignore_discard=False, ignore_expires=False): if self.mode not in ("discard", "session"): return MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041209_20200626T041234_033183_03D816_7063.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041144_20200626T041209_033183_03D816_0D5E.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041208_20200614T041233_033008_03D2C4_2DC4.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041143_20200614T041208_033008_03D2C4_584D.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041208_20200602T041233_032833_03CD92_6A43.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041143_20200602T041208_032833_03CD92_5A25.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! 
Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." .format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! 
self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. 
print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. 
" .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. 
If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. 
") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. 
\n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." 
% (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( 
"--------------------------------------------------------------------------------" )
if options.pretty: # Strip out extra whitespace, so we can have maximum prettiness. for el in user.iter(): if el.text and not el.text.strip(): el.text = None if el.tail and not el.tail.strip(): el.tail = None print ET.tostring(user, pretty_print=True, encoding="utf-8") else: print ET.tostring(user, encoding="utf-8") # FIXME: Add locking to cookiejar, so concurrent instances don't clobber the cookie file. if options.cookiejar: try: cookiejar.save(ignore_discard=True) except IOError, e: print >> sys.stderr, "Error while saving cookie jar: %s: %s" % (options.cookiejar, e) except UsageError, e: print >> sys.stderr, e return 64 # EX_USAGE except ConfigParser.ParsingError, e: print >> sys.stderr, e return 78 # EX_CONFIG except NotImplementedError, e: print >> sys.stderr, e return 69 # EX_UNAVAILABLE
class Json_RPC(object):
    """Minimal JSON-RPC-over-HTTP client that keeps its cookies in a
    Mozilla-format cookie jar so sessions can be saved and restored."""

    def __init__(self):
        # Mozilla-format jar so load_cookie/save_cookie can persist sessions.
        self.cookie_jar = MozillaCookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookie_jar),
        )

    def load_cookie(self, filename):
        """Load cookies (session cookies included) from `filename`."""
        self.cookie_jar.load(filename, ignore_discard=True)

    def save_cookie(self, filename):
        """Persist cookies (session cookies included) to `filename`."""
        self.cookie_jar.save(filename, ignore_discard=True)

    def json_rpc(self, url, method="GET", **kwargs):
        """Perform an HTTP RPC and return the decoded JSON payload.

        Unwraps JSONP of the form try{callback({...});}catch(e){} and then
        extracts the first embedded dict or list before decoding.
        See also: http_rpc.
        """
        raw = self.http_rpc(url, method, **kwargs)
        # Strip the try{...}catch(e){} JSONP wrapper, keeping its body.
        raw = sub(r'try{(.*)}catch\(.*\){.*};?', r'\1', raw)
        # Keep only the embedded JSON object (or, failing that, array).
        found = search(r'{.+}', raw) or search(r'\[.+\]', raw)
        return loads(found.group())

    def http_rpc(self, url, method="GET", **kwargs):
        """Perform a raw HTTP RPC and return the response body.

        url     base url to rpc
        method  'GET' or 'POST'
        query   query string passed as a dict
        data    post data passed as a dict
        file    post files passed as a list of 3-tuples (key, filename, data);
                this switches the request to multipart/form-data
        """
        opts = Entity(kwargs)
        if method not in ['GET', 'POST']:
            raise RPCError("Method not in GET or POST")
        if opts.query:
            url += "?" + urlencode(opts.query)
        if method == 'GET':
            req = Request(url)
        elif opts.file:
            # Multipart upload: regular fields plus (key, filename, data) triples.
            content_type, body = multipart_encode(opts.data, opts.file)
            req = Request(url, body)
            req.add_header('Content-Type', content_type)
        elif opts.data:
            req = Request(url, urlencode(opts.data))
        else:
            raise RPCError("POST with no data")
        req.add_header('User-Agent', "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:8.0) Gecko/20100101 Firefox/8.0")
        req.add_header('Accept-Charset', "UTF-8")
        resp = self.opener.open(req)
        payload = resp.read()
        resp.close()
        # FIXME: ugly workaround for Tencent servers that prepend a UTF-8 BOM.
        if payload.startswith('\xef\xbb\xbf'):
            payload = payload[3:]
        return payload
class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem

    Cookies are persisted to ~/.deis/cookies.txt and a Django CSRF token
    header is injected on every request.
    """

    def __init__(self):
        super(Session, self).__init__()
        # Don't pick up proxy/auth settings from the environment.
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.deis/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.deis dir (user-private) if it doesn't exist
        if not os.path.isdir(cookie_dir):
            # fix: 0700 is a syntax error on Python 3; 0o700 works on 2.6+ too
            os.mkdir(cookie_dir, 0o700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def git_root(self):
        """
        Return the absolute path from the git repository root

        If no git repository exists, raise an EnvironmentError
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_formation(self):
        """
        Return the formation name for the current directory

        The formation is determined by parsing `git remote -v` output.
        If no formation is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        # try to match a deis remote
        remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root)
        # fix: re.match only anchors at the very start of the string, so the
        # MULTILINE flag was dead and a deis remote on any line but the first
        # was never found; re.search lets '^' match at each line start.
        m = re.search(r'^deis\W+(?P<url>\S+)\W+\(', remotes, re.MULTILINE)
        if not m:
            raise EnvironmentError(
                'Could not find deis remote in `git remote -v`')
        url = m.groupdict()['url']
        # fix: escape the dot so the optional suffix matches only ".git"
        m = re.match(r'\S+:(?P<formation>[a-z0-9-]+)(\.git)?', url)
        if not m:
            raise EnvironmentError("Could not parse: {url}".format(**locals()))
        return m.groupdict()['formation']

    formation = property(get_formation)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling including
        `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`
        """
        # Mirror the csrftoken cookie into the X-CSRFToken header Django expects.
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        # Persist any cookies the server set (e.g. auth/session).
        self.cookies.save()
        return response
class AOJClient(object): def __init__(self, cookie_file_path='aoj-cookie.txt'): self.cookie_file_path = cookie_file_path self.cookiejar = MozillaCookieJar() if os.path.isfile(cookie_file_path): self.cookiejar.load(cookie_file_path) self.opener = urllib2.build_opener( urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(self.cookiejar)) def get_csrf_token(self, url): request = urllib2.Request(url=url) response = self.opener.open(request) data = response.read() return REGEXP_CSRF.findall(data)[0] def refresh_session(self): print 'Not Logged In!' context = {'csrfmiddlewaretoken': self.get_csrf_token(LOGIN_URL), 'username': raw_input('Username: '******'password': getpass.getpass('Password: '******'accounts/login/', data=urllib.urlencode(context)) self.opener.open(request) self.cookiejar.save(self.cookie_file_path) def check_problem_exist(self, problem_name): try: request = urllib2.Request(url=PROB_PREFIX+'read/'+problem_name) response = self.opener.open(request) except urllib2.HTTPError as err: if err.code == 404: # Not Found raise AOJProblemNotExist else: raise def detect_language(self, source_file): if '.' in source_file: selected_language = source_file[source_file.rfind('.')+1:] else: selected_language = '' while selected_language not in LANGUAGES: selected_language = raw_input('Please select your langauge: (' + '/'.join(LANGUAGES) + ') ? 
').strip().lower() return selected_language def submit(self, submission): self.check_problem_exist(submission.problem) context = {} context['language'] = self.detect_language(submission.source) context['csrfmiddlewaretoken'] = self.get_csrf_token(url=PROB_PREFIX+'submit/'+submission.problem) try: with open(submission.source) as f: context['source'] = f.read() except IOError: raise AOJFileNotExist() def try_submit(first=True): if not first: self.refresh_session() request = urllib2.Request(url=PROB_PREFIX+'submit/'+submission.problem, data=urllib.urlencode(context)) response = self.opener.open(request) if not response.geturl().lower().startswith(LOGIN_URL): print 'Submission Complete!' return try_submit(first=False) try_submit() def get_submission_list(self, problem_name): self.check_problem_exist(problem_name) request = urllib2.Request(url=SITE_PREFIX+'judge/submission/recent/?problem='+problem_name) response = self.opener.open(request) try: import lxml.html except ImportError: print 'lxml library is needed for parsing HTML' return html = lxml.html.fromstring(unicode(response.read().decode('utf8'))) context = {} fields = ('id', 'problem', 'user', 'language', 'length', 'state', 'stats', 'submitted_on') length = {'id': 9, 'problem': 15, 'user': 15, 'language': 5, 'length': 7, 'state': 15, 'stats': 7, 'submitted_on': 15} template = u'%(id)s %(problem)s %(user)s %(language)s %(length)s %(state)s %(stats)s %(submitted_on)s' def width(string): return sum(1+(unicodedata.east_asian_width(c) in 'WF') for c in string) for tr in html.cssselect('table.submission_list tr'): for field in fields: element = tr.find_class(field) if element: context[field] = unicode(element[0].text_content().strip()) else: context[field] = u'' context[field] = ' ' * (length[field] - width(context[field])) + context[field] print template % context