def grab(test=False):
    if test and os.path.exists('cache.html'):
        print "Using cached html page"
        f = open('cache.html')
        data = f.read()
        f.close()
    else:
        # Create special URL opener (for User-Agent) and cookieJar
        cookieJar = ClientCookie.CookieJar()
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
        ClientCookie.install_opener(opener)
        fp = ClientCookie.urlopen("http://login.yahoo.com")
        forms = ClientForm.ParseResponse(fp)
        fp.close()
        form = forms[0]
        form["login"] = settings.YAHOOGROUPS_USERNAME
        form["passwd"] = settings.YAHOOGROUPS_PASSWORD
        fp = ClientCookie.urlopen(form.click())
        fp.close()
        fp = ClientCookie.urlopen("http://groups.yahoo.com/group/norwichfreegle/messages")  # use your group
        data = ''.join(fp.readlines())
        fp.close()
        if test:
            f = open('cache.html', 'w')
            f.write(data)
            f.close()
    return data
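# Minimal usage sketch for grab() above (hypothetical driver code, assuming
# the settings module is configured with valid credentials): the first run
# with test=True writes cache.html, and later runs reuse it instead of
# logging in again.
if __name__ == '__main__':
    html = grab(test=True)  # cached copy used if cache.html already exists
    print len(html)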
def run(self):
    global success
    value = getword()
    try:
        print "-"*12
        print "User:"******"Password:"******"User-agent", random.sample(headers, 1)[0])]
        ClientCookie.install_opener(opener)
        fp = ClientCookie.urlopen(sys.argv[1])
        forms = ClientForm.ParseResponse(fp)
        form = forms[0]
        form["user"] = sys.argv[2]
        form["pass"] = value
        fp = ClientCookie.urlopen(form.click())
        site = fp.readlines()
        for line in site:
            if re.search("Login failed.", line.lower()) != None:
                print "\tSuccessful Login:", value
                success = value
                sys.exit(1)
        fp.close()
    except (socket.gaierror, urllib2.HTTPError), msg:
        print msg
        pass
def setCookie(self, path=False):
    """ set cookie handler """
    if path:
        self.__url_cookiepath = path
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            self.__url_cookie = ClientCookie.MozillaCookieJar()
            if path and os.path.isfile(path):
                #noinspection PyBroadException
                try:
                    self.__url_cookie.load(path)
                except Exception, e:
                    pass
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(self.__url_cookie))
            ClientCookie.install_opener(opener)
    self.__url_request = Request
    self.__url_urlopen = urlopen
def run(self):
    global success
    value = getword()
    try:
        print "-"*12
        print "User:"******"Password:"******"User-agent","Mozilla/5.0 (compatible)")]
        ClientCookie.install_opener(opener)
        fp = ClientCookie.urlopen("https://www.gmail.com/")
        forms = ClientForm.ParseResponse(fp)
        form = forms[0]
        form["Email"] = sys.argv[1]
        form["Passwd"] = value
        fp = ClientCookie.urlopen(form.click())
        site = fp.readlines()
        for line in site:
            if re.search("Gmail - Inbox", line):
                print "\tSuccessful Login:", value
                success = value
                sys.exit(1)
        fp.close()
    except (socket.gaierror), msg:
        pass
def run(self):
    global success
    value = getword()
    try:
        print "-" * 12
        print "User:"******"Password:"******"User-agent", random.sample(headers, 1)[0])]
        ClientCookie.install_opener(opener)
        fp = ClientCookie.urlopen(sys.argv[1])
        forms = ClientForm.ParseResponse(fp)
        form = forms[0]
        form["username"] = sys.argv[2]
        form["password"] = value
        fp = ClientCookie.urlopen(form.click())
        site = fp.readlines()
        for line in site:
            if re.search("invalid password", line.lower()) != None:
                print "\tSuccessful Login:"******"The maximum number of 5 login attempts has been exceeded.", line): print "Attempts exceeded"
        fp.close()
    except (socket.gaierror), msg:
        pass
def grabdata(url):
    # proxy_handler = urllib2.ProxyHandler(proxy)
    # opener = urllib2.build_opener(proxy_handler)
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', user_agent)]
    status = False
    count = 0
    data = ''
    while status == False and count < 5:
        try:
            usock = opener.open(url)
            data = usock.read()
            usock.close()

            def checkRefresh(string):
                pattern = re.compile(r'http-equiv="refresh"')
                return pattern.search(string) != None

            if checkRefresh(data):
                # page uses a meta-refresh redirect; refetch with handlers
                # that follow HTTP-EQUIV and Refresh headers
                import ClientCookie
                sock = ClientCookie.build_opener(ClientCookie.HTTPEquivProcessor,
                                                 ClientCookie.HTTPRefreshProcessor)
                ClientCookie.install_opener(sock)
                data = ClientCookie.urlopen(url).read()
            status = True
        except Exception, msg:
            if count == 4:
                print "error: grab %s\n%s" % (url, msg)
            sleep(count)
            count += 1
    return data  # empty string if all five attempts failed
def execute(self):
    print "proxyFire module loaded"
    sub_urls = []
    sub_titles = []
    sub_index = 0
    plist = []
    trys = 0
    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    try:
        fp = ClientCookie.urlopen("http://www.proxyfire.net/forum/login.php")
        forms = ClientForm.ParseResponse(fp)
        fp.close()
    except Exception, e:
        print e
        if trys < 6:
            time.sleep(5)
            print "trying again..."
            trys += 1
        else:
            print "proxyfire.net is timing out"
            return plist
def loginByUser(self, user, url=None):
    res = True
    if UrlLoader.loginByUser(self, user, url) == False:
        return False
    signinUrl = self.getUrl() + '/signin'
    try:
        cookieJar = ClientCookie.CookieJar()
        opener = ClientCookie.build_opener(
            ClientCookie.HTTPCookieProcessor(cookieJar))
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
        ClientCookie.install_opener(opener)
        fp = ClientCookie.urlopen(signinUrl)
        forms = ClientForm.ParseResponse(fp)
        fp.close()
        form = forms[0]
        form['userName'] = user.get('userName')
        form['password'] = user.get('password')
        self._cookie = ClientCookie
        fpTestOpen = ClientCookie.urlopen(form.click())
        fpTestOpen.close()
    except Exception, e:
        print('Error when login: ' + e.message)
        res = False
    return res  # True when the form submission raised no error
def checkIfLoggedIn(oldPage, url, sensitive):
    ## method to check if the current cookies allow a successful login
    ## This has to be a separate connection and cookie jar. not sure why though
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        # Trying cookielib
        import cookielib
    except ImportError:
        ## Falling back to ClientCookie
        try:
            import ClientCookie
        except ImportError:
            ## falling back to no cookie jar
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            ## using ClientCookie for cookie jar
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        ## using cookielib for cookie jar
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
    if cj is not None:
        ## if we successfully imported a cookie jar library
        if os.path.isfile(COOKIEFILE):
            ## if cookiefile exists
            cj.load(COOKIEFILE)
        if cookielib is not None:
            ## we used cookielib
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]  # Some sites block requests w/o user-agent header
            urllib2.install_opener(opener)
        else:
            ## if we used ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]  # Some sites block requests w/o user-agent header
            ClientCookie.install_opener(opener)
    attempt = urlopen(url)  ## finally open the page
    return difflib.SequenceMatcher(None, oldPage, attempt.read()).ratio()  ## return the similarity ratio of the old page to the new page
def open_connection(email, password):
    '''Log in to MySpace and store login data into a global opener.'''
    # 1. Prepare a cookie jar and a global opener
    jar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(jar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    # 2. Open the Web page with the login form
    home_resp = ClientCookie.urlopen("http://www.myspace.com")
    forms = ClientForm.ParseResponse(home_resp, backwards_compat=False)
    home_resp.close()
    # 3. Fill the login form and submit
    login_form = forms[1]
    login_form[login_email_field] = email
    login_form[login_password_field] = password
    login_resp = ClientCookie.urlopen(login_form.click())
    result = login_resp.read()
    login_resp.close()
    # with open("exit.html", 'w') as f:
    #     f.write(result)
    # 4. Check if login was successful
    try:
        loginPatt = '"UserId":(.*?),'
        id = int(re.search(loginPatt, result).group(1))
        return id > 0
    except (TypeError, ValueError, AttributeError):
        return False
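# Illustrative call for open_connection() above; the credentials are
# placeholders, and login_email_field / login_password_field are assumed to
# be module-level constants naming the MySpace form fields.
if open_connection('user@example.com', 'secret'):
    print 'logged in; cookies now live in the globally installed opener'
else:
    print 'login failed'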
def set_up_cookie_stuff():
    COOKIEFILE = './cookies.txt'
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
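# For reference, a condensed sketch of the fallback pattern used by
# set_up_cookie_stuff() above: stdlib cookielib first, the older ClientCookie
# package second, cookieless urllib2 as a last resort. All calls are taken
# from the examples in this listing; the URL is illustrative.
import os
import urllib2

COOKIEFILE = './cookies.txt'
cj = None
try:
    import cookielib
    cj = cookielib.LWPCookieJar()
    urllib2.install_opener(
        urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)))
    urlopen, Request = urllib2.urlopen, urllib2.Request
except ImportError:
    try:
        import ClientCookie
        cj = ClientCookie.LWPCookieJar()
        ClientCookie.install_opener(
            ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)))
        urlopen, Request = ClientCookie.urlopen, ClientCookie.Request
    except ImportError:
        urlopen, Request = urllib2.urlopen, urllib2.Request  # no cookie support

if cj is not None and os.path.isfile(COOKIEFILE):
    cj.load(COOKIEFILE)  # resume the previous session
data = urlopen(Request('http://example.com')).read()
if cj is not None:
    cj.save(COOKIEFILE)  # persist cookies for the next run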
def loadURL(url):
    # Cookie stuff from:
    # http://www.voidspace.org.uk/python/articles/cookielib.shtml
    COOKIEFILE = '/var/www/vhosts/davesblogbot/cookies.lwp'
    #/home/virtual/site1/fst/home/newstoday/BayesBlogBot/cookies.lwp'
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT)'}
    try:
        req = Request(url, txdata, txheaders)
        handle = urlopen(req)
    except IOError, e:
        print 'Failed to open "%s".' % url
        if hasattr(e, 'code'):
            print 'Failed with error code - %s.' % e.code
        elif hasattr(e, 'reason'):
            print "Reason: %s" % e.reason
        return None
def __init__(self, url, debug=False):
    UserDict.__init__(self)
    self['url'] = url
    self['COOKIEFILE'] = 'freemed-cookies.lwp'
    if debug:
        import logging
        logger = logging.getLogger("cookielib")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.DEBUG)
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        import cookielib
    except ImportError:
        pass
    else:
        import urllib2
        urlopen = urllib2.urlopen
        cj = cookielib.LWPCookieJar()
        Request = urllib2.Request
    if not cookielib:
        try:
            import ClientCookie
        except ImportError:
            import urllib2
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            cj = ClientCookie.LWPCookieJar()
            Request = ClientCookie.Request
    if cj != None:
        if os.path.isfile(self['COOKIEFILE']):
            if debug:
                print 'DEBUG: Loading cookiefile ' + self['COOKIEFILE']
            cj.load(self['COOKIEFILE'])
        if cookielib:
            opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(cj),
                MultipartPostHandler.MultipartPostHandler)
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj),
                MultipartPostHandler.MultipartPostHandler)
            ClientCookie.install_opener(opener)
    self['Request'] = Request
    self['urlopen'] = urlopen
    self['cj'] = cj
def login(self):
    # Sets the client webbrowser
    cookieJar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    # Opens the login page for yahoo.com
    fp = ClientCookie.urlopen("http://login.yahoo.com")
    forms = ClientForm.ParseResponse(fp)
    fp.close()
    form = forms[0]
    form["login"] = self.name
    form["passwd"] = self.pw
    fp = ClientCookie.urlopen(form.click())
    fp.close()
def request(self, daterange):
    self.params["q"] = self.query + daterange
    url = self.URL % self.makeParams()
    request = urllib2.Request(url, headers=self.headers)
    proxy_support = urllib2.ProxyHandler({"http": "189.47.194.196:8118"})
    opener = ClientCookie.build_opener(proxy_support)
    ClientCookie.install_opener(opener)
    response = ClientCookie.urlopen(request)
    # proxy_support = urllib2.ProxyHandler({"http": "127.0.0.1:8118"})
    # opener = urllib2.build_opener(proxy_support)
    # urllib2.install_opener(opener)
    # response = urllib2.urlopen(request, timeout=50000)
    return response.read()
def _authenticate():
    """Logs the user in to Facebook"""
    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    fp = ClientCookie.urlopen("https://www.facebook.com/")
    forms = ClientForm.ParseResponse(fp)
    fp.close()
    form = forms[0]
    # supply user id and pw
    form["email"] = _usr_id
    form["pass"] = _pswrd
    fp = ClientCookie.urlopen(form.click())
    fp.close()
def _authenticate():
    """Logs the user in to Facebook"""
    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    fp = ClientCookie.urlopen("https://graph.facebook.com/oauth/authorize?type=user_agent&client_id=163193033824504&redirect_uri=http://www.yahoo.com&scope=email,user_birthday,user_online_presence,read_stream,offline_access")
    forms = ClientForm.ParseResponse(fp)
    fp.close()
    form = forms[0]
    # supply user id and pw
    form["email"] = _usr_id
    form["pass"] = _pswrd
    fp = ClientCookie.urlopen(form.click())
    #print "IN AUTHENTICATE:", fp.geturl()
    fp.close()
def __init__(self, cookiePath):
    self.cookiePath = cookiePath
    self.cj = None
    ClientCookie = None
    cookielib = None
    try:
        # Let's see if cookielib is available
        import cookielib
    except ImportError:
        pass
    else:
        import urllib2
        self.urlopen = urllib2.urlopen
        self.cj = cookielib.LWPCookieJar()  # This is a subclass of FileCookieJar that has useful load and save methods
        self.Request = urllib2.Request
    if not cookielib:
        # If importing cookielib fails let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            import urllib2
            self.urlopen = urllib2.urlopen
            self.Request = urllib2.Request
        else:
            self.urlopen = ClientCookie.urlopen
            self.cj = ClientCookie.LWPCookieJar()
            self.Request = ClientCookie.Request
    if self.cj != None:
        # now we have to install our CookieJar so that it is used as the
        # default CookieProcessor in the default opener handler
        if os.path.isfile(cookiePath):
            self.cj.load(cookiePath)
        if cookielib:
            self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(self.opener)
        else:
            self.opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(self.cj))
            ClientCookie.install_opener(self.opener)
def loader():
    """Try to import cookielib or ClientCookie"""
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        # see if cookielib is available
        import cookielib
    except ImportError:
        pass
    else:
        import urllib2
        urlopen = urllib2.urlopen
        cj = cookielib.LWPCookieJar()
        Request = urllib2.Request
    if not cookielib:
        # if importing cookielib fails, try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            raise
        else:
            urlopen = ClientCookie.urlopen
            cj = ClientCookie.LWPCookieJar()
            Request = ClientCookie.Request
    # install CookieJar so that it is used as the default CookieProcessor
    # in the default opener handler
    if cj != None:
        if cookielib:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    return Request, urlopen
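# Minimal usage sketch for loader() above; the URL and header are
# illustrative only.
Request, urlopen = loader()
req = Request('http://example.com', None, {'User-agent': 'Mozilla/5.0'})
print urlopen(req).read()[:200]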
def geturl(urlvideo):
    videoid = urlvideo
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = "http://www.vimeo.com/moogaloop/load/clip:%s/local/" % videoid
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://vimeo/%s' % urlvideo
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    #cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    print data
    # parse the XML looking for the signature code
    dom = parseString(data)
    xml = dom.getElementsByTagName("xml")
    for node in xml:
        try:
            request_signature = getNodeValue(
                node, "request_signature", "Unknown Uploader").encode("utf-8")
            request_signature_expires = getNodeValue(
                node, "request_signature_expires", "Unknown Uploader").encode("utf-8")
        except:
            logger.info("Error : Video borrado")
            return ""
    try:
        quality = ((config.getSetting("quality_flv") == "1" and "hd") or "sd")
    except:
        quality = "sd"
    video_url = "http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (
        videoid, request_signature, request_signature_expires, quality)
    print video_url
    # Socket timeout, 10 seconds
    socket.setdefaulttimeout(10)
    h = urllib2.HTTPHandler(debuglevel=0)
    request = urllib2.Request(video_url)
    opener = urllib2.build_opener(h)
    urllib2.install_opener(opener)
    try:
        connexion = opener.open(request)
        video_url = connexion.geturl()
    except urllib2.HTTPError, e:
        xbmc.output("[vimeo.py] error %d (%s) al abrir la url %s" % (e.code, e.msg, video_url))
        print e.read()
def geturl(urlvideo): xbmc.output("[vk.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo.replace("&", "&") #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Extrae la URL print data regexp = re.compile(r'vkid=([^\&]+)\&') match = regexp.search(data) vkid = "" print 'match %s' % str(match) if match is not None: vkid = match.group(1) else: print "no encontro vkid" patron = "var video_host = '([^']+)'.*?" patron += "var video_uid = '([^']+)'.*?" patron += "var video_vtag = '([^']+)'.*?" patron += "var video_no_flv = ([^;]+);.*?" 
patron += "var video_max_hd = '([^']+)'" matches = re.compile(patron, re.DOTALL).findall(data) if len(matches) > 0: for match in matches: if match[3].strip() == "0" and match[1] != "0": tipo = "flv" if "http://" in match[0]: videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo) else: videourl = "http://%s/u%s/video/%s.%s" % ( match[0], match[1], match[2], tipo) elif match[ 1] == "0" and vkid != "": #http://447.gt3.vkadre.ru/assets/videos/2638f17ddd39-75081019.vk.flv tipo = "flv" if "http://" in match[0]: videourl = "%s/assets/videos/%s%s.vk.%s" % ( match[0], match[2], vkid, tipo) else: videourl = "http://%s/assets/videos/%s%s.vk.%s" % ( match[0], match[2], vkid, tipo) else: #http://cs12385.vkontakte.ru/u88260894/video/d09802a95b.360.mp4 tipo = "360.mp4" if match[0].endswith("/"): videourl = "%su%s/video/%s.%s" % (match[0], match[1], match[2], tipo) else: videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo) return videourl
def downloadpage(url, post=None, headers=[['User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12']], follow_redirects=True, timeout=socket.getdefaulttimeout()):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url=" + url)
    if post is not None:
        logger.info("[scrapertools.py] post=" + post)
    else:
        logger.info("[scrapertools.py] post=None")
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.lwp')
    logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importando cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib no disponible")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importando ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie no disponible")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie disponible")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        logger.info("[scrapertools.py] cookielib disponible")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        logger.info("[scrapertools.py] Hay cookies")
        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Leyendo fichero cookies")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra")
                os.remove(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener usando urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL), urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL), urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            logger.info("[scrapertools.py] opener usando ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    # -------------------------------------------------
    # Cookies installed, send the request
    # -------------------------------------------------
    # Timer
    inicio = time.clock()
    # Dictionary for the headers
    txheaders = {}
    # Build the request
    if post is None:
        logger.info("[scrapertools.py] petición GET")
    else:
        logger.info("[scrapertools.py] petición POST")
    # Add the headers
    logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    logger.info("[scrapertools.py] ---------------------------")
    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available in Python 2.6 and later --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        deftimeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
        except:
            import sys
            for line in sys.exc_info():
                logger.error("%s" % line)
        socket.setdefaulttimeout(deftimeout)
    # Update the cookie store
    cj.save(ficherocookies)
    # Read the data and close
    data = handle.read()
    info = handle.info()
    logger.info("[scrapertools.py] Respuesta")
    logger.info("[scrapertools.py] ---------------------------")
    for header in info:
        logger.info("[scrapertools.py] " + header + "=" + info[header])
    handle.close()
    logger.info("[scrapertools.py] ---------------------------")
    '''
    # Send the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry replacing special characters
    except:
        req = urllib2.Request(url.replace(" ", "%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0], header[1])
        response = urllib2.urlopen(req)
    '''
    # Elapsed time
    fin = time.clock()
    logger.info("[scrapertools.py] Descargado en %d segundos " % (fin - inicio + 1))
    return data
def main():
    optparse = OptionParser(usage='Usage: %prog --username=<username> --password=<password> [options] <persons.csv> ...')
    optparse.add_option('-C', '--no-contacts', dest='no_contacts', action='store_true',
                        help='if given, only create cases, no contacts')
    optparse.add_option('--sars-travel', dest='sars_travel', action='store_true',
                        help='if given, fill in a number of sars travel forms')
    optparse.add_option('--use_lynx', dest='use_lynx', action='store_true',
                        help='if given, program will use /usr/bin/lynx to extract clean HTML')
    optparse.add_option('-F', '--log-format', dest='log_format', default=def_log_format,
                        help='log format string (default: %s)' % def_log_format)
    optparse.add_option('-l', '--log', dest='log', action='append',
                        help='specify log targets')
    optparse.add_option('-q', '--quiet', dest='quiet', action='store_true',
                        help='disable normal output')
    optparse.add_option('--retry', dest='retry', action='store_true',
                        help='retry after errors')
    optparse.add_option('-U', '--url', dest='url',
                        default='http://127.0.0.1/cgi-bin/casemgr/app.py',
                        help='specify application URL [default: URL]')
    optparse.add_option('-s', '--sleep', dest='sleep', type='float',
                        help='sleep SLEEP seconds between interactions')
    optparse.add_option('--skip', dest='skip', type='int',
                        help='skip SKIP rows into the person data')
    optparse.add_option('-D', '--debug', dest='debug', action='store_true',
                        help='enable debugging')
    optparse.add_option('-u', '--username', dest='username', default="",
                        help='username to use to login to NetEpi Collection')
    optparse.add_option('-p', '--password', dest='password', default="",
                        help='password to use to login to NetEpi Collection')
    options, args = optparse.parse_args()
    if not args:
        optparse.error('Must specify at least one person.csv file')
    if len(options.username) == 0 or len(options.password) == 0:
        optparse.error('Must specify username= and password= parameters')
    root_logger = logging.getLogger()
    if options.debug:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)
    formatter = logging.Formatter(options.log_format)
    if not options.quiet:
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)
    if options.log:
        for log in options.log:
            handler = logging.FileHandler(log)
            handler.setFormatter(formatter)
            root_logger.addHandler(handler)
    r = Random()
    total_count = 0
    restart_count = 0
    persons = PersonsSource(args)
    if options.skip:
        n = 0
        for n, record in enumerate(persons):
            if n + 1 == options.skip:
                break
        else:
            sys.exit('Can\'t skip %d rows - only %d rows of persons' %
                     (options.skip, n))
    while 1:
        interact = FormInteract(options.url, debug=options.debug,
                                use_lynx=options.use_lynx)
        try:
            interact.get()
            interact["username"] = options.username
            interact["password"] = options.password
            interact.click('login')
            interact.expect('Welcome')
            info('Logged in')
            for case_count in xrange(1, 100000):
                i_count = interact.count
                record = persons.next()
                case_start = time.time()
                interact.click('new:2')
                interact.click('do_search')
                interact.click('new_case')
                xform_person(record, interact, r)
                interact.click('update')
                total_count += 1
                info("add case took %.2fs, %d/%d(%d) cases" %
                     ((time.time() - case_start), case_count, total_count, restart_count))
                random_sleep(r, options.sleep)
                if options.sars_travel:
                    for sars_travel_count in range(1, r.randint(2, 5)):
                        interact.click('new:sars_travel')
                        form_start = time.time()
                        sars_travel(interact, r)
                        interact.click('form_submit')
                        info(' %5d - added a form, took %.2f seconds' %
                             (sars_travel_count, time.time() - form_start))
                # Unable to get contacts creation working reliably - bug in FormClient??
                # It works fine interactively and with Selenium!!!
                if False:  # if not options.no_contacts:
                    interact.click('contacts')
                    # for contact_count in range(1, r.randint(2,5)):
                    for contact_count in range(1, 2):
                        contact_start = time.time()
                        interact.click('add_contact')
                        interact.click('do_search')
                        interact.click('new_contact')
                        record = persons.next()
                        xform_contact(record, interact, r)
                        interact.click('update')
                        interact.click('back')
                        info(' %5d - added a contact, took %.2f seconds' %
                             (contact_count, time.time() - contact_start))
                        random_sleep(r, options.sleep)
                interact.click('action')
                info(' total time %.2f, %d/%d interactions, %.2fs av' %
                     ((time.time() - case_start), interact.count - i_count,
                      interact.count, interact.av_time()))
        except (KeyboardInterrupt, StopIteration):
            sys.exit(0)
        except:
            error('\n'.join(traceback.format_exception(*sys.exc_info())))
            interact.dump_response()
            ClientCookie.install_opener(None)  # Should discard CookieJar
            if not options.retry:
                break
            restart_count += 1
            info('Exception thrown (%d times so far), sleeping 20 seconds' % restart_count)
            time.sleep(20)
def downloadpageGzip(url):
    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_data_path(), 'cookies.dat')
    logger.info("Cookiefile=" + ficherocookies)
    inicio = time.clock()
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra")
                os.remove(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    parsedurl = urlparse.urlparse(url)
    logger.info("parsedurl=" + str(parsedurl))
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding': 'gzip,deflate',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Referer': parsedurl[0] + "://" + parsedurl[1]
    }
    logger.info(str(txheaders))
    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    fin = time.clock()
    logger.info("[scrapertools.py] Descargado 'Gzipped data' en %d segundos " % (fin - inicio + 1))
    # Decompress the gzipped data
    try:
        fin = inicio
        import StringIO
        compressedstream = StringIO.StringIO(data)
        import gzip
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        fin = time.clock()
        logger.info("[scrapertools.py] 'Gzipped data' descomprimido en %d segundos " % (fin - inicio + 1))
        return data1
    except:
        return data
def getmegauploaduser(login, password):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = "http://www.megaupload.com/?c=login"
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    passwordesc = password.replace("&", "%26")
    txdata = "login=1&redir=1&username="******"&password="******"----------------------") logger.info("Cookies despues") logger.info("----------------------") logger.info(cookiedata) logger.info("----------------------") '''
    login = re.search('Welcome', data)
    premium = re.search('flashvars.status = "premium";', data)
    if login is not None:
        if premium is not None:
            return 'premium'
        elif premium is None:
            return 'gratis'
    elif login is None:
        return None
def do_login_and_fetch(self, cj, COOKIEFILE, LOGIN_URL, login_params, fetch_url, save_to, **args):
    """
    Method to do an automated login and save the cookie. This is
    required for presentation download.
    """
    ClientCookie = None
    cookielib = None
    # Properly import the correct cookie lib
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    headers = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    request = Request(LOGIN_URL, login_params, headers)
    handle = urlopen(request)
    if cj:
        cj.save(COOKIEFILE)
    request = Request(fetch_url, None, headers)
    try:
        handle = urlopen(request)
    except urllib2.HTTPError:
        print >> sys.stderr, 'Presentation not available for download!'
        return
    data = handle.read()
    info = handle.info()
    ext = 'ppt'
    type = info['Content-Type']
    ext = self.get_extension(type)
    if not save_to:
        save_to = fetch_url.split('/')[-2] + '.'
    save_to = save_to + ext
    fp = open(save_to, 'wb')
    fp.write(data)
    fp.close()
    if self.verbose:
        print 'Presentation downloaded and saved to %s' % save_to
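# Hypothetical invocation of do_login_and_fetch() above; `downloader` stands
# in for an instance of the owning class, and every URL, field name and
# filename is a placeholder.
import urllib
login_params = urllib.urlencode({'user': 'alice', 'pass': 'secret'})
downloader.do_login_and_fetch(None, './cookies.lwp',
                              'http://example.com/login', login_params,
                              'http://example.com/talk/slides/', 'talk')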
def downloadpagewithcookies(url):
    # Initialize the cookie library
    ficherocookies = os.path.join(config.DATA_PATH, 'cookies.lwp')
    print "Cookiefile=" + ficherocookies
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Host': 'www.meristation.com',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive'
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    return data
def geturl(urlvideo): xbmc.output("[divxlink.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.divxlink\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron, re.DOTALL).findall(url) xbmc.output("[divxlink.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches) > 0: codigo = matches[0][0] nombre = matches[0][1] patron = '<input type="hidden" name="rand" value="([^"]+)">' matches = re.compile(patron, re.DOTALL).findall(data) #scrapertools.printMatches(matches) randomstring = "" if len(matches) > 0: randomstring = matches[0] xbmc.output("[divxlink.py] randomstring=" + randomstring) txdata = "op=download2&id=" + codigo + "&rand=" + randomstring + "&referer=&method_free=&method_premium=&down_direct=1" xbmc.output(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data patron = '<div id="embedcontmvshre"[^>]+>(.*?)</div>' matches = re.compile(patron, re.DOTALL).findall(data) #scrapertools.printMatches(matches) data = "" if len(matches) > 0: data = matches[0] xbmc.output("[divxlink.py] bloque packed=" + data) else: return "" # Lo descifra descifrado = unpackerjs.unpackjs(data) xbmc.output("descifrado=" + descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron, re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches) > 0: url = matches[0] xbmc.output("[divxlink.py] url=" + url) return url
def geturl(urlvideo):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = "http://video.yahoo.com/watch/%s" % urlvideo
    #url = "http://new.music.yahoo.com/videos/"
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://video.yahoo.com/',
        'X-Forwarded-For': '12.13.14.15'
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    #print data
    '''
    # Extract video height and width
    mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', data)
    if mobj is None:
        logger.info('ERROR: unable to extract video height')
        return ""
    yv_video_height = mobj.group(1)
    mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', data)
    if mobj is None:
        logger.info('ERROR: unable to extract video width')
        return ""
    yv_video_width = mobj.group(1)
    '''
    # Retrieve video playlist to extract media URL
    # I'm not completely sure what all these options are, but we
    # seem to need most of them, otherwise the server sends a 401.
    yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
    yv_bitrate = '700'  # according to Wikipedia this is hard-coded
    url = ('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' +
           urlvideo + '&tech=flash&mode=playlist&lg=' + yv_lg +
           '&bitrate=' + yv_bitrate + '&vidH=720' + '&vidW=1280' +
           '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
    #http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=v205690975&tech=flash&mode=playlist&lg=xRen3QvzZ_5wj1x8BbzEcR&bitrate=700&vidH=324&vidW=576&swf=as3&rd=video.yahoo.com-offsite&tk=null&adsupported=v1,v2,&eventid=1301797
    #url = 'http://video.music.yahoo.com/up/music_e/process/getPlaylistFOP.php?node_id='+ urlvideo + '&tech=flash&bitrate=20000&mode=&vidH=720&vidW=1280'
    req = Request(url, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data2 = handle.read()
    handle.close()
    print data2
    # Extract media URL from playlist XML
    mobj = re.search(
        r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', data2)
    if mobj is not None:
        video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
        video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
        print video_url
        return video_url
    else:
        logger.info('ERROR: Unable to extract media URL http')
    mobj = re.search(r'<STREAM (APP="[^>]+)>', data2)
    if mobj is None:
        logger.info('ERROR: Unable to extract media URL rtmp')
        return ""
    #video_url = mobj.group(1).replace("&amp;", "&")
    video_url = urllib.unquote(mobj.group(1).decode('utf-8'))
    video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
    '''
    <STREAM APP="rtmp://s1sflod020.bcst.cdn.s1s.yimg.com/StreamCache" FULLPATH="/s1snfs06r01/001/__S__/lauvpf/76414327.flv?StreamID=76414327&xdata=Njc3Mzc4MzA2NGNiNzI5MW-205754530-0&pl_auth=2598a5574b592b7c6ab262e4775b3930&ht=180&b=eca0lm561k1gn4cb7291a&s=396502118&br=700&q=ahfG2he5gqV40Laz.RUcnB&rd=video.yahoo.com-offsite&so=%2FMUSIC" CLIPID="v205690975" TYPE="STREAMING" AD="NO" APPNAME="ContentMgmt" URLPREFIX="rtmp://" SERVER="s1sflod020.bcst.cdn.s1s.yimg.com" BITRATE="7000" PORT="" PATH="/s1snfs06r01/001/__S__/lauvpf/76414327.flv" QUERYSTRING="StreamID=76414327&xdata=Njc3Mzc4MzA2NGNiNzI5MW-205754530-0&pl_auth=2598a5574b592b7c6ab262e4775b3930&ht=180&b=eca0lm561k1gn4cb7291a&s=396502118&br=700&q=ahfG2he5gqV40Laz.RUcnB&rd=video.yahoo.com-offsite&so=%2FMUSIC" URL="" TITLE="-" AUTHOR="-" COPYRIGHT="(c) Yahoo! Inc. 2006" STARTTIME="" ENDTIME=""/>
    '''
    swfUrl = 'http://d.yimg.com/ht/yep/vyc_player.swf'
    try:
        App = re.compile(r'APP="([^"]+)"').findall(video_url)[0]
        Fullpath = re.compile(r'FULLPATH="([^"]+)"').findall(video_url)[0]
        Appname = re.compile(r'APPNAME="([^"]+)"').findall(video_url)[0]
        #Server = re.compile(r'SERVER="([^"]+)"').findall(video_url)[0]
        Path = re.compile(r'PORT="" PATH="([^"]+)"').findall(video_url)[0].replace(".flv", "")
        #Querystring = re.compile(r'QUERYSTRING="([^"]+)"').findall(video_url)[0]
        playpath = Fullpath
        App = App.replace("/StreamCache", ":1935/StreamCache/")
        video_url = "%s%s%s playpath=%s swfurl=%s swfvfy=true" % (
            App, Appname, playpath, Path, swfUrl)
    except:
        logger.info('ERROR: re.compile failed')
        video_url = ""
    print video_url.encode("utf-8")
    return video_url
def read_body_and_headers(url, post=None, headers=[], follow_redirects=False, timeout=None):
    _log("read_body_and_headers "+url)

    if post is not None:
        _log("read_body_and_headers post="+post)

    if len(headers)==0:
        headers.append(["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"])

    # Start cookie lib
    ficherocookies = os.path.join( get_data_path(), 'cookies.dat' )
    _log("read_body_and_headers cookies_file="+ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            _log("read_body_and_headers ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            _log("read_body_and_headers ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        _log("read_body_and_headers cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")

        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, send the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")

    # Add the headers
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" % (str(header[0]),str(header[1])))
        txheaders[header[0]]=header[1]
    _log("read_body_and_headers ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info():
                _log("%s" % line)
            raise  # without re-raising, "handle" would be undefined below

    # Update the cookie store
    if cj is not None:
        cj.save(ficherocookies)

    # Read the data and close
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = StringIO( handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = handle.read()

    info = handle.info()
    _log("read_body_and_headers Response")

    returnheaders = []
    _log("read_body_and_headers ---------------------------")
    for header in info:
        _log("read_body_and_headers "+header+"="+info[header])
        returnheaders.append([header,info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")

    '''
    # Send the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ","%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    _log("read_body_and_headers Downloaded in %d seconds " % (fin-inicio+1))
    return data,returnheaders
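# ---------------------------------------------------------------------
# Minimal usage sketch for read_body_and_headers (assumes the module-level
# helpers it relies on -- _log, get_data_path, http_debug_log_enabled and
# NoRedirectHandler -- are present; the URL is a placeholder):
data, response_headers = read_body_and_headers(
    "http://example.com/page",
    headers=[["User-Agent", "Mozilla/5.0 (compatible)"]],
    timeout=15)
for name, value in response_headers:
    print name, "=", value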
def get_email_contents_data(self, auth): if True == self.VERBOSE: print print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, "[INFO] START") cj = None ClientCookie = None cookielib = None try: # Let's see if cookielib is available import cookielib except ImportError: pass else: import urllib2 urlopen = urllib2.urlopen cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar that has useful load and save methods Request = urllib2.Request if not cookielib: # If importing cookielib fails let's try ClientCookie try: import ClientCookie except ImportError: import urllib2 urlopen = urllib2.urlopen Request = urllib2.Request else: urlopen = ClientCookie.urlopen cj = ClientCookie.LWPCookieJar() Request = ClientCookie.Request #################################################### # We've now imported the relevant library - whichever library is being used urlopen is bound to the right function for retrieving URLs # Request is bound to the right function for creating Request objects # Let's load the cookies, if they exist. if cj != None: # now we have to install our CookieJar so that it is used as the default CookieProcessor in the default opener handler if os.path.isfile(COOKIEFILE): cj.load(COOKIEFILE) if cookielib: opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) # If one of the cookie libraries is available, any call to urlopen will handle cookies using the CookieJar instance we've created # (Note that if we are using ClientCookie we haven't explicitly imported urllib2) # as an example : auth_url_params = "owa/auth/owaauth.dll" if auth['owa_url'][0] == "/": theurl = auth['owa_url'] + auth_url_params else: theurl = auth['owa_url'] + "/" + auth_url_params theurl = theurl + "?url=" + auth['owa_url'] + "&reason=0" if True == self.VERBOSE: print print self.VERBOSE_PREFIX + '"%s" : %s' % (__name__, \ "[INFO] The owa_url: " + theurl) txdata = None # if we were making a POST type request, we could encode a dictionary of values here - using urllib.urlencode txheaders = {'User-agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1;\ en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0.011', 'Referer': auth['owa_url']} params = { 'destination': auth['owa_url'], 'flags': '0', 'forcedownlevel': '0', 'trusted': '0', 'username': auth['login'], 'password': auth['pass'], 'isUtf8': '1' } # some Java script functions linked with submit clkLgn() login button #function gbid(s){return document.getElementById(s);} #function clkLgn(){if(gbid("rdoPrvt").checked){var oD=new Date();oD.setTime(oD.getTime()+2*7*24*60*60*1000);var sA="acc="+(gbid("chkBsc").checked?1:0);var sL="lgn="+gbid("username").value;document.cookie="logondata="+sA+"&"+sL+"; expires="+oD.toUTCString();}} txdata = urllib.urlencode(params) try: req = Request(theurl, txdata, txheaders) # create a request object handle = urlopen(req) # and open it to return a handle on the url except IOError as e: if self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, '[ERROR] We failed to open "%s".' % theurl) if hasattr(e, 'code'): print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, '[ERROR] We failed with error code - %s.' 
% e.code) pass else: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, '[INFO] Here are the headers of the page :') print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, handle.info()) print self.VERBOSE_PREFIX + 'from "%s"' % __name__ # handle.read() returns the page, handle.geturl() returns the true url of the page fetched (in case urlopen has followed any redirects, which it sometimes does) if cj == None: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \ "[ERROR] We don't have a cookie library available - sorry.") print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \ "[ERROR] I can't show you any cookies.") pass else: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \ 'These are the cookies we have received so far :') for index, cookie in enumerate(cj): print self.VERBOSE_PREFIX + 'from "%s": %s - %s' \ % (__name__, index, cookie) cj.save(COOKIEFILE) # save the cookies again if True == self.VERBOSE: if handle is not None: print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \ "[INFO] Handle is not None.") else: print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \ "[ERROR] Handle is None. Maybe the connection was shut down ...") try: page_contents = handle.read() except: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \ "[ERROR] Problem with reading from handle") pass else: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, \ "[INFO] Read contents size: '" + str(len(page_contents)) + "'") # write a copy of the content to a temporary file for inspection print self.VERBOSE_PREFIX + \ " and write to temporary file '" + OWA_CHECKER_OUTPUT + "'" fh = open(OWA_CHECKER_OUTPUT, "w") fh.write(page_contents) fh.close() # ------------------------------------------- """ theurl2 = auth['owa_url'] txdata2 = None txheaders2 = {'User-agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0.011'} params2 = None # some JavaScript functions linked with the submit clkLgn() login button #function gbid(s){return document.getElementById(s);} #function clkLgn(){if(gbid("rdoPrvt").checked){var oD=new Date();oD.setTime(oD.getTime()+2*7*24*60*60*1000);var sA="acc="+(gbid("chkBsc").checked?1:0);var sL="lgn="+gbid("username").value;document.cookie="logondata="+sA+"&"+sL+"; expires="+oD.toUTCString();}} txdata2 = urllib.urlencode(params2) try: req2 = Request(theurl2, txdata2, txheaders2) # create a request object handle2 = urlopen(req2) # and open it to return a handle on the url except IOError, e: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s'\ % (__name__, '[ERROR] We failed to open "%s".' % theurl) if hasattr(e, 'code'): print self.VERBOSE_PREFIX + 'from "%s": %s'\ % (__name__, '[ERROR] We failed with error code - %s.' 
% e.code) pass else: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s'\ % (__name__, '[INFO] Here are the headers of the page :') print self.VERBOSE_PREFIX + 'from "%s": %s'\ % (__name__, handle.info()) print self.VERBOSE_PREFIX + 'from "%s"' % __name__ try: page_contents2 = handle2.read() except: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__,\ "[ERROR] Problem with reading from handle") pass else: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s'\ % (__name__,\ "[INFO] Read contents size: '" + str(len(page_contents2))+ "'") # write a copy of the content to a temporary file for inspection print self.VERBOSE_PREFIX + \ " and write to temporary file '" + OWA_CHECKER_OUTPUT+".GET"+"'" fh = open(OWA_CHECKER_OUTPUT + ".GET", "w") fh.write(page_contents2) fh.close() soup = BeautifulSoup(page_contents2) """ #TODO: problem with GET contents with COOKIES # url = opener.open(auth['owa_url']) # page_contents2 = url.read(200000) # print page_contents2 soup = BeautifulSoup(page_contents) #p = soup.findAll('html', '') #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, soup.html.body.table #tds = soup.findAll("table", 'lvw') tds = soup.findAll("h1", 'bld') count = len(tds) #re.compile("", '') #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, "tds len = " + str(count) #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, tds[0] unread_mail_messages_subjects = [] if 0 < count: if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, "Found " + str(count) + " subject(s) :") for id in xrange(len(tds)): #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, " %d -> %s" % (id, tds[id]) re_subject_h1 = re.compile('<h1 class="bld"><a href="#" onclick=".*">(.*)</a></h1>') subject = re_subject_h1.search(str(tds[id])).groups()[0] unread_mail_messages_subjects.append(str(subject).strip()) #TODO # show_notification("[EMAIL]", subject) if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, "[EMAIL] %d -> %s" % (id, subject)) else: unread_mail_messages_subjects = [] #TODO # show_notification("[EMAIL]", subject) if True == self.VERBOSE: subject = "[INFO] There were no EMAILs ..." print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, subject) if True == self.VERBOSE: print self.VERBOSE_PREFIX + 'from "%s"' % __name__ print self.VERBOSE_PREFIX + 'from "%s": %s' \ % (__name__, "[INFO] STOP") print self.VERBOSE_PREFIX + \ 'from "%s": [INFO] unread subjects: %s' \ % (__name__, str(unread_mail_messages_subjects)) return unread_mail_messages_subjects
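# ---------------------------------------------------------------------
# The subject scrape above, condensed into a standalone sketch (assumes
# BeautifulSoup 3, as imported by the surrounding module; the regex is the
# same one used above):
import re
from BeautifulSoup import BeautifulSoup

def extract_unread_subjects(page_contents):
    soup = BeautifulSoup(page_contents)
    re_subject_h1 = re.compile(
        '<h1 class="bld"><a href="#" onclick=".*">(.*)</a></h1>')
    subjects = []
    for h1 in soup.findAll("h1", "bld"):
        mobj = re_subject_h1.search(str(h1))
        if mobj is not None:
            subjects.append(mobj.group(1).strip())
    return subjects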
def poast(self, plain, speed, detail, title, sum):
    last_poast = "none"
    today = date.today()
    date_file = []
    with open('xroxy_poast.txt', 'r') as f:
        for line in f:
            date_file.append(int(line))
    last_poast = date(date_file[0],date_file[1],date_file[2])
    print ""
    print today
    print "last ", last_poast
    if last_poast < today:# or last_poast == None:
        trys = 0
        print "Creating new xroxy forum poast..."
        try:
            #Create special URL opener (for User-Agent) and cookieJar
            cookieJar = ClientCookie.CookieJar()
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
            opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
            ClientCookie.install_opener(opener)
            # retry the login page a few times before giving up
            forms = None
            while trys < 6:
                try:
                    fp = ClientCookie.urlopen("http://www.xroxy.com/xorum/login.php")
                    forms = ClientForm.ParseResponse(fp)
                    fp.close()
                    break
                except Exception, e:
                    print e
                    trys += 1
                    if trys < 6:
                        time.sleep(5)
                        print "trying again..."
            if forms is None:
                print "xroxy is timing out"
                return
            form = forms[0]
            form["username"] = "******" # use your userid
            form["password"] = "******" # use your password
            fp = ClientCookie.urlopen(form.click())
            fp.close()
            """login part"""
            trys = 0
            request2 = ClientCookie.urlopen("http://www.xroxy.com/xorum/viewforum.php?f=3")
            request2.close()
            request3 = ClientCookie.urlopen("http://www.proxyfire.net/forum/newthread.php?do=newthread&f=14")
            forms = ClientForm.ParseResponse(request3, backwards_compat=False)
            ## f = open("example.html")
            ## forms = ClientForm.ParseFile(f, "http://example.com/example.html",
            ##                              backwards_compat=False)
            ## f.close()
            form = forms[0]
            print form # very useful!
            form["subject"] = title # thread title
            form["message"] = "Working L1/L2 Proxies. Screened for and removed planetlab, .mil, .gov, .edu, and any non-anonymous proxies. No USA proxies either, these are all international (from an american point of view, at least), since that is all that I use. If you are curious about which country you would be using check out the python output for a two letter country code in the network information block." + "\nPython engine output (" + sum + "):" + "\n[CODE]"+ detail +"[/CODE]" + '\n' + "Ranked by Speed (" + sum + "):\n[CODE]"+ speed +"[/CODE]" + '\n' + "Stripped (" + sum + "):\n[CODE]" + plain +"[/CODE]" + '\n' + "More proxies at proxejaculate.blogspot.com" + '\n' + "Enjoy :)" + "\n" + "Experienced python, shell, or perl coders please contact me through pm if you are interested in helping code."
            #form["message"] = "Working L1/L2 Proxies(" + sum + ")" + "\n" + "Stripped format:" + "\n" + "[CODE]" + plain + "[/CODE]" + '\n' + "Ranked by speed:" + "\n" + "[CODE]"+ speed +"[/CODE]" + "\n" + "More proxies at proxejaculate.blogspot.com" + '\n' + "Enjoy :)"
            form.set_value(["6"], name="iconid", kind="list")
            fp = ClientCookie.urlopen(form.click())
            fp.close()
            # record today's date back into the same file read above, so
            # the next run sees it
            with open('xroxy_poast.txt', 'w') as f:
                f.write(str(today.year) + '\n')
                f.write(str(today.month) + '\n')
                f.write(str(today.day) + '\n')
        except Exception, e:
            print "Maybe we didn't poast to proxyfire?"
            pass
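# ---------------------------------------------------------------------
# The last-post date is persisted as three lines (year / month / day); a
# sketch of the matching reader/writer pair, assuming that file layout:
from datetime import date

def read_last_poast(path='xroxy_poast.txt'):
    with open(path) as f:
        y, m, d = [int(line) for line in f][:3]
    return date(y, m, d)

def write_last_poast(today, path='xroxy_poast.txt'):
    with open(path, 'w') as f:
        f.write("%d\n%d\n%d\n" % (today.year, today.month, today.day))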
def getvideo(urlpagina): # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass xbmc.output("ficherocookies %s" % ficherocookies) # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlpagina #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3', 'Referer': 'http://www.movshare.net/' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data patronvideos = '<embed type="video/divx" src="([^"]+)"' matches = re.compile(patronvideos, re.DOTALL).findall(data) if len(matches) == 0: patronvideos = '"file","([^"]+)"' matches = re.compile(patronvideos, re.DOTALL).findall(data) return matches[0]
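# ---------------------------------------------------------------------
# getvideo() returns matches[0] and therefore raises IndexError when
# neither pattern matches, so a guard like this is prudent (the URL is a
# placeholder):
try:
    video_url = getvideo("http://www.movshare.net/video/XXXXXX")
except IndexError:
    video_url = ""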
def downloadpageGzip(url):
    # Initialize the cookie library
    ficherocookies = os.path.join( config.DATA_PATH, 'cookies.lwp' )
    print "Cookiefile="+ficherocookies
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener for fetching URLs
        if cookielib is not None:
            # if we use cookielib then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    parsedurl = urlparse.urlparse(url)
    print "parsedurl=",parsedurl
    txheaders = {
        'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language':'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset':'UTF-8,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding':'gzip,deflate',
        'Keep-Alive':'300',
        'Connection':'keep-alive',
        'Referer':parsedurl.scheme + "://" + parsedurl.netloc}  # header values must be strings, not the ParseResult itself
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    if cj is not None:
        cj.save(ficherocookies) # save the cookies again
    data=handle.read()
    handle.close()

    # Decompress the Gzip data
    try:
        compressedstream = StringIO.StringIO(data)
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        return data1
    except:
        return data
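# ---------------------------------------------------------------------
# A small helper factoring out the gzip step above; a sketch that also
# tolerates servers which announce gzip but send a plain body:
import gzip
import StringIO

def gunzip_if_needed(data):
    try:
        return gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
    except IOError:
        # not a gzip stream after all: return the body untouched
        return data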
def GetMegavideoUser(login, password, megavidcookiepath): #New Login code derived from old code by Voinage etc. Makes no need for mechanize module. #if no user or pass are provided, open login file to get them. if login is False or password is False: if os.path.exists(megavidcookiepath): loginf = openfile(self.login) login = get_user(loginf) password = get_pass(loginf) # --------------------------------------- # Cookie stuff # --------------------------------------- ficherocookies = megavidcookiepath # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # install the cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = "http://www.megavideo.com/?s=signup" #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = "action=login&cnext=&snext=&touser=&user=&nickname=" + login + "&password="******"([^"]+)"' matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata) if len(matches) == 0: patronvideos = 'user=([^\;]+);' matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata) if len(matches) == 0: print 'something bad happened' return matches[0]
def __init__(self, cookiesUrl, userAgent='Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'): self.cookiesUrl = cookiesUrl self.userAgent = {'User-agent': userAgent} # the path and filename to save your cookies in self.cj = None ClientCookie = None cookielib = None self.httpsForm = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either self.urlopen = urllib2.urlopen self.Request = urllib2.Request else: # imported ClientCookie self.urlopen = ClientCookie.urlopen self.Request = ClientCookie.Request self.cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked self.urlopen = urllib2.urlopen self.Request = urllib2.Request self.cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods if self.cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(self.cookiesUrl): # if we have a cookie file already saved # then load the cookies into the Cookie Jar self.cj.load(self.cookiesUrl) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener( urllib2.HTTPCookieProcessor(self.cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(self.cj)) ClientCookie.install_opener(opener)
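# ---------------------------------------------------------------------
# Hypothetical usage of the class whose __init__ appears above (the class
# name HttpSession is an assumption -- only __init__ is shown here):
session = HttpSession('/tmp/cookies.lwp')
req = session.Request('http://example.com/', None, session.userAgent)
handle = session.urlopen(req)
print handle.read()[:200]
if session.cj is not None:
    session.cj.save(session.cookiesUrl)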
def downloadpage(url,post=None,headers=[['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; es-ES; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14']]): logger.info("[scrapertools.py] downloadpage") logger.info("[scrapertools.py] url="+url) if post is not None: logger.info("[scrapertools.py] post="+post) else: logger.info("[scrapertools.py] post=None") # --------------------------------- # Instala las cookies # --------------------------------- # Inicializa la librería de las cookies ficherocookies = os.path.join( config.get_setting("cookies.dir"), 'cookies.lwp' ) logger.info("[scrapertools.py] Cookiefile="+ficherocookies) cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: logger.info("[scrapertools.py] Importando cookielib") import cookielib except ImportError: logger.info("[scrapertools.py] cookielib no disponible") # If importing cookielib fails # let's try ClientCookie try: logger.info("[scrapertools.py] Importando ClientCookie") import ClientCookie except ImportError: logger.info("[scrapertools.py] ClientCookie no disponible") # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: logger.info("[scrapertools.py] ClientCookie disponible") # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: logger.info("[scrapertools.py] cookielib disponible") # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods if cj is not None: # we successfully imported # one of the two cookie handling modules logger.info("[scrapertools.py] Hay cookies") if os.path.isfile(ficherocookies): logger.info("[scrapertools.py] Leyendo fichero cookies") # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: logger.info("[scrapertools.py] opener usando urllib2 (cookielib)") # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: logger.info("[scrapertools.py] opener usando ClientCookie") # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) # ------------------------------------------------- # Cookies instaladas, lanza la petición # ------------------------------------------------- # Contador inicio = time.clock() # Diccionario para las cabeceras txheaders = {} # Añade las cabeceras for header in headers: logger.info("[scrapertools.py] header="+header[0]+": "+header[1]) txheaders[header[0]]=header[1] # Construye el request if post is None: logger.info("[scrapertools.py] petición GET") else: logger.info("[scrapertools.py] petición POST") req = Request(url, post, txheaders) handle = urlopen(req) # Actualiza el almacén de cookies cj.save(ficherocookies) # Lee los datos y cierra data=handle.read() handle.close() ''' # Lanza la petición try: response = urllib2.urlopen(req) # Si falla la repite sustituyendo caracteres especiales except: req = urllib2.Request(url.replace(" ","%20")) # Añade las cabeceras for header in headers: req.add_header(header[0],header[1]) response = urllib2.urlopen(req) 
''' # Elapsed time fin = time.clock() logger.info("[scrapertools.py] Downloaded in %d seconds " % (fin-inicio+1)) return data
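# ---------------------------------------------------------------------
# Usage sketch for downloadpage: post=None issues a GET, a urlencoded
# string issues a POST (URLs and fields are placeholders):
import urllib
html = downloadpage("http://example.com/")
html = downloadpage("http://example.com/login",
                    post=urllib.urlencode({"user": "foo", "pass": "bar"}))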
def geturl(urlvideo): xbmc.output("[divxlink.py] url="+urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url=urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'} # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.divxlink\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron,re.DOTALL).findall(url) xbmc.output("[divxlink.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches)>0: codigo = matches[0][0] nombre = matches[0][1] patron = '<input type="hidden" name="rand" value="([^"]+)">' matches = re.compile(patron,re.DOTALL).findall(data) #scrapertools.printMatches(matches) randomstring="" if len(matches)>0: randomstring=matches[0] xbmc.output("[divxlink.py] randomstring="+randomstring) txdata = "op=download2&id="+codigo+"&rand="+randomstring+"&referer=&method_free=&method_premium=&down_direct=1" xbmc.output(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data patron = '<div id="embedcontmvshre"[^>]+>(.*?)</div>' matches = re.compile(patron,re.DOTALL).findall(data) #scrapertools.printMatches(matches) data = "" if len(matches)>0: data = matches[0] xbmc.output("[divxlink.py] bloque packed="+data) else: return "" # Lo descifra descifrado = unpackerjs.unpackjs(data) xbmc.output("descifrado="+descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron,re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches)>0: url = matches[0] xbmc.output("[divxlink.py] url="+url) return url
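# ---------------------------------------------------------------------
# Sketch of the two-step fetch used above: the first page carries a hidden
# "rand" token that must be echoed back in the follow-up POST. Field names
# are the ones scraped above; urlencode ordering differs from the literal
# string but is equivalent:
import re
import urllib

def build_download2_post(page_html, codigo):
    m = re.search('<input type="hidden" name="rand" value="([^"]+)">',
                  page_html)
    rand = m.group(1) if m is not None else ""
    return urllib.urlencode({
        'op': 'download2', 'id': codigo, 'rand': rand, 'referer': '',
        'method_free': '', 'method_premium': '', 'down_direct': '1'})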
def geturl(urlvideo): logger.info("[metadivx.py] url="+urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url=urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'} # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.metadivx\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron,re.DOTALL).findall(url) logger.info("[metadivx.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches)>0: codigo = matches[0][0] nombre = matches[0][1] txdata = "op=download1&usr_login=&id="+codigo+"&fname="+nombre+"&referer=&method_free=Continue" logger.info(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data patron = '<div id="embedcontmvshre[^>]+>(.*?)</div>' matches = re.compile(patron,re.DOTALL).findall(data) scrapertools.printMatches(matches) logger.info("[metadivx.py] bloque packed") if len(matches)>0: logger.info(matches[0]) ''' <center> <script type='text/javascript'>eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5tcd6dva|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) </script> </center> ''' # El javascript empaquetado es #eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) ''' eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\' <7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"> <2 1="j"0="i"> <2 1="v"0="u"> <2 1="b"0="5"/> <2 1="c"0="5"/> <2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/> <8 
w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </8> </7>\');',36,51, 0'value 1|name 2|param 3|com 4|http 5|false 6|divx 7|object 8|embed 9|plugin a|go b|bannerEnabled c|autoPlay d| e|320px f|height g|630px h|width i|none j|custommode k|avi l|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_ m|Capitancinema n|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa o|182 p|206 q|45 r|73 s|76 t|src u|auto v|bufferingMode w|id x|download y|pluginspage z|video 10|type 11|embedmvshre 12|cab 13|DivXBrowserPlugin 14|codebase 15|CC0F21721616 16|9C46 17|41fa 18|D0AB 19|67DABFBF 1a|clsid 1b|classid 1c|embedcontmvshre 1d|write 1e|document '.split(' |'))) ''' # El javascript desempaquetado es #document.write('<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"><param name="custommode"value="none"><param name="bufferingMode"value="auto"><param name="bannerEnabled"value="false"/><param name="autoPlay"value="false"/><param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/><embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"></embed></object>'); ''' <object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"> <param name="custommode"value="none"> <param name="bufferingMode"value="auto"> <param name="bannerEnabled"value="false"/> <param name="autoPlay"value="false"/> <param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </embed> </object>'); ''' # La URL del video es #http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi # Lo descifra descifrado = unpackerjs.unpackjs(data) logger.info("descifrado="+descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron,re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches)>0: url = matches[0] logger.info("[metadivx.py] url="+url) return url
def downloadpage( url, post=None, headers=[[ 'User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12' ]], follow_redirects=True, timeout=socket.getdefaulttimeout()): logger.info("[scrapertools.py] downloadpage") logger.info("[scrapertools.py] url=" + url) if post is not None: logger.info("[scrapertools.py] post=" + post) else: logger.info("[scrapertools.py] post=None") # --------------------------------- # Instala las cookies # --------------------------------- # Inicializa la librería de las cookies ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.dat') logger.info("[scrapertools.py] ficherocookies=" + ficherocookies) cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: logger.info("[scrapertools.py] Importando cookielib") import cookielib except ImportError: logger.info("[scrapertools.py] cookielib no disponible") # If importing cookielib fails # let's try ClientCookie try: logger.info("[scrapertools.py] Importando ClientCookie") import ClientCookie except ImportError: logger.info("[scrapertools.py] ClientCookie no disponible") # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: logger.info("[scrapertools.py] ClientCookie disponible") # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.MozillaCookieJar() else: logger.info("[scrapertools.py] cookielib disponible") # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.MozillaCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods if cj is not None: # we successfully imported # one of the two cookie handling modules logger.info("[scrapertools.py] Hay cookies") if os.path.isfile(ficherocookies): logger.info("[scrapertools.py] Leyendo fichero cookies") # if we have a cookie file already saved # then load the cookies into the Cookie Jar try: cj.load(ficherocookies) except: logger.info( "[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra" ) os.remove(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: logger.info("[scrapertools.py] opener usando urllib2 (cookielib)") # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 if not follow_redirects: opener = urllib2.build_opener( urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL), urllib2.HTTPCookieProcessor(cj), NoRedirectHandler()) else: opener = urllib2.build_opener( urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL), urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: logger.info("[scrapertools.py] opener usando ClientCookie") # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) # ------------------------------------------------- # Cookies instaladas, lanza la petición # ------------------------------------------------- # Contador inicio = time.clock() # Diccionario para las cabeceras txheaders = {} # Construye el request if post is None: logger.info("[scrapertools.py] petición GET") else: logger.info("[scrapertools.py] petición POST") # Añade las cabeceras logger.info("[scrapertools.py] ---------------------------") for header in headers: logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), 
str(header[1]))) txheaders[header[0]] = header[1] logger.info("[scrapertools.py] ---------------------------") req = Request(url, post, txheaders) try: if timeout is None: handle = urlopen(req) else: #For all versions: deftimeout = socket.getdefaulttimeout() socket.setdefaulttimeout(timeout) try: handle = urlopen(req) finally: socket.setdefaulttimeout(deftimeout) # always restore the global default, even if urlopen raises # Update the cookie store # disabled: cj.save(ficherocookies) raised exceptions here # Read the data and close if handle.info().get('Content-Encoding') == 'gzip': logger.info("[scrapertools.py] gzipped") import StringIO data = handle.read() compressedstream = StringIO.StringIO(data) import gzip gzipper = gzip.GzipFile(fileobj=compressedstream) data = gzipper.read() gzipper.close() else: logger.info("[scrapertools.py] normal") data = handle.read() except urllib2.HTTPError, e: logger.info("error " + repr(e)) import traceback traceback.print_exc() data = e.read() #logger.info("data="+repr(data)) return data
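# ---------------------------------------------------------------------
# The per-request timeout above is emulated by swapping the global socket
# default; the same trick isolated, with a try/finally so the old value is
# always restored even when urlopen raises (a sketch, not from the source):
import socket
import urllib2

def urlopen_with_timeout(req, timeout):
    deftimeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(req)
    finally:
        socket.setdefaulttimeout(deftimeout)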
def geturl(urlvideo): xbmc.output("[gigabyupload.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration try: req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() except: data = "" pass #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.gigabyteupload\.com/download\-([^\-]+)\-.*?' 
matches = re.compile(patron, re.DOTALL).findall(url) id = matches[0] patron = '<form method="post" action="([^"]+)">[^<]+<input type="hidden" name="security_key" value="([^"]+)" \/>' #patron += '<p><input type="submit" name="submit" value="([^"]+)" class="cbutton" \/>' matches = re.compile(patron, re.DOTALL).findall(data) xbmc.output("[gigabyupload.py] fragmentos de la URL : " + str(len(matches))) scrapertools.printMatches(matches) cecid = "" submit = "" url2 = theurl if len(matches) > 0: url2 = matches[0][0] #id = matches[0][5] cecid = matches[0][1] submit = "Watch Online" #aff = matches[0][3] #came_from = matches[0][4] txdata = "op=download&usr_login=&id=" + id + "&security_key=" + cecid + "&submit=" + submit + "&aff=&came_from=referer=&method_free=Free+Stream" xbmc.output(txdata) try: req = Request(url2, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data except: data = "" pass # Extrae el trozo cifrado patron = '<div id="player">[^<]+<script type="text/javascript">(eval.*?)</script>' matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) data = "" if len(matches) > 0: data = matches[0] xbmc.output("[Gigabyteupload.py] bloque packed=" + data) else: return "" # Lo descifra descifrado = unpackerjs2.unpackjs(data) # Extrae la URL del vídeo xbmc.output("descifrado=" + descifrado) # Extrae la URL patron = '<param name="src" value="([^"]+)"' matches = re.compile(patron, re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches) > 0: url = matches[0] xbmc.output("[gigabyteupload.py] url=" + url) return url
def do_login_and_fetch(self, cj, COOKIEFILE, LOGIN_URL, login_params, fetch_url, save_to, **args):
    """
    Method to do an automated login and save the cookie. This is
    required for presentation download.
    """
    ClientCookie = None
    cookielib = None
    # Properly import the correct cookie lib
    try:
        import http.cookiejar
    except ImportError:
        # If importing http.cookiejar fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib.request.urlopen
            Request = urllib.request.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing http.cookiejar worked; remember it, so the opener
        # branch below picks urllib.request (without this binding the
        # ClientCookie branch was taken with ClientCookie still None)
        cookielib = http.cookiejar
        urlopen = urllib.request.urlopen
        Request = urllib.request.Request
        cj = http.cookiejar.LWPCookieJar()
    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib.request.build_opener(
                urllib.request.HTTPCookieProcessor(cj))
            urllib.request.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    headers = {
        'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    }
    request = Request(LOGIN_URL, login_params, headers)
    handle = urlopen(request)
    if cj:
        cj.save(COOKIEFILE)
    request = Request(fetch_url, None, headers)
    try:
        handle = urlopen(request)
    except urllib.error.HTTPError:
        print('Presentation not available for download!', file=sys.stderr)
        return
    data = handle.read()
    info = handle.info()
    content_type = info['Content-Type']
    ext = self.get_extension(content_type)
    if not save_to:
        save_to = fetch_url.split('/')[-2] + '.'
    save_to = save_to + ext
    fp = open(save_to, 'wb')
    fp.write(data)
    fp.close()
    if self.verbose:
        print('Presentation downloaded and saved to %s' % save_to)
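# ---------------------------------------------------------------------
# Usage sketch for do_login_and_fetch (Python 3, matching the snippet
# above; "downloader" and all URLs/field names are placeholders, and the
# POST body must be bytes for urllib.request):
import urllib.parse

login_params = urllib.parse.urlencode(
    {'username': 'user', 'password': 'secret'}).encode('ascii')
downloader.do_login_and_fetch(
    None, '/tmp/cookies.lwp',
    'https://example.com/login', login_params,
    'https://example.com/talk/slides', None)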
# Now we need to get our Cookie Jar installed in the opener, for fetching URLs
if cookielib is not None:
    # if we use cookielib, we get the HTTPCookieProcessor
    # and install the opener in urllib2
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
else:
    # if we use ClientCookie, we get the HTTPCookieProcessor
    # and install the opener in ClientCookie
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
    ClientCookie.install_opener(opener)

def saxpost(sid, source, message, type):
    if SILENTMODE:
        return
    kickthem = 0
    for badword in BADWORDS:
        if -1 < message.lower().find(badword.lower()):
            kickthem = 1
            kickwhy = 'Disallowed word'
    if -1 < message.find('\x03'):
        kickthem = 1
def cachePagePostCookies(url, data):
    xbmc.output("[scrapertools.py] cachePagePostCookies - " + url)
    xbmc.output("[scrapertools.py] cachePagePostCookies - data=" + data)
    inicio = time.clock()

    # Initialize the cookie library
    ficherocookies = os.path.join(os.getcwd(), 'cookies.lwp')
    xbmc.output("[scrapertools.py] cachePagePostCookies - Cookiefile=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved,
            # load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar installed in the opener,
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib, we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie, we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    theurl = url
    # an example url that sets a cookie; try different urls here
    # and see the cookie collection you can make!

    #txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #             'Referer':'http://www.megavideo.com/?s=signup'}
    txheaders = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
    # fake a user agent; some websites (like google) don't like automated exploration

    req = Request(theurl, data, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    fin = time.clock()
    xbmc.output("[scrapertools.py] Downloaded in %d seconds " % (fin - inicio + 1))
    return data
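# Minimal usage sketch for cachePagePostCookies(). The URL and form fields are
# illustrative assumptions; the second argument is the raw urlencoded POST
# body, which the function passes straight to Request().
import urllib

post_body = urllib.urlencode({'username': 'me', 'password': 'secret'})  # assumed fields
html = cachePagePostCookies("http://example.com/login", post_body)      # assumed URL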
def getvideo(urlpagina):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE  # the path and filename to save your cookies in
    try:
        os.remove(ficherocookies)
    except:
        pass
    xbmc.output("ficherocookies %s" % ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved,
            # load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar installed in the opener,
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib, we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie, we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = urlpagina
    theurl = url
    # an example url that sets a cookie; try different urls here
    # and see the cookie collection you can make!

    txdata = None
    # if we were making a POST type request, we could encode a dictionary
    # of values here, using urllib.urlencode(somedict)

    txheaders = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
                 'Referer': 'http://www.movshare.net/'}
    # fake a user agent; some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    # Request it a second time, as if you had clicked the banner
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    patronvideos = '<embed type="video/divx" src="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    return matches[0]
def getmegauploaduser(login, password):
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE  # the path and filename to save your cookies in
    try:
        os.remove(ficherocookies)
    except:
        pass

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved,
            # load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar installed in the opener,
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib, we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie, we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    url = "http://www.megaupload.com/?c=login"
    theurl = url
    # an example url that sets a cookie; try different urls here
    # and see the cookie collection you can make!

    # POST the login form with the given credentials
    passwordesc = password.replace("&", "%26")
    txdata = "login=1&redir=1&username=" + login + "&password=" + passwordesc
    txheaders = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'}

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()

    # Read the saved cookie file back and look for the session cookie
    cookiedata = open(ficherocookies).read()
    '''
    xbmc.output("----------------------")
    xbmc.output("Cookies afterwards")
    xbmc.output("----------------------")
    xbmc.output(cookiedata)
    xbmc.output("----------------------")
    '''
    patronvideos = 'user="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)

    if len(matches) == 0:
        if DEBUG:
            xbmc.output("Megaupload cookie not found")
            xbmc.output("----------------------")
            xbmc.output("Megaupload response")
            xbmc.output("----------------------")
            xbmc.output(data)
            xbmc.output("----------------------")
            xbmc.output("----------------------")
            xbmc.output("Cookies afterwards")
            xbmc.output("----------------------")
            xbmc.output(cookiedata)
            xbmc.output("----------------------")
        devuelve = ""
    else:
        devuelve = matches[0]

    return devuelve
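# Rather than regex-matching the text of the saved cookie file, the same value
# can be read off the cookie jar object itself. A small sketch (mine, not part
# of the module above), assuming the Megaupload session cookie is literally
# named "user" as the patterns above imply:
def get_user_cookie(cj):
    for cookie in cj:  # cookielib/ClientCookie jars are iterable
        if cookie.name == "user":
            return cookie.value
    return ""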
def geturl(urlvideo): xbmc.output("[metadivx.py] url="+urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url=urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'} # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.metadivx\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron,re.DOTALL).findall(url) xbmc.output("[metadivx.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches)>0: codigo = matches[0][0] nombre = matches[0][1] txdata = "op=download1&usr_login=&id="+codigo+"&fname="+nombre+"&referer=&method_free=Continue" xbmc.output(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data patron = '<div id="embedcontmvshre[^>]+>(.*?)</div>' matches = re.compile(patron,re.DOTALL).findall(data) scrapertools.printMatches(matches) xbmc.output("[metadivx.py] bloque packed") if len(matches)>0: xbmc.output(matches[0]) ''' <center> <script type='text/javascript'>eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5tcd6dva|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) </script> </center> ''' # El javascript empaquetado es #eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) ''' eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\' <7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"> <2 1="j"0="i"> <2 1="v"0="u"> <2 1="b"0="5"/> <2 1="c"0="5"/> <2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/> <8 
w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </8> </7>\');',36,51, 0'value 1|name 2|param 3|com 4|http 5|false 6|divx 7|object 8|embed 9|plugin a|go b|bannerEnabled c|autoPlay d| e|320px f|height g|630px h|width i|none j|custommode k|avi l|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_ m|Capitancinema n|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa o|182 p|206 q|45 r|73 s|76 t|src u|auto v|bufferingMode w|id x|download y|pluginspage z|video 10|type 11|embedmvshre 12|cab 13|DivXBrowserPlugin 14|codebase 15|CC0F21721616 16|9C46 17|41fa 18|D0AB 19|67DABFBF 1a|clsid 1b|classid 1c|embedcontmvshre 1d|write 1e|document '.split(' |'))) ''' # El javascript desempaquetado es #document.write('<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"><param name="custommode"value="none"><param name="bufferingMode"value="auto"><param name="bannerEnabled"value="false"/><param name="autoPlay"value="false"/><param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/><embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"></embed></object>'); ''' <object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"> <param name="custommode"value="none"> <param name="bufferingMode"value="auto"> <param name="bannerEnabled"value="false"/> <param name="autoPlay"value="false"/> <param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </embed> </object>'); ''' # La URL del video es #http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi # Lo descifra descifrado = unpackerjs.unpackjs(data) xbmc.output("descifrado="+descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron,re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches)>0: url = matches[0] xbmc.output("[metadivx.py] url="+url) return url
def getAuthentication(self, user, password):
    COOKIEFILE = 'cookies.lwp'  # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported one of the two cookie handling modules
        if os.path.isfile(COOKIEFILE):
            # if we have a cookie file already saved,
            # load the cookies into the Cookie Jar
            cj.load(COOKIEFILE)

        # Now we need to get our Cookie Jar installed in the opener,
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib, we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie, we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    values = {'campo_login': user, 'campo_password': password}  # we set the user and the pass
    theurl = 'http://www.basketpc.com/index.php?mod=autentificacion'
    # an example url that sets a cookie; try different urls here
    # and see the cookie collection you can make!

    txdata = urllib.urlencode(values)
    # if we were making a POST type request, we could encode a dictionary
    # of values here, using urllib.urlencode(somedict)

    txheaders = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64)'}
    # fake a user agent; some websites (like google) don't like automated exploration

    try:
        req = Request(theurl, txdata, txheaders)  # create a request object
        handle = urlopen(req)                     # and open it to return a handle on the url
    except IOError, e:
        print 'We failed to open "%s".' % theurl
        if hasattr(e, 'code'):
            print 'We failed with error code - %s.' % e.code
        elif hasattr(e, 'reason'):
            print "The error object has the following 'reason' attribute :"
            print e.reason
            print "This usually means the server doesn't exist,"
            print "is down, or we don't have an internet connection."
        sys.exit()
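# Minimal usage sketch (the owning class is not shown in the snippet, so the
# instantiation below is hypothetical). After getAuthentication() returns, the
# installed opener carries the session cookie, so subsequent urllib2.urlopen()
# calls to the same site go out authenticated.
client = Client()  # hypothetical class owning getAuthentication()
client.getAuthentication('myuser', 'mypassword')
html = urllib2.urlopen('http://www.basketpc.com/index.php').read()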