Example 1
 def run(self):
     global success
     value = getword()
     try:
         print "-" * 12
         print "User:"******"Password:"******"User-agent", random.sample(headers, 1)[0])]
         ClientCookie.install_opener(opener)
         fp = ClientCookie.urlopen(sys.argv[1])
         forms = ClientForm.ParseResponse(fp)
         form = forms[0]
         form["username"] = sys.argv[2]
         form["password"] = value
         fp = ClientCookie.urlopen(form.click())
         site = fp.readlines()
         for line in site:
             if re.search("invalid password", line.lower()) != None:
                 print "\tSuccessful Login:"******"The maximum number of 5 login attempts has been exceeded.",
                     line):
                 print "Attempts exceeded"
         fp.close()
     except (socket.gaierror), msg:
         pass
Example 2
	def __init__(self):
		self.userAgent='OobaCacheMgr/'+Version
		self.urlContext=''
		self.socketTimeout=7.0
		self.cacheFolder='Z:\\~HttpCache\\'
		
		if Emulating: self.cacheFolder=ScriptPath+'Cache\\'
		try:
			os.makedirs(self.cacheFolder)
		except: pass
		
		self.cookiefile=self.cacheFolder+'~cookies.txt' 
		#the tilde is there to ensure that url2xfilename doesn't create a file that might overwrite this
		
		self.defaultCachetime=24*60.0 #minutes

		self.cookies=ClientCookie.LWPCookieJar()
		try:
			self.cookies.revert(self.cookiefile)
		except:
			print('Could not open cookie file: '+self.cookiefile)
			
		hh=CustomHandler(self.cookies)
		
		self.opener=ClientCookie.build_opener(hh)
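A minimal usage sketch for the cache manager above; the class name HttpCacheManager is assumed for illustration, and opener, cookies and cookiefile are the attributes built in __init__:

	mgr = HttpCacheManager()
	resp = mgr.opener.open('http://www.example.com/')
	body = resp.read()
	resp.close()
	# LWPCookieJar.save() writes the jar back to the file that revert() read
	mgr.cookies.save(mgr.cookiefile)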
Example 3
    def call(self, addr, data, namespace, soapaction=None, encoding=None, http_proxy=None, config=Config):

        if not isinstance(addr, SOAPAddress):
            addr = SOAPAddress(addr, config)

        cookie_cutter = ClientCookie.HTTPCookieProcessor(config.cookieJar)
        hh = ClientCookie.HTTPHandler()

        opener = ClientCookie.build_opener(cookie_cutter, hh)

        t = "text/xml"
        if encoding != None:
            t += '; charset="%s"' % encoding
        opener.addheaders = [
            ("Content-Type", t),
            ("Cookie", "Username=foobar"),  # ClientCookie should handle
            ("SOAPAction", "%s" % (soapaction)),
        ]

        response = opener.open(addr.proto + "://" + addr.host + addr.path, data)
        data = response.read()

        # get the new namespace
        if namespace is None:
            new_ns = None
        else:
            new_ns = self.getNS(namespace, data)

        # return response payload
        return data, new_ns
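A hypothetical invocation of this transport method; the endpoint URL, envelope and namespace are placeholders, and transport stands for an instance of the surrounding class whose config.cookieJar holds a cookielib-compatible jar:

        envelope = '<?xml version="1.0"?><SOAP-ENV:Envelope>...</SOAP-ENV:Envelope>'
        payload, ns = transport.call(
            "http://example.com/soap/endpoint",  # coerced to SOAPAddress internally
            envelope,
            namespace="urn:example",
            soapaction="urn:example#DoSomething",
            encoding="utf-8")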
Example 4
	def run(self):
		global success
		value = getword()
		try:
			print "-"*12
			print "User:"******"Password:"******"User-agent","Mozilla/5.0 (compatible)")]
			ClientCookie.install_opener(opener)
			fp = ClientCookie.urlopen("https://www.gmail.com/")
			forms = ClientForm.ParseResponse(fp)
			form = forms[0]
			form["Email"]  = sys.argv[1] 
			form["Passwd"] = value      
			fp = ClientCookie.urlopen(form.click())
			site = fp.readlines()
			for line in site:
				if re.search("Gmail - Inbox", line):
					print "\tSuccessful Login:", value
					success =  value
					sys.exit(1)
			fp.close()
		except socket.gaierror, msg:
			pass
Example 5
	def run(self):
		global success
		value = getword()
		try:
			print "-"*12
			print "User:"******"Password:"******"User-agent", random.sample(headers,  1)[0])]
			ClientCookie.install_opener(opener)
			fp = ClientCookie.urlopen(sys.argv[1])
			forms = ClientForm.ParseResponse(fp)
			form = forms[0]
			form["user"] = sys.argv[2] 
			form["pass"] = value      
			fp = ClientCookie.urlopen(form.click())
			site = fp.readlines()
			for line in site:
				if re.search("Login failed.", line.lower()) != None:
					print "\tSuccessful Login:", value
					success =  value
					sys.exit(1)
			fp.close()
		except (socket.gaierror, urllib2.HTTPError), msg:
			print msg
Example 6
	def run(self):
		global success
		value = getword()
		try:
			print "-"*12
			print "User:"******"Password:"******"User-agent", random.sample(headers,  1)[0])]
			ClientCookie.install_opener(opener)
			fp = ClientCookie.urlopen(sys.argv[1])
			forms = ClientForm.ParseResponse(fp)
			form = forms[0]
			form["username"] = sys.argv[2] 
			form["password"] = value      
			fp = ClientCookie.urlopen(form.click())
			site = fp.readlines()
			for line in site:
				if re.search("invalid password", line.lower()) != None:
					print "\tSuccessful Login:"******"The maximum number of 5 login attempts has been exceeded.",line):
					print "Attempts exceeded" 
			fp.close()
		except socket.gaierror, msg:
			pass
Example 7
def grab(test=False):
    if test and os.path.exists('cache.html'):
        print "Using cached html page"
        f = open('cache.html')
        data = f.read()
        f.close()
    else:
        # Create special URL opener (for User-Agent) and cookieJar
        cookieJar = ClientCookie.CookieJar()
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
        opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
        ClientCookie.install_opener(opener)
        fp = ClientCookie.urlopen("http://login.yahoo.com")
        forms = ClientForm.ParseResponse(fp)
        fp.close()

        form = forms[0]
        form["login"]  = settings.YAHOOGROUPS_USERNAME
        form["passwd"] = settings.YAHOOGROUPS_PASSWORD
        fp = ClientCookie.urlopen(form.click())
        fp.close()
        fp = ClientCookie.urlopen("http://groups.yahoo.com/group/norwichfreegle/messages") # use your group
        data = ''.join(fp.readlines())
        fp.close()
    if test:
        f = open('cache.html', 'w')
        f.write(data)
        f.close()
    return data
Example 8
    def loginByUser(self, user, url = None):
        res = True

        if UrlLoader.loginByUser(self, user, url) == False:
            return False

        signinUrl = self.getUrl() + '/signin'

        try:
            cookieJar = ClientCookie.CookieJar()
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cookieJar))
            opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
            ClientCookie.install_opener(opener)
            fp = ClientCookie.urlopen(signinUrl)
            forms = ClientForm.ParseResponse(fp)
            fp.close()

            form = forms[0]
            form['userName'] = user.get('userName')
            form['password'] = user.get('password')

            self._cookie = ClientCookie

            fpTestOpen = ClientCookie.urlopen(form.click())
            fpTestOpen.close()

        except Exception, e:
            print('Error when login: ' + str(e))
            res = False

        return res
Example 9
def checkIfLoggedIn(oldPage, url, sensitive): ## method to check if the current cookies allow a successful login
											  ## This has to be a separate connection and cookie jar; the reason is unclear
	cj = None
	ClientCookie = None
	cookielib = None
	try: # Trying cookielib
	    import cookielib
	except ImportError: ## Falling back to clientcookie
	    try:
	        import ClientCookie
	    except ImportError: ## falling back to no cookie jar
	        urlopen = urllib2.urlopen
	        Request = urllib2.Request
	    else: ## using ClientCookie for cookie jar
	        urlopen = ClientCookie.urlopen
	        Request = ClientCookie.Request
	        cj = ClientCookie.LWPCookieJar()
	else: ## using cookielib for cookie jar
	    urlopen = urllib2.urlopen
	    Request = urllib2.Request
	    cj = cookielib.LWPCookieJar()
	if cj is not None: ## if we successfully imported a cookie jar library
	    if os.path.isfile(COOKIEFILE): ## if cookiefile exists
	        cj.load(COOKIEFILE)
	    if cookielib is not None: ## we used cookielib
	        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
	        opener.addheaders = [('User-agent', 'Mozilla/5.0')] # Some sites block requests w/o user-agent header
	        urllib2.install_opener(opener)
	    else: ## if we used ClientCookie
	        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
	        opener.addheaders = [('User-agent', 'Mozilla/5.0')] # Some sites block requests w/o user-agent header
	        ClientCookie.install_opener(opener)
	attempt = urlopen(url) ## finally open the page
	return difflib.SequenceMatcher(None, oldPage, attempt.read()).ratio() ## return the similarity ratio of the old page to the new page
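An illustrative consumer of the similarity ratio returned above; the snapshot file, URL and 0.9 threshold are arbitrary examples:

	old_page = open('logged_in_snapshot.html').read()
	ratio = checkIfLoggedIn(old_page, 'http://example.com/account', False)
	if ratio > 0.9:  # a near-identical page suggests the cookies still work
		print 'Cookies still valid (similarity %.2f)' % ratio
	else:
		print 'Session appears to have expired'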
Example 10
 def run(self):
     global success
     value = getword()
     try:
         print "-" * 12
         print "User:"******"Password:"******"User-agent", random.sample(headers, 1)[0])]
         ClientCookie.install_opener(opener)
         fp = ClientCookie.urlopen(sys.argv[1])
         forms = ClientForm.ParseResponse(fp)
         form = forms[0]
         form["user"] = sys.argv[2]
         form["pass"] = value
         fp = ClientCookie.urlopen(form.click())
         site = fp.readlines()
         for line in site:
             if re.search("Login failed.", line.lower()) != None:
                 print "\tSuccessful Login:", value
                 success = value
                 sys.exit(1)
         fp.close()
     except (socket.gaierror, urllib2.HTTPError), msg:
         print msg
Example 11
    def execute(self):
		
		print "proxyFire module loaded"
		sub_urls = []
		sub_titles = []
		sub_index = 0
		plist = []
		trys = 0
		
		# Create special URL opener (for User-Agent) and cookieJar
		cookieJar = ClientCookie.CookieJar()
		opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
		opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
		ClientCookie.install_opener(opener)
		# retry the login page up to 6 times before giving up
		while True:
			try:
				fp = ClientCookie.urlopen("http://www.proxyfire.net/forum/login.php")
				forms = ClientForm.ParseResponse(fp)
				fp.close()
				break
			except Exception, e:
				print e
				if trys < 6:
					time.sleep(5)
					print "trying again..."
					trys += 1
				else:
					print "proxyfire.net is timing out"
					return plist
Example 12
def set_up_cookie_stuff():
    COOKIEFILE = './cookies.txt'

    cj = None
    ClientCookie = None
    cookielib = None

    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()

    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
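Note that set_up_cookie_stuff binds urlopen and Request only as locals, so callers never see them and only the install_opener side effect survives (Example 31's loader returns the bindings for exactly this reason). A simplified sketch that hands them back, assuming Python 2.4+ where cookielib is always available:

    import os
    import urllib2
    import cookielib

    def set_up_cookie_stuff_v2(cookiefile='./cookies.txt'):
        # cookielib ships with Python 2.4+, so the ClientCookie fallback
        # is only needed on very old interpreters
        cj = cookielib.LWPCookieJar()
        if os.path.isfile(cookiefile):
            cj.load(cookiefile)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
        # return the bindings so callers can actually use them
        return urllib2.urlopen, urllib2.Request, cj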
Example 13
def grabdata(url):
#	proxy_handler = urllib2.ProxyHandler(proxy)
#	opener = urllib2.build_opener(proxy_handler)
	opener = urllib2.build_opener()
	opener.addheaders = [('User-agent', user_agent)]
	status = False
	count = 0
	data = ''
	while status == False and count < 5:
		try:
			usock = opener.open(url)
			data = usock.read()
			usock.close()
			def checkRefresh(string):
				pattern = re.compile(r'http-equiv="refresh"')
				return pattern.search(string) != None
			if checkRefresh(data):
				import ClientCookie
				sock = ClientCookie.build_opener(ClientCookie.HTTPEquivProcessor,
						 ClientCookie.HTTPRefreshProcessor
						 )
				ClientCookie.install_opener(sock)
				data = ClientCookie.urlopen(url).read()
			status = True
		except Exception, msg:
			if count == 4:
				print "error: grab %s\n%s" % (url, msg)
			sleep(count)
			count += 1
	# hand the fetched page back to the caller
	return data
Example 14
def open_connection(email, password):
    '''Log in to MySpace and store login data into a global opener.'''
    # 1. Prepare a cookie jar and a global opener
    jar = ClientCookie.CookieJar()
    opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(jar))
    opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
    ClientCookie.install_opener(opener)

    # 2. Open the Web page with the login form
    home_resp = ClientCookie.urlopen("http://www.myspace.com")
    forms = ClientForm.ParseResponse(home_resp, backwards_compat=False)
    home_resp.close()
    
    # 3. Fill the login form and submit
    login_form = forms[1]
    login_form[login_email_field] = email
    login_form[login_password_field] = password
    login_resp = ClientCookie.urlopen(login_form.click())
    result = login_resp.read()
    login_resp.close()
#    with open("exit.html", 'w') as f:
#        f.write(result)

    # 4. Check if login was successful
    try:        
        loginPatt = '"UserId":(.*?),'
        id = int(re.search(loginPatt, result).group(1))
        return id > 0
    except (TypeError, ValueError, AttributeError):
        return False
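An illustrative call; the credentials are placeholders, and login_email_field / login_password_field are assumed to be module-level constants naming the MySpace form fields:

    if open_connection('user@example.com', 'secret'):
        print 'logged in; the installed global opener now carries the session cookies'
    else:
        print 'login failed'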
Example 15
def set_up_cookie_stuff():
    COOKIEFILE = './cookies.txt'

    cj = None
    ClientCookie = None
    cookielib = None
    
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()
    else:
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()

    if cj is not None:
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib is not None:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
Example 16
  def call(self, addr, data, namespace, soapaction = None, encoding = None,
    http_proxy = None, config = Config):

    if not isinstance(addr, SOAPAddress):
      addr = SOAPAddress(addr, config)

    cookie_cutter = ClientCookie.HTTPCookieProcessor(config.cookieJar)
    hh = ClientCookie.HTTPHandler()

    opener = ClientCookie.build_opener(cookie_cutter, hh)

    t = 'text/xml'
    if encoding != None:
      t += '; charset="%s"' % encoding
    opener.addheaders = [("Content-Type", t),
              ("Cookie", "Username=foobar"), # ClientCookie should handle
              ("SOAPAction" , "%s" % (soapaction))]

    response = opener.open(addr.proto + "://" + addr.host + addr.path, data)
    data = response.read()

    # get the new namespace
    if namespace is None:
      new_ns = None
    else:
      new_ns = self.getNS(namespace, data)

    # return response payload
    return data, new_ns
Example 17
 def setCookie(self, path=False):
     """
     set cookie handler
     """
     if path:
         self.__url_cookiepath = path
     try:
         import cookielib
     except ImportError:
         try:
             import ClientCookie
         except ImportError:
             urlopen = urllib2.urlopen
             Request = urllib2.Request
         else:
             urlopen = ClientCookie.urlopen
             Request = ClientCookie.Request
             self.__url_cookie = ClientCookie.MozillaCookieJar()
             if path and os.path.isfile(path):
                 #noinspection PyBroadException
                 try:
                     self.__url_cookie.load(path)
                 except Exception, e:
                     pass
             opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(self.__url_cookie))
             ClientCookie.install_opener(opener)
             self.__url_request = Request
             self.__url_urlopen = urlopen
Example 18
    def __init__(self):
        self.userAgent = 'OobaCacheMgr/' + Version
        self.urlContext = ''
        self.socketTimeout = 7.0
        self.cacheFolder = 'Z:\\~HttpCache\\'

        if Emulating: self.cacheFolder = ScriptPath + 'Cache\\'
        try:
            os.makedirs(self.cacheFolder)
        except:
            pass

        self.cookiefile = self.cacheFolder + '~cookies.txt'
        #the tilde is there to ensure that url2xfilename doesn't create a file that might overwrite this

        self.defaultCachetime = 24 * 60.0  #minutes

        self.cookies = ClientCookie.LWPCookieJar()
        try:
            self.cookies.revert(self.cookiefile)
        except:
            print('Could not open cookie file: ' + self.cookiefile)

        hh = CustomHandler(self.cookies)

        self.opener = ClientCookie.build_opener(hh)
Example 19
 def test_response_close_and_read(self):
     opener = ClientCookie.build_opener(ClientCookie.SeekableProcessor)
     r = opener.open("http://wwwsearch.sf.net/bits/cctest2.txt")
     # closing response shouldn't stop methods working if we're using
     # SeekableProcessor (i.e. _Util.response_seek_wrapper)
     r.read()
     r.close()
     r.seek(0)
     self.assertEqual(r.read(), "Hello ClientCookie functional test suite.\n")
Example 20
def loadURL(url):
	# Cookie stuff from: 
	# http://www.voidspace.org.uk/python/articles/cookielib.shtml
	
	COOKIEFILE = '/var/www/vhosts/davesblogbot/cookies.lwp' #/home/virtual/site1/fst/home/newstoday/BayesBlogBot/cookies.lwp'
	
	cj = None
	ClientCookie = None
	cookielib = None
	
	# Let's see if cookielib is available
	try:
	    import cookielib
	except ImportError:
	    try:
	        import ClientCookie
	    except ImportError:
	        urlopen = urllib2.urlopen
	        Request = urllib2.Request
	    else:
	        urlopen = ClientCookie.urlopen
	        Request = ClientCookie.Request
	        cj = ClientCookie.LWPCookieJar()
	
	else:
	    urlopen = urllib2.urlopen
	    Request = urllib2.Request
	    cj = cookielib.LWPCookieJar()
	    
	if cj is not None:
	    if os.path.isfile(COOKIEFILE):
	        cj.load(COOKIEFILE)
	    if cookielib is not None:
	        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
	        urllib2.install_opener(opener)
	    else:
	        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
	        ClientCookie.install_opener(opener)
	
	txdata = None
	# if we were making a POST type request,
	# we could encode a dictionary of values here,
	# using urllib.urlencode(somedict)
	
	txheaders =  {'User-agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT)'}
	
	try:
	    req = Request(url, txdata, txheaders)
	    handle = urlopen(req)
	except IOError, e:
	    print 'Failed to open "%s".' % url
	    if hasattr(e, 'code'):
	        print 'Failed with error code - %s.' % e.code
	    elif hasattr(e, 'reason'):
	        print "Reason: %s" % e.reason
	    return None
	return handle
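A usage sketch for loadURL; the URL is a placeholder, and on failure the function returns None:

	handle = loadURL('http://www.example.com/')
	if handle is not None:
		page = handle.read()
		handle.close()
		print 'fetched %d bytes' % len(page)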
Example 21
 def test_response_close_and_read(self):
     opener = ClientCookie.build_opener(ClientCookie.SeekableProcessor)
     r = opener.open("http://wwwsearch.sf.net/bits/cctest2.txt")
     # closing response shouldn't stop methods working if we're using
     # SeekableProcessor (i.e. _Util.response_seek_wrapper)
     r.read()
     r.close()
     r.seek(0)
     self.assertEqual(r.read(),
                      "Hello ClientCookie functional test suite.\n")
Example 22
    def __init__(self, url, debug=False):
        UserDict.__init__(self)
        self['url'] = url

        self['COOKIEFILE'] = 'freemed-cookies.lwp'

        if debug:
            import logging
            logger = logging.getLogger("cookielib")
            logger.addHandler(logging.StreamHandler(sys.stdout))
            logger.setLevel(logging.DEBUG)

        cj = None
        ClientCookie = None
        cookielib = None

        try:
            import cookielib
        except ImportError:
            pass
        else:
            import urllib2
            urlopen = urllib2.urlopen
            cj = cookielib.LWPCookieJar()
            Request = urllib2.Request

        if not cookielib:
            try:
                import ClientCookie
            except ImportError:
                import urllib2
                urlopen = urllib2.urlopen
                Request = urllib2.Request
            else:
                urlopen = ClientCookie.urlopen
                cj = ClientCookie.LWPCookieJar()
                Request = ClientCookie.Request
        if cj != None:
            if os.path.isfile(self['COOKIEFILE']):
                if debug:
                    print 'DEBUG: Loading cookiefile ' + self['COOKIEFILE']
                cj.load(self['COOKIEFILE'])
            if cookielib:
                opener = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(cj),
                    MultipartPostHandler.MultipartPostHandler)
                urllib2.install_opener(opener)
            else:
                opener = ClientCookie.build_opener(
                    ClientCookie.HTTPCookieProcessor(cj),
                    MultipartPostHandler.MultipartPostHandler)
                ClientCookie.install_opener(opener)
        self['Request'] = Request
        self['urlopen'] = urlopen
        self['cj'] = cj
Example 23
	def __init__( self, url, debug=False ):
		UserDict.__init__( self )
		self['url'] = url

		self['COOKIEFILE'] = 'freemed-cookies.lwp'

		if debug:
			import logging
			logger = logging.getLogger("cookielib")
			logger.addHandler(logging.StreamHandler(sys.stdout))
			logger.setLevel(logging.DEBUG)
	
		cj = None
		ClientCookie = None
		cookielib = None
	
		try:
			import cookielib            
		except ImportError:
			pass
		else:
			import urllib2    
			urlopen = urllib2.urlopen
			cj = cookielib.LWPCookieJar()
			Request = urllib2.Request

		if not cookielib:
			try:                                            
				import ClientCookie 
			except ImportError:
				import urllib2
				urlopen = urllib2.urlopen
				Request = urllib2.Request
			else:
				urlopen = ClientCookie.urlopen
				cj = ClientCookie.LWPCookieJar()
				Request = ClientCookie.Request
		if cj != None:
			if os.path.isfile( self['COOKIEFILE'] ):
				if debug:
					print 'DEBUG: Loading cookiefile ' + self['COOKIEFILE']
				cj.load( self['COOKIEFILE'] )
			if cookielib:
				opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj), MultipartPostHandler.MultipartPostHandler)
				urllib2.install_opener(opener)
			else:
				opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj), MultipartPostHandler.MultipartPostHandler)
				ClientCookie.install_opener(opener)
		self['Request'] = Request
		self['urlopen'] = urlopen
		self['cj'] = cj
Example 24
 def login(self):
     # Sets the client webbrowser
     cookieJar = ClientCookie.CookieJar()
     opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
     opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
     ClientCookie.install_opener(opener)
     # Opens the login page for yahoo.com
     fp = ClientCookie.urlopen("http://login.yahoo.com")
     forms = ClientForm.ParseResponse(fp)
     fp.close()
     form = forms[0]
     form["login"] = self.name
     form["passwd"] = self.pw
     fp = ClientCookie.urlopen(form.click())
     fp.close()
Example 25
    def __init__(self):
        #global opener
        self.cookies = ClientCookie.CookieJar()
        self.hh = ClientCookie.HTTPHandler()
    
        # set the HTTP debug level (0 = off)
        self.hh.set_http_debuglevel(0)

        # build the opener
        self.mech = ClientCookie.build_opener(\
            HTTPCookieProcessor(self.cookies),self.hh)
        
        #use this to mimic browser IE6
        self.mech.addheaders =  [("User-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)"),
                                 ("Accept-Charset", "iso-8859-1,*"),
                                 ("Accept-Language", "en-US"),
                                 ("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*")]
Example 26
    def request(self, daterange):
        self.params["q"] = self.query + daterange
        url = self.URL % self.makeParams()
        request = urllib2.Request(
            url,
            headers=self.headers
        )
        proxy_support = urllib2.ProxyHandler({"http": "189.47.194.196:8118"})
        opener = ClientCookie.build_opener(proxy_support)
        ClientCookie.install_opener(opener)
        response = ClientCookie.urlopen(request)

        # proxy_support = urllib2.ProxyHandler({"http": "127.0.0.1:8118"})
        # opener = urllib2.build_opener(proxy_support)
        # urllib2.install_opener(opener)
        # response = urllib2.urlopen(request, timeout=50000)


        return response.read()
Example 27
def _authenticate():
	"""Logs the user in to Facebook"""
	# Create special URL opener (for User-Agent) and cookieJar
	cookieJar = ClientCookie.CookieJar()

	opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
	opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
	ClientCookie.install_opener(opener)
	fp = ClientCookie.urlopen("https://www.facebook.com/")
	forms = ClientForm.ParseResponse(fp)
	fp.close()

	form = forms[0]

	# supply user id and pw
	form["email"]  = _usr_id
	form["pass"] = _pswrd

	fp = ClientCookie.urlopen(form.click())
	fp.close()
Example 28
def _authenticate():
	"""Logs the user in to Facebook"""
	# Create special URL opener (for User-Agent) and cookieJar
	cookieJar = ClientCookie.CookieJar()

	opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookieJar))
	opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
	ClientCookie.install_opener(opener)
	fp = ClientCookie.urlopen("https://graph.facebook.com/oauth/authorize?type=user_agent&client_id=163193033824504&redirect_uri=http://www.yahoo.com&scope=email,user_birthday,user_online_presence,read_stream,offline_access")
	forms = ClientForm.ParseResponse(fp)
	fp.close()

	form = forms[0]

	# supply user id and pw
	form["email"]  = _usr_id
	form["pass"] = _pswrd
	
	fp = ClientCookie.urlopen(form.click())
	#print "IN AUTHENTICATE:",fp.geturl()
	fp.close()
Example 29
	def __init__(self):

		#regex to extract karma + comment karma.
		self.karma_re = re.compile('<b>(\d+)</b></li><li>comment karma: &#32;<b>(\d+)</b>')

		#Because the login is an AJAX POST, we need cookies set up first.
		#That's what made this code annoying to write.
		#This code should work against either cookielib or ClientCookie,
		#whichever one you have.
		try:
			import cookielib

			#We're taking references to functions / objects here
			#so later on we don't need to worry about which actual
			#import we used.
			self.Request = urllib2.Request
			self.urlopen = urllib2.urlopen

			cookie_jar = cookielib.LWPCookieJar()
			opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
			urllib2.install_opener(opener)

		except ImportError:
			try:
				import ClientCookie

				self.Request = ClientCookie.Request
				self.urlopen = ClientCookie.urlopen

				cookie_jar = ClientCookie.LWPCookieJar()
				opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookie_jar))
				ClientCookie.install_opener(opener)

			except ImportError:
				raise ImportError("""This code depends on either
						 'cookielib' or 'ClientCookie',
						 and you have neither.
						""")

		self.user = None
Example 30
    def __init__(self, cookiePath):
        self.cookiePath = cookiePath
        self.cj = None
        ClientCookie = None
        cookielib = None

        try:                                    # Let's see if cookielib is available
            import cookielib
        except ImportError:
            pass
        else:
            import urllib2
            self.urlopen = urllib2.urlopen
            self.cj = cookielib.LWPCookieJar()       # This is a subclass of FileCookieJar that has useful load and save methods
            self.Request = urllib2.Request

        if not cookielib:                   # If importing cookielib fails let's try ClientCookie
            try:
                import ClientCookie
            except ImportError:
                import urllib2
                self.urlopen = urllib2.urlopen
                self.Request = urllib2.Request
            else:
                self.urlopen = ClientCookie.urlopen
                self.cj = ClientCookie.LWPCookieJar()
                self.Request = ClientCookie.Request

        if self.cj != None:                                  # now we have to install our CookieJar so that it is used as the default CookieProcessor in the default opener handler
            if os.path.isfile(cookiePath):
                self.cj.load(cookiePath)
            if cookielib:
                self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
                urllib2.install_opener(self.opener)
            else:
                self.opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(self.cj))
                ClientCookie.install_opener(self.opener)
Example 31
def loader():
    """Try to import cookielib or ClientCookie"""

    cj = None
    ClientCookie = None
    cookielib = None
    
    try:  # see if cookielib is available
        import cookielib            
    except ImportError:
        pass
    else:
        import urllib2    
        urlopen = urllib2.urlopen
        cj = cookielib.LWPCookieJar()  
        Request = urllib2.Request
    
    if not cookielib:  # if importing cookielib fails, try ClientCookie
        try:                                            
            import ClientCookie 
        except ImportError:
            raise
        else:
            urlopen = ClientCookie.urlopen
            cj = ClientCookie.LWPCookieJar()
            Request = ClientCookie.Request

    # install CookieJar so that it is used as the default CookieProcessor in the default opener handler
    if cj != None:
        if cookielib:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)
    return Request, urlopen
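A usage sketch for loader(); the URL and header are placeholders:

    Request, urlopen = loader()
    req = Request('http://www.example.com/', None,
                  {'User-agent': 'Mozilla/5.0 (compatible)'})
    handle = urlopen(req)
    print handle.read()[:200]
    handle.close()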
Example 32
    def __init__(self):

        #Because the login is an AJAX POST, we need cookies set up first.
        #That's what made this code annoying to write.
        #This code should work against either cookielib or ClientCookie,
        #whichever one you have.
        try:
            import cookielib

            #We're taking references to functions / objects here
            #so later on we don't need to worry about which actual
            #import we used.
            self.Request = urllib2.Request
            self.urlopen = urllib2.urlopen

            cookie_jar = cookielib.LWPCookieJar()
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
            urllib2.install_opener(opener)

        except ImportError:
            try:
                import ClientCookie

                self.Request = ClientCookie.Request
                self.urlopen = ClientCookie.urlopen

                cookie_jar = ClientCookie.LWPCookieJar()
                opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cookie_jar))
                ClientCookie.install_opener(opener)

            except ImportError:
                raise ImportError("""This code depends on either
                         'cookielib' or 'ClientCookie',
                         and you have neither.
                        """)

        self.user = None
Example 33
def downloadpageGzip(url):

    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_data_path(), 'cookies.dat')
    logger.info("Cookiefile=" + ficherocookies)
    inicio = time.clock()

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info(
                    "[scrapertools.py] Cookie file exists but is unreadable, deleting it"
                )
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    #txheaders =  {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #              'Referer':'http://www.megavideo.com/?s=signup'}

    parsedurl = urlparse.urlparse(url)
    logger.info("parsedurl=" + str(parsedurl))

    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding': 'gzip,deflate',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Referer': parsedurl[0] + "://" + parsedurl[1]
    }
    logger.info(str(txheaders))

    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()

    fin = time.clock()
    logger.info("[scrapertools.py] Descargado 'Gzipped data' en %d segundos " %
                (fin - inicio + 1))

    # Decompress the Gzip data
    try:
        fin = inicio
        import StringIO
        compressedstream = StringIO.StringIO(data)
        import gzip
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        fin = time.clock()
        logger.info(
            "[scrapertools.py] 'Gzipped data' decompressed in %d seconds " %
            (fin - inicio + 1))
        return data1
    except:
        return data
Example 34
    def __init__(self,
                 cookiesUrl,
                 userAgent='Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'):

        self.cookiesUrl = cookiesUrl
        self.userAgent = {'User-agent': userAgent}
        # the path and filename to save your cookies in

        self.cj = None
        ClientCookie = None
        cookielib = None
        self.httpsForm = None

        # Let's see if cookielib is available
        try:
            import cookielib
        except ImportError:
            # If importing cookielib fails
            # let's try ClientCookie
            try:
                import ClientCookie
            except ImportError:
                # ClientCookie isn't available either
                self.urlopen = urllib2.urlopen
                self.Request = urllib2.Request
            else:
                # imported ClientCookie
                self.urlopen = ClientCookie.urlopen
                self.Request = ClientCookie.Request
                self.cj = ClientCookie.LWPCookieJar()

        else:
            # importing cookielib worked
            self.urlopen = urllib2.urlopen
            self.Request = urllib2.Request
            self.cj = cookielib.LWPCookieJar()
            # This is a subclass of FileCookieJar
            # that has useful load and save methods

        if self.cj is not None:
            # we successfully imported
            # one of the two cookie handling modules

            if os.path.isfile(self.cookiesUrl):
                # if we have a cookie file already saved
                # then load the cookies into the Cookie Jar
                self.cj.load(self.cookiesUrl)

            # Now we need to get our Cookie Jar
            # installed in the opener;
            # for fetching URLs
            if cookielib is not None:
                # if we use cookielib
                # then we get the HTTPCookieProcessor
                # and install the opener in urllib2
                opener = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(self.cj))
                urllib2.install_opener(opener)

            else:
                # if we use ClientCookie
                # then we get the HTTPCookieProcessor
                # and install the opener in ClientCookie
                opener = ClientCookie.build_opener(
                    ClientCookie.HTTPCookieProcessor(self.cj))
                ClientCookie.install_opener(opener)
Example 35
def geturl(urlvideo):
	xbmc.output("[divxlink.py] url="+urlvideo)
	# ---------------------------------------
	#  Initialize the cookie library
	# ---------------------------------------
	ficherocookies = COOKIEFILE
	try:
		os.remove(ficherocookies)
	except:
		pass
	# the path and filename to save your cookies in

	cj = None
	ClientCookie = None
	cookielib = None

	# Let's see if cookielib is available
	try:
		import cookielib
	except ImportError:
		# If importing cookielib fails
		# let's try ClientCookie
		try:
			import ClientCookie
		except ImportError:
			# ClientCookie isn't available either
			urlopen = urllib2.urlopen
			Request = urllib2.Request
		else:
			# imported ClientCookie
			urlopen = ClientCookie.urlopen
			Request = ClientCookie.Request
			cj = ClientCookie.LWPCookieJar()

	else:
		# importing cookielib worked
		urlopen = urllib2.urlopen
		Request = urllib2.Request
		cj = cookielib.LWPCookieJar()
		# This is a subclass of FileCookieJar
		# that has useful load and save methods

	# ---------------------------------
	# Install the cookies
	# ---------------------------------

	if cj is not None:
		# we successfully imported
		# one of the two cookie handling modules

		if os.path.isfile(ficherocookies):
			# if we have a cookie file already saved
			# then load the cookies into the Cookie Jar
			cj.load(ficherocookies)

		# Now we need to get our Cookie Jar
		# installed in the opener;
		# for fetching URLs
		if cookielib is not None:
			# if we use cookielib
			# then we get the HTTPCookieProcessor
			# and install the opener in urllib2
			opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
			urllib2.install_opener(opener)

		else:
			# if we use ClientCookie
			# then we get the HTTPCookieProcessor
			# and install the opener in ClientCookie
			opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
			ClientCookie.install_opener(opener)

	#print "-------------------------------------------------------"
	url=urlvideo
	#print url
	#print "-------------------------------------------------------"
	theurl = url
	# an example url that sets a cookie,
	# try different urls here and see the cookie collection you can make !

	txdata = None
	# if we were making a POST type request,
	# we could encode a dictionary of values here,
	# using urllib.urlencode(somedict)

	txheaders =  {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'}
	# fake a user agent, some websites (like google) don't like automated exploration

	req = Request(theurl, txdata, txheaders)
	handle = urlopen(req)
	cj.save(ficherocookies)                     # save the cookies again    

	data=handle.read()
	handle.close()
	#print data

	# Request the page a second time, as if the banner had been clicked
	patron = 'http\:\/\/www\.divxlink\.com/([^\/]+)/(.*?)\.html'
	matches = re.compile(patron,re.DOTALL).findall(url)
	xbmc.output("[divxlink.py] fragmentos de la URL")
	scrapertools.printMatches(matches)
	
	codigo = ""
	nombre = ""
	if len(matches)>0:
		codigo = matches[0][0]
		nombre = matches[0][1]

	patron = '<input type="hidden" name="rand" value="([^"]+)">'
	matches = re.compile(patron,re.DOTALL).findall(data)
	#scrapertools.printMatches(matches)
	randomstring=""
	if len(matches)>0:
		randomstring=matches[0]
	xbmc.output("[divxlink.py] randomstring="+randomstring)

	txdata = "op=download2&id="+codigo+"&rand="+randomstring+"&referer=&method_free=&method_premium=&down_direct=1"
	xbmc.output(txdata)
	req = Request(theurl, txdata, txheaders)
	handle = urlopen(req)
	cj.save(ficherocookies)                     # save the cookies again    

	data=handle.read()
	handle.close()
	#print data
	patron = '<div id="embedcontmvshre"[^>]+>(.*?)</div>'
	matches = re.compile(patron,re.DOTALL).findall(data)
	#scrapertools.printMatches(matches)
	data = ""
	if len(matches)>0:
		data = matches[0]
		xbmc.output("[divxlink.py] bloque packed="+data)
	else:
		return ""
	
	# Decode the packed JavaScript
	descifrado = unpackerjs.unpackjs(data)
	
	xbmc.output("descifrado="+descifrado)
	# Extract the URL
	patron = '<param name="src"value="([^"]+)"/>'
	matches = re.compile(patron,re.DOTALL).findall(descifrado)
	scrapertools.printMatches(matches)
	
	url = ""
	
	if len(matches)>0:
		url = matches[0]

	xbmc.output("[divxlink.py] url="+url)
	return url
Example 36
def read_body_and_headers(url, post=None, headers=None, follow_redirects=False, timeout=None):
    _log("read_body_and_headers "+url)

    if post is not None:
        _log("read_body_and_headers post="+post)

    # avoid the mutable-default-argument pitfall: a shared default list
    # would keep growing across calls
    if headers is None or len(headers) == 0:
        headers = [["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"]]

    # Start cookie lib
    ficherocookies = os.path.join( get_data_path(), 'cookies.dat' )
    _log("read_body_and_headers cookies_file="+ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib no disponible")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            _log("read_body_and_headers ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            _log("read_body_and_headers ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()

    else:
        _log("read_body_and_headers cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")

        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, issue the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")
    
    # Add the headers
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" % (str(header[0]),str(header[1])) )
        txheaders[header[0]]=header[1]
    _log("read_body_and_headers ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle=urlopen(req)
    else:        
        # Available from Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle=urlopen(req)            
            socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info():
                _log( "%s" % line )
    
    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close the handle
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = StringIO( handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data=handle.read()

    info = handle.info()
    _log("read_body_and_headers Response")

    returnheaders=[]
    _log("read_body_and_headers ---------------------------")
    for header in info.keys():
        _log("read_body_and_headers "+header+"="+info[header])
        returnheaders.append([header,info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")

    '''
    # Issue the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry with special characters escaped
    except:
        req = urllib2.Request(url.replace(" ","%20"))
    
        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])

        response = urllib2.urlopen(req)
    '''
    
    # Elapsed time
    fin = time.clock()
    _log("read_body_and_headers Downloaded in %d seconds " % (fin-inicio+1))

    return data,returnheaders
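A usage sketch for read_body_and_headers; the URL and form fields are placeholders, and urllib is assumed to be importable for urlencode:

    import urllib

    # plain GET
    body, response_headers = read_body_and_headers("http://www.example.com/")

    # POST: the function expects the body as an already-encoded string
    post_data = urllib.urlencode({"user": "name", "pass": "secret"})
    body, response_headers = read_body_and_headers(
        "http://www.example.com/login", post=post_data, timeout=30)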
Example 37
def cachePagePostCookies(url, data):
    xbmc.output("[scrapertools.py] cachePagePostCookies - " + url)
    xbmc.output("[scrapertools.py] cachePagePostCookies - data=" + data)
    inicio = time.clock()
    # Initialize the cookie library
    ficherocookies = os.path.join(os.getcwd(), 'cookies.lwp')
    xbmc.output("[scrapertools.py] cachePagePostCookies - Cookiefile=" +
                ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    #txheaders =  {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #			  'Referer':'http://www.megavideo.com/?s=signup'}
    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
    }

    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, data, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    fin = time.clock()
    xbmc.output("[scrapertools.py] Descargado en %d segundos " %
                (fin - inicio + 1))

    return data
Example 38
def geturl(urlvideo):
    videoid = urlvideo

    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = "http://www.vimeo.com/moogaloop/load/clip:%s/local/" % videoid
    #print url
    #print "-------------------------------------------------------"

    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://vimeo/%s' % urlvideo
    }
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    #cj.save(ficherocookies)                     # save the cookies again

    data = handle.read()
    handle.close()
    print data

    # parse the XML looking for the signature code
    dom = parseString(data)
    xml = dom.getElementsByTagName("xml")

    for node in xml:
        try:
            request_signature = getNodeValue(
                node, "request_signature", "Unknown Uploader").encode("utf-8")
            request_signature_expires = getNodeValue(
                node, "request_signature_expires",
                "Unknown Uploader").encode("utf-8")
        except:
            logger.info("Error : Video borrado")
            return ""
    try:
        quality = ((config.getSetting("quality_flv") == "1" and "hd") or "sd")
    except:
        quality = "sd"
    video_url = "http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (
        videoid, request_signature, request_signature_expires, quality)
    print video_url
    # Socket timeout set to 10 seconds
    socket.setdefaulttimeout(10)

    h = urllib2.HTTPHandler(debuglevel=0)
    request = urllib2.Request(video_url)

    opener = urllib2.build_opener(h)
    urllib2.install_opener(opener)
    try:
        connexion = opener.open(request)
        video_url = connexion.geturl()
    except urllib2.HTTPError, e:
        xbmc.output("[vimeo.py]  error %d (%s) al abrir la url %s" %
                    (e.code, e.msg, video_url))

        print e.read()
    def get_email_contents_data(self, auth):
        if self.VERBOSE:
            print
            print self.VERBOSE_PREFIX + 'from "%s": %s' \
                % (__name__, "[INFO] START")

        cj = None
        ClientCookie = None
        cookielib = None

        try:  # Let's see if cookielib is available
            import cookielib
        except ImportError:
            pass
        else:
            import urllib2

            urlopen = urllib2.urlopen
            cj = cookielib.LWPCookieJar()  # This is a subclass of FileCookieJar that has useful load and save methods
            Request = urllib2.Request

        if not cookielib:  # If importing cookielib fails let's try ClientCookie
            try:
                import ClientCookie
            except ImportError:
                import urllib2

                urlopen = urllib2.urlopen
                Request = urllib2.Request
            else:
                urlopen = ClientCookie.urlopen
                cj = ClientCookie.LWPCookieJar()
                Request = ClientCookie.Request

        ####################################################
        # We've now imported the relevant library - whichever library is being used, urlopen is bound to the right function for retrieving URLs
        # Request is bound to the right function for creating Request objects
        # Let's load the cookies, if they exist.

        if cj is not None:  # now we have to install our CookieJar so that it is used as the default CookieProcessor in the default opener handler
            if os.path.isfile(COOKIEFILE):
                cj.load(COOKIEFILE)
            if cookielib:
                opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
                urllib2.install_opener(opener)
            else:
                opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
                ClientCookie.install_opener(opener)

        # If one of the cookie libraries is available, any call to urlopen will handle cookies using the CookieJar instance we've created
        # (Note that if we are using ClientCookie we haven't explicitly imported urllib2)
        # as an example :

        auth_url_params = "owa/auth/owaauth.dll"

        if auth['owa_url'].endswith("/"):
            theurl = auth['owa_url'] + auth_url_params
        else:
            theurl = auth['owa_url'] + "/" + auth_url_params

        theurl = theurl + "?url=" + auth['owa_url'] + "&reason=0"

        if self.VERBOSE:
            print
            print self.VERBOSE_PREFIX + '"%s" : %s' % (__name__, \
                "[INFO] The owa_url: " + theurl)

        txdata = None  # if we were making a POST type request, we could encode a dictionary of values here - using urllib.urlencode

        txheaders = {'User-agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; '
                                   'en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0.011',
                     'Referer': auth['owa_url']}

        params = {
            'destination': auth['owa_url'],
            'flags': '0',
            'forcedownlevel': '0',
            'trusted': '0',
            'username': auth['login'],
            'password': auth['pass'],
            'isUtf8': '1'
        }

        # some JavaScript functions linked to the clkLgn() login-button submit
        #function gbid(s){return document.getElementById(s);}
        #function clkLgn(){if(gbid("rdoPrvt").checked){var oD=new Date();oD.setTime(oD.getTime()+2*7*24*60*60*1000);var sA="acc="+(gbid("chkBsc").checked?1:0);var sL="lgn="+gbid("username").value;document.cookie="logondata="+sA+"&"+sL+"; expires="+oD.toUTCString();}}

        txdata = urllib.urlencode(params)

        try:
            req = Request(theurl, txdata, txheaders)  # create a request object
            handle = urlopen(req)  # and open it to return a handle on the url
        except IOError as e:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' \
                    % (__name__, '[ERROR] We failed to open "%s".' % theurl)
                if hasattr(e, 'code'):
                    print self.VERBOSE_PREFIX + 'from "%s": %s' \
                        % (__name__, '[ERROR] We failed with error code - %s.' % e.code)
            pass
        else:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' \
                    % (__name__, '[INFO] Here are the headers of the page :')
                print self.VERBOSE_PREFIX + 'from "%s": %s' \
                    % (__name__, handle.info())
                print self.VERBOSE_PREFIX + 'from "%s"' % __name__
        # handle.read() returns the page, handle.geturl() returns the true url of the page fetched (in case urlopen has followed any redirects, which it sometimes does)
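        # e.g. (sketch): handle.geturl() may differ from theurl at this point
        # if the OWA server redirected the login POST to the inbox page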


        if cj is None:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \
                    "[ERROR] We don't have a cookie library available - sorry.")
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \
                    "[ERROR] I can't show you any cookies.")
            pass
        else:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \
                    'These are the cookies we have received so far :')
                for index, cookie in enumerate(cj):
                    print self.VERBOSE_PREFIX + 'from "%s": %s - %s' \
                        % (__name__, index, cookie)
            cj.save(COOKIEFILE)  # save the cookies again

        if self.VERBOSE:
            if handle is not None:
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \
                    "[INFO] Handle is not None.")
            else:
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \
                    "[ERROR] Handle is None. Maybe the connection was shut down ...")

        try:
            page_contents = handle.read()
        except:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, \
                    "[ERROR] Problem with reading from handle")
            pass
        else:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' \
                    % (__name__, \
                       "[INFO] Read contents size: '" + str(len(page_contents)) + "'")

                # save the content to a temporary file for inspection
                print self.VERBOSE_PREFIX + \
                    " and write to temporary file '" + OWA_CHECKER_OUTPUT + "'"
                fh = open(OWA_CHECKER_OUTPUT, "w")
                fh.write(page_contents)
                fh.close()

            # -------------------------------------------
        """
        theurl2 = auth['owa_url']

        txdata2 = None

        txheaders2 = {'User-agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0.011'}

        params2 = None

        # some Java script functions linked with submit clkLgn() login button
        #function gbid(s){return document.getElementById(s);}
        #function clkLgn(){if(gbid("rdoPrvt").checked){var oD=new Date();oD.setTime(oD.getTime()+2*7*24*60*60*1000);var sA="acc="+(gbid("chkBsc").checked?1:0);var sL="lgn="+gbid("username").value;document.cookie="logondata="+sA+"&"+sL+"; expires="+oD.toUTCString();}}

        txdata2 = urllib.urlencode(params2)

        try:
            req2 = Request(theurl2, txdata2, txheaders2)            # create a request object
            handle2 = urlopen(req2)                               # and open it to return a handle on the url
        except IOError, e:
            if True == self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s'\
                    % (__name__, '[ERROR] We failed to open "%s".' % theurl)
                if hasattr(e, 'code'):
                    print self.VERBOSE_PREFIX + 'from "%s": %s'\
                    % (__name__, '[ERROR] We failed with error code - %s.' % e.code)
            pass
        else:
            if True == self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s'\
                 % (__name__, '[INFO] Here are the headers of the page :')
                print self.VERBOSE_PREFIX + 'from "%s": %s'\
                 % (__name__, handle.info())
                print self.VERBOSE_PREFIX + 'from "%s"' % __name__

        try:
            page_contents2 = handle2.read()
        except:
            if True == self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__,\
                   "[ERROR] Problem with reading from handle")
            pass
        else:
            if True == self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s'\
                % (__name__,\
                "[INFO] Readed contents size: '" + str(len(page_contents2))+ "'")

                # uncommnt for save content view
                print self.VERBOSE_PREFIX + \
                " and write to temporary file '" + OWA_CHECKER_OUTPUT+".GET"+"'"
                fh = open(OWA_CHECKER_OUTPUT + ".GET", "w")
                fh.write(page_contents2)
                fh.close()

        soup = BeautifulSoup(page_contents2)
        """

        #TODO: problem with GET contents with COOKIES
        #        url = opener.open(auth['owa_url'])
        #        page_contents2 = url.read(200000)
        #         print page_contents2

        soup = BeautifulSoup(page_contents)
        #p = soup.findAll('html', '')
        #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, soup.html.body.table
        #tds = soup.findAll("table", 'lvw')
        tds = soup.findAll("h1", 'bld')
        count = len(tds)
        #re.compile("", '')
        #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, "tds len = " + str(count)
        #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, tds[0]

        unreaded_mail_messages_subjects = []
        if count > 0:
            if self.VERBOSE:
                print self.VERBOSE_PREFIX + 'from "%s": %s' \
                    % (__name__, "Found " + str(count) + " subject(s) :")

            for id in xrange(len(tds)):
                #print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, " %d -> %s" % (id, tds[id]))
                re_subject_h1 = re.compile('<h1 class="bld"><a href="#" onclick=".*">(.*)</a></h1>')
                subject = re_subject_h1.search(str(tds[id])).groups()[0]
                unreaded_mail_messages_subjects.append(str(subject).strip())
                #TODO
                #                 show_notification("[EMAIL]", subject)
                if self.VERBOSE:
                    print self.VERBOSE_PREFIX + 'from "%s": %s' \
                        % (__name__, "[EMAIL] %d -> %s" % (id, subject))
        else:
            unreaded_mail_messages_subjects = []
            #TODO
            #                show_notification("[EMAIL]", subject)
            if self.VERBOSE:
                subject = "[INFO] There were no EMAILs ..."
                print self.VERBOSE_PREFIX + 'from "%s": %s' % (__name__, subject)

        if self.VERBOSE:
            print self.VERBOSE_PREFIX + 'from "%s"' % __name__
            print self.VERBOSE_PREFIX + 'from "%s": %s' \
                % (__name__, "[INFO] STOP")

            print self.VERBOSE_PREFIX + \
                'from "%s": [INFO] unread subjects: %s' \
                % (__name__, str(unreaded_mail_messages_subjects))

        return unreaded_mail_messages_subjects
Ejemplo n.º 40
0
def downloadpage(url,post=None,headers=[['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; es-ES; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14']]):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url="+url)
    
    if post is not None:
        logger.info("[scrapertools.py] post="+post)
    else:
        logger.info("[scrapertools.py] post=None")
    
    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    #  Initialize the cookie library
    ficherocookies = os.path.join( config.get_setting("cookies.dir"), 'cookies.lwp' )
    logger.info("[scrapertools.py] Cookiefile="+ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importando cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib no disponible")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importando ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie no disponible")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie disponible")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        logger.info("[scrapertools.py] cookielib disponible")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods
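        # A minimal sketch of that load/save round trip (the path is hypothetical):
        #   cj.load("/tmp/cookies.lwp")   # raises IOError if the file is missing
        #   ... perform requests through an opener that uses cj ...
        #   cj.save("/tmp/cookies.lwp")   # persists the cookies in LWP format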

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        logger.info("[scrapertools.py] Hay cookies")

        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Leyendo fichero cookies")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener usando urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            logger.info("[scrapertools.py] opener usando ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, issue the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Add the headers
    for header in headers:
        logger.info("[scrapertools.py] header="+header[0]+": "+header[1])
        txheaders[header[0]]=header[1]

    # Build the request
    if post is None:
        logger.info("[scrapertools.py] petición GET")
    else:
        logger.info("[scrapertools.py] petición POST")
    
    req = Request(url, post, txheaders)
    handle = urlopen(req)
    
    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    data=handle.read()
    handle.close()

    '''
    # Issue the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry substituting special characters
    except:
        req = urllib2.Request(url.replace(" ","%20"))

        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])

        response = urllib2.urlopen(req)
    '''
    
    # Elapsed time
    fin = time.clock()
    logger.info("[scrapertools.py] Descargado en %d segundos " % (fin-inicio+1))

    return data
Ejemplo n.º 41
0
            for header in request.header_items():
                self.httpout.write("%s: %s\n" % header[:])

            self.httpout.write('\n')

        return request

    def http_response(self, request, response):
        if __debug__:
            code, msg, hdrs = response.code, response.msg, response.info()
            self.httpout.write("HTTP/1.x %s %s\n" % (code, msg))
            self.httpout.write(str(hdrs))

        return response

    https_request = http_request
    https_response = http_response


# Example
cjar = ClientCookie.LWPCookieJar()
opener = ClientCookie.build_opener(
    ClientCookie.HTTPCookieProcessor(cjar),
    ClientCookie.HTTPRefererProcessor(),
    HTTPMyDebugProcessor(),
)
ClientCookie.install_opener(opener)
response = ClientCookie.urlopen("http://www.google.com")
#...
Ejemplo n.º 42
0
def getvideo(urlpagina):
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    xbmc.output("ficherocookies %s" % ficherocookies)
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = urlpagina
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://www.movshare.net/'
    }
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    #print data

    # Request it a second time, as if you had clicked on the banner
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    #print data

    patronvideos = '<embed type="video/divx" src="([^"]+)"'

    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) == 0:
        patronvideos = '"file","([^"]+)"'
        matches = re.compile(patronvideos, re.DOTALL).findall(data)
    return matches[0]
Ejemplo n.º 43
0
def geturl(urlvideo):
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    #xbmc.output("ficherocookies %s", ficherocookies)
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = "http://video.yahoo.com/watch/%s" % urlvideo
    #url = "http://new.music.yahoo.com/videos/"
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Referer': 'http://video.yahoo.com/',
        'X-Forwarded-For': '12.13.14.15'
    }
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    #print data
    '''
	# Extract video height and width
	mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', data)
	if mobj is None:
		logger.info('ERROR: unable to extract video height')
		return ""
	yv_video_height = mobj.group(1)

	mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', data)
	if mobj is None:
		logger.info('ERROR: unable to extract video width')
		return ""
	yv_video_width = mobj.group(1)
	'''

    # Retrieve video playlist to extract media URL
    # I'm not completely sure what all these options are, but we
    # seem to need most of them, otherwise the server sends a 401.
    yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
    yv_bitrate = '700'  # according to Wikipedia this is hard-coded
    url = (
        'http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id='
        + urlvideo + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' +
        yv_bitrate + '&vidH=720' + '&vidW=1280' +
        '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797'
    )
    #http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=v205690975&tech=flash&mode=playlist&lg=xRen3QvzZ_5wj1x8BbzEcR&bitrate=700&vidH=324&vidW=576&swf=as3&rd=video.yahoo.com-offsite&tk=null&adsupported=v1,v2,&eventid=1301797
    #url = 'http://video.music.yahoo.com/up/music_e/process/getPlaylistFOP.php?node_id='+ urlvideo  + '&tech=flash&bitrate=20000&mode=&vidH=720&vidW=1280'

    req = Request(url, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data2 = handle.read()
    handle.close()
    print data2

    # Extract media URL from playlist XML
    mobj = re.search(
        r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', data2)
    if mobj is not None:
        video_url = urllib.unquote(mobj.group(1) +
                                   mobj.group(2)).decode('utf-8')
        video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
        print video_url
        return video_url
    else:
        logger.info('ERROR: Unable to extract media URL http')
        mobj = re.search(r'<STREAM (APP="[^>]+)>', data2)
        if mobj is None:
            logger.info('ERROR: Unable to extract media URL rtmp')
            return ""
        #video_url = mobj.group(1).replace("&amp;","&")
        video_url = urllib.unquote(mobj.group(1).decode('utf-8'))
        video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
        '''
		<STREAM APP="rtmp://s1sflod020.bcst.cdn.s1s.yimg.com/StreamCache" 
		FULLPATH="/s1snfs06r01/001/__S__/lauvpf/76414327.flv?StreamID=76414327&xdata=Njc3Mzc4MzA2NGNiNzI5MW-205754530-0&pl_auth=2598a5574b592b7c6ab262e4775b3930&ht=180&b=eca0lm561k1gn4cb7291a&s=396502118&br=700&q=ahfG2he5gqV40Laz.RUcnB&rd=video.yahoo.com-offsite&so=%2FMUSIC" 
		CLIPID="v205690975" TYPE="STREAMING" AD="NO" 
		APPNAME="ContentMgmt" URLPREFIX="rtmp://" 
		SERVER="s1sflod020.bcst.cdn.s1s.yimg.com" 
		BITRATE="7000" PORT="" 
		PATH="/s1snfs06r01/001/__S__/lauvpf/76414327.flv" 
		QUERYSTRING="StreamID=76414327&xdata=Njc3Mzc4MzA2NGNiNzI5MW-205754530-0&pl_auth=2598a5574b592b7c6ab262e4775b3930&ht=180&b=eca0lm561k1gn4cb7291a&s=396502118&br=700&q=ahfG2he5gqV40Laz.RUcnB&rd=video.yahoo.com-offsite&so=%2FMUSIC" 
		URL="" TITLE="-" AUTHOR="-" COPYRIGHT="(c) Yahoo! Inc. 2006" STARTTIME="" ENDTIME=""/>
		'''
        swfUrl = 'http://d.yimg.com/ht/yep/vyc_player.swf'
        try:
            App = re.compile(r'APP="([^"]+)"').findall(video_url)[0]
            Fullpath = re.compile(r'FULLPATH="([^"]+)"').findall(video_url)[0]
            Appname = re.compile(r'APPNAME="([^"]+)"').findall(video_url)[0]
            #Server      = re.compile(r'SERVER="([^"]+)"').findall(video_url)[0]
            Path = re.compile(r'PORT=""  PATH="([^"]+)"').findall(
                video_url)[0].replace(".flv", "")
            #Querystring = re.compile(r'QUERYSTRING="([^"]+)"').findall(video_url)[0]
            playpath = Fullpath
            App = App.replace("/StreamCache", ":1935/StreamCache/")
            video_url = "%s%s%s playpath=%s swfurl=%s swfvfy=true" % (
                App, Appname, playpath, Path, swfUrl)
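            # The assembled string follows rtmpdump-style option syntax; a
            # hypothetical result looks like:
            #   rtmp://host:1935/StreamCache/ContentMgmt/dir/file.flv?StreamID=...
            #       playpath=/dir/file swfurl=http://d.yimg.com/ht/yep/vyc_player.swf swfvfy=true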
        except:
            logger.info('ERROR: re.compile failed')
            video_url = ""

    print video_url.encode("utf-8")
    return video_url
Ejemplo n.º 44
0
def downloadpagewithcookies(url):
    #  Initialize the cookie library
    ficherocookies = os.path.join(config.DATA_PATH, 'cookies.lwp')
    print "Cookiefile=" + ficherocookies

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    #txheaders =  {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
    #              'Referer':'http://www.megavideo.com/?s=signup'}
    txheaders = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Host': 'www.meristation.com',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive'
    }

    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()

    return data
Ejemplo n.º 45
0
def geturl(urlvideo):
    xbmc.output("[divxlink.py] url=" + urlvideo)
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = urlvideo
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {
        'User-Agent':
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    #print data

    # Request it a second time, as if you had clicked on the banner
    patron = 'http\:\/\/www\.divxlink\.com/([^\/]+)/(.*?)\.html'
    matches = re.compile(patron, re.DOTALL).findall(url)
    xbmc.output("[divxlink.py] fragmentos de la URL")
    scrapertools.printMatches(matches)

    codigo = ""
    nombre = ""
    if len(matches) > 0:
        codigo = matches[0][0]
        nombre = matches[0][1]

    patron = '<input type="hidden" name="rand" value="([^"]+)">'
    matches = re.compile(patron, re.DOTALL).findall(data)
    #scrapertools.printMatches(matches)
    randomstring = ""
    if len(matches) > 0:
        randomstring = matches[0]
    xbmc.output("[divxlink.py] randomstring=" + randomstring)

    txdata = "op=download2&id=" + codigo + "&rand=" + randomstring + "&referer=&method_free=&method_premium=&down_direct=1"
    xbmc.output(txdata)
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    #print data
    patron = '<div id="embedcontmvshre"[^>]+>(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    #scrapertools.printMatches(matches)
    data = ""
    if len(matches) > 0:
        data = matches[0]
        xbmc.output("[divxlink.py] bloque packed=" + data)
    else:
        return ""

    # Unpack it
    descifrado = unpackerjs.unpackjs(data)

    xbmc.output("descifrado=" + descifrado)
    # Extract the URL
    patron = '<param name="src"value="([^"]+)"/>'
    matches = re.compile(patron, re.DOTALL).findall(descifrado)
    scrapertools.printMatches(matches)

    url = ""

    if len(matches) > 0:
        url = matches[0]

    xbmc.output("[divxlink.py] url=" + url)
    return url
Ejemplo n.º 46
0
def downloadpage(url,post=None,headers=[['User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12']],follow_redirects=True, timeout=socket.getdefaulttimeout()):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url="+url)
    
    if post is not None:
        logger.info("[scrapertools.py] post="+post)
    else:
        logger.info("[scrapertools.py] post=None")
    
    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    #  Initialize the cookie library
    ficherocookies = os.path.join( config.get_setting("cookies.dir"), 'cookies.lwp' )
    logger.info("[scrapertools.py] ficherocookies="+ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importando cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib no disponible")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importando ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie no disponible")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie disponible")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        logger.info("[scrapertools.py] cookielib disponible")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        logger.info("[scrapertools.py] Hay cookies")

        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Leyendo fichero cookies")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener usando urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
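            # NoRedirectHandler is defined elsewhere in scrapertools; a typical
            # (assumed) implementation hands the 3xx response back untouched
            # instead of following it:
            #
            # class NoRedirectHandler(urllib2.HTTPRedirectHandler):
            #     def http_error_302(self, req, fp, code, msg, headers):
            #         infourl = urllib.addinfourl(fp, headers, req.get_full_url())
            #         infourl.status = code
            #         infourl.code = code
            #         return infourl
            #     http_error_301 = http_error_303 = http_error_307 = http_error_302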

        else:
            logger.info("[scrapertools.py] opener usando ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, issue the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        logger.info("[scrapertools.py] petición GET")
    else:
        logger.info("[scrapertools.py] petición POST")
    
    # Add the headers
    logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        logger.info("[scrapertools.py] header %s=%s" % (str(header[0]),str(header[1])) )
        txheaders[header[0]]=header[1]
    logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle=urlopen(req)
    else:
        # Available from Python 2.6 onward --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        deftimeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(timeout)
            handle=urlopen(req)
        except:
            import sys
            for line in sys.exc_info():
                logger.error( "%s" % line )

        socket.setdefaulttimeout(deftimeout)
    
    # Update the cookie store
    cj.save(ficherocookies)

    # Read the data and close
    data=handle.read()
    info = handle.info()
    logger.info("[scrapertools.py] Respuesta")
    logger.info("[scrapertools.py] ---------------------------")
    for header in info:
        logger.info("[scrapertools.py] "+header+"="+info[header])
    handle.close()
    logger.info("[scrapertools.py] ---------------------------")

    '''
    # Issue the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry substituting special characters
    except:
        req = urllib2.Request(url.replace(" ","%20"))

        # Add the headers
        for header in headers:
            req.add_header(header[0],header[1])

        response = urllib2.urlopen(req)
    '''
    
    # Elapsed time
    fin = time.clock()
    logger.info("[scrapertools.py] Descargado en %d segundos " % (fin-inicio+1))

    return data
Ejemplo n.º 47
0
def downloadpage(
    url,
    post=None,
    headers=[[
        'User-Agent',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12'
    ]],
    follow_redirects=True,
    timeout=socket.getdefaulttimeout()):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url=" + url)

    if post is not None:
        logger.info("[scrapertools.py] post=" + post)
    else:
        logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    #  Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"),
                                  'cookies.dat')
    logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importando cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib no disponible")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importando ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie no disponible")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie disponible")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()

    else:
        logger.info("[scrapertools.py] cookielib disponible")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        logger.info("[scrapertools.py] Hay cookies")

        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Leyendo fichero cookies")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info(
                    "[scrapertools.py] El fichero de cookies existe pero es ilegible, se borra"
                )
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener usando urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            logger.info("[scrapertools.py] opener usando ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, issue the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        logger.info("[scrapertools.py] petición GET")
    else:
        logger.info("[scrapertools.py] petición POST")

    # Add the headers
    logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        logger.info("[scrapertools.py] header %s=%s" %
                    (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)

    try:

        if timeout is None:
            handle = urlopen(req)
        else:
            # For all versions:
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)

        # Update the cookie store
        #Exception
        #cj.save(ficherocookies)

        # Read the data and close
        if handle.info().get('Content-Encoding') == 'gzip':
            logger.info("[scrapertools.py] gzipped")
            import StringIO
            data = handle.read()
            compressedstream = StringIO.StringIO(data)
            import gzip
            gzipper = gzip.GzipFile(fileobj=compressedstream)
            data = gzipper.read()
            gzipper.close()
        else:
            logger.info("[scrapertools.py] normal")
            data = handle.read()
    except urllib2.HTTPError, e:
        logger.info("error " + repr(e))
        import traceback
        traceback.print_exc()
        data = e.read()
        #logger.info("data="+repr(data))
        return data
Ejemplo n.º 48
0
def geturl(urlvideo):
    logger.info("[metadivx.py] url="+urlvideo)
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url=urlvideo
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders =  {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'}
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)                     # save the cookies again    

    data=handle.read()
    handle.close()
    #print data

    # Request it a second time, as if you had clicked on the banner
    patron = 'http\:\/\/www\.metadivx\.com/([^\/]+)/(.*?)\.html'
    matches = re.compile(patron,re.DOTALL).findall(url)
    logger.info("[metadivx.py] fragmentos de la URL")
    scrapertools.printMatches(matches)
    
    codigo = ""
    nombre = ""
    if len(matches)>0:
        codigo = matches[0][0]
        nombre = matches[0][1]

    txdata = "op=download1&usr_login=&id="+codigo+"&fname="+nombre+"&referer=&method_free=Continue"
    logger.info(txdata)
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)                     # save the cookies again    

    data=handle.read()
    handle.close()
    #print data
    
    patron = '<div id="embedcontmvshre[^>]+>(.*?)</div>'
    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    logger.info("[metadivx.py] bloque packed")
    if len(matches)>0:
        logger.info(matches[0])
    '''
    <center>
    <script type='text/javascript'>eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5tcd6dva|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|')))
    </script>
    </center>
    '''
    # The packed JavaScript is
    #eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|')))
    '''
    eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'
    <7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12">
    <2 1="j"0="i">
    <2 1="v"0="u">
    <2 1="b"0="5"/>
    <2 1="c"0="5"/>
    <2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/>
    <8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/">
    <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/">
    </8>
    </7>\');',36,51,
    0'value
    1|name
    2|param
    3|com
    4|http
    5|false
    6|divx
    7|object
    8|embed
    9|plugin
    a|go
    b|bannerEnabled
    c|autoPlay
    d|
    e|320px
    f|height
    g|630px
    h|width
    i|none
    j|custommode
    k|avi
    l|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_
    m|Capitancinema
    n|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa
    o|182
    p|206
    q|45
    r|73
    s|76
    t|src
    u|auto
    v|bufferingMode
    w|id
    x|download
    y|pluginspage
    z|video
    10|type
    11|embedmvshre
    12|cab
    13|DivXBrowserPlugin
    14|codebase
    15|CC0F21721616
    16|9C46
    17|41fa
    18|D0AB
    19|67DABFBF
    1a|clsid
    1b|classid
    1c|embedcontmvshre
    1d|write
    1e|document
    '.split('
    |')))
    '''
    # The unpacked JavaScript is
    #document.write('<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"><param name="custommode"value="none"><param name="bufferingMode"value="auto"><param name="bannerEnabled"value="false"/><param name="autoPlay"value="false"/><param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/><embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"></embed></object>');
    '''
    <object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab">
    <param name="custommode"value="none">
    <param name="bufferingMode"value="auto">
    <param name="bannerEnabled"value="false"/>
    <param name="autoPlay"value="false"/>
    <param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/>
    <embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/">
    </embed>
    </object>');
    '''
    # The video URL is
    #http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi
    
    # Unpack it
    descifrado = unpackerjs.unpackjs(data)
    logger.info("descifrado="+descifrado)
    
    # Extract the URL
    patron = '<param name="src"value="([^"]+)"/>'
    matches = re.compile(patron,re.DOTALL).findall(descifrado)
    scrapertools.printMatches(matches)
    
    url = ""
    
    if len(matches)>0:
        url = matches[0]

    logger.info("[metadivx.py] url="+url)
    return url
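
# A minimal sketch of the substitution that unpackerjs.unpackjs performs on the
# block above (illustrative helper, not part of the original plugin): the packer
# payload encodes every word as its dictionary index in base 36, and unpacking
# walks the '|'-separated dictionary backwards, swapping each token back in.
def unpack_packed_js(payload, radix, words):
    import re
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    def to_base(n):
        # base-`radix` rendering of n, mirroring c.toString(a) in the packed JS
        out = ""
        while True:
            out = digits[n % radix] + out
            n = n // radix
            if n == 0:
                return out
    for c in range(len(words) - 1, -1, -1):
        if words[c]:
            payload = re.sub(r"\b" + to_base(c) + r"\b", words[c], payload)
    return payload
# e.g. unpack_packed_js(packed_source, 36, dictionary.split('|')) recovers the
# document.write('<object ...>') call shown unpacked above.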
Ejemplo n.º 49
0
def geturl(urlvideo):
    xbmc.output("[vk.py] url=" + urlvideo)
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = urlvideo.replace("&amp;", "&")
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {
        'User-Agent':
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    #print data

    # Extract the URL

    print data
    regexp = re.compile(r'vkid=([^\&]+)\&')
    match = regexp.search(data)
    vkid = ""
    print 'match %s' % str(match)
    if match is not None:
        vkid = match.group(1)
    else:
        print "no encontro vkid"

    patron = "var video_host = '([^']+)'.*?"
    patron += "var video_uid = '([^']+)'.*?"
    patron += "var video_vtag = '([^']+)'.*?"
    patron += "var video_no_flv = ([^;]+);.*?"
    patron += "var video_max_hd = '([^']+)'"
    matches = re.compile(patron, re.DOTALL).findall(data)
    videourl = ""  # avoid a NameError below when the page yields no matches
    for match in matches:
        if match[3].strip() == "0" and match[1] != "0":
            tipo = "flv"
            if "http://" in match[0]:
                videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo)
            else:
                videourl = "http://%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo)
        elif match[1] == "0" and vkid != "":
            # e.g. http://447.gt3.vkadre.ru/assets/videos/2638f17ddd39-75081019.vk.flv
            tipo = "flv"
            if "http://" in match[0]:
                videourl = "%s/assets/videos/%s%s.vk.%s" % (match[0], match[2], vkid, tipo)
            else:
                videourl = "http://%s/assets/videos/%s%s.vk.%s" % (match[0], match[2], vkid, tipo)
        else:
            # e.g. http://cs12385.vkontakte.ru/u88260894/video/d09802a95b.360.mp4
            tipo = "360.mp4"
            if match[0].endswith("/"):
                videourl = "%su%s/video/%s.%s" % (match[0], match[1], match[2], tipo)
            else:
                videourl = "%s/u%s/video/%s.%s" % (match[0], match[1], match[2], tipo)

    return videourl
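
# Condensed sketch of the cookie bootstrap that these examples repeat inline
# (hypothetical helper, not in the original sources): prefer cookielib, fall
# back to ClientCookie, load any saved jar, and install a cookie-aware opener
# so every later urlopen() shares the same jar.
def build_cookie_jar(cookie_file):
    import os
    try:
        import cookielib
        import urllib2
        cj = cookielib.LWPCookieJar()
        if os.path.isfile(cookie_file):
            cj.load(cookie_file)
        urllib2.install_opener(
            urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)))
    except ImportError:
        import ClientCookie
        cj = ClientCookie.LWPCookieJar()
        if os.path.isfile(cookie_file):
            cj.load(cookie_file)
        ClientCookie.install_opener(
            ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)))
    return cj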
Ejemplo n.º 50
0
    def do_login_and_fetch(self, cj, COOKIEFILE, LOGIN_URL, login_params,
                           fetch_url, save_to, **args):
        """
        Method to do an automated login and save the cookie. This is required for presentation download.
        """
        ClientCookie = None
        cookielib = None
        # Properly import the correct cookie lib
        try:
            import http.cookiejar
        except ImportError:
            # If importing cookielib fails
            # let's try ClientCookie
            try:
                import ClientCookie
            except ImportError:
                # ClientCookie isn't available either
                urlopen = urllib.request.urlopen
                Request = urllib.request.Request
            else:
                # imported ClientCookie
                urlopen = ClientCookie.urlopen
                Request = ClientCookie.Request
                cj = ClientCookie.LWPCookieJar()
        else:
            # importing http.cookiejar worked; keep a reference so the
            # opener selection below takes the cookiejar branch
            cookielib = http.cookiejar
            urlopen = urllib.request.urlopen
            Request = urllib.request.Request
            cj = http.cookiejar.LWPCookieJar()

        if cj is not None:
            if os.path.isfile(COOKIEFILE):
                cj.load(COOKIEFILE)
            if cookielib is not None:
                opener = urllib.request.build_opener(
                    urllib.request.HTTPCookieProcessor(cj))
                urllib.request.install_opener(opener)
            else:
                opener = ClientCookie.build_opener(
                    ClientCookie.HTTPCookieProcessor(cj))
                ClientCookie.install_opener(opener)

        headers = {
            'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        }
        request = Request(LOGIN_URL, login_params, headers)
        handle = urlopen(request)
        if cj:
            cj.save(COOKIEFILE)
        request = Request(fetch_url, None, headers)
        try:
            handle = urlopen(request)
        except urllib.error.HTTPError:
            print('Presentation not available for download!', file=sys.stderr)
            return
        data = handle.read()
        info = handle.info()
        content_type = info['Content-Type']  # avoid shadowing the built-in `type`
        ext = self.get_extension(content_type)
        if not save_to:
            save_to = fetch_url.split('/')[-2] + '.'
        save_to = save_to + ext
        fp = open(save_to, 'wb')
        fp.write(data)
        fp.close()
        if self.verbose:
            print('Presentation downloaded and saved to %s' % save_to)
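
# Hedged usage sketch for do_login_and_fetch (the names, URLs and form fields
# below are placeholders, not taken from the original): login_params is the raw
# POST body, so it would normally come from urllib.parse.urlencode(...) encoded
# to bytes.
#
#   import urllib.parse
#   params = urllib.parse.urlencode({'user_login': 'me',
#                                    'user_password': 'secret'}).encode('utf-8')
#   downloader.do_login_and_fetch(None, '/tmp/cookies.lwp',
#                                 'https://example.com/login', params,
#                                 'https://example.com/talk/1234/download', '')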
Ejemplo n.º 51
0
	# Now we need to get our Cookie Jar
	# installed in the opener;
	# for fetching URLs
	if cookielib is not None:
		# if we use cookielib
		# then we get the HTTPCookieProcessor
		# and install the opener in urllib2
		opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
		urllib2.install_opener(opener)

	else:
		# if we use ClientCookie
		# then we get the HTTPCookieProcessor
		# and install the opener in ClientCookie
		opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
		ClientCookie.install_opener(opener)

	
def saxpost(sid,source,message,type):

	if SILENTMODE:
		return

	#print message
	kickthem = 0
	for badword in BADWORDS:
		if -1 < message.lower().find(badword.lower()):
			kickthem = 1
			kickwhy = 'Disallowed word'
	if -1 < message.find('\x03'):  # \x03 is the IRC colour-code control character
Ejemplo n.º 52
0
def GetMegavideoUser(login, password, megavidcookiepath):
    # New login code derived from old code by Voinage et al.; removes the need for the mechanize module.

    # If no user or pass is provided, read them from the stored login file.
    if login is False or password is False:
        if os.path.exists(megavidcookiepath):
            # assumed: the stored-login path (the source referenced self.login, a class-era leftover)
            loginf = openfile(megavidcookiepath)
            login = get_user(loginf)
            password = get_pass(loginf)

    # ---------------------------------------
    #  Cookie stuff
    # ---------------------------------------
    ficherocookies = megavidcookiepath
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = "http://www.megavideo.com/?s=signup"
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = "action=login&cnext=&snext=&touser=&user=&nickname=" + login + "&password="******"([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)

    if len(matches) == 0:
        print 'something bad happened'

    return matches[0]
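
# Alternative sketch: instead of regexing the saved cookie file, the same value
# can be read from the jar object directly (assumes the site sets a cookie
# literally named "user", as the patterns above imply):
#
#   for cookie in cj:
#       if cookie.name == "user":
#           return cookie.value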
Ejemplo n.º 53
0
def getvideo(urlpagina):
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    xbmc.output("ficherocookies %s" % ficherocookies)
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url=urlpagina
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders =  {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
                  'Referer':'http://www.movshare.net/'}
    # fake a user agent, some websites (like google) don't like automated exploration

    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)                     # save the cookies again    

    data=handle.read()
    handle.close()
    #print data

    # Request it a second time, as if you had clicked on the banner
    req = Request(theurl, txdata, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)                     # save the cookies again    

    data=handle.read()
    handle.close()
    #print data

    patronvideos = '<embed type="video/divx" src="([^"]+)"'

    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) == 0:
        return ""
    return matches[0]
Ejemplo n.º 54
0
def geturl(urlvideo):
    xbmc.output("[gigabyupload.py] url=" + urlvideo)
    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass
    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = urlvideo
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txdata = None
    # if we were making a POST type request,
    # we could encode a dictionary of values here,
    # using urllib.urlencode(somedict)

    txheaders = {
        'User-Agent':
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'
    }
    # fake a user agent, some websites (like google) don't like automated exploration
    try:
        req = Request(theurl, txdata, txheaders)
        handle = urlopen(req)
        cj.save(ficherocookies)  # save the cookies again

        data = handle.read()
        handle.close()
    except:
        data = ""
    #print data

    # Prepare the second request, as if you had clicked on the banner
    patron = 'http\:\/\/www\.gigabyteupload\.com/download\-([^\-]+)\-.*?'
    matches = re.compile(patron, re.DOTALL).findall(url)
    id = matches[0]
    patron = '<form method="post" action="([^"]+)">[^<]+<input type="hidden" name="security_key" value="([^"]+)" \/>'
    #patron += '<p><input type="submit" name="submit" value="([^"]+)" class="cbutton" \/>'

    matches = re.compile(patron, re.DOTALL).findall(data)
    xbmc.output("[gigabyupload.py] fragmentos de la URL : " +
                str(len(matches)))
    scrapertools.printMatches(matches)

    cecid = ""
    submit = ""

    url2 = theurl
    if len(matches) > 0:
        url2 = matches[0][0]
        #id = matches[0][5]
        cecid = matches[0][1]
        submit = "Watch Online"
        #aff = matches[0][3]
        #came_from = matches[0][4]

    txdata = "op=download&usr_login=&id=" + id + "&security_key=" + cecid + "&submit=" + submit + "&aff=&came_from=referer=&method_free=Free+Stream"
    xbmc.output(txdata)
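    # Equivalent sketch with urllib.urlencode (hypothetical refactor, left as a
    # comment because urlencode would also percent-escape the '=' inside the
    # came_from value, which the hand-built string above deliberately keeps):
    #   import urllib
    #   txdata = urllib.urlencode({'op': 'download', 'usr_login': '', 'id': id,
    #                              'security_key': cecid, 'submit': submit,
    #                              'aff': '', 'came_from': 'referer=',
    #                              'method_free': 'Free Stream'})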
    try:
        req = Request(url2, txdata, txheaders)
        handle = urlopen(req)
        cj.save(ficherocookies)  # save the cookies again

        data = handle.read()
        handle.close()
        #print data
    except:
        data = ""

    # Extract the packed fragment
    patron = '<div id="player">[^<]+<script type="text/javascript">(eval.*?)</script>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    data = ""
    if len(matches) > 0:
        data = matches[0]
        xbmc.output("[Gigabyteupload.py] bloque packed=" + data)
    else:
        return ""

    # Unpack it
    descifrado = unpackerjs2.unpackjs(data)

    # Extract the video URL
    xbmc.output("descifrado=" + descifrado)
    patron = '<param name="src" value="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(descifrado)
    scrapertools.printMatches(matches)

    url = ""

    if len(matches) > 0:
        url = matches[0]

    xbmc.output("[gigabyteupload.py] url=" + url)
    return url
Ejemplo n.º 55
0
def geturl(urlvideo):
	xbmc.output("[metadivx.py] url="+urlvideo)
	# ---------------------------------------
	#  Initialize the cookie library
	# ---------------------------------------
	ficherocookies = COOKIEFILE
	try:
		os.remove(ficherocookies)
	except:
		pass
	# the path and filename to save your cookies in

	cj = None
	ClientCookie = None
	cookielib = None

	# Let's see if cookielib is available
	try:
		import cookielib
	except ImportError:
		# If importing cookielib fails
		# let's try ClientCookie
		try:
			import ClientCookie
		except ImportError:
			# ClientCookie isn't available either
			urlopen = urllib2.urlopen
			Request = urllib2.Request
		else:
			# imported ClientCookie
			urlopen = ClientCookie.urlopen
			Request = ClientCookie.Request
			cj = ClientCookie.LWPCookieJar()

	else:
		# importing cookielib worked
		urlopen = urllib2.urlopen
		Request = urllib2.Request
		cj = cookielib.LWPCookieJar()
		# This is a subclass of FileCookieJar
		# that has useful load and save methods

	# ---------------------------------
	# Install the cookies
	# ---------------------------------

	if cj is not None:
		# we successfully imported
		# one of the two cookie handling modules

		if os.path.isfile(ficherocookies):
			# if we have a cookie file already saved
			# then load the cookies into the Cookie Jar
			cj.load(ficherocookies)

		# Now we need to get our Cookie Jar
		# installed in the opener;
		# for fetching URLs
		if cookielib is not None:
			# if we use cookielib
			# then we get the HTTPCookieProcessor
			# and install the opener in urllib2
			opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
			urllib2.install_opener(opener)

		else:
			# if we use ClientCookie
			# then we get the HTTPCookieProcessor
			# and install the opener in ClientCookie
			opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
			ClientCookie.install_opener(opener)

	#print "-------------------------------------------------------"
	url=urlvideo
	#print url
	#print "-------------------------------------------------------"
	theurl = url
	# an example url that sets a cookie,
	# try different urls here and see the cookie collection you can make !

	txdata = None
	# if we were making a POST type request,
	# we could encode a dictionary of values here,
	# using urllib.urlencode(somedict)

	txheaders =  {'User-Agent':'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)'}
	# fake a user agent, some websites (like google) don't like automated exploration

	req = Request(theurl, txdata, txheaders)
	handle = urlopen(req)
	cj.save(ficherocookies)                     # save the cookies again    

	data=handle.read()
	handle.close()
	#print data

	# Prepare the second request, as if you had clicked on the banner
	patron = 'http\:\/\/www\.metadivx\.com/([^\/]+)/(.*?)\.html'
	matches = re.compile(patron,re.DOTALL).findall(url)
	xbmc.output("[metadivx.py] fragmentos de la URL")
	scrapertools.printMatches(matches)
	
	codigo = ""
	nombre = ""
	if len(matches)>0:
		codigo = matches[0][0]
		nombre = matches[0][1]

	txdata = "op=download1&usr_login=&id="+codigo+"&fname="+nombre+"&referer=&method_free=Continue"
	xbmc.output(txdata)
	req = Request(theurl, txdata, txheaders)
	handle = urlopen(req)
	cj.save(ficherocookies)                     # save the cookies again    

	data=handle.read()
	handle.close()
	#print data
	
	patron = '<div id="embedcontmvshre[^>]+>(.*?)</div>'
	matches = re.compile(patron,re.DOTALL).findall(data)
	scrapertools.printMatches(matches)
	xbmc.output("[metadivx.py] bloque packed")
	if len(matches)>0:
		xbmc.output(matches[0])
	'''
	<center>
	<script type='text/javascript'>eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5tcd6dva|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|')))
	</script>
	</center>
	'''
	# The packed JavaScript is
	#eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12"><2 1="j"0="i"><2 1="v"0="u"><2 1="b"0="5"/><2 1="c"0="5"/><2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/><8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/"></8></7>\');',36,51,'value|name|param|com|http|false|divx|object|embed|plugin|go|bannerEnabled|autoPlay||320px|height|630px|width|none|custommode|avi|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_|Capitancinema|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa|182|206|45|73|76|src|auto|bufferingMode|id|download|pluginspage|video|type|embedmvshre|cab|DivXBrowserPlugin|codebase|CC0F21721616|9C46|41fa|D0AB|67DABFBF|clsid|classid|embedcontmvshre|write|document'.split('|')))
	'''
	eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('1e.1d(\'
	<7 w="1c"1b="1a:19-18-17-16-15"h="g"f="e"14="4://a.6.3/9/13.12">
	<2 1="j"0="i">
	<2 1="v"0="u">
	<2 1="b"0="5"/>
	<2 1="c"0="5"/>
	<2 1="t"0="4://s.r.q.p:o/d/n/m.3.-l.k"/>
	<8 w="11"v="u"10="z/6"t="4://s.r.q.p:o/d/n/m.3.-l.k"j="i"h="g"f="e"c="5"b="5"y="4://a.6.3/9/x/">
	<embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/">
	</8>
	</7>\');',36,51,
	0'value
	1|name
	2|param
	3|com
	4|http
	5|false
	6|divx
	7|object
	8|embed
	9|plugin
	a|go
	b|bannerEnabled
	c|autoPlay
	d|
	e|320px
	f|height
	g|630px
	h|width
	i|none
	j|custommode
	k|avi
	l|El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_
	m|Capitancinema
	n|pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa
	o|182
	p|206
	q|45
	r|73
	s|76
	t|src
	u|auto
	v|bufferingMode
	w|id
	x|download
	y|pluginspage
	z|video
	10|type
	11|embedmvshre
	12|cab
	13|DivXBrowserPlugin
	14|codebase
	15|CC0F21721616
	16|9C46
	17|41fa
	18|D0AB
	19|67DABFBF
	1a|clsid
	1b|classid
	1c|embedcontmvshre
	1d|write
	1e|document
	'.split('
	|')))
	'''
	# The unpacked JavaScript is
	#document.write('<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab"><param name="custommode"value="none"><param name="bufferingMode"value="auto"><param name="bannerEnabled"value="false"/><param name="autoPlay"value="false"/><param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/><embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/"></embed></object>');
	'''
	<object id="embedcontmvshre"classid="clsid:67DABFBF-D0AB-41fa-9C46-CC0F21721616"width="630px"height="320px"codebase="http://go.divx.com/plugin/DivXBrowserPlugin.cab">
	<param name="custommode"value="none">
	<param name="bufferingMode"value="auto">
	<param name="bannerEnabled"value="false"/>
	<param name="autoPlay"value="false"/>
	<param name="src"value="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"/>
	<embed id="embedmvshre"bufferingMode="auto"type="video/divx"src="http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi"custommode="none"width="630px"height="320px"autoPlay="false"bannerEnabled="false"pluginspage="http://go.divx.com/plugin/download/">
	</embed>
	</object>');
	'''
	# The video URL is
	#http://76.73.45.206:182/d/pfq3vaf2xypwtrv77uw334hb55ctx5qa5wdfa/Capitancinema.com.-El_Concierto__BrSc__Spanish_HOMIEZTEAM__2010_.avi
	
	# Unpack it
	descifrado = unpackerjs.unpackjs(data)
	xbmc.output("descifrado="+descifrado)
	
	# Extract the URL
	patron = '<param name="src"value="([^"]+)"/>'
	matches = re.compile(patron,re.DOTALL).findall(descifrado)
	scrapertools.printMatches(matches)
	
	url = ""
	
	if len(matches)>0:
		url = matches[0]

	xbmc.output("[metadivx.py] url="+url)
	return url
Ejemplo n.º 56
0
def getmegauploaduser(login, password):

    # ---------------------------------------
    #  Initialize the cookie library
    # ---------------------------------------
    ficherocookies = COOKIEFILE
    try:
        os.remove(ficherocookies)
    except:
        pass

    # the path and filename to save your cookies in

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.LWPCookieJar()

    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.LWPCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            cj.load(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)

        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    #print "-------------------------------------------------------"
    url = "http://www.megaupload.com/?c=login"
    #print url
    #print "-------------------------------------------------------"
    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    passwordesc = password.replace("&", "%26")
    txdata = "login=1&redir=1&username="******"&password="******"----------------------")
	xbmc.output("Cookies despues")
	xbmc.output("----------------------")
	xbmc.output(cookiedata)
	xbmc.output("----------------------")
	'''

    patronvideos = 'user="******"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)
    if len(matches) == 0:
        patronvideos = 'user=([^\;]+);'
        matches = re.compile(patronvideos, re.DOTALL).findall(cookiedata)

    if len(matches) == 0:
        # only log when DEBUG is set, but always return "" when the cookie is missing
        if DEBUG:
            xbmc.output("Megaupload cookie not found")
            xbmc.output("----------------------")
            xbmc.output("Megaupload response")
            xbmc.output("----------------------")
            xbmc.output(data)
            xbmc.output("----------------------")
            xbmc.output("----------------------")
            xbmc.output("Cookies afterwards")
            xbmc.output("----------------------")
            xbmc.output(cookiedata)
            xbmc.output("----------------------")
        devuelve = ""
    else:
        devuelve = matches[0]

    return devuelve
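
# Hardening sketch (not in the original): passwordesc above escapes only '&',
# while urllib.quote_plus escapes every reserved character in the credentials:
#
#   import urllib
#   txdata = ("login=1&redir=1&username=" + urllib.quote_plus(login) +
#             "&password=" + urllib.quote_plus(password))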