def setCookie(self, path=False):
    """ Set the cookie handler. """
    if path:
        self.__url_cookiepath = path
    try:
        import cookielib
    except ImportError:
        try:
            import ClientCookie
        except ImportError:
            # neither cookie module is available: plain urllib2, no cookies
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            self.__url_cookie = ClientCookie.MozillaCookieJar()
            if path and os.path.isfile(path):
                #noinspection PyBroadException
                try:
                    self.__url_cookie.load(path)
                except Exception:
                    pass
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(self.__url_cookie))
            ClientCookie.install_opener(opener)
    else:
        # cookielib imported fine: use urllib2 with a cookielib jar
        # (branch added; without it, Request/urlopen were never assigned
        # on the cookielib path and the assignments below raised NameError)
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        self.__url_cookie = cookielib.MozillaCookieJar()
        if path and os.path.isfile(path):
            #noinspection PyBroadException
            try:
                self.__url_cookie.load(path)
            except Exception:
                pass
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.__url_cookie))
        urllib2.install_opener(opener)
    self.__url_request = Request
    self.__url_urlopen = urlopen
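# Usage sketch (hedged): setCookie is a method of the surrounding downloader
# class, which is not shown in this fragment; "Handler" below is only a
# stand-in name for it, and the path is illustrative.
#
#   h = Handler()
#   h.setCookie("/tmp/cookies.dat")
#   # After this call, self.__url_request / self.__url_urlopen point at the
#   # cookie-aware Request/urlopen pair installed above.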
def read_body_and_headers(url, post=None, headers=None, follow_redirects=False, timeout=None):
    _log("read_body_and_headers " + url)

    # avoid the shared mutable default argument
    if headers is None:
        headers = []

    if post is not None:
        _log("read_body_and_headers post=" + post)

    if len(headers) == 0:
        headers.append(["User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"])

    # Start cookie lib
    ficherocookies = os.path.join(get_data_path(), 'cookies.dat')
    _log("read_body_and_headers cookies_file=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            _log("read_body_and_headers ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            _log("read_body_and_headers ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        _log("read_body_and_headers cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        _log("read_body_and_headers Cookies enabled")

        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            _log("read_body_and_headers opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled), urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled), urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, launch the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")

    # Add the headers
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    _log("read_body_and_headers ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available in Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        except:
            import sys
            for line in sys.exc_info():
                _log("%s" % line)
            raise  # re-raise: "handle" would be undefined below

    # Update the cookie store (only if a cookie jar could be created)
    if cj is not None:
        cj.save(ficherocookies)

    # Read the data and close
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = handle.read()

    info = handle.info()
    _log("read_body_and_headers Response")

    returnheaders = []
    _log("read_body_and_headers ---------------------------")
    for header in info:
        _log("read_body_and_headers " + header + "=" + info[header])
        returnheaders.append([header, info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")

    '''
    # Launch the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry replacing special characters
    except:
        req = urllib2.Request(url.replace(" ", "%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0], header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    _log("read_body_and_headers Downloaded in %d seconds " % (fin - inicio + 1))

    return data, returnheaders
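# Usage sketch for read_body_and_headers (hedged: the URL and extra header
# are illustrative placeholders, not part of this module):
#
#   data, response_headers = read_body_and_headers(
#       "http://example.com/page",
#       headers=[["Referer", "http://example.com/"]],
#       timeout=10)
#   for name, value in response_headers:
#       print name, "=", value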
def downloadpage(url,
                 post=None,
                 headers=[['User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12']],
                 follow_redirects=True,
                 timeout=socket.getdefaulttimeout()):
    logger.info("[scrapertools.py] downloadpage")
    logger.info("[scrapertools.py] url=" + url)

    if post is not None:
        logger.info("[scrapertools.py] post=" + post)
    else:
        logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.dat')
    logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        logger.info("[scrapertools.py] Importing cookielib")
        import cookielib
    except ImportError:
        logger.info("[scrapertools.py] cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            logger.info("[scrapertools.py] Importing ClientCookie")
            import ClientCookie
        except ImportError:
            logger.info("[scrapertools.py] ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            logger.info("[scrapertools.py] ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        logger.info("[scrapertools.py] cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        logger.info("[scrapertools.py] Cookies enabled")

        if os.path.isfile(ficherocookies):
            logger.info("[scrapertools.py] Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] Cookie file exists but is unreadable, deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            logger.info("[scrapertools.py] opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            logger.info("[scrapertools.py] opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, launch the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        logger.info("[scrapertools.py] GET request")
    else:
        logger.info("[scrapertools.py] POST request")

    # Add the headers
    logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)
    try:
        if timeout is None:
            handle = urlopen(req)
        else:
            # For all Python versions:
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)

        # Update the cookie store
        #Exception
        #cj.save(ficherocookies)

        # Read the data and close
        if handle.info().get('Content-Encoding') == 'gzip':
            logger.info("[scrapertools.py] gzipped")
            import StringIO
            data = handle.read()
            compressedstream = StringIO.StringIO(data)
            import gzip
            gzipper = gzip.GzipFile(fileobj=compressedstream)
            data = gzipper.read()
            gzipper.close()
        else:
            logger.info("[scrapertools.py] normal")
            data = handle.read()
    except urllib2.HTTPError, e:
        logger.info("error " + repr(e))
        import traceback
        traceback.print_exc()
        # on an HTTP error, return the error body the server sent
        data = e.read()
        #logger.info("data="+repr(data))

    return data
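# Usage sketch for downloadpage (hedged: URL and POST payload are
# illustrative placeholders):
#
#   html = downloadpage("http://example.com/login",
#                       post="user=foo&password=bar",
#                       follow_redirects=False,
#                       timeout=15)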
def downloadpageGzip(url):

    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_data_path(), 'cookies.dat')
    logger.info("Cookiefile=" + ficherocookies)
    inicio = time.clock()

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                logger.info("[scrapertools.py] Cookie file exists but is unreadable, deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    parsedurl = urlparse.urlparse(url)
    logger.info("parsedurl=" + str(parsedurl))

    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding': 'gzip,deflate',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Referer': parsedurl[0] + "://" + parsedurl[1]
    }
    logger.info(str(txheaders))

    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    fin = time.clock()
    logger.info("[scrapertools.py] Downloaded 'Gzipped data' in %d seconds " % (fin - inicio + 1))

    # Decompress the gzipped data
    try:
        fin = inicio
        import StringIO
        compressedstream = StringIO.StringIO(data)
        import gzip
        gzipper = gzip.GzipFile(fileobj=compressedstream)
        data1 = gzipper.read()
        gzipper.close()
        fin = time.clock()
        logger.info("[scrapertools.py] 'Gzipped data' decompressed in %d seconds " % (fin - inicio + 1))
        return data1
    except:
        # not actually gzipped: return the raw bytes
        return data
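# Usage sketch (hedged: illustrative URL). downloadpageGzip advertises
# "Accept-Encoding: gzip,deflate", transparently decompresses the body, and
# falls back to the raw bytes if the response is not gzipped:
#
#   html = downloadpageGzip("http://example.com/compressed-page")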
#
# Author : t3rmin4t0r
# Mail   : gopalv82 -AT- yahoo.com
# Site   : http://t3.dotgnu.info/
#
import sys, os, string
import ast
import base64
import hashlib
import urllib2

import ClientCookie
from SOAPpy import WSDL, HTTPTransport, Config, SOAPAddress

# imports added for the recognizeApi client below, which uses
# ast/base64/hashlib and PIL's Image/ImageDraw
from PIL import Image, ImageDraw

Config.cookieJar = ClientCookie.MozillaCookieJar()
# Uncomment the following line if you have cookies.txt
# Config.cookieJar.load("cookies.txt")


class CookieTransport(HTTPTransport):
    def call(self, addr, data, namespace, soapaction=None, encoding=None,
             http_proxy=None, config=Config):

        if not isinstance(addr, SOAPAddress):
            addr = SOAPAddress(addr, config)

        cookie_cutter = ClientCookie.HTTPCookieProcessor(config.cookieJar)
        hh = ClientCookie.HTTPHandler()
class recognizeApi(object):
    """Class to handle requests to the recognize.im API.

    :param client_id: Your unique client ID. You can find it in the
        Account tab after logging in at recognize.im.
    :type client_id: str.
    :param api_key: Your unique API key. You can find it in the Account
        tab after logging in at recognize.im.
    :type api_key: str.
    :param clapi_key: Your unique secret client key. You can find it in
        the Account tab after logging in at recognize.im.
    :type clapi_key: str.
    """
    wsdl = "http://clapi.itraff.pl/wsdl"
    rest = "http://recognize.im/v2/recognize/"
    Config.cookieJar = ClientCookie.MozillaCookieJar()

    def __init__(self, client_id, api_key, clapi_key):
        self.client_id = client_id
        self.clapi_key = clapi_key
        self.api_key = api_key
        self._server = WSDL.Proxy(self.wsdl, transport=CookieTransport)
        result = self._server.auth(client_id, clapi_key, None)

    def convertOutput(self, soap):
        """Converts SOAPpy.Types.structType to dict.

        :param soap: The SOAP structure to convert.
        :type soap: SOAPpy.Types.structType.
        :returns: dict -- the server response converted to dict.
        """
        d = {}
        if type(soap).__name__ == 'instance' and 'item' in soap._keys():
            soap = soap[0]
        if type(soap).__name__ == 'list':
            for i in range(0, len(soap)):
                if type(soap[i]['value']).__name__ == 'instance':
                    d[soap[i]['key']] = self.convertOutput(soap[i]['value'])
                else:
                    d[soap[i]['key']] = soap[i]['value']
        elif type(soap).__name__ == 'instance':
            d[soap['key']] = soap['value']
        return d

    def imageInsert(self, image_id, image_name, path):
        """Add a new picture to your pictures list.

        :param image_id: A unique identifier of the inserted image.
        :type image_id: str.
        :param image_name: A label you want to assign to the inserted image.
        :type image_name: str.
        :param path: Path to the image file.
        :type path: str.
        :returns: dict -- the server response.
        """
        image = open(path, "rb").read()
        encoded = base64.b64encode(image)
        result = self._server.imageInsert(image_id, image_name, encoded)
        return self.convertOutput(result)

    def indexBuild(self):
        """You need to call the indexBuild method in order to apply all your
        recent changes (since the previous call of this method), including
        adding new images and deleting images.

        :returns: dict -- the server response.
        """
        result = self._server.indexBuild()
        return self.convertOutput(result)

    def callback(self, callback_url):
        """There are some situations when we might need to call one of your
        methods. For example, when we finish applying changes we may need to
        let you know that all your images are ready to be recognized.

        :param callback_url: The URL to the method you want us to call.
        :type callback_url: str.
        :returns: dict -- the server response.
        """
        result = self._server.callback(callback_url)
        return self.convertOutput(result)

    def imageDelete(self, image_id):
        """If you don't need an image to be recognizable anymore, you have to
        remove that image from the database. You can do this by calling the
        imageDelete method, passing the ID of the image you want to remove.
        You can also remove all of your images with one call of this method;
        to do so, pass a null value as the parameter.

        :param image_id: ID of the image you would like to remove (this is
            the same ID you pass as an argument to the imageInsert method).
            Pass a null value if you want to remove all of your images.
        :type image_id: str.
        :returns: dict -- the server response.
        """
        result = self._server.imageDelete(image_id)
        return self.convertOutput(result)

    def imageUpdate(self, image_id, new_image_id, new_image_name):
        """There may be some situations when you would like to change the
        name or ID of an image stored in the database. You can do this by
        calling the imageUpdate method.

        :param image_id: ID of the image whose data you would like to change
            (this is the same ID you pass as an argument to the imageInsert
            method).
        :type image_id: str.
        :param new_image_id: New ID of an image.
        :type new_image_id: str.
        :param new_image_name: New name of an image.
        :type new_image_name: str.
        :returns: dict -- the server response.
        """
        data = {"id": new_image_id, "name": new_image_name}
        result = self._server.imageUpdate(image_id, data)
        return self.convertOutput(result)

    def indexStatus(self):
        """You may be curious about the progress of applying your changes.
        To check it, call the indexStatus method.

        :returns: dict -- the server response.
        """
        result = self._server.indexStatus()
        return self.convertOutput(result)

    def userLimits(self):
        """When using our API you are limited with regard to the number of
        images and the number of scans (recognition operations). The limits
        depend on the type of account you have. To check how many more
        images you can add and how many scans you have left, use the
        userLimits method.

        :returns: dict -- the server response.
        """
        result = self._server.userLimits()
        return self.convertOutput(result)

    def imageCount(self):
        """Returns the number of images in your list.

        :returns: dict -- the server response.
        """
        result = self._server.imageCount()
        return self.convertOutput(result)

    def imageGet(self, image_id):
        """Returns detailed information about an image.

        :param image_id: ID of the image.
        :type image_id: str.
        :returns: dict -- the server response.
        """
        result = self._server.imageGet(image_id)
        return self.convertOutput(result)

    def modeGet(self):
        """Returns the recognition mode.

        :returns: dict -- the server response.
        """
        result = self._server.modeGet()
        return self.convertOutput(result)

    def modeChange(self, mode):
        """Changes the recognition mode.

        :returns: dict -- the server response.
        """
        result = self._server.modeChange(mode)
        return self.convertOutput(result)

    def recognize(self, path, getAll=False, multi=False, shelf=False):
        """Sends an image recognition request.

        :param path: Path to the image file.
        :type path: str.
        :returns: dict -- the server response.
        """
        # fetch image data
        size = os.stat(path).st_size / 1024.0  # KB
        image = Image.open(path)
        width, height = image.size
        area = width * height / 10.0**6  # Mpix

        # check image data against the per-mode limits
        if multi:
            if (size > MULTIIR_MAX_FILE_SIZE or
                    width < MULTIIR_MIN_DIMENSION or
                    height < MULTIIR_MIN_DIMENSION or
                    area < MULTIIR_MIN_IMAGE_AREA or
                    area > MULTIIR_MAX_IMAGE_AREA):
                return "Image does not meet the requirements of multi mode query image.\n"
        elif shelf:
            if (size > SHELFIR_MAX_FILE_SIZE or
                    width < SHELFIR_MIN_DIMENSION or
                    height < SHELFIR_MIN_DIMENSION or
                    area < SHELFIR_MIN_IMAGE_AREA or
                    area > SHELFIR_MAX_IMAGE_AREA):
                return "Image does not meet the requirements of shelf mode query image.\n"
        else:
            if (size > SINGLEIR_MAX_FILE_SIZE or
                    width < SINGLEIR_MIN_DIMENSION or
                    height < SINGLEIR_MIN_DIMENSION or
                    area < SINGLEIR_MIN_IMAGE_AREA or
                    area > SINGLEIR_MAX_IMAGE_AREA):
                return "Image does not meet the requirements of single mode query image.\n"

        # build the REST URL for the requested mode
        url = self.rest
        if multi:
            url += 'multi/'
        elif shelf:
            url += 'shelf/'
        else:
            url += 'single/'
        if getAll:
            url += 'all/'
        url += self.client_id

        imageData = open(path, "rb").read()

        # the request is signed with an MD5 hash of the API key plus the image data
        m = hashlib.md5()
        m.update(self.api_key)
        m.update(imageData)
        md5hash = m.hexdigest()

        headers = {'content-type': 'image/jpeg',
                   'x-itraff-hash': md5hash}
        request = urllib2.Request(url, imageData, headers)
        response = urllib2.urlopen(request)
        result = response.read()
        return ast.literal_eval(result)

    def drawFrames(self, path, result):
        """Draws frames on an image.

        :param path: Path to the image file.
        :type path: str.
        :param result: Recognition results.
        :type result: dict.
        :returns: Image -- the image with frames drawn on it.
        """
        if result['status'] == 0:
            image = Image.open(path)
            draw = ImageDraw.Draw(image)
            for obj in result['objects']:
                loc = obj['location']
                # draw the four edges of the bounding quadrilateral
                draw.line((loc[0]['x'], loc[0]['y'], loc[1]['x'], loc[1]['y']), fill=(255, 0, 0, 255), width=5)
                draw.line((loc[1]['x'], loc[1]['y'], loc[2]['x'], loc[2]['y']), fill=(255, 0, 0, 255), width=5)
                draw.line((loc[2]['x'], loc[2]['y'], loc[3]['x'], loc[3]['y']), fill=(255, 0, 0, 255), width=5)
                draw.line((loc[3]['x'], loc[3]['y'], loc[0]['x'], loc[0]['y']), fill=(255, 0, 0, 255), width=5)
            return image
        else:
            return None
def downloadpagewithcookies(url):
    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_data_path(), 'cookies.dat')
    if DEBUG:
        logger.info("[scrapertools.py] Cookiefile=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        import cookielib
    except ImportError:
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules

        if os.path.isfile(ficherocookies):
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                if DEBUG:
                    logger.info("[scrapertools.py] Cookie file exists but is unreadable, deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    theurl = url
    # an example url that sets a cookie,
    # try different urls here and see the cookie collection you can make !

    txheaders = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Host': 'www.meristation.com',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive'
    }

    # fake a user agent, some websites (like google) don't like automated exploration
    req = Request(theurl, None, txheaders)
    handle = urlopen(req)
    cj.save(ficherocookies)  # save the cookies again

    data = handle.read()
    handle.close()
    return data
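# Usage sketch (hedged: illustrative URL). Note the hard-coded
# "Host: www.meristation.com" header above, so this helper is really only
# suited to that site:
#
#   html = downloadpagewithcookies("http://www.meristation.com/")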
def downloadpage(url,
                 post=None,
                 headers=[['User-Agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; es-ES; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12']],
                 follow_redirects=True,
                 timeout=socket.getdefaulttimeout()):
    if DEBUG:
        logger.info("[scrapertools.py] downloadpage")
        logger.info("[scrapertools.py] url=" + url)
        if post is not None:
            logger.info("[scrapertools.py] post=" + post)
        else:
            logger.info("[scrapertools.py] post=None")

    # ---------------------------------
    # Install the cookies
    # ---------------------------------

    # Initialize the cookie library
    ficherocookies = os.path.join(config.get_setting("cookies.dir"), 'cookies.dat')
    if DEBUG:
        logger.info("[scrapertools.py] ficherocookies=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Let's see if cookielib is available
    try:
        if DEBUG:
            logger.info("[scrapertools.py] Importing cookielib")
        import cookielib
    except ImportError:
        if DEBUG:
            logger.info("[scrapertools.py] cookielib not available")
        # If importing cookielib fails
        # let's try ClientCookie
        try:
            if DEBUG:
                logger.info("[scrapertools.py] Importing ClientCookie")
            import ClientCookie
        except ImportError:
            if DEBUG:
                logger.info("[scrapertools.py] ClientCookie not available")
            # ClientCookie isn't available either
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            if DEBUG:
                logger.info("[scrapertools.py] ClientCookie available")
            # imported ClientCookie
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        if DEBUG:
            logger.info("[scrapertools.py] cookielib available")
        # importing cookielib worked
        urlopen = urllib2.urlopen
        Request = urllib2.Request
        cj = cookielib.MozillaCookieJar()
        # This is a subclass of FileCookieJar
        # that has useful load and save methods

    if cj is not None:
        # we successfully imported
        # one of the two cookie handling modules
        if DEBUG:
            logger.info("[scrapertools.py] Cookies enabled")

        if os.path.isfile(ficherocookies):
            if DEBUG:
                logger.info("[scrapertools.py] Reading cookie file")
            # if we have a cookie file already saved
            # then load the cookies into the Cookie Jar
            try:
                cj.load(ficherocookies)
            except:
                if DEBUG:
                    logger.info("[scrapertools.py] Cookie file exists but is unreadable, deleting it")
                os.remove(ficherocookies)

        # Now we need to get our Cookie Jar
        # installed in the opener;
        # for fetching URLs
        if cookielib is not None:
            if DEBUG:
                logger.info("[scrapertools.py] opener using urllib2 (cookielib)")
            # if we use cookielib
            # then we get the HTTPCookieProcessor
            # and install the opener in urllib2
            if not follow_redirects:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj), NoRedirectHandler())
            else:
                opener = urllib2.build_opener(
                    urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                    urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            if DEBUG:
                logger.info("[scrapertools.py] opener using ClientCookie")
            # if we use ClientCookie
            # then we get the HTTPCookieProcessor
            # and install the opener in ClientCookie
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # -------------------------------------------------
    # Cookies installed, launch the request
    # -------------------------------------------------

    # Timer
    inicio = time.clock()

    # Dictionary for the headers
    txheaders = {}

    # Build the request
    if post is None:
        if DEBUG:
            logger.info("[scrapertools.py] GET request")
    else:
        if DEBUG:
            logger.info("[scrapertools.py] POST request")

    # Add the headers
    if DEBUG:
        logger.info("[scrapertools.py] ---------------------------")
    for header in headers:
        if DEBUG:
            logger.info("[scrapertools.py] header %s=%s" % (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    if DEBUG:
        logger.info("[scrapertools.py] ---------------------------")

    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # Available in Python 2.6 onwards --> handle = urlopen(req, timeout=timeout)
        # For all versions:
        deftimeout = socket.getdefaulttimeout()
        try:
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
        except:
            import sys
            for line in sys.exc_info():
                logger.error("%s" % line)
            raise  # re-raise: "handle" would be undefined below
        socket.setdefaulttimeout(deftimeout)

    # Update the cookie store (only if a cookie jar could be created)
    if cj is not None:
        cj.save(ficherocookies)

    # Read the data and close
    data = handle.read()
    info = handle.info()

    if DEBUG:
        logger.info("[scrapertools.py] Response")
        logger.info("[scrapertools.py] ---------------------------")
        for header in info:
            logger.info("[scrapertools.py] " + header + "=" + info[header])
    handle.close()
    if DEBUG:
        logger.info("[scrapertools.py] ---------------------------")

    '''
    # Launch the request
    try:
        response = urllib2.urlopen(req)
    # If it fails, retry replacing special characters
    except:
        req = urllib2.Request(url.replace(" ", "%20"))
        # Add the headers
        for header in headers:
            req.add_header(header[0], header[1])
        response = urllib2.urlopen(req)
    '''

    # Elapsed time
    fin = time.clock()
    if DEBUG:
        logger.info("[scrapertools.py] Downloaded in %d seconds " % (fin - inicio + 1))
    return data
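# Usage sketch for this DEBUG-guarded downloadpage variant (hedged:
# illustrative values). With DEBUG enabled every request/response header is
# logged; when timeout is not given it falls back to the process-wide socket
# default captured in the signature:
#
#   data = downloadpage("http://example.com/feed", timeout=30)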