Esempio n. 1
0
 def iriToUri(self, iri):
     """Convert a UTF-8 encoded IRI into a URI.

     The IRI is decoded from UTF-8 and split with ``urlparse.urlparse``.
     The network-location component is encoded with the ``idna`` codec;
     every other component is encoded as UTF-8 and passed through
     ``self.urlEncodeNonAscii``.  The converted components are then
     reassembled with ``urlparse.urlunparse``.

     :param iri: the IRI as a UTF-8 byte string (Python 2 ``str``).
     :returns: the reassembled URI string.
     """
     import urlparse

     netloc_index = 1  # position of the host part in the urlparse() tuple
     components = urlparse.urlparse(iri.decode('utf-8'))

     converted = []
     for index, component in enumerate(components):
         if index == netloc_index:
             converted.append(component.encode('idna'))
         else:
             converted.append(
                 self.urlEncodeNonAscii(component.encode('utf-8')))
     return urlparse.urlunparse(converted)
Esempio n. 2
0
def add_params(url, params):
    """Return *url* with the query parameters in *params* added.

    Existing query parameters are kept in their original order, including
    blank-valued ones (``?flag=``) and repeated keys (``?a=1&a=2``).  A key
    present in *params* replaces every existing occurrence of that key; the
    new parameters are appended after the surviving existing ones.

    :param url: the URL to extend.
    :param params: a mapping (or iterable of ``(key, value)`` pairs) of
        parameters to set.
    :returns: the rebuilt URL string.
    """
    # Function-scope imports so the helper works on both Python 3 and the
    # Python 2 module layout the surrounding code was written against.
    try:
        from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
    except ImportError:
        from urllib import urlencode
        from urlparse import parse_qsl, urlparse, urlunparse

    overrides = dict(params)
    url_parts = list(urlparse(url))

    # Work on a list of pairs rather than a dict: a dict would collapse
    # repeated keys, and the default parse would drop blank values.
    merged = [
        (key, value)
        for key, value in parse_qsl(url_parts[4], keep_blank_values=True)
        if key not in overrides
    ]
    merged.extend(overrides.items())

    url_parts[4] = urlencode(merged)
    return urlunparse(url_parts)
Esempio n. 3
0
    def clean_url(self, url=None):
        """Return *url* without ``utm_*`` parameters and with ``#!`` rewritten.

        Query parameters whose name starts with ``utm_`` are removed; all
        other parameters (including blank-valued and repeated ones) are kept
        in their original order and re-encoded.  A fragment that starts with
        ``!`` (a "hashbang" URL) is moved into the query string as
        ``_escaped_fragment_=<rest of fragment>``, joined with ``&`` when a
        query string is already present and with ``?`` otherwise.

        The resulting URL is logged at INFO level through ``self.logger``.

        :param url: the URL to clean; falls back to ``self.url`` when falsy.
        :returns: the cleaned URL string.
        """
        # Function-scope imports so the method works on both Python 3 and
        # the Python 2 module layout the surrounding code was written for.
        try:
            from urllib.parse import (
                parse_qsl, urlencode, urlparse, urlunparse)
        except ImportError:
            from urllib import urlencode
            from urlparse import parse_qsl, urlparse, urlunparse

        if not url:
            url = self.url

        parts = urlparse(url)
        kept = [
            (key, value)
            for key, value in parse_qsl(parts.query, keep_blank_values=True)
            if not key.startswith('utm_')
        ]
        query = urlencode(kept)

        fragment = parts.fragment
        if fragment.startswith('!'):
            # Only a fragment that *starts* with "!" is a hashbang.  Pick the
            # separator from the existing query so the result never contains
            # two "?" characters.
            escaped = '_escaped_fragment_=' + fragment[1:]
            query = query + '&' + escaped if query else escaped
            fragment = ''

        url = urlunparse(parts._replace(query=query, fragment=fragment))
        self.logger.info("cleaned url : %s", url)
        return url
def download_file(datafile, scraped_title, book, page, maxpage):
    """Download the book described by *datafile* into ``options.dw_dir``.

    The first line of *datafile* is parsed as a query string.  Its
    ``assemble_url`` entry is the base download URL, to which the
    ``user_id``, ``product_id``, ``codec``, ``awtype`` and ``cust_id``
    entries are added as query parameters.  A HEAD request is issued first
    to learn the file name (from ``Content-Disposition``) and the size
    (from ``Content-Length``); the download is skipped when a file of that
    name and size already exists.

    :param datafile: path of the file holding the download options.
    :param scraped_title: title found while scraping; only compared with
        the title in *datafile* for logging.
    :param book: index of the book on the current page (logging only).
    :param page: current page number (logging only).
    :param maxpage: total number of pages (logging only).
    :returns: ``False`` when an existing file with the expected size was
        found (after sleeping 60 seconds), ``True`` after a download.
    :raises urllib2.HTTPError, socket.error: when the HEAD request still
        fails after five retries.
    """
    with open(datafile) as f:
        logging.info("Parsing %s, creating download url" % datafile)
        lines = f.readlines()

    dw_options = parse_qs(lines[0])
    title = dw_options["title"][0]
    if title != scraped_title:
        logging.info("Found real title: %s" % (title,))
    logging.info("Parsed data for book '%s'" % (title,))

    url = dw_options["assemble_url"][0]

    params = {}
    for param in ["user_id", "product_id", "codec", "awtype", "cust_id"]:
        value = dw_options[param][0]
        # NOTE(review): the codec name is sent truncated; presumably the
        # server expects this spelling -- confirm.
        if value == "LC_64_22050_stereo":
            value = "LC_64_22050_ster"
        params[param] = value

    url_parts = list(urlparse.urlparse(url))
    query = dict(urlparse.parse_qsl(url_parts[4]))
    query.update(params)
    url_parts[4] = urlencode(query)
    url = urlparse.urlunparse(url_parts)
    logging.info("Book URL: %s" % url)

    logging.info("Downloading file data")
    request_head = HeadRequest(url)
    request_head.add_header('User-Agent', 'Audible ADM 6.6.0.19;Windows Vista Service Pack 1 Build 7601')

    # Retry the HEAD request up to five times, one minute apart.  A bare
    # ``raise`` re-raises whichever error was actually caught.
    tries = 0
    while True:
        try:
            head = urllib2.urlopen(request_head)
            break
        except (urllib2.HTTPError, socket.error):
            if tries >= 5:
                raise
            tries = tries + 1
            time.sleep(60)

    head_fields = head.info().dict
    val, par = cgi.parse_header(head_fields['content-disposition'])
    # Keep the part of the served name before the first "_" and re-attach
    # the original extension.
    served_name = par['filename']
    filename = served_name.split("_")[0] + "." + served_name.split(".")[-1]
    size = head_fields['content-length']

    logging.info("Filename: %s" % filename)
    logging.info("Size: %s" % size)

    path = "%s%s" % (options.dw_dir, filename)

    logging.info("Book %s of 20 on page %s of %s" % (book, page, maxpage))

    if os.path.isfile(path):
        logging.info("File %s exist, checking size", path)
        if int(size) == os.path.getsize(path):
            logging.info("File %s has correct size, not downloading" % (path,))
            time.sleep(60) # sleep a minute to not be throttled
            return False
        logging.warning("File %s had unexpected size, downloading" % (path,))
    else:
        logging.info("File %s does not exist, downloading" % (path,))

    opener = LyingFancyURLopener()
    opener.retrieve(url, path, reporthook=print_progress)
    logging.info("Completed download of '%s' to %s" % (title, path))
    return True
Esempio n. 5
0
def test_getservbyport():
    """Print a root URL on example.com for a list of well-known ports.

    For each port, the service name returned by ``socket.getservbyport``
    is used as the URL scheme and the assembled URL is printed.
    """
    import socket
    # Function-scope import that works on both Python 3 and Python 2.
    try:
        from urllib.parse import urlunparse
    except ImportError:
        from urlparse import urlunparse

    for port in [80, 443, 21, 70, 25, 143, 993, 110, 995]:
        scheme = socket.getservbyport(port)
        # Parenthesised single-argument print behaves the same as a
        # statement (Python 2) and as a function call (Python 3).
        print(urlunparse((scheme, 'example.com', '/', '', '', '')))