Example #1
0
 def create_from_query_string(qs):
     '''
     Build a MultiDict from the URL query string ``qs``.

     Blank values are kept. Every key and every value is passed through
     as_unicode(), and each key maps to the list of its values.
     '''
     result = MultiDict()
     if ispy3:
         # On python 3 the query string is converted before parsing
         qs = as_unicode(qs)
     parsed = parse_qs(qs, keep_blank_values=True)
     for key, values in iteritems(parsed):
         converted = []
         for value in values:
             converted.append(as_unicode(value))
         # Store through dict.__setitem__ directly rather than via
         # item assignment on the MultiDict instance
         dict.__setitem__(result, as_unicode(key), converted)
     return result
Example #2
0
def get_series(title, authors, timeout=60):
    '''
    Look up series information for a book and return it as a Metadata object.

    The search URL is built from the module level URL template using the
    title (leading article removed, URL quoted) and a single name taken
    from the first author.

    :param title: The book title
    :param authors: Sequence of author names, only the first one is used
    :param timeout: Timeout passed to the browser when opening the URL

    :return: A Metadata object created from ``title`` and ``authors``. Its
             ``series`` and ``series_index`` attributes are only set when
             they could be extracted from the server response.

    :raises Exception: If the request times out. Any other URLError is
                       re-raised unchanged.
    '''
    mi = Metadata(title, authors)
    # Nothing to search for without both a title and at least one author
    if not title or not authors:
        return mi
    if title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode_type):
        title = title.encode('utf-8')

    title = quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    # "Last, First" -> "Last", otherwise use the last word of the name
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise
    # The response may be bytes or text, the marker must be of the same
    # type for the membership test to work
    if isinstance(raw, bytes):
        marker = b'see the full results'
    else:
        marker = 'see the full results'
    if marker not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class': 'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class': 'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    # The series name is carried in the query string of the link
    href = a['href'].partition('?')[-1]
    data = parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
    # The sibling can be missing or be a node without child contents,
    # so do not assume the attribute exists
    contents = getattr(ss.nextSibling, 'contents', None)
    if contents:
        raw = unicode_type(contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except ValueError:
            # Not a number, leave the series index unset
            pass
    return mi
Example #3
0
def get_series(title, authors, timeout=60):
    '''
    Query the server described by the module level URL template for the
    series a book belongs to.

    :param title: Title of the book. A leading character found in
                  _ignore_starts and a leading A/The/An are dropped before
                  searching.
    :param authors: Sequence of author names. Only the first entry is used,
                    reduced to the part before a comma or to its last word.
    :param timeout: Timeout handed to the browser for the request

    :return: The Metadata object built from ``title`` and ``authors``, with
             ``series`` and ``series_index`` filled in only if they were
             found in the response.

    :raises Exception: When the request fails with a socket timeout. Other
                       URLError instances propagate as is.
    '''
    mi = Metadata(title, authors)
    # Without a title or an author there is nothing to look up
    if not title or not authors:
        return mi
    if title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode_type):
        title = title.encode('utf-8')

    title = quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise
    # Use a marker of the same type as the response, as mixing bytes and
    # text in a membership test fails on python 3
    if isinstance(raw, bytes):
        marker = b'see the full results'
    else:
        marker = 'see the full results'
    if marker not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class':'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class':'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    # Only the query part of the link is of interest
    href = a['href'].partition('?')[-1]
    data = parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
    # nextSibling may be None or a node that has no contents attribute
    contents = getattr(ss.nextSibling, 'contents', None)
    if contents:
        raw = unicode_type(contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except ValueError:
            # Text before the first '.' is not an integer, ignore it
            pass
    return mi
Example #4
0
 def id_from_url(self, url):  # {{{
     '''
     Return the tuple ('google', id) for a books.google.com URL that has
     an ``id`` query parameter. Returns None for any other URL.
     '''
     from polyglot.urllib import urlparse, parse_qs
     parts = urlparse(url)
     if parts.netloc != 'books.google.com':
         return None
     ids = parse_qs(parts.query).get('id')
     if not ids:
         return None
     # parse_qs gives a list of values per key, use the first one
     return 'google', ids[0]
Example #5
0
    def __init__(self, format):
        '''
        Build a query object from the url format taken from the
        opensearch Description.
        '''
        self.format = format

        # split the url format into its components
        self.url_parts = urlparse(format)

        # element 4 of the parsed url is the query string, turn it
        # into a dictionary
        self.query_string = parse_qs(self.url_parts[4])

        # map the standard opensearch macro names to the service specific
        # parameter names, so q={searchTerms} maps searchTerms to q
        mapping = self.macro_map = {}
        for param, templates in self.query_string.items():
            # TODO eventually optional/required params should be
            # distinguished somehow (the ones with/without trailing ?
            macro = templates[0]
            for char in ('{', '}', '?'):
                macro = macro.replace(char, '')
            if macro not in Query.standard_macros:
                continue
            mapping[macro] = param
Example #6
0
    def __init__(self, format):
        '''
        Initialise the query from a url format string, as obtained
        from the opensearch Description.
        '''
        self.format = format

        # parsed form of the url format
        parsed_url = urlparse(format)
        self.url_parts = parsed_url

        # dictionary form of the query string (element 4 of the parsed url)
        params = parse_qs(parsed_url[4])
        self.query_string = params

        # Record which service specific parameter carries each standard
        # opensearch macro: q={searchTerms} gives searchTerms -> q
        self.macro_map = {}
        for key in params:
            # TODO eventually optional/required params should be
            # distinguished somehow (the ones with/without trailing ?
            first_value = params[key][0]
            macro = ''.join(c for c in first_value if c not in '{}?')
            if macro in Query.standard_macros:
                self.macro_map[macro] = key
Example #7
0
 def create_from_query_string(qs):
     '''
     Build a MultiDict from the URL query string ``qs``.

     Blank values are kept. Keys and values that are byte strings are
     decoded as UTF-8, text keys and values are stored unchanged. Each key
     maps to the list of its values.
     '''
     def to_text(x):
         # parse_qs returns text for text input, which must not be decoded
         return x.decode('utf-8') if isinstance(x, bytes) else x

     ans = MultiDict()
     for k, v in iteritems(parse_qs(qs, keep_blank_values=True)):
         # Stored via dict.__setitem__ directly instead of item assignment
         # on the MultiDict instance
         dict.__setitem__(ans, to_text(k), [to_text(x) for x in v])
     return ans
Example #8
0
 def create_from_query_string(qs):
     '''
     Parse the URL query string ``qs`` into a MultiDict.

     Parameters with blank values are preserved. Byte string keys and
     values are decoded as UTF-8, while keys and values that are already
     text are left as they are.
     '''
     def decode(x):
         # Only byte strings need decoding, text input to parse_qs
         # produces text output
         if isinstance(x, bytes):
             return x.decode('utf-8')
         return x

     ans = MultiDict()
     for k, v in iteritems(parse_qs(qs, keep_blank_values=True)):
         # The list of values is stored with dict.__setitem__ directly
         # rather than through item assignment on the MultiDict
         dict.__setitem__(ans, decode(k),
                          [decode(x) for x in v])
     return ans