def create_from_query_string(qs):
    """Build a MultiDict from the URL query string *qs*.

    The query string is parsed with blank values kept. Every key and every
    value is passed through ``as_unicode`` before being stored, and each key
    maps to the list of all values parsed for it.
    """
    result = MultiDict()
    query = as_unicode(qs) if ispy3 else qs
    parsed = parse_qs(query, keep_blank_values=True)
    for key, values in iteritems(parsed):
        # Call dict.__setitem__ directly so the whole list is stored as-is,
        # regardless of how MultiDict itself defines item assignment.
        dict.__setitem__(result, as_unicode(key), list(map(as_unicode, values)))
    return result
def get_series(title, authors, timeout=60):
    """Look up series information for a book via the KDL search URL.

    Builds a search URL from the first author's surname and the title
    (with a leading ignorable character and a leading A/The/An removed),
    fetches it and scrapes the ``seriessearch`` block of the result page.

    :param title: Book title.
    :param authors: Sequence of author names; only the first one is used.
    :param timeout: Network timeout passed to the browser when fetching.
    :return: The ``Metadata`` object created from *title* and *authors*,
        with ``series`` and ``series_index`` set when they could be found.
    :raises Exception: With a "server busy" message when the request times
        out. Any other ``URLError`` is re-raised unchanged.
    """
    mi = Metadata(title, authors)
    # Nothing to search for without both a title and at least one author.
    if not title or not authors:
        return mi
    if title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode_type):
        title = title.encode('utf-8')
    title = quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    # "Surname, Given" -> part before the comma; otherwise the last word.
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise

    # The response body may be bytes; a text needle cannot be searched for
    # in bytes on Python 3, so match the needle's type to the payload.
    if isinstance(raw, bytes):
        marker = b'see the full results'
    else:
        marker = 'see the full results'
    if marker not in raw:
        return mi

    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class': 'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class': 'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi

    # The series name is carried in the link's query string.
    href = a['href'].partition('?')[-1]
    data = parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series

    # The node following the series block holds the index as "<n>. ...".
    # nextSibling can be None or a bare text node without ``contents``;
    # both simply mean no index is available.
    ns = ss.nextSibling
    contents = getattr(ns, 'contents', None)
    if contents:
        raw = unicode_type(contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except ValueError:
            # Not a plain integer; leave series_index untouched.
            pass
    return mi
def get_series(title, authors, timeout=60):
    """Look up series information for a book via the KDL search URL.

    Builds a search URL from the first author's surname and the title
    (with a leading ignorable character and a leading A/The/An removed),
    fetches it and scrapes the ``seriessearch`` block of the result page.

    :param title: Book title.
    :param authors: Sequence of author names; only the first one is used.
    :param timeout: Network timeout passed to the browser when fetching.
    :return: The ``Metadata`` object created from *title* and *authors*,
        with ``series`` and ``series_index`` set when they could be found.
    :raises Exception: With a "server busy" message when the request times
        out. Any other ``URLError`` is re-raised unchanged.
    """
    mi = Metadata(title, authors)
    # Nothing to search for without both a title and at least one author.
    if not title or not authors:
        return mi
    if title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode_type):
        title = title.encode('utf-8')
    title = quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    # "Surname, Given" -> part before the comma; otherwise the last word.
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise

    # The response body may be bytes; a text needle cannot be searched for
    # in bytes on Python 3, so match the needle's type to the payload.
    if isinstance(raw, bytes):
        marker = b'see the full results'
    else:
        marker = 'see the full results'
    if marker not in raw:
        return mi

    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class': 'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class': 'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi

    # The series name is carried in the link's query string.
    href = a['href'].partition('?')[-1]
    data = parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series

    # The node following the series block holds the index as "<n>. ...".
    # nextSibling can be None or a bare text node without ``contents``;
    # both simply mean no index is available.
    ns = ss.nextSibling
    contents = getattr(ns, 'contents', None)
    if contents:
        raw = unicode_type(contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except ValueError:
            # Not a plain integer; leave series_index untouched. A bare
            # ``except:`` here would also swallow KeyboardInterrupt.
            pass
    return mi
def id_from_url(self, url):  # {{{
    """Return ``('google', <id>)`` for a books.google.com URL.

    The id is the first ``id`` value found in the URL's query string.
    Returns None when the host is not ``books.google.com`` or when no
    ``id`` value is found.
    """
    from polyglot.urllib import urlparse, parse_qs
    parts = urlparse(url)
    if parts.netloc != 'books.google.com':
        return None
    ids = parse_qs(parts.query).get('id')
    if not ids:
        return None
    return 'google', ids[0]
def __init__(self, format):
    '''
    Create a query object by passing it the url format obtained
    from the opensearch Description.
    '''
    self.format = format

    # Split the url template into its components; element 4 of the result
    # is the query string, which is parsed into a dict of value lists.
    self.url_parts = urlparse(format)
    self.query_string = parse_qs(self.url_parts[4])

    # Map each standard opensearch macro name to the service specific
    # parameter that carries it, so q={searchTerms} yields
    # searchTerms -> q.
    self.macro_map = {}
    for param, param_values in self.query_string.items():
        # TODO eventually optional/required params should be
        # distinguished somehow (the ones with/without trailing ?)
        name = param_values[0]
        for marker in '{}?':
            name = name.replace(marker, '')
        if name in Query.standard_macros:
            self.macro_map[name] = param
def create_from_query_string(qs):
    """Build a MultiDict from the URL query string *qs*.

    The query string is parsed with blank values kept. Keys and values
    are stored as text: byte strings are decoded as UTF-8, while values
    that are already text are stored unchanged. Each key maps to the list
    of all values parsed for it.

    :param qs: The raw query string, as bytes or text.
    :return: A ``MultiDict`` mapping each key to its list of values.
    """
    def to_text(value):
        # parse_qs returns the same string type it was given. Only bytes
        # need decoding; calling .decode() on text fails on Python 3 and
        # triggers an implicit ASCII encode on Python 2.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    ans = MultiDict()
    for k, v in iteritems(parse_qs(qs, keep_blank_values=True)):
        # Call dict.__setitem__ directly so the whole list is stored as-is,
        # regardless of how MultiDict itself defines item assignment.
        dict.__setitem__(ans, to_text(k), [to_text(x) for x in v])
    return ans