def test_with_code_which_result_is_sequence(t):
    """Check that a query yielding a sequence returns every item, in order.

    ``data(/user/name)`` is evaluated against a document holding two
    ``<name>`` elements; the result must be exactly their text values.

    Args:
        t: Test-case object providing ``assertEqual``.
    """
    results = sxq.execute_all(
        "data(/user/name)",
        "<user><name>Taro</name><name>Jiro</name></user>")
    expects = ["Taro", "Jiro"]
    # An index-only walk over ``expects`` never visits surplus results and
    # turns a short result into an IndexError; compare the sizes first so
    # both cases are reported as ordinary assertion failures.
    t.assertEqual(len(results), len(expects))
    for index, expected in enumerate(expects):
        t.assertEqual(results[index], expected)
def find_news(self, method='xquery', search_text='', not_in=False):
    """Collect feed items whose title contains (or lacks) ``search_text``.

    Every URL stored in ``self.rss_urls`` is downloaded and its items are
    filtered with the selected strategy. Matching is case-insensitive.

    Args:
        method: ``'xquery'`` (also used when ``''`` is given) or
            ``'regexp'``. Any other value downloads the feeds but yields
            no results.
        search_text: Text looked up in each item title. It is always
            treated as literal text, never as XQuery or regular-expression
            syntax.
        not_in: When true, keep the items whose title does NOT match.

    Returns:
        A list of ``New`` objects. The XQuery strategy fills ``title``,
        ``link`` and ``pubdate``; the regexp strategy only fills ``title``.
    """
    import sys
    # Python 2 hack kept from the original: implicit str/unicode
    # conversions below rely on UTF-8 being the default encoding.
    reload(sys)
    sys.setdefaultencoding('utf8')

    news = []
    if method == '':
        method = 'xquery'

    # Build the query / patterns once; they do not depend on the feed.
    query_s = item_re = title_re = None
    if method == 'xquery':
        # Inside an XQuery string literal '&' starts an entity reference and
        # the delimiter is escaped by doubling it. Escaping both keeps
        # search_text from terminating the literal or injecting query code.
        literal = search_text.replace('&', '&amp;').replace("'", "''")
        condition = ("contains(lower-case($i/title), lower-case('"
                     + literal + "'))")
        if not_in:
            condition = "not(" + condition + ")"
        query_s = ("for $i in //item where " + condition
                   + " return <new>{$i/title, $i/pubDate, $i/link }</new> ")
    elif method == 'regexp':
        flags = re.DOTALL | re.UNICODE | re.IGNORECASE
        item_re = re.compile(u'<item>(.*?)</item>', flags)
        # Coerce to unicode before escaping (escaping a UTF-8 byte string
        # would split multi-byte characters), then escape so that regex
        # metacharacters in search_text are matched literally. A literal
        # also has a fixed width, which the look-behind below requires.
        needle = re.escape(u'' + search_text)
        if not_in:
            title_pattern = u'<title>((.(?<!' + needle + u'))*?)</title>'
        else:
            title_pattern = u'<title>((.*?)' + needle + u'(.*?))</title>'
        title_re = re.compile(title_pattern, flags)

    for url in self.rss_urls.values():
        rss = requests.get(url).content
        if method == 'xquery':
            for fragment in sxq.execute_all(query_s, rss):
                # Each result is a serialized <new> element.
                node = etree.fromstring(fragment)
                title = node.xpath("./title")[0]
                pubDate = node.xpath("./pubDate")[0]
                link = node.xpath("./link")[0]
                news.append(New(title=title.text, link=link.text,
                                pubdate=pubDate.text))
        elif method == 'regexp':
            # Decode HTML entities so titles are matched as readable text.
            rss = HTMLParser.HTMLParser().unescape(rss)
            for match in item_re.finditer(rss):
                matching = title_re.search(match.group(1))
                if matching:
                    news.append(New(title=matching.group(1)))
    return news
def filtro_xquery(request):
    """Run the keyword XQuery filter over every feed and return the matches.

    Responds with ``HttpResponseBadRequest`` unless the request is a GET
    carrying a ``q`` parameter. Otherwise the keyword is ASCII-encoded (with
    XML character references for anything else) and lower-cased, turned into
    a query by ``build_query``, and executed once per document yielded by
    ``get_feeds_xml``. All results are returned in a single ``text/html``
    response.
    """
    is_get = request.method == 'GET'
    if not (is_get and 'q' in request.GET):
        return HttpResponseBadRequest()

    raw_keyword = request.GET['q']
    keyword = raw_keyword.encode('ascii', 'xmlcharrefreplace').lower()
    query = build_query(keyword)

    items = []
    for xml in get_feeds_xml():
        matches = simplexquery.execute_all(query, resolver=Resolver(xml))
        if not matches:
            continue
        items.extend(matches)
    return HttpResponse(items, mimetype='text/html')
def find_news(self, method='xquery', search_text='', not_in=False):
    """Collect feed items whose title contains (or lacks) ``search_text``.

    Every URL stored in ``self.rss_urls`` is downloaded and its items are
    filtered with the selected strategy. Matching is case-insensitive.

    Args:
        method: ``'xquery'`` (also used when ``''`` is given) or
            ``'regexp'``. Any other value downloads the feeds but yields
            no results.
        search_text: Text looked up in each item title. It is always
            treated as literal text, never as XQuery or regular-expression
            syntax.
        not_in: When true, keep the items whose title does NOT match.

    Returns:
        A list of ``New`` objects. The XQuery strategy fills ``title``,
        ``link`` and ``pubdate``; the regexp strategy only fills ``title``.
    """
    import sys
    # Python 2 hack kept from the original: implicit str/unicode
    # conversions below rely on UTF-8 being the default encoding.
    reload(sys)
    sys.setdefaultencoding('utf8')

    news = []
    if method == '':
        method = 'xquery'

    # Build the query / patterns once; they do not depend on the feed.
    query_s = item_re = title_re = None
    if method == 'xquery':
        # Inside an XQuery string literal '&' starts an entity reference and
        # the delimiter is escaped by doubling it. Escaping both keeps
        # search_text from terminating the literal or injecting query code.
        literal = search_text.replace('&', '&amp;').replace("'", "''")
        condition = ("contains(lower-case($i/title), lower-case('"
                     + literal + "'))")
        if not_in:
            condition = "not(" + condition + ")"
        query_s = ("for $i in //item where " + condition
                   + " return <new>{$i/title, $i/pubDate, $i/link }</new> ")
    elif method == 'regexp':
        flags = re.DOTALL | re.UNICODE | re.IGNORECASE
        item_re = re.compile(u'<item>(.*?)</item>', flags)
        # Coerce to unicode before escaping (escaping a UTF-8 byte string
        # would split multi-byte characters), then escape so that regex
        # metacharacters in search_text are matched literally. A literal
        # also has a fixed width, which the look-behind below requires.
        needle = re.escape(u'' + search_text)
        if not_in:
            title_pattern = u'<title>((.(?<!' + needle + u'))*?)</title>'
        else:
            title_pattern = u'<title>((.*?)' + needle + u'(.*?))</title>'
        title_re = re.compile(title_pattern, flags)

    for url in self.rss_urls.values():
        rss = requests.get(url).content
        if method == 'xquery':
            for fragment in sxq.execute_all(query_s, rss):
                # Each result is a serialized <new> element.
                node = etree.fromstring(fragment)
                title = node.xpath("./title")[0]
                pubDate = node.xpath("./pubDate")[0]
                link = node.xpath("./link")[0]
                news.append(New(title=title.text, link=link.text,
                                pubdate=pubDate.text))
        elif method == 'regexp':
            # Decode HTML entities so titles are matched as readable text.
            rss = HTMLParser.HTMLParser().unescape(rss)
            for match in item_re.finditer(rss):
                matching = title_re.search(match.group(1))
                if matching:
                    news.append(New(title=matching.group(1)))
    return news
def test_with_resolver_returns_non_unicode_type(t):
    """Expect TypeError when the resolver returns a non-string (``0``)."""
    t.assertRaises(
        TypeError,
        sxq.execute_all,
        'doc("foo.xml")/name',
        resolver=lambda uri: 0)
def test_with_resolver_cannot_accept_just_one_arg(t):
    """Expect TypeError when the resolver takes no arguments at all."""
    t.assertRaises(
        TypeError,
        sxq.execute_all,
        'doc("foo.xml")/name',
        resolver=lambda: "abc")
def test_with_non_callable_resolver_type(t):
    """Expect TypeError when the third positional argument is ``0``."""
    t.assertRaises(TypeError, sxq.execute_all, "/user", "<_/>", 0)
def test_with_non_unicode_context_type(t):
    """Expect TypeError when the context argument is ``0`` instead of text."""
    t.assertRaises(TypeError, sxq.execute_all, "/user", 0)
def test_with_non_unicode_xquery_type(t):
    """Expect TypeError when the query argument is ``0`` instead of text."""
    t.assertRaises(TypeError, sxq.execute_all, 0)
def test_with_non_xml_resolver_result(t):
    """Expect ValueError when the resolver returns text that is not XML."""
    t.assertRaises(
        ValueError,
        sxq.execute_all,
        'doc("foo.xml")/name',
        resolver=lambda uri: "non xml")
def test_with_non_unicode_xquery_type(t):
    """Expect TypeError when the query argument is ``0`` instead of text."""
    t.assertRaises(TypeError, sxq.execute_all, 0)
def test_with_resolver_cannot_accept_just_one_arg(t):
    """Expect TypeError when the resolver takes no arguments at all."""
    t.assertRaises(
        TypeError,
        sxq.execute_all,
        'doc("foo.xml")/name',
        resolver=lambda: "abc")
###xquery='xquery version "1.0"; for $noticia in doc("output1.xml")/feed/entry return data($noticia/title)' # Ejecución de consulta xquery. ###x=sxq.execute_all(xquery) # TEST. Verificación que python entiende correctamente el uso de caracteres acentuados desde el código fuente. # print([y.index('ó') for y in x]) # TEST. Verificación del tipo de dato devuelto en la consulta xquery # print(type(x)) # print(x[0]) # Visualización de los contenidos de la lista de resultados devueltos por la consulta xquery # Instrucción anterior que no interpretaba correctamente caracteres Unicode # print([y.encode('utf-8') for y in x]) ###for y in x : print(y) # MUESTRA TODOS LOS TÍTULOS xquery = 'xquery version "1.0"; for $noticia in doc("output1.xml")/feed/entry return data($noticia/title)' x = sxq.execute_all(xquery) print('BÚSQUEDA DE TODOS LOS TÍTULOS DE NOTICIAS\n') for y in x: print(y) print('\n\nBÚSQUEDA POR ELEMENTO Y CONTENIDO\n') tipoBusqueda = raw_input( 'Ingrese el Tipo de búsqueda: [T]-Título [D]-Descripción [C]-Categorías:\n' ) palabra = raw_input('Escriba palabra a buscar:\n') if tipoBusqueda == 'T': textoBusqueda = 'TITULO' elif tipoBusqueda == 'D': textoBusqueda = 'DESCRIPCIÓN' elif tipoBusqueda == 'C': textoBusqueda = 'CATEGORÍA'
def test_with_non_callable_resolver_type(t):
    """Expect TypeError when the third positional argument is ``0``."""
    t.assertRaises(TypeError, sxq.execute_all, "/user", "<_/>", 0)
def test_with_non_xml_context(t):
    """Expect ValueError when the context text is not XML."""
    t.assertRaises(ValueError, sxq.execute_all, "/user", "non xml")
def test_with_non_unicode_context_type(t):
    """Expect TypeError when the context argument is ``0`` instead of text."""
    t.assertRaises(TypeError, sxq.execute_all, "/user", 0)
def test_with_syntax_error_xquery(t):
    """Expect ValueError for a query with an unterminated quote."""
    t.assertRaises(ValueError, sxq.execute_all, "/user'")
def test_with_non_xml_context(t):
    """Expect ValueError when the context text is not XML."""
    t.assertRaises(ValueError, sxq.execute_all, "/user", "non xml")
def test_with_resolver_returns_non_unicode_type(t):
    """Expect TypeError when the resolver returns a non-string (``0``)."""
    t.assertRaises(
        TypeError,
        sxq.execute_all,
        'doc("foo.xml")/name',
        resolver=lambda uri: 0)
def test_with_non_xml_resolver_result(t):
    """Expect ValueError when the resolver returns text that is not XML."""
    t.assertRaises(
        ValueError,
        sxq.execute_all,
        'doc("foo.xml")/name',
        resolver=lambda uri: "non xml")
# Callable used as the ``resolver`` argument below: it echoes the URI it is
# asked for and answers with a fixed one-element XML document.
class Resolver(object):
    def __call__(self, uri):
        print(uri)
        return "<name>Jiro</name>"
    pass
print(sxq.execute('doc("foo.xml")/name', resolver=Resolver()))

# Each prompt() call announces the behaviour that the following statements
# demonstrate; prompt itself is defined outside this excerpt.
prompt("['execute' returns None if fails by anyway]")
# An unterminated query, then a malformed context document.
print(sxq.execute("<user>"))
print(sxq.execute("<user>{string(/name)}</user>", "<name>Taro<name>"))

prompt("['execute_all' returns a list of multiple results]")
print(repr(sxq.execute_all("/user/name", "<user><name>Taro</name><name>Jiro</name></user>")))

prompt("['execute_all' raise ValueError if xquery execution is failed]")
# Query with a stray quote.
try:
    sxq.execute_all("/user'")
except ValueError:
    traceback.print_exc()
# Resolver whose return value is not XML.
try:
    sxq.execute_all('doc("foo.xml")/name', resolver=lambda uri : "non xml")
except ValueError:
    traceback.print_exc()

prompt("['execute_all' raise TypeError if arg has invalid type]")
# Non-text query argument.
try:
    sxq.execute_all(0)
except TypeError:
    traceback.print_exc()
# Non-callable third argument.
# NOTE(review): the handler for this last ``try`` lies outside the visible
# excerpt; confirm against the full file.
try:
    sxq.execute_all("", "", 0)
def test_with_syntax_error_xquery(t):
    """Expect ValueError for a query with an unterminated quote."""
    t.assertRaises(ValueError, sxq.execute_all, "/user'")