def test_with_code_which_result_is_sequence(t):
    """A query that yields a sequence returns each item, in the expected order."""
    results = sxq.execute_all(
        "data(/user/name)",
        "<user><name>Taro</name><name>Jiro</name></user>")
    # Compare the whole sequence at once so that surplus or missing items
    # fail the test, not only mismatches among the first expected entries.
    t.assertEqual(list(results), ["Taro", "Jiro"])
Example #2
0
 def test_with_code_which_result_is_sequence(t):
     """``execute_all`` returns exactly the two names selected by the query."""
     expects = ["Taro", "Jiro"]
     results = sxq.execute_all(
         "data(/user/name)",
         "<user><name>Taro</name><name>Jiro</name></user>")
     # A whole-sequence comparison also catches unexpected extra results,
     # which a per-index check over ``expects`` would let through.
     t.assertEqual(list(results), expects)
Example #3
0
    def find_news(self, method='xquery', search_text='', not_in=False):
        """Download every feed in ``self.rss_urls`` and collect matching items.

        Args:
            method: ``'xquery'`` (also used when empty) filters the items with
                an XQuery run through ``sxq.execute_all``; ``'regexp'`` scans
                the HTML-unescaped feed text with regular expressions.  Any
                other value matches nothing.
            search_text: Literal text looked for, case-insensitively, in each
                item title.
            not_in: When true, keep the items whose title does NOT contain
                ``search_text``.

        Returns:
            A list of ``New`` objects.  XQuery results carry title, link and
            pubdate; regexp results carry only the title.
        """
        import sys
        # NOTE(review): process-wide Python 2 hack.  It is kept because the
        # implicit str/unicode conversions below may rely on it; removing it
        # needs explicit decoding of the feed content first.
        reload(sys)
        sys.setdefaultencoding('utf8')

        news = []
        if method == '':
            method = 'xquery'

        if method == 'xquery':
            # Inside an XQuery string literal '&' starts a reference and the
            # delimiting quote is escaped by doubling it; escaping both keeps
            # search_text from breaking out of (or into) the query.
            literal = search_text.replace('&', '&amp;').replace("'", "''")
            condition = ("contains(lower-case($i/title), lower-case('"
                         + literal + "'))")
            if not_in:
                condition = 'not(' + condition + ')'
            query_s = ('for $i in //item\n'
                       'where ' + condition + '\n'
                       'return <new>{$i/title, $i/pubDate, $i/link }</new>\n')
        elif method == 'regexp':
            needle = search_text
            if not isinstance(needle, type(u'')):
                # Escape text, not raw bytes: escaping each byte of a
                # multi-byte UTF-8 character would corrupt it.
                needle = needle.decode('utf8')
            # search_text is literal text, so neutralise regex metacharacters.
            needle = re.escape(needle)
            flags = re.DOTALL | re.UNICODE | re.IGNORECASE
            # Compile once instead of once per feed / per item.
            item_re = re.compile(u'<item>(.*?)</item>', flags)
            if not_in:
                title_re = re.compile(
                    u'<title>((.(?<!' + needle + u'))*?)</title>', flags)
            else:
                title_re = re.compile(
                    u'<title>((.*?)' + needle + u'(.*?))</title>', flags)
            unescape = HTMLParser.HTMLParser().unescape

        def child_text(parent, tag):
            # Text of the first direct child named tag, or None when the
            # item lacks that child (instead of raising IndexError).
            found = parent.find(tag)
            return found.text if found is not None else None

        for url in self.rss_urls.values():
            rss = requests.get(url).content

            if method == 'xquery':
                for fragment in sxq.execute_all(query_s, rss):
                    entry = etree.fromstring(fragment)
                    news.append(New(title=child_text(entry, 'title'),
                                    link=child_text(entry, 'link'),
                                    pubdate=child_text(entry, 'pubDate')))

            elif method == 'regexp':
                for item in item_re.finditer(unescape(rss)):
                    matching = title_re.search(item.group(1))
                    if matching:
                        news.append(New(title=matching.group(1)))
        return news
Example #4
0
def filtro_xquery(request):
    """Filter the feeds with an XQuery built from the ``q`` GET parameter.

    Returns ``HttpResponseBadRequest`` unless the request is a GET carrying
    ``q``.  Otherwise returns an HTML ``HttpResponse`` whose content is the
    list of XQuery results gathered from every feed.
    """
    if request.method != 'GET' or 'q' not in request.GET:
        return HttpResponseBadRequest()
    # Lower-case BEFORE encoding: once non-ASCII letters have become numeric
    # character references, lower() can no longer fold them.
    keyword = request.GET['q'].lower().encode('ascii', 'xmlcharrefreplace')
    query = build_query(keyword)
    items = []
    for xml in get_feeds_xml():
        results = simplexquery.execute_all(query, resolver=Resolver(xml))
        if results:
            items.extend(results)
    # content_type is accepted by old and new Django alike, whereas the
    # mimetype argument was removed in Django 1.7.
    return HttpResponse(items, content_type='text/html')
Example #5
0
	def find_news(self, method='xquery', search_text='', not_in=False):
		"""Fetch each feed listed in ``self.rss_urls`` and filter its items by title.

		Args:
			method: ``'xquery'`` (also used when empty) selects the items with
				an XQuery executed by ``sxq.execute_all``; ``'regexp'`` applies
				regular expressions to the HTML-unescaped feed text.  Any
				other value yields no results.
			search_text: Literal text searched, ignoring case, in item titles.
			not_in: When true, select the titles that do NOT contain
				``search_text``.

		Returns:
			A list of ``New`` objects.  The XQuery path fills title, link and
			pubdate; the regexp path fills only the title.
		"""
		import sys
		# NOTE(review): process-wide Python 2 hack.  Left in place because
		# the implicit str/unicode conversions below may depend on it.
		reload(sys)
		sys.setdefaultencoding('utf8')

		news = []
		if method == '':
			method = 'xquery'

		if method == 'xquery':
			# In an XQuery string literal '&' starts a reference and the
			# quote is escaped by doubling; escape both so search_text
			# cannot break or alter the query.
			literal = search_text.replace('&', '&amp;').replace("'", "''")
			condition = "contains(lower-case($i/title), lower-case('" + literal + "'))"
			if not_in:
				condition = 'not(' + condition + ')'
			query_s = (
				'for $i in //item\n'
				'where ' + condition + '\n'
				'return <new>{$i/title, $i/pubDate, $i/link }</new>\n'
			)
		elif method == 'regexp':
			needle = search_text
			if not isinstance(needle, type(u'')):
				# Escape text rather than bytes, so multi-byte UTF-8
				# characters are not split by the added backslashes.
				needle = needle.decode('utf8')
			# Treat search_text literally: neutralise regex metacharacters.
			needle = re.escape(needle)
			flags = re.DOTALL | re.UNICODE | re.IGNORECASE
			# Patterns are loop-invariant, so compile them once.
			item_re = re.compile(u'<item>(.*?)</item>', flags)
			if not_in:
				title_re = re.compile(u'<title>((.(?<!' + needle + u'))*?)</title>', flags)
			else:
				title_re = re.compile(u'<title>((.*?)' + needle + u'(.*?))</title>', flags)
			unescape = HTMLParser.HTMLParser().unescape

		def child_text(parent, tag):
			# Text of the first direct child named tag, or None when the
			# child is absent (avoids IndexError on incomplete items).
			found = parent.find(tag)
			return found.text if found is not None else None

		for url in self.rss_urls.values():
			rss = requests.get(url).content

			if method == 'xquery':
				for fragment in sxq.execute_all(query_s, rss):
					entry = etree.fromstring(fragment)
					news.append(New(
						title=child_text(entry, 'title'),
						link=child_text(entry, 'link'),
						pubdate=child_text(entry, 'pubDate'),
					))

			elif method == 'regexp':
				for item in item_re.finditer(unescape(rss)):
					matching = title_re.search(item.group(1))
					if matching:
						news.append(New(title=matching.group(1)))
		return news
Example #6
0
 def test_with_resolver_returns_non_unicode_type(t):
     """A resolver returning a non-string value (0) must raise TypeError."""
     t.assertRaises(TypeError, sxq.execute_all,
                    'doc("foo.xml")/name', resolver=lambda uri: 0)
Example #7
0
 def test_with_resolver_cannot_accept_just_one_arg(t):
     """A resolver that takes no arguments at all must raise TypeError."""
     t.assertRaises(TypeError, sxq.execute_all,
                    'doc("foo.xml")/name', resolver=lambda: "abc")
Example #8
0
 def test_with_non_callable_resolver_type(t):
     """Passing 0 as the third (resolver) argument must raise TypeError."""
     t.assertRaises(TypeError, sxq.execute_all, "/user", "<_/>", 0)
Example #9
0
 def test_with_non_unicode_context_type(t):
     """Passing 0 as the context argument must raise TypeError."""
     t.assertRaises(TypeError, sxq.execute_all, "/user", 0)
Example #10
0
 def test_with_non_unicode_xquery_type(t):
     """Passing 0 as the query argument must raise TypeError."""
     t.assertRaises(TypeError, sxq.execute_all, 0)
Example #11
0
 def test_with_non_xml_resolver_result(t):
     """A resolver returning text that is not XML must raise ValueError."""
     t.assertRaises(ValueError, sxq.execute_all,
                    'doc("foo.xml")/name', resolver=lambda uri: "non xml")
 def test_with_non_unicode_xquery_type(t):
     """``execute_all`` must reject the integer 0 as a query with TypeError."""
     def attempt():
         return sxq.execute_all(0)

     t.assertRaises(TypeError, attempt)
 def test_with_resolver_cannot_accept_just_one_arg(t):
     """``execute_all`` must raise TypeError for a zero-argument resolver."""
     def resolver():
         return "abc"

     def attempt():
         return sxq.execute_all('doc("foo.xml")/name', resolver=resolver)

     t.assertRaises(TypeError, attempt)
###xquery='xquery version "1.0"; for $noticia in doc("output1.xml")/feed/entry return data($noticia/title)'
# Run the XQuery query.
###x=sxq.execute_all(xquery)
# TEST. Check that Python correctly handles accented characters written in the source code.
# print([y.index('ó') for y in x])
# TEST. Check the data type returned by the XQuery query.
# print(type(x))
# print(x[0])
# Display the contents of the result list returned by the XQuery query.
# Earlier statement that did not handle Unicode characters correctly:
# print([y.encode('utf-8') for y in x])
###for y in x : print(y)

# SHOW ALL TITLES
# Query the text of every entry title in output1.xml and print one per line.
xquery = ('xquery version "1.0"; '
          'for $noticia in doc("output1.xml")/feed/entry '
          'return data($noticia/title)')
x = sxq.execute_all(xquery)
print('BÚSQUEDA DE TODOS LOS TÍTULOS DE NOTICIAS\n')
for y in x:
    print(y)

# Ask which element to search in and which word to look for.
print('\n\nBÚSQUEDA POR ELEMENTO Y CONTENIDO\n')
tipoBusqueda = raw_input(
    'Ingrese el Tipo de búsqueda: [T]-Título [D]-Descripción [C]-Categorías:\n')
palabra = raw_input('Escriba palabra a buscar:\n')
# Map the one-letter answer to its label; any other answer leaves
# textoBusqueda unassigned.
if tipoBusqueda in ('T', 'D', 'C'):
    textoBusqueda = {
        'T': 'TITULO',
        'D': 'DESCRIPCIÓN',
        'C': 'CATEGORÍA',
    }[tipoBusqueda]
 def test_with_non_callable_resolver_type(t):
     """``execute_all`` must raise TypeError when the third argument is 0."""
     def attempt():
         return sxq.execute_all("/user", "<_/>", 0)

     t.assertRaises(TypeError, attempt)
 def test_with_non_xml_context(t):
     """``execute_all`` must raise ValueError when the context is not XML."""
     def attempt():
         return sxq.execute_all("/user", "non xml")

     t.assertRaises(ValueError, attempt)
 def test_with_non_unicode_context_type(t):
     """``execute_all`` must raise TypeError when the context argument is 0."""
     def attempt():
         return sxq.execute_all("/user", 0)

     t.assertRaises(TypeError, attempt)
 def test_with_syntax_error_xquery(t):
     """``execute_all`` must raise ValueError for a query with a stray quote."""
     def attempt():
         return sxq.execute_all("/user'")

     t.assertRaises(ValueError, attempt)
Example #19
0
 def test_with_non_xml_context(t):
     """A context string that is not XML must raise ValueError."""
     t.assertRaises(ValueError, sxq.execute_all, "/user", "non xml")
 def test_with_resolver_returns_non_unicode_type(t):
     """``execute_all`` must raise TypeError when the resolver returns 0."""
     def resolver(uri):
         return 0

     def attempt():
         return sxq.execute_all('doc("foo.xml")/name', resolver=resolver)

     t.assertRaises(TypeError, attempt)
 def test_with_non_xml_resolver_result(t):
     """``execute_all`` must raise ValueError when the resolver returns non-XML text."""
     def resolver(uri):
         return "non xml"

     def attempt():
         return sxq.execute_all('doc("foo.xml")/name', resolver=resolver)

     t.assertRaises(ValueError, attempt)
Example #22
0
class Resolver(object):
    """Callable resolver that echoes the requested URI and serves a fixed document."""

    def __call__(self, uri):
        """Print ``uri`` and return a constant ``<name>`` element as text."""
        document = "<name>Jiro</name>"
        print(uri)
        return document
# Resolve doc("foo.xml") through a Resolver instance and show the result.
print(sxq.execute('doc("foo.xml")/name', resolver=Resolver()))


# Two failing calls to execute: an incomplete query, then a malformed context.
prompt("['execute' returns None if fails by anyway]")
print(sxq.execute("<user>"))
print(sxq.execute("<user>{string(/name)}</user>", "<name>Taro<name>"))


# A query selecting several nodes, printed via repr().
prompt("['execute_all' returns a list of multiple results]")
print(repr(sxq.execute_all(
    "/user/name",
    "<user><name>Taro</name><name>Jiro</name></user>")))


# Failing executions: print the traceback of the ValueError that is caught.
prompt("['execute_all' raise ValueError if xquery execution is failed]")
try:
    sxq.execute_all("/user'")
except ValueError:
    traceback.print_exc()

try:
    sxq.execute_all('doc("foo.xml")/name', resolver=lambda uri: "non xml")
except ValueError:
    traceback.print_exc()


# Wrongly typed argument: print the traceback of the TypeError that is caught.
prompt("['execute_all' raise TypeError if arg has invalid type]")
try:
    sxq.execute_all(0)
except TypeError:
    traceback.print_exc()

try: sxq.execute_all("", "", 0)
Example #23
0
 def test_with_syntax_error_xquery(t):
     """A query with an unterminated quote must raise ValueError."""
     t.assertRaises(ValueError, sxq.execute_all, "/user'")