Beispiel #1
0
 def parse_parsley(self, response):
   """Extract data from ``response`` with the ``T1`` parselet.

   A ``PyParsley`` is built from ``T1`` with ``output='python'``, applied
   to ``response.body`` and the result is wrapped in a ``UturnItem``
   (constructed as ``UturnItem(T1, parsed)``).

   The item is pretty-printed and an end marker is written to stdout
   before the item is returned.

   :param response: object whose ``body`` attribute holds the document
       passed to the parselet as ``string=``.
   :returns: the ``UturnItem`` built from the parse result.
   """
   # NOTE: this callback must not terminate the interpreter; it has to
   # reach the ``return`` below so the caller receives the item.
   parslet = PyParsley(T1, output='python')
   res = UturnItem(T1, parslet.parse(string=response.body))
   pprint.pprint(res)
   # Call form of print: same output on Python 2, valid syntax on Python 3.
   print("parsley end")
   return res
Beispiel #2
0
 def parse_parsley(self, response):
     """Run the ``T1`` parselet over ``response.body`` and return an item.

     Steps, in order:

     1. build ``PyParsley(T1, output='python')``;
     2. parse ``response.body`` (passed as ``string=``);
     3. wrap the result as ``UturnItem(T1, parsed)``;
     4. pretty-print the item and print an end marker to stdout.

     :param response: object exposing the document as ``response.body``.
     :returns: the ``UturnItem`` created in step 3.
     """
     # NOTE: nothing here may stop the process; the steps above have to
     # run and the item has to be handed back to the caller.
     parslet = PyParsley(T1, output='python')
     res = UturnItem(T1, parslet.parse(string=response.body))
     pprint.pprint(res)
     # Parenthesised so the line is valid on Python 3 and prints the same
     # text on Python 2.
     print("parsley end")
     return res
Beispiel #3
0
    def __init__(self, parseletfile=None):
        """Set ``self.parselet`` from a file or from ``FAP_PARSELET``.

        :param parseletfile: path of a file to open and hand to
            ``PyParsley``; when falsy, ``FAP_PARSELET`` is used instead.
        """
        if not parseletfile:
            # No file given: fall back to the module-level parselet.
            self.parselet = PyParsley(FAP_PARSELET)
            return

        # The open file object itself (not its contents) is passed on.
        with open(parseletfile) as parselet_source:
            self.parselet = PyParsley(parselet_source)
def f(dictionary, ten_range):
    """Mutate both arguments in place and store a parse result.

    ``dictionary`` receives the keys ``1``, ``'sub'``, ``'2'`` and ``0.25``
    (in that order); ``ten_range`` is reversed in place.  ``'sub'`` holds
    the JSON output of a one-key parselet applied to a fixed HTML file.

    :param dictionary: mapping supporting item assignment.
    :param ten_range: object with an in-place ``reverse()`` method.
    :returns: ``None``.
    """
    dictionary[1] = '1'
    ten_range.reverse()

    html_path = '/home/ubuntu/Programs/drupal/scrapy-parsley_wrappers/scrapy_parsley/tests/yelp/yelp.html'
    selectors = {"title": "/div/div/div"}
    dictionary['sub'] = PyParsley(selectors).parse(file=html_path, output='json')

    # Remaining fixed entries, stored after the parse result.
    for key, value in (('2', 2), (0.25, None)):
        dictionary[key] = value
Beispiel #5
0
def f(dictionary, ten_range):
    """Fill ``dictionary`` with sample entries and reverse ``ten_range``.

    Both arguments are modified in place; nothing is returned.  The
    ``'sub'`` entry is the JSON output of parsing a fixed HTML file with
    the parselet ``{"title": "/div/div/div"}``.

    :param dictionary: mapping supporting item assignment.
    :param ten_range: object with an in-place ``reverse()`` method.
    """
    dictionary[1] = '1'
    ten_range.reverse()

    page = '/home/ubuntu/Programs/drupal/scrapy-parsley_wrappers/scrapy_parsley/tests/yelp/yelp.html'
    title_parselet = PyParsley({"title": "/div/div/div"})
    parsed_json = title_parselet.parse(file=page, output='json')

    dictionary['sub'] = parsed_json
    dictionary['2'] = 2
    dictionary[0.25] = None
Beispiel #6
0
	def setUp(self):
		self.parsley = PyParsley({'title': 'title'})
		self.alt_parsley = PyParsley('{"title": "title"}')
		self.a_parsley = PyParsley({'links': ['regexp:match(a @href, ".*sign.*")']})
		self.__file__ = currentframe().f_code.co_filename
		self.__dir__ = dirname(self.__file__)
		self.file = self.__dir__ + '/yelp.html'
		self.json = '{ "title": "\\t\\tNick\'s Crispy Tacos - Russian Hill - San Francisco, CA\\n" }'
		self.native = { "title": "\t\tNick's Crispy Tacos - Russian Hill - San Francisco, CA\n" }
		self.links = '{ "links": [ "\\/signup?return_url=%2Fuser_details", "\\/signup?return_url=%2Fwriteareview", "\\/signup?return_url=%2Finvite_friends", "\\/signup?return_url=%2Fmail", "\\/signup?return_url=%2Fprofile", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup" ] }'
		self.unicode_string = u'\u2019blah blah blah\u2019'
		self.unicode_document = u'<html><title>\u2019blah blah blah\u2019</title></html>'
Beispiel #7
0
 def parse(self, response):
     """Yield one ``GalleryItem`` per entry under the ``"galls"`` key.

     Nothing is yielded when ``self.parselet`` is falsy.  ``response`` is
     not read by this method: a parselet built from ``FAP_PARSELET`` is
     pointed at a fixed URL through its ``file=`` argument.

     :param response: unused.
     :returns: generator of ``GalleryItem`` objects.
     """
     if not self.parselet:
         return

     gallery_parselet = PyParsley(FAP_PARSELET)
     parsed = gallery_parselet.parse(
         file="http://www.imagefap.com/gallery.php", output="python")

     for entry in parsed["galls"]:
         # Each entry is handed to GalleryItem unchanged.
         yield GalleryItem(entry)
Beispiel #8
0
    def parse(self, response):
        """Generate ``GalleryItem`` objects from the parsed ``"galls"`` list.

        When ``self.parselet`` is falsy the generator finishes without
        yielding.  Otherwise a new ``PyParsley(FAP_PARSELET)`` is applied
        to a fixed URL (passed as ``file=``) with ``output="python"``;
        ``response`` itself is never read.

        :param response: unused.
        :returns: generator of ``GalleryItem`` objects.
        """
        if not self.parselet:
            return

        source_url = "http://www.imagefap.com/gallery.php"
        parsed = PyParsley(FAP_PARSELET).parse(file=source_url, output="python")

        for gallery in parsed["galls"]:
            yield GalleryItem(gallery)
Beispiel #9
0
class TestPyParsley(unittest.TestCase):
	
	def setUp(self):
		self.parsley = PyParsley({'title': 'title'})
		self.alt_parsley = PyParsley('{"title": "title"}')
		self.a_parsley = PyParsley({'links': ['regexp:match(a @href, ".*sign.*")']})
		self.__file__ = currentframe().f_code.co_filename
		self.__dir__ = dirname(self.__file__)
		self.file = self.__dir__ + '/yelp.html'
		self.json = '{ "title": "\\t\\tNick\'s Crispy Tacos - Russian Hill - San Francisco, CA\\n" }'
		self.native = { "title": "\t\tNick's Crispy Tacos - Russian Hill - San Francisco, CA\n" }
		self.links = '{ "links": [ "\\/signup?return_url=%2Fuser_details", "\\/signup?return_url=%2Fwriteareview", "\\/signup?return_url=%2Finvite_friends", "\\/signup?return_url=%2Fmail", "\\/signup?return_url=%2Fprofile", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup", "\\/signup" ] }'
		self.unicode_string = u'\u2019blah blah blah\u2019'
		self.unicode_document = u'<html><title>\u2019blah blah blah\u2019</title></html>'

	def test_unicode(self):	
		parsed = self.parsley.parse(string = self.unicode_document.encode("utf-8"), output = "python", utf8 = 1)
		self.assertEquals(parsed['title'].decode("utf-8"), self.unicode_string)
	
	def test_file_xml(self):	
		parsed = self.parsley.parse(file = self.file, output = "json")
		self.assertEquals(self.json, parsed)
		
	def test_pruning(self):
		parsed = self.a_parsley.parse(file = self.file, output = "json")
		self.assertEquals(self.links, parsed)
			
	def test_json_file_xml(self):	
		parsed = self.alt_parsley.parse(file = self.file, output = "json")
		self.assertEquals(self.json, parsed)
		
	def test_native(self):
		parsed = self.alt_parsley.parse(file = self.file, output = "python")
		self.assertEquals(self.native, parsed)		
		parsed = self.alt_parsley.parse(file = self.file)
		self.assertEquals(self.native, parsed)
Beispiel #10
0
 def parse_parsley(self, response):
     """Parse ``response.body`` with ``self.parslet_code`` and wrap the result.

     :param response: object whose ``body`` is passed to the parselet as
         ``string=``.
     :returns: ``ParsleyItem(self.parslet_code, parsed)``.
     """
     parsed = PyParsley(self.parslet_code, output='python').parse(
         string=response.body)
     return ParsleyItem(self.parslet_code, parsed)
Beispiel #11
0
 def parse_parsley(self, response):
     """Return a ``ParsleyItem`` holding the parse result for ``response``.

     A ``PyParsley`` is built from ``self.parslet_code`` with
     ``output='python'`` and applied to ``response.body``; the item is
     constructed from the parselet code and that result.

     :param response: object exposing the document as ``response.body``.
     :returns: the constructed ``ParsleyItem``.
     """
     compiled = PyParsley(self.parslet_code, output='python')
     extracted = compiled.parse(string=response.body)
     item = ParsleyItem(self.parslet_code, extracted)
     return item