コード例 #1
0
    def parse(self, response):
        """Yield one detail-page request per offender row found in *response*.

        Every ``<tr align='left'>`` row is inspected. A row is treated as an
        offender entry only when the text of the first link in its third
        cell is longer than one character; other rows are skipped.

        For each accepted row a dict is built with the keys ``FullName``,
        ``FirstName``, ``LastName``, ``offenderID`` and ``link``, and a
        ``scrapy.Request`` for ``link`` is yielded with that dict attached
        as ``meta['person']`` and ``self.parse_offender`` as the callback.

        Args:
            response: The response object for the listing page; it must
                support ``.xpath()``.

        Yields:
            scrapy.Request: One request per accepted row, in document order.
        """
        for row in response.xpath("//tr[@align='left']"):
            name_parts = row.xpath('td[3]/a[1]/text()').extract()
            # Raw string: "\(" is an invalid escape in a normal string literal.
            id_parts = row.xpath('td[3]/a[1]/@href').re(
                r"javascript: OpenDetail\('(.*?)'"
            )

            name_string = ''.join(name_parts)

            # Checks if valid path, valid path will have name.
            # The check is made per row, so a trailing row without a name
            # cannot suppress the requests for the valid rows before it.
            if len(name_string) <= 1:
                continue

            offender = {}
            # NOTE(review): Helpers.fixName/firstName/lastName are defined
            # elsewhere; presumably they normalise and split the name.
            offender['FullName'] = Helpers.fixName(name_string)
            offender['FirstName'] = Helpers.firstName(offender['FullName'])
            offender['LastName'] = Helpers.lastName(offender['FullName'])

            offender_id = ''.join(id_parts)
            offender['offenderID'] = offender_id
            offender['link'] = "http://www.meganslaw.ca.gov/cgi/prosoma.dll?w6=719389&searchby=offender&id=" + offender_id

            yield scrapy.Request(offender['link'], meta={'person': offender}, callback=self.parse_offender)