def get_performers(self, response):
    """Yield one ``PerformerItem`` per complete model tile on a Philavise page.

    Args:
        response: Response object exposing ``xpath()``. Every
            ``div`` whose class attribute is exactly ``"updateItem model"``
            is treated as one performer tile.

    Yields:
        PerformerItem: Populated with name, profile URL, image URL and the
        network name; all biographical fields are empty strings and
        ``image_blob`` is ``None``. Tiles missing a name, an image or a URL
        are skipped.
    """
    image_prefix = "https://www.philavise.com"
    # Fields this listing page provides no data for; stored as ''.
    blank_fields = (
        'bio', 'gender', 'astrology', 'birthday', 'birthplace', 'ethnicity',
        'nationality', 'haircolor', 'eyecolor', 'weight', 'height',
        'measurements', 'tattoos', 'piercings', 'cupsize', 'fakeboobs',
    )

    for tile in response.xpath('//div[@class="updateItem model"]'):
        image = tile.xpath('./div/a/img/@src0_1x').get()
        name = tile.xpath('./p/a/text()').get()
        url = tile.xpath('./div/a/@href').get()

        # Only complete tiles are emitted, so bail out before building an item.
        if not (name and image and url):
            continue

        item = PerformerItem()
        item['image'] = image_prefix + image.strip()
        item['image_blob'] = None
        item['name'] = name.strip()
        item['url'] = url.strip()
        item['network'] = "Philavise"
        for field in blank_fields:
            item[field] = ''

        # A fresh item is created on every iteration, so the yielded object
        # is intentionally NOT cleared afterwards: clearing it would empty
        # the item for any consumer that still holds a reference to it.
        yield item
def parse_performerpage(self, response):
    """Build ``PerformerItem`` objects from a Teen Core Club JSON response.

    The response body is parsed as JSON and each entry of its ``data`` list
    becomes one item.

    Args:
        response: Response object whose ``text`` attribute holds the JSON
            document. Each entry must provide ``name``, ``id``, ``image``
            and ``bio``.

    Returns:
        list: One ``PerformerItem`` per entry in ``data``.

    Raises:
        ValueError: If the body is not valid JSON (raised by ``json.loads``).
        KeyError: If ``data`` or one of the per-entry keys is missing.
    """
    site = "https://www.teencoreclub.com"
    # Fields the JSON payload is not read for; stored as ''.
    blank_fields = (
        'birthday', 'astrology', 'birthplace', 'ethnicity', 'nationality',
        'haircolor', 'weight', 'height', 'measurements', 'tattoos',
        'piercings', 'cupsize', 'fakeboobs', 'eyecolor',
    )

    items = []
    for entry in json.loads(response.text)['data']:
        raw_name = entry['name']

        item = PerformerItem()
        item['name'] = raw_name.title().strip()
        item['network'] = 'Teen Core Club'
        # Strip BEFORE encoding spaces: stripping afterwards is a no-op for
        # spaces, which left stray leading/trailing "%20" in the URL.
        item['url'] = (
            site + "/browsevideos/actor/" + str(entry['id']) + "/"
            + raw_name.strip().replace(" ", "%20")
        )
        # The payload carries escaped slashes; drop the backslashes.
        item['image'] = site + entry['image'].replace("\\", "").strip()
        item['image_blob'] = None
        item['bio'] = entry['bio'] or ''
        item['gender'] = "Female"
        for field in blank_fields:
            item[field] = ''

        items.append(item)

    return items
def get_performers(self, response):
    """Build ``PerformerItem`` objects from a PornCZ model-grid JSON snippet.

    The JSON response carries an HTML fragment under
    ``snippets['snippet-modelsGrid-modelItemsAppend']``; that fragment is
    lower-cased, parsed with ``Selector`` and every ``div`` whose class
    contains ``color_12-shadow-sm-hover`` is turned into one item.

    Args:
        response: Response object exposing ``json()``.

    Returns:
        list: One ``PerformerItem`` per matching tile. ``name``, ``url`` and
        ``image`` are only set when the corresponding node is found;
        ``cupsize``, ``weight`` and ``height`` stay ``''`` unless they can be
        extracted from the tile's description line.

    Raises:
        KeyError: If the expected snippet keys are missing from the JSON.
    """
    # NOTE(review): the whole fragment is lower-cased before parsing, so the
    # extracted URL and image paths are lower-cased as well — confirm the
    # site serves those paths case-insensitively.
    fragment = response.json()['snippets'][
        'snippet-modelsGrid-modelItemsAppend'].lower()
    tiles = Selector(text=fragment).xpath(
        '//div[contains(@class,"color_12-shadow-sm-hover")]')

    # Fields that default to ''; some are overwritten from the description.
    blank_fields = (
        'bio', 'gender', 'birthday', 'astrology', 'birthplace', 'ethnicity',
        'nationality', 'haircolor', 'measurements', 'tattoos', 'piercings',
        'fakeboobs', 'eyecolor', 'cupsize', 'height', 'weight',
    )
    # (item field, pattern) pairs applied to the cleaned description line.
    detail_patterns = (
        ('cupsize', 'size:(.*)weight'),
        ('weight', r'(\d+\ kg)'),
        ('height', r'(\d+\ cm)'),
    )

    item_list = []
    for tile in tiles:
        item = PerformerItem()
        for field in blank_fields:
            item[field] = ''
        item['network'] = "PornCZ"

        name = tile.xpath('./div/h3/a/text()').get()
        if name:
            # Text was lower-cased above, so restore title case.
            item['name'] = name.strip().title()

        url = tile.xpath('./a/@href').get()
        if url:
            item['url'] = "https://www.porncz.com/" + url.strip()

        image = tile.xpath('./a/img/@data-src').get()
        if image:
            item['image'] = "https://www.porncz.com" + image.strip()
        item['image_blob'] = None

        details = tile.xpath('./a/div/p/text()').get()
        if details:
            details = details.replace("-", "").strip()
            for field, pattern in detail_patterns:
                # Search once per pattern and reuse the match object.
                found = re.search(pattern, details)
                if found and found.group(1):
                    item[field] = found.group(1).strip().title()

        item_list.append(item)

    return item_list
def get_performers(self, response):
    """Yield ``PerformerItem`` objects from a TopWebModels listing page.

    The page embeds its data as JSON in a ``<script>`` containing
    ``window.__DATA__``; the models are read from
    ``data['models']['items']`` of that JSON.

    Args:
        response: Response object exposing ``xpath()``.

    Yields:
        PerformerItem: One per model. When ``self.debug`` is truthy the item
        is printed instead of yielded.

    Raises:
        TypeError / AttributeError: If the ``__DATA__`` script is absent or
            does not match the expected ``__DATA__ = ...`` form.
        KeyError: If the expected JSON keys are missing.
    """

    def attribute_value(attributes, key):
        """Return ``attributes[key]['value']``, or '' when the key is absent."""
        return attributes[key]['value'] if key in attributes else ''

    script_text = response.xpath(
        '//script[contains(text(),"window.__DATA__")]/text()').get()
    payload = re.search(r'__DATA__\ =\ (.*)', script_text).group(1)
    models = json.loads(payload)['data']['models']['items']

    # (item field, attribute key) pairs copied verbatim from the attributes.
    simple_attributes = (
        ('birthday', 'birthdate'),
        ('birthplace', 'born'),
        ('ethnicity', 'ethnicity'),
        ('haircolor', 'hair'),
        ('eyecolor', 'eyes'),
    )

    for entry in models:
        attributes = entry['attributes']

        item = PerformerItem()
        item['gender'] = "Female"
        item['name'] = entry['name']
        item['image'] = entry['thumb']

        # Build the URL slug: drop punctuation, lower-case, then hyphenate.
        slug = re.sub(r'[^A-Za-z0-9 ]+', '', entry['name']).lower()
        # Removing punctuation can leave runs of spaces ("a & b" -> "a  b");
        # collapse them so the slug does not contain repeated hyphens.
        slug = re.sub(r' {2,}', ' ', slug).replace(" ", "-")
        item['url'] = ("https://tour.topwebmodels.com/models/"
                       + str(entry['id']) + "/" + slug)
        item['network'] = 'TopWebModels'

        for field, key in simple_attributes:
            item[field] = attribute_value(attributes, key)

        weight = attribute_value(attributes, 'weight')
        item['weight'] = str(weight) + "lbs" if weight else weight

        item['height'] = attribute_value(attributes, 'height')

        measurements = attribute_value(attributes, 'measurements')
        item['measurements'] = measurements

        # Only "x-y-z" shaped measurements are parsed; the text before the
        # last two hyphen-separated parts is stored as the cup size.
        cupsize = ''
        if measurements and re.match('(.*-.*-.*)', measurements):
            cupsize = re.search(
                r'(?:\s+)?(.*)-.*-', measurements).group(1) or ''
        item['cupsize'] = cupsize

        # Couldn't find examples on site
        item['bio'] = ''
        item['astrology'] = ''
        item['nationality'] = ''
        item['piercings'] = ''
        item['fakeboobs'] = ''
        item['tattoos'] = ''
        item['image_blob'] = None

        if self.debug:
            print(item)
        else:
            # A fresh item is built per model, so it is not cleared after
            # being yielded; clearing would empty it for any consumer that
            # still holds a reference.
            yield item
def get_performers(self, response):
    """Yield ``PerformerItem`` objects from a FemJoy JSON results response.

    Args:
        response: Response object whose ``text`` attribute holds a JSON
            document with a ``results`` list.

    Yields:
        PerformerItem: One per result. When ``self.debug`` is truthy the
        item is printed instead of yielded.

    Raises:
        ValueError: If the body is not valid JSON (raised by ``json.loads``).
        KeyError: If ``results`` or a directly indexed per-entry key is
            missing.
    """

    def titled(value):
        """Return ``value`` in title case, or '' when it is empty/None."""
        return value.title() if value else ''

    for entry in json.loads(response.text)['results']:
        item = PerformerItem()
        item['name'] = entry['name']
        item['image'] = entry['thumb']['image']
        item['image_blob'] = None
        item['url'] = "https://femjoy.com" + entry['url']
        item['astrology'] = titled(entry['astrology'])
        item['bio'] = ''
        item['birthday'] = entry['birth_date']
        item['birthplace'] = titled(entry['birth_place']['name']['name'])

        cup_size = entry['cup_size']
        item['cupsize'] = cup_size.upper() if cup_size else ''

        item['ethnicity'] = titled(entry['ethnicity'])
        item['eyecolor'] = titled(entry['eye_color'])
        item['haircolor'] = titled(entry['hair_color'])
        item['fakeboobs'] = ''
        item['gender'] = 'Female'
        item['height'] = entry['height']

        # All three of chest, waist and hip must be present. The previous
        # guard tested chest twice and never tested hip, and an empty cup
        # size made the concatenation fail; a missing cup size is now
        # simply omitted from the string.
        chest = entry.get('chest')
        waist = entry.get('waist')
        hip = entry.get('hip')
        if chest and waist and hip:
            item['measurements'] = '{}{}-{}-{}'.format(
                chest, cup_size or '', waist, hip)
        else:
            item['measurements'] = ''

        item['nationality'] = titled(entry['nationality'])
        item['piercings'] = ''
        item['tattoos'] = ''
        item['weight'] = entry['weight']
        item['network'] = 'FemJoy'

        if self.debug:
            print(item)
        else:
            # A fresh item is built per result, so it is not cleared after
            # being yielded; clearing would empty it for any consumer that
            # still holds a reference.
            yield item