def parse(self, response):
    """Parse the lanyrd.com conference listing page.

    Yields one Request per conference row; each request carries a
    partially populated AfeventItem in its meta for parse_url to
    complete.
    """
    rows = response.xpath('//li[@class="conference vevent"]')
    # The title/date/href XPaths are absolute ('//...'), so they match the
    # whole page; the row index pairs each value with its row.
    for i, div in enumerate(rows):
        item = AfeventItem()
        item['location'] = div.xpath('.//p[@class="location"]/a[3]/text()').extract_first()
        item['title'] = div.xpath('//h4/a/text()').extract()[i]
        item['date'] = div.xpath('//p[@class="date"]/abbr[1]/@title').extract()[i]
        # Filled in later by parse_url from the detail page.
        item['host'] = ''
        item['time'] = ''
        item['description'] = ''
        # Build the detail-page URL once: the original extracted the same
        # href twice and constructed two identical Requests, discarding
        # the first.
        follow_url = 'http://lanyrd.com' + div.xpath('//h4/a/@href').extract()[i]
        request = Request(follow_url, callback=self.parse_url)
        request.meta['item'] = item
        yield request
def parser(self, response):
    """Parse the afconsult.com calendar page.

    Yields one Request per calendar entry, carrying a partially filled
    AfeventItem in request.meta for parse_url to finish.
    """
    rows = response.xpath('//*[@id="CalendarContainer"]/div/div/a')
    for i, div in enumerate(rows):
        item = AfeventItem()
        print("response.xpath")
        item['title'] = div.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/h1/text()'
        ).extract()[i]
        item['venue'] = div.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[2]/text()'
        ).extract()[i]
        item['date'] = div.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/span/text()'
        ).extract()[i]
        item['time'] = div.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[2]/span[2]/span/text()|//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[3]/span/text()[3]'
        ).extract()[i]
        # Extract the detail-page path once: the original ran this exact
        # XPath twice (for item['url'] and for the follow URL).
        href = div.xpath(
            '//*[@id="mainContent"]/section/div/div/nav/ul/li[4]/a/@href'
        ).extract()[i]
        item['url'] = href
        request = Request('http://www.afconsult.com' + href,
                          callback=self.parse_url)
        request.meta['item'] = item
        # The original printed the counter after incrementing it.
        print(i + 1)
        yield request
def parse(self, response):
    """Parse the allamassor.se exhibit listing.

    Yields one Request per exhibit; the partially filled AfeventItem
    travels in request.meta for parse_second to complete. Dates are
    converted from Swedish text ("10 maj 2016") to ISO "2016-05-10".
    """
    MONTHS = ["", "januari", "februari", "mars", "april", "maj", "juni",
              "juli", "augusti", "september", "oktober", "november",
              "december"]
    rows = response.xpath('//div[@class="exhibit clearfix"]')
    for i, div in enumerate(rows):
        item = AfeventItem()
        item['title'] = div.xpath(
            '//a/div/div[@class="header"]/text()').extract()[i]
        url = 'http://allamassor.se/' + div.xpath(
            './/a[@href]/@href').extract()[0]
        item['url'] = url
        # Strip the " | " separator characters (NOTE: like the original,
        # this also removes any spaces inside the location text).
        location = div.xpath('//span[@class="ort"]/text()').extract()[i]
        for char in " | ":
            location = location.replace(char, "")
        item['location'] = location
        newDate = div.xpath('//span[@class="date"]/text()').extract()[i].split()
        # A range such as "10 - 11 maj 2016" keeps the first day only.
        if len(newDate) > 3:
            newDate = [newDate[0], newDate[3], newDate[4]]
        month = MONTHS.index(newDate[1])
        # zfill(2) zero-pads as a string; the original hand-rolled the
        # padding and left months >= 10 as ints, so '-'.join raised
        # TypeError for oktober/november/december.
        item['date'] = '-'.join([newDate[2], str(month).zfill(2),
                                 newDate[0].zfill(2)])
        request = Request(url, callback=self.parse_second)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Parse the event listing under #content and yield completed items.

    Unlike the other spiders in this file this one yields the item
    directly instead of following a detail-page Request.
    """
    MONTHS = ["", "januari", "februari", "mars", "april", "maj", "juni",
              "juli", "augusti", "september", "oktober", "november",
              "december"]
    rows = response.xpath('//*[@id="content"]/div/div')
    for i, div in enumerate(rows):
        print("IN FOR")
        item = AfeventItem()
        item['title'] = div.xpath('//h2/a/text()').extract()[i]
        item['url'] = div.xpath('//h2/a/@href').extract()[i]
        item['location'] = ''
        item['description'] = div.xpath(
            '//*[@id="content"]/div/div[1]/a[1]/p/text()').extract()[i]
        # Normalise "10 maj, 2016" -> ["10", "maj", "2016"].  (The
        # original also ran ''.join() over the string, a no-op.)
        newDate = div.xpath('//p/text()').extract()[i].replace(',', '').split()
        month = MONTHS.index(newDate[1])
        # zfill(2) zero-pads as a string; the original hand-rolled the
        # padding and left months >= 10 as ints, so '-'.join raised
        # TypeError for oktober/november/december.
        item['date'] = '-'.join([newDate[2], str(month).zfill(2),
                                 newDate[0].zfill(2)])
        print(i)
        yield item
def parse(self, response):
    """Parse swedsoft.se event pages.

    Yields one Request per event article, carrying the partially filled
    AfeventItem in request.meta for parse_url to complete.
    """
    MONTHS = ["", "januari", "februari", "mars", "april", "maj", "juni",
              "juli", "augusti", "september", "oktober", "november",
              "december"]
    rows = response.xpath('//div[@class="article-content"]')
    for i, div in enumerate(rows):
        item = AfeventItem()
        item['title'] = div.xpath(
            '//h1[@class="h2 entry-title"]/text()').extract()[i]
        item['location'] = div.xpath('//*[@id]/div[3]/h2/text()').extract()[i]
        item['venue'] = div.xpath(
            '//*[@id]/div[3]/table/tr[1]/td[2]/text()').extract()[i]
        newDate = div.xpath('//*[@id]/div[3]/p/text()').extract()[i].split()
        # A range such as "10 - 11 maj 2016" keeps the first day only.
        if len(newDate) > 3:
            newDate = [newDate[0], newDate[3], newDate[4]]
        month = MONTHS.index(newDate[1])
        # zfill(2) zero-pads as a string; the original hand-rolled the
        # padding and left months >= 10 as ints, so '-'.join raised
        # TypeError for oktober/november/december.
        item['date'] = '-'.join([newDate[2], str(month).zfill(2),
                                 newDate[0].zfill(2)])
        # Extract the href once: the original ran this XPath twice.
        href = div.xpath('//*[@id="main"]/div/div/a/@href').extract()[i]
        item['url'] = href
        request = Request('http://swedsoft.se/event-ovrigt/' + href,
                          callback=self.parse_url)
        request.meta['item'] = item
        yield request
def parse(self, response):
    """Parse the natverk.dfs.se event table.

    Builds one AfeventItem per table row and yields a Request to the
    event's detail page with the item attached in request.meta.
    """
    for row in response.xpath('//tbody/tr'):
        item = AfeventItem()
        # Date and time share one cell, separated by a single space.
        when = row.xpath('./td[1]/span/text()').extract()[0].split(' ')
        item['date'] = when[0]
        item['time'] = when[1]
        item['title'] = row.xpath(
            './td/div[@class = "event-title"]/span[@class = "link"]/text()'
        ).extract()[0]
        item['description'] = row.xpath('./td[3]/div[2]/div//text()').extract()[0]
        item['location'] = row.xpath('./td[4]/text()').extract_first().strip()
        item['host'] = row.xpath('./td[5]/a/text()').extract()[0]
        detail_url = 'https://natverk.dfs.se' + row.xpath(
            './td[3]/div[2]/a/@href').extract()[0]
        item['url'] = detail_url
        request = Request(detail_url, callback=self.parse_url)
        request.meta['item'] = item
        yield request
def parser(self, response):
    """Parse a single afconsult.com event page into one AfeventItem.

    The location is inferred by scanning the page text for a known
    Swedish city name; if none matches, item['location'] is left unset
    (as in the original).
    """
    divs = response.xpath('//body')
    item = AfeventItem()
    cities = [
        "Malmö", "Göteborg", "Stockholm", "Linköping", "Uppsala",
        "Helsingborg", "Enköping", "Jönköping", "Solna"
    ]
    item['title'] = ''.join(
        divs.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/h1/text()'
        ).extract())
    item['date'] = ''.join(
        divs.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/span/text()'
        ).extract())
    item['time'] = ''.join(
        divs.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[2]/span[2]/span/text()|//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[3]/span/text()[3]'
        ).extract())
    item['url'] = 'http://www.afconsult.com' + ''.join(
        divs.xpath(
            '//*[@id="mainContent"]/section/div/div/nav/ul/li[4]/a/@href').
        extract())
    item['description'] = ''.join(
        divs.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article//text()'
        ).extract())
    # Iterate the city list directly instead of indexing range(len(...)).
    # No break, so the LAST matching city wins, exactly as before.
    for city in cities:
        if city in item['description']:
            item['location'] = city
    yield item
def parse(self, response):
    """Parse the iva.se event listing.

    Yields one Request per article element; the partially filled
    AfeventItem is carried in request.meta for parse_second to complete.
    """
    MONTHS = ["", "januari", "februari", "mars", "april", "maj", "juni",
              "juli", "augusti", "september", "oktober", "november",
              "december"]
    # enumerate over the same selector that drives the loop: the original
    # advanced its counter against '//article[@class="item"]' while
    # looping over '//article', which desynchronised the index whenever
    # the two selectors matched different counts.
    for i, div in enumerate(response.xpath('//article')):
        item = AfeventItem()
        item['title'] = div.xpath(
            '//div/div/div[1]/div[1]/a/*[self::h1 or self::h2]/text()'
        ).extract()[i]
        url = 'http://iva.se' + div.xpath(
            './/div/div/div[1]/div[1]/a/@href').extract()[0]
        item['url'] = url
        item['description'] = div.xpath(
            '//div/div/div[1]/div[2]/figure/div/p/text()').extract()[i]
        item['location'] = div.xpath(
            '//div/div/div[2]/section/ul/li[@class="schedule-where icon-where schedule__row"]/text()'
        ).extract()[i].strip()
        newDate = div.xpath(
            '//div/div/div[2]/section/ul/li[@class="schedule-when icon-when schedule__row"]/text()'
        ).extract()[i].split()
        # Date ranges keep tokens 2..4, matching the original's slicing.
        if len(newDate) > 3:
            newDate = [newDate[2], newDate[3], newDate[4]]
        month = MONTHS.index(newDate[1])
        # zfill(2) zero-pads as a string; the original hand-rolled the
        # padding and left months >= 10 as ints, so '-'.join raised
        # TypeError for oktober/november/december.
        item['date'] = '-'.join([newDate[2], str(month).zfill(2),
                                 newDate[0].zfill(2)])
        request = Request(url, callback=self.parse_second)
        request.meta['item'] = item
        # Yield only the Request: the original also yielded the bare
        # item, emitting every event twice (once incomplete, once via
        # parse_second).
        yield request