def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2009") per ``<h3>`` with an ``<a>`` child under ``div#main``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the heading's first direct text node, stripped.  The author
    string is the first ``<i>`` text among the following-sibling ``<p>``
    elements.  When it contains ``' and '`` it is split on that and every
    piece is split again on ``', '``; otherwise it is kept whole.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for heading in response.xpath('//div[@id=\'main\']//h3[a]'):
        paper = ConferenceItem()
        print(heading)
        paper['year'] = "2009"
        title_nodes = heading.xpath('text()').extract()
        print(title_nodes)
        paper['title'] = title_nodes[0].strip()
        author_text = heading.xpath(
            'following-sibling::p/i/text()').extract()[0]
        print(author_text)
        if ' and ' in author_text:
            # split(', ') returns the piece unchanged when it holds no ', ',
            # so no per-piece comma test is needed.
            names = []
            for piece in author_text.split(' and '):
                names.extend(piece.split(', '))
        else:
            names = [author_text]
        paper['authors'] = names
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2009") per ``<p>`` that has ``<span>`` text.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    Paragraphs without any ``span`` text are skipped.  The title is the first
    text node following a ``<br>`` child.  The author string is the first
    ``span`` text, concatenated with the first text of any later ``<span>``
    when one exists.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    import re  # local import: this block did not previously depend on ``re``

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson" or "Alexander"; splitting on it then cuts names in half
        # or raises IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//p'):
        data = record.xpath('span/text()').extract()
        print(data)
        if not data:
            continue
        item = ConferenceItem()
        item['year'] = "2009"
        title = record.xpath('br/following::text()').extract()
        item['title'] = title[0].strip()
        if record.xpath('span/following::span'):
            authors = data[0] + record.xpath(
                'span/following::span/text()').extract()[0]
        else:
            authors = data[0].strip()
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year '2013') per ``<p>`` after the second.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    All descendant text of the paragraph is joined, then split into a title
    and an author string on ``', by '`` when present, otherwise on ``', '``.
    The author string is split on ``', '`` and each piece on ``' and '``.

    Paragraphs that contain neither separator are skipped.
    """
    for record in response.xpath('//p[position()>2]'):
        data = ''.join(record.xpath('.//text()').extract())
        print(data)
        # Test for the exact separator that is split on.  A bare 'by'
        # substring test also matched words like "hybrid", after which
        # split(', by ') returned a single piece and indexing [1] failed.
        if ', by ' in data:
            parts = data.split(', by ')
        else:
            parts = data.split(', ')
        if len(parts) < 2:
            # No title/author separator at all: nothing to extract.
            continue
        # NOTE(review): pieces beyond the second are ignored, as before --
        # confirm this is intended for entries with several commas.
        title, authors = parts[0], parts[1]
        item = ConferenceItem()
        item['year'] = '2013'
        print(title)
        item['title'] = title
        print(authors)
        au = []
        for a in authors.split(', '):
            # split(' and ') returns [a] when the separator is absent.
            au.extend(a.split(' and '))
        item['authors'] = au
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2015") per ``<li>`` inside any ``<ul>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first ``<b>`` text, stripped, with CRLF replaced by a
    space.  The first direct text node of the item is split on ``', '``;
    parenthesised text is removed from each piece, the piece is stripped and
    then split on ``' and '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for entry in response.xpath('//ul//li'):
        paper = ConferenceItem()
        print(entry)
        paper['year'] = "2015"
        bold_nodes = entry.xpath('b/text()').extract()
        print(bold_nodes)
        paper['title'] = bold_nodes[0].strip().replace('\r\n', ' ')
        text_nodes = entry.xpath('text()').extract()
        print(text_nodes)
        names = []
        for chunk in text_nodes[0].split(', '):
            cleaned = re.sub('\(.*\)|\(.*\r\n.*\)', '', chunk).strip()
            print(cleaned)
            # split(' and ') yields [cleaned] when the separator is absent.
            names.extend(cleaned.split(' and '))
        paper['authors'] = names
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2012") per row of ``table.tbl`` with text in ``td[2]/p``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    Rows without such text are skipped.  The title is the first text node,
    stripped, with each CRLF-plus-spaces run removed.  The author string is
    the first ``<em>`` text when the cell has a ``<p>`` containing ``<em>``,
    otherwise the second text node of the cell.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.
    """

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//table[@class=\'tbl\']//tr'):
        title = record.xpath('td[2]/p/text()').extract()
        if not title:
            continue
        item = ConferenceItem()
        item['year'] = "2012"
        item['title'] = re.sub('\r\n +', '', title[0].strip())
        if record.xpath('td[2]/p[em]'):
            authors = record.xpath('td[2]/p/em/text()').extract()[0].strip()
        else:
            authors = title[1].strip()
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2011") per blank-line-separated chunk of each ``<pre>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The first text node of each ``<pre>`` is stripped and split on double
    CRLF; each chunk is split into lines on CRLF:

    * two lines: title, authors;
    * second line contains a comma: one title line, two author lines;
    * otherwise: two title lines followed by up to three author lines.

    Chunks with fewer than two lines are skipped.  Authors are split only
    when the standalone word "and" is present; a string without it is stored
    as a single-element list.
    """
    import re  # local import: this block did not previously depend on ``re``

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//pre'):
        data = record.xpath('text()').extract()[0].strip()
        for paper in data.split('\r\n\r\n'):
            text = paper.split('\r\n')
            if len(text) < 2:
                # A lone line has no author line; indexing text[1] used to
                # raise IndexError and abort the whole page.
                continue
            item = ConferenceItem()
            item['year'] = "2011"
            if len(text) == 2:
                item['title'] = text[0]
                authors = text[1]
            elif ',' in text[1]:
                item['title'] = text[0]
                authors = text[1] + ' ' + text[2]
            else:
                item['title'] = text[0] + ' ' + text[1]
                # Lines 3-5 (as many as exist) hold the authors.
                authors = ' '.join(text[2:5])
            item['authors'] = split_authors(authors)
            yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2008") per ``<li>`` in a ``<ul>`` under ``div.content``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the item's first direct text node, stripped; the author
    string is its first ``<i>`` text.  When the author string contains
    ``' and '`` it is split on that and every piece is split again on
    ``', '``; otherwise it is kept whole.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for entry in response.xpath('//div[@class=\'content\']//ul//li'):
        paper = ConferenceItem()
        print(entry)
        paper['year'] = "2008"
        text_nodes = entry.xpath('text()').extract()
        print(text_nodes)
        paper['title'] = text_nodes[0].strip()
        raw_authors = entry.xpath('i/text()').extract()[0]
        print(raw_authors)
        if ' and ' not in raw_authors:
            names = [raw_authors]
        else:
            # split(', ') leaves a comma-free piece untouched, so it can be
            # applied to every piece.
            names = [name
                     for piece in raw_authors.split(' and ')
                     for name in piece.split(', ')]
        paper['authors'] = names
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2010") per ``<p>`` nested in a ``<div>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first link text inside ``span.title``, stripped; when
    there is none, a fixed fallback title is used.  Authors come from every
    text node under ``span.authors`` that does not contain "papers": each
    node is split on commas and the non-blank pieces are stripped.
    """
    for para in response.xpath('//div//p'):
        entry = ConferenceItem()
        print(para)
        entry['year'] = "2010"
        title_nodes = para.xpath('span[@class=\'title\']/a/text()').extract()
        print(title_nodes)
        if not title_nodes:
            entry['title'] = "Coordination for Uncertain Outcomes using Distributed Neighbor Exchange"
        else:
            entry['title'] = title_nodes[0].strip()
        author_nodes = para.xpath('span[@class=\'authors\']//text()').extract()
        print(author_nodes)
        entry['authors'] = [
            name.strip()
            for node in author_nodes
            if 'papers' not in node
            for name in node.split(',')
            if name.strip()
        ]
        yield entry
def parse(self, response):
    """Yield one ``ConferenceItem`` per ``<li>`` in the lists under the second body ``<div>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The year is the second space-separated word of the page ``<title>``.
    For each item, ``title`` is the list of text nodes of the first link
    (stored as a list, not a string) and ``authors`` is the list of text
    nodes of every later link.

    Fails with IndexError when the page title is missing or has one word.
    """
    page_title = response.xpath('/html/head/title/text()').extract()
    year = page_title[0].split(' ')[1]
    print(year)
    for entry in response.xpath('//body/div[2]//ul/li'):
        title_nodes = entry.xpath('a[1]/text()').extract()
        author_nodes = entry.xpath('a[position()>1]/text()').extract()
        paper = ConferenceItem()
        paper['year'] = year
        paper['title'] = title_nodes
        paper['authors'] = author_nodes
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year '2015') per ``<h3>`` directly under ``div#DLcontent``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first link text of the heading.  Authors are the first
    direct text node of every ``<li>`` in the first ``<ul>`` that follows
    the heading.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for heading in response.xpath('//div[@id=\'DLcontent\']/h3'):
        paper = ConferenceItem()
        paper['year'] = '2015'
        paper['title'] = heading.xpath('a/text()').extract()[0]
        paper['authors'] = [
            entry.xpath('text()').extract()[0]
            for entry in heading.xpath('following::ul[1]//li')
        ]
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2010") per ``<li>`` inside any ``<ol>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first ``<b>`` text, stripped.  ``authors`` is the
    unprocessed list of the item's direct text nodes.

    Fails with IndexError when an item has no ``<b>`` text.
    """
    for entry in response.xpath('//ol//li'):
        paper = ConferenceItem()
        paper['year'] = "2010"
        bold_nodes = entry.xpath('b/text()').extract()
        print(bold_nodes)
        paper['title'] = bold_nodes[0].strip()
        text_nodes = entry.xpath('text()').extract()
        print(text_nodes)
        paper['authors'] = text_nodes
        yield paper
def parse(self, response):
    """Yield ``ConferenceItem`` objects (year '2012') parsed from the first text node of each ``<tbody>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The text is split into chunks on a space followed by two newlines.  In
    each chunk the first line is the title and the second line, split on
    ``' and '``, gives the authors.

    Fails with IndexError when a ``<tbody>`` has no text or a chunk has
    fewer than two lines.
    """
    for body in response.xpath('//tbody'):
        blob = body.xpath('.//text()').extract()[0]
        print(blob)
        for chunk in blob.split(' \n\n'):
            paper = ConferenceItem()
            paper['year'] = '2012'
            lines = chunk.split('\n')
            paper['title'] = lines[0]
            paper['authors'] = lines[1].split(' and ')
            yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2014") per ``<li>`` that follows an ``<h3>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the item's first direct text node, stripped.  The first
    ``<i>`` text is split on ``', '`` and every piece except the last is
    stored as the authors.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for entry in response.xpath('//h3/following::li'):
        paper = ConferenceItem()
        print(entry)
        paper['year'] = "2014"
        text_nodes = entry.xpath('text()').extract()
        print(text_nodes)
        paper['title'] = text_nodes[0].strip()
        raw_authors = entry.xpath('i/text()').extract()[0]
        print(raw_authors)
        pieces = raw_authors.split(', ')
        # NOTE(review): the final piece is discarded -- presumably it is not
        # an author name on this page; confirm against the source HTML.
        paper['authors'] = pieces[:-1]
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year '2012') per ``<p>`` on the page.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The first ``<h1>`` text is printed before iteration starts.  For each
    paragraph the title is the first text node of the first following
    ``<div>``, stripped.  The text nodes of the second following ``<div>``
    are joined, and the second line of that text is split on ``', '`` to
    give the authors.

    The index lookups fail with IndexError when the expected nodes or lines
    are missing.
    """
    print(response.xpath('//h1/text()').extract()[0])
    for para in response.xpath('//p'):
        print(para)
        paper = ConferenceItem()
        paper['year'] = '2012'
        print(para.xpath('text()'))
        title_nodes = para.xpath('following::div[1]/text()').extract()
        paper['title'] = title_nodes[0].strip()
        author_block = ''.join(para.xpath('following::div[2]/text()').extract())
        author_line = author_block.split('\n')[1]
        paper['authors'] = author_line.split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2006") per ``<p>`` after the first in the selected ``div.span12``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first link text inside ``<strong>``, stripped.  Authors
    are the first text node of every ``<a>`` that is a following sibling of
    that ``<strong>``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for para in response.xpath(
            '//div[@class=\"span12\" and position()=3]//p[position()>1]'):
        print(para)
        paper = ConferenceItem()
        paper['year'] = "2006"
        title_nodes = para.xpath('strong/a/text()').extract()
        print(title_nodes)
        paper['title'] = title_nodes[0].strip()
        names = [
            link.xpath('text()').extract()[0]
            for link in para.xpath('strong/following-sibling::a')
        ]
        print(names)
        paper['authors'] = names
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year '2007') per ``div.cp_pp`` inside a table row.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first link text.  The authors come from the first
    link's ``title`` attribute, split on ``', '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for cell in response.xpath('//tr//div[@class=\'cp_pp\']'):
        paper = ConferenceItem()
        paper['year'] = '2007'
        link_texts = cell.xpath('a/text()').extract()
        print(link_texts)
        paper['title'] = link_texts[0]
        link_titles = cell.xpath('a/@title').extract()
        print(link_titles)
        paper['authors'] = link_titles[0].split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2007") per ``<dt>`` on the page.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first text node of the ``<dd>`` elements that follow
    the ``<dt>``, stripped.  The author string is the ``<dt>``'s own first
    text node, stripped.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    import re  # local import: this block did not previously depend on ``re``

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//dt'):
        item = ConferenceItem()
        item['year'] = "2007"
        title = record.xpath('following::dd/text()').extract()
        item['title'] = title[0].strip()
        authors = record.xpath('text()').extract()[0].strip()
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2011") per ``<a>`` that contains an ``<h3>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first ``<h3>`` text, stripped.  The authors are the
    first ``span.name`` text split on ``', '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for link in response.xpath('//a[h3]'):
        paper = ConferenceItem()
        print(link)
        paper['year'] = "2011"
        heading_texts = link.xpath('h3/text()').extract()
        print(heading_texts)
        paper['title'] = heading_texts[0].strip()
        name_text = link.xpath('span[@class=\'name\']/text()').extract()[0]
        print(name_text)
        paper['authors'] = name_text.split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2006") per ``<tr>`` on the page.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    Within the cell that contains a ``<strong>``, the title is the first
    ``<strong>`` text and the authors are the cell's first direct text node,
    stripped and split on ``', '``.

    The ``[0]`` lookups fail with IndexError for rows without such a cell.
    """
    for row in response.xpath('//tr'):
        paper = ConferenceItem()
        print(row)
        paper['year'] = "2006"
        strong_texts = row.xpath('td[strong]/strong/text()').extract()
        print(strong_texts)
        paper['title'] = strong_texts[0]
        cell_text = row.xpath('td[strong]/text()').extract()[0]
        print(cell_text)
        paper['authors'] = cell_text.strip().split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2008") per table cell that contains an ``<a>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first link text.  The authors are the first ``<i>``
    text split on ``', '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for cell in response.xpath('//tr/td[a]'):
        paper = ConferenceItem()
        print(cell)
        paper['year'] = "2008"
        link_texts = cell.xpath('a/text()').extract()
        print(link_texts)
        paper['title'] = link_texts[0]
        italic_text = cell.xpath('i/text()').extract()[0]
        print(italic_text)
        paper['authors'] = italic_text.split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2015") per ``<li>`` of the nested lists under ``body/div/ul``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The item's first direct text node, stripped, is the title; the second,
    stripped, is the author string.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.

    Fails with IndexError when an item has fewer than two text nodes.
    """
    import re  # local import: this block did not previously depend on ``re``

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//body/div/ul//ul/li'):
        item = ConferenceItem()
        item['year'] = "2015"
        data = record.xpath('text()').extract()
        item['title'] = data[0].strip()
        authors = data[1].strip()
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2013") per ``<a>`` under ``div.page``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the link's first text node, stripped.  The author string is
    the first ``<em>`` text inside the first ``<table>`` that follows the
    link, stripped.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    import re  # local import: this block did not previously depend on ``re``

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//div[@class=\'page\']//a'):
        item = ConferenceItem()
        item['year'] = "2013"
        data = record.xpath('text()').extract()
        item['title'] = data[0].strip()
        authors = record.xpath(
            'following::table[1]//em/text()').extract()[0].strip()
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2014") per ``<li>`` directly inside a ``<ul>`` under ``div.main``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the item's first direct text node, stripped.  The author
    string is its first ``<i>`` text, stripped.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    import re  # local import: this block did not previously depend on ``re``

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//div[@class=\'main\']//ul/li'):
        item = ConferenceItem()
        item['year'] = "2014"
        title = record.xpath('text()').extract()
        item['title'] = title[0].strip()
        authors = record.xpath('i/text()').extract()[0].strip()
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2009") per ``<a>`` inside a ``<p>`` that has a following-sibling ``<em>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the link's first text node.  The authors are the first
    following-sibling ``<em>`` text split on ``', '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for link in response.xpath('//p//a[following-sibling::em]'):
        paper = ConferenceItem()
        print(link)
        paper['year'] = "2009"
        link_texts = link.xpath('text()').extract()
        print(link_texts)
        paper['title'] = link_texts[0]
        emphasised = link.xpath('following-sibling::em/text()').extract()[0]
        print(emphasised)
        paper['authors'] = emphasised.split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2015") per ``p.left`` on the page.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first link text, stripped.  The authors are the first
    ``<i>`` text split on ``', '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for para in response.xpath('//p[@class=\'left\']'):
        paper = ConferenceItem()
        print(para)
        paper['year'] = "2015"
        link_texts = para.xpath('a/text()').extract()
        print(link_texts)
        paper['title'] = link_texts[0].strip()
        italic_text = para.xpath('i/text()').extract()[0]
        print(italic_text)
        # Taking all-but-last and re-appending the last piece is the same as
        # storing the full split result.
        paper['authors'] = italic_text.split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year '2015') per ``<strong>`` inside a ``<section>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the element's first text node.  The first text node that
    follows it as a sibling is split on ``'; '``, and the part of each piece
    before its first comma is stored as an author.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for strong in response.xpath('//section//strong'):
        paper = ConferenceItem()
        paper['year'] = '2015'
        title_nodes = strong.xpath('text()').extract()
        paper['title'] = title_nodes[0]
        trailing = strong.xpath('following-sibling::text()[1]').extract()
        print(trailing)
        paper['authors'] = [
            entry.split(',')[0] for entry in trailing[0].split('; ')
        ]
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2014") per ``<dt>`` on the page.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the ``<dt>``'s first text node, stripped.  The authors are
    the first text node of the first following-sibling ``<dd>``, stripped
    and split on ``', '``.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """
    for term in response.xpath('//dt'):
        paper = ConferenceItem()
        print(term)
        paper['year'] = "2014"
        term_texts = term.xpath('text()').extract()
        print(term_texts)
        paper['title'] = term_texts[0].strip()
        definition = term.xpath(
            'following-sibling::dd[1]/text()').extract()[0]
        print(definition)
        paper['authors'] = definition.strip().split(', ')
        yield paper
def parse(self, response):
    """Yield one ``ConferenceItem`` (year "2008") per ``<tr>`` without a ``class`` attribute.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The title is the first text node of the second cell and the author
    string is the first text node of the first cell; in both, every newline
    followed by a space is removed.

    Authors are split only when the standalone word "and" is present; a
    string without it is stored as a single-element list.

    The ``[0]`` lookups fail with IndexError when a node list is empty.
    """

    def split_authors(text):
        """Split ``text`` on commas and on the standalone word "and"."""
        # A plain substring test for "and" also fires on names such as
        # "Anderson"; splitting on it then cuts names in half or raises
        # IndexError.  Require a whole word instead.
        if not re.search(r'\band\b', text):
            return [text]
        pieces = re.split(r'\s*,\s*(?:and\s+)?|\s+and\s+', text)
        # Drop empty fragments (e.g. from "A, B, and C").
        return [piece.strip() for piece in pieces if piece.strip()]

    for record in response.xpath('//tr[not(@class)]'):
        item = ConferenceItem()
        item['year'] = "2008"
        title = record.xpath('td[2]/text()').extract()
        item['title'] = re.sub('\n ', '', title[0])
        authors = re.sub('\n ', '', record.xpath('td[1]/text()').extract()[0])
        item['authors'] = split_authors(authors)
        yield item
def parse(self, response):
    """Yield ``ConferenceItem`` objects (year '2006') from the rows of ``table.text12``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    A position counter starts at 3 and one row lookup is made per ``<tr>``
    found in the table.  When the row at the current position has a link
    without a ``title`` attribute in ``td[2]/span``, that link's first text
    is the title and the link texts of the next sibling row are the authors;
    the counter then advances by two.  Otherwise it advances by one.
    """
    position = 3
    # The loop variable is unused: the rows only bound the number of lookups.
    for _ in response.xpath('//table[@class=\'text12\']//tr'):
        row = response.xpath(
            '//table[@class=\'text12\']//tr[position()=%d]' % position)
        if not row.xpath('td[2]/span/a[not(@title)]'):
            position += 1
            continue
        title_texts = row.xpath('td[2]/span/a[not(@title)]/text()').extract()
        paper = ConferenceItem()
        paper['year'] = '2006'
        paper['title'] = title_texts[0]
        print(title_texts)
        names = row.xpath('following-sibling::tr[1]//a/text()').extract()
        print(names)
        paper['authors'] = names
        yield paper
        # Skip the author row that was just consumed.
        position += 2
def parse(self, response):
    """Yield one ``ConferenceItem`` per ``p.left`` that follows an ``<h4>``.

    ``response`` must expose ``xpath()`` (presumably a Scrapy response).
    The year is read for every record from the first text node of the third
    ``<p>`` under ``div.content``: the part after the first ``', '``.  The
    title is the first link text, stripped.  The authors are the first
    ``<i>`` text split on ``', '``.

    The index lookups fail with IndexError when the expected nodes or
    separators are missing.
    """
    for para in response.xpath('//h4/following::p[@class=\'left\']'):
        paper = ConferenceItem()
        print(para)
        header_text = response.xpath(
            '//div[@class=\'content\']//p[position()=3]/text()').extract()[0]
        paper['year'] = header_text.split(', ')[1]
        link_texts = para.xpath('a/text()').extract()
        print(link_texts)
        paper['title'] = link_texts[0].strip()
        italic_text = para.xpath('i/text()').extract()[0]
        print(italic_text)
        # Taking all-but-last and re-appending the last piece is the same as
        # storing the full split result.
        paper['authors'] = italic_text.split(', ')
        yield paper