Example #1
0
    def parse(self, response):
        """Yield a ConferenceItem for each linked h3 heading under div#main.

        The title is the heading's first direct text node. The author string
        is the first i-element text found in the p siblings that follow the
        heading. Headings missing either value are skipped; previously the
        unguarded ``[0]`` raised IndexError and aborted every later record.
        """
        for record in response.xpath('//div[@id=\'main\']//h3[a]'):
            print(record)
            title = record.xpath('text()').extract()
            print(title)
            authors = record.xpath(
                'following-sibling::p/i/text()').extract()
            if not title or not authors:
                continue
            authors = authors[0]
            print(authors)

            if ' and ' in authors:
                # Split each " and " chunk on commas and drop blanks, so
                # "A, B, and C" no longer produces the name "B,".
                names = []
                for chunk in authors.split(' and '):
                    for name in chunk.split(','):
                        name = name.strip()
                        if name:
                            names.append(name)
            else:
                # Without " and " the string is kept as a single entry.
                names = [authors]

            item = ConferenceItem()
            item['year'] = "2009"
            item['title'] = title[0].strip()
            item['authors'] = names

            yield item
Example #2
0
    def parse(self, response):
        """Yield a ConferenceItem for each p element that has span text.

        The title is the first text node following a br child. The author
        string is the first span text, extended with the first text of a
        later span when one exists. Paragraphs without span text or without
        title text are skipped instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so a name
        such as "Alexander" is no longer cut in two.
        """
        import re  # function-scope: only the author splitting needs it

        and_word = re.compile(r'\band\b')

        for record in response.xpath('//p'):
            data = record.xpath('span/text()').extract()
            print(data)
            if not data:
                continue

            title = record.xpath('br/following::text()').extract()
            if not title:
                continue

            authors = data[0]
            if record.xpath('span/following::span'):
                continuation = record.xpath(
                    'span/following::span/text()').extract()
                if continuation:
                    authors += continuation[0]
            authors = authors.strip()

            item = ConferenceItem()
            item['year'] = "2009"
            item['title'] = title[0].strip()
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
Example #3
0
    def parse(self, response):
        """Yield a ConferenceItem for each p element after the second one.

        All text under the paragraph is joined and cut once into a title and
        an author string, on ", by " when that separator is present and on
        ", " otherwise. Authors are then split on ", " and " and ".

        Paragraphs with no separator are skipped. Testing for the real
        separator (rather than the substring "by") stops titles that merely
        contain "by" from raising IndexError, and cutting only once keeps
        every author instead of just the first.
        """
        for record in response.xpath('//p[position()>2]'):
            text = ''.join(record.xpath('.//text()').extract())
            print(text)

            separator = ', by ' if ', by ' in text else ', '
            title, found, authors = text.partition(separator)
            if not found:
                continue
            print(title)
            print(authors)

            names = []
            for chunk in authors.split(', '):
                # split() returns the chunk unchanged when " and " is absent
                names.extend(chunk.split(' and '))

            item = ConferenceItem()
            item['year'] = '2013'
            item['title'] = title
            item['authors'] = names
            yield item
Example #4
0
    def parse(self, response):
        """Yield a ConferenceItem for each li element inside a ul.

        The title is the first b-element text, stripped, with CRLF pairs
        replaced by spaces. The authors come from the item's first direct
        text node.

        Parenthesised notes are removed before the author string is split,
        so a comma or "and" inside the parentheses is not treated as a
        separator, and one note can no longer swallow the author that
        follows it. Items missing the title or the author text are skipped.
        """
        parenthesised = re.compile(r'\([^()]*\)')
        separator = re.compile(r',|\band\b')

        for record in response.xpath('//ul//li'):
            print(record)
            title = record.xpath('b/text()').extract()
            print(title)
            authors = record.xpath('text()').extract()
            print(authors)
            if not title or not authors:
                continue

            # Repeat until stable so nested parentheses are removed from the
            # innermost pair outwards.
            cleaned = authors[0]
            while True:
                reduced = parenthesised.sub('', cleaned)
                if reduced == cleaned:
                    break
                cleaned = reduced

            names = []
            for name in separator.split(cleaned):
                name = name.strip()
                if name:
                    print(name)
                    names.append(name)

            item = ConferenceItem()
            item['year'] = "2015"
            item['title'] = title[0].strip().replace('\r\n', ' ')
            item['authors'] = names

            yield item
Example #5
0
    def parse(self, response):
        """Yield a ConferenceItem for each row of the table with class "tbl".

        The title is the first text node of the paragraph in the second
        cell, with CRLF-plus-spaces runs removed. The author string is the
        em-element text when the paragraph has an em child, otherwise the
        paragraph's second text node. Rows lacking the title or the author
        text are skipped instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so names
        that merely contain those letters stay intact.
        """
        and_word = re.compile(r'\band\b')

        for record in response.xpath('//table[@class=\'tbl\']//tr'):
            title = record.xpath('td[2]/p/text()').extract()
            if not title:
                continue

            if record.xpath('td[2]/p[em]'):
                candidates = record.xpath('td[2]/p/em/text()').extract()
            else:
                # an empty slice, not an IndexError, when there is no
                # second text node
                candidates = title[1:2]
            if not candidates:
                continue
            authors = candidates[0].strip()

            item = ConferenceItem()
            item['year'] = "2012"
            item['title'] = re.sub('\r\n +', '', title[0].strip())
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
Example #6
0
    def parse(self, response):
        """Yield ConferenceItems parsed from the first text node of each pre.

        The text is cut into entries on blank CRLF lines and each entry into
        lines. A two-line entry, or one whose second line contains a comma,
        has a one-line title; otherwise the title spans the first two lines.
        All remaining lines form the author string, so no author line is
        dropped however long the entry is.

        Entries with fewer than two lines and pre elements without text are
        skipped instead of raising IndexError. Author lists are split on the
        standalone word "and" only.
        """
        import re  # function-scope: only the author splitting needs it

        and_word = re.compile(r'\band\b')

        for record in response.xpath('//pre'):
            texts = record.xpath('text()').extract()
            if not texts:
                continue

            for paper in texts[0].strip().split('\r\n\r\n'):
                text = paper.split('\r\n')
                if len(text) < 2:
                    continue

                if len(text) == 2 or ',' in text[1]:
                    title = text[0]
                    authors = ' '.join(text[1:])
                else:
                    title = text[0] + ' ' + text[1]
                    authors = ' '.join(text[2:])

                item = ConferenceItem()
                item['year'] = "2011"
                item['title'] = title
                if and_word.search(authors):
                    # "A, B and C": commas separate all but the last two.
                    parts = authors.split(', ')
                    names = parts[:-1] + and_word.split(parts[-1])
                    item['authors'] = [n.strip() for n in names if n.strip()]
                else:
                    item['authors'] = [authors]
                yield item
Example #7
0
    def parse(self, response):
        """Yield a ConferenceItem for each li in a ul under div.content.

        The title is the item's first direct text node and the author string
        is its first i-element text. Items missing either value are skipped;
        previously the unguarded ``[0]`` raised IndexError and aborted every
        later record.
        """
        for record in response.xpath('//div[@class=\'content\']//ul//li'):
            print(record)
            title = record.xpath('text()').extract()
            print(title)
            author_texts = record.xpath('i/text()').extract()
            if not title or not author_texts:
                continue
            authors = author_texts[0]
            print(authors)

            data = []
            if ' and ' in authors:
                for chunk in authors.split(' and '):
                    # Comma-split and strip every chunk; blanks are dropped
                    # so "A, B, and C" does not yield the name "B,".
                    pieces = [piece.strip() for piece in chunk.split(',')]
                    data.extend(piece for piece in pieces if piece)
            else:
                # Without " and " the string is kept as a single entry.
                data.append(authors)

            item = ConferenceItem()
            item['year'] = "2008"
            item['title'] = title[0].strip()
            item['authors'] = data

            yield item
Example #8
0
    def parse(self, response):
        """Yield a ConferenceItem for each p element nested in a div.

        The title is the first link text inside span.title; paragraphs
        without one receive a fixed fallback title. Authors are the
        comma-separated, non-blank pieces of every text node under
        span.authors, ignoring nodes that contain "papers".
        """
        # NOTE(review): every paragraph without a title link gets this same
        # hard-coded title; confirm that is intended for the target page.
        fallback_title = "Coordination for Uncertain Outcomes using Distributed Neighbor Exchange"

        for paragraph in response.xpath('//div//p'):
            paper = ConferenceItem()
            print(paragraph)
            paper['year'] = "2010"

            link_text = paragraph.xpath(
                'span[@class=\'title\']/a/text()').extract()
            print(link_text)
            paper['title'] = link_text[0].strip() if link_text else fallback_title

            author_nodes = paragraph.xpath(
                'span[@class=\'authors\']//text()').extract()
            print(author_nodes)

            names = []
            for node in author_nodes:
                if 'papers' in node:
                    continue
                for piece in node.split(','):
                    cleaned = piece.strip()
                    if cleaned:
                        names.append(cleaned)
            paper['authors'] = names

            yield paper
Example #9
0
    def parse(self, response):
        """Yield a ConferenceItem for each li under the second body div.

        The year is the second space-separated word of the page title. The
        item's title is the list of text nodes of its first link, and the
        authors are the text nodes of all later links.
        """
        page_title = response.xpath('/html/head/title/text()').extract()[0]
        year = page_title.split(' ')[1]
        print(year)

        for entry in response.xpath('//body/div[2]//ul/li'):
            paper = ConferenceItem()
            paper['year'] = year
            # Both fields are stored as the raw lists returned by extract().
            paper['title'] = entry.xpath('a[1]/text()').extract()
            paper['authors'] = entry.xpath('a[position()>1]/text()').extract()
            yield paper
Example #10
0
    def parse(self, response):
        """Yield a ConferenceItem for each h3 directly under div#DLcontent.

        The title is the heading's first link text. The authors are the
        first text node of every li in the first ul that follows the
        heading.
        """
        for heading in response.xpath('//div[@id=\'DLcontent\']/h3'):
            paper = ConferenceItem()
            paper['year'] = '2015'
            paper['title'] = heading.xpath('a/text()').extract()[0]
            paper['authors'] = [
                entry.xpath('text()').extract()[0]
                for entry in heading.xpath('following::ul[1]//li')
            ]

            yield paper
Example #11
0
    def parse(self, response):
        """Yield a ConferenceItem for each li element inside an ol.

        The title is the stripped first b-element text. The authors field
        holds the item's direct text nodes exactly as extracted.
        """
        for entry in response.xpath('//ol//li'):
            paper = ConferenceItem()
            paper['year'] = "2010"

            bold_text = entry.xpath('b/text()').extract()
            print(bold_text)
            paper['title'] = bold_text[0].strip()

            # Stored as the raw list of text nodes, without splitting.
            text_nodes = entry.xpath('text()').extract()
            print(text_nodes)
            paper['authors'] = text_nodes
            yield paper
Example #12
0
    def parse(self, response):
        """Yield ConferenceItems parsed from the first text node of each tbody.

        The text is cut into entries wherever a space is followed by two
        newlines. In each entry the first line is the title and the second
        line holds the authors, separated by " and ".

        Entries with fewer than two lines (for example the empty remainder
        after a trailing separator) and tbody elements without text are
        skipped instead of raising IndexError.
        """
        for record in response.xpath('//tbody'):
            texts = record.xpath('.//text()').extract()
            if not texts:
                continue
            data = texts[0]
            print(data)

            for entry in data.split(' \n\n'):
                lines = entry.split('\n')
                if len(lines) < 2:
                    continue
                item = ConferenceItem()
                item['year'] = '2012'
                item['title'] = lines[0]
                item['authors'] = lines[1].split(' and ')
                yield item
    def parse(self, response):
        """Yield a ConferenceItem for each li that follows an h3.

        The title is the item's first direct text node, stripped. The
        authors are the ", "-separated pieces of the first i-element text,
        without the final piece.
        """
        for entry in response.xpath('//h3/following::li'):
            paper = ConferenceItem()
            print(entry)
            paper['year'] = "2014"

            text_nodes = entry.xpath('text()').extract()
            print(text_nodes)
            paper['title'] = text_nodes[0].strip()

            author_line = entry.xpath('i/text()').extract()[0]
            print(author_line)
            # NOTE(review): the last comma-separated piece is always
            # discarded, so a single-author line yields an empty list.
            # Confirm the source text ends with a trailing separator.
            paper['authors'] = author_line.split(', ')[:-1]

            yield paper
Example #14
0
    def parse(self, response):
        """Yield a ConferenceItem for each p element on the page.

        The title is the first text node of the first div following the
        paragraph. The authors are the ", "-separated names on the second
        line of the joined text of the second following div.

        Paragraphs for which either value is missing are skipped, and the
        page heading is printed only when present, so a missing h1 no
        longer aborts the parse with IndexError.
        """
        headings = response.xpath('//h1/text()').extract()
        if headings:
            print(headings[0])

        for record in response.xpath('//p'):
            print(record)
            print(record.xpath('text()'))

            title = record.xpath('following::div[1]/text()').extract()
            author_text = ''.join(
                record.xpath('following::div[2]/text()').extract())
            author_lines = author_text.split('\n')
            if not title or len(author_lines) < 2:
                continue

            item = ConferenceItem()
            item['year'] = '2012'
            item['title'] = title[0].strip()
            item['authors'] = author_lines[1].split(', ')

            yield item
Example #15
0
    def parse(self, response):
        """Yield a ConferenceItem for each selected paragraph of div.span12.

        Paragraphs after the first inside the matching div are used. The
        title is the stripped text of the link inside strong, and the
        authors are the first text node of every link that follows strong
        as a sibling.
        """
        paragraphs = response.xpath(
            '//div[@class=\"span12\" and position()=3]//p[position()>1]')
        for paragraph in paragraphs:
            print(paragraph)
            paper = ConferenceItem()
            paper['year'] = "2006"

            link_text = paragraph.xpath('strong/a/text()').extract()
            print(link_text)
            paper['title'] = link_text[0].strip()

            names = [
                link.xpath('text()').extract()[0]
                for link in paragraph.xpath('strong/following-sibling::a')
            ]
            print(names)
            paper['authors'] = names
            yield paper
Example #16
0
    def parse(self, response):
        """Yield a ConferenceItem for each div.cp_pp found inside a table row.

        The title is the first link text. The authors are read from the
        link's title attribute and split on ", ".
        """
        for cell in response.xpath('//tr//div[@class=\'cp_pp\']'):
            paper = ConferenceItem()
            paper['year'] = '2007'

            link_text = cell.xpath('a/text()').extract()
            print(link_text)
            paper['title'] = link_text[0]

            # The author list lives in the link's title attribute.
            link_titles = cell.xpath('a/@title').extract()
            print(link_titles)
            paper['authors'] = link_titles[0].split(', ')

            yield paper
Example #17
0
    def parse(self, response):
        """Yield a ConferenceItem for each dt element.

        The author string is the dt's first text node and the title is the
        first dd text that follows it. Entries missing either value are
        skipped instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so a
        single author such as "Alexander Smith" is no longer cut in two.
        """
        import re  # function-scope: only the author splitting needs it

        and_word = re.compile(r'\band\b')

        for record in response.xpath('//dt'):
            title = record.xpath('following::dd/text()').extract()
            author_texts = record.xpath('text()').extract()
            if not title or not author_texts:
                continue
            authors = author_texts[0].strip()

            item = ConferenceItem()
            item['year'] = "2007"
            item['title'] = title[0].strip()
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
Example #18
0
    def parse(self, response):
        """Yield a ConferenceItem for each link that contains an h3.

        The title is the stripped h3 text. The authors are the first
        span.name text split on ", ".
        """
        for link in response.xpath('//a[h3]'):
            paper = ConferenceItem()
            print(link)
            paper['year'] = "2011"

            heading_text = link.xpath('h3/text()').extract()
            print(heading_text)
            paper['title'] = heading_text[0].strip()

            name_line = link.xpath('span[@class=\'name\']/text()').extract()[0]
            print(name_line)
            paper['authors'] = name_line.split(', ')

            yield paper
    def parse(self, response):
        """Yield a ConferenceItem for each table row holding a strong title.

        The title is the first strong text inside a cell, and the authors
        are that cell's first direct text node, stripped and split on ", ".

        Every tr on the page is visited, so rows without such a cell are
        skipped; previously the first such row raised IndexError and
        aborted the remaining rows.
        """
        for record in response.xpath('//tr'):
            print(record)
            title = record.xpath('td[strong]/strong/text()').extract()
            print(title)
            author_texts = record.xpath('td[strong]/text()').extract()
            if not title or not author_texts:
                continue
            authors = author_texts[0]
            print(authors)

            item = ConferenceItem()
            item['year'] = "2006"
            item['title'] = title[0]
            item['authors'] = authors.strip().split(', ')

            yield item
Example #20
0
    def parse(self, response):
        """Yield a ConferenceItem for each table cell that contains a link.

        The title is the first link text. The authors are the first
        i-element text split on ", ".
        """
        for cell in response.xpath('//tr/td[a]'):
            paper = ConferenceItem()
            print(cell)
            paper['year'] = "2008"

            link_text = cell.xpath('a/text()').extract()
            print(link_text)
            paper['title'] = link_text[0]

            name_line = cell.xpath('i/text()').extract()[0]
            print(name_line)
            paper['authors'] = name_line.split(', ')

            yield paper
Example #21
0
    def parse(self, response):
        """Yield a ConferenceItem for each nested-list li under body/div/ul.

        The item's first direct text node is the title and its second is
        the author string. Items with fewer than two text nodes are skipped
        instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so names
        that merely contain those letters stay intact.
        """
        import re  # function-scope: only the author splitting needs it

        and_word = re.compile(r'\band\b')

        for record in response.xpath('//body/div/ul//ul/li'):
            data = record.xpath('text()').extract()
            if len(data) < 2:
                continue
            authors = data[1].strip()

            item = ConferenceItem()
            item['year'] = "2015"
            item['title'] = data[0].strip()
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
Example #22
0
    def parse(self, response):
        """Yield a ConferenceItem for each link inside div.page.

        The title is the link's first text node. The author string is the
        first em text inside the first table following the link. Links
        missing either value are skipped instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so names
        that merely contain those letters stay intact.
        """
        import re  # function-scope: only the author splitting needs it

        and_word = re.compile(r'\band\b')

        for record in response.xpath('//div[@class=\'page\']//a'):
            data = record.xpath('text()').extract()
            author_texts = record.xpath(
                'following::table[1]//em/text()').extract()
            if not data or not author_texts:
                continue
            authors = author_texts[0].strip()

            item = ConferenceItem()
            item['year'] = "2013"
            item['title'] = data[0].strip()
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
Example #23
0
    def parse(self, response):
        """Yield a ConferenceItem for each li of a ul under div.main.

        The title is the item's first direct text node and the author
        string is its first i-element text. Items missing either value are
        skipped instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so names
        that merely contain those letters stay intact.
        """
        import re  # function-scope: only the author splitting needs it

        and_word = re.compile(r'\band\b')

        for record in response.xpath('//div[@class=\'main\']//ul/li'):
            title = record.xpath('text()').extract()
            author_texts = record.xpath('i/text()').extract()
            if not title or not author_texts:
                continue
            authors = author_texts[0].strip()

            item = ConferenceItem()
            item['year'] = "2014"
            item['title'] = title[0].strip()
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
    def parse(self, response):
        """Yield a ConferenceItem for each paragraph link followed by an em.

        The title is the link's first text node. The authors are the first
        text of the em siblings after the link, split on ", ".
        """
        for link in response.xpath('//p//a[following-sibling::em]'):
            paper = ConferenceItem()
            print(link)
            paper['year'] = "2009"

            link_text = link.xpath('text()').extract()
            print(link_text)
            paper['title'] = link_text[0]

            name_line = link.xpath('following-sibling::em/text()').extract()[0]
            print(name_line)
            paper['authors'] = name_line.split(', ')

            yield paper
Example #25
0
    def parse(self, response):
        """Yield a ConferenceItem for each p element with class "left".

        The title is the stripped first link text. The authors are the
        first i-element text split on ", ".
        """
        for paragraph in response.xpath('//p[@class=\'left\']'):
            paper = ConferenceItem()
            print(paragraph)
            paper['year'] = "2015"

            link_text = paragraph.xpath('a/text()').extract()
            print(link_text)
            paper['title'] = link_text[0].strip()

            name_line = paragraph.xpath('i/text()').extract()[0]
            print(name_line)
            # split() always returns at least one piece, so the whole list
            # is the same as "all but the last" plus "the last".
            paper['authors'] = name_line.split(', ')

            yield paper
Example #26
0
    def parse(self, response):
        """Yield a ConferenceItem for each strong element inside a section.

        The title is the element's first text node. The text node right
        after it is split on "; " into author entries, and only the part
        of each entry before its first comma is kept.
        """
        for heading in response.xpath('//section//strong'):
            paper = ConferenceItem()
            paper['year'] = '2015'
            paper['title'] = heading.xpath('text()').extract()[0]

            trailing_text = heading.xpath(
                'following-sibling::text()[1]').extract()
            print(trailing_text)
            paper['authors'] = [
                entry.split(',')[0]
                for entry in trailing_text[0].split('; ')
            ]

            yield paper
Example #27
0
    def parse(self, response):
        """Yield a ConferenceItem for each dt element.

        The title is the dt's stripped first text node. The authors are
        the first text of the nearest following dd sibling, stripped and
        split on ", ".
        """
        for term in response.xpath('//dt'):
            paper = ConferenceItem()
            print(term)
            paper['year'] = "2014"

            term_text = term.xpath('text()').extract()
            print(term_text)
            paper['title'] = term_text[0].strip()

            name_line = term.xpath(
                'following-sibling::dd[1]/text()').extract()[0]
            print(name_line)
            paper['authors'] = name_line.strip().split(', ')

            yield paper
Example #28
0
    def parse(self, response):
        """Yield a ConferenceItem for each table row without a class.

        The title is the first text node of the second cell and the author
        string is the first text node of the first cell, both with
        newline-plus-space pairs removed. Rows missing either value are
        skipped instead of raising IndexError.

        Author lists are split on the standalone word "and" only, so names
        that merely contain those letters stay intact.
        """
        and_word = re.compile(r'\band\b')

        for record in response.xpath('//tr[not(@class)]'):
            title = record.xpath('td[2]/text()').extract()
            author_texts = record.xpath('td[1]/text()').extract()
            if not title or not author_texts:
                continue
            authors = re.sub('\n ', '', author_texts[0])

            item = ConferenceItem()
            item['year'] = "2008"
            item['title'] = re.sub('\n ', '', title[0])
            if and_word.search(authors):
                # "A, B and C": commas separate every name but the last two.
                parts = authors.split(', ')
                names = parts[:-1] + and_word.split(parts[-1])
                item['authors'] = [n.strip() for n in names if n.strip()]
            else:
                item['authors'] = [authors]
            yield item
Example #29
0
    def parse(self, response):
        """Yield ConferenceItems from the rows of the table with class text12.

        Rows are looked up by position, starting at 3, once per row the
        table has. A row whose second cell holds a span link without a
        title attribute supplies the paper title, and the link texts of
        the next sibling row supply the authors; the position then moves
        on by two. Any other row moves the position on by one.
        """
        row_total = len(response.xpath('//table[@class=\'text12\']//tr'))
        position = 3

        for _ in range(row_total):
            row = response.xpath(
                '//table[@class=\'text12\']//tr[position()=%d]' % position)

            if not row.xpath('td[2]/span/a[not(@title)]'):
                position += 1
                continue

            link_text = row.xpath('td[2]/span/a[not(@title)]/text()').extract()
            paper = ConferenceItem()
            paper['year'] = '2006'
            paper['title'] = link_text[0]
            print(link_text)

            names = row.xpath('following-sibling::tr[1]//a/text()').extract()
            print(names)
            paper['authors'] = names
            yield paper
            # Skip the author row that was just consumed.
            position += 2
Example #30
0
    def parse(self, response):
        """Yield a ConferenceItem for each p.left that follows an h4.

        The year is the second ", "-separated piece of the first text node
        of the third paragraph under div.content. It is page-wide, so it is
        looked up once instead of once per record. The title is the
        stripped first link text and the authors are the first i-element
        text split on ", ". Records missing either value are skipped.

        Raises:
            IndexError: if records exist but the year paragraph is missing
                or has no ", " separator.
        """
        records = response.xpath('//h4/following::p[@class=\'left\']')
        if not records:
            # Nothing to emit, so the year lookup is not attempted.
            return

        year_text = response.xpath(
            '//div[@class=\'content\']//p[position()=3]/text()').extract()[0]
        year = year_text.split(', ')[1]

        for record in records:
            print(record)
            data = record.xpath('a/text()').extract()
            print(data)
            author_texts = record.xpath('i/text()').extract()
            if not data or not author_texts:
                continue
            authors = author_texts[0]
            print(authors)

            item = ConferenceItem()
            item['year'] = year
            item['title'] = data[0].strip()
            item['authors'] = authors.split(', ')

            yield item