Python SpeakerLoader Examples, pycon_speakers.loaders.SpeakerLoader Python Examples

Example #1

0

Show file

 def _parse_2010(self, response):
     """Yield one loaded speaker item per proposal summary block.

     Every ``<div class="proposal_list_summary">`` on the page gets its own
     ``SpeakerLoader``.  The name is looked up with ``./span[1]`` inside
     that block and the year is taken from ``response.meta['year']``.
     """
     summaries = Selector(response).xpath(
         '//div[@class="proposal_list_summary"]')
     for summary in summaries:
         loader = SpeakerLoader(selector=summary)
         loader.add_xpath('name', './span[1]')
         loader.add_value('year', str(response.meta['year']))
         loader.add_value('conference', 'PyCon US')
         yield loader.load_item()

Example #2

0

Show file

File: europython_eu.py Project: JuanMiGabarron/pycon-speakers

 def parse_new(self, response):
     """Yield a speaker item for each speaker listed under an archived talk.

     Speakers are matched with ``.archive .talk .speakers > .speaker``.
     The name is read with ``span::text``, image URLs with
     ``a > img::attr(src)`` and the year from
     ``response.meta['cookiejar']``.
     """
     def absolute_urls(sources):
         # Join each value handed to the processor against the page URL.
         return [urljoin(response.url, src) for src in sources]

     page = Selector(response)
     speaker_nodes = page.css('.archive .talk .speakers > .speaker')
     for node in speaker_nodes:
         loader = SpeakerLoader(selector=node)
         loader.add_value('conference', 'EuroPython')
         loader.add_css('name', "span::text")
         loader.add_css('image_urls', "a > img::attr(src)", absolute_urls)
         loader.add_value('year', str(response.meta['cookiejar']))
         yield loader.load_item()

Example #3

0

Show file

 def parse(self, response):
     """Yield one speaker item per ``en_speaker_name`` span on the page.

     Each extracted span is handed to the loader as the ``name`` value,
     together with the year from ``response.meta['year']``.
     """
     page = Selector(response)
     name_spans = page.xpath('//span[@class="en_speaker_name"]').extract()
     for raw_name in name_spans:
         loader = SpeakerLoader(response=response)
         loader.add_value('name', raw_name)
         loader.add_value('year', str(response.meta['year']))
         loader.add_value('conference', 'OSCON')
         yield loader.load_item()

Example #4

0

Show file

 def _parse_video(self, response):
     """Yield a speaker item for every presenter link in the videos block.

     Matches ``<a>`` children of ``div[@class='presenters']`` elements
     nested inside ``div[@class='videos']``.  Conference and year are
     copied from ``response.meta``.
     """
     presenter_links = Selector(response).xpath(
         "//div[@class = 'videos']//div[@class = 'presenters']/a")
     for link in presenter_links:
         loader = SpeakerLoader(selector=link)
         # The matched <a> element itself is the name source.
         loader.add_xpath('name', ".")
         loader.add_value('conference', str(response.meta['conference']))
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #5

0

Show file

 def _parse_2014(self, response):
     """Yield a speaker item for each ``sched-person`` block on the page.

     The name is looked up with ``./h2/a`` inside the block; conference
     and year are copied from ``response.meta``.
     """
     people = Selector(response).xpath("//div[@class='sched-person']")
     for person in people:
         loader = SpeakerLoader(selector=person)
         loader.add_xpath('name', "./h2/a")
         loader.add_value('conference', str(response.meta['conference']))
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #6

0

Show file

 def _parse_2013(self, response):
     """Yield a speaker item for each heading link in a ``data-mid2`` block.

     Matches the first ``<a>`` of the first ``<h2>`` under every
     ``div[@class='data-mid2']``; conference and year are copied from
     ``response.meta``.
     """
     heading_links = Selector(response).xpath(
         "//div[@class='data-mid2']/h2[1]/a[1]")
     for link in heading_links:
         loader = SpeakerLoader(selector=link)
         # The matched link itself is the name source.
         loader.add_xpath('name', ".")
         loader.add_value('conference', str(response.meta['conference']))
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #7

0

Show file

 def _parse_2006(self, response):
     """Yield a speaker item for each element following a bold paragraph.

     Within ``div#content``, every ``<p>`` that has a ``<strong>`` child
     is located and the element immediately after it is used as the name
     source.  The year is taken from ``response.meta['year']``.
     """
     page = Selector(response)
     name_nodes = page.xpath(
         '//div[@id="content"]/p[strong]/following-sibling::*[1]')
     for node in name_nodes:
         loader = SpeakerLoader(selector=node)
         loader.add_xpath('name', '.')
         loader.add_value('year', str(response.meta['year']))
         loader.add_value('conference', 'PyCon US')
         yield loader.load_item()

Example #8

0

Show file

 def parse_2010(self, response):
     """Yield a SciPy speaker item for each comma-separated author.

     Author lists are the text of ``ul > li > em`` elements; each list is
     split on ``','`` and every piece becomes one item.  The year is
     passed through from ``response.meta['year']`` unchanged.
     """
     page = Selector(response)
     author_lists = page.css('ul > li > em::text').extract()
     # Lazily flatten the per-entry author lists into single names.
     names = (name for group in author_lists for name in group.split(','))
     for name in names:
         loader = SpeakerLoader(selector=page, response=response)
         loader.add_value('name', name)
         loader.add_value('year', response.meta['year'])
         loader.add_value('conference', 'SciPy')
         yield loader.load_item()

Example #9

0

Show file

File: developerweek_com.py Project: JuanMiGabarron/pycon-speakers

 def _parse_2013(self, response):
     """Yield one speaker item per first heading link of a ``data-mid2`` div.

     The XPath selects ``h2[1]/a[1]`` below each
     ``div[@class='data-mid2']``.  Conference and year both come from
     ``response.meta`` and are converted with ``str``.
     """
     query = "//div[@class='data-mid2']/h2[1]/a[1]"
     for anchor in Selector(response).xpath(query):
         meta = response.meta
         loader = SpeakerLoader(selector=anchor)
         loader.add_xpath('name', ".")
         loader.add_value('conference', str(meta['conference']))
         loader.add_value('year', str(meta['year']))
         yield loader.load_item()

Example #10

0

Show file

File: europython_eu.py Project: atassumer/pycon-speakers

 def parse(self, response):
     """Yield speaker items for this page, then requests for linked pages.

     A speaker item is produced for each ``div.mini-profile`` element.
     Afterwards every ``href`` found with ``.pagination a::attr(href)`` is
     joined against ``response.url`` and requested with the same ``meta``.
     """
     page = Selector(response)
     profiles = page.css('div.mini-profile')
     for profile in profiles:
         loader = SpeakerLoader(selector=profile)
         loader.add_css('name', ".name > a::text")
         loader.add_css('image_urls', "img::attr(src)")
         loader.add_value('year', str(response.meta['cookiejar']))
         loader.add_value('conference', 'EuroPython')
         yield loader.load_item()

     # Pagination: follow every pager link, carrying the meta along.
     for href in page.css('.pagination a::attr(href)').extract():
         yield Request(urljoin(response.url, href), meta=response.meta)

Example #11

0

Show file

File: developerweek_com.py Project: JuanMiGabarron/pycon-speakers

 def _parse_2014(self, response):
     """Yield one speaker item per ``div[@class='sched-person']`` element.

     Names are looked up with ``./h2/a`` relative to each element.
     Conference and year both come from ``response.meta`` and are
     converted with ``str``.
     """
     query = "//div[@class='sched-person']"
     for entry in Selector(response).xpath(query):
         meta = response.meta
         loader = SpeakerLoader(selector=entry)
         loader.add_xpath('name', "./h2/a")
         loader.add_value('conference', str(meta['conference']))
         loader.add_value('year', str(meta['year']))
         yield loader.load_item()

Example #12

0

Show file

File: confreaks_com.py Project: JuanMiGabarron/pycon-speakers

 def _parse_video(self, response):
     """Yield one speaker item per presenter link inside the videos block.

     The XPath selects ``<a>`` children of ``div[@class='presenters']``
     below ``div[@class='videos']``.  Conference and year both come from
     ``response.meta`` and are converted with ``str``.
     """
     query = "//div[@class = 'videos']//div[@class = 'presenters']/a"
     for anchor in Selector(response).xpath(query):
         meta = response.meta
         loader = SpeakerLoader(selector=anchor)
         loader.add_xpath('name', ".")
         loader.add_value('conference', str(meta['conference']))
         loader.add_value('year', str(meta['year']))
         yield loader.load_item()

Example #13

0

Show file

File: oscon_com.py Project: JuanMiGabarron/pycon-speakers

 def parse(self, response):
     """Yield an OSCON speaker item for each ``en_speaker_name`` span.

     The extracted span is passed to the loader as ``name``; the year is
     ``str(response.meta['year'])``.
     """
     query = '//span[@class="en_speaker_name"]'
     for span_markup in Selector(response).xpath(query).extract():
         loader = SpeakerLoader(response=response)
         loader.add_value('name', span_markup)
         loader.add_value('year', str(response.meta['year']))
         loader.add_value('conference', 'OSCON')
         yield loader.load_item()

Example #14

0

Show file

 def parse(self, response):
     """Yield a EuroSciPy speaker item for each second-column table cell.

     The text of every ``//tr/td[2]`` cell is used as the ``name`` value;
     the year is passed through from ``response.meta['year']`` unchanged.
     """
     # The __force_display parameter makes the site return all talks
     # without pagination.
     page = Selector(response)
     cell_texts = page.xpath('//tr/td[2]/text()').extract()
     for cell_text in cell_texts:
         loader = SpeakerLoader(selector=page, response=response)
         # TODO: handle/remove the affiliation value and possibly
         # multiple authors.
         loader.add_value('conference', 'EuroSciPy')
         loader.add_value('name', cell_text)
         loader.add_value('year', response.meta['year'])
         yield loader.load_item()

Example #15

0

Show file

File: scipy_org.py Project: JuanMiGabarron/pycon-speakers

 def parse_2010(self, response):
     """Yield one SciPy speaker item per author named in the talk list.

     The text of each ``ul > li > em`` element is treated as a
     comma-separated author list.  Every piece is loaded as ``name``
     alongside ``response.meta['year']``.
     """
     page = Selector(response)
     for author_line in page.css('ul > li > em::text').extract():
         pieces = author_line.split(',')
         for piece in pieces:
             loader = SpeakerLoader(selector=page, response=response)
             loader.add_value('name', piece)
             loader.add_value('year', response.meta['year'])
             loader.add_value('conference', 'SciPy')
             yield loader.load_item()

Example #16

0

Show file

    def parse_2013(self, response):
        """Yield a SciPy speaker item for each semicolon-separated author.

        Author lists are the text of ``.authors`` elements; each list is
        split on ``';'`` and every piece becomes one item.  The year is
        passed through from ``response.meta['year']`` unchanged.
        """
        page = Selector(response)
        # Probably the nicest layout of all the versions.
        author_lists = page.css('.authors::text').extract()
        # FIXME: a few entries lack the multiple-author separator ';'.
        names = (name for group in author_lists for name in group.split(';'))
        for name in names:
            loader = SpeakerLoader(selector=page, response=response)
            # FIXME: most author entries have the institution at the end.
            loader.add_value('name', name)
            loader.add_value('year', response.meta['year'])
            loader.add_value('conference', 'SciPy')
            yield loader.load_item()

Example #17

0

Show file

 def parse_2012(self, response):
     """Yield a SciPy speaker item for each author in the registrants table.

     Author names are pulled out of the ``#registrants_table`` markup with
     a regular expression rather than by element structure.  Matches equal
     to ``'--'`` are skipped.  The year is passed through from
     ``response.meta['year']`` unchanged.
     """
     sel = Selector(response)
     # Pure-regex approach: the layout varies a little between entries
     # while the author text follows a fairly uniform pattern.
     # Raw string, because \s is not a valid escape sequence in an
     # ordinary string literal and triggers a warning on newer Pythons.
     author_re = r'>\s*-\s*(.+?)\s*(?:$|<)'
     for author in sel.css('#registrants_table').re(author_re):
         if author == '--':  # No author.
             continue
         sl = SpeakerLoader(selector=sel, response=response)
         sl.add_value('name', author)
         sl.add_value('year', response.meta['year'])
         sl.add_value('conference', 'SciPy')
         yield sl.load_item()

Example #18

0

Show file

 def _parse(self, response):
     """Yield one speaker item per name block in the speakers section.

     Matches ``div[@class="name"]`` elements nested inside
     ``div[@class="speakers"]``.  The matched element is the name source
     and the year is taken from ``response.meta['year']``.
     """
     name_blocks = Selector(response).xpath(
         '//div[@class="speakers"]//div[@class="name"]')
     for block in name_blocks:
         loader = SpeakerLoader(selector=block)
         loader.add_xpath('name', '.')
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #19

0

Show file

 def _parse_2013(self, response):
     """Yield one speaker item per div whose class contains ``speaker``.

     The name is looked up with ``./a[@class='name']`` inside each div and
     the year is taken from ``response.meta['year']``.
     """
     speaker_divs = Selector(response).xpath(
         "//div[contains(@class,'speaker')]")
     for div in speaker_divs:
         loader = SpeakerLoader(selector=div)
         loader.add_xpath('name', "./a[@class='name']")
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #20

0

Show file

 def parse_speakers(self, response):
     """Yield one speaker item per div whose class contains ``speaker``.

     The name is the text of ``h5/a`` links with ``target='_blank'`` found
     anywhere below the div; the year is taken from
     ``response.meta['year']``.
     """
     page = Selector(response)
     matches = page.xpath("//div[contains(@class, 'speaker')]")
     for match in matches:
         item_loader = SpeakerLoader(selector=match)
         item_loader.add_xpath('name', ".//h5/a[@target='_blank']/text()")
         item_loader.add_value('year', str(response.meta['year']))
         yield item_loader.load_item()

Example #21

0

Show file

 def parse_2009(self, response):
     """Yield a SciPy speaker item for the first author of each entry.

     The markup of every ``.section > p`` element is searched with a
     regular expression that captures the parenthesised text following a
     ``<strong>`` element.  Only the part before the first comma of that
     capture is loaded as ``name``.  The year is passed through from
     ``response.meta['year']`` unchanged.
     """
     sel = Selector(response)
     # Raw string, because \( and \) are not valid escape sequences in an
     # ordinary string literal and trigger a warning on newer Pythons.
     author_re = r'<strong>.+</strong>.+\((.+)\)<'
     for authors in sel.css('.section > p').re(author_re):
         # There are a few multiple-author entries, some separated by '&'
         # and others by a comma. The problem comes from entries with
         # author plus institution, i.e.: "Armando Sole, ESRF, France".
         # For now, we extract only the first author.
         author = authors.partition(',')[0]
         sl = SpeakerLoader(selector=sel, response=response)
         sl.add_value('name', author)
         sl.add_value('year', response.meta['year'])
         sl.add_value('conference', 'SciPy')
         yield sl.load_item()

Example #22

0

Show file

 def _parse_workshop_2012(self, response):
     """Yield one speaker item per paragraph text inside a speaker div.

     For each div whose class contains ``speaker``, every text node of a
     descendant ``<p>`` is loaded as ``name``; the year is taken from
     ``response.meta['year']``.
     """
     speaker_divs = Selector(response).xpath(
         "//div[contains(@class,'speaker')]")
     for div in speaker_divs:
         paragraph_texts = div.xpath(".//p/text()").extract()
         for text in paragraph_texts:
             loader = SpeakerLoader(selector=div)
             loader.add_value('name', text)
             loader.add_value('year', str(response.meta['year']))
             yield loader.load_item()

Example #23

0

Show file

 def parse_2011(self, response):
     """Yield one speaker item per ``.speakers`` element on the page.

     The name is looked up with ``.speakers::text`` on each matched
     element; the year is taken from ``response.meta['cookiejar']``.
     """
     page = Selector(response)
     for block in page.css('.speakers'):
         loader = SpeakerLoader(selector=block)
         loader.add_css('name', '.speakers::text')
         loader.add_value('year', str(response.meta['cookiejar']))
         yield loader.load_item()

Example #24

0

Show file

File: scipy_org.py Project: JuanMiGabarron/pycon-speakers

 def parse_2012(self, response):
     """Yield one SciPy speaker item per author matched in the table markup.

     A regular expression is run over the ``#registrants_table`` element
     and each captured author is loaded as ``name``, except the literal
     ``'--'``, which is skipped.  The year is ``response.meta['year']``,
     passed through unchanged.
     """
     sel = Selector(response)
     # Here we take a pure-regex approach as the layout varies a little
     # between the entries and the author text has a fairly uniform
     # pattern.  The pattern is a raw string so that \s reaches the regex
     # engine without relying on an invalid string escape.
     pattern = r'>\s*-\s*(.+?)\s*(?:$|<)'
     for author in sel.css('#registrants_table').re(pattern):
         if author == '--':  # No author.
             continue
         sl = SpeakerLoader(selector=sel, response=response)
         sl.add_value('name', author)
         sl.add_value('year', response.meta['year'])
         sl.add_value('conference', 'SciPy')
         yield sl.load_item()

Example #25

0

Show file

 def parse_2008(self, response):
     """Yield a SciPy speaker item for each author of each listed talk.

     Every text node of ``.section > p`` is expected to look like
     ``"<title> (<authors>)"``.  Entries that do not match are ignored.
     The authors group is split on ``','`` and each piece is loaded as
     ``name``; the year is passed through from ``response.meta['year']``
     unchanged.
     """
     sel = Selector(response)
     # Raw string, because \( and \) are not valid escape sequences in an
     # ordinary string literal and trigger a warning on newer Pythons.
     talk_author_re = re.compile(r'^(?P<title>.+) \((?P<authors>.+?)\)$')
     for event in sel.css('.section > p::text').extract():
         # For some reason, some entries have the character '\n' between
         # the talk name and the author.
         event = event.replace('\n', ' ').strip()
         m = talk_author_re.search(event)
         if not m:
             continue
         # Reuse the match object instead of searching a second time.
         for author in m.group('authors').split(','):
             sl = SpeakerLoader(selector=sel, response=response)
             sl.add_value('name', author)
             sl.add_value('year', response.meta['year'])
             sl.add_value('conference', 'SciPy')
             yield sl.load_item()

Example #26

0

Show file

 def _parse_workshop_2013(self, response):
     """Yield one speaker item per name in each workshop heading.

     For every div whose ``id`` contains ``workshop``, the first ``<h2>``
     text node is passed to ``self._split_names`` and each resulting name
     is loaded as ``name``.  Sections without any ``<h2>`` text are
     skipped.  The year is taken from ``response.meta['year']``.
     """
     for section in Selector(response).xpath(
             "//div[contains(@id,'workshop')]"):
         headings = section.xpath(".//h2/text()").extract()
         if not headings:
             # Indexing [0] on an empty result would raise IndexError and
             # abort the remaining sections of the page.
             continue
         for name in self._split_names(headings[0]):
             il = SpeakerLoader(selector=section)
             il.add_value('name', name)
             il.add_value('year', str(response.meta['year']))
             yield il.load_item()

Example #27

0

Show file

File: scipy_org.py Project: JuanMiGabarron/pycon-speakers

    def parse_2013(self, response):
        """Yield one SciPy speaker item per author in each ``.authors`` text.

        The text of each ``.authors`` element is treated as a
        semicolon-separated author list.  Every piece is loaded as
        ``name`` alongside ``response.meta['year']``.
        """
        page = Selector(response)
        # Probably this is the nicest layout of all versions.
        for author_line in page.css('.authors::text').extract():
            # FIXME: a few entries are missing the ';' separator between
            # multiple authors.
            pieces = author_line.split(';')
            for piece in pieces:
                loader = SpeakerLoader(selector=page, response=response)
                # FIXME: most author entries end with the institution.
                loader.add_value('name', piece)
                loader.add_value('year', response.meta['year'])
                loader.add_value('conference', 'SciPy')
                yield loader.load_item()

Example #28

0

Show file

File: strangeloop.py Project: semurat/pycon-speakers

 def parse_speakers(self, response):
     """Yield a speaker item for every div with ``speaker`` in its class.

     Names come from ``.//h5/a[@target='_blank']/text()`` evaluated on each
     div; the year is ``str(response.meta["year"])``.
     """
     query = "//div[contains(@class, 'speaker')]"
     for div in Selector(response).xpath(query):
         item_loader = SpeakerLoader(selector=div)
         item_loader.add_xpath("name", ".//h5/a[@target='_blank']/text()")
         item_loader.add_value("year", str(response.meta["year"]))
         yield item_loader.load_item()

Example #29

0

Show file

File: scipy_org.py Project: JuanMiGabarron/pycon-speakers

 def parse_2009(self, response):
     """Yield one SciPy speaker item per entry, using only its first author.

     A regular expression is run over the ``.section > p`` elements and
     captures the parenthesised text that follows a ``<strong>`` element.
     The capture is cut at the first comma and the leading part is loaded
     as ``name``.  The year is ``response.meta['year']``, passed through
     unchanged.
     """
     sel = Selector(response)
     # The pattern is a raw string so that \( and \) reach the regex
     # engine without relying on invalid string escapes.
     author_re = r'<strong>.+</strong>.+\((.+)\)<'
     for authors in sel.css('.section > p').re(author_re):
         # A few entries list multiple authors, some separated by '&' and
         # others by a comma. The difficulty is entries that carry author
         # plus institution, i.e.: "Armando Sole, ESRF, France".
         # For now, we extract only the first author.
         author = authors.partition(',')[0]
         sl = SpeakerLoader(selector=sel, response=response)
         sl.add_value('name', author)
         sl.add_value('year', response.meta['year'])
         sl.add_value('conference', 'SciPy')
         yield sl.load_item()

Example #30

0

Show file

File: rockymtnruby_com.py Project: JuanMiGabarron/pycon-speakers

 def _parse_workshop_2012(self, response):
     """Yield a speaker item for each ``<p>`` text node under a speaker div.

     Divs are matched by ``contains(@class,'speaker')``.  Each extracted
     text is loaded as ``name`` with ``str(response.meta['year'])`` as the
     year.
     """
     query = "//div[contains(@class,'speaker')]"
     for container in Selector(response).xpath(query):
         for text in container.xpath(".//p/text()").extract():
             loader = SpeakerLoader(selector=container)
             loader.add_value('name', text)
             loader.add_value('year', str(response.meta['year']))
             yield loader.load_item()

Example #31

0

Show file

File: rockymtnruby_com.py Project: JuanMiGabarron/pycon-speakers

 def _parse_workshop_2013(self, response):
     """Yield a speaker item for each name found in a workshop heading.

     Divs are matched by ``contains(@id,'workshop')``.  The first
     ``<h2>`` text node below each div is split with
     ``self._split_names`` and every name is loaded with
     ``str(response.meta['year'])`` as the year.  A div that has no
     ``<h2>`` text is skipped.
     """
     for section in Selector(response).xpath("//div[contains(@id,'workshop')]"):
         heading_texts = section.xpath(".//h2/text()").extract()
         if not heading_texts:
             # Without this guard, [0] raises IndexError on a div lacking
             # an <h2>, which stops the generator for the whole page.
             continue
         names = heading_texts[0]
         for name in self._split_names(names):
             il = SpeakerLoader(selector=section)
             il.add_value('name', name)
             il.add_value('year', str(response.meta['year']))
             yield il.load_item()

Example #32

0

Show file

File: djangocon.py Project: JuanMiGabarron/pycon-speakers

	def parse_2011(self, response):
		"""Yield a speaker item for every element matching ``.speakers``.

		Names come from ``.speakers::text`` evaluated on each match; the
		year is ``str(response.meta['cookiejar'])``.
		"""
		page = Selector(response)
		for block in page.css('.speakers'):
			loader = SpeakerLoader(selector=block)
			loader.add_css('name', '.speakers::text')
			loader.add_value('year', str(response.meta['cookiejar']))
			yield loader.load_item()

Example #33

0

Show file

File: scipy_org.py Project: JuanMiGabarron/pycon-speakers

 def parse_2008(self, response):
     """Yield one SciPy speaker item per author of every matching talk line.

     Text nodes of ``.section > p`` are normalised (newlines replaced by
     spaces, then stripped) and matched against a
     ``"<title> (<authors>)"`` pattern.  Non-matching lines are ignored.
     The authors group is split on ``','`` and each piece is loaded as
     ``name`` with ``response.meta['year']`` as the year.
     """
     sel = Selector(response)
     # The pattern is a raw string so that \( and \) reach the regex
     # engine without relying on invalid string escapes.
     talk_author_re = re.compile(r'^(?P<title>.+) \((?P<authors>.+?)\)$')
     for event in sel.css('.section > p::text').extract():
         # For some reason, some entries have the character '\n' between
         # the talk name and the author.
         event = event.replace('\n', ' ').strip()
         m = talk_author_re.search(event)
         if m:
             # The match is already in hand, so there is no need to run
             # the search a second time.
             data = m.groupdict()
             for author in data['authors'].split(','):
                 sl = SpeakerLoader(selector=sel, response=response)
                 sl.add_value('name', author)
                 sl.add_value('year', response.meta['year'])
                 sl.add_value('conference', 'SciPy')
                 yield sl.load_item()

Example #34

0

Show file

File: oracleus_activeevents_com.py Project: jwm/pycon-speakers

 def _follow_sessions(self, response):
     """Yield one speaker item per link pointing at ``speakerDetail.ww``.

     Links are matched by ``contains(@href, 'speakerDetail.ww')``.  The
     link text is the name source and the year is taken from
     ``response.meta['year']``.
     """
     detail_links = Selector(response).xpath(
         "//a[contains(@href, 'speakerDetail.ww')]")
     for link in detail_links:
         loader = SpeakerLoader(selector=link)
         loader.add_xpath('name', "./text()")
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #35

0

Show file

File: confoo_ca.py Project: JuanMiGabarron/pycon-speakers

 def _parse(self, response):
     """Yield a speaker item for each ``name`` div under a ``speakers`` div.

     The matched element itself is the name source (``'.'``); the year is
     ``str(response.meta['year'])``.
     """
     query = '//div[@class="speakers"]//div[@class="name"]'
     for name_div in Selector(response).xpath(query):
         loader = SpeakerLoader(selector=name_div)
         loader.add_xpath('name', '.')
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #36

0

Show file

 def parse(self, response):
     """Yield speaker items for this page, then requests for linked pages.

     A speaker item is produced for each ``div.mini-profile`` element,
     with two name lookups added to the loader (``.name > a::text`` and
     ``.name::text``).  Afterwards every ``href`` found with
     ``.pagination a::attr(href)`` is joined against ``response.url`` and
     requested with the same ``meta``.
     """
     page = Selector(response)
     profiles = page.css('div.mini-profile')
     for profile in profiles:
         loader = SpeakerLoader(selector=profile)
         # Both queries feed the same field.
         loader.add_css('name', ".name > a::text")
         loader.add_css('name', ".name::text")
         loader.add_css('image_urls', "img::attr(src)")
         loader.add_value('year', str(response.meta['cookiejar']))
         loader.add_value('conference', 'EuroPython')
         yield loader.load_item()

     # Pagination: follow every pager link, carrying the meta along.
     for href in page.css('.pagination a::attr(href)').extract():
         yield Request(urljoin(response.url, href), meta=response.meta)

Example #37

0

Show file

 def parse_new(self, response):
     """Yield one EuroPython speaker item per archived-talk speaker element.

     Elements are matched with ``.archive .talk .speakers > .speaker``.
     The loader receives ``span::text`` for the name,
     ``a > img::attr(src)`` for image URLs and
     ``str(response.meta['cookiejar'])`` for the year.
     """
     def join_with_page_url(values):
         # Resolve every value passed in against the URL of this response.
         return [urljoin(response.url, value) for value in values]

     page = Selector(response)
     for element in page.css('.archive .talk .speakers > .speaker'):
         loader = SpeakerLoader(selector=element)
         loader.add_value('conference', 'EuroPython')
         loader.add_css('name', "span::text")
         loader.add_css(
             'image_urls', "a > img::attr(src)", join_with_page_url)
         loader.add_value('year', str(response.meta['cookiejar']))
         yield loader.load_item()

Example #38

0

Show file

File: rockymtnruby_com.py Project: JuanMiGabarron/pycon-speakers

 def _parse_2013(self, response):
     """Yield a speaker item for every div with ``speaker`` in its class.

     Names come from ``./a[@class='name']`` evaluated on each div; the
     year is ``str(response.meta['year'])``.
     """
     query = "//div[contains(@class,'speaker')]"
     for container in Selector(response).xpath(query):
         loader = SpeakerLoader(selector=container)
         loader.add_xpath('name', "./a[@class='name']")
         loader.add_value('year', str(response.meta['year']))
         yield loader.load_item()

Example #39

0

Show file

 def parse_old_format(self, response):
     """Yield OSCON speaker items from two different page structures.

     First, every ``<h3>`` inside ``div[@class="speaker-blurb"]`` is
     loaded as a name.  Then the text of links under ``<span>`` whose
     ``href`` contains ``e_spkr`` is loaded, with ``'N/A'`` removed.  The
     year is taken from ``response.meta['year']``.
     """
     def build_item(name):
         # Common loader setup shared by both kinds of match.
         loader = SpeakerLoader(response=response)
         loader.add_value('name', name)
         loader.add_value('year', str(response.meta['year']))
         loader.add_value('conference', 'OSCON')
         return loader.load_item()

     page = Selector(response)
     headings = page.xpath('//div[@class="speaker-blurb"]//h3').extract()
     for heading in headings:
         yield build_item(heading)

     link_texts = page.xpath(
         '//span/a[contains(@href, "e_spkr")]//text()').extract()
     for text in link_texts:
         yield build_item(text.replace('N/A', ''))

Example #40

0

Show file

File: oscon_com.py Project: JuanMiGabarron/pycon-speakers

 def parse_old_format(self, response):
     """Yield OSCON speaker items for blurb headings, then speaker links.

     Pass one loads each extracted ``<h3>`` found under
     ``div[@class="speaker-blurb"]``.  Pass two loads the text nodes of
     ``span/a`` links whose ``href`` contains ``e_spkr``, after removing
     ``'N/A'`` from the text.  Both passes use
     ``str(response.meta['year'])`` as the year.
     """
     def make_speaker(raw_name):
         # Identical field setup for both passes.
         item_loader = SpeakerLoader(response=response)
         item_loader.add_value('name', raw_name)
         item_loader.add_value('year', str(response.meta['year']))
         item_loader.add_value('conference', 'OSCON')
         return item_loader.load_item()

     page = Selector(response)
     blurb_query = '//div[@class="speaker-blurb"]//h3'
     for heading_markup in page.xpath(blurb_query).extract():
         yield make_speaker(heading_markup)

     link_query = '//span/a[contains(@href, "e_spkr")]//text()'
     for link_text in page.xpath(link_query).extract():
         yield make_speaker(link_text.replace('N/A', ''))

Example #41

0

Show file

File: pycon_org.py Project: kmike/pycon-speakers

 def _follow_speakers(self, response):
     """Yield a single speaker item built from the whole response.

     The name is looked up with an XPath matching links whose ``href``
     contains ``/speaker/profile/``; the year is taken from
     ``response.meta['year']``.
     """
     profile_link_query = "//a[contains(@href, '/speaker/profile/')]"
     loader = SpeakerLoader(response=response)
     loader.add_xpath('name', profile_link_query)
     loader.add_value('year', str(response.meta['year']))
     yield loader.load_item()

Example #42

0

Show file

 def _follow_speakers(self, response):
     """Yield a single PyCon US speaker item built from the whole response.

     The name is looked up with an XPath matching links whose ``href``
     contains ``/speaker/profile/``; the year is taken from
     ``response.meta['year']``.
     """
     profile_link_query = "//a[contains(@href, '/speaker/profile/')]"
     loader = SpeakerLoader(response=response)
     loader.add_xpath('name', profile_link_query)
     loader.add_value('year', str(response.meta['year']))
     loader.add_value('conference', 'PyCon US')
     yield loader.load_item()