Example #1
0
    def refine_item(self, response, item):
        """Populate birth/death on *item* from its ``born_died`` value.

        ``item['other']['born_died']`` is handed to
        ``text.parse_birth_death``, which is unpacked as a
        ``(birth, death)`` pair. When at least one of the two is truthy,
        both ``item['birth']`` and ``item['death']`` are written, with a
        falsy value stored as ``None``. Otherwise neither key is touched.

        Returns whatever the parent class's ``refine_item`` returns for
        the same *response* and *item*.
        """
        raw_dates = item['other']['born_died']
        born, died = text.parse_birth_death(raw_dates)
        if born or died:
            # Normalise any falsy parser result to None.
            item['birth'] = born if born else None
            item['death'] = died if died else None

        parent = super(MedicalBioSpider, self)
        return parent.refine_item(response, item)
Example #2
0
    def refine_item(self, response, item):
        """Populate birth/death on *item* from its ``born_died`` value.

        ``item['other']['born_died']`` is handed to
        ``text.parse_birth_death``, which is unpacked as a
        ``(birth, death)`` pair. When at least one of the two is truthy,
        both ``item['birth']`` and ``item['death']`` are written, with a
        falsy value stored as ``None``. Otherwise neither key is touched.

        Returns whatever the parent class's ``refine_item`` returns for
        the same *response* and *item*.
        """
        raw_dates = item['other']['born_died']
        born, died = text.parse_birth_death(raw_dates)
        if born or died:
            # Normalise any falsy parser result to None.
            item['birth'] = born if born else None
            item['death'] = died if died else None

        parent = super(MedicalBioSpider, self)
        return parent.refine_item(response, item)
Example #3
0
    def refine_item(self, response, item):
        """Set birth/death from the short descriptions and extend the bio.

        Every entry of ``item['other']['short-desc']`` is passed to
        ``text.parse_birth_death``. Whenever an entry yields a truthy
        birth or death, both ``item['birth']`` and ``item['death']`` are
        overwritten with that entry's values.

        The ``read-more`` entry is then removed from ``item['other']`` and
        appended to ``item['bio']``, and the item is passed on to the
        parent class's ``refine_item``, whose result is returned.
        """
        for fragment in item['other']['short-desc']:
            born, died = text.parse_birth_death(fragment)
            if not (born or died):
                continue
            # No early exit: a later matching fragment replaces an earlier
            # one.
            item['birth'] = born
            item['death'] = died

        item['bio'] += item['other'].pop('read-more')
        return super(BbcCoUkSpider, self).refine_item(response, item)
Example #4
0
    def refine_item(self, response, item):
        """Set birth/death from the short descriptions and extend the bio.

        Every entry of ``item['other']['short-desc']`` is passed to
        ``text.parse_birth_death``. Whenever an entry yields a truthy
        birth or death, both ``item['birth']`` and ``item['death']`` are
        overwritten with that entry's values.

        The ``read-more`` entry is then removed from ``item['other']`` and
        appended to ``item['bio']``, and the item is passed on to the
        parent class's ``refine_item``, whose result is returned.
        """
        for fragment in item['other']['short-desc']:
            born, died = text.parse_birth_death(fragment)
            if not (born or died):
                continue
            # No early exit: a later matching fragment replaces an earlier
            # one.
            item['birth'] = born
            item['death'] = died

        item['bio'] += item['other'].pop('read-more')
        return super(BbcCoUkSpider, self).refine_item(response, item)
Example #5
0
    def refine_item(self, response, item):
        """Extract birth/death from the article header and build the name.

        The text of the ``article_header`` span is fetched with
        ``text.clean_extract``; the part between the first ``(`` and the
        following ``)`` is stripped of newlines and given to
        ``text.parse_birth_death``. If at least one of the two results is
        truthy, ``item['birth']`` and ``item['death']`` are set (falsy
        values stored as ``None``). An ``IndexError`` during extraction
        skips the date handling entirely.

        ``item['name']`` is always rebuilt as ``"<forename>, <surname>"``
        from the two entries popped out of ``item['other']``.

        Returns the result of the parent class's ``refine_item``.
        """
        header_xpath = (
            './/div[@id="text"]/span[@class="article_header"]//text()'
        )
        try:
            header = text.clean_extract(response, header_xpath)
            after_paren = header.split('(')[1]
            dates = after_paren.split(')')[0]
        except IndexError:
            # Raised when the header has no '(' to split on; dates are
            # simply left unset in that case.
            pass
        else:
            born, died = text.parse_birth_death(dates.replace('\n', ''))
            if born or died:
                item['birth'] = born if born else None
                item['death'] = died if died else None

        # Pop order matters only for error behaviour: forename first.
        forename = item['other'].pop('forename')
        surname = item['other'].pop('surname')
        item['name'] = '%s, %s' % (forename, surname)

        return super(YbaLlgcOrgUkSpider, self).refine_item(response, item)
Example #6
0
 def test(self):
     """Check ``text.parse_birth_death`` against known date strings.

     Each case is ``(input string, expected birth, expected death)``.
     Every assertion carries a message naming the input, so a failure can
     be traced to the exact case rather than just to this method.
     """
     cases = [
         ('b. 1234', '1234', None),
         ('b. ca. 1234', '1234', None),
         ('b. c. 1234', '1234', None),
         ('d. 1234', None, '1234'),
         ('d. ca. 1234', None, '1234'),
         ('d. c. 1234', None, '1234'),
         ('19th century', '1801', '1900'),
         ('1234-5678', '1234', '5678'),
         ('ca. 1234-5678', '1234', '5678'),
         ('c. 1234-5678', '1234', '5678'),
         ('1234-ca. 5678', '1234', '5678'),
         ('1234-c. 5678', '1234', '5678'),
     ]
     for string, birth, death in cases:
         b, d = text.parse_birth_death(string)
         # The message repeats expected/actual because a custom message
         # may replace the default one, depending on ``longMessage``.
         self.assertEqual(
             birth, b,
             'birth for %r: expected %r, got %r' % (string, birth, b))
         self.assertEqual(
             death, d,
             'death for %r: expected %r, got %r' % (string, death, d))
Example #7
0
    def refine_item(self, response, item):
        """Extract birth/death from the article header and build the name.

        The text of the ``article_header`` span is fetched with
        ``text.clean_extract``; the part between the first ``(`` and the
        following ``)`` is stripped of newlines and given to
        ``text.parse_birth_death``. If at least one of the two results is
        truthy, ``item['birth']`` and ``item['death']`` are set (falsy
        values stored as ``None``). An ``IndexError`` during extraction
        skips the date handling entirely.

        ``item['name']`` is always rebuilt as ``"<forename>, <surname>"``
        from the two entries popped out of ``item['other']``.

        Returns the result of the parent class's ``refine_item``.
        """
        header_xpath = (
            './/div[@id="text"]/span[@class="article_header"]//text()'
        )
        try:
            header = text.clean_extract(response, header_xpath)
            after_paren = header.split('(')[1]
            dates = after_paren.split(')')[0]
        except IndexError:
            # Raised when the header has no '(' to split on; dates are
            # simply left unset in that case.
            pass
        else:
            born, died = text.parse_birth_death(dates.replace('\n', ''))
            if born or died:
                item['birth'] = born if born else None
                item['death'] = died if died else None

        # Pop order matters only for error behaviour: forename first.
        forename = item['other'].pop('forename')
        surname = item['other'].pop('surname')
        item['name'] = '%s, %s' % (forename, surname)

        return super(YbaLlgcOrgUkSpider, self).refine_item(response, item)
Example #8
0
    def refine_item(self, response, item):
        """Derive ``birth``/``death`` on *item* from ``item['other']['born']``.

        Nothing happens when the ``born`` value is falsy. Otherwise the
        text after its last comma is given to ``text.parse_birth_death``:

        * If that yields a truthy birth or death, both keys are set from
          it. A two-character death is prefixed with the first two
          characters of the birth (e.g. ``1515-35``).
        * If not, the whole value is split on its first ``-`` and each
          side contributes its trailing run of 3-4 digits, or ``None``
          when it has none. A value without ``-`` leaves the item as is.

        Returns *item* (the same object, modified in place).
        """
        born = item['other']['born']
        if not born:
            return item

        born_on, died_on = text.parse_birth_death(born.split(',')[-1])
        if born_on or died_on:
            if born_on and died_on and len(died_on) == 2:
                # catch dates like 1515-35
                died_on = born_on[:2] + died_on
            item['birth'] = born_on
            item['death'] = died_on
            return item

        trailing_year = r'\d{3,4}$'
        try:
            # Unpacking raises ValueError when there is no '-' at all.
            left, right = born.split('-', 1)
            for field, chunk in (('birth', left), ('death', right)):
                found = re.search(trailing_year, chunk.strip())
                item[field] = found.group(0) if found else None
        except ValueError:
            pass

        return item
Example #9
0
    def refine_item(self, response, item):
        """Derive ``birth``/``death`` on *item* from ``item['other']['born']``.

        Nothing happens when the ``born`` value is falsy. Otherwise the
        text after its last comma is given to ``text.parse_birth_death``:

        * If that yields a truthy birth or death, both keys are set from
          it. A two-character death is prefixed with the first two
          characters of the birth (e.g. ``1515-35``).
        * If not, the whole value is split on its first ``-`` and each
          side contributes its trailing run of 3-4 digits, or ``None``
          when it has none. A value without ``-`` leaves the item as is.

        Returns *item* (the same object, modified in place).
        """
        born = item['other']['born']
        if not born:
            return item

        born_on, died_on = text.parse_birth_death(born.split(',')[-1])
        if born_on or died_on:
            if born_on and died_on and len(died_on) == 2:
                # catch dates like 1515-35
                died_on = born_on[:2] + died_on
            item['birth'] = born_on
            item['death'] = died_on
            return item

        trailing_year = r'\d{3,4}$'
        try:
            # Unpacking raises ValueError when there is no '-' at all.
            left, right = born.split('-', 1)
            for field, chunk in (('birth', left), ('death', right)):
                found = re.search(trailing_year, chunk.strip())
                item[field] = found.group(0) if found else None
        except ValueError:
            pass

        return item
Example #10
0
 def parse_title(self, title):
     """Split *title* into a name plus birth and death values.

     *title* is split on ``(`` and must produce exactly two parts;
     any other count raises ``ValueError`` from the unpacking. The
     second part is given to ``text.parse_birth_death``.

     Returns a ``(name, birth, death)`` tuple, with surrounding
     whitespace stripped from the name.
     """
     head, parenthesised = title.split('(')
     lifespan = text.parse_birth_death(parenthesised)
     birth, death = lifespan
     return head.strip(), birth, death
Example #11
0
 def parse_title(self, title):
     """Split *title* into a name plus birth and death values.

     *title* is split on ``(`` and must produce exactly two parts;
     any other count raises ``ValueError`` from the unpacking. The
     second part is given to ``text.parse_birth_death``.

     Returns a ``(name, birth, death)`` tuple, with surrounding
     whitespace stripped from the name.
     """
     head, parenthesised = title.split('(')
     lifespan = text.parse_birth_death(parenthesised)
     birth, death = lifespan
     return head.strip(), birth, death