def refine_item(self, response, item):
    """Set ``item['birth']`` / ``item['death']`` from the ``born_died`` text.

    ``item['other']['born_died']`` is passed to ``text.parse_birth_death``,
    which is unpacked as a ``(birth, death)`` pair. If at least one of the
    two is truthy, both fields are written to the item, with any falsy
    value stored as ``None``. If neither is truthy the item is left alone.

    The item is then handed to the parent class's ``refine_item`` and that
    call's result is returned.
    """
    lifespan_text = item['other']['born_died']
    born, died = text.parse_birth_death(lifespan_text)

    if any((born, died)):
        # Normalise falsy results (e.g. empty strings) to None.
        item['birth'] = born if born else None
        item['death'] = died if died else None

    parent = super(MedicalBioSpider, self)
    return parent.refine_item(response, item)
def refine_item(self, response, item):
    """Extract birth/death dates from the short descriptions and extend the bio.

    Every entry of ``item['other']['short-desc']`` is run through
    ``text.parse_birth_death``. Whenever an entry yields a truthy birth or
    death value, both ``item['birth']`` and ``item['death']`` are
    overwritten with that entry's results, so the last matching entry wins.

    Afterwards the ``'read-more'`` value is removed from ``item['other']``
    and appended to ``item['bio']``. The item is finally passed on to the
    parent class's ``refine_item``, whose result is returned.
    """
    descriptions = item['other']['short-desc']
    for fragment in descriptions:
        born, died = text.parse_birth_death(fragment)
        if not (born or died):
            # Nothing date-like in this fragment; try the next one.
            continue
        item['birth'] = born
        item['death'] = died

    item['bio'] += item['other'].pop('read-more')

    parent = super(BbcCoUkSpider, self)
    return parent.refine_item(response, item)
def refine_item(self, response, item):
    """Fill in the dates and the display name of a scraped item.

    The article header text is extracted from ``response`` via
    ``text.clean_extract``. The substring between the first "(" and the
    following ")" is taken as the dates, stripped of newlines, and passed to
    ``text.parse_birth_death``. If that yields a truthy birth or death
    value, ``item['birth']`` and ``item['death']`` are set, with falsy
    values stored as ``None``. A header without "(" is skipped silently.

    ``item['name']`` is then built as "<forename>, <surname>" from the two
    values popped out of ``item['other']``. The item is passed to the parent
    class's ``refine_item`` and that call's result is returned.
    """
    header_xpath = './/div[@id="text"]/span[@class="article_header"]//text()'

    try:
        header = text.clean_extract(response, header_xpath)
        after_open_paren = header.split('(')[1]
        dates = after_open_paren.split(')')[0]
    except IndexError:
        # No "(" in the header, so there is no date range to parse.
        dates = None

    if dates is not None:
        born, died = text.parse_birth_death(dates.replace('\n', ''))
        if born or died:
            item['birth'] = born if born else None
            item['death'] = died if died else None

    # Pop forename first, then surname, matching the formatting order.
    forename = item['other'].pop('forename')
    surname = item['other'].pop('surname')
    item['name'] = '%s, %s' % (forename, surname)

    parent = super(YbaLlgcOrgUkSpider, self)
    return parent.refine_item(response, item)
def test(self):
    """Check ``text.parse_birth_death`` against a table of known inputs.

    Each case is ``(input string, expected birth, expected death)``; the
    function's result is unpacked as a ``(birth, death)`` pair and each half
    is compared with its expected value.
    """
    cases = (
        # birth only
        ('b. 1234', '1234', None),
        ('b. ca. 1234', '1234', None),
        ('b. c. 1234', '1234', None),
        # death only
        ('d. 1234', None, '1234'),
        ('d. ca. 1234', None, '1234'),
        ('d. c. 1234', None, '1234'),
        # century
        ('19th century', '1801', '1900'),
        # ranges, with and without "circa" markers
        ('1234-5678', '1234', '5678'),
        ('ca. 1234-5678', '1234', '5678'),
        ('c. 1234-5678', '1234', '5678'),
        ('1234-ca. 5678', '1234', '5678'),
        ('1234-c. 5678', '1234', '5678'),
    )
    for raw, expected_birth, expected_death in cases:
        actual_birth, actual_death = text.parse_birth_death(raw)
        self.assertEqual(expected_birth, actual_birth)
        self.assertEqual(expected_death, actual_death)
def refine_item(self, response, item):
    """Populate ``item['birth']`` and ``item['death']`` from ``item['other']['born']``.

    The text after the last comma of the ``born`` value is passed to
    ``text.parse_birth_death``, which is unpacked as ``(birth, death)``.

    * If at least one of the two is truthy, both fields are set from them.
      A two-character death value is treated as an abbreviated year (as in
      "1515-35") and is prefixed with the birth year minus its last two
      digits.
    * Otherwise the whole ``born`` value is split on its first "-" and a
      trailing run of 3-4 digits on each side is stored (``None`` where
      there is no such run). A value without "-" leaves the item untouched.

    A falsy ``born`` value (``None``, empty string) leaves the item
    untouched as well.

    :param response: unused by this method.
    :param item: mapping with an ``'other'`` mapping that has a ``'born'`` key.
    :returns: the same ``item`` object.
    """
    born = item['other']['born']
    if not born:
        return item

    birth, death = text.parse_birth_death(born.split(',')[-1])
    if birth or death:
        if birth and death and len(death) == 2:
            # Catch dates like 1515-35. Drop the last two digits of the
            # birth year rather than keeping its first two, so that 3-digit
            # years (985-99 -> 999) get the right century as well.
            death = birth[:-2] + death
        item['birth'] = birth
        item['death'] = death
        return item

    # Fallback: pull the years straight out of "<...birth>-<death...>".
    try:
        birth_part, death_part = born.split('-', 1)
    except ValueError:
        # No "-" in the text: there is no range to extract.
        return item

    match = re.search(r'\d{3,4}$', birth_part.strip())
    item['birth'] = match.group(0) if match else None
    match = re.search(r'\d{3,4}$', death_part.strip())
    item['death'] = match.group(0) if match else None
    return item
def refine_item(self, response, item):
    """Derive ``item['birth']`` / ``item['death']`` from ``item['other']['born']``.

    Nothing happens when the ``born`` value is falsy. Otherwise its part
    after the last comma goes through ``text.parse_birth_death`` (unpacked
    as ``(birth, death)``):

    * With at least one truthy result, both fields are set from the pair;
      a death value of exactly two characters is expanded with the leading
      digits of the birth year (everything but its last two characters),
      covering ranges written like "1515-35".
    * With no result, ``born`` is split at its first "-" and each side
      contributes the 3-4 digits it ends with, or ``None``. If there is no
      "-" the item is not modified.

    :param response: unused by this method.
    :param item: mapping with an ``'other'`` mapping that has a ``'born'`` key.
    :returns: the same ``item`` object.
    """
    born = item['other']['born']
    if born:
        birth, death = text.parse_birth_death(
            born.split(',')[-1]
        )
        if birth or death:
            if birth and death and len(death) == 2:
                # Catch dates like 1515-35. Slicing off the last two digits
                # (instead of taking the first two) keeps the century right
                # for 3-digit birth years too, e.g. 985-99 -> 999.
                death = birth[:-2] + death
            item['birth'] = birth
            item['death'] = death
        else:
            try:
                birth_part, death_part = born.split('-', 1)
            except ValueError:
                # No "-" separator, so no birth/death range to read.
                pass
            else:
                year_at_end = re.compile(r'\d{3,4}$')
                found = year_at_end.search(birth_part.strip())
                item['birth'] = found.group(0) if found else None
                found = year_at_end.search(death_part.strip())
                item['death'] = found.group(0) if found else None
    return item
def parse_title(self, title):
    """Split a title of the form "<name> (<dates>" into name and dates.

    The title is split on "(": the left part is the name, and the right
    part is passed to ``text.parse_birth_death``, which is unpacked as a
    ``(birth, death)`` pair.

    :param title: string containing exactly one "(".
    :returns: ``(name, birth, death)`` with surrounding whitespace stripped
        from the name.
    :raises ValueError: if ``title`` does not contain exactly one "(".
    """
    pieces = title.split('(')
    # Unpacking enforces exactly two pieces, i.e. exactly one "(".
    raw_name, date_info = pieces
    born, died = text.parse_birth_death(date_info)
    return raw_name.strip(), born, died