def __init__(self, date=None,
             fuzzy_year_padding=1, fuzzy_month_padding=1,
             fuzzy_day_padding=1, fuzzy_season_padding=12,
             multiplier_if_uncertain=1, multiplier_if_approximate=1,
             multiplier_if_both=1):
    """Initialise the instance and immediately parse ``date``.

    Every padding and multiplier argument is coerced with ``int()`` and
    stored on the instance under the same name.

    ``date`` is first handed to ``self.parse`` unchanged. If that raises,
    a second attempt is made with ``text_to_edtf(self.orig_date)``. If the
    second attempt raises as well, the exception is recorded on
    ``self.error`` and re-raised.
    """
    super(ExtendedDateFormat, self).__init__()

    # Parse state; this method only resets these to None before parsing.
    self.orig_date = None
    self.edtf = None
    self.result_set = None
    self.sortable_date = None
    self.error = None

    # Numeric configuration, normalised to int.
    self.fuzzy_year_padding = int(fuzzy_year_padding)
    self.fuzzy_month_padding = int(fuzzy_month_padding)
    self.fuzzy_day_padding = int(fuzzy_day_padding)
    self.fuzzy_season_padding = int(fuzzy_season_padding)
    self.multiplier_if_uncertain = int(multiplier_if_uncertain)
    self.multiplier_if_approximate = int(multiplier_if_approximate)
    self.multiplier_if_both = int(multiplier_if_both)

    try:
        self.parse(date)
    except Exception:
        # Fallback: convert the text with text_to_edtf and parse that.
        # NOTE(review): this reads self.orig_date, which is None unless
        # parse() assigned it before failing -- confirm against parse().
        try:
            converted = text_to_edtf(self.orig_date)
            self.parse(converted)
        except Exception as fallback_error:
            self.error = fallback_error
            raise fallback_error
def __init__(self, date=None, fuzzy_year_padding=1, fuzzy_month_padding=1,
             fuzzy_day_padding=1, fuzzy_season_padding=12,
             multiplier_if_uncertain=1, multiplier_if_approximate=1,
             multiplier_if_both=1):
    """Initialise the instance and parse ``date``.

    The padding and multiplier arguments are converted with ``int()`` and
    stored as attributes of the same name.

    Parsing is attempted twice: first ``self.parse(date)``, and if that
    raises, ``self.parse(text_to_edtf(self.orig_date))``.

    Raises:
        Exception: whatever the second parse attempt raised; the same
            exception object is also stored on ``self.error``.
    """
    super(ExtendedDateFormat, self).__init__()
    self.orig_date = None
    self.edtf = None
    self.result_set = None
    self.sortable_date = None
    self.error = None
    self.fuzzy_year_padding = int(fuzzy_year_padding)
    self.fuzzy_month_padding = int(fuzzy_month_padding)
    self.fuzzy_day_padding = int(fuzzy_day_padding)
    self.fuzzy_season_padding = int(fuzzy_season_padding)
    self.multiplier_if_uncertain = int(multiplier_if_uncertain)
    self.multiplier_if_approximate = int(multiplier_if_approximate)
    self.multiplier_if_both = int(multiplier_if_both)
    try:
        self.parse(date)
    except Exception:
        # Was a bare ``except:``, which also intercepted KeyboardInterrupt
        # and SystemExit and then tried the fallback parse instead of
        # letting the interpreter stop.
        # NOTE(review): the fallback uses self.orig_date, which is still
        # None unless parse() set it before failing -- confirm.
        try:
            self.parse(text_to_edtf(self.orig_date))
        except Exception as err:
            self.error = err
            raise err
def post_shows(api, scroll):
    """Scrape the Studs Terkel explore page and post one event per show.

    Every ``<p>`` on the page that contains a link which itself contains a
    ``<span>`` is treated as a show: the span text is the date and the
    remaining link text is the title. Each show is sent with
    ``api.create_event(show, scroll)`` and the JSON response is
    pretty-printed.

    Args:
        api: object providing ``create_event(event, scroll)`` and
            ``cache_wiki_thumbnail(person)``.
        scroll: passed through unchanged to ``api.create_event``.
    """
    url = 'https://studsterkel.wfmt.com/explore#t=date'
    soup = get_url_as_soup(url)

    for p in soup.find_all('p'):
        a = p.find('a')
        if a is None:
            continue
        date = a.find('span')
        if date is None:
            continue

        # Detach all spans from the link (this mutates `a`) so that a.text
        # below is only the title. `date` is still read afterwards.
        for span in a('span'):
            span.extract()

        _edtf = parse_edtf(text_to_edtf(date.text))
        title = a.text.strip()
        person = get_person(title)

        # The original literal listed 'with_thumbnail' twice (None first,
        # then the cached thumbnail); only the last value ever took
        # effect, so the dead first entry is removed.
        show = {
            'when_happened': struct_time_to_datetime(_edtf.upper_strict()),
            'resolution': len(str(_edtf)),
            'when_original': date.text,
            'content_url': 'https://studsterkel.wfmt.com{}'.format(a.get('href')),
            'title': title,
            'text': '',
            'media_type': 'audio/mpeg',
            'content_type': 'Oral histories',
            'source_url': 'https://studsterkel.wfmt.com/',
            'with_thumbnail': api.cache_wiki_thumbnail(person),
        }
        resp = api.create_event(show, scroll)
        pprint(resp.json())
def add_first_link(w):
    """Fill in title, text, link and date fields on a scraped event dict.

    ``w`` is mutated in place. From ``w['event']`` (a parsed HTML element)
    the function derives ``title``/``text``, and from its first anchor
    ``content_url``/``item``. ``w['date']`` and ``w['year']`` are then
    combined, converted with ``text_to_edtf``/``parse_edtf`` and stored as
    ``when_happened`` (ISO-like string), ``when_original`` and
    ``resolution``. On success the working keys ``event``, ``date``,
    ``context`` and ``header`` are removed.

    Args:
        w: dict with at least ``event``, ``context``, ``date`` and
            ``year`` keys, or ``None``.

    Returns:
        ``w`` when the date could be converted; ``None`` when ``w`` is
        ``None``, has no ``event`` key, or the date conversion failed.
    """
    if w is None or 'event' not in w:
        return None

    e = w['event']
    # Drop footnote markers and spans so they do not pollute the text.
    for tag in e(['sup', 'span']):
        tag.decompose()

    text = e.text.rstrip()
    # Raw strings: '\s', '\d' and '\.' in plain literals are invalid
    # escape sequences (a SyntaxWarning on recent Python versions).
    text = re.sub(r'^\s*\d+:\s*', '', text)
    text = re.sub(r'^:\s*', '', text)
    if w['context'] is not None and w['context'] != '':
        # NOTE(review): this rebuilds from the raw element text, so the
        # prefix stripping above is discarded when a context is present.
        # Kept as-is; confirm whether the cleaned `text` was intended.
        text = '{}: {}'.format(w['context'], e.text.rstrip())

    # Split at the last "<lowercase word>. " boundary: what precedes it
    # becomes the title, the remainder the body text.
    m = re.match(r'^(.+[a-z]{2,}\.\s+)(.*)', text)
    if m is not None:
        title = m.group(1)
        text = m.group(2)
    else:
        title = text
        text = ''
    w['title'] = title
    w['text'] = text

    links = e.select('a')
    if links and links[0] is not None:
        href = links[0].get('href')
        # An anchor without an href used to raise TypeError in re.sub;
        # such links are now skipped.
        if href is not None:
            w['content_url'] = 'https://en.wikipedia.org{}'.format(href)
            w['item'] = re.sub(r'/wiki/|/w/index.php\?title\=', '', href)

    date_text = '{} {}'.format(w['date'], w['year'])
    date_text = date_text.replace('–', '-')
    try:
        edtf_date_txt = text_to_edtf(date_text)
        edtf_date = parse_edtf(edtf_date_txt)
        iso_date = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                 edtf_date.upper_fuzzy())
        w['when_happened'] = iso_date
        w['when_original'] = date_text
        w['resolution'] = 10
        del w['event']
        del w['date']
        del w['context']
        if 'header' in w:
            del w['header']
        return w
    except Exception:
        # Deliberate best effort: an entry whose date cannot be converted
        # yields None instead of aborting the caller.
        return None
def getEDTF(datetext, letter_key=None):
    """Convert a German date expression to an EDTF string.

    ``datetext`` is German text. It is translated to English first so
    that the edtf library can parse it afterwards.

    When ``prepareDateSets`` returns a single string it is converted
    directly. Otherwise each part is converted separately and the parts
    are combined as an interval (``a/b``), an all-members set (``{a,b}``)
    or a one-of set (``[a,b]``) depending on the wording of the
    translated text.

    Args:
        datetext: the date expression to convert.
        letter_key: only used in the debug log output.

    Returns:
        The value returned by ``postprocessDate`` for the converted date.

    Raises:
        Any exception raised by the translation or conversion helpers
        propagates unchanged.
    """
    # The original wrapped these steps in try/except blocks that only
    # re-raised (followed by unreachable ``edtf = None``); exceptions
    # propagate exactly the same without them.
    translated = simpleTranslate(prepareDate(datetext))
    prepared_translated = prepareDateSets(translated)

    if isinstance(prepared_translated, str):
        edtf = text_to_edtf(prepared_translated)
    else:
        # Explicit grouping of the original condition: ``and`` binds
        # tighter than ``or``, so the year-range exclusion applies only to
        # the '-' / ' to ' alternative.
        has_range_word = '-' in translated or ' to ' in translated
        is_interval = (
            ' between ' in translated
            or (has_range_word
                and not re.search(r'\d{4}\-\d{4}', translated))
            or ('after ' in translated
                and 'before ' in translated
                and '/' in translated)
        )
        parts = map(text_to_edtf, prepared_translated)
        if is_interval:
            edtf = '/'.join(parts)
        elif ' and ' in translated:
            edtf = '{' + ','.join(parts) + '}'
        else:
            edtf = '[' + ','.join(parts) + ']'

    # run postprocessing of date
    edtf = postprocessDate(translated, edtf)
    log.debug(datetext)
    log.debug((letter_key, edtf))
    return edtf
def create_from_string(self, date_str):
    """Create an ``ExtendedDate`` record from a free-text date.

    ``date_str`` is converted with ``text_to_edtf``, the result is
    stringified and stored as ``edtf_format``. Returns whatever
    ``ExtendedDate.objects.create`` returns.
    """
    # NOTE(review): if text_to_edtf can return None for unparseable text,
    # str() turns that into the literal 'None' -- confirm this is intended.
    return ExtendedDate.objects.create(
        edtf_format=str(text_to_edtf(date_str)),
    )
def match_string(self, date_str):
    """Return whether ``date_str`` converts to this object's EDTF string.

    The text is converted with ``text_to_edtf``, stringified, and
    compared for equality with ``self.edtf_format``.
    """
    candidate = str(text_to_edtf(date_str))
    return self.edtf_format == candidate