Beispiel #1
0
    def __init__(self, date=None,
                 fuzzy_year_padding=1, fuzzy_month_padding=1,
                 fuzzy_day_padding=1, fuzzy_season_padding=12,
                 multiplier_if_uncertain=1, multiplier_if_approximate=1,
                 multiplier_if_both=1):
        """Reset parser state, store the numeric options, then parse ``date``.

        Every padding and multiplier argument is passed through ``int()``
        and stored on the instance under the same name.

        ``date`` is handed to ``self.parse``. If that raises, a single retry
        is made with ``text_to_edtf(self.orig_date)``. If the retry raises
        too, that exception is stored in ``self.error`` and re-raised.
        """
        super(ExtendedDateFormat, self).__init__()

        # State attributes all start out as None.
        for attr in ('orig_date', 'edtf', 'result_set',
                     'sortable_date', 'error'):
            setattr(self, attr, None)

        # Coerce each option to int, keeping the original assignment order.
        numeric_options = (
            ('fuzzy_year_padding', fuzzy_year_padding),
            ('fuzzy_month_padding', fuzzy_month_padding),
            ('fuzzy_day_padding', fuzzy_day_padding),
            ('fuzzy_season_padding', fuzzy_season_padding),
            ('multiplier_if_uncertain', multiplier_if_uncertain),
            ('multiplier_if_approximate', multiplier_if_approximate),
            ('multiplier_if_both', multiplier_if_both),
        )
        for attr, raw_value in numeric_options:
            setattr(self, attr, int(raw_value))

        try:
            self.parse(date)
        except Exception:
            # NOTE(review): the retry reads self.orig_date, which is still
            # None unless parse() assigned it before failing -- confirm.
            try:
                self.parse(text_to_edtf(self.orig_date))
            except Exception as fallback_error:
                self.error = fallback_error
                raise fallback_error
Beispiel #2
0
    def __init__(self, date=None, fuzzy_year_padding=1, fuzzy_month_padding=1,
                 fuzzy_day_padding=1, fuzzy_season_padding=12,
                 multiplier_if_uncertain=1, multiplier_if_approximate=1,
                 multiplier_if_both=1):
        """Reset parser state, store the numeric options, then parse ``date``.

        Every padding and multiplier argument is coerced with ``int()`` and
        stored on the instance under the same name.

        ``date`` is handed to ``self.parse``. If that raises, a single retry
        is made with ``text_to_edtf(self.orig_date)``.

        Raises:
            Exception: whatever the retry raised; the same exception is also
                stored in ``self.error`` before being re-raised.
        """
        super(ExtendedDateFormat, self).__init__()
        self.orig_date = None
        self.edtf = None
        self.result_set = None
        self.sortable_date = None
        self.error = None

        self.fuzzy_year_padding = int(fuzzy_year_padding)
        self.fuzzy_month_padding = int(fuzzy_month_padding)
        self.fuzzy_day_padding = int(fuzzy_day_padding)
        self.fuzzy_season_padding = int(fuzzy_season_padding)

        self.multiplier_if_uncertain = int(multiplier_if_uncertain)
        self.multiplier_if_approximate = int(multiplier_if_approximate)
        self.multiplier_if_both = int(multiplier_if_both)

        try:
            self.parse(date)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit propagate instead of triggering
            # the fallback parse.
            # NOTE(review): the retry reads self.orig_date, which is still
            # None unless parse() assigned it before failing -- confirm.
            try:
                self.parse(text_to_edtf(self.orig_date))
            except Exception as err:
                self.error = err
                raise
Beispiel #3
0
def post_shows(api, scroll):
    """Scrape the Studs Terkel archive listing and post one event per show.

    Fetches the "explore by date" page, and for every ``<p>`` that contains
    a link with a ``<span>`` inside it, builds an event dict and sends it
    with ``api.create_event(show, scroll)``. The JSON body of each response
    is pretty-printed.

    Args:
        api: client object providing ``cache_wiki_thumbnail(person)`` and
            ``create_event(show, scroll)``.
        scroll: passed through unchanged to ``api.create_event``.

    Returns:
        None.
    """
    url = 'https://studsterkel.wfmt.com/explore#t=date'
    soup = get_url_as_soup(url)

    for p in soup.find_all('p'):
        a = p.find('a')
        if a is None:
            continue
        date = a.find('span')
        if date is None:
            continue

        # Detach every <span> from the link so that ``a.text`` below no
        # longer includes the span text. This mutates ``a`` in place;
        # ``date`` is still read afterwards for its text.
        for span in a('span'):
            span.extract()

        _edtf = parse_edtf(text_to_edtf(date.text))
        title = a.text.strip()
        person = get_person(title)

        # 'with_thumbnail' used to appear twice in this literal (first as
        # None); only the later value ever took effect, so it is kept once,
        # at the position the key originally occupied.
        show = {
            'when_happened': struct_time_to_datetime(_edtf.upper_strict()),
            'resolution': len(str(_edtf)),
            'when_original': date.text,
            'content_url':
                'https://studsterkel.wfmt.com{}'.format(a.get('href')),
            'title': title,
            'text': '',
            'with_thumbnail': api.cache_wiki_thumbnail(person),
            'media_type': 'audio/mpeg',
            'content_type': 'Oral histories',
            'source_url': 'https://studsterkel.wfmt.com/',
        }
        resp = api.create_event(show, scroll)
        pprint(resp.json())
Beispiel #4
0
def add_first_link(w):
    """Turn a raw scraped entry into an event dict, in place.

    ``w`` must provide the keys ``'event'`` (a parsed tag), ``'context'``,
    ``'date'`` and ``'year'``. The function:

    * removes ``<sup>`` and ``<span>`` tags from the event tag;
    * derives ``w['title']`` and ``w['text']`` from the tag text;
    * sets ``w['content_url']`` and ``w['item']`` from the first ``<a>``
      that carries an ``href``;
    * converts ``"<date> <year>"`` to a timestamp stored in
      ``w['when_happened']`` and removes the working keys.

    Args:
        w: the entry dict, or ``None``.

    Returns:
        The mutated ``w`` on success. ``None`` if ``w`` is ``None``, has no
        ``'event'`` key, or the date cannot be converted (in the last case
        ``w`` has already received ``title``/``text``/link keys).
    """
    if w is None or 'event' not in w:
        return None
    e = w['event']

    for tag in e(['sup', 'span']):
        tag.decompose()

    # Strip a leading "<digits>:" or bare ":" prefix.
    text = e.text.rstrip()
    text = re.sub(r'^\s*\d+:\s*', '', text)
    text = re.sub(r'^:\s*', '', text)
    if w['context'] is not None and w['context'] != '':
        # NOTE(review): this rebuilds from the raw tag text, so the prefix
        # stripping above is discarded whenever a context is present.
        # Kept as-is to preserve existing output -- confirm intent.
        text = '{}: {}'.format(w['context'], e.text.rstrip())

    # Split at the last ". " that follows at least two lowercase letters:
    # everything up to and including it is the title, the rest is the text.
    m = re.match(r'^(.+[a-z]{2,}\.\s+)(.*)', text)
    if m is not None:
        title, text = m.group(1), m.group(2)
    else:
        title, text = text, ''
    w['title'] = title
    w['text'] = text

    links = e.select('a')
    href = None
    if links and links[0] is not None:
        href = links[0].get('href')
    if href is not None:
        # An anchor without href used to yield "...orgNone" and then a
        # TypeError in re.sub; such links are now skipped.
        w['content_url'] = 'https://en.wikipedia.org{}'.format(href)
        w['item'] = re.sub(r'/wiki/|/w/index.php\?title\=', '', href)

    date_text = '{} {}'.format(w['date'], w['year'])
    date_text = date_text.replace('–', '-')
    try:
        edtf_date = parse_edtf(text_to_edtf(date_text))
        iso_date = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                 edtf_date.upper_fuzzy())
    except Exception:
        # Best effort: an entry whose date cannot be converted is dropped
        # by returning None, as before.
        return None

    w['when_happened'] = iso_date
    w['when_original'] = date_text
    w['resolution'] = 10
    del w['event']
    del w['date']
    del w['context']
    if 'header' in w:
        del w['header']
    return w
Beispiel #5
0
def getEDTF(datetext, letter_key=None):
    """Convert a German date description into an EDTF string.

    ``datetext`` is German; it is translated to English first so that the
    edtf library can parse it afterwards.

    Steps: ``prepareDate`` -> ``simpleTranslate`` -> ``prepareDateSets``.
    If ``prepareDateSets`` returns a ``str`` it is converted directly with
    ``text_to_edtf``. Otherwise each element of the result is converted and
    the pieces are combined as:

    * an interval ``a/b`` when the translated text contains ``' between '``,
      or a ``'-'`` / ``' to '`` that is not part of a ``YYYY-YYYY`` span, or
      ``'after '``, ``'before '`` and ``'/'`` together;
    * ``{a,b,...}`` when the text contains ``' and '``;
    * ``[a,b,...]`` otherwise.

    The result is then passed through ``postprocessDate`` and logged.

    Args:
        datetext: the German date text.
        letter_key: only used in the debug log line.

    Returns:
        Whatever ``postprocessDate`` returns for the assembled EDTF value.

    Raises:
        Any exception raised by the helpers propagates unchanged.
    """
    translated = simpleTranslate(prepareDate(datetext))
    prepared_translated = prepareDateSets(translated)

    if isinstance(prepared_translated, str):
        edtf = text_to_edtf(prepared_translated)
    else:
        # presumably an iterable of date strings -- verify against
        # prepareDateSets
        parts = [text_to_edtf(part) for part in prepared_translated]

        has_range_marker = '-' in translated or ' to ' in translated
        has_year_span = re.search(r'\d{4}\-\d{4}', translated)
        has_after_before = ('after ' in translated
                            and 'before ' in translated
                            and '/' in translated)
        # Same grouping the original got from operator precedence
        # (``and`` binds tighter than ``or``), spelled out explicitly.
        is_interval = (' between ' in translated
                       or (has_range_marker and not has_year_span)
                       or has_after_before)

        if is_interval:
            edtf = '/'.join(parts)
        elif ' and ' in translated:
            edtf = '{' + ','.join(parts) + '}'
        else:
            edtf = '[' + ','.join(parts) + ']'

    # run postprocessing of date
    edtf = postprocessDate(translated, edtf)
    log.debug(datetext)
    log.debug((letter_key, edtf))
    return edtf
Beispiel #6
0
 def create_from_string(self, date_str):
     """Create an ``ExtendedDate`` from a date string.

     ``date_str`` is converted with ``text_to_edtf``, coerced to ``str`` and
     passed as ``edtf_format`` to ``ExtendedDate.objects.create``; the
     created object is returned.
     """
     return ExtendedDate.objects.create(
         edtf_format=str(text_to_edtf(date_str)))
Beispiel #7
0
 def match_string(self, date_str):
     """Return whether ``date_str`` converts to this object's EDTF string.

     ``date_str`` is converted with ``text_to_edtf`` and coerced to ``str``
     before being compared for equality with ``self.edtf_format``.
     """
     stored = self.edtf_format
     candidate = str(text_to_edtf(date_str))
     return stored == candidate