Ejemplo n.º 1
0
def _normalize_edtf(s):
    """Parse ``s`` as EDTF, falling back to the fully unknown date.

    Falsy input, the shorthand 'u', and anything ``parse_edtf`` raises on
    all produce the parse of 'uuuu' instead.
    """
    candidate = s if (s and s != 'u') else None
    if candidate is not None:
        try:
            return parse_edtf(candidate)
        except Exception:
            # best effort: unparseable input is treated as unknown
            pass

    # nothing usable was supplied, so hand back the "most unknown" EDTF.
    return parse_edtf('uuuu')
Ejemplo n.º 2
0
        def is_parseable(val):
            """Tell whether ``val`` can be parsed by ``parse_edtf``.

            Returns True when parsing succeeds and False when ``parse_edtf``
            raises ValueError or OverflowError.

            Raises:
                TypeError: if ``val`` is not a string.
            """
            # isinstance (instead of an exact type comparison) also accepts
            # str subclasses rather than rejecting them as non-strings.
            if not isinstance(val, str):
                raise TypeError(
                    "Values passed to expect_column_values_to_be_edtf_parseable must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format."
                )

            try:
                parse_edtf(val)
            except (ValueError, OverflowError):
                return False
            return True
Ejemplo n.º 3
0
def parse_edtf_level0(edtfstr):
    """Return the object ``parse_edtf`` builds from ``edtfstr``.

    A ParseException from the parser is reported as EDTFValueError.
    """
    try:
        parsed = parse_edtf(edtfstr)
    except ParseException:
        message = "The string is not a valid EDTF-formatted string."
        raise EDTFValueError(message)
    return parsed
def convertEDTFdate(date):
    """Convert an EDTF date string into its strict lower/upper bounds.

    The input is passed through ``downgradeEDTF`` and ``parse_edtf``. For an
    interval the lower bound comes from its ``lower`` member and the upper
    bound from its ``upper`` member; otherwise both come from the parsed
    value itself. Wherever a list is encountered only its first entry is used.

    Returns:
        dict with keys 'lower' and 'upper', each formatted as ``%Y-%m-%d``.

    Raises:
        ValueError: ``('Invalid date', date)`` when the input cannot be
            downgraded or parsed.
    """
    def _first(value):
        # Some parse results arrive as lists; only the first entry is used.
        return value[0] if isinstance(value, list) else value

    try:
        d = parse_edtf(downgradeEDTF(date))
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must not be reported as an invalid date.
        raise ValueError('Invalid date', date)

    # The interval type is recognised by name, as in the original code.
    if 'Interval' in str(type(d)):
        lower = _first(d.lower).lower_strict()
        upper = _first(d.upper).upper_strict()
    else:
        single = _first(d)
        lower = single.lower_strict()
        upper = single.upper_strict()

    return {
        'lower': time.strftime("%Y-%m-%d", lower),
        'upper': time.strftime("%Y-%m-%d", upper)
    }
Ejemplo n.º 5
0
    def parse(self, date=None):
        """Parse ``date`` as EDTF and record the outcome on this instance.

        Returns None immediately, touching nothing, when ``date`` is None.
        Otherwise resets ``edtf``, ``orig_date``, ``result_set`` and
        ``error``, parses the (possibly re-formatted) date, and stores either
        ``result_set`` (when ``handle_object`` returns a list) or the four
        lower/upper bounds.
        """
        if date is None:
            return None

        self.edtf = None
        self.orig_date = date
        self.result_set = None
        self.error = None

        # Repair year-only input that is not valid EDTF as given
        # (eg: 290 => 0290, 11909 => y11909). Anything that is not an
        # integer is left exactly as supplied.
        try:
            year = int(date)
        except Exception:
            pass
        else:
            width = 4 if year >= 0 else 5  # leave room for the minus sign
            date = str(year).zfill(width)
            if year != 0 and len(str(abs(year))) > 4:
                date = 'y' + date

        self.edtf = parse_edtf(date)
        outcome = self.handle_object(self.edtf)
        if isinstance(outcome, list):
            self.result_set = outcome
            return

        self.lower = outcome.lower
        self.upper = outcome.upper
        self.lower_fuzzy = outcome.lower_fuzzy
        self.upper_fuzzy = outcome.upper_fuzzy
Ejemplo n.º 6
0
def get_date_radical_from_gregorian(date_display: str) -> Optional[str]:
    """Render an EDTF date in the French Republican calendar.

    The strict lower bound of the parsed date is converted with
    ``french_republican.from_gregorian``. When the strict upper bound
    differs, it is converted too and appended after " - ".

    Returns None for empty input, for a falsy parse result, or when an
    EDTFParseException is raised.
    """
    if not date_display:
        return None

    def _republican(bound):
        # bound[:3] is handed to from_gregorian; the result is indexed as
        # [0] year, [1] month number (1-based), [2] day.
        converted = french_republican.from_gregorian(*bound[:3])
        month_name = french_republican.MOIS[converted[1] - 1].lower()
        return f"{converted[2]} {month_name} an {int(converted[0])}"

    try:
        date_edtf = parse_edtf(date_display)
        if not date_edtf:
            return None

        date_str = _republican(date_edtf.lower_strict())

        if date_edtf.lower_strict() != date_edtf.upper_strict():
            closing = _republican(date_edtf.upper_strict())
            date_str = f"{date_str} - {closing}"

        return date_str
    except EDTFParseException:
        return None
Ejemplo n.º 7
0
    def parse(self, date=None):
        """Parse ``date`` as EDTF and record the outcome on this instance.

        Returns None immediately, touching nothing, when ``date`` is None.
        Otherwise resets ``edtf``, ``orig_date``, ``result_set`` and
        ``error``, parses the (possibly re-formatted) date, and stores either
        ``result_set`` (when ``handle_object`` returns a list) or the four
        lower/upper bounds.
        """
        # Identity test: ``== None`` would invoke a custom __eq__.
        if date is None:
            return None

        self.edtf = None
        self.orig_date = date
        self.result_set = None
        self.error = None

        try:
            # handle for incorrectly formatted year only dates
            # (eg: 290 => 0290, 11909 => y11909)
            year = int(date)
        except Exception:
            # Not a bare year: leave the input untouched. ``Exception``
            # rather than a bare ``except`` so KeyboardInterrupt and
            # SystemExit still propagate.
            pass
        else:
            if year >= 0:
                date = str(year).zfill(4)
            else:
                date = str(year).zfill(5)  # one extra column for the sign
            if len(str(abs(year))) > 4 and year != 0:
                date = 'y' + date

        self.edtf = parse_edtf(date)

        result = self.handle_object(self.edtf)
        if isinstance(result, list):
            self.result_set = result
        else:
            self.lower = result.lower
            self.upper = result.upper
            self.lower_fuzzy = result.lower_fuzzy
            self.upper_fuzzy = result.upper_fuzzy
Ejemplo n.º 8
0
    def clean(self):
        """Validate the paired date strings and derive the date fields.

        Raises ValidationError when only one of ``nat_lang_edtf_string`` and
        ``edtf_string`` is set, or when ``edtf_string`` does not parse. When
        both are set, fills the fuzzy/strict bounds and ``nat_lang_year``.
        """
        super().clean()
        # Per Django docs: validate and modify values in Model.clean()
        # https://docs.djangoproject.com/en/3.1/ref/models/instances/#django.db.models.Model.clean

        has_natural = bool(self.nat_lang_edtf_string)
        has_edtf = bool(self.edtf_string)

        # The two strings travel together: both set, or both unset.
        if has_natural != has_edtf:
            raise ValidationError(
                'If setting a date on a composition, an EDTF string and a natural language EDTF string must be provided.'
            )

        if not (has_edtf and has_natural):
            return

        try:
            e = parse_edtf(self.edtf_string)
        except EDTFParseException:
            raise ValidationError({
                'edtf_string':
                '{} is not a valid EDTF string'.format(self.edtf_string)
            })

        # Each bound is stored under the name of the EDTF method producing it.
        for bound in ('lower_fuzzy', 'upper_fuzzy', 'lower_strict',
                      'upper_strict'):
            setattr(self, bound, struct_time_to_date(getattr(e, bound)()))

        if self.lower_strict.year == self.upper_strict.year:
            self.nat_lang_year = str(self.lower_strict.year)
        else:
            self.nat_lang_year = '{}-{}'.format(self.lower_strict.year,
                                                self.upper_strict.year)
Ejemplo n.º 9
0
    def to_python(self, value):
        """Coerce ``value`` to an EDTF object.

        EDTFObject instances and None are returned untouched; anything else
        is handed to ``parse_edtf`` with ``fail_silently=True``.
        """
        needs_no_parsing = isinstance(value, EDTFObject) or value is None
        if needs_no_parsing:
            return value

        return parse_edtf(value, fail_silently=True)
Ejemplo n.º 10
0
def _normalize_edtf(s):
    """Parse ``s`` as EDTF, treating 'u', 'uuuu' and falsy input as unknown.

    Returns MOST_UNKNOWN_EDTF for those inputs and for anything
    ``parse_edtf`` raises on.
    """
    is_unknown = not s or s == 'u' or s == 'uuuu'
    if not is_unknown:
        try:
            return parse_edtf(s)
        except Exception:
            # best effort: unparseable input is treated as unknown
            pass

    return MOST_UNKNOWN_EDTF
Ejemplo n.º 11
0
 def from_db_value(self, value, expression, connection, context):
     """Convert a value read from the database into a Python object.

     Falsy values become None. Otherwise the value is first tried as a
     pickle payload and, if that fails, parsed as EDTF text with
     ``fail_silently=True``.
     """
     if not value:
         return None
     try:
         # NOTE(security): unpickling can execute arbitrary code. This is
         # only safe while the stored column cannot be written by an
         # untrusted party.
         return pickle.loads(str(value))
     except Exception:
         # Not a pickle payload: fall through to EDTF parsing. ``Exception``
         # rather than a bare ``except`` so KeyboardInterrupt and SystemExit
         # are not swallowed.
         pass
     return parse_edtf(value, fail_silently=True)
Ejemplo n.º 12
0
 def get_publication_year(self, obj):
     """Return the publication year, as a string, from the EDTF date.

     The year is taken from the strict lower bound of
     ``obj["metadata"]["publication_date"]``. A ParseException is reported
     as ValidationError.
     """
     try:
         raw_date = obj["metadata"]["publication_date"]
         lower_bound = parse_edtf(raw_date).lower_strict()
         return str(lower_bound.tm_year)
     except ParseException:
         # NOTE: Should not fail since it was validated at service schema
         message = "Unable to parse publicationYear from publication_date"
         raise ValidationError(message)
Ejemplo n.º 13
0
 def get_publication_year(self, obj):
     """Get publication year from edtf date.

     Returns the year of the date's strict lower bound as a string. On a
     ParseException the failure is logged and ValidationError is raised.
     """
     try:
         publication_date = obj["metadata"]["publication_date"]
         parsed_date = parse_edtf(publication_date)
         return str(parsed_date.lower_strict().tm_year)
     except ParseException:
         # Should not fail since it was validated at service schema
         # The trailing space after "for" matters: the two literals are
         # concatenated, and without it the log read "...field forrecord".
         current_app.logger.error("Error parsing publication_date field for "
                                  f"record {obj['metadata']}")
         raise ValidationError(_("Invalid publication date value."))
Ejemplo n.º 14
0
    def pre_save(self, instance, add):
        """
        Derive this field's EDTF value from the natural-text field and push
        the matching date values onto ``instance``.

        Does nothing (returns None) when no natural-text field is configured
        or this field's attribute is absent from ``instance.__dict__``.
        Otherwise returns the EDTF value that was stored on the instance.
        """
        if not self.natural_text_field or self.attname not in instance.__dict__:
            return

        # Current value; it is superseded by the natural-text result below.
        edtf = getattr(instance, self.attname)

        # The natural text drives the EDTF value; empty text clears it.
        natural_text = getattr(instance, self.natural_text_field)
        edtf = text_to_edtf(natural_text) if natural_text else None

        # TODO Handle case where EDTF field is set to a string directly, not
        # via `natural_text_field` (a slightly unexpected use-case, but a
        # very efficient way to set EDTF values, e.g. for API imports).
        if edtf and not isinstance(edtf, EDTFObject):
            edtf = parse_edtf(edtf, fail_silently=True)

        setattr(instance, self.attname, edtf)

        # set or clear related date fields on the instance
        for attr in DATE_ATTRS:
            target_name = getattr(self, "%s_field" % attr, None)
            if not target_name:
                continue

            if not edtf:
                setattr(instance, target_name, None)
                continue

            try:
                target_field = instance._meta.get_field(target_name)
            except FieldDoesNotExist:
                continue

            value = getattr(edtf, attr)()  # struct_time
            if isinstance(target_field, models.FloatField):
                value = struct_time_to_jd(value)
            elif isinstance(target_field, models.DateField):
                value = struct_time_to_date(value)
            else:
                raise NotImplementedError(
                    u"EDTFField does not support %s as a derived data"
                    u" field, only FloatField or DateField"
                    % type(target_field))
            setattr(instance, target_name, value)
        return edtf
Ejemplo n.º 15
0
    def dump(self, record, data):
        """Dump the data.

        Adds ``<key>_start`` and ``<key>_end`` ISO dates, taken from the
        strict lower and upper bounds of the EDTF value stored under
        ``self.key``, to the parent dict found via ``dict_lookup``.

        ``data`` is modified in place. It is returned only when the field is
        missing or does not parse; on success nothing is returned.
        """
        try:
            parent_data = dict_lookup(data, self.keys, parent=True)

            pd = parse_edtf(parent_data[self.key])
            # Build the dates directly from the (year, month, day) fields.
            # Going through calendar.timegm() + date.fromtimestamp() mixes a
            # UTC epoch with a local-time conversion, which shifts the result
            # by a day depending on the server's timezone.
            parent_data[f"{self.key}_start"] = date(
                *pd.lower_strict()[:3]).isoformat()
            parent_data[f"{self.key}_end"] = date(
                *pd.upper_strict()[:3]).isoformat()
        except (KeyError, EDTFParseException):
            # The field does not exists or had wrong data
            return data  # FIXME: should log this in debug mode?
Ejemplo n.º 16
0
    def dump(self, record, data):
        """Add a gte/lte range for the EDTF value stored under ``self.key``.

        The range is written to ``self.range_key`` on the parent dict found
        via ``dict_lookup``; ``data`` is modified in place. ``data`` is
        returned only when the field is missing or does not parse.
        """
        try:
            container = dict_lookup(data, self.keys, parent=True)
            parsed = parse_edtf(container[self.key])
            gte = _format_date(parsed.lower_strict())
            lte = _format_date(parsed.upper_strict())
            container[self.range_key] = {"gte": gte, "lte": lte}
        except (KeyError, EDTFParseException):
            # The field does not exists or had wrong data
            return data  # FIXME: should log this in debug mode?
Ejemplo n.º 17
0
 def date_is_absolute(self):
     """Report whether ``self.input`` is an absolute (EDTF) date.

     On a successful EDTF parse the result is stored on ``self.date`` and
     True is returned. When EDTF parsing fails, ``pd.parse`` is tried on the
     same input: a ``ParserError`` from it yields False, while a successful
     ``pd.parse`` raises CommandError.
     """
     try:
         self.date = parse_edtf(self.input)
     except EDTFParseException:
         # Not valid EDTF: find out whether it is date-like at all.
         try:
             pd.parse(self.input)
         except pd.parsing.exceptions.ParserError:
             return False
         else:
             # pd.parse accepted what parse_edtf rejected, so the input is
             # reported as a date in the wrong format.
             raise CommandError("Absolute dates must be of the format "
                                "YYYY, YYYY-MM or YYYY-MM-DD")
     else:
         return True
Ejemplo n.º 18
0
def date_to_int(val):
    """Collapse an EDTF value into a single integer.

    The result is ``year * 10000`` plus the zero-padded month and day read
    as one number (missing parts count as 0). A LongYear contributes only
    its year. Input that fails to parse is retried with a 'y' prefix.
    """
    try:
        parsed = parse_edtf(val)
    except EDTFParseException:
        # if there's a problem parsing, try this as a long year
        parsed = parse_edtf("y{}".format(val))

    # if it's a real DateAndTime (from a date node), must parse it further
    if isinstance(parsed, DateAndTime):
        parsed = parse_edtf(str(parsed.date))

    year_part = int(parsed.year) * 10000

    if isinstance(parsed, LongYear):
        # a long year carries no month/day component
        return year_part

    month = int(parsed.month) if parsed.month else 0
    day = int(parsed.day) if parsed.day else 0
    month_day = str(month).zfill(2) + str(day).zfill(2)
    return year_part + int(month_day)
Ejemplo n.º 19
0
def post_shows(api, scroll):
    """Scrape the Studs Terkel archive listing and post each show as an event.

    Every ``<p>`` on the listing page that holds a link with a ``<span>`` is
    turned into an event dict and sent through ``api.create_event``; the
    JSON response is pretty-printed. Nothing is returned.

    Args:
        api: object providing ``cache_wiki_thumbnail`` and ``create_event``.
        scroll: passed through to ``api.create_event``.
    """
    url = 'https://studsterkel.wfmt.com/explore#t=date'
    soup = get_url_as_soup(url)

    for p in soup.find_all('p'):
        a = p.find('a')
        if a is None:
            continue
        date = a.find('span')
        if date is None:
            continue

        # Detach the <span> children in place so that a.text below holds
        # only the title. `date` still refers to the detached span.
        for s in a('span'):
            s.extract()

        _edtf = parse_edtf(text_to_edtf(date.text))
        title = a.text.strip()
        person = get_person(title)

        # 'with_thumbnail' used to appear twice in this literal (first as
        # None); only the last value ever took effect, so it is listed once.
        show = {
            'when_happened':
            struct_time_to_datetime(_edtf.upper_strict()),
            'resolution':
            len(str(_edtf)),
            'when_original':
            date.text,
            'content_url':
            'https://studsterkel.wfmt.com{}'.format(a.get('href')),
            'title':
            title,
            'text':
            '',
            'with_thumbnail':
            api.cache_wiki_thumbnail(person),
            'media_type':
            'audio/mpeg',
            'content_type':
            'Oral histories',
            'source_url':
            'https://studsterkel.wfmt.com/',
        }
        resp = api.create_event(show, scroll)
        pprint(resp.json())
Ejemplo n.º 20
0
    def dump(self, record, data):
        """Add a gte/lte range to every entry of a list of EDTF dates.

        The list is located with ``dict_lookup``; each entry gets
        ``self.range_key`` derived from the EDTF value under ``self.key``.
        ``data`` is modified in place and returned only when a key is
        missing or a value does not parse (entries handled before the
        failure keep their range).
        """
        try:
            entries = dict_lookup(data, self.keys, parent=False)

            # EDTF parse_edtf (using pyparsing) expects a string
            for entry in entries:
                parsed = parse_edtf(entry[self.key])
                gte = _format_date(parsed.lower_strict())
                lte = _format_date(parsed.upper_strict())
                entry[self.range_key] = {"gte": gte, "lte": lte}

        except (KeyError, EDTFParseException):
            # The field does not exists or had wrong data
            return data  # FIXME: should log this in debug mode?
Ejemplo n.º 21
0
def add_first_link(w):
    """Turn a scraped event record into a postable event dict, in place.

    ``w`` must carry 'event' (a parsed HTML element), 'context', 'date' and
    'year'. The event text is split into 'title' and 'text', the first link
    becomes 'content_url'/'item', and the date is converted through EDTF
    into 'when_happened'. The scratch keys 'event', 'date', 'context' and
    'header' are removed on success.

    Returns:
        ``w`` on success; None when ``w`` is None, has no 'event', or the
        date cannot be converted (in which case ``w`` keeps the partial
        updates made so far).
    """
    if w is None or 'event' not in w:
        return None

    e = w['event']

    # Drop footnote markers and spans before reading the text.
    for tag in e(['sup', 'span']):
        tag.decompose()

    # Raw strings: '\s', '\d' and '\.' are invalid escapes in a plain
    # literal and trigger a warning on current Python versions.
    text = e.text.rstrip()
    text = re.sub(r'^\s*\d+:\s*', '', text)
    text = re.sub(r'^:\s*', '', text)
    if w['context'] is not None and w['context'] != '':
        # NOTE(review): this uses the raw event text again, discarding the
        # prefix clean-up above - confirm that is intended.
        text = '{}: {}'.format(w['context'], e.text.rstrip())

    # Title is everything up to the first sentence end (a word of two or
    # more lowercase letters followed by a period); the rest is body text.
    m = re.match(r'^(.+[a-z]{2,}\.\s+)(.*)', text)
    if m is not None:
        title = m.group(1)
        text = m.group(2)
    else:
        title = text
        text = ''
    w['title'] = title
    w['text'] = text

    links = e.select('a')
    if len(links) > 0 and links[0] is not None:
        href = links[0].get('href')
        # An <a> without href used to crash in re.sub(); skip it instead.
        if href is not None:
            w['content_url'] = 'https://en.wikipedia.org{}'.format(href)
            w['item'] = re.sub(r'/wiki/|/w/index.php\?title\=', '', href)

    date_text = '{} {}'.format(w['date'], w['year'])
    date_text = date_text.replace('–', '-')
    try:
        edtf_date_txt = text_to_edtf(date_text)
        edtf_date = parse_edtf(edtf_date_txt)
        iso_date = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                 edtf_date.upper_fuzzy())
        w['when_happened'] = iso_date
        w['when_original'] = date_text
        w['resolution'] = 10
        del w['event']
        del w['date']
        del w['context']
        if 'header' in w:
            del w['header']
        return w
    except Exception:
        # Best effort: a record whose date cannot be converted is skipped.
        return None
Ejemplo n.º 22
0
    def get_issued(self, obj):
        """Build the "date-parts" structure for the publication date.

        The publication date is split on "/" and each piece is parsed as
        EDTF; its truthy year, month and day (in that order) form one entry.
        """
        raw_dates = obj["metadata"]["publication_date"].split("/")

        date_parts = []
        for raw in raw_dates:
            parsed = parse_edtf(raw)
            components = (parsed.year, parsed.month, parsed.day)
            # falsy components (missing month/day, ...) are left out
            date_parts.append([part for part in components if part])

        return {"date-parts": date_parts}
Ejemplo n.º 23
0
    def clean(self):
        """Validate ``edtf_string`` and derive the date fields from it.

        Raises ValidationError (keyed on 'edtf_string') when the string does
        not parse. Otherwise fills the fuzzy/strict bounds and
        ``nat_lang_year``.
        """
        try:
            parsed = parse_edtf(self.edtf_string)
        except EDTFParseException:
            raise ValidationError({
                'edtf_string':
                '{} is not a valid EDTF string'.format(self.edtf_string)
            })

        # Each bound is stored under the name of the EDTF method producing it.
        for bound in ('lower_fuzzy', 'upper_fuzzy', 'lower_strict',
                      'upper_strict'):
            setattr(self, bound, struct_time_to_date(getattr(parsed, bound)()))

        if self.lower_strict.year == self.upper_strict.year:
            self.nat_lang_year = str(self.lower_strict.year)
        else:
            self.nat_lang_year = '{}-{}'.format(self.lower_strict.year,
                                                self.upper_strict.year)
Ejemplo n.º 24
0
    def save(self, *args, **kwargs):
        """Derive the date fields from ``edtf_string``, then save.

        Raises ValidationError when the string does not parse; in that case
        nothing is saved.
        """
        try:
            parsed = parse_edtf(self.edtf_string)
        except EDTFParseException:
            raise ValidationError('{} is not a valid EDTF string'.format(
                self.edtf_string))

        # Each bound is stored under the name of the EDTF method producing it.
        for bound in ('lower_fuzzy', 'upper_fuzzy', 'lower_strict',
                      'upper_strict'):
            setattr(self, bound, struct_time_to_date(getattr(parsed, bound)()))

        if self.lower_strict.year == self.upper_strict.year:
            self.nat_lang_year = str(self.lower_strict.year)
        else:
            self.nat_lang_year = '{}-{}'.format(self.lower_strict.year,
                                                self.upper_strict.year)

        super().save(*args, **kwargs)
Ejemplo n.º 25
0
    def __call__(self, value):
        """Validate ``value`` as EDTF and return it unchanged.

        Raises ValidationError when the value does not parse, when
        ``self._types`` is set and the parsed object is an instance of none
        of them, or when ``self._chronological_interval`` is set and the
        strict upper bound precedes the strict lower bound.
        """
        try:
            parsed = parse_edtf(value)
        except ParseException:
            raise ValidationError(self._format_error(value, None))

        if self._types and not any(
                isinstance(parsed, t) for t in self._types):
            raise ValidationError(self._format_error(value, parsed))

        # We require intervals to be chronological. EDTF Date and Interval
        # share the same interface, and for a Date
        # lower_strict() <= upper_strict() always holds.
        if (self._chronological_interval
                and parsed.upper_strict() < parsed.lower_strict()):
            raise ValidationError(self._format_error(value, parsed))

        return value
Ejemplo n.º 26
0
    def get_issued(self, obj):
        """Build the "date-parts" structure for the publication date.

        A Date yields one entry and an Interval yields two (lower, then
        upper). ``missing`` is returned for any other parsed type or when an
        EDTFParseException is raised.
        """
        try:
            parsed = parse_edtf(obj["metadata"].get("publication_date"))
        except EDTFParseException:
            return missing

        if isinstance(parsed, Date):
            endpoints = [parsed]
        elif isinstance(parsed, Interval):
            endpoints = [parsed.lower, parsed.upper]
        else:
            return missing

        return {
            "date-parts": [
                add_if_not_none(d.year, d.month, d.day) for d in endpoints
            ]
        }
Ejemplo n.º 27
0
 def parse_date(self) -> Optional[struct_time]:
     """Parse ``self.date_display`` with ``parse_edtf``.

     Returns None when an AttributeError or EDTFParseException is raised.
     """
     try:
         parsed = parse_edtf(self.date_display)
     except (AttributeError, EDTFParseException):
         parsed = None
     return parsed
Ejemplo n.º 28
0
        # because the meta csv file didn't have it set if we're trying
        # to fetch the raw json in the first place. But this is meant
        # to catch this scenario.

        self.wof_id = wof_id
        self.reason = reason
        self.message = message
        self.halt = halt
        self.skipped = skipped
        self.funky = funky
        self.superseded = superseded


# Keep this as a constant - it actually takes a significant amount of time to
# re-parse this every time, when we know it's a constant.
MOST_UNKNOWN_EDTF = parse_edtf('uuuu')


# given a string, parse it as EDTF while allowing a single 'u', four u's
# 'uuuu', or None to mean completely unknown, and return the EDTF object.
def _normalize_edtf(s):
    """Return the EDTF object for ``s``, or MOST_UNKNOWN_EDTF.

    The constant is returned for falsy input, for 'u' and 'uuuu', and for
    anything ``parse_edtf`` raises on.
    """
    candidate = s if (s and s != 'u' and s != 'uuuu') else None
    if candidate is not None:
        try:
            return parse_edtf(candidate)
        except Exception:
            # best effort: unparseable input is treated as unknown
            pass

    return MOST_UNKNOWN_EDTF

    def prepare_feature(self, f, **kwargs):
        """Fill in default bookkeeping properties on feature ``f`` in place.

        Sets the geometry hash, id and timestamps, stubs out the list-valued
        ``wof:`` properties, defaults ``mz:hierarchy_label``,
        ``mz:is_current`` and ``wof:repo``, normalizes the EDTF
        inception/cessation strings and, where those parse, adds plain
        ``YYYY-MM-DD`` lower/upper bounds for them.

        Returns False when the feature has no ``wof:id`` and a new one could
        not be generated.
        """

        props = f['properties']
        props['wof:geomhash'] = u.hash_geom(f)

        # who am I ?
        # have I been here before ?
        # why is the sky blue ?

        # also, what time is it?
        now = int(time.time())

        # dict.get / `in` replace dict.has_key, which only exists on
        # Python 2; these forms work on both 2 and 3.
        wofid = props.get('wof:id')

        if wofid is None:

            logging.debug(
                "This record has no wofid so now asking what Brooklyn would do..."
            )

            wofid = u.generate_id()

            if wofid == 0:
                logging.error("OH NO - can't get integer!")
                return False

            props['wof:id'] = wofid
            props['wof:created'] = now

        f['id'] = props['wof:id']

        props['wof:lastmodified'] = now

        # TO DO: FIGURE OUT HOW TO DERIVE DEFAULTS FROM
        # py-mapzen-whosonfirst-validator (20150922/thisisaaronland)

        # stubs

        for k in ('supersedes', 'superseded_by', 'hierarchy', 'belongsto',
                  'breaches'):

            k = "wof:%s" % k

            if not props.get(k, False):
                props[k] = []

        # ensure 'mz:' properties
        # https://github.com/whosonfirst/whosonfirst-data/issues/320

        if props.get('mz:hierarchy_label', None) is None:
            props['mz:hierarchy_label'] = 1

        is_current = props.get("mz:is_current", None)

        if is_current not in (-1, 0, 1):
            # accept the string spellings; anything else means "unknown" (-1)
            is_current = {"-1": -1, "0": 0, "1": 1}.get(str(is_current), -1)
            props['mz:is_current'] = is_current

        # ensure 'wof:repo'
        # https://github.com/whosonfirst/whosonfirst-data/issues/338

        if props.get('wof:repo', None) is None:
            repo_root = os.path.dirname(self.root)
            props['wof:repo'] = os.path.basename(repo_root)

        # ensure edtf stuff - it might be time for py-whosonfirst-dates/edtf package
        # but not today... (20180503/thisisaaronland)

        for k in ('inception', 'cessation'):
            k = "edtf:%s" % k

            # section 5.2.2 (EDTF) - this appears to have changed to 'XXXX' as of
            # the draft sent to ISO (201602) but we're just going to wait...
            # A lone "u" is normalized too, in advance of a proper backfill
            # (20160107/thisisaaronland)

            if k not in props or props[k] == "u":
                props[k] = u"uuuu"

        # now we try to append upper/lower ranges for inception and cessation
        # dates - specifically plain vanilla YMD values that can be indexed by
        # plain old databases (20180503/thisisaaronland)

        # note the use of arrow (.py) since datetime.strptime can't deal with
        # parsing YYYY-MM-DD dates before 1900 (20180503/thisisaaronland)

        inception = props.get("edtf:inception", "")
        cessation = props.get("edtf:cessation", "")

        fmt = "YYYY-MM-DD"

        def _add_bounds(label, value):
            # Store date:<label>_lower / date:<label>_upper for an EDTF
            # string; a value that fails to parse is logged and skipped.
            try:
                # unicode() is the Python 2 builtin this module relies on.
                parsed = edtf.parse_edtf(unicode(value))

                lower = arrow.get(parsed.lower_strict())
                upper = arrow.get(parsed.upper_strict())

                props["date:%s_lower" % label] = lower.format(fmt)
                props["date:%s_upper" % label] = upper.format(fmt)

            except Exception as err:
                logging.warning("Failed to parse %s '%s' because %s" %
                                (label, value, err))

        # skip "uuuu" because it resolves to 0001-01-01 9999-12-31 (in edtf.py land)

        if inception not in ("", "uuuu"):
            _add_bounds("inception", inception)

        # This check is independent of the inception one. It used to be
        # nested inside it, which dropped the cessation bounds whenever the
        # inception was unknown.
        #
        # "open" is skipped on purpose: edtf.py freaks out when an edtf
        # string is just "open", and it interprets "open" as "today", which
        # is not what we want to store in the database
        # (20180418/thisisaaronland)

        if cessation not in ("", "uuuu", "open"):
            _add_bounds("cessation", cessation)
Ejemplo n.º 30
0
        # because the meta csv file didn't have it set if we're trying
        # to fetch the raw json in the first place. But this is meant
        # to catch this scenario.

        self.wof_id = wof_id
        self.reason = reason
        self.message = message
        self.halt = halt
        self.skipped = skipped
        self.funky = funky
        self.superseded = superseded


# Keep this as a constant - it actually takes a significant amount of time to
# re-parse this every time, when we know it's a constant.
MOST_UNKNOWN_EDTF = parse_edtf('uuuu')


# given a string, parse it as EDTF while allowing a single 'u', four u's
# 'uuuu', or None to mean completely unknown, and return the EDTF object.
def _normalize_edtf(s):
    """Parse ``s`` as EDTF, or return MOST_UNKNOWN_EDTF.

    Falsy input, 'u' and 'uuuu' skip parsing altogether; input that
    ``parse_edtf`` raises on falls back to the same constant.
    """
    skip_parsing = not s or s == 'u' or s == 'uuuu'
    if not skip_parsing:
        try:
            return parse_edtf(s)
        except Exception:
            # best effort: unparseable input is treated as unknown
            pass

    return MOST_UNKNOWN_EDTF

Ejemplo n.º 31
0
 def _as_edtf_object(cls, edtf_format):
     """Parse ``edtf_format`` with ``parse_edtf``.

     Returns None when an EDTFParseException is raised.
     """
     try:
         edtf_object = parse_edtf(edtf_format)
     except EDTFParseException:
         return None
     else:
         return edtf_object