Example #1
def try_ymd_date(string, outputformat, extensive_search, parser=PARSER):
    """Use a series of heuristics and rules to parse a potential date expression"""
    # discard on formal criteria
    if string is None or len(list(filter(str.isdigit, string))) < 4:
        return None
    # just time/single year, not a date
    if re.match(r'[0-9]{2}:[0-9]{2}(:| )', string) or re.match(
            r'\D*[0-9]{4}\D*$', string):
        return None
    # much faster
    if string[0:4].isdigit():
        # try speedup with ciso8601
        try:
            result = parse_datetime_as_naive(string)
            if date_validator(result, outputformat) is True:
                LOGGER.debug('ciso8601 result: %s', result)
                converted = result.strftime(outputformat)
                return converted
        except ValueError:
            LOGGER.debug('ciso8601 error: %s', string)
    # faster
    customresult = custom_parse(string, outputformat)
    if customresult is not None:
        return customresult
    # slow but extensive search
    if extensive_search is True:
        # send to dateparser
        dateparser_result = external_date_parser(string, outputformat, parser)
        if dateparser_result is not None:
            return dateparser_result
    # catchall
    return None
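A minimal sketch of the tiered approach above, not from the original project: gate ciso8601 on a leading four-digit year and fall back when it rejects the input. fast_parse and the default output format are hypothetical.

import ciso8601

def fast_parse(string, outputformat='%Y-%m-%d'):
    # ciso8601 accepts only ISO 8601 input, so try it first on likely candidates
    if string[:4].isdigit():
        try:
            return ciso8601.parse_datetime_as_naive(string).strftime(outputformat)
        except ValueError:
            pass  # not ISO 8601; a slower, more lenient parser would take over here
    return None

print(fast_parse('2019-07-04T12:30:00+02:00'))  # 2019-07-04
print(fast_parse('July 4, 2019'))               # None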
Example #2
def get_extension_params(loan=None, parameter_name=None):
    """Return extension parameters."""
    policy = get_circ_policy(loan)
    end_date = ciso8601.parse_datetime_as_naive(loan.get('end_date'))
    params = {
        'max_count': policy.get('number_renewals'),
        'duration_default': policy.get('renewal_duration')
    }
    current_date = datetime.now()
    time_to_end_of_day = timedelta(hours=23, minutes=59) - \
        timedelta(hours=current_date.hour, minutes=current_date.minute)

    transaction_location_pid = loan.get('transaction_location_pid')
    if not transaction_location_pid:
        library_pid = Item.get_record_by_pid(loan.item_pid).library_pid
    else:
        library_pid = \
            Location.get_record_by_pid(transaction_location_pid).library_pid
    library = Library.get_record_by_pid(library_pid)

    calculated_due_date = current_date + timedelta(
        days=policy.get('renewal_duration'))

    first_open_date = library.next_open(date=calculated_due_date -
                                        timedelta(days=1))

    if first_open_date.date() < end_date.date():
        params['max_count'] = 0

    new_duration = first_open_date - current_date
    params['duration_default'] = \
        timedelta(days=new_duration.days) + time_to_end_of_day

    return params.get(parameter_name)
Example #3
 def test_parse_as_naive_auto_generated_valid_formats(self):
     for (timestamp, expected_datetime) in generate_valid_timestamp_and_datetime():
         try:
             self.assertEqual(parse_datetime_as_naive(timestamp), expected_datetime.replace(tzinfo=None))
         except Exception:
             print("Had problems parsing: {timestamp}".format(timestamp=timestamp))
             raise
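A short illustration of why the test strips tzinfo from the expected value: parse_datetime keeps the UTC offset while parse_datetime_as_naive simply drops it.

import ciso8601

aware = ciso8601.parse_datetime('2020-05-01T10:00:00+02:00')
naive = ciso8601.parse_datetime_as_naive('2020-05-01T10:00:00+02:00')
# The naive variant discards the offset rather than converting it.
assert naive == aware.replace(tzinfo=None)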
Example #4
def convertTime(t):
    """Parse Arvados timestamp to unix time."""
    if not t:
        return 0
    try:
        return calendar.timegm(ciso8601.parse_datetime_as_naive(t).timetuple())
    except (TypeError, ValueError):
        return 0
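calendar.timegm reads the struct_time as UTC, so this conversion assumes Arvados timestamps are UTC. A quick sanity check, not from the original source:

import calendar
import ciso8601

parsed = ciso8601.parse_datetime_as_naive('1970-01-01T00:00:10Z')
# timetuple() drops sub-second precision; the naive value is read as UTC.
print(calendar.timegm(parsed.timetuple()))  # 10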
Example #6
def docker_link_sort_key(link):
    """Build a sort key to find the latest available Docker image.

    To find one source collection for a Docker image referenced by
    name or image id, the API server looks for a link with the most
    recent `image_timestamp` property; then the most recent
    `created_at` timestamp.  This method generates a sort key for
    Docker metadata links to sort them from least to most preferred.
    """
    try:
        image_timestamp = ciso8601.parse_datetime_as_naive(
            link['properties']['image_timestamp'])
    except (KeyError, ValueError):
        image_timestamp = EARLIEST_DATETIME
    try:
        created_timestamp = ciso8601.parse_datetime_as_naive(link['created_at'])
    except ValueError:
        created_timestamp = None
    return (image_timestamp, created_timestamp)
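A usage sketch with hypothetical link records, sorted least to most preferred by the key above; the EARLIEST_DATETIME sentinel is assumed to be a very old datetime.

import datetime
import ciso8601

EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1)

links = [
    {'properties': {}, 'created_at': '2020-01-01T00:00:00Z'},
    {'properties': {'image_timestamp': '2021-06-01T00:00:00Z'},
     'created_at': '2019-01-01T00:00:00Z'},
]
links.sort(key=docker_link_sort_key)
# The link carrying the newest image_timestamp sorts last (most preferred).
print(links[-1]['properties'])  # {'image_timestamp': '2021-06-01T00:00:00Z'}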
Example #8
    def done(self, record):
        outputs = {}
        try:
            container = self.arvrunner.api.containers().get(
                uuid=record["container_uuid"]
            ).execute(num_retries=self.arvrunner.num_retries)
            if container["state"] == "Complete":
                rcode = container["exit_code"]
                if self.successCodes and rcode in self.successCodes:
                    processStatus = "success"
                elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                    processStatus = "temporaryFail"
                elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                    processStatus = "permanentFail"
                elif rcode == 0:
                    processStatus = "success"
                else:
                    processStatus = "permanentFail"
            else:
                processStatus = "permanentFail"

            if processStatus == "permanentFail" and record["log_uuid"]:
                logc = arvados.collection.CollectionReader(record["log_uuid"],
                                                           api_client=self.arvrunner.api,
                                                           keep_client=self.arvrunner.keep_client,
                                                           num_retries=self.arvrunner.num_retries)
                label = self.arvrunner.label(self)
                done.logtail(
                    logc, logger.error,
                    "%s (%s) error log:" % (label, record["uuid"]), maxlen=40)

            if record["output_uuid"]:
                if self.arvrunner.trash_intermediate or self.arvrunner.intermediate_output_ttl:
                    # Compute the trash time to avoid requesting the collection record.
                    trash_at = ciso8601.parse_datetime_as_naive(record["modified_at"]) + datetime.timedelta(0, self.arvrunner.intermediate_output_ttl)
                    aftertime = " at %s" % trash_at.strftime("%Y-%m-%d %H:%M:%S UTC") if self.arvrunner.intermediate_output_ttl else ""
                    orpart = ", or" if self.arvrunner.trash_intermediate and self.arvrunner.intermediate_output_ttl else ""
                    oncomplete = " upon successful completion of the workflow" if self.arvrunner.trash_intermediate else ""
                    logger.info("%s Intermediate output %s (%s) will be trashed%s%s%s." % (
                        self.arvrunner.label(self), record["output_uuid"], container["output"], aftertime, orpart, oncomplete))
                self.arvrunner.add_intermediate_output(record["output_uuid"])

            if container["output"]:
                outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
        except WorkflowException as e:
            # Only include a stack trace if in debug mode.
            # A stack trace may obfuscate more useful output about the workflow.
            logger.error("%s unable to collect output from %s:\n%s",
                         self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
            processStatus = "permanentFail"
        except Exception:
            logger.exception("%s while getting output object:", self.arvrunner.label(self))
            processStatus = "permanentFail"
        finally:
            self.output_callback(outputs, processStatus)
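The trash-time arithmetic from above in isolation, with hypothetical values; timedelta(0, n) is n seconds.

import datetime
import ciso8601

modified_at = '2021-03-01T12:00:00Z'  # hypothetical record["modified_at"]
intermediate_output_ttl = 3600        # hypothetical TTL in seconds
trash_at = (ciso8601.parse_datetime_as_naive(modified_at)
            + datetime.timedelta(0, intermediate_output_ttl))
print(trash_at.strftime('%Y-%m-%d %H:%M:%S UTC'))  # 2021-03-01 13:00:00 UTC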
Example #10
def test_items_extend_end_date(client, librarian_martigny_no_email,
                               patron_martigny_no_email, loc_public_martigny,
                               item_type_standard_martigny, item_lib_martigny,
                               json_header, circ_policy_short_martigny):
    """Test correct renewal due date for items."""
    login_user_via_session(client, librarian_martigny_no_email.user)
    item = item_lib_martigny
    item_pid = item.pid
    patron_pid = patron_martigny_no_email.pid

    # checkout
    res = client.post(
        url_for('api_item.checkout'),
        data=json.dumps(dict(item_pid=item_pid, patron_pid=patron_pid)),
        content_type='application/json',
    )
    assert res.status_code == 200
    data = get_json(res)
    actions = data.get('action_applied')
    loan_pid = actions[LoanAction.CHECKOUT].get('pid')
    loan = Loan.get_record_by_pid(loan_pid)
    assert not item.get_extension_count()

    max_count = get_extension_params(loan=loan, parameter_name='max_count')
    renewal_duration_policy = circ_policy_short_martigny['renewal_duration']
    renewal_duration = get_extension_params(loan=loan,
                                            parameter_name='duration_default')
    assert renewal_duration_policy <= renewal_duration.days

    # extend loan
    res = client.post(
        url_for('api_item.extend_loan'),
        data=json.dumps(dict(item_pid=item_pid, pid=loan_pid)),
        content_type='application/json',
    )

    assert res.status_code == 200
    data = get_json(res)
    actions = data.get('action_applied')
    loan_pid = actions[LoanAction.EXTEND].get('pid')
    loan = Loan.get_record_by_pid(loan_pid)
    end_date = loan.get('end_date')
    current_date = datetime.now()
    calc_date = current_date + renewal_duration
    assert (calc_date.strftime('%Y-%m-%d') == ciso8601.parse_datetime_as_naive(
        end_date).strftime('%Y-%m-%d'))

    # checkin
    res = client.post(
        url_for('api_item.checkin'),
        data=json.dumps(dict(item_pid=item_pid, pid=loan_pid)),
        content_type='application/json',
    )
    assert res.status_code == 200
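The final assertion compares dates by formatting both sides with '%Y-%m-%d'; comparing .date() values directly is an equivalent sketch, not from the original test.

import ciso8601
from datetime import datetime

end_date = '2021-05-20T22:59:59+00:00'  # hypothetical loan end_date
calc_date = datetime(2021, 5, 20, 10, 0)
# Same check as strftime('%Y-%m-%d') on both sides, without string formatting.
assert calc_date.date() == ciso8601.parse_datetime_as_naive(end_date).date()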
Example #11
def extend_loan_data_is_valid(end_date, renewal_duration, library_pid):
    """Checks extend loan will be valid."""
    end_date = ciso8601.parse_datetime_as_naive(end_date)
    current_date = datetime.now()
    library = Library.get_record_by_pid(library_pid)
    calculated_due_date = current_date + timedelta(days=renewal_duration)
    first_open_date = library.next_open(date=calculated_due_date -
                                        timedelta(days=1))
    if first_open_date.date() <= end_date.date():
        return False
    return True
Example #12
    def flag_articles_matching_date(self, file, field='PublishDate') -> list:
        '''
        For when we need to check the dates inside the files themselves.

        Returns a list of bool, where True means "article is within timeframe".
        '''
        date_list = [
            parse_datetime_as_naive(article[field]) for article in file
        ]
        return [
            self.dated(date, self.date_start, self.date_end)
            for date in date_list
        ]
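A self-contained sketch of the same filtering idea, with a plain range check standing in for self.dated and hypothetical article data.

from ciso8601 import parse_datetime_as_naive

date_start = parse_datetime_as_naive('2020-01-01')
date_end = parse_datetime_as_naive('2020-12-31')
articles = [{'PublishDate': '2020-06-15T08:00:00'},
            {'PublishDate': '2021-02-01T08:00:00'}]
flags = [date_start <= parse_datetime_as_naive(a['PublishDate']) <= date_end
         for a in articles]
print(flags)  # [True, False]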
Example #13
def make_time(t: TimeT | None) -> int | None:
    if t is None:
        return None

    if isinstance(t, int):
        return t
    if isinstance(t, str):
        try:
            return int(
                time.mktime(ciso8601.parse_datetime_as_naive(t).timetuple()))
        except ValueError:
            return None
    t: datetime.datetime
    return int(time.mktime(t.timetuple()))
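Unlike the calendar.timegm examples earlier, time.mktime reads the tuple as local time; a brief illustration of the difference:

import calendar
import time
import ciso8601

tt = ciso8601.parse_datetime_as_naive('2021-01-01T00:00:00').timetuple()
print(int(time.mktime(tt)))  # naive value interpreted as local time
print(calendar.timegm(tt))   # same value interpreted as UTC; differs by the UTC offset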
Example #14
    def parse_path_date(path, pattern) -> datetime.datetime:
        '''
        Parse dates in filename.

        Parameters
        ----------
        path : str
            Path to a file from IM's database

        pattern : `re.Pattern`
            Pattern made using self.path_date_pattern()
        '''
        date_match = pattern.search(path)
        fdate = parse_datetime_as_naive(date_match.group())

        return fdate
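A sketch with a hypothetical pattern in place of self.path_date_pattern(); note the original raises AttributeError on non-matching paths, since date_match.group() is called unconditionally.

import re
from ciso8601 import parse_datetime_as_naive

pattern = re.compile(r'\d{4}-\d{2}-\d{2}')  # hypothetical filename date pattern
path = '/data/im/articles_2020-11-05.json'  # hypothetical path
match = pattern.search(path)
if match:  # guard that the original omits
    print(parse_datetime_as_naive(match.group()))  # 2020-11-05 00:00:00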
Example #15
def get_book_json(gen):
    # print based on seconds to start
    for market_books in gen():
        for market_book in market_books:
            # Since I'm only interested in the win market I've added this to skip 'To Be Placed'
            # Insert other filters here to help speed things up
            if market_book['marketDefinition']['name'] == 'To Be Placed':
                return
            # Calculate time until the scheduled start
            seconds_to_start = (
                ciso8601.parse_datetime_as_naive(
                    market_book['marketDefinition']['marketTime']) -
                datetime.utcfromtimestamp(
                    market_book['publishTime'] / 1000)).total_seconds()
            # Extract book at offset (seconds) before jump
            if seconds_to_start < offset and seconds_to_start > offset - 15:
                return market_book
Example #16
def get_overdue_loans():
    """Return all overdue loans."""
    from .utils import get_circ_policy
    overdue_loans = []
    results = current_circulation.loan_search\
        .source(['pid'])\
        .params(preserve_order=True)\
        .filter('term', state='ITEM_ON_LOAN')\
        .sort({'transaction_date': {'order': 'asc'}})\
        .scan()
    for record in results:
        loan = Loan.get_record_by_pid(record.pid)
        circ_policy = get_circ_policy(loan)
        now = datetime.now()
        end_date = loan.get('end_date')
        due_date = ciso8601.parse_datetime_as_naive(end_date)

        days_after = circ_policy.get('number_of_days_after_due_date')
        if now > due_date + timedelta(days=days_after):
            overdue_loans.append(loan)
    return overdue_loans
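The overdue test in isolation, with hypothetical policy values:

import ciso8601
from datetime import datetime, timedelta

end_date = '2021-01-10T23:59:00'  # hypothetical loan end_date
days_after = 5                    # hypothetical number_of_days_after_due_date
due_date = ciso8601.parse_datetime_as_naive(end_date)
print(datetime.now() > due_date + timedelta(days=days_after))  # True once past the grace period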
Example #17
def custom_parse(string, outputformat, extensive_search, min_date, max_date):
    """Try to bypass the slow dateparser"""
    LOGGER.debug('custom parse test: %s', string)
    # '201709011234' not covered by dateparser # regex was too slow
    if string[0:8].isdigit():
        try:
            candidate = datetime.date(int(string[:4]), int(string[4:6]),
                                      int(string[6:8]))
        except ValueError:
            return None
        if date_validator(candidate, '%Y-%m-%d') is True:
            LOGGER.debug('ymd match: %s', candidate)
            return convert_date(candidate, '%Y-%m-%d', outputformat)
    # much faster
    if string[0:4].isdigit():
        # try speedup with ciso8601 (if installed)
        try:
            if extensive_search is True:
                result = parse_datetime(string)
            # speed-up by ignoring time zone info if ciso8601 is installed
            else:
                result = parse_datetime_as_naive(string)
            if date_validator(
                    result, outputformat, earliest=min_date,
                    latest=max_date) is True:
                LOGGER.debug('parsing result: %s', result)
                return result.strftime(outputformat)
        except ValueError:
            LOGGER.debug('parsing error: %s', string)
    # %Y-%m-%d search
    match = YMD_PATTERN.search(string)
    if match:
        try:
            candidate = datetime.date(int(match.group(1)), int(match.group(2)),
                                      int(match.group(3)))
        except ValueError:
            LOGGER.debug('value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('ymd match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)
    # faster than fire dateparser at once
    datestub = DATESTUB_PATTERN.search(string)
    if datestub and len(datestub.group(3)) in (2, 4):
        try:
            if len(datestub.group(3)) == 2:
                candidate = datetime.date(int('20' + datestub.group(3)),
                                          int(datestub.group(2)),
                                          int(datestub.group(1)))
            elif len(datestub.group(3)) == 4:
                candidate = datetime.date(int(datestub.group(3)),
                                          int(datestub.group(2)),
                                          int(datestub.group(1)))
        except ValueError:
            LOGGER.debug('value error: %s', datestub.group(0))
        else:
            # test candidate
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('D.M.Y match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)
    # text match
    dateobject = regex_parse(string)
    # copyright match?
    # © Janssen-Cilag GmbH 2014-2019. https://www.krebsratgeber.de/artikel/was-macht-eine-zelle-zur-krebszelle
    # examine
    if dateobject is not None:
        try:
            if date_validator(dateobject, outputformat) is True:
                LOGGER.debug('custom parse result: %s', dateobject)
                return dateobject.strftime(outputformat)
        except ValueError as err:
            LOGGER.debug('value error during conversion: %s %s', string, err)
    return None
Example #18
 def get_datetime(item):
     return timezone.make_aware(parse_datetime_as_naive(item['recordedTime']))
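This one leans on Django: timezone.make_aware attaches the configured time zone (settings.TIME_ZONE) to the naive parse result. A minimal sketch, assuming a configured Django project:

from django.utils import timezone
from ciso8601 import parse_datetime_as_naive

naive = parse_datetime_as_naive('2021-04-01T09:30:00')
aware = timezone.make_aware(naive)  # tz-aware in the configured zone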
Example #19
 def parse_datetime(datetime_string: str) -> Optional[datetime]:
     try:
         return ciso8601.parse_datetime_as_naive(datetime_string)
     except ValueError:
         return
Example #20
 def parse_dt(s):
     return ciso8601.parse_datetime_as_naive(s)
Example #21
        'timeframe': '30m',
        'tsunit': 's',
        'tscoef': 1,
        'tsindex': 2
    }

    # Convert datetime string to timestamps before creating TimeframeDataset
    for index, item in enumerate(t['data']):
        item = list(item)
        newdatestr = item[0].replace('.', '-').replace(' ', 'T')
        item[0] = calendar.timegm(
            ciso8601.parse_datetime_as_naive(newdatestr).timetuple())
        t['data'][index] = item

    # Create the TimeframeDataset and print it
    tfds = TimeframeDataset(data=t['data'],
                            columns=t['columns'],
                            tsname=t['tsname'],
                            timeframe=t['timeframe'],
                            tsunit=t['tsunit'])
    pprint(tfds)
    print()

    # Create Pandas Dataframe from TimeframeDataset and print it
    pdf = tfds2pdf(tfds)
    print(pdf)
    print()
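The normalization step in isolation: '2021.03.05 10:00:00' is not ISO 8601, so the separators are rewritten before ciso8601 sees the string.

import calendar
import ciso8601

raw = '2021.03.05 10:00:00'
iso = raw.replace('.', '-').replace(' ', 'T')  # '2021-03-05T10:00:00'
print(calendar.timegm(ciso8601.parse_datetime_as_naive(iso).timetuple()))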
Example #23
    def replace_pids_and_refs(self):
        """Dumps data."""
        from ..items.api import Item
        try:
            self.init_loan()
            data = deepcopy(self.replace_refs())
            data['loan'] = self.loan
            data['loan']['item'] = self.item.replace_refs().dumps()
            # del(data['loan']['item_pid'])
            data['loan']['patron'] = self.patron.replace_refs().dumps()
            # language = data['loan']['patron']['communication_language']
            # del(data['loan']['patron_pid'])
            data['loan']['transaction_user'] = \
                self.transaction_user.replace_refs().dumps()
            # del(data['loan']['transaction_user_pid'])
            data['loan']['transaction_location'] = \
                self.transaction_location.replace_refs().dumps()
            # del(data['loan']['transaction_location_pid'])
            pickup_location = self.pickup_location
            if pickup_location:
                data['loan']['pickup_location'] = \
                    pickup_location.replace_refs().dumps()
                # del(data['loan']['pickup_location_pid'])
                library_pid = data['loan']['pickup_location']['library']['pid']
                library = Library.get_record_by_pid(library_pid)
                data['loan']['pickup_location']['library'] = library
                data['loan']['library'] = library
                keep_until = datetime.now() + timedelta(days=10)
                next_open = library.next_open(keep_until)
                # language = data['loan']['patron']['communication_language']
                next_open = next_open.strftime("%d.%m.%Y")
                data['loan']['next_open'] = next_open
            else:
                data['loan']['pickup_location'] = \
                    self.transaction_location.replace_refs().dumps()
                item_pid = data['loan']['item_pid']
                library = Item.get_record_by_pid(item_pid).get_library()
                data['loan']['library'] = library

            document = self.document.replace_refs().dumps()
            data['loan']['document'] = document
            authors = document.get('authors', '')
            if authors:
                author = authors[0].get('name', '')
                if not author:
                    mef_list = ['name_fr', 'name_de', 'name_it', 'name_en']
                    for a_name in mef_list:
                        if authors[0].get(a_name, ''):
                            author = authors[0].get(a_name)
                            break
                data['loan']['author'] = author
            end_date = data.get('loan').get('end_date')
            if end_date:
                end_date = ciso8601.parse_datetime_as_naive(end_date)
                data['loan']['end_date'] = end_date.strftime("%d.%m.%Y")
            # del(data['loan']['document_pid'])

            # create a link to patron profile
            patron = Patron.get_record_by_pid(data['loan']['patron']['pid'])
            view_code = patron.get_organisation().get('code')
            base_url = current_app.config.get('RERO_ILS_APP_BASE_URL')
            url_api = '{base_url}/{view_code}/patrons/profile'
            profile_url = url_api.format(base_url=base_url,
                                         view_code=view_code)
            data['loan']['profile_url'] = profile_url

            return data
        except Exception as e:
            raise e
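The end-date step from the dump above in isolation: an ISO timestamp reformatted for display.

import ciso8601

end_date = ciso8601.parse_datetime_as_naive('2021-08-15T21:59:00+00:00')
print(end_date.strftime('%d.%m.%Y'))  # 15.08.2021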
Example #24
 def parse_datetime(datetime_string):
     try:
         return ciso8601.parse_datetime_as_naive(datetime_string)
     except ValueError:
         return
Example #25
    def _get_message(self,
                     topic,
                     input,
                     now,
                     pub_pattern_matched,
                     endpoint_id,
                     subscriptions_by_topic,
                     has_no_sk_server,
                     _initialized=_initialized,
                     _zato_none=ZATO_NONE,
                     _skip=PUBSUB.HOOK_ACTION.SKIP,
                     _default_pri=PUBSUB.PRIORITY.DEFAULT,
                     _opaque_only=PUBSUB.DEFAULT.SK_OPAQUE,
                     _float_str=PUBSUB.FLOAT_STRING_CONVERT):

        priority = get_priority(self.cid, input)

        # So as not to send it to SQL if it is a default value anyway = less overhead = better performance
        if priority == _default_pri:
            priority = None

        expiration = get_expiration(self.cid, input)
        expiration_time = now + (expiration / 1000.0)

        pub_msg_id = input.get('msg_id', '') or new_msg_id()

        # If there is at least one WSX subscriber to this topic which is not connected at the moment,
        # which means it has no delivery server, we unconditionally turn this message into a GD one ..
        if has_no_sk_server:
            has_gd = True
            logger_pubsub.info(_log_turning_gd_msg.format('no SK server'),
                               pub_msg_id)

        # .. otherwise, use input GD value or the default per topic.
        else:
            has_gd = input.get('has_gd', _zato_none)
            if has_gd != _zato_none:
                if not isinstance(has_gd, bool):
                    raise ValueError(
                        'Input has_gd is not a bool (found:`{}`)'.format(
                            repr(has_gd)))
            else:
                has_gd = topic.has_gd

        pub_correl_id = input.get('correl_id')
        in_reply_to = input.get('in_reply_to')
        ext_client_id = input.get('ext_client_id')
        mime_type = input.get('mime_type')

        ext_pub_time = input.get('ext_pub_time') or None
        if ext_pub_time:
            ext_pub_time = parse_datetime_as_naive(ext_pub_time)
            ext_pub_time = datetime_to_ms(ext_pub_time) / 1000.0

        pub_correl_id = pub_correl_id if pub_correl_id else None
        in_reply_to = in_reply_to if in_reply_to else None
        ext_client_id = ext_client_id if ext_client_id else None
        mime_type = mime_type if mime_type else None
        reply_to_sk = input.get('reply_to_sk') or []
        deliver_to_sk = input.get('deliver_to_sk') or []

        user_ctx = input.get('user_ctx')
        zato_ctx = input.get('zato_ctx')

        ps_msg = PubSubMessage()
        ps_msg.topic = topic
        ps_msg.pub_msg_id = pub_msg_id
        ps_msg.pub_correl_id = pub_correl_id
        ps_msg.in_reply_to = in_reply_to

        # Convert to string to prevent pg8000 from rounding up float values
        ps_msg.pub_time = _float_str.format(now)
        ps_msg.ext_pub_time = _float_str.format(
            ext_pub_time) if ext_pub_time else ext_pub_time

        ps_msg.delivery_status = _initialized
        ps_msg.pub_pattern_matched = pub_pattern_matched
        ps_msg.data = input['data']
        ps_msg.mime_type = mime_type
        ps_msg.priority = priority
        ps_msg.expiration = expiration
        ps_msg.expiration_time = expiration_time
        ps_msg.published_by_id = endpoint_id
        ps_msg.topic_id = topic.id
        ps_msg.topic_name = topic.name
        ps_msg.cluster_id = self.server.cluster_id
        ps_msg.has_gd = has_gd
        ps_msg.ext_client_id = ext_client_id
        ps_msg.group_id = input.get('group_id') or None
        ps_msg.position_in_group = input.get('position_in_group') or None
        ps_msg.is_in_sub_queue = bool(subscriptions_by_topic)
        ps_msg.reply_to_sk = reply_to_sk
        ps_msg.deliver_to_sk = deliver_to_sk
        ps_msg.user_ctx = user_ctx
        ps_msg.zato_ctx = zato_ctx

        # Opaque attributes - we only need reply to sub_keys to be placed in there
        # but we do not do it unless we know that any such sub key was actually requested.
        if reply_to_sk or deliver_to_sk:
            set_instance_opaque_attrs(ps_msg, input, only=_opaque_only)

        # If there are any subscriptions for the topic this message was published to, we want to establish
        # based on what subscription pattern each subscriber will receive the message.
        for sub in subscriptions_by_topic:
            ps_msg.sub_pattern_matched[sub.sub_key] = sub.sub_pattern_matched

        if ps_msg.data:
            ps_msg.size = len(
                ps_msg.data.encode('utf8')
            )  # We need to store the size in bytes rather than Unicode codepoints
        else:
            ps_msg.size = 0

        # Invoke hook service here because it may want to update data in which case
        # we need to take it into account below.
        if topic.before_publish_hook_service_invoker:
            response = topic.before_publish_hook_service_invoker(topic, ps_msg)

            # Hook service decided that we should not process this message
            if response['hook_action'] == _skip:
                logger_audit.info(
                    'Skipping message pub_msg_id:`%s`, pub_correl_id:`%s`, ext_client_id:`%s`',
                    ps_msg.pub_msg_id, ps_msg.pub_correl_id,
                    ps_msg.ext_client_id)
                return

        # These are needed only for GD messages that are stored in SQL
        if has_gd:
            data_prefix, data_prefix_short = self._get_data_prefixes(
                ps_msg.data)
            ps_msg.data_prefix = data_prefix
            ps_msg.data_prefix_short = data_prefix_short

        return ps_msg
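datetime_to_ms is a Zato helper not shown here; a hedged equivalent of the ext_pub_time step expresses the parsed timestamp as seconds since the Unix epoch.

from datetime import datetime
from ciso8601 import parse_datetime_as_naive

ext_pub_time = parse_datetime_as_naive('2021-02-03T04:05:06')  # hypothetical input
seconds = (ext_pub_time - datetime(1970, 1, 1)).total_seconds()
print(seconds)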
Example #26
def custom_parse(string, outputformat, extensive_search, min_date, max_date):
    """Try to bypass the slow dateparser"""
    LOGGER.debug('custom parse test: %s', string)

    # 1. '201709011234' not covered by dateparser, and regex too slow
    if string[0:8].isdigit():
        try:
            candidate = datetime.date(int(string[:4]), int(string[4:6]),
                                      int(string[6:8]))
        except ValueError:
            return None
        if date_validator(candidate, '%Y-%m-%d') is True:
            LOGGER.debug('ymd match: %s', candidate)
            return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 2. shortcut, much faster
    if string[0:4].isdigit():
        # try speedup with ciso8601 (if installed)
        try:
            if extensive_search is True:
                result = parse_datetime(string)
            # speed-up by ignoring time zone info if ciso8601 is installed
            else:
                result = parse_datetime_as_naive(string)
            if date_validator(
                    result, outputformat, earliest=min_date,
                    latest=max_date) is True:
                LOGGER.debug('parsing result: %s', result)
                return result.strftime(outputformat)
        except (OverflowError, TypeError, ValueError):
            LOGGER.debug('parsing error: %s', string)

    # 3. Try YYYYMMDD, use regex
    match = YMD_NO_SEP_PATTERN.search(string)
    if match:
        try:
            year, month, day = int(match.group(0)[:4]), int(
                match.group(0)[4:6]), int(match.group(0)[6:8])
            candidate = datetime.date(year, month, day)
        except ValueError:
            LOGGER.debug('YYYYMMDD value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('YYYYMMDD match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 4. Try Y-M-D pattern since it's the one used in ISO-8601
    match = YMD_PATTERN.search(string)
    if match:
        try:
            day, month, year = int(match.group(3)), int(match.group(2)), int(
                match.group(1))
            candidate = datetime.date(year, month, day)
        except ValueError:
            LOGGER.debug('Y-M-D value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('Y-M-D match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 5. Try the D-M-Y pattern since it's the most common date format in the world
    match = DMY_PATTERN.search(string)
    if match:
        try:
            day, month, year = int(match.group(1)), int(match.group(2)), int(
                match.group(3))
            year = correct_year(year)
            # If month is more than 12, swap it with the day
            if month > 12 and day <= 12:
                day, month = month, day
            candidate = datetime.date(year, month, day)
        except ValueError:
            LOGGER.debug('D-M-Y value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('D-M-Y match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 6. Try the Y-M pattern
    match = YM_PATTERN.search(string)
    if match:
        try:
            year, month = int(match.group(1)), int(match.group(2))
            candidate = datetime.date(year, month, 1)
        except ValueError:
            LOGGER.debug('Y-M value error: %s', match.group(0))
        else:
            if date_validator(candidate, '%Y-%m-%d') is True:
                LOGGER.debug('Y-M match: %s', candidate)
                return convert_date(candidate, '%Y-%m-%d', outputformat)

    # 7. Try the other regex pattern
    dateobject = regex_parse(string)
    if date_validator(dateobject, outputformat) is True:
        try:
            LOGGER.debug('custom parse result: %s', dateobject)
            return dateobject.strftime(outputformat)
        except ValueError as err:
            LOGGER.debug('value error during conversion: %s %s', string, err)

    return None
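The step-5 swap heuristic in isolation, with illustrative values not from the original: a middle field above 12 cannot be a month, so day and month are exchanged.

import datetime

day, month, year = 7, 25, 2020  # e.g. '7/25/2020' captured by a D-M-Y pattern
if month > 12 and day <= 12:
    day, month = month, day
print(datetime.date(year, month, day))  # 2020-07-25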
Example #27
def __parse_date(date_string):
    return ciso8601.parse_datetime_as_naive(date_string).date()
Example #28
 def parse_str_date(str_date) -> datetime.datetime:
     '''
     Parse str to datetime.datetime
     '''
     return parse_datetime_as_naive(str_date)