Example 1
    def process(self, start_time: datetime, end_time: datetime, input: DataFrame):
        logger.debug('Start: %s  End: %s  Log: index=%s fields=%s' % (start_time.isoformat(), end_time.isoformat(), str(self.indices), str(self.fields)))

        search = Search(using=self.client, index=self.indices[0])
        search = search.filter(Range(** {'@timestamp': {'gte': start_time.isoformat(), 'lte': end_time.isoformat()}}))

        for k,v in self.fields.items():
            if isinstance(v, list):
                for sv in v:
                    search = search.query("match", **{k:sv})

            else:
                search = search.query("match", **{k:v})

        logger.debug('ES Query: %s' % str(search.to_dict()))
        response = search.execute()

        logger.debug('Results: success:%d failed:%d hits:%d' % (response._shards.successful, response._shards.failed, len(response.hits)))

        for hit in response:
            # filter out the meta key and flatten the values
            row = {k: str(hit[k]) for k in hit if k != 'meta'}

            logger.debug(row)
            input = input.append(row, ignore_index=True)

        return input
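Note that DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. A minimal sketch of the same row accumulation using pd.concat, with hypothetical hit dictionaries standing in for the Elasticsearch response:

import pandas as pd

# Hypothetical rows standing in for the Elasticsearch hits above.
existing = pd.DataFrame([{'host': 'web-0', 'level': 'info'}])
hits = [{'host': 'web-1', 'level': 'error'}, {'host': 'web-2', 'level': 'warning'}]

rows = [{k: str(v) for k, v in hit.items() if k != 'meta'} for hit in hits]
result = pd.concat([existing, pd.DataFrame(rows)], ignore_index=True)
print(result)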
Example 2
    def list_transactions(self, account_id: str,
                          since: datetime.datetime = None,
                          before: datetime.datetime = None,
                          limit: int = None) -> List[Transaction]:
        """
        List recent transactions for the account

        :param account_id: account id
        :param before: only list transactions before that date
        :param since: only list transactions after that date
        :param limit: only show a number of transactions
        :return: A list of Transaction objects
        """

        params = {
            'account_id': account_id,
            'expand[]': 'merchant'
        }

        if since:
            params.update({'since': since.isoformat('T')+'Z'})
        if before:
            params.update({'before': before.isoformat('T')+'Z'})
        if limit:
            params.update({'limit': limit})

        response = self._make_request('/transactions', params)

        return [
            Transaction(client=self, **transaction)
            for transaction in response['transactions']
        ]
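The isoformat('T') + 'Z' pattern above only yields a valid RFC 3339 timestamp when the datetime is naive and already expressed in UTC; a quick sketch of the difference:

from datetime import datetime, timezone

naive_utc = datetime(2024, 5, 1, 12, 30)                      # assumed to already be in UTC
print(naive_utc.isoformat('T') + 'Z')                         # 2024-05-01T12:30:00Z

aware = datetime(2024, 5, 1, 12, 30, tzinfo=timezone.utc)
print(aware.isoformat('T') + 'Z')                             # 2024-05-01T12:30:00+00:00Z (offset duplicated)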
    def get_eventcount(self, bucket_id: str, limit: int=100, start: datetime=None, end: datetime=None) -> int:
        endpoint = "buckets/{}/events/count".format(bucket_id)

        params = dict()  # type: Dict[str, str]
        if start is not None:
            params["start"] = start.isoformat()
        if end is not None:
            params["end"] = end.isoformat()

        response = self._get(endpoint, params=params)
        return int(response.text)
    def get_events(self, bucket_id: str, limit: int=100, start: datetime=None, end: datetime=None) -> List[Event]:
        endpoint = "buckets/{}/events".format(bucket_id)

        params = dict()  # type: Dict[str, str]
        if limit is not None:
            params["limit"] = str(limit)
        if start is not None:
            params["start"] = start.isoformat()
        if end is not None:
            params["end"] = end.isoformat()

        events = self._get(endpoint, params=params).json()
        return [Event(**event) for event in events]
Example 5
def _to_isoformat(value: datetime) -> str:
    if not value:
        return ''
    text = value.isoformat()
    if text.endswith(_ZERO_ISO_TIME_PART):
        return text[0:-len(_ZERO_ISO_TIME_PART)]
    return text
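_ZERO_ISO_TIME_PART is not shown in this excerpt; assuming it is the midnight suffix 'T00:00:00' produced by isoformat(), the helper trims date-only values like so:

from datetime import datetime

# Assumption: _ZERO_ISO_TIME_PART is the midnight suffix emitted by isoformat().
_ZERO_ISO_TIME_PART = 'T00:00:00'

text = datetime(2024, 5, 1).isoformat()      # '2024-05-01T00:00:00'
if text.endswith(_ZERO_ISO_TIME_PART):
    text = text[:-len(_ZERO_ISO_TIME_PART)]
print(text)                                  # '2024-05-01'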
Example 6
def populate_full_water_temperature(request):
    obj = SoapCalls()
    DataFormat = 'json'
    sensors = obj.GetSensors(DataFormat)

    sDate = DT(2016, 1, 1)
    eDate = sDate + datetime.timedelta(days=9)
    stationNumbers = get_station_num(sensors, 'WT')

    temperature_data = None
    count = 0

    while eDate < DT.today():
        count = count + 1
        print ("**** Writing batch: %s" % count)
        sDate = eDate + datetime.timedelta(days=1)
        eDate = eDate + datetime.timedelta(days=10)
        for station in stationNumbers:
            temperature_x = obj.GetTimeSeriesData(station, ['WT'], sDate.isoformat(), eDate.isoformat(), DataFormat)
            for record in temperature_x['TimeSeriesData']:
                wt_datetime = DT.strptime(record['TimeStamp'], '%m/%d/%Y %I:%M:%S %p')
                try:
                    wt = WT(station_id=record['StationID'], station_name=record['StationName'],
                            timestamp=wt_datetime, value=record['Value'])
                    wt.save()
                except Exception:
                    continue

    return HttpResponse('Written to database FULL')
    def query(self, query: str, start: datetime, end: datetime, name: str = None, cache: bool = False) -> Union[int, dict]:
        endpoint = "query/"
        params = {}  # type: Dict[str, Any]
        if cache:
            if not name:
                raise Exception("You are not allowed to do caching without a query name")
            params["name"] = name
            params["cache"] = int(cache)
        data = {
            'timeperiods': ["/".join([start.isoformat(), end.isoformat()])],
            'query': query.split("\n")
        }
        response = self._post(endpoint, data, params=params)
        if response.text.isdigit():
            return int(response.text)
        else:
            return response.json()
Example 8
def get_datetime_string(dt: datetime = None, with_timezone: bool = True) -> typing.Optional[str]:
    if not dt:
        dt = datetime.now()

    if with_timezone:
        dt = dt.astimezone()

    s = dt.isoformat()
    return s
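For reference, astimezone() with no argument interprets a naive datetime as local time and attaches the local timezone, so the returned string carries a UTC offset:

from datetime import datetime

# A naive datetime is treated as local time; the resulting string gains an offset.
print(datetime(2024, 5, 1, 9, 0).astimezone().isoformat())
# e.g. '2024-05-01T09:00:00+02:00' -- the offset depends on the machine running this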
Example 9
def format_datetime(__datetime: datetime.datetime) -> str:
    """Format ISO-8601 datetime string.

    Args:
        __datetime: Datetime to process
    Returns:
        ISO-8601 compatible string
    """
    return __datetime.isoformat().replace('+00:00', 'Z')
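A short illustration of the substitution for a timezone-aware UTC value:

from datetime import datetime, timezone

dt = datetime(2024, 5, 1, 12, 30, tzinfo=timezone.utc)
print(dt.isoformat().replace('+00:00', 'Z'))   # 2024-05-01T12:30:00Z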
Example 10
def dateTimeAsRFC3339Text(dateTime: DateTime) -> str:
    """
    Convert a :class:`DateTime` into an RFC 3339 formatted date-time string.

    :param dateTime: A non-naive :class:`DateTime` to convert.

    :return: An RFC 3339 formatted date-time string corresponding to
        :obj:`dateTime`.
    """
    return dateTime.isoformat()
Example 11
    async def list_transactions_async(self, account_id: str,
                                      since: datetime.datetime = None,
                                      before: datetime.datetime = None,
                                      limit: int = None):
        params = {
            'account_id': account_id,
        }

        if since:
            params.update({'since': since.isoformat('T')+'Z'})
        if before:
            params.update({'before': before.isoformat('T')+'Z'})
        if limit:
            params.update({'limit': str(limit)})

        content = await self._make_async_request('/transactions', params)

        return [
            Transaction(client=self, **transaction)
            for transaction in content['transactions']
        ]
Example 12
    def add_appointment(self, start: datetime_type, end: datetime_type, location: str, description: str,
                        doctor_ids: list, nurse_ids: list, patient_ids: list):
        """
        Add an appointment to be exported
        :param start: The start date of the appointment
        :param end: The end date of the appointment
        :param location: The physical location of the appointment
        :param description: A description of the appointment
        :param doctor_ids: A list of ids of the doctors attending
        :param nurse_ids: A list of ids of the nurses attending
        :param patient_ids: A list of ids of the patients attending
        :return: None
        """
        self.__export_scheme['appointments'] += [{
            'start': start.isoformat(),
            'end': end.isoformat(),
            'location': location,
            'description': description,
            'doctor_ids': doctor_ids,
            'nurse_ids': nurse_ids,
            'patient_ids': patient_ids
        }]
Example 13
    def set_maintenance_overlay(self, next_maintenance_time: datetime) -> None:
        """
        Once per maintenance window and user: show an overlay that the user must
        actively dismiss once.
        """
        saw_deploy_time = self.request.session.get(self.session_key, '')
        try:
            saw_deploy_time = parse_datetime(saw_deploy_time)
        except ValueError:
            saw_deploy_time = None

        if not saw_deploy_time or saw_deploy_time < now():
            self.request.session[self.session_key] = \
                next_maintenance_time.isoformat()
            text = self.get_full_maintenance_text(next_maintenance_time)
            self.overlay.update({'maintenance_warn_overlay': text})
Example 14
def schedule_nagbot_message(message: str, short_message: str, deadline: datetime, policies: List[dict], uid: str = None) -> str:
    '''
    Instantiates a new message to be sent repeatedly by NagBot

    :param message: Long description of message (ie email body)
    :param short_message: Short description of message (ie email subject, IRC message)
    :param deadline: Message expiry date
    :param policies: Notification policies described in dict format
    :param uid: Optionally specify tracking uid. A random uid will be generated if not given

    :return: Tracking uid for the notification
    '''
    for policy in policies:
        verify_policy_structure(policy)

    if uid is None:
        uid = generate_random_uid()

    request_url = current_app.config['RELENG_NOTIFICATION_POLICY_URL'] + '/message/' + uid

    message_body = json.dumps({
        'deadline': deadline.isoformat(),
        'message': message,
        'shortMessage': short_message,
        'policies': policies,
    })

    hawk = mohawk.Sender(get_current_app_credentials(), request_url, 'put',
                         content=message_body, content_type='application/json')

    headers = {
        'Authorization': hawk.request_header,
        'Content-Type': 'application/json',
    }

    # Support dev ssl ca cert
    ssl_dev_ca = current_app.config.get('SSL_DEV_CA')
    if ssl_dev_ca is not None:
        assert os.path.isdir(ssl_dev_ca), 'SSL_DEV_CA must be a dir with hashed dev ca certs'

    response = put(request_url, headers=headers, data=message_body, verify=ssl_dev_ca)
    response.raise_for_status()

    return uid
Example 15
    def _do_retrieval(self, updates_since: datetime) -> UpdateCollection:
        """
        Handles the retrieval of updates by getting the data using the retriever, notifying the listeners and then
        logging the retrieval.
        :param updates_since: the time from which to retrieve updates
        :return: the updates retrieved
        """
        logging.debug("Starting update retrieval...")

        # Do retrieve
        started_at_clock_time = RetrievalManager._get_clock_time()
        started_at = RetrievalManager._get_monotonic_time()
        updates = self.update_mapper.get_all_since(updates_since)
        seconds_taken_to_complete_query = RetrievalManager._get_monotonic_time() - started_at
        assert updates is not None
        logging.debug("Retrieved %d updates since %s (query took: %s)"
                      % (len(updates), updates_since, seconds_taken_to_complete_query))

        # Notify listeners of retrieval
        if len(updates) > 0:
            logging.debug("Notifying %d listeners of %d update(s)" % (len(self.get_listeners()), len(updates)))
            self.notify_listeners(updates)

        # Store log of retrieval
        most_recent_retrieved = updates.get_most_recent()[0].timestamp if len(updates) > 0 else None
        self._logger.record(
            MEASURED_RETRIEVAL,
            {
                MEASURED_RETRIEVAL_UPDATES_SINCE: updates_since.isoformat(),
                MEASURED_RETRIEVAL_STARTED_AT: started_at_clock_time.isoformat(),
                MEASURED_RETRIEVAL_DURATION: seconds_taken_to_complete_query,
                MEASURED_RETRIEVAL_UPDATE_COUNT: len(updates),
                MEASURED_RETRIEVAL_MOST_RECENT_RETRIEVED:
                    None if most_recent_retrieved is None else most_recent_retrieved.isoformat()
            }
        )

        return updates
Example 16
    def save_plugin_timestamp(self, name: str, timestamp: datetime) -> None:
        self.workflow.data.plugins_timestamps[name] = timestamp.isoformat()
Example 17
    def process(self, start_time: datetime, end_time: datetime, input: DataFrame):
        logger.debug('Start: %s  End: %s  Event: fields=%s' % (start_time.isoformat(), end_time.isoformat(), str(self.fields)))

        return input
Example 18
    def _get_datafile_name(self, timestamp: datetime):
        assert timestamp.tzinfo is None
        return '{}/{}{}'.format(self.path, timestamp.isoformat(), FILE_EXT)
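Note that the ISO string embeds ':' characters, which are not valid in Windows file names. A hedged sketch (FILE_EXT is assumed here, it is not shown in the excerpt) of a filesystem-safe alternative using strftime:

from datetime import datetime

FILE_EXT = '.json'   # hypothetical; the real constant is not shown in the snippet above

ts = datetime(2024, 5, 1, 12, 30, 15)
print('data/{}{}'.format(ts.isoformat(), FILE_EXT))                    # data/2024-05-01T12:30:15.json (':' breaks on Windows)
print('data/{}{}'.format(ts.strftime('%Y-%m-%dT%H-%M-%S'), FILE_EXT))  # data/2024-05-01T12-30-15.json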
Example 19
File: util.py Project: Narfee/neo
def zulu_time(dt: datetime.datetime):
    return dt.isoformat()[:-6] + "Z"
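The [:-6] slice assumes an aware datetime whose isoformat() output ends in a ±HH:MM offset; a naive value silently loses part of the time field instead:

from datetime import datetime, timezone

aware = datetime(2024, 5, 1, 12, 30, tzinfo=timezone.utc)
print(aware.isoformat()[:-6] + "Z")   # 2024-05-01T12:30:00Z

naive = datetime(2024, 5, 1, 12, 30)
print(naive.isoformat()[:-6] + "Z")   # 2024-05-01T12Z -- the slice removed part of the time, not an offset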
Example 20
    def _convert_from_python(self, value: datetime.datetime, state) -> str:
        return value.isoformat()
Example 21
def make_time(t: datetime) -> Dict:
  return {
    'value': t.isoformat() + 'Z',
    'format': 'RFC3339'
  }
Example 22
    def datetime_to_json(self, t: datetime.datetime) -> Optional[str]:
        if t is None:
            return
        return t.isoformat()
Example 23
    def get_complete_latencies(
        self,
        topology_id: str,
        cluster: str,
        environ: str,
        start: dt.datetime,
        end: dt.datetime,
        **kwargs: Union[str, int, float],
    ) -> pd.DataFrame:
        """ Gets the complete latencies, as a timeseries, for every instance of
        the of all the spout components of the specified topology. The start
        and end times define the window over which to gather the metrics. The
        window duration should be less than 3 hours as this is the limit of
        what the Topology master stores.

        Arguments:
            topology_id (str):    The topology identification string.
            cluster (str):  The cluster the topology is running in.
            environ (str):  The environment the topology is running in (eg.
                            prod, devel, test, etc).
            start (datetime):    utc datetime instance for the start of the
                                    metrics gathering period.
            end (datetime):  utc datetime instance for the end of the
                                metrics gathering period.

        Returns:
            pandas.DataFrame: A DataFrame containing the service time
            measurements as a timeseries. Each row represents a measurement
            (aggregated over one minute) with the following columns:

            * timestamp:  The UTC timestamp for the metric,
            * component: The component this metric comes from,
            * task: The instance ID number for the instance that the metric
              comes from,
            * container:  The ID for the container this metric comes from,
            * stream: The name of the incoming stream from which the tuples
              that lead to this metric came from,
            * latency_ms: The average execute latency measurement in
              milliseconds for that metric time period.

        Raises:
            RuntimeWarning: If the specified topology has a reliability mode
                            that does not enable complete latency.
        """
        LOG.info(
            "Getting complete latencies for topology %s over a %d second "
            "period from %s to %s",
            topology_id,
            (end - start).total_seconds(),
            start.isoformat(),
            end.isoformat(),
        )

        logical_plan, start_time, end_time = self._query_setup(
            topology_id, cluster, environ, start, end
        )

        # First we need to check that the supplied topology will actually have
        # complete latencies. Only ATLEAST_ONCE and EXACTLY_ONCE will have
        # complete latency values as acking is disabled for ATMOST_ONCE.
        physical_plan: Dict[str, Any] = tracker.get_physical_plan(
            self.tracker_url, cluster, environ, topology_id
        )
        if physical_plan["config"]["topology.reliability.mode"] == "ATMOST_ONCE":
            rm_msg: str = (
                f"Topology {topology_id} reliability mode is set "
                f"to ATMOST_ONCE. Complete latency is not "
                f"available for these types of topologies"
            )
            LOG.warning(rm_msg)
            warnings.warn(rm_msg, RuntimeWarning)
            return pd.DataFrame()

        output: pd.DataFrame = None

        spouts: Dict[str, Any] = logical_plan["spouts"]
        for spout_component in spouts:

            try:
                spout_complete_latencies: pd.DataFrame = self.get_spout_complete_latencies(
                    topology_id,
                    cluster,
                    environ,
                    spout_component,
                    start_time,
                    end_time,
                    logical_plan,
                )
            except HTTPError as http_error:
                LOG.warning(
                    "Fetching complete latencies for component %s "
                    "failed with status code %s",
                    spout_component,
                    str(http_error.response.status_code),
                )
                continue

            if output is None:
                output = spout_complete_latencies
            else:
                output = output.append(spout_complete_latencies, ignore_index=True)

        return output
Example 24
    def get_execute_counts(
        self,
        topology_id: str,
        cluster: str,
        environ: str,
        start: dt.datetime,
        end: dt.datetime,
        **kwargs: Union[str, int, float],
    ) -> pd.DataFrame:
        """ Gets the execute counts, as a timeseries, for every instance of
        each of the components of the specified topology. The start and end
        times define the window over which to gather the metrics. The window
        duration should be less than 3 hours as this is the limit of what the
        Topology master stores.

        Arguments:
            topology_id (str):    The topology identification string.
            start (datetime):    UTC datetime instance for the start of the
                                    metrics gathering period.
            end (datetime):  UTC datetime instance for the end of the
                                metrics gathering period.
            **cluster (str):  The cluster the topology is running in.
            **environ (str):  The environment the topology is running in (eg.
                              prod, devel, test, etc).

        Returns:
            pandas.DataFrame:   A DataFrame containing the service time
            measurements as a timeseries. Each row represents a measurement
            (aggregated over one minute) with the following columns:

            * timestamp: The UTC timestamp for the metric,
            * component: The component this metric comes from,
            * task: The instance ID number for the instance that the metric
              comes from,
            * container: The ID for the container this metric comes from.
            * stream: The name of the incoming stream from which the tuples
              that lead to this metric came from,
            * source_component: The name of the component the stream's source
              instance belongs to,
            * execute_count: The execute count during the metric time period.
        """
        LOG.info(
            "Getting execute counts for topology %s over a %d second "
            "period from %s to %s",
            topology_id,
            (end - start).total_seconds(),
            start.isoformat(),
            end.isoformat(),
        )

        logical_plan, start_time, end_time = self._query_setup(
            topology_id, cluster, environ, start, end
        )

        output: pd.DataFrame = None

        for component in logical_plan["bolts"].keys():

            try:
                comp_execute_counts: pd.DataFrame = self.get_component_execute_counts(
                    topology_id,
                    cluster,
                    environ,
                    component,
                    start_time,
                    end_time,
                    logical_plan,
                )
            except HTTPError as http_error:
                LOG.warning(
                    "Fetching execute counts for component %s failed "
                    "with status code %s",
                    component,
                    str(http_error.response.status_code),
                )
                continue

            if output is None:
                output = comp_execute_counts
            else:
                output = output.append(comp_execute_counts, ignore_index=True)

        return output
Example 25
def _from_date(obj: datetime):
    return obj.isoformat()
Example 26
    def log_same(self, t0: datetime.datetime, t1: datetime.datetime,
                 code: str) -> None:
        with open(os.path.join(self.log_path, '000000_nochange_queries.yaml'),
                  'a') as f:
            body = {'t0': t0.isoformat(), 't1': t1.isoformat(), 'code': code}
            f.write(yaml.dump(body, explicit_start=True))
Example 27
def datetime_encoder(date: datetime.datetime):
    representation = date.isoformat()
    if representation.endswith('+00:00'):
        representation = representation[:-6] + 'Z'
    return representation
Example 28
def isoformat(datetime: dt.datetime) -> str:
    """Return the ISO8601-formatted representation of a datetime object.

    :param datetime datetime: The datetime.
    """
    return datetime.isoformat()
Example 29
    def to_json(self, value: datetime) -> str:
        return value.isoformat(timespec='microseconds')
Example 30
    def _build_token_value(self, user_id: str, timestamp: datetime,
                           secret: str) -> bytes:
        time_bytes = timestamp.isoformat().encode('UTF-8')
        secret_bytes = secret.encode('UTF-8', 'replace')
        user_bytes = user_id.encode('UTF-8', 'replace')
        return time_bytes + secret_bytes + user_bytes
Example 31
    def format_time(self, dt: datetime):
        if self._system:
            return self._system.format_time(dt)
        return dt.isoformat(sep=" ", timespec="seconds") + " UTC"
Example 32
    def get_arrival_rates(
        self,
        topology_id: str,
        cluster: str,
        environ: str,
        start: dt.datetime,
        end: dt.datetime,
        **kwargs: Union[str, int, float],
    ) -> pd.DataFrame:
        """ Gets the arrival rates, as a timeseries, for every instance of each
        of the bolt components of the specified topology. The start and end
        times define the window over which to gather the metrics. The window
        duration should be less than 3 hours as this is the limit of what the
        Topology master stores.

        Arguments:
            topology_id (str):    The topology identification string.
            start (datetime):    utc datetime instance for the start of the
                                    metrics gathering period.
            end (datetime):  utc datetime instance for the end of the
                                metrics gathering period.
            **cluster (str):  The cluster the topology is running in.
            **environ (str):  The environment the topology is running in (eg.
                              prod, devel, test, etc).

        Returns:
            pandas.DataFrame:   A DataFrame containing the arrival rate
            measurements as a timeseries. Each row represents a measurement
            (aggregated over one minute) with the following columns:

            * timestamp: The UTC timestamp for the metric,
            * component: The component this metric comes from,
            * task: The instance ID number for the instance that the metric
              comes from,
            * container: The ID for the container this metric comes from,
            * arrival_count: The number of arrivals (across all streams) at
              each instance.
            * arrival_rate_tps: The arrival rate at each instance (across all
              streams) in units of tuples per second.
        """
        LOG.info(
            "Getting arrival rates for topology %s over a %d second "
            "period from %s to %s",
            topology_id,
            (end - start).total_seconds(),
            start.isoformat(),
            end.isoformat(),
        )

        execute_counts: pd.DataFrame = self.get_execute_counts(
            topology_id, cluster, environ, start, end
        )

        arrivals: pd.DataFrame = (
            execute_counts.groupby(["task", "component", "timestamp"])
            .sum()
            .reset_index()
            .rename(index=str, columns={"execute_count": "arrival_count"})
        )

        arrivals["arrival_rate_tps"] = arrivals["arrival_count"] / DEFAULT_METRIC_PERIOD

        return arrivals
Example 33
def format_datetime(dt: datetime, with_tz=True, timespec="microseconds") -> str:
    dt = normalise_dt(dt)
    dt = dt.isoformat(timespec=timespec)
    if with_tz:
        dt = dt + "Z"
    return dt
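normalise_dt is not shown in this excerpt. Assuming it converts any input to a naive UTC datetime, the helper behaves roughly like this sketch:

from datetime import datetime, timezone

def normalise_dt(dt: datetime) -> datetime:
    # Assumed behaviour: convert aware values to UTC and drop tzinfo; pass naive values through.
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt

def format_datetime(dt: datetime, with_tz=True, timespec="microseconds") -> str:
    out = normalise_dt(dt).isoformat(timespec=timespec)
    return out + "Z" if with_tz else out

print(format_datetime(datetime(2024, 5, 1, 12, 30, tzinfo=timezone.utc)))
# 2024-05-01T12:30:00.000000Z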
Example 34
    async def search(self, start: datetime, end: datetime) -> bool:
        return await self.es.search(index=self.index, body={'query': {'range': {'@timestamp': {'gte': start.isoformat(), 'lt': 'now'}}}})
Example 35
def generate_bullets(search_start: datetime, detailed: bool = False):
    akst = tz.tzoffset('AKST', timedelta(hours=-9))
    aknow = datetime.now(akst)
    search_start = search_start.astimezone(akst)

    meta = {
        'title':
        'Tools Team bullets',
        'description':
        f"Tools team bullets for {search_start.isoformat(timespec='seconds')}"
        f" through {aknow.isoformat(timespec='seconds')}",
    }
    log.info(f'Generating {meta["description"]}')

    gh = GhApi()
    release_details = {}
    dev_prs = {}
    open_prs = {}
    opened_issues = {}
    for repo in tqdm(gh.repos.list_for_org('ASFHyP3')):
        # FIXME: Returns issues and PRs... simpler to filter this one list or make the three calls?
        for issue in gh.issues.list_for_repo(
                repo.owner.login,
                repo.name,
                state='open',
                sort='created',
                direction='desc',
                since=search_start.isoformat(timespec='seconds')):
            if issue.get('pull_request') is None:
                opened_issues[issue.id] = util.get_details(issue)

        try:
            last_release = parse_date(
                gh.repos.get_latest_release(repo.owner.login,
                                            repo.name).created_at)
            for release in gh.repos.list_releases(repo.owner.login, repo.name):
                created_at = parse_date(release.created_at)
                if created_at >= search_start:
                    release_details[
                        release.target_commitish] = util.get_details(release)
                else:
                    break
        except HTTP404NotFoundError:
            last_release = search_start

        # FIXME: might be able to use issues.list_for_repo with since=... to simplify logic
        for pull in gh.pulls.list(repo.owner.login,
                                  repo.name,
                                  state='closed',
                                  base='develop',
                                  sort='updated',
                                  direction='desc'):
            merged_at = pull.get('merged_at')
            if merged_at and parse_date(merged_at) > max(
                    search_start, last_release):
                dev_prs[pull.merge_commit_sha] = util.get_details(pull)

        for pull in gh.pulls.list(repo.owner.login,
                                  repo.name,
                                  state='open',
                                  sort='created',
                                  direction='desc'):
            open_prs[pull.head.sha] = util.get_details(pull)

    template = 'report_detailed.md.j2' if detailed else 'report.md.j2'
    report_name = 'report_detailed.md' if detailed else 'report.md'
    report = util.render_template(
        template,
        releases=release_details,
        meta=meta,
        dev_prs=dev_prs,
        open_prs=open_prs,
        opened_issues=opened_issues,
    )
    with open(report_name, 'w') as f:
        f.write(report)
Example 36
    def dump(self, value: datetime) -> str:
        return value.isoformat()
Example 37
File: codec.py Project: n9code/untt
def datetime_to_json(data: datetime.datetime) -> str:
    return data.isoformat()
Example 38
def serialize_datetime(value: datetime) -> str:
    return value.isoformat()
    def create_bucket(self, bucket_id: str, type: str, client: str, hostname: str,
                      created: datetime = datetime.now(timezone.utc), name: Optional[str] = None) -> "Bucket":
        self.logger.info("Creating bucket '{}'".format(bucket_id))
        self.storage_strategy.create_bucket(bucket_id, type, client, hostname, created.isoformat(), name=name)
        return self[bucket_id]
Example 40
    def default(self, to_encode: datetime) -> str:
        return to_encode.isoformat()
    def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
        return (
            """TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""").format(
                dttm.isoformat())
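For illustration, the generated Oracle literal for a hypothetical timestamp looks like the following; note that isoformat() omits the fractional part entirely when the microsecond is zero, in which case the string would not match the 'ff6' mask:

from datetime import datetime

dttm = datetime(2024, 5, 1, 12, 30, 15, 123456)
print("""TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')""".format(dttm.isoformat()))
# TO_TIMESTAMP('2024-05-01T12:30:15.123456', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')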
Example 42
    def _datetime_to_iso(self, d: datetime) -> str:
        isoformat = d.isoformat()
        return isoformat
Example 43
def purge_old_data(
    instance: Recorder,
    purge_before: datetime,
    repack: bool,
    apply_filter: bool = False,
    events_batch_size: int = DEFAULT_EVENTS_BATCHES_PER_PURGE,
    states_batch_size: int = DEFAULT_STATES_BATCHES_PER_PURGE,
) -> bool:
    """Purge events and states older than purge_before.

    Cleans up a timeframe of an hour, based on the oldest record.
    """
    _LOGGER.debug(
        "Purging states and events before target %s",
        purge_before.isoformat(sep=" ", timespec="seconds"),
    )
    using_sqlite = instance.dialect_name == SupportedDialect.SQLITE

    with session_scope(session=instance.get_session()) as session:
        # Purge a max of MAX_ROWS_TO_PURGE, based on the oldest states or events record
        has_more_to_purge = False
        if _purging_legacy_format(session):
            _LOGGER.debug(
                "Purge running in legacy format as there are states with event_id remaining"
            )
            has_more_to_purge |= _purge_legacy_format(
                instance, session, purge_before, using_sqlite
            )
        else:
            _LOGGER.debug(
                "Purge running in new format as there are NO states with event_id remaining"
            )
            # Once we are done purging legacy rows, we use the new method
            has_more_to_purge |= _purge_states_and_attributes_ids(
                instance, session, states_batch_size, purge_before, using_sqlite
            )
            has_more_to_purge |= _purge_events_and_data_ids(
                instance, session, events_batch_size, purge_before, using_sqlite
            )

        statistics_runs = _select_statistics_runs_to_purge(session, purge_before)
        short_term_statistics = _select_short_term_statistics_to_purge(
            session, purge_before
        )
        if statistics_runs:
            _purge_statistics_runs(session, statistics_runs)

        if short_term_statistics:
            _purge_short_term_statistics(session, short_term_statistics)

        if has_more_to_purge or statistics_runs or short_term_statistics:
            # Return false, as we might not be done yet.
            _LOGGER.debug("Purging hasn't fully completed yet")
            return False

        if apply_filter and _purge_filtered_data(instance, session) is False:
            _LOGGER.debug("Cleanup filtered data hasn't fully completed yet")
            return False

        _purge_old_recorder_runs(instance, session, purge_before)
    if repack:
        repack_database(instance)
    return True
def clone_jhu_at_time(checkout_time: datetime.datetime, workdir: os.PathLike):
    """Obtain history JHU dataset by git cloning at a time point in the past.

    Parameters
    ----------
    checkout_time : datetime.datetime
        A timezone-aware datetime object, representing a time point in the past
    workdir : os.PathLike
        a directory to which the data may be cloned. For example a tempfile.TemporaryDirectory
        
    Returns
    -------
    fp_confirmed : os.PathLike
        path to time_series_covid19_confirmed_global.csv
    fp_deaths : os.PathLike
        path to time_series_covid19_deaths_global.csv
    fp_recovered : os.PathLike
        path to time_series_covid19_recovered_global.csv 
    """
    if not checkout_time.tzinfo:
        raise ValueError('The [checkout_time] must be timezone-aware!')

    # clone
    repodir = pathlib.Path(workdir, 'jhu_repo')
    repo = 'https://github.com/CSSEGISandData/COVID-19'
    _log.info(f'Cloning "{repo}" to "{repodir}"')
    _log.debug(
        subprocess.run(['git', 'clone', repo, 'jhu_repo'],
                       cwd=workdir,
                       stdout=subprocess.PIPE,
                       encoding='utf8').stdout)

    # find the commit hash that was relevant at the selected date
    checkout_time = checkout_time.isoformat()
    _log.info(f'Finding the last commit before {checkout_time}')
    commit_id = subprocess.run(
        ['git', 'rev-list', '-1', f'--until="{checkout_time}"', 'master'],
        cwd=repodir,
        stdout=subprocess.PIPE,
        encoding='utf8').stdout.strip()
    if len(commit_id) != 40:
        raise Exception(
            f'Failed to find a valid commit id before the specified checkout_time ({checkout_time})'
        )

    _log.info(f'Checking out commit {commit_id}')
    _log.debug(
        subprocess.run(
            ['git', 'checkout', commit_id],
            cwd=repodir,
            stdout=subprocess.PIPE,
            encoding='utf8',
            # this step is important - we must not fail silently
            check=True))

    dp_ts = pathlib.Path(repodir, 'csse_covid_19_data',
                         'csse_covid_19_time_series')
    fp_confirmed = pathlib.Path(dp_ts,
                                'time_series_covid19_confirmed_global.csv')
    fp_deaths = pathlib.Path(dp_ts, 'time_series_covid19_deaths_global.csv')
    fp_recovered = pathlib.Path(dp_ts,
                                'time_series_covid19_recovered_global.csv')
    return fp_confirmed, fp_deaths, fp_recovered
Example 45
def get_spout_state(
        metrics_client: HeronMetricsClient,
        topology_id: str,
        cluster: str,
        environ: str,
        tracker_url: str,
        start: dt.datetime,
        end: dt.datetime,
        metrics_sample_period: float,
        summary_method: str = "median",
        **kwargs: Union[str, int, float]) -> Dict[int, Dict[str, float]]:
    """ Helper script that will fetch the median or mean spout emission rates
    and format them into the dictionary structure expected by the topology
    performance prediction methods.

    Arguments:
        metrics_client (HeronMetricsClient):    The client for the metrics
                                                database.
        topology_id (str):  The topology identification string.
        cluster (str):  The cluster that the topology is running on.
        environ (str): The environment that the topology is running in.
        tracker_url (str):  The URL for the Heron Tracker API.
        start (datetime):   The UTC datetime for the start of the metrics
                            gathering period.
        end (datetime): The UTC datetime for the end of the metrics
                        gathering period.
        metrics_sample_period (float):  The period that metrics are sampled
                                        into. eg 60 secs (1 min), 300 secs
                                        (5 mins).
        summary_method (str):   The method to use to summarise the emit counts.
                                Either "mean" or "median". Defaults to median.
        **kwargs:   Any additional keyword arguments required by the metrics
                    client.

    Returns:
        Dict[int, Dict[str, float]]:    A dictionary mapping from task ID to a
        dict that maps from output stream name to an emission rate in tuples
        per second.
    """

    LOG.info(
        "Getting spout emission state dictionary for topology %s over a"
        "period of %d seconds from %s to %s", topology_id,
        (end - start).total_seconds(), start.isoformat(), end.isoformat())

    lplan: Dict[str, Any] = tracker.get_logical_plan(tracker_url, cluster,
                                                     environ, topology_id)

    emit_counts: pd.DataFrame = metrics_client.get_emit_counts(
        topology_id, cluster, environ, start, end, **kwargs)

    spout_groups: pd.core.groupby.DataFrameGroupBy = \
        (emit_counts[emit_counts["component"].isin(lplan["spouts"])]
         .groupby(["task", "stream"]))

    if summary_method == "median":

        spout_emits: pd.Series = spout_groups.emit_count.median()

    elif summary_method == "mean":

        spout_emits = spout_groups.emit_count.mean()

    else:
        msg: str = f"Unknown summary method: {summary_method}"
        LOG.error(msg)
        raise RuntimeError(msg)

    output: DefaultDict[int, Dict[str, float]] = defaultdict(dict)

    for (task_id, stream), emit_count in spout_emits.iteritems():

        output[task_id][stream] = emit_count / metrics_sample_period

    return dict(output)
Example 46
def calculate(
        graph_client: GremlinClient, metrics_client: HeronMetricsClient,
        topology_id: str, cluster: str, environ: str, topology_ref: str,
        start: dt.datetime, end: dt.datetime, io_bucket_length: int,
        tracker_url: str, spout_state: Dict[int, Dict[str, float]],
        **kwargs: Union[str, int, float]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """

    Arguments:
        graph_client (GremlinClient):   The client instance for the graph database.
        metrics_client (HeronMetricsClient):    The client instance for the metrics
                                                database.
        topology_id (str):  The topology identification string.
        cluster: (str): The cluster the topology is running on.
        environ (str): The environment the topology is running in.
        topology_ref (str): The reference string for the topology physical graph to be
                            used in the calculations.
        start (dt.datetime):    The UTC datetime instance representing the start of the
                                metric gathering window.
        end (dt.datetime):  The UTC datetime instance representing the end of the metric
                            gathering window.
        io_bucket_length (int): The length in seconds that metrics should be aggregated
                                for use in IO ratio calculations.
        tracker_url (str):  The URL for the Heron Tracker API
        spout_state (dict): A dictionary mapping from instance task id to a dictionary
                            that maps from output stream name to the output rate for that
                            spout instance. The units of this rate (TPS, TPM etc.) will
                            be the same as those of the calculated arrival rates.
        **kwargs:   Any additional key word arguments required by the metrics client query
                    methods. NOTE: This is passed to a cached method so all kwargs must be
                    hashable. Un-hashable arguments will be removed before being supplied.

    Returns:
        pd.DataFrame:   A DataFrame containing the arrival rate at each instance.
        pd.DataFrame:   A DataFrame containing the input and output rate of each stream
                        manager.

    Raises:
        RuntimeError:   If there is no entry in the graph database for the supplied
                        topology id and ref.
    """

    # First check that there is a physical graph for the supplied reference in
    # the graph database
    graph_client.raise_if_missing(topology_id, topology_ref)

    LOG.info(
        "Calculating arrival rates for topology %s reference %s using "
        "metrics from a %d second period from %s to %s",
        topology_id,
        topology_ref,
        (end - start).total_seconds(),
        start.isoformat(),
        end.isoformat(),
    )

    i2i_rps, levels, coefficients, sending_instances, receiving_instances = _setup_arrival_calcs(
        metrics_client, graph_client, topology_id, cluster, environ,
        topology_ref, start, end, io_bucket_length, tracker_url, **kwargs)

    topo_traversal: GraphTraversalSource = graph_client.topology_subgraph(
        topology_id, topology_ref)

    arrival_rates: ARRIVAL_RATES = defaultdict(lambda: defaultdict(float))
    output_rates: OUTPUT_RATES = defaultdict(dict)
    output_rates.update(spout_state)

    # Step through the tree levels and calculate the output from each level and
    # the arrivals at the next. Skip the final level as its arrival rates are
    # calculated in the previous step and it has no outputs.
    for level_number, level in enumerate(levels[:-1]):

        LOG.debug("Processing topology level %d", level_number)

        if level_number != 0:
            # If this is not a spout level then we need to calculate the output
            # from the instances in this level.
            for source_vertex in level:

                output_rates = _calculate_outputs(
                    topo_traversal,
                    source_vertex,
                    arrival_rates,
                    output_rates,
                    coefficients,
                )

        # Calculate the arrival rates at the instances down stream on the next
        # level down
        for source_vertex in level:

            arrival_rates = _calculate_arrivals(topo_traversal, source_vertex,
                                                arrival_rates, output_rates,
                                                i2i_rps)

    # At this stage we have the output and arrival amount for all logically
    # connected elements. We now need to map these on to the stream managers to
    # calculate their incoming and outgoing tuple rates.
    strmgr_in_out: pd.DataFrame = _calc_strmgr_in_out(sending_instances,
                                                      receiving_instances,
                                                      output_rates,
                                                      arrival_rates)

    return _convert_arrs_to_df(arrival_rates), strmgr_in_out
Example 47
def transform_date_from_state(dt: datetime.datetime) -> str:
    dt_str = dt.isoformat()
    dt_str = re.sub(r"\+00:00$", "Z", dt_str)
    return dt_str
Example 48
def _datetime_to_json(obj: datetime) -> JsonType:
    return obj.isoformat()
Example 49
def dt_to_str(dt: datetime):
    return dt.isoformat()
Example 50
def get_dt_jira_format(dt: datetime.datetime):
    dt_str = dt.isoformat(timespec='milliseconds')
    components = dt_str.split('+')
    dt_str = components[0] + '+0000'
    return dt_str
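A short illustration, assuming the input is an aware UTC (or naive UTC) datetime; a non-UTC offset would be silently mislabelled as +0000 rather than converted:

from datetime import datetime, timezone

dt = datetime(2024, 5, 1, 12, 30, 15, 123456, tzinfo=timezone.utc)
dt_str = dt.isoformat(timespec='milliseconds').split('+')[0] + '+0000'
print(dt_str)   # 2024-05-01T12:30:15.123+0000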
Example 51
def serialize_datetime(when: dt.datetime) -> str:
    """Return a serialized datetime string."""

    return when.isoformat(" ")
Example 52
def datetime_to_serializable(dt: datetime.datetime) -> str:
    return dt.isoformat()
Example 53
def prettydate(d: datetime.datetime) -> str:
    """Jinja filter to convert datetime object to pretty text."""
    return TIME_FORMAT.format(ts=d.isoformat(), t=d.strftime(PRETTY_TIME_FORMAT))
Example 54
    def last_boot(self, value: datetime) -> None:
        """Set last boot datetime."""
        self._data[ATTR_LAST_BOOT] = value.isoformat()
Example 55
    def to_str(self, time: datetime.datetime):
        return time.isoformat()
def convert_datetime(value: datetime.datetime):
    return bytearray(value.isoformat().encode())
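The encoded bytes can be turned back into a datetime with fromisoformat(), a sketch of the round trip:

from datetime import datetime, timezone

value = datetime(2024, 5, 1, 12, 30, tzinfo=timezone.utc)
encoded = bytearray(value.isoformat().encode())
decoded = datetime.fromisoformat(encoded.decode())
assert decoded == value
print(decoded)   # 2024-05-01 12:30:00+00:00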