Esempio n. 1
0
    def get_data(self, df):
        """Turn the two-column (timestamp, metric) frame into a payload of
        epoch-second keyed values plus date-range metadata derived from the
        form's domain granularity."""
        fd = self.form_data

        df.columns = ["timestamp", "metric"]
        # Keys are epoch seconds rendered as strings; ``.value`` is in
        # nanoseconds, hence the division by 10**9.
        timestamps = {}
        for record in df.to_dict("records"):
            key = str(record["timestamp"].value / 10**9)
            timestamps[key] = record.get("metric")

        start = utils.parse_human_datetime(fd.get("since"))
        end = utils.parse_human_datetime(fd.get("until"))
        domain = fd.get("domain_granularity")
        delta = rdelta.relativedelta(end, start)
        elapsed_secs = (end - start).total_seconds()

        # Number of domain-sized buckets spanned by [start, end].
        if domain == "year":
            range_ = delta.years + 1
        elif domain == "month":
            range_ = 12 * delta.years + delta.months + 1
        elif domain == "week":
            range_ = 53 * delta.years + delta.weeks + 1
        elif domain == "day":
            range_ = elapsed_secs // (24 * 60 * 60) + 1
        else:
            # Any other granularity falls back to hourly buckets.
            range_ = elapsed_secs // (60 * 60) + 1

        return {
            "timestamps": timestamps,
            "start": start,
            "domain": domain,
            "subdomain": fd.get("subdomain_granularity"),
            "range": range_,
        }
Esempio n. 2
0
    def get_data(self, df):
        """Build the response payload: metric values keyed by epoch seconds,
        plus the start date and bucket count for the selected domain
        granularity."""
        form_data = self.form_data

        df.columns = ["timestamp", "metric"]
        # ``.value`` is nanoseconds since epoch; convert to (float) seconds.
        timestamps = {
            str(rec["timestamp"].value / 10**9): rec.get("metric")
            for rec in df.to_dict("records")
        }

        start = utils.parse_human_datetime(form_data.get("since"))
        end = utils.parse_human_datetime(form_data.get("until"))
        domain = form_data.get("domain_granularity")
        diff_delta = rdelta.relativedelta(end, start)
        diff_secs = (end - start).total_seconds()

        # Dispatch table: bucket count per supported domain granularity;
        # anything unrecognized falls back to hourly buckets.
        bucket_counts = {
            "year": lambda: diff_delta.years + 1,
            "month": lambda: diff_delta.years * 12 + diff_delta.months + 1,
            "week": lambda: diff_delta.years * 53 + diff_delta.weeks + 1,
            "day": lambda: diff_secs // 86400 + 1,
        }
        range_ = bucket_counts.get(domain, lambda: diff_secs // 3600 + 1)()

        return {
            "timestamps": timestamps,
            "start": start,
            "domain": domain,
            "subdomain": form_data.get("subdomain_granularity"),
            "range": range_,
        }
Esempio n. 3
0
    def values_for_column(self, column_name, limit=10000):
        """Retrieve some values for the given column"""
        logging.info(
            'Getting values for columns [{}] limited to [{}]'.format(
                column_name, limit))
        # TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
        if self.fetch_values_from:
            from_dttm = utils.parse_human_datetime(self.fetch_values_from)
        else:
            # No configured lower bound: scan from the Unix epoch.
            from_dttm = datetime(1970, 1, 1)

        interval = from_dttm.isoformat() + '/' + datetime.now().isoformat()
        qry = {
            'datasource': self.datasource_name,
            'granularity': 'all',
            'intervals': interval,
            'aggregations': {'count': count('count')},
            'dimension': column_name,
            'metric': 'count',
            'threshold': limit,
        }

        client = self.cluster.get_pydruid_client()
        client.topn(**qry)
        df = client.export_pandas()
        return [row[column_name] for row in df.to_records(index=False)]
Esempio n. 4
0
    def values_for_column(self, column_name, limit=10000):
        """Retrieve some values for the given column"""
        logging.info(
            'Getting values for columns [{}] limited to [{}]'
            .format(column_name, limit))
        # TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
        # Lower time bound: configured fetch origin, else the Unix epoch.
        from_dttm = (
            utils.parse_human_datetime(self.fetch_values_from)
            if self.fetch_values_from
            else datetime(1970, 1, 1))

        query_params = dict(
            datasource=self.datasource_name,
            granularity='all',
            intervals=from_dttm.isoformat() + '/' + datetime.now().isoformat(),
            aggregations=dict(count=count('count')),
            dimension=column_name,
            metric='count',
            threshold=limit,
        )

        pydruid_client = self.cluster.get_pydruid_client()
        pydruid_client.topn(**query_params)
        frame = pydruid_client.export_pandas()
        return [rec[column_name] for rec in frame.to_records(index=False)]
Esempio n. 5
0
    def granularity(period_name, timezone=None, origin=None):
        """Build a Druid granularity spec from a human-readable period name.

        :param period_name: a label like ``'1 hour'``, an ISO 8601 period
            string (e.g. ``'P1D'``), a non-string duration passed through
            as-is, or ``'all'``/falsy for no bucketing
        :param timezone: optional timezone identifier forwarded to Druid
        :param origin: optional human-readable datetime anchoring the periods
        :returns: the string ``'all'`` or a granularity dict for a Druid query
        """
        if not period_name or period_name == 'all':
            return 'all'
        iso_8601_dict = {
            '5 seconds': 'PT5S',
            '30 seconds': 'PT30S',
            '1 minute': 'PT1M',
            '5 minutes': 'PT5M',
            '1 hour': 'PT1H',
            '6 hour': 'PT6H',
            'one day': 'P1D',
            '1 day': 'P1D',
            '7 days': 'P7D',
            'week': 'P1W',
            'week_starting_sunday': 'P1W',
            'week_ending_saturday': 'P1W',
            'month': 'P1M',
        }

        granularity = {'type': 'period'}
        # Honor the caller-supplied timezone instead of ignoring the
        # parameter and hard-coding one; when omitted, no timeZone is set
        # and Druid applies its own default.
        if timezone:
            granularity['timeZone'] = timezone

        if origin:
            dttm = utils.parse_human_datetime(origin)
            granularity['origin'] = dttm.isoformat()

        if period_name in iso_8601_dict:
            granularity['period'] = iso_8601_dict[period_name]
            if period_name in ('week_ending_saturday', 'week_starting_sunday'):
                # use Sunday as start of the week
                granularity['origin'] = '2016-01-03T00:00:00'
        elif not isinstance(period_name, string_types):
            granularity['type'] = 'duration'
            granularity['duration'] = period_name
        elif period_name.startswith('P'):
            # identify if the string is the iso_8601 period
            granularity['period'] = period_name
        else:
            granularity['type'] = 'duration'
            granularity['duration'] = utils.parse_human_timedelta(
                period_name).total_seconds() * 1000
        return granularity
Esempio n. 6
0
    def granularity(period_name, timezone=None, origin=None):
        """Translate a human-readable period name into a Druid granularity
        spec, or return ``'all'`` when no bucketing is requested."""
        if not period_name or period_name == 'all':
            return 'all'

        period_map = {
            '5 seconds': 'PT5S',
            '30 seconds': 'PT30S',
            '1 minute': 'PT1M',
            '5 minutes': 'PT5M',
            '1 hour': 'PT1H',
            '6 hour': 'PT6H',
            'one day': 'P1D',
            '1 day': 'P1D',
            '7 days': 'P7D',
            'week': 'P1W',
            'week_starting_sunday': 'P1W',
            'week_ending_saturday': 'P1W',
            'month': 'P1M',
        }

        spec = {'type': 'period'}
        if timezone:
            spec['timeZone'] = timezone
        if origin:
            spec['origin'] = utils.parse_human_datetime(origin).isoformat()

        if period_name in period_map:
            spec['period'] = period_map[period_name]
            if period_name in ('week_ending_saturday', 'week_starting_sunday'):
                # use Sunday as start of the week
                spec['origin'] = '2016-01-03T00:00:00'
        elif not isinstance(period_name, string_types):
            # Non-string values are treated as raw durations.
            spec['type'] = 'duration'
            spec['duration'] = period_name
        elif period_name.startswith('P'):
            # identify if the string is the iso_8601 period
            spec['period'] = period_name
        else:
            spec['type'] = 'duration'
            millis = utils.parse_human_timedelta(
                period_name).total_seconds() * 1000
            spec['duration'] = millis
        return spec
Esempio n. 7
0
 def increment_timestamp(ts):
     """Parse *ts*, pin it to the Druid timezone, and shift it forward by
     ``time_offset`` milliseconds (both captured from the enclosing scope)."""
     pinned = utils.parse_human_datetime(ts).replace(tzinfo=DRUID_TZ)
     return pinned + timedelta(milliseconds=time_offset)
Esempio n. 8
0
 def increment_timestamp(ts):
     """Return *ts* parsed in the Druid timezone and advanced by
     ``time_offset`` milliseconds (closure variables from the outer scope)."""
     return (utils.parse_human_datetime(ts).replace(tzinfo=DRUID_TZ)
             + timedelta(milliseconds=time_offset))
Esempio n. 9
0
    def query_obj(self):
        """Assemble the dictionary of query parameters for this slice."""
        form_data = self.form_data
        groupby = form_data.get("groupby") or []
        metrics = form_data.get("metrics") or ['count']

        # extra_filters are temporary/contextual filters that are external
        # to the slice definition. We use those for dynamic interactive
        # filters like the ones emitted by the "Filter Box" visualization
        extra_filters = self.get_extra_filters()
        granularity = (
            form_data.get("granularity") or form_data.get("granularity_sqla"))
        limit = int(form_data.get("limit") or 0)
        timeseries_limit_metric = form_data.get("timeseries_limit_metric")
        row_limit = int(form_data.get("row_limit") or config.get("ROW_LIMIT"))

        # __form and __to are special extra_filters that target time
        # boundaries. The rest of extra_filters are simple
        # [column_name in list_of_values]. `__` prefix is there to avoid
        # potential conflicts with column that would be named `from` or `to`
        since = (
            extra_filters.get('__from')
            or form_data.get("since", "1 year ago"))

        from_dttm = utils.parse_human_datetime(since)
        now = datetime.now()
        if from_dttm > now:
            # A start in the future is mirrored back into the past.
            from_dttm = now - (from_dttm - now)

        until = extra_filters.get('__to') or form_data.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm > to_dttm:
            raise Exception("From date cannot be larger than to date")

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        extras = {
            'where': form_data.get("where", ''),
            'having': form_data.get("having", ''),
            'having_druid': form_data.get('having_filters', []),
            'time_grain_sqla': form_data.get("time_grain_sqla", ''),
            'druid_time_origin': form_data.get("druid_time_origin", ''),
        }
        filters = form_data.get('filters', [])
        for col, vals in self.get_extra_filters().items():
            if not col or not vals or col.startswith('__'):
                continue
            if col in self.datasource.filterable_column_names:
                # Quote values with comma to avoid conflict
                filters.append({
                    'col': col,
                    'op': 'in',
                    'val': vals,
                })
        return {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'is_timeseries': self.is_timeseries,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': filters,
            'timeseries_limit': limit,
            'extras': extras,
            'timeseries_limit_metric': timeseries_limit_metric,
        }
Esempio n. 10
0
    def query_obj(self):
        """Collect form data and contextual filters into a query dict."""
        fd = self.form_data
        groupby = fd.get("groupby") or []
        metrics = fd.get("metrics") or ['count']

        # extra_filters are temporary/contextual filters that are external
        # to the slice definition. We use those for dynamic interactive
        # filters like the ones emitted by the "Filter Box" visualization
        extra_filters = self.get_extra_filters()
        granularity = fd.get("granularity") or fd.get("granularity_sqla")
        limit = int(fd.get("limit") or 0)
        timeseries_limit_metric = fd.get("timeseries_limit_metric")
        row_limit = int(fd.get("row_limit") or config.get("ROW_LIMIT"))

        # __form and __to are special extra_filters that target time
        # boundaries. The rest of extra_filters are simple
        # [column_name in list_of_values]. `__` prefix is there to avoid
        # potential conflicts with column that would be named `from` or `to`
        since = extra_filters.get('__from') or fd.get("since", "1 year ago")

        from_dttm = utils.parse_human_datetime(since)
        now = datetime.now()
        if from_dttm > now:
            # Reflect a future start time back across "now".
            from_dttm = now - (from_dttm - now)

        until = extra_filters.get('__to') or fd.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm > to_dttm:
            raise Exception("From date cannot be larger than to date")

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        having_druid = (
            fd.get('having_filters') if 'having_filters' in fd else [])
        extras = {
            'where': fd.get("where", ''),
            'having': fd.get("having", ''),
            'having_druid': having_druid,
            'time_grain_sqla': fd.get("time_grain_sqla", ''),
            'druid_time_origin': fd.get("druid_time_origin", ''),
        }

        filters = fd['filters'] if 'filters' in fd else []
        for col, vals in self.get_extra_filters().items():
            if not (col and vals) or col.startswith('__'):
                continue
            if col in self.datasource.filterable_column_names:
                # Quote values with comma to avoid conflict
                filters += [{'col': col, 'op': 'in', 'val': vals}]

        return dict(
            granularity=granularity,
            from_dttm=from_dttm,
            to_dttm=to_dttm,
            is_timeseries=self.is_timeseries,
            groupby=groupby,
            metrics=metrics,
            row_limit=row_limit,
            filter=filters,
            timeseries_limit=limit,
            extras=extras,
            timeseries_limit_metric=timeseries_limit_metric,
        )