Exemple #1
0
class ESAlchemy(object):
    """Row-oriented view over an ``HQESQuery`` for a single index.

    Hits returned by the query are converted to ``ESAlchemyRow`` objects whose
    columns come from ``config.indicators.get_columns()``.
    """

    def __init__(self, index_name, config):
        """Keep ``index_name`` and ``config`` and build the query for the index."""
        self.index_name = index_name
        self.config = config
        self.es = HQESQuery(index_name)

    def __getitem__(self, sliced_or_int):
        """Index or slice the underlying query and return converted rows.

        An integer index returns the first converted row; anything else
        (e.g. a slice) returns the list of converted rows.
        """
        # Function-local import so this block does not rely on a module-level
        # import being present.
        import numbers

        hits = self.es[sliced_or_int]
        hits = [self._hit_to_row(hit) for hit in hits]
        # ``long`` is not defined on Python 3, so ``(int, long)`` raised
        # NameError there. ``numbers.Integral`` matches ``int`` on both major
        # versions and ``long`` on Python 2.
        if isinstance(sliced_or_int, numbers.Integral):
            return hits[0]
        return hits

    def _hit_to_row(self, hit):
        """Convert one hit (indexable by database column name) to an ``ESAlchemyRow``."""
        def mapping_to_datatype(column, value):
            # Falsy values (None, '', 0, ...) are passed through untouched.
            if not value:
                return value

            datatype = column.datatype
            if datatype == 'datetime':
                # Accept timestamps both without and with fractional seconds.
                try:
                    return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")
                except ValueError:
                    return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
            elif datatype == 'date':
                # NOTE: strptime yields a ``datetime`` (midnight), not a ``date``.
                return datetime.datetime.strptime(value, "%Y-%m-%d")
            return value

        return ESAlchemyRow(self.column_ordering, {
            col.database_column_name: mapping_to_datatype(col, hit[col.database_column_name])
            for col in self.columns
        })

    @property
    def columns(self):
        """Columns as reported by ``config.indicators.get_columns()``."""
        return self.config.indicators.get_columns()

    @property
    @memoized
    def column_ordering(self):
        """Database column names, in the order ``columns`` yields them."""
        return [col.database_column_name for col in self.columns]

    @property
    def column_descriptions(self):
        """One ``{"name": <column name>}`` dict per entry of ``column_ordering``."""
        return [{"name": col} for col in self.column_ordering]

    def count(self):
        """Return ``count()`` of the underlying query."""
        return self.es.count()

    def distinct_values(self, column, size):
        """Return the terms-aggregation keys for ``column``.

        ``None`` is appended when the missing-value aggregation reports at
        least one document for ``column``.
        """
        # missing aggregation can be removed on upgrade to ES 2.0
        missing_agg_name = column + '_missing'
        query = self.es.terms_aggregation(column, column, size=size, sort_field="_term").size(0)
        query = query.aggregation(MissingAggregation(missing_agg_name, column))
        results = query.run()
        missing_result = getattr(results.aggregations, missing_agg_name).result
        result = getattr(results.aggregations, column).keys
        if missing_result['doc_count'] > 0:
            result.append(None)
        return result
Exemple #2
0
class ESAlchemy(object):
    """Expose the hits of an ``HQESQuery`` on one index as ``ESAlchemyRow`` objects.

    The set and order of columns is taken from
    ``config.indicators.get_columns()``.
    """

    def __init__(self, index_name, config):
        """Remember the index name and config, and create the index query."""
        self.index_name = index_name
        self.config = config
        self.es = HQESQuery(index_name)

    def __getitem__(self, sliced_or_int):
        """Return one row for an integer index, otherwise a list of rows."""
        rows = [self._hit_to_row(raw_hit) for raw_hit in self.es[sliced_or_int]]
        return rows[0] if isinstance(sliced_or_int, six.integer_types) else rows

    def _hit_to_row(self, hit):
        """Build an ``ESAlchemyRow`` from a single hit, parsing date-like columns."""
        def coerce(col, raw):
            # Only truthy values are parsed; everything else falls through as-is.
            if raw:
                kind = col.datatype
                if kind == 'date':
                    return datetime.datetime.strptime(raw, "%Y-%m-%d")
                if kind == 'datetime':
                    # Whole-second format first, fractional seconds as fallback.
                    try:
                        return datetime.datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S")
                    except ValueError:
                        return datetime.datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S.%f")
            return raw

        ordering = self.column_ordering
        values = {}
        for col in self.columns:
            name = col.database_column_name
            values[name] = coerce(col, hit[name])
        return ESAlchemyRow(ordering, values)

    @property
    def columns(self):
        """Columns returned by the config's indicators."""
        indicators = self.config.indicators
        return indicators.get_columns()

    @property
    @memoized
    def column_ordering(self):
        """List of database column names in ``columns`` order."""
        names = []
        for column in self.columns:
            names.append(column.database_column_name)
        return names

    @property
    def column_descriptions(self):
        """List of ``{"name": ...}`` dicts, one per ordered column name."""
        descriptions = []
        for column_name in self.column_ordering:
            descriptions.append({"name": column_name})
        return descriptions

    def count(self):
        """Delegate to the underlying query's ``count()``."""
        total = self.es.count()
        return total

    def distinct_values(self, column, size):
        """Return terms-aggregation keys for ``column``, plus ``None`` if any doc lacks it."""
        # missing aggregation can be removed on upgrade to ES 2.0
        missing_agg_name = column + '_missing'
        terms_query = self.es.terms_aggregation(
            column, column, size=size, sort_field="_term"
        )
        missing_agg = MissingAggregation(missing_agg_name, column)
        outcome = terms_query.size(0).aggregation(missing_agg).run()

        missing_info = getattr(outcome.aggregations, missing_agg_name).result
        distinct = getattr(outcome.aggregations, column).keys
        if missing_info['doc_count'] > 0:
            distinct.append(None)
        return distinct
Exemple #3
0
class ESAlchemy(object):
    """Row-oriented view over an ``HQESQuery`` for a single index.

    Hits returned by the query are converted to ``ESAlchemyRow`` objects whose
    columns come from ``config.indicators.get_columns()``.
    """

    def __init__(self, index_name, config):
        """Keep ``index_name`` and ``config`` and build the query for the index."""
        self.index_name = index_name
        self.config = config
        self.es = HQESQuery(index_name)

    def __getitem__(self, sliced_or_int):
        """Index or slice the underlying query and return converted rows.

        An integer index returns the first converted row; anything else
        (e.g. a slice) returns the list of converted rows.
        """
        # Function-local import so this block does not rely on a module-level
        # import being present.
        import numbers

        hits = self.es[sliced_or_int]
        hits = [self._hit_to_row(hit) for hit in hits]
        # ``long`` is not defined on Python 3, so ``(int, long)`` raised
        # NameError there. ``numbers.Integral`` matches ``int`` on both major
        # versions and ``long`` on Python 2.
        if isinstance(sliced_or_int, numbers.Integral):
            return hits[0]
        return hits

    def _hit_to_row(self, hit):
        """Convert one hit (indexable by database column name) to an ``ESAlchemyRow``."""
        def mapping_to_datatype(column, value):
            # Falsy values (None, '', 0, ...) are passed through untouched.
            if not value:
                return value

            datatype = column.datatype
            if datatype == 'datetime':
                # Accept timestamps both without and with fractional seconds.
                try:
                    return datetime.datetime.strptime(value,
                                                      "%Y-%m-%dT%H:%M:%S")
                except ValueError:
                    return datetime.datetime.strptime(value,
                                                      "%Y-%m-%dT%H:%M:%S.%f")
            elif datatype == 'date':
                # NOTE: strptime yields a ``datetime`` (midnight), not a ``date``.
                return datetime.datetime.strptime(value, "%Y-%m-%d")
            return value

        return ESAlchemyRow(
            self.column_ordering, {
                col.database_column_name: mapping_to_datatype(
                    col, hit[col.database_column_name])
                for col in self.columns
            })

    @property
    def columns(self):
        """Columns as reported by ``config.indicators.get_columns()``."""
        return self.config.indicators.get_columns()

    @property
    @memoized
    def column_ordering(self):
        """Database column names, in the order ``columns`` yields them."""
        return [col.database_column_name for col in self.columns]

    @property
    def column_descriptions(self):
        """One ``{"name": <column name>}`` dict per entry of ``column_ordering``."""
        return [{"name": col} for col in self.column_ordering]

    def count(self):
        """Return ``count()`` of the underlying query."""
        return self.es.count()

    def distinct_values(self, column, size):
        """Return the keys of a terms aggregation on ``column``.

        ``size`` is forwarded to the terms aggregation.
        """
        query = self.es.terms_aggregation(column, column, size=size).size(0)
        results = query.run()
        return getattr(results.aggregations, column).keys