Example #1
0
 def __init__(self, dataset_url):
     """Bind this object to ``dataset_url`` and create its helper objects."""
     url = dataset_url
     self.dataset_url = url
     # Query and Time are both constructed from the dataset URL alone.
     query, time = Query(url), Time(url)
     self.query = query
     self._time = time
     # Index is handed this object itself, so it is built last, once the
     # attributes above are already in place.
     self._index = Index(self)
Example #2
0
class BatFrame(object):
    """Query-backed view over the dataset found at ``dataset_url``.

    Selection-style operations (``[]``, ``head``, ``sort``) do not modify the
    frame; they return a copy whose ``query`` carries the extra clauses.
    """

    # Maximum number of rows rendered by ``__repr__``.
    _REPR_MAX_ROWS = 40

    def __init__(self, dataset_url):
        """Bind the frame to ``dataset_url`` and create its helper objects."""
        self.dataset_url = dataset_url
        self.query = Query(dataset_url)
        self._time = Time(dataset_url)
        # Index receives the frame itself, so it is created last.
        self._index = Index(self)

    def __getitem__(self, val):
        """Select columns or rows depending on the type of ``val``.

        * ``str``    -- a ``Column`` named ``val`` with this frame's query
          merged into the column's query.
        * ``Query``  -- a copy of the frame with ``val`` merged into its query.
        * ``slice``  -- a copy restricted to rows ``[start:stop]``; the step
          is ignored.
        * ``list``   -- a copy selecting each listed name (double-quoted).
        * ``Column`` -- a copy filtered by the column's ``execution_name``.

        Raises:
            TypeError: if ``val`` is none of the supported types.
        """
        if isinstance(val, str):
            col = Column(val, self.dataset_url)
            col.query.mergeQuery(self.query)
            return col

        if isinstance(val, Query):
            bf = self.copy()
            bf.query.mergeQuery(val)
            return bf

        if isinstance(val, slice):
            start = val.start
            stop = val.stop
            bf = self.copy()
            if start is not None:
                bf.query.setOFFSET(start)
            if stop is not None:
                # LIMIT is a row count (see ``head``), not an end position,
                # so the offset has to be subtracted from ``stop``.
                if start is None:
                    limit = stop
                else:
                    limit = max(stop - start, 0)
                bf.query.setLIMIT(limit)
            return bf

        if isinstance(val, list):
            bf = self.copy()
            for value in val:
                bf.query.addSELECT("\"{}\"".format(value))
            return bf

        if isinstance(val, Column):
            bf = self.copy()
            bf.query.addWHERE("({})".format(val.execution_name))
            return bf

        # Previously fell through and returned None, hiding caller mistakes.
        raise TypeError(
            "unsupported key type for BatFrame: {}".format(type(val).__name__))

    @property
    def columns(self):
        """Return the decoded JSON body of ``GET <dataset_url>/columns``."""
        return requests.get(self.dataset_url + '/columns').json()

    @property
    def rows(self):
        """Return the ``_rowName`` field of the query result ("soa" format)."""
        bf = self.copy()
        result = bf.query.executeQuery(format="soa")
        return result["_rowName"]

    @property
    def time(self):
        """Return a copy of the time helper with this frame's query merged in."""
        copy_time = self._time.copy()
        # The attribute is ``query`` (``self.Query`` raised AttributeError).
        # mergeQuery is used for its side effect, as in ``__getitem__``, so
        # the merged copy itself is what gets returned.
        copy_time.query.mergeQuery(self.query)
        return copy_time

    @property
    def ix(self):
        """Return a copy of the index helper."""
        copy_index = self._index.copy()
        return copy_index

    def copy(self):
        """Return a new frame on the same dataset with a copy of the query."""
        bf = BatFrame(self.dataset_url)
        bf.query = self.query.copy()
        return bf

    def toPandas(self):
        """Execute the query and return the result as a pandas DataFrame.

        The result is requested in "aos" format and indexed by ``_rowName``;
        an empty result yields an empty DataFrame.
        """
        result = self.query.executeQuery(format="aos")
        if len(result) == 0:
            return pd.DataFrame()
        return pd.DataFrame.from_records(result, index="_rowName")

    def head(self, num_rows=5):
        """Return a copy limited to ``num_rows`` rows."""
        bf = self.copy()
        bf.query.setLIMIT(num_rows)
        return bf

    def query(self, query):
        """Not implemented.

        NOTE: ``__init__`` assigns an instance attribute with the same name,
        so on instances ``frame.query`` resolves to that attribute and this
        method is only reachable through the class.
        """
        raise NotImplementedError()

    def sort(self, value, ascending=True):
        """Return a copy ordered by one or more columns.

        Args:
            value: a column name or a list of column names.
            ascending: a single flag applied to every column, or a list with
                one flag per column.

        Raises:
            RuntimeError: if ``value`` and ``ascending`` are lists of
                different lengths.
        """
        bf = self.copy()
        if not isinstance(value, list):
            value = [value]

        if not isinstance(ascending, list):
            ascending = [ascending] * len(value)

        if len(value) != len(ascending):
            raise RuntimeError("len(value) != len(ascending)")

        for by, asc in zip(value, ascending):
            direction = "ASC" if asc else "DESC"
            bf.query.addORDERBY("\"{}\" {}".format(by, direction))
        return bf

    @property
    def shape(self):
        """
        Returns (rowCount, valueCount)
        """
        content = requests.get(self.dataset_url).json()
        status = content['status']
        return (status['rowCount'], status['valueCount'])

    def __repr__(self):
        """Return the first rows as text, plus the total row count if truncated.

        The text is returned rather than printed so that ``repr()``, logging
        and debuggers receive it.
        """
        max_rows = self._REPR_MAX_ROWS
        bf = self.copy()
        bf.query.setLIMIT(max_rows)
        parts = [str(bf.toPandas())]
        response = requests.get(bf.dataset_url).json()
        try:
            rowCount = response['status']['rowCount']
        except (KeyError, TypeError):
            # Best effort: a response without a row count just omits the footer.
            rowCount = None

        if rowCount is not None and rowCount > max_rows:
            parts.append("{} rows".format(rowCount))
        return "\n".join(parts)