Ejemplo n.º 1
0
    def export_tsv(self, dest_path):
        """
        Export the current query result to a tsv file.

        The header row is taken from the keys of the first exported record,
        followed by ``timestamp`` (and ``version`` for groupBy queries).
        When the query returned no records the file is created empty,
        because no header can be derived.

        The query type is validated and the rows are collected before the
        destination file is opened, so a failure does not truncate an
        existing file or leave an open handle behind.

        :param str dest_path: file to write query results to
        :raise NotImplementedError: if the query type is not one of
            timeseries, topN or groupBy

        Example

        .. code-block:: python
            :linenos:

                >>> top = client.topn(
                        datasource='twitterstream',
                        granularity='all',
                        intervals='2013-10-04/pt1h',
                        aggregations={"count": doublesum("count")},
                        dimension='user_name',
                        filter = Dimension('user_lang') == 'en',
                        metric='count',
                        threshold=2
                    )

                >>> top.export_tsv('top.tsv')
                >>> !cat top.tsv
                >>> count	user_name	timestamp
                    7.0	user_1	2013-10-04T00:00:00.000Z
                    6.0	user_2	2013-10-04T00:00:00.000Z
        """
        if self.query_type not in ("timeseries", "topN", "groupBy"):
            raise NotImplementedError(
                "TSV export not implemented for query type: {0}".format(
                    self.query_type))

        # Each entry pairs a record dict with the trailing column values
        # that are appended after the record's own values.
        records = []
        if self.query_type == "groupBy":
            trailing_columns = ["timestamp", "version"]
            for item in self.result or []:
                records.append(
                    (item["event"], [item["timestamp"], item["version"]]))
        else:
            trailing_columns = ["timestamp"]
            for item in self.result or []:
                timestamp = item["timestamp"]
                result = item["result"]
                # topN carries a list of records per item, timeseries one.
                lines = result if isinstance(result, list) else [result]
                for line in lines:
                    records.append((line, [timestamp]))

        # Python 3: text mode, UTF-8, newline translation disabled.
        # Python 2: binary mode.
        if six.PY3:
            f = open(dest_path, "w", newline="", encoding="utf-8")
        else:
            f = open(dest_path, "wb")

        # The context manager closes the file even if writing a row fails.
        with f:
            w = UnicodeWriter(f)
            if records:
                w.writerow(list(records[0][0].keys()) + trailing_columns)
                # NOTE(review): values are written in each record's own key
                # order; this assumes every record has the same keys in the
                # same order as the first one -- confirm.
                for record, trailing_values in records:
                    w.writerow(list(record.values()) + trailing_values)
Ejemplo n.º 2
0
    def export_tsv(self, dest_path):
        """
        Export the current query result to a tsv file.

        The header row is taken from the keys of the first exported record,
        followed by ``timestamp`` (and ``version`` for groupBy queries).
        When the query returned no records the file is created empty,
        because no header can be derived.

        The query type is validated and the rows are collected before the
        destination file is opened, so a failure does not truncate an
        existing file or leave an open handle behind.

        :param str dest_path: file to write query results to
        :raise NotImplementedError: if the query type is not one of
            timeseries, topN or groupBy

        Example

        .. code-block:: python
            :linenos:

                >>> top = client.topn(
                        datasource='twitterstream',
                        granularity='all',
                        intervals='2013-10-04/pt1h',
                        aggregations={"count": doublesum("count")},
                        dimension='user_name',
                        filter = Dimension('user_lang') == 'en',
                        metric='count',
                        threshold=2
                    )

                >>> top.export_tsv('top.tsv')
                >>> !cat top.tsv
                >>> count	user_name	timestamp
                    7.0	user_1	2013-10-04T00:00:00.000Z
                    6.0	user_2	2013-10-04T00:00:00.000Z
        """
        if self.query_type not in ('timeseries', 'topN', 'groupBy'):
            raise NotImplementedError('TSV export not implemented for query type: {0}'.format(self.query_type))

        # Each entry pairs a record dict with the trailing column values
        # that are appended after the record's own values.
        records = []
        if self.query_type == 'groupBy':
            trailing_columns = ['timestamp', 'version']
            for item in self.result or []:
                records.append(
                    (item['event'], [item['timestamp'], item['version']]))
        else:
            trailing_columns = ['timestamp']
            for item in self.result or []:
                timestamp = item['timestamp']
                result = item['result']
                # topN carries a list of records per item, timeseries one.
                lines = result if isinstance(result, list) else [result]
                for line in lines:
                    records.append((line, [timestamp]))

        # Python 3: text mode, UTF-8, newline translation disabled.
        # Python 2: binary mode.
        if six.PY3:
            f = open(dest_path, 'w', newline='', encoding='utf-8')
        else:
            f = open(dest_path, 'wb')

        # The context manager closes the file even if writing a row fails.
        with f:
            w = UnicodeWriter(f)
            if records:
                w.writerow(list(records[0][0].keys()) + trailing_columns)
                # NOTE(review): values are written in each record's own key
                # order; this assumes every record has the same keys in the
                # same order as the first one -- confirm.
                for record, trailing_values in records:
                    w.writerow(list(record.values()) + trailing_values)