def read_data_from_hive(self, query, cluster, verbose=False, **kwargs): print('Running Hive query') cmd = HiveCommand.create(query=query, print_logs_live=True, label=cluster, **kwargs) while cmd.attributes.get('status', None) != 'done': if verbose: cmd = self._get_logs(cmd) else: cmd = self._get_status(cmd) return cmd
def run_query(self, query, user): qbol.configure(api_token=self.configuration.get('token'), api_url='%s/api' % self.configuration.get('endpoint')) try: query_type = self.configuration.get('query_type', 'hive') if query_type == 'quantum': cmd = SqlCommand.create(query=query) elif query_type == 'hive': cmd = HiveCommand.create(query=query, label=self.configuration.get('cluster')) elif query_type == 'presto': cmd = PrestoCommand.create(query=query, label=self.configuration.get('cluster')) else: raise Exception("Invalid Query Type:%s.\ It must be : hive / presto / quantum." % self.configuration.get('query_type')) logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status) while not Command.is_done(cmd.status): time.sleep(qbol.poll_interval) cmd = Command.find(cmd.id) logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status) rows = [] columns = [] error = None if cmd.status == 'done': fp = StringIO() cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False, qlog=None, arguments=['true']) results = fp.getvalue() fp.close() data = results.split('\r\n') columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')]) rows = [dict(zip((c['name'] for c in columns), row.split('\t'))) for row in data] json_data = json_dumps({'columns': columns, 'rows': rows}) except KeyboardInterrupt: logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id) cmd.cancel() error = "Query cancelled by user." json_data = None return json_data, error
def run_hive_query_asynchronous(cluster_label, query_filename, **query_kwargs): template_loader = TemplateLoader(QUERY_DIR) query = template_loader.load_from_file(query_filename, **query_kwargs) return HiveCommand.create(query=query, label=cluster_label)
def run_query(self, query, user): qbol.configure( api_token=self.configuration.get("token"), api_url="%s/api" % self.configuration.get("endpoint"), ) try: query_type = self.configuration.get("query_type", "hive") if query_type == "quantum": cmd = SqlCommand.create(query=query) elif query_type == "hive": cmd = HiveCommand.create( query=query, label=self.configuration.get("cluster")) elif query_type == "presto": cmd = PrestoCommand.create( query=query, label=self.configuration.get("cluster")) else: raise Exception("Invalid Query Type:%s.\ It must be : hive / presto / quantum." % self.configuration.get("query_type")) logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status) while not Command.is_done(cmd.status): time.sleep(qbol.poll_interval) cmd = Command.find(cmd.id) logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status) rows = [] columns = [] error = None if cmd.status == "done": fp = StringIO() cmd.get_results( fp=fp, inline=True, delim="\t", fetch=False, qlog=None, arguments=["true"], ) results = fp.getvalue() fp.close() data = results.split("\r\n") columns = self.fetch_columns([ (i, TYPE_STRING) for i in data.pop(0).split("\t") ]) rows = [ dict( zip((column["name"] for column in columns), row.split("\t"))) for row in data ] json_data = json_dumps({"columns": columns, "rows": rows}) except KeyboardInterrupt: logging.info("Sending KILL signal to Qubole Command Id: %s", cmd.id) cmd.cancel() error = "Query cancelled by user." json_data = None return json_data, error