def write_csv(self, path):
    """Dump ``self.df`` to a scratch CSV file and hand it to ``self.hdfs.put``.

    The scratch file is created in the current working directory under a
    name derived from ``util.guid()``. It is always closed and then removed
    (best effort) once the upload attempt is over, whether or not it
    succeeded.

    Parameters
    ----------
    path : destination passed as the first argument to ``self.hdfs.put``

    Returns
    -------
    The ``path`` argument, unchanged.
    """
    import csv

    scratch_name = 'tmp_{0}.csv'.format(util.guid())
    handle = open(scratch_name, 'w+')
    try:
        if options.verbose:
            log('Writing DataFrame to temporary file')

        # Headerless, index-free, unquoted CSV; missing values are written
        # as the literal '#NULL' and special characters are backslash-escaped.
        csv_kwargs = dict(
            header=False,
            index=False,
            sep=',',
            quoting=csv.QUOTE_NONE,
            escapechar='\\',
            na_rep='#NULL',
        )
        self.df.to_csv(handle, **csv_kwargs)

        # Rewind so the upload reads the file from its beginning.
        handle.seek(0)

        if options.verbose:
            log('Writing CSV to: {0}'.format(path))
        self.hdfs.put(path, handle)
    finally:
        handle.close()
        # Removal is best effort: a failure to delete the scratch file must
        # not mask the outcome of the write/upload above.
        try:
            os.remove(scratch_name)
        except os.error:
            pass

    return path
def _log(self, sql):
    """Log the string form of ``sql`` via ``util.log``.

    If converting ``sql`` to a string raises
    ``sa.exc.UnsupportedCompilationError``, nothing is logged and the error
    is suppressed.
    """
    try:
        rendered = str(sql)
    except sa.exc.UnsupportedCompilationError:
        # The statement cannot be rendered as text; skip logging silently.
        return
    util.log(rendered)
def _wait_synchronous(self):
    """Block until the cursor's operation is no longer executing.

    The cursor status is polled in a loop, sleeping between polls for an
    interval that grows with the total time spent waiting. A
    ``KeyboardInterrupt`` during the wait cancels the query through
    ``self.cancel()`` and is then re-raised.

    Raises
    ------
    OperationalError
        If the polled state is reported as an error state.
    """
    from impala.hiveserver2 import OperationalError

    wait_began = time.time()

    # (elapsed-seconds upper bound, pause) pairs, checked in order; once the
    # wait exceeds the last bound the pause is fixed at one second.
    backoff_steps = (
        (0.05, 0.01),
        (1.0, 0.05),
        (10.0, 0.1),
        (60.0, 0.5),
    )

    def _sleep_interval(start_time):
        waited = time.time() - start_time
        for upper_bound, pause in backoff_steps:
            if waited < upper_bound:
                return pause
        return 1.0

    cur = self._cursor
    try:
        while True:
            state = cur.status()
            if self._cursor._op_state_is_error(state):
                raise OperationalError("Operation is in ERROR_STATE")
            if not cur._op_state_is_executing(state):
                # Operation finished (or at least stopped executing).
                return
            time.sleep(_sleep_interval(wait_began))
    except KeyboardInterrupt:
        util.log('Canceling query')
        self.cancel()
        raise
def write_csv(self, path):
    """Write ``self.df`` as CSV into a temporary directory and upload it.

    Parameters
    ----------
    path : destination passed as the first argument to ``self.hdfs.put``

    Returns
    -------
    The ``path`` argument, unchanged.
    """
    # A temporary directory is used instead of a temporary file to provide
    # Windows support and avoid #2267:
    # https://github.com/ibis-project/ibis/issues/2267
    with tempfile.TemporaryDirectory() as staging_dir:
        local_csv = os.path.join(staging_dir, 'impala_temp_file.csv')

        if options.verbose:
            util.log(
                'Writing DataFrame to temporary directory {}'.format(
                    local_csv
                )
            )

        # Headerless, index-free, unquoted CSV; missing values are written
        # as the literal '#NULL' and special characters are backslash-escaped.
        csv_kwargs = dict(
            header=False,
            index=False,
            sep=',',
            quoting=csv.QUOTE_NONE,
            escapechar='\\',
            na_rep='#NULL',
        )
        self.df.to_csv(local_csv, **csv_kwargs)

        if options.verbose:
            util.log('Writing CSV to: {0}'.format(path))

        # Upload while the temporary directory (and the file in it) exists.
        self.hdfs.put(path, local_csv)

    return path
def log(self, msg: str) -> None:
    """Print or log a message.

    The message is passed unchanged to the ``log`` callable that this name
    resolves to outside the method (defined elsewhere; not shown here).

    Parameters
    ----------
    msg : string
        The message to forward.
    """
    log(msg)
def execute(self, query):
    """Run ``query`` on a cursor from ``self._get_cursor()`` and return it.

    ``DDL`` / ``DML`` objects are compiled first via their ``compile()``
    method; anything else is executed as given. The statement is logged
    before execution.

    On failure the cursor is released, the statement and the formatted
    traceback are logged, and the original exception is re-raised.

    Returns
    -------
    The cursor on which the statement was executed.
    """
    statement = (
        query.compile() if isinstance(query, (DDL, DML)) else query
    )

    cur = self._get_cursor()
    util.log(statement)

    try:
        cur.execute(statement)
    except Exception:
        # Release the cursor before reporting, so it is not left held when
        # the exception propagates to the caller.
        cur.release()
        util.log(
            'Exception caused by {}: {}'.format(
                statement, traceback.format_exc()
            )
        )
        raise
    else:
        return cur
def write_csv(self):
    """Dump ``self.df`` to a scratch CSV and upload it to a fresh HDFS dir.

    The target directory is ``options.impala.temp_hdfs_path`` joined with a
    ``pandas_<guid>`` component; the CSV is uploaded there as ``0.csv``.
    After a successful upload the directory is appended to
    ``self.temp_hdfs_dirs`` and stored in ``self.csv_dir``. The local
    scratch file is always closed and then removed (best effort).

    Returns
    -------
    The temporary HDFS directory path.
    """
    import csv

    hdfs_dir = pjoin(
        options.impala.temp_hdfs_path, 'pandas_{0}'.format(util.guid())
    )

    scratch_name = 'tmp_{0}.csv'.format(util.guid())
    handle = open(scratch_name, 'w+')
    try:
        if options.verbose:
            log('Writing DataFrame to temporary file')

        # Headerless, index-free, unquoted CSV; missing values are written
        # as the literal '#NULL' and special characters are backslash-escaped.
        csv_kwargs = dict(
            header=False,
            index=False,
            sep=',',
            quoting=csv.QUOTE_NONE,
            escapechar='\\',
            na_rep='#NULL',
        )
        self.df.to_csv(handle, **csv_kwargs)

        # Rewind so the upload reads the file from its beginning.
        handle.seek(0)

        remote_file = pjoin(hdfs_dir, '0.csv')
        if options.verbose:
            log('Writing CSV to HDFS: {0}'.format(remote_file))
        self.hdfs.put(remote_file, handle)

        # Record the directory only once the upload call has returned.
        self.temp_hdfs_dirs.append(hdfs_dir)
        self.csv_dir = hdfs_dir
    finally:
        handle.close()
        # Removal is best effort: a failure to delete the scratch file must
        # not mask the outcome of the write/upload above.
        try:
            os.remove(scratch_name)
        except os.error:
            pass

    return hdfs_dir
def write_csv(self, path):
    """Write ``self.df`` to a local temporary CSV file and upload it.

    The CSV is staged as a regular file inside a temporary directory rather
    than in an open ``NamedTemporaryFile``. A named temporary file that is
    still open cannot be reopened by name on Windows, and both ``to_csv``
    and ``hdfs.put`` are given the file *name*, not the open handle.

    Parameters
    ----------
    path : destination passed as the first argument to ``self.hdfs.put``

    Returns
    -------
    The ``path`` argument, unchanged.
    """
    # Local import: the file's top-level imports are not visible from here.
    import os

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_file_path = os.path.join(tmp_dir, 'impala_temp_file.csv')

        # Write the DataFrame to the temporary file path
        if options.verbose:
            util.log('Writing DataFrame to temporary file {}'.format(
                tmp_file_path))

        self.df.to_csv(tmp_file_path, header=False, index=False,
                       sep=',', quoting=csv.QUOTE_NONE,
                       escapechar='\\', na_rep='#NULL')

        if options.verbose:
            util.log('Writing CSV to: {0}'.format(path))

        # Upload before leaving the ``with`` block, which deletes the
        # directory and the file inside it.
        self.hdfs.put(path, tmp_file_path)
    return path
def write_csv(self, path):
    """Write ``self.df`` to a local temporary CSV file and upload it.

    The CSV is staged as a regular file inside a temporary directory rather
    than in an open ``NamedTemporaryFile``. A named temporary file that is
    still open cannot be reopened by name on Windows, and both ``to_csv``
    and ``hdfs.put`` are given the file *name*, not the open handle.

    Parameters
    ----------
    path : destination passed as the first argument to ``self.hdfs.put``

    Returns
    -------
    The ``path`` argument, unchanged.
    """
    # Local import: the file's top-level imports are not visible from here.
    import os

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_file_path = os.path.join(tmp_dir, 'impala_temp_file.csv')

        # Write the DataFrame to the temporary file path
        if options.verbose:
            util.log(
                'Writing DataFrame to temporary file {}'.format(
                    tmp_file_path
                )
            )

        self.df.to_csv(
            tmp_file_path,
            header=False,
            index=False,
            sep=',',
            quoting=csv.QUOTE_NONE,
            escapechar='\\',
            na_rep='#NULL',
        )

        if options.verbose:
            util.log('Writing CSV to: {0}'.format(path))

        # Upload before leaving the ``with`` block, which deletes the
        # directory and the file inside it.
        self.hdfs.put(path, tmp_file_path)
    return path
def drop_database(self, name, force=False):
    """Drop an Impala database.

    Parameters
    ----------
    name : string
      Database name
    force : bool, default False
      If False, the database must contain no tables, UDFs or UDAs,
      otherwise an IntegrityError is raised. If True, every listed table
      or view, UDF and UDA is dropped first, and the final drop statement
      is issued with ``must_exist=False``.

    Returns
    -------
    The result of ``self.raw_sql`` for the drop statement.
    """
    # With force=True a database that is not listed is treated as empty, so
    # none of the per-database listings are queried for it.
    if force and name not in self.list_databases():
        tables, udfs, udas = [], [], []
    else:
        tables = self.list_tables(database=name)
        udfs = self.list_udfs(database=name)
        udas = self.list_udas(database=name)

    if not force:
        contents = (tables, udfs, udas)
        if any(len(found) > 0 for found in contents):
            raise com.IntegrityError(
                'Database {} must be empty before '
                'being dropped, or set '
                'force=True'.format(name)
            )
    else:
        for table in tables:
            qualified = f'{name}.{table}'
            util.log('Dropping {}'.format(qualified))
            self.drop_table_or_view(table, database=name)

        for udf in udfs:
            util.log(f'Dropping function {udf.name}({udf.inputs})')
            self.drop_udf(
                udf.name,
                input_types=udf.inputs,
                database=name,
                force=True,
            )

        for uda in udas:
            util.log(
                'Dropping aggregate function {}({})'.format(
                    uda.name, uda.inputs
                )
            )
            self.drop_uda(
                uda.name,
                input_types=uda.inputs,
                database=name,
                force=True,
            )

    drop_stmt = DropDatabase(name, must_exist=not force)
    return self.raw_sql(drop_stmt)
def log(self, msg) -> None:
    """Print or log a message.

    The message is passed unchanged to the ``log`` callable that this name
    resolves to outside the method (defined elsewhere; not shown here).

    Parameters
    ----------
    msg : presumably a string -- TODO confirm against callers
        The message to forward.
    """
    log(msg)