Ejemplo n.º 1
0
    def write_csv(self, path):
        """Dump ``self.df`` to a scratch CSV file and hand it to ``self.hdfs.put``.

        The frame is written without header or index, comma separated,
        unquoted, backslash-escaped, and with missing values rendered as
        ``#NULL``. The scratch file is closed and its removal attempted
        whether or not the upload succeeded.

        Parameters
        ----------
        path
            Destination forwarded as the first argument of ``self.hdfs.put``.

        Returns
        -------
        The ``path`` argument, unchanged.
        """
        import csv

        scratch_name = 'tmp_{0}.csv'.format(util.guid())
        scratch = open(scratch_name, 'w+')

        # Serialization settings for the scratch CSV
        csv_settings = dict(
            header=False,
            index=False,
            sep=',',
            quoting=csv.QUOTE_NONE,
            escapechar='\\',
            na_rep='#NULL',
        )

        try:
            if options.verbose:
                log('Writing DataFrame to temporary file')

            self.df.to_csv(scratch, **csv_settings)
            # Rewind so the upload reads the file from its first byte
            scratch.seek(0)

            if options.verbose:
                log('Writing CSV to: {0}'.format(path))

            self.hdfs.put(path, scratch)
        finally:
            scratch.close()
            try:
                os.remove(scratch_name)
            except OSError:
                # Best-effort cleanup: a failed removal is ignored
                pass

        return path
Ejemplo n.º 2
0
 def _log(self, sql):
     """Log the string form of ``sql`` through ``util.log``.

     Nothing is logged when ``str(sql)`` raises
     ``sa.exc.UnsupportedCompilationError``.
     """
     try:
         rendered = str(sql)
     except sa.exc.UnsupportedCompilationError:
         # No string form available for this object; skip logging
         return
     util.log(rendered)
Ejemplo n.º 3
0
    def _wait_synchronous(self):
        """Poll the cursor until its operation is no longer executing.

        The pause between polls grows with the time elapsed since polling
        began. Raises ``OperationalError`` when the cursor reports an error
        state. On ``KeyboardInterrupt`` the query is cancelled through
        ``self.cancel()`` and the interrupt is re-raised.
        """
        from impala.hiveserver2 import OperationalError

        started_at = time.time()

        # (elapsed-seconds upper bound, pause in seconds), checked in order
        backoff_steps = (
            (0.05, 0.01),
            (1.0, 0.05),
            (10.0, 0.1),
            (60.0, 0.5),
        )

        def _pause_for(origin):
            waited = time.time() - origin
            for upper_bound, pause in backoff_steps:
                if waited < upper_bound:
                    return pause
            # Past the last threshold: settle on one-second polling
            return 1.0

        cursor = self._cursor
        try:
            while True:
                state = cursor.status()
                if self._cursor._op_state_is_error(state):
                    raise OperationalError("Operation is in ERROR_STATE")
                if cursor._op_state_is_executing(state):
                    time.sleep(_pause_for(started_at))
                    continue
                break
        except KeyboardInterrupt:
            util.log('Canceling query')
            self.cancel()
            raise
Ejemplo n.º 4
0
    def write_csv(self, path):
        """Serialize ``self.df`` as CSV and pass the file to ``self.hdfs.put``.

        A scratch file named ``tmp_<guid>.csv`` is opened, filled with the
        frame (no header, no index, comma separator, no quoting, backslash
        escape character, ``#NULL`` for missing values), rewound and given
        to ``self.hdfs.put(path, ...)``. Afterwards the file is closed and
        its deletion attempted, even when an earlier step raised.

        Returns
        -------
        The ``path`` argument that was passed in.
        """
        import csv

        local_name = 'tmp_{0}.csv'.format(util.guid())
        local_file = open(local_name, 'w+')

        try:
            if options.verbose:
                log('Writing DataFrame to temporary file')

            self.df.to_csv(
                local_file,
                header=False,
                index=False,
                sep=',',
                quoting=csv.QUOTE_NONE,
                escapechar='\\',
                na_rep='#NULL',
            )
            # Back to the start so the upload sees the whole file
            local_file.seek(0)

            if options.verbose:
                log('Writing CSV to: {0}'.format(path))

            self.hdfs.put(path, local_file)
        finally:
            local_file.close()
            try:
                os.remove(local_name)
            except OSError:
                # Removal is best effort only
                pass

        return path
Ejemplo n.º 5
0
    def write_csv(self, path):
        """Write ``self.df`` to a CSV in a temporary directory and upload it.

        The CSV has no header or index, is comma separated, unquoted,
        backslash-escaped, and renders missing values as ``#NULL``. Its
        local path is passed to ``self.hdfs.put(path, ...)`` before the
        temporary directory is cleaned up.

        Returns
        -------
        The ``path`` argument, unchanged.
        """
        # A temporary directory (rather than a temporary file) is used
        # to provide Windows support and avoid #2267
        # https://github.com/ibis-project/ibis/issues/2267
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_csv = os.path.join(scratch_dir, 'impala_temp_file.csv')

            if options.verbose:
                notice = 'Writing DataFrame to temporary directory {}'.format(
                    local_csv)
                util.log(notice)

            csv_settings = dict(
                header=False,
                index=False,
                sep=',',
                quoting=csv.QUOTE_NONE,
                escapechar='\\',
                na_rep='#NULL',
            )
            self.df.to_csv(local_csv, **csv_settings)

            if options.verbose:
                util.log('Writing CSV to: {0}'.format(path))

            self.hdfs.put(path, local_csv)

        return path
Ejemplo n.º 6
0
    def log(self, msg: str):
        """Forward a message to the module-level ``log`` function.

        Parameters
        ----------
        msg : str
            Text handed to ``log`` unchanged.
        """
        log(msg)
Ejemplo n.º 7
0
    def execute(self, query):
        """Run ``query`` on a cursor from ``self._get_cursor()``.

        ``DDL`` and ``DML`` instances are compiled first via their
        ``compile()`` method; anything else is executed as given. The
        statement is logged before execution.

        Returns
        -------
        The cursor on which the statement was executed.

        Raises
        ------
        Exception
            Whatever ``cursor.execute`` raised. The cursor is released and
            the statement plus traceback are logged before re-raising.
        """
        statement = (
            query.compile() if isinstance(query, (DDL, DML)) else query
        )

        cur = self._get_cursor()
        util.log(statement)

        try:
            cur.execute(statement)
        except Exception:
            # Give the cursor back before reporting the failure
            cur.release()
            util.log('Exception caused by {}: {}'.format(
                statement, traceback.format_exc()))
            raise
        else:
            return cur
Ejemplo n.º 8
0
    def write_csv(self):
        """Upload ``self.df`` as ``0.csv`` into a fresh ``pandas_<guid>`` directory.

        The directory path is built under ``options.impala.temp_hdfs_path``.
        The frame is first written to a local scratch file (no header, no
        index, comma separated, unquoted, backslash-escaped, ``#NULL`` for
        missing values), which is then passed to ``self.hdfs.put``. After a
        successful put the directory is appended to ``self.temp_hdfs_dirs``
        and stored in ``self.csv_dir``. The scratch file is closed and its
        removal attempted in every case.

        Returns
        -------
        The generated directory path.
        """
        import csv

        remote_dir = pjoin(options.impala.temp_hdfs_path,
                           'pandas_{0}'.format(util.guid()))

        scratch_name = 'tmp_{0}.csv'.format(util.guid())
        scratch = open(scratch_name, 'w+')

        csv_settings = dict(
            header=False,
            index=False,
            sep=',',
            quoting=csv.QUOTE_NONE,
            escapechar='\\',
            na_rep='#NULL',
        )

        try:
            if options.verbose:
                log('Writing DataFrame to temporary file')

            self.df.to_csv(scratch, **csv_settings)
            # Rewind so the upload starts at the first byte
            scratch.seek(0)

            remote_file = pjoin(remote_dir, '0.csv')

            if options.verbose:
                log('Writing CSV to HDFS: {0}'.format(remote_file))

            self.hdfs.put(remote_file, scratch)

            # Remember the directory only once the upload went through
            self.temp_hdfs_dirs.append(remote_dir)
            self.csv_dir = remote_dir
        finally:
            scratch.close()
            try:
                os.remove(scratch_name)
            except OSError:
                # Best-effort cleanup of the scratch file
                pass

        return remote_dir
Ejemplo n.º 9
0
    def write_csv(self):
        """Stage ``self.df`` as a CSV file and put it at ``<dir>/0.csv``.

        ``<dir>`` is ``pandas_<guid>`` joined onto
        ``options.impala.temp_hdfs_path``. The frame goes to a local
        ``tmp_<guid>.csv`` file first (header and index omitted, comma
        separator, no quoting, backslash escape character, ``#NULL`` for
        missing values); that file is rewound and given to
        ``self.hdfs.put``. When the put succeeds, ``<dir>`` is appended to
        ``self.temp_hdfs_dirs`` and assigned to ``self.csv_dir``. The local
        file is always closed, and deleting it is attempted afterwards.

        Returns
        -------
        The ``<dir>`` path.
        """
        import csv

        target_dir = pjoin(options.impala.temp_hdfs_path,
                           'pandas_{0}'.format(util.guid()))

        local_name = 'tmp_{0}.csv'.format(util.guid())
        local_file = open(local_name, 'w+')

        try:
            if options.verbose:
                log('Writing DataFrame to temporary file')

            self.df.to_csv(
                local_file,
                header=False,
                index=False,
                sep=',',
                quoting=csv.QUOTE_NONE,
                escapechar='\\',
                na_rep='#NULL',
            )
            # The same handle is read back by the upload, so rewind it
            local_file.seek(0)

            target_file = pjoin(target_dir, '0.csv')

            if options.verbose:
                log('Writing CSV to HDFS: {0}'.format(target_file))

            self.hdfs.put(target_file, local_file)

            # Track the directory now that it holds the uploaded file
            self.temp_hdfs_dirs.append(target_dir)
            self.csv_dir = target_dir
        finally:
            local_file.close()
            try:
                os.remove(local_name)
            except OSError:
                # A failed delete is deliberately ignored
                pass

        return target_dir
Ejemplo n.º 10
0
    def write_csv(self, path):
        """Write ``self.df`` to a local CSV file and upload it to ``path``.

        The CSV has no header or index, is comma separated, unquoted,
        backslash-escaped, and renders missing values as ``#NULL``. The
        file lives in a private temporary directory that is removed once
        the upload has finished or failed.

        Parameters
        ----------
        path
            Destination forwarded as the first argument of ``self.hdfs.put``.

        Returns
        -------
        The ``path`` argument, unchanged.
        """
        import os
        import shutil

        # A NamedTemporaryFile cannot be reopened by name on Windows while
        # its handle is still open, so pandas could not write to it there.
        # Writing to a fresh file inside a temporary directory works on
        # every platform.
        tmp_dir = tempfile.mkdtemp()
        try:
            tmp_file_path = os.path.join(tmp_dir, 'impala_temp_file.csv')

            if options.verbose:
                util.log('Writing DataFrame to temporary file {}'.format(
                    tmp_file_path))

            self.df.to_csv(tmp_file_path,
                           header=False,
                           index=False,
                           sep=',',
                           quoting=csv.QUOTE_NONE,
                           escapechar='\\',
                           na_rep='#NULL')

            if options.verbose:
                util.log('Writing CSV to: {0}'.format(path))

            self.hdfs.put(path, tmp_file_path)
        finally:
            # Best-effort removal of the directory and the CSV inside it
            shutil.rmtree(tmp_dir, ignore_errors=True)
        return path
Ejemplo n.º 11
0
    def write_csv(self, path):
        """Write ``self.df`` to a local CSV file and upload it to ``path``.

        The CSV has no header or index, is comma separated, unquoted,
        backslash-escaped, and renders missing values as ``#NULL``. The
        file is created inside a temporary directory that is cleaned up
        when the method finishes.

        Parameters
        ----------
        path
            Destination forwarded as the first argument of ``self.hdfs.put``.

        Returns
        -------
        The ``path`` argument, unchanged.
        """
        import os

        # Use a temporary directory instead of a NamedTemporaryFile: on
        # Windows a named temporary file cannot be opened a second time by
        # name while it is still open, which is what ``to_csv(f.name)`` did.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file_path = os.path.join(tmp_dir, 'impala_temp_file.csv')

            if options.verbose:
                util.log(
                    'Writing DataFrame to temporary file {}'.format(
                        tmp_file_path
                    )
                )

            self.df.to_csv(
                tmp_file_path,
                header=False,
                index=False,
                sep=',',
                quoting=csv.QUOTE_NONE,
                escapechar='\\',
                na_rep='#NULL',
            )

            if options.verbose:
                util.log('Writing CSV to: {0}'.format(path))

            self.hdfs.put(path, tmp_file_path)
        return path
Ejemplo n.º 12
0
    def drop_database(self, name, force=False):
        """Drop an Impala database.

        Parameters
        ----------
        name : string
          Database name
        force : bool, default False
          If False and the database contains any tables, UDFs or UDAs,
          raises an IntegrityError. If True, those objects are dropped
          first and the drop statement is built with ``must_exist=False``.

        Returns
        -------
        The result of ``self.raw_sql`` for the drop statement.

        """
        if force and name not in self.list_databases():
            # Forced drop of a database that is not listed: nothing to clear
            tables, udfs, udas = [], [], []
        else:
            tables = self.list_tables(database=name)
            udfs = self.list_udfs(database=name)
            udas = self.list_udas(database=name)

        if not force:
            leftovers = (tables, udfs, udas)
            if any(len(found) > 0 for found in leftovers):
                raise com.IntegrityError(
                    'Database {} must be empty before '
                    'being dropped, or set '
                    'force=True'.format(name)
                )
        else:
            for table in tables:
                qualified = f'{name}.{table}'
                util.log('Dropping {}'.format(qualified))
                self.drop_table_or_view(table, database=name)

            for udf in udfs:
                util.log(f'Dropping function {udf.name}({udf.inputs})')
                self.drop_udf(
                    udf.name,
                    input_types=udf.inputs,
                    database=name,
                    force=True,
                )

            for uda in udas:
                notice = 'Dropping aggregate function {}({})'.format(
                    uda.name, uda.inputs
                )
                util.log(notice)
                self.drop_uda(
                    uda.name,
                    input_types=uda.inputs,
                    database=name,
                    force=True,
                )

        drop_stmt = DropDatabase(name, must_exist=not force)
        return self.raw_sql(drop_stmt)
Ejemplo n.º 13
0
 def log(self, msg):
     """Forward ``msg`` unchanged to the module-level ``log`` function."""
     log(msg)
Ejemplo n.º 14
0
 def log(self, msg):
     """Pass the given message on to the module-level ``log`` callable."""
     log(msg)