def test_workflow_transaction(hdfs_cluster):
    w = WebHDFS(hdfs_cluster,
                user="******",
                data_proxy={"worker.example.com": "localhost"})
    fn = "/user/testuser/testrun/afile"
    w.mkdirs("/user/testuser/testrun")
    with w.transaction:
        with w.open(fn, "wb") as f:
            f.write(b"hello")
        assert not w.exists(fn)
    assert w.exists(fn)
    assert w.ukey(fn)
    files = w.ls("/user/testuser/testrun", True)
    summ = w.content_summary("/user/testuser/testrun")
    assert summ["length"] == files[0]["size"]
    assert summ["fileCount"] == 1

    w.rm("/user/testuser/testrun", recursive=True)
    assert not w.exists(fn)
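
The test above exercises fsspec's filesystem transaction: files written inside `with w.transaction:` are staged and only become visible when the block exits. A minimal sketch of the same pattern outside a test (hypothetical host and path; assumes a reachable WebHDFS endpoint):

    from fsspec.implementations.webhdfs import WebHDFS

    fs = WebHDFS("namenode.example.com", user="testuser")
    with fs.transaction:
        with fs.open("/tmp/demo.txt", "wb") as f:
            f.write(b"hello")              # staged, not yet visible
        assert not fs.exists("/tmp/demo.txt")
    assert fs.exists("/tmp/demo.txt")      # committed when the transaction closes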
Example #2
class DISC:
    from _PATHS import _IMPALA_HOST, _HIVE_HOST, _HTTPFS_HOST, _HDFS_PATH, USER_GUIDE_URL

    __TEMP_LOCAL_DIR = os.path.join(os.path.dirname(__file__), '._temp_connectors')
    __PEM_PATH = os.path.join(os.path.dirname(__file__), 'certificates/accprd-truststore.pem')

    def __init__(self):
        try:    # Works on Jupyter/IPython, where get_ipython is defined
            self._is_jupyter = bool(get_ipython().config)
            self._is_ipython = True
        except NameError:  # Plain Python: get_ipython is not defined
            self._is_jupyter = self._is_ipython = False
        self.open()
        self.__log()
        self.spark = None
        self._spark_uri = None
    
    def open(self, hive=False):
        """Opens the DISC connection, selecting the connection type automatically
        according to the platform (local Windows or CDSW).

        Args:
            hive (bool): Connect through Hive instead of Impala (default is False).
        """
        from fsspec.implementations.webhdfs import WebHDFS
        os.environ['REQUESTS_CA_BUNDLE'] = self.__PEM_PATH

        self._hdfs_cnxn = WebHDFS(self._HTTPFS_HOST,
                                  port=14000,
                                  kerberos=True,
                                  use_https=True,
                                  use_ssl=True,
                                  use_listings_cache=False)
        
        self._engine = "hive" if hive else 'impala'
        if IS_WINDOWS:        # LOCAL - Windows
            from pyodbc import connect
            self._cnxn = connect('DSN=DISC DP Impala 64bit' if not hive 
                                 else 'DSN=DISC DP Hive 64bit', 
                                 autocommit=True)
            
        else:                 # CDSW (os.name='POSIX')
            from impala.dbapi import connect
            self._cnxn = connect(host=self._HIVE_HOST if hive
                                      else self._IMPALA_HOST,
                                 use_ssl=True,
                                 timeout=30,
                                 kerberos_service_name=self._engine,
                                 port=10000 if hive else 21050,
                                 auth_mechanism="GSSAPI")  #['NOSASL', 'PLAIN', 'GSSAPI', 'LDAP']

        self._cursor = self._cnxn.cursor()
        if not os.path.exists(self.__TEMP_LOCAL_DIR):
            os.mkdir(self.__TEMP_LOCAL_DIR)
        
        self._is_disc_connected = True
        self.db = None
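
    # Usage sketch (hypothetical; assumes Kerberos credentials and the DSN/host
    # settings for the current platform are already in place):
    #     disc = DISC()          # opens an Impala connection by default
    #     disc.open(hive=True)   # re-open the connection against Hive instead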

    
    def connect_spark(self, app_name=None, master=None, 
                      config=dict(), return_SparkSession=False):
        """Connects to spark via pyspark. 
        Stores the spark session in the attribute `disc.spark`. 
        Access to the Spark UI is provided via the link `disc.spark_ui` 
        (the address is also available as `disc._spark_uri`).
        
        Args:
            app_name (str or None):  A name for the current session.
            master (str or None):    Either `yarn` (default) or `local`.
            config (dict):     Dictionary with spark configurations.       
            return_SparkSession (bool): Whether to return the spark session (default is False).
        
        Returns: None or SparkSession
        """

        self._spark_uri = f"https://spark-{os.environ['CDSW_ENGINE_ID']}.{os.environ['CDSW_DOMAIN']}/jobs/"
        from pyspark.sql import SparkSession
        spark = (SparkSession.builder
                             .appName(app_name or 'SparkSession'))
        if master is not None:
            spark = spark.master(master)
        for k, v in config.items():
            spark = spark.config(k, v)
        
        spark = spark.getOrCreate()
        
        self.spark = spark
        
        if return_SparkSession: 
            return spark
        else:                   
            return self.spark_ui
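
    # Usage sketch (hypothetical app name and settings; only meaningful on CDSW,
    # where CDSW_ENGINE_ID and CDSW_DOMAIN are defined):
    #     disc.connect_spark(app_name='my_analysis',
    #                        config={'spark.executor.memory': '4g',
    #                                'spark.executor.cores': '2'})
    #     spark = disc.spark     # the underlying SparkSession
    #     disc.spark_ui          # HTML link to the Spark UI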

    @property
    def spark_ui(self):
        if self._is_spark_connected:
            from IPython.core.display import HTML
            return HTML(f'<a href="{self._spark_uri}">Go to Spark UI</a>')
        else:
            return 'Spark is not connected. Connect with `disc.connect_spark()`.'
        
        
    def stop_spark(self):
        if self._is_spark_connected:
            self.spark.sparkContext.stop()
            self.spark.stop()
    
    @property
    def _is_spark_connected(self):
        try:
            urllib.request.urlopen(self._spark_uri).getcode()
            return True
        except:
            return False
            

    
    @lru_cache()
    def __get_databases(self):
        self._cursor.execute("show databases")
        return [*zip(*self._cursor.fetchall())][0]
    
    def get_databases(self, ret=False):
        """Prints available databases, or returns them if `ret=True`."""
        databases = self.__get_databases()
        if ret:
            return databases
        else:
            for db in databases:
                print(db) 

    def select_database(self, database):
        """Navigates to given database.

        Args:
            database (str): the database to be selected
        """
        self._cursor = self._cnxn.cursor()
        self._cursor.execute(f"use {database}")
        self.db = database

    def get_tables(self, database=None, ret=False):
        """Prints and returs tables within current database."""
        if database is not None:
            self.select_database(database)
        self._cursor.execute("show tables")
        tables, = zip(*self._cursor.fetchall())
        if ret:
            return tables
        else:
            for table in tables:
                print(table)
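
    # Usage sketch (hypothetical database/table names):
    #     disc.get_databases()                  # prints available databases
    #     disc.select_database('my_datalab')
    #     tables = disc.get_tables(ret=True)    # returns table names as a tuple
    #     disc.describe_table('my_table')       # column names, dtypes, comments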
           
    def describe_table(self, table):
        """Describes a table and returns DataFrame with description.""" 
        self._cursor.execute(f"describe {table}")
        columns, dtype, desc = [*zip(*self._cursor.fetchall())]
        df = pd.DataFrame({'columns': columns, 
                           'dtype': dtype,
                           'desc': desc})
        return df
    
    def _delete_table(self, lab, table_name):
        self._cursor.execute(f"DROP TABLE IF EXISTS {lab}.{table_name}")

    def _create_table(self, lab, table_name, dtypes, path, cols):
        col_and_types = [f'{col} {tp}' for col,tp in zip(cols, dtypes)]
        col_and_types = ', '.join(col_and_types)
        self._cursor.execute(f"""
            CREATE EXTERNAL TABLE IF NOT EXISTS 
            {lab}.{table_name}({col_and_types})
            ROW FORMAT DELIMITED 
            FIELDS TERMINATED BY ','
            STORED AS TEXTFILE
            LOCATION '{path}'
            """)
    
    # TODO: add APPEND TO TABLE OPTION
    
    def _refresh_table(self, lab, table_name):
        self._cursor.execute(f'REFRESH {lab}.{table_name}')
    
    def create_table_csv(self, df, lab, table_name, path, dtypes, cols=None):
        """
        Wrapper method: deletes the previous table, transfers a new csv file,
        and creates the new table.
        Note: dtypes must be one of: ARRAY, BIGINT, BINARY, BOOLEAN, 
        CHAR, DATE, DATETIME, DECIMAL, REAL, FLOAT, INTEGER, MAP, SMALLINT, 
        STRING, STRUCT, TIMESTAMP, TINYINT, VARCHAR
        Args:
           df: A pandas.DataFrame.
           lab: Name of DataLab where to create the table.
           table_name: Name of the table to be created.
           path: Where to store the underlying data.
           dtypes: SQL data types for each column.
           cols: List of strings with column names. By default df.columns.values.
        
        """
            
        self._delete_table(lab=lab, table_name=table_name)
        self.to_csv(df, f'{path}/{table_name}.csv', index=False, header=False)
        self._create_table(lab=lab, 
                           table_name=table_name, 
                           cols=cols or df.columns.values, 
                           dtypes=dtypes,  
                           path=path)
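
    # Usage sketch (hypothetical lab, table and HDFS path; dtypes must line up
    # with the DataFrame columns):
    #     disc.create_table_csv(df, lab='my_datalab', table_name='sales',
    #                           path='/data/lab/my_datalab/sales',
    #                           dtypes=['STRING', 'DATE', 'FLOAT'])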
        
    def create_table(self, df, lab, table_name, path, external=True, permissions=_default_permissions):
        """Create table storing as parquet file.
        TODO: integrate with the method above, allowing the user to choose; allow appending rows to an existing table instead of deleting the old one; check whether another table is already present.
        Args:
           df: A pandas.DataFrame or pyspark.sql.dataframe.DataFrame.
           lab: Name of DataLab where to create the table.
           table_name: Name of the table to be created.
           path: Where to store the underlying data.
           external: EXTERNAL table if `True`.
           permissions (str or None): POSIX representation of permissions, given as an octal string, e.g. '777' (default), or as an int such as 0o777
        """
        self._delete_table(lab=lab, table_name=table_name)
        if not isinstance(df, pd.DataFrame):  # If Spark dataframe
            path += f'/{table_name}'
            df.write.parquet(path)
            self._hdfs_cnxn.chmod(path, permissions)
            file_path = [f for f in self.ls(path) if f.endswith('.parquet')][0]

        else:                                # If Pandas dataframe
            df = df.reset_index(drop=True)
            file_path = f'{path}/{table_name}.parq'
            df.columns = df.columns.str.replace(':','').str.replace(' ','')
            self.to_parquet(df, file_path, permissions=permissions)

        query = (f"""CREATE {'EXTERNAL' if external else ''} TABLE {lab}.{table_name}
                     LIKE PARQUET '{file_path}' 
                     STORED AS PARQUET 
                     {f"LOCATION '{path}'" if external else ''};""")
        self._cursor.execute(query)
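
    # Usage sketch (hypothetical lab and path; works for both pandas and Spark
    # DataFrames, writing the data as parquet before declaring the table):
    #     disc.create_table(df, lab='my_datalab', table_name='sales_parq',
    #                       path='/data/lab/my_datalab/sales_parq')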


    def _fix_path(self, path):
        """Adds hdfs root to path."""
        if path[:len(self._HDFS_PATH)] != self._HDFS_PATH:
            path = self._HDFS_PATH + path
        return path
    
    def read_csv(self, path, **kwargs):
        """Wrapper around pandas.read_csv.
        Args:
          path (str):     Path to DISC location
          kwargs: Keyword arguments to be passed to pandas.read_csv
          """
        with self._hdfs_cnxn.open(path) as f:
            df = pd.read_csv(f, **kwargs)
        return df
    
    def to_csv(self, df, path, name=None, permissions=_default_permissions, **kwargs):
        """Save dataframe to DISC `path` in csv format.
        Args:
          df (pandas.DataFrame)
          path (str):     Path to DISC location
          permissions (str or None): posix representation or permission, give as oct string, e.g, '777'(default) or 0o777
          kwargs: Keyword arguments to be passed to pandas.to_csv"""
        if name is None:
            name = ntpath.basename(path)
            path = ntpath.dirname(path)
        token = secrets.token_hex(nbytes=8)
        local_file = f'{self.__TEMP_LOCAL_DIR}/{token}{name}'
        df.to_csv(local_file, **kwargs)
        self.upload_file(local_file=local_file, 
                         destination_file_path=f'{path}/{name}',
                         rm_local=True,
                         permissions=permissions)
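
    # Usage sketch (hypothetical DISC paths; extra kwargs go straight to pandas):
    #     df = disc.read_csv('/data/lab/my_datalab/raw/input.csv', sep=';')
    #     disc.to_csv(df, '/data/lab/my_datalab/out/result.csv', index=False)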
        
    def read_excel(self, path, **kwargs):
        """Wrapper around pandas.read_excel"""
        with self._hdfs_cnxn.open(path) as f:
            df = pd.read_excel(f, **kwargs)
        return df
    
    def ls(self, path):
        """Wrapper around `self._hdfs_cnxn.ls`."""
        return self._hdfs_cnxn.ls(path)
        
    def listdir(self, path, full_path=False):
        """Wrapper around `self.ls`. The parameter `full_path` (False by default)
        allows to hide the root of the paths showing only content of selected path."""
        paths = self.ls(path)
        if not full_path:
            paths = [path.split('/')[-1] for path in paths]
        return paths

    def makedir(self, destination_path):
        """Wrapper around `self._hdfs_cnxn.mkdir`."""
        self._hdfs_cnxn.mkdir(destination_path)
    
    def read_parquet(self, path, **kwargs):
        """Wrapper aroud pandas.read_parquet.
        Args:
          path (str):     Path to DISC location
          kwargs: Keyword arguments to be passed to pandas.read_parquet
        """
        df = pd.read_parquet(path, 
                             filesystem=self._hdfs_cnxn, 
                             **kwargs)
        return df

    def to_parquet(self, df, path, permissions=_default_permissions, **kwargs):
        """Wrapper aroud pandas.DataFrame.to_parquet.
        Args:
          df (pandas.DataFrame)
          path (str):     Path to DISC location
          permissions (str or None): posix representation or permission, give as oct string, e.g, '777'(default) or 0o777
          kwargs: Keyword arguments to be passed to pandas.to_parquet
        """
        df.columns = df.columns.astype(str)
        df.to_parquet(path, filesystem=self._hdfs_cnxn, **kwargs) 
        if permissions: self._hdfs_cnxn.chmod(path, permissions)
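
    # Usage sketch (hypothetical DISC paths; I/O goes through the WebHDFS
    # filesystem handle, so no local copy is made):
    #     disc.to_parquet(df, '/data/lab/my_datalab/out/result.parq')
    #     df2 = disc.read_parquet('/data/lab/my_datalab/out/result.parq')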
        

    def to_feather(self, df, path, permissions=_default_permissions, **kwargs):
        """Wrapper around pandas.to_feather.
          Args:
            df (pandas.DataFrame)
            path (str):     Path to DISC location
            permissions (str or None): POSIX representation of permissions, given as an octal string, e.g. '777' (default), or as an int such as 0o777
            kwargs: Keyword arguments to be passed to pandas.to_feather
        """
        name, path = ntpath.basename(path), ntpath.dirname(path)
        token = secrets.token_hex(nbytes=8)
        local_file = f'{self.__TEMP_LOCAL_DIR}/{token}{name}'
        df.to_feather(local_file, **kwargs)
        self.upload_file(local_file, f'{path}/{name}', 
                         rm_local=True,
                         permissions=permissions)

    
    def read_feather(self, path, **kwargs):
        """Wrapper around pandas.read_feather.
        Args:
          path (str):     Path to DISC location
          kwargs: Keyword arguments to be passed to pandas.read_feather
        """
        with self._hdfs_cnxn.open(path) as f:
            df = pd.read_feather(f, **kwargs)
        return df    
    
    def to_stata(self, df, path, permissions=_default_permissions, **kwargs):
        """Wrapper around pandas.to_stata.
          Args:
            df (pandas.DataFrame)
            path (str):     Path to DISC location
            permissions (str or None): POSIX representation of permissions, given as an octal string, e.g. '777' (default), or as an int such as 0o777
            kwargs: Keyword arguments to be passed to pandas.to_stata
        """
        name, path = ntpath.basename(path), ntpath.dirname(path)
        token = secrets.token_hex(nbytes=8)
        local_file = f'{self.__TEMP_LOCAL_DIR}/{token}{name}'
        df.to_stata(local_file, **kwargs)
        self.upload_file(local_file, f'{path}/{name}', 
                         rm_local=True,
                         permissions=permissions)

    
    def read_stata(self, path, **kwargs):
        """Wrapper around pandas.read_stata.
        Args:
          path (str):     Path to DISC location
          kwargs: Keyword arguments to be passed to pandas.read_stata
        """
        with self._hdfs_cnxn.open(path) as f:
            df = pd.read_stata(f, **kwargs)
        return df
    
    
    def read_encrypted(self, path, password, **kwargs):
        """Wrapper aroud cryptpandas.read_encrypted.
        Args:
          path (str):     Path to DISC location
          password (str): Password for decryption 
          kwargs: Keyword arguments to be passed to cryptpandas.read_encrypted
        """

        token = secrets.token_hex(nbytes=8)
        local_file = f'{self.__TEMP_LOCAL_DIR}/encrypted_{token}'
        self._hdfs_cnxn.download(path, local_file)
        df = crpd.read_encrypted(local_file, password, **kwargs)
        
        return df
    
    
    def to_encrypted(self, df, path, password, permissions=_default_permissions, **kwargs):
        """Write a DataFrame as encrypted binary at specified DISC location.
        Args:
          df:       A pandas DataFrame
          path:     Path to DISC location
          password: Password for encryption
          permissions (str or None): POSIX representation of permissions, given as an octal string, e.g. '777' (default), or as an int such as 0o777
          kwargs:   Keyword arguments to be passed to cryptpandas.to_encrypted
        """
        # Write the encrypted file
        token = secrets.token_hex(nbytes=8)
        name, root = ntpath.basename(path), ntpath.dirname(path)
        local_file = f'{self.__TEMP_LOCAL_DIR}/{token}_{name}'
        crpd.to_encrypted(df, password=password, path=local_file, **kwargs)
        destination_file_path = f'{root}/{name}'
        self.upload_file(local_file, destination_file_path, 
                         rm_local=False, overwrite=True, permissions=permissions)
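
    # Usage sketch (hypothetical path and password; relies on cryptpandas):
    #     disc.to_encrypted(df, '/data/lab/my_datalab/secure/df.crypt', password='...')
    #     df2 = disc.read_encrypted('/data/lab/my_datalab/secure/df.crypt', password='...')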

    
    def to_pickle(self, obj, path, protocol='HIGHEST_PROTOCOL', permissions=_default_permissions, **kwargs):
        """Saves to remote HDFS as pickle file.
        Args:
            obj: The object to be pickled.
            path (str): the path of the file to be saved
            protocol (str or int): Either a string ('HIGHEST_PROTOCOL' or 'DEFAULT_PROTOCOL') or an integer
            permissions (str or None): POSIX representation of permissions, given as an octal string, e.g. '777' (default), or as an int such as 0o777
        """        
        if isinstance(protocol, str):
            protocol = getattr(pickle, protocol)    
            
        with self._hdfs_cnxn.open(path, "wb") as f:
            pickle.dump(obj, f,  protocol=protocol, **kwargs)
        
        if permissions: self._hdfs_cnxn.chmod(path, permissions)          
            
    def read_pickle(self, path, **kwargs):
        """Reads a pickle file from DISC `path` and returns the unpickled object."""
        with self._hdfs_cnxn.open(path, "rb") as f:
            obj = pickle.load(f, **kwargs)
        return obj
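
    # Usage sketch (hypothetical path; any picklable object works):
    #     disc.to_pickle({'run': 1, 'status': 'ok'}, '/data/lab/my_datalab/meta.p')
    #     meta = disc.read_pickle('/data/lab/my_datalab/meta.p')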
        
    def upload_file(self, local_file, destination_file_path, 
                    rm_local=False, overwrite=True, permissions=_default_permissions):
        """Uploads file to DISC.
        Args:
            local_file (str):  Path to local file to be uploaded.
            destination_file_path (str): Destination path.
            rm_local (bool): If True deletes local file after upload (default is False).
            overwrite (bool): If True overwrites file at destination (default is True).
            permissions (str or None): POSIX representation of permissions, given as an octal string, e.g. '777' (default), or as an int such as 0o777
        """
        if overwrite:
            if self._hdfs_cnxn.exists(destination_file_path):
                self._hdfs_cnxn.rm(destination_file_path)
        self._hdfs_cnxn.upload(local_file, destination_file_path)
        if permissions: self._hdfs_cnxn.chmod(destination_file_path, permissions)          
        if rm_local:
            os.remove(local_file)
            
    def upload(self, local_path, destination_path):  
        """Uploads files and/or folders from `local_path` 
           onto the DISC `destination_path`"""
        if not self._hdfs_cnxn.exists(destination_path):
            self._hdfs_cnxn.mkdir(destination_path)
        if os.path.isdir(local_path):
            for root, dirs, files in os.walk(local_path):
                if root == local_path:
                    dest_path = destination_path
                else:
                    relpath = os.path.relpath(root, local_path)
                    dest_path = f'{destination_path}/{relpath}'
                for dir in dirs:
                    if not self._hdfs_cnxn.exists(f'{dest_path}/{dir}'):
                        self._hdfs_cnxn.mkdir(f'{dest_path}/{dir}')
                for file in files:
                    self.upload_file(local_file=f'{root}/{file}',
                                     destination_file_path=f'{dest_path}/{file}',
                                     rm_local=False)
                 
        else:  # upload_file 
            self.upload_file(local_file=local_path, 
                             destination_file_path=destination_path, 
                             rm_local=False)
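
    # Usage sketch (hypothetical local and DISC paths; directories are walked
    # recursively and recreated on HDFS):
    #     disc.upload_file('results.xlsx', '/data/lab/my_datalab/results.xlsx')
    #     disc.upload('./output_dir', '/data/lab/my_datalab/output_dir')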
    
        
            
    def savefig(self, path, ax=None, **kwargs):
        """Saves a matplotlib figure to the DISC `path` destination.
        If `ax` is given, the figure owning that Axes is saved; otherwise the
        current figure is used."""
        import matplotlib.pyplot as plt
        fig = ax.get_figure() if ax is not None else plt.gcf()
        token = secrets.token_hex(nbytes=8)
        local_file = f'{self.__TEMP_LOCAL_DIR}/{token}_fig.png'
        fig.savefig(local_file, **kwargs)
        self.upload_file(local_file=local_file, 
                         destination_file_path=path, 
                         rm_local=True)
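
    # Usage sketch (hypothetical path; kwargs are passed to matplotlib's savefig):
    #     import matplotlib.pyplot as plt
    #     fig, ax = plt.subplots()
    #     ax.plot([1, 2, 3])
    #     disc.savefig('/data/lab/my_datalab/figs/line.png', ax=ax, dpi=150)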
        
    def make_vintage(self, origin, freq='month', overwrite=False, deep=False, exclude=[]):
        """
        Args:
            origin (str): The folder of which you want to store vintages. A new 'origin/VINTAGES' folder will be created.
            freq (str): Frequency with which to store vintages. Choose between year, month, day, max. Default is 'month'.
            overwrite (bool): Whether to overwrite already existing vintages. Default is `False`.
            deep (bool): If True stores as year/month/etc. Else in a single folder year_month_day_etc.
            exclude (list): List of items (files or directories) in origin, of which no vintaging should occur.
        """
        from datetime import datetime
        dtmt = datetime.today()
        _DAY, _MONTH, _YEAR = dtmt.day, dtmt.strftime('%b'), dtmt.year
        sep = '/' if deep else '_'
        vintage = f'{_YEAR}{sep}{_MONTH}'
        if freq=='year':
            vintage = f'{_YEAR}'
        elif freq=='day':
            vintage = f'{_YEAR}{sep}{_MONTH}{sep}{_DAY}'
        elif freq=='max':
            vintage = f'{_YEAR}{sep}{_MONTH}{sep}{_DAY}{sep}{dtmt.hour}h{dtmt.minute}m{dtmt.second}s'
    
        if self._hdfs_cnxn.exists(f'{origin}/VINTAGES/{vintage}') and not overwrite:
            raise PermissionError(f"Vintage 'VINTAGES/{vintage}' already exists at {origin}.\nTo override set `overwrite=True`.")
        else:
            self._hdfs_cnxn.mkdir(f'{origin}/VINTAGES/{vintage}')
    
        current = set(self.listdir(origin, full_path=False)) - ({'.', '..', 'VINTAGES'}|set(exclude))
    
        for item in current:
            self.hdfs_mv(f'{origin}/{item}', f'{origin}/VINTAGES/{vintage}/')
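
    # Usage sketch (hypothetical folder; moves the current contents into
    # origin/VINTAGES/<year>/<month> and raises if that vintage already exists):
    #     disc.make_vintage('/data/lab/my_datalab/model_output',
    #                       freq='month', deep=True)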
            
    
    def hdfs_mv(self, origin_path, destination_path):
        """Moves files/directories from one DISC location to another."""
#        if IS_WINDOWS:
        self._hdfs_cnxn.mv(origin_path, destination_path)
#        else:
#            (subprocess.Popen(f'hdfs dfs -mv {origin_path} {destination_path}',
#                         stdout=subprocess.PIPE, shell=True)
#                   .communicate())
    
    def read_sql(self, query, **kwargs):
        """Performs a sql query on disc.
        Args: 
            query (str): a SQL query.
            kwargs: Keyword arguments to be passed to pandas.read_sql.
        Returns: 
            pd.DataFrame
        """
        return pd.read_sql(query, con=self._cnxn, **kwargs)
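
    # Usage sketch (hypothetical table; the query runs on the engine selected in
    # `open()`, i.e. Impala by default or Hive):
    #     df = disc.read_sql("SELECT * FROM my_datalab.sales LIMIT 100")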
    
    
    def __repr__(self):
        states = ('Not active', 'Active')
        sb, eb = ("\033[1m","\033[0;0m") if self._is_ipython else ('','')  # Bold
        sr, er = ("\x1b[31m", "\x1b[0m") if self._is_ipython else ('','')  # Red
        sg, eg = ("\033[92m", "\033[0m") if self._is_ipython else ('','')  # Green
        clr_str = lambda isc:(sg,eg) if isc else (sr,er)
        state_str = lambda isc:  f'{clr_str(isc)[0]}{states[isc]}{clr_str(isc)[1]}'
        _repr = f"\n{sb}DISC connection{eb}: {state_str(self._is_disc_connected)}"\
                f"\nEngine: {self._engine}"\
                f"\nSelected database: {self.db}"\
                f"\n{sb}Spark connection{eb}: {state_str(self._is_spark_connected)}"
        return _repr
        
        
    def _repr_html_(self):
        with open(f'{os.path.dirname(__file__)}/res/connector_mini.svg', 'r') as f: 
            _svg_cnn = f.read()
        states = ('Not active', 'Active')
        colors = ('#C82806', '#138F0B')
        bcolors = ('#FCD9D9', '#DBFCD9')
        html_repr = _svg_cnn + f"""</br>
        <span style="white-space: nowrap;">
        <b>DISC connection</b>:
        <span style="color:{colors[self._is_disc_connected]};
                     background-color:{bcolors[self._is_disc_connected]};
                     white-space: nowrap;">{states[self._is_disc_connected]}</span>
        </span></br>
        <span style="white-space: nowrap;">
        <span style="color: gray">Engine:</span>
        <span style="white-space: nowrap;">{self._engine}</span>
        </span></br>
        <span style="white-space: nowrap;">
        <span style="color: gray">Selected database:</span>
        <span style="white-space: nowrap;">{self.db}</span>
        </span></br>

        </br>
        <span style="white-space: nowrap;">
        <b>Spark Connection</b>:
        <span style="color:{colors[self._is_spark_connected]};
                     background-color:{bcolors[self._is_spark_connected]};
                     white-space: nowrap;">{states[self._is_spark_connected]}</span>
        </span>"""

        if self._is_spark_connected:
            html_repr += f"""</br>
            <b><i>SparkContext</i></b></br>

            <a href="{self._spark_uri}">Spark UI</a></br>

            <span style="white-space: nowrap;">
            <span style="color: gray">Master:</span>
            <span style="white-space: nowrap;">{self.spark.sparkContext.master}</span>
            </span></br>
            <span style="white-space: nowrap;">
            <span style="color: gray">AppName:</span>
            <span style="white-space: nowrap;">{self.spark.sparkContext.appName}</span>
            </span></br>
            """
        html_repr += f"""</br></br>
                         <a href="{self.USER_GUIDE_URL}">
                         Need help? Check the documentation!</a>"""

        return html_repr
        
            
    def show_spark_conf(self):
        """Displays spark configurations."""
        if not self._is_spark_connected:
            print('Spark is not connected. To connect, try `disc.connect_spark()`')
        else:
            from IPython.core.display import HTML, display
            html_repr = f"""</br><b><i>Spark Configurations</i></b></br>"""
            confs = self.spark.sparkContext.getConf().getAll()
            for (cnf_k,cnf_v) in confs:
                html_repr += f"""
                   <span style="white-space: nowrap;">
                   <span style="color: gray">{cnf_k[6:]}:</span>
                   <span style="white-space: nowrap;">{cnf_v}</span>
                   </span></br>"""
            display(HTML(html_repr))

    def __log(self):
        try:
            _path = '/data/lab/dlb_ecb_public/share/_CONNECTORS_LOG'
            date = str(datetime.datetime.today().date())
            if date not in self.listdir(_path):
                self.to_pickle(0, f'{_path}/{date}/logs.p')
            else:
                L = self.read_pickle(f'{_path}/{date}/logs.p')
                self.to_pickle(L+1, f'{_path}/{date}/logs.p')
        except:
            pass

        
    def close(self, rm_local_temp=False):
        """Closes connection to DISC.
        Args:
           rm_local_temp (bool): Delete local temp folder. Default is False.
        """
        if rm_local_temp: shutil.rmtree(self.__TEMP_LOCAL_DIR, ignore_errors=True)
        self._cursor.close()
        self._cnxn.close()
        self._is_disc_connected = False
        self.stop_spark()
        if IS_WINDOWS:
            del self._hdfs_cnxn
        else:
            self._hdfs_cnxn.close()
        print('Closed connection to DISC.')
    
    def __del__(self):
        try:
            self.close()
        except:
            pass