Code example #1
File: handle_HDFS.py Project: yuqi1129/learngit
    def __init__(self, url):
        '''
        :param url: Hostname or IP address of the HDFS namenode, prefixed
            with protocol, followed by the WebHDFS port on the namenode.
            Multiple URLs separated by semicolons may also be given for
            high-availability support.
        '''
        # Instantiate an HDFS web client using Kerberos authentication
        self.client = KerberosClient(url)
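A minimal usage sketch for the high-availability form the docstring describes; the hostnames are placeholders, and the hdfs library simply tries each semicolon-separated URL in turn:

from hdfs.ext.kerberos import KerberosClient

# Hypothetical namenode pair; the client fails over between the
# semicolon-separated URLs.
client = KerberosClient('http://nn1.example.com:50070;http://nn2.example.com:50070')
print(client.status('/'))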
Code example #2
 def get_conn(self):
     """
      Returns an hdfscli client object (an InsecureClient, or a
      KerberosClient when Kerberos security is enabled).
     """
     nn_connections = self.get_connections(self.webhdfs_conn_id)
     for nn in nn_connections:
         try:
             self.log.debug('Trying namenode %s', nn.host)
             connection_str = 'http://{nn.host}:{nn.port}'.format(nn=nn)
             if _kerberos_security_mode:
                 client = KerberosClient(connection_str)
             else:
                 proxy_user = self.proxy_user or nn.login
                 client = InsecureClient(connection_str, user=proxy_user)
             client.status('/')
             self.log.debug('Using namenode %s for hook', nn.host)
             return client
         except HdfsError as e:
             self.log.debug(
                 "Read operation on namenode {nn.host} "
                 "failed with error: {e}".format(**locals())
             )
     nn_hosts = [c.host for c in nn_connections]
     no_nn_error = "Read operations failed " \
                   "on the namenodes below:\n{}".format("\n".join(nn_hosts))
     raise AirflowWebHDFSHookException(no_nn_error)
Code example #3
def transform(outDir, image, x, y, dt):
    plt.switch_backend('agg')
    plt.figure(figsize=(25, 15), dpi=100)
    p = Proj(proj='geos', h=satHeight, lon_0=satLongitude, sweep=satSweep)
    XX, YY = np.meshgrid(x, y)
    lons, lats = p(XX, YY, inverse=True)
    mH = Basemap(resolution='i', projection='lcc', area_thresh=1500,
                 width=1800 * 3000, height=1060 * 3000,
                 lat_1=38.5, lat_2=38.5,
                 lat_0=38.5, lon_0=-97.5)
    xH, yH = mH(lons, lats)
    rgb = image[1][:, :-1, :]
    rgb = rgb / 256.0
    colorTuple = rgb.reshape((rgb.shape[0] * rgb.shape[1]), 3)
    colorTuple = np.insert(colorTuple, 3, 1.0, axis=1)
    newmap = mH.pcolormesh(xH, yH, image[1][:, :, 0], color=colorTuple, linewidth=0)
    newmap.set_array(None)
    mH.drawstates()
    mH.drawcountries()
    mH.drawcoastlines()
    # plt.title('GOES-16 Pseudo Color\n%s' % dt.strftime('%B %d, %Y UTC'))
    buf = BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    buf.seek(0)
    client = KerberosClient('http://hc.gps.stthomas.edu:50070')
    with client.write(outDir + '/TRANSFORM_' + image[0].split("/")[-1], overwrite=True) as writer:
        writer.write(buf.getvalue())
    buf.close()
Code example #4
def execute_process(args):

    directory = args.directory
    linux_path = args.linuxPath
    file_name = args.fileName
    end_file_name = file_name.replace(".csv", "_done.csv")

    full_path = '{}{}/'.format(linux_path, directory)

    full_file_name_path = '{}{}'.format(full_path, end_file_name)
    # Open in text mode with newline='' so the csv module handles line
    # endings itself (binary mode here would break under Python 3).
    with open('{}{}'.format(full_path, file_name), 'r', newline='') as read:
        with open(full_file_name_path, 'w', newline='') as file_write:
            reader = csv.reader(read,
                                delimiter=';',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            wr = csv.writer(file_write, delimiter=';')
            for row in reader:
                new_row = [
                    data.replace("\n", " ").replace("\r", " ") for data in row
                ]
                wr.writerow(new_row)

    client = KerberosClient(args.webHdfs)
    client.upload(args.hdfsPath + directory,
                  full_file_name_path,
                  n_threads=5,
                  overwrite=True)
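execute_process expects an argparse-style namespace; a hypothetical parser wiring up the attributes the function reads (directory, linuxPath, fileName, webHdfs, hdfsPath) might look like the following, with all flag names being assumptions:

import argparse

# Hypothetical CLI for execute_process; flag names are illustrative only.
parser = argparse.ArgumentParser()
parser.add_argument('--directory')
parser.add_argument('--linuxPath')
parser.add_argument('--fileName')
parser.add_argument('--webHdfs')
parser.add_argument('--hdfsPath')
execute_process(parser.parse_args())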
Code example #5
 def get_conn(self):
     """
      Returns an hdfscli client object (an InsecureClient, or a
      KerberosClient when Kerberos security is enabled).
     """
     nn_connections = self.get_connections(self.webhdfs_conn_id)
     for nn in nn_connections:
         try:
             self.log.debug('Trying namenode %s', nn.host)
             connection_str = 'http://{nn.host}:{nn.port}'.format(nn=nn)
             if _kerberos_security_mode:
                 client = KerberosClient(connection_str)
             else:
                 proxy_user = self.proxy_user or nn.login
                 client = InsecureClient(connection_str, user=proxy_user)
             client.status('/')
             self.log.debug('Using namenode %s for hook', nn.host)
             return client
         except HdfsError as e:
             self.log.debug(
                 "Read operation on namenode {nn.host} failed with error: {e}"
                 .format(**locals()))
     nn_hosts = [c.host for c in nn_connections]
     no_nn_error = "Read operations failed on the namenodes below:\n{}".format(
         "\n".join(nn_hosts))
     raise AirflowWebHDFSHookException(no_nn_error)
Code example #6
 def get_conn(self):
     """
      Returns an hdfscli client object (an InsecureClient, or a
      KerberosClient when Kerberos security is enabled).
     """
     nn_connections = self.get_connections(self.webhdfs_conn_id)
     for nn in nn_connections:
         try:
             logging.debug("Trying namenode {}".format(nn.host))
             connection_str = "http://{nn.host}:{nn.port}".format(nn=nn)
             if _kerberos_security_mode:
                 client = KerberosClient(connection_str)
             else:
                 proxy_user = self.proxy_user or nn.login
                 client = InsecureClient(connection_str, user=proxy_user)
             client.status("/")
             logging.debug("Using namenode {} for hook".format(nn.host))
             return client
         except HdfsError as e:
             logging.debug(
                 "Read operation on namenode {nn.host} failed with"
                 " error: {e}".format(**locals())
             )
     nn_hosts = [c.host for c in nn_connections]
     no_nn_error = "Read operations failed on the namenodes below:\n{}".format(
         "\n".join(nn_hosts)
     )
     raise Exception(no_nn_error)
Code example #7
 def process():
     # Try each configured namenode URL until one answers a read operation.
     for key in urlMaping:
         client = KerberosClient(urlMaping[key], root=root, proxy=proxy)
         try:
             client.list("/")
             return client
         except Exception:
             continue
Code example #8
def save_file_hdfs(rdd, dir_files_pdf, server_hdfs, user_name_hdfs):
    n_file_id = int(rdd[0])
    n_info_tec = rdd[1].replace("/", "-")
    n_file = rdd[2]
    hdfsclient = KerberosClient(server_hdfs)
    hdfsclient.write(os.path.join(dir_files_pdf,
                                  '{}_{}.pdf'.format(n_file_id, n_info_tec)),
                     n_file,
                     overwrite=True)
    return rdd
Code example #9
def hdfs_connect_demo():

    # NOTE: under the hood this calls kinit
    with krbContext(using_keytab=True,
                    principal='*****@*****.**',
                    keytab_file='/houleilei.client.keytab'):
        client = KerberosClient('http://hadoop01.stor:50070',
                                hostname_override='hadoop01.stor')
        # client = InsecureClient('http://hadoop01.stor:50070', user='******')
        result = client.list('/home/holyzing/')
        print(type(result), result)
Code example #10
        def testip(ip, root=None, proxy=None):
            print(ip)

            if ip == '':
                return process()
            else:
                client = KerberosClient(urlMaping[ip], root=root, proxy=proxy)
                try:
                    print('test %s' % urlMaping[ip])
                    client.list("/")
                    return client
                except Exception:
                    return process()
Code example #11
File: completer.py Project: trams/hdfs-completer
def get_client(host, use_kerberos):
    if use_kerberos:
        from hdfs.ext.kerberos import KerberosClient
        return KerberosClient(host)
    else:
        from hdfs.client import Client
        return Client(host)
Code example #12
File: hdfs_hdfscli.py Project: lbtanh/dbnd
    def client(self):  # type: () -> WebHDFS
        if self.client_type == WebHdfsClientType.KERBEROS:
            from hdfs.ext.kerberos import KerberosClient

            return KerberosClient(
                url=self.url,
                mutual_authentication=self.mutual_authentication,
                service=self.service,
                delegate=self.delegate,
                force_preemptive=self.force_preemptive,
                principal=self.principal,
                hostname_override=self.hostname_override,
                sanitize_mutual_error_response=self.sanitize_mutual_error_response,
                send_cbt=self.send_cbt,
            )

        elif self.client_type == WebHdfsClientType.INSECURE:
            from hdfs import InsecureClient

            return InsecureClient(url=self.url, user=self.user)

        elif self.client_type == WebHdfsClientType.TOKEN:
            from hdfs import TokenClient

            return TokenClient(url=self.url, token=self.token)
        else:
            raise Exception("WebHdfs client type %s does not exist" %
                            self.client_type)
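WebHdfsClientType itself is not shown in this snippet; a minimal stand-in consistent with the three branches above could look like the following sketch:

from enum import Enum

# Hypothetical definition; the real dbnd enum may differ.
class WebHdfsClientType(str, Enum):
    KERBEROS = "kerberos"
    INSECURE = "insecure"
    TOKEN = "token"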
Code example #13
def hdfs_client_ini(conf):
    _conf = conf
    _url = ''
    _nodes = []

    for _node in _conf['namenodes']:
        _nodes.append('http://' + str(_node) + ':' + str(_conf['port']))
    _url = ';'.join(_nodes)

    if os.path.isfile(_conf['keytab']):
        _conf_keytab = _conf['keytab']
    else:
        _conf_keytab = str(os.path.dirname(
            os.path.realpath(__file__))) + os.sep + str(_conf['keytab'])

    try:
        os.environ["KRB5_CLIENT_KTNAME"] = _conf_keytab
    except Exception as _err:
        print('ERR: [initiator:hdfs_client_ini]', _err)
        return False

    try:
        # Sanity check only: KerberosClient builds its own Kerberos auth
        # internally, so _kerberos_auth is not passed to the client below.
        _kerberos_auth = HTTPKerberosAuth(principal=_conf['principal'])
    except Exception as _err:
        print('ERR: [initiator:hdfs_client_ini]', _err)
        return False
    else:
        try:
            _client = KerberosClient(_url)
        except Exception as _err:
            print('ERR: [initiator:hdfs_client_ini]', _err)
            return False
        else:
            return _client
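A hypothetical configuration for hdfs_client_ini, matching the keys the function reads (namenodes, port, keytab, principal); every value is a placeholder:

# Placeholder values throughout.
conf = {
    'namenodes': ['nn1.example.com', 'nn2.example.com'],
    'port': 50070,
    'keytab': 'service.keytab',
    'principal': 'service/host@EXAMPLE.COM',
}
client = hdfs_client_ini(conf)
if client:
    print(client.list('/'))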
Code example #14
    def run(self):

        # Pre-processing
        tc = BeforeHandler(self.__args, self.__col_info, self.__db_info,
                           self.__props)
        ret = tc.run()
        if ret != 0:
            LOG.error("Pre-load processing failed")
            return ret

        # Load processing

        # Upload the file to the target HDFS directory
        HDFS_WORK_DIR = "{0}/{1}".format(self.__args.loaddir,
                                         self.__args.table)
        #put_cmd = "KRB5_CONFIG={0}" \
        #          " && kinit -kt {1} {2}" \
        #          " && hadoop fs -rm -r -f {3}" \
        #          " && hadoop fs -mkdir -p {3}" \
        #          " && hadoop fs -put {4} {5}"\
        #    .format(self.__args.krbfile, self.__args.ktfile, self.__args.ktuser, HDFS_WORK_DIR, self.__args.srcfile, HDFS_WORK_DIR)

        #LOG.info("HDFS PUT CMD[{0}]".format(put_cmd))
        #ret = os.system(put_cmd)
        #if ret != 0:
        #    LOG.error("Failed to upload file to HDFS")
        #    return -1

        print("AAA:{0}".format(HDFS_WORK_DIR))
        try:
            # Establish the connection
            hdfs_client = KerberosClient(self.__args.nnurl,
                                         principal="{0}".format(
                                             self.__args.ktuser))

            # Remove the old directory
            hdfs_client.delete(HDFS_WORK_DIR, recursive=True)

            # Create a fresh directory
            hdfs_client.makedirs(HDFS_WORK_DIR)

            # Upload the file to HDFS
            hdfs_client.upload(HDFS_WORK_DIR, self.__args.srcfile)
        except Exception:
            traceback.print_exc()
            LOG.error("Data load failed")
            return -1

        LOG.info("Data load succeeded")

        # Post-processing
        tc = AfterHandler(self.__args, self.__db_info, self.__props)
        ret = tc.run()
        if ret != 0:
            LOG.error("Post-load processing failed")
            return ret
        return 0
Code example #15
 def __get_hdfs_client(self):
     # hdfs_host = "http://10.72.59.89:50070"
     # user = "******"
     if self.is_kerberos:
         cli = KerberosClient(url=hdfs_host)
     else:
         cli = client.InsecureClient(url=hdfs_host, user=user)
     return cli
Code example #16
def get_client(namenode_url: str) -> KerberosClient:
    """Thin wrapper around KerberosClient

    Parameters
    ----------
    namenode_url: The url of the namenode. Should include protocol (http/https) and port
    """
    return KerberosClient(namenode_url)
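A hedged usage sketch with a placeholder URL of the form the docstring asks for (protocol plus WebHDFS port):

# Placeholder namenode URL.
client = get_client('https://namenode.example.com:9871')
print(client.status('/'))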
Code example #17
File: __init__.py Project: tvial/ibis
def hdfs_connect(host='localhost',
                 port=50070,
                 protocol='webhdfs',
                 use_https='default',
                 auth_mechanism='NOSASL',
                 verify=True,
                 **kwds):
    """
    Connect to HDFS

    Parameters
    ----------
    host : string, Host name of the HDFS NameNode
    port : int, NameNode's WebHDFS port (default 50070)
    protocol : {'webhdfs'}
    use_https : boolean, default 'default'
        Connect to WebHDFS with HTTPS, otherwise plain HTTP. For secure
        authentication, the default for this is True, otherwise False
    auth_mechanism : string, Set to NOSASL or PLAIN for non-secure clusters.
        Set to GSSAPI or LDAP for Kerberos-secured clusters.
    verify : boolean, Set to False to turn off verifying SSL certificates.
        (default True)

    Other keywords are forwarded to hdfs library classes

    Returns
    -------
    client : WebHDFS
    """
    import requests
    session = kwds.setdefault('session', requests.Session())
    session.verify = verify
    if auth_mechanism in ['GSSAPI', 'LDAP']:
        if use_https == 'default':
            prefix = 'https'
        else:
            prefix = 'https' if use_https else 'http'
        try:
            import requests_kerberos
        except ImportError:
            raise IbisError(
                "Unable to import requests-kerberos, which is required for "
                "Kerberos HDFS support. Install it by executing `pip install "
                "requests-kerberos` or `pip install hdfs[kerberos]`.")
        from hdfs.ext.kerberos import KerberosClient
        # note SSL
        url = '{0}://{1}:{2}'.format(prefix, host, port)
        kwds.setdefault('mutual_auth', 'OPTIONAL')
        hdfs_client = KerberosClient(url, **kwds)
    else:
        if use_https == 'default':
            prefix = 'http'
        else:
            prefix = 'https' if use_https else 'http'
        from hdfs.client import InsecureClient
        url = '{0}://{1}:{2}'.format(prefix, host, port)
        hdfs_client = InsecureClient(url, **kwds)
    return WebHDFS(hdfs_client)
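A usage sketch for a Kerberos-secured cluster; the host is a placeholder, and passing auth_mechanism='GSSAPI' takes the KerberosClient path above (HTTPS by default, per the docstring):

hdfs = hdfs_connect(host='namenode.example.com', port=50070,
                    auth_mechanism='GSSAPI')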
Code example #18
 def generate_temp_files(need_certificate=NEED_CERTIFICATE):
     if need_certificate:
         with krbcontext(using_keytab=True,
                         keytab_file=KEYTAB_PATH,
                         principal=PRINCIPAL):
             for node in HDFS.NODES:
                 try:
                     hdfs_client = KerberosClient(node)
                     hdfs_client.download(HDFS.REMOTE_PATH,
                                          HDFS.LOCAL_PATH,
                                          n_threads=HDFS.THREAD_NUM)
                 except Exception as err:
                     logging.info(err)
                 else:
                     return
             logging.error("Failed to download remote HDFS file.")
             raise Exception("Failed to download remote HDFS file.")
     else:
         for node in HDFS.NODES:
             try:
                 hdfs_client = Client(node)
                 hdfs_client.download(HDFS.REMOTE_PATH,
                                      HDFS.LOCAL_PATH,
                                      n_threads=HDFS.THREAD_NUM)
             except Exception as err:
                 logging.info(err)
             else:
                 return
         logging.error("Failed to download remote HDFS file.")
         raise Exception("Failed to download remote HDFS file.")
Code example #19
def initialize_hdfs_client(url):
    global client
    if not client:
        session = Session()
        session.verify = False
        if kerberos['enabled']:
            client = KerberosClient(url, session=session)
        else:
            client = InsecureClient(url, user=hdfs['user'], session=session)
Code example #20
File: webhdfs.py Project: harishjami1382/test2
    def _get_client(self, connection):
        connection_str = 'http://{host}:{port}'.format(host=connection.host, port=connection.port)

        if _kerberos_security_mode:
            client = KerberosClient(connection_str)
        else:
            proxy_user = self.proxy_user or connection.login
            client = InsecureClient(connection_str, user=proxy_user)

        return client
Code example #21
def addMap(outDir, image, satLongitude, xmin, xmax, ymin, ymax, dt):
    plt.switch_backend('agg')
    plt.figure(figsize=(25, 15), dpi=100)
    m = Basemap(projection='geos', lon_0=satLongitude,
                resolution='i', area_thresh=1000,
                llcrnrx=xmin, llcrnry=ymin,
                urcrnrx=xmax, urcrnry=ymax)
    m.imshow(np.flipud(image[1]))
    m.drawcoastlines()
    m.drawcountries()
    m.drawstates()
    # plt.title('GOES-16 Pseudo Color\n%s' % dt.strftime('%B %d, %Y UTC'))
    buf = BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    buf.seek(0)
    client = KerberosClient('http://hc.gps.stthomas.edu:50070')
    with client.write(outDir + '/MAP_' + image[0].split("/")[-1], overwrite=True) as writer:
        writer.write(buf.getvalue())
    buf.close()
Code example #22
File: webhdfs.py Project: ysktir/airflow-1
    def _get_client(self, connection: Connection) -> Any:
        connection_str = f'http://{connection.host}:{connection.port}'

        if _kerberos_security_mode:
            client = KerberosClient(connection_str)
        else:
            proxy_user = self.proxy_user or connection.login
            client = InsecureClient(connection_str, user=proxy_user)

        return client
Code example #23
File: context.py Project: mariusvniekerk/impyla
 def hdfs_client(self):
     url = 'http://{nn_host}:{webhdfs_port}'.format(
         nn_host=self._nn_host, webhdfs_port=self._webhdfs_port)
     if self._kerberized:
         from hdfs.ext.kerberos import KerberosClient
         client = KerberosClient(url, mutual_auth='REQUIRED')
     else:
         from hdfs.client import InsecureClient
         client = InsecureClient(url, user=self._hdfs_user)
     return client
Code example #24
File: webhdfs.py Project: kosteev/airflow
    def _get_client(self, namenode: str, port: int, login: str, extra_dejson: dict) -> Any:
        connection_str = f'http://{namenode}:{port}'
        session = requests.Session()

        if extra_dejson.get('use_ssl', False):
            connection_str = f'https://{namenode}:{port}'
            session.verify = extra_dejson.get('verify', True)

        if _kerberos_security_mode:
            return KerberosClient(connection_str, session=session)
        proxy_user = self.proxy_user or login
        return InsecureClient(connection_str, user=proxy_user, session=session)
Code example #25
 def client(self):
     # A naive benchmark showed that 1000 existence checks took 2.5 secs
     # when not recreating the client, and 4.0 secs when recreating it, so
     # memoizing it is not urgent. Note that there *might* be issues with
     # process forking and whatnot (as with the snakebite client) if we
     # memoize it too trivially.
     if self.client_type == 'kerberos':
         from hdfs.ext.kerberos import KerberosClient
         return KerberosClient(url=self.url)
     else:
         import hdfs
         return hdfs.InsecureClient(url=self.url, user=self.user)
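If memoization were wanted despite the fork caveat in the comment, one sketch (an illustration, not this project's approach) is a per-process cache keyed on the PID, so a forked worker rebuilds its own client; factory.client is assumed to be the method above:

import os

_client_cache = {}

def get_cached_client(factory):
    # Keying on os.getpid() forces each forked process to construct
    # its own client rather than reuse the parent's connection state.
    key = os.getpid()
    if key not in _client_cache:
        _client_cache[key] = factory.client()
    return _client_cache[key]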
Code example #26
    def get_model(self):
        client = KerberosClient(settings.DUNANT_HDFS_PATH)

        MODEL_DIR = settings.DUNANT_MODEL_DIR
        MOST_RECENT_MODEL = sorted(client.list(MODEL_DIR))[-1]

        MODEL_PARAMETERS_PATH = f'{MODEL_DIR}/{MOST_RECENT_MODEL}/model'
        MLB_PATH = f'{MODEL_PARAMETERS_PATH}/mlb_binarizer.pkl'
        VECTORIZER_PATH = f'{MODEL_PARAMETERS_PATH}/vectorizer.pkl'
        CLASSIFIER_PATH = f'{MODEL_PARAMETERS_PATH}/model.pkl'

        # For pickle to be able to unpickle, the class must be present in the
        # same import structure as when it was pickled.
        # Manually setting sys.modules to mimic the expected import structure
        sys.modules['models'] = classifiers

        # Latin1 encoding required to convert Python2 pickle to Python3
        with client.read(MLB_PATH) as r:
            mlb = pickle.loads(r.read(), encoding="latin1")
        with client.read(VECTORIZER_PATH) as r:
            vectorizer = pickle.loads(r.read(), encoding="latin1")
        with client.read(CLASSIFIER_PATH) as r:
            clf = pickle.loads(r.read(), encoding="latin1")
        del sys.modules['models']

        return mlb, vectorizer, clf
Code example #27
    def create(self):
        """
        Creates webhdfs client instance.
        Concrete implementation depends on a client_type parameter,
        if it's kerberos, then KerberosClient is created, otherwise InsecureClient.

        :return: hdfs client
        """
        if self.client_type == 'kerberos':
            from hdfs.ext.kerberos import KerberosClient
            return KerberosClient(url=self.url)
        else:
            return hdfs.InsecureClient(url=self.url, user=self.user)
Code example #28
def get_hdfs_client(is_kerberos=False):
    """
    :return: client of hdfs
    """
    # hdfs_host = "http://10.18.0.28:50070"
    # user="******"
    if is_kerberos:
        cli = KerberosClient(url=hdfs_host)
    else:
        cli = client.InsecureClient(url=hdfs_host, user=user)
    return cli
Code example #29
 def __init__(self,
              hdfs_urls,
              path_hdfs='./',
              max_file_size=MAX_FILE_SIZE,
              max_process=4,
              log_level='INFO'):
     """
     :param hdfs_url list[str]: hdfs url (ex: ['X'])
     :param path_hdfs str: path to write file in HDFS
     :param max_file_size int: limit size before create a new file and save the current file to hdfs (compressed)
     :param max_process int: number of subprocess to compress and write file in HDFS (max_process > 0)
     :param log_level str: logger level
     """
     # Config logger
     formatter = logging.Formatter(
         "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
     stream_handler = logging.StreamHandler()
     stream_handler.setFormatter(formatter)
     self.logger = logging.getLogger('WriteHdfs')
     self.logger.addHandler(stream_handler)
     self.logger.setLevel(log_level)
     # Config signal exit
     signal.signal(signal.SIGINT, self.__signal_handler)
     # Try to find the active namenode in the list
     for hdfs_url in hdfs_urls:
         try:
             hdfs_client = KerberosClient(hdfs_url)
             hdfs_client.list(path_hdfs)
             self.hdfs_url = hdfs_url
              self.logger.info('identified namenode: %s' % hdfs_url)
             break
         except hdfs.util.HdfsError:
             continue
     self.path_hdfs = path_hdfs
     self.max_process = max_process
     # Files settings
     self.file_size = 0
     self.file_name = self.__generate_file_name()
     self.max_file_size = max_file_size
Code example #30
File: webhdfs_hook.py Project: hoanghw/airflow
 def get_conn(self):
     """
      Returns an hdfscli client object (an InsecureClient, or a
      KerberosClient when Kerberos security is enabled).
     """
     nn_connections = self.get_connections(self.webhdfs_conn_id)
     for nn in nn_connections:
         try:
             logging.debug('Trying namenode {}'.format(nn.host))
             connection_str = 'http://{nn.host}:{nn.port}'.format(nn=nn)
             if _kerberos_security_mode:
                 client = KerberosClient(connection_str)
             else:
                 client = InsecureClient(connection_str)
             client.content('/')
             logging.debug('Using namenode {} for hook'.format(nn.host))
             return client
         except HdfsError as e:
             logging.debug("Read operation on namenode {nn.host} failed with"
                           " error: {e}".format(**locals()))
     nn_hosts = [c.host for c in nn_connections]
     no_nn_error = "Read operations failed on the namenodes below:\n{}".format("\n".join(nn_hosts))
     raise AirflowWebHDFSHookException(no_nn_error)
Code example #31
    def __write_to_hdfs(hdfs_url, path_hdfs, file_name, logger):
        """
        - Compress local file with ZLIB
        - Put compressed file on HDFS
        - Remove local files
        :param path_hdfs str: 
        :param file_name str: 
        """
        logger.debug('Start process __write_to_hdfs for file: %s' % file_name)
        # Compress the file (zlib.compress produces a raw zlib stream; the
        # .gz suffix is conventional here, not an actual gzip container)
        file_name_zlib = '%s.gz' % file_name
        with open(file_name, 'rb') as f_in:
            with open(file_name_zlib, 'wb') as f_out:
                f_out.write(zlib.compress(f_in.read()))

        # Write file to HDFS
        try:
            hdfs_client = KerberosClient(hdfs_url)
        except hdfs.util.HdfsError as e:
            logger.error('Error during HDFS connection, wait...: %s' % e)
            time.sleep(10)
            WriteHdfs.__write_to_hdfs(hdfs_url, path_hdfs, file_name, logger)
            return

        file_name_hdfs = file_name_zlib.replace('.tmp', '')
        file_path_hdfs = '%s/%s' % (path_hdfs, file_name_hdfs)
        try:
            hdfs_client.upload(file_path_hdfs, file_name_zlib)
        except hdfs.util.HdfsError as e:
            logger.error('Error during HDFS write, wait...: %s' % e)
            time.sleep(10)
            WriteHdfs.__write_to_hdfs(hdfs_url, path_hdfs, file_name, logger)
            return

        # Remove tmp files
        os.remove(file_name)
        os.remove(file_name_zlib)
        logger.debug('End process __write_to_hdfs for file: %s' % file_name)
Code example #32
File: webhdfs.py Project: suiting-young/airflow
    def _get_client(self, connection: Connection) -> Any:
        connection_str = f'http://{connection.host}:{connection.port}'
        session = requests.Session()

        if connection.extra_dejson.get('use_ssl', False):
            connection_str = f'https://{connection.host}:{connection.port}'
            session.verify = connection.extra_dejson.get('verify', True)

        if _kerberos_security_mode:
            client = KerberosClient(connection_str, session=session)
        else:
            proxy_user = self.proxy_user or connection.login
            client = InsecureClient(connection_str,
                                    user=proxy_user,
                                    session=session)

        return client
Code example #33
File: hdfs.py Project: chuxi/wikidown-scala
from hdfs.ext.kerberos import KerberosClient

if __name__ == "__main__":
    client = KerberosClient("http://10.214.208.11:9000")
    client.list("/")