def get_conn(self):
    """
    Returns an hdfscli client object: InsecureClient, or KerberosClient
    when Kerberos security is enabled.
    """
    nn_connections = self.get_connections(self.webhdfs_conn_id)
    for nn in nn_connections:
        try:
            self.log.debug('Trying namenode %s', nn.host)
            connection_str = 'http://{nn.host}:{nn.port}'.format(nn=nn)
            if _kerberos_security_mode:
                client = KerberosClient(connection_str)
            else:
                proxy_user = self.proxy_user or nn.login
                client = InsecureClient(connection_str, user=proxy_user)
            # A cheap read against the root path verifies the namenode is reachable.
            client.status('/')
            self.log.debug('Using namenode %s for hook', nn.host)
            return client
        except HdfsError as e:
            self.log.debug(
                'Read operation on namenode {nn.host} failed with error: {e}'
                .format(nn=nn, e=e))
    # None of the configured namenodes answered; report them all.
    nn_hosts = [c.host for c in nn_connections]
    no_nn_error = 'Read operations failed on the namenodes below:\n{}'.format(
        '\n'.join(nn_hosts))
    raise AirflowWebHDFSHookException(no_nn_error)
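# A minimal usage sketch, assuming the method above lives on Airflow's
# WebHDFSHook (as in Airflow 1.x) and that a 'webhdfs_default' connection is
# configured; the connection id here is illustrative, not taken from the snippet.
from airflow.hooks.webhdfs_hook import WebHDFSHook

hook = WebHDFSHook(webhdfs_conn_id='webhdfs_default')
client = hook.get_conn()    # InsecureClient, or KerberosClient under Kerberos
print(client.status('/'))   # the same root-path probe the hook uses to pick a namenode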
# An older variant of the same method: it logs through the stdlib logging
# module, reads the Python 2-era e.message attribute, and raises a plain
# Exception instead of AirflowWebHDFSHookException on failure.
def get_conn(self):
    """
    Returns an hdfscli InsecureClient object.
    """
    nn_connections = self.get_connections(self.webhdfs_conn_id)
    for nn in nn_connections:
        try:
            logging.debug("Trying namenode {}".format(nn.host))
            connection_str = "http://{nn.host}:{nn.port}".format(nn=nn)
            if _kerberos_security_mode:
                client = KerberosClient(connection_str)
            else:
                proxy_user = self.proxy_user or nn.login
                client = InsecureClient(connection_str, user=proxy_user)
            client.status("/")
            logging.debug("Using namenode {} for hook".format(nn.host))
            return client
        except HdfsError as e:
            logging.debug(
                "Read operation on namenode {nn.host} failed with"
                " error: {e.message}".format(nn=nn, e=e))
    nn_hosts = [c.host for c in nn_connections]
    no_nn_error = "Read operations failed on the namenodes below:\n{}".format(
        "\n".join(nn_hosts))
    raise Exception(no_nn_error)
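# The script below moves Hive export files from HDFS to S3. It references
# names defined earlier in the original file, so everything in this block is
# an illustrative, hedged reconstruction of that context: the endpoint,
# script path, databases, directories, and bucket are all hypothetical.
import logging
import os
import subprocess

import boto3
from hdfs.ext.kerberos import KerberosClient

logging.basicConfig(level=logging.INFO)
py_logger = logging.getLogger('hdfs_to_s3')

session = boto3.session.Session()                # credentials from env/instance profile
hdfs_url = 'http://namenode.example.com:50070'   # hypothetical WebHDFS endpoint
hive_script_name = './export_tables.sh'          # hypothetical Hive export wrapper
hive_db_a, hive_db_b = 'db_a', 'db_b'            # hypothetical database arguments
source_directory = '/data/exports/'              # HDFS directory holding the exports
s3_folder_name = 'exports/'                      # key prefix inside the bucket
bucket_name = 'example-bucket'                   # hypothetical target bucket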
# Run the Hive export script and capture the list of files it produced.
file_list = subprocess.check_output([hive_script_name, hive_db_a, hive_db_b])
file_list_arr = file_list.decode().split(' ')  # check_output returns bytes on Python 3
py_logger.info("hive execution completed")

client = KerberosClient(hdfs_url)
s3 = session.client('s3', use_ssl=False, verify=False)

counter = 0
for file_path in file_list_arr:
    file_path = source_directory + file_path
    # strict=False returns None instead of raising when the path is missing.
    status = client.status(file_path, strict=False)
    if status:
        file_name = os.path.basename(file_path)
        key_name = s3_folder_name + file_name
        # Stream the HDFS file straight into S3 without staging it locally,
        # then remove the source file once the upload call returns.
        with client.read(file_path) as f:
            s3.upload_fileobj(f, bucket_name, key_name)
        client.delete(file_path, recursive=False, skip_trash=True)
        counter += 1
        py_logger.info("File: %s moved to s3 bucket", file_path)

py_logger.info("S3 script execution completed. No. of files moved: %s", counter)

# Compresses the log files which are older than 30 days