def insertFileIntoHdfs(fileName):
    # Copy a local data file into the HDFS home directory of the test user,
    # skipping the copy if the target file already exists in HDFS.
    pathFileName = '/user/' + HADOOPQA_USER + '/' + fileName
    if not HDFS.fileExists(pathFileName):
        sourceFile = DATA_PATH + '/' + fileName
        destFile = '/user/' + HADOOPQA_USER + '/' + fileName
        putCmd = "dfs -put " + sourceFile + ' ' + destFile
        out = Hadoop.run(putCmd)
        return out
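# Hedged usage sketch (assumptions: HADOOPQA_USER and DATA_PATH are configured
# by the surrounding test framework, a module-level logger exists as elsewhere
# in this suite, and 'input.txt' is an illustrative file name under DATA_PATH).
out = insertFileIntoHdfs('input.txt')
if out is not None:
    logger.info("dfs -put output: %s" % str(out))
else:
    logger.info("File already present in HDFS; nothing copied")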
def verifyLongRunningJob(cls):
    '''
    Verify long running background job after it finishes
    :return:
    '''
    ruAssert("Tez", YARN.getAppFinalStateFromID(cls._background_job_appId) == 'SUCCEEDED')
    for output in cls._hdfsOutputList:
        ruAssert("Tez", HDFS.fileExists(output + '/part*'))
    logger.info("**** Verified long running job for Tez ****")
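# Standalone sketch of the same verification pattern (assumption: the
# application ID and output directory below are illustrative placeholders,
# not values produced by this suite).
app_id = 'application_1700000000000_0001'
output_dir = '/user/' + HADOOPQA_USER + '/tez-longrun-output'
ruAssert("Tez", YARN.getAppFinalStateFromID(app_id) == 'SUCCEEDED')
ruAssert("Tez", HDFS.fileExists(output_dir + '/part*'))
logger.info("**** Verified illustrative Tez application %s ****" % app_id)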
def verifyLongRunningJob(cls):
    '''
    Validate long running background job after end of all component upgrades
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("### Verifying Long running job for Pig ####")
    # Check that the long running process handle exists
    if cls._process is not None:
        # poll() returns None while the process is still running
        if cls._process.poll() is None:
            UpgradePerNode.reportProgress(
                "### Long running job for Pig has not Finished yet. Waiting for it to complete ####")
            # Wait up to _long_running_job_timeout_secs for the job to complete
            starttime = time.time()
            while (time.time() - starttime < cls._long_running_job_timeout_secs) and cls._process.poll() is None:
                time.sleep(5)
        exit_code = cls._process.poll()
        if exit_code is None:
            logger.info("Killing Pig Long running job process '%d'" % cls._process.pid)
            Machine.killProcess(cls._process.pid)
            UpgradePerNode.reportProgress(
                "### [FAILED][Pig][BGJob] Long running job for Pig Failed to finish ####")
        elif exit_code != 0:
            UpgradePerNode.reportProgress(
                "### [FAILED][Pig][BGJob] Long running job for Pig Failed and Exited with '%d' ####" % exit_code)
        else:
            UpgradePerNode.reportProgress("### Long running job for Pig Finished ####")
        # Check for the _SUCCESS file in the HDFS output path
        if HDFS.fileExists(cls._hdfs_success_filepath, cls._job_user):
            UpgradePerNode.reportProgress(
                "### [PASSED][Pig][BGJob] Found _SUCCESS file in HDFS for Pig Long running job ####")
        else:
            UpgradePerNode.reportProgress(
                "### [FAILED][Pig][BGJob] _SUCCESS file not found in HDFS for Pig Long running job ####")
    else:
        UpgradePerNode.reportProgress(
            "### [FAILED][Pig][BGJob] Long Running Pig Job Failed. No Process found ####")
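# Hedged sketch of the same poll-with-timeout pattern using only the Python
# standard library; the command, timeout, and messages are illustrative and do
# not come from the Pig test itself.
import subprocess
import time

proc = subprocess.Popen(["sleep", "5"])          # stand-in for the long running job
timeout_secs = 30                                # illustrative timeout
start = time.time()
while time.time() - start < timeout_secs and proc.poll() is None:
    time.sleep(1)
exit_code = proc.poll()
if exit_code is None:
    proc.kill()                                  # mirrors Machine.killProcess above
    print("job did not finish within %d seconds" % timeout_secs)
elif exit_code != 0:
    print("job failed with exit code %d" % exit_code)
else:
    print("job finished successfully")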
def insertCSVDataViaCSVBuildLoad(cls,
                                 csvFile,
                                 tableName,
                                 putIntoHDFS=True,
                                 deleteAfterExec=True,
                                 runInBackground=False,
                                 user=None,
                                 config=None,
                                 optionAndParameter="",
                                 env=None,
                                 delimiter=",",
                                 arrayDelimiter=None,
                                 schema=None):
    """
    Bulk-load a CSV file into a table via the Phoenix CsvBulkLoadTool.
    By default, the file is staged under the /tmp/ folder in HDFS.
    """
    global ZK_ZPARENT
    if Slider.isSlider():
        ZK_ZPARENT = util.getPropertyValueFromConfigXMLFile(
            os.path.join(Config.get('hbase', 'HBASE_CONF_DIR'), 'hbase-site.xml'),
            "zookeeper.znode.parent")

    # Locate the Phoenix client jar on the local machine.
    if Machine.isLinux():
        clientjar = Machine.find(
            user=Machine.getAdminUser(),
            host="localhost",
            filepath=PHOENIX_HOME,
            searchstr="phoenix-*[0-9]-client.jar",
            passwd=Machine.getAdminPasswd())
    else:
        clientjar = Machine.find(
            user=Machine.getAdminUser(),
            host="localhost",
            filepath=PHOENIX_HOME,
            searchstr="phoenix-*-client.jar",
            passwd=Machine.getAdminPasswd())

    if Machine.isWindows():
        clientjar = (clientjar[0].strip("\\localhost")).replace("$", ":")
        fileName = csvFile.split('\\')[-1]
    else:
        clientjar = clientjar[0]
        fileName = csvFile.split('/')[-1]

    # If we need to, we insert the file into HDFS, since the library will take it from there.
    executingUser = HADOOPQA_USER if user is None else user
    if putIntoHDFS:
        if not HDFS.fileExists('/tmp/'):
            HDFS.mkdir('/tmp/')
        HDFS.copyFromLocal(csvFile, '/tmp/', executingUser, config, optionAndParameter)

    hbaseConfDir = HBASE_CONF_DIR
    if Slider.isSlider():
        hbaseConfDir = Config.get('hbase', 'HBASE_CONF_DIR')
    classpath = hbaseConfDir

    finalCommand = "%s jar %s org.apache.phoenix.mapreduce.CsvBulkLoadTool --table %s --input %s" \
                   % (HADOOP_CMD, clientjar, tableName, '/tmp/%s' % fileName)
    if schema is not None:
        finalCommand = '%s -schema %s' % (finalCommand, schema)

    if Machine.isWindows():
        os.environ['HADOOP_USER_CLASSPATH_FIRST'] = 'true'
        os.environ['HADOOP_CLASSPATH'] = classpath
        # delimiter options
        if delimiter != "," or arrayDelimiter is not None:
            finalCommand = "%s -d `\\\"`%s`\\\" -a `\\\"`%s`\\\"" \
                           % (finalCommand, delimiter, arrayDelimiter.strip("'"))
        finalCommand = "%s --zookeeper %s" % (finalCommand, ZK_HOST)
        if runInBackground:
            exit_code = 0
            stdout = ''
            Machine.runinbackground(
                finalCommand,
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))
        else:
            exit_code, stdout = Machine.run(
                finalCommand,
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))
    else:
        # delimiter options
        if delimiter != "," or arrayDelimiter is not None:
            finalCommand = "%s --delimiter %s --array-delimiter %s" % (
                finalCommand, delimiter, arrayDelimiter)
        # ZKHosts options
        finalCommand = "%s --zookeeper %s" % (finalCommand, cls.getZKConnectString())
        if runInBackground:
            exit_code = 0
            stdout = ''
            Machine.runinbackground(
                "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand),
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))
        else:
            exit_code, stdout = Machine.run(
                "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand),
                env=dict(env.items() + ENVIRONMENT.items() if env is not None else ENVIRONMENT))

    # If selected, after insertion into HBase we delete the csvFile from HDFS.
    # This does not work for the "run in background" option.
    if deleteAfterExec and not runInBackground:
        HDFS.deleteFile('/tmp/%s' % fileName, executingUser)

    return exit_code, stdout
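# Hedged usage sketch (assumptions: this classmethod lives on the Phoenix test
# helper class, here called Phoenix, and the local CSV path and table name are
# illustrative; on a real cluster the call shells out to CsvBulkLoadTool).
exit_code, stdout = Phoenix.insertCSVDataViaCSVBuildLoad(
    '/grid/0/tmp/sample_rows.csv',   # local CSV produced by the test
    'SAMPLE_TABLE',                  # target Phoenix table
    putIntoHDFS=True,                # stage the file under /tmp/ in HDFS first
    deleteAfterExec=True,            # remove the staged copy after the load
    delimiter=',')
assert exit_code == 0, "CsvBulkLoadTool failed:\n%s" % stdout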