Exemple #1
0
def basic():
    """Exercise a handful of basic HdfsClient calls against 'study:50070'.

    Every result is printed to standard output; nothing is returned.
    The single-argument ``print(...)`` form is used throughout because it
    prints the same thing on Python 2 and Python 3.
    """
    # One client is enough: every call below targets the same host.
    client = HdfsClient(hosts='study:50070')
    print(client.list_status('/'))

    # Check whether individual paths exist.
    print('判断某个路径是否存在')
    print(client.exists("/test"))
    print(client.exists("/data/gz/thrift-0.9.2.tar.gz"))

    print(client.get_file_checksum("/data/gz/bison-2.5.1.tar.gz"))

    summary = client.get_content_summary("/")
    print(summary)

    # File copy: from HDFS to the local file system.
    client.copy_to_local("/data/gz/pip-7.1.2.tar.gz", "/root/data/pip-7.1.2.tar.gz")
    # File copy: from the local file system into HDFS.
    client.copy_from_local("/root/data/thrift-0.9.2.tar.gz", "/data/gz/thrift-0.9.2.tar.gz")

    print(client.get_home_directory())
Exemple #2
0
    logger_requests = logging.getLogger('requests')
    logger_requests.setLevel(logging.ERROR)

    constants = Constants()
    logger.info('constants loaded')
    logger.info('init : hosts_hdfs   = %s' % hosts_hdfs)
    logger.info('init : dataroot     = %s' % dir_dataroot)
    logger.info('init : path main    = %s' % path_main)
    logger.info('init : path codemap = %s' % path_codemap)
    client = HdfsClient(hosts=hosts_hdfs)
    logger.info('connect hdfs')
    logger.info('---- start working ----')
    #     type='DIRECTORY'   type='FILE'
    while 1:
        list_dirs = [
            x['pathSuffix'] for x in client.list_status(dir_dataroot)
            if x['type'] == 'DIRECTORY'
        ]
        list_dirs.sort()
        for subdir in list_dirs:
            dir_subdata = os.path.join(dir_dataroot, subdir)
            logger.debug('data path : %s' % dir_subdata)
            dir_subdata_cleaned = os.path.join(dir_subdata, 'cleaned4netsec')
            logger.debug('data path for cleaned files : %s' %
                         dir_subdata_cleaned)
            list_subdir_date = [
                x['pathSuffix'] for x in client.list_status(dir_subdata)
                if x['type'] == 'FILE'
            ]
            if len(list_subdir_date) > 0:
                if not client.exists(dir_subdata_cleaned):
Exemple #3
0
def yesterday():
    """Return the value of ``today()`` shifted back by exactly one day."""
    current_day = today()
    one_day = datetime.timedelta(days=1)
    return current_day - one_day


# Script entry point: for every table in CHECK_TABLE, inspect the HDFS
# directory ROOT_DIR + table + "/" + <yesterday> and build an HTML-flavoured
# error message in `content` when something is wrong.
if __name__ == '__main__':
    print("监控HDFS......")
    yesterday_datetime_format = yesterday()
    for table in CHECK_TABLE:
        is_success = False
        # NOTE(review): has_data is set below but not read in this excerpt;
        # presumably it is consumed further down - confirm before removing.
        has_data = False
        content = ""
        # Built before the try block so the error handler can always refer to
        # the path of the table currently being checked (never an unbound or
        # stale value left over from a previous iteration).
        path = ROOT_DIR + table + "/" + str(yesterday_datetime_format)
        try:
            if client.exists(path):
                client_list = client.list_status(path)
                for file_status in client_list:
                    suffix = file_status.get("pathSuffix")
                    # A non-empty "part-*" file means the directory holds data.
                    if suffix.startswith('part-') and int(file_status.get("length")) > 0:
                        has_data = True
                    # Plain == instead of calling __eq__ directly: __eq__ may
                    # return NotImplemented, which is truthy.
                    elif suffix == "_SUCCESS":
                        is_success = True
            else:
                content = "异常信息:HDFS路径不存在 <br>" + \
                          str("HDFS路径:") + path
        except Exception as e:
            # Any failure while talking to HDFS is reported instead of
            # aborting the checks for the remaining tables.
            content = "异常信息:" + str(e) + "<br>" + \
                      str("HDFS路径:") + path

        # No error recorded so far, but no _SUCCESS entry was seen either.
        if content == "" and not is_success:
            content = "异常信息:" + table + "相关job运行失败" + "<br>" + \
                      str("HDFS路径:") + path