Beispiel #1
0
    def delete(self, request, *args, **kwargs):
        """Delete a data source record together with the data stored behind it.

        Reads ``file_id`` from the request body and loads the matching
        ``DataSource`` row.  When the row's ``where`` is ``'hdfs'`` the file
        ``/datahoop/<format_filename>`` is removed from HDFS; for any other
        value the document ``obj_id`` is removed from the MongoDB
        ``datahoop.data`` collection.  The row itself is deleted in both cases.

        :param request: request whose ``data`` mapping carries ``file_id``.
        :return: an empty ``HttpResponse`` with a JSON content type.
        """
        file_id = request.data.get('file_id')
        # A single lookup serves both branches.  The original Mongo branch
        # filtered on the builtin ``id`` instead of ``file_id`` and therefore
        # could never find the record.
        record = DataSource.objects.get(id=file_id)

        if record.where == 'hdfs':
            hdfs_client = Client(HDFS_HOST)
            hdfs_client.delete('/datahoop/' + record.format_filename,
                               recursive=True)
            record.delete()
        else:
            mongo_client = pymongo.MongoClient(settings.MONGO_DB_HOST,
                                               settings.MONGO_DB_PORT)
            try:
                obj_id = record.obj_id
                record.delete()
                # NOTE(review): ``Collection.remove`` is a legacy pymongo API;
                # confirm the installed pymongo version still provides it.
                mongo_client.datahoop.data.remove({"_id": ObjectId(obj_id)})
            finally:
                # Release the connection even when the deletion fails.
                mongo_client.close()
        return HttpResponse(content_type='application/json')
Beispiel #2
0
 def get(self, request):  # delete mydata
     """Delete one data source and every ``Collect`` entry that refers to it.

     Reads ``file_id`` from the query string.  For an HDFS-backed record the
     underlying file ``/datahoop/<format_filename>`` is removed only when no
     other ``DataSource`` row shares the same ``format_filename``; otherwise
     just the row is deleted.  For any other backend the document ``obj_id``
     is removed from the MongoDB ``datahoop.data`` collection.

     NOTE(review): this performs a destructive action on a GET request;
     consider exposing it through POST/DELETE instead.

     :param request: request whose ``GET`` mapping carries ``file_id``.
     :return: ``JsonResponse`` with ``{'status': True}`` on success and
         ``{'status': False}`` when anything goes wrong.
     """
     import logging

     file_id = request.GET.get('file_id')
     try:
         # One lookup replaces the repeated identical queries of the original.
         record = DataSource.objects.get(id=file_id)

         if record.where == 'hdfs':
             sharing_rows = DataSource.objects.filter(
                 format_filename=record.format_filename).count()
             if sharing_rows == 1:
                 # Last reference to the file: remove it from HDFS as well.
                 Client(HDFS_HOST).delete(
                     '/datahoop/' + record.format_filename, recursive=True)
             record.delete()
         else:
             mongo_client = pymongo.MongoClient(settings.MONGO_DB_URI)
             try:
                 obj_id = record.obj_id
                 record.delete()
                 mongo_client.datahoop.data.remove({"_id": ObjectId(obj_id)})
             finally:
                 # Release the connection even when the deletion fails.
                 mongo_client.close()

         # Identical clean-up for every backend; deleting an empty queryset
         # is harmless, so no existence check is needed.
         Collect.objects.filter(file_id=file_id).delete()
         return JsonResponse({'status': True})
     except Exception:
         # Keep the "status: False" contract, but do not hide the cause and do
         # not swallow KeyboardInterrupt/SystemExit as the bare except did.
         logging.getLogger(__name__).exception(
             'Failed to delete data source %s', file_id)
         return JsonResponse({'status': False})
def train(train_path,
          test_path,
          output_path,
          target,
          train_split_ratio=0.33,
          penalty='l2',
          dual=False,
          tol=1e-4,
          C=1.0,
          random_state=None,
          multi_class='ovr'):
    """Train a logistic-regression model on HDFS data and publish the results.

    Downloads ``<train_path>/data/Data.csv`` and ``<test_path>/data/Data.csv``
    from HDFS, min-max scales the features to ``[-1, 1]``, fits a
    ``LogisticRegression`` (``liblinear`` solver) on a random split of the
    training file, scores it on the held-out part and predicts the target for
    the test file.  Four CSV files (``abstract.csv``, ``msg.csv``,
    ``evaluation.csv`` and ``result.csv``) are written to the working
    directory and uploaded below ``output_path``, replacing earlier copies.

    :param train_path: HDFS directory containing the training ``data/Data.csv``.
    :param test_path: HDFS directory containing the test ``data/Data.csv``.
    :param output_path: HDFS directory below which the reports are uploaded.
    :param target: name of the label column in the training data.
    :param train_split_ratio: fraction of the training rows held out for
        scoring (passed as ``test_size`` to ``train_test_split``).
    :param penalty: forwarded to ``LogisticRegression``.
    :param dual: forwarded to ``LogisticRegression``.
    :param tol: forwarded to ``LogisticRegression``.
    :param C: forwarded to ``LogisticRegression``.
    :param random_state: forwarded to ``LogisticRegression``.
    :param multi_class: forwarded to ``LogisticRegression``.
    :return: ``None``; the results are the uploaded files.
    """
    # Start timestamp (same text the original format/strftime combination
    # produced) and the reference point for the elapsed time.
    time_trains_start = time.strftime('%Y/%m/%d %H:%M:%S')
    start_time = time.time()

    client = Client(HDFS_HOSTS1)

    def _load_csv(hdfs_file, local_file):
        """Copy an HDFS CSV file to ``local_file`` and parse it with pandas."""
        with client.read(hdfs_file) as reader:
            text = str(reader.read(), 'utf-8')
        # The ``with`` block closes (and thereby flushes) the file before
        # pandas reads it; the original flush/fsync ran before the write and
        # had no effect.
        with open(local_file, "w", encoding='utf-8') as handle:
            handle.write(text)
        return pd.read_csv(local_file, header=0)

    def _publish(remote_dir, filename):
        """Replace ``remote_dir + filename`` on HDFS with the local file."""
        remote_file = remote_dir + filename
        client.delete(remote_file)
        client.upload(remote_file, filename)

    def _save_report(remote_dir, filename, columns):
        """Write a one-row CSV from ``(name, value)`` pairs and publish it."""
        with open(filename, mode='w', newline='') as handle:
            writer = csv.writer(handle)
            writer.writerow([name for name, _ in columns])
            writer.writerow([value for _, value in columns])
        _publish(remote_dir, filename)

    # Read the training and the test data.
    df_train = _load_csv(train_path + "/data/Data.csv", "trainData.csv")
    print(df_train)
    df_test = _load_csv(test_path + "/data/Data.csv", "testData.csv")
    print(df_test)

    train_data_num = df_train.shape[0]
    # The original reported the training row count here as well.
    test_data_num = df_test.shape[0]

    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))

    # Prepare the prediction set.
    # NOTE(review): the test set is scaled with its own min/max instead of the
    # training set's; kept as in the original, but worth confirming.
    test_features = np.array(min_max_scaler.fit_transform(df_test))

    # Prepare the training features and labels.
    feature_cols = [col for col in df_train.columns if col != target]
    X = min_max_scaler.fit_transform(np.array(df_train[feature_cols]))
    Y = np.array(df_train[target])

    # Split the training data into a fitting and a scoring part.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=train_split_ratio)

    # Keyword arguments are required here: positionally, ``random_state`` and
    # ``multi_class`` would land on ``fit_intercept`` and
    # ``intercept_scaling``.
    clf = LogisticRegression(penalty=penalty,
                             dual=dual,
                             tol=tol,
                             C=C,
                             random_state=random_state,
                             multi_class=multi_class,
                             solver='liblinear')
    clf.fit(X_train, Y_train)

    # Accuracy on the held-out split.
    train_acc = clf.score(X_test, Y_test)
    print('score Scikit learn: ', train_acc)

    # Predict once and reuse the labels for every metric.
    # NOTE(review): these metrics use their default (binary) averaging and the
    # ROC AUC is computed from hard labels, as in the original.
    held_out_pred = clf.predict(X_test)
    train_precision_score = precision_score(Y_test, held_out_pred)
    train_recall_score = recall_score(Y_test, held_out_pred)
    train_f1_score = f1_score(Y_test, held_out_pred)
    train_roc_auc_score1 = roc_auc_score(Y_test, held_out_pred)

    # Predict the test set with the fitted model.
    result = clf.predict(test_features)

    # Elapsed time formatted as HH:MM:SS.
    train_seconds = time.time() - start_time
    m, s = divmod(train_seconds, 60)
    h, m = divmod(m, 60)
    time_trains_all = "%02d:%02d:%02d" % (h, m, s)

    # ---------------------------- save the results ---------------------------
    run_info = [
        ('accuracy', str(train_acc)),
        ('time_trains_start', time_trains_start),
        ('time_trains_all', time_trains_all),
        ('test_data_num', str(test_data_num)),
        ('train_data_num', str(train_data_num)),
    ]

    # Model summary report.
    _save_report(
        output_path + '/abstract/data/', 'abstract.csv',
        [
            ('FrameWork', 'Scikit-learn'),
            ('Version', sklearn.__version__),
            ('model', '%s' % LogisticRegression),
        ] + run_info)

    # Model version information.
    _save_report(output_path + '/msg/data/', 'msg.csv', run_info)

    # Evaluation metrics report.
    _save_report(
        output_path + '/evaluation/data/', 'evaluation.csv',
        [
            ('accuracy', str(train_acc)),
            ('train_precision_score', train_precision_score),
            ('train_recall_score', train_recall_score),
            ('train_f1_score', train_f1_score),
            ('train_roc_auc_score1', train_roc_auc_score1),
        ])

    # Predictions for the test set; the dict key becomes the CSV column name.
    pd.DataFrame({target: result}).to_csv("result.csv", index=False, sep=',')
    _publish(output_path + '/result/data/', 'result.csv')
Beispiel #4
0
class HDFSUtil:
    """Small convenience wrapper around an HDFS ``Client``."""

    def __init__(self, url):
        """Create the underlying client for the HDFS endpoint ``url``."""
        self._client = Client(url)

    def make_dir(self, hdfs_path):
        """
        Create a directory; nested (multi-level) paths are supported.

        :param hdfs_path: directory to create.
        :return: None
        """
        self._client.makedirs(hdfs_path)

    def delete_hdfs_file(self, hdfs_path):
        """
        Delete an HDFS file.

        If the path is a directory, it must be empty.
        :param hdfs_path: path to delete.
        :return: None
        """
        self._client.delete(hdfs_path)

    def delete_hdfs_dir(self, hdfs_dir):
        """
        Delete an HDFS file or directory.

        A non-empty directory is emptied recursively, entry by entry, before
        the directory itself is deleted.
        :param hdfs_dir: path to delete.
        :return: None
        """
        children = self.hdfs_dir_list(hdfs_dir)
        if not children:
            # Either a plain file (listing failed -> None) or an empty dir.
            print('Delete File: {0}'.format(hdfs_dir))
        else:
            for file_name in children:
                self.delete_hdfs_dir(hdfs_dir + '/' + file_name)
            print('Delete Dir: {0}'.format(hdfs_dir))
        self._client.delete(hdfs_dir)

    def upload_to_hdfs(self, local_path, hdfs_path):
        """
        Upload a local file or directory to HDFS.

        :param local_path: local source path.
        :param hdfs_path: HDFS destination path.
        :return: None
        """
        self._client.upload(hdfs_path, local_path, cleanup=True)

    def download_from_hdfs(self, hdfs_path, local_path):
        """
        Download a file or directory from HDFS, overwriting local copies.

        :param hdfs_path: HDFS source path.
        :param local_path: local destination path.
        :return: None
        """
        self._client.download(hdfs_path, local_path, overwrite=True)

    def write_to_hdfs(self, hdfs_path, data, overwrite=False, append=True):
        """
        Write ``data`` to an HDFS file.

        Append:    overwrite=False, append=True  => default
        Overwrite: overwrite=True,  append=False

        ``overwrite`` and ``append`` are mutually exclusive.  When the target
        does not exist yet there is nothing to append to, so the file is
        created instead of letting the append request fail.
        :param hdfs_path: HDFS file to write.
        :param data: content to write.
        :param overwrite: Boolean, replace an existing file.
        :param append: Boolean, append to an existing file.
        :return: None
        :raises ValueError: if both ``overwrite`` and ``append`` are set.
        """
        if overwrite and append:
            raise ValueError('overwrite and append are mutually exclusive')
        if not self._client.content(hdfs_path, strict=False):
            print('File Not exist in HDFS')
            # Fall back to creating the file.
            append = False
        self._client.write(hdfs_path, data, overwrite=overwrite, append=append)

    def move_or_rename(self, hdfs_src_path, hdfs_dst_path):
        """
        Move or rename a file.

        :param hdfs_src_path: current HDFS path.
        :param hdfs_dst_path: new HDFS path.
        :return: None
        """
        self._client.rename(hdfs_src_path, hdfs_dst_path)

    def hdfs_dir_list(self, hdfs_path):
        """
        List the entries directly below a directory.

        When the listing raises ``HdfsError`` (e.g. ``hdfs_path`` is not a
        directory) the error is swallowed and ``None`` is returned.
        :param hdfs_path: directory to list.
        :return: List[filename] or None
        """
        try:
            return self._client.list(hdfs_path, status=False)
        except HdfsError:
            return None
Beispiel #5
0
if __name__ == '__main__':
    # Demo script: download a file from HDFS over WebHDFS, then delete it
    # remotely.
    hdfs_ip = "192.168.146.133"
    hdfs_version = 3
    # NOTE(review): "~" is passed to the client verbatim; confirm this is the
    # intended remote directory.
    hdfs_root = "~/test"
    filepath = r"C:\Users\daqige\PycharmProjects\newLeetCode\convert.py"
    # NameNode web port: 9870 on Hadoop 3.x, 50070 on Hadoop 2.x.  The
    # original fallback, 90070, is not a valid TCP port (maximum is 65535).
    webhdfs_port = 9870 if hdfs_version == 3 else 50070
    hdfs_addr = "http://" + hdfs_ip + ":" + str(webhdfs_port)

    client = Client(hdfs_addr)

    # Disabled preparation steps (create directory, upload, rename):
    # print("创建文件夹")
    # client.makedirs(hdfs_root)
    # print(client.list("/"))
    #
    # print("上传文件")
    # client.upload(hdfs_root, filepath)
    # print(client.list(hdfs_root))
    #
    # print("修改文件名")
    # client.rename(hdfs_root + "/convert.py", hdfs_root + "/ubuntu.py")
    # print(client.list(hdfs_root))

    # Download the file into the current working directory.
    print("下载文件")
    client.download(hdfs_root + "/ubuntu.py", ".")
    print(os.listdir("."))

    # Delete the remote file and show what is left.
    print("删除文件")
    client.delete(hdfs_root + "/ubuntu.py")
    print(client.list(hdfs_root))
def interface(train_path,
              test_path,
              output_path,
              target,
              chaid_ratio,
              train_split_ratio=0.3,
              n_estimators=100,
              max_depth=5,
              min_samples_split=3,
              min_samples_leaf=2,
              min_split_gain=0.0,
              colsample_bytree="log2",
              subsample=0.8,
              random_state=100):
    """Train a random forest on chi-square selected columns and publish results.

    Downloads ``<train_path>/data/Data.csv`` and ``<test_path>/data/Data.csv``
    from HDFS, keeps the ``chaid_ratio`` share of columns ranked best by a
    chi-square test, fits ``RandomForestClassifier`` on the leading rows of
    the training data, scores it on those same rows and predicts the target
    for the test file.  Four CSV files (``abstract.csv``, ``msg.csv``,
    ``evaluation.csv`` and ``result.csv``) are written to the working
    directory and uploaded below ``output_path``, replacing earlier copies.

    :param train_path: HDFS directory containing the training ``data/Data.csv``.
    :param test_path: HDFS directory containing the test ``data/Data.csv``.
    :param output_path: HDFS directory below which the reports are uploaded.
    :param target: name of the label column in the training data.
    :param chaid_ratio: share of the training columns kept by the chi-square
        selection.
    :param train_split_ratio: share of the rows used for fitting and scoring.
    :param n_estimators: forwarded positionally to ``RandomForestClassifier``.
    :param max_depth: forwarded positionally to ``RandomForestClassifier``.
    :param min_samples_split: forwarded positionally to the classifier.
    :param min_samples_leaf: forwarded positionally to the classifier.
    :param min_split_gain: forwarded positionally to the classifier.
    :param colsample_bytree: forwarded positionally to the classifier.
    :param subsample: forwarded positionally to the classifier.
    :param random_state: forwarded positionally to the classifier.
    :return: ``None``; the results are the uploaded files.
    """
    # Start timestamp (same text the original format/strftime combination
    # produced) and the reference point for the elapsed time.
    time_trains_start = time.strftime('%Y/%m/%d %H:%M:%S')
    start_time = time.time()

    client = Client(HDFS_HOSTS1)

    def _load_csv(hdfs_file, local_file):
        """Copy an HDFS CSV file to ``local_file`` and parse it with pandas."""
        with client.read(hdfs_file) as reader:
            text = str(reader.read(), 'utf-8')
        # The ``with`` block closes (and thereby flushes) the file before
        # pandas reads it; the original flush/fsync ran before the write and
        # had no effect.
        with open(local_file, "w", encoding='utf-8') as handle:
            handle.write(text)
        return pd.read_csv(local_file, header=0)

    def _publish(remote_dir, filename):
        """Replace ``remote_dir + filename`` on HDFS with the local file."""
        remote_file = remote_dir + filename
        client.delete(remote_file)
        client.upload(remote_file, filename)

    def _save_report(remote_dir, filename, columns):
        """Write a one-row CSV from ``(name, value)`` pairs and publish it."""
        with open(filename, mode='w', newline='') as handle:
            writer = csv.writer(handle)
            writer.writerow([name for name, _ in columns])
            writer.writerow([value for _, value in columns])
        _publish(remote_dir, filename)

    # Read the training and the test data.
    df_train = _load_csv(train_path + "/data/Data.csv", "trainData.csv")
    print(df_train)
    df_test = _load_csv(test_path + "/data/Data.csv", "testData.csv")
    print(df_test)

    train_data_num = df_train.shape[0]
    # The original reported the training row count here as well.
    test_data_num = df_test.shape[0]

    # Chi-square test: keep the columns most related to the label column.
    # NOTE(review): the selector is fitted on the full training frame, target
    # column included; confirm that this is intended.
    ch2 = SelectKBest(chi2, k=int(df_train.shape[1] * chaid_ratio))
    chi_df_train = pd.DataFrame(ch2.fit_transform(df_train, df_train[target]))
    label_df = df_train[target]

    # The parameter names (``min_split_gain``, ``colsample_bytree``, ...)
    # suggest a project-specific classifier, so the positional call is kept.
    clf = RandomForestClassifier(n_estimators, max_depth, min_samples_split,
                                 min_samples_leaf, min_split_gain,
                                 colsample_bytree, subsample, random_state)

    # Fit and score on the leading rows.  ``.ix`` no longer exists in pandas;
    # ``.loc`` is its label-based (end-inclusive) equivalent on the default
    # integer index used here.
    train_count = int(train_split_ratio * len(chi_df_train))
    fit_features = chi_df_train.loc[:train_count]
    fit_labels = label_df.loc[:train_count]
    clf.fit(fit_features, fit_labels)

    # Predict once and reuse the labels for every metric.
    fit_pred = clf.predict(fit_features)
    train_acc = metrics.accuracy_score(fit_labels, fit_pred)
    print("模型的准确率:", train_acc)
    train_precision_score = metrics.precision_score(fit_labels, fit_pred)
    train_recall_score = metrics.recall_score(fit_labels, fit_pred)
    train_f1_score = metrics.f1_score(fit_labels, fit_pred)
    train_roc_auc_score1 = metrics.roc_auc_score(fit_labels, fit_pred)

    # Reduce the test set to the columns chosen by the chi-square selection.
    # The last mask entry is dropped (it is assumed to belong to the target
    # column, which the test file lacks).  The original removed items from the
    # column list while iterating over it, which shifted the remaining
    # columns against the mask; build the selection without mutating instead.
    # Columns beyond the mask length are kept, as before.
    column_mask = list(ch2.get_support())
    column_mask.pop()
    test_columns = list(df_test.columns)
    selected_columns = [
        col for keep, col in zip(column_mask, test_columns) if keep
    ] + test_columns[len(column_mask):]
    df_test = df_test[selected_columns]

    # Predict the test set.
    result = clf.predict(df_test)

    # Elapsed time formatted as HH:MM:SS.
    train_seconds = time.time() - start_time
    m, s = divmod(train_seconds, 60)
    h, m = divmod(m, 60)
    time_trains_all = "%02d:%02d:%02d" % (h, m, s)

    # ---------------------------- save the results ---------------------------
    run_info = [
        ('accuracy', str(train_acc)),
        ('time_trains_start', time_trains_start),
        ('time_trains_all', time_trains_all),
        ('test_data_num', str(test_data_num)),
        ('train_data_num', str(train_data_num)),
    ]

    # Model summary report.
    _save_report(
        output_path + '/abstract/data/', 'abstract.csv',
        [
            ('FrameWork', 'Scikit-learn'),
            ('Version', sklearn.__version__),
            ('model', '%s' % RandomForestClassifier),
        ] + run_info)

    # Model version information.
    _save_report(output_path + '/msg/data/', 'msg.csv', run_info)

    # Evaluation metrics report.
    _save_report(
        output_path + '/evaluation/data/', 'evaluation.csv',
        [
            ('accuracy', str(train_acc)),
            ('train_precision_score', train_precision_score),
            ('train_recall_score', train_recall_score),
            ('train_f1_score', train_f1_score),
            ('train_roc_auc_score1', train_roc_auc_score1),
        ])

    # Predictions for the test set; the dict key becomes the CSV column name.
    pd.DataFrame({target: result}).to_csv("result.csv", index=False, sep=',')
    _publish(output_path + '/result/data/', 'result.csv')
Beispiel #7
0
# Walk-through of basic client file operations. `client`, `file_name`,
# `file_name2`, `file_dir` and `loacl_file_name` are defined outside this
# snippet.

# Write to a file (overwrite)
client.write(file_name, data="hello hdfs !", overwrite=True)

# Write to a file (append)
client.write(file_name, data="hello way !", overwrite=False, append=True)

# Read the file contents
with client.read(file_name, encoding='utf-8') as f:
    print(f.read())

# Download the file
# NOTE(review): `loacl_file_name` looks like a misspelling of
# `local_file_name`; it has to match the name defined elsewhere.
client.download(file_name, loacl_file_name, overwrite=True)

# Upload the file
client.upload(file_name + '111', loacl_file_name, cleanup=True)

# Delete a file
client.delete(file_name2)

# Rename the file
client.rename(file_name, file_name2)

# List the entries under a directory
files = client.list(file_dir, status=False)
for file in files:
    print(file)

# Delete a directory (recursive delete - use with caution)
# client.delete(file_dir, recursive=True)
Beispiel #8
0
class RF_HDFS(object):
    """HDFS helper that wraps a ``Client`` and reports every failure as
    ``HdfsLibraryError``.

    ``connect_and_login`` must be called before any file operation; until then
    ``client`` and ``session`` are ``None`` and ``checkConnectionStatus()``
    returns ``False``.
    """

    def __init__(self):
        self.client = None
        self.directory = None
        # Defined up front so checkConnectionStatus() and close() are safe to
        # call before connect_and_login().
        self.session = None
        self.connectionStatus = False

    def _call(self, method_name, *args, **kwargs):
        """Invoke ``self.client.<method_name>`` and translate any failure
        (including a missing client) into ``HdfsLibraryError``."""
        try:
            return getattr(self.client, method_name)(*args, **kwargs)
        except Exception as exception:
            raise HdfsLibraryError(str(exception))

    def _list(self, directory, status):
        """List ``directory`` (the client root when ``None``)."""
        try:
            if directory is None:
                directory = self.client.root
            return self.client.list(directory, status=status)
        except Exception as exception:
            raise HdfsLibraryError(str(exception))

    def connect_and_login(self, **kwargs):
        """Open an HTTP session, authenticate and create the HDFS client.

        Recognised keyword arguments: ``host``, ``port``, ``kdc``, ``user``,
        ``password``, ``root``, ``proxy`` and ``timeout``; each defaults to
        ``None`` when omitted.

        :return: the newly created client.
        :raises HdfsLibraryError: when authentication or client creation fails.
        """
        import requests

        host = kwargs.get('host')
        port = kwargs.get('port')
        # ``kdc`` used to stay unassigned when the caller omitted it, which
        # made the login below fail with an unbound-variable error.
        kdc = kwargs.get('kdc')
        user = kwargs.get('user')
        password = kwargs.get('password')
        root = kwargs.get('root')
        proxy = kwargs.get('proxy')
        timeout = kwargs.get('timeout')

        self.session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(pool_maxsize=0)
        self.session.mount('http://',  adapter)
        self.session.mount('https://', adapter)
        self.session.headers.update({'Connection':'Keep-Alive'})

        self.connectionStatus = False
        try:
            # A missing timeout is passed on as ``None`` instead of failing
            # in int(None).
            if timeout is not None:
                timeout = int(timeout)
            url = "http://" + host + ":" + str(port)

            hdfsLogin = WebHDFS(url, kdc)
            cookieStr = hdfsLogin.authenticate(user, password)
            if cookieStr is not None:
                # "name=value" -> {name: value}, stored in the session cookies.
                cookieList = cookieStr.split('=', 1)
                cookieDict = {cookieList[0]: cookieList[1]}
                requests.utils.add_dict_to_cookiejar(self.session.cookies, cookieDict)

            self.client = Client(url, root=root, proxy=proxy, timeout=timeout, session=self.session)
        except Exception as exception:
            raise HdfsLibraryError(str(exception))

        self.connectionStatus = True
        return self.client

    def checkConnectionStatus(self):
        """Return ``True`` once ``connect_and_login`` has succeeded."""
        return self.connectionStatus

    def list_dir(self, directory):
        """List ``directory`` (client root when ``None``) with status info."""
        return self._list(directory, status=True)

    def list_names(self, directory):
        """List the entry names of ``directory`` (client root when ``None``)."""
        return self._list(directory, status=False)

    def upload(self, remote_path, local_path, overwrite=False, permission=None):
        """Upload ``local_path`` to ``remote_path`` and return the client's
        result."""
        try:
            # ``overwrite`` is passed by keyword so it cannot be mistaken for
            # another positional parameter of the client's ``upload``.
            return self.client.upload(remote_path, local_path,
                                      overwrite=overwrite, permission=permission)
        except HdfsError as hdfsError:
            # For some reason this exception includes the entire stack trace
            # after the error message, so only the first line is reported.
            lines = str(hdfsError).splitlines()
            raise HdfsLibraryError(lines[0] if lines else '')
        except Exception as exception:
            raise HdfsLibraryError(str(exception))

    def download(self, remote_path, local_path, overwrite=False):
        """Download ``remote_path`` to ``local_path`` and return the client's
        result."""
        return self._call('download', remote_path, local_path,
                          overwrite=overwrite)

    def mkdir(self, directory, permission):
        """Create ``directory`` with the given ``permission``."""
        self._call('makedirs', directory, permission=permission)

    def rmdir(self, directory):
        """Recursively delete ``directory``.

        :raises HdfsLibraryError: when the client reports nothing was deleted.
        """
        if self._call('delete', directory, recursive=True) is False:
            # The message used to be passed as two arguments and was never
            # formatted.
            raise HdfsLibraryError("Directory does not exist: %r" % (directory,))

    def rename(self, src_file, dst_file):
        """Rename or move ``src_file`` to ``dst_file``."""
        self._call('rename', src_file, dst_file)

    def delete(self, file):
        """Delete ``file``.

        :raises HdfsLibraryError: when the client reports nothing was deleted.
        """
        if self._call('delete', file) is False:
            raise HdfsLibraryError("File does not exist: %r" % (file,))

    def set_time(self, file, mod_time):
        """Set the modification time of ``file``; -1 is passed as the access
        time."""
        self._call('set_times', file, -1, mod_time)

    def set_owner(self, file, owner, group):
        """Change the owner and group of ``file``."""
        self._call('set_owner', file, owner=owner, group=group)

    def set_permission(self, file, permission):
        """Change the permission of ``file``."""
        self._call('set_permission', file, permission=permission)

    def set_acl(self, file, aclspec):
        """Set the ACL specification of ``file``."""
        self._call('set_acl', file, aclspec=aclspec)

    def status(self, path):
        """Return the client's status information for ``path``."""
        return self._call('status', path)

    def checksum(self, path):
        """Return the client's checksum information for ``path``."""
        return self._call('checksum', path)

    def close(self):
        """Close the HTTP session, if one was opened."""
        if self.session is not None:
            self.session.close()