import json
import os

from hdfs import InsecureClient


def main():
    client = InsecureClient('http://127.0.0.1:50070/', user='******')
    # Create a directory in HDFS
    client.makedirs('/test')
    # List the contents of the root directory
    ll = client.list('/')
    print(ll)
    # Create a file in HDFS
    data = [{"name": "Anne", "salary": 10000}, {"name": "Victor", "salary": 9500}]
    with client.write('/test/sample_file.json', encoding='utf-8') as json_file_in_hdfs:
        json.dump(data, json_file_in_hdfs)
    # OR
    client.write(os.path.join('/', 'test', 'sample_file2.json'),
                 data=json.dumps(data), encoding='utf-8')
    # Download a file from HDFS
    client.download('/test/sample_file.json', './file_from_hadoop.json')
    # Upload a file to HDFS
    client.upload('/test/local_file_in_hadoop.json', './file_from_hadoop.json')
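# A minimal read-back sketch for the snippet above (assumptions: the same local
# NameNode at 127.0.0.1:50070 and the /test/sample_file.json written there).
import json
from hdfs import InsecureClient

client = InsecureClient('http://127.0.0.1:50070/', user='******')
# client.read() returns a context manager over the open WebHDFS stream;
# passing an encoding lets json.load consume it as text
with client.read('/test/sample_file.json', encoding='utf-8') as reader:
    records = json.load(reader)
print(records)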
def download_directory(self, directory_url):
    '''Downloads a directory from remote HDFS to local and returns it as a gzipped tar archive'''
    logger.log_info("Downloading the directory {0} ".format(directory_url))
    # Remove the base url from the absolute directory path provided as parameter.
    # For example, if the absolute path is hdfs://alpha:9000/configuration/12345/drift,
    # the statement below will return /configuration/12345/drift
    directory_name_with_path = urllib3.util.parse_url(directory_url).path
    directory_name = os.path.split(directory_name_with_path)[1]
    web_hdfs_url = Environment().get_web_hdfs_url()
    session = SwSessionManager().get_session()
    user_name = session.get_username()
    client = InsecureClient(web_hdfs_url, user_name)
    try:
        with tempfile.TemporaryDirectory() as temp:
            client.download(hdfs_path=directory_name_with_path,
                            local_path=temp,
                            n_threads=5)
            tmp_archive = os.path.join(temp)
            data = io.BytesIO()
            with open(shutil.make_archive(tmp_archive, 'gztar', temp), "rb") as output_data:
                data.write(output_data.read())
            data.seek(0)
            return send_file(data,
                             as_attachment=True,
                             attachment_filename=directory_name + ".tar.gz")
    except Exception as e:
        raise ServiceError(
            "Downloading the folder from HDFS failed with the error: {0}".format(str(e)))
class Prediction_ML():
    def __init__(self, dir_algo, algo, path_img):
        logging.info('prediction_ML.init')
        self.directory_algo = dir_algo
        self.path_img = path_img
        self.algo = algo
        self.hdfs_client = InsecureClient('http://192.168.1.4:9870', user='******')
        self.image = self.read_image(self.path_img, 240)

    def read_image(self, path_img, img_size=0):
        logging.info('prediction_ML.read_image')
        img = 0
        try:
            with self.hdfs_client.read(path_img) as reader:
                img = Image.open(reader)
                if img_size != 0:
                    img = img.resize((img_size, img_size))
                img = img.convert('L').convert('RGB')
                img = np.asarray(img).flatten()
        except IOError as err:
            logging.error("Error reading image or path")
            logging.error(err)
        except Exception as err:
            logging.error("Unknown error in read_image")
            logging.error(err)
        return img

    def run(self):
        try:
            self.hdfs_client.download(self.directory_algo + self.algo + ".model",
                                      self.algo + ".model")
            model = joblib.load(self.algo + ".model")
            os.remove(self.algo + ".model")
            label = model.predict([self.image])
            try:
                array_proba = model.predict_proba([self.image])[0]
                proba = array_proba[label[0]]
            except Exception:
                proba = -1
            return label[0], proba
        except IOError as err:
            logging.error('Error: model ' + str(self.algo) + ' is not trained yet!')
            logging.error('Train this model first before using it for predictions')
            return -1, 1
class HDFSStorage(Storage):
    def __init__(self, bucket_name: str, folder_name: str):
        super().__init__(bucket_name, folder_name)
        self.client = InsecureClient(url=settings.HDFS_CONN, user=settings.HDFS_USERNAME)

    def setup(self) -> HDFSResource:
        super().setup()
        self.client.makedirs(f"{self.bucket_name}/{self.folder_name}")
        return HDFSResource(resource=f"hdfs:/{self.bucket_name}/{self.folder_name}/")

    def put_file(self, file_path: Union[str, Path], rename: Optional[str] = None) -> HDFSResource:
        if isinstance(file_path, Path):
            file_path = str(file_path)
        file_name = Path(file_path).name if not rename else rename
        # Copy the file to the task directory first
        if not file_path.startswith(str(self.local_dir)):
            file_path = shutil.copy(file_path, Path(self.local_dir, file_name))
        try:
            self.client.upload(f"{self.bucket_name}/{self.folder_name}/{file_name}", file_path)
        except (gaierror, NewConnectionError):
            raise
        return HDFSResource(resource=f"hdfs:/{self.bucket_name}/{self.folder_name}/{file_name}")

    def get_file(self, data_file: str) -> str:
        if not data_file.startswith("hdfs:"):
            raise NotValidScheme("Object file prefix is invalid: expected `hdfs:`")
        _, bucket_name, folder_name, file_name = data_file.split("/")
        file_path = Path(self.temp_dir, bucket_name, folder_name, file_name)
        if not file_path.is_file():
            try:
                self.client.download(data_file, file_path)
            except Exception as err:
                print(err)
        return str(file_path)

    def remove_remote_dir(self, omit_files: List[str] = None) -> None:
        pass
def download(self, remote_location, local_filepath):
    url = urlparse(remote_location)
    if not url.hostname:
        raise ValueError('Hostname was not found in provided URL: %s' % remote_location)
    connect_url = self._build_namenode_url(url.hostname, url.port)
    hdfs_path = url.path
    hdfs = InsecureClient(connect_url)
    hdfs.download(hdfs_path, local_filepath)
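# The method above relies on a _build_namenode_url helper that the snippet does
# not show. A plausible sketch, assuming it only normalizes host and port into a
# WebHDFS base URL (the default port below is an assumption, not from the source):
DEFAULT_WEBHDFS_PORT = 50070  # assumed classic Hadoop 2.x WebHDFS default

def _build_namenode_url(self, hostname, port):
    # Fall back to the default WebHDFS port when the parsed URL omits one
    return 'http://%s:%d' % (hostname, port or DEFAULT_WEBHDFS_PORT)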
class Storage:
    def __init__(self, protocol: str = 'webHDFS', *args, **kwargs):
        self.protocol, self.client = protocol.lower(), None
        if protocol.lower() == 'webHDFS'.lower():
            from hdfs import InsecureClient
            self.client = InsecureClient(*args, **kwargs)
        for f in 'upload download list status delete'.split():
            setattr(self, f, getattr(self, '%s_%s' % (f, protocol.lower())))

    def upload_webhdfs(self, local_path: str, remote_path: str, **kwargs):
        to_screen("upload %s -> %s" % (local_path, remote_path))
        return self.client.upload(local_path=local_path, hdfs_path=remote_path, **kwargs)

    def download_webhdfs(self, remote_path: str, local_path: str, **kwargs):
        mkdir_for(local_path)
        to_screen("download %s -> %s" % (remote_path, local_path))
        return self.client.download(local_path=local_path, hdfs_path=remote_path,
                                    overwrite=True, **kwargs)

    def list_webhdfs(self, remote_path: str, **kwargs):
        return self.client.list(hdfs_path=remote_path, **kwargs)

    def status_webhdfs(self, remote_path: str, **kwargs):
        return self.client.status(hdfs_path=remote_path, **kwargs)

    def delete_webhdfs(self, remote_path: str, **kwargs):
        return self.client.delete(hdfs_path=remote_path, **kwargs)
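# A hedged usage sketch for the dispatching Storage class above; the endpoint
# URL and user are assumptions, and the constructor forwards its remaining
# arguments straight to InsecureClient.
storage = Storage('webHDFS', 'http://localhost:50070', user='hadoop')
storage.upload('./local.txt', '/tmp/remote.txt')  # dispatched to upload_webhdfs
print(storage.list('/tmp'))                       # dispatched to list_webhdfs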
def download_data(request):
    try:
        data_id = request.GET.get('data_id')
        user_id = request.GET.get('user_id')
        data_path = data_id + '.csv'
        fetched = Datasets.objects.filter(data_id=data_id, user_id=user_id).values(
            'hdfs_path', 'data_name')
        if len(fetched) == 0:
            raise Exception('Oops! No access!')
        if list(fetched)[0]['hdfs_path']:
            client = InsecureClient("http://hdfs.neurolearn.com:50070", user="******")
            client.download(list(fetched)[0]['hdfs_path'] + '/' + list(fetched)[0]['data_name'],
                            data_path, overwrite=True)
        else:
            data_cont_query = Datasets.objects.filter(
                data_id=data_id, user_id=user_id).values('data_cont')
            if len(data_cont_query) == 0:
                raise Exception('Oops! No access!')
            data_cont = list(data_cont_query)[0]['data_cont']
            pd.read_json(data_cont).to_csv(data_path, index=False)
        data_file = open(data_path, 'rb')
        response = FileResponse(data_file)
        response['Content-Type'] = 'application/octet-stream'
        response['Content-Disposition'] = 'attachment;filename="' + data_id + '.csv"'
    except Exception as e:
        traceback.print_exc()
        response_content = {}
        response = HttpResponse()
        response_content['msg'] = str(e)
        response_content['error_num'] = 1
        response.write(json.dumps(response_content))
    return response
def config():
    # Read params from the JSON body
    if not request.json \
            or 'preprocess' not in request.json \
            or 'requirements' not in request.json \
            or 'model' not in request.json:
        abort(400)
    preprocess_file = request.json['preprocess']
    requirements_file = request.json['requirements']
    model_file = request.json['model']
    hdfs_uri = request.json['hdfs_uri']
    logger.info('Read json configurations: OK!!')
    # Download files from HDFS
    client_hdfs = InsecureClient(hdfs_uri)
    client_hdfs.download(requirements_file, "./requirements.txt", overwrite=True)
    client_hdfs.download(model_file, "./model.pickle", overwrite=True)
    client_hdfs.download(preprocess_file, "./preprocess.pickle", overwrite=True)
    logger.info('Download pickles: OK!!')
    # Install library dependencies
    subprocess.call("pip install -r ./requirements.txt", shell=True)
    current_app.hdfs_uri = hdfs_uri
    current_app.inMemory = False
    current_app.configured = True
    current_app.client_hdfs = client_hdfs
    return jsonify({'status': "Docker configured"}), 201
def download_file(path, test_case_number, task_number):
    try:
        client = InsecureClient(
            'http://' + HADOOP_HOST_NAME + ':' + HADOOP_NAMENODE_PORT_NUMBER,
            user=HADOOP_USER_NAME)
    except Exception:
        print("Error connecting to hdfs client")
        return
    try:
        client.download(HADOOP_OUTPUT_PATH + task_number + test_case_number + "/",
                        os.path.join(path, test_case_number))
    except Exception as e:
        print(e)
        print("Error downloading output file from hdfs")
        return
    try:
        client.delete(HADOOP_OUTPUT_PATH + task_number + test_case_number, recursive=True)
    except Exception:
        print("Error deleting hdfs output directory")
        return
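# download_file depends on four module-level constants it does not define.
# Illustrative (assumed) values and a call might look like this:
HADOOP_HOST_NAME = "localhost"           # assumption
HADOOP_NAMENODE_PORT_NUMBER = "50070"    # assumption
HADOOP_USER_NAME = "hadoop"              # assumption
HADOOP_OUTPUT_PATH = "/jobs/output/"     # assumption

download_file("/tmp/results", test_case_number="tc1", task_number="task3")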
class HdfsDownloader():
    FILENAME = __file__

    def init(self, cfg):
        url = cfg['source_url']
        user = cfg['user'] if 'user' in cfg else None
        root_path = cfg['root'] if 'root' in cfg else '/'
        if 'download_filename' not in cfg:
            cfg['download_filename'] = basename(url)
        http_protocol_prefix = 'http://'
        if url.startswith(http_protocol_prefix):
            index = url[len(http_protocol_prefix):].find('/') + len(http_protocol_prefix)
            host_port = url[:index]
            cfg['file_path'] = url[index:]
            self.hdfs_client = InsecureClient(url=host_port, user=user, root=root_path)
        return cfg

    def retrive_file(self, cfg):
        url = cfg['source_url']
        print("download hdfs file: {}".format(url))
        if url.startswith('http://'):
            path = cfg['file_path']
            self.hdfs_client.download(path, cfg['download_filename'], overwrite=True)
        elif url.startswith('hdfs://'):
            cmd = 'hadoop fs -get {}'.format(url)
            for out_ in run_command(cmd):
                print(out_.rstrip().decode('utf-8'))
        else:
            raise Exception('Unsupported protocol. Only "http://" and "hdfs://" are supported')
        return basename(url)
def get(self, period):
    print("Period to predict:", period)
    # Connect to the HDFS client
    client = InsecureClient(url='http://namenode:9870', user='******')
    # Check that the saved model is present on HDFS
    if client.status(model_hdfs_remote_path + model_name, strict=False) is not None:
        # Load the model
        client.download(model_hdfs_remote_path + model_name, model_local_path, overwrite=True)
        model_fit = ARIMAResults.load(model_local_path + model_name)
        # Dataset for the evaluation
        df = get_data_cassandra()
        print(df.head())
        X = df['total_estimated_load'].values
        start_index = len(X)
        end_index = start_index + int(period)
        forecast = model_fit.predict(start=start_index, end=end_index)
        # df['date_est_load'] = df['date_est_load'].apply(pd.Timestamp)
        day = df['date_est_load'].values[-1].date()
        print(day)
        print(type(day))
        day += datetime.timedelta(days=1)
        res = {}
        for yhat in forecast:
            res[day.strftime("%d/%m/%Y")] = yhat
            day += datetime.timedelta(days=1)
        return res
    return "Service has been stopped"
# client.delete('/test_temps', recursive=True)

hdfspath = '/test_datas/'
localpath = '/Users/janevallette/Documents/Develops/learn_bigdata/datas/cat.jpeg'
result = client.upload(hdfspath, localpath)

# Writing part of a file.
# with open('datas/upfile.txt') as reader, client.write('/test_datas/upfile1.txt') as writer:
#     for line in reader:
#         # if line.startswith('-'):
#         writer.write(line)

from json import dump

# Writing a serialized JSON object.
# with open('datas/model.json') as reader, client.write('/test_datas/model1.json') as writer:
#     dump(reader, writer)
# with open('datas/cat.jpeg') as reader, client.write('/test_files/cat1.jpeg') as writer:
#     dump(reader, writer)

# Download a file or folder locally.
client.download('/test_datas/', 'datas/', n_threads=5)

# Loading a file in memory.
# with client.read('/test_datas/sample2.txt') as reader:
#     sample1 = reader.read()

# Directly deserializing a JSON object.
# with client.read('/test_datas/model1.json', encoding='utf-8') as reader:
#     from json import load
#     model = load(reader)
def Home(request):
    client = InsecureClient('http://localhost:50070', user='******')
    # Fetch each election-year CSV from HDFS once, caching it locally
    csv_files = ['1989.csv', '1991.csv', '1996.csv', '1998.csv', '1999.csv',
                 '2004.csv', '2009.csv', '2014.csv', 'Candidate.csv']
    for name in csv_files:
        if not os.path.exists(name):
            client.download(name, name)
    return render(request, 'election/home.html')
from hdfs import InsecureClient
import os

client = InsecureClient("http://10.150.144.225:50070", user="******")
client.download("/SC_recommendation/caml/Electronics_new_strategy_new_2", "data/", overwrite=True)
print(os.listdir("data/"))
class HisiHdfs:
    def __init__(self):
        self._c = InsecureClient(url="http://{}:14000".format(HisiHdfs.get_host()),
                                 user='******', root="/")
        # self._c = InsecureClient(url="http://10.154.67.254:14000", user='******', root="/")

    @staticmethod
    def get_host():
        domain = 'hdfs-ngx1.turing-ci.hisilicon.com'
        try:
            socket.gethostbyname(domain)
            return domain
        except Exception:
            return '10.154.67.254'

    @staticmethod
    def build_month_path(build_scene):
        '''daily build path'''
        return '/compilepackage/CI_Version/{}/br_hisi_trunk_ai/{}'.\
            format(build_scene, datetime.datetime.today().strftime('%Y%m'))

    @staticmethod
    def prebuild_month_path(build_scene):
        '''compile path'''
        return '/compilepackage/CI_Version/{}/br_hisi_trunk_ai_PRE_COMPILE/{}'.\
            format(build_scene, datetime.datetime.today().strftime('%Y%m'))

    def find_newest_build(self, build_scene):
        builds = self._c.list(HisiHdfs.build_month_path(build_scene), True)
        newest_build_name = None
        for build in builds:
            if type(build) != tuple:
                logging.warning("Unexpected build format {}".format(build))
                continue
            if len(build) < 2:
                logging.warning("Unexpected build format {}".format(build))
                continue
            if type(build[1]) != dict:
                logging.warning("Unexpected build format[1] {}".format(build))
                continue
            if build[1].get('type', None) != "DIRECTORY":
                logging.warning("Found unexpected build type(not DIRECTORY) {}".format(build))
                continue
            if type(build[0]) != str:
                logging.warning("Unexpected build format[0] {}".format(build))
                continue
            elements = build[0].split('_')
            if len(elements) != 3:
                logging.warning("Unexpected build name {}".format(build))
                continue
            if elements[2] != "newest":
                continue
            # build_date = datetime.datetime.strptime('_'.join(elements[:2]), "%Y%m%d_%H%M%S%f")
            if newest_build_name is None:
                newest_build_name = build[0]
                continue
            if newest_build_name < build[0]:
                newest_build_name = build[0]
        return newest_build_name

    def path_exists(self, base_path: str, build_name: str):
        path = "{}/{}".format(base_path, build_name)
        return self._c.status(path, strict=False) is not None

    def find_package(self, base_path: str, build_name: str, package_type: PackageType,
                     os_type=None, arch=None):
        if os_type is None:
            os_type, arch = get_env()
        path = "{}/{}".format(base_path, build_name)
        packages = self._c.list(path, True)
        pr = package_type.get_name_re()
        for package_name, package_info in packages:
            pm = pr.match(package_name)
            if pm is not None:
                if OsType.analyse_os(pm.group('os')) == os_type and pm.group('arch') == arch:
                    return package_name
        return None

    def download_package(self, base_path: str, build_name: str, package_name: str,
                         local_path: str):
        return self._c.download(hdfs_path="{}/{}/{}".format(base_path, build_name, package_name),
                                local_path=local_path,
                                overwrite=True)

    def download_compile_package(self, build_scene: str, build_name: str,
                                 package_name: str, local_path: str):
        return self.download_package(HisiHdfs.prebuild_month_path(build_scene),
                                     build_name, package_name, local_path)

    def download_daily_package(self, build_scene: str, build_name: str,
                               package_name: str, local_path: str):
        return self.download_package(HisiHdfs.build_month_path(build_scene),
                                     build_name, package_name, local_path)

    def download_newest(self, local_path: str, packages: List[PackageType],
                        os_type=None, arch=None):
        if not os.path.isdir(local_path):
            raise FileNotFoundError("The path {} does not exist".format(local_path))
        if os_type is None:
            os_type, arch = get_env()
        build_scenes_to_build_name = {}
        package_names = []
        print("Begin to download newest run packages from the newest")
        for package in packages:
            build_scene = package.get_build_scene()
            newest_build_name = build_scenes_to_build_name.get(
                build_scene, self.find_newest_build(build_scene))
            if newest_build_name is None:
                logging.error("Can not find the newest build")
                raise Exception("Can not find the newest build")
            package_name = self.find_package(HisiHdfs.build_month_path(build_scene),
                                             newest_build_name, package, os_type, arch)
            if package_name is None:
                logging.error("Can not find the package {}, os {}, arch {}".format(
                    package, os_type, arch))
                raise Exception("Can not find package")
            with shell_printer.DotPrinter("Begin to download {} from {} to {}".format(
                    package_name, newest_build_name, local_path)):
                self.download_daily_package(build_scene, newest_build_name,
                                            package_name, local_path)
            logging.info("Download {} to {} successfully".format(package_name, local_path))
            package_names.append(package_name)
        return package_names

    def download_compile_packages(self, build_name: str, local_path: str,
                                  package_types: List[PackageType]):
        self.wait_compile_paths_ready(package_types, build_name)
        package_names = []
        for package_type in package_types:
            package_name = self.find_package(
                HisiHdfs.prebuild_month_path(package_type.get_build_scene()),
                build_name, package_type)
            if package_name is None:
                with shell_printer.DotPrinter("Wait package {} from {}".format(
                        package_type.name, build_name)):
                    while package_name is None:
                        logging.debug("Can not find package {} from {}, sleep".format(
                            package_type.name, build_name))
                        time.sleep(10)
                        package_name = self.find_package(
                            HisiHdfs.prebuild_month_path(package_type.get_build_scene()),
                            build_name, package_type)
                # In practice, downloading a file right after it is created can fail or
                # yield an incomplete file, so wait 5 seconds before downloading
                time.sleep(5)
            with shell_printer.DotPrinter("Begin to download {} to {}".format(
                    package_name, local_path)):
                self.download_compile_package(package_type.get_build_scene(),
                                              build_name, package_name, local_path)
            logging.info("Download {} to {} successfully".format(package_name, local_path))
            package_names.append(package_name)
        return package_names

    def wait_compile_paths_ready(self, package_types: List[PackageType], build_name: str):
        scenes = set([pt.get_build_scene() for pt in package_types])
        for build_scene in scenes:
            build_path = HisiHdfs.prebuild_month_path(build_scene)
            if not self.path_exists(build_path, build_name):
                with shell_printer.DotPrinter(
                        "The build({}) path({}) has not been created, wait".format(
                            build_name, build_path)):
                    while not self.path_exists(build_path, build_name):
                        time.sleep(1)
class HDFSLibrary:
    """ Test library for working with HDFS """
    WEB_HDFS_URL = ""
    client = ""

    def __init__(self, namenode="localhost", port="50070"):
        self.WEB_HDFS_URL = 'http://' + namenode + ':' + str(port)
        print(namenode, ">>", port, ">>", self.WEB_HDFS_URL)
        self.client = InsecureClient(self.WEB_HDFS_URL)

    def check_hdfs_file_exists(self, file_path, stop=False):
        if self.client.status(file_path, strict=False) is None:
            if stop:
                print("ERROR: File does not exist:", file_path)
                return "ERROR: File does not exist: ", file_path
                # exit(172)
            return False
        return True

    def get_hdfs_file_content(self, file_path):
        self.check_hdfs_file_exists(file_path, stop=True)
        data = ""
        # Read as text so the lines can be concatenated into a str under Python 3
        with self.client.read(file_path, encoding='utf-8') as reader:
            for line in reader:
                data += line
        return data

    def search_string_in_hdfs_file(self, file_path, text1, text2="aqwszx", text3="xzswqa"):
        ret = self.check_hdfs_file_exists(file_path, stop=True)
        found = "" if ret else ret
        with self.client.read(file_path, encoding='utf-8') as reader:
            for line in reader:
                if line.find(text1) == -1 and line.find(text2) == -1 and line.find(text3) == -1:
                    continue
                found += line
        return found

    def hdfs_file_should_not_contain(self, file_path, text1, text2="aqwszx", text3="xzswqa"):
        self.check_hdfs_file_exists(file_path, stop=True)
        with self.client.read(file_path, encoding='utf-8') as reader:
            for line in reader:
                if line.find(text1) != -1 or line.find(text2) != -1 or line.find(text3) != -1:
                    return False
        return True

    ########################
    #
    # BASIC FUNCTIONS:
    #
    ########################

    def get_hdfs_file_folder_content_summary(self, file_path):
        """ Retrieving a file or folder content summary.
        :return: returns a file or folder content summary.
        """
        self.check_hdfs_file_exists(file_path, stop=True)
        return self.client.content(file_path)

    def get_hdfs_file_folder_status(self, file_path):
        """ Retrieving a file or folder status.
        :return: returns a file or folder status.
        """
        self.check_hdfs_file_exists(file_path, stop=True)
        return self.client.status(file_path)

    def list_hdfs_directory(self, folder_path):
        """ Listing all files inside a directory.
        :return: returns a file list.
        """
        self.check_hdfs_file_exists(folder_path, stop=True)
        return self.client.list(folder_path)

    def move_hdfs_file(self, old_path, new_path):
        """ Renaming ("moving") a file.
        :return: NA
        """
        self.check_hdfs_file_exists(old_path, stop=True)
        self.client.rename(old_path, new_path)

    def delete_hdfs_file(self, file_path):
        """ Deleting a file or folder recursively.
        :return: returns `True` if the deletion was successful otherwise `False`
        """
        self.check_hdfs_file_exists(file_path)
        return self.client.delete(file_path, recursive=True)

    def copy_to_local_hdfs_file(self, hdfs_path, local_path):
        """ Copy a file or folder from HDFS to local.
        :return: local_path
        """
        self.check_hdfs_file_exists(hdfs_path)
        return self.client.download(hdfs_path, local_path, overwrite=True, n_threads=4)

    def copy_from_local_hdfs_file(self, local_path, hdfs_path):
        """ Copy a file or folder from local to HDFS.
        :return: hdfs_path
        """
        return self.client.upload(hdfs_path, local_path, overwrite=True, n_threads=4)

    def get_hdfs_file_checksum(self, file_path):
        """ Get the checksum value for a file.
        :return: checksum
        """
        self.check_hdfs_file_exists(file_path, stop=True)
        return self.client.checksum(file_path)

    def create_hdfs_dir(self, dir_path, perm=755):
        """ Create a directory or recursive dirs on HDFS.
        :return: NA
        """
        self.client.makedirs(dir_path, permission=perm)
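# A hedged driver for the library above, assuming the constructor defaults and a
# hypothetical /tmp/report.txt already present on HDFS.
lib = HDFSLibrary(namenode="localhost", port="50070")
if lib.check_hdfs_file_exists("/tmp/report.txt"):
    print(lib.get_hdfs_file_content("/tmp/report.txt"))
    lib.copy_to_local_hdfs_file("/tmp/report.txt", "./report.txt")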
class HadoopFileSystem():
    def __init__(self, url, user):
        u = urlsplit(url)
        if u.scheme != 'http' and u.scheme != 'https':
            raise ValueError("Invalid name node address")
        self.url = urlunparse((u.scheme, u.netloc, '', '', '', ''))
        self.client = InsecureClient(self.url, user=user)
        self.localdir = u.path
        self.prefix = 'HDFS'

    def normalize_path(self, path):
        path = os.path.normpath(path)
        path = self.strip_prefix(path)
        while path and path[0] == os.sep:
            path = path[1:]
        return os.path.join(self.localdir, path)

    def strip_prefix(self, path):
        return path[len(self.prefix):] if path.startswith(self.prefix) else path

    def strip_root(self, path):
        path = self.strip_prefix(path)
        if path.startswith(self.url):
            path = path[len(self.url):]
            if not path.startswith(self.localdir):
                # Raising a bare string is invalid in Python 3; raise a proper exception
                raise ValueError('Invalid hdfs path. It must start with the root directory')
        return path[len(self.localdir):] if path.startswith(self.localdir) else path

    def create_folder(self, path):
        try:
            path = self.normalize_path(path)
            self.client.makedirs(path)
        except Exception:
            return None
        return path

    def remove(self, path):
        try:
            path = self.normalize_path(path)
            if self.client.status(path, False) is not None:
                self.client.delete(path, True)
        except Exception as e:
            print(e)

    def rename(self, oldpath, newpath):
        try:
            oldpath = self.normalize_path(oldpath)
            newpath = self.normalize_path(newpath)
            self.client.rename(oldpath, newpath)
        except Exception as e:
            print(e)

    def get_files(self, path):
        path = self.normalize_path(path)
        files = []
        for f in self.client.list(path):
            status = self.client.status(join(path, f), False)
            if status['type'] != "DIRECTORY":
                files.append(f)
        return files

    def get_folders(self, path):
        path = self.normalize_path(path)
        folders = []
        for f in self.client.list(path):
            status = self.client.status(join(path, f), False)
            if status['type'] == "DIRECTORY":
                folders.append(f)
        return folders

    def exists(self, path):
        path = self.normalize_path(path)
        status = self.client.status(path, False)
        return not (status is None)

    def isdir(self, path):
        path = self.normalize_path(path)
        status = self.client.status(path, False)
        return status['type'] == "DIRECTORY"

    def isfile(self, path):
        path = self.normalize_path(path)
        status = self.client.status(path, False)
        return status['type'] == "FILE"

    def read(self, path):
        path = self.normalize_path(path)
        with self.client.read(path) as reader:
            return reader.read().decode('utf-8')

    def write(self, path, content):
        path = self.normalize_path(path)
        self.client.write(path, content)

    def make_json(self, path):
        normalized_path = self.normalize_path(path)
        data_json = {
            'path': urljoin(self.url, normalized_path),
            'text': os.path.basename(path)
        }
        status = self.client.status(normalized_path, False)
        if status is not None:
            data_json['folder'] = status['type'] == "DIRECTORY"
            if status['type'] == "DIRECTORY":
                data_json['nodes'] = [
                    self.make_json(os.path.join(path, fn))
                    for fn in self.client.list(normalized_path)
                ]
        # print(json.dumps(data_json))
        return data_json

    def save_upload(self, file, fullpath):
        localpath = os.path.join(tempfile.gettempdir(), os.path.basename(fullpath))
        if os.path.isfile(localpath):
            os.remove(localpath)
        try:
            file.save(localpath)
            if isfile(fullpath):
                fullpath = os.path.dirname(fullpath)
            self.client.upload(self.normalize_path(fullpath), localpath, True)
        except Exception:
            pass

    def download(self, path):
        path = self.normalize_path(path)
        status = self.client.status(path, False)
        if status is not None and status['type'] == "FILE":
            localpath = os.path.join(tempfile.gettempdir(), os.path.basename(path))
            return self.client.download(path, localpath, True)
        else:
            return None
class HDFSWrapper(object):
    def __init__(self):
        self.__m_HDFS_Handler__ = None
        self.__m_HDFS_WebFSDir__ = None
        self.__m_HDFS_User__ = None
        self.__m_HDFS_WebFSURL__ = None

    def HDFS_makedirs(self, hdfs_path):
        """ Create a directory """
        if self.__m_HDFS_Handler__ is None:
            raise HDFSWrapperException("HDFS not connected. Please connect it first.")
        self.__m_HDFS_Handler__.makedirs(
            os.path.join(self.__m_HDFS_WebFSDir__, hdfs_path).replace('\\', '/'))

    def HDFS_setPermission(self, hdfs_path, permission):
        """ Change the permissions of the given file """
        if self.__m_HDFS_Handler__ is None:
            raise HDFSWrapperException("HDFS not connected. Please connect it first.")
        m_hdfs_filepath = os.path.dirname(hdfs_path)
        m_hdfs_filename = os.path.basename(hdfs_path)
        self.__m_HDFS_Handler__.set_permission(
            os.path.join(self.__m_HDFS_WebFSDir__, m_hdfs_filepath,
                         m_hdfs_filename).replace('\\', '/'),
            permission=permission)

    def HDFS_Connect(self, p_szURL, p_szUser):
        """ Connect to HDFS; the URL uses the WebHDFS protocol """
        m_HDFS_Protocal = p_szURL.split("://")[0]
        m_HDFS_NodePort = p_szURL[len(m_HDFS_Protocal) + 3:].split("/")[0]
        m_HDFS_WebFSURL = m_HDFS_Protocal + "://" + m_HDFS_NodePort
        self.__m_HDFS_User__ = p_szUser
        self.__m_HDFS_WebFSURL__ = m_HDFS_WebFSURL
        self.__m_HDFS_WebFSDir__ = p_szURL[len(m_HDFS_WebFSURL):]
        self.__m_HDFS_Handler__ = InsecureClient(url=m_HDFS_WebFSURL,
                                                 user=p_szUser,
                                                 root=self.__m_HDFS_WebFSDir__)
        # Try to create the directory in case it does not exist yet
        self.__m_HDFS_Handler__.makedirs(self.__m_HDFS_WebFSDir__.replace('\\', '/'))

    def HDFS_CD(self, p_szPath):
        self.__m_HDFS_WebFSDir__ = os.path.join(self.__m_HDFS_WebFSDir__, p_szPath)
        self.__m_HDFS_Handler__ = InsecureClient(url=self.__m_HDFS_WebFSURL__,
                                                 user=self.__m_HDFS_User__,
                                                 root=self.__m_HDFS_WebFSDir__)
        # Try to create the directory in case it does not exist yet
        self.__m_HDFS_Handler__.makedirs(self.__m_HDFS_WebFSDir__.replace('\\', '/'))

    def HDFS_status(self, hdfs_path=""):
        """ Return the status of the given path """
        if self.__m_HDFS_Handler__ is None:
            raise HDFSWrapperException("HDFS not connected. Please connect it first.")
        m_ReturnList = []
        m_Status = self.__m_HDFS_Handler__.status(hdfs_path)
        m_ReturnList.append((hdfs_path, m_Status))
        return m_ReturnList

    def HDFS_list(self, hdfs_path="", recursive=False):
        """ Return the files under the directory """
        if self.__m_HDFS_Handler__ is None:
            raise HDFSWrapperException("HDFS not connected. Please connect it first.")
        m_ReturnList = []
        if not recursive:
            for row in self.__m_HDFS_Handler__.list(hdfs_path, status=True):
                m_ReturnList.append((os.path.join(hdfs_path, row[0]), row[1]))
            return m_ReturnList
        else:
            for row in self.__m_HDFS_Handler__.list(hdfs_path, status=True):
                if row[1]['type'].upper() == 'DIRECTORY':
                    m_ReturnList.append(
                        (os.path.join(hdfs_path, row[0]).replace("\\", "/"), row[1]))
                    m_ReturnList.extend(
                        self.HDFS_list(os.path.join(hdfs_path, row[0]).replace("\\", "/"),
                                       recursive=True))
                else:
                    m_ReturnList.append(
                        (os.path.join(hdfs_path, row[0]).replace("\\", "/"), row[1]))
            return m_ReturnList

    def HDFS_Download(self, hdfs_path="", local_path="", recursive=False):
        """ Download files from HDFS to local """
        if self.__m_HDFS_Handler__ is None:
            raise HDFSWrapperException("HDFS not connected. Please connect it first.")
        # If local_path denotes a directory that does not exist locally, create it
        m_LocalPath = local_path
        if m_LocalPath.endswith("/") and not os.path.exists(m_LocalPath):
            os.makedirs(m_LocalPath)
        m_FileList = self.HDFS_list(recursive=recursive)
        for row in m_FileList:
            if fnmatch.fnmatch(row[0], hdfs_path):
                self.__m_HDFS_Handler__.download(row[0], m_LocalPath, overwrite=True)

    def HDFS_Upload(self, local_path, hdfs_path=""):
        """ Upload files to HDFS """
        if self.__m_HDFS_Handler__ is None:
            raise HDFSWrapperException("HDFS not connected. Please connect it first.")
        for file in glob(local_path):
            if hdfs_path == "":
                m_hdfs_filepath = ""
                m_hdfs_filename = os.path.basename(file)
            else:
                if hdfs_path.endswith("/"):
                    m_hdfs_filepath = hdfs_path
                    m_hdfs_filename = os.path.basename(file)
                else:
                    m_hdfs_filepath = os.path.dirname(hdfs_path)
                    m_hdfs_filename = os.path.basename(hdfs_path)
            try:
                remote_status = self.__m_HDFS_Handler__.status(
                    hdfs_path=os.path.join(self.__m_HDFS_WebFSDir__,
                                           m_hdfs_filepath).replace('\\', '/'),
                    strict=True)
                if remote_status['type'] == "FILE":
                    # The remote path we expected to be a directory holds a stray file; delete it
                    self.__m_HDFS_Handler__.delete(
                        os.path.join(self.__m_HDFS_WebFSDir__,
                                     m_hdfs_filepath).replace('\\', '/'),
                        recursive=True)
                remote_status = self.__m_HDFS_Handler__.status(
                    os.path.join(self.__m_HDFS_WebFSDir__, m_hdfs_filepath,
                                 m_hdfs_filename).replace('\\', '/'))
                if remote_status['type'] == "DIRECTORY":
                    # The remote directory already exists; try to delete it
                    self.__m_HDFS_Handler__.delete(
                        os.path.join(self.__m_HDFS_WebFSDir__, m_hdfs_filepath,
                                     m_hdfs_filename).replace('\\', '/'),
                        recursive=True)
            except HdfsError:
                # The remote directory does not exist; the upload below will create it
                pass
            self.__m_HDFS_Handler__.upload(
                os.path.join(self.__m_HDFS_WebFSDir__, m_hdfs_filepath,
                             m_hdfs_filename).replace('\\', '/'),
                file, overwrite=True, cleanup=True)

    @staticmethod
    def _permission_mask(m_FileProperties):
        # Render the HDFS octal permission string as an ls-style mask, e.g. "drwxr-xr-x".
        # This factors out the digit-to-rwx mapping that was previously duplicated inline.
        if m_FileProperties["type"] == "FILE":
            m_PermissionMask = "-"
        elif m_FileProperties["type"] == "DIRECTORY":
            m_PermissionMask = "d"
        else:
            m_PermissionMask = "?"
        m_Bits = ["---", "--x", "-w-", "-wx", "r--", "r-x", "rw-", "rwx"]
        m_Permission = m_FileProperties["permission"]
        if len(m_Permission) == 3:
            for m_Digit in m_Permission:
                if m_Digit in "01234567":
                    m_PermissionMask = m_PermissionMask + m_Bits[int(m_Digit)]
                else:
                    m_PermissionMask = m_PermissionMask + "???"
        else:
            m_PermissionMask = m_PermissionMask + "?????????"
        return m_PermissionMask

    def Process_SQLCommand(self, p_szSQL):
        try:
            m_szSQL = p_szSQL.strip()
            matchObj = re.match(r"hdfs\s+connect\s+(.*)\s+with\s+user\s+(.*)$",
                                m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_HDFSServer = str(matchObj.group(1)).strip()
                m_HDFSUser = str(matchObj.group(2)).strip()
                self.HDFS_Connect(m_HDFSServer, m_HDFSUser)
                return None, None, None, None, "Hdfs Server set successful."
            matchObj = re.match(r"hdfs\s+cd\s+(.*)$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_HDFSPath = str(matchObj.group(1)).strip()
                self.HDFS_CD(m_HDFSPath)
                return None, None, None, None, "Hdfs root dir change successful."
            matchObj = re.match(r"hdfs\s+status\s+(.*)$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_TargetFileList = str(matchObj.group(1)).strip()
                m_ReturnFileList = self.HDFS_status(m_TargetFileList)
                m_Result = []
                for (m_FileName, m_FileProperties) in m_ReturnFileList:
                    m_PermissionMask = self._permission_mask(m_FileProperties)
                    m_ModifiedTime = str(datetime.datetime.utcfromtimestamp(
                        m_FileProperties["modificationTime"] / 1000).strftime("%Y-%m-%d %H:%M:%S"))
                    m_Result.append([m_TargetFileList, m_PermissionMask,
                                     m_FileProperties["owner"], m_FileProperties["group"],
                                     m_FileProperties["length"], m_ModifiedTime])
                return "HDFS file status:", m_Result, \
                    ["Path", "Permission", "owner", "group", "Size", "Modified"], \
                    None, "Total " + str(len(m_Result)) + " files listed."
            matchObj = re.match(r"hdfs\s+rm\s+(.*)$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_Bak_WebFSDir = self.__m_HDFS_WebFSDir__
                m_FileDeleted = str(matchObj.group(1)).strip()
                m_FileDeletedPath = os.path.dirname(m_FileDeleted)
                m_FileDeletedName = os.path.basename(m_FileDeleted)
                self.HDFS_CD(m_FileDeletedPath)
                m_FileList = self.HDFS_list(self.__m_HDFS_WebFSDir__, recursive=False)
                for row in m_FileList:
                    if fnmatch.fnmatch(os.path.basename(row[0]), m_FileDeletedName):
                        self.__m_HDFS_Handler__.delete(row[0], recursive=True)
                # Change back to the original directory
                self.HDFS_CD(m_Bak_WebFSDir)
                return None, None, None, None, "Hdfs file deleted successful."
            matchObj = re.match(r"hdfs\s+makedirs\s+(.*)$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_Dir = str(matchObj.group(1)).strip()
                self.HDFS_makedirs(m_Dir)
                return None, None, None, None, "Hdfs directory created successful."
            matchObj = re.match(r"hdfs\s+set_permission\s+(.*)\s+(.*)$", m_szSQL,
                                re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_File = str(matchObj.group(1)).strip()
                m_FilePermission = str(matchObj.group(2)).strip()
                self.HDFS_setPermission(m_File, m_FilePermission)
                return None, None, None, None, "Hdfs set permission successful."
            m_FileUpload = ""
            m_TargetDir = None
            matchObj = re.match(r"hdfs\s+upload\s+(.*)$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_FileUpload = str(matchObj.group(1)).strip()
                m_TargetDir = ""
            matchObj = re.match(r"hdfs\s+upload\s+(.*)\s+(.*)$", m_szSQL,
                                re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_FileUpload = str(matchObj.group(1)).strip()
                m_TargetDir = str(matchObj.group(2)).strip()
            if m_TargetDir is not None:
                self.HDFS_Upload(m_FileUpload, m_TargetDir)
                return None, None, None, None, "Hdfs file upload successful."
            m_FileDownload = ""
            m_TargetDir = None
            matchObj = re.match(r"hdfs\s+download\s+(.*)$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_FileDownload = str(matchObj.group(1)).strip()
                m_TargetDir = ""
            matchObj = re.match(r"hdfs\s+download\s+(.*)\s+(.*)$", m_szSQL,
                                re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_FileDownload = str(matchObj.group(1)).strip()
                m_TargetDir = str(matchObj.group(2)).strip()
            if m_TargetDir is not None:
                self.HDFS_Download(m_FileDownload, m_TargetDir)
                return None, None, None, None, "Hdfs file download successful."
            m_TargetFileList = None
            matchObj = re.match(r"hdfs\s+list(\s+)?$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_TargetFileList = ""
            matchObj = re.match(r"hdfs\s+list\s+(.*)?$", m_szSQL, re.IGNORECASE | re.DOTALL)
            if matchObj:
                m_TargetFileList = str(matchObj.group(1)).strip()
            if m_TargetFileList is not None:
                m_ReturnFileList = self.HDFS_list(m_TargetFileList, recursive=True)
                m_Result = []
                for (m_FileName, m_FileProperties) in m_ReturnFileList:
                    m_PermissionMask = self._permission_mask(m_FileProperties)
                    m_ModifiedTime = str(datetime.datetime.utcfromtimestamp(
                        m_FileProperties["modificationTime"] / 1000).strftime("%Y-%m-%d %H:%M:%S"))
                    m_Result.append([m_FileProperties["pathSuffix"], m_PermissionMask,
                                     m_FileProperties["owner"], m_FileProperties["group"],
                                     m_FileProperties["length"], m_ModifiedTime])
                return "HDFS file List:", m_Result, \
                    ["Path", "Permission", "owner", "group", "Size", "Modified"], \
                    None, "Total " + str(len(m_Result)) + " files listed."
            return None, None, None, None, "Unknown HDFS Command."
        except (HDFSWrapperException, HdfsError) as he:
            if "SQLCLI_DEBUG" in os.environ:
                print('traceback.print_exc():\n%s' % traceback.print_exc())
                print('traceback.format_exc():\n%s' % traceback.format_exc())
            raise SQLCliException(he.message)
import pandas as pd
from hdfs import InsecureClient

#%%
hdfs_client = InsecureClient('http://10.10.250.10:50070', timeout=1)
hdfs_client

#%%
from datetime import date
# hdfs_path = '/projects/projectfinder/raw/items/' +\
#     date.today().year.__str__() + '/' +\
#     date.today().month.__str__() + '/'

#%%
hdfs_path = '/projects/projectfinder/raw/items/2019'

#%%
hdfs_client.download(hdfs_path, 'hdfs_data', n_threads=5)

#%%
hdfs_client_status = hdfs_client.status('/', strict=True)
hdfs_client_status

#%%
hdfs_file_status = hdfs_client.list(hdfs_path)
hdfs_file_status

#%% [markdown]
# Go to the [manual](https://hdfscli.readthedocs.io/en/latest/advanced.html#path-expansion)
# ```bash
def test_file(self):
    client_hdfs = InsecureClient('http://127.0.0.1:50070')
    # download() returns the local path on success, which is truthy
    self.assertTrue(client_hdfs.download("/user/maria_dev/dataset1", ""))
class HDFSStorage(Storage):
    """ HDFS storage """

    def fix_slashes(self, path):
        sep = os.path.sep
        if path[0] != sep:
            path = sep + path
        if path[-1] != sep:
            path = path + sep
        return path

    def __init__(self, location=None, base_url=None):
        self.hdfs_hosts = settings.HDFS_STORAGE['hosts']
        self.hdfs_root = self.fix_slashes(settings.HDFS_STORAGE['root'])
        self.media_root = settings.MEDIA_ROOT
        self.media_url = self.fix_slashes(settings.MEDIA_URL)
        self.fetch_url = '%s/webhdfs/v1%s%%s?op=OPEN' % (self.hdfs_hosts.split(',')[0],
                                                         self.hdfs_root)
        self.client = InsecureClient(self.hdfs_hosts)

    def _open(self, name, mode='rb'):
        local_path = os.path.join(settings.MEDIA_ROOT, name.replace('/', os.path.sep))
        if not os.path.exists(local_path):
            remote_path = self.path(name)
            local_dir = os.path.dirname(local_path)
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)
            print(self.client.download(remote_path, local_path=local_path,
                                       overwrite=True, temp_dir=tempfile.gettempdir()))
        return File(open(local_path, mode))

    def _save(self, name, content):
        print("_save(%s, %s, %s)" % (self, name, content))
        local_path = content.name
        hdfs_path = self.path(name)  # os.path.basename(local_path))
        print(hdfs_path, local_path)
        self.client.write(hdfs_path, data=content, overwrite=True)
        return name

    def url(self, name):
        return self.fetch_url % name

    def delete(self, name):
        return self.client.delete(self.path(name))

    def listdir(self, path):
        file_list = []
        dir_list = []
        for name, status in self.client.list(self.path(path), status=True):
            if status['type'] == 'DIRECTORY':
                dir_list.append(name)
            elif status['type'] == 'FILE':
                file_list.append(name)
        return dir_list, file_list

    def size(self, name):
        return self.client.status(self.path(name))['length']

    def exists(self, name):
        try:
            return True if self.client.status(self.path(name)) else False
        except HdfsError:
            return False

    def path(self, name):
        return (self.hdfs_root + name).replace('\\', '/')
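# A hedged settings sketch for wiring the storage above into Django. Every value
# below is an assumption, and the dotted storage path is hypothetical.
# settings.py (illustrative values only)
HDFS_STORAGE = {
    'hosts': 'http://namenode:50070',  # WebHDFS endpoint(s) handed to InsecureClient
    'root': '/media',                  # HDFS directory backing the media tree
}
DEFAULT_FILE_STORAGE = 'myapp.storage.HDFSStorage'  # hypothetical module path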
class HdfsClient:
    def __init__(self, namenode_host):
        # WebHDFS on port 9870, the Hadoop 3.x NameNode HTTP default
        self._client = InsecureClient(f'http://{namenode_host}:9870')

    def download(self, remote_hdfs_path, local_path):
        self._client.download(remote_hdfs_path, local_path, overwrite=True)
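# A hedged usage sketch for the thin wrapper above; the host and paths are
# assumptions.
client = HdfsClient(namenode_host='namenode.example.com')  # assumed host
client.download('/data/input.csv', './input.csv')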
def config():
    json_available = False
    # Directly pretrained
    if request.json \
            and 'pretrained' in request.json \
            and 'folder_path' in request.json \
            and 'model_name' in request.json \
            and 'hdfs_uri' in request.json:
        # Download files from HDFS
        model_name = request.json['model_name']
        hdfs_uri = request.json['hdfs_uri']
        client_hdfs = InsecureClient(hdfs_uri)
        handler_file = request.json['folder_path'] + "/handler.py"
        client_hdfs.download(handler_file, "./handler.py", overwrite=True)
        logger.info('Download files: OK!!')
        # Make model archives
        file_params = open('/' + 'params.pt', 'w')
        file_params.close()
        file_model = open('/' + 'model.py', 'w')
        file_model.close()
        response = os.popen(
            """torch-model-archiver --model-name %s --version 1.0 --model-file ./model.py --serialized-file ./params.pt --handler ./handler.py && mv %s.mar /home/model-server/model-store/"""
            % (model_name, model_name)).read().strip()
        # Install library dependencies
        json_available = True
        logger.info('Uploaded model: %s' % model_name)
    # Fine-tuned
    else:
        # Read file paths from the JSON input.
        # Input as the path of a folder
        if request.json \
                and 'folder_path' in request.json \
                and 'model_name' in request.json \
                and 'hdfs_uri' in request.json:
            handler_file = request.json['folder_path'] + "/handler.py"
            model_file = request.json['folder_path'] + "/model.py"
            params_file = request.json['folder_path'] + "/params.pt"
            model_name = request.json['model_name']
            hdfs_uri = request.json['hdfs_uri']
        # Input as paths of individual files
        elif request.json \
                and 'hdfs_uri' in request.json \
                and 'handler' in request.json \
                and 'params' in request.json \
                and 'model' in request.json \
                and 'model_name' in request.json:
            handler_file = request.json['handler']
            model_file = request.json['model']
            params_file = request.json['params']
            model_name = request.json['model_name']
            hdfs_uri = request.json['hdfs_uri']
        else:
            # Neither input shape matched; fail fast instead of hitting a NameError below
            abort(400)
        logger.info('Read json configurations: OK!!')
        # Download files from HDFS
        client_hdfs = InsecureClient(hdfs_uri)
        client_hdfs.download(handler_file, "./handler.py", overwrite=True)
        client_hdfs.download(model_file, "./model.py", overwrite=True)
        client_hdfs.download(params_file, "./params.pt", overwrite=True)
        logger.info('Download files: OK!!')
        # Make model archives
        response = os.popen(
            """torch-model-archiver --model-name %s --version 1.0 --model-file ./model.py --serialized-file ./params.pt --handler ./handler.py && mv %s.mar /home/model-server/model-store/"""
            % (model_name, model_name)).read().strip()
        current_app.hdfs_uri = hdfs_uri
        current_app.configured = True
        json_available = True
        logger.info('Uploaded model: %s' % model_name)
    if not json_available:
        abort(400)
    return jsonify({'response': response}), 201
def handleHdfsDownload(hdfs_path, local_path):
    client = InsecureClient("http://hdfs.neurolearn.com:50070", user="******")
    client.download(hdfs_path, local_path, overwrite=True)
    print('Downloaded Images from HDFS.')
    return local_path
class HadoopFileSystem(object):
    def __init__(self, *opts):
        self.client = InsecureClient(current_app.config['WEBHDFS_ADDR'],
                                     user=current_app.config['WEBHDFS_USER'])

    # def make_tree(self, datasourceid, client, path):
    #     tree = dict(name=(os.path.basename(path), datasourceid + os.path.sep + path), children=[])
    #     try:
    #         lst = client.list(path, status=True)
    #     except:
    #         pass  # ignore errors
    #     else:
    #         for fsitem in lst:
    #             fn = os.path.join(path, fsitem[0])
    #             if fsitem[1]['type'] == "DIRECTORY":
    #                 tree['children'].append(make_hdfs_tree(datasourceid, client, fn))
    #             else:
    #                 tree['children'].append({'name': (fsitem[0], datasourceid + os.path.sep + fn), 'children': []})
    #     return tree

    def make_json(self, datasourceid, base, relative_path):
        path = os.path.join(base, relative_path)
        data_json = {'datasource': datasourceid,
                     'path': relative_path,
                     'name': os.path.basename(relative_path)}
        status = self.client.status(path, False)
        if status is not None:
            if status['type'] == "DIRECTORY":
                data_json['type'] = DataType.Folder
                data_json['children'] = [
                    self.make_json(datasourceid, base, os.path.join(relative_path, fn))
                    for fn in self.client.list(path)
                ]
            else:
                data_json['type'] = DataType.File
        # print(json.dumps(data_json))
        return data_json

    def makedirs(self, path):
        try:
            self.client.makedirs(path)
        except Exception:
            return None
        return path

    def delete(self, path):
        try:
            if self.client.status(path, False) is not None:
                self.client.delete(path, True)
        except Exception as e:
            print(e)

    def addfolder(self, path):
        # Find the first unused "New Folder (i)" name; the original looped on
        # `is None`, which incremented while the name was still free
        i = 0
        while self.client.status(os.path.join(path, "New Folder ({0})".format(i)), False) is not None:
            i += 1
        return self.makedirs(os.path.join(path, "New Folder ({0})".format(i)))

    def rename(self, oldpath, newpath):
        try:
            self.client.rename(oldpath, newpath)
        except Exception as e:
            print(e)

    def saveUpload(self, file, fullpath):
        localpath = os.path.join(tempfile.gettempdir(), os.path.basename(fullpath))
        if os.path.isfile(localpath):
            os.remove(localpath)
        try:
            file.save(localpath)
            self.client.upload(os.path.dirname(fullpath), localpath, True)
        except Exception:
            pass

    def download(self, fullpath):
        status = self.client.status(fullpath, False)
        if status is not None and status['type'] == "FILE":
            localpath = os.path.join(tempfile.gettempdir(), os.path.basename(fullpath))
            return self.client.download(fullpath, localpath, True)
        else:
            return None
# Look at what is in our working directory
print(client.list('/student9_7'))
'''
['cur_readme', 'googlobots.txt', 'py_dir_02', 'readme', 'test', 'test2', 'testdir']
'''

# Check the size of our working directory
print(client.content('/student9_7'))
'''
{'directoryCount': 3, 'fileCount': 5, 'length': 10552, 'quota': -1, 'spaceConsumed': 31637, 'spaceQuota': -1}
'''

# Read the `test` file
with client.read('/student9_7/test') as reader:
    test = reader.read()
print(test)
'''
b'test file for hdfs\n'
'''

# Copy the `test` file from storage to the local home directory as `downloaded_file_via_py3`
client.download('/student9_7/test', 'downloaded_file_via_py3', n_threads=5)
'''
'/home/student9_7/downloaded_file_via_py3'
'''
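# For symmetry, a hedged sketch of the reverse transfer with the same client
# (the /student9_7 working directory comes from the tutorial above).
client.upload('/student9_7/uploaded_via_py3', 'downloaded_file_via_py3', overwrite=True)
print(client.list('/student9_7'))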