def gen_local_file_path(folder: str):
    """
    Walk ``folder`` and yield one tuple for every file found below it.

    :param folder: directory handed to ``os.walk`` as ``top``
    :return: generator of ``(local_file_path, remote_file_path, source_name)``;
        ``remote_file_path`` is ``FilePathParser.translate_to_linux_file()`` of
        the local path and ``source_name`` is the parser's ``source_name``
    """
    # os.walk already descends into every sub directory, so no manual
    # recursion is required. The walk variables stay local: declaring them
    # ``global`` leaked loop state into the module namespace.
    for root, _dirs, files in os.walk(top=folder):
        for _file in files:
            local_file_path = os.path.join(root, _file)
            _f = FilePathParser(full_path_file_string=local_file_path)
            remote_file_path = _f.translate_to_linux_file()
            yield local_file_path, remote_file_path, _f.source_name
def simple_upload_from_local(self,
                             local_file_path: str,
                             remote_file_path: str = None,
                             excepted_name: str = None) -> dict:
    """
    Upload a local file through ``self.upload`` and return the result as a dict.

    :param local_file_path: path of the file on the local disk
    :param remote_file_path: target path; replaced by ``"DEFAULT"`` when falsy
    :param excepted_name: forwarded unchanged to ``self.upload``
    :return: ``to_dict()`` of whatever ``self.upload`` returns
    """
    local_parser = FilePathParser(full_path_file_string=local_file_path)
    # NOTE(review): assert_state is defined elsewhere; presumably it raises
    # when its first argument is truthy - confirm against its definition.
    assert_state(
        local_parser.is_blank,
        "SimpleDBXServiceAPI#simple_upload_from_local , local_file_path is blank !"
    )
    assert_state(
        local_parser.is_not_exist,
        "SimpleDBXServiceAPI#simple_upload_from_local ,local_file_path is not exist !"
    )

    if not remote_file_path:
        remote_file_path = "DEFAULT"

    upload_result = self.upload(local_file_path=local_file_path,
                                remote_file_path=remote_file_path,
                                excepted_name=excepted_name)
    return upload_result.to_dict()
def download_from_dropbox(
        self, remote_file_path: str) -> Tuple[SimpleFileMetadata, bytes]:
    """
    Download a single file from dropbox.

    NOTE(review): the second element of the returned tuple is passed through
    untouched from ``self.dbxa.files_download``. If ``dbxa`` is the official
    Dropbox client this is an HTTP response object rather than raw ``bytes``
    as the annotation states - confirm before relying on it.

    :param remote_file_path: remote file path in dropbox; its path part must
        start with ``DROPBOX_FILE_SEP`` and its name part must not end with it
    :return: ``(SimpleFileMetadata, payload)``
    :raises DropboxAPIException: when the path is blank or malformed
    """
    if is_blank(remote_file_path):
        # The message used to name a different function (#download_for_bytes),
        # which sent whoever read the log to the wrong place.
        raise DropboxAPIException(
            message="#download_from_dropbox , remote file path is blank !")

    rfpp = FilePathParser(full_path_file_string=remote_file_path)
    if not rfpp.source_path.startswith(DROPBOX_FILE_SEP):
        raise DropboxAPIException(
            message="#download_from_dropbox , remote file path format error!")
    if rfpp.source_name.endswith(DROPBOX_FILE_SEP):
        raise DropboxAPIException(
            message="#download_from_dropbox , source file name format error !")

    # Create the client on first use.
    if self.dbxa is None:
        self.dbx()

    metadata, bytes_array = self.dbxa.files_download(remote_file_path)
    return SimpleFileMetadata(metadata), bytes_array
def test_fileparser(self):
    """Check FilePathParser on a path with a suffix and on one without."""
    # --- file name that carries a suffix ---
    path_with_suffix = "/foo/bar/1.jpg"
    parsed = FilePathParser(full_path_file_string=path_with_suffix)
    assert parsed.source_name == '1.jpg'
    assert parsed.source_path == '/foo/bar/'
    assert parsed.source_suffix == 'jpg'
    assert parsed.source_mime == MIME_DICT.get(parsed.source_suffix)

    # --- file name without any suffix ---
    path_without_suffix = "/foo/bar/1"
    parsed = FilePathParser(full_path_file_string=path_without_suffix)
    assert parsed.source_name == '1'
    assert parsed.source_path == '/foo/bar/'
    assert parsed.source_suffix is None

    print("======")
    # The module level helper and the parser property are printed side by
    # side for manual comparison.
    helper_path, helper_name = separate_path_and_name(path_without_suffix)
    print(helper_path, helper_name)
    prop_path, prop_name = parsed.source_path_and_name
    print(prop_path, prop_name)
def separate_path_and_name(file_path: str):
    """
    Split ``file_path`` into its path part and its name part.

    Thin wrapper that delegates to ``FilePathParser.source_path_and_name``.

    :param file_path: full path string to split
    :return: the parser's ``source_path_and_name`` value
    """
    parser = FilePathParser(full_path_file_string=file_path)
    return parser.source_path_and_name
def upload_folder(self,
                  local_file_folder: str,
                  remote_file_folder: str = None):
    """
    Submit every file below ``local_file_folder`` to the upload pool.

    When ``remote_file_folder`` is given it is normalised to start and end
    with ``DROPBOX_FILE_SEP`` and every file is uploaded directly into it
    (sub folder structure is not reproduced). Otherwise the remote path is
    the local path translated by ``FilePathParser.translate_to_linux_file``.

    The uploads are only submitted; this method does not wait for them.

    :param local_file_folder: local folder to upload
    :param remote_file_folder: optional target folder in dropbox
    :raises DropboxAPIException: when the parsed ``source_path`` of
        ``local_file_folder`` is blank, missing or not a directory
    """
    lfpp = FilePathParser(full_path_file_string=local_file_folder)
    if is_blank(lfpp.source_path):
        raise DropboxAPIException(
            message="#upload_folder , local file folder is blank !")
    if not os.path.exists(lfpp.source_path):
        raise DropboxAPIException(
            message="#upload_folder , local file folder is not exist !")
    if not os.path.isdir(lfpp.source_path):
        raise DropboxAPIException(
            message="#upload_folder , local file folder is not folder !")

    if is_not_blank(remote_file_folder):
        if not remote_file_folder.startswith(DROPBOX_FILE_SEP):
            remote_file_folder = DROPBOX_FILE_SEP + remote_file_folder
        if not remote_file_folder.endswith(DROPBOX_FILE_SEP):
            remote_file_folder = remote_file_folder + DROPBOX_FILE_SEP

    def gen_local_file_path(folder: str):
        # os.walk already visits every sub directory. The previous manual
        # recursion only created a generator that was never iterated, and
        # the ``global`` declaration leaked the walk state into the module.
        for root, _dirs, files in os.walk(top=folder):
            for _file in files:
                local_file_path = os.path.join(root, _file)
                _f = FilePathParser(full_path_file_string=local_file_path)
                remote_file_path = _f.translate_to_linux_file()
                yield local_file_path, remote_file_path, _f.source_name

    for _local_file_path, _remote_file_path, _source_file_name in \
            gen_local_file_path(folder=local_file_folder):
        if is_not_blank(remote_file_folder):
            _remote_file_path = os.path.join(remote_file_folder,
                                             _source_file_name)
        if is_debug():
            logger.debug("%s ==> %s " % (_local_file_path, _remote_file_path))
        # The callable is passed positionally: concurrent.futures executors
        # declare ``fn`` positional-only since Python 3.9, so ``fn=...``
        # raises TypeError there.
        # NOTE(review): assumes upload_file_pool is such an executor (or
        # takes the callable as its first parameter) - confirm.
        upload_file_pool.submit(self.upload,
                                local_file_path=_local_file_path,
                                remote_file_path=_remote_file_path)
def __init__(self, url, local_file_path, **kwargs):
    """
    Validate the download source/target and open the target file.

    :param url: address to download from; must not be blank and must pass
        the ``UrlPathParser.is_not_http`` check
    :param local_file_path: file to write to; the current working directory
        is used when this is falsy
    :param kwargs: ``thread_count`` is read from here (defaults to 1)
    :raises PyForifyDownloaderException: on a blank path, blank url or
        rejected url protocol
    """
    self.url = url
    self.local_file_path = local_file_path if local_file_path else os.getcwd()

    requested_threads = kwargs.get("thread_count")
    self.thread_count = requested_threads if requested_threads else 1
    self.bts = []

    target_parser = FilePathParser(
        full_path_file_string=self.local_file_path)
    if target_parser.is_blank:
        raise PyForifyDownloaderException(
            message="local file path is blank !")
    # TODO check lfpp

    url_parser = UrlPathParser(full_path_file_string=self.url)
    if url_parser.is_blank:
        raise PyForifyDownloaderException(message="url is blank !")
    if url_parser.is_not_http:
        raise PyForifyDownloaderException(
            message="url protocol not support http !")

    # The handle is kept on the instance; nothing in this method closes it.
    self.file = open(file=self.local_file_path, mode='wb')
def test_files_generator(self):
    """Print every item yielded by ``FilePathParser.files_generator``."""
    # Raw string: a backslash followed by a non-escape character is an
    # invalid escape sequence (a warning today, slated to become an error).
    # The resulting path value is unchanged.
    for i in FilePathParser.files_generator(file_path=r"D:\迅雷下载"):
        print(i)
def test_translate_windows_file_to_linux(self):
    """Print the linux style translation of a sample Windows path."""
    windows_path = "C:\\foo\\bar\\cat.jpg"
    parser = FilePathParser(full_path_file_string=windows_path)
    print(parser.translate_to_linux_file())
def download_as_file(
        self,
        remote_file_path: str,
        local_file_path: str,
        excepted_name: str = None,
) -> SimpleFileMetadata:
    """
    Download a single remote file and store it as a local file.

    Rules:

    * if ``remote_file_path`` does not start with ``/`` one is prepended,
      e.g. ``foo/bar.jpg`` becomes ``/foo/bar.jpg``
    * a remote path without a file name raises ``DropboxAPIException``
    * if ``excepted_name`` carries a file name, that name replaces the file
      name of ``local_file_path``; only the name part is used::

          excepted_name="cat.jpg",         local "/local/bar.jpg" => "/local/cat.jpg"
          excepted_name="/animal/cat.jpg", local "/local/bar.jpg" => "/local/cat.jpg"

    * otherwise, if ``local_file_path`` has no file name, the remote file
      name is used::

          remote "/foo/bar.jpg", local "/local/" => "/local/bar.jpg"

    :param remote_file_path: remote file path; only a single file is supported
    :param local_file_path: local path the download is written to
    :param excepted_name: optional name the local file should be renamed to
    :return: metadata returned by ``self.async_download_from_dropbox``
    :raises DropboxAPIException: on a blank/malformed remote path or a blank
        local path
    """
    if is_blank(remote_file_path):
        raise DropboxAPIException(
            message="#download_as_file, remote file is blank !")
    if remote_file_path.endswith(DROPBOX_FILE_SEP):
        raise DropboxAPIException(
            message="#download_as_file, remote file path format error !")

    # transform `foo/bar.jpg` to `/foo/bar.jpg`
    if not remote_file_path.startswith(DROPBOX_FILE_SEP):
        remote_file_path = DROPBOX_FILE_SEP + remote_file_path

    rfpp = FilePathParser(full_path_file_string=remote_file_path)
    lfpp = FilePathParser(full_path_file_string=local_file_path)

    # The check used to be inverted (is_not_blank), which rejected every
    # valid remote path with a "source name is blank" error.
    if is_blank(rfpp.source_name):
        raise DropboxAPIException(
            message="#download_as_file , remote file path source name is blank !")
    if lfpp.is_blank:
        raise DropboxAPIException(
            message="#download_as_file , local file path is blank !")

    # Create the local folder when it is missing. makedirs also handles
    # nested folders, where os.mkdir would fail on a missing parent.
    if is_not_blank(lfpp.source_path) and not os.path.exists(lfpp.source_path):
        os.makedirs(lfpp.source_path, exist_ok=True)

    # Decide which file name the local file gets.
    new_source_name = None
    if is_not_blank(excepted_name):
        efpp = FilePathParser(full_path_file_string=excepted_name)
        if is_not_blank(efpp.source_name):
            # Only the name part, so "/animal/cat.jpg" renames to "cat.jpg".
            new_source_name = efpp.source_name
    if new_source_name is None and is_blank(lfpp.source_name):
        new_source_name = rfpp.source_name
    if new_source_name is not None:
        local_file_path = lfpp.set_source_name(
            excepted_source_name=new_source_name)

    simple_file_metadata, bytes_array = self.async_download_from_dropbox(
        remote_file_path=remote_file_path)

    # NOTE(review): presumably the second element is raw bytes; objects that
    # expose the body as ``.content`` are unwrapped as well - confirm against
    # async_download_from_dropbox.
    payload = getattr(bytes_array, "content", bytes_array)

    # Write straight to the file. The former detour through io.StringIO
    # could not accept bytes and called the ``closed`` property as a method.
    with open_file(file_name=local_file_path, mode="wb") as lwf:
        lwf.write(payload)
    return simple_file_metadata
def upload_from_external_url(self,
                             external_url: str,
                             remote_file_path: str = None,
                             remote_folder_path: str = None,
                             excepted_name: str = None,
                             **kwargs) -> SimpleFileMetadata:
    """
    Fetch ``external_url`` and upload the response body to dropbox.

    Target resolution when several parameters are given:

    * ``excepted_name``      first priority
    * ``remote_file_path``   second priority
    * ``remote_folder_path`` third priority

    When the resulting remote file name has no suffix, one is derived from
    the url and, failing that, from the response ``Content-Type``.

    :param external_url: url of the external source
    :param remote_file_path: remote file path
    :param remote_folder_path: remote folder path, used only when
        ``remote_file_path`` is blank
    :param excepted_name: expected file name; when it contains
        ``DROPBOX_FILE_SEP`` it is used as the complete remote path
    :param kwargs: extra keyword arguments forwarded to ``requests.get``
    :return: result of ``self.async_upload_bytes``
    :raises DropboxAPIException: when no remote target is given, the url is
        blank or rejected, or the response status is not 200
    """
    if is_blank(remote_file_path):
        if is_blank(remote_folder_path):
            raise DropboxAPIException(
                "#upload_from_external_url, remote_file_path is blank and remote_folder_path "
                "is also blank !")
        if not remote_folder_path.startswith(DROPBOX_FILE_SEP):
            remote_folder_path = DROPBOX_FILE_SEP + remote_folder_path
        if not remote_folder_path.endswith(DROPBOX_FILE_SEP):
            remote_folder_path = remote_folder_path + DROPBOX_FILE_SEP
        remote_file_path = remote_folder_path

    if not remote_file_path.startswith(DROPBOX_FILE_SEP):
        remote_file_path = DROPBOX_FILE_SEP + remote_file_path

    euup = UrlPathParser(full_path_file_string=external_url)
    if euup.is_blank:
        raise DropboxAPIException(
            "#upload_from_external , upload external url is blank!")
    if euup.is_not_http:
        raise DropboxAPIException(
            "#upload_from_external , upload external url is unknown protocol!")

    rfpp = FilePathParser(full_path_file_string=remote_file_path)
    if is_not_blank(excepted_name):
        if DROPBOX_FILE_SEP in excepted_name:
            # excepted_name carries its own folder: use it as the full path
            remote_file_path = excepted_name \
                if excepted_name.startswith(DROPBOX_FILE_SEP) \
                else DROPBOX_FILE_SEP + excepted_name
        else:
            remote_file_path = rfpp.set_source_name(excepted_name)
    else:
        remote_file_source = rfpp.source_name
        if is_blank(remote_file_source):
            # case one:
            #   remote_file_path /DEFAULT/A/ , url name bar.jpg
            #   auto set /DEFAULT/A/bar.jpg
            remote_file_path = os.path.join(remote_file_path,
                                            euup.source_name)
        elif DROPBOX_FILE_DOT not in remote_file_source \
                and is_not_blank(euup.source_suffix):
            # case two:
            #   remote_file_path /DEFAULT/A/bar , url name bar.jpg
            #   auto set /DEFAULT/A/bar.jpg
            remote_file_path = remote_file_path + DROPBOX_FILE_DOT + euup.source_suffix
        # case three:
        #   remote_file_path /DEFAULT/A/bar , url name bar
        #   stays /DEFAULT/A/bar

    if is_debug():
        logger.debug(
            "#upload_from_external_url , remote_file_path=%s, external_url=%s"
            % (remote_file_path, external_url))

    async def _arequest_external_url():
        # requests.get is a blocking call; the coroutine only exists so the
        # request keeps running through self.loop as before.
        return requests.get(url=external_url, **kwargs, stream=True)

    res = self.loop.run_until_complete(_arequest_external_url())
    try:
        # Tips: anti-spider policy - a server may deliberately answer with a
        # non-200 status code to confuse crawlers.
        if res.status_code != 200:
            # Build a real string: the message used to be a tuple, and read
            # the non-existent attribute ``res.txt``.
            error_msg = "#upload_from_external_url , fail to request , url=%s , msg=%s" % (
                external_url, res.text)
            logger.error(error_msg)
            raise DropboxAPIException(message=error_msg)

        # request success
        content_type = res.headers.get("Content-Type")
        if is_debug():
            logger.info("#upload_from_external_url request <%s> success!" %
                        external_url)

        if DROPBOX_FILE_DOT not in remote_file_path.split(
                DROPBOX_FILE_SEP)[-1]:
            # still no suffix: derive one from the response content type
            file_suffix = get_suffix(mime=content_type)
            if file_suffix is not None:
                remote_file_path = remote_file_path + DROPBOX_FILE_DOT + file_suffix

        with io.BytesIO() as buffer:
            for chunk in res.iter_content(chunk_size=20 * 1024):
                if not chunk:
                    break
                buffer.write(chunk)
            file_bytes = buffer.getvalue()
    finally:
        # Release the streamed connection on every path, including errors.
        res.close()

    if is_debug():
        logger.debug("#upload_from_external_url , remote_file_path=%s" %
                     remote_file_path)
    md = self.async_upload_bytes(file_bytes=file_bytes,
                                 remote_file_path=remote_file_path)
    return md
def upload_from_local(self,
                      local_file_path: str,
                      remote_file_path: str = None,
                      remote_folder_path: str = None,
                      excepted_name: str = None) -> SimpleFileMetadata:
    """
    Upload a local file to dropbox, optionally under another name.

    Target resolution when several parameters are given:

    * ``excepted_name``      first priority
    * ``remote_file_path``   second priority
    * ``remote_folder_path`` third priority

    Samples (result shown as comment)::

        self.upload_from_local(local_file_path='/foo/bar.jpg',
                               remote_file_path='/DEFAULT/cat.jpg')
        # `/DEFAULT/cat.jpg`
        self.upload_from_local(local_file_path='/foo/bar.jpg',
                               remote_file_path='DEFAULT/cat.jpg')
        # `/DEFAULT/cat.jpg`
        self.upload_from_local(local_file_path='/foo/bar.jpg',
                               remote_file_path='DEFAULT/cat.jpg',
                               excepted_name='dog.jpg')
        # `/DEFAULT/dog.jpg`
        self.upload_from_local(local_file_path='/foo/bar.jpg',
                               remote_file_path='DEFAULT/cat.jpg',
                               excepted_name='EXCEPTED_DIR/dog.jpg')
        # `/EXCEPTED_DIR/dog.jpg`
        self.upload_from_local(local_file_path='/foo/bar.jpg',
                               remote_folder_path='REMOTE_FOLDER',
                               excepted_name='dog.jpg')
        # `/REMOTE_FOLDER/dog.jpg`

    Files up to 10 MiB are read and uploaded in one piece; larger files go
    through ``self.async_upload_with_large_file`` with 10 MiB chunks.

    :param local_file_path: file path in local
    :param remote_file_path: file path in dropbox
    :param remote_folder_path: folder in dropbox, used only when
        ``remote_file_path`` is blank
    :param excepted_name: expected name; when it contains
        ``DROPBOX_FILE_SEP`` it is used as the complete remote path
    :return: metadata returned by the upload call
    :raises DropboxAPIException: when no remote target is given or the
        local path is blank, missing or not a file
    """
    if is_blank(remote_file_path):
        if is_blank(remote_folder_path):
            raise DropboxAPIException(
                "#upload_from_local, remote_file_path is blank and remote_folder_path "
                "is also blank !")
        if not remote_folder_path.startswith(DROPBOX_FILE_SEP):
            remote_folder_path = DROPBOX_FILE_SEP + remote_folder_path
        if not remote_folder_path.endswith(DROPBOX_FILE_SEP):
            remote_folder_path = remote_folder_path + DROPBOX_FILE_SEP
        remote_file_path = remote_folder_path

    if not remote_file_path.startswith(DROPBOX_FILE_SEP):
        remote_file_path = DROPBOX_FILE_SEP + remote_file_path

    lfpp = FilePathParser(full_path_file_string=local_file_path)
    if lfpp.is_blank:
        raise DropboxAPIException(
            "#upload_from_local, local file path is blank !")
    if lfpp.is_not_exist:
        raise DropboxAPIException(
            "#upload_from_local, local file path is not exist !")
    if lfpp.is_not_file:
        raise DropboxAPIException(
            "#upload_from_local, local file path is not a file !")

    rfpp = FilePathParser(full_path_file_string=remote_file_path)
    if is_not_blank(excepted_name):
        if DROPBOX_FILE_SEP in excepted_name:
            # excepted_name carries its own folder: use it as the full path
            remote_file_path = excepted_name \
                if excepted_name.startswith(DROPBOX_FILE_SEP) \
                else DROPBOX_FILE_SEP + excepted_name
        else:
            remote_file_path = rfpp.set_source_name(excepted_name)
    else:
        remote_file_source = rfpp.source_name
        if is_blank(remote_file_source):
            # case one:
            #   remote_file_path /DEFAULT/A/ , local_file_path /foo/bar.jpg
            #   auto set /DEFAULT/A/bar.jpg
            remote_file_path = os.path.join(remote_file_path,
                                            lfpp.source_name)
        elif DROPBOX_FILE_DOT not in remote_file_source \
                and is_not_blank(lfpp.source_suffix):
            # case two:
            #   remote_file_path /DEFAULT/A/bar , local_file_path /foo/bar.jpg
            #   auto set /DEFAULT/A/bar.jpg
            remote_file_path = remote_file_path + DROPBOX_FILE_DOT + lfpp.source_suffix
        # case three:
        #   remote_file_path /DEFAULT/A/bar , local_file_path /foo/bar
        #   stays /DEFAULT/A/bar

    file_size = os.path.getsize(local_file_path)
    chunk_size = 10 * 1024 * 1024

    # ``metadata`` is a plain local: it used to be declared ``global``, so
    # uploads running at the same time shared one module-level variable and
    # could return each other's result.
    if file_size <= chunk_size:
        if is_debug():
            logger.debug("> uploading tiny file")
        with open_file(file_name=local_file_path, mode="rb") as fr:
            metadata = self.async_upload_bytes(
                file_bytes=fr.read(), remote_file_path=remote_file_path)
    else:
        if is_debug():
            logger.debug("> uploading large file")
        metadata = self.async_upload_with_large_file(
            local_file_path=local_file_path,
            remote_file_path=remote_file_path,
            chunk_size=chunk_size)

    if logger.level == logging.DEBUG:
        logger.debug("upload_from_local metadata is %s" % metadata)
    return metadata
def showtime():
    """
    Serve a remote file, answering from the local cache folder when possible.

    Direct name:       /showtime?rfn=/DEFAULT/googlelogo_color_272x92dp.png
    Fuzzy name query:  /showtime?rfn=google
    Fuzzy name query:  /showtime?rfn=/DEFAULT/google

    The file name is read from the ``remote_file_name`` (or ``rfn``) query
    argument. Lookup order:

    1. a file in ``<cwd>/DEFAULT`` whose name contains the requested name
    2. when a folder was given: an exact download of the requested path
    3. the first entry of the remote folder whose name contains the
       requested name (folder defaults to ``/DEFAULT/``)

    :return: the file via ``flask.send_file``, or a JSON body with
        ``"success": False`` and a ``"response"`` message
    """

    def _fail(message):
        # uniform error payload used by every failure path
        return flask.jsonify({"response": message, "success": False})

    def _serve_fuzzy_match(folder_path):
        # List the remote folder and serve the first entry whose name
        # contains the requested name.
        rsl = sda.simple_list(remote_folder_path=folder_path)
        if not rsl.get("success"):
            return _fail(rsl.get("response"))
        for item in rsl.get("response"):
            real_name = item.get("name")
            if rf_name not in str(real_name):
                continue
            file_mime = get_mime(real_name.split(".")[-1])
            # NOTE(review): the exact-path branch below unpacks
            # download_as_bytes as (metadata, response) and reads
            # ``response.content``; the same shape is assumed here, where
            # one caller used to treat the result as raw bytes - confirm
            # against download_as_bytes.
            md, res = sda.download_as_bytes(remote_file_path=item.get("path"))
            # cache file via coroutine
            local_cache_file = os.path.join(local_cache_path, real_name)
            cache_with_coroutine(file_path=local_cache_file,
                                 w_data=res.content)
            # The matched file's real name is the download name, so a fuzzy
            # query such as "google" still yields a name with its suffix.
            return flask.send_file(io.BytesIO(res.content),
                                   attachment_filename=real_name,
                                   mimetype=file_mime)
        # can not match with remote files
        return _fail("rf name can not match with remote files in '/showtime'")

    rfn = request.args.get("remote_file_name") or request.args.get("rfn")
    if is_blank(rfn):
        return _fail("remote file name is blank in '/showtime'")

    rf_path, rf_name = separate_path_and_name(rfn)
    if is_blank(rf_name):
        return _fail("rf name is blank in '/showtime'")

    # prefer the local cache
    local_cache_path = os.path.join(os.getcwd(), "DEFAULT")
    if not os.path.exists(local_cache_path):
        os.mkdir(local_cache_path)
    else:
        cached_names = [
            x for x in os.listdir(local_cache_path) if rf_name in str(x)
        ]
        if cached_names:
            cache_file_name = cached_names[0]
            # path of the target file inside the cache
            target_cache_file_path = os.path.join(local_cache_path,
                                                  cache_file_name)
            if os.path.isfile(target_cache_file_path):
                tfpp = FilePathParser(
                    full_path_file_string=target_cache_file_path)
                file_mime = get_mime(tfpp.source_suffix)
                with open_file(target_cache_file_path, 'rb') as f_read:
                    return flask.send_file(io.BytesIO(f_read.read()),
                                           attachment_filename=cache_file_name,
                                           mimetype=file_mime)

    # path is blank, only the file name is given,
    # e.g. googlelogo_color_272x92dp.png
    if is_blank(rf_path.replace("/", "")):
        # fall back to the default folder
        return _serve_fuzzy_match("/DEFAULT/")

    # path and file name are both given,
    # e.g. /DEFAULT/googlelogo_color_272x92dp.png
    file_mime = get_mime(rf_name.split(".")[-1])
    try:
        md, res = sda.download_as_bytes(remote_file_path=rfn)
        # cache file via coroutine
        local_cache_file = os.path.join(local_cache_path, rf_name)
        cache_with_coroutine(file_path=local_cache_file, w_data=res.content)
        return flask.send_file(io.BytesIO(res.content),
                               attachment_filename=rf_name,
                               mimetype=file_mime)
    except Exception:
        # Deliberate best effort: any failure of the exact download falls
        # back to a fuzzy search inside the requested folder.
        return _serve_fuzzy_match(rf_path)