class SeaweedStore(BaseStore):
    """Store backend that keeps its payloads in a weed-fs (SeaweedFS) cluster.

    ``self.ids`` remembers, for every value returned by ``upload_file``,
    the key the caller supplied when the upload was made.
    """

    def __init__(self, data):
        """Connect to the weed-fs master described by *data*.

        *data* is read with ``.get``: ``'host'`` defaults to ``'localhost'``
        and ``'port'`` defaults to ``9333``.
        """
        self.store = WeedFS(data.get('host', 'localhost'),
                            data.get('port', 9333))
        self.ids = {}

    def create(self, key, value, mode='data'):
        """Upload *value* and return whatever ``upload_file`` returns.

        With ``mode='file'`` *value* is passed as the ``path`` argument; with
        ``mode='data'`` it is passed as ``stream`` and *key* as ``name``.
        Any other mode uploads nothing and returns ``None``.
        """
        if mode == 'file':
            upload_kwargs = {'path': value}
        elif mode == 'data':
            upload_kwargs = {'name': key, 'stream': value}
        else:
            return None

        file_id = self.store.upload_file(**upload_kwargs)
        self.ids[file_id] = key
        return file_id

    def read(self, key):
        """Return the stored content for *key*, or ``None`` if it is absent."""
        if not self.store.file_exists(key):
            return None
        return self.store.get_file(key)

    def update(self, key, value, mode='data'):
        """Alias for :meth:`create`; it performs a fresh upload."""
        return self.create(key, value, mode)

    def delete(self, key):
        """Forget *key* in ``self.ids`` and delete it from weed-fs."""
        self.ids.pop(key, None)
        return self.store.delete_file(key)
def upload(self,
           file: Optional[TextIO] = None,
           external_url: Optional[str] = None,
           file_name: Optional[str] = None,
           zip_password: str = '',
           force_reprocess: bool = False,
           callback: Optional[str] = None) -> bool:
    """Upload a file (or hand over a URL) to the metadata extractor.

    Either *file* or *external_url* should be specified; the other
    parameters are optional.

    Returns:
        ``True`` when a task was submitted, ``False`` when neither *file*
        nor *external_url* was given.
    """
    if not (file or external_url):
        # Nothing to submit.
        return False

    resolved_name = self._get_file_name(file, file_name)
    submission = {
        'zip_password': zip_password,
        'force_reprocess': force_reprocess,
        'callback': callback,
        'file_name': resolved_name,
    }

    if file:
        # Store the content on seaweedfs and forward only its file id.
        client = WeedFS(settings.SEAWEEDFS_IP, settings.SEAWEEDFS_PORT)
        submission['seaweedfs_file_id'] = client.upload_file(
            stream=file.read(), name=resolved_name)
    else:
        # Forward the URL so the metadata extractor downloads the file
        # itself; this avoids overflowing the API when many files are sent
        # at the same time.
        submission['external_url'] = external_url

    TaskManager().submit_url_for_metadata_extractor(**submission)
    return True
def __init__(self, master_host=None, master_port=None):
    """Remember the weed-fs master address and open a client to it.

    Arguments left as ``None`` fall back to ``settings.WEEDFS_MASTER_HOST``
    and ``settings.WEEDFS_MASTER_PORT`` respectively.
    """
    host = settings.WEEDFS_MASTER_HOST if master_host is None else master_host
    self.master_host = host
    port = settings.WEEDFS_MASTER_PORT if master_port is None else master_port
    self.master_port = port
    self.fs = WeedFS(host, port)
def upload_cover(self):
    """Upload star cover images that are not yet in the JSON cache.

    Walks every sub-directory of ``<basePath>/cover``. The part of each file
    name before the first ``.`` is used as the star id. Ids already present
    in the cache file (``self.allStarCoverDataInJson``) are skipped, as are
    files rejected by ``image_download_helper.invalid_file_and_contine``.
    Everything else is uploaded to the weed-fs master at ``localhost:9333``
    and its URL recorded. The cache is written back once at the end.
    """
    cover_root = os.path.join(self.basePath, "cover")
    cache_path = os.path.join(cover_root, self.allStarCoverDataInJson)

    url_by_star = {}
    if os.path.exists(cache_path):
        with open(cache_path) as cache_file:
            url_by_star = json.load(cache_file)

    print(cover_root)
    for entry in os.listdir(cover_root):
        group_dir = os.path.join(cover_root, entry)
        if os.path.isfile(group_dir):
            continue
        print(group_dir)

        for image_name in os.listdir(group_dir):
            image_path = os.path.join(group_dir, image_name)
            try:
                star_id = image_name.split('.')[0]
                already_cached = star_id in url_by_star
            except Exception as err:
                print(err)
                print(image_path)
                continue
            if already_cached:
                continue

            print(image_path)
            if self.image_download_helper.invalid_file_and_contine(image_path):
                continue

            client = WeedFS("localhost", 9333)
            file_id = client.upload_file(image_path)
            url_by_star[star_id] = client.get_file_url(file_id)

    print(url_by_star)
    with open(cache_path, 'w') as cache_file:
        json.dump(url_by_star, cache_file)
def upload_data(self):
    """Upload album images that are not yet recorded in the album's JSON.

    Layout walked: ``<basePath>/data/<sub>/<album>/<img>``. Each album has a
    sibling index file ``<basePath>/data/<sub>/<album>.json`` that maps image
    file names to their weed-fs URLs. Images already present in that index,
    or rejected by ``image_download_helper.invalid_file_and_contine``, are
    skipped. Everything else is uploaded to the weed-fs master at
    ``localhost:9333``. The index is rewritten after each album is processed.
    """
    dataPath = os.path.join(self.basePath, "data")
    for sub in os.listdir(dataPath):
        subPath = os.path.join(dataPath, sub)
        if os.path.isfile(subPath):
            continue
        print(subPath)

        for album in os.listdir(subPath):
            albumPath = os.path.join(subPath, album)
            if os.path.isfile(albumPath):
                continue

            albumInJsonPath = os.path.join(subPath, album + ".json")
            processedImg = {}
            if os.path.exists(albumInJsonPath):
                # Read through a context manager so the handle is closed
                # deterministically instead of being leaked.
                with open(albumInJsonPath) as fp:
                    processedImg = json.load(fp)
            print(albumPath)

            for img in os.listdir(albumPath):
                if img in processedImg:
                    continue
                imgFullPath = os.path.join(albumPath, img)
                print(imgFullPath)
                if self.image_download_helper.invalid_file_and_contine(
                        imgFullPath):
                    continue
                w = WeedFS("localhost", 9333)
                fid = w.upload_file(imgFullPath)
                processedImg[img] = w.get_file_url(fid)

            print(processedImg)
            with open(albumInJsonPath, 'w') as fp:
                json.dump(processedImg, fp)
class WeedFSStorage(Storage):
    """
    Weed-FS storage.

    Weed-FS is a simple and highly scalable distributed file system.
    Every operation is delegated to a ``WeedFS`` client held in ``self.fs``.
    """

    def __init__(self, master_host=None, master_port=None):
        """Open a client to the master; ``None`` falls back to settings."""
        host = settings.WEEDFS_MASTER_HOST if master_host is None else master_host
        self.master_host = host
        port = settings.WEEDFS_MASTER_PORT if master_port is None else master_port
        self.master_port = port
        self.fs = WeedFS(host, port)

    def get_available_name(self, name, **kwargs):
        """Return only the base name of *name*; extra kwargs are ignored."""
        return os.path.basename(name)

    def content(self, name):
        """Return what the client's ``get_file`` yields for *name*."""
        return self.fs.get_file(name)

    def _save(self, name, content):
        """Upload *content* and return ``'<fid>:<name>'``.

        After the upload *content* is closed and, if it exposes
        ``temporary_file_path``, that temporary file is removed. A temporary
        file that is already missing is not an error.
        """
        file_id = self.fs.upload_file(stream=content.file, name=name)
        content.close()

        if hasattr(content, 'temporary_file_path'):
            try:
                os.remove(content.temporary_file_path())
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass

        return '%s:%s' % (file_id, name)

    def delete(self, name):
        """Delete *name* from weed-fs; *name* must not be empty."""
        assert name, "The name argument is not allowed to be empty."
        self.fs.delete_file(name)

    def exists(self, name):
        """Return the client's ``file_exists`` answer for *name*."""
        return self.fs.file_exists(name)

    def size(self, name):
        """Return the reported file size, or ``0`` when it is falsy."""
        reported = self.fs.get_file_size(name)
        return reported if reported else 0

    def url(self, name):
        """Return the URL the client reports for *name*."""
        return self.fs.get_file_url(name)

    def accessed_time(self, name):
        """Always ``DATE_IS_NOT_AVAILABLE``."""
        return DATE_IS_NOT_AVAILABLE

    def created_time(self, name):
        """Always ``DATE_IS_NOT_AVAILABLE``."""
        return DATE_IS_NOT_AVAILABLE

    def modified_time(self, name):
        """Always ``DATE_IS_NOT_AVAILABLE``."""
        return DATE_IS_NOT_AVAILABLE

    def deconstruct(self):
        """Return ``(dotted class path, [], constructor kwargs)``."""
        cls = self.__class__
        dotted_path = '{}.{}'.format(cls.__module__, cls.__name__)
        init_kwargs = {
            'master_host': self.master_host,
            'master_port': self.master_port,
        }
        return (dotted_path, [], init_kwargs)
def GET_to_weed_hash(url: str):
    """Download an image and register it in weed-fs, de-duplicated by hash.

    The bytes fetched from *url* are hashed with ``hash_algorithm``. If the
    ``image_hash_pool`` collection already holds a document with that
    ``_id``, that document is returned. Otherwise the bytes are uploaded to
    weed-fs and a new document ``{"_id", "weed_fid", "file_type"}`` is
    inserted and returned.

    Args:
        url: Image URL. Its trailing ``.<ext>`` becomes ``file_type``.

    Returns:
        The existing or newly inserted document, or ``None`` on any error
        (the traceback is printed to stderr).
    """
    try:
        # NOTE(review): the meaning of the second argument (6) is defined by
        # GET, which is not visible here - confirm (timeout or retries?).
        image_bytes = GET(url, 6)
        with MongoDBCollection("website_pron", "image_hash_pool") as coll:
            with BytesIO(image_bytes) as bio:
                hash_info = hash_algorithm(bio)
            find_hash_in_lib = coll.find_one({"_id": hash_info})
            if find_hash_in_lib is None:
                # Resolve the extension BEFORE uploading: a URL without a
                # trailing ".ext" raises IndexError here, and failing first
                # avoids leaving an orphaned blob in weed-fs.
                file_type = re.findall(r"\.(\w+)$", url)[0]
                weed_fs = WeedFS()
                weed_fid = weed_fs.upload_file(stream=image_bytes, name=url)
                find_hash_in_lib = {
                    "_id": hash_info,
                    "weed_fid": weed_fid,
                    "file_type": file_type
                }
                coll.insert_one(find_hash_in_lib)
            return find_hash_in_lib
    except Exception:
        # Best-effort by design, but KeyboardInterrupt/SystemExit must
        # propagate, so do not use a bare ``except:``.
        print("Error while get+insert image from web:\n{}\n".format(url),
              file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        return None
def insert_image_to_weed(file: str,
                         remove_after_insert: bool = False,
                         silence: bool = True):
    """Store a local image in weed-fs unless its hash is already registered.

    The file is hashed with ``hash_algorithm`` and its type is taken from the
    first match of ``re_find_tail`` on the path. If ``image_hash_pool``
    already has a document with that ``_id``, it is returned. Otherwise the
    file is uploaded and a new ``{"_id", "weed_fid", "file_type"}`` document
    is inserted and returned.

    Args:
        file: Path of the image to insert.
        remove_after_insert: Accepted for interface compatibility.
            NOTE(review): this body never reads it, so the file is never
            removed - confirm whether deletion was intended.
        silence: Accepted for interface compatibility; also unused here.

    Returns:
        The existing or newly inserted document, or ``None`` on any error
        (the traceback is printed to stderr).
    """
    try:
        file_hash = hash_algorithm(file)
        file_type = re_find_tail.findall(file)[0]
        with MongoDBDatabase("website_pron") as mongodb:
            coll = mongodb.get_collection("image_hash_pool")
            return_data = coll.find_one({"_id": file_hash})
            if return_data is not None:
                return return_data

            weed_fs = WeedFS("192.168.1.103")
            file_id = weed_fs.upload_file(file)
            insert_info = {
                "_id": file_hash,
                "weed_fid": file_id,
                "file_type": file_type
            }
            coll.insert_one(insert_info)
            return insert_info
    except Exception:
        # Best-effort by design, but KeyboardInterrupt/SystemExit must
        # propagate, so do not use a bare ``except:``.
        print(traceback.format_exc(), file=sys.stderr)
        return None
22 | verify_vivo2_500enroll5 | verify_common | t | 2018-03-01 14:28:54.989257 23 | verify_vivo_tof | verify_base | t | 2018-03-01 14:28:54.989523 24 | verify_oppo3d | verify_base | t | 2018-03-01 14:28:54.989748 25 | Ocular | faceunlock | f | 2018-03-01 14:28:54.989974 26 | Ocular_base1500 | Ocular | t | 2018-03-01 14:28:54.990205 27 | Ocular_version | Ocular | t | 2018-03-01 14:28:54.99045 photos=# select * from owners; id | name | creation_date ----+------+---------------------------- 1 | test | 2018-03-01 14:28:54.981124 ''' import json import requests from pyseaweed import WeedFS owner = 1 node = 5 w = WeedFS("172.20.15.200", 9333) # weed-fs master address and port server = "http://172.20.15.200:5000/api/Photos" fid = w.upload_file("/home/andrew/obama0.png") # path to file file_url = w.get_file_url(fid) payload = {'url': file_url, 'owner': owner, 'node': node} r = requests.post(server, data=json.dumps(payload)) print(r.text)
def lookup(name): try: socket.gethostbyname(name) return name except Exception: return 'localhost' rd = redis.Redis(host=lookup('redis'), port=6379, db=1) # for general tasks rn = redis.Redis(host=lookup('redis'), port=6379, db=2) # for nlp related tasks rc = redis.Redis(host=lookup('redis'), port=6379, db=3) # for cache only ri = redis.Redis(host=lookup('redis'), port=6379, db=4) # for index only wd = WeedFS(lookup("master"), 9333) # weed-fs master address and port ts = typesense.Client({ 'nodes': [{ 'host': lookup('typesense'), 'port': '8108', 'protocol': 'http', }], 'api_key': 'MUzQD3ncGDBihx6YGTBeBJ4Q', 'connection_timeout_seconds': 2 }) sb = redis.Redis(host=lookup('simbase'), port=7654) # for recommand engine if not sb.execute_command('blist'):
def __init__(self, data):
    """Create the weed-fs client and an empty id registry.

    *data* is read with ``.get``: ``'host'`` defaults to ``'localhost'`` and
    ``'port'`` defaults to ``9333``; both describe the weed-fs master.
    """
    master_host = data.get('host', 'localhost')
    master_port = data.get('port', 9333)
    self.store = WeedFS(master_host, master_port)
    self.ids = dict()
''' import json import glob import os import requests from pyseaweed import WeedFS owner = 1 node = 5 input_ = r"D:\sensetime\user\result" file_type = "jpg" w = WeedFS("172.20.15.200", 9333) # weed-fs master address and port server = "http://172.20.15.200:5000/api/Photos" for filename in glob.glob("{}{}*.{}".format(input_, os.sep, file_type)): fid = w.upload_file(filename) # path to file file_url = w.get_file_url(fid) payload = {'url': file_url, 'owner': owner, 'node':node} r = requests.post(server, data=json.dumps(payload)) print(r.text)
import os
import shutil
import subprocess
import requests
import logging
from pyseaweed import WeedFS
from typing import Dict, Any
from requests import Response

# Module-wide client for the SeaweedFS master.
seaweedfs = WeedFS('seaweedfs_master', 9333)


def send_result(result: Dict[str, Any], api_base_url: str) -> Response:
    """POST the stringified *result* to the API's ``set_result`` endpoint.

    *result* must contain ``result["statistics"]["sha1"]``; it is only used
    for the log line. The response of the POST is returned as-is.
    """
    sha1 = result["statistics"]["sha1"]
    logging.info(f'Seding result to API for sample with sha1={sha1}.')
    endpoint = f'http://{api_base_url}/internal/api/set_result'
    form_fields = {'result': str(result)}
    return requests.post(endpoint, form_fields)


def get_sample(seaweedfs_file_id: str) -> bytes:
    """Fetch the sample stored under *seaweedfs_file_id* from SeaweedFS."""
    # FIXME: use seaweed id instead
    downloaded = seaweedfs.get_file(seaweedfs_file_id)
    logging.info(f'Downloaded sample with seaweedfs_file_id: {seaweedfs_file_id}')
    return downloaded


def has_a_non_empty_file(base_path: str) -> bool:
    """Return ``True`` as soon as any file under *base_path* is non-empty.

    The tree is walked with ``os.walk`` and each file is checked with
    ``file_is_not_empty``. The code visible here has no explicit return for
    the "nothing found" case.
    """
    for directory, _subdirs, filenames in os.walk(base_path):
        if any(file_is_not_empty(os.path.join(directory, filename))
               for filename in filenames):
            return True
def content(self) -> bytes:
    """Fetch the bytes stored under ``self.seaweedfs_file_id``.

    A new client for ``settings.SEAWEEDFS_IP`` / ``settings.SEAWEEDFS_PORT``
    is created on every call.
    """
    client = WeedFS(settings.SEAWEEDFS_IP, settings.SEAWEEDFS_PORT)
    return client.get_file(self.seaweedfs_file_id)