Exemple #1
0
class SeaweedStore(BaseStore):
    """Store implementation that keeps its values in weed-fs through ``WeedFS``."""

    def __init__(self, data):
        """Connect to the weed-fs master described by ``data``.

        ``data`` is read with ``.get``: ``'host'`` defaults to ``'localhost'``
        and ``'port'`` to ``9333``.
        """
        # weed-fs master address and port
        self.store = WeedFS(data.get('host', 'localhost'), data.get('port', 9333))
        # Maps the value returned by ``upload_file`` to the key it was stored for.
        self.ids = {}

    def create(self, key, value, mode='data'):
        """Upload ``value`` and return whatever ``upload_file`` returns.

        With ``mode='file'`` ``value`` is passed as the upload ``path``; with
        ``mode='data'`` it is passed as the ``stream`` and ``key`` as the name.
        Any other mode uploads nothing and returns ``None``.
        """
        if mode == 'file':
            upload_kwargs = {'path': value}
        elif mode == 'data':
            upload_kwargs = {'name': key, 'stream': value}
        else:
            return None

        fid = self.store.upload_file(**upload_kwargs)
        self.ids[fid] = key
        return fid

    def read(self, key):
        """Return the stored file for ``key``, or ``None`` if it does not exist."""
        # pylint: disable=unused-argument,no-self-use
        if not self.store.file_exists(key):
            return None
        return self.store.get_file(key)

    def update(self, key, value, mode='data'):
        """Store ``value`` again; this is a plain alias for :meth:`create`."""
        return self.create(key, value, mode)

    def delete(self, key):
        """Forget ``key`` in the local id map and delete it from the store.

        Returns the result of ``delete_file``.
        """
        # NOTE(review): ``self.ids`` is keyed by the upload result, so ``key``
        # is presumably that same id here - confirm against callers.
        self.ids.pop(key, None)
        return self.store.delete_file(key)
Exemple #2
0
    def upload(self,
               file: Optional[TextIO] = None,
               external_url: Optional[str] = None,
               file_name: Optional[str] = None,
               zip_password: str = '',
               force_reprocess: bool = False,
               callback: Optional[str] = None) -> bool:
        """Submit a file, or a URL pointing to one, to the metadata extractor.

        One of ``file`` / ``external_url`` is expected; every other parameter
        is optional. When ``file`` is given its content is uploaded to
        SeaweedFS and the resulting file id is submitted; otherwise the URL is
        submitted as-is.

        Returns ``True`` once the task has been submitted and ``False`` when
        neither ``file`` nor ``external_url`` was provided.
        """
        if not (file or external_url):
            return False

        file_name = self._get_file_name(file, file_name)
        upload_parameters = {
            'zip_password': zip_password,
            'force_reprocess': force_reprocess,
            'callback': callback,
            'file_name': file_name
        }

        if not file:
            # Only the URL is forwarded so that the metadata extractor downloads
            # the file itself; this avoids overloading the API when lots of
            # files are sent at the same time.
            upload_parameters['external_url'] = external_url
        else:
            # Store the content on SeaweedFS and forward only the file id.
            client = WeedFS(settings.SEAWEEDFS_IP, settings.SEAWEEDFS_PORT)
            file_id = client.upload_file(stream=file.read(), name=file_name)
            upload_parameters['seaweedfs_file_id'] = file_id

        TaskManager().submit_url_for_metadata_extractor(**upload_parameters)
        return True
Exemple #3
0
 def __init__(self, master_host=None, master_port=None):
     """Create the Weed-FS client for the given master.

     An argument left as ``None`` is taken from ``settings.WEEDFS_MASTER_HOST``
     or ``settings.WEEDFS_MASTER_PORT`` respectively.
     """
     host = settings.WEEDFS_MASTER_HOST if master_host is None else master_host
     self.master_host = host
     port = settings.WEEDFS_MASTER_PORT if master_port is None else master_port
     self.master_port = port
     self.fs = WeedFS(host, port)
Exemple #4
0
 def __init__(self, master_host=None, master_port=None):
     """Remember the master address and open a ``WeedFS`` client for it.

     ``None`` for either argument means "use the value from ``settings``"
     (``WEEDFS_MASTER_HOST`` / ``WEEDFS_MASTER_PORT``).
     """
     resolved_host = master_host if master_host is not None else settings.WEEDFS_MASTER_HOST
     self.master_host = resolved_host
     resolved_port = master_port if master_port is not None else settings.WEEDFS_MASTER_PORT
     self.master_port = resolved_port
     self.fs = WeedFS(resolved_host, resolved_port)
Exemple #5
0
    def upload_cover(self):
        """Upload every not-yet-cached star cover image to weed-fs.

        Walks each sub-directory of ``<basePath>/cover``. The part of an image
        file name before the first ``.`` is used as the star id. Ids already
        present in the JSON cache file (``self.allStarCoverDataInJson`` inside
        the cover directory) are skipped, as are files for which
        ``self.image_download_helper.invalid_file_and_contine`` returns a
        truthy value. Every remaining image is uploaded and its URL recorded
        under the star id; the cache file is rewritten once at the end.
        """
        cover_dir = os.path.join(self.basePath, "cover")
        cache_path = os.path.join(cover_dir, self.allStarCoverDataInJson)
        cached_covers = {}
        if os.path.exists(cache_path):
            with open(cache_path) as fp:
                cached_covers = json.load(fp)

        # Built once for the whole run: the client does not depend on the
        # image being uploaded, so there is no need to recreate it per file.
        weed = WeedFS("localhost", 9333)

        print(cover_dir)
        for sub in os.listdir(cover_dir):
            sub_path = os.path.join(cover_dir, sub)
            # Plain files at this level (such as the cache file) are not covers.
            if os.path.isfile(sub_path):
                continue

            print(sub_path)
            for image_name in os.listdir(sub_path):
                image_path = os.path.join(sub_path, image_name)

                try:
                    star_id = image_name.split('.')[0]
                    if star_id in cached_covers:
                        continue
                except Exception as e:
                    # Defensive: a cache file whose top-level JSON value does
                    # not support membership tests ends up here.
                    print(e)
                    print(image_path)
                    continue

                print(image_path)
                if self.image_download_helper.invalid_file_and_contine(
                        image_path):
                    continue

                fid = weed.upload_file(image_path)
                cached_covers[star_id] = weed.get_file_url(fid)

        print(cached_covers)
        with open(cache_path, 'w') as fp:
            json.dump(cached_covers, fp)
Exemple #6
0
    def upload_data(self):
        """Upload every not-yet-processed album image to weed-fs.

        Layout walked: ``<basePath>/data/<sub>/<album>/<image>``. For each
        album directory a sibling ``<album>.json`` file maps image file names
        to their weed-fs URLs. Images already listed there are skipped, as are
        files for which ``self.image_download_helper.invalid_file_and_contine``
        returns a truthy value. The JSON file is rewritten after each album.
        """
        data_dir = os.path.join(self.basePath, "data")

        # Built once for the whole run: the client does not depend on the
        # image being uploaded, so there is no need to recreate it per file.
        weed = WeedFS("localhost", 9333)

        for sub in os.listdir(data_dir):
            sub_path = os.path.join(data_dir, sub)
            if os.path.isfile(sub_path):
                continue

            print(sub_path)
            for album in os.listdir(sub_path):
                album_path = os.path.join(sub_path, album)
                # The per-album "<album>.json" caches live next to the album
                # directories and are skipped here.
                if os.path.isfile(album_path):
                    continue

                album_json_path = os.path.join(sub_path, album + ".json")

                processed = {}
                if os.path.exists(album_json_path):
                    # Context manager so the handle is closed after loading.
                    with open(album_json_path) as fp:
                        processed = json.load(fp)

                print(album_path)
                for img in os.listdir(album_path):
                    if img in processed:
                        continue

                    img_path = os.path.join(album_path, img)
                    print(img_path)

                    if self.image_download_helper.invalid_file_and_contine(
                            img_path):
                        continue

                    fid = weed.upload_file(img_path)
                    processed[img] = weed.get_file_url(fid)

                print(processed)
                with open(album_json_path, 'w') as fp:
                    json.dump(processed, fp)
Exemple #7
0
class WeedFSStorage(Storage):
    """
    Storage backend that delegates every operation to a Weed-FS client.
    Weed-FS is a simple and highly scalable distributed file system.
    """

    def __init__(self, master_host=None, master_port=None):
        """Open a ``WeedFS`` client for the given master.

        An argument left as ``None`` falls back to
        ``settings.WEEDFS_MASTER_HOST`` / ``settings.WEEDFS_MASTER_PORT``.
        """
        host = settings.WEEDFS_MASTER_HOST if master_host is None else master_host
        self.master_host = host
        port = settings.WEEDFS_MASTER_PORT if master_port is None else master_port
        self.master_port = port
        self.fs = WeedFS(host, port)

    def get_available_name(self, name, **kwargs):
        """Return only the base name of ``name``; extra keyword arguments are ignored."""
        return os.path.basename(name)

    def content(self, name):
        """Return what the Weed-FS client's ``get_file`` yields for ``name``."""
        return self.fs.get_file(name)

    def _save(self, name, content):
        """Upload ``content`` and return ``"<fid>:<name>"``.

        ``content`` is closed after the upload. If it exposes
        ``temporary_file_path``, that file is removed as well; a file that is
        already gone is not an error.
        """
        fid = self.fs.upload_file(stream=content.file, name=name)
        content.close()
        if hasattr(content, 'temporary_file_path'):
            try:
                os.remove(content.temporary_file_path())
            except FileNotFoundError:
                # Nothing left to clean up.
                pass
        return '{}:{}'.format(fid, name)

    def delete(self, name):
        """Delete ``name`` from Weed-FS; an empty name fails the assertion."""
        assert name, "The name argument is not allowed to be empty."
        self.fs.delete_file(name)

    def exists(self, name):
        """Return the client's ``file_exists`` answer for ``name``."""
        return self.fs.file_exists(name)

    def size(self, name):
        """Return the size reported by the client, or ``0`` when it is falsy."""
        reported = self.fs.get_file_size(name)
        return reported if reported else 0

    def url(self, name):
        """Return the URL the client reports for ``name``."""
        return self.fs.get_file_url(name)

    def accessed_time(self, name):
        """Access times are not tracked; always ``DATE_IS_NOT_AVAILABLE``."""
        return DATE_IS_NOT_AVAILABLE

    def created_time(self, name):
        """Creation times are not tracked; always ``DATE_IS_NOT_AVAILABLE``."""
        return DATE_IS_NOT_AVAILABLE

    def modified_time(self, name):
        """Modification times are not tracked; always ``DATE_IS_NOT_AVAILABLE``."""
        return DATE_IS_NOT_AVAILABLE

    def deconstruct(self):
        """Return ``(dotted class path, [], kwargs)`` describing this instance."""
        cls = self.__class__
        dotted_path = '%s.%s' % (cls.__module__, cls.__name__)
        init_kwargs = {'master_host': self.master_host, 'master_port': self.master_port}
        return (dotted_path, [], init_kwargs)
Exemple #8
0
def GET_to_weed_hash(url: str):
    """Download an image, de-duplicate it by hash and store it in weed-fs.

    The image is fetched with ``GET(url, 6)`` and hashed with
    ``hash_algorithm``. If the ``image_hash_pool`` collection already has a
    document with that hash as ``_id``, that document is returned. Otherwise
    the bytes are uploaded to weed-fs and a new document with ``_id``,
    ``weed_fid`` and ``file_type`` (the URL's trailing extension) is inserted
    and returned.

    Returns:
        The hash-pool document (a dict), or ``None`` if any step failed. The
        failure and its traceback are written to stderr.
    """
    try:
        # NOTE(review): the meaning of the second argument (6) is not visible
        # here - presumably a timeout or retry count; confirm against GET.
        image_bytes = GET(url, 6)
        with MongoDBCollection("website_pron", "image_hash_pool") as coll:
            with BytesIO(image_bytes) as bio:
                hash_info = hash_algorithm(bio)
            find_hash_in_lib = coll.find_one({"_id": hash_info})
            if find_hash_in_lib is None:
                # Parse the extension before uploading: a URL without one
                # raises IndexError here, and failing first avoids leaving an
                # uploaded blob that no document references.
                file_type = re.findall(r"\.(\w+)$", url)[0]
                weed_fs = WeedFS()
                weed_fid = weed_fs.upload_file(stream=image_bytes, name=url)
                find_hash_in_lib = {
                    "_id": hash_info,
                    "weed_fid": weed_fid,
                    "file_type": file_type
                }
                coll.insert_one(find_hash_in_lib)
            return find_hash_in_lib
    except Exception:
        # Best effort by design, but KeyboardInterrupt/SystemExit must still
        # propagate, hence ``Exception`` rather than a bare ``except``.
        print("Error while get+insert image from web:\n{}\n".format(url),
              file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        return None
Exemple #9
0
def insert_image_to_weed(file: str,
                         remove_after_insert: bool = False,
                         silence: bool = True):
    """Store a local image in weed-fs unless its hash is already known.

    The file is hashed with ``hash_algorithm`` and its type is taken from the
    first match of ``re_find_tail`` on the path. If the ``image_hash_pool``
    collection already holds a document with that hash as ``_id``, that
    document is returned. Otherwise the file is uploaded and a new document
    with ``_id``, ``weed_fid`` and ``file_type`` is inserted and returned.

    Args:
        file: Path of the image to insert.
        remove_after_insert: Accepted but currently not used by this function.
        silence: Accepted but currently not used by this function.

    Returns:
        The hash-pool document (a dict), or ``None`` if any step failed. The
        traceback is written to stderr.
    """
    try:
        file_hash = hash_algorithm(file)
        file_type = re_find_tail.findall(file)[0]
        with MongoDBDatabase("website_pron") as mongodb:
            coll = mongodb.get_collection("image_hash_pool")
            known = coll.find_one({"_id": file_hash})
            if known is not None:
                return known

            weed_fs = WeedFS("192.168.1.103")
            file_id = weed_fs.upload_file(file)
            insert_info = {
                "_id": file_hash,
                "weed_fid": file_id,
                "file_type": file_type
            }
            coll.insert_one(insert_info)
            return insert_info
    except Exception:
        # Best effort by design, but KeyboardInterrupt/SystemExit must still
        # propagate, hence ``Exception`` rather than a bare ``except``.
        print(traceback.format_exc(), file=sys.stderr)
        return None
Exemple #10
0
 22 | verify_vivo2_500enroll5 | verify_common    | t    | 2018-03-01 14:28:54.989257
 23 | verify_vivo_tof         | verify_base      | t    | 2018-03-01 14:28:54.989523
 24 | verify_oppo3d           | verify_base      | t    | 2018-03-01 14:28:54.989748
 25 | Ocular                  | faceunlock       | f    | 2018-03-01 14:28:54.989974
 26 | Ocular_base1500         | Ocular           | t    | 2018-03-01 14:28:54.990205
 27 | Ocular_version          | Ocular           | t    | 2018-03-01 14:28:54.99045

photos=# select * from owners;
 id | name |       creation_date        
----+------+----------------------------
  1 | test | 2018-03-01 14:28:54.981124

'''
import json

import requests
from pyseaweed import WeedFS

# Identifiers sent with the photo registration below.
# NOTE(review): presumably these are ids of rows in the service's owner / node
# tables - confirm against the database.
owner = 1
node = 5
w = WeedFS("172.20.15.200", 9333)  # weed-fs master address and port
server = "http://172.20.15.200:5000/api/Photos"

# Store the image in weed-fs, then ask weed-fs for the URL it is served from.
fid = w.upload_file("/home/andrew/obama0.png")  # path to file
file_url = w.get_file_url(fid)

# Register the uploaded file with the Photos endpoint; the payload is sent as
# a JSON-encoded request body.
payload = {'url': file_url, 'owner': owner, 'node': node}
r = requests.post(server, data=json.dumps(payload))

# Show the raw response text of the registration request.
print(r.text)
Exemple #11
0

def lookup(name):
    """Return ``name`` if it can be resolved, otherwise ``'localhost'``.

    Resolution is attempted with ``socket.gethostbyname``; any exception it
    raises selects the ``'localhost'`` fallback.
    """
    try:
        socket.gethostbyname(name)
    except Exception:
        return 'localhost'
    else:
        return name


# Redis clients, one logical database per concern. The host is 'redis' when
# that name resolves and 'localhost' otherwise (see lookup()).
rd = redis.Redis(host=lookup('redis'), port=6379, db=1)  # for general tasks
rn = redis.Redis(host=lookup('redis'), port=6379, db=2)  # for nlp related tasks
rc = redis.Redis(host=lookup('redis'), port=6379, db=3)  # for cache only
ri = redis.Redis(host=lookup('redis'), port=6379, db=4)  # for index only

wd = WeedFS(lookup("master"), 9333)  # weed-fs master address and port

# Typesense client talking to a single node over plain HTTP.
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration or the environment instead.
ts = typesense.Client({
  'nodes': [{
    'host': lookup('typesense'),
    'port': '8108',
    'protocol': 'http',
  }],

  'api_key': 'MUzQD3ncGDBihx6YGTBeBJ4Q',
  'connection_timeout_seconds': 2
})


# simbase is reached through the Redis client class on port 7654.
sb = redis.Redis(host=lookup('simbase'), port=7654)  # for recommendation engine
if not sb.execute_command('blist'):
Exemple #12
0
 def __init__(self, data):
     """Connect to the weed-fs master described by ``data``.

     ``data`` is read with ``.get``: ``'host'`` defaults to ``'localhost'``
     and ``'port'`` to ``9333``.
     """
     # weed-fs master address and port
     self.store = WeedFS(data.get('host', 'localhost'), data.get('port', 9333))
     # Starts empty; nothing is recorded at construction time.
     self.ids = {}
Exemple #13
0
'''

import json
import glob
import os

import requests
from pyseaweed import WeedFS

# Identifiers sent with every photo registration below.
# NOTE(review): presumably these are ids of rows in the service's owner / node
# tables - confirm against the database.
owner = 1
node = 5
# Directory scanned for images; only files directly inside it whose name ends
# in ".<file_type>" are uploaded.
input_ = r"D:\sensetime\user\result"
file_type = "jpg"

w = WeedFS("172.20.15.200", 9333) # weed-fs master address and port
server = "http://172.20.15.200:5000/api/Photos"

# The glob pattern is "<input_><os.sep>*.<file_type>".
for filename in glob.glob("{}{}*.{}".format(input_, os.sep, file_type)):    
    
    # Store the image in weed-fs, then ask weed-fs for the URL it is served from.
    fid = w.upload_file(filename) # path to file
    file_url = w.get_file_url(fid)
    
    # Register the uploaded file with the Photos endpoint; the payload is sent
    # as a JSON-encoded request body.
    payload = {'url': file_url, 'owner': owner, 'node':node}
    r = requests.post(server, data=json.dumps(payload))
    
    # Show the raw response text of each registration request.
    print(r.text)



Exemple #14
0
import os
import shutil
import subprocess
import requests
import logging
from pyseaweed import WeedFS
from typing import Dict, Any
from requests import Response


# Module-level weed-fs client (master at seaweedfs_master:9333); get_sample()
# reads files through it.
seaweedfs = WeedFS('seaweedfs_master', 9333)


def send_result(result: Dict[str, Any], api_base_url: str) -> Response:
    """POST an analysis result to the API's ``set_result`` endpoint.

    The sample's sha1 (``result["statistics"]["sha1"]``) is logged first. The
    whole ``result`` is sent stringified with ``str`` in the ``result`` form
    field. Returns the ``requests`` response.
    """
    sample_sha1 = result["statistics"]["sha1"]
    logging.info(f'Seding result to API for sample with sha1={sample_sha1}.')
    endpoint = f'http://{api_base_url}/internal/api/set_result'
    form_fields = {'result': str(result)}
    return requests.post(endpoint, form_fields)


def get_sample(seaweedfs_file_id: str) -> bytes:
    """Fetch a sample through the module-level ``seaweedfs`` client.

    The download is logged after it has completed; the value returned by
    ``get_file`` is handed back unchanged.
    """
    # FIXME: use seaweed id instead
    downloaded = seaweedfs.get_file(seaweedfs_file_id)
    logging.info(f'Downloaded sample with seaweedfs_file_id: {seaweedfs_file_id}')
    return downloaded


def has_a_non_empty_file(base_path: str) -> bool:
    """Report whether any file under ``base_path`` is non-empty.

    The tree is walked recursively with ``os.walk`` and the search stops at
    the first file for which ``file_is_not_empty`` (defined elsewhere) returns
    a truthy value.

    Returns:
        ``True`` if such a file exists, ``False`` otherwise, including when
        ``base_path`` contains no files at all.
    """
    for path, _subdirs, files in os.walk(base_path):
        for name in files:
            file_path = os.path.join(path, name)
            if file_is_not_empty(file_path):
                return True
    # Explicit so the function honours its ``-> bool`` annotation instead of
    # falling off the end and returning None.
    return False
Exemple #15
0
 def content(self) -> bytes:
     """Fetch this object's file, identified by ``self.seaweedfs_file_id``, from SeaweedFS."""
     client = WeedFS(settings.SEAWEEDFS_IP, settings.SEAWEEDFS_PORT)
     return client.get_file(self.seaweedfs_file_id)