Ejemplo n.º 1
0
def get_db():
    """Return the module-wide database handle, creating it when needed.

    The handle cached in the global ``db`` is reused only while the current
    process id equals the one stored in the global ``pid``; otherwise a new
    client is built from the ``database:*`` settings:

    * ``database:name`` -- name of the database to select.
    * ``database:host`` -- a host string or an iterable of host strings
      (joined with commas); when empty, ``MongoClient()`` is called with no
      arguments.
    * ``database:replicaset`` -- optional, forwarded as ``replicaset=``.
    * ``database:username`` / ``database:password`` -- optional credentials
      passed to ``authenticate`` on the selected database.

    The new database is wrapped in ``MongoProxy`` and stored, together with
    the current process id, in the module globals before being returned.
    """
    global db, pid

    # Only trust the cached handle if this very process created it.
    if db and os.getpid() == pid:
        return db

    database_name = get_setting('database:name')
    host = get_setting('database:host')

    if host:
        # Anything that is not a plain string is treated as a list of hosts.
        seeds = host if isinstance(host, str) else ','.join(host)
        replicaset = get_setting('database:replicaset')
        options = {'replicaset': replicaset} if replicaset else {}
        client = MongoClient(seeds, **options)
    else:
        client = MongoClient()

    database = client[database_name]

    username = get_setting('database:username')
    if username:
        password = get_setting('database:password')
        database.authenticate(username, password)

    db = MongoProxy(database)
    pid = os.getpid()
    return db
Ejemplo n.º 2
0
 def set_db_conn(self):
     """Store a proxied handle to the configured database on ``self.conn``.

     Host, port and database name are read from ``DB_CONFIG["base"]``.
     """
     base_cfg = DB_CONFIG["base"]
     # connect=False is forwarded to MongoClient; per the PyMongo docs this
     # defers connecting until the first operation.
     raw_client = pymongo.MongoClient(base_cfg['host'],
                                      base_cfg['port'],
                                      connect=False)
     proxied_client = MongoProxy(raw_client)
     self.conn = proxied_client[base_cfg['db']]
# Sentence splitter loaded once at import time for "en" with CUDA requested.
# NOTE(review): use_cuda=True presumably requires a GPU on the host -- confirm
# how NNSplit behaves when none is available.
splitter = NNSplit.load("en", use_cuda=True)

# Module-level lock; the code that acquires it is not visible in this section.
lock = Lock()


class JSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``ObjectId`` values as their string form."""

    def default(self, o):
        """Return ``str(o)`` for an ``ObjectId``; defer to the base class otherwise."""
        if not isinstance(o, ObjectId):
            # The stock implementation raises TypeError for unsupported types.
            return super().default(o)
        return str(o)


# NOTE(review): hard-coded database password committed to source. It should be
# loaded from the environment or a secret store, and this value rotated.
db_pwd = "LTEG2pfoDiKfH29M"
# The client is created at import time and wrapped in MongoProxy; the password
# is interpolated into the SRV connection string as-is (no URL quoting).
client = MongoProxy(
    MongoClient(
        f"mongodb+srv://cdminix:{db_pwd}@cluster0.pdjrf.mongodb.net/Reviews_Data?retryWrites=true&w=majority"
    ))
# Handle to the "Reviews_Data" database, accessed through the proxied client.
db = client.Reviews_Data
# Starts out unset; presumably assigned later by code outside this section -- TODO confirm.
model = None


class LSH:
    def __init__(
            self,
            hdf5_file="data.hdf5",
            input_dim=768,
            hash_dim=6,
            seed=42,
            chunksize=1_000,
            dtype="int8",
            file_write="w",
Ejemplo n.º 4
0
    def get_client(self,
                   database_name=None,
                   uri=None,
                   monary=False,
                   host=None,
                   autoreconnect=False,
                   **kwargs):
        """Create a MongoDB client, inserting the configured credentials.

        :param database_name: database to put into the URI. When None and a
            password-less ``uri`` is given, the database named in that URI is
            used; an explicit value overrides the URI's database.
        :param uri: optional connection string WITHOUT user and password.
            When None, the whole URI is built from ``self.config`` (which
            must then provide ``user``, ``password``, ``host`` and ``port``).
            A URI that ``parse_passwordless_uri`` cannot parse (it returns
            None) is passed through unchanged.
        :param monary: if true, return a ``Monary`` client instead of a
            ``pymongo.MongoClient``.
        :param host: overrides the host found in a password-less ``uri``
            (special handling for split hosts); not used when ``uri`` is None.
        :param autoreconnect: if true, wrap the pymongo client in a
            ``MongoProxy`` (ignored for monary clients).
        :param kwargs: forwarded to ``pymongo.MongoClient`` / ``Monary``.
        :returns: a ``Monary`` client, a ``pymongo.MongoClient``, or a
            ``MongoProxy`` wrapping the latter.
        :raises pymongo.errors.ConnectionFailure: if the ping of a pymongo
            client fails.
        """
        # Format of URI we should eventually send to mongo
        full_uri_format = 'mongodb://{user}:{password}@{host}:{port}/{database}'

        if uri is None:
            # We must construct the entire URI from the settings
            uri = full_uri_format.format(database=database_name, **self.config)
        else:
            # A URI was given. We expect it to NOT include user and password.
            result = parse_passwordless_uri(uri)
            if result is not None:
                # Unpack only after the None check: unpacking None raises
                # TypeError, which used to make the fallback below unreachable.
                _host, port, _database_name = result
                if not host:
                    host = _host
                if database_name is None:
                    database_name = _database_name
                uri = full_uri_format.format(database=database_name,
                                             host=host,
                                             port=port,
                                             user=self.config['user'],
                                             password=self.config['password'])
            # Otherwise some other URI form was provided: use it untouched
            # and hope for the best.

        # Never log `uri` from here on: it contains the password.
        if monary:
            # The server-selection options make the C driver retry if it cannot
            # connect; since new monary connections are made often this protects
            # against brief network hiccups. Append with '&' when the URI
            # already carries a query string so the result stays well-formed.
            separator = '&' if '?' in uri else '?'
            client = Monary(
                uri + separator +
                'serverSelectionTryOnce=false&serverSelectionTimeoutMS=60000',
                **kwargs)
            self.log.debug("Succesfully connected via monary (probably...)")
            return client

        client = pymongo.MongoClient(uri, **kwargs)
        # raises pymongo.errors.ConnectionFailure on failure
        client.admin.command('ping')
        self.log.debug("Successfully pinged client")

        if autoreconnect:
            # Wrap the client in an object that retries autoreconnect exceptions
            client = MongoProxy(client,
                                disconnect_on_timeout=False,
                                wait_time=180)

        return client
Ejemplo n.º 5
0
    def __init__(self,
                 database,
                 logs="logs",
                 properties="properties",
                 chunks="chunks",
                 uri="mongodb://localhost",
                 skip_unchanged=False,
                 chunk_size=timedelta(seconds=60),
                 fifo_size=1000,
                 n_workers=1,
                 worker_is_daemon=False,
                 log=None,
                 *args,
                 **kwargs):
        """
        Create a mongodb registry: connect to the server, resolve the
        collections and start the worker threads.

        @param database: Database name.
        @type database: string
        @param logs: Collection name.
        This collection keeps information when a property was "monitored".
        Optional, default is "logs".
        @type logs: string
        @param properties: Collection name.
        This collection keeps information about the property and its meta data.
        Optional, default is "properties".
        @type properties: string
        @param chunks: Collection name.
        This collection keeps the actual monitoring data.
        However, it will keep chunks of it instead of single values
        to be more efficient. Optional, default is "chunks".
        @type chunks: string
        @param uri: mongodb URI. Optional, default is "mongodb://localhost".
        @type uri: string
        @param skip_unchanged: Skip recording of multiple values within one chunk, if
        the values are the same. Optional, default is False
        @type skip_unchanged: boolean
        @param chunk_size: Specifies the time duration within which monitoring
        data is saved into a chunk (fraction of seconds will be ignored).
        Optional, default is 1 minute.
        The chunk size can be given as a timedelta or a 'number of seconds'.
        @type chunk_size: datetime.timedelta or int or float
        @param fifo_size: Sets the upperbound limit on the number of chunks
        that can be placed in the FIFO before overwriting older chunks.
        The FIFO decouples the producers of monitoring data (frontend,
        backend buffers) and the consumer thread(s) that insert(s) the data
        into mongodb. Optional, default is 1000.
        @type fifo_size: int
        @param n_workers: Number of consumer threads, so called workers.
        Values less than or equal to 0 are replaced by 1.
        Optional, default is 1.
        @type n_workers: int
        @param worker_is_daemon: Workers traditionally run as daemon threads
        but this seems not to work within an ACS component. So this is your
        choice ;). We will try to stop all workers in the destructor in case
        they aren't daemons. Optional, default is False.
        @type worker_is_daemon: bool
        @param log: An external logger to write log messages to.
        When None, a logger obtained via getLogger(defaultname) is used.
        Optional, default is None.
        @type log: logging.Logger
        """
        super(Registry, self).__init__(log, *args, **kwargs)
        self._log = log
        if not self._log:
            # No external logger supplied: fall back to the default one.
            self._log = getLogger(defaultname)
        self._log.debug("creating a mongodb registry")
        self._database_name = database
        self._logs_name = logs
        self._properties_name = properties
        self._chunks_name = chunks
        self._uri = uri
        self._skip_unchanged = skip_unchanged
        # Normalise chunk_size: plain numbers are interpreted as seconds.
        if isinstance(chunk_size, timedelta):
            self._chunk_size = chunk_size
        else:
            self._chunk_size = timedelta(seconds=chunk_size)

        cl = MongoClient(uri)
        # The registry expects acknowledged writes.
        # NOTE(review): assert statements are stripped under `python -O`, so
        # this check disappears in optimised runs.
        assert cl.write_concern.acknowledged
        # Starting with version 3.0 the MongoClient constructor no longer
        # blocks while connecting to the server or servers, and it no longer
        # raises ConnectionFailure if they are unavailable,
        # nor ConfigurationError if the user's credentials are wrong.
        # Instead, the constructor returns immediately and launches
        # the connection process on background threads.
        # Check if the server is available...
        # The ismaster command is cheap and does not require auth.
        cl.admin.command('ismaster')
        # MongoProxy may be None here (presumably when its optional import
        # failed -- confirm at the import site); then the raw client is used.
        if MongoProxy is not None:
            self._client = \
                MongoProxy(cl,
                           logger=self._log,
                           wait_time=MONGO_PROXY_WAIT_TIME,
                           disconnect_on_timeout=MONGO_PROXY_DISCONNECT_ON_TIMEOUT)
        else:
            self._client = cl

        self._database = self._client[database]
        self._logs = self._database[logs]
        # "locks" is looked up on the logs collection itself; the lock index
        # is created on the result.
        self._logs_locks = self._logs["locks"]
        MongoSimpleLock.create_index(self._logs_locks)
        self._properties = self._database[properties]
        self._chunks = self._database[chunks]

        self._worker_is_daemon = worker_is_daemon
        # Always run at least one worker.
        if n_workers <= 0:
            n_workers = 1
        self._fifo = RingBuffer(fifo_size)
        self._workers = []  # keep this the last class member variable in ctor
        # Workers are started last; each one receives the uri, the chunks
        # collection, the chunk size, the shared FIFO and the logger.
        for _ in range(n_workers):
            worker = _Worker(uri, self._chunks, self._chunk_size, self._fifo,
                             self._log)
            worker.daemon = worker_is_daemon
            worker.start()
            self._workers.append(worker)