Esempio n. 1
0
def drop_and_reset_database(dbName):
    """Drop database *dbName* on the local MongoDB and rebuild its indexes.

    After the drop, the module-level index specifications are re-created
    on the ``prot``, ``gene`` and ``goall`` collections.
    """
    client = MongoClient("mongodb://localhost:27017/")
    db = client[dbName]

    db.command("dropDatabase")
    # Recreate every standard index from the module-level specs.
    for collection, specs in (
        (db.prot, indexes_prot_gene),
        (db.gene, indexes_prot_gene),
        (db.goall, indexes_goall),
    ):
        collection.create_indexes(specs)
Esempio n. 2
0
class EventRepository(object):
    """CRUD access plus a status report for the ``events`` collection
    of the ``unity`` database."""

    def __init__(self):
        # Connection string comes from the environment.
        uri = os.environ.get('MONGO_URI')
        self.db = MongoClient(uri).unity

    def find_all(self, selector):
        """Return a cursor over every event matching *selector*."""
        return self.db.events.find(selector)

    def find(self, selector):
        """Return one event matching *selector*, or None."""
        return self.db.events.find_one(selector)

    def create(self, event):
        """Insert *event*; return the insert result."""
        return self.db.events.insert_one(event)

    def update(self, selector, event):
        """Replace the first document matching *selector*; return the
        number of documents actually modified."""
        result = self.db.events.replace_one(selector, event)
        return result.modified_count

    def delete(self, selector):
        """Delete the first document matching *selector*; return the
        number of documents deleted."""
        result = self.db.events.delete_one(selector)
        return result.deleted_count

    def status(self):
        """Build a usage report from dbstats, serverStatus and collStats."""
        report = self.db.command('dbstats')
        server_status = self.db.command('serverStatus')
        events = self.db.command('collStats', 'events')

        opcounters = server_status['opcounters']

        return {
            'ok': report['ok'],
            'database': str(report['db']),
            'collections': {
                'events': {
                    'count': events['count'],
                    'size': humanbytes(events['size']),
                },
            },
            'objects': report['objects'],
            'indexes': report['indexes'],
            'metrics': {
                'connections': server_status['connections'],
                'opcounters': {
                    key: opcounters[key]
                    for key in ('insert', 'query', 'update', 'delete')
                },
                'usage': {
                    'ratio': format(
                        report['dataSize'] / report['storageSize'], ".2%"),
                    'used': humanbytes(report['dataSize']),
                    'total': humanbytes(report['storageSize']),
                },
            },
        }
Esempio n. 3
0
def mgrant_server():
    """Pytest generator fixture: start a local authenticated mongod, seed
    admin/reader/writer users on a fresh database, yield
    ``(config_path, mdport, dbname)``, then tear everything down.
    """
    # TODO: This is whacked code that starts a mongo server. How do we fix this?
    _, config_path = tempfile.mkstemp()
    _, mdlogpath = tempfile.mkstemp()
    mdpath = tempfile.mkdtemp()
    mdport = 27020
    if not os.getenv("CONTINUOUS_INTEGRATION"):
        # Launch mongod with auth in its own session so the whole process
        # group can be signalled at teardown.
        basecmd = (
            f"mongod --port {mdport} --dbpath {mdpath} --quiet --logpath {mdlogpath} "
            "--bind_ip_all --auth")
        mongod_process = subprocess.Popen(basecmd,
                                          shell=True,
                                          start_new_session=True)
        # Crude readiness wait; there is no startup handshake here.
        time.sleep(5)
        client = MongoClient(port=mdport)
        client.admin.command("createUser",
                             "mongoadmin",
                             pwd="mongoadminpass",
                             roles=["root"])
        client.close()
    else:
        pytest.skip("Disabling mongogrant tests on CI for now")
    # Unique database name per run to avoid collisions between sessions.
    dbname = "test_" + uuid4().hex
    # NOTE(review): the "*****:*****" credentials look redacted in this copy —
    # presumably mongoadmin:mongoadminpass; confirm before reuse.
    db = MongoClient(
        f"mongodb://*****:*****@127.0.0.1:{mdport}/admin"
    )[dbname]
    db.command("createUser", "reader", pwd="readerpass", roles=["read"])
    db.command("createUser", "writer", pwd="writerpass", roles=["readWrite"])
    db.client.close()

    # Yields the fixture to use
    yield config_path, mdport, dbname

    # Teardown: kill the mongod process group (skipped on Travis CI),
    # then remove all temp artefacts.
    if not (os.getenv("CONTINUOUS_INTEGRATION") and os.getenv("TRAVIS")):
        os.killpg(os.getpgid(mongod_process.pid), signal.SIGTERM)
        os.waitpid(mongod_process.pid, 0)
    os.remove(config_path)
    shutil.rmtree(mdpath)
    os.remove(mdlogpath)
Esempio n. 4
0
    def run(self, count, limit=10):
        """Issue *count* generated query commands against the configured
        database and return the total number of result documents seen.
        """
        database = MongoClient(self.mongo_url).get_database(self.db_name)

        doc_count = 0
        for ordinal in range(count):
            query = self._generate_query()
            cmd = self.create_command(ordinal, self.collection_name, query,
                                      self.sort_spec, limit)
            # The command response is iterable over its result documents.
            for _document in database.command(cmd):
                doc_count += 1
            logging.info((ordinal, doc_count, cmd['filter'],
                          cmd.get('sort', '_unsorted_')))
        return doc_count
Esempio n. 5
0
    }
}

collection = 'Userinformation'
validator = {'$jsonSchema': {'bsonType': 'object', 'properties': {}}}
required = []

# Translate each field of the application schema into a $jsonSchema
# property entry, collecting required field names along the way.
for field_key, field in user_schema.items():
    prop = {'bsonType': field['type']}

    minimum = field.get('minlength')
    if type(minimum) is int:  # deliberately excludes bool
        prop['minimum'] = minimum

    if field.get('required') is True:
        required.append(field_key)

    validator['$jsonSchema']['properties'][field_key] = prop

if required:
    validator['$jsonSchema']['required'] = required

# collMod must be the first key of the command document.
query = [('collMod', collection), ('validator', validator)]

try:
    db.create_collection(collection)
except CollectionInvalid:
    # Collection already exists; we only want to (re)apply the validator.
    pass

command_result = db.command(OrderedDict(query))
Esempio n. 6
0
class SN_OneCollection(SocialNetwork):
    """
    Simplest implementation of SocialNetwork:
    1. All data (posts and comments) are in one collection
    2. No indexes
    3. Write Concern can be reset (https://docs.mongodb.com/manual/reference/write-concern/)
    """

    # Class-level (shared across all instances) cache of sampled post ids.
    favorites_id = []  ## This is list of most popular posts which will be read, commented and upvoted further
    # Round-robin iterators over favorites_id; rebuilt by sort_favorites().
    favorites_id_update_cycle = itertools.cycle([])
    favorites_id_read_cycle = itertools.cycle([])

    def __init__(self, connection_string, db_name):
        # Single database handle; all documents live in the `posts` collection.
        self.db = MongoClient(connection_string)[db_name]
        self._WriteConcern = { "w": 1, "j": True, "wtimeout": 10000 }

    def sort_favorites(self, count = 1000):
        # Despite the name, no sorting happens: $sample picks `count` random
        # posts and $project keeps only their _id. Both cycles are rebuilt.
        SN_OneCollection.favorites_id = list(self.db.posts.aggregate([ { '$sample': { 'size': count } }, {'$project':{'_id':1}} ]))
        SN_OneCollection.favorites_id_update_cycle = itertools.cycle(SN_OneCollection.favorites_id)
        SN_OneCollection.favorites_id_read_cycle = itertools.cycle(SN_OneCollection.favorites_id)

    def prepare(self):
        # Prime the favorites cache before the workload starts.
        self.sort_favorites()
        # index creation to go here

    @property
    def WriteConcern(self):
        return self._WriteConcern

    @WriteConcern.setter
    def WriteConcern(self, wc):
        self._WriteConcern = wc

    def get_doc_id_for_update(self):
        # Lazily (re)build the shared favorites cache on first use.
        if len(SN_OneCollection.favorites_id) == 0 :
            self.sort_favorites()

        return next(SN_OneCollection.favorites_id_update_cycle)

    def get_doc_id_for_read(self):
        if len(SN_OneCollection.favorites_id) == 0 :
            self.sort_favorites()

        return next(SN_OneCollection.favorites_id_read_cycle)


    def post(self, score = 0, text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."):
        # Insert one post; comments accumulate in the embedded array.
        self.db.posts.insert_one({
            "content": text,
            "score": score,
            "comments": [],
            })

    def _inflate_to(self, M, max_string_len=len(randomText())):
        """
        Make full join of posts to posts and (not very) randomly combine content.
        This will create more different 'words' for future experiments with indexes and text search.
        """
        # NOTE(review): the max_string_len default is evaluated once, at class
        # definition time — every call shares that single randomText() length.

        # N posts self-joined yield up to N*N >= M combinations.
        N = ceil(sqrt(M))

        limit = {"$limit": N}

        # Self-join on the constant `score` field => full cross product.
        lookup = {
            "$lookup":{
                "from": "posts",
                "localField": "score",
                "foreignField": "score",
                "as": "joined"
            }
        }

        unwinding = {
            "$unwind":
            {
            "path": "$joined"
            }

        }

        # Random split point at which the two contents are spliced together.
        split = randint(0, max_string_len-1)

        project = {
                '$project': {
                    '_id':0,
                    'score': 1,
                    'comments': 1,
                    'content': { '$concat': [ { '$substr': [ '$content', 0, split ] }, 
                                            { '$substr': [ '$joined.content' , split,  max_string_len-split] }] 
                            }
                }
        }

        # $out replaces the posts collection with the inflated result.
        self.db.posts.aggregate([limit, lookup, unwinding, project, {'$limit': M}, { '$out' : "posts" }])

    def generate_data(self, num):
        """
        Used for initial data generation.
        """

        self.db.posts.insert_many([{
            "content": randomText(),
            "score": 1, # this need to be constant for self._inflate_to() to work properly
            "comments": [],
            } for _ in range(min(10, num))]) ## Insert up to 10 initial posts

        ## Inflate exponentially until required number of posts
        while(self.db.command("collstats", "posts")['count'] < num):
            self._inflate_to(num)

        self.sort_favorites()



    def comment(self, text = "What is it? Twitter for ants?"):
        # The sampled favorite doc ({'_id': ...}) doubles as the update filter.
        doc_id = self.get_doc_id_for_update() 

        self.db.posts.update_one( doc_id, {"$push":{"comments": text}})

    def upvote(self):
        doc_id = self.get_doc_id_for_update() 

        self.db.posts.update_one( doc_id, {"$inc":{"score":1}})
        #raise NotImplementedError

    def read(self):
        doc_id = self.get_doc_id_for_read()

        return self.db.posts.find_one(doc_id)
Esempio n. 7
0
class MongoDBPersister(object):
    """MongoDB-backed persistence for skunkqueue jobs, workers and results.

    A per-queue document in the ``access`` collection acts as a crude lock:
    its ``locked`` flag is flipped atomically via ``find_and_modify`` before
    the jobs collection is touched, and always reset in ``finally``.
    NOTE(review): uses legacy pymongo APIs (insert/update/remove/
    find_and_modify, cursor.count) — requires an old driver version.
    """

    def __init__(self,
                 conn_url='localhost:27017',
                 dbname='skunkqueue'):

        self.conn_url=conn_url
        self.dbname=dbname

        # One collection per concern: queue locks, pending jobs,
        # registered workers, and saved results.
        self.skunkdb = MongoClient(conn_url)[dbname]
        self.access_collection = self.skunkdb['access']
        self.jobs_collection = self.skunkdb['jobs']
        self.worker_collection = self.skunkdb['workers']
        self.result_collection = self.skunkdb['result']

    ### Basic information ###

    def get_location(self):
        # "host:port/dbname" identifier for this backend.
        return self.conn_url + '/' + self.dbname

    def get_backend_type(self):
        return 'mongodb'

    def get_version(self):
        # Server version string from the buildInfo command.
        return self.skunkdb.command({'buildInfo': 1})['version']

    ### Queue manipulation ###

    def get_all_queues(self):
        return self.access_collection.distinct('q')

    def route_is_empty(self, queue_name, route):
        """Return True when no due job exists for (queue_name, route)."""
        ret = False
        try:
            # Acquire the queue lock; res is falsy if already locked.
            res = self.access_collection.find_and_modify(
                    {'q': queue_name, 'locked': False},
                    update={'$set': {'locked': True}})
            if res:
                # A job is "due" once its scheduled time ('now') has passed.
                ret = self.jobs_collection.find({
                    'q': queue_name,
                    'route': route,
                    'now': {'$lte': datetime.utcnow()}
                    }).count() == 0
        finally:
            # Always release the lock, even if we never acquired it.
            self.access_collection.update({'q': queue_name},
                {'$set': {'locked': False}})
        return ret

    ### Result access ###

    def job_state(self, job_id):
        # Jobs without a result document yet are reported as pending.
        ret = self.result_collection.find_one({'job_id': job_id})
        if ret:
            return ret['state']
        else:
            return 'pending'

    def job_result(self, job_id):
        # Returns None when no result has been saved for job_id.
        ret = self.result_collection.find_one({'job_id': job_id})
        if ret:
            return ret['value']

    def save_result(self, job_id, value, state):
        self.result_collection.insert(
            {'job_id': job_id, 'value': value, 'state': state})

    ### Job manipulation ###

    def add_job_to_queue(self, job, route, ts=None):
        """Enqueue *job* for *route*, optionally delayed by timedelta *ts*.

        Broadcast queues fan the job out to every registered worker.
        """
        queue_name = job.queue.name
        job.job_id = str(ObjectId())
        # Ensure the queue's lock document exists (upserted unlocked).
        self.access_collection.find_and_modify(
            {'q': queue_name}, {'q': queue_name, 'locked': False}, upsert=True)
        job_flat = job.json()
        # ts should be a datetime.timedelta object
        if ts:
            job_flat['now'] = datetime.utcnow() + ts
        else:
            job_flat['now'] = datetime.utcnow()
        job_flat['route'] = route
        if job.queue.queue_type == 'broadcast':
            # One copy per worker, addressed by worker_id, fresh _id each.
            for worker in self.worker_collection.find():
                job_flat['q'] = worker['worker_id']
                job_flat['_id'] = ObjectId()
                self.jobs_collection.insert(job_flat)
        else:
            self.jobs_collection.insert(job_flat)

    """Pop the first job in the queue that is schedule for work."""
    def get_job_from_queue(self, queue_name, worker_id, route):
        try:
            res = self.access_collection.find_and_modify(
                    {'q': queue_name, 'locked': False},
                    update={'$set': {'locked': True}})
            if res:
                # Atomically remove and return the most recently due job
                # addressed either to the queue or to this worker.
                job = self.jobs_collection.find_and_modify(
                        {'$or': [{'q': queue_name},{'q': worker_id}],
                            'now': {'$lte': datetime.utcnow()},
                            'route': route},
                        remove=True, sort=[('now', -1)])

                return job
        finally:
            self.access_collection.update({'q': queue_name},
                {'$set': {'locked': False}})

    def get_jobs_by_queue(self, queue):
        return [c for c in self.jobs_collection.find({'q': queue})]

    """Delete a specific job from somewhere in the queue."""
    def dequeue_job(self, queue_name, job_id):
        try:
            res = None
            # this call should block on queue availability
            while not res:
                res = self.access_collection.find_and_modify(
                        {'q': queue_name, 'locked': False},
                        update={'$set': {'locked': True}})
                time.sleep(0.1)
            self.jobs_collection.remove({
                'q': queue_name,
                'job_id': job_id
                })
        finally:
            self.access_collection.update({'q': queue_name},
                    {'$set': {'locked': False}})

    ### Worker manipulation ###

    def add_worker(self, worker_id, host, port):
        self.worker_collection.insert({
            'worker_id': worker_id,
            'host': host,
            'port': port,
            'state': 'waiting'
            })

    def delete_worker(self, worker_id):
        self.worker_collection.remove(dict(worker_id=worker_id))

    def add_monitor(self, host):
        # One monitor document per host; returns True only when created.
        if not self.worker_collection.find_one({'monitor': 1, 'host': host}):
            self.worker_collection.insert({
                'host': host,
                'monitor': 1
            })
            return True
        return False

    def delete_monitor(self, host):
        self.worker_collection.remove({'monitor': 1, 'host': host})

    def set_working(self, worker_id):
        # Final positional True = upsert.
        self.worker_collection.update(
                dict(worker_id=worker_id),
                {'$set': {
                    'state': 'working',
                    'start': datetime.utcnow()
                    }}, True)

    def unset_working(self, worker_id):
        self.worker_collection.update(
                dict(worker_id=worker_id),
                {'$set': {
                    'state': 'waiting',
                    }}, True)

    def get_all_workers(self):
        return [w for w in self.worker_collection.find()]
    # NOTE(review): fragment of a larger benchmark loop — `trial`, `db` and
    # the tail of the `for skip` body (which updates seen_ids/last_ids/
    # min_distance) are defined outside this view.
    print(trial)  # FIX: was Python 2 `print trial`; the rest of the file is Python 3
    min_distance = 0.0
    last_ids = []
    seen_ids = set()
    # Page through results 100 at a time using geoNear with a $nin
    # exclusion of already-seen ids instead of cursor skip/limit.
    for skip in range(0, 10000, 100):
        assert len(seen_ids) == skip, 'Skip = %d, got %d ids' % (
            skip, len(seen_ids))

        start = time.time()
        # NOTE(review): the geoNear command was removed in MongoDB 4.2;
        # this requires an older server.
        result = db.command(
            'geoNear', 'collection',
            near={
                'type': 'Point',
                'coordinates': [
                    -73.991084,
                    40.735863]},
            spherical=True,
            minDistance=min_distance,
            query={
                '_id': {'$nin': last_ids}
            },
            num=100)

        duration = time.time() - start

        assert len(result['results']) == 100
Esempio n. 9
0
import pymongo
from pymongo import MongoClient
from bson.son import SON

db = MongoClient().aggregation_example

# Count how often each tag appears across all documents in `things`,
# most frequent first (ties broken by _id descending).
pipeline = [
    {"$unwind": "$tags"},
    {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
    {"$sort": SON([("count", -1), ("_id", -1)])},
]

print("List:", list(db.things.aggregate(pipeline)))
print("plan for this aggregation:", db.command('aggregate', 'things', pipeline=pipeline, explain=True))
Esempio n. 10
0
def install(args):
    """Install Cherrydoor end to end.

    Steps (each gated by ``step_enabled`` / ``args.install_steps``):
    system dependencies, config-file generation, systemd user service,
    MongoDB users/collections/indexes, and an optional interactive admin
    account. Always terminates the process via ``sys.exit()``; exits with
    status 1 on failure when ``args.fail`` is set.
    """
    # TODO(review): `or 1 == 1` forces this branch on every platform — looks
    # like debug residue, but it is kept to preserve current behaviour.
    if sys.platform == "linux" or 1 == 1:

        if step_enabled("dependencies", args):
            # install MongoDB and some other things if they're not installed
            try:
                call(["cherrydoor-install"], shell=False)  # nosec
            except (PermissionError, FileNotFoundError):
                print("unable to install dependencies")
                if args.fail:
                    sys.exit(1)
        # generate a configuration based on default config
        if (
            not os.path.exists(f"{Path.home()}/.config/cherrydoor/config.json")
            or "config" in args.install_steps
        ):
            config = {
                "__comment__": "This is a default config for setuptools installation - it shouldn't be used if installed from GitHub",
                "host": "127.0.0.1",
                "port": 5000,
                "mongo": {
                    "url": "localhost:27017",
                    "name": "cherrydoor",
                    "username": "******",
                    "password": "******",
                },
                "login-translation": {
                    "username": "******",
                    "password": "******",
                    "remember-me": "Pamiętaj mnie",
                    "log-in": "Zaloguj się",
                    "message": "Musisz się zalogować by uzyskać dostęp do tej strony",
                },
                "secret-key": "\\xd7w7\\x04\\r\\xfc/q\\x1a\\x9b&",
                "https": {
                    "enabled": False,
                    "hsts-enabled": False,
                    "hsts-preload": False,
                },
                "interface": {
                    "type": "serial",
                    "baudrate": 115200,
                    "port": "/dev/serial0",
                    "encoding": "utf-8",
                },
                "manufacturer-code": "18",
            }
        else:
            with open(
                f"{Path.home()}/.config/cherrydoor/config.json", "r", encoding="utf-8"
            ) as f:
                config = json.load(f)
        if step_enabled("config", args):
            # create a random secret key
            config["secret-key"] = os.urandom(24).hex()
            # let user choose a password for the database
            if step_enabled("database", args):
                config["mongo"]["password"] = getpass("Wprowadź hasło do bazy danych: ")
            try:
                # files configuration
                # FIX: original tested f"${Path.home()}" — note the literal
                # "$" — so the existence check always failed and makedirs ran
                # unconditionally, raising FileExistsError on re-runs. Check
                # the actual config directory instead.
                if not os.path.exists(f"{Path.home()}/.config/cherrydoor"):
                    os.makedirs(f"{Path.home()}/.config/cherrydoor")
                with open(
                    f"{Path.home()}/.config/cherrydoor/config.json",
                    "w",
                    encoding="utf-8",
                ) as f:
                    json.dump(config, f, ensure_ascii=False, indent=4)
            except (IOError, PermissionError):
                print(
                    f"Nie udało się stworzyć plików w {Path.home()}/.config/cherrydoor. Spróbuj stworzyć ten folder manualnie i nadać mu właściwe uprawnienia",
                    file=sys.stderr,
                )
                if args.fail:
                    sys.exit(1)
        if step_enabled("service", args):
            # systemd user unit pointing at this package's entry point.
            service_config = f"""\
[Unit]
Description=Cherrydoor Service
After=network.target
[Service]
ExecStart={os.path.realpath(__file__).replace("install.py", "__init__.py")} start
Environment=PYTHONUNBUFFERED=1
Restart=always
Type=simple
User=ubuntu
[Install]
WantedBy=multi-user.target
"""
            try:
                if not os.path.exists(f"{Path.home()}/.config/systemd/user"):
                    os.makedirs(f"{Path.home()}/.config/systemd/user")
                with open(
                    f"{Path.home()}/.config/systemd/user/cherrydoor.service", "w"
                ) as f:
                    f.write(service_config)
                    print(
                        f"Plik konfiguracyjny znajduje się w folderze {Path.home()}/.config/cherrydoor"
                    )
            except (IOError, PermissionError):
                print(
                    f"Nie udało się stworzyć pliku usługi pod {Path.home()}/.config/systemd/user/cherrydoor.service - spróbuj uruchomić skrypt z właściwymi uprawnieniami lub stworzyć ten plik manualnie. Zawartość:",
                    file=sys.stderr,
                )
                print(service_config, file=sys.stderr)
                if args.fail:
                    sys.exit(1)
        # Argon2 parameters used to hash the optional admin password below.
        hasher = PasswordHasher(
            time_cost=4,
            memory_cost=65536,
            parallelism=8,
            hash_len=16,
            salt_len=16,
            encoding="utf-8",
        )
        db = MongoClient(
            f"mongodb://{config['mongo']['url']}/{config['mongo']['name']}"
        )[config["mongo"]["name"]]
        if step_enabled("database", args):
            try:
                db.command(
                    "createUser",
                    config["mongo"]["username"],
                    pwd=config["mongo"]["password"],
                    roles=[
                        {"role": "readWrite", "db": config["mongo"]["name"]},
                        {"role": "clusterMonitor", "db": "admin"},
                    ],
                )
                db.create_collection("users")
                db.create_collection(
                    "logs", options={"size": 1073742000, "capped": True}
                )
                db.create_collection("settings")
                db.create_collection(
                    "terminal", options={"size": 1048576, "capped": True, "max": 10000}
                )
            except OperationFailure:
                # Best effort: the user/collections may already exist.
                pass
            user_indexes = db.users.index_information()
            if "username_index" not in user_indexes.keys():
                db.users.create_index("username", name="username_index", unique=True)
            if "cards_index" not in user_indexes.keys():
                db.users.create_index("cards", name="cards_index", sparse=True)
        # nosec - it's python3, not 2, Bandit...
        if step_enabled("user", args) and input(
            "Czy chcesz stworzć nowego użytkownika-administratora? [y/n]"
        ).lower() in ["y", "yes", "tak", "t"]:
            # nosec - it's python3, not 2, Bandit...
            username = input("Wprowadź nazwę użytkownika: ")
            password = hasher.hash(getpass("Hasło: "))
            # NOTE(review): collection.insert is the legacy pymongo API
            # (removed in pymongo 4) — kept for the driver version this
            # project appears to pin; confirm before upgrading pymongo.
            db.users.insert({"username": username, "password": password, "cards": []})
        print("Instalacja skończona!")
        try:
            service_call_args = ["systemctl", "--user", "enable", "cherrydoor"]
            call(service_call_args, shell=False)  # nosec
        except (IOError, PermissionError):
            pass
    else:
        print("Ten system operacyjny nie jest obecnie obsługiwany")

    sys.exit()
Esempio n. 11
0
class Database:
    """Thin wrapper around a MongoDB database that also counts calls
    per operation kind in ``database_calls``."""

    def __init__(self, database_url: str, database_name: str = "username601"):
        """ Database object. """
        self.db = MongoClient(database_url)[database_name]
        self.types = ModifyType
        # Per-process call counters; 'modify' is broken down further by
        # ModifyType value inside its nested dict.
        self.database_calls = {
            'modify': {},
            'eval': 0,
            'add': 0,
            'delete': 0,
            'get': 0
        }
        del database_url  # for safety purposes lel

    def modify(self, collection_name: str, modify_type: ModifyType,
               query: dict, payload: dict):  # resembles a POST request
        """
        Updates/modifies a single part of the database.
        Example:
        db.modify("economy", db.types.CHANGE, {"key_name": "key_value"}, {"key_name_to_change": "new_value"})
        """
        self.database_calls['modify'][
            modify_type.value] = self.database_calls['modify'].get(
                modify_type.value, 0) + 1
        self.db[collection_name].update_one(query,
                                            {modify_type.value: payload})
        return payload

    def eval(self, command: str):
        """ Run a raw database command; returns None if the command fails. """
        self.database_calls['eval'] += 1

        try:
            return self.db.command(command)
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. Failed commands still yield None.
            return

    def add(self, collection_name: str,
            new_data: dict):  # resembles a PUT request
        """ Adds a document to the database. """
        self.database_calls['add'] += 1
        self.db[collection_name].insert_one(new_data)
        return new_data

    def delete(self, collection_name: str,
               query: dict):  # resembles a DELETE request
        """ Removes a document from the database. """
        self.database_calls['delete'] += 1
        self.db[collection_name].delete_one(query)

    def get(self, collection_name: str,
            query: dict) -> dict:  # resembles a GET request
        """ Fetches a document from the database. """
        self.database_calls['get'] += 1
        return self.db[collection_name].find_one(query)

    def exist(self, collection_name: str,
              query: dict) -> bool:  # resembles a GET request (sort of)
        """ Checks if a specific query exists. """
        return (self.get(collection_name, query) is not None)

    def get_all(
            self, collection_name: str
    ) -> list:  # resembles a hell of a big GET request
        """ Gets every single document from a specific database collection. """
        # NOTE(review): actually returns a pymongo Cursor, not a list, despite
        # the annotation; callers that only iterate are unaffected.
        return self.db[collection_name].find()
Esempio n. 12
0
class MongoDBPersister(object):
    """MongoDB persistence for fwbots workers and pools.

    NOTE(review): uses legacy pymongo write APIs (insert/update/remove),
    i.e. requires pymongo < 4.
    """

    def __init__(self, conn_url='localhost:27017', dbname='fwbots'):

        self.conn_url = conn_url
        self.dbname = dbname

        self.fwdb = MongoClient(conn_url)[dbname]
        self.worker_collection = self.fwdb['workers']
        self.pool_collection = self.fwdb['pool']

    ### Basic information ###

    def get_location(self):
        # "host:port/dbname" identifier for this backend.
        return self.conn_url + '/' + self.dbname

    def get_backend_type(self):
        return 'mongodb'

    def get_version(self):
        # Server version string from the buildInfo command.
        return self.fwdb.command({'buildInfo': 1})['version']

    ### Pool manipulation ###

    def add_pool(self, pool):
        # FIX: dict.keys() returns a view object in Python 3, which is not
        # BSON-serializable; materialise the key lists before inserting.
        obj = {
            'name': pool.name,
            'manual': list(pool.manual.keys()),
            'auto': list(pool.auto.keys()),
            'insta': list(pool.insta.keys())
        }
        self.pool_collection.insert(obj)

    def get_all_pools(self):
        """Return every pool document, keyed by pool name."""
        res = {}
        for p in self.pool_collection.find():
            res[p['name']] = p
        return res

    def delete_pool(self, name):
        self.pool_collection.remove({'name': name})

    ### Worker manipulation ###

    def add_worker(self, worker_id, host, port):
        self.worker_collection.insert({
            'worker_id': worker_id,
            'host': host,
            'port': port,
            'state': 'waiting'
        })

    def delete_worker(self, worker_id):
        self.worker_collection.remove(dict(worker_id=worker_id))

    def delete_monitor(self, host):
        self.worker_collection.remove({'monitor': 1, 'host': host})

    def set_working(self, worker_id):
        # Final positional True = upsert.
        self.worker_collection.update(
            dict(worker_id=worker_id),
            {'$set': {
                'state': 'working',
                'start': datetime.utcnow()
            }}, True)

    def unset_working(self, worker_id):
        self.worker_collection.update(dict(worker_id=worker_id),
                                      {'$set': {
                                          'state': 'waiting',
                                      }}, True)

    def get_all_workers(self):
        return [w for w in self.worker_collection.find()]

    def get_avail_workers(self):
        # Only workers currently idle ('waiting').
        return [w for w in self.worker_collection.find({'state': 'waiting'})]
Esempio n. 13
0
## This script is to remove additional datasets from S3 that don't exist in Mongo

import boto3
import botocore
from pymongo import MongoClient
from pprint import pprint
from bson.objectid import ObjectId
import yaml

# Build the connection string from local credentials.
creds = yaml.safe_load(open("../creds.yaml", "r"))
dbHostUri = "mongodb+srv://" + creds["DB_USER"] + ":" + creds[
    "DB_PASSWORD"] + "@cluster0-ollas.mongodb.net/test?retryWrites=true&w=majority&ssl=true&ssl_cert_reqs=CERT_NONE"
db = MongoClient(dbHostUri).test

# Touch the server once so a bad connection fails fast.
serverStatusResult = db.command("serverStatus")

s3 = boto3.resource('s3')
bucketName = "agriworks-user-datasets"
bucket = s3.Bucket(bucketName)

# Report the current object count in the bucket.
s3Length = sum(1 for _ in bucket.objects.all())
print('s3 Size =', s3Length)

# Delete every S3 object whose key (an ObjectId plus extension) has no
# matching dataset document in Mongo.
for key in bucket.objects.all():
    dataset_id = ObjectId(key.key.split('.')[0])
    if not db.dataset.find_one({'_id': dataset_id}):
        print(key.key)
        s3.Object(bucketName, key.key).delete()

print('________DELETED__________')
Esempio n. 14
0
                            'type': 'text',
                            'analyzer': 'ik_max_word',
                            'search_analyzer': 'ik_max_word',
                        },
                        'en': {
                            'type': 'text',
                            'analyzer': 'english',
                        }
                    }
                },
            }
        }
    }
}

# Total number of documents in Movies, via the collstats command.
total = db.command('collstats', 'Movies')['count']
# Stream every movie, excluding fields not needed downstream
# (_id, cover image data/coordinates, and star rating).
docs = db['Movies'].find({},
                         projection={
                             '_id': False,
                             'cover_x': False,
                             'cover_y': False,
                             'cover': False,
                             'star': False,
                         })


def clean_movie(m):
    """Remove parenthesised segments from a movie's release-date entries.

    ``m['release_date']`` is a list of strings; every ``(...)`` span is
    stripped from each entry. Mutates *m* in place, returns None.
    """
    joined = '/'.join(m['release_date'])
    cleaned = re.sub(r'\(.*?\)', '', joined)
    m['release_date'] = cleaned.split('/')
Esempio n. 15
0
class DataBase:
    """Thin wrapper over a MongoDB database that stores whole payloads as
    GridFS files (one logical "collection" per GridFS filename).

    NOTE(review): connection credentials below are placeholders
    ('******' / 'xxxxxxxx' / 'xxx.xxx.xxx.xxx'); presumably substituted at
    deploy time — confirm.
    """

    def __init__(self, db_name):
        # Login user, password and database host (placeholders in this copy).
        usr = '******'
        passwd = 'xxxxxxxx'
        ip = 'xxx.xxx.xxx.xxx'
        # Open the connection and keep a GridFS handle for file storage.
        self.db = MongoClient('mongodb://%s:%s@%s' % (usr, passwd, ip))[db_name]
        self.fs = gridfs.GridFS(self.db)
        self.db_name = db_name

    # Insert data: wraps *data* in a {"time", "data"} envelope and stores it
    # as a GridFS file named *col_name*.
    # NOTE: *data* is expected to already be JSON-formatted text.
    def _insert(self, col_name, data):
        # Ad-hoc normalization of quote style and literal spellings.
        # NOTE(review): the replacements look inconsistent ("true"->"True" but
        # "None"->"null") — fragile; verify against the producers of *data*.
        if data:
            data = data.replace("'", '"').replace("true", "True").replace("false", "False").replace("None", "null")

        order = '{"time": "%s", "data": %s}' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), str(data))

        order = order.encode('utf-8')  # GridFS stores bytes, so encode first
        up = self.fs.put(data=order, filename=col_name)  # upload; returns file id
        print(up)
        print('inserted')

    # Fetch the latest GridFS file named *col_name* and parse it as JSON.
    def _query(self, col_name):
        print('query begin')
        print(col_name)
        print(self.db_name)

        down = self.fs.get_version(col_name).read()  # download latest version

        down = json.loads(down.decode("utf-8").replace('None', 'null'), strict=False)  # parse into a Python object

        return down

    # List the distinct file names currently stored in this database.
    def _check(self):
        print('check begin')

        col_names = []

        # GridFS keeps per-file metadata (incl. filename) in the fs.files collection.
        for grid_out in self.db['fs.files'].find({}, {"_id": 0, "filename": 1}):
            col_names.append(grid_out['filename'])

        # Drop duplicates while preserving first-seen order.
        return_list = []
        for d in col_names:
            if d not in return_list:
                return_list.append(d)

        print(return_list)
        return return_list

    # Drop the entire database this wrapper is bound to.
    def _delete(self):
        self.db.command("dropDatabase")
        print('database %s deleted' % str(self.db_name))

    # Custom query against a regular (non-GridFS) collection; the filter comes
    # in as text and is parsed with ast.literal_eval.
    # NOTE(review): original author warns the dict conversion may still be
    # buggy; the "Null"->"none" replacement below looks suspect — confirm.
    def _custom(self, col_name, _request_order):
        return_list = []
        _request_order = _request_order.replace("'", '"').replace("true", "True").replace("false", "False").replace(
            "Null", "none")
        _request_order = ast.literal_eval(_request_order)
        print(_request_order)
        print(type(_request_order))
        col = self.db[col_name]
        data = col.find(_request_order)
        for file in data:
            return_list.append(file)
        return return_list
def connect_to_mongodb_and_print(ds, **kwargs):
    """Fetch the MongoDB buildinfo from the local `zips` database and print it.

    The (ds, **kwargs) signature suggests an Airflow PythonOperator callable —
    the returned string then shows up in the task logs.
    """
    build_info = MongoClient().zips.command("buildinfo")
    print(build_info)
    return 'Whatever you return gets printed in the logs'
Esempio n. 17
0
    "tags": ["mouse", "cat", "dog"]
}, {
    "x": 3,
    "tags": []
}])
result.inserted_ids
'''
{ "_id" : ObjectId("576aaa973e5269020848cc7c"), "x" : 1, "tags" : [ "dog", "cat" ] }
{ "_id" : ObjectId("576aaa973e5269020848cc7d"), "x" : 2, "tags" : [ "cat" ] }
{ "_id" : ObjectId("576aaa973e5269020848cc7e"), "x" : 2, "tags" : [ "mouse", "cat", "dog" ] }
{ "_id" : ObjectId("576aaa973e5269020848cc7f"), "x" : 3, "tags" : [ ] }
'''
from bson.son import SON

# $unwind emits one document per element of the "tags" array, so each tag can
# be grouped and counted individually.
pipeline = [{
    "$unwind": "$tags"
}, {
    "$group": {
        "_id": "$tags",
        "count": {
            "$sum": 1
        }
    }
}, {
    # SON preserves key order, which matters for a multi-key $sort.
    "$sort": SON([("count", -1), ("_id", -1)])
}]
list(db.things.aggregate(pipeline))
# Same aggregation through the generic command interface, asking only for the
# execution plan (explain=True) instead of the results.
db.command('aggregate', 'things', pipeline=pipeline, explain=True)
Esempio n. 18
0
class MongoDBPersister(object):
    def __init__(self, conn_url='localhost:27017', dbname='fwbots'):
        """Open a MongoDB connection and bind the worker and pool collections."""
        self.conn_url = conn_url
        self.dbname = dbname
        # One database handle, shared by the two collections this persister manages.
        database = MongoClient(conn_url)[dbname]
        self.fwdb = database
        self.worker_collection = database['workers']
        self.pool_collection = database['pool']
        
    ### Basic information ###

    def get_location(self):
        return self.conn_url + '/' + self.dbname

    def get_backend_type(self):
        return 'mongodb'

    def get_version(self):
        return self.fwdb.command({'buildInfo': 1})['version']

    ### Pool manipulation ###

    def add_pool(self,pool):
        obj = {
            'name':pool.name,
            'manual':pool.manual.keys(),
            'auto':pool.auto.keys(),
            'insta':pool.insta.keys()
        }
        self.pool_collection.insert(obj)

    def get_all_pools(self):
        res = {}
        for p in self.pool_collection.find():
            res[p['name']] = p
        return res

    def delete_pool(self,name):
        self.pool_collection.remove({'name':name})

    ### Worker manipulation ###

    def add_worker(self, worker_id, host, port):
        self.worker_collection.insert({
            'worker_id': worker_id,
            'host': host,
            'port': port,
            'state': 'waiting'
            })

    def delete_worker(self, worker_id):
        self.worker_collection.remove(dict(worker_id=worker_id))

    def delete_monitor(self, host):
        self.worker_collection.remove({'monitor': 1, 'host': host})

    def set_working(self, worker_id):
        self.worker_collection.update(
                dict(worker_id=worker_id),
                {'$set': {
                    'state': 'working',
                    'start': datetime.utcnow()
                    }}, True)

    def unset_working(self, worker_id):
        self.worker_collection.update(
                dict(worker_id=worker_id),
                {'$set': {
                    'state': 'waiting',
                    }}, True)

    def get_all_workers(self):
        return [w for w in self.worker_collection.find()]

    def get_avail_workers(self):
        return [w for w in self.worker_collection.find({'state':'waiting'})]