def test_bulk_write(self):
        """Check that bulk_write sends a collation only for operations given one.

        Operations whose filter uses the 'noCollation' key are built without
        a collation; all others are built with ``self.collation``. The first
        two started commands recorded by the listener are then inspected:
        the first is read for its 'deletes' and the second for its 'updates'.
        """
        collation = self.collation
        requests = [
            DeleteOne({'noCollation': 42}),
            DeleteMany({'noCollation': 42}),
            DeleteOne({'foo': 42}, collation=collation),
            DeleteMany({'foo': 42}, collation=collation),
            ReplaceOne({'noCollation': 24}, {'bar': 42}),
            UpdateOne({'noCollation': 84}, {'$set': {'bar': 10}}, upsert=True),
            UpdateMany({'noCollation': 45}, {'$set': {'bar': 42}}),
            ReplaceOne({'foo': 24}, {'foo': 42}, collation=collation),
            UpdateOne({'foo': 84}, {'$set': {'foo': 10}}, upsert=True,
                      collation=collation),
            UpdateMany({'foo': 45}, {'$set': {'foo': 42}},
                       collation=collation),
        ]
        self.db.test.collection.bulk_write(requests)

        started = self.listener.results['started']
        delete_cmd = started[0].command
        update_cmd = started[1].command

        # Index each command inside the loop so the delete statements are
        # fully checked before the update command is touched.
        for command, field in ((delete_cmd, 'deletes'),
                               (update_cmd, 'updates')):
            for op in command[field]:
                if 'noCollation' not in op['q']:
                    self.assertEqual(collation.document, op['collation'])
                else:
                    self.assertNotIn('collation', op)
Ejemplo n.º 2
0
 def saveAll(self, exchanger, unit, sums):
     """Upsert every entry of ``sums`` into the collection for (exchanger, unit).

     Each entry is converted with ``toDict()`` and written as a ``ReplaceOne``
     upsert keyed on its 'datetime' value. Nothing is sent when ``sums``
     yields no entries.
     """
     documents = (entry.toDict() for entry in sums)
     requests = [
         ReplaceOne({'datetime': doc['datetime']}, doc, upsert=True)
         for doc in documents
     ]
     if requests:
         self.getCollection(exchanger, unit).bulk_write(requests)
Ejemplo n.º 3
0
    def test_ReplaceOne(self):
        """Bulk-write two ReplaceOne requests and verify documents and counters.

        The first request replaces the document matching ``{'x': 42}``; the
        second targets ``{'x': 555}`` with ``upsert=True``.
        NOTE(review): the expected ``{'y': 123}`` / ``{'z': 321}`` documents
        presumably come from fixture setup outside this block -- confirm.
        """
        requests = [
            ReplaceOne({'x': 42}, {'j': 5}),
            ReplaceOne({'x': 555}, {'k': 5}, upsert=True),
        ]
        result = yield self.coll.bulk_write(requests)

        docs = yield self.coll.find(fields={"_id": 0})
        self.assertEqual(len(docs), 4)
        for expected in ({'j': 5}, {'y': 123}, {'z': 321}, {'k': 5}):
            self.assertIn(expected, docs)

        self.assertIsInstance(result, BulkWriteResult)
        self.assertEqual(result.matched_count, 1)
        self.assertEqual(result.modified_count, 1)
        # Keys of upserted_ids are compared as a set; only request index 1
        # is expected to have upserted.
        self.assertEqual(set(result.upserted_ids), {1})
Ejemplo n.º 4
0
def retryable_single_statement_ops(coll):
    """Build the list of single-statement write calls to exercise on ``coll``.

    Returns a list of ``(bound_method, positional_args, keyword_args)``
    triples: first the ``bulk_write`` variants, then the individual
    collection helpers.
    """
    # (requests, kwargs) pairs; each is passed to coll.bulk_write as a single
    # positional argument.
    bulk_variants = [
        ([InsertOne({}), InsertOne({})], {}),
        ([InsertOne({}), InsertOne({})], {'ordered': False}),
        ([ReplaceOne({}, {})], {}),
        ([ReplaceOne({}, {}), ReplaceOne({}, {})], {}),
        ([UpdateOne({}, {'$set': {'a': 1}}),
          UpdateOne({}, {'$set': {'a': 1}})], {}),
        ([DeleteOne({})], {}),
        ([DeleteOne({}), DeleteOne({})], {}),
    ]
    cases = [(coll.bulk_write, [requests], kwargs)
             for requests, kwargs in bulk_variants]

    cases.extend([
        (coll.insert_one, [{}], {}),
        (coll.insert_many, [[{}, {}]], {}),
        (coll.replace_one, [{}, {}], {}),
        (coll.update_one, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_one, [{}], {}),
        (coll.find_one_and_replace, [{}, {'a': 3}], {}),
        (coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_one_and_delete, [{}, {}], {}),
    ])
    return cases
Ejemplo n.º 5
0
def send_to_db(username, display_name, user_ratings):
    """Upsert a user, their ratings and the rated movies into "letterboxd".

    Does nothing when the ``DATABASE_URL`` environment variable is unset or
    empty.

    Parameters
    ----------
    username : str
        Key of the user document; also used as ``user_id`` in the filter of
        every rating upsert.
    display_name : str
        Stored on the user document.
    user_ratings : sized iterable of dict
        Rating documents; each must contain a ``"movie_id"`` key. Its length
        is stored as ``num_reviews`` on the user document.

    Returns
    -------
    None

    Notes
    -----
    A ``BulkWriteError`` from either bulk write is reported with ``pprint``
    and not re-raised. Both writes share one ``try``, so a failure in the
    ratings write skips the movies write.
    """
    database_url = os.getenv('DATABASE_URL')
    if not database_url:
        return

    # Use the client as a context manager so its connections are closed when
    # the function finishes; previously a new client was created on every
    # call and never closed.
    with pymongo.MongoClient(
            database_url,
            server_api=pymongo.server_api.ServerApi('1')) as client:
        db = client["letterboxd"]
        users = db.users
        ratings = db.ratings
        movies = db.movies

        user = {
            "username": username,
            "display_name": display_name,
            "num_reviews": len(user_ratings),
        }
        users.update_one({"username": user["username"]},
                         {"$set": user},
                         upsert=True)

        # One rating document per (user, movie) pair.
        upsert_ratings_operations = [
            ReplaceOne(
                {"user_id": username, "movie_id": rating["movie_id"]},
                rating,
                upsert=True,
            )
            for rating in user_ratings
        ]
        # Make sure a movie document exists for every rated movie; existing
        # movie documents keep their other fields because only "movie_id" is
        # set.
        upsert_movies_operations = [
            UpdateOne(
                {"movie_id": rating["movie_id"]},
                {"$set": {"movie_id": rating["movie_id"]}},
                upsert=True,
            )
            for rating in user_ratings
        ]

        try:
            # bulk_write rejects an empty request list, hence the guards.
            if upsert_ratings_operations:
                ratings.bulk_write(upsert_ratings_operations, ordered=False)
            if upsert_movies_operations:
                movies.bulk_write(upsert_movies_operations, ordered=False)
        except BulkWriteError as bwe:
            pprint(bwe.details)
Ejemplo n.º 6
0
 def saveAll(self, exchanger, ticks):
     """Upsert ``ticks`` into ``self.collections[exchanger]`` in batches.

     Each tick is converted with ``toDict()`` and written as a ``ReplaceOne``
     upsert keyed on its 'datetime' value. Requests are sent every 2048
     entries, followed by a final write for the remainder.
     """
     batch_size = 2048
     target = self.collections[exchanger]
     pending = []
     for tick in ticks:
         doc = tick.toDict()
         pending.append(
             ReplaceOne({'datetime': doc['datetime']}, doc, upsert=True))
         if len(pending) < batch_size:
             continue
         # NOTE(review): full batches use the default (ordered) mode while
         # the final partial batch below is unordered -- confirm which mode
         # is intended.
         target.bulk_write(pending)
         pending = []
     if pending:
         target.bulk_write(pending, ordered=False)
def retryable_single_statement_ops(coll):
    """Build the list of single-statement write calls to exercise on ``coll``.

    Returns a list of ``(bound_method, positional_args, keyword_args)``
    triples in three groups: ``bulk_write`` variants, the individual
    collection helpers, and the deprecated collection methods.
    """
    # (requests, kwargs) pairs; each is passed to coll.bulk_write as a single
    # positional argument.
    bulk_variants = [
        ([InsertOne({}), InsertOne({})], {}),
        ([InsertOne({}), InsertOne({})], {'ordered': False}),
        ([ReplaceOne({}, {})], {}),
        ([ReplaceOne({}, {}), ReplaceOne({}, {})], {}),
        ([UpdateOne({}, {'$set': {'a': 1}}),
          UpdateOne({}, {'$set': {'a': 1}})], {}),
        ([DeleteOne({})], {}),
        ([DeleteOne({}), DeleteOne({})], {}),
    ]
    cases = [(coll.bulk_write, [requests], kwargs)
             for requests, kwargs in bulk_variants]

    cases.extend([
        (coll.insert_one, [{}], {}),
        (coll.insert_many, [[{}, {}]], {}),
        (coll.replace_one, [{}, {}], {}),
        (coll.update_one, [{}, {'$set': {'a': 1}}], {}),
        (coll.delete_one, [{}], {}),
        (coll.find_one_and_replace, [{}, {'a': 3}], {}),
        (coll.find_one_and_update, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_one_and_delete, [{}, {}], {}),
    ])

    # Deprecated methods.
    cases.extend([
        # Insert with a single document, then lists of one and two documents.
        (coll.insert, [{}], {}),
        (coll.insert, [[{}]], {}),
        (coll.insert, [[{}, {}]], {}),
        # Save with and without an _id.
        (coll.save, [{}], {}),
        (coll.save, [{'_id': ObjectId()}], {}),
        # Non-multi update.
        (coll.update, [{}, {'$set': {'a': 1}}], {}),
        # Non-multi remove.
        (coll.remove, [{}], {'multi': False}),
        # find_and_modify as replace, update and delete respectively.
        (coll.find_and_modify, [{}, {'a': 3}], {}),
        (coll.find_and_modify, [{}, {'$set': {'a': 1}}], {}),
        (coll.find_and_modify, [{}, {}], {'remove': True}),
    ])
    return cases
Ejemplo n.º 8
0
 def test_ReplaceOneNotEquals(self):
     """Two ReplaceOne requests differing only in ``upsert`` compare unequal."""
     without_upsert = ReplaceOne({'foo': 42}, {'bar': 42}, upsert=False)
     with_upsert = ReplaceOne({'foo': 42}, {'bar': 42}, upsert=True)
     self.assertNotEqual(without_upsert, with_upsert)
Ejemplo n.º 9
0
def copy_indexes(*index_names,
                 batch_size: int = 50,
                 rewrite: bool = False,
                 request_timeout: int = 60) -> None:
    """
    Copy data from D3M's database into the lab's database, mirroring only a
    subset of indexes and fields.

    Parameters
    ----------
    *index_names : str
        The names of the specific indexes to read from D3M's db. If
        none are provided, every member of `Index` is copied.
    batch_size : int
        The number of records to retrieve from D3M's db with each
        network request; also the batch size given to the write buffer.
    rewrite : bool
        If `True`, all records in each target collection are deleted first,
        so everything is copied again. If `False`, only documents whose ids
        are not yet in the lab's database are copied.
    request_timeout : int
        Number of seconds to wait for a response from elasticsearch.
    """
    d3m_db = D3MDB()
    aml_db = AMLDB()

    # With no names given, iterate over `Index` itself (copy everything).
    to_copy = {Index(name) for name in index_names} if index_names else Index

    for index in to_copy:
        index_name = index.value
        aml_collection = aml_db.db[index_name]

        if rewrite:
            print(f"Removing all records in the '{index_name}' collection...")
            aml_collection.delete_many({})

        print(
            f"Determining which documents to copy from index '{index_name}'..."
        )
        # Ids present in D3M's db but absent from the lab's db.
        missing_ids = (d3m_db.get_all_ids(index_name)
                       - aml_db.get_all_ids(index_name))
        ids_of_docs_to_copy = list(missing_ids)
        num_docs_to_copy = len(ids_of_docs_to_copy)

        print((
            f"Now copying subset of index '{index_name}' ({num_docs_to_copy} documents) "
            f"to the AML database..."))

        # Writes to the lab db are batched by the buffer.
        write_buffer = MongoWriteBuffer(aml_collection, batch_size)

        for id_chunk in chunk(ids_of_docs_to_copy,
                              batch_size,
                              show_progress=True):
            # Build the search step by step: restrict to this chunk's ids,
            # then to the fields configured for this index.
            search = d3m_db.search(index=index_name)
            search = search.query("ids", values=list(id_chunk))
            search = search.source(elasticsearch_fields[index])
            search = search.params(size=batch_size,
                                   request_timeout=request_timeout)

            for hit in search.execute():
                doc = hit.to_dict()
                # Reuse the elasticsearch id as the MongoDB primary key.
                doc["_id"] = hit.meta.id
                # Upsert: insert the doc, or replace an existing one that
                # has the same _id.
                write_buffer.queue(
                    ReplaceOne(filter={"_id": doc["_id"]},
                               replacement=doc,
                               upsert=True))

        # Write out whatever is still queued for this index.
        write_buffer.flush()