def bulk_insert():
    """Compare the performance of bulk insert vs. inserting documents one by one.

    Conclusion: bulk insert is far faster than inserting documents one at a time.
    Note: bulk insert also accepts a generator of documents.
    """
    list_of_documents1 = [{"name": fmter.tpl.randstr(8)} for i in range(1000)]

    def document_generator():
        for doc in list_of_documents1:
            yield doc

    timer.start()
    users.insert(document_generator())
    timer.timeup()

    list_of_documents2 = [{"name": fmter.tpl.randstr(8)} for i in range(1000)]

    timer.start()
    for doc in list_of_documents2:
        users.insert(doc)
    timer.timeup()

    print(users.find().count())

# bulk_insert()
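# A minimal sketch of the same comparison with the pymongo 3+ API, where the
# legacy insert() is replaced by insert_many() / insert_one(). Assumes
# pymongo >= 3; it reuses the "users" collection and helpers from above.
def bulk_insert_modern_api_sketch():
    import time

    docs = [{"name": fmter.tpl.randstr(8)} for i in range(1000)]
    st = time.perf_counter()
    users.insert_many(docs)  # one batched round trip
    print("insert_many:", time.perf_counter() - st)

    docs = [{"name": fmter.tpl.randstr(8)} for i in range(1000)]
    st = time.perf_counter()
    for doc in docs:
        users.insert_one(doc)  # one round trip per document
    print("insert_one loop:", time.perf_counter() - st)

# bulk_insert_modern_api_sketch()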
def can_key_be_other_than_string():
    """BSON, like JSON, only allows string keys.

    So you cannot use an integer as a key, even though that is valid in a
    Python dictionary. In addition, choosing key strings wisely can save space.
    """
    document = {1: "a"}
    try:
        users.insert(document)  # pymongo rejects non-string keys (bson.errors.InvalidDocument)
    except Exception as e:
        print(e)

    for doc in users.find():
        print(doc)
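# A minimal sketch of one common workaround, assuming you control the data:
# convert non-string keys to strings before inserting. The helper name is
# hypothetical.
def stringify_keys(doc):
    return {str(key): value for key, value in doc.items()}

# users.insert(stringify_keys({1: "a"}))  # stored as {"1": "a"}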
def date_and_datetime_type():
    """MongoDB doesn't support the date object and only accepts datetime.

    You have to convert a date to a datetime with
    datetime.combine(date_object, datetime.min.time()), which normalizes it to
    midnight.
    """
    document = {
        "create_datetime": datetime.now(),
        "create_date": datetime.combine(date.today(), datetime.min.time()),
    }
    users.insert(document)

    for doc in users.find():
        print(doc)
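# A minimal sketch of getting the date back on the way out: MongoDB returns the
# "create_date" field as a datetime at midnight, so call .date() on it.
def read_date_back_sketch():
    doc = users.find_one({"create_date": {"$ne": None}})
    if doc is not None:
        print(doc["create_date"].date())  # datetime -> datetime.date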
def bytes_type():
    """MongoDB supports bytes, which means you can use pickle to dump almost
    anything into MongoDB. But don't forget the maximum BSON document size is
    16 megabytes.
    """
    documents = [
        {"pickle": "hello world".encode("utf-8")},
        {"pickle": obj2bytestr(set([1, 2, 3]))},
    ]
    users.insert(documents)

    for doc in users.find():
        print(doc)
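# A minimal sketch of a full pickle round trip without the obj2bytestr helper,
# assuming the pickled payload stays well under the 16 MB BSON limit.
def pickle_round_trip_sketch():
    import pickle

    users.insert({"payload": pickle.dumps({"a_set": {1, 2, 3}})})  # bytes -> BSON binary
    doc = users.find_one({"payload": {"$exists": True}})
    print(pickle.loads(doc["payload"]))  # {'a_set': {1, 2, 3}}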
def list_and_set_type():
    documents = [
        {"list": [1, 2, 3]},
        {"set": set([1, 2, 3])},  # this cannot be done
    ]
    users.insert(documents)

    for doc in users.find():
        print(doc)

# list_and_set_type()
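# A minimal sketch of the usual workaround: store the set as a list and rebuild
# the set in Python when reading it back.
def set_as_list_sketch():
    users.insert({"tags": sorted({1, 2, 3})})  # a list is a valid BSON type
    doc = users.find_one({"tags": {"$exists": True}})
    print(set(doc["tags"]))  # back to a set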
def insept_example():
    """"insept" means: first try to insert; if that fails with a duplicate _id,
    update instead.

    The same logic can be implemented with upsert. Note: sometimes the document
    does not contain an _id field at all.
    """
    doc = {"_id": 1, "name": "obama", "new_field": 999}
    try:
        users.insert(doc)
    except:
        _id = doc["_id"]
        del doc["_id"]
        users.update({"_id": _id}, {"$set": doc}, upsert=True)

    ppt(users.find({"name": "obama"})[0])

# insept_example()
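# A minimal sketch of the same insert-or-update logic in the pymongo 3+ API:
# update_one() with upsert=True inserts the document when the _id is new and
# applies the $set otherwise, so no exception handling is needed.
def upsert_modern_api_sketch():
    doc = {"_id": 1, "name": "obama", "new_field": 999}
    _id = doc.pop("_id")
    users.update_one({"_id": _id}, {"$set": doc}, upsert=True)

# upsert_modern_api_sketch()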
def boolean_and_none_type():
    """In a query, {key: None} matches documents where key == None OR where the
    key does not exist at all.
    """
    documents = [{"is_valid": True}, {"is_valid": False}, {"is_valid": None}]
    users.insert(documents)

    fmter.tpl._straightline("is_valid == True", 100)
    for doc in users.find({"is_valid": True}):
        print(doc)

    fmter.tpl._straightline("is_valid == False", 100)
    for doc in users.find({"is_valid": False}):
        print(doc)

    fmter.tpl._straightline("is_valid is null", 100)
    for doc in users.find({"is_valid": None}):
        print(doc)

    fmter.tpl._straightline("is_valid not null", 100)
    for doc in users.find({"is_valid": {"$ne": None}}):
        print(doc)
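# A minimal sketch of telling "field is null" apart from "field is missing",
# which the plain {key: None} query above does not distinguish: use $exists
# and $type (BSON type 10 is null).
def null_vs_missing_sketch():
    users.insert({"other_field": 1})  # a document without "is_valid" at all
    for doc in users.find({"is_valid": {"$exists": False}}):  # field missing
        print("missing:", doc)
    for doc in users.find({"is_valid": {"$type": 10}}):  # field explicitly null
        print("explicit null:", doc)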
def reserved_key_id():
    """If the user does not specify an _id, the driver generates one
    automatically. The question: for documents with identical content, can the
    auto-generated _id values ever collide?

    Conclusion: for distinct objects in memory, MongoDB never generates a
    duplicate _id. But if you insert the very same object twice, the same _id
    is reused and a conflict occurs.

    test1: each iteration builds a brand-new dict and rebinds the name ``doc``
        to it; the old dict has no remaining references and is garbage
        collected, so every ``doc`` is a different object in memory and the
        _id values never clash.
    test2: we build a list of documents; every element is a distinct object in
        memory, so again the _id values do not clash.
    test3: we insert the list from test2 a second time. Every element is the
        same object in memory, and insert() already wrote an _id key into it,
        so the same _id is sent again and a conflict occurs.
    """
    # test 1
    for i in range(10):
        doc = {"text": "abcdefg"}
        users.insert(doc)
    print(users.find().count())

    # test 2
    list_of_documents = [{"text": "abcdefg"} for i in range(10)]
    users.insert(list_of_documents)
    print(users.find().count())

    # test 3
    for doc in list_of_documents:
        try:
            users.insert(doc)
        except Exception as e:
            print(e)  # DuplicateKeyError: the dict still carries the _id from test 2
    print(users.find().count())
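# A minimal sketch of reinserting the same dicts without the test-3 conflict:
# strip the "_id" key that insert() attached, so a fresh _id is generated.
def reinsert_without_id_conflict_sketch():
    list_of_documents = [{"text": "abcdefg"} for i in range(10)]
    users.insert(list_of_documents)  # each dict now carries an "_id"
    for doc in list_of_documents:
        doc.pop("_id", None)  # drop the old _id ...
        users.insert(doc)  # ... so a new one is generated
    print(users.find().count())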
def basic_insert_syntax():
    """db.collection.insert(one_document) or db.collection.insert(list_of_documents).

    BUT! if any document in list_of_documents has an _id that conflicts with an
    existing document, the whole call fails. In that case use the following
    pattern instead:

        for document in list_of_documents:
            try:
                db.collection.insert(document)
            except:
                pass
    """
    documents1 = [
        {"name": "Bill Gates", "lastname": "Gates", "firstname": "Bill",
         "profile": {"year": 1955, "money": 700}},
        {"name": "Steve Jobs", "lastname": "Jobs", "firstname": "Steve",
         "profile": {"year": 1955, "money": 69}},
        {"name": "Elon Musk", "lastname": "Musk", "firstname": "Elon",
         "profile": {"year": 1971, "money": 103}},
    ]
    documents2 = [
        {"_id": 100, "name": "Obama", "nation": "USA", "money": None},
        {"_id": 101, "name": "Churchill", "nation": "England", "money": None},
        {"_id": 101, "name": "Bin laden", "nation": "Pakistan", "money": None},  # duplicate _id
    ]

    users.insert(documents1)  # insert the whole list in one call; it must contain no duplicate _id
    for doc in documents2:  # insert one by one in a for loop
        try:
            users.insert(doc)
        except Exception as e:
            print(e)

    for doc in users.find():
        print(type(doc), doc)  # documents come back as plain dicts by default, not ordered dicts
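# A minimal sketch of the pymongo 3+ way to keep going past _id conflicts:
# insert_many(..., ordered=False) attempts every document and reports the
# duplicates in a single BulkWriteError at the end. Fresh _id values are used
# here so the sketch does not clash with documents2 above.
def insert_many_unordered_sketch():
    from pymongo.errors import BulkWriteError

    documents = [
        {"_id": 200, "name": "Obama"},
        {"_id": 201, "name": "Churchill"},
        {"_id": 201, "name": "Bin laden"},  # duplicate _id
    ]
    try:
        users.insert_many(documents, ordered=False)
    except BulkWriteError as e:
        print(e.details["writeErrors"])  # the two non-conflicting documents are still inserted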