def reserved_key_id():
    """Check whether automatically generated ``_id`` values can collide.

    When a document is inserted without an ``_id``, one is created for it
    automatically. The question explored here is whether documents with
    identical content can end up with the same generated ``_id``.

    Conclusion (original author's): distinct dict objects never receive a
    duplicate ``_id``; inserting the *same* dict object again does conflict.

    * test 1: every iteration builds a brand-new dict, so each insert is a
      different object and no ``_id`` conflict occurs.
    * test 2: a list of ten separate dicts is inserted in one call; every
      element is a different object, so again no conflict.
    * test 3: the dicts from test 2 are inserted a second time and conflict.

    NOTE(review): the original text attributes the test 3 conflict to the
    dicts having the same memory address. More likely the legacy ``insert``
    wrote the generated ``_id`` back into each dict during test 2, so the
    second insert carries an explicit, already-used ``_id`` -- confirm
    against the PyMongo documentation.
    """
    from pymongo.errors import DuplicateKeyError

    # test 1: ten fresh dicts, one insert each
    for _ in range(10):
        users.insert({"text": "abcdefg"})
    print(users.find().count())

    # test 2: ten separate dicts, one bulk insert
    list_of_documents = [{"text": "abcdefg"} for _ in range(10)]
    users.insert(list_of_documents)
    print(users.find().count())

    # test 3: re-insert the very same dict objects. Each attempt is expected
    # to conflict; report it instead of aborting so the final count is shown.
    for doc in list_of_documents:
        try:
            users.insert(doc)
        except DuplicateKeyError as e:
            print(e)
    print(users.find().count())
def push():
    """Append one item to the right end of an array field.

    Equivalent to ``list.append(item)``.
    """
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    dump("before")
    change = {"$push": {"skillset": "data visualization"}}
    users.update({"_id": 1}, change)
    dump("after")
def addToSet():
    """Treat an array field as a set: ``$addToSet`` acts like ``set.add(item)``."""
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    # Push "python" first to deliberately introduce a duplicate entry.
    users.update({"_id": 1}, {"$push": {"skillset": "python"}})

    dump("before")
    users.update({"_id": 1}, {"$addToSet": {"skillset": "R"}})
    dump("after")
    # Set semantics only apply while adding; the array itself is not
    # automatically converted into a set.
def pullall():
    """Remove several values from an array field at once.

    Roughly: for every element of the list, remove it if it is one of
    ``[item1, item2, ...]``.
    """
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    dump("before")
    unwanted = ["python", "cSharp", "R"]
    users.update({"_id": 1}, {"$pullAll": {"skillset": unwanted}})
    dump("after")
def pull():
    """Remove every occurrence of one value from an array field.

    Roughly: for every element of the list, remove it if it equals the
    pulled item.
    """
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    # Push "python" first to deliberately introduce a duplicate entry.
    users.update({"_id": 1}, {"$push": {"skillset": "python"}})

    dump("before")
    users.update({"_id": 1}, {"$pull": {"skillset": "python"}})
    dump("after")
    # Every "python" entry, duplicates included, has been removed.
def pushAll():
    """Append several items to the right end of an array field.

    Equivalent to ``list = list + another_list``.

    The ``$pushAll`` operator this demo originally used was deprecated in
    MongoDB 2.4 and removed in 3.6, so the same append is expressed as
    ``$push`` combined with the ``$each`` modifier.
    """
    fmter.tpl._straightline("before", 100)
    ppt(users.find({"_id": 1})[0])

    # $each makes $push append every element of the list, not the list itself.
    users.update(
        {"_id": 1},
        {"$push": {"skillset": {"$each": ["data visualization", "R"]}}},
    )

    fmter.tpl._straightline("after", 100)
    ppt(users.find({"_id": 1})[0])
def pop():
    """Take one item off the right end of an array field, like ``list.pop()``."""
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    dump("before")
    change = {"$pop": {"skillset": 1}}
    users.update({"_id": 1}, change)
    dump("after")
def unset():
    """Remove a key from a document; use ``"$unset"`` for that."""
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    dump("before")
    change = {"$unset": {"profile": 1}}
    users.update({"_id": 1}, change)
    dump("after")
def update_without_set():
    """Show what happens when ``update`` is called without ``"$set"``.

    Using ``collection.update(query, new_document)`` instead of
    ``collection.update(query, {"$set": {key: value}})`` replaces the
    located document with ``new_document``.
    """
    def dump(title):
        # Banner via fmter, then pretty-print the document whose _id is 1.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": 1})[0])

    dump("before")
    replacement = {"_id": 1}
    # No update operator: the whole document is replaced by `replacement`.
    users.update({"_id": 1}, replacement)
    dump("after")
def addToSet():
    """Use ``$addToSet`` to add to an array as if it were a set (``set.add``)."""
    seed_duplicate = {"$push": {"skillset": "python"}}
    add_unique = {"$addToSet": {"skillset": "R"}}

    # Seed a deliberate duplicate "python" entry before the demonstration.
    users.update({"_id": 1}, seed_duplicate)

    fmter.tpl._straightline("before", 100)
    snapshot = users.find({"_id": 1})[0]
    ppt(snapshot)

    users.update({"_id": 1}, add_unique)

    fmter.tpl._straightline("after", 100)
    snapshot = users.find({"_id": 1})[0]
    ppt(snapshot)
    # The array is treated as a set only at the moment of adding; existing
    # contents are not automatically converted into a set.
def pull():
    """Delete all occurrences of a value from an array field.

    Roughly: walk the list and remove each element equal to the pulled item.
    """
    seed_duplicate = {"$push": {"skillset": "python"}}
    drop_value = {"$pull": {"skillset": "python"}}

    # Seed a deliberate duplicate "python" entry before the demonstration.
    users.update({"_id": 1}, seed_duplicate)

    fmter.tpl._straightline("before", 100)
    snapshot = users.find({"_id": 1})[0]
    ppt(snapshot)

    users.update({"_id": 1}, drop_value)

    fmter.tpl._straightline("after", 100)
    snapshot = users.find({"_id": 1})[0]
    ppt(snapshot)
    # All "python" entries, including the duplicates, are gone.
def pushAll():
    """Append several items to the right end of an array field.

    Equivalent to ``list = list + another_list``.

    The ``$pushAll`` operator this demo originally used was deprecated in
    MongoDB 2.4 and removed in 3.6, so the same append is expressed as
    ``$push`` combined with the ``$each`` modifier.
    """
    fmter.tpl._straightline("before", 100)
    ppt(users.find({"_id": 1})[0])

    # $each makes $push append every element of the list, not the list itself.
    users.update(
        {"_id": 1},
        {"$push": {"skillset": {"$each": ["data visualization", "R"]}}},
    )

    fmter.tpl._straightline("after", 100)
    ppt(users.find({"_id": 1})[0])
def pullall():
    """Delete several values from an array field in one update.

    Roughly: walk the list and remove each element that appears in
    ``[item1, item2, ...]``.
    """
    drop_values = {"$pullAll": {"skillset": ["python", "cSharp", "R"]}}

    fmter.tpl._straightline("before", 100)
    snapshot = users.find({"_id": 1})[0]
    ppt(snapshot)

    users.update({"_id": 1}, drop_values)

    fmter.tpl._straightline("after", 100)
    snapshot = users.find({"_id": 1})[0]
    ppt(snapshot)
def absolute_update():
    """Set fields to absolute values with ``collection.update()``.

    ``update`` takes two parts: the first locates the document(s) to
    modify, the second describes the new values.

    Note: ``"$set": {key: value}`` only changes the given keys, whereas
    ``users.update({"_id": 1}, {"name": "obama"})`` replaces the whole
    document with ``{"name": "obama"}``.
    """
    def dump(title):
        # Banner via fmter, then pretty-print documents 1 and 2 in order.
        fmter.tpl._straightline(title, 100)
        for key in (1, 2):
            ppt(users.find({"_id": key})[0])

    dump("before")

    partial_change = {
        "$set": {
            "name": "obama",  # update the name field
            "profile.enthnicity": "african american",  # dotted path reaches a child field
        }
    }
    users.update({"_id": 1}, partial_change)

    # No operator here: the whole document is replaced, only _id is kept.
    users.update({"name": "michael"}, {"age": 100})

    dump("after")
def upsert_example():
    """Demonstrate upsert: try to update first; if no document matches, insert it."""
    change = {"$set": {"name": "obama"}}
    users.update({"name": "obama"}, change, upsert=True)

    fmter.tpl._straightline("after", 100)
    matches = users.find({"name": "obama"})
    for found in matches:
        ppt(found)
def bulk_insert():
    """Compare one bulk insert against inserting documents one at a time.

    Conclusion (original author's): bulk insert is far faster than inserting
    one by one.

    Note: bulk insert also accepts a generator.
    """
    bulk_batch = [{"name": fmter.tpl.randstr(8)} for _ in range(1000)]

    # One insert() call, fed lazily from a generator over the batch.
    timer.start()
    users.insert(doc for doc in bulk_batch)
    timer.timeup()

    single_batch = [{"name": fmter.tpl.randstr(8)} for _ in range(1000)]

    # One insert() call per document.
    timer.start()
    for single in single_batch:
        users.insert(single)
    timer.timeup()

    total = users.find().count()
    print(total)

# bulk_insert()
def dot_notation():
    """Query a nested field with dot notation.

    Documents can be nested, so ``field.sub_field.sub_sub_field ...`` is
    used to reach child fields.
    """
    cursor = users.find({"profile.enthnicity": "asian"})
    matches = list(cursor)
    ppt(matches)
def multiUpdate():
    """Update every matching document by passing ``multi=True``."""
    # Without multi=True only one document is updated, e.g.:
    # users.update({}, {"name": "obama"})
    rename_all = {"$set": {"name": "obama"}}
    # multi=True: all documents matching the where-clause are updated.
    users.update({}, rename_all, multi=True)

    everyone = users.find()
    for person in everyone:
        ppt(person)

# multiUpdate()
def sort():
    """Sort query results by a field.

    Note: in the mongo JavaScript shell the form is
    ``sort({"field1": 1, "field2": -1})``, where 1 means ascending and
    -1 means descending.
    """
    ordered = users.find().sort("profile.enthnicity", pymongo.ASCENDING)
    rows = list(ordered)
    ppt(rows)
def relative_update():
    """Update a field relative to its current value.

    SQL has ``set column_name = column_name + 1``. In MongoDB the options are:

    1. use operators such as ``$inc`` and ``$mul``:
       http://docs.mongodb.org/manual/reference/operator/update-field/
    2. ``find()`` the document, modify the document object, then persist the
       change with ``collection.save(document)``.
    """
    def dump(title, key):
        # Banner via fmter, then pretty-print the document with the given _id.
        fmter.tpl._straightline(title, 100)
        ppt(users.find({"_id": key})[0])

    # Approach 2: read, modify in Python, save back.
    dump("before", 1)
    fetched = users.find_one({"_id": 1})  # find the document
    fetched["age"] += 30                  # change it client-side
    users.save(fetched)                   # write the change to the collection
    dump("after", 1)

    # Approach 1: let the server apply the increment.
    dump("before", 2)
    users.update({"_id": 2}, {"$inc": {"age": 30}})
    dump("after", 2)
def can_key_be_other_than_string():
    """Try to insert a document whose key is not a string.

    A JSON document cannot have non-string keys, so an integer key cannot be
    used even though it is valid in a Python dictionary.

    In addition, choosing key strings wisely can save space.
    """
    int_keyed = {1: "a"}
    users.insert(int_keyed)

    stored = users.find()
    for item in stored:
        print(item)
def multiUpdate():
    """Apply one update to all matching documents via ``multi=True``."""
    # For comparison, this form updates only one document:
    # users.update({}, {"name": "obama"})
    users.update(
        {},
        {"$set": {"name": "obama"}},
        multi=True,  # every document matching the where-clause is updated
    )

    cursor = users.find()
    for entry in cursor:
        ppt(entry)

# multiUpdate()
def date_and_datetime_type():
    """Store a datetime and a date (converted to datetime) in a document.

    MongoDB does not support ``date`` objects and only accepts ``datetime``.
    Convert a date with
    ``datetime.combine(date_object, datetime.min.time())`` to normalize it
    to midnight.
    """
    moment = datetime.now()
    midnight_today = datetime.combine(date.today(), datetime.min.time())
    record = {"create_datetime": moment, "create_date": midnight_today}
    users.insert(record)

    stored = users.find()
    for item in stored:
        print(item)
def list_and_set_type():
    """Try to store a list and a set as document values."""
    with_list = {"list": [1, 2, 3]}
    with_set = {"set": set([1, 2, 3])}  # this cannot be done
    users.insert([with_list, with_set])

    stored = users.find()
    for item in stored:
        print(item)

# list_and_set_type()
def bytes_type():
    """Store raw bytes as document values.

    MongoDB supports bytes, which means pickle can be used to dump anything
    into MongoDB. But do not forget that the maximum BSON document size is
    16 megabytes.
    """
    text_payload = "hello world".encode("utf-8")
    object_payload = obj2bytestr(set([1, 2, 3]))
    users.insert([{"pickle": text_payload}, {"pickle": object_payload}])

    stored = users.find()
    for item in stored:
        print(item)
def boolean_and_none_type():
    """Query on boolean and ``None`` values.

    ``{key: None}`` means the key equals None or the key does not exist.
    """
    users.insert([{"is_valid": flag} for flag in (True, False, None)])

    # (banner title, query) pairs, run in this order.
    sections = [
        ("is_valid == True", {"is_valid": True}),
        ("is_valid == False", {"is_valid": False}),
        ("is_valid is null", {"is_valid": None}),
        ("is_valid not null", {"is_valid": {"$ne": None}}),
    ]
    for title, query in sections:
        fmter.tpl._straightline(title, 100)
        for match in users.find(query):
            print(match)
def date_and_datetime_type():
    """Insert a document holding a datetime and a date normalized to datetime.

    MongoDB does not support ``date`` objects and only accepts ``datetime``.
    A date has to be converted with
    ``datetime.combine(date_object, datetime.min.time())``, which normalizes
    it to midnight.
    """
    created_at = datetime.now()
    created_on = datetime.combine(date.today(), datetime.min.time())
    users.insert({
        "create_datetime": created_at,
        "create_date": created_on,
    })

    for entry in users.find():
        print(entry)
def insept_example():
    """Demonstrate "insept": try to insert first; on an ``_id`` conflict, update.

    The same logic can be implemented with an upsert.

    Note (original author's): sometimes a document does not contain an
    ``_id`` item at all; this demo always supplies one.
    """
    from pymongo.errors import DuplicateKeyError

    doc = {"_id": 1, "name": "obama", "new_field": 999}
    try:
        users.insert(doc)
    except DuplicateKeyError:
        # Only a genuine _id conflict triggers the fallback; any other
        # failure (connection error, Ctrl-C, ...) now propagates.
        # _id goes into the selector only, so it is left out of the $set payload.
        fields = {key: value for key, value in doc.items() if key != "_id"}
        users.update({"_id": doc["_id"]}, {"$set": fields}, upsert=True)

    ppt(users.find({"name": "obama"})[0])

# insept_example()
def bytes_type():
    """Insert documents whose values are raw bytes.

    MongoDB supports bytes, so pickle can be used to dump anything into
    MongoDB. Keep in mind that the maximum BSON document size is
    16 megabytes.
    """
    encoded_text = "hello world".encode("utf-8")
    encoded_object = obj2bytestr(set([1, 2, 3]))

    batch = []
    for blob in (encoded_text, encoded_object):
        batch.append({"pickle": blob})
    users.insert(batch)

    for entry in users.find():
        print(entry)
def list_and_set_type():
    """Attempt to insert one document holding a list and one holding a set."""
    batch = []
    batch.append({"list": [1, 2, 3]})
    batch.append({"set": set([1, 2, 3])})  # this cannot be done
    users.insert(batch)

    for entry in users.find():
        print(entry)

# list_and_set_type()
def basic_insert_syntax():
    """Show the basic insert forms.

    ``db.collection.insert(one_document)`` or
    ``db.collection.insert(list_of_documents)``.

    But if any document in ``list_of_documents`` has an ``_id`` that
    conflicts with an existing document, the insert fails. Insert one at a
    time and handle the failure per document instead::

        for document in list_of_documents:
            try:
                db.collection.insert(document)
            except Exception as e:
                print(e)
    """
    # Sample data fix: firstname/lastname values were swapped in every record.
    documents1 = [
        {"name": "Bill Gates", "lastname": "Gates", "firstname": "Bill",
         "profile": {"year": 1955, "money": 700}},
        {"name": "Steve Jobs", "lastname": "Jobs", "firstname": "Steve",
         "profile": {"year": 1955, "money": 69}},
        {"name": "Elon Musk", "lastname": "Musk", "firstname": "Elon",
         "profile": {"year": 1971, "money": 103}},
    ]

    documents2 = [
        {"_id": 100, "name": "Obama", "nation": "USA", "money": None},
        # Sample data fix: nation was misspelled "Egnland".
        {"_id": 101, "name": "Churchill", "nation": "England", "money": None},
        # Deliberately duplicated _id (101).
        {"_id": 101, "name": "Bin laden", "nation": "Pakistan", "money": None},
    ]

    # Insert a list of dicts in one go; it must not contain duplicate _id values.
    users.insert(documents1)

    # Insert one by one in a loop so a conflict only affects that document.
    for doc in documents2:
        try:
            users.insert(doc)
        except Exception as e:
            print(e)

    for doc in users.find():
        print(type(doc), doc)  # a plain dict is returned by default, not an ordered dict
Mongodb中如何query. http://docs.mongodb.org/manual/reference/operator/query/ """ from tt00_connect import client, db, users from angora.GADGET.pytimer import Timer from angora.STRING.formatmaster import fmter from pprint import pprint as ppt from datetime import datetime, date import re timer = Timer() # comparison operator: $gt, $gte, $lt, $lte, $ne, $eq = {key: value} fmter.tpl._straightline("""users.find({"age": {"$gt": 30}})""", 150) ppt(list( users.find({"age": {"$gt": 30}}) )) fmter.tpl._straightline("""users.find({"age": {"$lt": 30}})""", 150) ppt(list( users.find({"age": {"$lt": 30}}) )) # compare datetime, in java script shell, the command to create a datetime object is: # db.test.insert({"Time" : new ISODate("2012-01-11T03:34:54Z") }); fmter.tpl._straightline("""users.find({"enroll_date": {"$gt": datetime(2014, 6, 1)}}""", 150) ppt(list( users.find({"enroll_date": {"$gt": datetime(2014, 6, 1)}}) )) # $in, $nin, in and not in
##encoding=utf-8 """ Mongodb中如何: 1. 删除document 2. 删除整个collection 3. remove和drop的区别 collecion.remove({})很像find(), 会先query找到匹配的内容, 然后一条条删除之 而collection.drop()则是删除整个collection. 如过collection有一些metadata, 例如index, 那么remove({})掉所有的document并不会删除index. 而drop()则会删除掉这些metadata """ from tt00_connect import client, db, users from angora.GADGET.pytimer import Timer from angora.STRING.formatmaster import fmter from pprint import pprint as ppt from datetime import datetime, date timer = Timer() timer.start() users.remove({}) # users.drop() timer.timeup() for doc in users.find(): print(doc)
""" Mongodb中如何query. http://docs.mongodb.org/manual/reference/operator/query/ """ from tt00_connect import client, db, users from angora.GADGET.pytimer import Timer from angora.STRING.formatmaster import fmter from pprint import pprint as ppt from datetime import datetime, date import re timer = Timer() # comparison operator: $gt, $gte, $lt, $lte, $ne, $eq = {key: value} fmter.tpl._straightline("""users.find({"age": {"$gt": 30}})""", 150) ppt(list(users.find({"age": {"$gt": 30}}))) fmter.tpl._straightline("""users.find({"age": {"$lt": 30}})""", 150) ppt(list(users.find({"age": {"$lt": 30}}))) # compare datetime, in java script shell, the command to create a datetime object is: # db.test.insert({"Time" : new ISODate("2012-01-11T03:34:54Z") }); fmter.tpl._straightline( """users.find({"enroll_date": {"$gt": datetime(2014, 6, 1)}}""", 150) ppt(list(users.find({"enroll_date": {"$gt": datetime(2014, 6, 1)}}))) # $in, $nin, in and not in fmter.tpl._straightline("""users.find({"age": {"$lt": 30}})""", 100) ppt(list(users.find({"age": {"$lt": 30}}))) fmter.tpl._straightline(
def basic_insert_syntax():
    """Show the basic insert forms.

    ``db.collection.insert(one_document)`` or
    ``db.collection.insert(list_of_documents)``.

    But if any document in ``list_of_documents`` has an ``_id`` that
    conflicts with an existing document, the insert fails. Insert one at a
    time and handle the failure per document instead::

        for document in list_of_documents:
            try:
                db.collection.insert(document)
            except Exception as e:
                print(e)
    """
    # Sample data fix: firstname/lastname values were swapped in every record.
    documents1 = [
        {
            "name": "Bill Gates",
            "lastname": "Gates",
            "firstname": "Bill",
            "profile": {
                "year": 1955,
                "money": 700
            }
        },
        {
            "name": "Steve Jobs",
            "lastname": "Jobs",
            "firstname": "Steve",
            "profile": {
                "year": 1955,
                "money": 69
            }
        },
        {
            "name": "Elon Musk",
            "lastname": "Musk",
            "firstname": "Elon",
            "profile": {
                "year": 1971,
                "money": 103
            }
        },
    ]

    documents2 = [
        {
            "_id": 100,
            "name": "Obama",
            "nation": "USA",
            "money": None
        },
        {
            "_id": 101,
            "name": "Churchill",
            "nation": "England",  # sample data fix: was misspelled "Egnland"
            "money": None
        },
        {
            "_id": 101,  # deliberately duplicated _id
            "name": "Bin laden",
            "nation": "Pakistan",
            "money": None
        },
    ]

    # Insert a list of dicts in one go; it must not contain duplicate _id values.
    users.insert(documents1)

    # Insert one by one in a loop so a conflict only affects that document.
    for doc in documents2:
        try:
            users.insert(doc)
        except Exception as e:
            print(e)

    for doc in users.find():
        print(type(doc), doc)  # a plain dict is returned by default, not an ordered dict
def skip_and_limit():
    """Skip the first n results and return only n documents.

    Here both values are 1: one result is skipped and at most one returned.
    """
    window = users.find().skip(1).limit(1)
    page = list(window)
    ppt(page)