def add_data():
    """Insert ten sample student documents with one bulk write.

    Each document has the form ``{"name": "name<i>", "age": <i>}`` for
    ``i`` from 0 through 9.
    """
    # Arguments are presumably (url, database, collection) -- confirm
    # against MongoUtils.
    mongo = MongoUtils("", "test", "test")
    target = mongo.get_mongo_collection()
    batch = [{"name": "name" + str(idx), "age": idx} for idx in range(10)]
    target.insert_many(batch)
def save():
    """Store one fixed sample document in MongoDB and print ``success``.

    The connection address is assembled as ``<ip>:<port>`` from
    ``mongo_conf``.
    """
    address = str(mongo_conf.ip) + u':' + str(mongo_conf.port)
    users = MongoUtils(address, u'test', u'user').getMongo()
    users.insert_one({u'name': u'李四', u'age': 20})
    print(u'success')
def test_mongo():
    """Print the ``ut`` field of up to five matching documents.

    Selects documents whose ``ut`` lies in
    ``[1504022400000, 1504540800000)``, ordered by ``ut`` ascending and
    limited to five results.
    """
    client = MongoUtils("", "acrm", "")
    collection = client.get_mongo_collection()
    document = {u'ut': {u'$gte': 1504022400000, u'$lt': 1504540800000}}
    # pymongo.ASCENDING sorts ascending; pymongo.DESCENDING sorts descending.
    cursor_ = collection.find(document, no_cursor_timeout=True).sort(
        [(u'ut', pymongo.ASCENDING)]).limit(5)
    # A no_cursor_timeout cursor is not timed out by the server, so it must
    # be closed explicitly -- including when iteration raises.
    try:
        for docu in cursor_:
            print(docu[u'ut'])
    finally:
        cursor_.close()
def query_oplog():
    """Query oplog entries in a fixed window and print ``ts`` and ``op``.

    At most ten entries with ``ts`` in
    ``[Timestamp(1533528000, 0), Timestamp(1536207400, 0))`` are read
    through a tailable-await cursor with ``oplog_replay`` enabled.
    """
    oplog = MongoUtils(u'', u'local', u'oplog.rs').getMongo()
    window = {
        u'$gte': bson.timestamp.Timestamp(1533528000, 0),
        u'$lt': bson.timestamp.Timestamp(1536207400, 0),
    }
    entries = oplog.find(
        {u'ts': window},
        cursor_type=pymongo.CursorType.TAILABLE_AWAIT,
        oplog_replay=True,
    ).limit(10)
    for entry in entries:
        print(entry['ts'])
        print(entry['op'])
def query():
    """Look up one document by ``_id`` and print two of its fields.

    For every match, prints the ``_id`` and then the nested value
    ``name -> class -> name``. The cursor is closed even if a document
    lacks one of those keys and the lookup raises.
    """
    collection = MongoUtils(mongo_conf.url, u'test', u'user').getMongo()
    document = {u'_id': ObjectId(u'5bebe7e1d953ca91c44c3b84')}
    cursor = collection.find(document)
    try:
        for doc in cursor:
            print(doc[u'_id'])
            print(doc[u'name'][u'class'][u'name'])
    finally:
        # Runs on both the normal and the error path.
        cursor.close()
def test_sync_policy():
    """Feed a fixed oplog window to ``update_policy_result``.

    Up to 100 entries with an empty ``ns`` and ``ts`` in
    ``[Timestamp(1552666352, 6), Timestamp(1552666353, 0))`` are read in
    natural order; each one is passed on together with ``"jdzz"``.
    """
    # Obtain the MongoDB collection handle.
    oplog = MongoUtils("", "local", "oplog.rs").get_mongo_collection()
    lower = bson.timestamp.Timestamp(1552666352, 6)
    upper = bson.timestamp.Timestamp(1552666353, 0)
    selector = {"ns": "", "ts": {"$gte": lower, "$lt": upper}}
    entries = oplog.find(
        selector,
        cursor_type=pymongo.CursorType.TAILABLE_AWAIT,
        oplog_replay=True,
    ).sort([(u'$natural', pymongo.ASCENDING)]).limit(100)
    for entry in entries:
        update_policy_result(entry, "jdzz")
def test_query1(): collection = MongoUtils("", "test", "stu").get_mongo_collection() document = {"_id": bson.ObjectId("5cda286a7bde5ca85d3de0ea")} cursor = collection.find(document).limit(100) # for docu in cursor: # # name nan age null # print docu["name"], type(docu["name"]), type(docu["name"]) is float, str(docu["name"]) == "nan", str( # docu["name"]).replace("nan", "zjx") # print docu["age"], type(docu["age"]), docu["age"] is None # print sync_mongo_service.get_mysql_data(docu["name"]) is "nan", sync_mongo_service.get_mysql_data( # docu["name"]) is "None" for docu in cursor: print filter_null(docu["name"]), filter_null(docu["age"]) cursor.close()
def test_query(): collection = MongoUtils("", "local", "oplog.rs").get_mongo_collection() oplog_start = bson.timestamp.Timestamp(1552298024, 0) oplog_end = bson.timestamp.Timestamp(1552298025, 0) document = {"ns": "", "ts": {"$gte": oplog_start, "$lt": oplog_end}} cursor = collection.find(document, cursor_type=pymongo.CursorType.TAILABLE_AWAIT, oplog_replay=True).sort([ (u'$natural', pymongo.ASCENDING) ]).limit(100) for docu in cursor: _id = docu["o2"]["_id"] s = str(_id) print type(_id), type(s), s
def test_sync_model(): # 获取 mongodb 到表的连接 collection = MongoUtils("", "local", "oplog.rs").get_mongo_collection() oplog_start = bson.timestamp.Timestamp(1552618080, 11) oplog_end = bson.timestamp.Timestamp(1552618081, 0) document = {"ns": "", "ts": {"$gte": oplog_start, "$lt": oplog_end}} cursor = collection.find(document, cursor_type=pymongo.CursorType.TAILABLE_AWAIT, oplog_replay=True).sort([ (u'$natural', pymongo.ASCENDING) ]).limit(100) for docu in cursor: obj = docu["o"]["model_result"] print obj["level"], type(obj["level"]), obj.has_key( "level"), sync_mongo_service.get_mysql_data( obj["level"]) is not "nan", obj["level"] is not None
def query_sort():
    """Demonstrate a sorted, limited query with ``no_cursor_timeout`` enabled.

    Selects documents whose ``ut`` lies in
    ``[1504022400000, 1504540800000)``, ordered by ``ut`` ascending and
    limited to five. For each, prints ``ts_str(ut / 1000)``.
    """
    collection = MongoUtils(u'', u'acrm', u'policy_result').getMongo()
    document = {u'ut': {u'$gte': 1504022400000, u'$lt': 1504540800000}}
    # pymongo.ASCENDING sorts ascending; pymongo.DESCENDING sorts descending.
    cursor_ = collection.find(document, no_cursor_timeout=True).sort(
        [(u'ut', pymongo.ASCENDING)]).limit(5)
    # A no_cursor_timeout cursor is not timed out by the server, so it must
    # be closed explicitly -- including when iteration raises.
    try:
        for docu in cursor_:
            ut = docu[u'ut']
            print(ts_str(ut / 1000))
    finally:
        cursor_.close()
def test():
    """Emit one message on each logger, then report an oplog ``_id`` and a checkpoint.

    Logs a marker line at error level on four loggers, reads up to 100
    oplog entries for ``jdzz_acrm.policy_result`` in a fixed one-second
    window, and returns a ``jsonify`` response holding the stringified
    ``o2._id`` plus the value of
    ``sync_mongo_data.get_ts("jdzz", "policy_result")``.
    """
    # NOTE(review): the original indentation of this function was lost.
    # This layout assumes only the ``_id`` assignment is inside the loop, so
    # the response uses the last entry read -- confirm against the original
    # file. Under this layout an empty cursor leaves ``_id`` unbound and
    # ``str(_id)`` raises.
    logger.error(" logger ================= ")
    email_logger.error(" email_logger ================= ")
    errorSql_logger.error(" errorSql_logger ================= ")
    fullSql_logger.error(" fullSql_logger ================= ")
    collection = MongoUtils("", "local", "oplog.rs").get_mongo_collection()
    oplog_start = bson.timestamp.Timestamp(1552298024, 0)
    oplog_end = bson.timestamp.Timestamp(1552298025, 0)
    document = {"ns": "jdzz_acrm.policy_result", "ts": {"$gte": oplog_start, "$lt": oplog_end}}
    cursor = collection.find(document, cursor_type=pymongo.CursorType.TAILABLE_AWAIT, oplog_replay=True).sort(
        [(u'$natural', pymongo.ASCENDING)]).limit(100)
    for docu in cursor:
        _id = docu["o2"]["_id"]
    result_json = {"_id": str(_id), "ts": sync_mongo_data.get_ts("jdzz", "policy_result")}
    return jsonify(result_json)
def test_insert_policy():
    """Print each oplog entry of a fixed window, prefixed with ``ss``.

    Up to 100 entries with an empty ``ns`` and ``ts`` in
    ``[Timestamp(1552035860, 0), Timestamp(1552035861, 0))`` are read in
    natural order. If formatting or printing an entry raises, the
    exception is printed and the loop continues.
    """
    oplog = MongoUtils("", "local", "oplog.rs").get_mongo_collection()
    # Reference values noted by the author (date -> epoch seconds):
    #   20190128 -> 1548604800
    #   20190308 -> 1552035860
    lower = bson.timestamp.Timestamp(1552035860, 0)
    upper = bson.timestamp.Timestamp(1552035861, 0)
    # Author's note: 5c6e04666ffe5a3ef6a92bdc (presumably the _id of the
    # document under investigation -- not used by the query).
    selector = {"ns": "", "ts": {"$gte": lower, "$lt": upper}}
    entries = oplog.find(
        selector,
        cursor_type=pymongo.CursorType.TAILABLE_AWAIT,
        oplog_replay=True,
    ).sort([(u'$natural', pymongo.ASCENDING)]).limit(100)
    for entry in entries:
        try:
            print("ss" + str(entry))
        except Exception as err:
            print(err)
def sync_mongo_data():
    """Continuously sync MongoDB oplog entries for one namespace into MySQL.

    Reads a JSON body from the current request with these keys:

    * ``ip`` -- MongoDB address handed to ``MongoUtils``.
    * ``org_id`` -- passed to ``get_ts`` and ``sync_mongo``; also the
      prefix of the checkpoint key given to ``update_ts``.
    * ``ns`` -- oplog namespace to follow; its second dot-separated
      segment is used as the table name.

    Starting from the checkpoint returned by ``get_ts``, the function polls
    the ``oplog.rs`` collection in batches of up to 100 entries. Each entry
    is passed to ``sync_mongo_service.sync_mongo`` and its timestamp is
    stored as the new checkpoint via ``update_ts``. A failure on a single
    entry is logged and e-mailed, and the loop moves on to the next entry.

    The polling loop has no normal exit; the function returns only after
    an exception escapes it, which is logged and e-mailed first.

    Returns:
        The ``jsonify`` response of the checkpoint loaded at start-up, or
        of an empty dict when the failure happened before it was loaded.
    """
    # Bound up front so the error path can always build a response, even
    # when the failure happens before the checkpoint is loaded.
    result = {}
    try:
        # Parse the JSON body of the POST request.
        json_data = json.loads(request.get_data())
        # MongoDB address.
        ip = json_data['ip']
        org_id = json_data['org_id']
        # Namespace used to filter oplog.rs entries.
        ns = json_data['ns']
        # Table name.
        table_name = ns.split(".")[1]
        # Obtain the MongoDB collection handle.
        collection = MongoUtils(ip, "local", "oplog.rs").get_mongo_collection()
        # Load the initial checkpoint (timestamp and increment).
        result = get_ts(org_id, table_name)
        ts = result["ts"]
        inc = result["inc"]
        while True:
            oplog_start = bson.timestamp.Timestamp(ts, inc)
            document = {"ns": ns, "ts": {"$gt": oplog_start}}
            cursor = collection.find(
                document,
                cursor_type=pymongo.CursorType.TAILABLE_AWAIT,
                oplog_replay=True,
            ).sort([(u'$natural', pymongo.ASCENDING)]).limit(100)
            try:
                # Walk the entries of this batch.
                for docu in cursor:
                    try:
                        # Apply the entry, then advance the checkpoint to it.
                        sync_mongo_service.sync_mongo(docu, org_id, table_name)
                        bts = docu["ts"]
                        ts, inc = bts.time, bts.inc
                        update_ts(org_id + "_" + table_name, ts, inc)
                    except Exception as e:
                        # NOTE(review): a failing entry is not checkpointed,
                        # so while it is the newest entry each poll selects
                        # it again and sends another e-mail -- confirm this
                        # is intended.
                        logger.error(u" 循环遍历 cursor 的时候出现异常 %s doc : \n %s", e, docu)
                        EmailUtils.send_email(u"循环遍历 cursor 的时候出现异常",
                                              str(e) + "\n" + str(docu),
                                              email_conf.receiver)
                # Pause 10 ms between polls (the earlier comment said
                # 100 ms, but the code has always slept 0.01 s).
                time.sleep(0.01)
            finally:
                # Release the cursor even when iteration itself raises.
                cursor.close()
    except Exception as e:
        logger.error(u" 获取 mongo 端数据源报错 %s", e)
        EmailUtils.send_email(u"获取 mongo 端数据源报错", str(e), email_conf.receiver)
    return jsonify(result)