def _full_sql(self):
    self.logger.record('Starting: full sql sync...')
    for table in self.mongo['tables']:
        # sync
        self.mongo['table'] = table
        client = Mongo(self.mongo)
        total = client.count()
        offset = 0
        limit = 100
        while offset <= total:
            # record offset and limit
            self.logger.record('total:{}, offset:{}, limit:{}'.format(
                total, offset, limit))
            sleep(_SLEEP)
            queryset = client.find(offset=offset, limit=limit)
            actions = []
            action_ids = []
            for q in queryset:
                data = format_data(table, q)
                action_ids.append(str(q['_id']))
                actions.append(data)
            # pg save for batch
            sql = get_sql(table)
            Postgresql(self.postgresql).insert_batch(sql, actions)
            offset += limit
    self.logger.record('Ending: full sql sync.')
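# Hypothetical sketch of what Postgresql(...).insert_batch above could wrap,
# assuming psycopg2 as the driver; the repo's actual wrapper is not shown here.
import psycopg2
from psycopg2.extras import execute_values

def insert_batch_sketch(dsn, sql, rows):
    # sql is expected to look like: "INSERT INTO pos (id, store_id) VALUES %s"
    # and rows is a list of tuples matching the column list
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cur:
            execute_values(cur, sql, rows)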
def _inc_oplog(self):
    self.logger.record('Starting: incremental oplog sync...')
    # sync
    oplog = Mongo(self.mongo).client().local.oplog.rs
    # resume position: the saved timestamp, else the oldest entry in the oplog
    if self.oplog['ts']:
        stamp = self.oplog['ts']
    else:
        stamp = oplog.find().sort('$natural', ASCENDING).limit(-1).next()['ts']
    while True:
        # a position restored from config is the string repr of a bson
        # Timestamp and needs eval(); positions read from the oplog itself
        # are already Timestamp objects
        ts = eval(stamp) if isinstance(stamp, str) else stamp
        kw = {}
        kw['filter'] = {'ts': {'$gt': ts}}
        kw['cursor_type'] = CursorType.TAILABLE_AWAIT
        kw['oplog_replay'] = True
        cursor = oplog.find(**kw)
        try:
            while cursor.alive:
                for q in cursor:
                    stamp = q['ts']
                    # Do something with doc.
                    op = q['op']  # operation: u(pdate) / i(nsert) / d(elete)
                    db, table = q['ns'].split('.')  # changed namespace, e.g. saas_dq_uat.pos
                    doc = q['o']
                    # database ID -> document ID
                    doc_id = str(doc['_id'])
                    del doc['_id']
                    # format data
                    format_data(doc)
                    format_data_for_aggs(doc)
                    if op == 'u':
                        self.es.update(table, doc_id, doc)
                    elif op == 'i':
                        self.es.insert(table, doc_id, doc)
                    elif op == 'd':
                        self.es.delete(table, doc_id)
                    # record the incremental position
                    write_config('oplog', 'ts', stamp)
                sleep(_SLEEP)
        except AutoReconnect:
            sleep(_SLEEP)
    self.logger.record('Ending: incremental oplog sync.')
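# For reference, an entry in local.oplog.rs has roughly the shape below;
# _inc_oplog above reads 'op', splits 'ns' into database and table, and takes
# 'o' as the changed document. Values are illustrative, not captured output.
#
# {
#     'ts': Timestamp(1545730000, 1),     # position in the oplog
#     'op': 'u',                          # i = insert, u = update, d = delete
#     'ns': 'saas_dq_uat.pos',            # <database>.<collection>
#     'o': {'_id': ObjectId('...'), ...}  # the document (for updates this can
#                                         # be an update spec rather than the
#                                         # full document)
# }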
class Connections(MongoMixin):
    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["connections"]

    def __init__(self, client_id, connected, device, connected_at,
                 keepalive=60, ipaddress="", proto_ver=0, conn_ack=0):
        self.client_id = client_id
        self.device = device
        self.connected = connected
        self.keepalive = keepalive
        self.ipaddress = ipaddress
        self.connected_at = connected_at
        self.proto_ver = proto_ver
        self.conn_ack = conn_ack

    def to_doc(self):
        return {
            "client_id": self.client_id,
            "device": self.device,
            "connected": self.connected,
            "connected_at": self.connected_at,
            "keepalive": self.keepalive,
            "ipaddress": self.ipaddress,
            "proto_ver": self.proto_ver,
            "conn_ack": self.conn_ack
        }
class MongoMixin:
    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = None

    @classmethod
    def find(cls, condition):
        return cls.COLLECTION.find(condition)

    @classmethod
    def find_one(cls, condition=None):
        return cls.COLLECTION.find_one(condition)

    @classmethod
    def insert_one(cls, doc):
        return cls.COLLECTION.insert_one(doc)

    @classmethod
    def delete_one(cls, condition):
        return cls.COLLECTION.delete_one(condition)

    @classmethod
    def delete_many(cls, condition):
        return cls.COLLECTION.delete_many(condition)

    @classmethod
    def replace_one(cls, filter, replacement, upsert=False):
        return cls.COLLECTION.replace_one(filter, replacement, upsert=upsert)

    @classmethod
    def update_one(cls, filter, replacement, upsert=False):
        return cls.COLLECTION.update_one(filter, replacement, upsert=upsert)
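# Minimal usage sketch for MongoMixin; "products" is an illustrative
# collection name, not one defined in this repo. A subclass only needs to
# point COLLECTION at a concrete collection to inherit the helpers above.
class Products(MongoMixin):
    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["products"]

# Products.insert_one({"product_name": "demo"})
# Products.find_one({"product_name": "demo"})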
def _full_sql(self):
    self.logger.record('Starting: full sql sync...')
    for table in self.mongo['tables']:
        # create new es index
        self.logger.record('create new es index:{}'.format(table))
        self.es.init(table, mapping=read_mapping(table))
        # sync
        self.mongo['table'] = table
        client = Mongo(self.mongo)
        total = client.count()
        offset = 0
        limit = 100
        while offset <= total:
            # record offset and limit
            self.logger.record('offset:{}, limit:{}'.format(offset, limit))
            queryset = client.find(offset=offset, limit=limit)
            actions = []
            action_ids = []
            for q in queryset:
                # database ID -> document ID
                doc_id = str(q['_id'])
                del q['_id']
                # format data
                doc = q
                format_data(doc)
                format_data_for_aggs(doc)
                action_ids.append(doc_id)
                actions.append({
                    "_index": table,
                    "_type": table,
                    "_id": doc_id,
                    "_source": doc
                })
            # elastic save for batch
            self.es.insert_batch(table, actions, action_ids)
            sleep(_SLEEP)
            offset += limit
    self.logger.record('Ending: full sql sync.')
def __init__(self, eventModel, mongoClient, user, emotes, actionQueue):
    self._event = eventModel
    self._user = user
    self._identityUtil = IdentityUtil()
    self._randomUtil = Random()
    self._banUtil = Ban()
    self._channelUtil = Channel()
    self._mongoUtil = Mongo(mongoClient)
    self._triggerUtil = Trigger(eventModel)
    self._emoteUtil = Emote(emotes)
    self._queue = actionQueue
def main():
    redis = redis_init()
    mongo = Mongo()
    sync_redis_with_mongo(redis, mongo)
    crawler = Crawler()
    topics = get_all_topics(crawler, thread_num=4)
    batch_process_topics_data(redis, mongo, crawler, topics,
                              batch=50, thread_num=8)
def main():
    config = read_config()
    mongo = config['mongo']
    oplog = config['oplog']
    postgresql = config['postgresql']
    logger = Logger('test')
    index = 0
    while index <= 1000000:
        for table in mongo['tables']:
            # sync
            mongo['table'] = table
            client = Mongo(mongo)
            total = client.count()
            offset = 0
            limit = 100
            while offset <= total:
                # record offset and limit
                print('index:{}, offset:{}, limit:{}'.format(
                    index, offset, limit))
                logger.record('index:{}, offset:{}, limit:{}'.format(
                    index, offset, limit))
                queryset = client.find(offset=offset, limit=limit)
                actions = []
                action_ids = []
                for q in queryset:
                    index += 1
                    q['_id'] = '{}'.format(index)
                    q['store_id'] = random.choice(STORE_IDS)
                    data = format_data(table, q)
                    action_ids.append(str(q['_id']))
                    actions.append(data)
                # pg save for batch
                sql = get_sql(table)
                Postgresql(postgresql).insert_batch(sql, actions)
                sleep(_SLEEP)
                offset += limit
def _build_mapping(self):
    '''Convert the mongo table structures into es mappings, driven by the
    mongo settings in the config file.
    '''
    self.logger.record('Starting: build mapping...')
    for table in self.mongo['tables']:
        # build new es mapping
        self.logger.record('build new es mapping:{}'.format(table))
        old_mapping = read_mapping(table)
        # build
        self.mongo['table'] = table
        client = Mongo(self.mongo)
        total = client.count()
        offset = 0
        # step forward a random number of documents each round:
        # 1. fewer duplicate operations  2. the mapping is built from an even sample
        increase = lambda: random.randint(1, 100)
        while offset <= total:
            self.logger.record(
                'mapping: {}.json, total number:{}, current number:{}'.
                format(table, total, offset))
            for query in client.find_one(offset=offset):
                del query['_id']
                format_mapping(
                    old_mapping['mappings'][table]['properties'], query)
            write_mapping(table, old_mapping)
            offset += increase()
            sleep(_SLEEP)
    self.logger.record('Ending: build mapping.')
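# For context, the {table}.json files read and written above follow
# Elasticsearch's mapping layout (pre-7.x, with a type name), which is what
# old_mapping['mappings'][table]['properties'] indexes into; a minimal,
# hypothetical pos.json:
#
# {
#     "mappings": {
#         "pos": {
#             "properties": {
#                 "store_id": {"type": "keyword"},
#                 "amount": {"type": "double"}
#             }
#         }
#     }
# }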
class DeviceAcl(MongoMixin):
    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["device_acl"]

    def __init__(self, username, publish, subscribe, pubsub):
        self.username = username
        self.publish = publish
        self.subscribe = subscribe
        self.pubsub = pubsub

    def to_doc(self):
        return {
            "username": self.username,
            "publish": self.publish,
            "subscribe": self.subscribe,
            "pubsub": self.pubsub
        }
# the opening of this call was truncated; NegativeSampling(model=transe, ...)
# is the usual OpenKE wrapper and is assumed here
model = NegativeSampling(
    model = transe,
    loss = SigmoidLoss(adv_temperature = 1),
    batch_size = train_dataloader.get_batch_size(),
    regul_rate = 0.0
)

# train the model
trainer = Trainer(model = model, data_loader = train_dataloader,
                  train_times = 3000, alpha = 2e-5, use_gpu = False,
                  opt_method = "adam")
trainer.run()
transe.save_checkpoint('./checkpoint/transe_fn.ckpt')

# test the model
transe.load_checkpoint('./checkpoint/transe_fn.ckpt')
tester = Tester(model = transe, data_loader = test_dataloader, use_gpu = False)
# tester.run_link_prediction(type_constrain = False)
acc, threshold = tester.run_triple_classification()

db = Mongo().get_client()
db_col = db["training_results"]
transe_result = db_col.find_one({"name": "transe"})
if transe_result:
    print("update acc and threshold on the existing transe result")
    query = {"name": "transe"}
    update_values = {"$set": {"acc": acc, "threshold": threshold}}
    db_col.update_one(query, update_values)
else:
    print("add new transe result")
    transe_dict = {"name": "transe", "acc": acc, "threshold": threshold}
    x = db_col.insert_one(transe_dict)
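# Reading the stored result back later is then a plain lookup (sketch):
# result = Mongo().get_client()["training_results"].find_one({"name": "transe"})
# print(result["acc"], result["threshold"])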
class Device(MongoMixin):
    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["devices"]

    def __init__(self, product_name, device_name, username, password,
                 status="", device_status="{}", last_status_update=0,
                 tags=None, tags_version=1, shadow=None, _id=None):
        self.product_name = product_name
        self.device_name = device_name
        self.username = username
        self.password = password
        self.status = status  # connectivity status
        self.device_status = device_status
        self.last_status_update = last_status_update  # time of the latest status update
        self.tags = tags if tags is not None else []
        self.tags_version = tags_version
        self.shadow = shadow if shadow is not None else json.dumps({
            "state": {},
            "metadata": {},
            "version": 0
        })
        self._id = _id

    def to_doc(self):
        doc = {
            "product_name": self.product_name,
            "device_name": self.device_name,
            "username": self.username,
            "password": self.password,
            "status": self.status,
            "device_status": self.device_status,
            "last_status_update": self.last_status_update,
            "tags": self.tags,
            "tags_version": self.tags_version,
            "shadow": self.shadow
        }
        if self._id:
            doc["_id"] = self._id
        return doc

    def get_acl(self):
        publish = [
            "upload_data/{product_name}/{device_name}/+/+".format(
                product_name=self.product_name, device_name=self.device_name),
            "update_status/{product_name}/{device_name}/+".format(
                product_name=self.product_name, device_name=self.device_name),
            "cmd_resp/{product_name}/{device_name}/+/+/+".format(
                product_name=self.product_name, device_name=self.device_name),
            "rpc_resp/{product_name}/{device_name}/+/+/+".format(
                product_name=self.product_name, device_name=self.device_name),
            "get/{product_name}/{device_name}/+/+".format(
                product_name=self.product_name, device_name=self.device_name),
            "m2m/{product_name}/+/{device_name}/+".format(
                product_name=self.product_name, device_name=self.device_name),
            "update_ota_status/{product_name}/{device_name}/+".format(
                product_name=self.product_name, device_name=self.device_name),
        ]
        subscribe = [
            "tags/{product_name}/+/cmd/+/+/+/#".format(
                product_name=self.product_name)
        ]
        pubsub = []
        return {"publish": publish, "subscribe": subscribe, "pubsub": pubsub}

    @classmethod
    def add_connection(cls, msg):
        username = msg.get("username", "")
        if not username:
            return
        try:
            product_name, device_name = username.split("/")
        except ValueError:
            return
        if not product_name or not device_name:
            return
        condition = {"product_name": product_name, "device_name": device_name}
        device = cls.find_one(condition)
        if device:
            # TODO:
            # find_one returns a raw pymongo document, so index by key
            conn = {
                "client_id": msg["client_id"],
                "device": device["_id"],
                "connected": True,
                "connected_at": msg["connected_at"],
                "keepalive": msg["keepalive"],
                "ipaddress": msg["ipaddress"],
                "proto_ver": msg["proto_ver"]
            }
            Connections.update_one({"device": device["_id"]},
                                   {"$set": conn},
                                   upsert=True)

    @classmethod
    def remove_connection(cls, msg):
        username = msg.get("username", "")
        if not username:
            return
        try:
            product_name, device_name = username.split("/")
        except ValueError:
            return
        if not product_name or not device_name:
            return
        condition = {"product_name": product_name, "device_name": device_name}
        device = cls.find_one(condition)
        if device:
            # TODO:
            conn = {
                "client_id": msg["client_id"],
                "device": device["_id"],
                "connected": False,
                "disconnected_at": msg["disconnected_at"]
            }
Connections.update_one({"device": device._id}, {"$set": conn}, upsert=True) def disconnect(self): if self._id: pass def remove(self): if self._id: _id = ObjectId(self._id) Device.delete_one({"_id": _id}) DeviceAcl.delete_many({"username": self.username}) Connections.delete_many({"device": _id})