Beispiel #1
0
    def _full_sql(self):
        """Copy every configured MongoDB table into PostgreSQL, page by page.

        For each name in ``self.mongo['tables']`` the table is read ``limit``
        documents at a time through ``Mongo.find(offset=..., limit=...)``.
        Each page is converted with ``format_data`` and written with one
        ``Postgresql.insert_batch`` call, using the statement returned by
        ``get_sql`` for that table.
        """
        self.logger.record('Starting:based full sql...')

        for table in self.mongo['tables']:

            # sync
            # The Mongo wrapper is built from the config dict, so the current
            # table is selected by writing it into that dict first.
            self.mongo['table'] = table
            client = Mongo(self.mongo)
            total = client.count()
            offset = 0
            limit = 100
            # Strict '<': with '<=' an extra, always-empty page was fetched
            # (and an empty batch inserted) whenever total was 0 or an exact
            # multiple of limit.
            while offset < total:
                # record offset and limit
                self.logger.record('total:{}, offset:{}, limit:{}'.format(
                    total, offset, limit))
                sleep(_SLEEP)
                queryset = client.find(offset=offset, limit=limit)
                actions = [format_data(table, q) for q in queryset]

                # pg save for batch; a page can still come back empty if
                # documents were removed after count() was taken.
                if actions:
                    sql = get_sql(table)
                    Postgresql(self.postgresql).insert_batch(sql, actions)

                offset += limit

        self.logger.record('Ending:based full sql.')
Beispiel #2
0
    def _inc_oplog(self):
        """Tail the MongoDB oplog and replay each change into Elasticsearch.

        Starts after the timestamp saved in ``self.oplog['ts']`` or, when none
        is saved, after the first oplog entry in natural order. Every insert
        ('i'), update ('u') and delete ('d') entry is forwarded to ``self.es``
        and the entry's timestamp is persisted with ``write_config`` so a
        later run can resume. The method loops forever; on ``AutoReconnect``
        it sleeps and reopens the cursor from the last seen timestamp.
        """
        self.logger.record('Starting:based increase oplog...')

        # sync
        oplog = Mongo(self.mongo).client().local.oplog.rs
        # Resume offset: the saved timestamp if there is one, otherwise the
        # timestamp of the first oplog entry.
        saved_ts = self.oplog['ts']
        if saved_ts:
            stamp = saved_ts
        else:
            stamp = oplog.find().sort('$natural', ASCENDING).limit(
                -1).next()['ts']

        while True:
            # The saved value arrives as text and is turned back into an
            # object with eval(); values taken from the oplog itself are
            # already objects and must not be eval'd (eval() raises TypeError
            # on non-strings, which previously broke every reconnect).
            # NOTE(review): eval() executes arbitrary code from the config
            # value -- replace with an explicit parser once the stored format
            # is confirmed.
            if isinstance(stamp, str):
                stamp = eval(stamp)

            kw = {
                'filter': {'ts': {'$gt': stamp}},
                'cursor_type': CursorType.TAILABLE_AWAIT,
                'oplog_replay': True,
            }

            cursor = oplog.find(**kw)
            try:

                while cursor.alive:
                    for q in cursor:
                        stamp = q['ts']

                        # Operation type: u(pdate), i(nsert) or d(elete).
                        # Other entry types are skipped; they previously
                        # crashed on the namespace split or the '_id' lookup.
                        op = q['op']
                        if op in ('u', 'i', 'd'):
                            # Namespace of the change, e.g. saas_dq_uat.pos.
                            # Split on the first dot only so table names that
                            # contain dots stay intact.
                            _db, _, table = q['ns'].partition('.')
                            doc = q['o']

                            # database id -> document id
                            doc_id = str(doc['_id'])
                            del doc['_id']

                            # format data
                            format_data(doc)
                            format_data_for_aggs(doc)

                            # '==' rather than 'is': string identity is an
                            # interpreter detail, not a value comparison.
                            if op == 'u':
                                self.es.update(table, doc_id, doc)
                            elif op == 'i':
                                self.es.insert(table, doc_id, doc)
                            else:
                                self.es.delete(table, doc_id)

                        # record the incremental position
                        write_config('oplog', 'ts', stamp)
                    sleep(_SLEEP)

            except AutoReconnect:
                sleep(_SLEEP)

        # Not reachable while the loop above has no exit; kept for the day a
        # stop condition is added.
        self.logger.record('Ending:based increase oplog.')
Beispiel #3
0
class Connections(MongoMixin):
    """Connection record backed by the ``connections`` collection of ``mqtt``."""

    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["connections"]

    def __init__(self, client_id, connected, device, connected_at,
                 keepalive=60, ipaddress="", proto_ver=0, conn_ack=0):
        """Store the given connection attributes on the instance unchanged."""
        # Required attributes.
        self.client_id = client_id
        self.device = device
        self.connected = connected
        self.connected_at = connected_at
        # Optional attributes with their defaults.
        self.keepalive = keepalive
        self.ipaddress = ipaddress
        self.proto_ver = proto_ver
        self.conn_ack = conn_ack

    def to_doc(self):
        """Return the instance attributes as a plain dict."""
        field_names = (
            "client_id",
            "device",
            "connected",
            "connected_at",
            "keepalive",
            "ipaddress",
            "proto_ver",
            "conn_ack",
        )
        return {name: getattr(self, name) for name in field_names}
Beispiel #4
0
class MongoMixin:
    """Classmethod shortcuts that forward to the class-level ``COLLECTION``.

    ``COLLECTION`` is ``None`` here, and every method calls through it, so a
    class using this mixin has to assign its own collection.
    """

    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = None

    @classmethod
    def find(cls, condition):
        """Return ``COLLECTION.find(condition)``."""
        collection = cls.COLLECTION
        return collection.find(condition)

    @classmethod
    def find_one(cls, condition=None):
        """Return ``COLLECTION.find_one(condition)``."""
        collection = cls.COLLECTION
        return collection.find_one(condition)

    @classmethod
    def insert_one(cls, doc):
        """Return ``COLLECTION.insert_one(doc)``."""
        collection = cls.COLLECTION
        return collection.insert_one(doc)

    @classmethod
    def delete_one(cls, condition):
        """Return ``COLLECTION.delete_one(condition)``."""
        collection = cls.COLLECTION
        return collection.delete_one(condition)

    @classmethod
    def delete_many(cls, condition):
        """Return ``COLLECTION.delete_many(condition)``."""
        collection = cls.COLLECTION
        return collection.delete_many(condition)

    @classmethod
    def replace_one(cls, filter, replacement, upsert=False):
        """Return ``COLLECTION.replace_one(filter, replacement, upsert)``."""
        collection = cls.COLLECTION
        return collection.replace_one(filter, replacement, upsert)

    @classmethod
    def update_one(cls, filter, replacement, upsert=False):
        """Return ``COLLECTION.update_one(filter, replacement, upsert)``."""
        collection = cls.COLLECTION
        return collection.update_one(filter, replacement, upsert)
Beispiel #5
0
    def _full_sql(self):
        """Rebuild the Elasticsearch index of every configured MongoDB table.

        For each name in ``self.mongo['tables']`` the index is initialised
        with the mapping from ``read_mapping``. The table is then read
        ``limit`` documents at a time; each document is cleaned with
        ``format_data`` and ``format_data_for_aggs`` and sent to
        ``self.es.insert_batch`` as one bulk action per page.
        """
        self.logger.record('Starting:based full sql...')

        for table in self.mongo['tables']:

            # create new es index
            self.logger.record('create new es index:{}'.format(table))
            self.es.init(table, mapping=read_mapping(table))

            # sync
            # The Mongo wrapper is built from the config dict, so the current
            # table is selected by writing it into that dict first.
            self.mongo['table'] = table
            client = Mongo(self.mongo)
            total = client.count()
            offset = 0
            limit = 100
            # Strict '<': with '<=' an extra, always-empty page was fetched
            # (and an empty bulk request sent) whenever total was 0 or an
            # exact multiple of limit.
            while offset < total:

                # record offset and limit
                self.logger.record('offset:{}, limit:{}'.format(offset, limit))

                queryset = client.find(offset=offset, limit=limit)
                actions = []
                action_ids = []
                for doc in queryset:
                    # database id -> document id; '_id' is taken out of the
                    # source document and used as the Elasticsearch id.
                    doc_id = str(doc.pop('_id'))
                    # format data (both helpers are called for their effect
                    # on doc; their return values are not used)
                    format_data(doc)
                    format_data_for_aggs(doc)

                    action_ids.append(doc_id)
                    actions.append({
                        "_index": table,
                        "_type": table,
                        "_id": doc_id,
                        "_source": doc
                    })

                # elastic save for batch; a page can still come back empty if
                # documents were removed after count() was taken.
                if actions:
                    self.es.insert_batch(table, actions, action_ids)

                sleep(_SLEEP)
                offset += limit

        self.logger.record('Ending:based full sql.')
Beispiel #6
0
 def __init__(self, eventModel, mongoClient, user, emotes, actionQueue):
     """Store the event, user and action queue and create the helper objects.

     eventModel is kept as-is and also handed to Trigger; mongoClient is
     wrapped in Mongo; emotes is wrapped in Emote. The remaining helpers
     (IdentityUtil, Random, Ban, Channel) are created without arguments.
     """
     self._event = eventModel
     self._user = user
     # Helpers that take no constructor arguments.
     self._identityUtil = IdentityUtil()
     self._randomUtil = Random()
     self._banUtil = Ban()
     self._channelUtil = Channel()
     # Helpers built around the injected arguments.
     self._mongoUtil = Mongo(mongoClient)
     self._triggerUtil = Trigger(eventModel)
     self._emoteUtil = Emote(emotes)
     self._queue = actionQueue
Beispiel #7
0
def main():
    """Run the sync, topic collection and batch processing steps in order.

    Calls ``sync_redis_with_mongo`` with a fresh Redis handle and Mongo
    object, collects topics through a ``Crawler`` using 4 threads, then hands
    them to ``batch_process_topics_data`` in batches of 50 using 8 threads.
    """
    redis_conn = redis_init()
    mongo_store = Mongo()
    sync_redis_with_mongo(redis_conn, mongo_store)

    topic_crawler = Crawler()
    all_topics = get_all_topics(topic_crawler, thread_num=4)

    batch_process_topics_data(
        redis_conn,
        mongo_store,
        topic_crawler,
        all_topics,
        batch=50,
        thread_num=8,
    )
Beispiel #8
0
def main():
    """Fill PostgreSQL with generated rows by replaying the Mongo tables.

    The configured Mongo tables are read page by page, over and over. Every
    document gets a fresh sequential ``_id`` and a random ``store_id`` from
    ``STORE_IDS``, is converted with ``format_data`` and inserted in batches.
    A new pass over the tables starts as long as the running row counter is
    at most 1000000; the counter is only checked between passes, so the final
    pass may exceed that number.
    """
    config = read_config()
    mongo = config['mongo']
    postgresql = config['postgresql']
    logger = Logger('test')

    index = 0
    while index <= 1000000:
        index_before_pass = index
        for table in mongo['tables']:
            # sync
            # The Mongo wrapper is built from the config dict, so the current
            # table is selected by writing it into that dict first.
            mongo['table'] = table
            client = Mongo(mongo)
            total = client.count()
            offset = 0
            limit = 100
            # Strict '<': with '<=' an extra, always-empty page was fetched
            # whenever total was 0 or an exact multiple of limit.
            while offset < total:
                # record offset and limit
                message = 'index:{}, offset:{}, limit:{}'.format(
                    index, offset, limit)
                print(message)
                logger.record(message)
                queryset = client.find(offset=offset, limit=limit)
                actions = []
                for q in queryset:
                    index += 1
                    # Overwrite the identity fields so every copy is unique.
                    q['_id'] = str(index)
                    q['store_id'] = random.choice(STORE_IDS)
                    actions.append(format_data(table, q))

                # pg save for batch
                if actions:
                    sql = get_sql(table)
                    Postgresql(postgresql).insert_batch(sql, actions)

                sleep(_SLEEP)
                offset += limit

        # A full pass that produced no rows means the tables are empty; the
        # counter can then never reach the target and the loop would spin
        # forever.
        if index == index_before_pass:
            break
Beispiel #9
0
    def _build_mapping(self):
        '''
        Build the Elasticsearch mapping of each configured MongoDB table.

        Following the mongo section of the config, documents are sampled from
        each table at randomly increasing offsets. Each sampled document is
        merged into the table's existing mapping with ``format_mapping`` and
        the result is written back with ``write_mapping``.
        '''
        self.logger.record('Starting:build mapping...')

        for table in self.mongo['tables']:

            # build new es mapping
            self.logger.record('build new es mapping:{}'.format(table))
            old_mapping = read_mapping(table)

            # build
            # The Mongo wrapper is built from the config dict, so the current
            # table is selected by writing it into that dict first.
            self.mongo['table'] = table
            client = Mongo(self.mongo)
            total = client.count()
            offset = 0
            # Strict '<': an offset equal to total lies past the last
            # document, so '<=' only produced one wasted query.
            while offset < total:
                self.logger.record(
                    'mapping: {}.json, total number:{}, current number:{}'.
                    format(table, total, offset))

                # NOTE(review): the result of find_one is iterated, so this
                # wrapper presumably returns a sequence/cursor -- confirm.
                for query in client.find_one(offset=offset):

                    del query['_id']

                    format_mapping(
                        old_mapping['mappings'][table]['properties'], query)

                    write_mapping(table, old_mapping)

                # Move forward a random 1..100 steps: (1) fewer repeated
                # operations, (2) samples spread evenly over the table.
                offset += random.randint(1, 100)

            sleep(_SLEEP)

        self.logger.record('Ending:build mapping.')
Beispiel #10
0
class DeviceAcl(MongoMixin):
    """ACL record backed by the ``device_acl`` collection of ``mqtt``."""

    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["device_acl"]

    def __init__(self, username, publish, subscribe, pubsub):
        """Store the username and its three topic lists unchanged."""
        self.username = username
        self.publish, self.subscribe, self.pubsub = publish, subscribe, pubsub

    def to_doc(self):
        """Return the instance attributes as a plain dict."""
        field_names = ("username", "publish", "subscribe", "pubsub")
        return {name: getattr(self, name) for name in field_names}
	loss = SigmoidLoss(adv_temperature = 1),
	batch_size = train_dataloader.get_batch_size(), 
	regul_rate = 0.0
)

# train the model
# 3000 training rounds on CPU with the "adam" optimiser setting.
# NOTE(review): alpha is presumably the learning rate -- confirm against Trainer.
trainer = Trainer(model = model, data_loader = train_dataloader, train_times = 3000, alpha = 2e-5, use_gpu = False, opt_method = "adam")
trainer.run()
transe.save_checkpoint('./checkpoint/transe_fn.ckpt')

# test the model
# The checkpoint that was just written is loaded back before evaluation.
transe.load_checkpoint('./checkpoint/transe_fn.ckpt')
tester = Tester(model = transe, data_loader = test_dataloader, use_gpu = False)
# tester.run_link_prediction(type_constrain = False)
# NOTE: "threshlod" is a misspelling of "threshold"; the name is left as-is
# because it is a module-level variable.
acc, threshlod = tester.run_triple_classification()

# Persist the result in the "training_results" collection.
db = Mongo().get_client()
db_col = db["training_results"]

# One document per model, looked up by its "name" field.
transe_result = db_col.find_one({"name": "transe"})

if transe_result:
	# A result already exists: overwrite its acc and threshold fields.
	print("update acc and threshold to existed transe result")
	query = { "name": "transe" }
	update_values = { "$set": { "acc": acc, "threshold": threshlod } }
	db_col.update_one(query, update_values)
else:
	# First run: create the result document.
	print("add new transe result")
	transe_dict = { "name": "transe", "acc": acc, "threshold": threshlod }
	x = db_col.insert_one(transe_dict)
Beispiel #12
0
class Device(MongoMixin):
    """Device record backed by the ``devices`` collection of ``mqtt``.

    Besides holding the device attributes, the class records connect and
    disconnect events in ``Connections`` and builds the device's topic ACL.
    """

    CLIENT = Mongo(settings.MONGO_URL, settings.MONGO_PORT)
    COLLECTION = CLIENT["mqtt"]["devices"]

    def __init__(self,
                 product_name,
                 device_name,
                 username,
                 password,
                 status="",
                 device_status="{}",
                 last_status_update=0,
                 tags=None,
                 tags_version=1,
                 shadow=None,
                 _id=None):
        """Store the device attributes.

        ``tags`` defaults to a new empty list and ``shadow`` to a JSON string
        holding an empty state, empty metadata and version 0.
        """
        self.product_name = product_name
        self.device_name = device_name
        self.username = username
        self.password = password
        self.status = status  # whether the device is allowed to connect
        self.device_status = device_status
        self.last_status_update = last_status_update  # latest update time
        self.tags = tags if tags is not None else []
        self.tags_version = tags_version
        self.shadow = shadow if shadow is not None else json.dumps({
            "state": {},
            "metadata": {},
            "version": 0
        })
        self._id = _id

    def to_doc(self):
        """Return the attributes as a dict; ``_id`` is included only if set."""
        doc = {
            "product_name": self.product_name,
            "device_name": self.device_name,
            "username": self.username,
            "password": self.password,
            "status": self.status,
            "device_status": self.device_status,
            "last_status_update": self.last_status_update,
            "tags": self.tags,
            "tags_version": self.tags_version,
            "shadow": self.shadow,
        }
        if self._id:
            doc["_id"] = self._id
        return doc

    def get_acl(self):
        """Return the publish/subscribe/pubsub topic patterns of this device."""
        publish_templates = (
            "upload_data/{product_name}/{device_name}/+/+",
            "update_status/{product_name}/{device_name}/+",
            "cmd_resp/{product_name}/{device_name}/+/+/+",
            "rpc_resp/{product_name}/{device_name}/+/+/+",
            "get/{product_name}/{device_name}/+/+",
            "m2m/{product_name}/+/{device_name}/+",
            "update_ota_status/{product_name}/{device_name}/+",
        )
        publish = [
            template.format(product_name=self.product_name,
                            device_name=self.device_name)
            for template in publish_templates
        ]
        subscribe = [
            "tags/{product_name}/+/cmd/+/+/+/#".format(
                product_name=self.product_name)
        ]
        pubsub = []
        return {"publish": publish, "subscribe": subscribe, "pubsub": pubsub}

    @classmethod
    def _device_id_from_msg(cls, msg):
        """Return the ``_id`` of the device named by ``msg['username']``.

        The username must have the form ``<product_name>/<device_name>`` with
        both parts non-empty. Returns ``None`` when the username is missing or
        malformed, or when no matching device is stored.
        """
        username = msg.get("username", "")
        if not username:
            return None

        try:
            product_name, device_name = username.split("/")
        except ValueError:
            return None

        # Both parts are required. The previous check
        # ``not product_name or device_name`` bailed out for every username
        # that *had* a device name, so no connection was ever recorded.
        if not product_name or not device_name:
            return None

        condition = {"product_name": product_name, "device_name": device_name}
        device = cls.find_one(condition)
        if not device:
            return None
        # find_one hands back whatever the collection returns. With a dict
        # result the id has to be read by key (attribute access would raise
        # AttributeError); any other object keeps the attribute lookup.
        if isinstance(device, dict):
            return device["_id"]
        return device._id

    @classmethod
    def add_connection(cls, msg):
        """Mark the device named in ``msg`` as connected in ``Connections``.

        Does nothing when the username cannot be resolved to a stored device.
        Raises ``KeyError`` if ``msg`` lacks one of the connection fields.
        """
        device_id = cls._device_id_from_msg(msg)
        if device_id is None:
            return

        conn = {
            "client_id": msg["client_id"],
            "device": device_id,
            "connected": True,
            "connected_at": msg["connected_at"],
            "keepalive": msg["keepalive"],
            "ipaddress": msg["ipaddress"],
            "proto_ver": msg["proto_ver"]
        }
        Connections.update_one({"device": device_id}, {"$set": conn},
                               upsert=True)

    @classmethod
    def remove_connection(cls, msg):
        """Mark the device named in ``msg`` as disconnected in ``Connections``.

        Does nothing when the username cannot be resolved to a stored device.
        Raises ``KeyError`` if ``msg`` lacks ``client_id`` or
        ``disconnected_at``.
        """
        device_id = cls._device_id_from_msg(msg)
        if device_id is None:
            return

        conn = {
            "client_id": msg["client_id"],
            "device": device_id,
            "connected": False,
            "disconnected_at": msg["disconnected_at"]
        }
        Connections.update_one({"device": device_id}, {"$set": conn},
                               upsert=True)

    def disconnect(self):
        """Placeholder: currently performs no action."""
        if self._id:
            pass

    def remove(self):
        """Delete this device together with its ACL and connection records.

        Does nothing when the instance has no ``_id``.
        """
        if self._id:
            _id = ObjectId(self._id)
            Device.delete_one({"_id": _id})
            DeviceAcl.delete_many({"username": self.username})
            Connections.delete_many({"device": _id})