# pymongo's legacy Connection class is used throughout these scripts
from pymongo import Connection


def insertMango(ym):
    # ym is expected as 'YYYYMM'; split into year and month parts
    year = ym[:4]
    month = ym[4:]
    conn = Connection('10.18.10.41', 27018)
    print conn.alive()
    db = conn.op_production
    print db.collection_names()
    # col = db.monthly_stats
    col = db.marketinfos
    # col.remove({"year": "2013"})
    docs = col.find()
    count = docs.count()
    print count
    # print only the first document, then stop
    for k in docs:
        print k
        break
def getMango(ym):
    conn = Connection('10.18.10.41', 27018)
    print conn.alive()
    db = conn.op_production
    print db.collection_names()
    col = db.app_op_exts
    docs = col.find()
    print docs.count()
    wfile = open('/home/xiarong/appDB/data/app_op_exts_mangoDB_' + ym, 'w')
    for k in docs:
        try:
            wfile.write(str(k).encode('utf-8') + '\n')
        except:
            # print the document that could not be encoded and keep going
            print k
            continue
    wfile.close()
def getMango():
    conn = Connection('10.18.10.41', 27018)
    print conn.alive()
    db = conn.op_production
    print db.collection_names()
    # col = db.app_op_exts
    col = db.marketinfos
    obid = ['5130123f527015019e00001e', '4ded91c2431fe33688000402']
    # obid = ObjectId('4e2562a9431fe3485b0002cd')
    docs = col.find({'appkey': obid[1]})
    print docs.count()
    for k in docs:
        try:
            print k
        except:
            print k
            continue
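# A minimal usage sketch (not part of the original scripts): how these dump
# helpers might be driven from the command line. The 'YYYYMM' argument handling
# and the __main__ guard are assumptions. Note that the zero-argument getMango()
# above shadows the earlier getMango(ym) when both live in the same module, so
# only the zero-argument version is callable here.
if __name__ == '__main__':
    import sys
    ym = sys.argv[1] if len(sys.argv) > 1 else '201301'
    insertMango(ym)
    getMango()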
import redis
from pymongo import Connection
# MONGO_HOST, MONGO_PORT, REDIS_HOST, REDIS_PORT, REDIS_MAP, EXTRA_MAP,
# SPIDER, TASK_DB and TASK_COLLECTION are expected to come from the
# project's settings module.


class MonitorCronJob(object):

    def __init__(self):
        self.mongo_host = MONGO_HOST
        self.mongo_port = MONGO_PORT
        self.redis_host = REDIS_HOST
        self.redis_port = REDIS_PORT
        self.rmap = REDIS_MAP
        self.emap = EXTRA_MAP
        self.ulist = '%s:start_urls' % SPIDER
        self.dupefilter_key = '%s:dupefilter' % SPIDER
        self._connectDB()
        self._connectRedis()
        self._cleanRedis()

    def __del__(self):
        self._disconnectDB()

    def _connectDB(self):
        self.c = Connection(self.mongo_host, self.mongo_port)
        self.db = self.c[TASK_DB]
        self.collection = self.db[TASK_COLLECTION]
        print "[log] Connect to MongoDB %s:%s" % (self.mongo_host, self.mongo_port)

    def _disconnectDB(self):
        self.c.disconnect()
        del self.c, self.db, self.collection
        print "[log] Disconnect from MongoDB %s:%s" % (self.mongo_host, self.mongo_port)

    def _alive(self):
        return True if self.c and self.c.alive() else False

    def _connectRedis(self):
        self.r = redis.Redis(host=self.redis_host, port=self.redis_port)
        print "[log] Connect to Redis %s:%s" % (self.redis_host, self.redis_port)

    def _cleanRedis(self):
        self.r.delete(self.rmap)
        print "[log] Clean url hash map of %s" % self.rmap
        self.r.delete(self.emap)
        print "[log] Clean extra url hash map of %s" % self.emap
        self.r.delete(self.dupefilter_key)
        print "[log] Clean dupefilter_key of %s" % self.dupefilter_key

    def read_task(self, rto_type='cursor'):
        """ Get tasks from MongoDB, then return a cursor or a list """
        if self._alive():
            cursor = self.collection.find({'state': 1})
            if rto_type == 'cursor':
                print "[log] Read %d tasks from DB" % cursor.count()
                return cursor
            elif rto_type == 'list':
                tasks = []
                for i in cursor:
                    tasks.append(i)
                print "[log] Read %d tasks from DB" % len(tasks)
                return tasks
            else:
                pass

    def map_tasks(self):
        """ Map tasks' url and SKU
        ----------------------
        Task's structure
        {
            'sku'  : string, product sku of feifei,
            'urls' : list, list of this task's urls
        }
        """
        def to_redis(url):
            self.r.lpush(self.ulist, url)

        tasks = self.read_task('cursor')
        for i in tasks:
            sku = i.get('sku', None)
            urls = i.get('urls', None)
            extras = i.get('extras', None)
            if sku:
                if urls:
                    for url in urls:
                        self.r.hset(self.rmap, url, sku)  # hset(hash, key, value)
                        to_redis(url)
                        print "[log] List %s to %s" % (url, self.ulist)
                if extras:
                    for extra in extras:
                        self.r.hset(self.emap, extra, sku)
                        to_redis(extra)
                        print "[log] List %s to %s" % (extra, self.ulist)

    def test(self):
        with open('test.txt', 'ab') as f:
            f.write('1\n')

    def __getattribute__(self, name):
        try:
            rt = object.__getattribute__(self, name)
        except:
            rt = None
        return rt
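# A minimal driver sketch (an assumption, not part of the original class): the
# cron job would typically be instantiated once per run, which connects to
# MongoDB and Redis and clears the old hash maps, and then asked to push every
# active task's urls into the spider's start_urls list in one shot.
if __name__ == '__main__':
    job = MonitorCronJob()
    job.map_tasks()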
import types
from copy import deepcopy

import collectd
from pymongo import Connection
# Assumption: V is the usual LooseVersion alias used for version comparisons.
from distutils.version import LooseVersion as V

# SERVER_STATUS_METRICS, CONNECTION_POOL_STATUS_METRICS and DBSTATS_METRICS
# are the metric-name/type maps defined elsewhere in this plugin module.


class MongoDB(object):

    def __init__(self):
        self.plugin_name = "mongo"
        self.mongo_host = "127.0.0.1"
        self.mongo_port = 27017
        self.mongo_dbs = ["admin", ]
        self.mongo_user = None
        self.mongo_password = None
        self.includeConnPoolMetrics = None
        self.includeServerStatsMetrics = None
        self.includeDbstatsMetrics = None

    def connect(self):
        global SERVER_STATUS_METRICS
        self.mongo_client = Connection(host=self.mongo_host, port=self.mongo_port, slave_okay=True)
        if not self.mongo_client.alive():
            collectd.error("mongodb plugin failed to connect to %s:%d" % (self.mongo_host, self.mongo_port))

        server_status = self.mongo_client[self.mongo_dbs[0]].command("serverStatus")
        version = server_status["version"]
        at_least_2_4 = V(version) >= V("2.4.0")
        if not at_least_2_4:
            # pre-2.4 servers nest index counters under "btree"
            indexCountersMap = SERVER_STATUS_METRICS.pop("indexCounters")
            SERVER_STATUS_METRICS["indexCounters"] = {"btree": indexCountersMap}

    def disconnect(self):
        if self.mongo_client and self.mongo_client.alive():
            self.mongo_client.disconnect()

    def config(self, obj):
        for node in obj.children:
            if node.key == "Port":
                self.mongo_port = int(node.values[0])
                collectd.info("mongodb plugin: Port " + str(self.mongo_port))
            elif node.key == "Host":
                self.mongo_host = node.values[0]
                collectd.info("mongodb plugin: Host " + self.mongo_host)
            elif node.key == "User":
                self.mongo_user = node.values[0]
            elif node.key == "Password":
                self.mongo_password = node.values[0]
            elif node.key == "Databases":
                self.mongo_dbs = node.values
                collectd.info("mongodb plugin: Databases " + str(self.mongo_dbs))
            elif node.key == "ConnectionPoolStatus":
                self.includeConnPoolMetrics = node.values
                collectd.info("mongodb plugin: ConnectionPoolStatus " + str(self.includeConnPoolMetrics))
            elif node.key == "ServerStats":
                self.includeServerStatsMetrics = node.values
                collectd.info("mongodb plugin: ServerStats " + str(self.includeServerStatsMetrics))
            elif node.key == "DBStats":
                self.includeDbstatsMetrics = node.values
                collectd.info("mongodb plugin: DBStats " + str(self.includeDbstatsMetrics))
            else:
                collectd.warning("mongodb plugin: Unknown configuration key %s" % node.key)

    def submit(self, instance, type, value, db=None):
        # dispatch a single value; recursive_submit() walks the nested metric
        # dicts down to the leaf, where the value is the collectd type
        if db:
            plugin_instance = "%s-%s" % (self.mongo_port, db)
        else:
            plugin_instance = str(self.mongo_port)
        v = collectd.Values()
        v.plugin = self.plugin_name
        v.plugin_instance = plugin_instance
        v.type = type
        v.type_instance = instance
        v.values = [value, ]
        v.dispatch()

    def recursive_submit(self, type_tree, data_tree, instance_name=None, db=None):
        # if we are still in the middle of the type and data tree
        if isinstance(type_tree, types.DictType) and isinstance(data_tree, types.DictType):
            for type_name, type_value in type_tree.iteritems():
                next_instance_name = None
                if instance_name:
                    next_instance_name = instance_name + "." + type_name
                else:
                    next_instance_name = type_name
                if data_tree.has_key(type_name):
                    self.recursive_submit(type_value, data_tree[type_name], next_instance_name, db=db)
                else:
                    # may want to log this, but some mongodb setups may not have anything to report
                    pass
        elif isinstance(type_tree, types.DictType) or isinstance(data_tree, types.DictType):
            print("type tree and data tree structure differ for data instance: " + instance_name)
        else:
            self.submit(instance_name, type_tree, data_tree, db)

    def publish_connection_pool_metrics(self):
        # connPoolStats produces the same results regardless of db used
        db = self.mongo_client[self.mongo_dbs[0]]
        if self.mongo_user and self.mongo_password:
            db.authenticate(self.mongo_user, self.mongo_password)
        conn_pool_stats = db.command("connPoolStats")
        metrics_to_collect = {}
        if self.includeConnPoolMetrics:
            # node.values from config() is a tuple, so iterate it directly
            for root_metric_key in self.includeConnPoolMetrics:
                if conn_pool_stats.has_key(root_metric_key):
                    metrics_to_collect[root_metric_key] = deepcopy(CONNECTION_POOL_STATUS_METRICS[root_metric_key])
        else:
            metrics_to_collect = CONNECTION_POOL_STATUS_METRICS
        self.recursive_submit(metrics_to_collect, conn_pool_stats)

    def publish_dbstats(self):
        for db_name in self.mongo_dbs:
            db = self.mongo_client[db_name]
            if self.mongo_user and self.mongo_password:
                db.authenticate(self.mongo_user, self.mongo_password)
            dbstats = db.command("dbStats")
            metrics_to_collect = {}
            if self.includeDbstatsMetrics:
                for root_metric_key in self.includeDbstatsMetrics:
                    if dbstats.has_key(root_metric_key):
                        metrics_to_collect[root_metric_key] = deepcopy(DBSTATS_METRICS[root_metric_key])
            else:
                metrics_to_collect = DBSTATS_METRICS
            self.recursive_submit(metrics_to_collect, dbstats, db=db_name)

    def publish_server_status(self):
        # serverStatus produces the same results regardless of db used
        db = self.mongo_client[self.mongo_dbs[0]]
        if self.mongo_user and self.mongo_password:
            db.authenticate(self.mongo_user, self.mongo_password)
        server_status = db.command("serverStatus")
        metrics_to_collect = {}
        if self.includeServerStatsMetrics:
            for root_metric_key in self.includeServerStatsMetrics:
                if server_status.has_key(root_metric_key):
                    metrics_to_collect[root_metric_key] = deepcopy(SERVER_STATUS_METRICS[root_metric_key])
        else:
            metrics_to_collect = deepcopy(SERVER_STATUS_METRICS)
        # rename the "." lock to "GLOBAL" and add a per-database lock entry
        if metrics_to_collect["locks"].has_key("."):
            print(SERVER_STATUS_METRICS["locks"])
            global_lock_data = metrics_to_collect["locks"].pop(".")
            metrics_to_collect["locks"]["GLOBAL"] = global_lock_data
            print(SERVER_STATUS_METRICS["locks"])
            for db_name in self.mongo_dbs:
                metrics_to_collect["locks"][db_name] = deepcopy(SERVER_STATUS_METRICS["locks"]["."])
        self.recursive_submit(metrics_to_collect, server_status)

    def publish_data(self):
        self.publish_server_status()
        self.publish_connection_pool_metrics()
        self.publish_dbstats()
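# A minimal registration sketch (an assumption, not shown in the original
# plugin code): collectd Python plugins are wired up by registering callbacks
# on a module-level instance, so this class would typically be hooked into the
# daemon roughly like this.
mongodb = MongoDB()
collectd.register_config(mongodb.config)
collectd.register_init(mongodb.connect)
collectd.register_read(mongodb.publish_data)
collectd.register_shutdown(mongodb.disconnect)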