class CassandraServer(BaseServer, JobGetterMixin):

    regenerating = False
    redis_client = None

    def __init__(self, config):
        super(CassandraServer, self).__init__(config)
        self.cassandra_cf_content = config["cassandra_cf_content"]
        self.cassandra_cf_temp_content = config["cassandra_cf_temp_content"]
        # Cassandra clients & factories
        self.cassandra_client = CassandraClusterPool(
            config["cassandra_servers"],
            keyspace=config["cassandra_keyspace"],
            pool_size=len(config["cassandra_servers"]) * 2)
        self.cassandra_client.startService()
        # Negative cache
        self.disable_negative_cache = config.get("disable_negative_cache", False)
        # Redis
        self.redis_hosts = config["redis_hosts"]
        # Deltas
        self.delta_enabled = config.get('delta_enabled', False)
        self.delta_debug = config.get('delta_debug', False)
        self.delta_sample_rate = config.get('delta_sample_rate', 1.0)
        self.cassandra_cf_delta = config.get('cassandra_cf_delta', None)
        self.cassandra_cf_delta_user = config.get('cassandra_cf_delta_user', None)
        # Sanity check the config; if the delta CFs aren't set, turn deltas off.
        if not all([self.cassandra_cf_delta, self.cassandra_cf_delta_user]):
            logger.warn('Disabling cassandra deltas; both cf_delta and'
                        ' cf_delta_user must be set in the config.')
            self.delta_enabled = False
        self.setupJobGetter(config)

    def start(self):
        start_deferred = super(CassandraServer, self).start()
        start_deferred.addCallback(self._cassandraStart)
        return start_deferred

    @inlineCallbacks
    def _cassandraStart(self, started=False):
        logger.debug("Starting Cassandra components.")
        try:
            self.redis_client = yield RedisShardingConnection(self.redis_hosts)
        except Exception as e:
            logger.error("Could not connect to Redis: %s" % e)
            self.shutdown()
            raise Exception("Could not connect to Redis.")
        if self.disable_negative_cache:
            logger.warning("Disabling negative cache.")
        logger.debug("Started RedisShardingConnection")
        self.pg = PageGetter(
            self.cassandra_client,
            redis_client=self.redis_client,
            disable_negative_cache=self.disable_negative_cache,
            rq=self.rq)
        logger.debug("Initialized pagegetter.")
        returnValue(True)
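For orientation, a minimal, self-contained sketch of the pool bootstrap pattern these servers share, assuming the telephus client library; the seed host, keyspace, and column family names below are placeholders, not the project's real configuration.

from telephus.pool import CassandraClusterPool
from twisted.internet import reactor
from twisted.internet.defer import inlineCallbacks

# Placeholder seed list and keyspace; the real values come from config above.
pool = CassandraClusterPool(
    ["127.0.0.1"],
    keyspace="example_keyspace",
    pool_size=2)
pool.startService()

@inlineCallbacks
def demo():
    # Write one column, read it back, then shut everything down.
    yield pool.insert("key1", "example_cf", "value", column="col1")
    result = yield pool.get("key1", "example_cf", column="col1")
    print result.column.value
    yield pool.stopService()
    reactor.stop()

reactor.callWhenRunning(demo)
reactor.run()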
def __init__(self, config, port=None):
    super(IdentityServer, self).__init__(config)
    self.plugin_mapping = config["plugin_mapping"]
    self.setupMySQL(config)
    self.setupIdentityQueue(config)
    self.cassandra_cf_identity = config["cassandra_cf_identity"]
    self.cassandra_cf_connections = config["cassandra_cf_connections"]
    self.cassandra_cf_recommendations = config["cassandra_cf_recommendations"]
    self.cassandra_cf_reverse_recommendations = config["cassandra_cf_reverse_recommendations"]
    self.cassandra_client = CassandraClusterPool(
        config["cassandra_servers"],
        keyspace=config["cassandra_keyspace"],
        pool_size=len(config["cassandra_servers"]) * 2)
    self.cassandra_client.startService()
    resource = Resource()
    self.function_resource = Resource()
    resource.putChild("function", self.function_resource)
    if port is None:
        port = config["identity_server_port"]
    self.site_port = reactor.listenTCP(port, server.Site(resource))
    self.expose(self.updateConnections)
    self.expose(self.updateAllConnections)
    self.expose(self.updateAllIdentities)
    self.expose(self.getRecommendations)
    self.expose(self.getReverseRecommendations)
    self.expose(self.updateIdentity)
    # setup manhole
    manhole_namespace = {
        'service': self,
        'globals': globals(),
    }
    reactor.listenTCP(
        config["manhole_identity_port"],
        self.getManholeFactory(manhole_namespace,
                               admin=config["manhole_password"]))
def __init__(self, config):
    super(CassandraServer, self).__init__(config)
    self.cassandra_cf_content = config["cassandra_cf_content"]
    self.cassandra_cf_temp_content = config["cassandra_cf_temp_content"]
    # Cassandra clients & factories
    self.cassandra_client = CassandraClusterPool(
        config["cassandra_servers"],
        keyspace=config["cassandra_keyspace"],
        pool_size=len(config["cassandra_servers"]) * 2)
    self.cassandra_client.startService()
    # Negative cache
    self.disable_negative_cache = config.get("disable_negative_cache", False)
    # Redis
    self.redis_hosts = config["redis_hosts"]
    # Deltas
    self.delta_enabled = config.get('delta_enabled', False)
    self.delta_debug = config.get('delta_debug', False)
    self.delta_sample_rate = config.get('delta_sample_rate', 1.0)
    self.cassandra_cf_delta = config.get('cassandra_cf_delta', None)
    self.cassandra_cf_delta_user = config.get('cassandra_cf_delta_user', None)
    # Sanity check the config; if the delta CFs aren't set, turn deltas off.
    if not all([self.cassandra_cf_delta, self.cassandra_cf_delta_user]):
        logger.warn('Disabling cassandra deltas; both cf_delta and'
                    ' cf_delta_user must be set in the config.')
        self.delta_enabled = False
    self.setupJobGetter(config)
def initialize(self):
    LOGGER.info('Initializing %s' % self.__class__.__name__)
    self.client = CassandraClusterPool(
        self.servers,
        keyspace=self.keyspace,
        pool_size=self.pool_size)
    self.client.startService()
    LOGGER.info('%s initialized, connected to: %s.'
                % (self.__class__.__name__, self.servers))
def cluster_and_pool(self, num_nodes=10, pool_size=5, start=True,
                     cluster_class=None):
    if cluster_class is None:
        cluster_class = FakeCassandraCluster
    cluster = cluster_class(num_nodes, start_port=self.start_port)
    pool = CassandraClusterPool([cluster.iface],
                                thrift_port=self.start_port,
                                pool_size=pool_size)
    if start:
        cluster.startService()
        pool.startService()
    self.cluster = cluster
    self.pool = pool
    try:
        yield cluster, pool
    finally:
        del self.pool
        del self.cluster
        if pool.running:
            pool.stopService()
        if cluster.running:
            cluster.stopService()
def cluster_and_pool(self, num_nodes=10, pool_size=5, start=True,
                     cluster_class=None, api_version=None):
    if cluster_class is None:
        cluster_class = FakeCassandraCluster
    cluster = cluster_class(num_nodes, start_port=self.start_port)
    pool = CassandraClusterPool([cluster.iface],
                                thrift_port=self.start_port,
                                pool_size=pool_size,
                                api_version=api_version)
    if start:
        cluster.startService()
        pool.startService()
    self.cluster = cluster
    self.pool = pool
    try:
        yield cluster, pool
    finally:
        del self.pool
        del self.cluster
        if pool.running:
            pool.stopService()
        if cluster.running:
            cluster.stopService()
def cluster_and_pool(self, num_nodes=10, pool_size=5, start=True,
                     cluster_class=None, node_discovery=True,
                     fill_throttle=0.0):
    if cluster_class is None:
        cluster_class = FakeCassandraCluster
    cluster = cluster_class(num_nodes, start_port=self.start_port)
    pool = CassandraClusterPool([cluster.iface],
                                thrift_port=self.start_port,
                                pool_size=pool_size,
                                auto_node_discovery=node_discovery,
                                fill_pool_throttle=fill_throttle)
    if start:
        cluster.startService()
        pool.startService()
    self.cluster = cluster
    self.pool = pool
    try:
        yield cluster, pool
    finally:
        del self.pool
        del self.cluster
        if pool.running:
            pool.stopService()
        if cluster.running:
            cluster.stopService()
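The three variants above share one shape: build a fake cluster and a pool, start both, yield them, and stop both in the finally clause. That generator-plus-try/finally form is exactly what contextlib.contextmanager expects, so the assumed wiring and usage would look like this (an assumption; the decorator is not shown in the snippets):

from contextlib import contextmanager

# Assumption: the test class decorates the helper so that yield/finally
# become the enter/exit of a context manager.
cluster_and_pool = contextmanager(cluster_and_pool)

# A test body can then exercise the pool against the fake cluster:
#     with self.cluster_and_pool(num_nodes=3, pool_size=2) as (cluster, pool):
#         ...  # both services running; cleanup happens on exit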
class IdentityServer(BaseServer, MySQLMixin, IdentityQueueMixin):

    name = "HiiSpider Identity Server UUID: %s" % str(uuid4())
    simultaneous_jobs = 50
    active_jobs = 0
    updating_connections = {}
    updating_identities = {}
    connections_queue = []
    connectionsloop = None
    dequeueloop = None
    queue_requests = 0

    def __init__(self, config, port=None):
        super(IdentityServer, self).__init__(config)
        self.plugin_mapping = config["plugin_mapping"]
        self.setupMySQL(config)
        self.setupIdentityQueue(config)
        self.cassandra_cf_identity = config["cassandra_cf_identity"]
        self.cassandra_cf_connections = config["cassandra_cf_connections"]
        self.cassandra_cf_recommendations = config["cassandra_cf_recommendations"]
        self.cassandra_cf_reverse_recommendations = config["cassandra_cf_reverse_recommendations"]
        self.cassandra_client = CassandraClusterPool(
            config["cassandra_servers"],
            keyspace=config["cassandra_keyspace"],
            pool_size=len(config["cassandra_servers"]) * 2)
        self.cassandra_client.startService()
        resource = Resource()
        self.function_resource = Resource()
        resource.putChild("function", self.function_resource)
        if port is None:
            port = config["identity_server_port"]
        self.site_port = reactor.listenTCP(port, server.Site(resource))
        self.expose(self.updateConnections)
        self.expose(self.updateAllConnections)
        self.expose(self.updateAllIdentities)
        self.expose(self.getRecommendations)
        self.expose(self.getReverseRecommendations)
        self.expose(self.updateIdentity)
        # setup manhole
        manhole_namespace = {
            'service': self,
            'globals': globals(),
        }
        reactor.listenTCP(
            config["manhole_identity_port"],
            self.getManholeFactory(manhole_namespace,
                                   admin=config["manhole_password"]))

    def start(self):
        start_deferred = super(IdentityServer, self).start()
        start_deferred.addCallback(self._identityStart)
        return start_deferred

    @inlineCallbacks
    def _identityStart(self, started=False):
        yield self.startIdentityQueue()
        self.connectionsloop = task.LoopingCall(self.findConnections)
        self.connectionsloop.start(0.2)
        self.dequeueloop = task.LoopingCall(self.dequeue)
        self.dequeueloop.start(1)

    @inlineCallbacks
    def shutdown(self):
        self.connectionsloop.stop()
        self.dequeueloop.stop()
        logger.debug("%s stopping on main HTTP interface." % self.name)
        yield self.site_port.stopListening()
        yield self.stopIdentityQueue()
        yield super(IdentityServer, self).shutdown()

    def updateUser(self, user_id):
        reactor.callLater(0, self._updateUser, user_id)
        return {"success": True, "message": "User update started."}

    @inlineCallbacks
    def _updateUser(self, user_id):
        sql = """SELECT type FROM content_account WHERE user_id=%s"""
        data = yield self.mysql.runQuery(sql, int(user_id))
        deferreds = [self._updateIdentity(str(user_id), x["type"])
                     for x in data if "custom_" not in x["type"]]
        results = yield DeferredList(deferreds, consumeErrors=True)
        for result in results:
            if not result[0]:
                raise result[1]
        deferreds = [self._updateConnections(str(user_id), x["type"])
                     for x in data if "custom_" not in x["type"]]
        results = yield DeferredList(deferreds, consumeErrors=True)
        for result in results:
            if not result[0]:
                raise result[1]

    def updateAllIdentities(self, service_name):
        if self.updating_identities.get(service_name, False):
            return {
                "success": False,
                "message": "Already updating %s" % service_name}
        else:
            reactor.callLater(0, self._updateAllIdentities, service_name)
            return {
                "success": True,
                "message": "Update all identities started."}

    @inlineCallbacks
    def _updateAllIdentities(self, service_name):
        self.updating_identities[service_name] = True
        sql = """SELECT user_id
            FROM content_%(service_name)saccount
            INNER JOIN content_account
            ON content_%(service_name)saccount.account_id = content_account.id
            LIMIT %%s, %%s
            """ % {"service_name": service_name}
        start = 0
        step = 100
        data = yield self.mysql.runQuery(sql, (start, step))
        while data:
            d = [self._updateIdentity(str(x["user_id"]), service_name)
                 for x in data]
            results = yield DeferredList(d, consumeErrors=True)
            for result in results:
                if not result[0]:
                    raise result[1]
            start += step
            data = yield self.mysql.runQuery(sql, (start, step))
        self.updating_identities[service_name] = False

    def updateAllConnections(self, service_name):
        if self.updating_connections.get(service_name, False):
            return {
                "success": False,
                "message": "Already updating %s" % service_name}
        else:
            reactor.callLater(0, self._updateAllConnections, service_name)
            return {
                "success": True,
                "message": "Update all connections started."}

    @inlineCallbacks
    def _updateAllConnections(self, service_name):
        self.updating_connections[service_name] = True
        sql = """SELECT user_id
            FROM content_%(service_name)saccount
            INNER JOIN content_account
            ON content_%(service_name)saccount.account_id = content_account.id
            LIMIT %%s, %%s
            """ % {"service_name": service_name}
        start = 0
        step = 40
        data = yield self.mysql.runQuery(sql, (start, step))
        while data:
            d = [self._updateConnections(str(x["user_id"]), service_name)
                 for x in data]
            results = yield DeferredList(d, consumeErrors=True)
            for result in results:
                if not result[0]:
                    raise result[1]
            start += step
            data = yield self.mysql.runQuery(sql, (start, step))
        self.updating_connections[service_name] = False
        returnValue({"success": True})

    @inlineCallbacks
    def _accountData(self, user_id, service_name):
        sql = """SELECT content_%(service_name)saccount.*
            FROM content_%(service_name)saccount
            INNER JOIN content_account
            ON content_%(service_name)saccount.account_id = content_account.id
            WHERE content_account.user_id = %%s""" % {
                "service_name": service_name}
        try:
            data = yield self.mysql.runQuery(sql, user_id)
        except Exception:
            message = "Could not find service %s:%s, %s" % (
                service_name, user_id, sql)
            logger.error(message)
            raise
        if len(data) == 0:  # No results?
            message = "Could not find service %s:%s" % (service_name, user_id)
            logger.error(message)
            raise Exception(message)
        if service_name in self.inverted_args_mapping:
            mapping = self.inverted_args_mapping[service_name]
            for kwargs in data:
                for key, value in mapping.iteritems():
                    if value in kwargs:
                        kwargs[key] = kwargs.pop(value)
        returnValue(data)

    def updateIdentity(self, user_id, service_name):
        reactor.callLater(0, self._updateIdentity, user_id, service_name)
        return {"success": True, "message": "Update identity started."}

    @inlineCallbacks
    def _updateIdentity(self, user_id, service_name):
        data = yield self._accountData(user_id, service_name)
        for kwargs in data:
            function_key = "%s/_getidentity" % self.plugin_mapping.get(
                service_name, service_name)
            try:
                service_id = yield self.executeFunction(function_key, **kwargs)
            except NotImplementedError:
                logger.info("%s not implemented." % function_key)
                return
            yield self.cassandra_client.insert(
                "%s|%s" % (service_name, service_id),
                self.cassandra_cf_identity,
                user_id,
                column="user_id")

    def updateConnections(self, user_id, service_name):
        reactor.callLater(0, self._updateConnections, user_id, service_name)
        return {"success": True, "message": "Update connections started."}

    @inlineCallbacks
    def _updateConnections(self, user_id, service_name):
        logger.debug("Updating %s for user %s." % (service_name, user_id))
        data = yield self._accountData(user_id, service_name)
        ids = []
        for kwargs in data:
            function_key = "%s/_getconnections" % self.plugin_mapping.get(
                service_name, service_name)
            try:
                account_ids = yield self.executeFunction(function_key, **kwargs)
            except NotImplementedError:
                logger.info("%s not implemented." % function_key)
                return
            except Exception as e:
                logger.error(e.message)
                return
            ids.extend(account_ids)
        data = yield self.cassandra_client.get_slice(
            key=user_id,
            column_family=self.cassandra_cf_connections,
            start=service_name,
            finish=service_name + chr(0xff))
        ids = set(ids)
        old_ids = dict([(x.column.name.split("|").pop(), x.column.value)
                        for x in data])
        new_ids = ids - set(old_ids)
        obsolete_ids = set(old_ids) - ids
        for service_id in obsolete_ids:
            try:
                logger.debug("Removing %s|%s from connections CF."
                             % (service_name, service_id))
                yield self.cassandra_client.remove(
                    key=user_id,
                    column_family=self.cassandra_cf_connections,
                    column="%s|%s" % (service_name, service_id))
                logger.debug("Decrementing %s:%s."
                             % (user_id, old_ids[service_id]))
                yield DeferredList([
                    self.cassandra_client.add(
                        key=user_id,
                        column_family=self.cassandra_cf_recommendations,
                        value=-1,
                        column=old_ids[service_id]),
                    self.cassandra_client.add(
                        key=old_ids[service_id],
                        column_family=self.cassandra_cf_reverse_recommendations,
                        value=-1,
                        column=user_id)])
            except Exception as e:
                logger.error(e.message)
class Cassandra(Component):

    """
    Implements basic Cassandra operations as well as more complex
    job-based methods.
    """

    client = None

    def __init__(self, server, config, server_mode, **kwargs):
        super(Cassandra, self).__init__(server, server_mode)
        config = copy(config)
        config.update(kwargs)
        self.servers = config["cassandra_servers"]
        self.keyspace = config["cassandra_keyspace"]
        self.pool_size = len(config["cassandra_servers"]) * 2
        self.cf_content = config["cassandra_cf_content"]
        self.cf_delta = config["cassandra_cf_delta"]
        self.cf_delta_user = config["cassandra_cf_delta_user"]
        self.cf_identity = config["cassandra_cf_identity"]
        self.cf_connections = config["cassandra_cf_connections"]
        self.cf_recommendations = config["cassandra_cf_recommendations"]
        self.cf_reverse_recommendations = config["cassandra_cf_reverse_recommendations"]

    def initialize(self):
        LOGGER.info('Initializing %s' % self.__class__.__name__)
        self.client = CassandraClusterPool(
            self.servers,
            keyspace=self.keyspace,
            pool_size=self.pool_size)
        self.client.startService()
        LOGGER.info('%s initialized, connected to: %s.'
                    % (self.__class__.__name__, self.servers))

    def shutdown(self):
        LOGGER.info("Stopping %s" % self.__class__.__name__)
        self.client.stopService()
        LOGGER.info("%s stopped." % self.__class__.__name__)

    @shared
    def batch_insert(self, *args, **kwargs):
        return self.client.batch_insert(*args, **kwargs)

    @shared
    def insert(self, *args, **kwargs):
        return self.client.insert(*args, **kwargs)

    @shared
    def remove(self, *args, **kwargs):
        return self.client.remove(*args, **kwargs)

    @shared
    def get(self, *args, **kwargs):
        return self.client.get(*args, **kwargs)

    @shared
    def get_key_range(self, *args, **kwargs):
        return self.client.get_key_range(*args, **kwargs)

    @shared
    def get_slice(self, *args, **kwargs):
        return self.client.get_slice(*args, **kwargs)

    @shared
    def get_range_slices(self, *args, **kwargs):
        return self.client.get_range_slices(*args, **kwargs)

    @shared
    @inlineCallbacks
    def get_delta(self, delta_id):
        """Get data from cassandra by user delta_id."""
        try:
            columns = yield self.client.get_slice(
                key=binascii.unhexlify(delta_id),
                column_family=self.cf_delta,
                consistency=2)
        except NotFoundException:
            LOGGER.error("%s not found." % delta_id)
            return
        results = dict([(x.column.name, x.column.value) for x in columns])
        results["data"] = decompress(results["data"])
        if "old_data" in results:
            results["old_data"] = decompress(results["old_data"])
        if "new_data" in results:
            results["new_data"] = decompress(results["new_data"])
        returnValue(results)

    @shared
    @inlineCallbacks
    def getDataByIDAndUUID(self, user_id, uuid):
        """Get data from cassandra by user id and uuid."""
        try:
            data = yield self.client.get(
                key=str(user_id),
                column_family=self.cf_content,
                column=uuid,
                consistency=2)
        except NotFoundException:
            return
        obj = yield threads.deferToThread(decompress, data.column.value)
        returnValue(obj)

    @shared
    @inlineCallbacks
    def getData(self, job, consistency=2):
        try:
            data = yield self.client.get(
                key=str(job.user_account["user_id"]),
                column_family=self.cf_content,
                column=job.uuid,
                consistency=consistency)
        except NotFoundException:
            return
        obj = yield threads.deferToThread(decompress, data.column.value)
        returnValue(obj)

    @shared
    @inlineCallbacks
    def setData(self, user_id, data, uuid):
        s = yield threads.deferToThread(compress, data)
        result = yield self.client.insert(
            str(user_id),
            self.cf_content,
            s,
            column=uuid,
            consistency=2)
        returnValue(result)

    @shared
    @inlineCallbacks
    def setServiceIdentity(self, service, user_id, service_id):
        LOGGER.debug("Inserting identity: %s | %s" % (service, service_id))
        try:
            yield self.client.insert(
                "%s|%s" % (service, service_id),
                self.cf_identity,
                user_id,
                column="user_id",
                consistency=2)
        except:
            LOGGER.error(format_exc())
        returnValue(None)

    @shared
    @inlineCallbacks
    def getServiceConnections(self, service, user_id):
        try:
            data = yield self.client.get_slice(
                key=user_id,
                column_family=self.cf_connections,
                start=service,
                finish=service + chr(0xff),
                consistency=2)
        except:
            LOGGER.error(format_exc())
            returnValue([])
        returnValue(dict([(x.column.name.split("|").pop(), x.column.value)
                          for x in data]))

    @inlineCallbacks
    def addConnections(self, service, user_id, new_ids):
        mapped_new_ids = {}
        for chunk in list(chunks(list(new_ids), 100)):
            data = yield self.client.multiget(
                keys=["%s|%s" % (service, x) for x in chunk],
                column_family=self.cf_identity,
                column="user_id",
                consistency=2)
            for key in data:
                if data[key]:
                    mapped_new_ids[key] = data[key][0].column.value
        if not mapped_new_ids:
            # We don't have any of the new connections in the system.
            return
        LOGGER.debug("Batch inserting: %s" % pformat(mapped_new_ids))
        yield self.client.batch_insert(
            key=user_id,
            column_family=self.cf_connections,
            mapping=mapped_new_ids,
            consistency=2)
        followee_ids = mapped_new_ids.values()
        for chunk in list(chunks(followee_ids, 10)):
            deferreds = []
            for followee_id in chunk:
                LOGGER.info("Incrementing %s:%s" % (user_id, followee_id))
                deferreds.append(self.client.add(
                    key=user_id,
                    column_family=self.cf_recommendations,
                    value=1,
                    column=followee_id))
                deferreds.append(self.client.add(
                    key=followee_id,
                    column_family=self.cf_reverse_recommendations,
                    value=1,
                    column=user_id,
                    consistency=2))
            yield DeferredList(deferreds)

    @inlineCallbacks
    def removeConnections(self, service, user_id, obsolete_mapping):
        for service_id in obsolete_mapping:
            LOGGER.debug("Removing %s|%s from connections CF."
                         % (service, service_id))
            yield self.client.remove(
                key=user_id,
                column_family=self.cf_connections,
                column="%s|%s" % (service, service_id),
                consistency=2)
            LOGGER.debug("Decrementing %s:%s."
                         % (user_id, obsolete_mapping[service_id]))
            yield DeferredList([
                self.client.add(
                    key=user_id,
                    column_family=self.cf_recommendations,
                    value=-1,
                    column=obsolete_mapping[service_id],
                    consistency=2),
                self.client.add(
                    key=obsolete_mapping[service_id],
                    column_family=self.cf_reverse_recommendations,
                    value=-1,
                    column=user_id,
                    consistency=2)])
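The recommendation column families above are counter columns: every new connection increments a pair of counters and every removed connection decrements the same pair. A condensed sketch of that symmetric update, with placeholder CF names standing in for cf_recommendations and cf_reverse_recommendations:

from twisted.internet.defer import DeferredList, inlineCallbacks

@inlineCallbacks
def adjust_recommendations(client, user_id, followee_id, delta):
    # delta is +1 when a connection appears and -1 when it is removed;
    # the two column family names here are placeholders.
    yield DeferredList([
        client.add(key=user_id,
                   column_family="recommendations",
                   value=delta,
                   column=followee_id),
        client.add(key=followee_id,
                   column_family="reverse_recommendations",
                   value=delta,
                   column=user_id)])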
config = ConfigParser.ConfigParser()
with open("etc/defaults.cfg") as defaults:
    config.readfp(defaults)
config.read(["etc/devel.cfg", "etc/production.cfg"])

cdnHost = config.get("General", "CDNHost")
secureProxy = config.get("General", "SecureProxy")
rootUrl = config.get("General", "URL")
brandName = config.get("Branding", "Name")

#
# Cassandra connection pool
#
cassandraNodes = config.get("Cassandra", "Nodes").split(",")
cassandraKeyspace = config.get("Cassandra", "Keyspace")
db = CassandraClusterPool(cassandraNodes, cassandraKeyspace, pool_size=10)
db.set_consistency(ConsistencyLevel.LOCAL_QUORUM)

#
# Internationalization
# Aliases for i18n
#
_ = gettext.gettext
__ = gettext.ngettext

#
# Map of all item type plugins
#
plugins = {}
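Once set_consistency is applied, reads and writes through the shared db pool default to LOCAL_QUORUM; a hypothetical read (the column family and column names are placeholders, not part of this codebase):

from twisted.internet.defer import inlineCallbacks, returnValue

@inlineCallbacks
def load_item(item_id):
    # "items"/"meta" are placeholder names; the call shape matches the
    # get() usage in the other snippets, and the read inherits the
    # pool-wide LOCAL_QUORUM default.
    result = yield db.get(item_id, "items", column="meta")
    returnValue(result.column.value)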
def __init__(self, cfg):
    self.cfg = cfg
    servers = map(parse_srvaddr,
                  self.cfg.get("cassandra", "seed").split(","))
    keyspace = self.cfg.get("cassandra", "keyspace")
    self.request_retries = self.cfg.getint("cassandra", "retries")
    CassandraClusterPool.__init__(
        self,
        seed_list=servers,
        keyspace=keyspace,
        conn_timeout=self.cfg.getint("cassandra", "timeout"))
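This __init__ belongs to a CassandraClusterPool subclass configured from an ini file; a hypothetical instantiation (the class name ConfiguredCassandraPool is invented for illustration, and parse_srvaddr is assumed to accept host:port strings):

import ConfigParser
from StringIO import StringIO

cfg = ConfigParser.ConfigParser()
cfg.readfp(StringIO("""
[cassandra]
seed = 10.0.0.1:9160,10.0.0.2:9160
keyspace = example_keyspace
retries = 3
timeout = 10
"""))
pool = ConfiguredCassandraPool(cfg)  # hypothetical subclass name
pool.startService()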