Example #1
0
class CassandraServer(BaseServer, JobGetterMixin):
    """Spider server backed by Cassandra for content storage, Redis for
    caching, and an optional delta-tracking feature."""

    regenerating = False
    redis_client = None

    def __init__(self, config):
        """Read Cassandra/Redis/delta settings from the *config* dict."""
        super(CassandraServer, self).__init__(config)
        self.cassandra_cf_content = config["cassandra_cf_content"]
        self.cassandra_cf_temp_content = config["cassandra_cf_temp_content"]
        # Cassandra client pool: two connections per configured server.
        servers = config["cassandra_servers"]
        self.cassandra_client = CassandraClusterPool(
            servers,
            keyspace=config["cassandra_keyspace"],
            pool_size=2 * len(servers))
        self.cassandra_client.startService()
        # Negative-cache toggle.
        self.disable_negative_cache = config.get("disable_negative_cache", False)
        # Redis shard hosts.
        self.redis_hosts = config["redis_hosts"]
        # Delta options.
        self.delta_enabled = config.get('delta_enabled', False)
        self.delta_debug = config.get('delta_debug', False)
        self.delta_sample_rate = config.get('delta_sample_rate', 1.0)
        self.cassandra_cf_delta = config.get('cassandra_cf_delta', None)
        self.cassandra_cf_delta_user = config.get('cassandra_cf_delta_user', None)
        # Deltas require both column families; otherwise force them off.
        if not (self.cassandra_cf_delta and self.cassandra_cf_delta_user):
            logger.warn('Disabling cassandra deltas; both cf_delta and'
                ' cf_delta_user must be set in the config.')
            self.delta_enabled = False
        self.setupJobGetter(config)

    def start(self):
        """Start the base server, then the Cassandra-specific pieces."""
        d = super(CassandraServer, self).start()
        d.addCallback(self._cassandraStart)
        return d

    @inlineCallbacks
    def _cassandraStart(self, started=False):
        """Connect to Redis and build the page getter; shuts the server
        down and re-raises if Redis is unreachable."""
        logger.debug("Starting Cassandra components.")
        try:
            self.redis_client = yield RedisShardingConnection(self.redis_hosts)
        except Exception as e:
            logger.error("Could not connect to Redis: %s" % e)
            self.shutdown()
            raise Exception("Could not connect to Redis.")
        if self.disable_negative_cache:
            logger.warning("Disabling negative cache.")
        logger.debug("Started RedisShardingConnection")
        self.pg = PageGetter(
            self.cassandra_client,
            redis_client=self.redis_client,
            disable_negative_cache=self.disable_negative_cache,
            rq=self.rq)
        logger.debug("Initialized pagegetter.")
        returnValue(True)
Example #2
0
 def __init__(self, config, port=None):
     """Wire up MySQL, the identity queue, Cassandra, the HTTP site and
     the manhole from *config*; *port* overrides identity_server_port."""
     super(IdentityServer, self).__init__(config)
     self.plugin_mapping = config["plugin_mapping"]
     self.setupMySQL(config)
     self.setupIdentityQueue(config)
     # Column families used by the identity service.
     self.cassandra_cf_identity = config["cassandra_cf_identity"]
     self.cassandra_cf_connections = config["cassandra_cf_connections"]
     self.cassandra_cf_recommendations = config["cassandra_cf_recommendations"]
     self.cassandra_cf_reverse_recommendations = config["cassandra_cf_reverse_recommendations"]
     # Two pooled connections per configured Cassandra server.
     servers = config["cassandra_servers"]
     self.cassandra_client = CassandraClusterPool(
         servers,
         keyspace=config["cassandra_keyspace"],
         pool_size=2 * len(servers))
     self.cassandra_client.startService()
     # HTTP site: exposed callables are served under /function.
     resource = Resource()
     self.function_resource = Resource()
     resource.putChild("function", self.function_resource)
     if port is None:
         port = config["identity_server_port"]
     self.site_port = reactor.listenTCP(port, server.Site(resource))
     for exposed in (self.updateConnections,
                     self.updateAllConnections,
                     self.updateAllIdentities,
                     self.getRecommendations,
                     self.getReverseRecommendations,
                     self.updateIdentity):
         self.expose(exposed)
     # setup manhole (interactive debugging shell)
     manhole_namespace = {'service': self, 'globals': globals()}
     reactor.listenTCP(
         config["manhole_identity_port"],
         self.getManholeFactory(manhole_namespace, admin=config["manhole_password"]))
Example #3
0
 def __init__(self, config):
     """Configure Cassandra, the negative cache, Redis and the delta
     options from the *config* dict."""
     super(CassandraServer, self).__init__(config)
     self.cassandra_cf_content = config["cassandra_cf_content"]
     self.cassandra_cf_temp_content = config["cassandra_cf_temp_content"]
     # Cassandra client pool: two connections per configured server.
     servers = config["cassandra_servers"]
     self.cassandra_client = CassandraClusterPool(
         servers,
         keyspace=config["cassandra_keyspace"],
         pool_size=2 * len(servers))
     self.cassandra_client.startService()
     # Negative-cache toggle.
     self.disable_negative_cache = config.get("disable_negative_cache", False)
     # Redis shard hosts.
     self.redis_hosts = config["redis_hosts"]
     # Delta options.
     self.delta_enabled = config.get('delta_enabled', False)
     self.delta_debug = config.get('delta_debug', False)
     self.delta_sample_rate = config.get('delta_sample_rate', 1.0)
     self.cassandra_cf_delta = config.get('cassandra_cf_delta', None)
     self.cassandra_cf_delta_user = config.get('cassandra_cf_delta_user', None)
     # Deltas require both column families; otherwise force them off.
     if not (self.cassandra_cf_delta and self.cassandra_cf_delta_user):
         logger.warn('Disabling cassandra deltas; both cf_delta and'
             ' cf_delta_user must be set in the config.')
         self.delta_enabled = False
     self.setupJobGetter(config)
Example #4
0
 def initialize(self):
     """Create and start the Cassandra client pool for this component."""
     cls_name = self.__class__.__name__
     LOGGER.info('Initializing %s' % cls_name)
     self.client = CassandraClusterPool(
         self.servers,
         keyspace=self.keyspace,
         pool_size=self.pool_size)
     self.client.startService()
     LOGGER.info('%s initialized, connected to: %s.' % (cls_name, self.servers))
 def cluster_and_pool(self, num_nodes=10, pool_size=5, start=True,
                      cluster_class=None):
     """Generator fixture: build a fake Cassandra cluster and a client
     pool pointed at it, yield ``(cluster, pool)``, and always tear both
     down afterwards.  If *start* is true both services are started
     before the yield."""
     if cluster_class is None:
         cluster_class = FakeCassandraCluster
     cluster = cluster_class(num_nodes, start_port=self.start_port)
     pool = CassandraClusterPool([cluster.iface], thrift_port=self.start_port,
                                 pool_size=pool_size)
     if start:
         cluster.startService()
         pool.startService()
     self.cluster = cluster
     self.pool = pool
     try:
         yield cluster, pool
     finally:
         # Drop the instance attributes first; the locals keep the
         # objects alive for the stopService calls below.
         del self.pool
         del self.cluster
         if pool.running:
             pool.stopService()
         if cluster.running:
             cluster.stopService()
Example #6
0
 def cluster_and_pool(self,
                      num_nodes=10,
                      pool_size=5,
                      start=True,
                      cluster_class=None,
                      api_version=None):
     """Generator fixture: build a fake Cassandra cluster and a client
     pool (optionally pinned to *api_version*), yield ``(cluster, pool)``,
     and always tear both down afterwards."""
     if cluster_class is None:
         cluster_class = FakeCassandraCluster
     cluster = cluster_class(num_nodes, start_port=self.start_port)
     pool = CassandraClusterPool([cluster.iface],
                                 thrift_port=self.start_port,
                                 pool_size=pool_size,
                                 api_version=api_version)
     if start:
         cluster.startService()
         pool.startService()
     self.cluster = cluster
     self.pool = pool
     try:
         yield cluster, pool
     finally:
         # Drop the instance attributes first; the locals keep the
         # objects alive for the stopService calls below.
         del self.pool
         del self.cluster
         if pool.running:
             pool.stopService()
         if cluster.running:
             cluster.stopService()
 def cluster_and_pool(self,
                      num_nodes=10,
                      pool_size=5,
                      start=True,
                      cluster_class=None,
                      node_discovery=True,
                      fill_throttle=0.0):
     """Generator fixture: build a fake Cassandra cluster and a client
     pool (with node discovery / fill throttling knobs), yield
     ``(cluster, pool)``, and always tear both down afterwards."""
     cls = FakeCassandraCluster if cluster_class is None else cluster_class
     cluster = cls(num_nodes, start_port=self.start_port)
     pool = CassandraClusterPool(
         [cluster.iface],
         thrift_port=self.start_port,
         pool_size=pool_size,
         auto_node_discovery=node_discovery,
         fill_pool_throttle=fill_throttle)
     if start:
         cluster.startService()
         pool.startService()
     self.cluster, self.pool = cluster, pool
     try:
         yield cluster, pool
     finally:
         # Drop the instance attributes first; the locals keep the
         # objects alive for the stopService calls below.
         del self.pool
         del self.cluster
         if pool.running:
             pool.stopService()
         if cluster.running:
             cluster.stopService()
Example #8
0
class IdentityServer(BaseServer, MySQLMixin, IdentityQueueMixin):
    """Server that maintains per-user service identities and connection
    graphs in Cassandra, driven by account rows stored in MySQL."""

    name = "HiiSpider Identity Server UUID: %s" % str(uuid4())
    simultaneous_jobs = 50
    active_jobs = 0
    # NOTE(review): mutable class attributes are shared across instances;
    # __init__ below gives each instance its own copies.
    updating_connections = {}
    updating_identities = {}
    connections_queue = []
    connectionsloop = None
    dequeueloop = None
    queue_requests = 0

    def __init__(self, config, port=None):
        """Wire up MySQL, the identity queue, Cassandra, the HTTP site and
        the manhole.

        :param config: dict of server settings (column families, ports, ...).
        :param port: optional override of config["identity_server_port"].
        """
        super(IdentityServer, self).__init__(config)
        self.plugin_mapping = config["plugin_mapping"]
        self.setupMySQL(config)
        self.setupIdentityQueue(config)
        # Instance-local state; avoids sharing the mutable class
        # attributes between instances.
        self.updating_connections = {}
        self.updating_identities = {}
        self.connections_queue = []
        # Column families used by the identity service.
        self.cassandra_cf_identity = config["cassandra_cf_identity"]
        self.cassandra_cf_connections = config["cassandra_cf_connections"]
        self.cassandra_cf_recommendations = config["cassandra_cf_recommendations"]
        self.cassandra_cf_reverse_recommendations = config["cassandra_cf_reverse_recommendations"]
        self.cassandra_client = CassandraClusterPool(
            config["cassandra_servers"],
            keyspace=config["cassandra_keyspace"],
            pool_size=len(config["cassandra_servers"]) * 2)
        self.cassandra_client.startService()
        # HTTP site: exposed callables are served under /function.
        resource = Resource()
        self.function_resource = Resource()
        resource.putChild("function", self.function_resource)
        if port is None:
            port = config["identity_server_port"]
        self.site_port = reactor.listenTCP(port, server.Site(resource))
        self.expose(self.updateConnections)
        self.expose(self.updateAllConnections)
        self.expose(self.updateAllIdentities)
        self.expose(self.getRecommendations)
        self.expose(self.getReverseRecommendations)
        self.expose(self.updateIdentity)
        # setup manhole (interactive debugging shell)
        manhole_namespace = {
            'service': self,
            'globals': globals(),
        }
        reactor.listenTCP(
            config["manhole_identity_port"],
            self.getManholeFactory(manhole_namespace, admin=config["manhole_password"]))

    def start(self):
        """Start the base server, then the identity-specific loops."""
        start_deferred = super(IdentityServer, self).start()
        start_deferred.addCallback(self._identityStart)
        return start_deferred

    @inlineCallbacks
    def _identityStart(self, started=False):
        """Begin consuming the identity queue and start the polling loops."""
        yield self.startIdentityQueue()
        self.connectionsloop = task.LoopingCall(self.findConnections)
        self.connectionsloop.start(0.2)
        self.dequeueloop = task.LoopingCall(self.dequeue)
        self.dequeueloop.start(1)

    @inlineCallbacks
    def shutdown(self):
        """Stop the loops, the HTTP listener and the identity queue."""
        self.connectionsloop.stop()
        self.dequeueloop.stop()
        logger.debug("%s stopping on main HTTP interface." % self.name)
        yield self.site_port.stopListening()
        yield self.stopIdentityQueue()
        yield super(IdentityServer, self).shutdown()

    def updateUser(self, user_id):
        """Kick off a full identity + connections refresh for *user_id*."""
        reactor.callLater(0, self._updateUser, user_id)
        return {"success": True, "message": "User update started."}

    @inlineCallbacks
    def _updateUser(self, user_id):
        """Refresh identities, then connections, for every non-custom
        service account belonging to *user_id*; re-raises the first
        failure from each batch."""
        sql = """SELECT type FROM content_account WHERE user_id=%s"""
        data = yield self.mysql.runQuery(sql, int(user_id))
        deferreds = [self._updateIdentity(str(user_id), x["type"]) for x in data if "custom_" not in x["type"]]
        results = yield DeferredList(deferreds, consumeErrors=True)
        for result in results:
            if not result[0]:
                raise result[1]
        deferreds = [self._updateConnections(str(user_id), x["type"]) for x in data if "custom_" not in x["type"]]
        results = yield DeferredList(deferreds, consumeErrors=True)
        for result in results:
            if not result[0]:
                raise result[1]

    def updateAllIdentities(self, service_name):
        """HTTP entry point: refresh identities for every account of
        *service_name*, unless a refresh is already in flight."""
        if self.updating_identities.get(service_name, False):
            return {"success": False, "message": "Already updating %s" % service_name}
        else:
            reactor.callLater(0, self._updateAllIdentities, service_name)
            return {"success": True, "message": "Update all identities started."}

    @inlineCallbacks
    def _updateAllIdentities(self, service_name):
        """Page through all accounts for *service_name* and refresh each
        user's identity row."""
        self.updating_identities[service_name] = True
        sql = """SELECT user_id
        FROM content_%(service_name)saccount
        INNER JOIN content_account
            ON content_%(service_name)saccount.account_id = content_account.id
        LIMIT %%s, %%s
        """ % {"service_name": service_name}
        start = 0
        step = 100
        data = yield self.mysql.runQuery(sql, (start, step))
        while data:
            d = [self._updateIdentity(str(x["user_id"]), service_name) for x in data]
            results = yield DeferredList(d, consumeErrors=True)
            for result in results:
                if not result[0]:
                    raise result[1]
            start += step
            data = yield self.mysql.runQuery(sql, (start, step))
        # BUG FIX: previously cleared updating_connections here, which left
        # updating_identities[service_name] stuck at True forever and made
        # updateAllIdentities permanently report "Already updating".
        self.updating_identities[service_name] = False

    def updateAllConnections(self, service_name):
        """HTTP entry point: refresh connections for every account of
        *service_name*, unless a refresh is already in flight."""
        if self.updating_connections.get(service_name, False):
            return {"success": False, "message": "Already updating %s" % service_name}
        else:
            reactor.callLater(0, self._updateAllConnections, service_name)
            return {"success": True, "message": "Update all connections started."}

    @inlineCallbacks
    def _updateAllConnections(self, service_name):
        """Page through all accounts for *service_name* and refresh each
        user's connection set."""
        self.updating_connections[service_name] = True
        sql = """SELECT user_id
        FROM content_%(service_name)saccount
        INNER JOIN content_account
            ON content_%(service_name)saccount.account_id = content_account.id
        LIMIT %%s, %%s
        """ % {"service_name": service_name}
        start = 0
        step = 40
        data = yield self.mysql.runQuery(sql, (start, step))
        while data:
            d = [self._updateConnections(str(x["user_id"]), service_name) for x in data]
            results = yield DeferredList(d, consumeErrors=True)
            for result in results:
                if not result[0]:
                    raise result[1]
            start += step
            data = yield self.mysql.runQuery(sql, (start, step))
        self.updating_connections[service_name] = False
        returnValue({"success": True})

    @inlineCallbacks
    def _accountData(self, user_id, service_name):
        """Fetch the service-account rows for *user_id*, remapping column
        names via inverted_args_mapping so they can be passed as plugin
        keyword arguments.  Raises if the user has no such account."""
        sql = """SELECT content_%(service_name)saccount.*
        FROM content_%(service_name)saccount
        INNER JOIN content_account
            ON content_%(service_name)saccount.account_id = content_account.id
        WHERE content_account.user_id = %%s""" % {"service_name": service_name}
        try:
            data = yield self.mysql.runQuery(sql, user_id)
        except Exception:
            message = "Could not find service %s:%s, %s" % (
                service_name,
                user_id,
                sql)
            logger.error(message)
            raise
        if len(data) == 0:  # No results?
            message = "Could not find service %s:%s" % (service_name, user_id)
            logger.error(message)
            raise Exception(message)
        # NOTE(review): inverted_args_mapping is defined outside this view;
        # presumably maps plugin kwarg names to MySQL column names — verify.
        if service_name in self.inverted_args_mapping:
            mapping = self.inverted_args_mapping[service_name]
            for kwargs in data:
                for key, value in mapping.iteritems():
                    if value in kwargs:
                        kwargs[key] = kwargs.pop(value)
        returnValue(data)

    def updateIdentity(self, user_id, service_name):
        """HTTP entry point: refresh the identity row for one user/service."""
        reactor.callLater(0, self._updateIdentity, user_id, service_name)
        return {"success": True, "message": "Update identity started."}

    @inlineCallbacks
    def _updateIdentity(self, user_id, service_name):
        """Resolve the service-side identity via the plugin and store the
        (service, service_id) -> user_id mapping in Cassandra."""
        data = yield self._accountData(user_id, service_name)
        for kwargs in data:
            function_key = "%s/_getidentity" % self.plugin_mapping.get(service_name, service_name)
            try:
                service_id = yield self.executeFunction(function_key, **kwargs)
            except NotImplementedError:
                logger.info("%s not implemented." % function_key)
                return
            yield self.cassandra_client.insert(
                "%s|%s" % (service_name, service_id),
                self.cassandra_cf_identity,
                user_id,
                column="user_id")

    def updateConnections(self, user_id, service_name):
        """HTTP entry point: refresh the connection set for one
        user/service."""
        reactor.callLater(0, self._updateConnections, user_id, service_name)
        # BUG FIX: message previously said "Update identity started."
        return {"success": True, "message": "Update connections started."}

    @inlineCallbacks
    def _updateConnections(self, user_id, service_name):
        """Fetch the user's current connections from the plugin, diff them
        against the stored set, and remove obsolete ones (decrementing the
        recommendation counters)."""
        logger.debug("Updating %s for user %s." % (service_name, user_id))
        data = yield self._accountData(user_id, service_name)
        ids = []
        for kwargs in data:
            function_key = "%s/_getconnections" % self.plugin_mapping.get(service_name, service_name)
            try:
                account_ids = yield self.executeFunction(function_key, **kwargs)
            except NotImplementedError:
                logger.info("%s not implemented." % function_key)
                return
            except Exception as e:
                logger.error(e.message)
                return
            ids.extend(account_ids)
        # Stored connections for this service live under columns named
        # "<service>|<service_id>"; the chr(0xff) finish bounds the slice.
        data = yield self.cassandra_client.get_slice(
            key=user_id,
            column_family=self.cassandra_cf_connections,
            start=service_name,
            finish=service_name + chr(0xff))
        ids = set(ids)
        old_ids = dict([(x.column.name.split("|").pop(), x.column.value) for x in data])
        # NOTE(review): new_ids is computed but unused in this block —
        # adding new connections presumably happens elsewhere; verify.
        new_ids = ids - set(old_ids)
        obsolete_ids = set(old_ids) - ids
        for service_id in obsolete_ids:
            try:
                logger.debug("Removing %s|%s from connections CF." % (service_name, service_id))
                yield self.cassandra_client.remove(
                    key=user_id,
                    column_family=self.cassandra_cf_connections,
                    column="%s|%s" % (service_name, service_id))
                logger.debug("Decrementing %s:%s." % (user_id, old_ids[service_id]))
                # BUG FIX: this class has no self.client attribute; the
                # counter columns are reached through self.cassandra_client.
                yield DeferredList([
                    self.cassandra_client.add(
                        key=user_id,
                        column_family=self.cassandra_cf_recommendations,
                        value=-1,
                        column=old_ids[service_id]),
                    self.cassandra_client.add(
                        key=old_ids[service_id],
                        column_family=self.cassandra_cf_reverse_recommendations,
                        value=-1,
                        column=user_id)])
            except Exception as e:
                logger.error(e.message)
Example #9
0
class Cassandra(Component):
    """
    Implements basic Cassandra operations as well as more complex job-based
    methods.
    """

    # Connection pool; created in initialize(), stopped in shutdown().
    client = None

    def __init__(self, server, config, server_mode, **kwargs):
        """Read pool/keyspace/column-family settings from *config*;
        keyword arguments override config entries."""
        super(Cassandra, self).__init__(server, server_mode)
        config = copy(config)
        config.update(kwargs)
        self.servers = config["cassandra_servers"]
        self.keyspace = config["cassandra_keyspace"]
        # Two pooled connections per configured server.
        self.pool_size = len(config["cassandra_servers"]) * 2
        self.cf_content = config["cassandra_cf_content"]
        self.cf_delta = config["cassandra_cf_delta"]
        self.cf_delta_user = config["cassandra_cf_delta_user"]
        self.cf_identity = config["cassandra_cf_identity"]
        self.cf_connections = config["cassandra_cf_connections"]
        self.cf_recommendations = config["cassandra_cf_recommendations"]
        self.cf_reverse_recommendations = config["cassandra_cf_reverse_recommendations"]

    def initialize(self):
        """Create and start the Cassandra connection pool."""
        LOGGER.info('Initializing %s' % self.__class__.__name__)
        self.client = CassandraClusterPool(
            self.servers,
            keyspace=self.keyspace,
            pool_size=self.pool_size)
        self.client.startService()
        LOGGER.info('%s initialized, connected to: %s.' % (self.__class__.__name__, self.servers))

    def shutdown(self):
        """Stop the connection pool."""
        LOGGER.info("Stopping %s" % self.__class__.__name__)
        self.client.stopService()
        LOGGER.info("%s stopped." % self.__class__.__name__)

    # --- thin pass-throughs to the pooled client -------------------------

    @shared
    def batch_insert(self, *args, **kwargs):
        return self.client.batch_insert(*args, **kwargs)

    @shared
    def insert(self, *args, **kwargs):
        return self.client.insert(*args, **kwargs)

    @shared
    def remove(self, *args, **kwargs):
        return self.client.remove(*args, **kwargs)

    @shared
    def get(self, *args, **kwargs):
        return self.client.get(*args, **kwargs)

    @shared
    def get_key_range(self, *args, **kwargs):
        return self.client.get_key_range(*args, **kwargs)

    @shared
    def get_slice(self, *args, **kwargs):
        return self.client.get_slice(*args, **kwargs)

    @shared
    def get_range_slices(self, *args, **kwargs):
        return self.client.get_range_slices(*args, **kwargs)

    @shared
    @inlineCallbacks
    def get_delta(self, delta_id):
        """Get data from cassandra by user delta_id."""
        try:
            columns = yield self.client.get_slice(
                key=binascii.unhexlify(delta_id),
                column_family=self.cf_delta,
                consistency=2)
        except NotFoundException:
            LOGGER.error("%s not found." % delta_id)
            return
        results = dict([(x.column.name, x.column.value) for x in columns])
        # Payload columns are stored compressed.
        results["data"] = decompress(results["data"])
        if "old_data" in results:
            results["old_data"] = decompress(results["old_data"])
        if "new_data" in results:
            results["new_data"] = decompress(results["new_data"])
        returnValue(results)

    @shared
    @inlineCallbacks
    def getDataByIDAndUUID(self, user_id, uuid):
        """Get data from cassandra by user id and uuid."""
        try:
            data = yield self.client.get(
                key=str(user_id),
                column_family=self.cf_content,
                column=uuid,
                consistency=2)
        except NotFoundException:
            return
        # Decompression can be CPU-heavy; run it off the reactor thread.
        obj = yield threads.deferToThread(decompress, data.column.value)
        returnValue(obj)

    @shared
    @inlineCallbacks
    def getData(self, job, consistency=2):
        """Get the stored content blob for *job*; None-ish (no value) when
        the row/column does not exist."""
        try:
            data = yield self.client.get(
                key=str(job.user_account["user_id"]),
                column_family=self.cf_content,
                column=job.uuid,
                consistency=consistency)
        except NotFoundException:
            return
        obj = yield threads.deferToThread(decompress, data.column.value)
        returnValue(obj)

    @shared
    @inlineCallbacks
    def setData(self, user_id, data, uuid):
        """Compress *data* and store it under (user_id, uuid)."""
        s = yield threads.deferToThread(compress, data)
        result = yield self.client.insert(
            str(user_id),
            self.cf_content,
            s,
            column=uuid,
            consistency=2)
        returnValue(result)

    @shared
    @inlineCallbacks
    def setServiceIdentity(self, service, user_id, service_id):
        """Store the (service, service_id) -> user_id identity mapping.
        Best-effort: failures are logged, never raised."""
        LOGGER.debug("Inserting identity: %s | %s" % (service, service_id))
        try:
            yield self.client.insert(
                "%s|%s" % (service, service_id),
                self.cf_identity,
                user_id,
                column="user_id",
                consistency=2)
        except Exception:
            # Narrowed from a bare except: don't swallow KeyboardInterrupt
            # and friends; still best-effort for ordinary failures.
            LOGGER.error(format_exc())
        returnValue(None)

    @shared
    @inlineCallbacks
    def getServiceConnections(self, service, user_id):
        """Return {service_id: followee_user_id} for the user's stored
        connections on *service*; {} (well, []) on lookup failure."""
        try:
            data = yield self.client.get_slice(
                key=user_id,
                column_family=self.cf_connections,
                start=service,
                finish=service + chr(0xff),
                consistency=2)
        except Exception:
            # Narrowed from a bare except; see setServiceIdentity.
            LOGGER.error(format_exc())
            returnValue([])
        returnValue(dict([(x.column.name.split("|").pop(), x.column.value)
            for x in data]))

    @inlineCallbacks
    def addConnections(self, service, user_id, new_ids):
        """Resolve *new_ids* (service-side ids) to local user ids, store
        the connections, and increment both recommendation counters."""
        mapped_new_ids = {}
        for chunk in list(chunks(list(new_ids), 100)):
            data = yield self.client.multiget(
                keys=["%s|%s" % (service, x) for x in chunk],
                column_family=self.cf_identity,
                column="user_id",
                consistency=2)
            for key in data:
                if data[key]:
                    mapped_new_ids[key] = data[key][0].column.value
        if not mapped_new_ids:
            # We don't have any of the new connections in the system.
            return
        LOGGER.debug("Batch inserting: %s" % pformat(mapped_new_ids))
        yield self.client.batch_insert(
            key=user_id,
            column_family=self.cf_connections,
            mapping=mapped_new_ids,
            consistency=2)
        followee_ids = mapped_new_ids.values()
        for chunk in list(chunks(followee_ids, 10)):
            deferreds = []
            for followee_id in chunk:
                LOGGER.info("Incrementing %s:%s" % (user_id, followee_id))
                # BUG FIX: consistency=2 was previously passed to
                # deferreds.append() (a TypeError at runtime); it belongs
                # inside the counter add() calls.
                deferreds.append(self.client.add(
                    key=user_id,
                    column_family=self.cf_recommendations,
                    value=1,
                    column=followee_id,
                    consistency=2))
                deferreds.append(self.client.add(
                    key=followee_id,
                    column_family=self.cf_reverse_recommendations,
                    value=1,
                    column=user_id,
                    consistency=2))
            yield DeferredList(deferreds)

    @inlineCallbacks
    def removeConnections(self, service, user_id, obsolete_mapping):
        """Remove obsolete connections and decrement both recommendation
        counters.

        :param obsolete_mapping: dict of service_id -> followee user_id.
        """
        # BUG FIX: this method previously referenced the undefined names
        # service_name, old_ids, obsolete and logger (NameError as soon as
        # it ran); they are service / obsolete_mapping / LOGGER here.
        for service_id in obsolete_mapping:
            LOGGER.debug("Removing %s|%s from connections CF." % (service, service_id))
            yield self.client.remove(
                key=user_id,
                column_family=self.cf_connections,
                column="%s|%s" % (service, service_id),
                consistency=2)
            LOGGER.debug("Decrementing %s:%s." % (user_id, obsolete_mapping[service_id]))
            yield DeferredList([
                self.client.add(
                    key=user_id,
                    column_family=self.cf_recommendations,
                    value=-1,
                    column=obsolete_mapping[service_id],
                    consistency=2),
                self.client.add(
                    key=obsolete_mapping[service_id],
                    column_family=self.cf_reverse_recommendations,
                    value=-1,
                    column=user_id,
                    consistency=2)])
Example #10
0
# Application configuration: defaults first, then optional overrides.
config = ConfigParser.ConfigParser()
with open("etc/defaults.cfg") as defaults:
    config.readfp(defaults)
# Later files override earlier ones; missing files are skipped silently.
config.read(["etc/devel.cfg", "etc/production.cfg"])
cdnHost = config.get("General", "CDNHost")
secureProxy = config.get("General", "SecureProxy")
rootUrl = config.get("General", "URL")
brandName = config.get("Branding", "Name")


#
# Cassandra connection pool
#
cassandraNodes = config.get("Cassandra", "Nodes").split(",")
cassandraKeyspace = config.get("Cassandra", "Keyspace")
db = CassandraClusterPool(cassandraNodes, cassandraKeyspace, pool_size=10)
# Default all operations to LOCAL_QUORUM consistency.
db.set_consistency(ConsistencyLevel.LOCAL_QUORUM)


#
# Internationalization
# Aliases for i18n
#
_ = gettext.gettext
__ = gettext.ngettext


#
# Map of all item type plugins
#
# NOTE(review): presumably populated by plugin-registration code outside
# this view — verify before relying on its contents at import time.
plugins = {}
Example #11
0
 def __init__(self, cfg):
     """Initialize the pool from the [cassandra] section of *cfg*."""
     self.cfg = cfg
     section = "cassandra"
     # Seed list is a comma-separated host[:port] string.
     seeds = map(parse_srvaddr, self.cfg.get(section, "seed").split(","))
     keyspace = self.cfg.get(section, "keyspace")
     self.request_retries = self.cfg.getint(section, "retries")
     CassandraClusterPool.__init__(
         self,
         seed_list=seeds,
         keyspace=keyspace,
         conn_timeout=self.cfg.getint(section, "timeout"))