Example #1
0
def use_cluster_with_graph(num_nodes):
    """
    This is a  work around to account for the fact that spark nodes will conflict over master assignment
    when started all at once.
    """

    # Create the cluster but don't start it.
    use_singledc(start=False, workloads=['graph', 'spark'])
    # Start first node.
    get_node(1).start(wait_for_binary_proto=True)
    # Wait binary protocol port to open
    wait_for_node_socket(get_node(1), 120)
    # Wait for spark master to start up
    spark_master_http = ("localhost", 7080)
    common.check_socket_listening(spark_master_http, timeout=60)
    tmp_cluster = Cluster(protocol_version=PROTOCOL_VERSION)

    # Start up remaining nodes.
    try:
        session = tmp_cluster.connect()
        statement = "ALTER KEYSPACE dse_leases WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'dc1': '%d'}" % (num_nodes)
        session.execute(statement)
    finally:
        tmp_cluster.shutdown()

    for i in range(1, num_nodes+1):
        if i is not 1:
            node = get_node(i)
            node.start(wait_for_binary_proto=True)
            wait_for_node_socket(node, 120)

    # Wait for workers to show up as Alive on master
    wait_for_spark_workers(3, 120)
Example #2
0
def use_cluster_with_graph(num_nodes):
    """
    This is a  work around to account for the fact that spark nodes will conflict over master assignment
    when started all at once.
    """

    # Create the cluster but don't start it.
    use_singledc(start=False, workloads=['graph', 'spark'])
    # Start first node.
    get_node(1).start(wait_for_binary_proto=True)
    # Wait binary protocol port to open
    wait_for_node_socket(get_node(1), 120)
    # Wait for spark master to start up
    spark_master_http = ("localhost", 7080)
    common.check_socket_listening(spark_master_http, timeout=60)
    tmp_cluster = Cluster(protocol_version=PROTOCOL_VERSION)

    # Start up remaining nodes.
    try:
        session = tmp_cluster.connect()
        statement = "ALTER KEYSPACE dse_leases WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'dc1': '%d'}" % (
            num_nodes)
        session.execute(statement)
    finally:
        tmp_cluster.shutdown()

    for i in range(1, num_nodes + 1):
        if i is not 1:
            node = get_node(i)
            node.start(wait_for_binary_proto=True)
            wait_for_node_socket(node, 120)

    # Wait for workers to show up as Alive on master
    wait_for_spark_workers(3, 120)
Example #3
0
    def _start_scylla_manager_agent(self):
        agent_bin = os.path.join(self.scylla_manager._get_path(), 'bin',
                                 'scylla-manager-agent')
        log_file = os.path.join(self.get_path(), 'logs',
                                'system.log.manager_agent')
        config_file = self._create_agent_config()

        agent_log = open(log_file, 'a')
        args = [agent_bin, '--config-file', config_file]
        self._process_agent = subprocess.Popen(args,
                                               stdout=agent_log,
                                               stderr=agent_log,
                                               close_fds=True)
        self._process_agent.poll()
        # When running on ccm standalone, the waiter thread would block
        # the create commands. Besides in that mode, waiting is unnecessary,
        # since the original popen reference is garbage collected.
        standalone = os.environ.get('SCYLLA_CCM_STANDALONE', None)
        if standalone is None:
            self._process_agent_waiter = threading.Thread(
                target=self._wait_for_agent)
            # Don't block the main thread on abnormal shutdown
            self._process_agent_waiter.daemon = True
            self._process_agent_waiter.start()
        pid_filename = os.path.join(self.get_path(), 'scylla-agent.pid')
        with open(pid_filename, 'w') as pid_file:
            pid_file.write(str(self._process_agent.pid))

        api_interface = common.parse_interface(self.address(), 10001)
        if not common.check_socket_listening(api_interface, timeout=180):
            raise Exception(
                "scylla manager agent interface %s:%s is not listening after 180 seconds, scylla manager agent may have failed to start."
                % (api_interface[0], api_interface[1]))
Example #4
0
    def start(self):
        # some configurations are set post initalization (cluster id) so
        # we are forced to update the config prior to calling start
        self._update_config()
        # check process is not running
        if self._pid:
            try:
                os.kill(self._pid, 0)
                return
            except OSError as err:
                pass

        log_file = os.path.join(self._get_path(),'scylla-manager.log')
        scylla_log = open(log_file, 'a')

        if os.path.isfile(self._get_pid_file()):
            os.remove(self._get_pid_file())

        args=[os.path.join(self._get_path(),'bin','scylla-manager'),
              '--config-file',os.path.join(self._get_path(),'scylla-manager.yaml'),
              '--developer-mode']
        self._process_scylla_manager = subprocess.Popen(args, stdout=scylla_log,
                                                stderr=scylla_log,
                                                close_fds=True)
        self._process_scylla_manager.poll()
        with open(self._get_pid_file(), 'w') as pid_file:
            pid_file.write(str(self._process_scylla_manager.pid))

        api_interface = common.parse_interface(self._get_api_address(),9090)
        if not common.check_socket_listening(api_interface,timeout=180):
            raise Exception("scylla manager interface %s:%s is not listening after 180 seconds, scylla manager may have failed to start."
                          % (api_interface[0], api_interface[1]))

        return self._process_scylla_manager
Example #5
0
def find_spark_master(session):

    # Itterate over the nodes the one with port 7080 open is the spark master
    for host in session.hosts:
        ip = host.address
        port = 7077
        spark_master = (ip, port)
        if common.check_socket_listening(spark_master, timeout=3):
            return spark_master[0]
    return None
Example #6
0
def find_spark_master(session):

    # Itterate over the nodes the one with port 7080 open is the spark master
    for host in session.hosts:
        ip = host.address
        port = 7077
        spark_master = (ip, port)
        if common.check_socket_listening(spark_master, timeout=3):
            return spark_master[0]
    return None
Example #7
0
def wait_for_node_socket(node, timeout):
    binary_itf = node.network_interfaces['binary']
    if not common.check_socket_listening(binary_itf, timeout=timeout):
        log.warning("Unable to connect to binary socket for node " + node.name)
    else:
        log.debug("Node %s is up and listening " % (node.name,))
Example #8
0
def wait_for_node_socket(node, timeout):
    binary_itf = node.network_interfaces['binary']
    if not common.check_socket_listening(binary_itf, timeout=timeout):
        log.warn("Unable to connect to binary socket for node " + node.name)
    else:
        log.debug("Node %s is up and listening " % (node.name,))
Example #9
0
def wait_for_node_socket(node, timeout):
    binary_itf = node.network_interfaces['binary']
    if not common.check_socket_listening(binary_itf, timeout=timeout):
        print("Unable to connect to binary socket for node" + str(node))
    else:
        print("Node is up and listening " + str(node))
Example #10
0
def wait_for_node_socket(node, timeout):
    binary_itf = node.network_interfaces['binary']
    if not common.check_socket_listening(binary_itf, timeout=timeout):
        print("Unable to connect to binary socket for node"+str(node))
    else:
        print("Node is up and listening "+str(node))