Example #1
    def attach(self, cc, out):
        out.info("Started installing %s in %s (%s) ---------------------------------------" % (self.role, self.hostname, self.ip_address))

        # Regenerate host files of all the managed nodes (before acquiring the node lock)
        if not cc.options.dns:
            try:
                self.config_description = 'regenerating /etc/hosts files of the cluster'
                bigdata.acquire('cluster')
                cc = bigdata.create_config_context(cc.options)
                self.regenerate_etc_hosts_files(cc, out)
            finally:
                bigdata.release('cluster')
        
        # Connect by SSH
        bigdata.acquire("node-%s-files" % self.hostname)
        ssh = SSHConnection(self.hostname, out)
        try:
            out.info("Connecting to %s" % self.hostname)
            ssh.connect()

            # Check operating system type and version and remote hostname
            self.check_operating_system_version(cc, out, ssh)
            self.check_remote_hostname(cc, out, ssh)

            # Set state        
            self.put_config_state("attaching")
        
            # Regenerate host files of all the managed nodes
            try:
                bigdata.acquire('cluster')
                self.regenerate_etc_hosts_files(cc, out)
            finally:
                bigdata.release('cluster')
        
            # Make template params
            template_params = make_template_params(cc)
        
            # Check if there is an installation in the node already
            self.check_possible_big_data_installation(cc, out, ssh)
            
            # List of rpm lists for uninstallation
            rpms_list = RpmList(self)
            rpms_list.push([])

            # Install RPMs
            if cc.options.rpms:
                self.config_description = 'installing rpms'
                
                ## Running yum update
                #out.info("Running YUM update") # TODO: this shouldn't be done in the final version
                #ssh.execute("yum clean all ; yum --assumeyes --quiet update")

                # Install open java
                out.info("Installing OpenJDK")
                ssh.install(['java-1.6.0-openjdk'])

            
                if self.role == "hmaster":
                    out.info("Installing HMaster RPM files")
                    if self.type == 'hbase':
                        rpms = [
                                'zookeeper',
                                'hadoop-0.20-mapreduce-jobtracker',
                                'hadoop-hdfs-secondarynamenode',
                                'hadoop-hdfs-namenode',
                                'hbase',
                                'hbase-master',
                                'hadoop-hdfs',
                        ]
                    else:
                        rpms = [
                                'hadoop-0.20-mapreduce-jobtracker',
                                'hadoop-hdfs-secondarynamenode',
                                'hadoop-hdfs-namenode',
                                'hadoop-hdfs',
                        ]
                    ssh.install(rpms)
                    rpms_list.push(rpms)
                elif self.role == "zookeeper":
                    # Install RPM packages
                    out.info("Installing ZooKeeper RPM files")
                    rpms = [
                            'zookeeper',
                            'zookeeper-server',
                    ]
                    ssh.install(rpms)
                    rpms_list.push(rpms)
                elif self.role == "slave":
                    # Install RPM packages
                    if self.type == 'hbase':
                        rpms = [
                                    'zookeeper',
                                    'hadoop-hdfs',
                                    'hadoop-hdfs-datanode',
                                    'hadoop-0.20-mapreduce-tasktracker',
                                    'hbase',
                                    'hbase-regionserver',
                        ]
                    else:
                        rpms = [
                                    'hadoop-hdfs',
                                    'hadoop-hdfs-datanode',
                                    'hadoop-0.20-mapreduce-tasktracker',
                        ]
                    ssh.install(rpms)
                    rpms_list.push(rpms)
                elif self.role == "hive":
                    # Install RPM packages
                    rpms = [
                                'hive',
                                'hive-metastore',
                                'hive-server2',
                                'MariaDB-server',
                                'MariaDB-client',
                                'mysql-connector-java',
                    ]
                    if self.type == 'hbase':
                        rpms += ['hive-hbase']
                    if cc.pig_support:
                        rpms += ['pig']
                        rpms += ['pig-udf-datafu']
                    
                    ssh.install(rpms)
                    rpms_list.push(rpms)
                else:
                    raise "Unknown role: %s" % (self.role)

            # FIXME: earlier these all referred to the 'hbase' user, but were changed to the correct ones. Does this work?
            hbase_homedir = ssh.get_homedir_of('hbase')
            hdfs_homedir = ssh.get_homedir_of('hdfs')
            zookeeper_homedir = ssh.get_homedir_of('zookeeper')
            hive_homedir = ssh.get_homedir_of('hive')

            # Ensure that update-alternatives configuration is correct
            if self.role in ['hmaster', 'slave']:
                out.info("Configure update alternatives")
                ssh.execute("mkdir -p /etc/hadoop/conf.cluster")
                ssh.execute("cp -pfR /etc/hadoop/conf.empty/* /etc/hadoop/conf.cluster/")
                ssh.execute("update-alternatives --install /etc/hadoop/conf hadoop-conf /etc/hadoop/conf.cluster 20")
                ssh.execute("update-alternatives --set hadoop-conf /etc/hadoop/conf.cluster")

            # Wait until expected number of dependencies are ready        
            try:
                self.config_description = 'waiting for other nodes'
                bigdata.release('node-%s' % self.hostname)
                if self.role == 'hmaster':
                    if len(cc.zookeepers) > 0:
                        self.config_description = 'waiting for zookeepers'
                        while count_config_states(cc.zookeepers, 'attached') < 3:
                            sleep(2.0)
                            cc = self.recreate_config_context(cc.options)
                    self.config_description = 'waiting for initial slaves'
                    while count_config_states(cc.slaves, 'attached') < 3:
                        sleep(2.0)
                        cc = self.recreate_config_context(cc.options)
                elif self.role == 'zookeeper':
                    pass
                elif self.role == 'slave':
                    if len(cc.zookeepers) > 0:
                        self.config_description = 'waiting for zookeepers'
                        while count_config_states(cc.zookeepers, 'attached') < 3:
                            sleep(2.0)
                            cc = self.recreate_config_context(cc.options)
                elif self.role == 'hive':
                    # Wait for ZooKeepers
                    if len(cc.zookeepers) > 0:
                        self.config_description = 'waiting for zookeepers'
                        while count_config_states(cc.zookeepers, 'attached') < 3:
                            sleep(2.0)
                            cc = self.recreate_config_context(cc.options)
                            
                    # Wait for HMaster
                    self.config_description = 'waiting for hmaster'
                    while count_config_states(cc.hmasters, 'attached') < 1:
                        sleep(2.0)
                        cc = self.recreate_config_context(cc.options)
                else:
                    raise MgmtException("Uknown role %s" % self.role)
            finally:
                bigdata.acquire('node-%s' % self.hostname)
            self.config_description = ''
        
            # Re-read nodes and generate template parameters
            try:
                bigdata.acquire('cluster')
                cc = bigdata.create_config_context(cc.options)
                template_params = make_template_params(cc)
            finally:
                bigdata.release('cluster')

            # Populate templates and send them to the remote server
            self.config_description = 'populating templates'
            out.info("Populating and applying configuration templates")
            for content, remote_filename, mode in populate_templates('hbase', self.role, template_params):
                ssh.send_file_to(content, remote_filename, mode=mode)

            # Repopulate and start the first slaves 
            if self.role == 'hmaster':
                self.config_description = "updating and restarting slaves"
                out.info("Updating and restarting first 3 slaves")
                for snode in cc.slaves[0:3]:
                    self.config_description = "updating and restarting slaves: %s" % snode.hostname
                    rssh = SSHConnection(snode.hostname, out)
                    try:
                        bigdata.acquire('node-%s' % snode.hostname)
                        rssh.connect()
                        
                        # Repopulate and send the templates
                        for content, remote_filename, mode in populate_templates('hbase', 'slave', template_params):
                            rssh.send_file_to(content, remote_filename, mode=mode)

                        # (Re)start the services                            
                        rssh.execute("service hadoop-hdfs-datanode restart");                    
                        rssh.execute("service hadoop-0.20-mapreduce-tasktracker restart");
                        if self.type == 'hbase':
                            rssh.execute("service hbase-regionserver restart");
                    finally:
                        bigdata.release('node-%s' % snode.hostname)
                        rssh.disconnect()

            # Run post install script
            self.config_description = 'executing post install script'
            out.info("Executing post-install script")
            ssh.execute("cd /tmp && ./post-install.sh", raise_on_non_zero=True, 
                raise_on_keywords=["java.net.ConnectException"])

            # Process startups
            if self.role == "hmaster":
                # Copy SSH keys of hdfs and hbase to the bigdata storage
                self.config_description = 'sending ssh keys'
                if self.type == 'hbase':
                    out.info("Save public SSH keys of hbase and hdfs")
                    self.hdfs_public_ssh_key  = ssh.receive_file_from("%s/.ssh/id_rsa.pub" % hdfs_homedir)
                    self.hbase_public_ssh_key = ssh.receive_file_from("%s/.ssh/id_rsa.pub" % hbase_homedir)
                else:
                    out.info("Save public SSH keys of hdfs")
                    self.hdfs_public_ssh_key  = ssh.receive_file_from("%s/.ssh/id_rsa.pub" % hdfs_homedir)

                # Start the services
                self.config_description = 'starting services'
                if self.type == 'hbase':
                    out.info("Starting services in HMaster")
                    ssh.execute("service hbase-master stop", raise_on_non_zero=False)
                    ssh.execute("/etc/init.d/hadoop-0.20-mapreduce-jobtracker stop", raise_on_non_zero=False)
                    ssh.execute("service hadoop-hdfs-secondarynamenode stop", raise_on_non_zero=False)
                    ssh.execute("service hadoop-hdfs-namenode stop", raise_on_non_zero=False)
                    
                    ssh.execute("service hadoop-hdfs-namenode start")
                    ssh.execute("service hadoop-hdfs-secondarynamenode start")
                    ssh.execute("/etc/init.d/hadoop-0.20-mapreduce-jobtracker restart")
                    ssh.execute("service hbase-master start")
                else:
                    out.info("Starting services in Hadoop Master")
                    ssh.execute("/etc/init.d/hadoop-0.20-mapreduce-jobtracker stop", raise_on_non_zero=False)
                    ssh.execute("service hadoop-hdfs-secondarynamenode stop", raise_on_non_zero=False)
                    ssh.execute("service hadoop-hdfs-namenode stop", raise_on_non_zero=False)
                    
                    ssh.execute("service hadoop-hdfs-namenode start")
                    ssh.execute("service hadoop-hdfs-secondarynamenode start")
                    ssh.execute("/etc/init.d/hadoop-0.20-mapreduce-jobtracker restart")
            elif self.role == "zookeeper":
                # Initialize the service
                self.config_description = 'starting services'
                out.info("Initiating ZooKeeper")
                ssh.execute("service zookeeper-server init");      

                # update zookeeper id before start
                ssh.execute("echo %s > /var/lib/zookeeper/myid" % (cc.zookeepers.index(self)));      
                
                # Start the service
                out.info("Starting services in ZooKeeper")
                ssh.execute("service zookeeper-server restart");
            elif self.role == "slave":
                # Copy SSH public keys
                if self.type == 'hbase':
                    out.info("Copying the public SSH keys of hbase and hdfs from master to the node")
                    ssh.send_file_to(self.hbase_public_ssh_key, "/tmp/id_rsa.pub")
                    ssh.execute("cat /tmp/id_rsa.pub >> %s/.ssh/authorized_keys && rm /tmp/id_rsa.pub" % hbase_homedir)
                    ssh.execute("mkdir %s/.ssh && chmod 0700 %s/.ssh" % (hdfs_homedir, hdfs_homedir), raise_on_non_zero=False)
                    ssh.send_file_to(self.hdfs_public_ssh_key, "/tmp/id_rsa.pub")
                    ssh.execute("cat /tmp/id_rsa.pub >> %s/.ssh/authorized_keys && rm /tmp/id_rsa.pub" % hdfs_homedir)
                else:
                    out.info("Copying the public SSH keys of hdfs from master to the node")
                    ssh.execute("mkdir %s/.ssh && chmod 0700 %s/.ssh" % (hdfs_homedir, hdfs_homedir), raise_on_non_zero=False)
                    ssh.send_file_to(self.hdfs_public_ssh_key, "/tmp/id_rsa.pub")
                    ssh.execute("cat /tmp/id_rsa.pub >> %s/.ssh/authorized_keys && rm /tmp/id_rsa.pub" % hdfs_homedir)
            
                # Start the services
                if len(cc.slaves) > 3:
                    self.config_description = 'starting services'
                    if self.type == 'hbase':
                        out.info("Starting services in HBase slave")
                        ssh.execute("service hadoop-hdfs-datanode restart");                    
                        ssh.execute("/etc/init.d/hadoop-0.20-mapreduce-tasktracker restart");
                        ssh.execute("service hbase-regionserver restart");
                    else:
                        out.info("Starting services in Hadoop slave")
                        ssh.execute("service hadoop-hdfs-datanode restart");                    
                        ssh.execute("/etc/init.d/hadoop-0.20-mapreduce-tasktracker restart");
                else:
                    out.info("Not starting slave services before HMaster")
            elif self.role == "hive":
                # Initialize the service
                self.config_description = 'starting services'
                
                # Start the service
                out.info("Starting services in Hive")
                ssh.execute("service mysql restart");
                ssh.execute("service hive-metastore restart");
                ssh.execute("service hive-server2 restart");
            else:
                raise "Unknown role: %s" % (self.role)

            # Run status check script
            self.config_description = 'checking status'
            out.info("Run status check script")
            ssh.execute("cd /tmp && ./status-check.sh")

            ## Remove post install and status check scripts
            #self.config_description = 'cleaning /tmp'
            #out.info("Remove installation scripts from /tmp")
            #ssh.remove_file("/tmp/post-install.sh")
            #ssh.remove_file("/tmp/status-check.sh")
            #ssh.remove_file("/tmp/hadoop-config.sh.diff")
            
            # Release the host lock because we won't touch this host anymore
            bigdata.release('node-%s' % self.hostname)

            # Configure related nodes
            self.config_description = 'configuring related nodes...'
            
            # Re-read nodes and generate template parameters
            try:
                bigdata.acquire('cluster')
                cc = bigdata.create_config_context(cc.options)
                template_params = make_template_params(cc)
            finally:
                bigdata.release('cluster')
            
            # Reconfigure zoo.cfg on all zookeeper nodes and restart services
            if self.role == "hmaster":
                # Notice: if we need to add something here, be careful with
                # node locking...
                pass
            elif self.role == "zookeeper":
                # Update ZooKeepers (if this is the last zookeeper being configured in parallel)
                if cc.zookeepers.index(self) == len(cc.zookeepers) - 1:
                    for node in cc.zookeepers:
                        if node != self:
                            rssh = SSHConnection(node.hostname, out)
                            try:
                                self.config_description = "reconfiguring zookeeper of %s" % node.hostname
                                bigdata.acquire('node-%s' % node.hostname)
                                rssh.connect()
                                # copy zoo.cfg including new zookeeper to all zookeeper nodes 
                                # Populate templates and send them to the remote server
                                out.info("Populating and applying configuration templates")
                                for content, remote_filename, mode in populate_templates('hbase', 'zookeeper', template_params):
                                    # send the updated configuration to this zookeeper node
                                    rssh.send_file_to(content, remote_filename, mode=mode)
                                # restart zookeepers
                                out.info("Restarting ZooKeeper services in node: %s" % node.hostname)
                                rssh.execute("service zookeeper-server restart");
                            finally:
                                bigdata.release('node-%s' % node.hostname)
                                rssh.disconnect()                                
                                
                    # Update and restart HMaster
                    if cc.hmaster != None and len(cc.zookeepers) > 3:
                        rssh = SSHConnection(cc.hmaster.hostname, out)
                        try:
                            self.config_description = "updating and restarting hmaster"
                            bigdata.acquire('node-%s' % cc.hmaster.hostname)
                            rssh.connect()
                            
                            # update hbase-site.xml for hmaster 
                            for content, remote_filename, mode in populate_templates('hbase', 'hmaster', template_params):
                                rssh.send_file_to(content, remote_filename, mode=mode)
                               
                            # refresh hmaster (hbase)?? - restart required?
                            rssh.execute("service hbase-master restart");
                        finally:
                            bigdata.release('node-%s' % cc.hmaster.hostname)
                            rssh.disconnect()

            elif self.role == "slave":
                if len(cc.hmasters) > 0:
                    rssh = SSHConnection(cc.hmaster.hostname, out)
                    try:
                        self.config_description = "updating hmaster files"
                        bigdata.acquire('node-%s' % cc.hmaster.hostname)
                        rssh.connect()
                        # hbase:
                        # copy regionservers file including new node to hmaster
                        for content, remote_filename, mode in populate_templates('hbase', 'hmaster', template_params):
                            # send the updated configuration files to the hmaster
                            rssh.send_file_to(content, remote_filename, mode=mode)
                        # start hbase services in node
                        # optional: hbase splitting or other rebalancing activities?
                        # hadoop/hdfs:
                        # copy slaves file including new node to hmaster
                        # start hadoop services in node
                        # optional: use balancer tool for re-distributing blocks to the upscaled cluster
                        
                        ## Name node restart
                        #ssh.execute("service hadoop-hdfs-namenode restart")
                    finally:
                        bigdata.release('node-%s' % cc.hmaster.hostname)
                        rssh.disconnect()
                else:
                    out.info("No masters available, skipping sending region servers file at this time")

            self.put_config_state("attached")
            out.info("Node configuration finished SUCCESSFULLY.")
        except socket.error as e:
            raise MgmtException("SSH connection failed: %s" % str(e), e)
        except SSHException as e:
            raise MgmtException("SSH error: %s" % str(e), e)
        except Exception as e:
            self.put_config_state("error", str(e))
            raise
        finally:
            if ssh != None:
                ssh.disconnect()
            out.info("Closed SSH connection to %s" % self.hostname)
            # Release the per-node files lock acquired before connecting (as the detach and MongoDB attach flows do)
            bigdata.release("node-%s-files" % self.hostname)
            
        return True
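
Every critical section in attach() wraps bigdata.acquire(...) / bigdata.release(...) in a hand-written try/finally, which is verbose and easy to get wrong. Below is a minimal sketch of a context-manager helper over the same API, assuming only the acquire/release calls shown in these examples; held_lock is a hypothetical name, not part of the bigdata module.

from contextlib import contextmanager

import bigdata  # lock/config API used throughout these examples


@contextmanager
def held_lock(name):
    """Acquire a named bigdata lock and always release it, even on error."""
    bigdata.acquire(name)
    try:
        yield
    finally:
        bigdata.release(name)


# Usage, equivalent to the manual try/finally blocks in attach():
#
#     with held_lock('cluster'):
#         cc = bigdata.create_config_context(cc.options)
#         template_params = make_template_params(cc)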
Example #2
 def recreate_config_context(self, options):
     try:
         bigdata.acquire('cluster')
         return bigdata.create_config_context(options)
     finally:
         bigdata.release('cluster')
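
recreate_config_context() backs the polling loops in Example 1 that wait for zookeepers, slaves and the hmaster to reach the 'attached' state. Here is a sketch of that recurring pattern factored into a small helper; count_config_states and sleep come from the examples, while wait_until_attached and its select_nodes parameter are illustrative names only.

from time import sleep


def wait_until_attached(node, cc, select_nodes, minimum, poll_interval=2.0):
    """Poll until at least `minimum` of the selected nodes are 'attached',
    re-reading the config context between rounds so fresh states are seen."""
    # count_config_states is assumed to be importable from the surrounding module
    while count_config_states(select_nodes(cc), 'attached') < minimum:
        sleep(poll_interval)
        cc = node.recreate_config_context(cc.options)
    return cc


# Usage, mirroring the hmaster branch of attach() in Example 1:
#
#     cc = wait_until_attached(self, cc, lambda c: c.zookeepers, 3)
#     cc = wait_until_attached(self, cc, lambda c: c.slaves, 3)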
Example #3
    def detach(self, cc, out):
        # Check exclude/decommission status
        if not cc.options.force:
            out.info("Cheking decommission status of %s" % self.hostname)
            if self.is_decommission_in_progress(cc, out):
                out.status = "pending"
                raise MgmtException("Excluding (decommission) is in progress. The node can't be detached safely, aborting. Use --force paramter to bypass this check.")
    
        # Connect by SSH
        hostname = self.hostname
        bigdata.acquire("node-%s-files" % hostname)
        ssh = SSHConnection(self.hostname, out)
        try:
            # Set state during the uninstall
            out.info("Detaching node %s from the bigdata cluster" % (self))
            self.put_config_state("detaching")

            # SSH connect
            out.info("Connecting to %s" % self.hostname)
            ssh.connect()

            # Populate /tmp/mongodb templates and send them to the node
            template_params = make_template_params(cc, self)
            out.info("Populating and applying configuration templates")
            for content, remote_filename, mode in populate_templates('mongodb', self.role, template_params):
                if remote_filename[0:12] == "/tmp/mongodb":
                    ssh.send_file_to(content, remote_filename, mode=mode)
                    
            # Remove shard from config server
            if self.role == "shard":
                if cc.options.force:
                    out.info("Shard draining skipped because of the force switch")
                else:
                    try:
                        bigdata.acquire('cluster')
                        rset = cc.find_replicaset_members(self.replsetnum)
                        if len(rset) >= cc.replica_set_size:
                            out.info("Removing %s (very slow)" % template_params["SHARD_NAME"])
                            ssh.execute("mongo %s:27017 /tmp/mongodb/mongos-shard-remove.js" % (cc.first_mongos.hostname))
                            for node in rset:
                                node.put_config_state("drained", "")
                    finally:
                        bigdata.release('cluster')

            # Stop services
            out.info("Stopping services")
            if self.role == "shard":
                out.info("  mongod...")
                ssh.execute("/etc/init.d/mongod stop", raise_on_non_zero=False)
            elif self.role == "config":
                out.info("  mongos...")
                ssh.execute("/etc/init.d/mongos stop", raise_on_non_zero=False)
                out.info("  config server...")
                ssh.execute("/etc/init.d/mongo-cfgsrv stop", raise_on_non_zero=False);      
            
            # Uninstall RPMS
            out.info("Uninstalling RPM files")
            ssh.uninstall(['mongodb-org-server', 'mongodb-org'])

            # Run the final cleanup script
            out.info("Run cleanup script")
            ssh.execute("cd /tmp/mongodb && ./cleanup.sh", raise_on_non_zero=False)

            # Delete /etc/bigdata file from the server
            ssh.remove_file("/etc/bigdata")

            # Drop the node from the config context
            if self.role == "shard":
                del cc.shards[cc.shards.index(self)]
            elif self.role == "config":
                del cc.configs[cc.configs.index(self)]

            # Make template params
            template_params = make_template_params(cc, self)

            # Remove post install and status check scripts
            out.info("Remove installation related scripts from /tmp")
            ssh.execute("rm -fR /tmp/mongodb", raise_on_non_zero=False, read_out_and_err=False)

            # Drop from cluster
            try:
                bigdata.acquire('cluster')
                self.drop()
            finally:
                bigdata.release('cluster')
            
            out.info("The node was dropped SUCCESSFULLY.")
        except Exception as e:
            self.put_config_state("error", str(e))
            if cc.options.force:
                out.warn("An error detected but forcing detach still")
                self.drop()
            raise
        finally:
            ssh.disconnect()
            out.info("Closed SSH the connection")
            bigdata.release("node-%s-files" % hostname)
Example #4
            raise MgmtException("Invalid HBase role: %s" % role)

        # Configure
        out.role = role
        if not cc.options.roleonly:
            node = hbase.HBaseNode(role, cc.type)
            node.hostname = hostname
            node.ip_address = ip_address
            cclist.append(node)

            # Update local /etc/hosts
            if not cc.options.roleonly and not cc.options.dns:
                update_local_etc_hosts(cc)

            # Release cluster lock and start attaching
            bigdata.release("cluster")
            node.attach(cc, out)
    elif cc.type == "mongodb":
        # Decide role
        role = "unknown"
        if options.role != None:
            role = options.role
        else:
            (roles, rolemap) = mongodb.generate_role_list(cc, len(cc.everything) + 1)
            if len(cc.configs) < rolemap["configs"]:
                role = "config"
            else:
                role = "shard"

        # Choose the node list of config context
        cclist = None
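
The MongoDB branch above picks a node role by comparing the current number of config servers against the generated role map. Below is the same decision pulled out into a standalone function for readability; mongodb.generate_role_list and the rolemap["configs"] key come from the fragment, while decide_mongodb_role itself is an illustrative name.

def decide_mongodb_role(cc, options):
    """Return the explicit --role if one was given; otherwise hand out
    'config' until the role map's quota of config servers is filled,
    and 'shard' after that."""
    if options.role is not None:
        return options.role
    roles, rolemap = mongodb.generate_role_list(cc, len(cc.everything) + 1)
    if len(cc.configs) < rolemap["configs"]:
        return "config"
    return "shard"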
Example #5
    def attach(self, cc, out):
        out.info("Started installing %s in %s (%s) ---------------------------------------" % (self.role, self.hostname, self.ip_address))

        # Regenerate host files of all the managed nodes (before acquiring the node lock)
        try:
            self.put_config_state("starting")
            self.config_description = "regerating /etc/host files"
            out.info("Regerating /etc/host files")
            bigdata.acquire('cluster')
            cc = bigdata.create_config_context(cc.options)
            self.regenerate_etc_hosts_files(cc, out)
        finally:
            bigdata.release('cluster')
        
        # Connect by SSH
        bigdata.acquire("node-%s-files" % self.hostname)
        ssh = SSHConnection(self.hostname, out)
        try:
            out.info("Connecting to %s" % self.hostname)
            ssh.connect()

            # Check operating system type and version and remote hostname
            self.check_operating_system_version(cc, out, ssh)
            self.check_remote_hostname(cc, out, ssh)

            # Set state        
            self.put_config_state("attaching")
        
            # Decide replica set name
            try:
                bigdata.acquire('cluster')
                cc = bigdata.create_config_context(cc.options)
            
                current_replica_set_size = 0
                primary_replicaset_member = None
                current_replica_set = []
                current_is_the_last_in_rs = False
                if self.role == 'shard':
                    max_rn = 0
                    replsets = {}
                    for snode in cc.shards:
                        if snode.replsetnum != None:
                            rn = int(snode.replsetnum)
                            max_rn = max(rn, max_rn)
                            if rn in replsets:
                                replsets[rn].append(snode)
                            else:
                                replsets[rn] = [snode]
                    self.replsetnum = None
                    for rn in replsets:
                        # Try to find a non-complete replica set and assign this node to it
                        if len(replsets[rn]) < cc.replica_set_size:
                            if len(replsets[rn]) + 1 == cc.replica_set_size:
                                current_is_the_last_in_rs = True
                            self.replsetnum = rn
                            primary_replicaset_member = replsets[rn][0]
                            current_replica_set_size = len(replsets[rn])
                            current_replica_set = replsets[rn]
                    if self.replsetnum == None:                        
                        self.replsetnum = max_rn + 1
                else:
                    self.replsetnum = None
            finally:
                bigdata.release('cluster')
        
            # Make template params
            template_params = make_template_params(cc, self)
        
            # Check if there is an installation in the node already
            self.check_possible_big_data_installation(cc, out, ssh)

            # Install RPM files from the custom repository
            self.config_description = "installing RPMs"
            out.info("Installing RPM files from repository")
            ssh.install(['mongodb-org-server', 'mongodb-org'])
            self.config_description = None

            # Populate templates and send them to the remote server
            out.info("Populating and applying configuration templates")
            for content, remote_filename, mode in populate_templates('mongodb', self.role, template_params):
                ssh.send_file_to(content, remote_filename, mode=mode)

            # Run post install script
            out.info("Executing post-install script")
            ssh.execute("cd /tmp/mongodb && ./post-install.sh", raise_on_non_zero=True)

            # Process startups
            attached_when_finished = True
            if self.role == "config":
                # Start cfg server
                out.info("Starting config server")
                ssh.execute("/etc/rc.d/init.d/mongo-cfgsrv restart")
                
                # Wait for cfg server
                out.info("Waiting for mongo config server to get ready and its port bound (can take minutes)")
                self.config_description = "initializing"
                ssh.execute("cd /tmp/mongodb && ./wait-for-mongo-cfgsrv-startup.sh")
                self.config_description = None
                
                # Reconfigure config nodes                    
                if len(cc.configs) >= cc.number_of_config_servers and cc.configs[-1].hostname == self.hostname:
                    # Since parallel configuration is possible, ensure that
                    # the other config servers are ready
                    out.info("Waiting for the other config nodes to become ready")
                    self.put_config_state("waiting", "waiting for config nodes")
                    all_config_nodes_ok = False
                    while not all_config_nodes_ok:
                        try:
                            bigdata.acquire('cluster')
                            cc = bigdata.create_config_context(cc.options)
                            all_config_nodes_ok = True
                            for cnode in cc.configs:
                                if cnode.hostname != self.hostname:
                                    if cnode.config_state == 'error':
                                        # We can't continue if any of the config nodes failed
                                        raise MgmtException("Config node %s has an error. Aborting configuration." % cnode.hostname)
                                    if cnode.config_state != 'attached':
                                        all_config_nodes_ok = False
                                        break
                        finally:
                            bigdata.release('cluster')
                        sleep(2.0)
                    self.put_config_state("attaching")
                
                    out.info("Reconfigurating and restarting mongos processes in config nodes")
                    try:
                        bigdata.acquire('cluster')
                        for cnode in cc.configs:
                            bigdata.acquire("node-%s-files" % cnode.hostname)
                            rssh = SSHConnection(cnode.hostname, out)
                            try:
                                rssh.connect()
                                # Update config node templates
                                for content, remote_filename, mode in populate_templates('mongodb', 'config', template_params):
                                    rssh.send_file_to(content, remote_filename, mode=mode)
                                
                                # Mongos (re)start
                                rssh.execute("/etc/init.d/mongos restart")
                            finally:
                                rssh.disconnect()
                                bigdata.release("node-%s-files" % cnode.hostname)
                    finally:
                        bigdata.release('cluster')

            elif self.role == "shard":
                # Start the services
                out.info("Starting services of shard node")
                ssh.execute("/etc/rc.d/init.d/mongod restart")

                out.info("Waiting for mongod to get ready and its port bound (can take minutes)")
                self.config_description = "mongod initializing"
                ssh.execute("cd /tmp/mongodb && ./wait-for-mongod-startup.sh")
                self.config_description = None

                # Since parallel configuration is possible, ensure that all
                # of the config servers are up and running
                out.info("Waiting for the config nodes to become ready")
                self.put_config_state("waiting", "waiting for config nodes")
                all_config_nodes_ok = False
                while not all_config_nodes_ok:
                    try:
                        bigdata.acquire('cluster')
                        cc = bigdata.create_config_context(cc.options)
                        if len(cc.configs) >= cc.number_of_config_servers:
                            all_config_nodes_ok = True
                            for cnode in cc.configs:
                                if cnode.config_state == 'error':
                                    # We can't continue if any of the config nodes failed
                                    raise MgmtException("Config node %s has an error. Aborting configuration." % cnode.hostname)
                                if cnode.config_state != 'attached':
                                    all_config_nodes_ok = False
                                    break
                    finally:
                        bigdata.release('cluster')
                    sleep(2.0)
                self.put_config_state("attaching")
                    

                # Set state of the non-last rs members                
                self.put_config_state("pending", "configuration completed but waiting to be added to the replica set")
                attached_when_finished = False
                    
                # Operations for the last replica set member
                if current_is_the_last_in_rs:
                    self.put_config_state("attaching")
                    
                    # Wait until other replica set members are ready
                    out.info("Waiting for the other members of the replicaset rs%s to start" % self.replsetnum)
                    self.put_config_state("waiting", "waiting for the replica set members")
                    all_replica_set_members_ready = False
                    while not all_replica_set_members_ready:
                        all_replica_set_members_ready = True
                        for rnode in current_replica_set:
                            if rnode.hostname != self.hostname:
                                if rnode.config_state != 'pending':
                                    all_replica_set_members_ready = False
                                if rnode.config_state in ['error', None]:
                                    raise MgmtException("Aborting replica set configuration because node %s failed" % rnode.hostname)
                        sleep(2.0)
                    self.put_config_state("attaching")

                    # Initiate replica set
                    try:
                        bigdata.acquire('cluster')
                        self.put_config_state("attaching", "initiating replica set")
                        out.info("Initiating the replica set rs%s " % self.replsetnum)
                        ssh.execute("mongo localhost:27018 /tmp/mongodb/mongod-replicaset-initiate.js")
                        self.put_config_state("attaching")
                    finally:
                        bigdata.release('cluster')

                    try:                        
                        bigdata.acquire('cluster')

                        # Repopulate templates and send them to the remote server (to update mongos-shard-add.js)
                        out.info("Populating and applying configuration templates")
                        cc = bigdata.create_config_context(cc.options)
                        template_params = make_template_params(cc, self)
                        for content, remote_filename, mode in populate_templates('mongodb', self.role, template_params):
                            ssh.send_file_to(content, remote_filename, mode=mode)
                            
                        # Create a shard from the replica set (the cluster lock is already held)
                        self.put_config_state("attaching", "creating shard")
                        out.info("Creating a shard for replica set rs%s " % self.replsetnum)
                        ssh.execute("mongo %s:27017 /tmp/mongodb/mongos-shard-add.js" % cc.first_mongos.hostname)
                        rset = cc.find_replicaset_members(self.replsetnum)
                        self.put_config_state("attaching")
                        for node in rset:
                            node.put_config_state("attached", "")
                    finally:
                        bigdata.release('cluster')
                    attached_when_finished = True
            else:
                raise "Unknown role: %s" % (self.role)

            # Run status check script
            out.info("Run status check script")
            ssh.execute("cd /tmp/mongodb && ./status-check.sh")

            # Remove post install and status check scripts
            #out.info("Remove installation related scripts from /tmp")
            #ssh.execute("rm -fR /tmp/mongodb")

            if attached_when_finished:
                self.put_config_state("attached")
            out.info("Node configuration finished SUCCESSFULLY.")
        except socket.error as e:
            raise MgmtException("SSH connection failed: %s" % str(e), e)
        except SSHException as e:
            raise MgmtException("SSH error: %s" % str(e), e)
        except Exception as e:
            self.put_config_state("error", str(e))
            raise
        finally:
            if ssh != None:
                ssh.disconnect()
            out.info("Closed SSH connection to %s" % self.hostname)
            bigdata.release("node-%s-files" % self.hostname)
        return True
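
The replica-set assignment block near the top of this attach() (the replsets bookkeeping under "Decide replica set name") can be read as a pure function over the current shard list. Here is a sketch under the same assumptions about replsetnum and cc.replica_set_size; choose_replica_set and its return tuple are illustrative names, not part of the original code.

def choose_replica_set(shards, replica_set_size):
    """Group existing shard nodes by replica set number and either join an
    incomplete set or open a new one.

    Returns (replsetnum, existing_members, becomes_full) where becomes_full
    is True when adding one more node completes the replica set.
    """
    replsets = {}
    max_rn = 0
    for snode in shards:
        if snode.replsetnum is not None:
            rn = int(snode.replsetnum)
            max_rn = max(rn, max_rn)
            replsets.setdefault(rn, []).append(snode)

    for rn, members in replsets.items():
        if len(members) < replica_set_size:
            return rn, members, len(members) + 1 == replica_set_size

    # No incomplete replica set found: start a new one for this node.
    return max_rn + 1, [], replica_set_size == 1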