Esempio n. 1
0
 def crawl(self, path=".", xtr=None, done=0):
     """Generate a CHANGELOG file consumable by process_change.

     Walks the master tree below ``path`` and writes one changelog record
     for every entry that ``self.need_sync`` selects when compared with
     the slave root xtime.  On the root call (``path == "."``) the
     changelog is opened first and, after the walk, closed, handed to
     ``self.process`` and the stime is updated to the root master xtime.

     path -- directory to crawl
     xtr  -- slave root xtime used for all comparisons; looked up on the
             first call and passed unchanged to the recursive calls
     done -- forwarded to ``self.process``

     Raises GsyncdError when the slave xtime lookup returns an int other
     than ENOENT, or when a master xtime lookup returns an int.
     """
     is_root = path == "."
     if is_root:
         self.open()
         self.crawls += 1
     if not xtr:
         # get the root stime and use it for all comparisons
         xtr = self.xtime(".", self.slave)
         if isinstance(xtr, int):
             if xtr != ENOENT:
                 raise GsyncdError("slave is corrupt")
             xtr = self.minus_infinity
     xtl = self.xtime(path)
     if isinstance(xtl, int):
         raise GsyncdError("master is corrupt")
     if xtr == xtl:
         # nothing changed below this directory
         if is_root:
             self.close()
         return
     self.xtime_reversion_hook(path, xtl, xtr)
     logging.debug("entering " + path)
     dem = self.master.server.entries(path)
     pargfid = self.master.server.gfid(path)
     if isinstance(pargfid, int):
         # The gfid lookup handed back an int instead of a gfid.  Every
         # record below is built with os.path.join(pargfid, ...), which
         # cannot work on an int, so really skip the directory instead of
         # only logging that we do.
         logging.warning("skipping directory %s" % (path))
         if is_root:
             self.close()
         return
     for bname in dem:
         e = os.path.join(path, bname)
         st = self.lstat(e)
         if isinstance(st, int):
             logging.warning("%s got purged in the interim.." % e)
             continue
         gfid = self.master.server.gfid(e)
         if isinstance(gfid, int):
             logging.warning("skipping entry %s.." % (e))
             continue
         xte = self.xtime(e)
         if isinstance(xte, int):
             raise GsyncdError("master is corrupt")
         if not self.need_sync(e, xte, xtr):
             continue
         mo = st.st_mode
         if stat.S_ISDIR(mo):
             op = "MKDIR"
         elif stat.S_ISREG(mo):
             op = "CREATE"
         elif stat.S_ISLNK(mo):
             op = "SYMLINK"
         else:
             logging.info("ignoring %s" % e)
             continue
         self.write_entry_change("E", [gfid, op, escape(os.path.join(pargfid, bname))])
         if op == "MKDIR":
             # descend only after the directory's own record is written
             self.crawl(e, xtr)
         elif op == "CREATE":
             # regular files additionally get a data record
             self.write_entry_change("D", [gfid])
     if is_root:
         logging.info("processing xsync changelog %s" % self.fname())
         self.close()
         self.process([self.fname()], done)
         self.upd_stime(xtl)
Esempio n. 2
0
 def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True):
     """Store parameter @prm of checkpoint @chkpt via the config backend.

     The value is written under the key ``checkpoint_<prm>`` as
     ``<escaped chkpt>:<val>``.  When @xtimish is true, @val is first
     passed through ``cls.serialize_xtime``.
     """
     stored = cls.serialize_xtime(val) if xtimish else val
     key = 'checkpoint_' + prm
     payload = "%s:%s" % (escape(chkpt), stored)
     gconf.configinterface.set(key, payload)
Esempio n. 3
0
 def get_url(self, canonical=False, escaped=False):
     """Return self's url as "<scheme>://<path>".

     canonical -- use ``self.canonical_path()`` instead of ``self.path``
     escaped   -- pass the result through ``syncdutils.escape``
     """
     location = self.canonical_path() if canonical else self.path
     url = "://".join((self.scheme(), location))
     if not escaped:
         return url
     return syncdutils.escape(url)
Esempio n. 4
0
 def get_url(self, canonical=False, escaped=False):
     """Build the "<scheme>://<path>" url of this object.

     With @canonical the path comes from ``self.canonical_path()``,
     otherwise from ``self.path``.  With @escaped the url is returned
     after going through ``syncdutils.escape``.
     """
     if canonical:
         location = self.canonical_path()
     else:
         location = self.path
     parts = (self.scheme(), location)
     url = "://".join(parts)
     return syncdutils.escape(url) if escaped else url
Esempio n. 5
0
 def get_url(self, canonical=False, escaped=False):
     """Format self's url as "<scheme>://<path>" in various styles.

     canonical -- take the path from ``self.canonical_path()`` rather
                  than ``self.path``
     escaped   -- return the url passed through ``syncdutils.escape``
     """
     location = self.canonical_path() if canonical else self.path
     url = "://".join((self.scheme, location))
     if not escaped:
         return url
     return syncdutils.escape(url)
Esempio n. 6
0
 def section(self, rx=False):
     """Return the section name built from ``self.peers``.

     The name is 'peers' (or 'peersrx' when @rx is set) followed by each
     peer passed through ``escape``, space separated.  Without peers the
     pair ['.', '.'] is used and the 'peersrx' form is forced.
     """
     peers = self.peers
     if not peers:
         peers, rx = ['.', '.'], True
     words = ['peersrx' if rx else 'peers']
     words.extend(escape(peer) for peer in peers)
     return ' '.join(words)
Esempio n. 7
0
 def section(self, rx=False):
     """Compose the section name that corresponds to ``self.peers``.

     Result: the tag 'peersrx' (if @rx) or 'peers', then every peer run
     through ``escape``, joined by single spaces.  An empty peer list is
     replaced by ['.', '.'] and always yields the 'peersrx' tag.
     """
     peers = self.peers
     if not peers:
         rx = True
         peers = ['.', '.']
     tag = 'peersrx' if rx else 'peers'
     escaped_peers = [escape(peer) for peer in peers]
     return ' '.join([tag] + escaped_peers)
Esempio n. 8
0
 def get_url(self, canonical=False, escaped=False):
     """Return self's url, "<scheme>://<path>", in the requested style.

     With @canonical the path is ``self.canonical_path()``, else
     ``self.path``; with @escaped the url goes through
     ``syncdutils.escape`` before being returned.
     """
     if canonical:
         location = self.canonical_path()
     else:
         location = self.path
     parts = (self.scheme, location)
     url = "://".join(parts)
     return syncdutils.escape(url) if escaped else url
Esempio n. 9
0
 def section(self, rx=False):
     """Get the name of the section representing .peers in .config.

     The name is 'peers' (or 'peersrx' when @rx is set) followed by each
     peer passed through ``escape``, space separated.  Without peers the
     pair ['.', '.'] is used and the 'peersrx' form is forced.
     """
     peers = self.peers
     if not peers:
         peers, rx = ['.', '.'], True
     words = ['peersrx' if rx else 'peers']
     words.extend(escape(peer) for peer in peers)
     return ' '.join(words)
 def section(self, rx=False):
     """Get the name of the section representing .peers in .config.

     Result: the tag "peersrx" (if @rx) or "peers", then every peer run
     through ``escape``, joined by single spaces.  An empty peer list is
     replaced by [".", "."] and always yields the "peersrx" tag.
     """
     peers = self.peers
     if not peers:
         rx = True
         peers = [".", "."]
     tag = "peersrx" if rx else "peers"
     escaped_peers = [escape(peer) for peer in peers]
     return " ".join([tag] + escaped_peers)
Esempio n. 11
0
 def section(self, rx=False):
     """Get the name of the section representing .peers in .config.

     With @rx set the name is 'peersrx' followed by each peer passed
     through ``escape``.  Otherwise it is 'peers' followed by the part of
     each peer after its last ':'.  Without peers the pair ['.', '.'] is
     used and the 'peersrx' form is forced.
     """
     peers = self.peers
     if not peers:
         peers, rx = ['.', '.'], True
     if not rx:
         tails = [peer.split(':')[-1] for peer in peers]
         return ' '.join(['peers'] + tails)
     return ' '.join(['peersrx'] + [escape(peer) for peer in peers])
 def section(self, rx=False):
     """Get the name of the section representing .peers in .config.

     The name is 'peers' (or 'peersrx' when @rx is set) followed by each
     peer passed through ``escape``, space separated.  Without peers the
     pair ['.', '.'] is used and the 'peersrx' form is forced.
     """
     peers = self.peers
     if not peers:
         peers, rx = ['.', '.'], True
     words = ['peersrx' if rx else 'peers']
     words.extend(escape(peer) for peer in peers)
     return ' '.join(words)
Esempio n. 13
0
 def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True):
     """Use the config backend to store parameter @prm of checkpoint @chkpt.

     The entry is keyed "checkpoint_<prm>" and holds
     "<escaped chkpt>:<val>"; @val goes through ``cls.serialize_xtime``
     first when @xtimish is true.
     """
     stored = cls.serialize_xtime(val) if xtimish else val
     key = "checkpoint_" + prm
     payload = "%s:%s" % (escape(chkpt), stored)
     gconf.configinterface.set(key, payload)
Esempio n. 14
0
    def crawl(self, path='.', xtr=None, done=0):
        """Generate a CHANGELOG file consumable by process_change.

        Walks the master tree below ``path`` and writes one changelog
        record for every entry that ``self.need_sync`` selects when
        compared with the slave root xtime.  On the root call
        (``path == '.'``) the changelog is opened first and, after the
        walk, closed, handed to ``self.process`` and the stime is updated
        to the root master xtime.

        path -- directory to crawl
        xtr  -- slave root xtime used for all comparisons; looked up on
                the first call and passed unchanged to recursive calls
        done -- forwarded to ``self.process``

        Raises GsyncdError when the slave xtime lookup returns an int
        other than ENOENT, or when a master xtime lookup returns an int.
        """
        is_root = path == '.'
        if is_root:
            self.open()
            self.crawls += 1
        if not xtr:
            # get the root stime and use it for all comparisons
            xtr = self.xtime('.', self.slave)
            if isinstance(xtr, int):
                if xtr != ENOENT:
                    raise GsyncdError('slave is corrupt')
                xtr = self.minus_infinity
        xtl = self.xtime(path)
        if isinstance(xtl, int):
            raise GsyncdError('master is corrupt')
        if xtr == xtl:
            # nothing changed below this directory
            if is_root:
                self.close()
            return
        self.xtime_reversion_hook(path, xtl, xtr)
        logging.debug("entering " + path)
        dem = self.master.server.entries(path)
        pargfid = self.master.server.gfid(path)
        if isinstance(pargfid, int):
            # The gfid lookup handed back an int instead of a gfid.  Every
            # record below is built with os.path.join(pargfid, ...), which
            # cannot work on an int, so really skip the directory instead
            # of only logging that we do.
            logging.warning('skipping directory %s' % (path))
            if is_root:
                self.close()
            return
        for bname in dem:
            e = os.path.join(path, bname)
            st = lstat(e)
            if isinstance(st, int):
                logging.warning('%s got purged in the interim..' % e)
                continue
            gfid = self.master.server.gfid(e)
            if isinstance(gfid, int):
                logging.warning('skipping entry %s..' % (e))
                continue
            xte = self.xtime(e)
            if isinstance(xte, int):
                raise GsyncdError('master is corrupt')
            if not self.need_sync(e, xte, xtr):
                continue
            mo = st.st_mode
            if stat.S_ISDIR(mo):
                self.write_entry_change(
                    "E", [gfid, 'MKDIR',
                          escape(os.path.join(pargfid, bname))])
                self.crawl(e, xtr)
            elif stat.S_ISLNK(mo):
                # Read the link target of the entry itself; the previous
                # code referenced an undefined name here.
                rl = errno_wrap(os.readlink, [e], [ENOENT])
                if isinstance(rl, int):
                    # link vanished in the meantime
                    continue
                self.write_entry_change("E", [
                    gfid, 'SYMLINK',
                    escape(os.path.join(pargfid, bname)), rl
                ])
            else:
                # if a file has a hardlink, create a Changelog entry as
                # 'LINK' so the slave side will decide if to create the
                # new entry, or to create link.
                op = 'MKNOD' if st.st_nlink == 1 else 'LINK'
                self.write_entry_change(
                    "E", [gfid, op,
                          escape(os.path.join(pargfid, bname))])
                if stat.S_ISREG(mo):
                    # regular files additionally get a data record
                    self.write_entry_change("D", [gfid])

        if is_root:
            logging.info('processing xsync changelog %s' % self.fname())
            self.close()
            self.process([self.fname()], done)
            self.upd_stime(xtl)
Esempio n. 15
0
def main():
    """Entry point: parse the command line and run the chosen subcommand.

    Steps, in order: upgrade old-style arguments, build the argument
    parser with one sub-parser per subcommand, derive extra template
    variables from the slave/brick arguments, load the configuration,
    set up logging, then call ``subcmds.subcmd_<name>`` with the parsed
    arguments.  ``finalize`` is always called on the way out with the
    exit value recorded in ``excont``.
    """
    rconf.starttime = time.time()

    # If old Glusterd sends commands in old format, below function
    # converts the sys.argv to new format. This conversion is added
    # temporarily for backward compatibility. This can be removed
    # once integrated with Glusterd2
    # This modifies sys.argv globally, so rest of the code works as usual
    argsupgrade.upgrade()

    # Default argparse version handler prints to stderr, which is fixed in
    # 3.x series but not in 2.x, using custom parser to fix this issue
    if "--version" in sys.argv:
        print(GSYNCD_VERSION)
        sys.exit(0)

    parser = ArgumentParser()
    parser.add_argument("--inet6", action="store_true")
    sp = parser.add_subparsers(dest="subcmd")

    # Monitor Status File update
    p = sp.add_parser("monitor-status")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("status", help="Update Monitor Status")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Monitor
    p = sp.add_parser("monitor")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--pause-on-start",
                   action="store_true",
                   help="Start with Paused state")
    p.add_argument("--local-node-id", help="Local Node ID")
    p.add_argument("--debug", action="store_true")
    p.add_argument("--use-gconf-volinfo", action="store_true")

    # Worker
    p = sp.add_parser("worker")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("--local-path", help="Local Brick Path")
    p.add_argument("--feedback-fd",
                   type=int,
                   help="feedback fd between monitor and worker")
    p.add_argument("--local-node", help="Local master node")
    p.add_argument("--local-node-id", help="Local Node ID")
    p.add_argument("--rpc-fd",
                   help="Read and Write fds for worker-agent communication")
    p.add_argument("--subvol-num", type=int, help="Subvolume number")
    p.add_argument("--is-hottier",
                   action="store_true",
                   help="Is this brick part of hot tier")
    p.add_argument("--resource-remote",
                   help="Remote node to connect to Slave Volume")
    p.add_argument("--resource-remote-id",
                   help="Remote node ID to connect to Slave Volume")
    p.add_argument("--slave-id", help="Slave Volume ID")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Agent
    p = sp.add_parser("agent")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("--local-path", help="Local brick path")
    p.add_argument("--local-node", help="Local master node")
    p.add_argument("--local-node-id", help="Local Node ID")
    p.add_argument("--slave-id", help="Slave Volume ID")
    p.add_argument("--rpc-fd",
                   help="Read and Write fds for worker-agent communication")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Slave
    p = sp.add_parser("slave")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("--session-owner")
    p.add_argument("--master-brick",
                   help="Master brick which is connected to the Slave")
    p.add_argument("--master-node",
                   help="Master node which is connected to the Slave")
    p.add_argument("--master-node-id",
                   help="Master node ID which is connected to the Slave")
    p.add_argument("--local-node", help="Local Slave node")
    p.add_argument("--local-node-id", help="Local Slave ID")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # All configurations which are configured via "slave-" options
    # DO NOT add default values for these configurations, default values
    # will be picked from template config file
    p.add_argument("--slave-timeout",
                   type=int,
                   help="Timeout to end gsyncd at Slave side")
    p.add_argument("--use-rsync-xattrs", action="store_true")
    p.add_argument("--slave-log-level", help="Slave Gsyncd Log level")
    p.add_argument("--slave-gluster-log-level",
                   help="Slave Gluster mount Log level")
    p.add_argument("--slave-gluster-command-dir",
                   help="Directory where Gluster binaries exist on slave")
    p.add_argument("--slave-access-mount",
                   action="store_true",
                   help="Do not lazy umount the slave volume")

    # Status
    p = sp.add_parser("status")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--local-path", help="Local Brick Path")
    p.add_argument("--debug", action="store_true")
    p.add_argument("--json", action="store_true")

    # Config-check
    p = sp.add_parser("config-check")
    p.add_argument("name", help="Config Name")
    p.add_argument("--value", help="Config Value")
    p.add_argument("--debug", action="store_true")

    # Config-get
    p = sp.add_parser("config-get")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("--name", help="Config Name")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")
    p.add_argument("--show-defaults", action="store_true")
    p.add_argument("--only-value", action="store_true")
    p.add_argument("--use-underscore", action="store_true")
    p.add_argument("--json", action="store_true")

    # Config-set
    p = sp.add_parser("config-set")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("-n", "--name", help="Config Name")
    p.add_argument("-v", "--value", help="Config Value")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Config-reset
    p = sp.add_parser("config-reset")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("name", help="Config Name")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # voluuidget
    p = sp.add_parser("voluuidget")
    p.add_argument("host", help="Hostname")
    p.add_argument("volname", help="Volume Name")
    p.add_argument("--debug", action="store_true")

    # Delete
    p = sp.add_parser("delete")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument('--path', dest='paths', action="append")
    p.add_argument("--reset-sync-time",
                   action="store_true",
                   help="Reset Sync Time")
    p.add_argument("--debug", action="store_true")

    # Parse arguments
    args = parser.parse_args()

    # Python 3's argparse does not insist on a subcommand. Without one,
    # ``args`` lacks attributes such as ``debug`` that are read below, so
    # report a usage error here instead of failing later with a traceback.
    if args.subcmd is None:
        parser.error("a subcommand is required")

    # Extra template values, All arguments are already part of template
    # variables, use this for adding extra variables
    extra_tmpl_args = {}

    # Add First/Primary Slave host, user and volume
    if getattr(args, "slave", None) is not None:
        hostdata, slavevol = args.slave.split("::")
        hostdata = hostdata.split("@")
        slavehost = hostdata[-1]
        # NOTE(review): "******" looks like a redacted default user name;
        # confirm the intended value.
        slaveuser = "******"
        if len(hostdata) == 2:
            slaveuser = hostdata[0]
        extra_tmpl_args["primary_slave_host"] = slavehost
        extra_tmpl_args["slaveuser"] = slaveuser
        extra_tmpl_args["slavevol"] = slavevol

    # Add Bricks encoded path
    if getattr(args, "local_path", None) is not None:
        extra_tmpl_args["local_id"] = escape(args.local_path)

    # Add Master Bricks encoded path(For Slave)
    if getattr(args, "master_brick", None) is not None:
        extra_tmpl_args["master_brick_id"] = escape(args.master_brick)

    # Load configurations
    config_file = getattr(args, "config_file", None)

    # Subcmd accepts config file argument but not passed
    # Set default path for config file in that case
    # If an subcmd accepts config file then it also accepts
    # master and Slave arguments.
    if config_file is None and hasattr(args, "config_file"):
        config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
            GLUSTERD_WORKDIR, args.master,
            extra_tmpl_args["primary_slave_host"], extra_tmpl_args["slavevol"])

    # If Config file path not exists, log error and continue using default conf
    config_file_error_msg = None
    if config_file is not None and not os.path.exists(config_file):
        # Logging not yet initialized, create the error message to
        # log later and reset the config_file to None
        config_file_error_msg = lf(
            "Session config file not exists, using the default config",
            path=config_file)
        config_file = None

    rconf.config_file = config_file

    # Override gconf values from argument values only if it is slave gsyncd
    override_from_args = False
    if args.subcmd == "slave":
        override_from_args = True

    if args.subcmd == "monitor":
        ret = gconf.is_config_file_old(config_file, args.master,
                                       extra_tmpl_args["slavevol"])
        if ret is not None:
            gconf.config_upgrade(config_file, ret)

    # Load Config file
    gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf", config_file, vars(args),
               extra_tmpl_args, override_from_args)

    # Default label to print in log file
    label = args.subcmd
    if args.subcmd in ("worker", "agent"):
        # If Worker or agent, then add brick path also to label
        label = "%s %s" % (args.subcmd, args.local_path)
    elif args.subcmd == "slave":
        # If Slave add Master node and Brick details
        label = "%s %s%s" % (args.subcmd, args.master_node, args.master_brick)

    # Setup Logger
    # Default log file
    log_file = gconf.get("cli-log-file")
    log_level = gconf.get("cli-log-level")
    if getattr(args, "master", None) is not None and \
       getattr(args, "slave", None) is not None:
        log_file = gconf.get("log-file")
        log_level = gconf.get("log-level")

    # Use different log file location for Slave log file
    if args.subcmd == "slave":
        log_file = gconf.get("slave-log-file")
        log_level = gconf.get("slave-log-level")

    # "-" is handed to setup_logging as the log file in debug mode
    if args.debug:
        log_file = "-"
        log_level = "DEBUG"

    # Create Logdir if not exists
    try:
        if log_file != "-":
            os.mkdir(os.path.dirname(log_file))
    except OSError as e:
        if e.errno != EEXIST:
            raise

    setup_logging(log_file=log_file, level=log_level, label=label)

    if config_file_error_msg is not None:
        logging.warning(config_file_error_msg)

    # Log message for loaded config file
    if config_file is not None:
        logging.info(lf("Using session config file", path=config_file))

    set_term_handler()
    excont = FreeObject(exval=0)

    # Gets the function name based on the input argument. For example
    # if subcommand passed as argument is monitor then it looks for
    # function with name "subcmd_monitor" in subcmds file
    func = getattr(subcmds, "subcmd_" + args.subcmd.replace("-", "_"), None)

    try:
        try:
            if func is not None:
                rconf.args = args
                func(args)
        except:
            # Deliberate catch-all: every failure of the subcommand is
            # handed to log_raise_exception together with excont.
            log_raise_exception(excont)
    finally:
        finalize(exval=excont.exval)
Esempio n. 16
0
    def monitor(self, w, argv, cpids, agents, slave_vol, slave_host, master):
        """the monitor loop

        Basic logic is a blatantly simple blunt heuristics:
        if spawned client survives 60 secs, it's considered OK.
        This serves us pretty well as it's not vulnerable to
        any kind of irregular behavior of the child...

        ... well, except for one: if children is hung up on
        waiting for some event, it can survive aeons, still
        will be defunct. So we tweak the above logic to
        expect the worker to send us a signal within 60 secs
        (in the form of closing its end of a pipe). The worker
        does this when it's done with the setup stage
        ready to enter the service loop (note it's the setup
        stage which is vulnerable to hangs -- the full
        blown worker blows up on EPIPE if the net goes down,
        due to the keep-alive thread)

        w          -- worker description: w[0] is a dict with the keys
                      'dir', 'host' and 'uuid', w[1] the remote URL,
                      w[2] the subvolume number, w[3] the hot tier flag
        argv       -- base command line used to exec agent and worker
        cpids      -- collection the spawned worker pid is added to
        agents     -- collection the spawned agent pid is added to
        slave_vol, slave_host -- slave volume and host, used for the
                      status object, the up-host lookup and the event
        master     -- passed to GeorepStatus; its ``volume`` attribute
                      is reported in the faulty event

        The worker/agent pair is respawned as long as the worker's exit
        status is 0 or 1; any other status ends the loop and is returned.
        """
        if not self.status.get(w[0]['dir'], None):
            self.status[w[0]['dir']] = GeorepStatus(gconf.state_file,
                                                    w[0]['host'],
                                                    w[0]['dir'],
                                                    w[0]['uuid'],
                                                    master,
                                                    "%s::%s" % (slave_host,
                                                                slave_vol))

        set_monitor_status(gconf.state_file, self.ST_STARTED)
        self.status[w[0]['dir']].set_worker_status(self.ST_INIT)

        ret = 0

        def nwait(p, o=0):
            """Wait for pid p; None if no status is available, -1 if p
            has no child process left to wait for."""
            try:
                p2, r = waitpid(p, o)
                if not p2:
                    return
                return r
            except OSError as e:
                # no child process, this happens if the child process
                # already died and has been cleaned up
                if e.errno == ECHILD:
                    return -1
                else:
                    raise

        def exit_signalled(s):
            """ child terminated due to receipt of SIGUSR1 """
            return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))

        def exit_status(s):
            """Exit code of a normally exited child, 1 otherwise."""
            if os.WIFEXITED(s):
                return os.WEXITSTATUS(s)
            return 1

        conn_timeout = int(gconf.connection_timeout)
        while ret in (0, 1):
            remote_host = w[1]
            # The host is only known when the URL parses below; keep the
            # name defined so the faulty-event report further down cannot
            # hit an unbound variable.
            current_slave_host = None
            # Check the status of the connected slave node
            # If the connected slave node is down then try to connect to
            # different up node.
            m = re.match(r"(ssh|gluster|file):\/\/(.+)@([^:]+):(.+)",
                         remote_host)
            if m:
                current_slave_host = m.group(3)
                slave_up_hosts = get_slave_bricks_status(
                    slave_host, slave_vol)

                if current_slave_host not in slave_up_hosts:
                    if len(slave_up_hosts) > 0:
                        remote_host = "%s://%s@%s:%s" % (m.group(1),
                                                         m.group(2),
                                                         random.choice(
                                                             slave_up_hosts),
                                                         m.group(4))

            # Spawn the worker and agent in lock to avoid fd leak
            self.lock.acquire()

            logging.info('starting gsyncd worker(%s). Slave node: %s' %
                         (w[0]['dir'], remote_host))

            # Couple of pipe pairs for RPC communication b/w
            # worker and changelog agent.

            # read/write end for agent
            (ra, ww) = os.pipe()
            # read/write end for worker
            (rw, wa) = os.pipe()

            # spawn the agent process
            apid = os.fork()
            if apid == 0:
                # child: drop the worker's pipe ends, then become the agent
                os.close(rw)
                os.close(ww)
                os.execv(sys.executable, argv + ['--local-path', w[0]['dir'],
                                                 '--local-node', w[0]['host'],
                                                 '--local-node-id',
                                                 w[0]['uuid'],
                                                 '--agent',
                                                 '--rpc-fd',
                                                 ','.join([str(ra), str(wa),
                                                           str(rw), str(ww)])])
            # feedback pipe: the monitor selects on pr, the worker gets pw
            pr, pw = os.pipe()
            cpid = os.fork()
            if cpid == 0:
                # child: drop the monitor's and the agent's pipe ends,
                # then become the worker
                os.close(pr)
                os.close(ra)
                os.close(wa)
                os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
                                                 '--local-path', w[0]['dir'],
                                                 '--local-node', w[0]['host'],
                                                 '--local-node-id',
                                                 w[0]['uuid'],
                                                 '--local-id',
                                                 '.' + escape(w[0]['dir']),
                                                 '--rpc-fd',
                                                 ','.join([str(rw), str(ww),
                                                           str(ra), str(wa)]),
                                                 '--subvol-num', str(w[2])] +
                         (['--is-hottier'] if w[3] else []) +
                         ['--resource-remote', remote_host])

            cpids.add(cpid)
            agents.add(apid)
            os.close(pw)

            # close all RPC pipes in monitor
            os.close(ra)
            os.close(wa)
            os.close(rw)
            os.close(ww)
            self.lock.release()

            t0 = time.time()
            # wait up to conn_timeout seconds for the feedback pipe to
            # become readable
            so = select((pr,), (), (), conn_timeout)[0]
            os.close(pr)

            if so:
                ret = nwait(cpid, os.WNOHANG)
                ret_agent = nwait(apid, os.WNOHANG)

                if ret_agent is not None:
                    # Agent is died Kill Worker
                    logging.info("Changelog Agent died, "
                                 "Aborting Worker(%s)" % w[0]['dir'])
                    errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                    nwait(cpid)
                    nwait(apid)

                if ret is not None:
                    logging.info("worker(%s) died before establishing "
                                 "connection" % w[0]['dir'])
                    nwait(apid)  # wait for agent
                else:
                    logging.debug("worker(%s) connected" % w[0]['dir'])
                    # keep polling both children until conn_timeout
                    # seconds have passed since the spawn
                    while time.time() < t0 + conn_timeout:
                        ret = nwait(cpid, os.WNOHANG)
                        ret_agent = nwait(apid, os.WNOHANG)

                        if ret is not None:
                            logging.info("worker(%s) died in startup "
                                         "phase" % w[0]['dir'])
                            nwait(apid)  # wait for agent
                            break

                        if ret_agent is not None:
                            # Agent is died Kill Worker
                            logging.info("Changelog Agent died, Aborting "
                                         "Worker(%s)" % w[0]['dir'])
                            errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                            nwait(cpid)
                            nwait(apid)
                            break

                        time.sleep(1)
            else:
                logging.info("worker(%s) not confirmed in %d sec, aborting it. "
                             "Gsyncd invocation on remote slave via SSH or "
                             "gluster master mount might have hung. Please "
                             "check the above logs for exact issue and check "
                             "master or slave volume for errors. Restarting "
                             "master/slave volume accordingly might help."
                             % (w[0]['dir'], conn_timeout))
                errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                nwait(apid)  # wait for agent
                ret = nwait(cpid)
            if ret is None:
                # worker is still alive after the startup window
                self.status[w[0]['dir']].set_worker_status(self.ST_STABLE)
                # If worker dies, agent terminates on EOF.
                # So lets wait for agent first.
                nwait(apid)
                ret = nwait(cpid)
            if exit_signalled(ret):
                ret = 0
            else:
                ret = exit_status(ret)
                if ret in (0, 1):
                    self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
                    gf_event(EVENT_GEOREP_FAULTY,
                             master_volume=master.volume,
                             master_node=w[0]['host'],
                             master_node_id=w[0]['uuid'],
                             slave_host=slave_host,
                             slave_volume=slave_vol,
                             current_slave_host=current_slave_host,
                             brick_path=w[0]['dir'])
            time.sleep(10)
        self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
        return ret
Esempio n. 17
0
    def monitor(self, w, argv, cpids, agents, slave_vol, slave_host):
        """the monitor loop

        Basic logic is a blatantly simple, blunt heuristic:
        if the spawned client survives the connection timeout, it's
        considered OK. This serves us pretty well as it's not
        vulnerable to any kind of irregular behavior of the child...

        ... well, except for one: if the child is hung up on
        waiting for some event, it can survive aeons, still
        will be defunct. So we tweak the above logic to
        expect the worker to send us a signal within the timeout
        (in the form of closing its end of a pipe). The worker
        does this when it's done with the setup stage and
        ready to enter the service loop (note it's the setup
        stage which is vulnerable to hangs -- the full
        blown worker blows up on EPIPE if the net goes down,
        due to the keep-alive thread)

        Arguments:
          w          -- worker descriptor; w[0] is passed to the children
                        as --local-path, w[1] is the remote resource URL
          argv       -- base argument list; per-process options are
                        appended before exec'ing sys.executable
          cpids      -- collection the worker pid is add()-ed to
          agents     -- collection the agent pid is add()-ed to
          slave_vol, slave_host
                     -- handed to get_slave_bricks_status() to find the
                        slave nodes that are currently up

        Returns the first worker exit code that is neither 0 nor 1;
        as long as the code is 0 or 1 the pair is respawned.
        """

        self.set_state(self.ST_INIT, w)

        ret = 0

        def nwait(p, o=0):
            """ wait status of p, or None if no child was reaped """
            p2, r = waitpid(p, o)
            if not p2:
                return
            return r

        def exit_signalled(s):
            """ child terminated due to receipt of SIGUSR1 """
            return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))

        def exit_status(s):
            """ exit code of a normal exit, 1 for anything else """
            if os.WIFEXITED(s):
                return os.WEXITSTATUS(s)
            return 1

        conn_timeout = int(gconf.connection_timeout)
        while ret in (0, 1):
            remote_host = w[1]
            # Check the status of the connected slave node
            # If the connected slave node is down then try to connect to
            # different up node.
            # (raw string: the previous "\/" was an invalid escape sequence
            # in a normal string literal; the pattern matches the same text)
            m = re.match(r"(ssh|gluster|file)://(.+)@([^:]+):(.+)",
                         remote_host)
            if m:
                current_slave_host = m.group(3)
                slave_up_hosts = get_slave_bricks_status(slave_host, slave_vol)

                if current_slave_host not in slave_up_hosts and \
                   len(slave_up_hosts) > 0:
                    remote_host = "%s://%s@%s:%s" % (
                        m.group(1), m.group(2),
                        random.choice(slave_up_hosts), m.group(4))

            # Spawn the worker and agent in lock to avoid fd leak.
            # The lock is released in a finally clause so that a failing
            # os.pipe()/os.fork() cannot leave it held forever.
            self.lock.acquire()
            try:
                logging.info('-' * conn_timeout)
                logging.info('starting gsyncd worker')

                # Couple of pipe pairs for RPC communication b/w
                # worker and changelog agent.

                # read/write end for agent
                (ra, ww) = os.pipe()
                # read/write end for worker
                (rw, wa) = os.pipe()

                # spawn the agent process
                apid = os.fork()
                if apid == 0:
                    os.execv(
                        sys.executable, argv + [
                            '--local-path', w[0], '--agent', '--rpc-fd',
                            ','.join([str(ra), str(wa),
                                      str(rw), str(ww)])
                        ])

                # feedback pipe: the worker's write end being closed is
                # what wakes up the select() below
                pr, pw = os.pipe()
                cpid = os.fork()
                if cpid == 0:
                    os.close(pr)
                    os.execv(
                        sys.executable, argv + [
                            '--feedback-fd',
                            str(pw), '--local-path', w[0], '--local-id',
                            '.' + escape(w[0]), '--rpc-fd', ','.join([
                                str(rw), str(ww),
                                str(ra), str(wa)
                            ]), '--resource-remote', remote_host
                        ])

                cpids.add(cpid)
                agents.add(apid)
                os.close(pw)

                # close all RPC pipes in monitor
                os.close(ra)
                os.close(wa)
                os.close(rw)
                os.close(ww)
            finally:
                self.lock.release()

            t0 = time.time()
            so = select((pr, ), (), (), conn_timeout)[0]
            os.close(pr)

            if so:
                ret = nwait(cpid, os.WNOHANG)
                if ret is not None:
                    logging.info("worker(%s) died before establishing "
                                 "connection" % w[0])
                    nwait(apid)  # wait for agent
                else:
                    logging.debug("worker(%s) connected" % w[0])
                    # keep polling until the timeout window that started
                    # at t0 is over
                    while time.time() < t0 + conn_timeout:
                        ret = nwait(cpid, os.WNOHANG)
                        if ret is not None:
                            logging.info("worker(%s) died in startup "
                                         "phase" % w[0])
                            nwait(apid)  # wait for agent
                            break
                        time.sleep(1)
            else:
                logging.info("worker(%s) not confirmed in %d sec, "
                             "aborting it" % (w[0], conn_timeout))
                os.kill(cpid, signal.SIGKILL)
                nwait(apid)  # wait for agent
                ret = nwait(cpid)
            if ret is None:
                # worker survived the whole startup window
                self.set_state(self.ST_STABLE, w)
                # If worker dies, agent terminates on EOF.
                # So lets wait for agent first.
                nwait(apid)
                ret = nwait(cpid)
            if exit_signalled(ret):
                ret = 0
            else:
                ret = exit_status(ret)
                if ret in (0, 1):
                    self.set_state(self.ST_FAULTY, w)
            time.sleep(10)
        self.set_state(self.ST_INCON, w)
        return ret
Esempio n. 18
0
    def monitor(self, w, argv, cpids):
        """Supervise one gsyncd worker, respawning it while it exits softly.

        The heuristic is deliberately blunt: a freshly spawned worker
        that stays alive for the connection timeout counts as healthy.
        A worker that merely hangs would fool such a check, so the
        worker is additionally expected to signal that its setup stage
        is over by closing its end of a feedback pipe within that time.
        (It is the setup stage that can hang; a fully started worker
        blows up on EPIPE when the net goes down, thanks to the
        keep-alive thread.)

        w[0] is passed to the worker as --local-path and w[1] as
        --resource-remote; argv is the base argument list for the exec;
        the worker pid is add()-ed to cpids under self.lock.

        Returns the first worker exit code that is neither 0 nor 1.
        """

        def reap(pid, options=0):
            """ wait status of pid, or None when nothing was reaped """
            reaped_pid, status = waitpid(pid, options)
            return status if reaped_pid else None

        def killed_by_usr1(status):
            """ child terminated due to receipt of SIGUSR1 """
            return (os.WIFSIGNALED(status) and
                    (os.WTERMSIG(status) == signal.SIGUSR1))

        def plain_exit_code(status):
            """ exit code of a regular exit, 1 for everything else """
            return os.WEXITSTATUS(status) if os.WIFEXITED(status) else 1

        self.set_state(self.ST_INIT, w)
        ret = 0
        conn_timeout = int(gconf.connection_timeout)

        while ret in (0, 1):
            logging.info('-' * conn_timeout)
            logging.info('starting gsyncd worker')

            # feedback pipe: the child keeps the write end
            pr, pw = os.pipe()
            cpid = os.fork()
            if cpid == 0:
                # in the child: become the worker
                os.close(pr)
                worker_argv = argv + ['--feedback-fd', str(pw),
                                      '--local-path', w[0],
                                      '--local-id', '.' + escape(w[0]),
                                      '--resource-remote', w[1]]
                os.execv(sys.executable, worker_argv)

            self.lock.acquire()
            cpids.add(cpid)
            self.lock.release()
            os.close(pw)

            t0 = time.time()
            readable = select((pr,), (), (), conn_timeout)[0]
            os.close(pr)

            if not readable:
                # no sign of life on the feedback pipe in time
                logging.info("worker(%s) not confirmed in %d sec, "
                             "aborting it" % (w[0], conn_timeout))
                os.kill(cpid, signal.SIGKILL)
                ret = reap(cpid)
            else:
                ret = reap(cpid, os.WNOHANG)
                if ret is None:
                    logging.debug("worker(%s) connected" % w[0])
                    # watch the worker for the rest of the startup window
                    deadline = t0 + conn_timeout
                    while time.time() < deadline:
                        ret = reap(cpid, os.WNOHANG)
                        if ret is not None:
                            logging.info("worker(%s) died in startup "
                                         "phase" % w[0])
                            break
                        time.sleep(1)
                else:
                    logging.info("worker(%s) died before establishing "
                                 "connection" % w[0])

            if ret is None:
                # still running after the startup window: block until
                # the worker eventually goes away
                self.set_state(self.ST_STABLE, w)
                ret = reap(cpid)

            if killed_by_usr1(ret):
                ret = 0
            else:
                ret = plain_exit_code(ret)
                if ret in (0, 1):
                    self.set_state(self.ST_FAULTY, w)
            time.sleep(10)

        self.set_state(self.ST_INCON, w)
        return ret
Esempio n. 19
0
    def monitor(self, w, argv, cpids, agents, slave_vol, slave_host, master):
        """the monitor loop

        Basic logic is a blatantly simple, blunt heuristic:
        if the spawned client survives the connection timeout, it's
        considered OK. This serves us pretty well as it's not
        vulnerable to any kind of irregular behavior of the child...

        ... well, except for one: if the child is hung up on
        waiting for some event, it can survive aeons, still
        will be defunct. So we tweak the above logic to
        expect the worker to send us a signal within the timeout
        (in the form of closing its end of a pipe). The worker
        does this when it's done with the setup stage and
        ready to enter the service loop (note it's the setup
        stage which is vulnerable to hangs -- the full
        blown worker blows up on EPIPE if the net goes down,
        due to the keep-alive thread)

        Arguments:
          w          -- worker descriptor; w[0] is passed to the children
                        as --local-path and keys self.status, w[1] is the
                        remote resource URL, w[2] is passed as
                        --subvol-num, a truthy w[3] adds --is-hottier
          argv       -- base argument list; per-process options are
                        appended before exec'ing sys.executable
          cpids      -- collection the worker pid is add()-ed to
          agents     -- collection the agent pid is add()-ed to
          slave_vol, slave_host
                     -- handed to get_slave_bricks_status() to find the
                        slave nodes that are currently up
          master     -- accepted but not used by this method

        Returns the first worker exit code that is neither 0 nor 1;
        as long as the code is 0 or 1 the pair is respawned.
        """
        if not self.status.get(w[0], None):
            self.status[w[0]] = GeorepStatus(gconf.state_file, w[0])

        set_monitor_status(gconf.state_file, self.ST_STARTED)
        self.status[w[0]].set_worker_status(self.ST_INIT)

        ret = 0

        def nwait(p, o=0):
            """ wait status of p; None if nothing reaped, -1 on ECHILD """
            try:
                p2, r = waitpid(p, o)
                if not p2:
                    return
                return r
            except OSError as e:
                # no child process, this happens if the child process
                # already died and has been cleaned up
                if e.errno == ECHILD:
                    return -1
                else:
                    raise

        def exit_signalled(s):
            """ child terminated due to receipt of SIGUSR1 """
            return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))

        def exit_status(s):
            """ exit code of a normal exit, 1 for anything else """
            if os.WIFEXITED(s):
                return os.WEXITSTATUS(s)
            return 1

        conn_timeout = int(gconf.connection_timeout)
        while ret in (0, 1):
            remote_host = w[1]
            # Check the status of the connected slave node
            # If the connected slave node is down then try to connect to
            # different up node.
            # (raw string: the previous "\/" was an invalid escape sequence
            # in a normal string literal; the pattern matches the same text)
            m = re.match(r"(ssh|gluster|file)://(.+)@([^:]+):(.+)",
                         remote_host)
            if m:
                current_slave_host = m.group(3)
                slave_up_hosts = get_slave_bricks_status(
                    slave_host, slave_vol)

                if current_slave_host not in slave_up_hosts and \
                   len(slave_up_hosts) > 0:
                    remote_host = "%s://%s@%s:%s" % (
                        m.group(1), m.group(2),
                        random.choice(slave_up_hosts), m.group(4))

            # Spawn the worker and agent in lock to avoid fd leak.
            # The lock is released in a finally clause so that a failing
            # os.pipe()/os.fork() cannot leave it held forever.
            self.lock.acquire()
            try:
                logging.info('-' * conn_timeout)
                logging.info('starting gsyncd worker')

                # Couple of pipe pairs for RPC communication b/w
                # worker and changelog agent.

                # read/write end for agent
                (ra, ww) = os.pipe()
                # read/write end for worker
                (rw, wa) = os.pipe()

                # spawn the agent process
                apid = os.fork()
                if apid == 0:
                    # the agent does not use the worker's pipe ends
                    os.close(rw)
                    os.close(ww)
                    os.execv(sys.executable,
                             argv + ['--local-path', w[0],
                                     '--agent',
                                     '--rpc-fd',
                                     ','.join([str(ra), str(wa),
                                               str(rw), str(ww)])])

                # feedback pipe: the worker's write end being closed is
                # what wakes up the select() below
                pr, pw = os.pipe()
                cpid = os.fork()
                if cpid == 0:
                    os.close(pr)
                    # the worker does not use the agent's pipe ends
                    os.close(ra)
                    os.close(wa)
                    os.execv(sys.executable,
                             argv + ['--feedback-fd', str(pw),
                                     '--local-path', w[0],
                                     '--local-id',
                                     '.' + escape(w[0]),
                                     '--rpc-fd',
                                     ','.join([str(rw), str(ww),
                                               str(ra), str(wa)]),
                                     '--subvol-num', str(w[2])] +
                             (['--is-hottier'] if w[3] else []) +
                             ['--resource-remote', remote_host])

                cpids.add(cpid)
                agents.add(apid)
                os.close(pw)

                # close all RPC pipes in monitor
                os.close(ra)
                os.close(wa)
                os.close(rw)
                os.close(ww)
            finally:
                self.lock.release()

            t0 = time.time()
            so = select((pr,), (), (), conn_timeout)[0]
            os.close(pr)

            if so:
                ret = nwait(cpid, os.WNOHANG)
                ret_agent = nwait(apid, os.WNOHANG)

                if ret_agent is not None:
                    # Agent has died: kill the worker.
                    # Waiting again on a pid that was already reaped is
                    # fine here, nwait() turns the ECHILD into -1.
                    logging.info("Changelog Agent died, "
                                 "Aborting Worker(%s)" % w[0])
                    errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                    nwait(cpid)
                    nwait(apid)

                if ret is not None:
                    logging.info("worker(%s) died before establishing "
                                 "connection" % w[0])
                    nwait(apid)  # wait for agent
                else:
                    logging.debug("worker(%s) connected" % w[0])
                    # keep polling both children until the timeout window
                    # that started at t0 is over
                    while time.time() < t0 + conn_timeout:
                        ret = nwait(cpid, os.WNOHANG)
                        ret_agent = nwait(apid, os.WNOHANG)

                        if ret is not None:
                            logging.info("worker(%s) died in startup "
                                         "phase" % w[0])
                            nwait(apid)  # wait for agent
                            break

                        if ret_agent is not None:
                            # Agent has died: kill the worker
                            logging.info("Changelog Agent died, Aborting "
                                         "Worker(%s)" % w[0])
                            errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                            nwait(cpid)
                            nwait(apid)
                            break

                        time.sleep(1)
            else:
                logging.info("worker(%s) not confirmed in %d sec, "
                             "aborting it" % (w[0], conn_timeout))
                errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
                nwait(apid)  # wait for agent
                ret = nwait(cpid)
            if ret is None:
                self.status[w[0]].set_worker_status(self.ST_STABLE)
                # If worker dies, agent terminates on EOF.
                # So lets wait for agent first.
                nwait(apid)
                ret = nwait(cpid)
            if exit_signalled(ret):
                ret = 0
            else:
                ret = exit_status(ret)
                if ret in (0, 1):
                    self.status[w[0]].set_worker_status(self.ST_FAULTY)
            time.sleep(10)
        self.status[w[0]].set_worker_status(self.ST_INCON)
        return ret
Esempio n. 20
0
    def crawl(self, path='.', xtr=None, done=0):
        """ generate a CHANGELOG file consumable by process_change

        Walks the directory @path on the master and writes one or more
        changelog records (via write_entry_change) for every entry for
        which need_sync() says so, descending into sub-directories.

        Arguments:
          path -- directory to crawl; '.' marks the top-level call, which
                  opens the changelog before the walk and closes and
                  processes it afterwards
          xtr  -- reference xtime all entries are compared against; when
                  falsy it is read from the slave's root
          done -- passed through to self.process() by the top-level call

        Raises GsyncdError if an xtime cannot be read on the master, or
        on the slave for a reason other than ENOENT.
        """
        if path == '.':
            self.open()
            self.crawls += 1
        if not xtr:
            # get the root stime and use it for all comparisons
            xtr = self.xtime('.', self.slave)
            if isinstance(xtr, int):
                if xtr != ENOENT:
                    raise GsyncdError('slave is corrupt')
                xtr = self.minus_infinity
        xtl = self.xtime(path)
        if isinstance(xtl, int):
            raise GsyncdError('master is corrupt')
        if xtr == xtl:
            # nothing changed below this directory
            if path == '.':
                self.close()
            return
        self.xtime_reversion_hook(path, xtl, xtr)
        logging.debug("entering " + path)
        dem = self.master.server.entries(path)
        pargfid = self.master.server.gfid(path)
        if isinstance(pargfid, int):
            # An int here is an error code, not a gfid. Really skip the
            # directory: going on would feed the int to os.path.join()
            # below and blow up on the first entry that needs syncing.
            logging.warning('skipping directory %s' % (path))
            if path == '.':
                self.close()
            return
        for e in dem:
            bname = e
            e = os.path.join(path, e)
            st = lstat(e)
            if isinstance(st, int):
                logging.warning('%s got purged in the interim..' % e)
                continue
            gfid = self.master.server.gfid(e)
            if isinstance(gfid, int):
                logging.warning('skipping entry %s..' % (e))
                continue
            xte = self.xtime(e)
            if isinstance(xte, int):
                raise GsyncdError('master is corrupt')
            if not self.need_sync(e, xte, xtr):
                continue
            # <parent gfid>/<basename>, as recorded in every entry record
            pbname = escape(os.path.join(pargfid, bname))
            mo = st.st_mode
            if stat.S_ISDIR(mo):
                self.write_entry_change("E", [gfid, 'MKDIR', pbname])
                self.crawl(e, xtr)
            elif stat.S_ISLNK(mo):
                # read the target of the entry itself (this used to pass
                # an undefined name and raised NameError for any symlink)
                rl = errno_wrap(os.readlink, [e], [ENOENT])
                if isinstance(rl, int):
                    continue
                self.write_entry_change("E", [gfid, 'SYMLINK', pbname, rl])
            else:
                # if a file has a hardlink, create a Changelog entry as
                # 'LINK' so the slave side will decide if to create the
                # new entry, or to create link.
                if st.st_nlink == 1:
                    self.write_entry_change("E", [gfid, 'MKNOD', pbname])
                else:
                    self.write_entry_change("E", [gfid, 'LINK', pbname])
                # only regular files get a data record
                if stat.S_ISREG(mo):
                    self.write_entry_change("D", [gfid])

        if path == '.':
            logging.info('processing xsync changelog %s' % self.fname())
            self.close()
            self.process([self.fname()], done)
            self.upd_stime(xtl)
Esempio n. 21
0
    def monitor(self, w, argv, cpids, agents):
        """the monitor loop

        Basic logic is a blatantly simple, blunt heuristic:
        if the spawned client survives the connection timeout, it's
        considered OK. This serves us pretty well as it's not
        vulnerable to any kind of irregular behavior of the child...

        ... well, except for one: if the child is hung up on
        waiting for some event, it can survive aeons, still
        will be defunct. So we tweak the above logic to
        expect the worker to send us a signal within the timeout
        (in the form of closing its end of a pipe). The worker
        does this when it's done with the setup stage and
        ready to enter the service loop (note it's the setup
        stage which is vulnerable to hangs -- the full
        blown worker blows up on EPIPE if the net goes down,
        due to the keep-alive thread)

        Arguments:
          w      -- worker descriptor; w[0] is passed to the children as
                    --local-path, w[1] to the worker as --resource-remote
          argv   -- base argument list; per-process options are appended
                    before exec'ing sys.executable
          cpids  -- collection the worker pid is add()-ed to
          agents -- collection the agent pid is add()-ed to

        Returns the first worker exit code that is neither 0 nor 1;
        as long as the code is 0 or 1 the pair is respawned.
        """

        if gconf.pause_on_start:
            self.set_state(self.ST_INIT_PAUSE, w)
        else:
            self.set_state(self.ST_INIT, w)

        ret = 0

        def nwait(p, o=0):
            """ wait status of p, or None if no child was reaped """
            p2, r = waitpid(p, o)
            if not p2:
                return
            return r

        def exit_signalled(s):
            """ child terminated due to receipt of SIGUSR1 """
            return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))

        def exit_status(s):
            """ exit code of a normal exit, 1 for anything else """
            if os.WIFEXITED(s):
                return os.WEXITSTATUS(s)
            return 1
        conn_timeout = int(gconf.connection_timeout)
        while ret in (0, 1):
            logging.info('-' * conn_timeout)
            logging.info('starting gsyncd worker')

            # Couple of pipe pairs for RPC communication b/w
            # worker and changelog agent.

            # read/write end for agent
            (ra, ww) = os.pipe()
            # read/write end for worker
            (rw, wa) = os.pipe()

            # spawn the agent process
            apid = os.fork()
            if apid == 0:
                os.execv(sys.executable, argv + ['--local-path', w[0],
                                                 '--agent',
                                                 '--rpc-fd',
                                                 ','.join([str(ra), str(wa),
                                                           str(rw), str(ww)])])
            # feedback pipe: the worker's write end being closed is what
            # wakes up the select() below
            pr, pw = os.pipe()
            cpid = os.fork()
            if cpid == 0:
                os.close(pr)
                os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
                                                 '--local-path', w[0],
                                                 '--local-id',
                                                 '.' + escape(w[0]),
                                                 '--rpc-fd',
                                                 ','.join([str(rw), str(ww),
                                                           str(ra), str(wa)]),
                                                 '--resource-remote', w[1]])
            self.lock.acquire()
            cpids.add(cpid)
            agents.add(apid)
            self.lock.release()
            os.close(pw)

            # close all RPC pipes in monitor
            # Done right after forking rather than after the wait below:
            # the monitor never reads or writes these descriptors, and as
            # long as it holds the write ends open the process reading
            # the other side cannot see EOF when its peer goes away.
            os.close(ra)
            os.close(wa)
            os.close(rw)
            os.close(ww)

            t0 = time.time()
            so = select((pr,), (), (), conn_timeout)[0]
            os.close(pr)

            if so:
                ret = nwait(cpid, os.WNOHANG)
                if ret is not None:
                    logging.info("worker(%s) died before establishing "
                                 "connection" % w[0])
                else:
                    logging.debug("worker(%s) connected" % w[0])
                    # keep polling until the timeout window that started
                    # at t0 is over
                    while time.time() < t0 + conn_timeout:
                        ret = nwait(cpid, os.WNOHANG)
                        if ret is not None:
                            logging.info("worker(%s) died in startup "
                                         "phase" % w[0])
                            break
                        time.sleep(1)
            else:
                logging.info("worker(%s) not confirmed in %d sec, "
                             "aborting it" % (w[0], conn_timeout))
                os.kill(cpid, signal.SIGKILL)
                ret = nwait(cpid)
            if ret is None:
                # worker survived the whole startup window
                self.set_state(self.ST_STABLE, w)
                ret = nwait(cpid)
            if exit_signalled(ret):
                ret = 0
            else:
                ret = exit_status(ret)
                if ret in (0, 1):
                    self.set_state(self.ST_FAULTY, w)
            time.sleep(10)
        self.set_state(self.ST_INCON, w)
        return ret
Esempio n. 22
0
def main():
    rconf.starttime = time.time()

    # If old Glusterd sends commands in old format, below function
    # converts the sys.argv to new format. This conversion is added
    # temporarily for backward compatibility. This can be removed
    # once integrated with Glusterd2
    # This modifies sys.argv globally, so rest of the code works as usual
    argsupgrade.upgrade()

    # Default argparse version handler prints to stderr, which is fixed in
    # 3.x series but not in 2.x, using custom parser to fix this issue
    if "--version" in sys.argv:
        print(GSYNCD_VERSION)
        sys.exit(0)

    parser = ArgumentParser()
    parser.add_argument("--inet6", action="store_true")
    sp = parser.add_subparsers(dest="subcmd")

    # Monitor Status File update
    p = sp.add_parser("monitor-status")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("status", help="Update Monitor Status")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Monitor
    p = sp.add_parser("monitor")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--pause-on-start",
                   action="store_true",
                   help="Start with Paused state")
    p.add_argument("--local-node-id", help="Local Node ID")
    p.add_argument("--debug", action="store_true")
    p.add_argument("--use-gconf-volinfo", action="store_true")

    # Worker
    p = sp.add_parser("worker")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("--local-path", help="Local Brick Path")
    p.add_argument("--feedback-fd", type=int,
                   help="feedback fd between monitor and worker")
    p.add_argument("--local-node", help="Local master node")
    p.add_argument("--local-node-id", help="Local Node ID")
    p.add_argument("--rpc-fd",
                   help="Read and Write fds for worker-agent communication")
    p.add_argument("--subvol-num", type=int, help="Subvolume number")
    p.add_argument("--is-hottier", action="store_true",
                   help="Is this brick part of hot tier")
    p.add_argument("--resource-remote",
                   help="Remote node to connect to Slave Volume")
    p.add_argument("--resource-remote-id",
                   help="Remote node ID to connect to Slave Volume")
    p.add_argument("--slave-id", help="Slave Volume ID")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Agent
    p = sp.add_parser("agent")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("--local-path", help="Local brick path")
    p.add_argument("--local-node", help="Local master node")
    p.add_argument("--local-node-id", help="Local Node ID")
    p.add_argument("--slave-id", help="Slave Volume ID")
    p.add_argument("--rpc-fd",
                   help="Read and Write fds for worker-agent communication")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Slave
    p = sp.add_parser("slave")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave details user@host::vol format")
    p.add_argument("--session-owner")
    p.add_argument("--master-brick",
                   help="Master brick which is connected to the Slave")
    p.add_argument("--master-node",
                   help="Master node which is connected to the Slave")
    p.add_argument("--master-node-id",
                   help="Master node ID which is connected to the Slave")
    p.add_argument("--local-node", help="Local Slave node")
    p.add_argument("--local-node-id", help="Local Slave ID")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # All configurations which are configured via "slave-" options
    # DO NOT add default values for these configurations, default values
    # will be picked from template config file
    p.add_argument("--slave-timeout", type=int,
                   help="Timeout to end gsyncd at Slave side")
    p.add_argument("--use-rsync-xattrs", action="store_true")
    p.add_argument("--slave-log-level", help="Slave Gsyncd Log level")
    p.add_argument("--slave-gluster-log-level",
                   help="Slave Gluster mount Log level")
    p.add_argument("--slave-gluster-command-dir",
                   help="Directory where Gluster binaries exist on slave")
    p.add_argument("--slave-access-mount", action="store_true",
                   help="Do not lazy umount the slave volume")

    # Status
    p = sp.add_parser("status")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--local-path", help="Local Brick Path")
    p.add_argument("--debug", action="store_true")
    p.add_argument("--json", action="store_true")

    # Config-check
    p = sp.add_parser("config-check")
    p.add_argument("name", help="Config Name")
    p.add_argument("--value", help="Config Value")
    p.add_argument("--debug", action="store_true")

    # Config-get
    p = sp.add_parser("config-get")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("--name", help="Config Name")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")
    p.add_argument("--show-defaults", action="store_true")
    p.add_argument("--only-value", action="store_true")
    p.add_argument("--use-underscore", action="store_true")
    p.add_argument("--json", action="store_true")

    # Config-set
    p = sp.add_parser("config-set")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("-n", "--name", help="Config Name")
    p.add_argument("-v", "--value", help="Config Value")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # Config-reset
    p = sp.add_parser("config-reset")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("name", help="Config Name")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument("--debug", action="store_true")

    # voluuidget
    p = sp.add_parser("voluuidget")
    p.add_argument("host", help="Hostname")
    p.add_argument("volname", help="Volume Name")
    p.add_argument("--debug", action="store_true")

    # Delete
    p = sp.add_parser("delete")
    p.add_argument("master", help="Master Volume Name")
    p.add_argument("slave", help="Slave")
    p.add_argument("-c", "--config-file", help="Config File")
    p.add_argument('--path', dest='paths', action="append")
    p.add_argument("--reset-sync-time", action="store_true",
                   help="Reset Sync Time")
    p.add_argument("--debug", action="store_true")

    # Parse arguments
    args = parser.parse_args()

    # Extra template values, All arguments are already part of template
    # variables, use this for adding extra variables
    extra_tmpl_args = {}

    # Add First/Primary Slave host, user and volume
    if getattr(args, "slave", None) is not None:
        hostdata, slavevol = args.slave.split("::")
        hostdata = hostdata.split("@")
        slavehost = hostdata[-1]
        slaveuser = "******"
        if len(hostdata) == 2:
            slaveuser = hostdata[0]
        extra_tmpl_args["primary_slave_host"] = slavehost
        extra_tmpl_args["slaveuser"] = slaveuser
        extra_tmpl_args["slavevol"] = slavevol

    # Add Bricks encoded path
    if getattr(args, "local_path", None) is not None:
        extra_tmpl_args["local_id"] = escape(args.local_path)

    # Add Master Bricks encoded path(For Slave)
    if getattr(args, "master_brick", None) is not None:
        extra_tmpl_args["master_brick_id"] = escape(args.master_brick)

    # Load configurations
    config_file = getattr(args, "config_file", None)

    # Subcmd accepts config file argument but not passed
    # Set default path for config file in that case
    # If an subcmd accepts config file then it also accepts
    # master and Slave arguments.
    if config_file is None and hasattr(args, "config_file"):
        config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
            GLUSTERD_WORKDIR,
            args.master,
            extra_tmpl_args["primary_slave_host"],
            extra_tmpl_args["slavevol"])

    # If the config file does not exist, log a warning and use the default conf
    config_file_error_msg = None
    if config_file is not None and not os.path.exists(config_file):
        # Logging not yet initialized, create the error message to
        # log later and reset the config_file to None
        config_file_error_msg = lf(
            "Session config file not exists, using the default config",
            path=config_file)
        config_file = None

    rconf.config_file = config_file

    # Override gconf values from argument values only if it is slave gsyncd
    override_from_args = False
    if args.subcmd == "slave":
        override_from_args = True

    # Load Config file
    gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf",
               config_file,
               vars(args),
               extra_tmpl_args,
               override_from_args)

    # Default label to print in log file
    label = args.subcmd
    if args.subcmd in ("worker", "agent"):
        # If Worker or agent, then add brick path also to label
        label = "%s %s" % (args.subcmd, args.local_path)
    elif args.subcmd == "slave":
        # If Slave add Master node and Brick details
        label = "%s %s%s" % (args.subcmd, args.master_node, args.master_brick)

    # Setup Logger
    # Default log file
    log_file = gconf.get("cli-log-file")
    log_level = gconf.get("cli-log-level")
    if getattr(args, "master", None) is not None and \
       getattr(args, "slave", None) is not None:
        log_file = gconf.get("log-file")
        log_level = gconf.get("log-level")

    # Use different log file location for Slave log file
    if args.subcmd == "slave":
        log_file = gconf.get("slave-log-file")
        log_level = gconf.get("slave-log-level")

    if args.debug:
        log_file = "-"
        log_level = "DEBUG"

    # Create Logdir if not exists
    try:
        if log_file != "-":
            os.mkdir(os.path.dirname(log_file))
    except OSError as e:
        if e.errno != EEXIST:
            raise

    setup_logging(
        log_file=log_file,
        level=log_level,
        label=label
    )

    if config_file_error_msg is not None:
        logging.warn(config_file_error_msg)

    # Log message for loaded config file
    if config_file is not None:
        logging.info(lf("Using session config file", path=config_file))

    set_term_handler()
    excont = FreeObject(exval=0)

    # Gets the function name based on the input argument. For example
    # if subcommand passed as argument is monitor then it looks for
    # function with name "subcmd_monitor" in subcmds file
    func = getattr(subcmds, "subcmd_" + args.subcmd.replace("-", "_"), None)

    try:
        try:
            if func is not None:
                rconf.args = args
                func(args)
        except:
            log_raise_exception(excont)
    finally:
        finalize(exval=excont.exval)