def recover(job_path):
    job = import_job(job_path)

    data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(data_path, 'worker', 'jobs', job.real_name)
    if os.path.exists(root):
        lock_path = os.path.join(root, 'lock')
        if os.path.exists(lock_path):
            os.remove(lock_path)

        def _recover_dir(dir_):
            for f in os.listdir(dir_):
                if f.endswith('.old'):
                    f_path = os.path.join(dir_, f)
                    os.remove(f_path)

            for f in os.listdir(dir_):
                if f == 'lock':
                    lock_f = os.path.join(dir_, f)
                    os.remove(lock_f)

                f_path = os.path.join(dir_, f)
                if os.path.isfile(f_path) and not f.endswith('.old'):
                    os.rename(f_path, f_path + '.old')

        mq_store_dir = os.path.join(root, 'store')
        mq_backup_dir = os.path.join(root, 'backup')
        if os.path.exists(mq_store_dir):
            _recover_dir(mq_store_dir)
        if os.path.exists(mq_backup_dir):
            _recover_dir(mq_backup_dir)
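As an aside, the rotation performed by _recover_dir can be seen in isolation; the following is a self-contained sketch on a throwaway temporary directory (the file names are made up for illustration):

# Illustrative sketch of the *.old rotation done by _recover_dir above.
import os, tempfile

d = tempfile.mkdtemp()
for name in ('0', '1.old', 'lock'):
    open(os.path.join(d, name), 'w').close()

for f in os.listdir(d):              # drop stale *.old files first
    if f.endswith('.old'):
        os.remove(os.path.join(d, f))
for f in os.listdir(d):              # then clear the lock and rotate live files
    path = os.path.join(d, f)
    if f == 'lock':
        os.remove(path)
    elif os.path.isfile(path):
        os.rename(path, path + '.old')

print(os.listdir(d))                 # -> ['0.old']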
Example #2
def start_master():
    path = os.path.join(root_dir(), 'cola', 'master', 'watcher.py')

    print 'Start master at %s:%s' % (get_ip(), main_conf.master.port)
    print 'Master will run in background.'

    subprocess.Popen(['python', path])
Example #4
def load_job(path, nodes, context=None):
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')
        
    job = import_job(path)
    
    job_name = job.name.replace(' ', '_')
    if job.debug:
        job_name += '_debug'
    holder = os.path.join(root_dir(), 'data', 'master', 'jobs', job_name)
    if not os.path.exists(holder):
        os.makedirs(holder)
    
    lock_f = os.path.join(holder, 'lock')
    if os.path.exists(lock_f):
        raise JobMasterRunning('There has been a running job master')
    open(lock_f, 'w').close()
    
    rpc_server = create_rpc_server(job)
    try:
        loader = JobLoader(job, nodes, rpc_server, context=context)
        loader.run()
        # notify the master watcher that the job has finished
        master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
        client_call(master_watcher, 'finish_job', job.real_name)
    finally:
        os.remove(lock_f)
        rpc_server.shutdown()
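The check-create-remove lock handling above could equally be wrapped in a context manager; a hypothetical sketch, assuming JobMasterRunning is importable as in the example:

# Hypothetical refactoring of the lock-file guard above into a context manager.
import os
from contextlib import contextmanager

@contextmanager
def job_lock(holder):
    lock_f = os.path.join(holder, 'lock')
    if os.path.exists(lock_f):
        raise JobMasterRunning('There has been a running job master')
    open(lock_f, 'w').close()        # create the lock marker
    try:
        yield lock_f
    finally:
        os.remove(lock_f)            # always release the lock, even on error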
Example #5
def load_job(job_path,
             nodes,
             ip_address=None,
             data_path=None,
             client=None,
             context=None,
             force=False):
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(data_path, 'master', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    with MasterJobLoader(job,
                         root,
                         nodes,
                         local_ip=ip_address,
                         client=client,
                         context=context,
                         force=force) as job_loader:
        job_loader.run()
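For orientation, a hypothetical call of this master-side load_job; the job path and worker addresses are placeholders in the `ip:port` form the other examples build with get_ip():

# Hypothetical invocation (placeholder job path and node addresses):
load_job('/path/to/jobs/wiki',
         nodes=['10.0.0.2:11104', '10.0.0.3:11104'],
         force=True)    # force start, mirroring the -f/--force flag of the watcher scripts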
Example #8
def start_master(data_path=None, force=False):
    path = os.path.join(root_dir(), "cola", "master", "watcher.py")

    print "Start master at %s:%s" % (get_ip(), main_conf.master.port)
    print "Master will run in background. Please do not shut down the terminal."

    cmds = ["python", path]
    if data_path is not None:
        cmds.extend(["-d", data_path])
    if force is True:
        cmds.append("-f")
    subprocess.Popen(cmds)
Example #9
def start_worker(master, data_path=None, force=False):
    path = os.path.join(root_dir(), 'cola', 'worker', 'watcher.py')

    print 'Start worker at %s:%s' % (get_ip(), main_conf.worker.port)
    print 'Worker will run in background. Please do not shut down the terminal.'

    cmds = ['python', path, '-m', master]
    if data_path is not None:
        cmds.extend(['-d', data_path])
    if force is True:
        cmds.append('-f')
    subprocess.Popen(cmds)
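The two helpers above could be driven from a small launcher; a hypothetical sketch, assuming start_master, start_worker, get_ip and main_conf are importable as in these examples (the data path is a placeholder):

# Hypothetical launcher built on the helpers shown above.
data = '/var/cola/data'
start_master(data_path=data, force=True)                    # master watcher in background
master_addr = '%s:%s' % (get_ip(), main_conf.master.port)   # the address the master prints
start_worker(master_addr, data_path=data, force=True)       # worker pointed at that master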
Example #11
def load_job(path, master=None):
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # Logger
    logger = get_logger(os.path.join(holder, 'job.log'))

    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node]
    if master is not None:
        nodes = client_call(master, 'get_nodes')

    # Bloom filter hook
    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)

    rpc_server = create_rpc_server(job)
    loader = JobLoader(job, rpc_server, logger=logger, master=master)
    loader.init_mq(nodes,
                   local_node,
                   mq_holder,
                   verify_exists_hook=bloom_filter_hook,
                   copies=2 if master else 1)

    if master is None:
        try:
            loader.mq.put(job.starts)
            loader.run()
        finally:
            rpc_server.shutdown()
    else:
        try:
            client_call(master, 'ready', local_node)

            def _start():
                while not loader.stopped:
                    time.sleep(TIME_SLEEP)
                loader.run()

            thread = threading.Thread(target=_start)
            thread.start()
            thread.join()
        finally:
            rpc_server.shutdown()
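The verify_exists_hook passed to init_mq above appears to act as a seen-before check backed by a Bloom filter; create_bloom_filter_hook itself is not shown, so the following is only a generic, illustrative stand-in built on a plain set rather than cola's actual implementation:

# Purely illustrative dedup hook (not cola's create_bloom_filter_hook): returns True
# if the object was seen before, otherwise records it and returns False.
class SetVerifyHook(object):
    def __init__(self):
        self.seen = set()

    def __call__(self, obj):
        if obj in self.seen:
            return True
        self.seen.add(obj)
        return False

hook = SetVerifyHook()
assert hook('http://example.com/a') is False   # first sighting
assert hook('http://example.com/a') is True    # duplicate detected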
Example #12
def start_master(ip=None, data_path=None, force=False):
    path = os.path.join(root_dir(), 'cola', 'master', 'watcher.py')
    
    print 'Start master at %s:%s' % (ip, main_conf.master.port)
    print 'Master will run in background. Please do not shut down the terminal.'
    
    cmds = ['python', path]
    if ip is not None:
        cmds.extend(['-i', ip])
    if data_path is not None:
        cmds.extend(['-d', data_path])
    if force is True:
        cmds.append('-f')
    subprocess.Popen(cmds)
Example #13
def load_job(path, master=None):
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')
        
    job = import_job(path)
    
    holder = os.path.join(
        root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)
    
    # Logger
    logger = get_logger(os.path.join(holder, 'job.log'))
    
    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node]
    if master is not None:
        nodes = client_call(master, 'get_nodes')
    
    # Bloom filter hook
    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)
    
    rpc_server = create_rpc_server(job)
    loader = JobLoader(job, rpc_server, logger=logger, master=master)
    loader.init_mq(nodes, local_node, mq_holder, 
                   verify_exists_hook=bloom_filter_hook,
                   copies=2 if master else 1)
    
    if master is None:
        try:
            loader.mq.put(job.starts)
            loader.run()
        finally:
            rpc_server.shutdown()
    else:
        try:
            client_call(master, 'ready', local_node)
            
            def _start():
                while not loader.stopped: 
                    time.sleep(TIME_SLEEP)
                loader.run()
            thread = threading.Thread(target=_start)
            thread.start()
            thread.join()
        finally:
            rpc_server.shutdown()
Example #14
def start_master(ip=None, data_path=None, force=False):
    path = os.path.join(root_dir(), 'cola', 'master', 'watcher.py')
    
    ip_str = ip if ip is not None else get_ip()
    print 'Start master at %s:%s' % (ip_str, main_conf.master.port)
    print 'Master will run in background. Please do not shut down the terminal.'
    
    cmds = ['python', path]
    if ip is not None:
        cmds.extend(['-i', ip])
    if data_path is not None:
        cmds.extend(['-d', data_path])
    if force is True:
        cmds.append('-f')
    subprocess.Popen(cmds)
Example #15
def load_job(job_path, nodes, ip_address=None, data_path=None, 
             client=None, context=None, force=False):
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')
        
    job = import_job(job_path)
    
    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(data_path, 'master', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)
    
    with MasterJobLoader(job, root, nodes, local_ip=ip_address, client=client, 
                         context=context, force=force) as job_loader:
        job_loader.run()
Example #16
    def setUp(self):
        self.dir = tempfile.mkdtemp()
        self.root = os.path.join(self.dir, "watch")
        if not os.path.exists(self.root):
            os.mkdir(self.root)
        self.zip_dir = os.path.join(self.dir, "zip")
        if not os.path.exists(self.zip_dir):
            os.mkdir(self.zip_dir)
        self.job_dir = os.path.join(self.dir, "job")
        if not os.path.exists(self.job_dir):
            os.mkdir(self.job_dir)

        zip_file = os.path.join(self.zip_dir, "wiki.zip")
        src_dir = os.path.join(root_dir(), "contrib", "wiki")
        self.zip_file = ZipHandler.compress(zip_file, src_dir, type_filters=("pyc",))

        self.master_watcher = MasterWatcher(self.root, self.zip_dir, self.job_dir)
Example #17
    def setUp(self):
        self.dir = tempfile.mkdtemp()
        self.zip_dir = os.path.join(self.dir, "zip")
        if not os.path.exists(self.zip_dir):
            os.mkdir(self.zip_dir)
        self.job_dir = os.path.join(self.dir, "job")
        if not os.path.exists(self.job_dir):
            os.mkdir(self.job_dir)

        zip_file = os.path.join(self.zip_dir, "wiki.zip")
        src_dir = os.path.join(root_dir(), "contrib", "wiki")
        self.zip_file = ZipHandler.compress(zip_file, src_dir, type_filters=("pyc",))

        self.rpc_server = ColaRPCServer(("localhost", main_conf.master.port))
        self.master_watcher = MasterWatcher(self.rpc_server, self.zip_dir, self.job_dir)

        thd = threading.Thread(target=self.rpc_server.serve_forever)
        thd.setDaemon(True)
        thd.start()
Example #18
    def setUp(self):
        self.dir = tempfile.mkdtemp()
        self.root = os.path.join(self.dir, 'watch')
        if not os.path.exists(self.root):
            os.mkdir(self.root)
        self.zip_dir = os.path.join(self.dir, 'zip')
        if not os.path.exists(self.zip_dir):
            os.mkdir(self.zip_dir)
        self.job_dir = os.path.join(self.dir, 'job')
        if not os.path.exists(self.job_dir):
            os.mkdir(self.job_dir)

        zip_file = os.path.join(self.zip_dir, 'wiki.zip')
        src_dir = os.path.join(root_dir(), 'contrib', 'wiki')
        self.zip_file = ZipHandler.compress(zip_file,
                                            src_dir,
                                            type_filters=('pyc', ))

        self.master_watcher = MasterWatcher(self.root, self.zip_dir,
                                            self.job_dir)
Example #19
    def setUp(self):
        self.dir = tempfile.mkdtemp()
        self.zip_dir = os.path.join(self.dir, 'zip')
        if not os.path.exists(self.zip_dir):
            os.mkdir(self.zip_dir)
        self.job_dir = os.path.join(self.dir, 'job')
        if not os.path.exists(self.job_dir):
            os.mkdir(self.job_dir)

        zip_file = os.path.join(self.zip_dir, 'wiki.zip')
        src_dir = os.path.join(root_dir(), 'contrib', 'wiki')
        self.zip_file = ZipHandler.compress(zip_file,
                                            src_dir,
                                            type_filters=('pyc', ))

        self.rpc_server = ColaRPCServer(('localhost', main_conf.master.port))
        self.master_watcher = MasterWatcher(self.rpc_server, self.zip_dir,
                                            self.job_dir)

        thd = threading.Thread(target=self.rpc_server.serve_forever)
        thd.setDaemon(True)
        thd.start()
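Neither test snippet includes a tearDown; a hypothetical one for this RPC-server variant might shut the server down and delete the temporary tree (requires import shutil; rpc_server.shutdown() mirrors the calls made in the loader examples above):

    # Hypothetical tearDown to pair with the setUp above (needs `import shutil`).
    def tearDown(self):
        self.rpc_server.shutdown()   # stop the daemonized serve_forever thread
        shutil.rmtree(self.dir)      # remove the temporary directory tree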
Example #20
def load_job(job_path, data_path=None, master=None, force=False):
    if not os.path.exists(job_path):
        raise ValueError("Job definition does not exist.")

    job = import_job(job_path)

    if data_path is None:
        data_path = os.path.join(root_dir(), "data")
    root = os.path.join(data_path, "worker", "jobs", job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    if master is None:
        with StandaloneWorkerJobLoader(job, root, force=force) as job_loader:
            job_loader.run()
    else:
        nodes = client_call(master, "get_nodes")
        local = "%s:%s" % (get_ip(), job.context.job.port)
        client_call(master, "ready", local)
        with WorkerJobLoader(job, root, master, local=local, nodes=nodes, force=force) as job_loader:
            client_call(master, "ready", local)
            job_loader.ready_for_run()
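For orientation, hypothetical calls of this worker-side load_job in both modes; the job path and the master address are placeholders:

# Hypothetical invocations (placeholder path and master address):
load_job('/path/to/jobs/wiki')                           # standalone: StandaloneWorkerJobLoader
load_job('/path/to/jobs/wiki', master='10.0.0.1:11103')  # distributed: registers with the master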
Example #21
def load_job(job_path, data_path=None, master=None, force=False):
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(data_path, 'worker', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    if master is None:
        with StandaloneWorkerJobLoader(job, root, force=force) as job_loader:
            job_loader.run()
    else:
        nodes = client_call(master, 'get_nodes')
        local = '%s:%s' % (get_ip(), job.context.job.port)
        client_call(master, 'ready', local)
        with WorkerJobLoader(job, root, master, local=local, nodes=nodes, force=force) \
            as job_loader:
            client_call(master, 'ready', local)
            job_loader.ready_for_run()
Example #22
def load_job(job_path, data_path=None, master=None, force=False):
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')
        
    job = import_job(job_path)
    
    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(
        data_path, 'worker', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)
    
    if master is None:
        with StandaloneWorkerJobLoader(job, root, force=force) as job_loader:
            job_loader.run()
    else:
        nodes = client_call(master, 'get_nodes')
        local = '%s:%s' % (get_ip(), job.context.job.port)
        client_call(master, 'ready', local)
        with WorkerJobLoader(job, root, master, local=local, nodes=nodes, force=force) \
            as job_loader:
            client_call(master, 'ready', local)
            job_loader.ready_for_run()
Example #23
        help="master connected to(in the former of `ip:port` or `ip`)",
    )
    parser.add_argument(
        "-d",
        "--data",
        metavar="data root directory",
        nargs="?",
        default=None,
        const=None,
        help="root directory to put data",
    )
    parser.add_argument("-f", "--force", metavar="force start", nargs="?", default=False, const=True, type=bool)
    args = parser.parse_args()

    data_path = args.data
    if data_path is None:
        data_path = os.path.join(root_dir(), "data")
    force = args.force
    master = args.master
    if ":" not in master:
        master = "%s:%s" % (master, main_conf.master.port)

    root = os.path.join(data_path, "worker", "watcher")
    zip_dir = os.path.join(data_path, "zip")
    job_dir = os.path.join(data_path, "jobs")
    for dir_ in (root, zip_dir, job_dir):
        makedirs(dir_)

    with WorkerWatcher(master, root, zip_dir, job_dir, data_path=data_path, force=force) as master_watcher:
        master_watcher.run()
Example #24
        thread = self.start_check_worker()
        thread.join()
        
def makedirs(path):
    if not os.path.exists(path):
        os.makedirs(path)
        
def create_rpc_server():
    rpc_server = ColaRPCServer((get_ip(), main_conf.master.port))
    thd = threading.Thread(target=rpc_server.serve_forever)
    thd.setDaemon(True)
    thd.start()
    return rpc_server
        
if __name__ == "__main__":
    root = root_dir()
    master_watcher_dir = os.path.join(root, 'data', 'master', 'watcher')
    makedirs(master_watcher_dir)
    zip_dir = os.path.join(root, 'data', 'zip')
    makedirs(zip_dir)
    job_dir = os.path.join(root, 'data', 'jobs')
    makedirs(job_dir)
    
    lock_f = os.path.join(master_watcher_dir, 'lock')
    if os.path.exists(lock_f):
        raise MasterWatcherRunning('There has been a running master watcher.')
    
    rpc_server = create_rpc_server()
    try:
        open(lock_f, 'w').close()
        
                        '--ip',
                        metavar='IP address',
                        nargs='?',
                        default=None,
                        const=None,
                        help='IP Address to start')
    parser.add_argument('-f',
                        '--force',
                        metavar='force start',
                        nargs='?',
                        default=False,
                        const=True,
                        type=bool)
    args = parser.parse_args()

    data_path = args.data
    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    ip = args.ip
    force = args.force

    root = os.path.join(data_path, 'master', 'watcher')
    zip_dir = os.path.join(data_path, 'zip')
    job_dir = os.path.join(data_path, 'jobs')
    for dir_ in (root, zip_dir, job_dir):
        makedirs(dir_)

    with MasterWatcher(root, zip_dir, job_dir, ip_address=ip,
                       data_path=data_path, force=force) \
        as master_watcher:
        master_watcher.run()
Example #26
if __name__ == "__main__":
    import argparse
    
    parser = argparse.ArgumentParser('Cola worker watcher')
    parser.add_argument('-m', '--master', metavar='master watcher', required=True,
                        help='master to connect to (in the form of `ip:port` or `ip`)')
    parser.add_argument('-d', '--data', metavar='data root directory', nargs='?',
                        default=None, const=None, 
                        help='root directory to put data')
    parser.add_argument('-f', '--force', metavar='force start', nargs='?',
                        default=False, const=True, type=bool)
    args = parser.parse_args()
    
    data_path = args.data
    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    force = args.force
    master = args.master
    if ':' not in master:
        master = '%s:%s' % (master, main_conf.master.port)
        
    root = os.path.join(data_path, 'worker', 'watcher')
    zip_dir = os.path.join(data_path, 'zip')
    job_dir = os.path.join(data_path, 'jobs')
    for dir_ in (root, zip_dir, job_dir):
        makedirs(dir_)
    
    with WorkerWatcher(master, root, zip_dir, job_dir, data_path=data_path, force=force) \
        as master_watcher:
        master_watcher.run()