Example #1
def test_dicod_interf(exit_on_deadlock, algo, n_jobs, n_seg):
    # Random dictionary of K atoms with 2 channels of length 5, each channel
    # normalized to unit norm
    K = 3
    rng = np.random.RandomState(42)
    D = rng.normal(size=(K, 2, 5))
    D /= np.sqrt((D*D).sum(axis=-1))[:, :, None]
    # Sparse code with a single activation, located just past the boundary of
    # the first worker's segment
    z = np.zeros((K, 100))
    z[0, [min(99, 100 // n_jobs + 1)]] = 1
    # Signal generated by the convolutional model x[c] = sum_k z[k] * D[k, c]
    x = np.array([[fftconvolve(zk, dk, 'full') for dk in Dk]
                  for Dk, zk in zip(D, z)]).sum(axis=0)
    pb = MultivariateConvolutionalCodingProblem(
            D, x, lmbd=0.002)

    dicod = DICOD(n_jobs=n_jobs, use_seg=n_seg, max_iter=1e6, tol=1e-15,
                  hostfile='hostfile', algorithm=algo, debug=5, patience=1000)
    dicod.fit(pb)

    pt = pb.pt*(abs(pb.pt) > pb.lmbd)

    # Assert we recover the right support
    print(pb.pt.reshape(1, -1).nonzero()[1], '\n',
          pt.reshape(1, -1).nonzero()[1], '\n',
          z.reshape(1, -1).nonzero()[1])
    assert (np.all(pt.reshape(1, -1).nonzero()[1] ==
                   z.reshape(1, -1).nonzero()[1]) or
            pb.cost(z) >= dicod.cost), (
        "Cost pt: ", dicod.cost, "Cost z: ", pb.cost(z))
    assert abs(pb.cost(pb.pt) - dicod.cost)/dicod.cost < 1e-6
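The signal x above is built with the convolutional generative model x[c] = sum_k z[k] * D[k, c]. A minimal, self-contained sketch of just that construction step (shapes and the activation position are illustrative, not taken from the test), which also shows the expected length of the 'full' convolution output:

import numpy as np
from scipy.signal import fftconvolve

# K atoms with n_channels channels of length atom_len, each channel unit-normalized
rng = np.random.RandomState(0)
K, n_channels, atom_len, T = 3, 2, 5, 100
D = rng.normal(size=(K, n_channels, atom_len))
D /= np.sqrt((D * D).sum(axis=-1))[:, :, None]

# Sparse code with a single activation of the first atom
z = np.zeros((K, T))
z[0, 10] = 1.

# x[c] = sum_k fftconvolve(z[k], D[k, c]); 'full' output has length T + atom_len - 1
x = np.array([[fftconvolve(zk, dk, 'full') for dk in Dk]
              for Dk, zk in zip(D, z)]).sum(axis=0)
print(x.shape)  # (2, 104)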
Example #2
def run_one(args_pb, lmbd, optimizer, optimizer_kwargs, fname, file_lock):

    n_pb, args_pb, seed_pb = args_pb
    pb = fun_rand_problem(*args_pb, seed=seed_pb)

    if isinstance(optimizer, str):
        method = optimizer
        if optimizer == "lgcd":
            from dicod.dicod import DICOD
            optimizer = DICOD(None, **optimizer_kwargs)
        elif optimizer == "fista":
            from dicod.fista import FISTA
            optimizer = FISTA(None, **optimizer_kwargs)
    elif getattr(optimizer, "fit", None) is None:
        raise ValueError("`optimizer` parameter should be a string or an "
                         "optimizer object.")
    else:
        method = "dicod"

    # Warm-up fit at lmbd_max (trivial solution): this spawns the MPI pool of
    # workers so that its initialization cost is not counted in the timed run
    pb.lmbd = lmbd_max = pb.get_lmbd_max()
    optimizer.fit(pb)

    pb.lmbd = lmbd_max * lmbd
    pb.reset()

    optimizer.fit(pb)
    import time
    time.sleep(1)
    sparsity = len(pb.pt.nonzero()[0]) / pb.pt.size
    out_str = 'Pb{},{},{},{},{},{},{}\n'.format(n_pb, lmbd, optimizer.runtime,
                                                optimizer.t, lmbd_max, method,
                                                sparsity)
    with file_lock:
        with open(fname, 'a') as f:
            f.write(out_str)

    print('=' * 79)
    print('[{}] PB{}: End process with lmbd={}({}) in {:.2f}s'
          ''.format(datetime.datetime.now().strftime("%Ih%M"), n_pb, lmbd,
                    lmbd * lmbd_max, optimizer.runtime))
    print('\n' + '=' * 79)
    time.sleep(.5)
    return out_str
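run_one appears designed to be called concurrently by several workers writing to the same CSV file, hence the file_lock argument. A hedged sketch of a driver, assuming run_one lives in the same module; the problem sizes, lambda and optimizer kwargs are illustrative, and the (T, S, K, d, lmbd, noise_level) tuple mirrors the fun_rand_problem call used in the scaling experiment below:

from functools import partial
from multiprocessing import Manager, Pool

if __name__ == "__main__":
    manager = Manager()
    file_lock = manager.Lock()      # picklable lock shared by the pool workers
    # (n_pb, fun_rand_problem args, seed) triplets, one per problem
    args_pbs = [(n_pb, (300, 150, 10, 7, 0.1, 1), seed)
                for n_pb, seed in enumerate([42, 43, 44])]
    worker = partial(run_one, lmbd=0.1, optimizer="lgcd",
                     optimizer_kwargs=dict(n_jobs=4, max_iter=int(1e6), tol=5e-2),
                     fname="runtimes_lmbd.csv", file_lock=file_lock)
    with Pool(processes=3) as pool:
        pool.map(worker, args_pbs)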
Example #3
def step_detect(max_iter=5e6,
                timeout=7200,
                n_jobs=2,
                hostfile=None,
                n_epoch=10,
                save_dir=None,
                debug=0):
    '''Run DICOD algorithm for a certain problem with different values
    for n_jobs and store the runtime in csv files if given a save_dir.

    Parameters
    ----------
    max_iter: int, optional (default: 5e6)
        maximal number of iterations run by DICOD
    timeout: int, optional (default: 7200)
        maximal running time for DICOD. The default timeout
        is 2 hours
    n_jobs: int, optional (default: 2)
        Maximal number of jobs used to compute the convolutional
        sparse coding
    hostfile: str, optional (default: None)
        MPI cluster config file; permits specifying multiple machines
        to run the convolutional sparse coding
    n_epoch: int, optional (default: 10)
        number of epochs run by the algorithm
    save_dir: str, optional (default: None)
        If not None, all the runtimes will be saved in csv files
        contained in the given directory. The directory must exist.
        This will create a file for each problem size T and save
        the Pb number, the number of cores and the runtime computed
        in two different ways.
    debug: int, optional (default: 0)
        The greater it is, the more verbose the algorithm is

    '''
    try:
        common_args = dict(logging=True,
                           log_rate='log1.6',
                           max_iter=max_iter,
                           timeout=timeout,
                           debug=debug,
                           tol=5e-2)

        print('construct problem')
        pbs, D, D_labels = fun_step_problem(K=50, N=None, lmbd=1)
        N = len(pbs)
        print('End\n')
        lmbd = .3

        dcp = DICOD(pbs[0][0],
                    n_jobs=n_jobs,
                    hostfile=hostfile,
                    positive=True,
                    use_seg=1,
                    **common_args)

        grad_D = [np.zeros(D.shape) for _ in range(N)]
        grad_nz = set()
        cost = np.zeros(N)
        cost_i = 1e6

        order = np.arange(N)
        np.random.shuffle(order)
        mini_batch_size = 20
        n_batch = N // mini_batch_size
        current_batch = 0
        current_epoch = 0
        time_epoch = time()

        while current_epoch < n_epoch:
            # Stochastic choice of a mini-batch of points

            pb_batch = [[pbs[i][0], i]
                        for i in order[current_batch *
                                       mini_batch_size:(current_batch + 1) *
                                       mini_batch_size]]
            current_batch += 1
            current_batch %= n_batch
            DD = None
            new = False
            for pb, i0 in pb_batch:
                pb.D = D

                # Sparse coding
                pb.reset()
                DD = dcp.fit(pb, DD=DD)

                # Update cost and D gradient
                new |= cost[i0] == 0
                cost[i0] = pb.cost()
                grad_D[i0] = pb.grad_D(pb.pt)
                grad_nz.add(i0)

            # Logging
            N_see = len(grad_nz)
            cost_i1 = cost_i
            cost_i = np.sum(cost, axis=0) / N_see
            print('End mini_batch {:3} with cost {:e}'
                  ''.format(int(current_batch), cost_i))
            if current_batch == 0:
                print('=' * 79)
                print('End Epoch {} in {:.2f}s'
                      ''.format(current_epoch,
                                time() - time_epoch))
                time_epoch = time()
                current_epoch += 1
                np.random.shuffle(order)
                print('=' * 79)

            # reg = np.zeros(D.shape)
            # reg[:, :, :-2] += D[:, :, 2:]
            # reg[:, :, :] -= D[:, :, :]
            # reg[:, :, :] /= np.sqrt(1e-2+D[:, :, :]*D[:, :, :])

            # Update dictionary
            grad = np.sum(grad_D, axis=0) / N_see
            D -= lmbd * grad

            if cost_i >= cost_i1 and not new:
                #IPython.embed()
                lmbd *= .7
        from sys import stdout as out
        print('=' * 79)
        print('Fit the pb to the latest dictionary')
        print('=' * 79)
        for i, (pb, _, _) in enumerate(pbs):
            pb.D = D
            pb.reset()
            dcp.fit(pb)
            out.write('\rCompute rpz: {:7.2%}'.format(i / N))
            out.flush()
        print('\rCompute rpz: {:7}'.format('Done'))
    except KeyboardInterrupt:
        from sys import stdout as out
        print('=' * 79)
        print('Fit the pb to the latest dictionary')
        print('=' * 79)
        for i, (pb, _, _) in enumerate(pbs):
            pb.D = D
            pb.reset()
            dcp.fit(pb)
            out.write('\rCompute rpz: {:7.2%}'.format(i / N))
            out.flush()
        print('\rCompute rpz: {:7}'.format('Done'))

    finally:
        IPython.embed()
        log.end()
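A hedged sketch of how this routine might be invoked; the hostfile name, output directory and sizes are placeholders, not values from the project:

import os

# Hypothetical invocation of the dictionary-learning experiment above;
# the save directory is created first, as the docstring requires it to exist.
os.makedirs("step_results", exist_ok=True)
step_detect(max_iter=1e6, timeout=3600, n_jobs=4, hostfile="hostfile",
            n_epoch=5, save_dir="step_results", debug=1)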
Example #4
def scaling_n_jobs(T=300,
                   max_jobs=75,
                   n_rep=10,
                   save_dir=None,
                   max_iter=5e6,
                   timeout=7200,
                   hostfile=None,
                   run='all',
                   lgg=False,
                   use_seg=False,
                   algorithm=ALGO_GS,
                   debug=0,
                   seed=None):
    '''Run DICOD algorithm for a certain problem with different values
    for n_jobs and store the runtime in csv files if given a save_dir.

    Parameters
    ----------
    T: int, optional (default: 300)
        Size of the generated problems
    max_jobs: int, optional (default: 75)
        The algorithm will be run on problems with a number
        of cores varying from 1 to max_jobs on a log2-spaced grid
    n_rep: int, optional (default: 10)
        Number of different problems solved for each
        number of cores.
    save_dir: str, optional (default: None)
        If not None, all the runtimes will be saved in csv files
        contained in the given directory (created if it does not exist).
        This will create a file for each problem size T and save
        the Pb number, the number of cores and the runtime computed
        in two different ways.
    max_iter: int, optional (default: 5e6)
        maximal number of iterations run by DICOD
    timeout: int, optional (default: 7200)
        maximal running time for DICOD. The default timeout
        is 2 hours
    hostfile: str, optional (default: None)
        hostfile for the OpenMPI API, used to connect to the other
        servers and spawn the processes over different nodes
    run: list or str, optional (default: 'all')
        If 'all', run all the possible runs. Else, it should be a list
        of strings, each either an n_jobs value (e.g. '4') or an
        'n_jobs:rep' pair (e.g. '8:0') selecting a specific case.
    lgg: bool, optional (default: False)
        If set to True, enable logging of the iteration cost
        during the run. This might slightly slow down the execution
        and the collection of the results
    algorithm: enum, optional (default: ALGO_GS)
        Algorithm used to select the update for the coordinate descent. It
        should be either ALGO_GS (greedy selection) or ALGO_RANDOM (random
        selection).
    debug: int, optional (default: 0)
        The greater it is, the more verbose the algorithm is
    seed: int, optional (default: None)
        seed for numpy's RNG, to obtain a fixed set of problems

    '''
    common_args = dict(logging=lgg,
                       log_rate='log1.6',
                       max_iter=max_iter,
                       timeout=timeout,
                       debug=debug,
                       tol=5e-2,
                       patience=1000,
                       hostfile=hostfile,
                       algorithm=algorithm)

    # Do not use seg with ALGO_RANDOM
    assert not use_seg or algorithm == ALGO_GS

    S = 150
    K = 10
    d = 7
    lmbd = 0.1
    noise_level = 1

    if save_dir is not None and not osp.exists(save_dir):
        import os
        os.mkdir(save_dir)
    elif save_dir is None:
        save_dir = "."

    rng = np.random.RandomState(seed)
    suffix = "_random" if algorithm != ALGO_GS else "_seg" if use_seg else ""
    file_name = 'runtimes_n_jobs_{}{}.csv'.format(T, suffix)
    file_name = osp.join(save_dir, file_name)

    for j in range(n_rep):
        seed_pb = rng.randint(4294967295)
        pb = fun_rand_problem(T, S, K, d, lmbd, noise_level, seed=seed_pb)

        dicod = DICOD(n_jobs=2, **common_args)

        runtimes = []
        # log2-spaced grid of numbers of jobs, capped at max_jobs and
        # explored in decreasing order
        n_jobs = np.logspace(0, np.log2(75), 10, base=2)
        n_jobs = [int(round(nj)) for nj in n_jobs if nj <= max_jobs]
        n_jobs = np.unique(n_jobs)
        n_jobs = n_jobs[::-1]
        for nj in n_jobs:
            code_run = "{}:{}".format(nj, j)
            if (run != 'all' and str(nj) not in run and code_run not in run):
                continue
            dicod.reset()
            pb.reset()
            dicod.n_jobs = nj
            dicod.use_seg = T // nj if use_seg else 1

            dicod.fit(pb)
            timings = TimingLogs(time=dicod.time,
                                 runtime=dicod.runtime,
                                 t_init=dicod.t_int)
            runtimes += [[timings]]
            import time
            time.sleep(1)
            if save_dir is not None:
                with open(file_name, 'a') as f:
                    f.write('Pb{},{},{},{}\n'.format(j, nj, timings[0],
                                                     timings[1]))
            print('=' * 79)
            print('[{}] PB{}: End process with {} jobs  in {:.2f}s'
                  ''.format(datetime.datetime.now().strftime("%I:%M"), j, nj,
                            timings[0]))
            print('\n' + '=' * 79)
            time.sleep(.5)

    min_njobs = 0
    fig, axs = plt.subplots(1, 1, sharex=True, num="scaling")
    with open(file_name) as f:
        lines = f.readlines()
    arr = defaultdict(list)
    for line in lines:
        r = list(map(float, line.split(',')[1:]))
        arr[r[0]] += [r]
    axk = axs
    l, L = 1e6, 0
    for k, v in arr.items():
        if k > min_njobs:
            V = np.mean(v, axis=0)[1]
            axk.scatter(k, V, color="b")
            l, L = min(l, V), max(L, V)
    axk.set_xscale('log')
    axk.set_yscale('log')
    n_jobs = np.array([k for k in arr.keys() if k > min_njobs]).astype(int)
    n_jobs.sort()
    m, M = n_jobs.min(), n_jobs.max()
    t = np.logspace(np.log2(m), np.log2(2 * M), 200, base=2)
    R0 = np.mean(arr[m], axis=0)[1]

    axk.plot(t, R0 * m / t, 'k--')
    axk.plot(t, R0 * (m / t)**2, 'r--')
    scaling = R0 / (t * t * np.maximum(
        1 - 2 * (t / T)**2 * (1 + 2 * (t / T)**2)**(t / 2 - 1), 1e-5))
    break_p = np.where((scaling[2:] > scaling[1:-1])
                       & (scaling[:-2] > scaling[1:-1]))[0] + 1

    axk.plot(t, scaling, "g-.", label="theoretical speedup")
    axk.vlines(t[break_p], .1, 100000, "g", linestyle="-", linewidth=2)
    axk.set_xlim((m * .7, 1.7 * M))
    axk.set_ylim((.5 * l, 1.7 * L))
    axk.set_title("$T={}$".format(T), fontsize="x-large")
    # if i == 0:
    axk.legend(fontsize="large")
    axk.set_ylim((.2 * l, 1.7 * L))
    tt = 8
    axk.text(tt, .4 * R0 * (m / tt)**2, "quadratic", rotation=-22)
    axk.text(tt,
             R0 * m / tt,
             "linear",
             rotation=-14,
             bbox=dict(facecolor="white", edgecolor="white"))

    axk.text(.9 * t[break_p],
             .7 * R0 * m / tt,
             "$M^*$",
             rotation=0,
             bbox=dict(facecolor="w", edgecolor="w"))
    axk.minorticks_off()

    axk.set_xticks(n_jobs)
    axk.set_xticklabels(n_jobs)
    axk.set_xlabel("# cores $M$", fontsize="x-large")
    axk.set_ylabel("Runtime (s)", fontsize="x-large")
    axk.set_xticks([])
    axk.set_xticklabels([], [])
    axk.set_yticks([])
    axk.set_yticklabels([], [])
    plt.subplots_adjust(left=.1, right=.99, top=.95, bottom=.1)
    plt.show()
    input()
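A hedged sketch of how this scaling experiment might be launched; sizes, paths and the selection list are placeholders. The run list shows both accepted forms, a plain n_jobs value and an 'n_jobs:rep' pair, matching the code_run filtering above:

# Hypothetical invocation of the scaling experiment above
scaling_n_jobs(T=150, max_jobs=16, n_rep=3, save_dir="scaling_results",
               max_iter=1e6, timeout=3600, hostfile="hostfile",
               run=['4', '8:0', '16:1'], lgg=True, seed=42)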