def least_square_w_eval(n,
                        m,
                        r,
                        approximate_size,
                        v,
                        iteration,
                        seeds,
                        c_mode,
                        o_w=0):
    """Record a per-iteration error for an NMF run and a sketched NMF run.

    Initial factors come from ``ff.generate_w`` / ``ff.generate_h`` seeded
    with ``seeds[0]`` / ``seeds[1]``. The sketched matrix ``v_s`` comes from
    ``ff.uniform_sampling(v, approximate_size, seeds[0] + 1, t=True)``.
    The error values are whatever the module-level ``calculate`` helper
    returns as its third item (its definition is outside this block).

    Args:
        n, m, r: sizes handed to the generators (``generate_w(n, r, ...)``,
            ``generate_h(r, m, ...)``).
        approximate_size: sketch size; ``h_s`` is requested with this many
            columns.
        v: matrix to factorise.
        iteration: number of update steps in every loop.
        seeds: indexable holding the W seed at ``[0]`` and H seed at ``[1]``.
        c_mode: forwarded unchanged to the ``ff`` helpers and ``calculate``.
        o_w: reference W. The default ``0`` (or any all-zero array) selects
            the "no reference" branch.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_last)``.
        Without a reference, ``snmf_error`` stays all zeros and ``h_last``
        is ``h_s``. With a reference, ``h_last`` is
        ``ff.calculate_h(v, w_s, print_interim=True)``.
    """
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1, t=True)
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

    if np.all(o_w == 0):
        # No reference W supplied: the freshly updated NMF factor ``w`` is
        # used as the reference for the sketched run.
        print("\n\n\n------------------   NMF   -------------------")
        for i in range(iteration):
            # Every other call site in this file uses the 4-argument form
            # ``ff.update(v, w, h, c_mode)``; the stray ``r`` was dropped.
            w, h = ff.update(v, w, h, c_mode)
            # ``h_s`` is generated with ``approximate_size`` columns, so it is
            # paired with the sketched matrix ``v_s`` (as in the branch
            # below), not with the full ``v``.
            w_s, h_s, nmf_error[i] = calculate(v_s, w, w_s, h_s, c_mode, i)
        return nmf_error, snmf_error, w, h, w_s, h_s

    print("\n\n\n------------------   NMF   -------------------")
    for i in range(iteration):
        w, h, nmf_error[i] = calculate(v, o_w, w, h, c_mode, i)

    print("\n\n-------------   Sketching NMF   --------------")
    for i in range(iteration):
        w_s, h_s, snmf_error[i] = calculate(v_s, o_w, w_s, h_s, c_mode, i)
    # H for the full matrix is recomputed from the sketched W.
    h_os = ff.calculate_h(v, w_s, print_interim=True)

    return nmf_error, snmf_error, w, h, w_s, h_os
def time_measurement(n, m, r, approximate_size, v, iteration, seeds, c_mode,
                     NMF_or_SNMF):
    """Time ``iteration`` update steps of NMF or of sketched NMF.

    The timed region includes factor generation (and, for the sketched
    variant, the sampling of ``v`` and the final ``ff.calculate_h`` call).

    Args:
        n, m, r: sizes handed to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: sketch size used by the sketched variant.
        v: matrix to factorise.
        iteration: number of ``ff.update`` calls to time.
        seeds: indexable holding the W seed at ``[0]`` and H seed at ``[1]``.
        c_mode: forwarded unchanged to the ``ff`` helpers.
        NMF_or_SNMF: ``0`` times plain NMF; any other value times the
            sketched variant.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter() is monotonic and high resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    if NMF_or_SNMF == 0:
        # NMF------------------------------------
        print("\n\n\n------------------   NMF   -------------------")

        start = time.perf_counter()
        w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
        h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)

        for _ in range(iteration):
            w, h = ff.update(v, w, h, c_mode)

        t_result = time.perf_counter() - start
        print("\nNMF time: " + str(t_result))

    else:
        # Sketching NMF --------------------------
        print("\n\n-------------   Sketching NMF   --------------")

        start = time.perf_counter()
        v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
        w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
        h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

        for _ in range(iteration):
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)

        # Result is discarded; the call is made only so its cost is timed.
        _ = ff.calculate_h(v, w_s, True)

        t_result = time.perf_counter() - start
        print("\nSketching NMF time: " + str(t_result) + "\n")

    return t_result
def v_error_eval(n, m, r, approximate_size, v, iteration, seeds, c_mode, nmfqp,
                 column_sketching):
    """Track the squared reconstruction error of V for NMF and sketched NMF.

    Args:
        n, m, r: sizes handed to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: sketch size.
        v: matrix to factorise.
        iteration: number of ``ff.update`` steps in each run.
        seeds: indexable holding the W seed at ``[0]`` and H seed at ``[1]``.
        c_mode: forwarded unchanged to the ``ff`` helpers.
        nmfqp: when truthy, the NMF error is measured with
            ``ff.calculate_h(v, w)`` instead of the updated ``h``, and that
            matrix is returned as ``h``.
        column_sketching: forwarded as ``right_product`` to
            ``ff.uniform_sampling``; also selects which factor has the
            sketched dimension.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_s)`` where the error arrays
        hold ``||v - W H||**2`` after every update. With ``iteration == 0``
        the initial factors are returned unchanged.
    """
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v,
                              approximate_size,
                              seeds[0] + 1,
                              right_product=column_sketching)

    print("\n\n\n------------------   NMF   -------------------")
    if nmfqp:
        print("NMF matrix H is calculated by QP.")
    # Pre-binding keeps the post-loop assignment valid when iteration == 0
    # (the original raised UnboundLocalError in that case).
    h_eval = h
    for i in range(iteration):
        w, h = ff.update(v, w, h, c_mode)
        h_eval = ff.calculate_h(v, w, print_interim=True) if nmfqp else h
        nmf_error[i] = np.linalg.norm(v - np.dot(w, h_eval))**2
        if i == 0 or i % 100 == 99:
            print(str(i + 1) + " times update  error: " + str(nmf_error[i]))
    h = h_eval

    print("\n\n-------------   Sketching NMF   --------------")
    if column_sketching:
        w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
        h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
        h_os = h_s
        for i in range(iteration):
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
            # H for the full matrix, recomputed from the sketched W.
            h_os = ff.calculate_h(v, w_s, print_interim=True)
            snmf_error[i] = np.linalg.norm(v - np.dot(w_s, h_os))**2
            if i == 0 or i % 100 == 99:
                print(
                    str(i + 1) + " times update  error: " + str(snmf_error[i]))
        h_s = h_os

    else:
        w_s = ff.generate_w(approximate_size, r, seeds[0], c_mode=c_mode)
        h_s = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
        w_os = w_s
        for i in range(iteration):
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
            # NOTE(review): calculate_h is called with (v, h_s) to obtain a
            # W-like factor here; confirm it handles this orientation.
            w_os = ff.calculate_h(v, h_s, print_interim=True)
            snmf_error[i] = np.linalg.norm(v - np.dot(w_os, h_s))**2
            if i == 0 or i % 100 == 99:
                print(
                    str(i + 1) + " times update  error: " + str(snmf_error[i]))
        w_s = w_os

    return nmf_error, snmf_error, w, h, w_s, h_s
def get_v_ite(v, n, m, r, approximate_size, seeds, c_mode, CDC, NMFQP=False):
    """Count the updates needed until ``||v - W H||**2`` drops below ``CDC``.

    The count is taken once for plain NMF and once for sketched NMF (whose
    error is measured with ``ff.calculate_h(v, w_s, True)``).

    Args:
        v: matrix to factorise.
        n, m, r: sizes handed to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: sketch size.
        seeds: indexable holding the W seed at ``[0]`` and H seed at ``[1]``.
        c_mode: forwarded unchanged to the ``ff`` helpers.
        CDC: error threshold; a loop stops once its error is below it.
        NMFQP: when truthy, the NMF error is measured with
            ``ff.calculate_h(v, w, True)`` instead of the updated ``h``.

    Returns:
        Integer array ``[nmf_updates, snmf_updates]``.

    NOTE(review): neither loop has an iteration cap, so a run whose error
    never falls below ``CDC`` does not terminate.
    """
    # NMF------------------------------------
    print("\n------------------   NMF   -------------------")
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)

    if NMFQP:
        print("NMF matrix H is calculated by QP.")

    # Start from +inf rather than a magic 1000000 so that at least one update
    # is always measured, whatever the magnitude of CDC.
    nmf_error = np.inf
    nmf_steps = 0
    while nmf_error >= CDC:
        w, h = ff.update(v, w, h, c_mode)
        h_eval = ff.calculate_h(v, w, True) if NMFQP else h
        nmf_error = np.linalg.norm(v - np.dot(w, h_eval))**2

        if nmf_steps == 0 or nmf_steps % 100 == 99:
            print(
                str(nmf_steps + 1) + " times update  error: " +
                str(nmf_error))
        nmf_steps += 1

    # Sketching NMF --------------------------
    print("\n-------------   Sketching NMF   --------------")

    v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

    snmf_error = np.inf
    snmf_steps = 0
    while snmf_error >= CDC:
        w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)

        # Error is always measured against the full matrix v.
        h_os = ff.calculate_h(v, w_s, True)
        snmf_error = np.linalg.norm(v - np.dot(w_s, h_os))**2

        if snmf_steps == 0 or snmf_steps % 100 == 99:
            print(
                str(snmf_steps + 1) + " times update  error: " +
                str(snmf_error))
        snmf_steps += 1

    iteration_list = np.array([nmf_steps, snmf_steps], dtype="int")
    print(iteration_list)
    return iteration_list
def pivoting_qr_q_eval(n,
                       m,
                       r,
                       approximate_size,
                       v,
                       iteration,
                       seeds,
                       c_mode,
                       o_w=0):
    """Compare W factors of NMF and sketched NMF via pivoted QR.

    Args:
        n, m, r: sizes handed to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: sketch size.
        v: matrix to factorise.
        iteration: number of steps in every loop.
        seeds: indexable holding the W seed at ``[0]`` and H seed at ``[1]``.
        c_mode: forwarded unchanged to the ``ff`` helpers and ``calculate``.
        o_w: reference W. The default ``0`` (or any all-zero array) selects
            the "no reference" branch.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_last)``.
        Without a reference, ``nmf_error[i]`` is the squared norm of
        ``Q_s R_s - Q R`` (pivoted QR of ``w_s`` and ``w``) after step ``i``,
        ``snmf_error`` stays all zeros and ``h_last`` is ``h_s``.
        With a reference, both error arrays are filled by the module-level
        ``calculate`` helper and ``h_last`` is ``ff.calculate_h(v, w_s, True)``.
    """
    print("\n\n\n------------------   NMF   -------------------")

    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

    if np.all(o_w == 0):
        for i in range(iteration):
            # Same 4-argument form as the sketched call two lines below; the
            # original passed a stray ``r`` here.
            w, h = ff.update(v, w, h, c_mode)
            qr_q, qr_r, _ = linalg.qr(w, pivoting=True)

            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
            qr_q_s, qr_r_s, _ = linalg.qr(w_s, pivoting=True)

            frobenius_norm = np.linalg.norm(
                np.dot(qr_q_s, qr_r_s) - np.dot(qr_q, qr_r))**2
            nmf_error[i] = frobenius_norm

            if i == 0 or i % 100 == 99:
                print(
                    str(i + 1) + " times update  error: " +
                    str(frobenius_norm))
        return nmf_error, snmf_error, w, h, w_s, h_s

    # Replace the reference by the product of its pivoted QR factors.
    original_q, original_r, _ = linalg.qr(o_w, pivoting=True)
    o_w = np.dot(original_q, original_r)

    # NMF------------------------------------
    # (The NMF banner was already printed at the top of the function; the
    # second, duplicate banner has been removed.)
    # NOTE(review): w/h and w_s/h_s are not reassigned in the loops below, so
    # unless ``calculate`` updates them in place the returned factors are the
    # initial ones -- confirm against the definition of ``calculate``.
    for i in range(iteration):
        nmf_error[i] = calculate(v, o_w, w, h, c_mode, i)

    print("\n\n-------------   Sketching NMF   --------------")
    for i in range(iteration):
        snmf_error[i] = calculate(v_s, o_w, w_s, h_s, c_mode, i)
    h_os = ff.calculate_h(v, w_s, True)

    return nmf_error, snmf_error, w, h, w_s, h_os
# Example no. 6
# 0
def rapid_v_error_eval(n,
                       m,
                       r,
                       approximate_size,
                       v,
                       iteration,
                       seeds,
                       c_mode,
                       nmfqp=False):
    """Run sketched NMF, keeping every intermediate W, then solve H in parallel.

    All ``iteration + 1`` sketched W matrices are stored in a 3-D array so
    that ``ff.calculate_h`` can afterwards be dispatched for every step
    through ``Parallel`` / ``delayed``.

    Args:
        n, m, r: sizes handed to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: sketch size.
        v: matrix to factorise.
        iteration: number of sketched update steps.
        seeds: indexable holding the W seed at ``[0]`` and H seed at ``[1]``.
        c_mode: forwarded unchanged to the ``ff`` helpers and ``calculate``.
        nmfqp: when truthy, ``w`` is allocated as an ``(n, r, iteration + 1)``
            history array instead of a single ``(n, r)`` matrix.

    Returns:
        ``(nmf_error, snmf_error, w_last, h, w_s_last, 0)``.
        The plain-NMF stage is currently disabled, so ``nmf_error`` is all
        zeros and ``h`` is the initial matrix. ``w_last`` is slice
        ``iteration`` of the history when ``nmfqp`` is truthy, otherwise the
        initial 2-D ``w``.
    """
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, 0, t=True)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)

    # History of the sketched W: slice 0 is the initial factor, slice i + 1
    # the factor after update i.
    w_s = np.zeros([n, r, iteration + 1])
    w_s[:, :, 0] = ff.generate_w(n, r, seeds[0], c_mode=c_mode)

    if nmfqp:
        w = np.zeros([n, r, iteration + 1])
        w[:, :, 0] = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    else:
        w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)

    print("\n\n\n------------------   NMF   -------------------")
    # NOTE: the plain-NMF update loop is disabled in this function; only the
    # banner is printed and ``w`` / ``h`` / ``nmf_error`` keep their initial
    # values.

    print("\n\n-------------   Sketching NMF   --------------")
    for i in range(iteration):
        w_s[:, :, i + 1], h_s, snmf_error[i] = calculate(
            v_s, w_s[:, :, i], h_s, c_mode, i)
    tmp = Parallel(n_jobs=3, verbose=3)([
        delayed(ff.calculate_h)(v, w_s[:, :, i + 1], qp_num=i)
        for i in range(iteration)
    ])

    # Results are ordered by their second item but are not used afterwards.
    tmp.sort(key=itemgetter(1))

    # ``w`` is 3-D only when nmfqp is set; indexing the 2-D matrix with three
    # subscripts raised IndexError for the default nmfqp=False.
    w_last = w[:, :, iteration] if nmfqp else w

    return nmf_error, snmf_error, w_last, h, w_s[:, :, iteration], 0
def bcv(v, r, iteration, seed, row_sep_num, column_sep_num, error_print):
    """Accumulate the held-out reconstruction error over a grid of folds (BCV).

    ``v`` is cut into ``row_sep_num`` x ``column_sep_num`` blocks. For each
    block ``a``, the rest of ``v`` is split into ``b`` (same rows, other
    columns), ``c`` (other rows, same columns) and ``d`` (other rows, other
    columns). Factors ``w_d``, ``h_d`` with inner size ``r`` are fitted to
    ``d`` by ``iteration`` calls to ``ff.update``, and ``a`` is estimated as
    ``b @ pinv(h_d) @ pinv(w_d) @ c``.

    Args:
        v: 2-D matrix.
        r: inner size of the factorisation.
        iteration: number of ``ff.update`` steps per fold.
        seed: passed to ``ff.two_seeds`` to obtain the W and H seeds.
        row_sep_num: number of row folds.
        column_sep_num: number of column folds.
        error_print: when truthy, the fit error on ``d`` is recorded for the
            last fold (the one reaching the final row and column).

    Returns:
        ``(total, error)``: the sum over all folds of ``||a - a_est||**2``
        and a length-``iteration`` array with the recorded fit errors
        (zeros if nothing was recorded).

    Exits the process with status 1 (``sys.exit``) when the matrix left
    after removing one fold has fewer than ``r`` rows or columns.
    """
    n, m = v.shape
    n_sep_size = n // row_sep_num
    m_sep_size = m // column_sep_num

    # ``or`` is required here: ``|`` binds tighter than ``<``, so the original
    # ``a < r | b < r`` was parsed as the chained comparison
    # ``a < (r | b) < r`` and never fired for non-negative sizes.
    if n - n_sep_size < r or m - m_sep_size < r:
        print("Error: row or column size of D is smaller than R",
              file=sys.stderr)
        sys.exit(1)

    counter = 1
    bic_tmp = 0
    seeds = ff.two_seeds(seed)
    error = np.zeros(iteration)
    total_folds = row_sep_num * column_sep_num

    for r_sep in range(row_sep_num):
        for c_sep in range(column_sep_num):  # for folded row and column
            row_s = n_sep_size * r_sep
            column_s = m_sep_size * c_sep

            # The last fold in each direction absorbs the remainder.
            row_e = n_sep_size * (r_sep + 1) if r_sep != row_sep_num - 1 else n
            column_e = (m_sep_size * (c_sep + 1)
                        if c_sep != column_sep_num - 1 else m)

            # set Matrices ABCD
            a = v[row_s:row_e, column_s:column_e]
            b = np.concatenate(
                [v[row_s:row_e, :column_s], v[row_s:row_e, column_e:]], 1)
            c = np.concatenate(
                [v[:row_s, column_s:column_e], v[row_e:, column_s:column_e]],
                0)

            d_1 = np.concatenate([v[:row_s, :column_s], v[:row_s, column_e:]],
                                 1)
            d_2 = np.concatenate([v[row_e:, :column_s], v[row_e:, column_e:]],
                                 1)
            d = np.concatenate([d_1, d_2], 0)

            # fit NMF to D
            w_d = ff.generate_w(d.shape[0], r, seeds[0], c_mode=0)
            h_d = ff.generate_h(r, d.shape[1], seeds[1], c_mode=0)

            # Loop-invariant: only the last fold records its fit error.
            record_error = bool(error_print) and row_e == n and column_e == m

            for i in range(iteration):
                w_d, h_d = ff.update(d, w_d, h_d, 0)
                if record_error:
                    error[i] = np.linalg.norm(d - np.dot(w_d, h_d))**2
                if i % 1000 == 999:
                    print("BCV  NMF ( r={}  {} / {}) : {} times update".format(
                        r, counter, total_folds, i + 1))

            # calculate BCV
            a_r = np.dot(np.dot(b, np.linalg.pinv(h_d)),
                         np.dot(np.linalg.pinv(w_d), c))
            bic_tmp += np.linalg.norm(a - a_r)**2
            counter += 1
    return bic_tmp, error
def parallel_least_square_w_eval(n, m, r, approximate_size, v, o_w, iteration,
                                 wh_seed, c_mode, column_sketching):
    """Record per-iteration errors against ``o_w`` for NMF and sketched NMF.

    Seeds are derived with ``ff.two_seeds(wh_seed)``. Each step is delegated
    to the module-level ``calculate`` helper, whose third return value is
    stored as that step's error.

    Args:
        n, m, r: sizes handed to ``ff.generate_w`` / ``ff.generate_h``.
        approximate_size: sketch size.
        v: matrix to factorise.
        o_w: reference W passed to ``calculate``.
        iteration: number of steps in each run.
        wh_seed: seed passed to ``ff.two_seeds``.
        c_mode: forwarded unchanged to the ``ff`` helpers and ``calculate``.
        column_sketching: forwarded as ``right_product`` to
            ``ff.uniform_sampling``.
            NOTE(review): the sketched factors are always generated as
            ``(n, r)`` / ``(r, approximate_size)`` regardless of this flag;
            confirm that is intended.

    Returns:
        ``(nmf_error, snmf_error, w, h, w_s, h_os)`` where ``h_os`` is
        ``ff.calculate_h(v, w_s)``.
    """
    seed_pair = ff.two_seeds(wh_seed)
    seed_w = seed_pair[0]
    seed_h = seed_pair[1]

    full_errors = np.zeros(iteration)
    sketch_errors = np.zeros(iteration)

    # Factors for the full problem.
    w_full = ff.generate_w(n, r, seed_w, c_mode=c_mode)
    h_full = ff.generate_h(r, m, seed_h, c_mode=c_mode)

    # Sketched matrix and its factors.
    sketched_v = ff.uniform_sampling(
        v, approximate_size, seed_w + 1, right_product=column_sketching)
    w_sketch = ff.generate_w(n, r, seed_w, c_mode=c_mode)
    h_sketch = ff.generate_h(r, approximate_size, seed_h, c_mode=c_mode)

    for step in range(iteration):
        w_full, h_full, full_errors[step] = calculate(
            v, o_w, w_full, h_full, c_mode, step)

    for step in range(iteration):
        w_sketch, h_sketch, sketch_errors[step] = calculate(
            sketched_v, o_w, w_sketch, h_sketch, c_mode, step)

    # H for the full matrix, recomputed from the sketched W.
    h_full_from_sketch = ff.calculate_h(v, w_sketch)

    return (full_errors, sketch_errors, w_full, h_full, w_sketch,
            h_full_from_sketch)
def bic(v, r, iteration, seed):
    """Fit a factorisation of inner size ``r`` to ``v`` and score it.

    Factors are initialised from ``ff.two_seeds(seed)`` and refined with
    ``iteration`` calls to ``ff.update(v, w, h, 0)``.

    Args:
        v: 2-D matrix.
        r: inner size of the factorisation.
        iteration: number of update steps.
        seed: passed to ``ff.two_seeds``.

    Returns:
        ``(score, error)`` where
        ``score = log(||v - W H||**2) + r * a * log(1 / a)`` with
        ``a = (n + m) / (n * m)``, and ``error`` holds ``||v - W H||**2``
        after every update.
    """
    rows, cols = v.shape
    seed_pair = ff.two_seeds(seed)
    basis = ff.generate_w(rows, r, seed_pair[0], c_mode=0)
    coeffs = ff.generate_h(r, cols, seed_pair[1], c_mode=0)
    history = np.zeros(iteration)

    for step in range(iteration):
        basis, coeffs = ff.update(v, basis, coeffs, 0)
        residual = v - np.dot(basis, coeffs)
        history[step] = np.linalg.norm(residual)**2
        # Progress line after every 1000th update.
        if (step + 1) % 1000 == 0:
            print("BIC  NMF ( r={} ) : {} times update".format(r, step + 1))

    scale = (rows + cols) / (rows * cols)
    fit_term = np.log(np.linalg.norm(v - np.dot(basis, coeffs))**2)
    penalty_term = r * scale * np.log(1 / scale)
    return fit_term + penalty_term, history
# Example no. 10
# 0
def _subspace_residual(basis, target):
    """Return ``||target - basis @ d||**2`` for the least-squares ``d``."""
    coeff = np.linalg.lstsq(basis, target)[0]
    return np.linalg.norm(target - np.dot(basis, coeff))**2


def error_calculate(r_size, ap_size, v_origin, w_origin, ite, wh_seed, c_mode,
                    nmfqp, t_flag):
    """Track V and W errors per iteration for NMF and sketched NMF.

    Args:
        r_size: inner size of the factorisation.
        ap_size: sketch size.
        v_origin: 2-D matrix to factorise.
        w_origin: reference matrix for the W error (least-squares residual).
        ite: number of update steps in each run.
        wh_seed: seed passed to ``ff.two_seeds``.
        c_mode: forwarded to the ``ff`` helpers; the value ``2`` selects
            ``ff.fgd_update`` (with step parameters starting at 5) instead of
            ``ff.update``.
        nmfqp: when truthy, the NMF V error is measured with
            ``ff.calculate_h(v_origin, w)`` instead of the updated ``h``.
        t_flag: when truthy, the W error is measured on the transposed H-side
            factor and the returned factors are transposed and swapped.

    Returns:
        ``(v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error, f1, f2, f3,
        f4, os_M)``. ``f1..f4`` are ``w, h, w_s, h_s`` (or
        ``h.T, w.T, h_s.T, w_s.T`` when ``t_flag`` is truthy). ``os_M`` is the
        last ``ff.calculate_h(v_origin, w_s)`` result, or ``None`` when
        ``ite == 0``.
    """
    theta_start = 5
    seeds = ff.two_seeds(wh_seed)
    v_nmf_error = np.zeros(ite)
    v_snmf_error = np.zeros(ite)
    w_nmf_error = np.zeros(ite)
    w_snmf_error = np.zeros(ite)

    n_size, m_size = v_origin.shape
    w = ff.generate_w(n_size, r_size, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r_size, m_size, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v_origin, ap_size, 0)

    # NMF calculate  ----------------------------------------------------------
    if c_mode == 2:
        theta_w = theta_h = theta_start

    if nmfqp:
        print("NMF matrix H is calculated by QP.")
    for i in range(ite):
        if c_mode == 2:
            w, h, theta_w, theta_h = ff.fgd_update(v_origin, w, h, theta_w,
                                                   theta_h)
        else:
            w, h = ff.update(v_origin, w, h, c_mode)

        h_result = (ff.calculate_h(v_origin, w, print_interim=False)
                    if nmfqp else h)

        # v evaluate  -------------------
        v_nmf_error[i] = np.linalg.norm(v_origin - np.dot(w, h_result))**2
        # w evaluate  -------------------
        # The W error always uses the updated ``h`` / ``w``, never h_result.
        w_nmf_error[i] = _subspace_residual(h.T if t_flag else w, w_origin)

        if i == 0 or i % 100 == 99:
            print("NMF ( r=" + str(r_size) + "  k=" + str(ap_size) + " ) : " +
                  str(i + 1) + " times update")

    # SNMF calculate  ---------------------------------------------------------
    # Step parameters are reset so the sketched run starts from the same
    # state as the plain run did.
    if c_mode == 2:
        theta_w = theta_h = theta_start

    w_s = ff.generate_w(n_size, r_size, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r_size, ap_size, seeds[1], c_mode=c_mode)

    # Pre-bound so the return below is valid when ite == 0 (the original
    # raised UnboundLocalError in that case).
    os_M = None
    for i in range(ite):
        if c_mode == 2:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(v_s, w_s, h_s, theta_w,
                                                       theta_h)
        else:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)

        # v evaluate  -------------------
        os_M = ff.calculate_h(v_origin, w_s, print_interim=False)
        v_snmf_error[i] = np.linalg.norm(v_origin - np.dot(w_s, os_M))**2
        # w evaluate  -------------------
        w_snmf_error[i] = _subspace_residual(os_M.T if t_flag else w_s,
                                             w_origin)

        if i == 0 or i % 100 == 99:
            print(
                "SketchingNMF ( r={}  k={} seed={} ) : {} times update".format(
                    r_size, ap_size, wh_seed, i + 1))

    if t_flag:
        return (v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error, h.T,
                w.T, h_s.T, w_s.T, os_M)
    return (v_nmf_error, v_snmf_error, w_nmf_error, w_snmf_error, w, h, w_s,
            h_s, os_M)
def parallel_v_error_eval(r,
                          approximate_size,
                          v,
                          iteration,
                          wh_seed,
                          c_mode,
                          nmfqp,
                          t_flag,
                          snmf_only=False):
    """Track the squared V reconstruction error for NMF and sketched NMF.

    Args:
        r: inner size of the factorisation.
        approximate_size: sketch size.
        v: 2-D matrix to factorise.
        iteration: number of update steps in each run.
        wh_seed: seed passed to ``ff.two_seeds``.
        c_mode: forwarded to the ``ff`` helpers; the value ``2`` selects
            ``ff.fgd_update`` (with step parameters starting at 5) instead of
            ``ff.update``.
        nmfqp: when truthy, the NMF error is measured with
            ``ff.calculate_h(v, w)`` and that matrix is returned as ``h``.
        t_flag: when truthy, returned factors are transposed and swapped.
        snmf_only: when truthy, the plain-NMF run is skipped; ``nmf_error``
            stays all zeros and the two NMF factor slots are ``None``.

    Returns:
        A 7-tuple ``(nmf_error, snmf_error, f1, f2, f3, f4, f5)``.
        Without ``t_flag``: ``f1..f5 = w, h, w_s, h_os, h_s``.
        With ``t_flag``: ``f1..f5 = h.T, w.T, h_os.T, w_s.T, h_s.T``.
        ``h_os`` is the last ``ff.calculate_h(v, w_s)`` result, or ``None``
        when ``iteration == 0``.
    """
    theta_start = 5
    n, m = v.shape
    seeds = ff.two_seeds(wh_seed)
    nmf_error = np.zeros(iteration)
    snmf_error = np.zeros(iteration)
    w = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h = ff.generate_h(r, m, seeds[1], c_mode=c_mode)
    v_s = ff.uniform_sampling(v, approximate_size, seeds[0] + 1)

    # NMF  --------------------------------------------------------------------
    theta_w = theta_h = theta_start
    if not snmf_only:
        if nmfqp:
            print("NMF matrix H is calculated by QP.")
        # The QP variant reports every 100 updates, the plain one every 2000.
        report_every = 100 if nmfqp else 2000
        h_eval = h
        for i in range(iteration):
            if c_mode != 2:
                w, h = ff.update(v, w, h, c_mode)
            else:
                w, h, theta_w, theta_h = ff.fgd_update(
                    v, w, h, theta_w, theta_h)
            h_eval = (ff.calculate_h(v, w, print_interim=False)
                      if nmfqp else h)
            nmf_error[i] = np.linalg.norm(v - np.dot(w, h_eval))**2
            if i == 0 or (i + 1) % report_every == 0:
                print("NMF ( r=" + str(r) + "  k=" +
                      str(approximate_size) + " ) : " + str(i + 1) +
                      " times update")
        h = h_eval

    # SNMF  -------------------------------------------------------------------
    # Reset the step parameters: otherwise the sketched run would inherit the
    # values left by the NMF loop and its result would depend on snmf_only.
    theta_w = theta_h = theta_start
    w_s = ff.generate_w(n, r, seeds[0], c_mode=c_mode)
    h_s = ff.generate_h(r, approximate_size, seeds[1], c_mode=c_mode)
    # Pre-bound so the return below is valid when iteration == 0.
    h_os = None
    for i in range(iteration):
        if c_mode != 2:
            w_s, h_s = ff.update(v_s, w_s, h_s, c_mode)
        else:
            w_s, h_s, theta_w, theta_h = ff.fgd_update(v_s, w_s, h_s, theta_w,
                                                       theta_h)
        h_os = ff.calculate_h(v, w_s, print_interim=False)
        snmf_error[i] = np.linalg.norm(v - np.dot(w_s, h_os))**2
        if i == 0 or i % 100 == 99:
            print(
                "SketchingNMF ( r={}  k={} seed={} ) : {} times update".format(
                    r, approximate_size, wh_seed, i + 1))

    if t_flag:
        h_os_t = None if h_os is None else h_os.T
        if snmf_only:
            return nmf_error, snmf_error, None, None, h_os_t, w_s.T, h_s.T
        return nmf_error, snmf_error, h.T, w.T, h_os_t, w_s.T, h_s.T
    if snmf_only:
        return nmf_error, snmf_error, None, None, w_s, h_os, h_s
    return nmf_error, snmf_error, w, h, w_s, h_os, h_s