Example #1
    def train(epoch, config):
        print('==> Epoch: {}'.format(epoch))
        model.train()
        proj_k = config['proj_k']
        train_loss = 0
        corr_before = 0
        corr_after = 0

        with trange(len(train_loader)) as t:
            for i, (v1, v2, label, v_len, t_len) in enumerate(train_loader):
                v1, v2, label, v_len, t_len = (v1.to(device), v2.to(device),
                                               label.to(device),
                                               v_len.to(device),
                                               t_len.to(device))
                before_prox_1, before_prox_2, after_prox_1, after_prox_2, log_prob = model(
                    v1, v2, config['alpha'])
                # the correlation between the two views enters the loss with
                # a negative sign (scaled by 1/proj_k), so it is maximized
                corr = CCA.apply(before_prox_1.view(-1, proj_k),
                                 before_prox_2.view(-1, proj_k), config)
                loss = criteria(log_prob, label, v_len, t_len) - corr / float(proj_k)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                corr_before += corr.item()
                corr_prox = CCA.apply(after_prox_1.view(-1, proj_k),
                                      after_prox_2.view(-1, proj_k), config)
                corr_after += corr_prox.item()

                metrics = {
                    'loss': '{:.3f}'.format(train_loss / (i + 1)),
                    'corr': '{:.3f}/{:.3f}'.format(corr_before / (i + 1),
                                                   corr_after / (i + 1)),
                    # 'per': '{:.3f}({}/{})'.format((err_phone/total_phone)*100, err_phone, total_phone)
                }

                t.set_postfix(metrics)
                t.update()
        lr_scheduler.step()
        logging.info('train: epoch {}, loss {:.3f}, corr {:.3f}/{:.3f}'.format(
            epoch, train_loss / (i + 1), corr_before / (i + 1),
            corr_after / (i + 1)))
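
The loss above subtracts the canonical correlation between the two projected views, scaled by 1/proj_k, from the sequence criterion, so training maximizes cross-view correlation at the same time. Below is a minimal NumPy sketch of that correlation term; it assumes CCA.apply returns the sum of canonical correlations of the minibatch projections (the actual autograd Function is defined elsewhere in the project, so total_canonical_correlation is only illustrative):

import numpy as np

def total_canonical_correlation(H1, H2, reg=1e-4):
    # center both views
    H1 = H1 - H1.mean(axis=0)
    H2 = H2 - H2.mean(axis=0)
    n = H1.shape[0]
    # regularized covariance blocks
    S11 = H1.T @ H1 / (n - 1) + reg * np.eye(H1.shape[1])
    S22 = H2.T @ H2 / (n - 1) + reg * np.eye(H2.shape[1])
    S12 = H1.T @ H2 / (n - 1)
    # whiten via inverse Cholesky factors: T = S11^(-1/2) S12 S22^(-1/2)
    L1 = np.linalg.inv(np.linalg.cholesky(S11))
    L2 = np.linalg.inv(np.linalg.cholesky(S22))
    T = L1 @ S12 @ L2.T
    # the singular values of T are the canonical correlations
    return np.linalg.svd(T, compute_uv=False).sum()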
Example #2
from sklearn.metrics.pairwise import pairwise_kernels


def myKCCA(x, y, kernel, dim, degree, epsilon, gamma, coef0, n_jobs):

    # Gram matrices of both views; filter_params drops kernel arguments
    # that do not apply to the chosen kernel (epsilon is accepted but unused)
    kx = pairwise_kernels(x,
                          Y=None,
                          metric=kernel,
                          filter_params=True,
                          n_jobs=n_jobs,
                          degree=degree,
                          gamma=gamma,
                          coef0=coef0)
    ky = pairwise_kernels(y,
                          Y=None,
                          metric=kernel,
                          filter_params=True,
                          n_jobs=n_jobs,
                          degree=degree,
                          gamma=gamma,
                          coef0=coef0)
    print("my kx is", kx[0][:3])
    wx, wy, r = CCA(kx, ky, dim=dim)

    return wx, wy, r
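
A hypothetical usage sketch for myKCCA; it assumes the project-local CCA(kx, ky, dim=...) helper called above is importable in the same scope, and the parameter values are arbitrary:

import numpy as np

x = np.random.randn(100, 5)  # two views of the same 100 samples
y = np.random.randn(100, 3)
wx, wy, r = myKCCA(x, y, kernel='rbf', dim=2, degree=3,
                   epsilon=1e-4, gamma=0.5, coef0=1.0, n_jobs=1)
print(r)  # canonical correlations in the kernel-induced feature spaces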
Example #3
import pandas as pd

train_data = pd.DataFrame(dataset)
train_data.columns = ["intensity", "last", "time", "status"]

data = train_data.values

intensity = data[:, 0]
last = data[:, 1]

# the second view keeps both the "time" and "status" columns
status = data[:, 2:]

intensity = intensity.reshape(-1, 1)
last = last.reshape(-1, 1)

res = []

cca = CCA()
cca.fit(intensity, status)
cca.transform(intensity, status)
cca.ptransform(intensity, status)
col = cca.calc_correlations()
res.append(col)

cca.fit(last, status)
cca.transform(last, status)
cca.ptransform(last, status)
col = cca.calc_correlations()
res.append(col)

with open('./result/predictResult.txt', 'w', encoding='UTF-8') as f:
    # write the canonical correlations of each variable pair, one per line
    for x in res:
        f.write('{}\n'.format(x))
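
As a sanity check, the first canonical correlation between intensity and status can also be computed with scikit-learn; this sketch assumes only scikit-learn, not the project's CCA class used above:

import numpy as np
from sklearn.cross_decomposition import CCA as SklearnCCA

sk_cca = SklearnCCA(n_components=1)
u, v = sk_cca.fit_transform(intensity, status)
print(np.corrcoef(u[:, 0], v[:, 0])[0, 1])  # first canonical correlation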
Example #4
import copy
import itertools
import sys
import time

import numpy as np


def train(iter_funcs, dataset, train_batch_iter, valid_batch_iter, fit_cca):
    """
    Train the model on `dataset` with mini-batch training.
    Each mini-batch has `batch_size` recordings.
    """

    for epoch in itertools.count(1):

        # iterate train batches
        batch_train_evals = []
        batch_train_losses = []
        iterator = train_batch_iter(dataset['train'])
        generator = threaded_generator_from_iterator(iterator)

        batch_times = np.zeros(5, dtype=np.float32)
        start, after = time.time(), time.time()
        for i_batch, train_input in enumerate(generator):

            batch_res = iter_funcs['train'](*train_input)
            # try:
            #     batch_res = iter_funcs['train'](*train_input)
            # except:
            #     batch_res[0] = np.nan

            batch_train_losses.append(batch_res[0])
            if len(batch_res) > 1:
                batch_train_evals.append(batch_res[1])

            # compute timing
            batch_time = time.time() - after
            after = time.time()
            train_time = (after - start)

            # estimate updates per second (running avg)
            batch_times[0:4] = batch_times[1:5]
            batch_times[4] = batch_time
            ups = 1.0 / batch_times.mean()

            # report loss during training
            perc = 100 * (float(i_batch + 1) / train_batch_iter.n_batches)
            dec = int(perc // 4)
            progbar = "|" + dec * "#" + (25 - dec) * "-" + "|"
            vals = (perc, progbar, train_time, ups, np.mean(batch_train_losses))
            loss_str = " (%d%%) %s time: %.2fs, ups: %.2f, loss: %.5f" % vals
            print(col.print_colored(loss_str, col.WARNING), end="\r")
            sys.stdout.flush()

        # compute network output on train set
        n_valid_cca = np.min([1000, dataset['valid'].shape[0]])
        V1_tr, V2_tr = None, None
        batch_iter_copy = copy.copy(train_batch_iter)
        batch_iter_copy.epoch_counter = 0
        iterator = batch_iter_copy(dataset['train'])
        generator = threaded_generator_from_iterator(iterator)
        for i_batch, train_input in enumerate(generator):

            if V1_tr is None or V1_tr.shape[0] < n_valid_cca:
                X_o, Z_o = iter_funcs['compute_output'](*train_input)
                V1_tr = X_o if V1_tr is None else np.vstack([V1_tr, X_o])
                V2_tr = Z_o if V2_tr is None else np.vstack([V2_tr, Z_o])

        # fit canonical correlation analysis
        if fit_cca:
            cca = CCA(method='svd')
            cca.fit(V1_tr, V2_tr, verbose=False)
            lv1_cca = cca.transform_V1(V1_tr)
            lv2_cca = cca.transform_V2(V2_tr)
        else:
            lv1_cca = V1_tr
            lv2_cca = V2_tr

        # evaluate retrieval on train set; the returned mean rank is then
        # replaced by the top-10 miss rate
        mean_rank_tr, med_rank_tr, dist_tr, hit_rates, map_tr = eval_retrieval(lv1_cca, lv2_cca)
        mean_rank_tr = 1.0 - float(hit_rates[10]) / len(lv1_cca)

        print("\x1b[K", end="\r")
        print(' ')
        avg_train_loss = np.mean(batch_train_losses)
        if len(batch_train_evals) > 0:
            batch_train_evals = np.asarray(batch_train_evals).mean(axis=0)
        else:
            batch_train_evals = None

        # iterate validation batches
        V1_va, V2_va = None, None
        batch_valid_losses = []
        iterator = valid_batch_iter(dataset['valid'])
        generator = threaded_generator_from_iterator(iterator)
        for train_input in generator:
            batch_res = iter_funcs['valid'](*train_input)
            batch_valid_losses.append(batch_res[0])

            # compute network output
            if V1_va is None or V1_va.shape[0] < n_valid_cca:
                X_o, Z_o = iter_funcs['compute_output'](*train_input)
                V1_va = X_o if V1_va is None else np.vstack([V1_va, X_o])
                V2_va = Z_o if V2_va is None else np.vstack([V2_va, Z_o])

        avg_valid_loss = np.mean(batch_valid_losses)

        # compute distance on validation set
        if fit_cca:
            lv1_cca = cca.transform_V1(V1_va)
            lv2_cca = cca.transform_V2(V2_va)
        else:
            lv1_cca = V1_va
            lv2_cca = V2_va

        # evaluate retrieval on validation set; divide by the number of
        # collected embeddings rather than a hard-coded 1000
        mean_rank_va, med_rank_va, dist_va, hit_rates, map_va = eval_retrieval(lv1_cca, lv2_cca)
        mean_rank_va = 1.0 - float(hit_rates[10]) / len(lv1_cca)

        # collect results
        yield {
            'number': epoch,
            'train_loss': avg_train_loss,
            'valid_loss': avg_valid_loss,
            'mean_cos_dist_tr': dist_tr,
            'mean_cos_dist_va': dist_va,
            'mean_rank_tr': mean_rank_tr,
            'mean_rank_va': mean_rank_va,
            'med_rank_tr': med_rank_tr,
            'med_rank_va': med_rank_va,
            'map_tr': map_tr,
            'map_va': map_va,
            'evals_tr': batch_train_evals,
        }
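
eval_retrieval is project code; the sketch below is only an assumption about its behavior, showing one common way to obtain the statistics logged here (mean/median rank and the hit_rates[10] count) by ranking each sample's true counterpart across views under cosine distance:

import numpy as np

def retrieval_ranks(lv1, lv2):
    # cosine distance between all cross-view pairs
    a = lv1 / np.linalg.norm(lv1, axis=1, keepdims=True)
    b = lv2 / np.linalg.norm(lv2, axis=1, keepdims=True)
    dist = 1.0 - a @ b.T
    # 1-based rank of the true counterpart (the diagonal) within each row
    order = np.argsort(dist, axis=1)
    return np.where(order == np.arange(len(lv1))[:, None])[1] + 1

# ranks = retrieval_ranks(lv1_cca, lv2_cca)
# mean_rank, med_rank = ranks.mean(), np.median(ranks)
# hit_at_10 = (ranks <= 10).sum()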
Example #5
class HierarchicalCCA(GCCA):

    def __init__(self, n_components=2, reg_param=0.1):
        GCCA.__init__(self, n_components, reg_param)

        self.cca1 = CCA(self.n_components, self.reg_param)
        self.cca2 = CCA(self.n_components, self.reg_param)

        self.z_list = []

    def fit(self, x0, x1, x2):

        self.data_num = 3

        # stage 1: pairwise CCA between the first two views
        self.cca1.fit(x0, x1)
        self.cca1.transform(x0, x1)

        # stage 2: stack both stage-1 projections and correlate them with
        # the third view, duplicated to match the stacked row count
        z0 = self.cca1.z_list[0]
        z1 = self.cca1.z_list[1]
        z_all = np.vstack([z0, z1])
        x_dup = np.vstack([x2, x2])
        self.cca2.fit(z_all, x_dup)


    def transform(self, x0, x1, x2):

        # stage 1: project the first two views
        self.cca1.transform(x0, x1)

        # stage 2: project the stacked stage-1 outputs with the duplicated
        # third view, then split the results back per view
        z0 = self.cca1.z_list[0]
        z1 = self.cca1.z_list[1]
        z_all = np.vstack([z0, z1])
        x_dup = np.vstack([x2, x2])
        self.cca2.transform(z_all, x_dup)
        w_all, w2_dup = self.cca2.z_list
        w0 = w_all[:z0.shape[0]]
        w1 = w_all[z0.shape[0]:]
        w2 = w2_dup[:x2.shape[0]]

        self.z_list = [w0, w1, w2]

    def save_params(self, filepath):
        self.logger.info("saving hierarchical cca to %s", filepath)
        with h5py.File(filepath, 'w') as f:
            f.create_dataset("n_components", data=self.n_components)
            f.create_dataset("reg_param", data=self.reg_param)
            f.create_dataset("data_num_all", data=self.data_num)
            f.create_dataset("data_num1", data=self.cca1.data_num)
            f.create_dataset("data_num2", data=self.cca2.data_num)

            cov_grp1 = f.create_group("cov_mat1")
            for i, row in enumerate(self.cca1.cov_mat):
                for j, cov in enumerate(row):
                    cov_grp1.create_dataset(str(i) + "_" + str(j), data=cov)

            cov_grp2 = f.create_group("cov_mat2")
            for i, row in enumerate(self.cca2.cov_mat):
                for j, cov in enumerate(row):
                    cov_grp2.create_dataset(str(i) + "_" + str(j), data=cov)

            h_grp1 = f.create_group("h_list1")
            for i, h in enumerate(self.cca1.h_list):
                h_grp1.create_dataset(str(i), data=h)

            h_grp2 = f.create_group("h_list2")
            for i, h in enumerate(self.cca2.h_list):
                h_grp2.create_dataset(str(i), data=h)

            f.create_dataset("eig_vals1", data=self.cca1.eigvals)
            f.create_dataset("eig_vals2", data=self.cca2.eigvals)

            if len(self.cca1.z_list) != 0:
                z_grp1 = f.create_group("z_list1")
                for i, z in enumerate(self.z_list):
                    z_grp1.create_dataset(str(i), data=z)

            if len(self.cca2.z_list) != 0:
                z_grp2 = f.create_group("z_list2")
                for i, z in enumerate(self.cca2.z_list):
                    z_grp2.create_dataset(str(i), data=z)

            if len(self.z_list) != 0:
                z_grp3 = f.create_group("z_list_all")
                for i, z in enumerate(self.z_list):
                    z_grp3.create_dataset(str(i), data=z)

            f.flush()

    def load_params(self, filepath):
        self.logger.info("loading hierarchical cca from %s", filepath)
        with h5py.File(filepath, "r") as f:
            # dataset[()] reads the stored value (h5py removed Dataset.value)
            self.n_components = f["n_components"][()]
            self.reg_param = f["reg_param"][()]
            self.cca1.n_components = self.n_components
            self.cca1.reg_param = self.reg_param
            self.cca2.n_components = self.n_components
            self.cca2.reg_param = self.reg_param
            self.data_num = f["data_num_all"][()]
            self.cca1.data_num = f["data_num1"][()]
            self.cca2.data_num = f["data_num2"][()]

            self.cca1.cov_mat = [[np.array([]) for col in range(self.cca1.data_num)] for row in range(self.cca1.data_num)]
            self.cca2.cov_mat = [[np.array([]) for col in range(self.cca2.data_num)] for row in range(self.cca2.data_num)]

            for i in range(self.cca1.data_num):
                for j in range(self.cca1.data_num):
                    # read into memory so the arrays stay valid after the file closes
                    self.cca1.cov_mat[i][j] = f["cov_mat1/" + str(i) + "_" + str(j)][()]

            for i in range(self.cca2.data_num):
                for j in range(self.cca2.data_num):
                    self.cca2.cov_mat[i][j] = f["cov_mat2/" + str(i) + "_" + str(j)][()]

            self.cca1.h_list = [None] * self.cca1.data_num
            for i in range(self.cca1.data_num):
                self.cca1.h_list[i] = f["h_list1/" + str(i)][()]
            self.cca2.h_list = [None] * self.cca2.data_num
            for i in range(self.cca2.data_num):
                self.cca2.h_list[i] = f["h_list2/" + str(i)][()]
            self.cca1.eig_vals = f["eig_vals1"][()]
            self.cca2.eig_vals = f["eig_vals2"][()]

            if "z_list1" in f:
                self.cca1.z_list = [None] * self.cca2.data_num
                for i in xrange(self.cca1.data_num):
                    self.cca1.z_list[i] = f["z_list1/" + str(i)].value

            if "z_list2" in f:
                self.cca2.z_list = [None] * self.cca2.data_num
                for i in xrange(self.cca2.data_num):
                    self.cca2.z_list[i] = f["z_list2/" + str(i)].value

            if "z_list_all" in f:
                self.z_list = [None] * self.data_num
                for i in xrange(self.data_num):
                    self.z_list[i] = f["z_list_all/" + str(i)].value

            f.flush()
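
A hypothetical usage sketch, assuming the GCCA and CCA base classes from this project are importable; the three views are random and purely illustrative, sharing only their row count:

import numpy as np

x0 = np.random.randn(50, 4)
x1 = np.random.randn(50, 6)
x2 = np.random.randn(50, 3)

hcca = HierarchicalCCA(n_components=2, reg_param=0.1)
hcca.fit(x0, x1, x2)       # stage 1 on (x0, x1), stage 2 against x2
hcca.transform(x0, x1, x2)
w0, w1, w2 = hcca.z_list   # shared-space projections of the three views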
Example #6
    def test(epoch, config):
        global best_per
        model.eval()
        proj_k = config['proj_k']
        test_loss = 0
        corr_before = 0
        corr_after = 0
        total_phone = 0
        err_phone = 0
        with torch.no_grad():
            with trange(len(test_loader)) as t:
                for i, (v1, v2, label, v_len, t_len) in enumerate(test_loader):
                    v1, v2, label, v_len, t_len = (v1.to(device),
                                                   v2.to(device),
                                                   label.to(device),
                                                   v_len.to(device),
                                                   t_len.to(device))
                    before_prox_1, before_prox_2, after_prox_1, after_prox_2, log_prob = model(
                        v1, v2, config['alpha'])
                    corr = CCA.apply(before_prox_1.view(-1, proj_k),
                                     before_prox_2.view(-1, proj_k), config)
                    loss = criteria(log_prob, label, v_len, t_len) - corr / float(proj_k)

                    test_loss += loss.item()
                    corr_before += corr.item()
                    corr_prox = CCA.apply(after_prox_1.view(-1, proj_k),
                                          after_prox_2.view(-1, proj_k),
                                          config)
                    corr_after += corr_prox.item()
                    tp, ep = phone_error(log_prob.cpu(),
                                         label.cpu(),
                                         t_len.cpu(),
                                         blank=config['blank'])
                    total_phone += tp
                    err_phone += ep

                    metrics = {
                        'loss': '{:.3f}'.format(test_loss / (i + 1)),
                        'corr': '{:.3f}/{:.3f}'.format(corr_before / (i + 1),
                                                       corr_after / (i + 1)),
                        'per': '{:.3f}({}/{})'.format(
                            (err_phone / total_phone) * 100, err_phone,
                            total_phone)
                    }
                    t.set_postfix(metrics)
                    t.update()
        per = (err_phone / total_phone) * 100
        logging.info(
            'test: epoch {}, loss {:.3f}, corr {:.3f}/{:.3f}, per {:.3f}({}/{})'
            .format(epoch, test_loss / (i + 1), corr_before / (i + 1),
                    corr_after / (i + 1), per, err_phone, total_phone))
        if per < best_per:
            print('Saving..')
            state = {
                'model': model.state_dict(),
                'per': per,
                'epoch': epoch,
            }
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(state,
                       './checkpoint/{}_{}.ckpt'.format(args.ckpt, args.noisy))
            best_per = per
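
To resume from the checkpoint saved above, a minimal sketch that reads back the same state-dict layout (start_epoch is just an illustrative name):

state = torch.load('./checkpoint/{}_{}.ckpt'.format(args.ckpt, args.noisy))
model.load_state_dict(state['model'])
best_per = state['per']
start_epoch = state['epoch'] + 1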