Example #1
    def densratio_gridsearch(self, X_ref, X_test):
        lambda_range = 10 ** np.linspace(-3, 3, 7)   # 1e-3 ... 1e3, 7 values
        sigma_range = 10 ** np.linspace(-3, 3, 25)   # 1e-3 ... 1e3, 25 values
        estimator_1 = densratio(X_ref, X_test, self.alpha, sigma_range,
                                lambda_range, self.kernel_num, verbose=False)
        estimator_2 = densratio(X_test, X_ref, self.alpha, sigma_range,
                                lambda_range, self.kernel_num, verbose=False)
        w1_ref = estimator_1.compute_density_ratio(X_ref)
        w2_test = estimator_2.compute_density_ratio(X_test)
        # Symmetrized score: deviation of the mean estimated ratio from 1,
        # summed over both fitting directions.
        score_max = ((0.5 * np.mean(w1_ref) - 0.5) +
                     (0.5 * np.mean(w2_test) - 0.5))
        return score_max
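For reference, a minimal standalone sketch of the same kind of two-directional RuLSIF fit on synthetic data; the shapes, alpha, and grids below are illustrative assumptions, not values taken from the class above:

import numpy as np
from densratio import densratio

rng = np.random.default_rng(0)
X_ref = rng.normal(0.0, 1.0, size=(200, 2))   # reference sample
X_test = rng.normal(0.5, 1.2, size=(200, 2))  # shifted test sample

sigma_range = 10 ** np.linspace(-1, 1, 5)
lambda_range = 10 ** np.linspace(-2, 1, 4)
est = densratio(X_ref, X_test, alpha=0.1,
                sigma_range=sigma_range, lambda_range=lambda_range,
                verbose=False)
w = est.compute_density_ratio(X_ref)  # estimated p_ref / p_test at X_ref
print(np.mean(w))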
Example #2
def estimate_hyperparameters(packed_sequence,
                             window_size=50,
                             alpha=0.1,
                             sigma_range=None,
                             lambda_range=None,
                             num_samples=50,
                             num_rank=2):

    sigmas_forward = np.zeros(num_samples)
    sigmas_backward = np.zeros(num_samples)
    lambdas_forward = np.zeros(num_samples)
    lambdas_backward = np.zeros(num_samples)

    print('Sampling for hyperparameter estimation...')

    for iteration in range(num_samples):
        i = np.random.randint(low=window_size,
                              high=packed_sequence.shape[0] - window_size)

        backward_window = packed_sequence[i - window_size:i]
        forward_window = packed_sequence[i:i + window_size]

        ratio_forward_obj = densratio(backward_window,
                                      forward_window,
                                      alpha=alpha,
                                      sigma_range=sigma_range,
                                      lambda_range=lambda_range,
                                      verbose=False)
        ratio_backward_obj = densratio(forward_window,
                                       backward_window,
                                       alpha=alpha,
                                       sigma_range=sigma_range,
                                       lambda_range=lambda_range,
                                       verbose=False)

        sigmas_forward[iteration] = ratio_forward_obj.kernel_info.sigma
        lambdas_forward[iteration] = ratio_forward_obj.lambda_

        sigmas_backward[iteration] = ratio_backward_obj.kernel_info.sigma
        lambdas_backward[iteration] = ratio_backward_obj.lambda_

        print('Iteration', iteration, 'complete.')

    print('Sampling for hyperparameter estimation complete.')

    return get_top_counts(sigmas_forward, num_rank), \
           get_top_counts(sigmas_backward, num_rank), \
           get_top_counts(lambdas_forward, num_rank), \
           get_top_counts(lambdas_backward, num_rank)
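A hedged usage sketch for the function above; the packed sequence is synthetic and get_top_counts is assumed to be the counting helper defined alongside this function:

import numpy as np

packed = np.random.randn(500, 10)  # hypothetical packed sequence
sig_f, sig_b, lam_f, lam_b = estimate_hyperparameters(
    packed, window_size=50, alpha=0.1,
    sigma_range=np.array([0.5, 1.0, 2.0]),
    lambda_range=np.array([1e-3, 1e-2, 1e-1]),
    num_samples=10, num_rank=2)
print(sig_f, lam_f)  # the num_rank most frequently selected values per direction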
Example #3
def rulsif(ts, window_size=50, threshold=.1):
    from densratio import densratio

    def make_window(arr, win, jump):
        return np.array(
            [arr[i:i + win] for i in range(0,
                                           len(arr) - win + 1, jump)])

    ts = (ts - ts.min(axis=0)) / (ts.max(axis=0) - ts.min(axis=0))
    all_ratios = []
    rolling_window = np.array(make_window(ts, window_size, window_size))
    for win1, win2 in zip(rolling_window[:-1], rolling_window[1:]):
        concat = np.concatenate([win1, win2])
        med = np.nanmedian(concat)
        sigma_list = med * np.array([.6, .8, 1.0, 1.2, 1.4])
        lambda_list = [1e-3, 1e-2, 1e-1, 1, 10]
        ratio = densratio(win1,
                          win2,
                          alpha=0.01,
                          lambda_range=lambda_list,
                          sigma_range=sigma_list,
                          verbose=False)
        all_ratios.append(ratio.alpha_PE)

    preds = _find_bp_rulsif(ts, all_ratios, threshold, window_size)
    return (preds.tolist(), None)
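The per-window alpha_PE divergences can also be inspected directly, without the _find_bp_rulsif post-processing; a minimal sketch on a synthetic series with a single change point around index 300:

import numpy as np
from densratio import densratio

ts = np.concatenate([np.random.normal(0, 1, 300),
                     np.random.normal(3, 1, 300)]).reshape(-1, 1)
w = 50
scores = []
for s in range(0, len(ts) - 2 * w + 1, w):
    r = densratio(ts[s:s + w], ts[s + w:s + 2 * w],
                  alpha=0.01, verbose=False)
    scores.append(r.alpha_PE)  # peaks for the window pair straddling the shift
print(np.argmax(scores) * w)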
Example #4
    def __init__(self, args):
        super().__init__(1, "MSSynthetic1D")
        np.random.seed(args.seed)

        self.num_sources = args.num_sources
        self.num_data = args.num_data
        source_mus = (np.random.rand(self.num_sources) *
                      (args.source_mu_bound * 2.0)) - args.source_mu_bound
        target_mu = (np.random.rand() * args.target_mu_bound *
                     2.0) - args.target_mu_bound
        self.mus = np.append(source_mus, target_mu)
        assert len(self.mus) == self.num_sources + 1
        self.sigma = args.sigma

        self.estimator_type = args.estimator_type
        self.source_ind = np.random.randint(0,
                                            self.num_sources)  # used for naive

        self.sources = [
            create_one_task(self.mus[s], args.coef_a, args.coef_b, self.sigma,
                            args.num_data) for s in range(self.num_sources)
        ]
        self.target = create_one_task(
            self.mus[self.num_sources],
            args.coef_a,
            args.coef_b,
            args.sigma,
            args.num_data,
        )

        print(f"target_mu:{target_mu}, source_mus:{source_mus}")
        # for unbiased or vr
        self.density_ratios = None
        if self.estimator_type == "unbiased" or self.estimator_type == "vr":
            hp_search_range = [0.1] if args.debug else [0.001, 0.01, 0.1, 1.0]
            self.density_ratios = [
                densratio(
                    self.target["X"],
                    self.sources[s]["X"],
                    alpha=0.0,
                    sigma_range=hp_search_range,
                    lambda_range=hp_search_range,
                ) for s in range(self.num_sources)
            ]

        # check consistency for mu and lambda
        if self.estimator_type == "vr":
            self.log = dict()
            self.log["target_mu"] = target_mu
            self.log["source1_mu"] = self.mus[0]
            self.log["source2_mu"] = self.mus[1]
            self.log["abs_source1_mu"] = abs(target_mu - self.mus[0])
            self.log["abs_source2_mu"] = abs(target_mu - self.mus[1])
            self.log["lambda1"] = []
            self.log["lambda2"] = []
        else:
            self.log = None
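A sketch of how the stored density_ratios are typically consumed for importance weighting (weighted_source_loss is a hypothetical helper; the estimator code that actually uses the ratios lives elsewhere):

import numpy as np

def weighted_source_loss(density_ratio, X_source, loss_per_sample):
    # w(x) estimates p_target(x) / p_source(x); averaging w * loss over the
    # source sample approximates the expected loss under the target.
    w = density_ratio.compute_density_ratio(X_source)
    return np.mean(w * loss_per_sample)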
Example #5
def main(filename, dataset_filename, alpha, seed):

    (X_train, y_train), (X_test, y_test) = load_hdf5(dataset_filename)

    densratio_obj = densratio(X_test, X_train, alpha=alpha)
    importance_weights = densratio_obj.compute_density_ratio(X_train)

    with h5py.File(filename, 'w') as f:
        f.create_dataset("importance_weights", data=importance_weights)

    return 0
def density_ratio_estimation(train_data, test_data):
    result = densratio(train_data, test_data)
    sample_weight = result.compute_density_ratio(train_data)

    return sample_weight
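The returned weights line up with the training points, so they can be passed straight to scikit-learn's sample_weight; a minimal covariate-shift sketch with synthetic data. Note that densratio(x, y) estimates p_x / p_y, so the helper above returns p_train / p_test at the training points; swap the arguments if p_test / p_train weights are wanted:

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X_train = rng.normal(0.0, 1.0, size=(300, 5))
y_train = (X_train[:, 0] > 0).astype(int)
X_test = rng.normal(0.3, 1.0, size=(300, 5))   # shifted covariates

sample_weight = density_ratio_estimation(X_train, X_test)
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train, sample_weight=sample_weight)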
def experiment():
    percent_p = 3
    path = r'C:\Users\yyveggie\Desktop\UCI\Conversion\mushroom.csv'
    seed = 2019

    est_error_upu = []
    est_binary_upu = []
    est_binary_pusb = []
    est_error_pusb = []
    est_error_drsb = []
    est_binary_drsb = []

    for k in range(10):
        np.random.seed(seed)
        pi = 0.6  # class prior: the fraction of P within U; arguably this should be computed from the actual split below rather than fixed in advance
        classifier = LogisticRegression(C=0.01, penalty='l2', solver='liblinear')
        texts_1, texts_0 = CSV(path)
        texts_1 = np.array_split(texts_1, 10)  # split the class-1 samples into ten folds
        texts_0 = np.array_split(texts_0, 10)  # split the class-0 samples into ten folds
        x_test = np.array(list(texts_1[k]) + list(texts_0[k]))  # test x: fold k, a different fold each round
        t_test = np.array(list(len(texts_1[k]) * [1]) + list(len(texts_0[k]) * [0]))  # test y: 1 for positives, 0 for negatives

        index_rest = sorted(set(range(10)) - set([k]))  # fold indices left after removing the test fold
        texts_1 = np.array(texts_1)
        texts_0 = np.array(texts_0)
        texts_1 = np.array([j for i in texts_1[index_rest] for j in i])     # P
        texts_0 = np.array([j for i in texts_0[index_rest] for j in i])     # N
        x = np.vstack((texts_1, texts_0))   # P and N together form the training set
        one = np.ones((len(x), 1))  # column of ones appended as a bias feature
        x_pn = np.concatenate([x, one], axis=1)
        t = pd.Series([1] * len(texts_1) + [0] * len(texts_0))
        classifier.fit(x_pn, t)

        x_train = x
        t_train = t

        xp = x_train[t_train == 1]
        one = np.ones((len(xp), 1))
        xp_temp = np.concatenate([xp, one], axis=1)
        xp_prob = classifier.predict_proba(xp_temp)[:, 1]
        # xp_prob /= np.mean(xp_prob)
        xp_prob = xp_prob ** 20
        xp_prob /= np.max(xp_prob)
        rand = np.random.uniform(size=len(xp))
        temp = xp[xp_prob > rand]
        pdata = int(percent_p / 10 * len(x))  # number of P samples: 3/10 of the total
        while len(temp) < pdata:
            rand = np.random.uniform(size=len(xp))
            temp = np.concatenate([temp, xp[xp_prob > rand]], axis=0)
        xp = temp
        perm = np.random.permutation(len(xp))
        xp = xp[perm[:pdata]]
        u = int(6 / 10 * len(x))  # number of U samples: 6/10 of the total
        updata = int(u * pi)      # P in U = |U| * class prior
        undata = u - updata       # N in U = |U| - (P in U)

        xp_temp = x_train[t_train == 1]
        xn_temp = x_train[t_train == 0]
        perm = np.random.permutation(len(xp_temp))
        xp_temp = xp_temp[perm[:updata]]

        perm = np.random.permutation(len(xn_temp))
        xn_temp = xn_temp[perm[:undata]]
        xu = np.concatenate([xp_temp, xn_temp], axis=0)

        x = np.concatenate([xp, xu], axis=0)

        tp = np.ones(len(xp))
        tu = np.zeros(len(xu))
        t = np.concatenate([tp, tu], axis=0)

        updata = int(1000 * pi)
        undata = 1000 - updata

        xp_test = x_test[t_test == 1]
        perm = np.random.permutation(len(xp_test))
        xp_test = xp_test[perm[:updata]]
        xn_test = x_test[t_test == 0]
        perm = np.random.permutation(len(xn_test))
        xn_test = xn_test[perm[:undata]]

        x_test = np.concatenate([xp_test, xn_test], axis=0)
        tp = np.ones(len(xp_test))
        tu = np.zeros(len(xn_test))
        t_test = np.concatenate([tp, tu], axis=0)

        pu = PU(pi=pi)
        x_train = x
        res, x_test_kernel = pu.optimize(x, t, x_test)
        acc1, f1_binary1 = pu.test(x_test_kernel, res, t_test, quant=False)
        acc2, f1_binary2 = pu.test(x_test_kernel, res, t_test, quant=True, pi=pi)

        result = densratio(x_train[t == 1], x_train[t == 0])
        r = result.compute_density_ratio(x_test)
        temp = np.copy(r)
        temp = np.sort(temp)
        theta = temp[int(np.floor(len(x_test) * (1 - pi)))]
        pred = np.zeros(len(x_test))
        pred[r > theta] = 1
        acc3 = np.mean(pred == t_test)
        f1_binary3 = f1_score(t_test, pred, average='binary')

        est_error_upu.append(acc1)
        est_binary_upu.append(f1_binary1)
        est_error_pusb.append(acc2)
        est_binary_pusb.append(f1_binary2)
        est_error_drsb.append(acc3)
        est_binary_drsb.append(f1_binary3)

        seed += 1

        print("Iter:", k)
        print("upu_accuracy ", acc1)
        print("upu_f1_binary ", f1_binary1)
        print("pusb_accuracy ", acc2)
        print("pusb_f1_binary ", f1_binary2)
        print("drsb_accuracy ", acc3)
        print("drsb_f1_binary ", f1_binary3)

    print("Accuracy for uPU:", np.mean(est_error_upu))
    print("F1-Score for uPU:", np.mean(est_binary_upu))
    print("Accuracy for PUSB:", np.mean(est_error_pusb))
    print("F1-Score for PUSB:", np.mean(est_binary_pusb))
    print("Accuracy for DRSB:", np.mean(est_error_drsb))
    print("F1-Score for DRSB:", np.mean(est_binary_drsb))
Example #8
    dim_image = 1
else:
    dataset = np.load('../input_data/cifar_dataset.npz')
    size_image = 32
    dim_image = 3
#size_image=28
#dim_image=1

Xtr = dataset['Xtr'].astype('float64')
Str = dataset['Str'].ravel()

# Logit-transform the pixel values, clamping them away from 0 and 1 first.
eps = np.finfo(np.float64).eps
Xtr2 = Xtr / 255
Xtr2[Xtr2 == 0] = eps
Xtr2[Xtr2 == 1] = 1 - eps
Xtr3 = np.log(Xtr2 / (1 - Xtr2))
Xtr = Xtr3

PY1 = sum(Str) / Str.shape[0]
PY0 = 1 - PY1
XY1 = Xtr[Str == 1, :]
XY0 = Xtr[Str == 0, :]
XY1oX = densratio(XY1, Xtr)
XY1oXV = min(XY1oX.compute_density_ratio(Xtr))
Y1X = XY1oXV * PY1
print(Y1X)

XY0oX = densratio(XY0, Xtr)
XY0oXV = min(XY0oX.compute_density_ratio(Xtr))
Y0X = XY0oXV * PY0
print(Y0X)
Example #9
    def __init__(self, args):
        super().__init__(dim=args.dim, name="Parkinson")

        # check requirements
        assert args.target_name is not None, "target_name is None."
        if is_our_estimator(args.estimator_type):
            assert args.ratio_dre is not None, "DRE ratio is None."
            assert (args.is_separate_source_dens
                    is not None), "is_separate_source_dens is None."

        np.random.seed(args.seed)
        self.space = None

        self.estimator_type = args.estimator_type
        (
            self.sources_density,
            self.sources_train,
            self.sources_val,
            self.target_opt,
            self.target_val,
        ) = load_dataset(
            self.estimator_type,
            args.data_dir,
            args.target_name,
            args.ratio_validation,
            args.ratio_dre,
            args.is_separate_source_dens,
        )
        self.source_num = len(self.sources_train)

        if self.estimator_type == "naive":
            self.source_naive_ind = np.random.randint(0,
                                                      len(self.sources_train))
            self.source_name_naive = self.sources_train[
                self.source_naive_ind]["filename"]
            self.is_source_concat_for_naive = args.is_source_concat_for_naive
        else:
            self.source_naive_ind = None
            self.source_name_naive = None
            self.is_source_concat_for_naive = None

        self.density_ratios = None
        if is_our_estimator(args.estimator_type):
            if args.debug:
                self.density_ratios = [
                    densratio(
                        self.target_opt["X"],
                        self.sources_density[s]["X"],
                        alpha=args.alpha,
                        sigma_range=[1.0],
                        lambda_range=[0.001],
                    ) for s in range(len(self.sources_density))
                ]
            else:
                hp_search_range = [1e-3, 1e-2, 1e-1, 1e0]
                self.density_ratios = [
                    densratio(
                        self.target_opt["X"],
                        self.sources_density[s]["X"],
                        alpha=args.alpha,
                        sigma_range=hp_search_range,
                        lambda_range=hp_search_range,
                    ) for s in range(len(self.sources_density))
                ]

        self.all_source_train_data = self.concat_all_sources_train()
        self.all_source_val_data = self.concat_all_sources_val()

        self.model = get_model(args.model)
Example #10
def experiment(datatype, udata):
    priors = [0.2, 0.4, 0.6, 0.8]
    ite = 100
    pdata = 400
    num_basis = 300

    seed = 2018

    est_error_pu = np.zeros((len(udata), len(priors), ite))
    est_error_pubp = np.zeros((len(udata), len(priors), ite))
    est_error_dr = np.zeros((len(udata), len(priors), ite))

    for i in range(len(udata)):
        u = udata[i]
        for j in range(len(priors)):
            pi = priors[j]
            for k in range(ite):
                np.random.seed(seed)
                # PN classification
                x, t = make_data(datatype=datatype)
                x = x / np.max(x, axis=0)
                one = np.ones((len(x), 1))
                x_pn = np.concatenate([x, one], axis=1)
                classifier = LogisticRegression(C=0.01, penalty='l2')
                classifier.fit(x_pn, t)

                perm = np.random.permutation(len(x))
                x_train = x[perm[:-3000]]
                t_train = t[perm[:-3000]]
                x_test = x[perm[-3000:]]
                t_test = t[perm[-3000:]]

                xp = x_train[t_train == 1]
                one = np.ones((len(xp), 1))
                xp_temp = np.concatenate([xp, one], axis=1)
                xp_prob = classifier.predict_proba(xp_temp)[:, 1]
                #xp_prob /= np.mean(xp_prob)
                xp_prob = xp_prob**20
                xp_prob /= np.max(xp_prob)
                rand = np.random.uniform(size=len(xp))
                temp = xp[xp_prob > rand]
                while len(temp) < pdata:
                    rand = np.random.uniform(size=len(xp))
                    temp = np.concatenate([temp, xp[xp_prob > rand]], axis=0)
                xp = temp
                perm = np.random.permutation(len(xp))
                xp = xp[perm[:pdata]]
                updata = int(u * pi)
                undata = u - updata

                xp_temp = x_train[t_train == 1]
                xn_temp = x_train[t_train == 0]
                perm = np.random.permutation(len(xp_temp))
                xp_temp = xp_temp[perm[:updata]]

                perm = np.random.permutation(len(xn_temp))
                xn_temp = xn_temp[perm[:undata]]
                xu = np.concatenate([xp_temp, xn_temp], axis=0)

                x = np.concatenate([xp, xu], axis=0)

                tp = np.ones(len(xp))
                tu = np.zeros(len(xu))
                t = np.concatenate([tp, tu], axis=0)

                updata = int(1000 * pi)
                undata = 1000 - updata

                xp_test = x_test[t_test == 1]
                perm = np.random.permutation(len(xp_test))
                xp_test = xp_test[perm[:updata]]
                xn_test = x_test[t_test == 0]
                perm = np.random.permutation(len(xn_test))
                xn_test = xn_test[perm[:undata]]

                x_test = np.concatenate([xp_test, xn_test], axis=0)
                tp = np.ones(len(xp_test))
                tu = np.zeros(len(xn_test))
                t_test = np.concatenate([tp, tu], axis=0)

                pu = PU(pi=pi)
                x_train = x
                res, x_test_kernel = pu.optimize(x, t, x_test)
                acc1 = pu.test(x_test_kernel, res, t_test, quant=False)
                acc2 = pu.test(x_test_kernel, res, t_test, quant=True, pi=pi)

                result = densratio(x_train[t == 1], x_train[t == 0])
                r = result.compute_density_ratio(x_test)
                temp = np.copy(r)
                temp = np.sort(temp)
                theta = temp[int(np.floor(len(x_test) * (1 - pi)))]
                pred = np.zeros(len(x_test))
                pred[r > theta] = 1
                acc3 = np.mean(pred == t_test)

                est_error_pu[i, j, k] = acc1
                est_error_pubp[i, j, k] = acc2
                est_error_dr[i, j, k] = acc3

                seed += 1

                print(acc1)
                print(acc2)
                print(acc3)

    est_error_pu_mean = np.mean(est_error_pu, axis=2)
    est_error_pubp_mean = np.mean(est_error_pubp, axis=2)
    est_error_dr_mean = np.mean(est_error_dr, axis=2)
    est_error_pu_std = np.std(est_error_pu, axis=2)
    est_error_pubp_std = np.std(est_error_pubp, axis=2)
    est_error_dr_std = np.std(est_error_dr, axis=2)
    return est_error_pu_mean, est_error_pubp_mean, est_error_pu_std, est_error_pubp_std, est_error_dr_mean, est_error_dr_std
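The thresholding step above (sort r, index at floor(len(x_test) * (1 - pi))) is essentially the empirical (1 - pi)-quantile of the density ratios; a more direct sketch of the same DRSB labeling rule (drsb_predict is an illustrative name):

import numpy as np

def drsb_predict(r, pi):
    # Label the top pi-fraction of density-ratio values as positive.
    theta = np.quantile(r, 1 - pi)
    return (r > theta).astype(int)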
Example #11
    def fit(self, X_top, X_bot, *args, **kwargs):

        self.densratio_obj = densratio(X_top, X_bot, alpha=self.alpha)
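A self-contained sketch of a minimal class around this fit method, assuming alpha is the only state it needs (RatioModel and weights are illustrative names, not from the original source):

import numpy as np
from densratio import densratio

class RatioModel:
    def __init__(self, alpha=0.1):
        self.alpha = alpha

    def fit(self, X_top, X_bot, *args, **kwargs):
        self.densratio_obj = densratio(X_top, X_bot, alpha=self.alpha,
                                       verbose=False)
        return self

    def weights(self, X):
        # Estimated p_top / p_bot evaluated at X.
        return self.densratio_obj.compute_density_ratio(X)

model = RatioModel().fit(np.random.randn(100, 3),
                         np.random.randn(100, 3) + 0.5)
print(model.weights(np.zeros((1, 3))))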
Example #12
def main():
    ite = 10
    num_train_data = 2000
    num_test_data = 2000
    Net = NN

    model_num = 3

    learning_rate = 1e-4

    epoch = 200
    batchsize = 256

    seed = 2020

    for f_name_idx0 in range(len(file_names)):
        for f_name_idx1 in range(f_name_idx0 + 1, len(file_names)):
            train_loss_normal = np.zeros((ite, model_num))
            test_loss_normal = np.zeros((ite, model_num))
            auc_normal = np.zeros((ite, model_num))

            train_loss_kerulsif = np.zeros((ite, model_num))
            test_loss_kerulsif = np.zeros((ite, model_num))
            auc_kerulsif = np.zeros((ite, model_num))

            train_loss_kerkleip = np.zeros((ite, model_num))
            test_loss_kerkleip = np.zeros((ite, model_num))
            auc_kerkleip = np.zeros((ite, model_num))

            train_loss_pu = np.zeros((ite, model_num))
            test_loss_pu = np.zeros((ite, model_num))
            auc_pu = np.zeros((ite, model_num))

            train_loss_ulsif = np.zeros((ite, model_num))
            test_loss_ulsif = np.zeros((ite, model_num))
            auc_ulsif = np.zeros((ite, model_num))

            train_loss_nnpu = np.zeros((ite, model_num))
            test_loss_nnpu = np.zeros((ite, model_num))
            auc_nnpu = np.zeros((ite, model_num))

            train_loss_nnulsif = np.zeros((ite, model_num))
            test_loss_nnulsif = np.zeros((ite, model_num))
            auc_nnulsif = np.zeros((ite, model_num))

            f_name0 = file_names[f_name_idx0]
            f_name1 = file_names[f_name_idx1]

            for i in range(ite):
                np.random.seed(seed)

                if f_name0 != f_name1:

                    data0 = pd.read_csv('dataset/%s.csv' % f_name0)
                    data1 = pd.read_csv('dataset/%s.csv' % f_name1)

                    data0 = data0.dropna()
                    data1 = data1.dropna()

                    perm0 = np.random.permutation(len(data0))
                    perm1 = np.random.permutation(len(data1))

                    choice0 = np.zeros(len(data0))
                    choice0[perm0[:num_train_data]] = 1
                    data0['choice'] = choice0

                    choice1 = np.zeros(len(data1))
                    choice1[perm1[:num_test_data]] = 1
                    data1['choice'] = choice1

                    data0 = data0.get(['rating', 'text', 'item', 'choice'])
                    data1 = data1.get(['rating', 'text', 'item', 'choice'])

                    data = pd.concat([data0, data1])

                else:
                    data = pd.read_csv('dataset/%s.csv' % f_name0)

                    data = data.dropna()

                    perm = np.random.permutation(len(data))

                    choice = np.zeros(len(data))
                    choice[perm[:num_train_data + num_test_data]] = 1

                    data['choice'] = choice

                    print('N: ', len(data))

                text_data = data.text.values

                vectorizer = TfidfVectorizer(max_features=10000,
                                             min_df=0.0,
                                             max_df=0.8)
                #vectorizer = TfidfVectorizer(min_df=0.0, max_df=0.8)
                text_list_vec = vectorizer.fit_transform(text_data)

                X = text_list_vec[data['choice'].values == 1].toarray()
                print(X.shape)

                pca = PCA(n_components=100)
                pca.fit(X)

                X_pca = pca.transform(X)

                rating0 = data[data['choice'].values ==
                               1].rating.values[:num_train_data]
                rating1 = data[data['choice'].values ==
                               1].rating.values[num_train_data:]

                X0 = X[:num_train_data]
                X1 = X[num_train_data:]

                X_pca0 = X_pca[:num_train_data]
                X_pca1 = X_pca[num_train_data:]

                result = densratio(
                    X_pca0,
                    X_pca1,
                    sigma_range=[0.01, 0.05, 0.1, 0.5, 1],
                    lambda_range=[0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1])

                dr0 = result.compute_density_ratio(X_pca0)

                kliep = DensityRatioEstimator()
                kliep.fit(X_pca0, X_pca1)
                #dr1 = np.ones(len(X_pca0))
                dr1 = kliep.predict(X_pca0)

                dim = X0.shape[1]

                device = torch.device(
                    'cuda' if torch.cuda.is_available() else 'cpu')
                #device = 'cpu'

                model = Net(dim).to(device)
                optimizer = optim.Adam(params=model.parameters(),
                                       lr=learning_rate,
                                       weight_decay=1e-5)

                model = train(X0,
                              X1,
                              epoch,
                              model,
                              optimizer,
                              device,
                              batchsize=batchsize,
                              method='PU')
                dr2 = test(X0, model, device, batchsize=100, method='PU')

                model = Net(dim).to(device)
                optimizer = optim.Adam(params=model.parameters(),
                                       lr=learning_rate,
                                       weight_decay=1e-5)

                model = train(X0,
                              X1,
                              epoch,
                              model,
                              optimizer,
                              device,
                              batchsize=batchsize,
                              method='uLSIF')
                dr3 = test(X0, model, device, batchsize=100, method='uLSIF')

                model = Net(dim).to(device)
                optimizer = optim.Adam(params=model.parameters(),
                                       lr=learning_rate,
                                       weight_decay=1e-5)

                model = train(X0,
                              X1,
                              epoch,
                              model,
                              optimizer,
                              device,
                              batchsize=batchsize,
                              method='nnPU')
                dr4 = test(X0, model, device, batchsize=100, method='PU')

                model = Net(dim).to(device)
                optimizer = optim.Adam(params=model.parameters(),
                                       lr=learning_rate,
                                       weight_decay=1e-5)

                model = train(X0,
                              X1,
                              epoch,
                              model,
                              optimizer,
                              device,
                              batchsize=batchsize,
                              method='nnuLSIF')
                dr5 = test(X0, model, device, batchsize=100, method='uLSIF')

                dr3[dr3 < 0] = 0.  # uLSIF estimates can go negative; clip first
                dr5[dr5 < 0] = 0.

                # Replace any ratio outside (0, 100) with 100.
                dr0[~((dr0 > 0) & (dr0 < 100))] = 100
                dr1[~((dr1 > 0) & (dr1 < 100))] = 100
                dr2[~((dr2 > 0) & (dr2 < 100))] = 100
                dr3[~((dr3 > 0) & (dr3 < 100))] = 100
                dr4[~((dr4 > 0) & (dr4 < 100))] = 100
                dr5[~((dr5 > 0) & (dr5 < 100))] = 100

                print(dr3)
                print(dr4)
                print(dr5)

                print('meandr4', np.mean(dr4))
                print('meandr5', np.mean(dr5))

                reg = Ridge()
                reg = GridSearchCV(reg,
                                   {'alpha': [0.0001, 0.001, 0.01, 0.1, 1]},
                                   cv=5)

                idx_model = 0

                x_train = X_pca0
                x_test = X_pca1

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=None)
                train_loss_normal[i, idx_model] = train_loss
                test_loss_normal[i, idx_model] = test_loss
                auc_normal[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr0)
                train_loss_kerulsif[i, idx_model] = train_loss
                test_loss_kerulsif[i, idx_model] = test_loss
                auc_kerulsif[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr1)
                train_loss_kerkleip[i, idx_model] = train_loss
                test_loss_kerkleip[i, idx_model] = test_loss
                auc_kerkleip[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr2)
                train_loss_pu[i, idx_model] = train_loss
                test_loss_pu[i, idx_model] = test_loss
                auc_pu[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr3)
                train_loss_ulsif[i, idx_model] = train_loss
                test_loss_ulsif[i, idx_model] = test_loss
                auc_ulsif[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr4)
                train_loss_nnpu[i, idx_model] = train_loss
                test_loss_nnpu[i, idx_model] = test_loss
                auc_nnpu[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr5)
                train_loss_nnulsif[i, idx_model] = train_loss
                test_loss_nnulsif[i, idx_model] = test_loss
                auc_nnulsif[i, idx_model] = auc

                print('0:normal', test_loss_normal)
                print('0:nnulsif', test_loss_nnulsif)
                print('0:nnpu', test_loss_nnpu)

                print('0:normal', auc_normal)
                print('0:nnulsif', auc_nnulsif)
                print('0:nnpu', auc_nnpu)
                #reg = KernelRidge(alpha=1, kernel='rbf', gamma=0.1)

                #reg = KernelRidge(alpha=0.1, kernel='rbf', gamma=1)

                reg = KernelRidge()
                reg = GridSearchCV(reg, {
                    'kernel': ['rbf'],
                    'alpha': [0.0001, 0.001, 0.01, 0.1, 1],
                    'gamma': [0.001, 0.01, 0.1, 1]
                },
                                   cv=5)

                idx_model = 1

                x_train = X_pca0
                x_test = X_pca1

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=None)
                train_loss_normal[i, idx_model] = train_loss
                test_loss_normal[i, idx_model] = test_loss
                auc_normal[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr0)
                train_loss_kerulsif[i, idx_model] = train_loss
                test_loss_kerulsif[i, idx_model] = test_loss
                auc_kerulsif[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr1)
                train_loss_kerkleip[i, idx_model] = train_loss
                test_loss_kerkleip[i, idx_model] = test_loss
                auc_kerkleip[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr2)
                train_loss_pu[i, idx_model] = train_loss
                test_loss_pu[i, idx_model] = test_loss
                auc_pu[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr3)
                train_loss_ulsif[i, idx_model] = train_loss
                test_loss_ulsif[i, idx_model] = test_loss
                auc_ulsif[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr4)
                train_loss_nnpu[i, idx_model] = train_loss
                test_loss_nnpu[i, idx_model] = test_loss
                auc_nnpu[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg,
                                                         x_train,
                                                         rating0,
                                                         x_test,
                                                         rating1,
                                                         dr=dr5)
                train_loss_nnulsif[i, idx_model] = train_loss
                test_loss_nnulsif[i, idx_model] = test_loss
                auc_nnulsif[i, idx_model] = auc

                print('1:normal', test_loss_normal)
                print('1:nnulsif', test_loss_nnulsif)
                '''

                reg = KernelRidge()
                reg = GridSearchCV(reg, {'kernel': ['polynomial'], 'alpha': [0.0001, 0.001, 0.01, 0.1, 1], 'gamma': [2, 3, 4, 5]}, cv=5)

                idx_model = 2

                x_train = X0
                x_test = X1
                
                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=None)
                train_loss_normal[i, idx_model] = train_loss
                test_loss_normal[i, idx_model] = test_loss
                auc_normal[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=dr0)
                train_loss_kerulsif[i, idx_model] = train_loss
                test_loss_kerulsif[i, idx_model] = test_loss
                auc_kerulsif[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=dr1)
                train_loss_kerkleip[i, idx_model] = train_loss
                test_loss_kerkleip[i, idx_model] = test_loss
                auc_kerkleip[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=dr2)
                train_loss_pu[i, idx_model] = train_loss
                test_loss_pu[i, idx_model] = test_loss
                auc_pu[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=dr3)
                train_loss_ulsif[i, idx_model] = train_loss
                test_loss_ulsif[i, idx_model] = test_loss
                auc_ulsif[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=dr4)
                train_loss_nnpu[i, idx_model] = train_loss
                test_loss_nnpu[i, idx_model] = test_loss
                auc_nnpu[i, idx_model] = auc

                train_loss, test_loss, auc = calc_result(reg, x_train, rating0, x_test, rating1, dr=dr5)
                train_loss_nnulsif[i, idx_model] = train_loss
                test_loss_nnulsif[i, idx_model] = test_loss
                auc_nnulsif[i, idx_model] = auc

                '''

                seed += 1

                np.savetxt('results/train_loss_normal_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_normal,
                           delimiter=',')
                np.savetxt('results/test_loss_normal_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_normal,
                           delimiter=',')
                np.savetxt('results/auc_normal_%s_%s.csv' % (f_name0, f_name1),
                           auc_normal,
                           delimiter=',')

                np.savetxt('results/train_loss_kerulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_kerulsif,
                           delimiter=',')
                np.savetxt('results/test_loss_kerulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_kerulsif,
                           delimiter=',')
                np.savetxt('results/auc_kerulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           auc_kerulsif,
                           delimiter=',')

                np.savetxt('results/train_loss_kerkleip_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_kerkleip,
                           delimiter=',')
                np.savetxt('results/test_loss_kerkleip_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_kerkleip,
                           delimiter=',')
                np.savetxt('results/auc_kerkleip_%s_%s.csv' %
                           (f_name0, f_name1),
                           auc_kerkleip,
                           delimiter=',')

                np.savetxt('results/train_loss_pu_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_pu,
                           delimiter=',')
                np.savetxt('results/test_loss_pu_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_pu,
                           delimiter=',')
                np.savetxt('results/auc_pu_%s_%s.csv' % (f_name0, f_name1),
                           auc_pu,
                           delimiter=',')

                np.savetxt('results/train_loss_ulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_ulsif,
                           delimiter=',')
                np.savetxt('results/test_loss_ulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_ulsif,
                           delimiter=',')
                np.savetxt('results/auc_ulsif_%s_%s.csv' % (f_name0, f_name1),
                           auc_ulsif,
                           delimiter=',')

                np.savetxt('results/train_loss_nnpu_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_nnpu,
                           delimiter=',')
                np.savetxt('results/test_loss_nnpu_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_nnpu,
                           delimiter=',')
                np.savetxt('results/auc_nnpu_%s_%s.csv' % (f_name0, f_name1),
                           auc_nnpu,
                           delimiter=',')

                np.savetxt('results/train_loss_nnulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           train_loss_nnulsif,
                           delimiter=',')
                np.savetxt('results/test_loss_nnulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           test_loss_nnulsif,
                           delimiter=',')
                np.savetxt('results/auc_nnulsif_%s_%s.csv' %
                           (f_name0, f_name1),
                           auc_nnulsif,
                           delimiter=',')
Example #13
def rulsif_analysis(input_data,
                    k,
                    n,
                    perform_hyperparameter_estimation,
                    alpha,
                    anomaly_type='change_points',
                    transform='no_transform'):

    # Unpack input.
    counts, energy_range, times = input_data

    # Transform counts if required.
    counts = Transformation(transform).transform(counts)

    # Pack sequence into blocks.
    counts_packed = pack(counts, k)

    # Median distance between subsequences.
    dmed = get_median_pairwise_distance(counts_packed)

    # Hyperparameter ranges suggested by the reference paper.
    sigma_range = np.array(
        [0.6 * dmed, 0.8 * dmed, 1.0 * dmed, 1.2 * dmed, 1.4 * dmed])
    sigma_forward_range = sigma_backward_range = sigma_range
    lambda_range = np.array([1e-3, 1e-2, 1e-1, 1e0, 1e1])
    lambda_forward_range = lambda_backward_range = lambda_range

    # Restrict the ranges further to the most frequently selected
    # hyperparameters when fitting random samples.
    if perform_hyperparameter_estimation:
        sigma_forward_range, sigma_backward_range, lambda_forward_range, lambda_backward_range = \
                estimate_hyperparameters(counts_packed, window_size=n,
                                         sigma_range=sigma_range,
                                         lambda_range=lambda_range,
                                         alpha=alpha, num_rank=2)

    # Change-point scores.
    packed_sequence_size = counts_packed.shape[0]
    original_sequence_size = counts.shape[0]
    scores = np.zeros(original_sequence_size)

    # Sliding-window over packed sequence.
    for i in range(n, packed_sequence_size - n + 1):
        forward_window = counts_packed[i:i + n]
        backward_window = counts_packed[i - n:i]
        forward_density_obj = densratio(backward_window,
                                        forward_window,
                                        alpha=alpha,
                                        sigma_range=sigma_forward_range,
                                        lambda_range=lambda_forward_range,
                                        verbose=False)
        forward_divergence = forward_density_obj.alpha_PE
        backward_density_obj = densratio(forward_window,
                                         backward_window,
                                         alpha=alpha,
                                         sigma_range=sigma_backward_range,
                                         lambda_range=lambda_backward_range,
                                         verbose=False)
        backward_divergence = backward_density_obj.alpha_PE
        change_point_score = forward_divergence + backward_divergence

        # Use larger range of hyperparameters if we can't get a good fit with the smaller one.
        if change_point_score < 0:
            sigma_range = np.array([
                0.7 * dmed, 0.8 * dmed, 0.9 * dmed, dmed, 1.1 * dmed,
                1.2 * dmed, 1.3 * dmed
            ])

            forward_density_obj = densratio(backward_window,
                                            forward_window,
                                            alpha=alpha,
                                            sigma_range=sigma_range,
                                            verbose=False)
            forward_divergence = forward_density_obj.alpha_PE
            backward_density_obj = densratio(forward_window,
                                             backward_window,
                                             alpha=alpha,
                                             sigma_range=sigma_range,
                                             verbose=False)
            backward_divergence = backward_density_obj.alpha_PE

            change_point_score = forward_divergence + backward_divergence

        # Shift score ahead because of packing.
        scores[i + k // 2] = change_point_score

    # Cut off scores at 0, no negative values.
    scores[scores < 0] = 0

    # Return a list of times and scores, for change-points.
    # Convert these to intervals for other anomalies.
    if anomaly_type == 'change_points':
        return times, scores

    elif anomaly_type in ['bimodality', 'negative_ions']:
        intervals = []
        interval_scores = []

        # Compute maximum position and indices.
        max_index = np.argmax(scores)
        max_score = np.max(scores)

        while max_score > 0:

            # Idea: Full-Width at Quarter-Maximum
            # Go towards the left.
            for start_index, score in reversed(
                    list(enumerate(scores[:max_index]))):
                if score < max_score / 4:
                    start_index += 1
                    break

            # Now towards the right.
            for end_index, score in enumerate(scores[max_index:],
                                              start=max_index):
                if score < max_score / 4:
                    break

            # Add this as an interval.
            # The interval's score is the sum of the scores of all points within.
            if start_index != end_index:
                intervals.append((times[start_index], times[end_index]))
                interval_scores.append(np.sum(scores[start_index:end_index]))

                # Mask these indices.
                scores[start_index:end_index] = -np.inf

            # Compute maximum position and indices.
            max_index = np.argmax(scores)
            max_score = np.max(scores)

        # Aggregating zero-scored timesteps into intervals.
        start_index = 0
        while start_index < len(scores):
            if scores[start_index] == 0:
                for end_index, score in enumerate(scores[start_index:],
                                                  start=start_index):
                    if score == -np.inf:
                        break

                intervals.append((times[start_index], times[end_index]))
                interval_scores.append(np.mean(scores[start_index:end_index]))
                start_index = end_index

            start_index += 1

        return intervals, interval_scores
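pack and get_median_pairwise_distance are helpers defined elsewhere in this project; a plausible minimal sketch of both, assuming a block is k consecutive samples stacked into one vector:

import numpy as np
from scipy.spatial.distance import pdist

def pack(x, k):
    # Stack k consecutive samples into one row per (overlapping) block.
    x = np.asarray(x).reshape(len(x), -1)
    return np.array([x[i:i + k].ravel() for i in range(len(x) - k + 1)])

def get_median_pairwise_distance(x):
    # Median Euclidean distance over all pairs of packed blocks.
    return np.median(pdist(x))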
Example #14
def main(els_data_file, output_file, perform_hyperparameter_estimation,
         load_from_file, save_to_file, quantity, start_time, end_time,
         run_tests, plot_processed_sequence, k, n):

    # Check input arguments - start and end times should be valid.
    if start_time is not None:
        start_time = datetime.strptime(start_time, '%d-%m-%Y/%H:%M')
    else:
        start_time = datetime.min

    if end_time is not None:
        end_time = datetime.strptime(end_time, '%d-%m-%Y/%H:%M').replace(
            second=59, microsecond=999999)
    else:
        end_time = datetime.max

    # Run doctests.
    if run_tests:
        import doctest
        import data_utils
        doctest.testmod(data_utils,
                        verbose=True,
                        optionflags=doctest.NORMALIZE_WHITESPACE)
        doctest.testmod(verbose=True, optionflags=doctest.NORMALIZE_WHITESPACE)

    # RuLSIF parameter.
    alpha = 0.1

    # Set random seed for reproducibility.
    random_seed = 7
    np.random.seed(random_seed)

    # Load processed sequence, if file found.
    els_sequence_file = os.path.splitext(els_data_file)[0] + '_RuLSIF_sequence'
    if load_from_file and os.path.exists(els_sequence_file + '.npz'):
        print('Loading processed sequence from sequence file...')
        filedata = np.load(els_sequence_file + '.npz')
        counts_packed = filedata['counts_packed']
        energy_range = filedata['energy_range']
        times = filedata['times']
        dmed = filedata['dmed']
    else:
        print('Sequence file not found. Extracting data from original ELS file and processing...')
        counts, energy_range, times = get_ELS_data(els_data_file, quantity,
                                                   start_time, end_time)

        # import pdb; pdb.set_trace() # For debugging.

        # Process counts.
        # counts = gaussian_blur(counts, sigma=0.5)
        # counts = np.ma.log(counts)

        # See the sequence plotted (lineplot for 1D data, colourplot for 2D data).
        if plot_processed_sequence:
            print('Plotting processed sequence...')
            fig, ax = plt.subplots(1, 1)
            ax.set_title('Processed Sequence')

            if len(counts.shape) == 1:
                ax.xaxis_date()
                ax.xaxis.set_major_formatter(
                    mdates.DateFormatter('%d-%m-%Y/%H:%M'))
                fig.autofmt_xdate()
                ax.plot(times, counts)

            elif len(counts.shape) == 2:
                plt.imshow(counts.T, origin='lower', interpolation='none')
                ax.set_aspect('auto')
                plt.colorbar(ax=ax, orientation='vertical')

            plt.show()

        # Pack sequence into blocks.
        print('Packing sequence into blocks...')
        counts_packed = pack(counts, k)
        print('Sequence packed into shape %s.' % (counts_packed.shape,))

        # Median distance between subsequences.
        print('Computing median distance between packed samples...')
        dmed = get_median_pairwise_distance(counts_packed)
        print('Median distance between packed samples, dmed =', dmed)

        # Save values to file.
        if save_to_file:
            arrays_with_names = {
                'counts_packed': counts_packed,
                'energy_range': energy_range,
                'times': times,
                'dmed': np.array(dmed)
            }
            np.savez(els_sequence_file, **arrays_with_names)

    # Hyperparameter ranges suggested by the reference paper.
    sigma_range = np.array([dmed])
    sigma_forward_range = sigma_backward_range = sigma_range
    lambda_range = np.array([1e-3, 1e-2, 1e-1, 1e0, 1e1])
    lambda_forward_range = lambda_backward_range = lambda_range

    # Restrict the ranges further to the most frequently selected hyperparameters when fitting random samples.
    if perform_hyperparameter_estimation:
        els_hyperparameters_file = os.path.splitext(
            els_data_file)[0] + '_RuLSIF_hyperparameters'
        if load_from_file and os.path.exists(els_hyperparameters_file +
                                             '.npz'):
            print('Hyperparameters file found. Loading from file...')
            filedata = np.load(els_hyperparameters_file + '.npz')
            sigma_forward_range = filedata['sigma_forward_range']
            sigma_backward_range = filedata['sigma_backward_range']
            lambda_forward_range = filedata['lambda_forward_range']
            lambda_backward_range = filedata['lambda_backward_range']
        else:
            print('Hyperparameters file not found. Performing estimation...')
            sigma_forward_range, sigma_backward_range, \
            lambda_forward_range, lambda_backward_range = \
                estimate_hyperparameters(counts_packed, window_size=n,
                                         sigma_range=sigma_range,
                                         lambda_range=lambda_range,
                                         alpha=alpha, num_rank=2)

            if save_to_file:
                arrays_with_names = {
                    'sigma_forward_range': sigma_forward_range,
                    'sigma_backward_range': sigma_backward_range,
                    'lambda_forward_range': lambda_forward_range,
                    'lambda_backward_range': lambda_backward_range
                }
                np.savez(els_hyperparameters_file, **arrays_with_names)

    print('Hyperparameters will be selected from the ranges:')
    print('sigma_forward_range =', sigma_forward_range)
    print('sigma_backward_range =', sigma_backward_range)
    print('lambda_forward_range =', lambda_forward_range)
    print('lambda_backward_range =', lambda_backward_range)

    # Change-point scores.
    packed_sequence_size = counts_packed.shape[0]
    original_sequence_size = counts.shape[0]
    scores = np.ma.masked_all(original_sequence_size)

    # Start timing here.
    timing_start = datetime.now()

    # Sliding-window over packed sequence.
    for i in range(n, packed_sequence_size - n + 1):
        forward_window = counts_packed[i:i + n]
        backward_window = counts_packed[i - n:i]
        forward_density_obj = densratio(backward_window,
                                        forward_window,
                                        alpha=alpha,
                                        sigma_range=sigma_forward_range,
                                        lambda_range=lambda_forward_range,
                                        verbose=False)
        forward_divergence = forward_density_obj.alpha_PE
        backward_density_obj = densratio(forward_window,
                                         backward_window,
                                         alpha=alpha,
                                         sigma_range=sigma_backward_range,
                                         lambda_range=lambda_backward_range,
                                         verbose=False)
        backward_divergence = backward_density_obj.alpha_PE
        change_point_score = forward_divergence + backward_divergence

        # Use larger range of hyperparameters if we can't get a good fit with the smaller one.
        if change_point_score < 0:
            print('Bad fit with forward sigma = %0.2f, backward sigma = %0.2f.' % (
                forward_density_obj.kernel_info.sigma,
                backward_density_obj.kernel_info.sigma))
            sigma_range = np.array([
                0.7 * dmed, 0.8 * dmed, 0.9 * dmed, dmed, 1.1 * dmed,
                1.2 * dmed, 1.3 * dmed
            ])

            forward_density_obj = densratio(backward_window,
                                            forward_window,
                                            alpha=alpha,
                                            sigma_range=sigma_range,
                                            verbose=False)
            forward_divergence = forward_density_obj.alpha_PE
            backward_density_obj = densratio(forward_window,
                                             backward_window,
                                             alpha=alpha,
                                             sigma_range=sigma_range,
                                             verbose=False)
            backward_divergence = backward_density_obj.alpha_PE

            change_point_score = forward_divergence + backward_divergence

            print('Tried again with forward sigma = %0.2f, backward sigma = %0.2f.' % (
                forward_density_obj.kernel_info.sigma,
                backward_density_obj.kernel_info.sigma))

        scores[i + k // 2] = change_point_score
        print('Change-point score at time %s computed as %0.4f.' % (
            datetime.strftime(mdates.num2date(times[i]), '%d-%m-%Y/%H:%M'),
            change_point_score))

    # End time.
    timing_end = datetime.now()

    # Compute average time taken.
    total_time = (timing_end - timing_start).total_seconds()
    num_evals = packed_sequence_size - 2 * n + 1
    print('%0.2f seconds taken for %d change-point score evaluations. '
          'Average is %0.2f evals/sec, with k = %d, and n = %d.' %
          (total_time, num_evals, num_evals / total_time, k, n))

    # Mask negative change-point scores.
    scores = np.ma.masked_less(scores, 0)

    # Plot change-point scores over windows as well as the original data.
    print('Plotting...')

    fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True)

    plot_raw_ELS_data(fig,
                      ax0,
                      els_data_file,
                      quantity,
                      start_time,
                      end_time,
                      colorbar_range='subset',
                      colorbar_orientation='horizontal')

    ax1.plot(times, scores)
    ax1.set_ylabel('Change-point Score')
    ax1.xaxis.set_tick_params(labelsize=8)

    # Place title below.
    fig.text(s='Change-point Scores for ELS Data \n k = %d, n = %d' % (k, n),
             x=0.5,
             y=0.03,
             horizontalalignment='center',
             fontsize=13)

    plt.subplots_adjust(bottom=0.3, left=0.2)

    # Save plot.
    if output_file is None:
        plt.show()
    else:
        plt.savefig(output_file, bbox_inches='tight')

    # Save scores.
    if save_to_file:
        rulsif_output_file = os.path.splitext(
            els_data_file)[0] + '_RuLSIF_output'
        arrays_with_names = {'scores': scores, 'times': times}
        np.savez(rulsif_output_file, **arrays_with_names)