Esempio n. 1
0
    def test_weighted_combined_copula3d(self):
        """Check c_log_likelihood of a weighted copula mix against its parts.

        A WeightedCombinedCopula built from two identical component names must
        report the same ``c_log_likelihood`` as the corresponding single
        copula, and the student/gaussian mix must fall strictly between the
        pure gaussian and pure student values on this normal sample.
        """
        dimkeys = ["solar", "wind", "tide"]
        ourmean = [0, 0, 0]
        ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]

        # One normal marginal per dimension, variance taken from the diagonal.
        marginals = {
            key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
            for idx, key in enumerate(dimkeys)
        }

        samples = np.random.multivariate_normal(ourmean, ourcov, 10000)
        data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

        mixed_names = ['student-copula', 'gaussian-copula']
        gaussian_names = ['gaussian-copula'] * 2
        student_names = ['student-copula'] * 2
        weights = [0.12, 0.88]

        mixed = WeightedCombinedCopula(dimkeys, data_dict, marginals, mixed_names, weights)
        pure_gaussian = GaussianCopula(dimkeys, data_dict, marginals)
        all_gaussian = WeightedCombinedCopula(dimkeys, data_dict, marginals, gaussian_names, weights)
        all_student = WeightedCombinedCopula(dimkeys, data_dict, marginals, student_names, weights)
        pure_student = StudentCopula(dimkeys, data_dict, marginals)

        ll_gaussian = pure_gaussian.c_log_likelihood()
        ll_student = pure_student.c_log_likelihood()
        ll_mixed = mixed.c_log_likelihood()

        self.assertAlmostEqual(all_gaussian.c_log_likelihood(), ll_gaussian, 7)
        self.assertAlmostEqual(all_student.c_log_likelihood(), ll_student, 7)
        self.assertGreater(ll_gaussian, ll_mixed)
        self.assertGreater(ll_mixed, ll_student)
Esempio n. 2
0
    def test_with_multinormal_4_dim(self):
        """Print timed ``rect_prob`` results for three models on a 4-D sample.

        Fits a MultiNormalDistribution, a CVineCopula and a DVineCopula (every
        pair copula set to 'gaussian-copula') on the same normal sample and
        prints each model's ``rect_prob`` over one box. Nothing is asserted.
        """
        dimkeys = ["solar", "wind", "tide", "geo"]
        ourmean = [0, 0, 0, 0]
        ourcov = [[1, 0.1, 0.3, 0.4], [0.1, 2, 0, 0], [0.3, 0, 3, 0], [0.4, 0, 0, 4]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
                     "tide": UnivariateNormalDistribution(var=ourcov[2][2], mean=ourmean[2]),
                     "geo": UnivariateNormalDistribution(var=ourcov[3][3], mean=ourmean[3])}
        lowerdict = {"solar": -1, "wind": -1, "tide": -1, "geo": -2}
        upperdict = {"solar": 1, "wind": 1, "tide": 1, "geo": 2}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 10000)
        # Column i of the sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

        # Only the strict upper triangle carries pair-copula names.
        pair_copulae_strings = [[None, 'gaussian-copula', 'gaussian-copula', 'gaussian-copula'],
                                [None, None, 'gaussian-copula', 'gaussian-copula'],
                                [None, None, None, 'gaussian-copula'],
                                [None, None, None, None]]

        with Timer('MultiNormal'):
            multigaussian = MultiNormalDistribution(dimkeys, input_data=data_dict)
            print(multigaussian.rect_prob(lowerdict, upperdict))
        cvine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        with Timer('CVine rect_prob calculus'):
            print(cvine.rect_prob(lowerdict, upperdict))
        dvine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        with Timer('DVine rect_prob calculus'):
            print(dvine.rect_prob(lowerdict, upperdict))
0
    def test_quick_dim_3(self):
        """Smoke-run ``C`` and ``c`` of a C-vine and a D-vine in 3 dimensions.

        Both vines are fitted on the same normal sample with a
        student / frank / clayton pair-copula layout; the values at one point
        are printed and nothing is asserted.
        """
        dimkeys = ["solar", "wind", "tide"]
        ourmean = [0, 0, 0]
        ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]
        marginals = {
            key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
            for idx, key in enumerate(dimkeys)
        }

        samples = np.random.multivariate_normal(ourmean, ourcov, 10000)
        data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

        pair_copulae_strings = [
            [None, 'student-copula', 'frank-copula'],
            [None, None, 'clayton-copula'],
            [None, None, None],
        ]
        valuedict = {"solar": 0.43, "wind": 0.92, "tide": 0.27}

        print('CVine')
        c_vine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        print(c_vine.C(valuedict=valuedict))
        print(c_vine.c(valuedict))

        print('DVine')
        d_vine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        print(d_vine.C(valuedict=valuedict))
        print(d_vine.c(valuedict))
Esempio n. 4
0
    def test_with_gaussian_copula_3_dim(self):
        """Compare ``rect_prob`` of a GaussianCopula and a MultiNormalDistribution.

        Both models are fitted on the same 3-D normal sample and must agree on
        the probability of one box to 2 decimal places.
        """
        dimkeys = ["solar", "wind", "tide"]

        ourmean = [0, 0, 0]
        ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
                     "tide": UnivariateNormalDistribution(var=ourcov[2][2], mean=ourmean[2])}
        lowerdict = {"solar": -1, "wind": -1, "tide": -1}
        upperdict = {"solar": 1, "wind": 1, "tide": 1}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 1000)
        # The previous loop also built a GaussianCopula with the undefined name
        # `pair_copulae_strings`, raising NameError before any assertion ran;
        # only the column-to-key mapping is needed here.
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.1)
        multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)
        self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                               multigaussian2.rect_prob(lowerdict, upperdict), 2)

        # Looser re-check retained from the original test.
        self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                               multigaussian2.rect_prob(lowerdict, upperdict), 1)
Esempio n. 5
0
    def test_gaussian_copula(self):
        """Feed ``generates_U`` output of one GaussianCopula to a rank histogram of another.

        Two copulas are fitted on independent samples drawn with the same
        correlation (rho == rho2 == 0.5). Nothing is asserted; the test only
        exercises ``diag(...).rank_histogram``.

        NOTE(review): another ``test_gaussian_copula`` is defined later in this
        source; if both live in the same class the later one replaces this one.
        """
        n = 10000  # number of points requested from generates_U
        dimkeys = ["solar", "wind"]
        dimension = len(dimkeys)
        ourmean = [2, 3]
        rho = 0.5
        rho2 = 0.5
        ourcov = [[1, rho], [rho, 1]]
        ourcov2 = [[1, rho2], [rho2, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)
        # Column i of each sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}
        data_dict2 = {key: data_array2[:, i] for i, key in enumerate(dimkeys)}

        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)
        multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)

        rank_data = multigaussian2.generates_U(n)

        diag(dimension).rank_histogram(rank_data, 20, multigaussian1)
Esempio n. 6
0
    def test_quick_dim_2(self):
        """Check that C-vine, D-vine and gaussian copula ``C`` agree in 2-D.

        All three models are fitted on the same bivariate normal sample and
        their ``C`` values at one point must match to 1 decimal place.
        """
        dimkeys = ["solar", "wind"]
        ourmean = [1, 0.5]
        ourcov = [[1, 0.3], [0.3, 2]]
        marginals = {
            key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
            for idx, key in enumerate(dimkeys)
        }

        samples = np.random.multivariate_normal(ourmean, ourcov, 10000)
        data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

        pair_copulae_strings = [
            [None, 'student-copula'],
            [None, None],
        ]
        point = {"solar": 0.96, "wind": 0.87}

        c_vine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        d_vine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        gauss = GaussianCopula(dimkeys, data_dict, marginals)

        # Result is discarded; the call only has to complete.
        gauss.c(point)

        self.assertAlmostEqual(c_vine.C(point), d_vine.C(point), 1)
        self.assertAlmostEqual(gauss.C(point), d_vine.C(point), 1)
        self.assertAlmostEqual(c_vine.C(point), gauss.C(point), 1)
Esempio n. 7
0
    def test_gaussian_copula(self):
        """Print ``emd_sort`` values between weakly and strongly correlated samples.

        Unfinished test: two GaussianCopula objects are fitted (rho 0.1 and
        rho 0.9) but only the raw sample arrays are passed to ``emd_sort``;
        nothing is asserted.

        NOTE(review): an earlier ``test_gaussian_copula`` is defined in this
        source; if both live in the same class this one replaces it.
        """
        # not finished yet
        print("Warning test not finished yet")
        dimkeys = ["solar", "wind"]
        ourmean = [2, 3]
        rho = 0.1
        rho2 = 0.9
        ourcov = [[1, rho], [rho, 1]]
        ourcov2 = [[1, rho2], [rho2, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)
        # Column i of each sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}
        data_dict2 = {key: data_array2[:, i] for i, key in enumerate(dimkeys)}

        # The fitted copulas are not used below; construction is kept so the
        # test still fails if fitting itself breaks.
        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)
        multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)

        print(emd_sort(data_array, data_array))
        print(emd_sort(data_array2, data_array))
        print(emd_sort(data_array2, data_array2))
Esempio n. 8
0
def initialize(dim=2, precision=None, copula_string='independence-copula'):
    """Build a distribution of the requested kind fitted on random normal data.

    Draws 1000 points from a fixed multivariate normal whose parameters depend
    on ``dim``, maps each column to a dimension key, and instantiates the class
    returned by ``distribution_factory(copula_string)`` with those keys and
    data.

    Args:
        dim: Number of dimensions; 1, 2 and 3 are supported.
        precision: Accepted for backward compatibility; not used by this
            function.
        copula_string: Name passed to ``distribution_factory``.

    Returns:
        The constructed distribution object, or ``None`` when ``dim`` is not
        1, 2 or 3 (unchanged from the previous implicit behaviour).
    """
    if dim == 1:
        dimkeys = ["solar"]
        mean = [0]
        cov = [[2]]
    elif dim == 2:
        dimkeys = ["solar", "wind"]
        mean = [3, 4]
        rho = 0.5
        cov = [[1, rho], [rho, 1]]
    elif dim == 3:
        dimkeys = ["solar", "wind", "tide"]
        mean = [0, 0, 0]
        rho01, rho02, rho12 = 0.1, 0.3, 0
        cov = [[1, rho01, rho02], [rho01, 2, rho12], [rho02, rho12, 3]]
    else:
        # Unsupported dimension: keep returning None rather than raising.
        return None

    data_array = np.random.multivariate_normal(mean, cov, 1000)
    # Column i of the sample belongs to dimkeys[i].
    dictin = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

    distr_class = distribution_factory(copula_string)
    return distr_class(dimkeys, dictin)
Esempio n. 9
0
    def test_plot(self):
        """Plot D-vine ``generates_U`` points and their projection onto one diagonal.

        Draws ``n`` points from a fitted DVineCopula, projects each with
        ``diag(dimension).proj`` onto diagonal ``k``, and shows the points
        (green), projections (red), connecting segments (black) and the
        diagonal itself (blue) in a 3-D matplotlib figure. Nothing is asserted
        and ``plt.show()`` is called at the end.
        """
        dimkeys = ["solar", "wind", "tide"]
        dimension = len(dimkeys)

        ourmean = [0, 0, 0]
        # NOTE(review): this matrix has a negative determinant (about -2.415),
        # so it is not positive semidefinite and numpy warns about it by
        # default. Values are left unchanged because the intended ones are
        # unknown.
        ourcov = [[1, 1.3, 1.2], [1.3, 2, 0], [1.2, 0, 1.5]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
                     "tide": UnivariateNormalDistribution(var=ourcov[2][2], mean=ourmean[2])}
        data_array = np.random.multivariate_normal(ourmean, ourcov, 10000)
        # Column i of the sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

        pair_copulae_strings = [[None, 'gaussian-copula', 'frank-copula'],
                                [None, None, 'gaussian-copula'],
                                [None, None, None]]

        mydistr = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        n = 20  # number of points to display
        U = mydistr.generates_U(n=n)
        diago = diag(dimension)
        fig = plt.figure()
        k = 2  # index of the diagonal where you want to project
        ax = fig.add_subplot(111, projection='3d')

        ax.scatter(U[:, 0], U[:, 1], U[:, 2], c='g', marker='o')
        for i in range(n):
            P = diago.proj(U[i], k)
            ax.scatter(P[0, 0], P[0, 1], P[0, 2], c='r', marker='o')
            ax.plot([U[i, 0], P[0, 0]], [U[i, 1], P[0, 1]], [U[i, 2], P[0, 2]], c='k')
        diagonal = diago.list_of_diag[k]
        ax.plot([diagonal[0][0], diagonal[1][0]],
                [diagonal[0][1], diagonal[1][1]],
                [diagonal[0][2], diagonal[1][2]], c='b')

        ax.set_xlabel(dimkeys[0])
        ax.set_ylabel(dimkeys[1])
        ax.set_zlabel(dimkeys[2])

        plt.show()
Esempio n. 10
0
 def test_normal_distribution(self):
     """Plot a 25-bin RankHistogram of normal draws against a normal model.

     Nothing is asserted; the test only builds the histogram and calls
     ``plot()``.
     """
     mu = 0
     sigma = 1
     m = 10000  # sample size
     # Positional arguments kept as literals: the constructor's positional
     # order is not visible here.
     mydistr = UnivariateNormalDistribution(0, 1)
     rank_data = mu + sigma * np.random.randn(m)
     rank = RankHistogram(mydistr, rank_data, 25)
     rank.plot()
Esempio n. 11
0
    def test_gaussian_copula2d(self):
        """Check c_log_likelihood orderings for 2-D copulas on normal data.

        On data drawn with correlation ``rho`` a GaussianCopula fitted from
        the data must beat one forced to ``cov=ourcov2``; on data drawn with
        ``rho2`` the copula given ``ourcov2`` must beat the one given
        ``ourcov``. The data-fitted GaussianCopula must also beat the Gumbel,
        Clayton, Frank and Student copulas fitted on the same data.
        """
        dimkeys = ["solar", "wind"]
        ourmean = [2, 3]
        rho = 0.5
        rho2 = 0.7
        ourcov = [[1, rho], [rho, 1]]
        ourcov2 = [[1, rho2], [rho2, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)
        # Column i of each sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}
        data_dict2 = {key: data_array2[:, i] for i, key in enumerate(dimkeys)}

        gumbel = GumbelCopula(dimkeys, data_dict, marginals)
        frank = FrankCopula(dimkeys, data_dict, marginals)
        clayton = ClaytonCopula(dimkeys, data_dict, marginals)
        student = StudentCopula(dimkeys, data_dict, marginals)

        multigaussian1 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict, marginals=marginals, quadstep=0.001)
        multigaussian2 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict, marginals=marginals, quadstep=0.001,
                                        cov=ourcov2)
        multigaussian3 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2, marginals=marginals, quadstep=0.001,
                                        cov=ourcov2)
        multigaussian4 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2, marginals=marginals, quadstep=0.001,
                                        cov=ourcov)

        l1 = multigaussian1.c_log_likelihood()
        self.assertGreater(l1, multigaussian2.c_log_likelihood())
        self.assertGreater(multigaussian3.c_log_likelihood(), multigaussian4.c_log_likelihood())
        self.assertGreater(l1, gumbel.c_log_likelihood())
        self.assertGreater(l1, clayton.c_log_likelihood())
        self.assertGreater(l1, frank.c_log_likelihood())
        self.assertGreater(l1, student.c_log_likelihood())
Esempio n. 12
0
 def test_two_dimensions(self):
     """Compare a data-fitted MultiNormalDistribution with an explicit one.

     ``rect_prob`` of both must agree to 2 decimal places, and the column
     means of ``generates_X`` output from the fitted model must match the
     generating mean to 1 decimal place.
     """
     dimkeys = ["solar", "wind"]
     ourmean = [-4, 3]
     ourcov = [[2, 0], [0, 2]]
     lowerdict = {"solar": -1, "wind": 0}
     upperdict = {"solar": 3, "wind": 4}
     marginals = {
         key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
         for idx, key in enumerate(dimkeys)
     }

     samples = np.random.multivariate_normal(ourmean, ourcov, 10000)
     data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

     fitted = MultiNormalDistribution(dimkeys, input_data=data_dict)
     explicit = MultiNormalDistribution(dimkeys, mean=ourmean, cov=ourcov)

     self.assertAlmostEqual(fitted.rect_prob(lowerdict, upperdict),
                            explicit.rect_prob(lowerdict, upperdict), 2)

     # Fresh draw per check, second column first.
     wind_mean = np.mean(fitted.generates_X(n=1000)[:, 1])
     self.assertAlmostEqual(wind_mean, ourmean[1], 1)
     solar_mean = np.mean(fitted.generates_X(n=1000)[:, 0])
     self.assertAlmostEqual(solar_mean, ourmean[0], 1)
Esempio n. 13
0
 def test_with_gaussian_copula_1_dim(self):
     """Compare 1-D ``rect_prob`` of a GaussianCopula and a MultiNormalDistribution.

     Both are fitted on the same sample (the copula's marginal is fitted from
     that sample too) and must agree on one interval to 3 decimal places.
     """
     mymean, myvar = 0, 2
     keys = ["solar"]
     lower = {"solar": -2}
     upper = {"solar": 1}

     samples = np.random.multivariate_normal([mymean], [[myvar]], 10000)
     column = samples[:, 0]
     data = {"solar": column}
     marginals = {"solar": UnivariateNormalDistribution(input_data=samples[:, 0])}

     copula = GaussianCopula(input_data=data, dimkeys=keys, marginals=marginals)
     normal = MultiNormalDistribution(keys, input_data=data)

     self.assertAlmostEqual(copula.rect_prob(lower, upper),
                            normal.rect_prob(lower, upper), 3)
Esempio n. 14
0
 def test_with_mean_var(self):
     """Check ``cdf(4)`` for a normal with mean 3 and standard deviation 2.

     The expected value 0.6915 is checked to 1 decimal place for a model
     fitted on sampled data and to 3 places for one built from explicit
     mean and variance.
     """
     mean, sigma = 3, 2
     expected = 0.6915

     samples = sigma * np.random.randn(10000) + mean
     fitted = UnivariateNormalDistribution(input_data=samples)
     self.assertAlmostEqual(fitted.cdf(4), expected, 1)

     exact = UnivariateNormalDistribution(mean=mean, var=sigma ** 2)
     self.assertAlmostEqual(exact.cdf(4), expected, 3)
Esempio n. 15
0
    def test_with_gaussian_copula_2_dim(self):
        """Compare 2-D ``rect_prob`` of a GaussianCopula and a MultiNormalDistribution.

        Both are fitted on the same bivariate normal sample and must agree on
        the probability of one box to 3 decimal places.
        """
        dimkeys = ["solar", "wind"]
        ourmean = [3, 4]
        ourcov = [[1, 0.5], [0.5, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}
        lowerdict = {"solar": 2, "wind": 3}
        upperdict = {"solar": 4, "wind": 5}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        # Column i of the sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)
        multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)
        self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                               multigaussian2.rect_prob(lowerdict, upperdict), 3)
Esempio n. 16
0
 def test_quick(self):
     """Check that a model fitted on standard-normal draws gives about 0.95 on [-1.96, 1.96]."""
     samples = np.random.randn(1000)
     fitted = UnivariateNormalDistribution(input_data=samples)
     coverage = fitted.rect_prob(-1.96, 1.96)
     self.assertAlmostEqual(coverage, 0.95, 1)
Esempio n. 17
0
 def test_pdf_cdf(self):
     """Check that integrating ``pdf`` over [-1, 3] matches ``rect_prob(-1, 3)`` to 5 places."""
     samples = -2 + 2 * np.random.randn(2000)
     fitted = UnivariateNormalDistribution(input_data=samples)
     # quad returns (integral, error estimate); only the integral is compared.
     integral, _abs_err = spi.quad(fitted.pdf, -1, 3)
     self.assertAlmostEqual(integral, fitted.rect_prob(-1, 3), 5)