예제 #1
0
    def LP_solve_gaps(self, param):
        """Estimate the gaps between neighbouring contigs with a linear program.

        For every link in ``self.observations`` the expected distance is the
        observed mean plus the gap returned by ``GC.GapEstimator`` (PE links
        use the contamination library parameters, all other links the main
        library).  Each gap is modelled as ``x - y`` using two LP columns, and
        each link gets one help column that is constrained from both sides
        by the link's residual so that it can stand in for an absolute value.

        ``param`` is accepted but not read by this implementation.

        Side effect: ``self.objective`` is set to the solver's ``fun`` value.

        Returns a list with one gap per adjacent contig pair, each rounded
        with ``round(value, 0)``.
        """
        observations = self.observations

        # Expected distance per link: observed mean + estimated gap.
        exp_means_gapest = {}
        for link in observations:
            left, right, pe_link = link
            mean_obs = observations[link][0]
            if pe_link:
                lib_mean, lib_stddev = self.contamination_mean, self.contamination_stddev
            else:
                lib_mean, lib_stddev = self.mean, self.stddev
            exp_means_gapest[link] = mean_obs + GC.GapEstimator(
                lib_mean, lib_stddev, self.read_len, mean_obs,
                self.ctgs[left].length, self.ctgs[right].length)

        # Column layout: gap k owns columns 2k (x_k) and 2k+1 (y_k); the help
        # variable of the h-th link sits at column 2*n_gaps + h.
        lp = LpForm()
        n_gaps = len(self.ctgs) - 1
        n_links = len(observations)
        n_cols = 2 * n_gaps + n_links

        # Bound every gap: x_k - y_k against mean + 2*stddev.
        for gap in range(n_gaps):
            bound_row = [0] * n_cols
            bound_row[2 * gap], bound_row[2 * gap + 1] = 1, -1
            lp.add_constraint(bound_row, self.mean + 2 * self.stddev)

        def link_row(link_index, left, right, x_coef):
            """Row with +/-1 on the gaps spanned by a link and -1 on its help column."""
            row = [0] * n_cols
            # Same gaps as scanning every column k and keeping left <= k < right.
            for gap in range(max(left, 0), min(right, n_cols)):
                row[2 * gap] = x_coef
                row[2 * gap + 1] = -x_coef
            row[2 * n_gaps + link_index] = -1
            return row

        # Two passes, one per sign of the absolute value.  All rows of the
        # first pass are added before any row of the second pass.
        for x_coef in (-1, 1):
            for link_index, link in enumerate(observations):
                left, right, _ = link
                # Lengths of the contigs strictly between the pair + observation.
                constant = sum(ctg.length for ctg in self.ctgs[left + 1:right]) + observations[link][0]
                predicted_distance = exp_means_gapest[link]
                if x_coef < 0:
                    rhs = constant - predicted_distance
                else:
                    rhs = predicted_distance - constant
                lp.add_constraint(link_row(link_index, left, right, x_coef), rhs)

        # Objective: one weight per help variable.
        # NOTE(review): add_objective is invoked after every single weight,
        # always with the same (growing) list, exactly as before -- confirm
        # against LpForm whether one call after the loop would be enough.
        ratio = self.contamination_ratio
        obj_row = [0] * n_cols
        for link_index, link in enumerate(observations):
            _, _, pe_link = link
            support = observations[link][1]
            if ratio:
                weight = pe_link * self.stddev * (1 - ratio) * support + (1 - pe_link) * self.contamination_stddev * ratio * support
            else:
                weight = support
            obj_row[2 * n_gaps + link_index] = weight
            lp.add_objective(obj_row)

        A, b, c = lp.standard_form()
        lpsol = lp_solve(c, A, b, tol=1e-4)
        optx = lpsol.x

        # Fold every (x_k, y_k) pair back into a single gap value.
        gap_solution = [round(optx[2 * gap] - optx[2 * gap + 1], 0) for gap in range(n_gaps)]

        self.objective = lpsol.fun

        return gap_solution
예제 #2
0
def test_lp(prt=False):
    """Run ``lp_solve`` on a fixed set of problems.

    Every problem is a dict holding the solver inputs ``'A'``, ``'b'`` and
    ``'c'`` plus a ``'result'`` list laid out as
    ``[optx, zmin, is_bounded, is_solvable, basis]``.

    With ``prt`` true the inputs and the solver output are printed and
    nothing is checked; otherwise the solver output is asserted against
    ``'result'``.

    The printing uses single-argument ``print(...)`` calls and ``range`` so
    the function runs unchanged on Python 2 and Python 3.
    """
    m1 = 20
    m2 = 50
    # Plain float instead of the NumPy ``Inf`` alias, which newer NumPy
    # releases no longer export.
    minus_inf = float("-inf")

    def circle_columns(indices, m):
        """2 x len(indices) matrix of (cos(t) - 1, sin(t)) with t = 2*pi*i/(m+1)."""
        return array([[cos(2*pi*i/(m+1))-1., sin(2*pi*i/(m+1))]
                      for i in indices]).T

    probs = [
        {
            'A': array([
                [2.,  5., 3., -1.,  0.,  0.],
                [3., 2.5, 8.,  0., -1.,  0.],
                [8.,10.,  4.,  0.,  0., -1.]]),
            'b': array([185., 155., 600.]),
            'c': array([4., 8., 3., 0., 0., 0.]),
            'result': [
                    array([ 66.25, 0., 17.5, 0., 183.75, 0.]),
                    317.5,
                    True,
                    True,
                    array([2, 0, 4])
                ]
        },
        {
            'A': array([
                [-1., -1., -1.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  1.,  1.,  1.],
                [ 1.,  0.,  0.,  1.,  0.,  0.],
                [ 0.,  1.,  0.,  0.,  1.,  0.],
                [ 0.,  0.,  1.,  0.,  0.,  1.]]),
            'b': array([-0.5, 0.4, 0.3, 0.3, 0.3]),
            'c': array([2.8, 6.3, 10.8, -2.8, -6.3, -10.8]),
            'result': [
                    array([0.3, 0.2, 0.0, 0.0, 0.1, 0.3]),
                    -1.77,
                    True,
                    True,
                    array([1, 7, 0, 4, 5])
                ]
        },
        {   # with degeneracy
            'A': circle_columns(range(1, m1+1), m1),
            'b': zeros(2).T,
            'c': -ones(m1).T,
            'result': [
                    zeros(m1),
                    0.,
                    True,
                    True,
                    array([0,19])
                ]

        },
        {   # with unboundedness (0 is a member of the convex hull
            # of these vectors)
            'A': circle_columns(range(0, m2), m2),
            'b': zeros(2).T,
            'c': -ones(m2).T,
            'result': [
                    None,       # optx is not compared when unbounded
                    minus_inf,  # zmin IS compared whenever the problem is solvable
                    False,
                    True,
                    array([2, 49])
                ]

        },
        {   # Unsolvable
            'A': circle_columns(range(0, m2), m2),
            'b': ones(2).T,
            'c': -ones(m2).T,
            'result': [
                    None,   # unchecked when unsolvable
                    None,   # unchecked when unsolvable
                    None,   # unchecked when unsolvable
                    False,
                    array([50, 1])
                ]

        }, # add other test cases here...
    ]

    for prob in probs:
        lpsol = lp_solve(prob['c'],prob['A'],prob['b'])
        optx = lpsol.x
        zmin = lpsol.fun
        bounded = lpsol.is_bounded
        solvable = lpsol.is_solvable
        basis = lpsol.basis
        if prt:
            # One pre-formatted string per call: identical output under the
            # Python 2 print statement and the Python 3 print function.
            print("A:\n%s" % (prob['A'],))
            print("b: %s" % (prob['b'],))
            print("c: %s" % (prob['c'],))
            print(" ---->")
            print("optx: %s" % (optx,))
            print("zmin: %s" % (zmin,))
            print("bounded: %s" % (bounded,))
            print("solvable: %s" % (solvable,))
            print("basis: %s" % (basis,))
            print("-------------------------------------------")
        else:
            expected_res = prob['result']
            # Solvability and the final basis are always compared.
            assert_equal(solvable, expected_res[3], err_msg=repr(prob))
            assert_equal(basis, expected_res[4], err_msg=repr(prob))
            if solvable:
                assert_equal(bounded, expected_res[2], err_msg=repr(prob))
                if bounded:
                    # The optimum point is only meaningful for bounded problems.
                    assert_almost_equal(optx, expected_res[0],
                                        err_msg=repr(prob))
                assert_almost_equal(zmin, expected_res[1], err_msg=repr(prob))
예제 #3
0
    def LP_solve_gaps(self, param):
        """Estimate the gaps between neighbouring contigs with a linear program.

        The expected distance of every link in ``self.observations`` is the
        observed mean plus an estimated gap: PE links use ``GC.GapEstimator``
        with the contamination library parameters; other links use
        ``lnpe.GapEstimator`` when ``param.lognormal`` is set and
        ``GC.GapEstimator`` with the main library parameters otherwise.

        When ``self.ctg_lengths`` contains every length of one of four
        hard-coded combinations, one "GAPEST:" line per link is written to
        ``param.information_file``.

        Each gap is modelled as ``x - y`` using two LP columns and each link
        gets a help column constrained from both sides by the link residual.
        If ``self.contamination_ratio`` is truthy, the solved gap between two
        adjacent contigs that share a PE link is finally replaced by that
        link's expected distance minus its observed mean (not rounded).

        Side effect: ``self.objective`` is set to the solver's ``fun`` value.

        Returns a list with one gap per adjacent contig pair.
        """
        observations = self.observations

        # Expected distance per link: observed mean + estimated gap.
        exp_means_gapest = {}
        for link in observations:
            i, j, pe_link = link
            mean_obs = observations[link][0]
            if pe_link:
                gap_estimate = GC.GapEstimator(
                    self.contamination_mean, self.contamination_stddev,
                    self.read_len, mean_obs,
                    self.ctgs[i].length, self.ctgs[j].length)
            elif param.lognormal:
                samples = observations[link][3]
                gap_estimate = lnpe.GapEstimator(
                    param.lognormal_mean, param.lognormal_sigma,
                    self.read_len, samples,
                    self.ctgs[i].length, c2_len=self.ctgs[j].length)
            else:
                gap_estimate = GC.GapEstimator(
                    self.mean, self.stddev, self.read_len, mean_obs,
                    self.ctgs[i].length, self.ctgs[j].length)
            exp_means_gapest[link] = mean_obs + gap_estimate

        # Diagnostic output for a few hard-coded contig length combinations.
        # The non-PE branch reports GC.GapEstimator even in lognormal mode.
        traced_lengths = ([670, 2093], [900, 3810], [2528, 591], [734, 257, 1548])
        if any(all(length in self.ctg_lengths for length in combo) for combo in traced_lengths):
            for link in observations:
                i, j, pe_link = link
                mean_obs = observations[link][0]
                len_i = self.ctgs[i].length
                len_j = self.ctgs[j].length
                if pe_link:
                    lib_mean, lib_stddev = self.contamination_mean, self.contamination_stddev
                else:
                    lib_mean, lib_stddev = self.mean, self.stddev
                print >>param.information_file, "GAPEST:", mean_obs, len_i, len_j, "gap:", GC.GapEstimator(
                    lib_mean, lib_stddev, self.read_len, mean_obs, len_i, len_j)

        # Column layout: gap k owns columns 2k (x_k) and 2k+1 (y_k); the help
        # variable of the h-th link sits at column 2*n_gaps + h.
        lp = LpForm()
        n_gaps = len(self.ctgs) - 1
        n_links = len(observations)
        n_cols = 2 * n_gaps + n_links

        # Bound every gap: x_k - y_k against mean + 2*stddev.
        for gap in range(n_gaps):
            bound_row = [0] * n_cols
            bound_row[2 * gap], bound_row[2 * gap + 1] = 1, -1
            lp.add_constraint(bound_row, self.mean + 2 * self.stddev)

        def link_row(link_index, first, last, x_coef):
            """Row with +/-1 on the gaps spanned by a link and -1 on its help column."""
            row = [0] * n_cols
            # Same gaps as scanning every column k and keeping first <= k < last.
            for gap in range(max(first, 0), min(last, n_cols)):
                row[2 * gap] = x_coef
                row[2 * gap + 1] = -x_coef
            row[2 * n_gaps + link_index] = -1
            return row

        # Two passes, one per sign of the absolute value.  All rows of the
        # first pass are added before any row of the second pass.
        for x_coef in (-1, 1):
            for link_index, link in enumerate(observations):
                i, j, _ = link
                # Lengths of the contigs strictly between the pair + observation.
                constant = sum(ctg.length for ctg in self.ctgs[i + 1 : j]) + observations[link][0]
                predicted_distance = exp_means_gapest[link]
                if x_coef < 0:
                    rhs = constant - predicted_distance
                else:
                    rhs = predicted_distance - constant
                lp.add_constraint(link_row(link_index, i, j, x_coef), rhs)

        # Objective: one weight per help variable.
        # NOTE(review): add_objective is invoked after every single weight,
        # always with the same (growing) list, exactly as before -- confirm
        # against LpForm whether one call after the loop would be enough.
        # obj_delta_stddev sums |library stddev - observed stddev| * count
        # over all links; it is accumulated but nothing below consumes it.
        obj_delta_stddev = 0
        obj_row = [0] * n_cols
        if self.contamination_ratio:
            for link_index, link in enumerate(observations):
                _, _, pe_link = link
                support = observations[link][1]
                obs_stddev = observations[link][2]
                reference_stddev = self.contamination_stddev if pe_link else self.stddev
                obj_delta_stddev += abs(reference_stddev - obs_stddev) * support

                # Both link types end up weighted by their observation count.
                obj_row[2 * n_gaps + link_index] = pe_link * support + (1 - pe_link) * support
                lp.add_objective(obj_row)
        else:
            for link_index, link in enumerate(observations):
                obj_row[2 * n_gaps + link_index] = observations[link][1]
                lp.add_objective(obj_row)

        A, b, c = lp.standard_form()
        lpsol = lp_solve(c, A, b, tol=1e-4)
        optx = lpsol.x

        # Fold every (x_k, y_k) pair back into a single gap value.
        gap_solution = [round(optx[2 * gap] - optx[2 * gap + 1], 0) for gap in range(n_gaps)]

        self.objective = lpsol.fun

        # Where two adjacent contigs share a PE link, replace the LP gap by
        # the estimate derived from the contamination distribution.
        if self.contamination_ratio:
            for gap in range(n_gaps):
                pe_key = (gap, gap + 1, True)
                if pe_key in observations:
                    mean_obs = observations[pe_key][0]
                    gap_solution[gap] = exp_means_gapest[pe_key] - mean_obs

        return gap_solution
예제 #4
0
    def LP_solve_gaps(self, param):
        """Solve for the gaps between consecutive contigs via a linear program.

        Every link in ``self.observations`` gets an expected distance equal
        to its observed mean plus the ``GC.GapEstimator`` gap (contamination
        library parameters for PE links, main library parameters otherwise).
        The LP uses two columns per gap (``x`` and ``y``, gap = ``x - y``)
        and one help column per link, bounded from both sides by the link's
        residual so it can play the role of an absolute value.

        ``param`` is part of the signature but is not read here.

        Side effect: the solver's ``fun`` value is stored in
        ``self.objective``.

        Returns one gap per adjacent contig pair, rounded with
        ``round(value, 0)``.
        """
        links = list(self.observations)

        # Observed mean + estimated gap for each link.
        exp_means_gapest = {}
        for key in links:
            ctg_a, ctg_b, from_pe = key
            mean_obs = self.observations[key][0]
            len_a = self.ctgs[ctg_a].length
            len_b = self.ctgs[ctg_b].length
            if from_pe:
                est_gap = GC.GapEstimator(self.contamination_mean,
                                          self.contamination_stddev,
                                          self.read_len, mean_obs, len_a, len_b)
            else:
                est_gap = GC.GapEstimator(self.mean, self.stddev,
                                          self.read_len, mean_obs, len_a, len_b)
            exp_means_gapest[key] = mean_obs + est_gap

        form = LpForm()

        num_gaps = len(self.ctgs) - 1
        num_links = len(self.observations)
        # 2 columns per gap (x, y) followed by 1 help column per link.
        num_vars = 2 * num_gaps + num_links

        # Gap bounds: x_k - y_k against mean + 2*stddev.
        for gap_idx in range(num_gaps):
            x_col = 2 * gap_idx
            bound = [0] * num_vars
            bound[x_col] = 1
            bound[x_col + 1] = -1
            form.add_constraint(bound, self.mean + 2 * self.stddev)

        # Help-variable constraints: first every link with the (-x, +y)
        # orientation, then every link with the (+x, -y) orientation.
        for x_coef in (-1, 1):
            for h_index, key in enumerate(links):
                ctg_a, ctg_b, _ = key
                row = [0] * num_vars

                # Gaps spanned by the link; identical to testing
                # ctg_a <= k < ctg_b for every k in range(num_vars).
                for gap_idx in range(max(ctg_a, 0), min(ctg_b, num_vars)):
                    row[2 * gap_idx] = x_coef
                    row[2 * gap_idx + 1] = -x_coef

                row[2 * num_gaps + h_index] = -1

                # Lengths of the contigs strictly in between + observation.
                inner_length = sum(c.length for c in self.ctgs[ctg_a + 1:ctg_b])
                constant = inner_length + self.observations[key][0]
                predicted_distance = exp_means_gapest[key]

                if x_coef == -1:
                    form.add_constraint(row, constant - predicted_distance)
                else:
                    form.add_constraint(row, predicted_distance - constant)

        # Objective weights for the help variables.
        # NOTE(review): add_objective is called once per link with the same
        # list object, as in the original -- verify against LpForm whether a
        # single call after the loop would be equivalent.
        obj_row = [0] * num_vars
        if self.contamination_ratio:
            for h_index, key in enumerate(links):
                _, _, from_pe = key
                n_obs = self.observations[key][1]
                pe_term = from_pe * self.stddev * (1 - self.contamination_ratio) * n_obs
                other_term = (1 - from_pe) * self.contamination_stddev * (self.contamination_ratio) * n_obs
                obj_row[2 * num_gaps + h_index] = pe_term + other_term
                form.add_objective(obj_row)
        else:
            for h_index, key in enumerate(links):
                obj_row[2 * num_gaps + h_index] = self.observations[key][1]
                form.add_objective(obj_row)

        A, b, c = form.standard_form()
        lpsol = lp_solve(c, A, b, tol=1e-4)
        optx = lpsol.x

        # gap_k = x_k - y_k
        gap_solution = []
        for gap_idx in range(num_gaps):
            x_col = 2 * gap_idx
            gap_solution.append(round(optx[x_col] - optx[x_col + 1], 0))

        self.objective = lpsol.fun

        return gap_solution
예제 #5
0
    def LP_solve_gaps(self, param):
        """Solve for the gaps between consecutive contigs via a linear program.

        Every link in ``self.observations`` gets an expected distance equal
        to its observed mean plus an estimated gap: ``GC.GapEstimator`` with
        the contamination library parameters for PE links; for other links
        ``lnpe.GapEstimator`` when ``param.lognormal`` is set, otherwise
        ``GC.GapEstimator`` with the main library parameters.

        When ``self.ctg_lengths`` contains every length of one of four
        hard-coded combinations, one 'GAPEST:' line per link is printed to
        ``param.information_file``.

        The LP uses two columns per gap (``x`` and ``y``, gap = ``x - y``)
        and one help column per link, bounded from both sides by the link's
        residual.  If ``self.contamination_ratio`` is truthy, the solved gap
        between two adjacent contigs that share a PE link is afterwards
        replaced by that link's expected distance minus its observed mean
        (not rounded).

        Side effect: the solver's ``fun`` value is stored in
        ``self.objective``.

        Returns one gap per adjacent contig pair.
        """
        links = list(self.observations)

        # Observed mean + estimated gap for each link.
        exp_means_gapest = {}
        for key in links:
            ctg_a, ctg_b, from_pe = key
            mean_obs = self.observations[key][0]
            if from_pe:
                est_gap = GC.GapEstimator(self.contamination_mean,
                                          self.contamination_stddev,
                                          self.read_len, mean_obs,
                                          self.ctgs[ctg_a].length,
                                          self.ctgs[ctg_b].length)
            elif param.lognormal:
                samples = self.observations[key][3]
                est_gap = lnpe.GapEstimator(param.lognormal_mean,
                                            param.lognormal_sigma,
                                            self.read_len,
                                            samples,
                                            self.ctgs[ctg_a].length,
                                            c2_len=self.ctgs[ctg_b].length)
            else:
                est_gap = GC.GapEstimator(self.mean, self.stddev,
                                          self.read_len, mean_obs,
                                          self.ctgs[ctg_a].length,
                                          self.ctgs[ctg_b].length)
            exp_means_gapest[key] = mean_obs + est_gap

        # Diagnostic output for a few hard-coded contig length combinations.
        # The non-PE branch reports GC.GapEstimator even in lognormal mode.
        traced_lengths = ([670, 2093], [900, 3810], [2528, 591],
                          [734, 257, 1548])
        if any(
                all(length in self.ctg_lengths for length in combo)
                for combo in traced_lengths):
            for key in links:
                ctg_a, ctg_b, from_pe = key
                mean_obs = self.observations[key][0]
                len_a = self.ctgs[ctg_a].length
                len_b = self.ctgs[ctg_b].length
                if from_pe:
                    traced_gap = GC.GapEstimator(self.contamination_mean,
                                                 self.contamination_stddev,
                                                 self.read_len, mean_obs,
                                                 len_a, len_b)
                else:
                    traced_gap = GC.GapEstimator(self.mean, self.stddev,
                                                 self.read_len, mean_obs,
                                                 len_a, len_b)
                print('GAPEST:', mean_obs, len_a, len_b, 'gap:', traced_gap,
                      file=param.information_file)

        form = LpForm()

        num_gaps = len(self.ctgs) - 1
        num_links = len(self.observations)
        # 2 columns per gap (x, y) followed by 1 help column per link.
        num_vars = 2 * num_gaps + num_links

        # Gap bounds: x_k - y_k against mean + 2*stddev.
        for gap_idx in range(num_gaps):
            x_col = 2 * gap_idx
            bound = [0] * num_vars
            bound[x_col] = 1
            bound[x_col + 1] = -1
            form.add_constraint(bound, self.mean + 2 * self.stddev)

        # Help-variable constraints: first every link with the (-x, +y)
        # orientation, then every link with the (+x, -y) orientation.
        for x_coef in (-1, 1):
            for h_index, key in enumerate(links):
                ctg_a, ctg_b, _ = key
                row = [0] * num_vars

                # Gaps spanned by the link; identical to testing
                # ctg_a <= k < ctg_b for every k in range(num_vars).
                for gap_idx in range(max(ctg_a, 0), min(ctg_b, num_vars)):
                    row[2 * gap_idx] = x_coef
                    row[2 * gap_idx + 1] = -x_coef

                row[2 * num_gaps + h_index] = -1

                # Lengths of the contigs strictly in between + observation.
                inner_length = sum(c.length for c in self.ctgs[ctg_a + 1:ctg_b])
                constant = inner_length + self.observations[key][0]
                predicted_distance = exp_means_gapest[key]

                if x_coef == -1:
                    form.add_constraint(row, constant - predicted_distance)
                else:
                    form.add_constraint(row, predicted_distance - constant)

        # Objective weights for the help variables.
        # NOTE(review): add_objective is called once per link with the same
        # list object, as in the original -- verify against LpForm whether a
        # single call after the loop would be equivalent.
        # obj_delta_stddev sums |library stddev - observed stddev| * count
        # over all links; it is accumulated but nothing below consumes it.
        obj_delta_stddev = 0
        obj_row = [0] * num_vars
        if self.contamination_ratio:
            for h_index, key in enumerate(links):
                _, _, from_pe = key
                n_obs = self.observations[key][1]
                obs_stddev = self.observations[key][2]
                if from_pe:
                    deviation = abs(self.contamination_stddev - obs_stddev)
                else:
                    deviation = abs(self.stddev - obs_stddev)
                obj_delta_stddev += deviation * n_obs

                # Both link types end up weighted by their observation count.
                obj_row[2 * num_gaps + h_index] = from_pe * n_obs + (1 - from_pe) * n_obs
                form.add_objective(obj_row)
        else:
            for h_index, key in enumerate(links):
                obj_row[2 * num_gaps + h_index] = self.observations[key][1]
                form.add_objective(obj_row)

        A, b, c = form.standard_form()
        lpsol = lp_solve(c, A, b, tol=1e-4)
        optx = lpsol.x

        # gap_k = x_k - y_k
        gap_solution = []
        for gap_idx in range(num_gaps):
            x_col = 2 * gap_idx
            gap_solution.append(round(optx[x_col] - optx[x_col + 1], 0))

        self.objective = lpsol.fun

        # Where two adjacent contigs share a PE link, replace the LP gap by
        # the estimate derived from the contamination distribution.
        if self.contamination_ratio:
            for gap_idx in range(num_gaps):
                pe_key = (gap_idx, gap_idx + 1, True)
                if pe_key in self.observations:
                    mean_obs = self.observations[pe_key][0]
                    gap_solution[gap_idx] = exp_means_gapest[pe_key] - mean_obs

        return gap_solution