def LP_solve_gaps(self, param):
    """Estimate the gaps between consecutive contigs by solving a linear program.

    For every link ``(i, j, is_PE_link)`` in ``self.observations`` an expected
    distance is computed as the observed mean (element 0 of the observation)
    plus the value returned by ``GC.GapEstimator``.  The LP then looks for gap
    values whose implied distances deviate as little as possible, in weighted
    absolute value, from those expected distances.

    Column layout of every LP row (``n = 2*g + r`` columns):

    * columns ``2*k`` and ``2*k + 1`` hold ``x_k`` and ``y_k``; gap ``k`` is
      ``x_k - y_k`` so that it may be negative although all variables are >= 0,
    * column ``2*g + h`` holds the help variable of observation ``h`` which
      stands in for the absolute deviation of that observation.

    Args:
        param: Not read by this implementation; kept for the call interface.

    Returns:
        A list with ``g = len(self.ctgs) - 1`` gap values, each
        ``round(x_k - y_k, 0)``.

    Side effects:
        Sets ``self.objective`` to ``lpsol.fun`` of the solved LP.
    """
    observations = self.observations

    # Expected distance per link: observed mean + estimated gap.  PE links use
    # the contamination library parameters, all other links the main library.
    exp_means_gapest = {}
    for key in observations:
        i, j, is_PE_link = key
        mean_obs = observations[key][0]
        if is_PE_link:
            gap_estimate = GC.GapEstimator(
                self.contamination_mean, self.contamination_stddev,
                self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        else:
            gap_estimate = GC.GapEstimator(
                self.mean, self.stddev, self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        exp_means_gapest[key] = mean_obs + gap_estimate

    # Convert the problem to standard form
    #   minimize    z = c' x
    #   subject to  A x = b, x >= 0
    # (b does not necessarily need to be a positive vector).
    # NOTE(review): LpForm is defined elsewhere; presumably each added row
    # means `row . x <= rhs` and standard_form() adds the slack variables --
    # confirm against LpForm.
    t = LpForm()

    g = len(self.ctgs) - 1      # number of gaps
    r = len(observations)       # number of help variables
    n = 2 * g + r               # width of every row

    # Upper bound on every gap: g_k = x_k - y_k <= mean + 2 * stddev.
    for gap_index in range(g):
        row = [0] * n
        row[2 * gap_index] = 1        # x_k
        row[2 * gap_index + 1] = -1   # y_k
        t.add_constraint(row, self.mean + 2 * self.stddev)

    # Per observation: lengths of the contigs strictly between i and j plus
    # the observed mean, and the expected distance.  Computed once and shared
    # by both halves of the absolute-value constraint.
    links = []
    for h_index, key in enumerate(observations):
        i, j, _ = key
        constant = (sum(ctg.length for ctg in self.ctgs[i + 1:j])
                    + observations[key][0])
        links.append((h_index, i, j, constant, exp_means_gapest[key]))

    # |deviation| <= help variable is expressed as two rows per observation.
    # All rows of the first case are added before any row of the second case,
    # which keeps the original row order.
    for sign in (-1, 1):
        for h_index, i, j, constant, predicted_distance in links:
            row = [0] * n
            # Only the gaps spanned by the link (i <= k < j) take part;
            # j <= g, so 2*k + 1 always stays inside the gap columns.
            for k in range(i, j):
                row[2 * k] = sign
                row[2 * k + 1] = -sign
            row[2 * g + h_index] = -1
            if sign < 0:
                t.add_constraint(row, constant - predicted_distance)
            else:
                t.add_constraint(row, predicted_distance - constant)

    # Objective: weighted sum of the help variables.
    obj_row = [0] * n
    if self.contamination_ratio:
        for h_index, key in enumerate(observations):
            is_PE_link = key[2]
            nr_obs = observations[key][1]
            # is_PE_link is used as a 0/1 switch between the two terms.
            obj_row[2 * g + h_index] = (
                is_PE_link * self.stddev
                * (1 - self.contamination_ratio) * nr_obs
                + (1 - is_PE_link) * self.contamination_stddev
                * self.contamination_ratio * nr_obs)
    else:
        for h_index, key in enumerate(observations):
            obj_row[2 * g + h_index] = observations[key][1]
    t.add_objective(obj_row)

    A, b, c = t.standard_form()
    lpsol = lp_solve(c, A, b, tol=1e-4)
    # NOTE(review): lpsol.x is used without checking lpsol.is_solvable /
    # lpsol.is_bounded -- confirm the LP is always feasible here.
    optx = lpsol.x

    # Transform the solution back to gaps: gap_k = x_k - y_k.
    gap_solution = [round(optx[2 * k] - optx[2 * k + 1], 0) for k in range(g)]

    self.objective = lpsol.fun
    return gap_solution
def test_lp(prt=False):
    """Run ``lp_solve`` on a fixed set of reference problems.

    Each problem is a dict with the LP data ``'A'``, ``'b'``, ``'c'`` and a
    ``'result'`` list ``[optx, zmin, bounded, solvable, basis]`` holding the
    expected solver output.

    Args:
        prt: If true, print the problem and the solver output for every
            problem and perform no assertions.  If false (default), compare
            the solver output with the expected result: solvability and basis
            always, boundedness only when solvable, and the optimum and the
            objective value only when also bounded.
    """
    m1 = 20
    m2 = 50
    probs = [
        {
            'A': array([
                [2., 5., 3., -1., 0., 0.],
                [3., 2.5, 8., 0., -1., 0.],
                [8., 10., 4., 0., 0., -1.]]),
            'b': array([185., 155., 600.]),
            'c': array([4., 8., 3., 0., 0., 0.]),
            'result': [
                array([66.25, 0., 17.5, 0., 183.75, 0.]),
                317.5,
                True,
                True,
                array([2, 0, 4])
            ]
        },
        {
            'A': array([
                [-1., -1., -1., 0., 0., 0.],
                [0., 0., 0., 1., 1., 1.],
                [1., 0., 0., 1., 0., 0.],
                [0., 1., 0., 0., 1., 0.],
                [0., 0., 1., 0., 0., 1.]]),
            'b': array([-0.5, 0.4, 0.3, 0.3, 0.3]),
            'c': array([2.8, 6.3, 10.8, -2.8, -6.3, -10.8]),
            'result': [
                array([0.3, 0.2, 0.0, 0.0, 0.1, 0.3]),
                -1.77,
                True,
                True,
                array([1, 7, 0, 4, 5])
            ]
        },
        {   # with degeneracy
            'A': array([[cos(2*pi*i/(m1+1))-1., sin(2*pi*i/(m1+1))]
                        for i in range(1, m1+1)]).T,
            'b': zeros(2).T,
            'c': -ones(m1).T,
            'result': [
                zeros(m1),
                0.,
                True,
                True,
                array([0, 19])
            ]
        },
        {   # with unboundedness (0 is a member of the convex hull
            # of these vectors)
            'A': array([[cos(2*pi*i/(m2+1))-1., sin(2*pi*i/(m2+1))]
                        for i in range(0, m2)]).T,
            'b': zeros(2).T,
            'c': -ones(m2).T,
            'result': [
                None,   # unchecked when unbounded
                -Inf,   # unchecked when unbounded
                False,
                True,
                array([2, 49])
            ]
        },
        {   # Unsolvable
            'A': array([[cos(2*pi*i/(m2+1))-1., sin(2*pi*i/(m2+1))]
                        for i in range(0, m2)]).T,
            'b': ones(2).T,
            'c': -ones(m2).T,
            'result': [
                None,   # unchecked when unsolvable
                None,   # unchecked when unsolvable
                None,   # unchecked when unsolvable
                False,
                array([50, 1])
            ]
        },
        # add other test cases here...
    ]

    for prob in probs:
        lpsol = lp_solve(prob['c'], prob['A'], prob['b'])
        optx = lpsol.x
        zmin = lpsol.fun
        bounded = lpsol.is_bounded
        solvable = lpsol.is_solvable
        basis = lpsol.basis
        if prt:
            # Single-argument print(...) calls give the same output under the
            # Python 2 print statement and the Python 3 print function.
            print("A:\n%s" % (prob['A'],))
            print("b: %s" % (prob['b'],))
            print("c: %s" % (prob['c'],))
            print(" ---->")
            print("optx: %s" % (optx,))
            print("zmin: %s" % (zmin,))
            print("bounded: %s" % (bounded,))
            print("solvable: %s" % (solvable,))
            print("basis: %s" % (basis,))
            print("-------------------------------------------")
        else:
            expected_res = prob['result']
            assert_equal(solvable, expected_res[3], err_msg=repr(prob))
            assert_equal(basis, expected_res[4], err_msg=repr(prob))
            if solvable:
                assert_equal(bounded, expected_res[2], err_msg=repr(prob))
                if bounded:
                    assert_almost_equal(optx, expected_res[0],
                                        err_msg=repr(prob))
                    assert_almost_equal(zmin, expected_res[1],
                                        err_msg=repr(prob))
def LP_solve_gaps(self, param):
    """Estimate the gaps between consecutive contigs by solving a linear program.

    For every link ``(i, j, is_PE_link)`` in ``self.observations`` an expected
    distance is computed as the observed mean (element 0 of the observation)
    plus an estimated gap: ``GC.GapEstimator`` with the contamination library
    parameters for PE links, ``lnpe.GapEstimator`` for other links when
    ``param.lognormal`` is set, and ``GC.GapEstimator`` with the main library
    parameters otherwise.  The LP then looks for gap values whose implied
    distances deviate as little as possible, in absolute value weighted by
    element 1 of each observation, from those expected distances.

    Column layout of every LP row (``n_cols = 2*g + r`` columns):

    * columns ``2*k`` and ``2*k + 1`` hold ``x_k`` and ``y_k``; gap ``k`` is
      ``x_k - y_k`` so that it may be negative although all variables are >= 0,
    * column ``2*g + h`` holds the help variable of observation ``h`` which
      stands in for the absolute deviation of that observation.

    Args:
        param: Object providing ``lognormal``, ``lognormal_mean``,
            ``lognormal_sigma`` and ``information_file`` (a writable file
            object, only used by the diagnostic output).

    Returns:
        A list with ``g = len(self.ctgs) - 1`` gap values.  Each is
        ``round(x_k - y_k, 0)`` from the LP, except that, when
        ``self.contamination_ratio`` is set, a gap covered by a PE link
        between adjacent contigs is replaced by that link's (unrounded) gap
        estimate.

    Side effects:
        Sets ``self.objective`` to ``lpsol.fun`` of the solved LP and may
        write ``GAPEST:`` lines to ``param.information_file``.
    """
    observations = self.observations

    exp_means_gapest = {}
    for key in observations:
        i, j, is_PE_link = key
        mean_obs = observations[key][0]
        if is_PE_link:
            gap_estimate = GC.GapEstimator(
                self.contamination_mean, self.contamination_stddev,
                self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        elif param.lognormal:
            samples = observations[key][3]
            gap_estimate = lnpe.GapEstimator(
                param.lognormal_mean, param.lognormal_sigma, self.read_len,
                samples, self.ctgs[i].length, c2_len=self.ctgs[j].length)
        else:
            gap_estimate = GC.GapEstimator(
                self.mean, self.stddev, self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        exp_means_gapest[key] = mean_obs + gap_estimate

    # Diagnostic output for a few specific contig-length combinations.
    # NOTE(review): the hard-coded lengths look like leftover dataset-specific
    # debugging -- confirm whether this is still wanted.
    debug_length_sets = ([670, 2093], [900, 3810], [2528, 591],
                         [734, 257, 1548])
    if any(all(length in self.ctg_lengths for length in lengths)
           for lengths in debug_length_sets):
        for key in observations:
            i, j, is_PE_link = key
            mean_obs = observations[key][0]
            # The printed gap always comes from GC.GapEstimator, also for
            # links whose expected distance used the lognormal estimator.
            if is_PE_link:
                gap_estimate = GC.GapEstimator(
                    self.contamination_mean, self.contamination_stddev,
                    self.read_len, mean_obs,
                    self.ctgs[i].length, self.ctgs[j].length)
            else:
                gap_estimate = GC.GapEstimator(
                    self.mean, self.stddev, self.read_len, mean_obs,
                    self.ctgs[i].length, self.ctgs[j].length)
            fields = ("GAPEST:", mean_obs, self.ctgs[i].length,
                      self.ctgs[j].length, "gap:", gap_estimate)
            # Space-separated, newline-terminated: the same text the former
            # `print >>file, ...` statement produced, but valid on Python 2
            # and Python 3 alike.
            param.information_file.write(
                " ".join(str(field) for field in fields) + "\n")

    # Convert the problem to standard form
    #   minimize    z = c' x
    #   subject to  A x = b, x >= 0
    # (b does not necessarily need to be a positive vector).
    # NOTE(review): LpForm is defined elsewhere; presumably each added row
    # means `row . x <= rhs` and standard_form() adds the slack variables --
    # confirm against LpForm.
    t = LpForm()

    g = len(self.ctgs) - 1      # number of gaps
    r = len(observations)       # number of help variables
    n_cols = 2 * g + r          # width of every row

    # Upper bound on every gap: g_k = x_k - y_k <= mean + 2 * stddev.
    for gap_index in range(g):
        row = [0] * n_cols
        row[2 * gap_index] = 1        # x_k
        row[2 * gap_index + 1] = -1   # y_k
        t.add_constraint(row, self.mean + 2 * self.stddev)

    # Per observation: lengths of the contigs strictly between i and j plus
    # the observed mean, and the expected distance.  Computed once and shared
    # by both halves of the absolute-value constraint.
    links = []
    for h_index, key in enumerate(observations):
        i, j, _ = key
        constant = (sum(ctg.length for ctg in self.ctgs[i + 1:j])
                    + observations[key][0])
        links.append((h_index, i, j, constant, exp_means_gapest[key]))

    # |deviation| <= help variable is expressed as two rows per observation.
    # All rows of the first case are added before any row of the second case,
    # which keeps the original row order.
    for sign in (-1, 1):
        for h_index, i, j, constant, predicted_distance in links:
            row = [0] * n_cols
            # Only the gaps spanned by the link (i <= k < j) take part;
            # j <= g, so 2*k + 1 always stays inside the gap columns.
            for k in range(i, j):
                row[2 * k] = sign
                row[2 * k + 1] = -sign
            row[2 * g + h_index] = -1
            if sign < 0:
                t.add_constraint(row, constant - predicted_distance)
            else:
                t.add_constraint(row, predicted_distance - constant)

    # Objective: every help variable is weighted by element 1 of its
    # observation.  The former contamination branch used
    # `is_PE_link * n + (1 - is_PE_link) * n`, which is the same weight, so a
    # single loop covers both cases.  That branch also accumulated a stddev
    # discrepancy penalty that was never added to the objective; it was dead
    # code and is no longer computed.
    obj_row = [0] * n_cols
    for h_index, key in enumerate(observations):
        obj_row[2 * g + h_index] = observations[key][1]
    t.add_objective(obj_row)

    A, b, c = t.standard_form()
    lpsol = lp_solve(c, A, b, tol=1e-4)
    # NOTE(review): lpsol.x is used without checking lpsol.is_solvable /
    # lpsol.is_bounded -- confirm the LP is always feasible here.
    optx = lpsol.x

    # Transform the solution back to gaps: gap_k = x_k - y_k.
    gap_solution = [round(optx[2 * k] - optx[2 * k + 1], 0) for k in range(g)]

    self.objective = lpsol.fun

    # Where a PE link joins two adjacent contigs, override the LP gap with
    # the gap estimated from the contamination distribution.
    if self.contamination_ratio:
        for g_i in range(g):
            pe_key = (g_i, g_i + 1, True)
            if pe_key in observations:
                mean_obs = observations[pe_key][0]
                gap_solution[g_i] = exp_means_gapest[pe_key] - mean_obs

    return gap_solution
def LP_solve_gaps(self, param):
    """Estimate the gaps between consecutive contigs by solving a linear program.

    For every link ``(i, j, is_PE_link)`` in ``self.observations`` an expected
    distance is computed as the observed mean (element 0 of the observation)
    plus the value returned by ``GC.GapEstimator``.  The LP then looks for gap
    values whose implied distances deviate as little as possible, in weighted
    absolute value, from those expected distances.

    Column layout of every LP row (``n = 2*g + r`` columns):

    * columns ``2*k`` and ``2*k + 1`` hold ``x_k`` and ``y_k``; gap ``k`` is
      ``x_k - y_k`` so that it may be negative although all variables are >= 0,
    * column ``2*g + h`` holds the help variable of observation ``h`` which
      stands in for the absolute deviation of that observation.

    Args:
        param: Not read by this implementation; kept for the call interface.

    Returns:
        A list with ``g = len(self.ctgs) - 1`` gap values, each
        ``round(x_k - y_k, 0)``.

    Side effects:
        Sets ``self.objective`` to ``lpsol.fun`` of the solved LP.
    """
    observations = self.observations

    # Expected distance per link: observed mean + estimated gap.  PE links use
    # the contamination library parameters, all other links the main library.
    exp_means_gapest = {}
    for key in observations:
        i, j, is_PE_link = key
        mean_obs = observations[key][0]
        if is_PE_link:
            gap_estimate = GC.GapEstimator(
                self.contamination_mean, self.contamination_stddev,
                self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        else:
            gap_estimate = GC.GapEstimator(
                self.mean, self.stddev, self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        exp_means_gapest[key] = mean_obs + gap_estimate

    # Convert the problem to standard form
    #   minimize    z = c' x
    #   subject to  A x = b, x >= 0
    # (b does not necessarily need to be a positive vector).
    # NOTE(review): LpForm is defined elsewhere; presumably each added row
    # means `row . x <= rhs` and standard_form() adds the slack variables --
    # confirm against LpForm.
    t = LpForm()

    g = len(self.ctgs) - 1      # number of gaps
    r = len(observations)       # number of help variables
    n = 2 * g + r               # width of every row

    # Upper bound on every gap: g_k = x_k - y_k <= mean + 2 * stddev.
    for gap_index in range(g):
        row = [0] * n
        row[2 * gap_index] = 1        # x_k
        row[2 * gap_index + 1] = -1   # y_k
        t.add_constraint(row, self.mean + 2 * self.stddev)

    # Per observation: lengths of the contigs strictly between i and j plus
    # the observed mean, and the expected distance.  Computed once and shared
    # by both halves of the absolute-value constraint.
    links = []
    for h_index, key in enumerate(observations):
        i, j, _ = key
        constant = (sum(ctg.length for ctg in self.ctgs[i + 1:j])
                    + observations[key][0])
        links.append((h_index, i, j, constant, exp_means_gapest[key]))

    # |deviation| <= help variable is expressed as two rows per observation.
    # All rows of the first case are added before any row of the second case,
    # which keeps the original row order.
    for sign in (-1, 1):
        for h_index, i, j, constant, predicted_distance in links:
            row = [0] * n
            # Only the gaps spanned by the link (i <= k < j) take part;
            # j <= g, so 2*k + 1 always stays inside the gap columns.
            for k in range(i, j):
                row[2 * k] = sign
                row[2 * k + 1] = -sign
            row[2 * g + h_index] = -1
            if sign < 0:
                t.add_constraint(row, constant - predicted_distance)
            else:
                t.add_constraint(row, predicted_distance - constant)

    # Objective: weighted sum of the help variables.
    obj_row = [0] * n
    if self.contamination_ratio:
        for h_index, key in enumerate(observations):
            is_PE_link = key[2]
            nr_obs = observations[key][1]
            # is_PE_link is used as a 0/1 switch between the two terms.
            obj_row[2 * g + h_index] = (
                is_PE_link * self.stddev
                * (1 - self.contamination_ratio) * nr_obs
                + (1 - is_PE_link) * self.contamination_stddev
                * self.contamination_ratio * nr_obs)
    else:
        for h_index, key in enumerate(observations):
            obj_row[2 * g + h_index] = observations[key][1]
    t.add_objective(obj_row)

    A, b, c = t.standard_form()
    lpsol = lp_solve(c, A, b, tol=1e-4)
    # NOTE(review): lpsol.x is used without checking lpsol.is_solvable /
    # lpsol.is_bounded -- confirm the LP is always feasible here.
    optx = lpsol.x

    # Transform the solution back to gaps: gap_k = x_k - y_k.
    gap_solution = [round(optx[2 * k] - optx[2 * k + 1], 0) for k in range(g)]

    self.objective = lpsol.fun
    return gap_solution
def LP_solve_gaps(self, param):
    """Estimate the gaps between consecutive contigs by solving a linear program.

    For every link ``(i, j, is_PE_link)`` in ``self.observations`` an expected
    distance is computed as the observed mean (element 0 of the observation)
    plus an estimated gap: ``GC.GapEstimator`` with the contamination library
    parameters for PE links, ``lnpe.GapEstimator`` for other links when
    ``param.lognormal`` is set, and ``GC.GapEstimator`` with the main library
    parameters otherwise.  The LP then looks for gap values whose implied
    distances deviate as little as possible, in absolute value weighted by
    element 1 of each observation, from those expected distances.

    Column layout of every LP row (``n_cols = 2*g + r`` columns):

    * columns ``2*k`` and ``2*k + 1`` hold ``x_k`` and ``y_k``; gap ``k`` is
      ``x_k - y_k`` so that it may be negative although all variables are >= 0,
    * column ``2*g + h`` holds the help variable of observation ``h`` which
      stands in for the absolute deviation of that observation.

    Args:
        param: Object providing ``lognormal``, ``lognormal_mean``,
            ``lognormal_sigma`` and ``information_file`` (passed as ``file=``
            to ``print`` by the diagnostic output).

    Returns:
        A list with ``g = len(self.ctgs) - 1`` gap values.  Each is
        ``round(x_k - y_k, 0)`` from the LP, except that, when
        ``self.contamination_ratio`` is set, a gap covered by a PE link
        between adjacent contigs is replaced by that link's (unrounded) gap
        estimate.

    Side effects:
        Sets ``self.objective`` to ``lpsol.fun`` of the solved LP and may
        print ``GAPEST:`` lines to ``param.information_file``.
    """
    observations = self.observations

    exp_means_gapest = {}
    for key in observations:
        i, j, is_PE_link = key
        mean_obs = observations[key][0]
        if is_PE_link:
            gap_estimate = GC.GapEstimator(
                self.contamination_mean, self.contamination_stddev,
                self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        elif param.lognormal:
            samples = observations[key][3]
            gap_estimate = lnpe.GapEstimator(
                param.lognormal_mean, param.lognormal_sigma, self.read_len,
                samples, self.ctgs[i].length, c2_len=self.ctgs[j].length)
        else:
            gap_estimate = GC.GapEstimator(
                self.mean, self.stddev, self.read_len, mean_obs,
                self.ctgs[i].length, self.ctgs[j].length)
        exp_means_gapest[key] = mean_obs + gap_estimate

    # Diagnostic output for a few specific contig-length combinations.
    # NOTE(review): the hard-coded lengths look like leftover dataset-specific
    # debugging -- confirm whether this is still wanted.
    debug_length_sets = ([670, 2093], [900, 3810], [2528, 591],
                         [734, 257, 1548])
    if any(all(length in self.ctg_lengths for length in lengths)
           for lengths in debug_length_sets):
        for key in observations:
            i, j, is_PE_link = key
            mean_obs = observations[key][0]
            # The printed gap always comes from GC.GapEstimator, also for
            # links whose expected distance used the lognormal estimator.
            if is_PE_link:
                gap_estimate = GC.GapEstimator(
                    self.contamination_mean, self.contamination_stddev,
                    self.read_len, mean_obs,
                    self.ctgs[i].length, self.ctgs[j].length)
            else:
                gap_estimate = GC.GapEstimator(
                    self.mean, self.stddev, self.read_len, mean_obs,
                    self.ctgs[i].length, self.ctgs[j].length)
            print('GAPEST:', mean_obs, self.ctgs[i].length,
                  self.ctgs[j].length, 'gap:', gap_estimate,
                  file=param.information_file)

    # Convert the problem to standard form
    #   minimize    z = c' x
    #   subject to  A x = b, x >= 0
    # (b does not necessarily need to be a positive vector).
    # NOTE(review): LpForm is defined elsewhere; presumably each added row
    # means `row . x <= rhs` and standard_form() adds the slack variables --
    # confirm against LpForm.
    t = LpForm()

    g = len(self.ctgs) - 1      # number of gaps
    r = len(observations)       # number of help variables
    n_cols = 2 * g + r          # width of every row

    # Upper bound on every gap: g_k = x_k - y_k <= mean + 2 * stddev.
    for gap_index in range(g):
        row = [0] * n_cols
        row[2 * gap_index] = 1        # x_k
        row[2 * gap_index + 1] = -1   # y_k
        t.add_constraint(row, self.mean + 2 * self.stddev)

    # Per observation: lengths of the contigs strictly between i and j plus
    # the observed mean, and the expected distance.  Computed once and shared
    # by both halves of the absolute-value constraint.
    links = []
    for h_index, key in enumerate(observations):
        i, j, _ = key
        constant = (sum(ctg.length for ctg in self.ctgs[i + 1:j])
                    + observations[key][0])
        links.append((h_index, i, j, constant, exp_means_gapest[key]))

    # |deviation| <= help variable is expressed as two rows per observation.
    # All rows of the first case are added before any row of the second case,
    # which keeps the original row order.
    for sign in (-1, 1):
        for h_index, i, j, constant, predicted_distance in links:
            row = [0] * n_cols
            # Only the gaps spanned by the link (i <= k < j) take part;
            # j <= g, so 2*k + 1 always stays inside the gap columns.
            for k in range(i, j):
                row[2 * k] = sign
                row[2 * k + 1] = -sign
            row[2 * g + h_index] = -1
            if sign < 0:
                t.add_constraint(row, constant - predicted_distance)
            else:
                t.add_constraint(row, predicted_distance - constant)

    # Objective: every help variable is weighted by element 1 of its
    # observation.  The former contamination branch used
    # `is_PE_link * n + (1 - is_PE_link) * n`, which is the same weight, so a
    # single loop covers both cases.  That branch also accumulated a stddev
    # discrepancy penalty that was never added to the objective; it was dead
    # code and is no longer computed.
    obj_row = [0] * n_cols
    for h_index, key in enumerate(observations):
        obj_row[2 * g + h_index] = observations[key][1]
    t.add_objective(obj_row)

    A, b, c = t.standard_form()
    lpsol = lp_solve(c, A, b, tol=1e-4)
    # NOTE(review): lpsol.x is used without checking lpsol.is_solvable /
    # lpsol.is_bounded -- confirm the LP is always feasible here.
    optx = lpsol.x

    # Transform the solution back to gaps: gap_k = x_k - y_k.
    gap_solution = [round(optx[2 * k] - optx[2 * k + 1], 0) for k in range(g)]

    self.objective = lpsol.fun

    # Where a PE link joins two adjacent contigs, override the LP gap with
    # the gap estimated from the contamination distribution.
    if self.contamination_ratio:
        for g_i in range(g):
            pe_key = (g_i, g_i + 1, True)
            if pe_key in observations:
                mean_obs = observations[pe_key][0]
                gap_solution[g_i] = exp_means_gapest[pe_key] - mean_obs

    return gap_solution