Esempio n. 1
0
def _lower_bound_obs(Scaffolds, small_scaffolds, node, side, nbr):
    """Return the lower-bound observation stored on *node* for neighbour *nbr*.

    The right-side table (``lower_right_nbrs_obs``) is used when *side* is
    ``'R'``; any other value selects ``lower_left_nbrs_obs``.  The lookup is
    tried in ``Scaffolds`` first and falls back to ``small_scaffolds`` when
    either the node or the neighbour key is missing there (KeyError).
    """
    attr = 'lower_right_nbrs_obs' if side == 'R' else 'lower_left_nbrs_obs'
    try:
        return getattr(Scaffolds[node], attr)[nbr]
    except KeyError:
        return getattr(small_scaffolds[node], attr)[nbr]


def _bound_ratio(z_hat, d, n, param):
    """Return ((1 - normcdf(d + z_hat)) / (1 - normcdf(d + read_len))) ** n.

    Both ``normcdf`` calls use ``param.mean_ins_size`` and
    ``param.std_dev_ins_size`` as their remaining arguments.
    """
    numerator = 1 - normcdf(d + z_hat, param.mean_ins_size,
                            param.std_dev_ins_size)
    denominator = 1 - normcdf(d + param.read_len, param.mean_ins_size,
                              param.std_dev_ins_size)
    return (numerator / denominator) ** n


def PreFilterEdges2(G, G_prime, Scaffolds, small_scaffolds, param):
    """Print lower-bound diagnostics for every edge joining two different nodes.

    Intended (per the original comment) to filter out edges that only have
    links far in on a contig, i.e. not near the contig ends.  As written it
    only reports: for each edge of ``G`` whose endpoints ``(node, side)`` have
    different node parts, it prints, once per endpoint, the stored lower-bound
    observation followed by the ratio computed by ``_bound_ratio``.

    NOTE: no edge is ever removed and ``pre_filtered`` is never incremented,
    so the final summary line always reports 0.  ``G_prime`` is unused.

    Args:
        G: graph object exposing ``edges()`` and ``G[u][v]['nr_links']``.
        G_prime: unused; kept so existing callers keep working.
        Scaffolds: mapping from node name to an object carrying
            ``lower_right_nbrs_obs`` / ``lower_left_nbrs_obs`` tables.
        small_scaffolds: fallback mapping with the same layout.
        param: object providing ``mean_ins_size``, ``std_dev_ins_size`` and
            ``read_len``.

    Returns:
        An empty tuple, as the original did.
    """
    pre_filtered = 0
    # Placeholder distance; the original marks this as the spot where the
    # ML distance should be calculated.
    d = 1500

    for edge in G.edges():
        node1, side1 = edge[0][0], edge[0][1]
        node2, side2 = edge[1][0], edge[1][1]
        if node1 == node2:
            continue

        n = G[edge[0]][edge[1]]['nr_links']

        # Single-argument print(...) produces the same "a b" line under both
        # the Python 2 print statement and the Python 3 print function.
        z_hat = _lower_bound_obs(Scaffolds, small_scaffolds,
                                 node1, side1, (node2, side2))
        print('%s %s' % (z_hat, _bound_ratio(z_hat, d, n, param)))

        z_hat2 = _lower_bound_obs(Scaffolds, small_scaffolds,
                                  node2, side2, (node1, side1))
        print('%s %s' % (z_hat2, _bound_ratio(z_hat2, d, n, param)))

    # The literal keeps its trailing space; joining with ' ' reproduces the
    # separator the original comma-separated print inserted.
    print(' '.join(('Nr of edges that did not pass the pre filtering step: ',
                    str(pre_filtered))))

    return ()
Esempio n. 2
0
def PreFilterEdges2(G, G_prime, Scaffolds, small_scaffolds, param):
    """Report lower-bound observations for edges between distinct nodes.

    The original comment describes this as filtering out edges that only have
    links far in on a contig (not near contig ends).  The code does not filter
    anything yet: it walks ``G.edges()``, skips edges whose two endpoints
    share the same node name, and for each remaining edge prints two lines
    (one per endpoint) of the form ``<z_hat> <ratio>``, where::

        ratio = ((1 - normcdf(d + z_hat, mean, std)) /
                 (1 - normcdf(d + read_len, mean, std))) ** nr_links

    NOTE: ``pre_filtered`` is never incremented, so the closing summary line
    always prints 0, and ``G_prime`` is not used.

    Args:
        G: graph whose edges are pairs of ``(node, side)`` endpoints and whose
            edge data holds ``'nr_links'``.
        G_prime: unused; retained for interface compatibility.
        Scaffolds: primary mapping of node name to scaffold object.
        small_scaffolds: mapping consulted when the primary lookup raises
            KeyError.
        param: provides ``mean_ins_size``, ``std_dev_ins_size``, ``read_len``.

    Returns:
        An empty tuple.
    """

    def report_end(node, side, nbr, d, n):
        # Side 'R' reads the right-hand table; everything else reads the left.
        if side == 'R':
            table_name = 'lower_right_nbrs_obs'
        else:
            table_name = 'lower_left_nbrs_obs'

        # A KeyError from either the node lookup or the neighbour lookup
        # sends us to the small-scaffold mapping.
        try:
            z_hat = getattr(Scaffolds[node], table_name)[nbr]
        except KeyError:
            z_hat = getattr(small_scaffolds[node], table_name)[nbr]

        upper = 1 - normcdf(d + z_hat, param.mean_ins_size,
                            param.std_dev_ins_size)
        lower = 1 - normcdf(d + param.read_len, param.mean_ins_size,
                            param.std_dev_ins_size)
        # One pre-formatted argument keeps the output identical whether
        # print is the Python 2 statement or the Python 3 function.
        print('%s %s' % (z_hat, (upper / lower) ** n))

    pre_filtered = 0
    for edge in G.edges():
        end1, end2 = edge[0], edge[1]
        if end1[0] == end2[0]:
            # Both ends belong to the same node: nothing to report.
            continue

        n = G[end1][end2]['nr_links']
        # Fixed stand-in; the original flags this as where the ML distance
        # should be calculated.
        d = 1500

        report_end(end1[0], end1[1], (end2[0], end2[1]), d, n)
        report_end(end2[0], end2[1], (end1[0], end1[1]), d, n)

    # Joining with ' ' after the literal's own trailing space matches the
    # spacing of the original comma-separated print.
    summary = 'Nr of edges that did not pass the pre filtering step: '
    print(' '.join((summary, str(pre_filtered))))

    return ()
Esempio n. 3
0
 def Part(a, b):
     """Return the sum of three terms evaluated at the bounds *a* and *b*.

     Reads ``len1``, ``len2``, ``param``, ``k`` and ``std_dev`` from the
     enclosing scope.  ``normcdf(x, 0, 1)`` / ``normpdf(x, 0, 1)`` are
     presumably the normal CDF/PDF with mean 0 and standard deviation 1 --
     confirm against their definitions.
     """
     shorter_len = min(len1, len2)

     # Term 1: (shorter length minus read length) / k, weighted by cdf(a).
     first = (shorter_len - param.read_len) / k * normcdf(a, 0, 1)

     # Term 2: b * std_dev / k, weighted by cdf(b) - cdf(a).
     cdf_diff = normcdf(b, 0, 1) - normcdf(a, 0, 1)
     second = (b * std_dev) / k * cdf_diff

     # Term 3: std_dev / k, weighted by pdf(b) - pdf(a).
     pdf_diff = normpdf(b, 0, 1) - normpdf(a, 0, 1)
     third = (std_dev / k) * pdf_diff

     return first + second + third
Esempio n. 4
0
 def Part(a, b):
     """Combine three closure-dependent terms for the interval bounds a, b.

     Uses the free variables ``len1``, ``len2``, ``param.read_len``, ``k`` and
     ``std_dev`` together with ``normcdf`` / ``normpdf`` called with trailing
     arguments ``0, 1`` (assumed to mean a standard normal -- TODO confirm).
     """
     terms = (
         # Length term scaled by the CDF at the lower bound.
         (min(len1, len2) - param.read_len) / k * normcdf(a, 0, 1),
         # Upper-bound term scaled by the CDF difference between b and a.
         (b * std_dev) / k * (normcdf(b, 0, 1) - normcdf(a, 0, 1)),
         # Spread term scaled by the PDF difference between b and a.
         (std_dev / k) * (normpdf(b, 0, 1) - normpdf(a, 0, 1)),
     )
     # Added left to right, exactly as the original expr1 + expr2 + expr3.
     return terms[0] + terms[1] + terms[2]