Python Alignment.refend Examples

Programming Language: Python

Namespace/Package Name: alignment

Class/Type: Alignment

Method/Function: refend

Examples at hotexamples.com: 1

Python Alignment.refend - 1 example found. This is the top-rated real-world Python example of alignment.Alignment.refend extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Alignment(30)

read(6)

same_as(4)

reader_pharaoh(3)

from_alignedRead(3)

datatype(2)

qry_left_overlen(2)

qry_right_overlen(2)

read_filepath(2)

get_messages_aligned(2)

end(2)

name(2)

_parse_cigar(2)

soft_clip(2)

write(2)

start(2)

convert_string_to_alignment_dictionary(2)

single_local_solution_trace_back(1)

refend(1)

printDynamic(1)

writeResults(1)

readFile(1)

unpack(1)

tile_to_telescope_coordinates(1)

ref_left_overlen(1)

ref_right_overlen(1)

taxNames(1)

report_optimal_score(1)

refstart(1)

symbols(1)

save_alignments(1)

seconds_since_last_alignment(1)

sequences(1)

printBruteForce(1)

set_focus_area(1)

set_moon_ephem(1)

set_telescope(1)

single_global_single_align(1)

single_global_solution_trace_back(1)

setProject(1)

load(1)

perform_calc(1)

export_data_by_pk(1)

_parse_md(1)

aggregate_by_strata(1)

align(1)

bruteForceSolving(1)

center_offset_to_telescope_coordinates(1)

checkLengthsAndTypes(1)

compute_telescope_coordinates_of_focus_area(1)

Example #1

Show file

def pre_process(optmap_i, optmap_file, myfile, myfile2, output_dir,
                min_confidence):
    header_lines = 10
    header = []
    minrefoverhang = 50000
    minqryoverhang = 50000

    all_alms = {
    }  # stores all the Alignments for all groups, all_groups[ref] should contain molecule ref
    qualify_alms = {
    }  # only keep one alignment(the one with highest confidence) for each contig in one molecule
    removed = {
    }  # removed[ref,qry] == True means alignment for (ref, qry) is already removed

    # collecting alignments and store in all_groups
    print '---------------read .xmap file-------------------'
    with open(myfile + '_flip.xmap', 'rb') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='\t')
        for i in range(header_lines):  # 10 lines of header
            header.append(csvreader.next())  # save them
        # read the first non-header line
        while True:
            try:
                row = csvreader.next()
                x = Alignment(int(row[1]), int(row[2]), float(row[3]),
                              float(row[4]), float(row[5]), float(row[6]),
                              row[7], float(row[8]), row[9], float(row[10]),
                              float(row[11]), int(row[12]), row[13])
                if x.ref not in all_alms:
                    all_alms[x.ref] = [x]
                else:
                    all_alms[x.ref].append(x)
            except StopIteration:
                break
    num_all_alms = 0
    for ref in all_alms:
        num_all_alms += len(all_alms[ref])
    print "In total, the number of alignments collected is ", num_all_alms

    # only keep one alignment(the one with highest confidence) for each contig in one molecule
    for ref in all_alms:
        group = all_alms[ref]
        qry_bestx = {}
        for x in group:
            if x.qry not in qry_bestx:
                qry_bestx[x.qry] = x
            else:
                if x.confidence > qry_bestx[x.qry].confidence:
                    qry_bestx[x.qry] = x

        qualify_alms[ref] = {}
        for qry in qry_bestx:
            qualify_alms[ref][qry] = qry_bestx[qry]

    num_qualify_alms = 0
    for ref in qualify_alms:
        num_qualify_alms += len(qualify_alms[ref])
    # initialize removed array
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            removed[ref, qry] = False
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(current_alms,
                output_dir + "/opt_" + str(optmap_i) + "_alms_0_initial.log")
    print "In total, the number of alignments in qualify_alms is ", num_qualify_alms

    # remove low confidence alignments
    print '---------------Remove low quality alignments---------------'
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            x = qualify_alms[ref][qry]
            if x.confidence < min_confidence:
                removed[ref, qry] = True
                print 'alignment (', ref, ',', qry, ') is low quality and removed'
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(
        current_alms,
        output_dir + "/opt_" + str(optmap_i) + "_alms_1_removed_low_conf.log")
    print "After removing low confidence alignments, the number of alignments is ", num_alms
    print '---------------End---------------'

    # read optical map
    optmap = {}
    with open(optmap_file) as f_map:
        for line in f_map:
            line = line.strip()
            if line[0] == '#':
                continue
            cols = line.split('\t')
            CMapId = int(cols[0])
            LabelChannel = cols[4]
            Position = float(cols[5])

            if CMapId not in optmap:
                optmap[CMapId] = []
            if LabelChannel == "1":
                optmap[CMapId].append(Position)
    for CMapId in optmap:
        optmap[CMapId].sort()

    print '---------------scaling-------------------'
    # calculating scaling
    qry_len = {}
    with open(myfile2 + '_key.txt') as f_key:
        for i in range(0, 4):  # 4 header lines
            f_key.readline()
        for line in f_key:
            line = line.strip()
            cols = line.split('\t')
            qry_id = int(cols[0])
            seq_len = int(cols[2])
            qry_len[qry_id] = seq_len
    scaling = 0
    num = 0
    with open(myfile + '_r.cmap') as f_q:
        for i in range(0, 11):  # 11 header lines
            f_q.readline()
        for line in f_q:
            line = line.strip()
            cols = line.split('\t')
            qry_id = int(cols[0])
            appr_len = float(cols[1])
            seq_len = qry_len[qry_id]
            scaling += appr_len / seq_len
            num += 1
    scaling /= num  # scaling=1.02258059775
    scaling = 1.0
    # use scaling to adjsut coordinates of alignments
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            x = qualify_alms[ref][qry]
            x.qrystartpos /= scaling
            x.qryendpos /= scaling
            x.qrylen /= scaling
            x.refstartpos /= scaling
            x.refendpos /= scaling
            x.reflen /= scaling

    # use scaling to adjsut coordinates of optial map
    for ref in optmap:
        for i in range(0, len(optmap[ref])):
            optmap[ref][i] /= scaling

    print '---------------END-------------------'

    # find the reference-based coordinates for each contig
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            x = qualify_alms[ref][qry]
            if (x.orientation == '+'):
                x.qry_left_overlen = x.qrystartpos
                x.qry_right_overlen = x.qrylen - x.qryendpos
            else:
                x.qry_left_overlen = x.qrylen - x.qrystartpos
                x.qry_right_overlen = x.qryendpos
            x.start = x.refstartpos - x.qry_left_overlen
            x.end = x.refendpos + x.qry_right_overlen
            x.ref_left_overlen = x.refstartpos
            x.ref_right_overlen = x.reflen - x.refendpos
            if (x.orientation == '+'):
                x.refstart = x.qrystartpos - x.ref_left_overlen
                x.refend = x.qryendpos + x.ref_right_overlen
            else:
                x.refstart = x.qryendpos - x.ref_right_overlen
                x.refend = x.qrystartpos + x.ref_left_overlen

    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(current_alms,
                output_dir + "/opt_" + str(optmap_i) + "_alms_2_scaled.log")
    print "After scaling, the number of alignments is ", num_alms

    # read qry map
    qry_markers = {}
    with open(myfile + '_r.cmap') as f_q:
        for i in range(11):  # 10 lines of header
            header_line = f_q.readline()
        for line in f_q:
            line = line.strip()
            cols = line.split('\t')
            CMapId = int(cols[0])
            ContigLength = float(cols[1])
            NumSites = int(cols[2])
            SiteID = int(cols[3])
            LabelChannel = cols[4]
            Position = float(cols[5])
            if LabelChannel == "0":
                continue
            if CMapId not in qry_markers:
                qry_markers[CMapId] = []
            Position /= scaling
            qry_markers[CMapId].append(Position)
    for CMapId in qry_markers:
        qry_markers[CMapId].sort()
    f_q.close()

    print '---------------candidate cutting sites-------------------'
    fpair = file(output_dir + "/chimeric_pairs_" + str(optmap_i) + ".log", 'w')
    fpair.write("ref_id\tref_pos\tqry_id\tqry_pos\n")
    chimeric_pairs = []

    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == True:
                continue
            x = qualify_alms[ref][qry]

            if (x.confidence > min_confidence):
                ref_left_overlen = x.refstartpos
                ref_right_overlen = x.reflen - x.refendpos
                flag_left = False
                flag_right = False
                if (x.qry_left_overlen > minqryoverhang
                        and ref_left_overlen > minrefoverhang
                        and markers_in_qry_left_overhang(qry_markers, x) > 0):
                    flag_left = True
                    chimeric_pairs.append(
                        (x.ref, x.refstartpos, x.qry, x.qrystartpos))
                    print(
                        x.ref, x.refstartpos, x.qry,
                        x.qrystartpos), "is a pair of candidate cutting sites"
                    fpair.write(
                        str(x.ref) + "\t" + str(x.refstartpos) + "\t" +
                        str(x.qry) + "\t" + str(x.qrystartpos) + "\n")
                if (x.qry_right_overlen > minqryoverhang
                        and ref_right_overlen > minrefoverhang
                        and markers_in_qry_right_overhang(qry_markers, x) > 0):
                    flag_right = True
                    chimeric_pairs.append(
                        (x.ref, x.refendpos, x.qry, x.qryendpos))
                    print(x.ref, x.refendpos, x.qry,
                          x.qryendpos), "is a pair of candidate cutting sites"
                    fpair.write(
                        str(x.ref) + "\t" + str(x.refendpos) + "\t" +
                        str(x.qry) + "\t" + str(x.qryendpos) + "\n")
                if flag_left == True and flag_right == True:
                    removed[ref, qry] = True
    fpair.close()
    print '---------------END-------------------'
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(
        current_alms, output_dir + "/opt_" + str(optmap_i) +
        "_alms_3_removed_both_overhang.log")
    print "After removing alignments with both overhangs, the number of alignments is ", num_alms

    # check overlap between alignments
    for r in qualify_alms:
        for q1 in qualify_alms[r]:
            if removed[r, q1] == True:
                continue
            x = qualify_alms[r][q1]
            for q2 in qualify_alms[r]:
                if removed[r, q2] == True:
                    continue
                y = qualify_alms[r][q2]
                if q1 >= q2:
                    continue
                if x.refstartpos <= y.refstartpos and y.refstartpos <= x.refendpos:
                    overlap = min(x.refendpos, y.refendpos) - y.refstartpos
                elif y.refstartpos <= x.refstartpos and x.refstartpos <= y.refendpos:
                    overlap = min(x.refendpos, y.refendpos) - x.refstartpos
                else:
                    overlap = 0
                if overlap >= 20000:
                    if x.confidence < y.confidence:
                        removed[r, q1] = True
                    else:
                        removed[r, q2] = True
    num_alms = 0
    for ref in qualify_alms:
        for qry in qualify_alms[ref]:
            if removed[ref, qry] == False:
                num_alms += 1
    current_alms = copy_alms(qualify_alms, removed)
    output_alms(
        current_alms,
        output_dir + "/opt_" + str(optmap_i) + "_alms_4_solved_overlaps.log")
    print "After removing one of two overlap alignments, the number of alignments is ", num_alms

    return current_alms, optmap, chimeric_pairs