예제 #1
0
파일: amptklib.py 프로젝트: irawand07/amptk
def bam2fastq(input, output):
    """Dump the reads of a BAM file as FASTQ records.

    Args:
        input: path of the BAM file; it is opened in binary mode and the
            open handle is passed to ``pybam.read``.
        output: path of the FASTQ file to create (overwritten if present).

    Each record yielded by pybam (query name, sequence, quality string) is
    written as the usual four FASTQ lines.
    """
    import pybam

    wanted_fields = ['sam_qname', 'sam_seq', 'sam_qual']
    record_template = "@%s\n%s\n+\n%s\n"

    # The output is opened first, then the input, as in the nested form.
    with open(output, 'w') as fq_handle, open(input, 'rb') as bam_handle:
        for record in pybam.read(bam_handle, wanted_fields):
            name, bases, quals = record
            fq_handle.write(record_template % (name, bases, quals))
예제 #2
0
def total_phy_coverage():
    """Write the total physical coverage of the genome as a wiggle track.

    Reads ``../data/lact_sorted.bam`` with pybam.  For every alignment whose
    flag has the lowest bit set (0x1, "read paired") and whose template
    length (TLEN) is at most 3000, the span implied by POS and TLEN is
    recorded in a difference array.  The running sum of that array is then
    written, one value per position, to ``../wig-tracks/tot_phy_coverage.wig``
    as a ``fixedStep`` track.

    Span boundaries are clamped to ``[0, genome_length]``: a span that
    reaches the end of the genome no longer raises IndexError, and a span
    whose left edge would be negative no longer wraps around to the end of
    the list through negative indexing.

    The output file is managed by a ``with`` block so it is closed even if
    reading the BAM file fails.
    """
    print("Total Physical Coverage started...\n")

    genome_length = 3079196
    # Difference array: +1 where a span starts, -1 just past where it ends.
    # The extra slot lets a span that ends exactly at the last position
    # store its closing -1; that slot is never written to the track.
    genome_change = [0] * (genome_length + 1)

    def mark_span(first, past_last):
        # Record one span [first, past_last) after clamping it to the genome.
        first = max(first, 0)
        past_last = min(past_last, genome_length)
        if first < past_last:
            genome_change[first] += 1
            genome_change[past_last] -= 1

    with open('../wig-tracks/tot_phy_coverage.wig', 'w') as f:
        for alignment in pybam.read("../data/lact_sorted.bam"):
            # Lowest flag bit set <=> the binary form of the flag ends in '1'.
            if not int(alignment.sam_flag) & 0x1:
                continue

            start_pos = int(alignment.sam_pos1)  # 4th column
            tlen = int(alignment.sam_tlen)  # 9th column

            # NOTE(review): only the upper bound is checked, so arbitrarily
            # large negative TLEN values are still counted - confirm whether
            # abs(tlen) <= 3000 was intended.
            if tlen > 3000:
                continue

            if tlen > 0:
                mark_span(start_pos, start_pos + tlen)
            else:
                # Negative (or zero) TLEN: the span lies to the left of POS.
                mark_span(start_pos + tlen + 1, start_pos + 1)

        print("Generating .wig file\n")
        # print genomic profile as a wiggle file
        f.write("fixedStep chrom=genome start=1 step=1 span=1 \n")

        current_coverage = 0
        # The running sum of the difference array is the coverage.
        for position in range(genome_length):
            current_coverage += genome_change[position]
            f.write(str(current_coverage) + '\n')

    print("done!")
예제 #3
0
def Read_alignment(titleBam, dicoInit, lstError):
    """Collect per-position read and soft-clip counts for one BAM file.

    For every reference position 1..refLength the function counts forward
    and reverse reads covering it, soft-clipped reads anchored on it, and
    soft-clipped reads that were exported to FASTA.  The counters are
    written to ``<pathTmpDir>/<titleBam>_SC.json`` and the exported
    soft-clipped sequences to ``<pathTmpDir>/<titleBam>.fasta``.

    Args:
        titleBam: key of the BAM entry in ``dicoInit['dicoBam']``; also the
            stem of both output file names.
        dicoInit: run configuration.  Keys read here: ``'pathTmpDir'``,
            ``'dicoGbk'['refLength']``, ``'minQ'``, ``'SCsize'``,
            ``'MappedPart'`` and, under ``'dicoBam'[titleBam]``, ``'path'``,
            ``'path_downsampling'`` and ``'refName'``.  ``'path'`` is
            overwritten with ``'path_downsampling'`` when the latter is not
            empty.
        lstError: list that receives one message if processing fails.

    Returns:
        None.  Errors derived from ``Exception`` are not propagated; they
        are reported through ``lstError``.

    Both output files are handled by ``with`` blocks so they are closed on
    failure, and the JSON file is opened in text mode because
    ``json.dumps`` returns ``str``.
    """
    try:
        pathSCjson = os.path.join(dicoInit['pathTmpDir'],
                                  titleBam + "_SC.json")
        pathSCfasta = os.path.join(dicoInit['pathTmpDir'], titleBam + ".fasta")
        with open(pathSCfasta, 'w') as FASTA:
            refLength = dicoInit["dicoGbk"]['refLength']
            # Init dicoBam: one counter record per reference position
            dicoBam = {}
            for pos in range(1, refLength + 1):
                dicoBam[pos] = {'nb_reads_F': 0, 'nb_reads_R': 0,
                                'nb_sc_reads_F': 0, 'nb_sc_reads_R': 0,
                                'nb_sc_fasta_F': 0, 'nb_sc_fasta_R': 0}
            #***** BROWSE READS & SEARCH SCR *****#
            bamInfo = dicoInit['dicoBam'][titleBam]
            # Switch to downsampled BAM if exist
            if bamInfo['path_downsampling'] != "":
                bamInfo['path'] = bamInfo['path_downsampling']
            for alignment in pybam.read(bamInfo['path']):
                # Keep reads on the expected reference, with sufficient
                # mapping quality and without hard clipping.  Extra filters
                # on the "second in pair" and "supplementary" flags existed
                # as commented-out code and remain disabled.
                keep = (alignment.file_chromosomes[alignment.sam_refID] ==
                        bamInfo['refName']
                        and alignment.sam_mapq >= dicoInit['minQ']
                        and "H" not in alignment.sam_cigar_string)
                if not keep:
                    continue
                #***** RETRIEVE positions tuple & lastMapped infos *****#
                positionsLstTuple, lastMappedPos, lastMappedPosRead = \
                    cigar_list_to_tuple(alignment.sam_cigar_list,
                                        alignment.sam_pos0)
                # Evaluate the flag description once instead of twice.
                flagText = explain_sam_flags(alignment.sam_flag)
                isForward = (flagText == ""
                             or "mate reverse strand" in flagText)
                # Count reads
                readKey = 'nb_reads_F' if isForward else 'nb_reads_R'
                for posTuple in positionsLstTuple:
                    try:
                        dicoBam[posTuple[1] + 1][readKey] += 1
                    except (TypeError, LookupError):
                        # None case (no reference position) or a position
                        # outside the counter table: skipped on purpose.
                        pass
                if isForward:
                    #***** FORWARD reads *****#
                    # Count softclipped (right soft-clipping)
                    length, operation = alignment.sam_cigar_list[-1]
                    if operation == "S":
                        dicoBam[lastMappedPos]['nb_sc_reads_F'] += 1
                        readSeq = alignment.sam_seq
                        clippedSeq = readSeq[len(readSeq) - length:]
                        # Write to Fasta (apply filter) / not consider 'N'
                        if (length - clippedSeq.count("N") >=
                                dicoInit['SCsize']
                                and lastMappedPos + length <= refLength):
                            nb_mapped_part = min(dicoInit["MappedPart"],
                                                 lastMappedPosRead)
                            FASTA.write(
                                ">" + str(lastMappedPos) + "_" +
                                str(nb_mapped_part) + "_scrF_" +
                                alignment.sam_qname + "\n" +
                                readSeq[len(readSeq) - length -
                                        nb_mapped_part:] + "\n")
                            dicoBam[lastMappedPos]['nb_sc_fasta_F'] += 1
                else:
                    #***** REVERSE reads *****#
                    # Count softclipped (left soft-clipping)
                    length, operation = alignment.sam_cigar_list[0]
                    if operation == "S":
                        clipPos = alignment.sam_pos0 + 1
                        dicoBam[clipPos]['nb_sc_reads_R'] += 1
                        readSeq = alignment.sam_seq
                        # Write to Fasta (apply filter)
                        if (length - readSeq[0:length].count("N") >=
                                dicoInit['SCsize']
                                and clipPos - length >= 0):
                            # NOTE(review): the forward branch caps the
                            # mapped part with lastMappedPosRead, this one
                            # with the clip length - confirm the asymmetry
                            # is intended.
                            nb_mapped_part = min(dicoInit["MappedPart"],
                                                 length)
                            FASTA.write(
                                ">" + str(clipPos) + "_" +
                                str(nb_mapped_part) + "_scrR_" +
                                alignment.sam_qname + "\n" +
                                readSeq[0:length + nb_mapped_part] + "\n")
                            dicoBam[clipPos]['nb_sc_fasta_R'] += 1
        # WRITE .json results (text mode: json.dumps returns str)
        with open(pathSCjson, 'w') as JSON:
            JSON.write(json.dumps(dicoBam))
    except Exception:
        # Report instead of raising; KeyboardInterrupt and SystemExit are
        # deliberately left to propagate.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lstError.append("ReadThread \"" + titleBam + "\": " + str(exc_value) +
                        " (line " + str(exc_traceback.tb_lineno) + ")")
예제 #4
0
    print("debugging...\n")
    cont = 0
    for row in bam_f:
        length = row.sam_tlen
        flag = bin(int(row.sam_flag))
        print(flag, length, cont)


#################### MAIN ######################

if __name__ == '__main__':
    ########################## PART 2 ###########################

    # Open the Lactobacillus BAM file with pybam.
    # The file is expected to be sorted by genomic position.
    sorted_bam = pybam.read('../data/lact_sorted.bam')

    # The numbered analysis steps below are currently disabled (commented
    # out); each one would take the opened BAM reader as its only argument.

    # 9) Calculate PHYSICAL COVERAGE, creating related wig file
    # phy_coverage(sorted_bam)

    # 10) Calculate SEQUENCE COVERAGE, creating related wig file
    # sequence_coverage(sorted_bam)

    # 11) Calculate INSERT STATS
    # get_genome_stats(sorted_bam)

    # 12) Calculate AVERAGE INSERTS LENGTH, creating related wig file
    # avg_inserts_coverage(sorted_bam)

    # Value saved from an earlier get_genome_stats() run (step 11), hard-coded
    # so that step does not have to be re-run.
    # NOTE(review): presumably the mean insert length - confirm.
    avg = 2101.0225496051385
예제 #5
0
import pybam

# Collect the sequence (SAM SEQ field) of every alignment in the BAM file.
bam_data = pybam.read('./pb_467_2_sr_blasr.bam')
bam_rowData = [alignment.sam_seq for alignment in bam_data]

# Show the first sequence.  print() with one parenthesised argument behaves
# the same on Python 2 and Python 3, unlike the bare print statement.
print(bam_rowData[0])