Beispiel #1
0
# Path of the junction fasta; an existing file at this path can stand in for
# regenerating the junctions when use_prior is set.
fasta_for_bowtie_index_name = (
    out_dir + stem + "_SPORK_preprocess_Junctions.fa"
)
gtfs = generate_gtfs(gtf)

have_prior_jcts = use_prior and os.path.isfile(fasta_for_bowtie_index_name)
if have_prior_jcts:
    prior_jcts_msg = "Using prior jcts: " + fasta_for_bowtie_index_name
    write_time(prior_jcts_msg, time.time(), timer_file_path)
else:
    # Index every five prime mapping under its base_read_id, i.e. the part of
    # the read_id before the first "/". A base_read_id seen twice is treated
    # as a fatal input error.
    id_to_sam_dict = {}
    with open(five_prime_mapped_name, "r") as five_prime_sam:
        in_header = True
        for raw_line in five_prime_sam:
            # Only the leading run of "@" lines is skipped as header; once a
            # non-"@" line has been seen, every later line is parsed.
            if in_header and raw_line[0] == "@":
                continue
            in_header = False

            five_prime_entry = SAMEntry(raw_line)
            read_key = five_prime_entry.read_id.split("/")[0]

            # The duplicate check runs before the chromosome filter, and is
            # made against the entries that were actually stored.
            if read_key in id_to_sam_dict:
                sys.stderr.write(
                    "ERROR: Found duplicate base_read_id in 5_prime mappings\n"
                )
                sys.exit(1)

            # Drop the strange chromosomes (e.g. chrUn_gl000220): any
            # chromosome name containing an underscore is not stored.
            if "_" in five_prime_entry.chromosome:
                continue
            id_to_sam_dict[read_key] = five_prime_entry

    # Now walk through the three prime mappings creating bin pairs from all shared ids
    bin_pairs = []
    with open(three_prime_mapped_name, "r") as three_prime_mapped:
        sam_line = three_prime_mapped.readline()