def write_genepred_to_fasta_directionless(gpd_filename, ref_fasta, out_fasta):
    """Write the concatenated exon sequence of each genePred entry as FASTA.

    Every non-comment line of ``gpd_filename`` is parsed with
    ``genepred_line_to_dictionary``. For an entry whose ``chrom`` is present
    in the reference, the slices ``exonStarts[i]:exonEnds[i]`` of that
    reference sequence are concatenated for ``i`` in ``range(exonCount)`` and
    written as a ``>name`` header line followed by the sequence line.

    The entry's strand is never consulted here, so the sequence is emitted
    exactly as sliced from the reference. Entries whose chromosome is not in
    the reference are skipped without any message.

    Args:
        gpd_filename: Path of the genePred file to read. Lines beginning
            with ``#`` are ignored.
        ref_fasta: Path of the reference FASTA, loaded through
            ``sequence_basics.read_fasta_into_hash`` (presumably a mapping of
            sequence name to sequence string -- confirm against that helper).
        out_fasta: Path of the FASTA file to create or overwrite.
    """
    # The context manager guarantees the output is flushed and closed even if
    # loading the reference or parsing a genePred line raises.
    with open(out_fasta, 'w') as ofile:
        ref = sequence_basics.read_fasta_into_hash(ref_fasta)
        with open(gpd_filename) as f:
            for line in f:
                if line.startswith('#'):
                    continue
                d = genepred_line_to_dictionary(line)
                if d['chrom'] not in ref:
                    continue
                chrom_seq = ref[d['chrom']]
                # exonCount (not the list lengths) decides how many exons are
                # used, exactly as before; join avoids repeated string copies.
                seq = ''.join(
                    chrom_seq[d['exonStarts'][i]:d['exonEnds'][i]]
                    for i in range(0, d['exonCount'])
                )
                ofile.write(">" + str(d['name']) + "\n" + seq + "\n")
def get_splice_bases(junctions, genome_filename):
    """Look up the two bases read at each side of every junction.

    For each junction the first side is read at ``coo1, coo1 + 1`` when
    ``dir1`` is ``"+"`` and at ``coo1 - 3, coo1 - 2`` (then passed through
    ``sequence_basics.rc``) when ``dir1`` is ``"-"``. The second side is read
    at ``coo2 - 3, coo2 - 2`` when ``dir2`` is ``"+"`` and at
    ``coo2, coo2 + 1`` (then passed through ``sequence_basics.rc``) when
    ``dir2`` is ``"-"``.

    A side is reported as ``"??"`` when its direction is neither ``"+"`` nor
    ``"-"`` or when either position falls outside the sequence. Positions
    before the start of the sequence are treated as out of range; they are
    not allowed to wrap around to the end via negative indexing.

    NOTE(review): this module defines ``get_splice_bases`` a second time
    right after this definition; the later one is what ends up bound to the
    name. Confirm which copy is meant to stay.

    Args:
        junctions: Mapping of junction id to a record with the keys
            ``chr1``, ``coo1``, ``dir1``, ``chr2``, ``coo2`` and ``dir2``.
            Coordinates are converted with ``int()``.
        genome_filename: Path of the genome FASTA, loaded through
            ``sequence_basics.read_fasta_into_hash`` (presumably a mapping of
            sequence name to sequence string -- confirm against that helper).

    Returns:
        dict mapping each junction id to ``"<side1> <side2>"``, upper-cased.
    """
    genome = sequence_basics.read_fasta_into_hash(genome_filename)

    def two_bases(chrom, start, revcomp):
        # Bases at start and start + 1, or "??" when either index is off the
        # sequence. The explicit lower bound stops negative indices from
        # silently reading from the end of the sequence.
        seq = genome[chrom]
        if start < 0 or start + 1 >= len(seq):
            return "??"
        pair = seq[start] + seq[start + 1]
        return sequence_basics.rc(pair) if revcomp else pair

    bases = {}
    for junction_id in junctions:
        junction = junctions[junction_id]
        chr1 = junction["chr1"]
        coo1 = int(junction["coo1"])
        dir1 = junction["dir1"]
        chr2 = junction["chr2"]
        coo2 = int(junction["coo2"])
        dir2 = junction["dir2"]

        # The genome is only consulted for a recognised direction, so an
        # unknown direction yields "??" without touching the chromosome.
        bases1 = "??"
        if dir1 == "+":
            bases1 = two_bases(chr1, coo1, False)
        elif dir1 == "-":
            bases1 = two_bases(chr1, coo1 - 3, True)

        bases2 = "??"
        if dir2 == "+":
            bases2 = two_bases(chr2, coo2 - 3, False)
        elif dir2 == "-":
            bases2 = two_bases(chr2, coo2, True)

        bases[junction_id] = bases1.upper() + " " + bases2.upper()
    return bases
def get_splice_bases(junctions, genome_filename):
    """Return the two bases read at each side of every junction.

    Side 1 is read at indices ``coo1`` and ``coo1 + 1`` for ``dir1 == '+'``,
    or at ``coo1 - 3`` and ``coo1 - 2`` followed by ``sequence_basics.rc``
    for ``dir1 == '-'``. Side 2 is read at ``coo2 - 3`` and ``coo2 - 2`` for
    ``dir2 == '+'``, or at ``coo2`` and ``coo2 + 1`` followed by
    ``sequence_basics.rc`` for ``dir2 == '-'``.

    A side stays ``'??'`` when its direction is neither ``'+'`` nor ``'-'``
    or when one of its two indices lies outside the sequence. Indices below
    zero count as outside; without that check Python's negative indexing
    would quietly return bases from the far end of the sequence.

    NOTE(review): an earlier definition of ``get_splice_bases`` sits directly
    above this one in the module and is replaced by this definition at import
    time. Confirm which copy is meant to stay.

    Args:
        junctions: Mapping of junction id to a record holding ``chr1``,
            ``coo1``, ``dir1``, ``chr2``, ``coo2`` and ``dir2``. Coordinates
            are converted with ``int()``.
        genome_filename: Path of the genome FASTA, loaded through
            ``sequence_basics.read_fasta_into_hash`` (presumably a mapping of
            sequence name to sequence string -- confirm against that helper).

    Returns:
        dict mapping each junction id to ``'<side1> <side2>'``, upper-cased.
    """
    genome = sequence_basics.read_fasta_into_hash(genome_filename)

    def read_pair(chrom, first, revcomp):
        # Two adjacent bases starting at `first`, or '??' if either index is
        # before the start or past the end of the chromosome sequence.
        seq = genome[chrom]
        if first < 0 or first + 1 >= len(seq):
            return '??'
        pair = seq[first] + seq[first + 1]
        if revcomp:
            return sequence_basics.rc(pair)
        return pair

    bases = {}
    for jid in junctions:
        entry = junctions[jid]
        chr1 = entry['chr1']
        coo1 = int(entry['coo1'])
        dir1 = entry['dir1']
        chr2 = entry['chr2']
        coo2 = int(entry['coo2'])
        dir2 = entry['dir2']

        # Only a recognised direction triggers a genome lookup; anything else
        # keeps the '??' placeholder.
        bases1 = '??'
        if dir1 == '+':
            bases1 = read_pair(chr1, coo1, False)
        elif dir1 == '-':
            bases1 = read_pair(chr1, coo1 - 3, True)

        bases2 = '??'
        if dir2 == '+':
            bases2 = read_pair(chr2, coo2 - 3, False)
        elif dir2 == '-':
            bases2 = read_pair(chr2, coo2, True)

        bases[jid] = bases1.upper() + " " + bases2.upper()
    return bases