def main(args):
    """Print one genomic subsequence as a FASTA record on stdout.

    Loads the GFF file named by ``args['gffile']``, asks ``get_coordinates``
    for the sequence id and the start/end positions belonging to
    ``args['genes']``, then slices that region out of the matching sequence
    in the FASTA file ``args['fastafile']``.

    When ``args['rv']`` is true, the header lists the positions as
    ``end:start`` and the reverse complement of the slice is printed.
    Progress messages are written to stderr; the FASTA record goes to stdout.

    Args:
        args: mapping providing the keys 'gffile', 'genes', 'rv' and
            'fastafile'.
    """
    gfhash = gff3.get_gff_hash(args['gffile'])
    sys.stderr.write("gff loaded ")

    gid, startpos, endpos = get_coordinates(gfhash, args['genes'])
    sys.stderr.write("| coordinates identified ")

    # Single-argument print(...) is identical to the Python 2 print statement
    # and also parses on Python 3.
    if not args['rv']:
        print(">%s_%s:%s" % (gid, startpos, endpos))
    else:
        print(">%s_%s:%s" % (gid, endpos, startpos))

    seqhash = fasta.get_sequence_hash(args['fastafile'])
    sys.stderr.write("| fasta loaded ")

    # NOTE(review): the "- 1" suggests startpos/endpos are 1-based inclusive
    # coordinates -- confirm against get_coordinates.
    seq = seqhash[gid][startpos - 1:endpos]
    if args['rv']:
        # str() replaces Seq.tostring(), which newer Biopython releases no
        # longer provide (assumes Seq is Bio.Seq.Seq).
        seq = str(Seq(seq).reverse_complement())
    sys.stderr.write("| subsequence extracted ")

    print(seq)
    sys.stderr.write("\n")
def align(sequences, ids, outfile=False):
    """Align sequences with the external ``muscle`` program.

    The sequences are written to a temporary FASTA file, ``muscle`` is run
    on it, and its output is read back with ``fasta.get_sequence_hash``.
    Both temporary files are removed before returning, even on failure.

    Args:
        sequences: sequence strings to align.
        ids: FASTA identifiers; ``ids[i]`` labels ``sequences[i]``.
        outfile: accepted for backward compatibility only. The value is not
            used: the alignment is always written to a temporary file.

    Returns:
        A list with one entry per element of ``ids``, in the same order,
        looked up by id in the parsed ``muscle`` output.

    Raises:
        OSError: if the ``muscle`` executable cannot be started.
        KeyError: if an id is missing from the ``muscle`` output (for example
            because the run failed; its exit status is not inspected).
    """
    import subprocess  # local import so the block does not rely on file-level imports

    temp_paths = []
    try:
        handle, infile = tempfile.mkstemp()
        temp_paths.append(infile)
        with os.fdopen(handle, 'w') as fw:
            # Index into ids so a too-short ids list still raises IndexError.
            for i, sequence in enumerate(sequences):
                fw.write(">" + ids[i] + "\n" + sequence + "\n")

        handle, aln_path = tempfile.mkstemp()
        os.close(handle)
        temp_paths.append(aln_path)

        # An argument list avoids shell parsing, so temp paths containing
        # spaces or shell metacharacters are passed through intact.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(
                ["muscle", "-in", infile, "-out", aln_path, "-quiet"],
                stderr=devnull,
            )

        alnhash = fasta.get_sequence_hash(aln_path)
        return [alnhash[gid] for gid in ids]
    finally:
        for path in temp_paths:
            os.unlink(path)
def main(args):
    """Print one genomic subsequence as a FASTA record on stdout.

    Loads the GFF file named by ``args['gffile']``, asks ``get_coordinates``
    for the sequence id and the start/end positions belonging to
    ``args['genes']``, then slices that region out of the matching sequence
    in the FASTA file ``args['fastafile']``.

    When ``args['rv']`` is true, the header lists the positions as
    ``end:start`` and the reverse complement of the slice is printed.
    Progress messages are written to stderr; the FASTA record goes to stdout.

    Args:
        args: mapping providing the keys 'gffile', 'genes', 'rv' and
            'fastafile'.
    """
    gfhash = gff3.get_gff_hash(args['gffile'])
    sys.stderr.write("gff loaded ")

    gid, startpos, endpos = get_coordinates(gfhash, args['genes'])
    sys.stderr.write("| coordinates identified ")

    # Single-argument print(...) is identical to the Python 2 print statement
    # and also parses on Python 3.
    if not args['rv']:
        print(">%s_%s:%s" % (gid, startpos, endpos))
    else:
        print(">%s_%s:%s" % (gid, endpos, startpos))

    seqhash = fasta.get_sequence_hash(args['fastafile'])
    sys.stderr.write("| fasta loaded ")

    # NOTE(review): the "- 1" suggests startpos/endpos are 1-based inclusive
    # coordinates -- confirm against get_coordinates.
    seq = seqhash[gid][startpos - 1:endpos]
    if args['rv']:
        # str() replaces Seq.tostring(), which newer Biopython releases no
        # longer provide (assumes Seq is Bio.Seq.Seq).
        seq = str(Seq(seq).reverse_complement())
    sys.stderr.write("| subsequence extracted ")

    print(seq)
    sys.stderr.write("\n")
def align(sequences, ids, outfile=False):
    """Align sequences with the external ``muscle`` program.

    The sequences are written to a temporary FASTA file, ``muscle`` is run
    on it, and its output is read back with ``fasta.get_sequence_hash``.
    Both temporary files are removed before returning, even on failure.

    Args:
        sequences: sequence strings to align.
        ids: FASTA identifiers; ``ids[i]`` labels ``sequences[i]``.
        outfile: accepted for backward compatibility only. The value is not
            used: the alignment is always written to a temporary file.

    Returns:
        A list with one entry per element of ``ids``, in the same order,
        looked up by id in the parsed ``muscle`` output.

    Raises:
        OSError: if the ``muscle`` executable cannot be started.
        KeyError: if an id is missing from the ``muscle`` output (for example
            because the run failed; its exit status is not inspected).
    """
    import subprocess  # local import so the block does not rely on file-level imports

    temp_paths = []
    try:
        handle, infile = tempfile.mkstemp()
        temp_paths.append(infile)
        with os.fdopen(handle, 'w') as fw:
            # Index into ids so a too-short ids list still raises IndexError.
            for i, sequence in enumerate(sequences):
                fw.write(">" + ids[i] + "\n" + sequence + "\n")

        handle, aln_path = tempfile.mkstemp()
        os.close(handle)
        temp_paths.append(aln_path)

        # An argument list avoids shell parsing, so temp paths containing
        # spaces or shell metacharacters are passed through intact.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(
                ["muscle", "-in", infile, "-out", aln_path, "-quiet"],
                stderr=devnull,
            )

        alnhash = fasta.get_sequence_hash(aln_path)
        return [alnhash[gid] for gid in ids]
    finally:
        for path in temp_paths:
            os.unlink(path)
def get_seq_lengths(fastafile):
    """Map every sequence id in a FASTA file to the length of its sequence.

    Args:
        fastafile: path handed to ``fasta.get_sequence_hash``.

    Returns:
        A dict of ``{sequence id: len(sequence)}``.
    """
    seqhash = fasta.get_sequence_hash(fastafile)
    # .items() is available on Python 2 and 3 alike; .iteritems() exists only
    # on Python 2 (assumes get_sequence_hash returns a dict-like object).
    return {gid: len(seq) for gid, seq in seqhash.items()}
def prefetch_sequences(pepfile):
    """Load the sequences stored in ``pepfile``.

    Thin wrapper: returns whatever ``fasta.get_sequence_hash`` produces for
    the given file.
    """
    sequences = fasta.get_sequence_hash(pepfile)
    return sequences
def main(args):
    """Print every sequence of a FASTA file as one tab-separated line.

    Each output line on stdout has the form ``<id><TAB><sequence>``.

    Args:
        args: mapping providing the key 'fastafile', the path handed to
            ``fasta.get_sequence_hash``.
    """
    seqhash = fasta.get_sequence_hash(args['fastafile'])
    # .items(), str.join and single-argument print(...) give the same output
    # as the Python 2-only iteritems()/string.join/print statement, and also
    # work on Python 3.
    for gid, seq in seqhash.items():
        print("\t".join([gid, seq]))