#!/usr/bin/python
"""Print a FASTA file to stdout with every sequence upper-cased.

The path of the input FASTA file is taken from the first command-line
argument.  Each record is written as a ``>seqid`` header line followed by
the whole sequence, converted to upper case, on a single line.
"""
from pyfasta import Fasta
import numpy as np
import sys

fa = Fasta(sys.argv[1])  # softmasked sequence

for seqid, aseq in fa.iteritems():
    # str.upper() already yields the final text.  The previous version pushed
    # it through a numpy char array and ndarray.tostring(), which produced
    # the same characters at the cost of two extra full-length copies and a
    # dependency on the deprecated ``tostring`` alias.
    upper_seq = str(aseq).upper()
    # A parenthesised single-argument print gives the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(">%s\n%s" % (seqid, upper_seq))
a[start] = a[start - 1] else: a[start:end] = (a[start - 1] + a[end]) / 2 if __name__ == "__main__": import doctest doctest.testmod() if __name__ == "__main__": plookups = parse_posns(posns) fasta = Fasta("/opt/src/flatfeature/data/thaliana_v9.fasta") arrs = {} counts = {} for seqid, seq in fasta.iteritems(): arrs[seqid] = np.zeros((len(seq),), dtype=np.float32) counts[seqid] = np.zeros((len(seq),), dtype=np.float32) print arrs.keys() success = 0 for id, val in parse_hists(hists): start, length = plookups[id] assert val != 0 #print id, val, start, length seqid = id[3] if not seqid in "12345": continue a = arrs[seqid][start - 1: start + length] counts[seqid][start - 1: start + length] += 1 c = counts[seqid][start -1: start + length]