Example #1
0
def print_header(sequence_id):
    """Split a sequence header into its gene id and range components.

    The header is split on whitespace: the first token becomes the gene id
    (prefixed with '>'), and the second token is split with the module-level
    HEADER_SPLIT pattern to pick out fields 1, 2 and 3.

    NOTE(review): as written, the parsed values are only bound to local
    names; nothing is printed or returned by the lines of this function.

    Raises:
        IndexError: if the header has fewer than two whitespace-separated
            tokens, or the second token splits into fewer than four fields.
    """
    tokens = sequence_id.split()
    gene_id = '>' + tokens[0]
    range_parts = re.split(HEADER_SPLIT, tokens[1])
    # Field 0 of the split is not used; fields 1-3 are presumably the
    # chromosome id and the start/end of the range (going by their names).
    chrom_id = range_parts[1]
    range_start, range_end = range_parts[2], range_parts[3]
Example #2
0
# Build the candidate sequences from the configured bases and spacer/position
# settings (generate_sequences is defined elsewhere in this file).
SeqList = generate_sequences(Bases, Spacerlen, Spacerpos, Begpos, Endposition)

# Re-sort the list relative to the original SEQUENCE.
SeqList = sortlist(SEQUENCE, SeqList)

# Drop the first entry -- expected to be the original sequence after the
# re-sort above (TODO confirm against sortlist).
del SeqList[:1]

# Report the number of sequences generated.
# NOTE(review): the first entry has already been removed above, so the extra
# "-1" here may under-count by one; left unchanged -- confirm the intent.
sys.stderr.write("Number of sequences made = %d \n" % (len(SeqList)-1))

# Parse the header: the first whitespace-separated token is the gene id, the
# second is split with HEADER_SPLIT and fields 1-3 are read from it.
header = seq_id.split()
gene_id = '>' + header[0]
rangefield = re.split(HEADER_SPLIT, header[1])
chrom_id = rangefield[1]
range_start = rangefield[2]
range_end = rangefield[3]

finalout = []

# For every generated sequence, record where it differs from SEQUENCE.
# Positions are reported 1-based; "key" is the 1-based index in SeqList.
for key, seq in enumerate(SeqList):
    changes = [i for i, (s1, s2) in enumerate(zip(SEQUENCE, seq)) if s1 != s2]
    if not changes:
        # A sequence identical to SEQUENCE has no first/last mutation index;
        # indexing changes[0] would raise IndexError, so skip it explicitly.
        sys.stderr.write("Skipping sequence %d: no differences from original \n" % (key+1))
        continue
    finalout.append({"key": key+1, "mutations start": changes[0]+1, "mutations": len(changes), "mutations end": changes[-1]+1, "sequence": seq})

# Order the records by first mutated position, then last, then mutation count.
finalout = sorted(finalout, key=itemgetter("mutations start", "mutations end", "mutations"))

finalout = {"gene id": gene_id, "chrom": chrom_id, "range start": range_start, "sequence length": len(SEQUENCE),\