def print_header(sequence_id):
    """Break a sequence header string into its gene id and range fields.

    The header is split on whitespace. The first token, prefixed with '>',
    becomes ``gene_id``; the second token is split with the module-level
    ``HEADER_SPLIT`` pattern and pieces 1, 2 and 3 of that split are bound
    to ``chrom_id``, ``range_start`` and ``range_end`` respectively.

    Raises IndexError when the header has fewer than two whitespace
    separated tokens, or when the second token yields fewer than four
    pieces after splitting.

    NOTE(review): the parsed values are only bound to locals; nothing is
    printed or returned in the code visible here -- confirm whether this
    function is unfinished.
    """
    tokens = sequence_id.split()
    gene_id = '>' + tokens[0]
    # Piece 0 of the split is not used; pieces 1..3 carry the fields.
    pieces = re.split(HEADER_SPLIT, tokens[1])
    chrom_id, range_start, range_end = pieces[1], pieces[2], pieces[3]
# Generate sequences SeqList = generate_sequences(Bases, Spacerlen, Spacerpos, Begpos, Endposition) #resort list SeqList = sortlist(SEQUENCE, SeqList) # get rid of original sequence del SeqList[:1] # Number of sequences generated sys.stderr.write("Number of sequences made = %d \n" % (len(SeqList)-1)) #get header info header = seq_id.split() gene_id = '>' + header[0] rangefield = re.split(HEADER_SPLIT, header[1]) chrom_id = rangefield[1] range_start = rangefield[2] range_end = rangefield[3] finalout = [] #get index of changes for key, seq in enumerate(SeqList): changes = [i for i, (s1, s2) in enumerate(zip(SEQUENCE,seq)) if s1 != s2] finalout.append({"key":key+1, "mutations start": changes[0]+1 , "mutations":len(changes[0:]), "mutations end":changes[-1]+1, "sequence": seq}) #sort by index of change finalout = sorted(finalout, key=itemgetter("mutations start", "mutations end", "mutations")) finalout = {"gene id": gene_id, "chrom": chrom_id, "range start": range_start, "sequence length": len(SEQUENCE),\