encodings = [] header = ['#'] for g in range(gap + 1): for f in features: header.append(f + '.gap' + str(g)) encodings.append(header) for i in fastas: name, sequence = i[0], re.sub('-', '', i[1]) code = [name] if len(sequence) < 2 * gap + 3: print( 'Error: for "KSCTriad" encoding, the input fasta sequences should be greater than (2*gap+3). \n\n' ) return 0 code = code + CalculateKSCTriad(sequence, gap, features, AADict) encodings.append(code) return encodings if __name__ == '__main__': if len(sys.argv) == 1: print(USAGE) sys.exit(1) fastas = readFasta.readFasta(sys.argv[1]) k = int(sys.argv[2]) if len(sys.argv) >= 3 else 5 output = sys.argv[3] if len(sys.argv) >= 4 else 'encoding.tsv' encodings = KSCTriad(fastas, k) saveCode.savetsv(encodings, output)
def CTDCClass(fastas, groups):
    """Encode each sequence as the per-group fraction reported by ``Count``.

    Args:
        fastas: Iterable of records where ``record[0]`` is the sequence name
            and ``record[1]`` is the sequence string. ``'-'`` characters are
            removed from the sequence before it is measured.
        groups: Sequence of group strings; one output column is produced per
            group, in the given order.

    Returns:
        A list of rows. The first row is the header
        ``['#', 'g.1', ..., 'g.N']``; every following row is
        ``[name, v1, ..., vN]`` with ``vK = Count(groups[K-1], sequence) /
        len(sequence)``. A record whose sequence is empty once the gaps are
        removed gets ``0.0`` in every column instead of raising
        ``ZeroDivisionError``.
    """
    # NOTE(review): `Count` is defined elsewhere in this file; it presumably
    # returns how many residues of `sequence` belong to `group` -- confirm.
    header = ['#'] + ['g.' + str(idx) for idx in range(1, len(groups) + 1)]
    encodings = [header]
    for record in fastas:
        name, sequence = record[0], re.sub('-', '', record[1])
        length = len(sequence)
        if length:
            fractions = [Count(group, sequence) / length for group in groups]
        else:
            # Nothing left after stripping gaps: there is no composition to
            # report, so emit zeros rather than dividing by zero.
            fractions = [0.0] * len(groups)
        encodings.append([name] + fractions)
    return encodings


if __name__ == '__main__':
    # Expected arguments: <input fasta> <output file> <group> <group> [...]
    if len(sys.argv) < 5:
        print(USAGE)
        sys.exit(1)
    groups = sys.argv[3:]
    # Concatenated, the groups must contain each of the 20 amino-acid letters
    # exactly once; any other character is dropped before the check.
    myStr = ''.join(groups)
    myStr = re.sub('[^ACDEFGHIKLMNPQRSTVWY]', '', myStr)
    if len(myStr) != 20 or len(set(myStr)) != 20:
        print('\nERROR: The amino acid must be no-repeat in each groups and the sum is 20!\n')
        # Stop here: continuing would write an encoding built from invalid
        # groups to the output file.
        sys.exit(1)
    fastas = readFasta.readFasta(sys.argv[1])
    encodings = CTDCClass(fastas, groups)
    saveCode.savetsv(encodings, sys.argv[2])
sum([ Rvalue(sequence[j], sequence[j + n], AADict, AAProperty1) for j in range(len(sequence) - n) ]) / (len(sequence) - n) if (len(sequence) - n) != 0 else 0) myDict = {} for aa in AA: myDict[aa] = sequence.count(aa) code = code + [myDict[aa] / (1 + w * sum(theta)) for aa in AA] code = code + [(w * j) / (1 + w * sum(theta)) for j in theta] encodings.append(code) return encodings fastas = readFasta.readFasta('datasets_fasta/dataset_balanceado.txt') encodings = PAAC(fastas) saveCode.savetsv(encodings, 'BBDD/encodings_dataset_balanceado.tsv') fastas = readFasta.readFasta('datasets_fasta/All_peptides.txt') encodings = PAAC(fastas) saveCode.savetsv(encodings, 'BBDD/encodings_Allpeptides.tsv') fastas = readFasta.readFasta('datasets_fasta/dataset_ejemplo.csv') encodings = PAAC(fastas) saveCode.savetsv(encodings, 'BBDD/encodings_dataset_ejemplo.tsv')