def generate(X, seqType, args):
    '''
    Count gapped symbol pairs (pattern X-gap-X) per sequence and save them.

    Each sequence is truncated to ``args.terminusLength``. For every gap
    size g in 1..args.gGap and every ordered pair (a, b) of alphabet
    symbols, the feature is the number of windows of length g + 2 that
    start with ``a`` and end with ``b``. Features are ordered by gap size
    first, then by ``itertools.product(elements, repeat=2)`` order.

    # Note-1: args.gGap --> 1, 2, 3
    # Note-2: gGap --> ('X', 'X')

    :param X: iterable of sliceable sequences (presumably strings -- confirm
              against the caller)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
                    feature count of 0 to the saver
    :param args: object providing ``terminusLength`` and ``gGap``
    :return: None. The matrix is handed to ``save.datasetSave`` under the
             tag 'fg11'.
    '''
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    elements = utils.sequenceElements(seqType)
    pairs = list(itertools.product(elements, repeat=2))

    T = []
    for x in X:
        x = x[:args.terminusLength]
        t = []
        for gap in range(1, args.gGap + 1):
            # Tally the (first, last) symbols of every window in one pass
            # instead of re-scanning all windows once per pair.
            # Assumes utils.kmers(x, k) yields the length-k windows of x --
            # TODO confirm against utils.
            ends = Counter((v[0], v[-1]) for v in utils.kmers(x, gap + 2))
            # A Counter returns 0 for pairs that never occurred.
            t.extend(ends[pair] for pair in pairs)
        T.append(np.array(t))
    T = np.array(T)

    totalFeature = 0
    if seqType == 'DNA' or seqType == 'RNA':
        totalFeature = 4 * args.gGap * 4
    elif seqType == 'PROT':
        totalFeature = 20 * args.gGap * 20

    save.datasetSave(T, totalFeature, 'fg11')
def generate(X, seqType, args):
    '''
    Count k-mer occurrences for k = 1..args.kTuple and save them.

    # Reference-1: (http://rosalind.info/glossary/k-mer-composition/)
    # It is also called "k-mer composition".
    # Reference-2: iRecSpot-EF: https://www.sciencedirect.com/science/article/abs/pii/S0010482518302981

    Each sequence is truncated to ``args.terminusLength``; for every k the
    features follow ``itertools.product(elements, repeat=k)`` order.

    :param X: iterable of sequences supporting slicing and ``.count``
              (presumably strings -- confirm against the caller)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
                    feature count of 0 to the saver
    :param args: object providing ``terminusLength`` and ``kTuple``
    :return: None. The matrix is handed to ``save.datasetSave`` under the
             tag 'fkmer'.
    '''
    alphabet = utils.sequenceElements(seqType)

    rows = []
    for sequence in X:
        sequence = sequence[:args.terminusLength]
        counts = []
        for k in range(1, args.kTuple + 1):
            for letters in itertools.product(alphabet, repeat=k):
                # NOTE(review): str.count tallies NON-overlapping matches
                # ('AAAA'.count('AA') == 2), which differs from a sliding
                # window count -- confirm this is the intended definition.
                counts.append(sequence.count(''.join(letters)))
        rows.append(np.array(counts))
    T = np.array(rows)

    if seqType in ('DNA', 'RNA'):
        alphabetSize = 4
    elif seqType == 'PROT':
        alphabetSize = 20
    else:
        alphabetSize = None

    totalFeature = 0
    if alphabetSize is not None:
        # Sum of alphabetSize**k over every k-mer length that was counted.
        totalFeature = np.sum(
            [alphabetSize**k for k in range(1, args.kTuple + 1)]
        )

    save.datasetSave(T, totalFeature, 'fkmer')
def generate(X, seqType, args):
    '''
    Build position-wise one-hot gapped-pair (X-gap-X) features and save them.

    Each sequence is truncated to ``args.terminusLength``. For every gap
    size g in 1..args.gGap, each window of length g + 2 becomes one row
    that is one-hot over the ordered symbol pairs (first symbol, last
    symbol). The per-gap blocks are zero-padded to a common height and
    concatenated side by side.

    A window whose end symbols are not both in the alphabet yields an
    all-zero row, so every row keeps the same width.

    :param X: iterable of sliceable sequences (presumably strings -- confirm
              against the caller)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
                    feature count of 0 to the saver
    :param args: object providing ``terminusLength`` and ``gGap``
    :return: None. The array is handed to ``save.datasetSave`` under the
             tag 'pg11'.
    '''
    elements = utils.sequenceElements(seqType)
    # Columns follow the insertion order of the joined pairs; dict.fromkeys
    # collapses duplicate labels the same way a dict keyed on them would.
    labels = list(dict.fromkeys(
        ''.join(pair) for pair in itertools.product(elements, repeat=2)
    ))
    column = {label: j for j, label in enumerate(labels)}
    # Width comes from the alphabet itself (16 for 4 symbols, 400 for 20),
    # so padding rows always match the one-hot rows.
    width = len(labels)

    # Height of the gap-1 block for a full-length sequence; every gap block
    # is padded up to this so the blocks can be concatenated along axis 1.
    expectedRows = args.terminusLength - (2 + 1) + 1

    T = []
    for x in X:
        x = x[:args.terminusLength]
        merged = []
        for gap in range(1, args.gGap + 1):
            # g11 --> 2, gGap (g11+gGap).
            # Assumes utils.kmers(x, k) yields the length-k windows of x --
            # TODO confirm against utils.
            hits = [
                column.get(kmer[0] + kmer[-1])
                for kmer in utils.kmers(x, 2 + gap)
            ]
            # Pad from the rows actually produced, not from len(x): a
            # sequence shorter than the window then cannot make the blocks
            # disagree in height and break the concatenation.
            block = np.zeros((max(len(hits), expectedRows), width), dtype=int)
            for row, col in enumerate(hits):
                if col is not None:
                    block[row, col] = 1
            merged.append(block)
        T.append(np.concatenate(merged, axis=1))
    T = np.array(T)

    totalFeature = 0
    if seqType == 'DNA' or seqType == 'RNA':
        totalFeature = 4 * args.gGap * 4
    elif seqType == 'PROT':
        totalFeature = 20 * args.gGap * 20

    save.datasetSave(T, totalFeature, 'pg11')
def generate(X, seqType, args):
    '''
    Build position-wise one-hot k-mer features and save them.

    Each sequence is truncated to ``args.terminusLength``. Every window of
    length ``args.kTuple`` becomes one row that is one-hot over all k-mers,
    in ``itertools.product(elements, repeat=args.kTuple)`` order. Each
    matrix is zero-padded to ``terminusLength - kTuple + 1`` rows.

    A window that is not a k-mer over the alphabet yields an all-zero row,
    so every row keeps the same width.

    :param X: iterable of sliceable sequences (presumably strings -- confirm
              against the caller)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value reports a
                    feature count of 0 to the saver
    :param args: object providing ``terminusLength`` and ``kTuple``
    :return: None. The array is handed to ``save.datasetSave`` under the
             tag 'pkmer'.
    '''
    elements = utils.sequenceElements(seqType)
    # Columns follow the insertion order of the joined k-mers; dict.fromkeys
    # collapses duplicate labels the same way a dict keyed on them would.
    labels = list(dict.fromkeys(
        ''.join(letters)
        for letters in itertools.product(elements, repeat=args.kTuple)
    ))
    column = {label: j for j, label in enumerate(labels)}
    # Width comes from the alphabet itself, so padding rows always match
    # the one-hot rows.
    width = len(labels)

    terminusLength = args.terminusLength
    # Number of windows in a full-length sequence; shorter ones are padded.
    expectedRows = terminusLength - args.kTuple + 1

    T = []
    for x in X:
        x = x[:terminusLength]
        # Assumes utils.kmers(x, k) yields the length-k windows of x --
        # TODO confirm against utils.
        hits = [column.get(kmer) for kmer in utils.kmers(x, args.kTuple)]
        # Pad from the rows actually produced, not from len(x): a sequence
        # shorter than kTuple then still gets exactly expectedRows rows.
        block = np.zeros((max(len(hits), expectedRows), width), dtype=int)
        for row, col in enumerate(hits):
            if col is not None:
                block[row, col] = 1
        T.append(block)
    T = np.array(T)

    totalFeature = 0
    if seqType == 'DNA' or seqType == 'RNA':
        totalFeature = 4**args.kTuple
    elif seqType == 'PROT':
        totalFeature = 20**args.kTuple

    save.datasetSave(T, totalFeature, 'pkmer')