def processLines(lines, best_sequences, find_params, verbose=False):
    """Fold a batch of equally long sequences into the current best set.

    Each entry of ``best_sequences`` is a ``(sequence, coefficients)`` pair.
    Every string in ``lines`` is analysed with ``parsers.analyze_string`` and
    kept only if ``is_less`` does not rank its coefficients below those of an
    entry already kept; after a sequence is accepted, entries that ``is_less``
    ranks below it are dropped.

    Args:
        lines: New candidate sequences; all must have the same length.
        best_sequences: Current ``(sequence, coefficients)`` pairs; all
            sequences must have the same length.
        find_params: Passed through unchanged to ``parsers.analyze_string``.
        verbose: When it compares equal to ``False`` the lines are iterated
            directly; otherwise they are wrapped in a ``tqdm`` progress bar.

    Returns:
        The updated list of ``(sequence, coefficients)`` pairs. If ``lines``
        is empty, or the new lines are longer than the stored sequences, the
        ``best_sequences`` object is returned untouched.

    Raises:
        AssertionError: If the sequences within ``best_sequences`` or within
            ``lines`` differ in length.

    Note:
        The first accepted sequence is appended to the list object that was
        passed in before the name is rebound to a freshly built list, so the
        caller's list can be mutated.
    """
    has_best = len(best_sequences) > 0
    if has_best:
        best_len = len(best_sequences[0][0])
        print("len of best_sequences={}".format(best_len))
        for entry in best_sequences[1:]:
            assert len(entry[0]) == best_len

    if len(lines) == 0:
        return best_sequences

    line_len = len(lines[0])
    print("len of new lines={}".format(line_len))
    for line in lines[1:]:
        assert len(line) == line_len

    if has_best:
        # Stored sequences shorter than the new ones win outright; longer
        # stored sequences are discarded in favour of the new batch.
        if best_len < line_len:
            return best_sequences
        if best_len > line_len:
            best_sequences = []

    if verbose == False:
        space = lines
    else:
        space = tqdm.tqdm(lines)

    for seq in space:
        seq_coefs = parsers.analyze_string(seq, find_params)

        dominated = False
        for _, other_coefs in best_sequences:
            if is_less(seq_coefs, other_coefs):
                dominated = True
                break
        if dominated:
            continue

        best_sequences.append((seq, seq_coefs))
        # Rebuild the list without the entries ranked below the newcomer.
        best_sequences = [
            entry for entry in best_sequences
            if not is_less(entry[1], seq_coefs)
        ]

    return best_sequences
def create_table(kParam, resultStringSize):
    """Build a table of analysed repeat strings for patterns of length ``kParam``.

    For every pattern length from 1 to ``kParam`` each pattern over the
    alphabet ``acgt`` is repeated until it is ``resultStringSize`` characters
    long. A repeated string is skipped when it, or its reverse, was already
    produced (by a shorter or an earlier pattern). The surviving strings whose
    pattern length equals ``kParam`` are analysed with
    ``parsers.analyze_string`` with all four search flags switched off.

    Args:
        kParam: Length of the patterns that end up in the table.
        resultStringSize: Length of each repeated string; it must be
            divisible by every pattern length in ``1..kParam``.

    Returns:
        A list of tuples ``(kParam, pattern, fullString) + seq_coefs``, where
        ``seq_coefs`` is whatever ``parsers.analyze_string`` returns (it has
        to be a tuple for the concatenation to work).

    Raises:
        AssertionError: If ``resultStringSize`` is not divisible by one of
            the pattern lengths.
    """
    result = []
    excludedStrings = set()

    for patternSize in tqdm.trange(1, kParam + 1):
        assert resultStringSize % patternSize == 0
        repeats = resultStringSize // patternSize

        for patternString in itertools.product('acgt', repeat=patternSize):
            pattern = "".join(patternString)
            fullString = pattern * repeats

            # fullString equals the reverse of a stored string exactly when
            # its own reverse is stored, so two set lookups replace a scan
            # over every stored string.
            if (fullString in excludedStrings
                    or fullString[::-1] in excludedStrings):
                continue
            excludedStrings.add(fullString)

            if patternSize == kParam:
                result.append((pattern, fullString))

    find_GQD = False
    find_IMT = False
    find_TRP = False
    find_HRP = False

    result_lines = []
    for pattern, fullString in result:
        # Analyse the string that belongs to this row. The previous code
        # passed a stale loop variable from the de-duplication scan.
        seq_coefs = parsers.analyze_string(
            fullString, [find_GQD, find_IMT, find_TRP, find_HRP])
        result_lines.append((kParam, pattern, fullString) + seq_coefs)
    return result_lines
from itertools import *
from modules import parsers
from timeit import default_timer as timer

# Enumerate every 'acgt' string of the given length, analyse each one and
# record those whose result tuple passes the threshold, followed by the
# elapsed wall-clock time.
length = 14
start = timer()

with open('result.txt', 'w') as f:
    for s in map(''.join, product('acgt', repeat=length)):
        result_tuple = parsers.analyze_string(s, [True, True, True, 1])
        # NOTE(review): tuple comparison is lexicographic, so this is not
        # "every component >= 1" -- confirm that this is the intended filter.
        if not result_tuple >= (1, 1, 1, 1):
            continue
        f.write('{0} {1}\n'.format(s, result_tuple))

    # The timing summary goes into the same file, after the last match.
    end = timer()
    f.write('total seconds:{0}'.format(end - start))
# -*- coding: utf-8 -*-
import sys
import os

# Extend the import path with '../' (relative to the working directory)
# before importing the project package.
sys.path.append('../')
from modules import parsers

if __name__ == "__main__":
    # Demo: analyse one hard-coded sequence with all four searches enabled
    # and print one strength value per structure type.
    seq = "tgactgactgactgactgactgac"
    find_GQD = True
    find_IMT = True
    find_TRP = True
    find_HRP = True

    result = parsers.analyze_string(
        seq, [find_GQD, find_IMT, find_TRP, find_HRP])

    # NOTE(review): the result is assumed to follow the flag order
    # (GQD, IMT, TRP, HRP), so index 2 is TRP and index 3 is HRP -- confirm
    # against parsers.analyze_string. The labels used to be swapped.
    print("GQD strength = {}".format(result[0]))
    print("IMT strength = {}".format(result[1]))
    print("HRP strength = {}".format(result[3]))
    print("TRP strength = {}".format(result[2]))
# Integer switches (0/1) selecting which structure searches to run.
parser.add_argument('--find-GQD', type=int, default=0)
parser.add_argument('--find-IMT', type=int, default=1)
parser.add_argument('--find-HRP', type=int, default=0)
parser.add_argument('--find-TRP', type=int, default=0)
args = parser.parse_args()

input_path = args.input_path
output_path = args.output_path
if output_path is None:
    # Default: write next to the input file.
    output_path = input_path + ".processed.csv"

find_GQD = bool(args.find_GQD)
find_IMT = bool(args.find_IMT)
find_HRP = bool(args.find_HRP)
find_TRP = bool(args.find_TRP)

# Read the input before touching the output so that an unreadable input
# does not leave a header-only output file behind.
with open(input_path, "r") as input_file:
    lines = input_file.readlines()
lines = [re.sub('[\n\r]', '', line) for line in lines]

# One CSV row: the sequence followed by four coefficient columns.
row_template = ",".join(["{}"] * 5) + "\n"

with open(output_path, "w") as output:
    output.write(row_template.format("string", "GQD", "IMT", "TRP", "HRP"))
    for line in tqdm.tqdm(lines):
        # Only the first comma-separated field of each line is analysed.
        sequence = line.split(",")[0]
        seq_coefs = parsers.analyze_string(
            sequence, [find_GQD, find_IMT, find_TRP, find_HRP])
        output.write(row_template.format(*((sequence, ) + seq_coefs)))