def processLines(lines, best_sequences, find_params, verbose=False):
    """Fold new candidate strings into the current list of best sequences.

    Args:
        lines: Candidate strings; all must have the same length.
        best_sequences: List of ``(sequence, coefficients)`` pairs; all
            sequences must have the same length.
        find_params: Passed unchanged to ``parsers.analyze_string``.
        verbose: When not equal to ``False``, iteration over ``lines`` is
            wrapped in a ``tqdm`` progress bar.

    Returns:
        The list of ``(sequence, coefficients)`` pairs that remain after
        every candidate has been considered.  ``best_sequences`` itself is
        returned when ``lines`` is empty or when the known sequences are
        shorter than the new lines.

    Raises:
        AssertionError: If the sequences in ``best_sequences`` or the
            entries of ``lines`` do not share a common length.

    Note:
        The first accepted candidate is appended to the list passed in as
        ``best_sequences`` (unless it was discarded for being longer than
        the new lines); afterwards the local name is rebound to a new list.
    """
    # Sanity check: every already-known sequence has the same length.
    known_count = len(best_sequences)
    if known_count > 0:
        best_len = len(best_sequences[0][0])
        print("len of best_sequences={}".format(best_len))
        assert all(len(entry[0]) == best_len for entry in best_sequences[1:])

    # Sanity check: every new line has the same length; nothing to do if
    # there are no new lines at all.
    if len(lines) > 0:
        line_len = len(lines[0])
        print("len of new lines={}".format(line_len))
        assert all(len(line) == line_len for line in lines[1:])
    else:
        return best_sequences

    if known_count > 0:
        # Shorter known sequences win outright; longer ones are discarded.
        if best_len < line_len:
            return best_sequences
        if best_len > line_len:
            best_sequences = []

    if verbose == False:
        candidates = lines
    else:
        candidates = tqdm.tqdm(lines)

    for candidate in candidates:
        candidate_coefs = parsers.analyze_string(candidate, find_params)

        # Skip the candidate as soon as one kept entry beats it.
        beaten = any(
            is_less(candidate_coefs, kept_coefs)
            for _, kept_coefs in best_sequences
        )
        if beaten:
            continue

        best_sequences.append((candidate, candidate_coefs))
        # Drop every entry that the newly accepted candidate beats.
        best_sequences = [
            entry for entry in best_sequences
            if not is_less(entry[1], candidate_coefs)
        ]
    return best_sequences
Exemple #2
0
def create_table(kParam, resultStringSize):
    """Build analysis rows for strings made by repeating a ``kParam``-letter pattern.

    Every pattern over the alphabet ``acgt`` with length ``1..kParam`` is
    repeated to a string of length ``resultStringSize``.  A string is skipped
    when it, or its reverse, has already been produced (by a shorter pattern
    or by an earlier pattern of the same length).  Only strings produced by
    patterns of length exactly ``kParam`` are kept and analysed.

    Args:
        kParam: Length of the patterns that end up in the table.
        resultStringSize: Length of each full string; must be divisible by
            every pattern length from 1 to ``kParam``.

    Returns:
        A list of tuples ``(kParam, pattern, fullString, *coefficients)``
        where ``coefficients`` is the tuple returned by
        ``parsers.analyze_string`` for ``fullString``.

    Raises:
        AssertionError: If ``resultStringSize`` is not divisible by one of
            the pattern lengths.
    """
    result = []
    excludedStrings = set()
    for patternSize in tqdm.trange(1, kParam + 1):
        assert resultStringSize % patternSize == 0
        repeats = resultStringSize // patternSize
        for patternChars in itertools.product('acgt', repeat=patternSize):
            pattern = "".join(patternChars)
            fullString = pattern * repeats

            # A string matches an excluded one (or its reverse) exactly when
            # the string or its own reverse is in the set, so two O(1)
            # lookups replace a scan over every excluded string.
            if (fullString in excludedStrings
                    or fullString[::-1] in excludedStrings):
                continue

            excludedStrings.add(fullString)
            if patternSize == kParam:
                result.append((pattern, fullString))

    # All searches are switched off; the flags are passed in the order
    # GQD, IMT, TRP, HRP.
    find_GQD = False
    find_IMT = False
    find_TRP = False
    find_HRP = False

    result_lines = []
    for pattern, fullString in result:
        # Analyse the string that belongs to this row.  The previous code
        # passed ``s``, a variable left over from the exclusion loop.
        seq_coefs = parsers.analyze_string(
            fullString, [find_GQD, find_IMT, find_TRP, find_HRP])
        result_lines.append((kParam, pattern, fullString) + seq_coefs)
    return result_lines
Exemple #3
0
from itertools import *
from modules import parsers
from timeit import default_timer as timer
# Exhaustively analyse every string of this length over the alphabet "acgt"
# and record the ones whose result tuple compares >= (1, 1, 1, 1).
length = 14

start = timer()

with open('result.txt', 'w') as f:
    for s in map(''.join, product('acgt', repeat=length)):
        result_tuple = parsers.analyze_string(s, [True, True, True, 1])
        # NOTE(review): tuple comparison is lexicographic, so this is not an
        # element-wise "every value >= 1" test -- confirm that is intended.
        if not (result_tuple >= (1, 1, 1, 1)):
            continue
        f.write('{0} {1}\n'.format(s, result_tuple))

    # The elapsed time is appended to the same file as the last line.
    end = timer()
    f.write('total seconds:{0}'.format(end - start))



Exemple #4
0
# -*- coding: utf-8 -*-

import sys, os
sys.path.append('../')

from modules import parsers

if __name__ == "__main__":
    # Demo: analyse one hard-coded sequence with all four searches enabled
    # and print the resulting strengths.
    seq = "tgactgactgactgactgactgac"

    find_GQD = True
    find_IMT = True
    find_TRP = True
    find_HRP = True
    result = parsers.analyze_string(seq,
                                    [find_GQD, find_IMT, find_TRP, find_HRP])
    # NOTE(review): assumes analyze_string returns the strengths in the same
    # order as the flags above (GQD, IMT, TRP, HRP) -- confirm against
    # parsers.  The TRP/HRP labels were previously swapped relative to it.
    print("GQD strength = {}".format(result[0]))
    print("IMT strength = {}".format(result[1]))
    print("HRP strength = {}".format(result[3]))
    print("TRP strength = {}".format(result[2]))
Exemple #5
0
    parser.add_argument('--find-GQD', type=int, default=0)
    parser.add_argument('--find-IMT', type=int, default=1)
    parser.add_argument('--find-HRP', type=int, default=0)
    parser.add_argument('--find-TRP', type=int, default=0)

    args = parser.parse_args()

    input_path = args.input_path
    output_path = args.output_path
    if output_path == None:
        output_path = input_path + ".processed.csv"

    find_GQD = bool(args.find_GQD)
    find_IMT = bool(args.find_IMT)
    find_HRP = bool(args.find_HRP)
    find_TRP = bool(args.find_TRP)

    with open(output_path, "w") as output:
        output.write(",".join(["{}"] *
                              5).format("string", "GQD", "IMT", "TRP", "HRP") +
                     "\n")
        with open(input_path, "r") as input:
            lines = input.readlines()
            lines = list(map(lambda x: re.sub('[\n\r]', '', x), lines))
        for line in tqdm.tqdm(lines):
            sequence = line.split(",")[0]
            seq_coefs = parsers.analyze_string(
                sequence, [find_GQD, find_IMT, find_TRP, find_HRP])
            output.write(",".join(["{}"] * 5).format(*(
                (sequence, ) + seq_coefs)) + "\n")