Ejemplo n.º 1
0
def main():
    """Run Cogent on ``in.fa`` in the current directory and post-process it.

    Steps, in order:

    1. Check that ``in.fa`` and ``in.weights`` exist and run the FASTA
       sanity check on ``in.fa``.
    2. Count the lines of ``in.fa`` that contain ``>``. If the count is at
       most ``cc_settings.MAX_SPLIT_IN_SIZE``, call ``run_Cogent_on_input()``;
       otherwise split the input with ``split_files`` and call
       ``run_Cogent_on_split_files`` on the result.
    3. Call ``run_gmap`` with ``in.trimmed.fa`` against the ``cogent2`` db.
    4. Call ``cleanup_gmap`` on the ``cogent`` and ``cogent2`` directories
       when they exist.
    5. Write ``cogent2.renamed.fasta``, a copy of ``cogent2.fa`` in which
       every record header is ``<cc_settings.OUTPUT_PREFIX>|<record id>``.

    Raises:
        AssertionError: if ``in.fa`` or ``in.weights`` is missing (only when
            assertions are enabled).
    """
    assert os.path.exists('in.fa')
    assert os.path.exists('in.weights')

    sanity_checks.sanity_check_fasta('in.fa')

    # `grep -c` prints the number of lines in in.fa that contain '>'.
    num_size = int(os.popen("grep -c \">\" in.fa").read().strip())

    if num_size <= cc_settings.MAX_SPLIT_IN_SIZE:
        run_Cogent_on_input()
    else:
        dirs = split_files(input_filename='in.fa',
                           split_size=cc_settings.MAX_SPLIT_IN_SIZE)
        run_Cogent_on_split_files(dirs, depth=0)

    # align input to cogent2 gmap db so we can use it for evaluation later;
    # NOTE(review): in.trimmed.fa and cogent2.fa are not created in this
    # function -- presumably the Cogent run above produces them; confirm.
    run_gmap(dbname='cogent2', infile='in.trimmed.fa')

    # clean up GMAP db files
    if os.path.exists('cogent') and os.path.isdir('cogent'):
        cleanup_gmap('cogent')
    if os.path.exists('cogent2') and os.path.isdir('cogent2'):
        cleanup_gmap('cogent2')

    # rewrite cogent2.fa with prefix
    # Both handles are context-managed so they are closed even if parsing or
    # writing fails. The output is opened first, as before.
    with open('cogent2.renamed.fasta', 'w') as f, \
            open('cogent2.fa') as reader:
        for r in SeqIO.parse(reader, 'fasta'):
            f.write(">{0}|{1}\n{2}\n".format(cc_settings.OUTPUT_PREFIX, r.id,
                                             r.seq))
Ejemplo n.º 2
0
def main():
    """Run Cogent on ``in.fa``, splitting the input first when it is large.

    Requires ``in.fa`` and ``in.weights`` in the current directory. The number
    of lines in ``in.fa`` containing ``>`` is compared against 20: at or below
    that, ``run_Cogent_on_input()`` is called; above it, the input is passed
    to ``split_files`` with ``split_size=20`` and the result is handed to
    ``run_Cogent_on_split_files``.

    Raises:
        AssertionError: if either input file is missing (only when assertions
            are enabled).
    """
    assert os.path.exists('in.fa')
    assert os.path.exists('in.weights')

    sanity_checks.sanity_check_fasta('in.fa')

    # `grep -c` prints how many lines of in.fa contain '>'.
    grep_output = os.popen('grep -c ">" in.fa').read()
    header_lines = int(grep_output.strip())

    if header_lines <= 20:
        # At or below the threshold: run directly on the unsplit input.
        run_Cogent_on_input()
        return

    split_dirs = split_files(input_filename='in.fa', split_size=20)
    run_Cogent_on_split_files(split_dirs)
Ejemplo n.º 3
0
def main():
    """Check the inputs in the working directory and dispatch a Cogent run.

    ``in.fa`` and ``in.weights`` must exist. When ``in.fa`` has more than 20
    lines containing ``>``, it is split via ``split_files(split_size=20)`` and
    the result goes to ``run_Cogent_on_split_files``; otherwise
    ``run_Cogent_on_input()`` is called on the input as-is.

    Raises:
        AssertionError: if either input file is missing (only when assertions
            are enabled).
    """
    assert os.path.exists("in.fa")
    assert os.path.exists("in.weights")

    sanity_checks.sanity_check_fasta("in.fa")

    # Ask grep for the number of lines in in.fa that contain ">".
    raw_count = os.popen('grep -c ">" in.fa').read()
    n_headers = int(raw_count.strip())

    if n_headers > 20:
        chunk_dirs = split_files(input_filename="in.fa", split_size=20)
        run_Cogent_on_split_files(chunk_dirs)
    else:
        run_Cogent_on_input()
Ejemplo n.º 4
0
def main():
    """Run Cogent on ``in.fa`` and then remove the GMAP db directories.

    ``in.fa`` and ``in.weights`` must exist in the current directory. If the
    number of lines in ``in.fa`` containing ``>`` is at most
    ``cc_settings.MAX_SPLIT_IN_SIZE``, ``run_Cogent_on_input()`` is called;
    otherwise the input is split with ``split_files`` and processed by
    ``run_Cogent_on_split_files(dirs, depth=0)``. Afterwards ``cleanup_gmap``
    is called on ``cogent`` and ``cogent2`` when those directories exist.

    Raises:
        AssertionError: if either input file is missing (only when assertions
            are enabled).
    """
    assert os.path.exists('in.fa')
    assert os.path.exists('in.weights')

    sanity_checks.sanity_check_fasta('in.fa')

    # `grep -c` prints the number of lines in in.fa that contain '>'.
    grep_output = os.popen('grep -c ">" in.fa').read()
    num_size = int(grep_output.strip())

    max_size = cc_settings.MAX_SPLIT_IN_SIZE
    if num_size <= max_size:
        run_Cogent_on_input()
    else:
        dirs = split_files(input_filename='in.fa', split_size=max_size)
        run_Cogent_on_split_files(dirs, depth=0)

    # clean up GMAP db files
    for gmap_db in ('cogent', 'cogent2'):
        if os.path.exists(gmap_db) and os.path.isdir(gmap_db):
            cleanup_gmap(gmap_db)