Ejemplo n.º 1
0
def starting_ehcp(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                  args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
                  args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
                  args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
                  args_iqtree_p, args_thread, args_extenddata, args_db):
    '''Reconstruct a phylogenetic tree by the ehcp method.

    Pipeline: check the organisms, retrieve their sequences, append the
    user-supplied extension data to every retrieved file, align,
    concatenate, trim and build the tree. The elapsed time of the retrieval
    and of the align-to-tree part is written to ``auto_build_log``.

    Parameters:
        in_put: organism input passed to ``checkKeggOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder.
        args_thread: forwarded to the mafft/muscle aligners and to the
            tree builder.
        args_extenddata: directory holding, for every file name found in
            the retrieve directory, a file of the same name whose content
            is appended to the retrieved one.
        args_db: forwarded to ``doretrieve``.
    '''
    hcp_input, recovery_dic = checkKeggOrganism(in_put)
    start = time.time()
    out_retrieve = doretrieve(hcp_input, out_put, args_db)
    end = time.time()
    auto_build_log.info(
        'Retrieving HCP sequences used time: {} Seconds'.format(end - start))

    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)

    # Append the extension data file by file. Both handles live in a
    # ``with`` block so neither leaks when an extension file is missing.
    for reline in os.listdir(out_retrieve):
        fw_name = os.path.join(out_retrieve, reline)
        fr_name = os.path.join(args_extenddata, reline)
        with open(fw_name, 'a') as fw, open(fr_name) as fr:
            fw.writelines(fr)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    start2 = time.time()
    if args_clustalw:
        out_alg = doclustalw_file(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft_file(out_retrieve, out_put, args_mafft_p,
                               args_thread)
    else:
        out_alg = domuscle_file(out_retrieve, out_put, args_muscle_p,
                                args_thread)

    out_concat = cocat_path(out_alg)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_concat, args_trimal_p)
    else:
        out_gblock = dogblocks(out_concat, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        doFastTree(out_f2p, out_put, args_fasttree_p, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end2 = time.time()
    auto_build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end2 -
                                                                 start2))
Ejemplo n.º 2
0
def starting_esrna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                   args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
                   args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
                   args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
                   args_iqtree_p, args_thread, args_extenddata, args_db):
    '''Reconstruct a phylogenetic tree by the extended SSU rRNA method.

    Pipeline: validate the extension file, check the organisms, retrieve
    their sequences, append the extension file to ``rna_sequence.fasta`` in
    the retrieve directory, align, trim and build the tree. The elapsed
    time of the retrieval and of the align-to-tree part is written to
    ``auto_build_log``.

    Parameters:
        in_put: organism input passed to ``checkSilvaOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer. A gblocks value equal to ``gblockspara_pro`` is
            replaced by ``gblockspara_dna``.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder. FastTree gets ``-nt`` prepended and
            a raxml value equal to ``raxmlpara_pro`` is replaced by
            ``raxmlpara_dna``.
        args_thread: forwarded to the tree builder.
        args_extenddata: passed to ``checkFile``; the value returned is
            opened as the extension file.
        args_db: forwarded to ``retrieve16srna``.
    '''
    extend_check = checkFile(args_extenddata)
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)
    start = time.time()
    out_retrieve = retrieve16srna(ssu_input, out_put, args_db)
    end = time.time()
    auto_build_log.info(
        'Retrieving SSU rRNA sequences used time: {} Seconds'.format(end -
                                                                     start))
    if recovery_dic != []:
        recovery_silva(out_retrieve, recovery_dic, ssu_input)
    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')

    # Both handles live in a ``with`` block so neither leaks on an error.
    with open(retrieve_srna_path, 'a') as fw, open(extend_check) as read:
        fw.writelines(read)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    start2 = time.time()
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    else:
        # Only the parameter swap is conditional; gblocks itself must run
        # for custom parameters too, otherwise out_gblock is never bound.
        if args_gblocks_p == gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # As above: swap the default parameters, then always run raxml.
        if args_raxml_p == raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end2 = time.time()
    auto_build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end2 -
                                                                 start2))
Ejemplo n.º 3
0
def starting_esrna(in_put, out_put,
                   args_muscle, args_muscle_p,
                   args_clustalw, args_clustalw_p,
                   args_mafft, args_mafft_p,
                   args_gblocks, args_gblocks_p,
                   args_trimal, args_trimal_p,
                   args_raxml, args_raxml_p,
                   args_fasttree, args_fasttree_p,
                   args_iqtree, args_iqtree_p,
                   args_thread, args_extenddata):
    '''Reconstruct a phylogenetic tree by the extended SSU rRNA method.

    Pipeline: validate the extension file, check the organisms, retrieve
    their sequences, append the extension file to ``rna_sequence.fasta`` in
    the retrieve directory, align, trim and build the tree.

    Parameters:
        in_put: organism input passed to ``checkSilvaOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer. A gblocks value equal to ``gblockspara_pro`` is
            replaced by ``gblockspara_dna``.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder. FastTree gets ``-nt`` prepended and
            a raxml value equal to ``raxmlpara_pro`` is replaced by
            ``raxmlpara_dna``.
        args_thread: forwarded to the tree builder.
        args_extenddata: passed to ``checkFile``; the value returned is
            opened as the extension file.
    '''
    extend_check = checkFile(args_extenddata)
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)
    out_retrieve = retrieve16srna(ssu_input, out_put)
    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)
    retrieve_srna_path = os.path.join(out_retrieve, 'rna_sequence.fasta')

    # The target is opened in binary append mode, so the source is read in
    # binary mode as well: writing text-mode lines to a binary handle
    # raises TypeError on Python 3. ``with`` closes both handles on error.
    with open(retrieve_srna_path, 'ab') as fw, \
            open(extend_check, 'rb') as read:
        fw.writelines(read)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    else:
        # Only the parameter swap is conditional; gblocks itself must run
        # for custom parameters too, otherwise out_gblock is never bound.
        if args_gblocks_p == gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # As above: swap the default parameters, then always run raxml.
        if args_raxml_p == raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
Ejemplo n.º 4
0
def starting_ehcp(in_put, out_put,
                  args_muscle, args_muscle_p,
                  args_clustalw, args_clustalw_p,
                  args_mafft, args_mafft_p,
                  args_gblocks, args_gblocks_p,
                  args_trimal, args_trimal_p,
                  args_raxml, args_raxml_p,
                  args_fasttree, args_fasttree_p,
                  args_iqtree, args_iqtree_p,
                  args_thread, args_extenddata):
    '''Reconstruct a phylogenetic tree by the ehcp method.

    Pipeline: check the organisms, retrieve their sequences, append the
    user-supplied extension data to every retrieved file, align,
    concatenate, trim and build the tree.

    Parameters:
        in_put: organism input passed to ``checkKeggOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder.
        args_thread: forwarded to the tree builder.
        args_extenddata: directory holding, for every file name found in
            the retrieve directory, a file of the same name whose content
            is appended to the retrieved one.
    '''
    hcp_input, recovery_dic = checkKeggOrganism(in_put)
    out_retrieve = doretrieve(hcp_input, out_put)
    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)

    # Append the extension data file by file. The target is opened in
    # binary append mode, so the source is read in binary mode as well:
    # writing text-mode lines to a binary handle raises TypeError on
    # Python 3. ``with`` closes both handles even when a file is missing.
    for reline in os.listdir(out_retrieve):
        fw_name = os.path.join(out_retrieve, reline)
        fr_name = os.path.join(args_extenddata, reline)
        with open(fw_name, 'ab') as fw, open(fr_name, 'rb') as fr:
            fw.writelines(fr)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    if args_clustalw:
        out_alg = doclustalw_file(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft_file(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle_file(out_retrieve, out_put, args_muscle_p)

    out_concat = cocat_path(out_alg)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_concat, args_trimal_p)
    else:
        out_gblock = dogblocks(out_concat, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        doFastTree(out_f2p, out_put, args_fasttree_p, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
Ejemplo n.º 5
0
def starting_hcp(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
                 args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
                 args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
                 args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
                 args_iqtree_p, args_thread, args_db):
    '''Reconstruct a phylogenetic tree by the hcp method.

    Pipeline: check the organisms, retrieve their sequences, align,
    concatenate, trim and build the tree. The elapsed time of the retrieval
    and of the align-to-tree part is written to ``auto_build_log``.

    Parameters:
        in_put: organism input passed to ``checkKeggOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder.
        args_thread: forwarded to the mafft/muscle aligners and to the
            tree builder.
        args_db: forwarded to ``doretrieve``.
    '''
    hcp_input, recovery_dic = checkKeggOrganism(in_put)
    start = time.time()
    out_retrieve = doretrieve(hcp_input, out_put, args_db)
    end = time.time()
    auto_build_log.info(
        'Retrieving HCP sequences used time: {} Seconds'.format(end - start))

    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    start2 = time.time()
    if args_clustalw:
        out_alg = doclustalw_file(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft_file(out_retrieve, out_put, args_mafft_p,
                               args_thread)
    else:
        out_alg = domuscle_file(out_retrieve, out_put, args_muscle_p,
                                args_thread)

    out_concat = cocat_path(out_alg)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_concat, args_trimal_p)
    else:
        out_gblock = dogblocks(out_concat, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        doFastTree(out_f2p, out_put, args_fasttree_p, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end2 = time.time()
    # Message previously read "Contracting"; the tree is being constructed.
    auto_build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end2 - start2))
Ejemplo n.º 6
0
def starting_srna(in_put, out_put,
                  args_muscle, args_muscle_p,
                  args_clustalw, args_clustalw_p,
                  args_mafft, args_mafft_p,
                  args_gblocks, args_gblocks_p,
                  args_trimal, args_trimal_p,
                  args_raxml, args_raxml_p,
                  args_fasttree, args_fasttree_p,
                  args_iqtree, args_iqtree_p,
                  args_thread):
    '''Reconstruct a phylogenetic tree by the SSU rRNA method.

    Pipeline: check the organisms, retrieve their sequences, align, trim
    and build the tree.

    Parameters:
        in_put: organism input passed to ``checkSilvaOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer. A gblocks value equal to ``gblockspara_pro`` is
            replaced by ``gblockspara_dna``.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder. FastTree gets ``-nt`` prepended and
            a raxml value equal to ``raxmlpara_pro`` is replaced by
            ``raxmlpara_dna``.
        args_thread: forwarded to the tree builder.
    '''
    ssu_input, recovery_dic = checkSilvaOrganism(in_put)
    out_retrieve = retrieve16srna(ssu_input, out_put)
    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    else:
        # Only the parameter swap is conditional; gblocks itself must run
        # for custom parameters too, otherwise out_gblock is never bound.
        if args_gblocks_p == gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # As above: swap the default parameters, then always run raxml.
        if args_raxml_p == raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
Ejemplo n.º 7
0
def starting_hcp(in_put, out_put,
                 args_muscle, args_muscle_p,
                 args_clustalw, args_clustalw_p,
                 args_mafft, args_mafft_p,
                 args_gblocks, args_gblocks_p,
                 args_trimal, args_trimal_p,
                 args_raxml, args_raxml_p,
                 args_fasttree, args_fasttree_p,
                 args_iqtree, args_iqtree_p,
                 args_thread):
    '''Reconstruct a phylogenetic tree by the hcp method.

    Pipeline: check the organisms, retrieve their sequences, align,
    concatenate, trim and build the tree.

    Parameters:
        in_put: organism input passed to ``checkKeggOrganism``.
        out_put: output location passed to the retrieve, align and tree
            helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder.
        args_thread: forwarded to the tree builder.
    '''
    hcp_input, recovery_dic = checkKeggOrganism(in_put)
    out_retrieve = doretrieve(hcp_input, out_put)
    if recovery_dic != {}:
        recovery(out_retrieve, recovery_dic)

    # Align: muscle is the default when neither clustalw nor mafft is set.
    # Falling through to it keeps out_alg bound in every case.
    if args_clustalw:
        out_alg = doclustalw_file(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft_file(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle_file(out_retrieve, out_put, args_muscle_p)

    out_concat = cocat_path(out_alg)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_concat, args_trimal_p)
    else:
        out_gblock = dogblocks(out_concat, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        doFastTree(out_f2p, out_put, args_fasttree_p, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
Ejemplo n.º 8
0
def build_srna(in_put, out_put,
               args_muscle, args_muscle_p,
               args_clustalw, args_clustalw_p,
               args_mafft, args_mafft_p,
               args_gblocks, args_gblocks_p,
               args_trimal, args_trimal_p,
               args_raxml, args_raxml_p,
               args_fasttree, args_fasttree_p,
               args_iqtree, args_iqtree_p,
               args_thread):
    '''Reconstruct a phylogenetic tree by the SSU rRNA method.

    ``in_put`` is used directly as the input of the alignment step; the
    function then aligns, trims and builds the tree.

    Parameters:
        in_put: value handed to the aligner in place of a retrieve result.
        out_put: output location passed to the align and tree helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer. A gblocks value equal to ``gblockspara_pro`` is
            replaced by ``gblockspara_dna``.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder. FastTree gets ``-nt`` prepended and
            a raxml value equal to ``raxmlpara_pro`` is replaced by
            ``raxmlpara_dna``.
        args_thread: forwarded to the tree builder.
    '''
    out_retrieve = in_put

    # Align: muscle is the default when neither clustalw nor mafft is set.
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    else:
        # Only the parameter swap is conditional; gblocks itself must run
        # for custom parameters too, otherwise out_gblock is never bound.
        if args_gblocks_p == gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # As above: swap the default parameters, then always run raxml.
        if args_raxml_p == raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
Ejemplo n.º 9
0
def build_srna(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
               args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
               args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
               args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
               args_iqtree_p, args_thread):
    '''Reconstruct a phylogenetic tree by the SSU rRNA method.

    ``in_put`` is used directly as the input of the alignment step; the
    function then aligns, trims and builds the tree and writes the total
    elapsed time to ``build_log``.

    Parameters:
        in_put: value handed to the aligner in place of a retrieve result.
        out_put: output location passed to the align and tree helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer. A gblocks value equal to ``gblockspara_pro`` is
            replaced by ``gblockspara_dna``.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder. FastTree gets ``-nt`` prepended and
            a raxml value equal to ``raxmlpara_pro`` is replaced by
            ``raxmlpara_dna``.
        args_thread: forwarded to the tree builder.
    '''
    start = time.time()
    out_retrieve = in_put

    # Align: muscle is the default when neither clustalw nor mafft is set.
    if args_clustalw:
        out_alg = doclustalw(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle(out_retrieve, out_put, args_muscle_p)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_alg, args_trimal_p)
    else:
        # Only the parameter swap is conditional; gblocks itself must run
        # for custom parameters too, otherwise out_gblock is never bound.
        if args_gblocks_p == gblockspara_pro:
            args_gblocks_p = gblockspara_dna
        out_gblock = dogblocks(out_alg, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        args_fasttree_p_add = "-nt " + args_fasttree_p.lstrip()
        doFastTree(out_f2p, out_put, args_fasttree_p_add, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        # As above: swap the default parameters, then always run raxml.
        if args_raxml_p == raxmlpara_pro:
            args_raxml_p = raxmlpara_dna
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end = time.time()
    build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end - start))
Ejemplo n.º 10
0
def build_hcp(in_put, out_put, args_muscle, args_muscle_p, args_clustalw,
              args_clustalw_p, args_mafft, args_mafft_p, args_gblocks,
              args_gblocks_p, args_trimal, args_trimal_p, args_raxml,
              args_raxml_p, args_fasttree, args_fasttree_p, args_iqtree,
              args_iqtree_p, args_thread):
    '''Reconstruct a phylogenetic tree by the hcp method.

    ``in_put`` is used directly as the input of the alignment step; the
    function then aligns, concatenates, trims and builds the tree and
    writes the total elapsed time to ``build_log``.

    Parameters:
        in_put: value handed to the aligner in place of a retrieve result.
        out_put: output location passed to the align and tree helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder.
        args_thread: forwarded to the mafft/muscle aligners and to the
            tree builder.
    '''
    start = time.time()
    out_retrieve = in_put

    # Align: muscle is the default when neither clustalw nor mafft is set.
    # Falling through to it keeps out_alg bound in every case.
    if args_clustalw:
        out_alg = doclustalw_file(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft_file(out_retrieve, out_put, args_mafft_p,
                               args_thread)
    else:
        out_alg = domuscle_file(out_retrieve, out_put, args_muscle_p,
                                args_thread)

    out_concat = cocat_path(out_alg)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_concat, args_trimal_p)
    else:
        out_gblock = dogblocks(out_concat, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        doFastTree(out_f2p, out_put, args_fasttree_p, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)
    end = time.time()
    build_log.info(
        'Constructing species tree used time: {} Seconds'.format(end - start))
Ejemplo n.º 11
0
def build_hcp(in_put, out_put,
              args_muscle, args_muscle_p,
              args_clustalw, args_clustalw_p,
              args_mafft, args_mafft_p,
              args_gblocks, args_gblocks_p,
              args_trimal, args_trimal_p,
              args_raxml, args_raxml_p,
              args_fasttree, args_fasttree_p,
              args_iqtree, args_iqtree_p,
              args_thread):
    '''Reconstruct a phylogenetic tree by the hcp method.

    ``in_put`` is used directly as the input of the alignment step; the
    function then aligns, concatenates, trims and builds the tree.

    Parameters:
        in_put: value handed to the aligner in place of a retrieve result.
        out_put: output location passed to the align and tree helpers.
        args_clustalw, args_mafft, args_muscle: aligner switches. clustalw
            is tested first, then mafft; muscle is used in every other
            case, so ``args_muscle`` does not have to be set.
        args_clustalw_p, args_mafft_p, args_muscle_p: value forwarded to
            the selected aligner.
        args_trimal, args_gblocks: trimming switches. trimal is used when
            requested, gblocks in every other case.
        args_trimal_p, args_gblocks_p: value forwarded to the selected
            trimmer.
        args_fasttree, args_iqtree, args_raxml: tree-builder switches,
            tested in that order; no tree is built when none is set.
        args_fasttree_p, args_iqtree_p, args_raxml_p: value forwarded to
            the selected tree builder.
        args_thread: forwarded to the tree builder.
    '''
    out_retrieve = in_put

    # Align: muscle is the default when neither clustalw nor mafft is set.
    # Falling through to it keeps out_alg bound in every case.
    if args_clustalw:
        out_alg = doclustalw_file(out_retrieve, out_put, args_clustalw_p)
    elif args_mafft:
        out_alg = domafft_file(out_retrieve, out_put, args_mafft_p)
    else:
        out_alg = domuscle_file(out_retrieve, out_put, args_muscle_p)

    out_concat = cocat_path(out_alg)

    # Trim: gblocks is the default when trimal is not requested.
    if args_trimal:
        out_f2p = dotrimal(out_concat, args_trimal_p)
    else:
        out_gblock = dogblocks(out_concat, args_gblocks_p)
        out_f2p = fasta2phy(out_gblock)

    # Reconstruct the tree with the first builder that is switched on.
    if args_fasttree:
        doFastTree(out_f2p, out_put, args_fasttree_p, args_thread)
    elif args_iqtree:
        doiqtree(out_f2p, out_put, args_iqtree_p, args_thread)
    elif args_raxml:
        doraxml(out_f2p, out_put, args_raxml_p, args_thread)