Python STAP.EvaluateSpHMM Examples

Programming Language: Python

Namespace/Package Name: rpy2.robjects.packages

Class/Type: STAP

Method/Function: EvaluateSpHMM

Examples at hotexamples.com: 3

Python STAP.EvaluateSpHMM - 3 examples found. These are the top-rated real-world Python examples of rpy2.robjects.packages.STAP.EvaluateSpHMM extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

STAP(30)

EvaluateSpHMM(3)

colorcitos(3)

mdl_func(2)

orderInds(2)

plotCorRes(2)

plot_sequencing_depth_overview(1)

plotInPdf(1)

plot_beta_distribution_binary_class(1)

plot_density_heatmap(1)

plot_distribution(1)

plot_heatmap(1)

Chi_sq_based_bin(1)

pr_curve(1)

my_cego(1)

rcca_fit(1)

roc_curve(1)

runAnnotation(1)

run_fitZIG(1)

rx2(1)

sPLS(1)

simulate(1)

rcca_eval(1)

main(1)

multi_class_func(1)

dpbinom(1)

add_lag(1)

as_matrix(1)

assess_get_performances(1)

assess_read(1)

assess_summarise_performances(1)

combine_tx_txf(1)

corrMat(1)

ejecutable_espectro(1)

join_tables(1)

entropy_based_bin(1)

estacionariedad(1)

evaluate(1)

findPubs(1)

fitcoxnet(1)

fullAnnotationInGRanges(1)

initial_config(1)

woe_based_binning(1)

Example #1

Show file

File: MetaBGC-Build.py Project: shw079/MetaBGC

                if re.match(r".*txt$", file) and os.path.getsize(filePath) > 0:
                    with open(filePath) as infile:
                        for line in infile:
                            sampleName = ntpath.basename(filePath).split(".txt")[0]
                            outfile.write(line.strip() + "\t" + sampleName + "\t" + args.cohort_name + "\n")

    # Eval spHMMs
    rpackages.importr('base')
    #packageNames = ('tidyverse','ggsci','ggpubr')
    #utils = rpackages.importr('utils')
    #utils.chooseCRANmirror(ind=1)
    #packnames_to_install = [x for x in packageNames if not rpackages.isinstalled(x)]
    #if len(packnames_to_install) > 0:
    #    utils.install_packages(StrVector(packnames_to_install))

    rpackages.importr('tidyverse')
    rpackages.importr('ggsci')
    rpackages.importr('ggpubr')

    hp_hmm_directory = os.path.join(build_op_dir, 'HiPer_spHMMs')
    os.makedirs(hp_hmm_directory,0o777,True)
    with open('EvaluateSpHMMs.R', 'r') as f:
        rStr = f.read()
    myfunc = STAP(rStr, "EvaluateSpHMM")
    myfunc.EvaluateSpHMM(allHMMResult, allBLASTResult, gene_pos_file, args.prot_family_name, float(args.F1_Thresh), hmm_directory, hp_hmm_directory)

    timeTaken = time.time() - startTime
    mins = int(timeTaken / 60)
    secs = int(timeTaken) % 60
    print("\nTotal time taken : " + str(mins) + " mins " + str(secs) + " seconds")

Example #2

Show file

def mbgcbuild(prot_alignment, prot_family_name, cohort_name,
              nucl_seq_directory, prot_seq_directory, seq_fmt, pair_fmt, r1_file_suffix,
              r2_file_suffix, tp_genes_nucl, blast_db_directory_map_file, blastn_search_directory, hmm_search_directory, f1_thresh,
              output_directory, cpu):
    """Run the build pipeline and return the 'HiPer_spHMMs' output directory.

    The steps, in order: translate the true-positive genes, join them with
    the protein alignment, align with MUSCLE, generate spHMM files and gene
    interval positions, preprocess and translate the reads, run the HMMER
    and BLAST searches (each skipped when its combined result file already
    exists), and finally call the R function ``EvaluateSpHMM`` loaded from
    ``EvaluateSpHMMs.R``.

    Args:
        prot_alignment: Path to a FASTA protein alignment.
        prot_family_name: Protein family name forwarded to the helpers and
            to the R evaluation function.
        cohort_name: Cohort label; appended as the last column of every
            combined BLAST row.
        nucl_seq_directory: Directory of nucleotide reads handed to
            ``PreProcessReadsPar``.
        prot_seq_directory: Directory of translated reads. When it is
            ``None`` or not an existing directory, the reads are translated
            with ``TranseqReadsDir``.
        seq_fmt: Forwarded to ``PreProcessReadsPar`` (presumably the read
            file format; confirm against that helper).
        pair_fmt: Forwarded to ``PreProcessReadsPar`` (presumably the
            paired-end layout; confirm against that helper).
        r1_file_suffix: Suffix of R1 files; ``None`` is treated as "".
        r2_file_suffix: Suffix of R2 files; ``None`` is treated as "".
        tp_genes_nucl: Nucleotide FASTA of the true-positive genes.
        blast_db_directory_map_file: Optional BLAST DB mapping file;
            ``None`` is treated as "".
        blastn_search_directory: Optional BLAST result directory; defaults
            to ``<output_directory>/build/blastn_result``.
        hmm_search_directory: Optional HMMER result directory; defaults to
            ``<output_directory>/build/hmm_result``.
        f1_thresh: F1 threshold; converted with ``float`` before it is
            passed to R.
        output_directory: Root output directory; everything is written
            under its ``build`` sub-directory.
        cpu: Number of worker threads; ``None`` selects 4.

    Returns:
        The path of the ``HiPer_spHMMs`` directory.

    Raises:
        SystemExit: With status 1 when any step raises an ``Exception``.
            The traceback and a support message are printed first.
    """
    # Function-scope imports keep this block self-contained; they are only
    # needed by the failure path at the bottom.
    import sys
    import traceback

    try:
        startTime = time.time()
        CPU_THREADS = 4 if cpu is None else int(cpu)

        # setup paths
        build_op_dir = output_directory + os.sep + "build"
        hmm_directory = os.path.join(build_op_dir, 'spHMMs')
        tp_genes_prot = build_op_dir + os.sep + "TPGenes.faa"
        alnOutput = os.path.join(build_op_dir, "TP_Homolog_Alignment.afa")
        gene_pos_file = os.path.join(build_op_dir, 'Gene_Interval_Pos.txt')
        gene_pos_file_aa = os.path.join(build_op_dir, 'Gene_Interval_Pos_AA.txt')
        if hmm_search_directory is None:
            hmm_search_directory = os.path.join(build_op_dir, 'hmm_result')
        allHMMResult = os.path.join(build_op_dir, "CombinedHmmSearch.txt")
        if blastn_search_directory is None:
            blastn_search_directory = os.path.join(build_op_dir, 'blastn_result')
        allBLASTResult = os.path.join(build_op_dir, "CombinedBLASTSearch.txt")

        # Create OP dirs (this also creates build_op_dir as the parent).
        os.makedirs(hmm_directory, 0o777, True)

        # Translate protein sequence
        runTranSeq(tp_genes_nucl, "1", tp_genes_prot)

        # Join true positives in the sample with the BGC proteins
        tmpFile = os.path.join(build_op_dir, "TP_Homolog.faa")
        joinedSeqs = []
        # Removing _1 added by TranSeq
        for seq in SeqIO.parse(tp_genes_prot, "fasta"):
            seq.id = seq.id[:-2]
            seq.description = ""
            joinedSeqs.append(seq)
        # Rewrite the translated genes with the cleaned-up identifiers
        # before the alignment sequences are appended to the list.
        SeqIO.write(joinedSeqs, tp_genes_prot, "fasta")
        joinedSeqs.extend(SeqIO.parse(prot_alignment, "fasta"))
        SeqIO.write(joinedSeqs, tmpFile, "fasta")

        # MUSCLE align TP genes with markers
        runMUSCLE(tmpFile, alnOutput)

        # Gen spHMMs and interval pos
        # Extract spHMM coordinates from MUSCLE alignment
        hmmDict = gensphmmfiles(prot_family_name, alnOutput, tp_genes_prot,
                                hmm_directory, gene_pos_file, gene_pos_file_aa)

        if r1_file_suffix is None:
            r1_file_suffix = ""
        if r2_file_suffix is None:
            r2_file_suffix = ""

        # Preprocess synthetic reads
        nucl_seq_directory = PreProcessReadsPar(nucl_seq_directory,
                                                seq_fmt, pair_fmt,
                                                r1_file_suffix.strip(),
                                                r2_file_suffix.strip(),
                                                build_op_dir,
                                                CPU_THREADS)

        # Check if BLAST DB directory mapping file is provided or not
        if blast_db_directory_map_file is None:
            blast_db_directory_map_file = ""

        # Translate nucleotide seq. os.path.isdir(None) raises TypeError,
        # so an omitted directory is handled explicitly.
        if prot_seq_directory is None or not os.path.isdir(prot_seq_directory):
            prot_seq_directory = TranseqReadsDir(build_op_dir, nucl_seq_directory, CPU_THREADS)

        # HMMER Search (skipped when a combined result already exists)
        if not os.path.exists(allHMMResult):
            os.makedirs(hmm_search_directory, 0o777, True)
            for _, hmmFileObj in hmmDict.items():
                hmmInterval = str(hmmFileObj.intervalStart) + "_" + str(hmmFileObj.intervalEnd)
                RunHMMDirectoryParallel(prot_seq_directory, hmmFileObj.hmmFile, cohort_name, prot_family_name, "30_10", hmmInterval, hmm_search_directory, CPU_THREADS)

            # Concatenate every non-empty *.txt result into one file.
            with open(allHMMResult, 'w') as outfile:
                for subdir, dirs, files in os.walk(hmm_search_directory):
                    for fileName in files:
                        filePath = os.path.join(subdir, fileName)
                        if re.match(r".*txt$", fileName) and os.path.getsize(filePath) > 0:
                            with open(filePath) as infile:
                                outfile.writelines(infile)

        # BLAST Alignment (skipped when a combined result already exists)
        if not os.path.exists(allBLASTResult):
            if not os.path.isdir(blastn_search_directory):
                print("Constructing BLAST Search Dir:" + blastn_search_directory)
                os.makedirs(blastn_search_directory, 0o777, True)
                RunMakeDBandBlastN(nucl_seq_directory, blast_db_directory_map_file,
                                   tp_genes_nucl, "blastn", "-max_target_seqs 10000 -perc_identity 90.0 -outfmt \"6 sseqid slen sstart send qseqid qlen qstart qend pident evalue\" ",
                                   blastn_search_directory, CPU_THREADS)

            with open(allBLASTResult, 'w') as outfile:
                outfile.write("sseqid\tslen\tsstart\tsend\tqseqid\tqlen\tqstart\tqend\tpident\tevalue\tSample\tsampleType\n")
                for subdir, dirs, files in os.walk(blastn_search_directory):
                    for fileName in files:
                        filePath = os.path.join(subdir, fileName)
                        if re.match(r".*txt$", fileName) and os.path.getsize(filePath) > 0:
                            # The sample name depends only on the file, so
                            # it is computed once rather than per line.
                            sampleName = os.path.basename(filePath).split(".txt")[0]
                            with open(filePath) as infile:
                                for line in infile:
                                    outfile.write(line.strip() + "\t" + sampleName + "\t" + cohort_name + "\n")

        # Eval spHMMs
        rpackages.importr('base')
        utils = rpackages.importr('utils')
        packageNames = ('tidyverse', 'ggsci', 'ggpubr', 'dplyr', 'ggplot2')
        packnames_to_install = [x for x in packageNames if not rpackages.isinstalled(x)]
        if packnames_to_install:
            utils.install_packages(StrVector(packnames_to_install))
        # Load every required R package into the embedded R session.
        for packageName in packageNames:
            rpackages.importr(packageName)

        hp_hmm_directory = os.path.join(build_op_dir, 'HiPer_spHMMs')
        os.makedirs(hp_hmm_directory, 0o777, True)
        # The R script is looked up next to the createhmm module.
        module_dir = os.path.dirname(os.path.abspath(createhmm.__file__))
        print("\nR-script path : " + module_dir)
        r_script = os.path.join(module_dir, 'EvaluateSpHMMs.R')

        with open(r_script, 'r') as f:
            rStr = f.read()
        # STAP turns the R source text into a Python-callable package object.
        myfunc = STAP(rStr, "EvaluateSpHMM")
        myfunc.EvaluateSpHMM(allHMMResult, allBLASTResult, gene_pos_file, prot_family_name, float(f1_thresh), hmm_directory, hp_hmm_directory)
        timeTaken = time.time() - startTime
        mins = int(timeTaken / 60)
        secs = int(timeTaken) % 60
        print("\nTotal time taken : " + str(mins) + " mins " + str(secs) + " seconds")
        return hp_hmm_directory
    except Exception:
        # Show the real cause instead of hiding it, and report the failure
        # to the calling shell with a non-zero status. KeyboardInterrupt and
        # SystemExit are deliberately not intercepted.
        traceback.print_exc()
        print("Metabgc-build has failed. Please check your inputs and contact support on : https://github.com/donia-lab/MetaBGC")
        sys.exit(1)

Example #3

Show file

def mbgcbuild(prot_alignment, prot_family_name, cohort_name,
              nucl_seq_directory, prot_seq_directory, seq_fmt, pair_fmt,
              r1_file_suffix, r2_file_suffix, tp_genes_nucl,
              blastn_search_directory, hmm_search_directory, f1_thresh,
              output_directory, cpu):
    """Run the build pipeline and return the 'HiPer_spHMMs' output directory.

    The steps, in order: copy the protein alignment and generate spHMM
    files from it, translate the true-positive genes, join them with the
    alignment, align with MUSCLE, extract gene interval positions,
    preprocess and translate the reads, run the HMMER and BLAST searches,
    and finally call the R function ``EvaluateSpHMM`` loaded from
    ``EvaluateSpHMMs.R``.

    Args:
        prot_alignment: Path to a FASTA protein alignment; it is copied
            into the ``spHMMs`` directory.
        prot_family_name: Protein family name forwarded to the helpers and
            to the R evaluation function.
        cohort_name: Cohort label; appended as the last column of every
            combined BLAST row.
        nucl_seq_directory: Directory of nucleotide reads handed to
            ``PreProcessReadsPar``.
        prot_seq_directory: Directory of translated reads. When it is
            ``None`` or not an existing directory, the reads are translated
            with ``TranseqReadsDir``.
        seq_fmt: Forwarded to ``PreProcessReadsPar`` (presumably the read
            file format; confirm against that helper).
        pair_fmt: Forwarded to ``PreProcessReadsPar`` (presumably the
            paired-end layout; confirm against that helper).
        r1_file_suffix: Suffix of R1 files; ``None`` is treated as "".
        r2_file_suffix: Suffix of R2 files; ``None`` is treated as "".
        tp_genes_nucl: Nucleotide FASTA of the true-positive genes.
        blastn_search_directory: Optional BLAST result directory; defaults
            to ``<output_directory>/build/blastn_result``. BLAST is only
            run when this directory does not exist yet.
        hmm_search_directory: Optional HMMER result directory; defaults to
            ``<output_directory>/build/hmm_result``.
        f1_thresh: F1 threshold; converted with ``float`` before it is
            passed to R.
        output_directory: Root output directory; everything is written
            under its ``build`` sub-directory.
        cpu: Number of worker threads; ``None`` selects 4.

    Returns:
        The path of the ``HiPer_spHMMs`` directory.
    """
    startTime = time.time()
    # Fall back to 4 threads; without a default, CPU_THREADS would be
    # unbound (UnboundLocalError) whenever cpu is None.
    CPU_THREADS = 4 if cpu is None else int(cpu)

    # setup paths
    build_op_dir = output_directory + os.sep + "build"
    hmm_directory = os.path.join(build_op_dir, 'spHMMs')
    prot_aln_file = os.path.join(hmm_directory,
                                 ntpath.basename(prot_alignment))
    tp_genes_prot = build_op_dir + os.sep + "TPGenes.faa"
    alnOutput = os.path.join(build_op_dir, "tmp.afa")
    gene_pos_file = os.path.join(build_op_dir, 'Gene_Interval_Pos.txt')
    if hmm_search_directory is None:
        hmm_search_directory = os.path.join(build_op_dir, 'hmm_result')
    allHMMResult = hmm_search_directory + os.sep + "CombinedHmmSearch.txt"
    if blastn_search_directory is None:
        blastn_search_directory = os.path.join(build_op_dir, 'blastn_result')
    allBLASTResult = blastn_search_directory + os.sep + "CombinedBLASTSearch.txt"

    # Gen spHMMs and interval pos
    os.makedirs(hmm_directory, 0o777, True)
    copyfile(prot_alignment, prot_aln_file)
    hmmDict = gensphmmfiles(prot_family_name, prot_aln_file, hmm_directory)

    runTranSeq(tp_genes_nucl, "1", tp_genes_prot)
    tmpFile = os.path.join(build_op_dir, "tmp.fa")

    # Join true positives in the sample with the BGC proteins
    joinedSeqs = []
    # Removing _1 added by TranSeq
    for seq in SeqIO.parse(tp_genes_prot, "fasta"):
        seq.id = seq.id[:-2]
        seq.description = ""
        joinedSeqs.append(seq)
    # The alignment records are kept as a list because they are reused by
    # gengeneposlist below.
    protAlnSeqs = list(SeqIO.parse(prot_aln_file, "fasta"))
    joinedSeqs.extend(protAlnSeqs)
    SeqIO.write(joinedSeqs, tmpFile, "fasta")

    # MUSCLE align TP genes with markers
    runMUSCLE(tmpFile, alnOutput)
    # Extract spHMM coordinates from MUSCLE alignment
    gengeneposlist(prot_family_name, protAlnSeqs, hmmDict, alnOutput,
                   gene_pos_file)

    if r1_file_suffix is None:
        r1_file_suffix = ""
    if r2_file_suffix is None:
        r2_file_suffix = ""

    # Preprocess synthetic reads
    nucl_seq_directory = PreProcessReadsPar(nucl_seq_directory, seq_fmt,
                                            pair_fmt, r1_file_suffix.strip(),
                                            r2_file_suffix.strip(),
                                            build_op_dir, CPU_THREADS)
    # Translate nucleotide seq. os.path.isdir(None) raises TypeError, so an
    # omitted directory is handled explicitly.
    if prot_seq_directory is None or not os.path.isdir(prot_seq_directory):
        prot_seq_directory = TranseqReadsDir(build_op_dir, nucl_seq_directory,
                                             CPU_THREADS)

    # HMMER Search
    os.makedirs(hmm_search_directory, 0o777, True)
    for _, hmmFileObj in hmmDict.items():
        hmmInterval = (str(hmmFileObj.intervalStart) + "_" +
                       str(hmmFileObj.intervalEnd))
        RunHMMDirectory(prot_seq_directory, hmmFileObj.hmmFile, cohort_name,
                        prot_family_name, "30_10", hmmInterval,
                        hmm_search_directory, CPU_THREADS)

    # The combined file lives inside the directory being walked and matches
    # the *.txt filter, so it must be skipped; otherwise it can be read back
    # into itself once the write buffer has been flushed.
    combinedHmmPath = os.path.abspath(allHMMResult)
    with open(allHMMResult, 'w') as outfile:
        for subdir, dirs, files in os.walk(hmm_search_directory):
            for fileName in files:
                filePath = os.path.join(subdir, fileName)
                if os.path.abspath(filePath) == combinedHmmPath:
                    continue
                if re.match(r".*txt$", fileName) and os.path.getsize(filePath) > 0:
                    with open(filePath) as infile:
                        outfile.writelines(infile)

    # BLAST Alignment (only run when the result directory is missing)
    if not os.path.isdir(blastn_search_directory):
        os.makedirs(blastn_search_directory, 0o777, True)
        RunBLASTNDirectoryPar(nucl_seq_directory, tp_genes_nucl,
                              blastn_search_directory, CPU_THREADS)

    # Same self-inclusion hazard as for the HMMER results above.
    combinedBlastPath = os.path.abspath(allBLASTResult)
    with open(allBLASTResult, 'w') as outfile:
        outfile.write(
            "sseqid\tslen\tsstart\tsend\tqseqid\tqlen\tqstart\tqend\tpident\tevalue\tSample\tsampleType\n"
        )
        for subdir, dirs, files in os.walk(blastn_search_directory):
            for fileName in files:
                filePath = os.path.join(subdir, fileName)
                if os.path.abspath(filePath) == combinedBlastPath:
                    continue
                if re.match(r".*txt$", fileName) and os.path.getsize(filePath) > 0:
                    # The sample name depends only on the file, so it is
                    # computed once rather than per line.
                    sampleName = ntpath.basename(filePath).split(".txt")[0]
                    with open(filePath) as infile:
                        for line in infile:
                            outfile.write(line.strip() + "\t" + sampleName +
                                          "\t" + cohort_name + "\n")

    # Eval spHMMs
    rpackages.importr('base')
    utils = rpackages.importr('utils')
    packageNames = ('tidyverse', 'ggsci', 'ggpubr', 'dplyr', 'ggplot2')
    packnames_to_install = [
        x for x in packageNames if not rpackages.isinstalled(x)
    ]
    if packnames_to_install:
        utils.install_packages(StrVector(packnames_to_install))
    # Load every required R package into the embedded R session.
    for packageName in packageNames:
        rpackages.importr(packageName)

    hp_hmm_directory = os.path.join(build_op_dir, 'HiPer_spHMMs')
    os.makedirs(hp_hmm_directory, 0o777, True)
    # The R script is resolved relative to the first entry of sys.path.
    r_script = os.path.join(sys.path[0], 'metabgc', 'src', 'EvaluateSpHMMs.R')

    with open(r_script, 'r') as f:
        rStr = f.read()
    # STAP turns the R source text into a Python-callable package object.
    myfunc = STAP(rStr, "EvaluateSpHMM")
    myfunc.EvaluateSpHMM(allHMMResult,
                         allBLASTResult, gene_pos_file, prot_family_name,
                         float(f1_thresh), hmm_directory, hp_hmm_directory)
    timeTaken = time.time() - startTime
    mins = int(timeTaken / 60)
    secs = int(timeTaken) % 60
    print("\nTotal time taken : " + str(mins) + " mins " + str(secs) +
          " seconds")
    return hp_hmm_directory