Python Settings.load Examples

Programming Language: Python

Namespace/Package Name: misopy.settings

Class/Type: Settings

Method/Function: load

Examples at hotexamples.com: 9

Python Settings.load - 9 examples found. These are the top rated real world Python examples of misopy.settings.Settings.load extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

load(5)

get(4)

get_cluster_command(2)

get_min_event_reads(2)

get_num_processors(1)

get_sampler_params(1)

get_strand_param(1)

Example #1

Show file

File: misoWrapper.py Project: JEstabrook/MISO-Utilities

def runMISOlocal(pickledDir, bamFile, readlen, overhanglen, outdir,\
    paired_end, settings_f):
    """ Function to run MISO on a single bam file locally, i.e. do not copy to a node.

    Args:
        pickledDir (str/path): Directory pointing towards pickled MISO annotations. This database can be generated with the MISO -index flag
        bamFile (str/path): Directory containing sorted indexed bam file. *Please Note* Bam files must not be trimmed. MISO is not capable of processing mixed read lengths.
        readlen (int): Length of reads for bamFile
        overhanglen (int): The required number of nucleotides to overlap a splice junction to be considered in subsequent
        outDir (str/path): Directory where MISO results will be stored
        paired_end (bool): Paired-End mode. Currently MISO cannot handle paired-end data, this flag defaults to <False>
        settings_f (str/path): This file contains a list of flags to provide the cluster to allow for ease of job submission

    Returns:
        Nothing. Generates a directory <outDir> where pickled MISO events and PSI values are stored.

    """
    if paired_end == False or paired_end == 'False':
        paired_end = None
    Settings.load(settings_f)

    run_events_analysis.compute_all_genes_psi(
            pickledDir, bamFile, int(readlen), outdir,
            overhang_len=int(overhanglen),
            paired_end=paired_end, settings_fname=settings_f)

Example #2

Show file

File: misoWrapper.py Project: adamstruck/dmseq

def runMISOlocal(pickledDir, bamFile, readlen, overhanglen, outdir,\
    paired_end, settings_f):

    if paired_end == False or paired_end == 'False':
        paired_end = None
    Settings.load(settings_f)

    #if not os.path.exists(outdir):
    #    print 'running', outdir
    run_events_analysis.compute_all_genes_psi(\
        pickledDir, bamFile, int(readlen), outdir,\
        overhang_len=int(overhanglen),\
        paired_end=paired_end, settings_fname=settings_f)

Example #3

Show file

def main():
    from optparse import OptionParser
    parser = OptionParser()

    ##
    ## Main options
    ##
    parser.add_option("--compute-gene-psi", dest="compute_gene_psi",
                      nargs=4, default=None,
                      help="Compute Psi using for a given multi-isoform gene. "
                      "Expects four arguments: the first is a gene ID or set "
                      "of comma-separated (no spaces) gene IDs, "
                      "the second is a GFF indexed file with the gene "
                      "information, the third is a sorted and "
                      "indexed BAM file with reads aligned to the gene, "
                      "and the fourth is an output directory.")
    parser.add_option("--paired-end", dest="paired_end",
                      nargs=2, default=None,
                      help="Run in paired-end mode.  Takes a mean and standard "
                      "deviation for the fragment length distribution (assumed "
                      "to have discretized normal form.)")
    parser.add_option("--compute-genes-from-file", dest="compute_genes_from_file",
                      nargs=3, default=None,
                      help="Runs on a set of genes from a file. Takes as input: "
                      "(1) a two-column tab-delimited file, where column 1 is the "
                      "event ID (ID field from GFF) and the second column is "
                      "the path to the indexed GFF file for that event. "
                      "MISO will run on all the events described in the file, "
                      "(2) a sorted, indexed BAM file to run on, and (3) a "
                      "directory to output results to.")

    ##
    ## Psi utilities
    ##
    parser.add_option("--compare-samples", dest="samples_to_compare",
                      nargs=3, default=None,
                      help="Compute comparison statistics between the two "
                      "given samples. Expects three directories: the first is "
                      "sample1's MISO output, the second is sample2's MISO "
                      "output, and the third is the directory where "
                      "results of the sample comparison will be outputted.")
    parser.add_option("--comparison-labels", dest="comparison_labels",
                      nargs=2, default=None,
                      help="Use these labels for the sample comparison "
                      "made by --compare-samples. "
                      "Takes two arguments: the label for sample 1 "
                      "and the label for sample 2, where sample 1 and "
                      "sample 2 correspond to the order of samples given "
                      "to --compare-samples.")
    parser.add_option("--summarize-samples", dest="summarize_samples",
                      nargs=2, default=None,
                      help="Compute summary statistics of the given set "
                      "of samples. Expects a directory with MISO output "
                      "and a directory to output summary file to.")
    parser.add_option("--summary-label", dest="summary_label",
                      nargs=1, default=None,
                      help="Label for MISO summary file. If not given, "
                      "uses basename of MISO output directory.")
    parser.add_option("--use-cluster", action="store_true",
                      dest="use_cluster", default=False)
    parser.add_option("--chunk-jobs", dest="chunk_jobs",
                      default=False, type="int",
                      help="Size (in number of events) of each job to "
                      "chunk events file into. Only applies when "
                      "running on cluster.")
    parser.add_option("--settings-filename", dest="settings_filename",
                      default=os.path.join(miso_settings_path,
                                           "settings",
                                           "miso_settings.txt"),
                      help="Filename specifying MISO settings.")
    parser.add_option("--read-len", dest="read_len", type="int",
                      default=None)
    parser.add_option("--overhang-len", dest="overhang_len", type="int",
                      default=None)
    parser.add_option("--event-type", dest="event_type", default=None,
                      help="Event type of two-isoform "
                      "events (e.g. 'SE', 'RI', 'A3SS', ...)")
    parser.add_option("--use-compressed", dest="use_compressed",
                      nargs=1, default=None,
                      help="Use compressed event IDs. Takes as input a "
                      "genes_to_filenames.shelve file produced by the "
                      "index_gff script.")
    ##
    ## Gene utilities
    ##
    parser.add_option("--view-gene", dest="view_gene",
                      nargs=1, default=None,
                      help="View the contents of a gene/event that has "
                      "been indexed. Takes as input an "
                      "indexed (.pickle) filename.")
    (options, args) = parser.parse_args()

    if options.compute_gene_psi is None:
        greeting()

    ##
    ## Load the settings file
    ##
    Settings.load(os.path.expanduser(options.settings_filename))

    use_compressed = None
    if options.use_compressed is not None:
        use_compressed = \
            os.path.abspath(os.path.expanduser(options.use_compressed))
        if not os.path.exists(use_compressed):
            print "Error: mapping filename from event IDs to compressed IDs %s " \
                  "is not found." %(use_compressed)
            sys.exit(1)
        else:
            print "Compression being used."

    if options.samples_to_compare is not None:
        sample1_dirname = os.path.abspath(options.samples_to_compare[0])
        sample2_dirname = os.path.abspath(options.samples_to_compare[1])
        output_dirname = os.path.abspath(options.samples_to_compare[2])
        if not os.path.isdir(output_dirname):
            print "Making comparisons directory: %s" %(output_dirname)
            misc_utils.make_dir(output_dirname)
        ht.output_samples_comparison(sample1_dirname,
                                     sample2_dirname,
                                     output_dirname,
                                     sample_labels=options.comparison_labels,
                                     use_compressed=use_compressed)
    ##
    ## Main interface based on SAM files
    ##
    if options.compute_genes_from_file != None:
        # Run on events given by file
        run_compute_genes_from_file(options)
    if options.compute_gene_psi != None:
        run_compute_gene_psi(options)

    ##
    ## Summarizing samples
    ##
    if options.summarize_samples:
        samples_dir = \
            os.path.abspath(os.path.expanduser(options.summarize_samples[0]))
        if options.summary_label != None:
            samples_label = options.summary_label
            print "Using summary label: %s" %(samples_label)
        else:
            samples_label = \
                os.path.basename(os.path.expanduser(samples_dir))
        assert(len(samples_label) >= 1)
        summary_output_dir = \
            os.path.abspath(os.path.join(os.path.expanduser(options.summarize_samples[1]),
                                         'summary'))
        if not os.path.isdir(summary_output_dir):
            os.makedirs(summary_output_dir)

        summary_filename = os.path.join(summary_output_dir,
                                        '%s.miso_summary' %(samples_label))
        summarize_sampler_results(samples_dir, summary_filename,
                                  use_compressed=use_compressed)

    if options.view_gene != None:
        indexed_gene_filename = \
            os.path.abspath(os.path.expanduser(options.view_gene))
        print "Viewing genes in %s" %(indexed_gene_filename)
        gff_genes = gff_utils.load_indexed_gff_file(indexed_gene_filename)

        if gff_genes == None:
            print "No genes."
            sys.exit(1)

        for gene_id, gene_info in gff_genes.iteritems():
            print "Gene %s" %(gene_id)
            gene_obj = gene_info['gene_object']
            print " - Gene object: ", gene_obj
            print "=="
            print "Isoforms: "
            for isoform in gene_obj.isoforms:
                print " - ", isoform
            print "=="
            print "mRNA IDs: "
            for mRNA_id in gene_info['hierarchy'][gene_id]['mRNAs']:
                print "%s" %(mRNA_id)
            print "=="
            print "Exons: "
            for exon in gene_obj.parts:
                print " - ", exon

Example #4

Show file

File: miso.py Project: dm-druid/rmats2sashimiplot-1

def main():
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("--run", dest="compute_genes_psi",
                      nargs=2, default=None,
                      help="Compute Psi values for a given GFF annotation "
                      "of either whole mRNA isoforms or isoforms produced by "
                      "single alternative splicing events. Expects two "
                      "arguments: an indexed GFF directory with genes to "
                      "process, and a sorted, indexed BAM file (with "
                      "headers) to run on.")
    parser.add_option("--event-type", dest="event_type", nargs=1,
                      help="[OPTIONAL] Type of event (e.g. SE, RI, A3SS, ...)",
                      default=None)
    parser.add_option("--use-cluster", dest="use_cluster",
                      action="store_true", default=False,
                      help="Run events on cluster.")
    parser.add_option("--chunk-jobs", dest="chunk_jobs",
                      default=False, type="int",
                      help="Size (in number of events) of each job to chunk "
                      "events file into. Only applies when running on cluster.")
    parser.add_option("--no-filter-events", dest="no_filter_events",
                      action="store_true", default=False,
                      help="Do not filter events for computing Psi. "
                      "By default, MISO computes Psi only for events that "
                      "have a sufficient number of junction reads. "
                      "The default filter varies by event type.")
    parser.add_option("--settings-filename", dest="settings_filename",
                      default=os.path.join(miso_settings_path,
                                           "settings",
                                           "miso_settings.txt"),                    
                      help="Filename specifying MISO settings.")
    parser.add_option("--read-len", dest="read_len", default=None, type="int",
                      help="Length of sequenced reads.")
    parser.add_option("--paired-end", dest="paired_end", nargs=2, default=None,
                      help="Run in paired-end mode. Takes mean and "
                      "standard deviation of insert length distribution.")
    parser.add_option("--overhang-len", dest="overhang_len",
                      default=None, type="int",
                      help="Length of overhang constraints "
                      "imposed on junctions.")
    parser.add_option("--output-dir", dest="output_dir", default=None,
                      help="Directory for MISO output.")
    parser.add_option("--job-name", dest="job_name", nargs=1,
                      help="Name for jobs submitted to queue for SGE jobs. " \
                      "Default is misojob", default="misojob")
    parser.add_option("--SGEarray", dest="SGEarray",
                      action="store_true", default=False,
                      help="Use MISO on cluster with Sun Grid Engine. "
                      "To be used in conjunction with --use-cluster option.")
    parser.add_option("--prefilter", dest="prefilter", default=False,
                      action="store_true",
                      help="Prefilter events based on coverage. If given as " 
                      "argument, run will begin by mapping BAM reads to event "
                      "regions (using bedtools), and omit events that do not "
                      "meet coverage criteria from the run. By default, turned "
                      "off. Note that events that do not meet the coverage criteria "
                      "will not be processed regardless, but --prefilter simply "
                      "does this filtering step at the start of the run, potentially "
                      "saving computation time so that low coverage events will not "
                      "be processed or distributed to jobs if MISO is run on a "
                      "cluster. This options requires bedtools to be installed and "
                      "available on path.")
    parser.add_option("-p", dest="num_proc", default=None, nargs=1,
                      help="Number of processors to use. Only applies when running " \
                      "MISO on a single machine with multiple cores; does not apply " \
                      "to runs submitted to cluster with --use-cluster.")
    parser.add_option("--version", dest="version", default=False,
                      action="store_true",
                      help="Print MISO version.")
    parser.add_option("--no-wait", dest="no_wait", default=False,
                      action="store_true",
                      help="If passed in, do not wait on cluster jobs after " \
                      "they are submitted. By default, wait.")
    ##
    ## Gene utilities
    ##
    parser.add_option("--view-gene", dest="view_gene",
                      nargs=1, default=None,
                      help="View the contents of a gene/event that has "
                      "been indexed. Takes as input an "
                      "indexed (.pickle) filename.")
    (options, args) = parser.parse_args()

    greeting()

    if options.version:
        print "MISO version %s\n" %(misopy.__version__)

    ##
    ## Load the settings file 
    ##
    if not os.path.isdir(miso_settings_path):
        print "Error: %s is not a directory containing a default MISO " \
              "settings filename. Please specify a settings filename " \
              "using --settings-filename."
        return
    
    settings_filename = \
        os.path.abspath(os.path.expanduser(options.settings_filename))
    Settings.load(settings_filename)
    
    if (not options.use_cluster) and options.chunk_jobs:
        print "Error: Chunking jobs only applies when using " \
              "the --use-cluster option to run MISO on cluster."
        sys.exit(1)
    if (not options.use_cluster) and options.SGEarray:
        print "Error: SGEarray implies that you are using an SGE cluster," \
              "please run again with --use-cluster option enabled."
        sys.exit(1)

    ##
    ## Quantitation using BAM for all genes
    ##
    if options.compute_genes_psi != None:
        # GFF filename with genes to process
        gff_filename = \
            os.path.abspath(os.path.expanduser(options.compute_genes_psi[0]))

        # BAM filename with reads
        bam_filename = \
            os.path.abspath(os.path.expanduser(options.compute_genes_psi[1]))

        if options.output_dir == None:
            print "Error: need --output-dir to compute Psi values."
            sys.exit(1)

        # Output directory to use
        output_dir = os.path.abspath(os.path.expanduser(options.output_dir))

        ##
        ## Load the main logging object
        ##
        logs_output_dir = os.path.join(output_dir, "logs")
        main_logger = get_main_logger(logs_output_dir)

        if options.read_len == None:
            main_logger.error("need --read-len to compute Psi values.")
            sys.exit(1)

        overhang_len = 1

        if options.paired_end != None and options.overhang_len != None:
            main_logger.warning("cannot use --overhang-len in paired-end mode.\n" \
                                "Using overhang = 1")
        if options.overhang_len != None:
            overhang_len = options.overhang_len

        # Whether to wait on cluster jobs or not
        wait_on_jobs = not options.no_wait
        compute_all_genes_psi(gff_filename, bam_filename,
                              options.read_len, output_dir,
                              main_logger,
                              overhang_len=overhang_len,
                              use_cluster=options.use_cluster,
                              SGEarray=options.SGEarray,
                              job_name=options.job_name,
                              chunk_jobs=options.chunk_jobs,
                              paired_end=options.paired_end,
                              settings_fname=settings_filename,
                              prefilter=options.prefilter,
                              num_proc=options.num_proc,
                              wait_on_jobs=wait_on_jobs)

    if options.view_gene != None:
        indexed_gene_filename = \
            os.path.abspath(os.path.expanduser(options.view_gene))
        print "Viewing genes in %s" %(indexed_gene_filename)
        gff_genes = gff_utils.load_indexed_gff_file(indexed_gene_filename)

        if gff_genes == None:
            print "No genes."
            sys.exit(1)

        for gene_id, gene_info in gff_genes.iteritems():
            print "Gene %s" %(gene_id)
            gene_obj = gene_info['gene_object']
            print " - Gene object: ", gene_obj
            print "=="
            print "Isoforms: "
            for isoform in gene_obj.isoforms:
                print " - ", isoform
            print "=="
            print "mRNA IDs: "
            for mRNA_id in gene_info['hierarchy'][gene_id]['mRNAs']:
                print "%s" %(mRNA_id)
            print "=="    
            print "Exons: "
            for exon in gene_obj.parts:
                print " - ", exon

Example #5

Show file

File: miso.py Project: wuxue/altanalyze

def main():
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("--run", dest="compute_genes_psi",
                      nargs=2, default=None,
                      help="Compute Psi values for a given GFF annotation "
                      "of either whole mRNA isoforms or isoforms produced by "
                      "single alternative splicing events. Expects two "
                      "arguments: an indexed GFF directory with genes to "
                      "process, and a sorted, indexed BAM file (with "
                      "headers) to run on.")
    parser.add_option("--event-type", dest="event_type", nargs=1,
                      help="[OPTIONAL] Type of event (e.g. SE, RI, A3SS, ...)",
                      default=None)
    parser.add_option("--use-cluster", dest="use_cluster",
                      action="store_true", default=False,
                      help="Run events on cluster.")
    parser.add_option("--chunk-jobs", dest="chunk_jobs",
                      default=False, type="int",
                      help="Size (in number of events) of each job to chunk "
                      "events file into. Only applies when running on cluster.")
    parser.add_option("--no-filter-events", dest="no_filter_events",
                      action="store_true", default=False,
                      help="Do not filter events for computing Psi. "
                      "By default, MISO computes Psi only for events that "
                      "have a sufficient number of junction reads. "
                      "The default filter varies by event type.")
    parser.add_option("--settings-filename", dest="settings_filename",
                      default=os.path.join(miso_settings_path,
                                           "settings",
                                           "miso_settings.txt"),                    
                      help="Filename specifying MISO settings.")
    parser.add_option("--read-len", dest="read_len", default=None, type="int",
                      help="Length of sequenced reads.")
    parser.add_option("--paired-end", dest="paired_end", nargs=2, default=None,
                      help="Run in paired-end mode. Takes mean and "
                      "standard deviation of insert length distribution.")
    parser.add_option("--overhang-len", dest="overhang_len",
                      default=None, type="int",
                      help="Length of overhang constraints "
                      "imposed on junctions.")
    parser.add_option("--output-dir", dest="output_dir", default=None,
                      help="Directory for MISO output.")
    parser.add_option("--job-name", dest="job_name", nargs=1,
                      help="Name for jobs submitted to queue for SGE jobs. " \
                      "Default is misojob", default="misojob")
    parser.add_option("--SGEarray", dest="SGEarray",
                      action="store_true", default=False,
                      help="Use MISO on cluster with Sun Grid Engine. "
                      "To be used in conjunction with --use-cluster option.")
    parser.add_option("--prefilter", dest="prefilter", default=False,
                      action="store_true",
                      help="Prefilter events based on coverage. If given as " 
                      "argument, run will begin by mapping BAM reads to event "
                      "regions (using bedtools), and omit events that do not "
                      "meet coverage criteria from the run. By default, turned "
                      "off. Note that events that do not meet the coverage criteria "
                      "will not be processed regardless, but --prefilter simply "
                      "does this filtering step at the start of the run, potentially "
                      "saving computation time so that low coverage events will not "
                      "be processed or distributed to jobs if MISO is run on a "
                      "cluster. This options requires bedtools to be installed and "
                      "available on path.")
    parser.add_option("-p", dest="num_proc", default=None, nargs=1,
                      help="Number of processors to use. Only applies when running " \
                      "MISO on a single machine with multiple cores; does not apply " \
                      "to runs submitted to cluster with --use-cluster.")
    parser.add_option("--version", dest="version", default=False,
                      action="store_true",
                      help="Print MISO version.")
    parser.add_option("--no-wait", dest="no_wait", default=False,
                      action="store_true",
                      help="If passed in, do not wait on cluster jobs after " \
                      "they are submitted. By default, wait.")
    ##
    ## Gene utilities
    ##
    parser.add_option("--view-gene", dest="view_gene",
                      nargs=1, default=None,
                      help="View the contents of a gene/event that has "
                      "been indexed. Takes as input an "
                      "indexed (.pickle) filename.")
    (options, args) = parser.parse_args()

    greeting()

    if options.version:
        print "MISO version %s\n" %(misopy.__version__)

    ##
    ## Load the settings file 
    ##
    if not os.path.isdir(miso_settings_path):
        print "Error: %s is not a directory containing a default MISO " \
              "settings filename. Please specify a settings filename " \
              "using --settings-filename."
        return
    
    settings_filename = \
        os.path.abspath(os.path.expanduser(options.settings_filename))
    Settings.load(settings_filename)
    
    if (not options.use_cluster) and options.chunk_jobs:
        print "Error: Chunking jobs only applies when using " \
              "the --use-cluster option to run MISO on cluster."
        sys.exit(1)
    if (not options.use_cluster) and options.SGEarray:
        print "Error: SGEarray implies that you are using an SGE cluster," \
              "please run again with --use-cluster option enabled."
        sys.exit(1)

    ##
    ## Quantitation using BAM for all genes
    ##
    if options.compute_genes_psi != None:
        # GFF filename with genes to process
        gff_filename = \
            os.path.abspath(os.path.expanduser(options.compute_genes_psi[0]))

        # BAM filename with reads
        bam_filename = \
            os.path.abspath(os.path.expanduser(options.compute_genes_psi[1]))

        if options.output_dir == None:
            print "Error: need --output-dir to compute Psi values."
            sys.exit(1)

        # Output directory to use
        output_dir = os.path.abspath(os.path.expanduser(options.output_dir))

        if options.read_len == None:
            print "Error: need --read-len to compute Psi values."
            sys.exit(1)

        overhang_len = 1

        if options.paired_end != None and options.overhang_len != None:
            print "WARNING: cannot use --overhang-len in paired-end mode."
            print "Using overhang = 1"

        if options.overhang_len != None:
            overhang_len = options.overhang_len

        # Whether to wait on cluster jobs or not
        wait_on_jobs = not options.no_wait
        compute_all_genes_psi(gff_filename, bam_filename,
                              options.read_len, output_dir,
                              overhang_len=overhang_len,
                              use_cluster=options.use_cluster,
                              SGEarray=options.SGEarray,
                              job_name=options.job_name,
                              chunk_jobs=options.chunk_jobs,
                              paired_end=options.paired_end,
                              settings_fname=settings_filename,
                              prefilter=options.prefilter,
                              num_proc=options.num_proc,
                              wait_on_jobs=wait_on_jobs)

    if options.view_gene != None:
        indexed_gene_filename = \
            os.path.abspath(os.path.expanduser(options.view_gene))
        print "Viewing genes in %s" %(indexed_gene_filename)
        gff_genes = gff_utils.load_indexed_gff_file(indexed_gene_filename)

        if gff_genes == None:
            print "No genes."
            sys.exit(1)

        for gene_id, gene_info in gff_genes.iteritems():
            print "Gene %s" %(gene_id)
            gene_obj = gene_info['gene_object']
            print " - Gene object: ", gene_obj
            print "=="
            print "Isoforms: "
            for isoform in gene_obj.isoforms:
                print " - ", isoform
            print "=="
            print "mRNA IDs: "
            for mRNA_id in gene_info['hierarchy'][gene_id]['mRNAs']:
                print "%s" %(mRNA_id)
            print "=="    
            print "Exons: "
            for exon in gene_obj.parts:
                print " - ", exon

Example #6

Show file

File: misoWrapper.py Project: JEstabrook/MISO-Utilities

def runMISOsingle(pickledDir, bamFile, readlen, overhanglen, outdir,\
    paired_end, settings_f, scratchDir):
    """ Function to run MISO on a single bam file.

    Args:
        pickledDir (str/path): Directory pointing towards pickled MISO annotations. This database can be generated with the MISO -index flag
        bamFile (str/path): Directory containing sorted indexed bam file. *Please Note* Bam files must not be trimmed. MISO is not capable of processing mixed read lengths.
        readlen (int): Length of reads for bamFile
        overhanglen (int): The required number of nucleotides to overlap a splice junction to be considered in subsequent
        outDir (str/path): Directory where MISO results will be stored
        paired_end (bool): Paired-End mode. Currently MISO cannot handle paired-end data, this flag defaults to <False>
        settings_f (str/path): This file contains a list of flags to provide the cluster to allow for ease of job submission
        scratchDir (str/path): Directory where MISO output will be stored.

    Returns:
        Nothing. Generates a directory <outDir> where pickled MISO events and PSI values are stored.

    """
    if paired_end == 'False':
        paired_end = None

    t = str(time.time()) + str(random.random())

    print os.path.basename(pickledDir)
    if not os.path.exists(scratchDir):
        cmd = 'mkdir ' + scratchDir
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
 
    # Copy pickled dir.
    pickled = os.path.join(scratchDir, os.path.basename(pickledDir) + \
        "." + t)
    cmd = 'mkdir ' + pickled
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'cp -r ' + pickledDir + '/* ' + pickled
    process = subprocess.Popen(cmd, shell=True)
    process.wait()

    # Copy bam file. 
    cmd = 'cp -fL ' + bamFile + ' ' + scratchDir
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'cp -fL ' + bamFile + '.bai ' + scratchDir
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    bam = os.path.join(scratchDir, os.path.basename(bamFile))

    # Give output directory in scratch a timestamp
    out = os.path.join(scratchDir, os.path.basename(outdir + "." + t))

    # LOAD SETTINGS FOR MISO
    Settings.load(settings_f)
 
    run_events_analysis.compute_all_genes_psi(\
        pickled, bam, int(readlen), out, overhang_len=int(overhanglen),\
        paired_end=paired_end, settings_fname=settings_f, prefilter=False)

    # Summarize sample
    #summary_fname = os.path.join(out, os.path.basename(outdir) + '.miso_summary') 
    #samples_utils.summarize_sampler_results(out, summary_fname)

    if not os.path.exists(outdir):
        cmd = 'mkdir -p ' + outdir
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
        
    # Copy output back.
    cmd = 'cp -r ' + out + '/* ' + outdir
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
 
    # Remove bam, output, and pickled dir. 
    cmd = 'rm ' + bam
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'rm ' + bam + '.bai'
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'rm -fr ' + out
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'rm -fr ' + pickled
    process = subprocess.Popen(cmd, shell=True)
    process.wait()

Example #7

Show file

File: run_miso.py Project: wuxue/altanalyze

def main():
    from optparse import OptionParser
    parser = OptionParser()

    ##
    ## Main options
    ##
    parser.add_option("--compute-gene-psi", dest="compute_gene_psi",
                      nargs=4, default=None,
                      help="Compute Psi using for a given multi-isoform gene. "
                      "Expects four arguments: the first is a gene ID or set "
                      "of comma-separated (no spaces) gene IDs, "
                      "the second is a GFF indexed file with the gene "
                      "information, the third is a sorted and "
                      "indexed BAM file with reads aligned to the gene, "
                      "and the fourth is an output directory.")
    parser.add_option("--paired-end", dest="paired_end",
                      nargs=2, default=None,
                      help="Run in paired-end mode.  Takes a mean and standard "
                      "deviation for the fragment length distribution (assumed "
                      "to have discretized normal form.)")
    parser.add_option("--compute-genes-from-file", dest="compute_genes_from_file",
                      nargs=3, default=None,
                      help="Runs on a set of genes from a file. Takes as input: "
                      "(1) a two-column tab-delimited file, where column 1 is the "
                      "event ID (ID field from GFF) and the second column is "
                      "the path to the indexed GFF file for that event. "
                      "MISO will run on all the events described in the file, "
                      "(2) a sorted, indexed BAM file to run on, and (3) a "
                      "directory to output results to.")
    
    ##
    ## Psi utilities
    ##
    parser.add_option("--compare-samples", dest="samples_to_compare",
                      nargs=3, default=None,
		      help="Compute comparison statistics between the two "
                      "given samples. Expects three directories: the first is "
                      "sample1's MISO output, the second is sample2's MISO "
                      "output, and the third is the directory where "
		      "results of the sample comparison will be outputted.")
    parser.add_option("--comparison-labels", dest="comparison_labels",
                      nargs=2, default=None,
                      help="Use these labels for the sample comparison "
                      "made by --compare-samples. "
                      "Takes two arguments: the label for sample 1 "
                      "and the label for sample 2, where sample 1 and "
                      "sample 2 correspond to the order of samples given "
                      "to --compare-samples.")
    parser.add_option("--summarize-samples", dest="summarize_samples",
                      nargs=2, default=None,
		      help="Compute summary statistics of the given set "
                      "of samples. Expects a directory with MISO output "
                      "and a directory to output summary file to.")
    parser.add_option("--summary-label", dest="summary_label",
                      nargs=1, default=None,
                      help="Label for MISO summary file. If not given, "
                      "uses basename of MISO output directory.")
    parser.add_option("--use-cluster", action="store_true",
                      dest="use_cluster", default=False)
    parser.add_option("--chunk-jobs", dest="chunk_jobs",
                      default=False, type="int",
		      help="Size (in number of events) of each job to "
                      "chunk events file into. Only applies when "
                      "running on cluster.")
    parser.add_option("--settings-filename", dest="settings_filename",
                      default=os.path.join(miso_settings_path,
                                           "settings",
                                           "miso_settings.txt"),
                      help="Filename specifying MISO settings.")
    parser.add_option("--read-len", dest="read_len", type="int",
                      default=None)
    parser.add_option("--overhang-len", dest="overhang_len", type="int",
                      default=None)
    parser.add_option("--event-type", dest="event_type", default=None,
		      help="Event type of two-isoform "
                      "events (e.g. 'SE', 'RI', 'A3SS', ...)")    
    parser.add_option("--use-compressed", dest="use_compressed",
                      nargs=1, default=None,
                      help="Use compressed event IDs. Takes as input a "
                      "genes_to_filenames.shelve file produced by the "
                      "index_gff script.")
    ##
    ## Gene utilities
    ##
    parser.add_option("--view-gene", dest="view_gene",
                      nargs=1, default=None,
                      help="View the contents of a gene/event that has "
                      "been indexed. Takes as input an "
                      "indexed (.pickle) filename.")
    (options, args) = parser.parse_args()

    if options.compute_gene_psi is None:
        greeting()

    ##
    ## Load the settings file 
    ##
    Settings.load(os.path.expanduser(options.settings_filename))

    use_compressed = None
    if options.use_compressed is not None:
        use_compressed = \
            os.path.abspath(os.path.expanduser(options.use_compressed))
        if not os.path.exists(use_compressed):
            print "Error: mapping filename from event IDs to compressed IDs %s " \
                  "is not found." %(use_compressed)
            sys.exit(1)
        else:
            print "Compression being used."
            
    if options.samples_to_compare is not None:
        sample1_dirname = os.path.abspath(options.samples_to_compare[0])
	sample2_dirname = os.path.abspath(options.samples_to_compare[1])
	output_dirname = os.path.abspath(options.samples_to_compare[2])
	if not os.path.isdir(output_dirname):
            print "Making comparisons directory: %s" %(output_dirname)
            misc_utils.make_dir(output_dirname)
	ht.output_samples_comparison(sample1_dirname,
                                     sample2_dirname,
                                     output_dirname,
                                     sample_labels=options.comparison_labels,
                                     use_compressed=use_compressed)
    ##
    ## Main interface based on SAM files
    ##
    if options.compute_genes_from_file != None:
        # Run on events given by file
        run_compute_genes_from_file(options)
    if options.compute_gene_psi != None:
        run_compute_gene_psi(options)
        
    ##
    ## Summarizing samples
    ##
    if options.summarize_samples:
	samples_dir = \
            os.path.abspath(os.path.expanduser(options.summarize_samples[0]))
        if options.summary_label != None:
            samples_label = options.summary_label
            print "Using summary label: %s" %(samples_label)
        else:
            samples_label = \
                os.path.basename(os.path.expanduser(samples_dir))
	assert(len(samples_label) >= 1)
	summary_output_dir = \
            os.path.abspath(os.path.join(os.path.expanduser(options.summarize_samples[1]),
                                         'summary'))
	if not os.path.isdir(summary_output_dir):
	    os.makedirs(summary_output_dir)
	    
	summary_filename = os.path.join(summary_output_dir,
					'%s.miso_summary' %(samples_label))
	summarize_sampler_results(samples_dir, summary_filename,
                                  use_compressed=use_compressed)

    if options.view_gene != None:
        indexed_gene_filename = \
            os.path.abspath(os.path.expanduser(options.view_gene))
        print "Viewing genes in %s" %(indexed_gene_filename)
        gff_genes = gff_utils.load_indexed_gff_file(indexed_gene_filename)

        if gff_genes == None:
            print "No genes."
            sys.exit(1)

        for gene_id, gene_info in gff_genes.iteritems():
            print "Gene %s" %(gene_id)
            gene_obj = gene_info['gene_object']
            print " - Gene object: ", gene_obj
            print "=="
            print "Isoforms: "
            for isoform in gene_obj.isoforms:
                print " - ", isoform
            print "=="
            print "mRNA IDs: "
            for mRNA_id in gene_info['hierarchy'][gene_id]['mRNAs']:
                print "%s" %(mRNA_id)
            print "=="    
            print "Exons: "
            for exon in gene_obj.parts:
                print " - ", exon

Example #8

Show file

File: misoWrapper.py Project: adamstruck/dmseq

def runMISOsingle(pickledDir, bamFile, readlen, overhanglen, outdir,\
    paired_end, settings_f, scratchDir):

    if paired_end == 'False':
        paired_end = None

    t = str(time.time()) + str(random.random())

    print os.path.basename(pickledDir)
    if not os.path.exists(scratchDir):
        cmd = 'mkdir ' + scratchDir
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
 
    # Copy pickled dir.
    pickled = os.path.join(scratchDir, os.path.basename(pickledDir) + \
        "." + t)
    cmd = 'mkdir ' + pickled
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'cp -r ' + pickledDir + '/* ' + pickled
    process = subprocess.Popen(cmd, shell=True)
    process.wait()

    # Copy bam file. 
    cmd = 'cp -fL ' + bamFile + ' ' + scratchDir
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'cp -fL ' + bamFile + '.bai ' + scratchDir
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    bam = os.path.join(scratchDir, os.path.basename(bamFile))

    # Give output directory in scratch a timestamp
    out = os.path.join(scratchDir, os.path.basename(outdir + "." + t))

    # LOAD SETTINGS FOR MISO
    Settings.load(settings_f)
 
    run_events_analysis.compute_all_genes_psi(\
        pickled, bam, int(readlen), out, overhang_len=int(overhanglen),\
        paired_end=paired_end, settings_fname=settings_f, prefilter=True)

    # Summarize sample
    #summary_fname = os.path.join(out, os.path.basename(outdir) + '.miso_summary') 
    #samples_utils.summarize_sampler_results(out, summary_fname)

    if not os.path.exists(outdir):
        cmd = 'mkdir -p ' + outdir
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
        
    # Copy output back.
    cmd = 'cp -r ' + out + '/* ' + outdir
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
 
    # Remove bam, output, and pickled dir. 
    cmd = 'rm ' + bam
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'rm ' + bam + '.bai'
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'rm -fr ' + out
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    cmd = 'rm -fr ' + pickled
    process = subprocess.Popen(cmd, shell=True)
    process.wait()

Example #9

Show file

    def run(self, delay_constant=0.9):
        """
        Run batches either locally on multi-cores
        or using cluster.
        """
        batch_filenames = self.output_batch_files()
        # All MISO commands, each correspond to a batch,
        # and the number of jobs in each batch
        all_miso_cmds = []
        num_batches = len(batch_filenames)
        ##
        ## Prepare all the files necessary to run each batch
        ##
        print "Preparing to run %d batches of jobs..." % (num_batches)
        miso_run = os.path.join(miso_path, "run_miso.py")
        for batch_num, batch in enumerate(batch_filenames):
            batch_filename, batch_size = batch
            miso_cmd = \
              "python %s --compute-genes-from-file \"%s\" %s %s --read-len %d " \
                    %(miso_run,
                      batch_filename,
                      self.bam_filename,
                      self.output_dir,
                      self.read_len)
            # Add paired-end parameters and read len/overhang len
            if self.paired_end != None:
                # Run in paired-end mode
                frag_mean = float(self.paired_end[0])
                frag_sd = float(self.paired_end[1])
                miso_cmd += " --paired-end %.1f %.1f" % (frag_mean, frag_sd)
            else:
                # Overhang len only used in single-end mode
                miso_cmd += " --overhang-len %d" % (self.overhang_len)
            # Add settings filename if given
            if self.settings_fname != None:
                miso_cmd += " --settings-filename %s" \
                    %(self.settings_fname)
            all_miso_cmds.append((miso_cmd, batch_size))
        ##
        ## Run all MISO commands for the batches
        ## either locally using multi-cores or on cluster
        ##
        # First handle special case of SGE cluster submission
        if self.use_cluster and self.SGEarray:
            print "Using SGEarray..."
            # Call SGE
            batch_argfile = os.path.join(self.cluster_scripts_dir,
                                         "run_args.txt")
            cluster_utils.run_SGEarray_cluster(all_miso_cmds,
                                               batch_argfile,
                                               self.output_dir,
                                               settings=self.settings_fname,
                                               job_name=self.sge_job_name,
                                               chunk=self.chunk_jobs)
            # End SGE case
            return

        # All cluster jobs
        cluster_jobs = []
        for batch_num, cmd_info in enumerate(all_miso_cmds):
            miso_cmd, batch_size = cmd_info
            print "Running batch of %d genes.." % (batch_size)
            print "  - Executing: %s" % (miso_cmd)
            # Make a log file for the batch, where all the output
            # will be redirected
            time_str = time.strftime("%m-%d-%y_%H:%M:%S")
            batch_logfile = os.path.join(
                self.batch_logs_dir, "batch-%d-%s.log" % (batch_num, time_str))
            cmd_to_run = "%s >> \"%s\";" % (miso_cmd, batch_logfile)
            if not self.use_cluster:
                # Run locally
                p = subprocess.Popen(cmd_to_run, shell=True)
                thread_id = "batch-%d" % (batch_num)
                print "  - Submitted thread %s" % (thread_id)
                self.threads[thread_id] = p
            else:
                # Setup cluster engine
                Settings.load(self.settings_fname)
                clustercmd = Settings.get_cluster_command()

                self.cluster_engine = getClusterEngine(clustercmd,
                                                       self.settings_fname)

                # Run on cluster
                if batch_size >= self.long_thresh:
                    queue_type = "long"
                else:
                    queue_type = "short"
                # Run on cluster
                job_name = "gene_psi_batch_%d" % (batch_num)
                print "Submitting to cluster: %s" % (cmd_to_run)
                job_id = \
                    self.cluster_engine.run_on_cluster(cmd_to_run,
                                                 job_name,
                                                 self.output_dir,
                                                 queue_type=queue_type)
                if job_id is not None:
                    cluster_jobs.append(job_id)
                time.sleep(delay_constant)
            # Extra delay constant
            time.sleep(delay_constant)
        # If ran jobs on cluster, wait for them if there are any
        # to wait on.
        if self.wait_on_jobs:
            if self.use_cluster and (len(cluster_jobs) == 0):
                # If we're asked to use the cluster but the list
                # of cluster jobs is empty, it means we could not
                # find the IDs of the job from the submission
                # system. Report this to the user.
                self.main_logger.warning("Asked to wait on cluster jobs but cannot " \
                                         "parse their job IDs from the cluster submission " \
                                         "system.")
            # Try to wait on jobs no matter what; though if 'cluster_jobs'
            # is empty here, it will not wait
            self.cluster_engine.wait_on_jobs(cluster_jobs, self.cluster_cmd)
        else:
            if self.use_cluster:
                # If we're running in cluster mode and asked not
                # to wait for jobs, let user know
                self.main_logger.info("Not waiting on cluster jobs.")
        # If ran jobs locally, wait on them to finish
        # (this will do nothing if we submitted jobs to
        # cluster)
        self.wait_on_threads()