Code example #1
0
File: cummerbund.py  Project: Kmurphy12/blacktie
def print_my_plots(r, rplots, out='', file_type='pdf'):
    """
    Write every stored plot to disk, one file per plotting method.

    :param r:       pointer to the R instance
    :param rplots:  the ``Bunch`` object where we stored our plots
    :param out:  a base directory to add to our saved plots into
    :param file_type:  the type of output file to use, choices: ['pdf','jpeg','png','ps']
    """
    # Normalize the base directory (no trailing slash) and make sure it exists.
    base_dir = out.rstrip('/')
    mkdirp(base_dir)

    # One output file per plot, named after the plotting method that made it.
    for plot_name in rplots:
        target = "%s/%s.%s" % (base_dir, plot_name, file_type)
        r.ggsave(filename=target, plot=rplots[plot_name])
Code example #2
0
File: calls.py  Project: Kmurphy12/blacktie
 def get_cufflinks_gtfs(self):
     """
     Handles ``yaml_config.cuffmerge_options.positional_args.assembly_list: from_conditions``.

     When the config value is ``'from_conditions'``, writes one cufflinks
     GTF path per condition into ``<out_dir>/assembly_list.txt`` and
     returns that file's absolute path.  Any other value is treated as a
     user-supplied path and returned verbatim.
     """
     option = self.prog_yargs.positional_args.assembly_list
     if option != 'from_conditions':
         # Anything else is an explicit path supplied by the user.
         return option

     # One cufflinks GTF path per queued condition.
     paths = [self.get_cuffGTF_path(condition) for condition in self._conditions]

     mkdirp(self.out_dir)
     list_path = "%s/assembly_list.txt" % (self.out_dir.rstrip('/'),)
     # Context manager guarantees the handle is closed even if write() raises
     # (the original left the file open on an exception).
     with open(list_path, 'w') as assembly_list_file:
         assembly_list_file.write("\n".join(paths))

     if self.mode == 'dry_run':
         # A dry run must leave no artifacts behind.
         shutil.rmtree(self.out_dir)

     return os.path.abspath(list_path)
Code example #3
0
def main():
    """
    The main loop.  Lets ROCK!

    Command-line entry point: parses arguments, loads the yaml config,
    sets up the run id / log directory / email info, then runs the
    requested pipeline stage(s) in order: tophat, cufflinks, cuffmerge,
    cuffdiff, cummerbund (or 'all').
    """

    desc = """This script reads options from a yaml formatted file and organizes the execution of tophat/cufflinks runs for multiple condition sets."""

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument(
        "--version", action="version", version="%(prog)s " + blacktie.__version__, help="""Print version number."""
    )
    parser.add_argument(
        "config_file", type=str, help="""Path to a yaml formatted config file containing setup options for the runs."""
    )
    parser.add_argument(
        "--prog",
        type=str,
        choices=["tophat", "cufflinks", "cuffmerge", "cuffdiff", "cummerbund", "all"],
        default="tophat",
        help="""Which program do you want to run? (default: %(default)s)""",
    )
    parser.add_argument(
        "--hide-logs",
        action="store_true",
        default=False,
        help="""Make your log directories hidden to keep a tidy 'looking' base directory. (default: %(default)s)""",
    )
    parser.add_argument(
        "--no-email",
        action="store_true",
        default=False,
        help="""Don't send email notifications. (default: %(default)s)""",
    )
    parser.add_argument(
        "--mode",
        type=str,
        choices=["analyze", "dry_run", "qsub_script"],
        default="analyze",
        help="""1) 'analyze': run the analysis pipeline. 2) 'dry_run': walk through all steps that
                        would be run and print out the command lines; however, do not send the commands to the
                        system to be run. 3) 'qsub_script': generate bash scripts suitable to be sent to a compute cluster's
                        SGE through the qsub command. (default: %(default)s)""",
    )

    # Invoked with no arguments at all: show usage and exit cleanly.
    if len(sys.argv) == 1:
        parser.print_help()
        exit(0)

    args = parser.parse_args()

    # NOTE(review): yaml.load with no explicit Loader is unsafe on untrusted
    # config files, and the file handle is never closed — consider
    # yaml.safe_load inside a 'with' block.
    yargs = bunchify(yaml.load(open(args.config_file, "rU")))

    # set up run_id, log files, and email info
    # Fall back to a timestamp-based id when the config leaves run_id empty.
    if yargs.run_options.run_id:
        run_id = yargs.run_options.run_id
    else:

        run_id = get_time()

    base_dir = yargs.run_options.base_dir.rstrip("/")
    # A leading '.' on the log directory name hides it when --hide-logs is set.
    if args.hide_logs:
        run_logs = "%s/.%s.logs" % (base_dir, run_id)
    else:
        run_logs = "%s/%s.logs" % (base_dir, run_id)

    # Dry runs must not create anything on disk.
    if not args.mode == "dry_run":
        mkdirp(run_logs)
    else:
        pass

    yaml_out = "%s/%s.yaml" % (run_logs, run_id)

    # copy yaml config file with run_id as name for records
    if not args.mode == "dry_run":
        shutil.copyfile(args.config_file, yaml_out)
    else:
        pass

    # Build the email settings Bunch; with --no-email the False/'' values
    # presumably tell downstream code to skip sending — TODO confirm in the
    # *Call classes.
    if not args.no_email:
        email_info = Bunch(
            {
                "email_from": yargs.run_options.email_info.sender,
                "email_to": yargs.run_options.email_info.to,
                # NOTE(review): this file handle is also never closed.
                "email_li": open(yargs.run_options.email_info.li, "rU").readline().rstrip("\n"),
            }
        )
    else:
        email_info = Bunch({"email_from": False, "email_to": False, "email_li": ""})

    # Pre-compiled regex matching progress-bar style output lines
    # (">... Processing ... [...] ...%").
    yargs.prgbar_regex = re.compile(">.+Processing.+\[.+\].+%\w*$")
    yargs.groups = map_condition_groups(yargs)
    # call_records collects every *Call object executed below, keyed by call_id.
    yargs.call_records = {}

    # loop through the queued conditions and send reports for tophat
    if args.prog in ["tophat", "all"]:
        print "[Note] Starting tophat step.\n"
        for condition in yargs.condition_queue:

            # Prep Tophat Call
            tophat_call = TophatCall(yargs, email_info, run_id, run_logs, conditions=condition, mode=args.mode)
            tophat_call.execute()

            # record the tophat_call object
            yargs.call_records[tophat_call.call_id] = tophat_call
    else:
        print "[Note] Skipping tophat step.\n"

    if args.prog in ["cufflinks", "all"]:
        # attempt to run more than one cufflinks call in parallel since cufflinks
        # seems to use only one processor no matter the value of -p you give it and
        # doesn't seem to consume massive amounts of memory
        print "[Note] Starting cufflinks step.\n"
        try:
            # A dry run skips the parallel machinery entirely by raising into
            # the serial fallback below.
            if args.mode == "dry_run":
                raise errors.BlacktieError("dry run")

            # TODO: on mac pprocess raised AttributeError "module" has no attrb "poll" or some crap
            try:
                queue = pprocess.Queue(limit=yargs.cufflinks_options.p)
            except AttributeError as exc:
                if "poll" in str(exc):
                    raise (errors.BlacktieError("no poll"))
                else:
                    raise

            def run_cufflinks_call(cufflinks_call):
                """
                function to start each parallel cufflinks_call inside the parallel job server.
                """
                cufflinks_call.execute()
                return cufflinks_call

            def change_processor_count(cufflinks_call):
                """
                Since we will run multiple instances of CufflinksCall at once, reduce
                the number of processors any one system call thinks it can use.
                """
                cufflinks_call.opt_dict["p"] = 2
                cufflinks_call.construct_options_list()
                cufflinks_call.options_list.extend([cufflinks_call.accepted_hits])
                cufflinks_call.arg_str = " ".join(cufflinks_call.options_list)
                return cufflinks_call

            execute = queue.manage(pprocess.MakeParallel(run_cufflinks_call))
            jobs = []
            for condition in yargs.condition_queue:
                cufflinks_call = CufflinksCall(
                    yargs, email_info, run_id, run_logs, conditions=condition, mode=args.mode
                )
                cufflinks_call = change_processor_count(cufflinks_call)
                jobs.append(cufflinks_call)
                execute(cufflinks_call)

            # record the cufflinks_call objects
            for call in queue:
                yargs.call_records[call.call_id] = call

        except (NameError, errors.BlacktieError) as exc:
            # Serial fallback: pprocess missing (NameError), dry run, or the
            # mac "no poll" problem all land here; anything else re-raises.
            if ("'pprocess' is not defined" in str(exc)) or (str(exc) == "dry run") or (str(exc) == "no poll"):
                pass
            else:
                raise

            print "Running cufflinks in serial NOT parallel.\n"
            # loop through the queued conditions and send reports for cufflinks
            for condition in yargs.condition_queue:
                # Prep cufflinks_call
                cufflinks_call = CufflinksCall(
                    yargs, email_info, run_id, run_logs, conditions=condition, mode=args.mode
                )
                cufflinks_call.execute()

                # record the cufflinks_call object
                yargs.call_records[cufflinks_call.call_id] = cufflinks_call
    else:
        print "[Note] Skipping cufflinks step.\n"

    # cuffmerge/cuffdiff/cummerbund operate per experiment group, not per
    # condition.
    if args.prog in ["cuffmerge", "all"]:
        print "[Note] Starting cuffmerge step.\n"
        for exp_id in yargs.groups:

            # Prep cuffmerge call
            cuffmerge_call = CuffmergeCall(yargs, email_info, run_id, run_logs, conditions=exp_id, mode=args.mode)
            cuffmerge_call.execute()

            # record the cuffmerge_call object
            yargs.call_records[cuffmerge_call.call_id] = cuffmerge_call

    else:
        print "[Note] Skipping cuffmerge step.\n"

    if args.prog in ["cuffdiff", "all"]:
        print "[Note] Starting cuffdiff step.\n"
        for exp_id in yargs.groups:

            # Prep cuffmerge call
            cuffdiff_call = CuffdiffCall(yargs, email_info, run_id, run_logs, conditions=exp_id, mode=args.mode)
            cuffdiff_call.execute()

            # record the cuffdiff_call object
            yargs.call_records[cuffdiff_call.call_id] = cuffdiff_call

    else:
        print "[Note] Skipping cuffdiff step.\n"

    if args.prog in ["cummerbund", "all"]:

        # test to make sure R and cummeRbund libs exist
        from blacktie.scripts import cummerbund

        cummerbund.import_cummeRbund_library()

        print "[Note] Starting cummerbund step.\n"
        for exp_id in yargs.groups:

            # Prep cummerbund call
            cummerbund_call = CummerbundCall(yargs, email_info, run_id, run_logs, conditions=exp_id, mode=args.mode)
            cummerbund_call.execute()

            # record the cummerbund_call object
            yargs.call_records[cummerbund_call.call_id] = cummerbund_call

    else:
        print "[Note] Skipping cummerbund step.\n"