def run_analysis(work_dir, post_process, fc_dir, run_info):
    """Changes into the supplied work_dir directory and submits
        the job using the supplied arguments and with slurm parameters
        obtained from the post_process.yaml configuration
    """

    # Move to the working directory
    start_dir = os.getcwd()
    os.chdir(work_dir)

    config = load_config(post_process)

    if str(config["algorithm"]["num_cores"]) == "messaging":
        analysis_script = DISTRIBUTED_ANALYSIS_SCRIPT
    else:
        analysis_script = PARALLELL_ANALYSIS_SCRIPT

    # Launches the pipeline using PM module
    project_to_run, sample_to_run, flowcell_to_run = fc_dir.split('/')[-3:]
    cmd = ["pm",
           "production",
           "run",
           project_to_run,
           "--sample",
           sample_to_run,
           "--flowcell",
           flowcell_to_run,
           "--drmaa",
           "--force"]
    subprocess.check_call(cmd)

    # Change back to the starting directory
    os.chdir(start_dir)
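# Hypothetical illustration of the command the run_analysis example above
# assembles: the last three components of fc_dir become the project, sample
# and flowcell arguments handed to "pm production run".  The path below is
# made up purely for the sake of the example.
fc_dir = "/proj/data/J.Doe_00_01/P001_101/120101_BD0VJ2ACXX"
project_to_run, sample_to_run, flowcell_to_run = fc_dir.split('/')[-3:]
print " ".join(["pm", "production", "run", project_to_run,
                "--sample", sample_to_run, "--flowcell", flowcell_to_run,
                "--drmaa", "--force"])
# -> pm production run J.Doe_00_01 --sample P001_101 --flowcell 120101_BD0VJ2ACXX --drmaa --force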
def main(project_id, sample_names, single_end, config_file, Map_Stat,
         Read_Dist, FPKM, rRNA_table, GBC, stranded, strandness_table,
         complexity):
    if not sample_names:
        sample_names = commands.getoutput(
            "ls -d tophat_out_*|sed 's/tophat_out_//g'").split('\n')
    else:
        sample_names = sample_names.split(',')
    TEMPLATE = make_template(Map_Stat, FPKM, GBC, Read_Dist, rRNA_table,
                             strandness_table, complexity)
    if config_file:
        config = load_config(config_file)
    else:
        config = {}

    projectfile = "%s.mako" % (project_id)
    fp = open(projectfile, "w")
    fp.write(TEMPLATE)
    fp.close()
    tmpl = Template(filename=projectfile)

    proj_conf = {'id': project_id, 'config': config, 'samples': sample_names}
    d = generate_report(proj_conf, single_end, stranded)
    rstfile = "%s_analysis_report.rst" % (project_id)
    fp = open(rstfile, "w")
    fp.write(tmpl.render(**d))
    fp.close()
    os.system('rst2pdf ' + rstfile)
    print """
Example #3
def main(config_file, fc_dir, analysis_dir, run_info_yaml=None):
    config = load_config(config_file)
    galaxy_api = (GalaxyApiAccess(config['galaxy_url'],
                                  config['galaxy_api_key'])
                  if config.has_key("galaxy_api_key") else None)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    base_folder_name = "%s_%s" % (fc_date, fc_name)
    run_details = lims_run_details(run_info, base_folder_name)
    for (library_name, access_role, dbkey, lane, bc_id, name, desc, local_name,
         fname_out) in run_details:
        library_id = (get_galaxy_library(library_name, galaxy_api)
                      if library_name else None)
        upload_files = list(
            select_upload_files(local_name, bc_id, fc_dir, analysis_dir,
                                config, fname_out))
        if len(upload_files) > 0:
            print lane, bc_id, name, desc, library_name
            print "Creating storage directory"
            if library_id:
                folder, cur_galaxy_files = get_galaxy_folder(
                    library_id, base_folder_name, name, desc, galaxy_api)
            else:
                cur_galaxy_files = []
            store_dir = move_to_storage(lane, bc_id, base_folder_name,
                                        upload_files, cur_galaxy_files, config,
                                        config_file, fname_out)
            if store_dir and library_id:
                print "Uploading directory of files to Galaxy"
                print galaxy_api.upload_directory(library_id, folder['id'],
                                                  store_dir, dbkey,
                                                  access_role)
    if galaxy_api and not run_info_yaml:
        add_run_summary_metrics(analysis_dir, galaxy_api)
Example #4
def run_analysis(work_dir, post_process, fc_dir, run_info):
    """Changes into the supplied work_dir directory and submits 
        the job using the supplied arguments and with slurm parameters
        obtained from the post_process.yaml configuration
    """

    # Move to the working directory
    start_dir = os.getcwd()
    os.chdir(work_dir)

    config = load_config(post_process)

    if str(config["algorithm"]["num_cores"]) == "messaging":
        analysis_script = DISTRIBUTED_ANALYSIS_SCRIPT
    else:
        analysis_script = PARALLELL_ANALYSIS_SCRIPT

    job_cl = [analysis_script, post_process, fc_dir, run_info]

    cp = config["distributed"]["cluster_platform"]
    cluster = __import__("bcbio.distributed.{0}".format(cp), fromlist=[cp])
    platform_args = config["distributed"]["platform_args"].split()

    print "Submitting job"
    jobid = cluster.submit_job(platform_args, job_cl)
    print 'Your job has been submitted with id ' + jobid

    # Change back to the starting directory
    os.chdir(start_dir)
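# Minimal sketch of the configuration shape the run_analysis variants above
# expect from post_process.yaml after load_config; every value here is a
# placeholder, not the project's actual defaults.
config = {
    "algorithm": {"num_cores": "messaging"},         # "messaging" selects the distributed script
    "distributed": {
        "cluster_platform": "slurm",                 # imported as bcbio.distributed.<platform>
        "platform_args": "-A a2010002 -t 04:00:00",  # split() and handed to cluster.submit_job
    },
}
script = ("DISTRIBUTED_ANALYSIS_SCRIPT"
          if str(config["algorithm"]["num_cores"]) == "messaging"
          else "PARALLELL_ANALYSIS_SCRIPT")
print script, config["distributed"]["platform_args"].split()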
def main(config_file, month, year):
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"],
                                 config["galaxy_api_key"])
    smonth, syear = (month - 1, year) if month > 1 else (12, year - 1)
    start_date = datetime(syear, smonth, 15, 0, 0, 0)
    # last day calculation useful if definition of month is
    # from first to last day instead of 15th-15th
    #(_, last_day) = calendar.monthrange(year, month)
    end_date = datetime(year, month, 14, 23, 59, 59)
    out_file = "%s_%s" % (start_date.strftime("%b"),
                          end_date.strftime("%b-%Y-sequencing.csv"))
    with open(out_file, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow([
            "Date", "Product", "Payment", "Researcher", "Lab", "Email",
            "Project", "Sample", "Description", "Genome", "Flowcell", "Lane",
            "Notes"
        ])
        for s in galaxy_api.sqn_report(start_date.isoformat(),
                                       end_date.isoformat()):
            f_parts = s["sqn_run"]["run_folder"].split("_")
            flowcell = "_".join([f_parts[0], f_parts[-1]])
            writer.writerow([
                s["sqn_run"]["date"], s["sqn_type"],
                s["project"]["payment_(fund_number)"],
                s["project"]["researcher"], s["project"]["lab_association"],
                s["project"]["email"], s["project"]["project_name"], s["name"],
                s["description"], s["genome_build"], flowcell,
                s["sqn_run"]["lane"], s["sqn_run"]["results_notes"]
            ])
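# Worked example of the 15th-to-14th reporting window used in the function
# above: for month=3, year=2012 the window runs from 2012-02-15 00:00:00 to
# 2012-03-14 23:59:59 and the CSV is named Feb_Mar-2012-sequencing.csv.
from datetime import datetime
month, year = 3, 2012
smonth, syear = (month - 1, year) if month > 1 else (12, year - 1)
start_date = datetime(syear, smonth, 15, 0, 0, 0)
end_date = datetime(year, month, 14, 23, 59, 59)
print "%s_%s" % (start_date.strftime("%b"),
                 end_date.strftime("%b-%Y-sequencing.csv"))
# -> Feb_Mar-2012-sequencing.csv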
def run_analysis(work_dir, post_process, fc_dir, run_info):
    """Changes into the supplied work_dir directory and submits 
        the job using the supplied arguments and with slurm parameters
        obtained from the post_process.yaml configuration
    """
    
    # Move to the working directory
    start_dir = os.getcwd()
    os.chdir(work_dir)
    
    config = load_config(post_process)
    
    if str(config["algorithm"]["num_cores"]) == "messaging":
        analysis_script = DISTRIBUTED_ANALYSIS_SCRIPT
    else:
        analysis_script = PARALLELL_ANALYSIS_SCRIPT
        
    job_cl = [analysis_script, post_process, fc_dir, run_info]
    
    cp = config["distributed"]["cluster_platform"]
    cluster = __import__("bcbio.distributed.{0}".format(cp), fromlist=[cp])
    platform_args = config["distributed"]["platform_args"].split()
    
    print "Submitting job"
    jobid = cluster.submit_job(platform_args, job_cl)
    print 'Your job has been submitted with id ' + jobid

    # Change back to the starting directory
    os.chdir(start_dir)
Example #7
def analyze_and_upload(remote_info, config_file):
    """Main entry point for analysis and upload to Galaxy.
    """
    config = load_config(config_file)
    fc_dir = _copy_from_sequencer(remote_info, config)
    analysis_dir = _run_analysis(fc_dir, remote_info, config, config_file)
    _upload_to_galaxy(fc_dir, analysis_dir, remote_info, config, config_file)
Example #8
def backup_data(remote_info, config_file):
    """Main entry point for fetching data from sequencer or pre-processing machine.
    """
    config = load_config(config_file)
    logger.info("Backing up run data over to remote storage: %s" %
                config["store_host"])
    _copy_from_sequencer(remote_info, config)
Example #9
def main(config_file, fc_dir, project_dir, run_info_yaml=None, fc_alias=None, project_desc=None, lanes=None):
    if project_desc is None and lanes is None:
        log.error("No project description or lanes provided: cannot deliver files without this information")
        sys.exit()

    config = load_config(config_file)
    ## Set log file in project output directory
    config.update(log_dir=os.path.join(project_dir, "log"))
    log_handler = create_log_handler(config, log.name)

    fc_dir = os.path.normpath(fc_dir)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)
    with log_handler.applicationbound():
        run_info = prune_run_info_by_description(run_info['details'], project_desc, lanes)
    if len(run_info) == 0:
        log.error("No lanes found with matching description %s: please check your flowcell run information" % project_desc)
        sys.exit()

    dirs = dict(fc_dir=fc_dir, project_dir=project_dir)
    fc_name, fc_date = get_flowcell_id(run_info, dirs['fc_dir'])
    config.update(fc_name = fc_name, fc_date = fc_date)
    config.update(fc_alias = "%s_%s" % (fc_date, fc_name) if not fc_alias else fc_alias)
    dirs.update(fc_delivery_dir = os.path.join(dirs['project_dir'], options.data_prefix, config['fc_alias'] ))
    dirs.update(data_delivery_dir = os.path.join(dirs['project_dir'], options.data_prefix, "%s_%s" %(fc_date, fc_name) ))
    with log_handler.applicationbound():
        config = _make_delivery_directory(dirs, config)
        _save_run_info(run_info, dirs['fc_delivery_dir'], run_exit=options.only_run_info)
        run_main(run_info, config, dirs)
Example #10
def main(config_file, fc_dir, analysis_dir, run_info_yaml=None):
    config = load_config(config_file)
    galaxy_api = (GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
                  if config.has_key("galaxy_api_key") else None)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    base_folder_name = "%s_%s" % (fc_date, fc_name)
    run_details = lims_run_details(run_info, base_folder_name)
    for (library_name, access_role, dbkey, lane, bc_id, name, desc,
            local_name, fname_out) in run_details:
        library_id = (get_galaxy_library(library_name, galaxy_api)
                      if library_name else None)
        upload_files = list(select_upload_files(local_name, bc_id, fc_dir,
                                                analysis_dir, config, fname_out))
        if len(upload_files) > 0:
            print lane, bc_id, name, desc, library_name
            print "Creating storage directory"
            if library_id:
                folder, cur_galaxy_files = get_galaxy_folder(library_id,
                               base_folder_name, name, desc, galaxy_api)
            else:
                cur_galaxy_files = []
            store_dir = move_to_storage(lane, bc_id, base_folder_name, upload_files,
                                        cur_galaxy_files, config, config_file,
                                        fname_out)
            if store_dir and library_id:
                print "Uploading directory of files to Galaxy"
                print galaxy_api.upload_directory(library_id, folder['id'],
                                                  store_dir, dbkey, access_role)
    if galaxy_api and not run_info_yaml:
        add_run_summary_metrics(analysis_dir, galaxy_api)
Example #11
def main(project_id,sample_names,single_end,config_file,Map_Stat,Read_Dist,FPKM,rRNA_table,GBC,stranded, strandness_table,complexity):
    if not sample_names:
        sample_names = commands.getoutput("ls -d tophat_out_*|sed 's/tophat_out_//g'").split('\n')
    else:
        sample_names = sample_names.split(',')
    TEMPLATE = make_template(Map_Stat, FPKM, GBC, Read_Dist, rRNA_table, strandness_table, complexity)
    if config_file:
        config = load_config(config_file)
    else:
        config = {}

    projectfile = "%s.mako" % (project_id)
    fp = open(projectfile, "w")
    fp.write(TEMPLATE)
    fp.close()
    tmpl = Template(filename=projectfile)

    proj_conf = {
        'id' : project_id,
        'config' : config,
        'samples': sample_names
         }
    d = generate_report(proj_conf,single_end,stranded)
    rstfile = "%s_analysis_report.rst" % (project_id)
    fp = open(rstfile, "w")
    fp.write(tmpl.render(**d))
    fp.close()
    os.system('rst2pdf '+ rstfile)
    print """
def run_analysis(work_dir, post_process, fc_dir, run_info):
    """Changes into the supplied work_dir directory and submits
        the job using the supplied arguments and with slurm parameters
        obtained from the post_process.yaml configuration
    """

    # Move to the working directory
    start_dir = os.getcwd()
    os.chdir(work_dir)

    config = load_config(post_process)

    if str(config["algorithm"]["num_cores"]) == "messaging":
        analysis_script = DISTRIBUTED_ANALYSIS_SCRIPT
    else:
        analysis_script = PARALLELL_ANALYSIS_SCRIPT

    # Launches the pipeline using PM module
    project_to_run, sample_to_run, flowcell_to_run = fc_dir.split('/')[-3:]
    cmd = [
        "pm", "production", "run", project_to_run, "--sample", sample_to_run,
        "--flowcell", flowcell_to_run, "--drmaa", "--force"
    ]
    subprocess.check_call(cmd)

    # Change back to the starting directory
    os.chdir(start_dir)
Example #13
def main(config_file,
         fc_dir=None,
         run_info_yaml=None,
         numcores=None,
         paralleltype=None,
         profile="default"):
    work_dir = os.getcwd()
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(config, fc_dir, run_info_yaml,
                                                 numcores, paralleltype)
    parallel = {
        "type": paralleltype,
        "cores": numcores,
        "profile": profile,
        "module": "bcbio.distributed"
    }
    if parallel["type"] in ["local", "messaging-main"]:
        if numcores is None:
            config["algorithm"]["num_cores"] = numcores
        run_main(config, config_file, work_dir, parallel, fc_dir,
                 run_info_yaml)
    elif parallel["type"] == "messaging":
        parallel["task_module"] = "bcbio.distributed.tasks"
        args = [config_file, fc_dir]
        if run_info_yaml:
            args.append(run_info_yaml)
        messaging.run_and_monitor(config, config_file, args, parallel)
    elif parallel["type"] == "ipython":
        run_main(config, config_file, work_dir, parallel, fc_dir,
                 run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" %
                         parallel["type"])
    def setUp(self):
        self.file_dir = os.path.join(os.path.dirname(__file__))
        self.fc_dir = os.path.join(self.file_dir, "110106_FC70BUKAAXX")
        self.proj_dir = os.path.join(self.file_dir, "projects", "j_doe_00_01")
        ##self.fcdir = os.path.join(os.path.dirname(__file__), "test_automated_output")
        self.run_info = os.path.join(self.fc_dir, "run_info.yaml")
        self.archive_base_dir  = os.path.join(self.file_dir)
        self.analysis_base_dir = os.path.join(self.file_dir)

        # Remove fcdir if exists and setup new link
        init_flowcell_dir()
        if not os.path.exists(self.proj_dir):
            os.makedirs(self.proj_dir)
        if not os.path.exists(os.path.join(self.file_dir, "test_automated_output", "run_info.yaml")):
            os.symlink(os.path.join(self.file_dir, "data", "automated", "run_info-project.yaml"), os.path.join(self.file_dir, "test_automated_output", "run_info.yaml"))
        if not os.path.exists(os.path.join(self.file_dir, "test_automated_output", "tool-data")):
            os.symlink(os.path.join(self.file_dir, "data", "automated", "tool-data"), os.path.join(self.file_dir, "test_automated_output", "tool-data"))

        # Post_process.yaml
        post_process = load_config(os.path.join(self.file_dir, "data", "automated", "post_process.yaml"))
        post_process["analysis"]["store_dir"] = os.path.join(self.archive_base_dir)
        post_process["analysis"]["base_dir"] = os.path.join(self.analysis_base_dir)
        post_process["algorithm"]["snpcall"] = "true"
        post_process["algorithm"]["dbsnp"] = os.path.join("data", "genomes", "hg19", "variation", "dbsnp_132.vcf")
        with open(os.path.join(self.fc_dir, "post_process.yaml"), "w") as fh:
            yaml.dump(post_process, stream=fh)
Example #15
def analyze_and_upload(remote_info, config_file):
    """Main entry point for analysis and upload to Galaxy.
    """
    config = load_config(config_file)
    fc_dir = _copy_from_sequencer(remote_info, config)
    analysis_dir = _run_analysis(fc_dir, remote_info, config, config_file)
    _upload_to_galaxy(fc_dir, analysis_dir, remote_info, config, config_file)
def main(config_file, fc_dir, run_info_yaml=None):
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    setup_logging(config)
    run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
Example #17
def main(config_file, fc_dir=None, run_info_yaml=None, numcores=None,
         paralleltype=None, profile="default"):
    work_dir = os.getcwd()
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(config, fc_dir, run_info_yaml,
                                                 numcores, paralleltype)
    parallel = {"type": paralleltype, "cores": numcores,
                "profile": profile,
                "module": "bcbio.distributed"}
    if parallel["type"] in ["local", "messaging-main"]:
        if numcores is None:
            config["algorithm"]["num_cores"] = numcores
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    elif parallel["type"] == "messaging":
        parallel["task_module"] = "bcbio.distributed.tasks"
        args = [config_file, fc_dir]
        if run_info_yaml:
            args.append(run_info_yaml)
        messaging.run_and_monitor(config, config_file, args, parallel) 
    elif parallel["type"] == "ipython":
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
Example #18
def main(args, mail, conffile, analysis, stranded, single, genome):
    project = args[0]
    ord_num = args[1]
    runs = args[2:]
    conf = cl.load_config(conffile)
    port = conf['statusdb']['port']
    username = conf['statusdb']['username']
    password = conf['statusdb']['password']
    URL = username + ':' + password + '@' + conf['statusdb']['url']
    extra_arg = "#SBATCH " + conf['sbatch']['extra_arg']
    couch = couchdb.Server("http://" + URL + ':' + str(port))
    proj_db = couch['projects']
    key = find_proj_from_view(proj_db, project)
    info = proj_db[key]
    reference_genome = genome if genome else info['reference_genome']
    RNA_analysis_settings = conf['custom_algorithms']['RNA-seq analysis']
    refpath = RNA_analysis_settings[reference_genome]['genomepath']
    gtfpath = RNA_analysis_settings[reference_genome]['gtfpath']
    bedpath = RNA_analysis_settings[reference_genome]['bedpath']
    today = str(datetime.today().isoformat()).replace(
        '-', '_').split('.')[0].replace(':', '_')
    command = [
        os.environ['HOME'] +
        '/opt/scilifelab/scripts/RNA_analysis/RNA_analysis.sh', '-p', project,
        '-o', ord_num, '-b', bedpath, '-g', gtfpath, '-m', mail, '-c',
        conffile, '-e', '"' + extra_arg + '"', '-a',
        str(analysis), '-s',
        str(stranded), '-d', today, '-f',
        str(single), '-G', reference_genome
    ] + runs
    command = ' '.join(command)
    print command
    os.system(command)
def main(config_file, fc_dir, run_info_yaml=None):
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    setup_logging(config)
    run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
Example #20
def main(bam_file, config_file=None, chrom='all', start=0, end=None,
         outfile=None, normalize=False, use_tempfile=False):
    if config_file:
        config = load_config(config_file)
    else:
        config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    if start > 0:
        start = int(start) - 1
    if end is not None:
        end = int(end)
    regions = [(chrom, start, end)]
    if os.path.abspath(bam_file) == os.path.abspath(outfile):
        sys.stderr.write("Bad arguments, input and output files are the same.\n")
        sys.exit(1)
    if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
        if use_tempfile:
            # Use a temp file to avoid any possibility of not having write permission
            out_handle = tempfile.NamedTemporaryFile(delete=False)
            wig_file = out_handle.name
        else:
            wig_file = "%s.wig" % os.path.splitext(outfile)[0]
            out_handle = open(wig_file, "w")
        with closing(out_handle):
            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle,
                                                   normalize)
        try:
            if wig_valid:
                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
        finally:
            os.remove(wig_file)
Example #21
def main(local_config, post_config_file=None,
         process_msg=True, store_msg=True, qseq=True, fastq=True):
    config = load_config(local_config)
    log_handler = create_log_handler(config)

    with log_handler.applicationbound():
        search_for_new(config, local_config, post_config_file,
                       process_msg, store_msg, qseq, fastq)
Example #22
def main(galaxy_config, processing_config):
    amqp_config = read_galaxy_amqp_config(galaxy_config)
    config = load_config(processing_config)
    store_tag = config["msg_store_tag"]
    log_handler = create_log_handler(config, LOG_NAME)
    handlers = [(store_tag, store_handler(config, store_tag))]
    with log_handler.applicationbound():
        message_reader(handlers, amqp_config)
Example #23
    def test_1_notify(self):
        config = load_config(self.config_file)
        if not "email" in config:
            print "No email configured, skipping test!"
            return
        log_handler = self._get_log_handler(config)
        result = self._log_messages(log_handler, "Pipeline notification test email @ %s" % time.strftime("%x - %X"))
        assert result is None, "%s" % result
Example #24
def main(galaxy_config, processing_config):
    amqp_config = read_galaxy_amqp_config(galaxy_config)
    config = load_config(processing_config)
    store_tag = config["msg_store_tag"]
    log_handler = create_log_handler(config, LOG_NAME)
    handlers = [(store_tag, store_handler(config, store_tag))]
    with log_handler.applicationbound():
        message_reader(handlers, amqp_config)
Example #25
def main(local_config, post_config_file=None, fetch_msg=True, process_msg=True, store_msg=True, 
         backup_msg=False, qseq=True, fastq=True, remove_qseq=False, compress_fastq=False, casava=False):
    config = load_config(local_config)
    log_handler = create_log_handler(config, True)

    with log_handler.applicationbound():
        search_for_new(config, local_config, post_config_file, fetch_msg, \
            process_msg, store_msg, backup_msg, qseq, fastq, remove_qseq, compress_fastq, casava)
Example #26
def analyze(remote_info, config_file):
    """Starts analysis of data that have been pushed to analysis location
    """
    config = load_config(config_file)
    fc_dir = os.path.join(remote_info["store_dir"],
                          os.path.basename(remote_info["directory"]))
    analysis_dir = _run_analysis(fc_dir, remote_info, config, config_file)
    _upload_to_galaxy(fc_dir, analysis_dir, remote_info, config, config_file)
Example #27
def main(config_file, queues=None):
    task_module = "bcbio.distributed.tasks"
    config = load_config(config_file)
    with utils.curdir_tmpdir() as work_dir:
        dirs = {"work": work_dir, "config": os.path.dirname(config_file)}
        with create_celeryconfig(task_module, dirs, config,
                                 os.path.abspath(config_file)):
            run_celeryd(work_dir, queues)
def main(local_config, post_config_file=None,
         process_msg=True, store_msg=True, qseq=True, fastq=True):
    config = load_config(local_config)
    log_handler = create_log_handler(config, LOG_NAME)

    with log_handler.applicationbound():
        search_for_new(config, local_config, post_config_file,
                       process_msg, store_msg, qseq, fastq)
Example #29
def long_term_storage(remote_info, config_file):
    config = load_config(config_file)
    log_handler = create_log_handler(config, log.name)
    with log_handler.applicationbound():
        log.info("Copying run data over to remote storage: %s" %
        config["store_host"])
        log.debug("The contents from AMQP for this dataset are:\n %s" %
        remote_info)
        _copy_for_storage(remote_info, config)
Example #30
def analyze(remote_info, config_file):
    """Starts analysis of data that have been pushed to analysis location
    """
    config = load_config(config_file)
    fc_dir = os.path.join(remote_info["store_dir"], 
                          os.path.basename(remote_info["directory"]))
    analysis_dir = _run_analysis(fc_dir, remote_info, config, config_file)
    _upload_to_galaxy(fc_dir, analysis_dir, remote_info,
                          config, config_file)
Example #31
def load_couch_server(config_file):
    """loads couch server with settings specified in 'config_file'"""
    try:
        db_conf = cl.load_config(config_file)['statusdb']
        url = db_conf['username']+':'+db_conf['password']+'@'+db_conf['url']+':'+str(db_conf['port'])
        couch = couchdb.Server("http://" + url)
        return couch
    except:
        return None
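# Hedged usage sketch for load_couch_server: assumes a config file with the
# same statusdb section (username/password/url/port) used by the other
# examples here; the call returns a couchdb.Server handle, or None if the
# configuration could not be read.
couch = load_couch_server("post_process-statusdb.yaml")
if couch is not None:
    proj_db = couch["projects"]  # same database the surrounding snippets query
    print "projects database holds %d documents" % len(proj_db)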
Example #32
def main(*args, **kwargs):
    local_config = args[0]
    post_process_config = args[1] if len(args) > 1 else None
    kwargs["post_process_config"] = post_process_config
    config = load_config(local_config)

    log_handler = create_log_handler(config, True)
    with log_handler.threadbound():
        search_for_new(config, local_config, **kwargs)
Example #33
def main(config_file, fc_dir, run_info_yaml=None, num_cores=None):
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    if num_cores:
        config["algorithm"]["num_cores"] = int(num_cores)
    setup_logging(config)
    run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
def main(config_file, fc_dir, run_info_yaml=None, num_cores=None):
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    if num_cores:
        config["algorithm"]["num_cores"] = int(num_cores)
    setup_logging(config)
    run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
Example #35
def main(config_file, fc_dir, run_info_yaml=None):
    config = load_config(config_file)
    if config.get("qcdb", None) is None:
        sys.exit()
    else:
        qcdb_config = config.get("qcdb", {})
    analysis = config.get("analysis", {})
    setup_logging(config)
    qcdb_store_dir = qcdb_config.get("qcdb_store_dir", None)
    run_main(fc_dir, qcdb_store_dir)
Example #36
    def setUp(self):
        self.data_dir = os.path.join(os.path.dirname(__file__), "data", "automated")
        config_file = os.path.join(self.data_dir, "post_process-statusdb.yaml")
        config = load_config(config_file)
        setup_logging(config)
        fc_date = "110106"
        fc_name = "FC70BUKAAXX"
        run_info_yaml = os.path.join(self.data_dir, "run_info.yaml")
        workdir = os.path.join(os.path.dirname(__file__), "110106_FC70BUKAAXX")
        fc_dir = os.path.join(self.data_dir, os.pardir, "110106_FC70BUKAAXX")
Example #37
    def test_1_notify(self):
        config = load_config(self.config_file)
        if not "email" in config:
            print "No email configured, skipping test!"
            return
        log_handler = self._get_log_handler(config)
        result = self._log_messages(
            log_handler,
            "Pipeline notification test email @ %s" % time.strftime("%x - %X"))
        assert result is None, "%s" % result
Example #38
def analyze_and_upload(remote_info, config_file):
    """Main entry point for analysis and upload to Galaxy.
    """
    config = load_config(config_file)
    log_handler = create_log_handler(config, log.name)
    with log_handler.applicationbound():
        fc_dir = _copy_from_sequencer(remote_info, config)
        analysis_dir = _run_analysis(fc_dir, remote_info, config, config_file)
        _upload_to_galaxy(fc_dir, analysis_dir, remote_info,
                          config, config_file)
Example #39
def main(args, phred64, fai, projtag, mail, hours, conffile, fpath, single, stranded):
    proj_ID = args[0]
    flow_cell = args[1]
    if phred64 == True:
        qscale = "--solexa1.3-quals"
    else:
        qscale = ""
    if not len(hours.split(":")) == 3:
        sys.exit("Please specify the time allocation string as hours:minutes:seconds or days-hours:minutes:seconds")
    conf = cl.load_config(conffile)
    port = conf["statusdb"]["port"]
    username = conf["statusdb"]["username"]
    password = conf["statusdb"]["password"]
    URL = username + ":" + password + "@" + conf["statusdb"]["url"]
    extra_arg = "#SBATCH " + conf["sbatch"]["extra_arg"]
    couch = couchdb.Server("http://" + URL + ":" + str(port))
    proj_db = couch["projects"]
    key = find_proj_from_view(proj_db, proj_ID)
    try:
        info = proj_db[key]
    except:
        sys.exit("project " + proj_ID + " not found in statusdb")

    reference_genome = info["reference_genome"]
    RNA_analysis_settings = conf["custom_algorithms"]["RNA-seq analysis"]
    refpath = RNA_analysis_settings[reference_genome]["genomepath"]
    aligner_version = RNA_analysis_settings["aligner_version"]
    if stranded is True:
        aligner_libtype = RNA_analysis_settings["aligner_libtype"]
    else:
        aligner_libtype = ""
    p = os.getcwd()
    if not fpath:
        fpath = p.split("intermediate")[0] + "data/" + flow_cell
    file_info = get_names_from_fastqfiles(fpath, flow_cell)
    for lane in file_info:
        an_path = prepare_lane_run_dir(p, lane)
        for samp in sorted(file_info[lane]):
            innerdist, innnerdistflagg, R1, R2 = frag_len_from_couch(fpath, file_info[lane][samp], single, samp, info)
            Generat_sbatch_file(
                an_path,
                hours,
                samp,
                mail,
                aligner_version,
                innerdist,
                refpath,
                innnerdistflagg,
                R1,
                R2,
                extra_arg,
                aligner_libtype,
                fai,
                qscale,
            )
Example #40
def main(config_file, in_file, space, start, end):
    config = load_config(config_file)
    runner = broad.runner_from_config(config)
    target_region = (space, int(start), int(end))
    for pair in [1, 2]:
        out_file = "%s_%s-%s.fastq" % (os.path.splitext(os.path.basename(in_file))[0],
                                          pair, target_region[0])
        with open(out_file, "w") as out_handle:
            for name, seq, qual in bam_to_fastq_pair(in_file, target_region, pair):
                out_handle.write("@%s/%s\n%s\n+\n%s\n" % (name, pair, seq, qual))
        sort_fastq(out_file, runner)
Example #41
    def test_2_report_notification(self):
        config = load_config(self.config_file)
        if not "gdocs_upload" in config or not "gdocs_email_notification" in config["gdocs_upload"]:
            print "Google docs email reporting not configured, skipping test!"
            return

        config["email"] = config["gdocs_upload"]["gdocs_email_notification"]

        log_handler = self._get_log_handler(config)
        result = self._log_messages(log_handler, "Google Docs report notification test email @ %s" % time.strftime("%x - %X"))
        assert result is None, "%s" % result
Example #42
def long_term_storage(remote_info, config_file):
    """Securely copy files from remote directory to the storage server.

    This requires ssh public keys to be setup so that no password entry
    is necessary, Fabric is used to manage setting up copies on the remote
    storage server.
    """
    config = load_config(config_file)
    logger.info("Copying run data over to remote storage: %s" % config["store_host"])
    logger.debug("The contents from AMQP for this dataset are:\n %s" % remote_info)
    _copy_for_storage(remote_info, config)
Example #43
    def setUp(self):
        self.data_dir = os.path.join(os.path.dirname(__file__), "data",
                                     "automated")
        config_file = os.path.join(self.data_dir, "post_process-statusdb.yaml")
        config = load_config(config_file)
        setup_logging(config)
        fc_date = "110106"
        fc_name = "FC70BUKAAXX"
        run_info_yaml = os.path.join(self.data_dir, "run_info.yaml")
        workdir = os.path.join(os.path.dirname(__file__), "110106_FC70BUKAAXX")
        fc_dir = os.path.join(self.data_dir, os.pardir, "110106_FC70BUKAAXX")
def main(args, phred64, fai, projtag, mail, hours, conffile, fpath, single,
         stranded, genome, inner, adapter):
    proj_ID = args[0]
    flow_cell = args[1]
    if phred64 == True:
        qscale = '--solexa1.3-quals'
    else:
        qscale = ''
    if not len(hours.split(':')) == 3:
        sys.exit(
            "Please specify the time allocation string as hours:minutes:seconds or days-hours:minutes:seconds"
        )
    conf = cl.load_config(conffile)
    port = conf['statusdb']['port']
    username = conf['statusdb']['username']
    password = conf['statusdb']['password']
    URL = username + ':' + password + '@' + conf['statusdb']['url']
    extra_arg = "#SBATCH " + conf['sbatch']['extra_arg']
    couch = couchdb.Server("http://" + URL + ':' + str(port))
    proj_db = couch['projects']
    key = find_proj_from_view(proj_db, proj_ID)
    try:
        info = proj_db[key]
    except:
        sys.exit("project " + proj_ID + " not found in statusdb")

    reference_genome = genome if genome else info['reference_genome']
    RNA_analysis_settings = conf['custom_algorithms']['RNA-seq analysis']
    refpath = RNA_analysis_settings[reference_genome]['genomepath']
    aligner_version = RNA_analysis_settings['aligner_version']
    if stranded is True:
        aligner_libtype = RNA_analysis_settings['aligner_libtype']
    else:
        aligner_libtype = ''
    p = os.getcwd()
    if not fpath:
        fpath = p.split('intermediate')[0] + 'data/' + flow_cell
    file_info = get_names_from_fastqfiles(fpath, flow_cell)
    for lane in file_info:
        an_path = prepare_lane_run_dir(p, lane)
        for samp in sorted(file_info[lane]):
            try:
                innerdist, innnerdistflagg, R1, R2 = frag_len_from_couch(
                    fpath, file_info[lane][samp], single, samp, info, inner,
                    adapter)
                Generat_sbatch_file(an_path, hours, samp, mail,
                                    aligner_version, innerdist, refpath,
                                    innnerdistflagg, R1, R2, extra_arg,
                                    aligner_libtype, fai, qscale)
            except:
                print "{}\n[Error Occurred] No sbatch script generated!".format(
                    "-" * 30)
Example #45
def main(config_file, in_file, space, start, end):
    config = load_config(config_file)
    runner = broad.runner_from_config(config)
    target_region = (space, int(start), int(end))
    for pair in [1, 2]:
        out_file = "%s_%s-%s.fastq" % (os.path.splitext(
            os.path.basename(in_file))[0], pair, target_region[0])
        with open(out_file, "w") as out_handle:
            for name, seq, qual in bam_to_fastq_pair(in_file, target_region,
                                                     pair):
                out_handle.write("@%s/%s\n%s\n+\n%s\n" %
                                 (name, pair, seq, qual))
        sort_fastq(out_file, runner)
Example #46
    def test_create_bc_report(self):
        """Create a demultiplex report and upload it to gdocs
        """
        # Parse the config
        config_file = os.path.join(self.data_dir, "post_process.yaml")
        self.config = load_config(config_file)

        # Loop over the runs
        for name in self.runname:
            print "\nProcessing %s" % name
            fc_name, fc_date = get_flowcell_info(name)
            analysisdir = os.path.join(self.workdir, name)
            assert create_report_on_gdocs(fc_date, fc_name, self.run_info_file, {"work": analysisdir, "flowcell": analysisdir}, self.config), "Report creation failed"
Example #47
    def test_create_bc_report(self):
        """Create a demultiplex report and upload it to gdocs
        """
        # Parse the config
        config_file = os.path.join(self.data_dir, "post_process.yaml")
        self.config = load_config(config_file)

        # Loop over the runs
        for name in self.runname:
            print "\nProcessing %s" % name
            fc_name, fc_date = get_flowcell_info(name)
            analysisdir = os.path.join(self.workdir, name)
            create_bc_report_on_gdocs(fc_date, fc_name, analysisdir, {'details': self.run_info}, self.config)
Example #48
def long_term_storage(remote_info, config_file):
    """Securely copy files from remote directory to the storage server.

    This requires ssh public keys to be setup so that no password entry
    is necessary, Fabric is used to manage setting up copies on the remote
    storage server.
    """
    config = load_config(config_file)
    logger.info("Copying run data over to remote storage: %s" %
                config["store_host"])
    logger.debug("The contents from AMQP for this dataset are:\n %s" %
                 remote_info)
    _copy_for_storage(remote_info, config)
Example #49
def main(args, mail, conffile, analysis, stranded, single, genome):
    project = args[0]
    ord_num = args[1]
    runs = args[2:]
    conf = cl.load_config(conffile)
    port = conf["statusdb"]["port"]
    username = conf["statusdb"]["username"]
    password = conf["statusdb"]["password"]
    URL = username + ":" + password + "@" + conf["statusdb"]["url"]
    extra_arg = "#SBATCH " + conf["sbatch"]["extra_arg"]
    couch = couchdb.Server("http://" + URL + ":" + str(port))
    proj_db = couch["projects"]
    key = find_proj_from_view(proj_db, project)
    info = proj_db[key]
    reference_genome = genome if genome else info["reference_genome"]
    RNA_analysis_settings = conf["custom_algorithms"]["RNA-seq analysis"]
    refpath = RNA_analysis_settings[reference_genome]["genomepath"]
    gtfpath = RNA_analysis_settings[reference_genome]["gtfpath"]
    bedpath = RNA_analysis_settings[reference_genome]["bedpath"]
    today = str(datetime.today().isoformat()).replace("-", "_").split(".")[0].replace(":", "_")
    command = [
        os.environ["HOME"] + "/opt/scilifelab/scripts/RNA_analysis/RNA_analysis.sh",
        "-p",
        project,
        "-o",
        ord_num,
        "-b",
        bedpath,
        "-g",
        gtfpath,
        "-m",
        mail,
        "-c",
        conffile,
        "-e",
        '"' + extra_arg + '"',
        "-a",
        str(analysis),
        "-s",
        str(stranded),
        "-d",
        today,
        "-f",
        str(single),
        "-G",
        reference_genome,
    ] + runs
    command = " ".join(command)
    print command
    os.system(command)
Example #50
    def test_2_report_notification(self):
        config = load_config(self.config_file)
        if not "gdocs_upload" in config or not "gdocs_email_notification" in config[
                "gdocs_upload"]:
            print "Google docs email reporting not configured, skipping test!"
            return

        config["email"] = config["gdocs_upload"]["gdocs_email_notification"]

        log_handler = self._get_log_handler(config)
        result = self._log_messages(
            log_handler, "Google Docs report notification test email @ %s" %
            time.strftime("%x - %X"))
        assert result is None, "%s" % result
Example #51
def analyze_locally(dname, post_config_file, fastq_dir):
    """Run analysis directly on the local machine.
    """
    assert fastq_dir is not None
    post_config = load_config(post_config_file)
    analysis_dir = os.path.join(fastq_dir, os.pardir, "analysis")
    utils.safe_makedir(analysis_dir)
    with utils.chdir(analysis_dir):
        prog = "bcbio_nextgen.py"
        cl = [prog, post_config_file, dname]
        run_yaml = os.path.join(dname, "run_info.yaml")
        if os.path.exists(run_yaml):
            cl.append(run_yaml)
        subprocess.check_call(cl)
Example #52
def analyze_locally(dname, post_config_file, fastq_dir):
    """Run analysis directly on the local machine.
    """
    assert fastq_dir is not None
    post_config = load_config(post_config_file)
    analysis_dir = os.path.join(fastq_dir, os.pardir, "analysis")
    utils.safe_makedir(analysis_dir)
    with utils.chdir(analysis_dir):
        prog = "bcbio_nextgen.py"
        cl = [prog, post_config_file, dname]
        run_yaml = os.path.join(dname, "run_info.yaml")
        if os.path.exists(run_yaml):
            cl.append(run_yaml)
        subprocess.check_call(cl)
Example #53
def main(config_file, fc_dir, run_info_yaml=None):
    config = load_config(config_file)
    work_dir = os.getcwd()
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")

    def insert_command(record):
        record.extra["command"] = sys.argv
        record.extra["version"] = version.get_pipeline_version()

    setup_logging(config)
    handler = create_log_handler(config)
    with handler, \
         logbook.Processor(insert_command):

        run_main(config, config_file, fc_dir, work_dir, run_info_yaml)
Example #54
def analyze_locally(dname, post_config_file, fastq_dir):
    """Run analysis directly on the local machine.
    """
    assert fastq_dir is not None
    post_config = load_config(post_config_file)
    analysis_dir = os.path.join(fastq_dir, os.pardir, "analysis")
    utils.safe_makedir(analysis_dir)
    with utils.chdir(analysis_dir):
        if post_config["algorithm"]["num_cores"] == "messaging":
            prog = post_config["analysis"]["distributed_process_program"]
        else:
            prog = post_config["analysis"]["process_program"]
        cl = [prog, post_config_file, dname]
        run_yaml = os.path.join(dname, "run_info.yaml")
        if os.path.exists(run_yaml):
            cl.append(run_yaml)
        subprocess.check_call(cl)
Example #55
def main(config_file, queues=None, task_module=None, base_dir=None):
    if base_dir is None:
        base_dir = os.getcwd()
    if task_module is None:
        task_module = "bcbio.distributed.tasks"
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(base_dir, "log")
    signals.setup_logging.connect(celery_logger(config))
    setup_logging(config)
    logger.info("Starting distributed worker process: {0}".format(queues if queues else ""))
    with utils.chdir(base_dir):
        with utils.curdir_tmpdir() as work_dir:
            dirs = {"work": work_dir, "config": os.path.dirname(config_file)}
            with create_celeryconfig(task_module, dirs, config,
                                     os.path.abspath(config_file)):
                run_celeryd(work_dir, queues)
Example #56
def test_variable_expansion():
    """Test expanding the environment variables in the
    test yaml.
    """
    config = load_config("data/loading_test/variables.yaml")

    try:
        for variable, value in config.items():
            assert (os.environ[variable] == value
                    ), "The strings %s and %s don't match (variable %s)" % (
                        os.environ[variable], value, variable)

    # When the key isn't in os.environ
    except KeyError as e:
        for variable, value in config[e.args[0]].items():
            assert (os.environ[variable] == value
                    ), "The strings %s and %s don't match (variable %s)" % (
                        os.environ[variable], value, variable)
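# Hedged illustration of what test_variable_expansion checks: a value such as
# "$HOME" in data/loading_test/variables.yaml should come back from
# load_config with the environment variable substituted.  os.path.expandvars
# shows the same substitution on a single string; whether load_config uses it
# internally is an assumption here.
import os
print os.path.expandvars("$HOME/analysis")  # e.g. /home/user/analysis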
def main(config_file, fc_dir, run_info_yaml=None, num_workers=None):
    config = load_config(config_file)
    assert config["algorithm"]["num_cores"] == "messaging", \
           "Use this script only with configured 'messaging' parallelization"
    if num_workers is None:
        if config["distributed"].get("num_workers", "") == "all":
            cp = config["distributed"]["cluster_platform"]
            cluster = __import__("bcbio.distributed.{0}".format(cp),
                                 fromlist=[cp])
            num_workers = cluster.available_nodes(
                config["distributed"]["platform_args"]) - 1
        if num_workers is None:
            num_workers = _needed_workers(
                get_run_info(fc_dir, config, run_info_yaml)[-1])
    task_module = "bcbio.distributed.tasks"
    args = [config_file, fc_dir]
    if run_info_yaml:
        args.append(run_info_yaml)
    run_and_monitor(config, config_file, args, num_workers, task_module)
Example #58
def _generate_metrics(bam_fname, config_file, ref_file,
                      bait_file, target_file):
    """Run Picard commands to generate metrics files when missing.
    """
    config = load_config(config_file)
    broad_runner = broad.runner_from_config(config)
    bam_fname = os.path.abspath(bam_fname)
    path = os.path.dirname(bam_fname)
    out_dir = os.path.join(path, "metrics")
    utils.safe_makedir(out_dir)
    with utils.chdir(out_dir):
        with utils.curdir_tmpdir() as tmp_dir:
            cur_bam = os.path.basename(bam_fname)
            if not os.path.exists(cur_bam):
                os.symlink(bam_fname, cur_bam)
            gen_metrics = PicardMetrics(broad_runner, tmp_dir)
            gen_metrics.report(cur_bam, ref_file,
                               _bam_is_paired(bam_fname),
                               bait_file, target_file)
    return out_dir