Example #1
def file_logger(namespace, config_file, log_file, log_path_key=None):
    CONFIG = cl.load_config(config_file)
    if not log_path_key:
        log_path = CONFIG['log_dir'] + '/' + log_file
    else:
        log_path = CONFIG[log_path_key] + '/' + log_file

    logger = logging.getLogger(namespace)
    logger.setLevel(logging.DEBUG)

    # file handler:
    fh = logging.FileHandler(log_path)
    fh.setLevel(logging.INFO)

    # console handler
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)

    # formatter
    formatter = logging.Formatter("%(asctime)s (%(levelname)s) : %(message)s")
    fh.setFormatter(formatter)

    # add handlers to logger
    logger.addHandler(ch)
    logger.addHandler(fh)

    return logger
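
A minimal usage sketch, assuming cl.load_config reads a YAML file with a log_dir entry; the namespace, file names and messages below are hypothetical:

# config.yaml is assumed to contain, e.g.:  log_dir: /var/log/myapp
log = file_logger("myapp.worker", "config.yaml", "worker.log")
log.info("recorded in the log file and on the console")   # file handler accepts INFO and above
log.debug("shown on the console only")                     # file handler filters out DEBUG
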
Example #2
def _pair_samples_with_pipelines(run_info_yaml, config):
    """Map samples defined in input file to pipelines to run.
    """
    samples = config_utils.load_config(run_info_yaml)
    if isinstance(samples, dict):
        resources = samples.pop("resources")
        samples = samples["details"]
    else:
        resources = {}
    ready_samples = []
    for sample in samples:
        if "files" in sample:
            del sample["files"]
        # add any resources to this item to recalculate global configuration
        usample = copy.deepcopy(sample)
        usample.pop("algorithm", None)
        if "resources" not in usample:
            usample["resources"] = {}
        for prog, pkvs in resources.items():
            if prog not in usample["resources"]:
                usample["resources"][prog] = {}
            if pkvs is not None:
                for key, val in pkvs.items():
                    usample["resources"][prog][key] = val
        config = config_utils.update_w_custom(config, usample)
        sample["resources"] = {}
        ready_samples.append(sample)
    paired = [(x, _get_pipeline(x)) for x in ready_samples]
    d = defaultdict(list)
    for x in paired:
        d[x[1]].append([x[0]])
    return d, config
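
For orientation, the run_info input handled here is either a bare list of sample dictionaries or a dictionary with details and resources keys; a hedged sketch of the latter, with illustrative field values:

run_info = {
    "resources": {"gatk": {"jvm_opts": ["-Xms750m", "-Xmx2g"]}},
    "details": [
        {"description": "Sample1",
         "algorithm": {"aligner": "bwa"},
         "files": ["S1_1.fastq", "S1_2.fastq"]},
    ],
}
# Each per-program entry under "resources" is folded into every sample before
# config_utils.update_w_custom recalculates the global configuration.
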
Example #3
def main(config_file, fc_dir, analysis_dir, run_info_yaml=None):
    config = load_config(config_file)
    galaxy_api = (GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
                  if "galaxy_api_key" in config else None)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    base_folder_name = "%s_%s" % (fc_date, fc_name)
    run_details = lims_run_details(run_info, base_folder_name)
    for (library_name, access_role, dbkey, lane, bc_id, name, desc,
            local_name, fname_out) in run_details:
        library_id = (get_galaxy_library(library_name, galaxy_api)
                      if library_name else None)
        upload_files = list(select_upload_files(local_name, bc_id, fc_dir,
                                                analysis_dir, config, fname_out))
        if len(upload_files) > 0:
            print(lane, bc_id, name, desc, library_name)
            print("Creating storage directory")
            if library_id:
                folder, cur_galaxy_files = get_galaxy_folder(library_id,
                               base_folder_name, name, desc, galaxy_api)
            else:
                cur_galaxy_files = []
            store_dir = move_to_storage(lane, bc_id, base_folder_name, upload_files,
                                        cur_galaxy_files, config, config_file,
                                        fname_out)
            if store_dir and library_id:
                print("Uploading directory of files to Galaxy")
                print(galaxy_api.upload_directory(library_id, folder['id'],
                                                  store_dir, dbkey, access_role))
    if galaxy_api and not run_info_yaml:
        add_run_summary_metrics(analysis_dir, galaxy_api)
Example #5
def main(bam_file, config_file=None, chrom='all', start=0, end=None,
         outfile=None, normalize=False, use_tempfile=False):
    if config_file:
        config = load_config(config_file)
    else:
        config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    if start > 0:
        start = int(start) - 1
    if end is not None:
        end = int(end)
    regions = [(chrom, start, end)]
    if os.path.abspath(bam_file) == os.path.abspath(outfile):
        sys.stderr.write("Bad arguments, input and output files are the same.\n")
        sys.exit(1)
    if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
        if use_tempfile:
            # Use a temp file to avoid any possibility of not having write permission
            out_handle = tempfile.NamedTemporaryFile(delete=False)
            wig_file = out_handle.name
        else:
            wig_file = "%s.wig" % os.path.splitext(outfile)[0]
            out_handle = open(wig_file, "w")
        with closing(out_handle):
            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle,
                                                   normalize)
        try:
            if wig_valid:
                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
        finally:
            os.remove(wig_file)
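
A hypothetical direct call: without a config_file the function falls back to the inline {"program": {"ucsc_bigwig": "wigToBigWig"}} default and writes sample.bigwig next to the input BAM:

main("sample.bam", chrom="chr22", normalize=True)
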
Example #7
 def test_programs(self):
     """Identify programs and versions used in analysis.
     """
     config = load_config(
         os.path.join(self.data_dir, "automated",
                      "post_process-sample.yaml"))
      print(programs.get_versions(config))
Example #8
 def test_1_parallel_vcf_combine(self):
     """Parallel combination of VCF files, split by chromosome.
     """
     files = [
         os.path.join(self.var_dir, "S1-variants.vcf"),
         os.path.join(self.var_dir, "S2-variants.vcf")
     ]
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq",
                             "hg19.fa")
     with make_workdir() as workdir:
         config = load_config(
             get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     region_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
     if os.path.exists(region_dir):
         shutil.rmtree(region_dir)
     if os.path.exists(self.combo_file):
         os.remove(self.combo_file)
     with prun.start({
             "type": "local",
             "cores": 1
     }, [[config]], config) as run_parallel:
         vcfutils.parallel_combine_variants(files, self.combo_file,
                                            ref_file, config, run_parallel)
     for fname in files:
         if os.path.exists(fname + ".gz"):
             subprocess.check_call(["gunzip", fname + ".gz"])
         if os.path.exists(fname + ".gz.tbi"):
             os.remove(fname + ".gz.tbi")
Example #9
 def test_1_parallel_vcf_combine(self):
     """Parallel combination of VCF files, split by chromosome.
     """
     var_dir = os.path.join(self.data_dir, "variants")
     files = [
         os.path.join(var_dir, "S1-variants.vcf"),
         os.path.join(var_dir, "S2-variants.vcf")
     ]
     out_file = os.path.join(var_dir, "S1_S2-combined.vcf")
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq",
                             "hg19.fa")
     config = load_config(
         os.path.join(self.data_dir, "automated",
                      "post_process-sample.yaml"))
     run_parallel = parallel_runner({
         "type": "local",
         "cores": 1
     }, {}, config)
     region_dir = os.path.join(var_dir, "S1_S2-combined-regions")
     if os.path.exists(region_dir):
         shutil.rmtree(region_dir)
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.parallel_combine_variants(files, out_file, ref_file, config,
                                        run_parallel)
Example #10
 def test_1_parallel_vcf_combine(self):
     """Parallel combination of VCF files, split by chromosome.
     """
     from bcbio.variation import vcfutils
     files = [
         os.path.join(self.var_dir, "S1-variants.vcf"),
         os.path.join(self.var_dir, "S2-variants.vcf")
     ]
     with make_workdir() as workdir:
         config = load_config(
             get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     region_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
     if os.path.exists(region_dir):
         shutil.rmtree(region_dir)
     if os.path.exists(self.combo_file):
         os.remove(self.combo_file)
     reqs = {"type": "local", "cores": 1}
     with prun.start(reqs, [[config]], config) as run_parallel:
         vcfutils.parallel_combine_variants(
             files, self.combo_file, self.ref_file, config, run_parallel)
     for fname in files:
         if os.path.exists(fname + ".gz"):
             subprocess.check_call(["gunzip", fname + ".gz"])
         if os.path.exists(fname + ".gz.tbi"):
             os.remove(fname + ".gz.tbi")
Example #11
 def test_programs(self):
     """Identify programs and versions used in analysis.
     """
     with make_workdir() as workdir:
         config = load_config(
             get_post_process_yaml(self.automated_dir, workdir))
          print(programs._get_versions(config))
Example #12
def main(config_file, month, year):
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"],
                                 config["galaxy_api_key"])
    smonth, syear = (month - 1, year) if month > 1 else (12, year - 1)
    start_date = datetime(syear, smonth, 15, 0, 0, 0)
    # last day calculation useful if definition of month is
    # from first to last day instead of 15th-15th
    #(_, last_day) = calendar.monthrange(year, month)
    end_date = datetime(year, month, 14, 23, 59, 59)
    out_file = "%s_%s" % (start_date.strftime("%b"),
                          end_date.strftime("%b-%Y-sequencing.csv"))
    with open(out_file, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow([
            "Date", "Product", "Payment", "Researcher", "Lab", "Email",
            "Project", "Sample", "Description", "Genome", "Flowcell", "Lane",
            "Received", "Notes"
        ])
        for s in galaxy_api.sqn_report(start_date.isoformat(),
                                       end_date.isoformat()):
            f_parts = s["sqn_run"]["run_folder"].split("_")
            flowcell = "_".join([f_parts[0], f_parts[-1]])
            writer.writerow([
                s["sqn_run"]["date"], s["sqn_type"],
                s["project"]["payment_(fund_number)"],
                s["project"]["researcher"], s["project"]["lab_association"],
                s["project"]["email"], s["project"]["project_name"], s["name"],
                s["description"], s["genome_build"], flowcell,
                s["sqn_run"]["lane"],
                _received_date(s["events"]), s["sqn_run"]["results_notes"]
            ])
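
For concreteness, a call such as main(config_file, 5, 2012) reports on 15 April through 14 May 2012 and, given the strftime patterns above, writes its rows to Apr_May-2012-sequencing.csv.
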
Example #13
def main(config_file, fc_dir=None, run_info_yaml=None, numcores=None,
         paralleltype=None, queue=None, scheduler=None, upgrade=None,
         profile=None, workflow=None, inputs=None):
    work_dir = os.getcwd()
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(config, fc_dir, run_info_yaml,
                                                 numcores, paralleltype)
    parallel = {"type": paralleltype, "cores": numcores,
                "scheduler": scheduler, "queue": queue,
                "profile": profile, "module": "bcbio.distributed"}
    if parallel["type"] in ["local", "messaging-main"]:
        if numcores is None:
            config["algorithm"]["num_cores"] = numcores
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    elif parallel["type"] == "messaging":
        parallel["task_module"] = "bcbio.distributed.tasks"
        args = [config_file, fc_dir]
        if run_info_yaml:
            args.append(run_info_yaml)
        messaging.run_and_monitor(config, config_file, args, parallel)
    elif parallel["type"] == "ipython":
        assert parallel["queue"] is not None, "IPython parallel requires a specified queue (-q)"
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
Example #14
 def test_1_parallel_vcf_combine(self, global_config):
     """Parallel combination of VCF files, split by chromosome.
     """
     from bcbio.variation import vcfutils
     files = [
         os.path.join(self.var_dir, "S1-variants.vcf"),
         os.path.join(self.var_dir, "S2-variants.vcf")
     ]
     config = load_config(global_config)
     config["algorithm"] = {}
     region_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
     if os.path.exists(region_dir):
         shutil.rmtree(region_dir)
     if os.path.exists(self.combo_file):
         os.remove(self.combo_file)
     reqs = {"type": "local", "cores": 1}
     with prun.start(reqs, [[config]], config) as run_parallel:
         vcfutils.parallel_combine_variants(files, self.combo_file,
                                            self.ref_file, config,
                                            run_parallel)
     for fname in files:
         if os.path.exists(fname + ".gz"):
             subprocess.check_call(["gunzip", fname + ".gz"])
         if os.path.exists(fname + ".gz.tbi"):
             os.remove(fname + ".gz.tbi")
Example #15
 def test_programs(self, data_dir):
     """Identify programs and versions used in analysis.
     """
     from bcbio.provenance import programs
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(data_dir, workdir))
          print(programs._get_versions(config))
Example #16
def main(config_file, fc_dir=None, run_info_yaml=None, numcores=None,
         paralleltype=None, profile="default"):
    work_dir = os.getcwd()
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(config, fc_dir, run_info_yaml,
                                                 numcores, paralleltype)
    parallel = {"type": paralleltype, "cores": numcores,
                "profile": profile,
                "module": "bcbio.distributed"}
    if parallel["type"] in ["local", "messaging-main"]:
        if numcores is None:
            config["algorithm"]["num_cores"] = numcores
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    elif parallel["type"] == "messaging":
        parallel["task_module"] = "bcbio.distributed.tasks"
        args = [config_file, fc_dir]
        if run_info_yaml:
            args.append(run_info_yaml)
        messaging.run_and_monitor(config, config_file, args, parallel) 
    elif parallel["type"] == "ipython":
        run_main(config, config_file, work_dir, parallel,
                 fc_dir, run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" % parallel["type"])
Example #20
def coverage(align_bam):
    config_file = '/home/kwoklab-user/nextgen-python2.7/bcbio-nextgen/bcbio_system.yaml'
    ref_file = '/media/KwokRaid01/biodata/genomes/Hsapiens/GRCh37/seq/GRCh37.fa'
    bed_file = '/media/KwokRaid02/nina/ISMB2013/bed_files/capture_regions/130214_HG19_Cardiac_RD_EZ.GRCh37.target.bed'
    config = load_config(config_file)
    broad_runner = broad.runner_from_config(config)
#    broad_runner.run_fn("picard_index_ref", ref_file)
    broad_runner.run_fn("picard_index", align_bam)
    base, _ = os.path.splitext(os.path.basename(align_bam))
    work_dir = os.path.dirname(align_bam)
    out_file = os.path.join(work_dir, base)
    params = ["-R", ref_file]
#    with file_transaction(out_file) as tx_out_file:
    params += ["-T", "DepthOfCoverage", "-o", out_file, "-I", align_bam, "-L", bed_file]
    broad_runner.run_gatk(params)
    return out_file
Example #21
def main(local_config, post_config_file=None,
         process_msg=True, store_msg=True, qseq=True, fastq=True):
    config = load_config(local_config)
    log_handler = create_log_handler(config)

    with log_handler.applicationbound():
        search_for_new(config, local_config, post_config_file,
                       process_msg, store_msg, qseq, fastq)
Example #22
def load_couch_server(config_file):
    """loads couch server with settings specified in 'config_file'"""
    try:
        db_conf = cl.load_config(config_file)["statusdb"]
        url = db_conf["username"] + ":" + db_conf["password"] + "@" + db_conf["url"] + ":" + str(db_conf["port"])
        couch = couchdb.Server("http://" + url)
        return couch
    except KeyError:
        raise RuntimeError('"statusdb" section missing from configuration file.')
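
A usage sketch with placeholder credentials, showing only the statusdb keys the function reads:

# config.yaml (placeholder values):
# statusdb:
#   username: dbuser
#   password: secret
#   url: localhost
#   port: 5984
couch = load_couch_server("config.yaml")
# equivalent to couchdb.Server("http://dbuser:secret@localhost:5984")
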
Example #25
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     config = load_config(os.path.join(self.data_dir, "automated",
                                       "post_process-sample.yaml"))
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude, ref_file, config)
Example #27
 def test_2_vcf_exclusion(self, global_config):
     """Exclude samples from VCF files.
     """
     from bcbio.variation import vcfutils
     config = load_config(global_config)
     config["algorithm"] = {}
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude,
                              self.ref_file, config)
Example #28
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
     fname = os.path.join(self.data_dir, "variants", "S1_S2-combined.vcf")
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     config = load_config(os.path.join(self.data_dir, "automated",
                                       "post_process-sample.yaml"))
     out_file = "%s-exclude%s" % os.path.splitext(fname)
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(fname, out_file, to_exclude, ref_file, config)
Example #29
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude, ref_file, config)
Example #30
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(self.data_dir, workdir))
         config["algorithm"] = {}
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude, ref_file, config)
Example #31
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
     with make_workdir() as workdir:
         config = load_config(
             get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude,
                              self.ref_file, config)
Example #32
def analyze_locally(dname, post_config_file, fastq_dir):
    """Run analysis directly on the local machine.
    """
    assert fastq_dir is not None
    post_config = load_config(post_config_file)
    analysis_dir = os.path.join(fastq_dir, os.pardir, "analysis")
    utils.safe_makedir(analysis_dir)
    with utils.chdir(analysis_dir):
        prog = "bcbio_nextgen.py"
        cl = [prog, post_config_file, dname]
        run_yaml = os.path.join(dname, "run_info.yaml")
        if os.path.exists(run_yaml):
            cl.append(run_yaml)
        subprocess.check_call(cl)
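
For a hypothetical flowcell directory, the subprocess call assembled above amounts to:

# executed from <fastq_dir>/../analysis (created if missing):
#   bcbio_nextgen.py post_process.yaml /data/130401_FC123 /data/130401_FC123/run_info.yaml
# where the trailing run_info.yaml argument is only added when that file exists.
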
Example #33
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
     from bcbio.variation import vcfutils
     with make_workdir() as workdir:
         config = load_config(
             get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(
         self.combo_file, out_file, to_exclude, self.ref_file, config)
Example #34
 def test_3_vcf_split_combine(self, global_config):
     """Split a VCF file into SNPs and indels, then combine back together.
     """
     from bcbio.variation import vcfutils
     config = load_config(global_config)
     config["algorithm"] = {}
     fname = os.path.join(self.var_dir, "S1-variants.vcf")
     snp_file, indel_file = vcfutils.split_snps_indels(
         fname, self.ref_file, config)
     merge_file = "%s-merge%s.gz" % os.path.splitext(fname)
     vcfutils.combine_variant_files([snp_file, indel_file], merge_file,
                                    self.ref_file, config)
     for f in [snp_file, indel_file, merge_file]:
         self._remove_vcf(f)
Example #35
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
      # Be backwards compatible with 0.7.6 -- remove after the 0.7.7 release
     if prun is None:
         return
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     config = load_config(os.path.join(self.data_dir, "automated",
                                       "post_process-sample.yaml"))
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude, ref_file, config)
Example #36
 def test_3_vcf_split_combine(self):
     """Split a VCF file into SNPs and indels, then combine back together.
     """
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     fname = os.path.join(self.var_dir, "S1-variants.vcf")
     snp_file, indel_file = vcfutils.split_snps_indels(fname, ref_file, config)
     merge_file = "%s-merge%s.gz" % os.path.splitext(fname)
     vcfutils.combine_variant_files([snp_file, indel_file], merge_file, ref_file,
                                    config)
     for f in [snp_file, indel_file, merge_file]:
         self._remove_vcf(f)
Example #38
 def test_3_vcf_split_combine(self):
     """Split a VCF file into SNPs and indels, then combine back together.
     """
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(self.data_dir, workdir))
         config["algorithm"] = {}
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     fname = os.path.join(self.var_dir, "S1-variants.vcf")
     snp_file, indel_file = vcfutils.split_snps_indels(fname, ref_file, config)
     merge_file = "%s-merge%s.gz" % os.path.splitext(fname)
     vcfutils.combine_variant_files([snp_file, indel_file], merge_file, ref_file,
                                    config)
     for f in [snp_file, indel_file, merge_file]:
         self._remove_vcf(f)
Example #39
 def test_2_vcf_exclusion(self):
     """Exclude samples from VCF files.
     """
      # Be backwards compatible with 0.7.6 -- remove after the 0.7.7 release
     if prun is None:
         return
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(self.data_dir, workdir))
         config["algorithm"] = {}
     out_file = utils.append_stem(self.combo_file, "-exclude")
     to_exclude = ["S1"]
     if os.path.exists(out_file):
         os.remove(out_file)
     vcfutils.exclude_samples(self.combo_file, out_file, to_exclude, ref_file, config)
Example #40
 def test_3_vcf_split_combine(self):
     """Split a VCF file into SNPs and indels, then combine back together.
     """
     from bcbio.variation import vcfutils
     with make_workdir() as workdir:
         config = load_config(
             get_post_process_yaml(self.automated_dir, workdir))
         config["algorithm"] = {}
     fname = os.path.join(self.var_dir, "S1-variants.vcf")
     snp_file, indel_file = vcfutils.split_snps_indels(
         fname, self.ref_file, config)
     merge_file = "%s-merge%s.gz" % os.path.splitext(fname)
     vcfutils.combine_variant_files([snp_file, indel_file], merge_file,
                                    self.ref_file, config)
     for f in [snp_file, indel_file, merge_file]:
         self._remove_vcf(f)
Example #42
def main(config_file, queues=None, task_module=None, base_dir=None):
    if base_dir is None:
        base_dir = os.getcwd()
    if task_module is None:
        task_module = "bcbio.distributed.tasks"
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(base_dir, "log")
    signals.setup_logging.connect(celery_logger(config))
    setup_logging(config)
    logger.info("Starting distributed worker process: {0}".format(queues if queues else ""))
    with utils.chdir(base_dir):
        with utils.curdir_tmpdir() as work_dir:
            dirs = {"work": work_dir, "config": os.path.dirname(config_file)}
            with create_celeryconfig(task_module, dirs, config,
                                     os.path.abspath(config_file)):
                run_celeryd(work_dir, queues)
Example #43
def main(config_file,
         fc_dir=None,
         run_info_yaml=None,
         numcores=None,
         paralleltype=None,
         queue=None,
         scheduler=None,
         upgrade=None,
         profile=None,
         workflow=None,
         inputs=None):
    work_dir = os.getcwd()
    config = load_config(config_file)
    if config.get("log_dir", None) is None:
        config["log_dir"] = os.path.join(work_dir, "log")
    paralleltype, numcores = _get_cores_and_type(config, fc_dir, run_info_yaml,
                                                 numcores, paralleltype)
    parallel = {
        "type": paralleltype,
        "cores": numcores,
        "scheduler": scheduler,
        "queue": queue,
        "profile": profile,
        "module": "bcbio.distributed"
    }
    if parallel["type"] in ["local", "messaging-main"]:
        if numcores is None:
            config["algorithm"]["num_cores"] = numcores
        run_main(config, config_file, work_dir, parallel, fc_dir,
                 run_info_yaml)
    elif parallel["type"] == "messaging":
        parallel["task_module"] = "bcbio.distributed.tasks"
        args = [config_file, fc_dir]
        if run_info_yaml:
            args.append(run_info_yaml)
        messaging.run_and_monitor(config, config_file, args, parallel)
    elif parallel["type"] == "ipython":
        assert parallel["queue"] is not None, \
            "IPython parallel requires a specified queue (-q)"
        assert parallel["scheduler"] is not None, \
            "IPython parallel requires a specified scheduler (-s)"
        run_main(config, config_file, work_dir, parallel, fc_dir,
                 run_info_yaml)
    else:
        raise ValueError("Unexpected type of parallel run: %s" %
                         parallel["type"])
Example #44
def main(config_file, fc_dir):
    work_dir = os.getcwd()
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config["galaxy_url"], config["galaxy_api_key"])
    fc_name, fc_date = flowcell.parse_dirname(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = flowcell.get_fastq_dir(fc_dir)
    if config["algorithm"]["num_cores"] > 1:
        pool = Pool(config["algorithm"]["num_cores"])
        try:
            pool.map(
                _process_wrapper, ((i, fastq_dir, fc_name, fc_date, config, config_file) for i in run_info["details"])
            )
        except:
            pool.terminate()
            raise
    else:
        for i in run_info["details"]:
            _process_wrapper((i, fastq_dir, fc_name, fc_date, config, config_file))
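
The _process_wrapper indirection exists because Pool.map hands each element to the callable as a single argument; a minimal stand-in, where process_lane is a hypothetical name for the real per-lane routine defined elsewhere in the module:

def _process_wrapper(args):
    # Unpack the tuple assembled above and delegate to the per-lane routine.
    i, fastq_dir, fc_name, fc_date, config, config_file = args
    return process_lane(i, fastq_dir, fc_name, fc_date, config, config_file)
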
Example #46
 def test_1_parallel_vcf_combine(self):
     """Parallel combination of VCF files, split by chromosome.
     """
     files = [os.path.join(self.var_dir, "S1-variants.vcf"), os.path.join(self.var_dir, "S2-variants.vcf")]
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     config = load_config(os.path.join(self.data_dir, "automated",
                                       "post_process-sample.yaml"))
     run_parallel = parallel_runner({"type": "local", "cores": 1}, {}, config)
     region_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
     if os.path.exists(region_dir):
         shutil.rmtree(region_dir)
     if os.path.exists(self.combo_file):
         os.remove(self.combo_file)
     vcfutils.parallel_combine_variants(files, self.combo_file, ref_file, config, run_parallel)
     for fname in files:
         if os.path.exists(fname + ".gz"):
             subprocess.check_call(["gunzip", fname + ".gz"])
         if os.path.exists(fname + ".gz.tbi"):
             os.remove(fname + ".gz.tbi")
Example #47
def main(config_file, fc_dir):
    work_dir = os.getcwd()
    config = load_config(config_file)
    galaxy_api = GalaxyApiAccess(config['galaxy_url'], config['galaxy_api_key'])
    fc_name, fc_date = get_flowcell_info(fc_dir)
    run_info = galaxy_api.run_details(fc_name)
    fastq_dir = get_fastq_dir(fc_dir)
    if config["algorithm"]["num_cores"] > 1:
        pool = Pool(config["algorithm"]["num_cores"])
        try:
            pool.map(_process_wrapper,
                    ((i, fastq_dir, fc_name, fc_date, config, config_file)
                        for i in run_info["details"]))
        except:
            pool.terminate()
            raise
    else:
        for i in run_info["details"]:
            _process_wrapper((i, fastq_dir, fc_name, fc_date, config, config_file))
Example #48
def count(bam_file):
    bed_file = '/media/KwokRaid02/nina/ISMB2013/bed_files/auto_exon.bed'
    ref_file = '/media/KwokRaid01/biodata/genomes/Hsapiens/GRCh37/seq/GRCh37.fa'
    config_file = '/home/kwoklab-user/nextgen-python2.7/bcbio-nextgen/bcbio_system.yaml'
    gene_file = '/home/kwoklab-user/nina/ISMB2013/bed_files/geneTrack.ensembl_mergedSorted_noChrMnumericCHR.txt'
    work_dir = '/media/KwokData02/ISMB2013-analysis'
    config = load_config(config_file)
    broad_runner = broad.runner_from_config(config)
    base, _ = os.path.splitext(os.path.basename(bam_file))

#    gtf_file = '/media/KwokRaid01/biodata/genomes/Hsapiens/GRCh37/ref-transcripts.gtf'
#    out_file = os.path.join(work_dir, base + ".counts")
#    htseq = 'htseq-count'
#    in_file = sam_to_querysort_sam(bam_file, config)
#    with file_transaction(out_file) as tmp_out_file:
#        htseq_cmd = ("{htseq} --mode=union --stranded=no --type=exon --idattr=gene_id {in_file}  {gtf_file} > {tmp_out_file}")
#        cmd = htseq_cmd.format(**locals())
#        print(cmd)
#        subprocess.check_call(cmd, shell=True)

    depth_of_coverage_file = os.path.join(work_dir, base + '.doc')
    params = ["-R", ref_file]
    params += ["-T", "DepthOfCoverage", "-o", depth_of_coverage_file, "-I", bam_file, "-L", bed_file, "-geneList", gene_file]
    broad_runner.run_gatk(params)

#    diagnose_file = os.path.join(work_dir, base + '.DiagnoseTargets.vcf')
#    params = ["-R", ref_file]
#    params += ["-T", "DiagnoseTargets", "-o", diagnose_file, "-I", bam_file, "-L", bed_file]
#    broad_runner.run_gatk(params)

    callable_file = os.path.join(work_dir, base + '.callable.bed')
    params = ["-R", ref_file]
    params += ["-T", "CallableLoci", "-o", callable_file, "-I", bam_file, "-L", bed_file]
    broad_runner.run_gatk(params)

    GCcontent_file = os.path.join(work_dir, base + '.GCcontent.bed')
    params = ["-R", ref_file]
    params += ["-T", "GCContentByInterval", "-o", GCcontent_file, "-L", bed_file]
    broad_runner.run_gatk(params)
    return True
Example #49
 def test_1_parallel_vcf_combine(self):
     """Parallel combination of VCF files, split by chromosome.
     """
      # Be backwards compatible with 0.7.6 -- remove after the 0.7.7 release
     if prun is None:
         return
     files = [os.path.join(self.var_dir, "S1-variants.vcf"), os.path.join(self.var_dir, "S2-variants.vcf")]
     ref_file = os.path.join(self.data_dir, "genomes", "hg19", "seq", "hg19.fa")
     config = load_config(os.path.join(self.data_dir, "automated",
                                       "post_process-sample.yaml"))
     region_dir = os.path.join(self.var_dir, "S1_S2-combined-regions")
     if os.path.exists(region_dir):
         shutil.rmtree(region_dir)
     if os.path.exists(self.combo_file):
         os.remove(self.combo_file)
     with prun.start({"type": "local", "cores": 1}, [[config]], config) as run_parallel:
         vcfutils.parallel_combine_variants(files, self.combo_file, ref_file, config, run_parallel)
     for fname in files:
         if os.path.exists(fname + ".gz"):
             subprocess.check_call(["gunzip", fname + ".gz"])
         if os.path.exists(fname + ".gz.tbi"):
             os.remove(fname + ".gz.tbi")
Example #51

_base_template = r"""
\documentclass{article}
\usepackage{fullpage}
\usepackage{graphicx}
\usepackage{placeins}

\begin{document}
% for part in parts:
    ${part}
% endfor
\end{document}
"""

if __name__ == "__main__":
    # Handle arguments
    parser = argparse.ArgumentParser(description="Generate the summary PDF (as in earlier versions of the pipeline).")
    parser.add_argument("bam_file", type=str, help="The analysis ready bam file.")
    parser.add_argument("fasta_ref", type=str, help="The reference fasta file.")
    parser.add_argument("bait_file", type=str, help="The bed file detailing the bait/target region.")
    parser.add_argument("sample_name", type=str, help="The name of the sample to appear in the summary.")
    parser.add_argument("config_file", type=str, help="The system configuration file used in the bcbio pipeline.")
    parser.add_argument("output_dir", type=str, help="The directory where the output will be written.")
    args = parser.parse_args()

    config = config_utils.load_config(args.config_file)
    variant_align_summary(args.bam_file, args.fasta_ref, args.bait_file, args.sample_name, config, args.output_dir)
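
An assumed command-line invocation of this script; the script name and all paths are hypothetical:

#   python summary_pdf.py Sample1-ready.bam GRCh37.fa baits.bed Sample1 \
#       bcbio_system.yaml output_dir
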
Example #52
 def test_programs(self, global_config):
     """Identify programs and versions used in analysis.
     """
     from bcbio.provenance import programs
     print(programs._get_versions(load_config(global_config)))
Example #53
 def test_programs(self):
     """Identify programs and versions used in analysis.
     """
     with make_workdir() as workdir:
         config = load_config(get_post_process_yaml(self.data_dir, workdir))
          print(programs._get_versions(config))