Example #1
0
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Scan configured run directories and handle any unreported finished runs.

    For each finished, not-yet-reported directory: record it, convert the CSV
    samplesheet (if any) to run_info.yaml, optionally generate qseq/fastq
    files, and publish process/store messages as requested.
    """
    seen = _read_reported(config["msg_db"])
    for run_dir in _get_directories(config):
        # Skip paths that are not directories or were already reported.
        if not os.path.isdir(run_dir) or run_dir in seen:
            continue
        # Only act once the instrument is done writing to the directory.
        if not _is_finished_dumping(run_dir):
            continue
        log.info("The instrument has finished dumping on directory %s" % run_dir)
        _update_reported(config["msg_db"], run_dir)

        sheet = samplesheet.run_has_samplesheet(run_dir, config)
        if sheet:
            yaml_out = os.path.join(run_dir, "run_info.yaml")
            log.info("CSV Samplesheet %s found, converting to %s" % (sheet, yaml_out))
            samplesheet.csv2yaml(sheet, yaml_out)
        if qseq:
            log.info("Generating qseq files for %s" % run_dir)
            _generate_qseq(get_qseq_dir(run_dir), config)
        if fastq:
            log.info("Generating fastq files for %s" % run_dir)
            _generate_fastq(run_dir, config)

        store_files, process_files = _files_to_copy(run_dir)
        if process_msg:
            finished_message(config["msg_process_tag"], run_dir, process_files, amqp_config)
        if store_msg:
            finished_message(config["msg_store_tag"], run_dir, store_files, amqp_config)
Example #2
0
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Look for newly finished run directories that have not been reported yet.

    Finished runs are recorded in the message database, their samplesheet is
    converted to run_info.yaml, qseq/fastq generation is triggered on demand,
    and AMQP notifications are sent according to process_msg/store_msg.
    """
    already_reported = _read_reported(config["msg_db"])
    # Consider only real directories that have not been handled before.
    candidates = (d for d in _get_directories(config)
                  if os.path.isdir(d) and d not in already_reported)
    for run_dir in candidates:
        if not _is_finished_dumping(run_dir):
            continue
        log.info("The instrument has finished dumping on directory %s" % run_dir)
        _update_reported(config["msg_db"], run_dir)

        sheet_csv = samplesheet.run_has_samplesheet(run_dir, config)
        if sheet_csv:
            target = os.path.join(run_dir, "run_info.yaml")
            log.info("CSV Samplesheet %s found, converting to %s" %
                     (sheet_csv, target))
            samplesheet.csv2yaml(sheet_csv, target)

        if qseq:
            log.info("Generating qseq files for %s" % run_dir)
            _generate_qseq(get_qseq_dir(run_dir), config)
        if fastq:
            log.info("Generating fastq files for %s" % run_dir)
            _generate_fastq(run_dir, config)

        to_store, to_process = _files_to_copy(run_dir)
        if process_msg:
            finished_message(config["msg_process_tag"], run_dir,
                             to_process, amqp_config)
        if store_msg:
            finished_message(config["msg_store_tag"], run_dir,
                             to_store, amqp_config)
def _process_samplesheets(dname, config):
    """Convert an Illumina CSV samplesheet in *dname*, if present, to YAML.

    The YAML output is written as run_info.yaml inside the run directory for
    downstream post-processing; nothing happens when no samplesheet is found.
    """
    sheet = samplesheet.run_has_samplesheet(dname, config)
    if not sheet:
        return
    target = os.path.join(dname, "run_info.yaml")
    log.info("CSV Samplesheet %s found, converting to %s" % (sheet, target))
    samplesheet.csv2yaml(sheet, target)
Example #4
0
def _process_samplesheets(dname, config):
    """Turn an Illumina CSV samplesheet found in *dname* into run_info.yaml.

    A no-op when the run directory contains no samplesheet.
    """
    sheet = samplesheet.run_has_samplesheet(dname, config)
    if not sheet:
        return
    yaml_path = os.path.join(dname, "run_info.yaml")
    logger2.info("CSV Samplesheet %s found, converting to %s" %
                 (sheet, yaml_path))
    samplesheet.csv2yaml(sheet, yaml_path)
 def toyaml(self, ssheet):
     """Convert the CSV samplesheet *ssheet* to YAML and return the parsed data.

     The generated file path is stored on ``self.out_file`` so the caller can
     remove it later.
     """
     self.out_file = samplesheet.csv2yaml(ssheet)
     assert os.path.exists(self.out_file)
     # safe_load: yaml.load without an explicit Loader is deprecated since
     # PyYAML 5.1 and an error in PyYAML 6; the generated YAML needs no
     # custom tags. The with-block already closes the handle, so the
     # redundant close() call was dropped.
     with open(self.out_file) as in_handle:
         info = yaml.safe_load(in_handle)
     return info
 def toyaml(self, ssheet):
     """Convert the CSV samplesheet *ssheet* to YAML and return the parsed data.

     The generated file path is stored on ``self.out_file`` so the caller can
     remove it later.
     """
     self.out_file = samplesheet.csv2yaml(ssheet)
     assert os.path.exists(self.out_file)
     # safe_load: yaml.load without an explicit Loader is deprecated since
     # PyYAML 5.1 and an error in PyYAML 6; the generated YAML needs no
     # custom tags. The with-block already closes the handle, so the
     # redundant close() call was dropped.
     with open(self.out_file) as in_handle:
         info = yaml.safe_load(in_handle)
     return info
 def test_toyaml(self):
     """Convert CSV Illumina SampleSheet to YAML.
     """
     out_file = samplesheet.csv2yaml(self.ss_file)
     assert os.path.exists(out_file)
     # safe_load: yaml.load without an explicit Loader is deprecated since
     # PyYAML 5.1 and an error in PyYAML 6; the generated YAML needs no
     # custom tags.
     with open(out_file) as in_handle:
         info = yaml.safe_load(in_handle)
     assert info[0]['lane'] == '1'
     assert info[0]['multiplex'][0]['barcode_id'] == 5
     os.remove(out_file)
Example #8
0
 def test_toyaml(self):
     """Convert CSV Illumina SampleSheet to YAML.
     """
     out_file = samplesheet.csv2yaml(self.ss_file)
     assert os.path.exists(out_file)
     # safe_load: yaml.load without an explicit Loader is deprecated since
     # PyYAML 5.1 and an error in PyYAML 6; the generated YAML needs no
     # custom tags.
     with open(out_file) as in_handle:
         info = yaml.safe_load(in_handle)
     assert info[0]['lane'] == '1'
     assert info[0]['multiplex'][0]['barcode_id'] == 5
     os.remove(out_file)
Example #9
0
#!/usr/bin/env python
"""Convert Illumina SampleSheet CSV files to the run_info.yaml input file.

This allows running the analysis pipeline without Galaxy, using CSV input
files from Illumina SampleSheet or Genesifter.

Usage:
  convert_samplesheet_config.py <input csv>
"""
import sys

from bcbio.solexa import samplesheet

if __name__ == "__main__":
    # Guard against a missing argument: sys.argv[1] would raise an opaque
    # IndexError; print the documented usage instead.
    if len(sys.argv) != 2:
        sys.exit("Usage: convert_samplesheet_config.py <input csv>")
    samplesheet.csv2yaml(sys.argv[1])
def main(run_id, config_file, run_info_file=None, dryrun=False):
    """Create a demultiplexing report on Google Docs for a sequencing run.

    Resolves the flowcell (archive) and work directories from the
    configuration, locates or generates a run_info.yaml, collects barcode
    metric files (falling back to parsing a CASAVA demultiplex report) and
    finally calls create_report_on_gdocs, unless *dryrun* is set.

    run_id        -- name of the run directory (required).
    config_file   -- path to the post-processing YAML configuration file.
    run_info_file -- optional existing run_info.yaml; when None it is looked
                     up in the flowcell directory or generated from the
                     samplesheet.
    dryrun        -- when True, only print what would be done.
    """
    # NOTE(review): input validation via assert is stripped under `python -O`;
    # raising explicit exceptions would be more robust.
    assert run_id, \
    "No run id was specified"
    assert os.path.exists(config_file), \
    "The configuration file, {}, could not be found".format(config_file)

    config = load_config(config_file)
    assert "gdocs_upload" in config, \
    "The configuration file, {}, has no section specifying the Google docs details".format(config_file)

    # The archive (flowcell) directory falls back to the current working
    # directory when no store_dir is configured.
    analysis_cfg = config.get("analysis", {})
    if "store_dir" in analysis_cfg:
        archive_dir = os.path.join(analysis_cfg["store_dir"], run_id)
    else:
        archive_dir = os.getcwd()

    # Use a scratch directory when the configured analysis dir for this run
    # does not exist.
    analysis_dir = None
    if "base_dir" in analysis_cfg:
        analysis_dir = os.path.join(analysis_cfg["base_dir"], run_id)
    if analysis_dir is None or not os.path.exists(analysis_dir):
        analysis_dir = tempfile.mkdtemp()

    dirs = {
        "work": os.path.normpath(analysis_dir),
        "flowcell": os.path.normpath(archive_dir)
    }
    assert os.path.exists(dirs["flowcell"]), \
    "The flowcell directory, {}, could not be found".format(dirs["flowcell"])
    assert os.path.exists(dirs["work"]), \
    "The work directory, {}, could not be found".format(dirs["work"])

    if run_info_file is None:
        run_info_file = os.path.join(dirs["flowcell"], "run_info.yaml")

        if not os.path.exists(run_info_file):
            # Locate the samplesheet and convert to yaml
            samplesheet = _find_samplesheet(dirs["flowcell"])
            assert samplesheet, \
            "Could not locate samplesheet in {}, aborting..".format(dirs["flowcell"])
            # mkstemp returns an open fd we do not need -- close it and let
            # csv2yaml write to the reserved temp path.
            fh, run_info_file = tempfile.mkstemp()
            os.close(fh)
            run_info_file = ssheet.csv2yaml(samplesheet, run_info_file)

    assert os.path.exists(run_info_file), \
    "The run info configuration file, {}, could not be found".format(run_info_file)

    fc_name, fc_date = fc.get_flowcell_info(dirs["flowcell"])
    # If we have no bc_metrics files in the workdir, we may be looking at a Casava run.
    # In that case, attempt to parse the Demultiplex_Stats.htm file and create bc_metrics files
    metric_files = glob.glob(
        os.path.join(dirs["work"], "*_barcode", "*bc[_.]metrics")) + glob.glob(
            os.path.join(dirs["work"], "*bc[_.]metrics"))
    if len(metric_files) == 0:
        casava_report = _find_casava_report(dirs["flowcell"])
        assert len(casava_report) > 0, \
        "Could not locate CASAVA demultiplex report in {}, aborting..".format(dirs["flowcell"])
        metric_files = _casava_report_to_metrics(run_info_file, casava_report,
                                                 dirs)

    assert len(metric_files) > 0, \
    "Could not locate or create required metric files, aborting.."

    print(
        "A report will be created on Google Docs based on the demultiplexed data in {}"
        .format(dirs["work"]))
    print("The configuration file is {0} and the run info file is {1}".format(
        config_file, run_info_file))
    print("The run was started on {0} and has flowcell id {1}".format(
        fc_date, fc_name))

    if not dryrun:
        create_report_on_gdocs(fc_date, fc_name, run_info_file, dirs, config)
    else:
        print("DRY-RUN: nothing uploaded")
Example #11
0
def main(run_id, config_file, run_info_file=None, dryrun=False):
    """Create a demultiplexing report on Google Docs for a sequencing run.

    Resolves the flowcell (archive) and work directories from the
    configuration, locates or generates a run_info.yaml, collects barcode
    metric files (falling back to parsing a CASAVA demultiplex report) and
    finally calls create_report_on_gdocs, unless *dryrun* is set.

    run_id        -- name of the run directory (required).
    config_file   -- path to the post-processing YAML configuration file.
    run_info_file -- optional existing run_info.yaml; when None it is looked
                     up in the flowcell directory or generated from the
                     samplesheet.
    dryrun        -- when True, only print what would be done.
    """
    # NOTE(review): input validation via assert is stripped under `python -O`;
    # raising explicit exceptions would be more robust.
    assert run_id, \
    "No run id was specified"
    assert os.path.exists(config_file), \
    "The configuration file, {}, could not be found".format(config_file)

    config = load_config(config_file)    
    assert "gdocs_upload" in config, \
    "The configuration file, {}, has no section specifying the Google docs details".format(config_file)

    # The archive (flowcell) directory falls back to the current working
    # directory when no store_dir is configured.
    analysis_cfg = config.get("analysis",{})
    if "store_dir" in analysis_cfg:    
        archive_dir = os.path.join(analysis_cfg["store_dir"], run_id)
    else:
        archive_dir = os.getcwd()

    # Use a scratch directory when the configured analysis dir for this run
    # does not exist.
    analysis_dir = None
    if "base_dir" in analysis_cfg:
        analysis_dir = os.path.join(analysis_cfg["base_dir"], run_id)
    if analysis_dir is None or not os.path.exists(analysis_dir):
        analysis_dir = tempfile.mkdtemp()

    dirs = {"work": os.path.normpath(analysis_dir),
            "flowcell": os.path.normpath(archive_dir)}
    assert os.path.exists(dirs["flowcell"]), \
    "The flowcell directory, {}, could not be found".format(dirs["flowcell"])
    assert os.path.exists(dirs["work"]), \
    "The work directory, {}, could not be found".format(dirs["work"])

    if run_info_file is None:
        run_info_file = os.path.join(dirs["flowcell"], "run_info.yaml")

        if not os.path.exists(run_info_file):
            # Locate the samplesheet and convert to yaml
            samplesheet = _find_samplesheet(dirs["flowcell"])
            assert samplesheet, \
            "Could not locate samplesheet in {}, aborting..".format(dirs["flowcell"])
            # mkstemp returns an open fd we do not need -- close it and let
            # csv2yaml write to the reserved temp path.
            fh, run_info_file = tempfile.mkstemp()
            os.close(fh)
            run_info_file = ssheet.csv2yaml(samplesheet,run_info_file)

    assert os.path.exists(run_info_file), \
    "The run info configuration file, {}, could not be found".format(run_info_file)

    fc_name, fc_date = fc.get_flowcell_info(dirs["flowcell"])
    # If we have no bc_metrics files in the workdir, we may be looking at a Casava run.
    # In that case, attempt to parse the Demultiplex_Stats.htm file and create bc_metrics files
    metric_files = glob.glob(os.path.join(dirs["work"], "*_barcode", "*bc[_.]metrics")) + glob.glob(os.path.join(dirs["work"], "*bc[_.]metrics"))
    if len(metric_files) == 0:
        casava_report = _find_casava_report(dirs["flowcell"])
        assert len(casava_report) > 0, \
        "Could not locate CASAVA demultiplex report in {}, aborting..".format(dirs["flowcell"])
        metric_files = _casava_report_to_metrics(run_info_file, casava_report, dirs)

    assert len(metric_files) > 0, \
    "Could not locate or create required metric files, aborting.."

    print("A report will be created on Google Docs based on the demultiplexed data in {}".format(dirs["work"]))
    print("The configuration file is {0} and the run info file is {1}".format(config_file, run_info_file))
    print("The run was started on {0} and has flowcell id {1}".format(fc_date, fc_name))

    if not dryrun:
        create_report_on_gdocs(fc_date, fc_name, run_info_file, dirs, config)
    else:
        print("DRY-RUN: nothing uploaded")