Esempio n. 1
0
def verify_fastq_generation(ap,
                            unaligned_dir=None,
                            lanes=None,
                            include_sample_dir=False):
    """Check that generated Fastqs match sample sheet predictions

    A temporary copy of the sample sheet is written under
    ``ap.tmp_dir`` (passing ``lanes`` through to
    ``make_custom_sample_sheet``) and the bcl2fastq output directory
    is then verified against that copy.

    Arguments:
      ap (AutoProcessor): autoprocessor pointing to the analysis
        directory to do Fastqs verification on
      unaligned_dir (str): explicitly specify the bcl2fastq output
        directory to check; if None then ``ap.params.unaligned_dir``
        is used instead
      lanes (list): specify a list of lane numbers (integers) to
        check (others will be ignored)
      include_sample_dir (bool): if True then include a
        'sample_name' directory level when checking for
        bcl2fastq2 outputs, even if one shouldn't be present

    Returns:
      True if outputs match sample sheet, False otherwise. False is
      also returned when the bcl2fastq output directory doesn't
      exist, or when an IlluminaData object can't be created from it.

    Raises:
      Exception: if ``unaligned_dir`` is None and
        ``ap.params.unaligned_dir`` is also None.
    """
    if unaligned_dir is None:
        # Fall back to the directory recorded in the project parameters
        if ap.params.unaligned_dir is None:
            raise Exception("Bcl2fastq output directory not defined")
        unaligned_dir = ap.params.unaligned_dir
    # Single-argument parenthesised form prints the same text under both
    # Python 2 (print statement) and Python 3 (print function)
    print("Checking bcl2fastq output directory '%s'" % unaligned_dir)
    bcl_to_fastq_dir = os.path.join(ap.analysis_dir, unaligned_dir)
    if not os.path.isdir(bcl_to_fastq_dir):
        # Directory doesn't exist
        return False
    # Make a temporary sample sheet to verify against; the timestamp
    # in the name keeps separate verification runs from overwriting
    # each other's files
    tmp_sample_sheet = os.path.join(
        ap.tmp_dir,
        "SampleSheet.verify.%s.csv" % time.strftime("%Y%m%d%H%M%S"))
    make_custom_sample_sheet(ap.params.sample_sheet,
                             tmp_sample_sheet,
                             lanes=lanes)
    # Try to create an IlluminaData object
    try:
        illumina_data = IlluminaData.IlluminaData(ap.analysis_dir,
                                                  unaligned_dir=unaligned_dir)
    except IlluminaData.IlluminaDataError as ex:
        # Failed to initialise: report it and treat as a failed
        # verification rather than propagating the error
        logger.warning("Failed to get information from %s: %s",
                       bcl_to_fastq_dir, ex)
        return False
    # Do check
    return IlluminaData.verify_run_against_sample_sheet(
        illumina_data, tmp_sample_sheet, include_sample_dir=include_sample_dir)
Esempio n. 2
0
                for sample in project.samples:
                    if bcf_utils.name_matches(sample.name, sample_pattern):
                        for fastq in sample.fastq:
                            fastq_file = os.path.join(sample.dirn, fastq)
                            print "\tCopying .../%s" % os.path.basename(
                                fastq_file)
                            dst = os.path.abspath(os.path.basename(fastq_file))
                            if os.path.exists(dst):
                                logging.error(
                                    "File %s already exists! Skipped" % dst)
                            else:
                                shutil.copy(fastq_file, dst)

    # Verify against sample sheet
    if options.sample_sheet is not None:
        if IlluminaData.verify_run_against_sample_sheet(
                illumina_data, options.sample_sheet):
            print "Verification against sample sheet '%s': OK" % \
                options.sample_sheet
            status = 0
        else:
            logging.error("Verification against sample sheet '%s': FAILED" %
                          options.sample_sheet)
            status = 1
        sys.exit(status)

    # Merge multiple fastqs in each sample
    if options.merge_fastqs:
        for project in illumina_data.projects:
            for sample in project.samples:
                for read in (1, 2):
                    # Concatenate fastqs for this read
            if bcf_utils.name_matches(project.name,project_pattern):
                # Loop through samples
                for sample in project.samples:
                    if bcf_utils.name_matches(sample.name,sample_pattern):
                        for fastq in sample.fastq:
                            fastq_file = os.path.join(sample.dirn,fastq)
                            print "\tCopying .../%s" % os.path.basename(fastq_file)
                            dst = os.path.abspath(os.path.basename(fastq_file))
                            if os.path.exists(dst):
                                logging.error("File %s already exists! Skipped" % dst)
                            else:
                                shutil.copy(fastq_file,dst)

    # Verify against sample sheet
    if options.sample_sheet is not None:
        if IlluminaData.verify_run_against_sample_sheet(illumina_data,options.sample_sheet):
            print "Verification against sample sheet '%s': OK" % \
                options.sample_sheet
            status = 0
        else:
            logging.error("Verification against sample sheet '%s': FAILED" %
                          options.sample_sheet)
            status = 1
        sys.exit(status)

    # Merge multiple fastqs in each sample
    if options.merge_fastqs:
        for project in illumina_data.projects:
            for sample in project.samples:
                for read in (1,2):
                    # Concatenate fastqs for this read