Ejemplo n.º 1
0
def get_library_names_and_timestamps(name):
    """
    Extract sample/library names and timestamps

    Given a name from 'get_generic_name', extract the sample,
    library and timestamp elements.

    If no timestamp is present then it is returned
    as the string 'unknown'.

    For 'standard' SOLiD data directories, the top-level is a
    'sample', which may be split into multiple 'libraries', and
    then again in multiple timestamped filesets.

    Some examples:

    * LH_POOL/results.F1B1/libraries/LH1/primary.20111208144829752/reads/solid0127_20111207_FRAG_BC_LH_POOL_BC_LH1
    * ZD_hu/results.F1B1/primary.20091220022109452/reads/solid0424_20091214_ZD_hu_F3
    * SH_JC1_pool/results.F1B1/libraries_MM2/JC_SEQ30/primary.20120125063517232/reads/solid0127_20120117_PE_BC_SH_JC1_pool_F5-BC_JC_SEQ30

    Arguments:
      name: path-like string, with elements separated by 'os.sep'.
        The final element is treated as the file name and is never
        used for the sample or library.

    Returns:
      Tuple (sample, library, timestamp). 'sample' and 'library' are
      the first and second path elements left after the structural
      directories ('results.F1B1', 'libraries*', 'reads', 'primary.*'
      and 'secondary.*') have been discarded; either is None if there
      is no such element. 'timestamp' is the value returned by
      'SolidData.extract_library_timestamp', or the string 'unknown'
      if that value is None.

    """
    # Extract timestamp
    timestamp = SolidData.extract_library_timestamp(name)
    if timestamp is None:
        timestamp = 'unknown'
    # Directory elements which only describe the layout of the run
    # and so carry no sample or library information
    structural_names = ("results.F1B1", "reads")
    structural_prefixes = ("libraries", "primary.", "secondary.")
    # Attempt to extract sample and library from the path (the last
    # element is the file name, so it is excluded)
    new_path = [field for field in name.split(os.sep)[:-1]
                if field not in structural_names
                and not field.startswith(structural_prefixes)]
    # Pad with None so that a missing library - or a missing sample,
    # e.g. when 'name' is a bare file name - is reported as None
    # rather than raising IndexError
    sample, library = (new_path + [None, None])[:2]
    # Return
    return (sample, library, timestamp)
Ejemplo n.º 2
0
def get_library_names_and_timestamps(name):
    """
    Extract sample/library names and timestamps

    Given a name from 'get_generic_name', extract the sample,
    library and timestamp elements.

    If no timestamp is present then it is returned
    as the string 'unknown'.

    For 'standard' SOLiD data directories, the top-level is a
    'sample', which may be split into multiple 'libraries', and
    then again in multiple timestamped filesets.

    Some examples:

    * LH_POOL/results.F1B1/libraries/LH1/primary.20111208144829752/reads/solid0127_20111207_FRAG_BC_LH_POOL_BC_LH1
    * ZD_hu/results.F1B1/primary.20091220022109452/reads/solid0424_20091214_ZD_hu_F3
    * SH_JC1_pool/results.F1B1/libraries_MM2/JC_SEQ30/primary.20120125063517232/reads/solid0127_20120117_PE_BC_SH_JC1_pool_F5-BC_JC_SEQ30

    Arguments:
      name: path-like string, with elements separated by 'os.sep'.
        The final element is treated as the file name and is never
        used for the sample or library.

    Returns:
      Tuple (sample, library, timestamp). 'sample' and 'library' are
      the first and second path elements left after the structural
      directories ('results.F1B1', 'libraries*', 'reads', 'primary.*'
      and 'secondary.*') have been discarded; either is None if there
      is no such element. 'timestamp' is the value returned by
      'SolidData.extract_library_timestamp', or the string 'unknown'
      if that value is None.

    """
    # Extract timestamp
    timestamp = SolidData.extract_library_timestamp(name)
    if timestamp is None:
        timestamp = 'unknown'
    # Directory elements which only describe the layout of the run
    # and so carry no sample or library information
    structural_names = ("results.F1B1", "reads")
    structural_prefixes = ("libraries", "primary.", "secondary.")
    # Attempt to extract sample and library from the path (the last
    # element is the file name, so it is excluded)
    new_path = [field for field in name.split(os.sep)[:-1]
                if field not in structural_names
                and not field.startswith(structural_prefixes)]
    # Pad with None so that a missing library - or a missing sample,
    # e.g. when 'name' is a bare file name - is reported as None
    # rather than raising IndexError
    sample, library = (new_path + [None, None])[:2]
    # Return
    return (sample, library, timestamp)
Ejemplo n.º 3
0
def verify_runs(solid_dirs):
    """Do basic verification checks on SOLiD run directories

    For each SOLiD run directory, create a SolidRun object and call its
    'verify' method (intended to check for the expected sample and library
    directories, and that primary data files (csfasta and qual) have been
    assigned and exist).

    A '<run_name>:  [PASSED]' or '<run_name>:  [FAILED]' line is printed
    for each run, followed by an 'Overall status:' line.

    Output is written with single-argument 'print(...)' calls, which emit
    the same text whether 'print' is a statement (Python 2) or a function
    (Python 3).

    Arguments:
      solid_dirs: a list of SOLiD sequencing directory names.

    Returns:
      UNIX-like status code: 0 if all the runs are verified (or if no
      runs were supplied), 1 if there is a problem with any of them.
    """
    print("Performing verification")
    status = 0
    for solid_dir in solid_dirs:
        run = SolidData.SolidRun(solid_dir)
        if run.verify():
            outcome = " [PASSED]"
        else:
            outcome = " [FAILED]"
            # A single failed run fails the whole set
            status = 1
        # The space in the format reproduces the separator that a
        # trailing-comma 'print' statement inserted between the run
        # name and the outcome, so the output keeps its double space
        print("%s: %s" % (run.run_name, outcome))
    # Completed
    if status == 0:
        overall = " [PASSED]"
    else:
        overall = " [FAILED]"
    print("Overall status: %s" % overall)
    return status
Ejemplo n.º 4
0
        logging.getLogger().setLevel(logging.DEBUG)
    elif options.no_warnings:
        logging.getLogger().setLevel(logging.ERROR)

    # Solid run directories
    for arg in args:
        if not os.path.isdir(arg):
            logging.error("'%s' not found or not a directory" % arg)
            sys.exit(1)
    if len(args) == 1:
        # Single directory supplied
        if options.only:
            solid_dirs = [args[0]]
        else:
            # Add associated directories
            solid_dirs = SolidData.list_run_directories(args[0])
    else:
        # Use all supplied arguments
        solid_dirs = args

    # Output spreadsheet name
    if options.xls:
        spreadsheet = os.path.splitext(os.path.basename(
            solid_dirs[0]))[0] + ".xls"
        print "Writing spreadsheet %s" % spreadsheet

    # Check there's at least one thing to do
    if not (options.report or options.layout or options.xls or options.verify
            or options.rsync or options.md5sum or options.copy_pattern
            or options.gzip_pattern or options.md5_pattern):
        options.report = True
        logging.getLogger().setLevel(logging.DEBUG)
    elif options.no_warnings:
        logging.getLogger().setLevel(logging.ERROR)

    # Solid run directories
    for arg in args:
        if not os.path.isdir(arg):
            logging.error("'%s' not found or not a directory" % arg)
            sys.exit(1)
    if len(args) == 1:
        # Single directory supplied
        if options.only:
            solid_dirs = [args[0]]
        else:
            # Add associated directories
            solid_dirs = SolidData.list_run_directories(args[0])
    else:
        # Use all supplied arguments
        solid_dirs = args

    # Output spreadsheet name
    if options.xls:
        spreadsheet = os.path.splitext(os.path.basename(solid_dirs[0]))[0] + ".xls"
        print "Writing spreadsheet %s" % spreadsheet

    # Check there's at least one thing to do
    if not (options.report or 
            options.layout or 
            options.xls or 
            options.verify or
            options.rsync or