예제 #1
0
def validate(input_file_name,
             working_directory,
             level=logging.INFO,
             logger=None):
    """
    Validates an SBML file by running the external validateSBML tool on it.

    Args:
        input_file_name: An input SBML file.
        working_directory: A directory where any output files produced by validation can be written.
        level: Logging level, defaults to logging.INFO.
        logger: Logger to report progress on, a stdout logger is created when omitted.
    
    Returns:
        Nothing is returned on success.
        All statements passed to standard out via a logger, any errors throw an Exception
        and result in a non-zero exit status back to the caller.
    
    Authors:
        Srividya Ramikrishnan, Matt Henderson
    """
    # NOTE: the section layout of the docstring above is consumed at runtime by
    # script_utils.parse_docs (see the __main__ block), keep it intact.

    if logger is None:
        logger = script_utils.stdoutlogger(__file__, level)

    # Fail with a readable message instead of a TypeError from os.path.join(None, ...).
    kb_top = os.environ.get("KB_TOP")
    if not kb_top:
        raise Exception("KB_TOP environment variable is not set, unable to locate validateSBML")

    command = os.path.join(kb_top, "bin/validateSBML")

    # Only the base name is reported so log lines do not leak staging paths.
    fileName = os.path.split(input_file_name)[-1]

    if not os.path.isfile(input_file_name):
        raise Exception("Not a file {0}".format(fileName))

    logger.info("Starting SBML validation of {0}".format(fileName))

    # stdout of the tool is inherited, only stderr is captured for inspection.
    tool_process = subprocess.Popen([command, input_file_name], stderr=subprocess.PIPE)
    _, stderr = tool_process.communicate()

    # Anything on stderr or a non-zero exit status counts as a failed validation.
    if stderr or tool_process.returncode != 0:
        logger.error("Validation failed on {0}".format(fileName))
        if stderr:
            # Surface the tool's own diagnostics, they were silently dropped before.
            logger.error(stderr.decode("utf-8", "replace").rstrip())
        raise Exception("Validation failed!")

    logger.info("Validation passed on {0}".format(fileName))
    logger.info("Validation passed.")
def validate(input_file_name, working_directory, level=logging.INFO, logger=None):
    """
    Validates an SBML file with the validateSBML command line tool.

    Args:
        input_file_name: An input SBML file.
        working_directory: A directory where any output files produced by validation can be written.
        level: Logging level, defaults to logging.INFO.
        logger: Logger used for progress messages, a stdout logger is created when omitted.
    
    Returns:
        Nothing is returned on success.
        All statements passed to standard out via a logger, any errors throw an Exception
        and result in a non-zero exit status back to the caller.
    
    Authors:
        Srividya Ramikrishnan, Matt Henderson
    """

    if logger is None:
        # Honor the requested level instead of silently ignoring it.
        logger = script_utils.stdoutlogger(__file__, level)

    # The tool lives under $KB_TOP/bin; without KB_TOP there is nothing to run.
    kb_top = os.environ.get("KB_TOP")
    if not kb_top:
        raise Exception("KB_TOP environment variable is not set, unable to locate validateSBML")
    command = os.path.join(kb_top, "bin/validateSBML")

    fileName = os.path.split(input_file_name)[-1]

    if not os.path.isfile(input_file_name):
        raise Exception("Not a file {0}".format(fileName))

    logger.info("Starting SBML validation of {0}".format(fileName))

    arguments = [command, input_file_name]

    # Only stderr is piped; the tool's stdout goes straight to ours.
    tool_process = subprocess.Popen(arguments, stderr=subprocess.PIPE)
    stderr = tool_process.communicate()[1]

    # A clean run means an empty stderr and a zero exit status.
    validated = not stderr and tool_process.returncode == 0

    if not validated:
        logger.error("Validation failed on {0}".format(fileName))
        if stderr:
            # Keep the tool's diagnostics visible to whoever reads the log.
            logger.error(stderr.decode("utf-8", "replace").rstrip())
        raise Exception("Validation failed!")

    logger.info("Validation passed on {0}".format(fileName))
    logger.info("Validation passed.")
def transform(shock_service_url=None, workspace_service_url=None,
              workspace_name=None, object_name=None, contigset_object_name=None,
              input_directory=None, working_directory=None, 
              level=logging.INFO, logger=None):
    """
    Transforms Genbank file to KBaseGenomes.Genome and KBaseGenomes.ContigSet objects.
    
    Args:
        shock_service_url: If you have shock references you need to make.
        workspace_service_url: KBase Workspace URL
        workspace_name: Name of the workspace to save the data to
        object_name: Name of the genome object to save
        contigset_object_name: Name of the ContigSet object that is created with this Genome
        input_directory: A directory of either a genbank file or a directory of partial genome files to merge
        working_directory: A directory where you can do work
        level: Logging level, defaults to logging.INFO.
        logger: Logger used for progress messages, a stdout logger is created when omitted.
    
    Returns:
        Workspace objects saved to the user's workspace. An IOError is raised when a required classpath jar is missing.
    
    Authors:
        Shinjae Yoo, Marcin Joachimiak, Matt Henderson
    """

    if logger is None:
        logger = script_utils.stdoutlogger(__file__, level)

    logger.info("Starting transformation of Genbank to KBaseGenomes.Genome")

    # TODO get the classpath definition out into the config instead
    KB_TOP = os.environ["KB_TOP"]

    # Jar locations relative to $KB_TOP/lib/jars.
    jars = ["kbase/transform/kbase_transform_deps.jar",
            "kbase/genomes/kbase-genomes-20140411.jar",
            "kbase/common/kbase-common-0.0.6.jar",
            "jackson/jackson-annotations-2.2.3.jar",
            "jackson/jackson-core-2.2.3.jar",
            "jackson/jackson-databind-2.2.3.jar",
            "kbase/auth/kbase-auth-1398468950-3552bb2.jar",
            "kbase/workspace/WorkspaceClient-0.2.0.jar"]

    classpath = ["{}/lib/jars/{}".format(KB_TOP, jar) for jar in jars]

    # Explicit check rather than assert: asserts are stripped under `python -O`,
    # which would let a missing jar go unnoticed.
    for p in classpath:
        if not os.path.exists(p):
            raise IOError("Unable to find classpath library {}".format(p))

    # NOTE(review): the visible source ends after the classpath check; the actual
    # Genbank conversion presumably follows elsewhere - confirm against the full file.
def validate(input_directory, working_directory, level=logging.INFO, logger=None):
    """
    Validates any file containing sequence data.

    Args:
        input_directory: A directory containing one or more SequenceRead files.
        working_directory: A directory where any output files produced by validation can be written.
        level: Logging level, defaults to logging.INFO.
        logger: Logger used for progress messages, a stdout logger is created when omitted.
    
    Returns:
        Nothing is returned on success. An Exception is raised when a validator fails on a file or when no file with a fasta or fastq extension was found.
    
    Authors:
        Srividya Ramikrishnan, Jason Baumohl, Matt Henderson
    """
    # NOTE: the section layout of the docstring above is consumed at runtime by
    # script_utils.parse_docs (see the __main__ block), keep it intact.

    if logger is None:
        logger = script_utils.stdoutlogger(__file__, level)

    # TODO get classpaths and binary paths into the config
    KB_TOP = os.environ["KB_TOP"]

    fasta_executable = "{}/lib/jars/FastaValidator/FastaValidator-1.0.jar".format(KB_TOP)
    fastq_executable = "fastQValidator"

    fasta_validator_present = os.path.isfile(fasta_executable)
    fastq_validator_present = False
    fastq_validator_runnable = False

    # Look at the first fastQValidator found on PATH, executable or not.
    for path in os.environ["PATH"].split(os.pathsep):
        exe_file = os.path.join(path.strip('"'), fastq_executable)
        if os.path.isfile(exe_file):
            fastq_validator_present = True
            fastq_validator_runnable = os.access(exe_file, os.X_OK)
            break

    # Missing tools are only warned about here; the run itself fails later.
    if not fasta_validator_present:
        logger.warning("FASTA validator executable FastaValidator-1.0.jar could not be found.")

    if not fastq_validator_present:
        logger.warning("FASTQ validator executable fastQValidator could not be found.")
    elif not fastq_validator_runnable:
        logger.warning("FASTQ validator executable fastQValidator does not have execute permissions.")

    fasta_extensions = [".fa",".fas",".fasta",".fna"]
    fastq_extensions = [".fq",".fastq",".fnq"]

    extensions = fasta_extensions + fastq_extensions

    checked = False
    validated = True
    for input_file_name in os.listdir(input_directory):
        logger.info("Checking for SequenceReads file : {0}".format(input_file_name))

        filePath = os.path.abspath(os.path.join(input_directory, input_file_name))
        extension = os.path.splitext(input_file_name)[-1]

        if not os.path.isfile(filePath):
            logger.warning("Skipping directory {0}".format(input_file_name))
            continue
        if extension not in extensions:
            logger.warning("Unrecognized file type {}, skipping.".format(extension))
            continue

        logger.info("Starting SequenceReads validation of {0}".format(input_file_name))

        if extension in fasta_extensions:
            # TODO This needs to be changed, this is really just a demo program for this library and not a serious tool
            arguments = ["java", "-classpath", fasta_executable, "FVTester", filePath]
        else:
            logger.info("Checking FASTQ line count for errors.")
            line_number = 0
            with open(filePath, 'rb') as seqfile:
                for line in seqfile:
                    line_number += 1
            logger.info("FASTQ line count check completed.")

            # FASTQ records are four lines each; a remainder hints at stray blank lines.
            if line_number % 4 > 0:
                logger.warning("Found extra lines, removing blank lines.")
                with open(filePath, 'rb') as seqfile:
                    with open(filePath + ".tmp", 'wb') as out:
                        for line in seqfile:
                            # Copy only lines that carry content (the old code
                            # used `pass` here and copied blank lines as well).
                            if line.strip():
                                out.write(line)
                os.remove(filePath)
                os.rename(filePath + ".tmp", filePath)
                logger.warning("Blank lines removed from FASTQ.")

            arguments = [fastq_executable, "--file", filePath, "--maxErrors", "10"]

            if check_interleavedPE(filePath) == 1:
                arguments.append("--disableSeqIDCheck")

        logger.info("Running {}".format(" ".join(arguments).replace(filePath, input_file_name)))

        # Run the argument list directly (no shell) so file names containing
        # quotes or other shell metacharacters are passed through verbatim.
        try:
            returncode = subprocess.call(arguments)
        except OSError as e:
            # The shell used to report a missing tool as a non-zero status;
            # keep treating it as a validation failure.
            logger.error("Unable to run {0} : {1}".format(arguments[0], e))
            returncode = 1

        if returncode != 0:
            logger.error("Validation failed on {0}".format(input_file_name))
            validated = False
            break
        else:
            logger.info("Validation passed on {0}".format(input_file_name))
            checked = True

    if not validated:
        raise Exception("Validation failed!")
    elif not checked:
        raise Exception("No files were found that had a valid fasta or fastq extension.")
    else:
        logger.info("Validation passed.")


if __name__ == "__main__":
    # Command line entry point. The description, per-argument help and epilog
    # are all taken from validate's docstring via script_utils.parse_docs,
    # which is indexed below by "Description", "Args" and "Authors".
    script_details = script_utils.parse_docs(validate.__doc__)

    import argparse

    parser = argparse.ArgumentParser(prog=__file__,
                                     description=script_details["Description"],                                     
                                     epilog=script_details["Authors"])
    parser.add_argument("--input_file_name", help=script_details["Args"]["input_file_name"], type=str, nargs="?", required=True)
    parser.add_argument("--working_directory", help=script_details["Args"]["working_directory"], type=str, nargs="?", required=True)

    args = parser.parse_args()

    logger = script_utils.stdoutlogger(__file__)
    
    # Any failure is logged with its traceback and turned into exit status 1.
    try:
        validate(input_file_name = args.input_file_name, 
                 working_directory = args.working_directory,
                 level = logging.DEBUG,
                 logger = logger)
    except Exception as e:
        # `except X as e` is accepted by Python 2.6+ and Python 3 alike,
        # unlike the comma form which is a syntax error on Python 3.
        logger.exception(e)
        sys.exit(1)
    
    sys.exit(0)
예제 #6
0
    import argparse

    parser = argparse.ArgumentParser(prog=__file__,
                                     description=script_details["Description"],
                                     epilog=script_details["Authors"])
    parser.add_argument("--input_file_name",
                        help=script_details["Args"]["input_file_name"],
                        type=str,
                        nargs="?",
                        required=True)
    parser.add_argument("--working_directory",
                        help=script_details["Args"]["working_directory"],
                        type=str,
                        nargs="?",
                        required=True)

    args = parser.parse_args()

    logger = script_utils.stdoutlogger(__file__)

    try:
        validate(input_file_name=args.input_file_name,
                 working_directory=args.working_directory,
                 level=logging.DEBUG,
                 logger=logger)
    except Exception, e:
        logger.exception(e)
        sys.exit(1)

    sys.exit(0)
예제 #7
0
def validate(input_directory,
             working_directory,
             level=logging.INFO,
             logger=None):
    """
    Validates any file containing sequence data.

    Args:
        input_directory: A directory containing one or more SequenceRead files.
        working_directory: A directory where any output files produced by validation can be written.
        level: Logging level, defaults to logging.INFO.
        logger: Logger used for progress messages, a stdout logger is created when omitted.
    
    Returns:
        Nothing is returned on success. An Exception is raised when a validator fails on a file or when no file with a fasta or fastq extension was found.
    
    Authors:
        Srividya Ramikrishnan, Jason Baumohl, Matt Henderson
    """

    if logger is None:
        logger = script_utils.stdoutlogger(__file__, level)

    # TODO get classpaths and binary paths into the config
    KB_TOP = os.environ["KB_TOP"]

    fasta_executable = "{}/lib/jars/FastaValidator/FastaValidator-1.0.jar".format(
        KB_TOP)
    fastq_executable = "fastQValidator"

    fastq_validator_present = False
    fastq_validator_runnable = False

    # The first fastQValidator on PATH decides, whether it is executable or not.
    for path in os.environ["PATH"].split(os.pathsep):
        exe_file = os.path.join(path.strip('"'), fastq_executable)
        if not os.path.isfile(exe_file):
            continue
        fastq_validator_present = True
        fastq_validator_runnable = os.access(exe_file, os.X_OK)
        break

    # Missing tools only produce warnings at this point.
    if not os.path.isfile(fasta_executable):
        logger.warning(
            "FASTA validator executable FastaValidator-1.0.jar could not be found."
        )

    if not fastq_validator_present:
        logger.warning(
            "FASTQ validator executable fastQValidator could not be found.")
    elif not fastq_validator_runnable:
        logger.warning(
            "FASTQ validator executable fastQValidator does not have execute permissions."
        )

    fasta_extensions = [".fa", ".fas", ".fasta", ".fna"]
    fastq_extensions = [".fq", ".fastq", ".fnq"]

    checked = False
    for input_file_name in os.listdir(input_directory):
        logger.info(
            "Checking for SequenceReads file : {0}".format(input_file_name))

        filePath = os.path.abspath(
            os.path.join(input_directory, input_file_name))
        suffix = os.path.splitext(input_file_name)[-1]

        if not os.path.isfile(filePath):
            logger.warning("Skipping directory {0}".format(input_file_name))
            continue
        if suffix not in fasta_extensions and suffix not in fastq_extensions:
            logger.warning(
                "Unrecognized file type {}, skipping.".format(suffix))
            continue

        logger.info(
            "Starting SequenceReads validation of {0}".format(input_file_name))

        if suffix in fasta_extensions:
            # TODO This needs to be changed, this is really just a demo program for this library and not a serious tool
            arguments = [
                "java", "-classpath", fasta_executable, "FVTester", filePath
            ]
        else:
            logger.info("Checking FASTQ line count for errors.")
            with open(filePath, 'rb') as seqfile:
                line_number = sum(1 for _ in seqfile)
            logger.info("FASTQ line count check completed.")

            # Four lines per FASTQ record; a remainder hints at stray blank lines.
            if line_number % 4 > 0:
                logger.warning("Found extra lines, removing blank lines.")
                _remove_blank_lines(filePath)
                logger.warning("Blank lines removed from FASTQ.")

            arguments = [
                fastq_executable, "--file", filePath, "--maxErrors", "10"
            ]

            if check_interleavedPE(filePath) == 1:
                arguments.append("--disableSeqIDCheck")

        logger.info("Running {}".format(" ".join(arguments).replace(
            filePath, input_file_name)))

        # The argument list is executed directly, without a shell, so unusual
        # characters in file names cannot alter the command.
        try:
            returncode = subprocess.call(arguments)
        except OSError as e:
            # A tool that cannot be started counts as a failed validation,
            # matching the non-zero status the shell used to report.
            logger.error("Unable to run {0} : {1}".format(arguments[0], e))
            returncode = 1

        if returncode != 0:
            logger.error("Validation failed on {0}".format(input_file_name))
            raise Exception("Validation failed!")

        logger.info("Validation passed on {0}".format(input_file_name))
        checked = True

    if not checked:
        raise Exception(
            "No files were found that had a valid fasta or fastq extension.")

    logger.info("Validation passed.")


def _remove_blank_lines(file_path):
    """Rewrite file_path in place without its whitespace-only lines."""
    tmp_path = file_path + ".tmp"
    with open(file_path, 'rb') as source:
        with open(tmp_path, 'wb') as target:
            for line in source:
                # The previous inline code used `pass` here and therefore
                # copied the blank lines it meant to drop.
                if line.strip():
                    target.write(line)
    # Remove first: os.rename does not overwrite an existing file on Windows.
    os.remove(file_path)
    os.rename(tmp_path, file_path)