예제 #1
0
def prepare_and_validate_for_upload(sequencing_run):
    """
    Prepares IRIDA to accept the sequencing run

    For every project in the run, asks the API whether the project exists.
    For every sample of an existing project, asks the API whether the sample
    exists on that project; if it does not, the sample is sent to the project
    and its existence is checked a second time.

    Problems are collected in a ValidationResult instead of being raised, so
    one pass reports every project/sample that could not be prepared.

    :param sequencing_run: SequencingRun object
    :return: ValidationResult object with all errors that were raised while prepping
    """
    # get api
    api_instance = _get_api_instance()

    validation_result = model.ValidationResult()
    # Start online validation
    logging.debug("Checking existence of projects")
    for project in sequencing_run.project_list:
        logging.debug("Checking existence of project: {}".format(project.id))
        if not api_instance.project_exists(project.id):
            # No project, add error to validation result and skip its samples
            logging.debug("Could not find project: {}".format(project.id))
            err = api.exceptions.IridaResourceError("Project does not exist",
                                                    project.id)
            validation_result.add_error(err)
            continue
        logging.debug("Project {} exists".format(project.id))

        logging.debug("Checking existence of samples")
        for sample in project.sample_list:
            logging.debug(
                "Checking existence of Sample {} on Project {}".format(
                    sample.sample_name, project.id))
            if api_instance.sample_exists(sample.sample_name, project.id):
                logging.debug("Sample {} exists on Project {}".format(
                    sample.sample_name, project.id))
                continue

            logging.debug("Sample not found, creating new Sample")
            try:
                api_instance.send_sample(sample, project.id)
            except (api.exceptions.IridaResourceError,
                    api.exceptions.IridaConnectionError) as e:
                # Both failure kinds are recorded the same way; move on to
                # the next sample so the remaining ones are still prepared
                logging.debug("Sample could not be created")
                validation_result.add_error(e)
                continue

            # send_sample did not raise, but confirm the sample is now there
            logging.debug("Verifying sample was created")
            if not api_instance.sample_exists(sample.sample_name,
                                              project.id):
                logging.debug("Sample was not created")
                # The placeholder must be filled in here: the exception is
                # handed the message as-is, exactly like the
                # "Project does not exist" error above
                err = api.exceptions.IridaResourceError(
                    "Could not create new Sample on Project {}".format(
                        project.id),
                    project.id)
                validation_result.add_error(err)
                continue
            logging.debug("Sample Created")

    return validation_result
예제 #2
0
def validate_sequencing_run(sequencing_run):
    """
    Check a SequencingRun and everything nested in it against the uploadable schemas

    The run, each of its projects, each project's samples and each sample's
    sequence file are validated in turn. A failure on one object is recorded
    and skips whatever is nested beneath that object, while validation of its
    siblings carries on, so errors from several levels can be reported together.

    Intended as a safety net: useful when building a model from scratch,
    when writing a new parser, and as a last redundant check on existing parsers.

    :param sequencing_run: SequencingRun object to validate
    :return: ValidationResult object holding every error that was collected
    """
    result = model.ValidationResult()

    # One validator per model level; keys not named in a schema are tolerated
    run_validator = Validator(model.SequencingRun.uploadable_schema,
                              allow_unknown=True)
    project_validator = Validator(model.Project.uploadable_schema,
                                  allow_unknown=True)
    sample_validator = Validator(model.Sample.uploadable_schema,
                                 allow_unknown=True)
    file_validator = Validator(model.SequenceFile.uploadable_schema,
                               allow_unknown=True)

    # The try blocks mirror the run -> project -> sample hierarchy
    try:
        # Level 1: the run itself
        _validate_object(run_validator, sequencing_run)

        for project in sequencing_run.project_list:
            try:
                # Level 2: a project of the run
                _validate_object(project_validator, project)

                for sample in project.sample_list:
                    try:
                        # Level 3: a sample, then its sequence file
                        _validate_object(sample_validator, sample)
                        _validate_object(file_validator, sample.sequence_file)

                        # File name rule that is checked outside the schema
                        _validate_sequence_file_names(sample.sequence_file)

                    except model.exceptions.ModelValidationError as sample_error:
                        result.add_error(sample_error)

            except model.exceptions.ModelValidationError as project_error:
                result.add_error(project_error)

    except model.exceptions.ModelValidationError as run_error:
        result.add_error(run_error)

    return result
예제 #3
0
    def get_sequencing_run(sample_sheet, run_data_directory_file_list=None):
        """
        Build a SequencingRun after locally validating the run directory / sample sheet

        Raises a ValidationError with a validation result attached if the
        sequencing run cannot be built

        :param sample_sheet: sample sheet file; its directory is listed when no file list is given
        :param run_data_directory_file_list: Optional: list of files in the data directory,
                                             used instead of listing the directory
        :return: SequencingRun
        """

        # Step 1: make sure a file list is available
        directory_result = model.ValidationResult()

        if run_data_directory_file_list is None:
            try:
                run_data_directory_file_list = common.get_file_list(
                    os.path.dirname(sample_sheet))
            except exceptions.DirectoryError as directory_error:
                directory_result.add_error(directory_error)
                message = "Errors occurred while parsing files"
                logging.error(message)
                raise exceptions.ValidationError(message, directory_result)

        # Step 2: the sample sheet itself has to be valid
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, sheet_result)

        # Step 3: parse samples and metadata, then assemble the run
        try:
            samples = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory_file_list)
            metadata = sample_parser.parse_metadata(samples)
            built_run = common.build_sequencing_run_from_samples(
                samples, metadata)
        except exceptions.SequenceFileError as file_error:
            sheet_result.add_error(file_error)
            message = "Errors occurred while building sequence run from sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, sheet_result)

        return built_run
예제 #4
0
    def get_sequencing_run(sample_sheet):
        """
        Build a SequencingRun after locally validating the run directory / sample sheet

        Raises a ValidationError with a validation result attached if the
        sequencing run cannot be built

        :param sample_sheet: sample sheet to validate and parse
        :return: SequencingRun
        """

        # Step 1: the sample sheet itself has to be valid
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, sheet_result)

        # Steps 2 and 3 report their errors through a fresh result object
        build_result = model.ValidationResult()

        # Step 2: read the run metadata from the sample sheet
        try:
            metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as sheet_error:
            build_result.add_error(sheet_error)
            message = "Errors occurred while parsing metadata"
            logging.error(message)
            raise exceptions.ValidationError(message, build_result)

        # Step 3: assemble the run from the sample sheet and the metadata
        try:
            built_run = sample_parser.build_sequencing_run_from_samples(
                sample_sheet, metadata)
        except exceptions.SequenceFileError as file_error:
            build_result.add_error(file_error)
            message = "Errors occurred while building sequence run from sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, build_result)

        return built_run
def validate_file_size_minimum(sequencing_run):
    """
    Check every sample's sequence file against the minimum file size from the config

    The minimum is read from the "minimum_file_size" config option (read as
    an int, falling back to 0). One FileSizeError is recorded per sample whose
    sequence file does not pass the size check.

    :param sequencing_run: SequencingRun object to validate
    :return: ValidationResult object with list of errors if any
    """

    size_floor = config.read_config_option("minimum_file_size", int, 0)

    result = model.ValidationResult()

    for project in sequencing_run.project_list:
        for sample in project.sample_list:
            # Nothing to report for files that pass the size check
            if _file_size_is_valid(sample.sequence_file, size_floor):
                continue

            message = (
                "File size for sample `{}`is smaller than configured minimum of `{} KB`. "
                "Please verify your data."
            ).format(sample.sample_name, size_floor)
            result.add_error(FileSizeError(message, sample.sequence_file))

    return result
예제 #6
0
    def get_sequencing_run(self,
                           sample_sheet,
                           run_data_directory=None,
                           run_data_directory_file_list=None):
        """
        Build a SequencingRun after locally validating the run directory / sample sheet

        Raises a ValidationError with a validation result attached if the
        sequencing run cannot be built

        :param sample_sheet: Sample Sheet File
        :param run_data_directory: Optional: Directory (including run directory) to data files.
                                   Can be provided for bypassing os calls when developing on cloud systems
        :param run_data_directory_file_list: Optional: List of files in data directory.
                                             Can be provided for bypassing os calls when developing on cloud systems
        :return: SequencingRun
        """

        # Step 1: resolve the data directory and its file list when not supplied
        directory_result = model.ValidationResult()

        try:
            if run_data_directory is None:
                run_data_directory = Parser.get_full_data_directory(
                    sample_sheet)
            if run_data_directory_file_list is None:
                run_data_directory_file_list = common.get_file_list(
                    run_data_directory)
        except exceptions.DirectoryError as directory_error:
            directory_result.add_error(directory_error)
            message = "Errors occurred while parsing files"
            logging.error(message)
            raise exceptions.ValidationError(message, directory_result)

        # Step 2: the sample sheet itself has to be valid
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, sheet_result)

        # Steps 3 and 4 report their errors through a fresh result object
        build_result = model.ValidationResult()

        # Step 3: read the run metadata from the sample sheet
        try:
            metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as sheet_error:
            build_result.add_error(sheet_error)
            message = "Errors occurred while parsing metadata"
            logging.error(message)
            raise exceptions.ValidationError(message, build_result)

        # Step 4: parse the samples and assemble the run, tagged with this parser's type name
        try:
            samples = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory, run_data_directory_file_list)
            built_run = common.build_sequencing_run_from_samples(
                samples, metadata, self.get_parser_type_name())
        except exceptions.SequenceFileError as file_error:
            build_result.add_error(file_error)
            message = "Errors occurred while building sequence run from sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, build_result)

        return built_run
예제 #7
0
def validate_sample_sheet(sample_sheet_file):
    """
    Checks if the given sample_sheet_file has the structure needed for parsing
    Requires a line containing [Data]
    Requires the line right after [Data] to contain the
        Sample_Name, Project_ID, File_Forward and File_Reverse table headers

    arguments:
            sample_sheet_file -- sample sheet handed to common.get_csv_reader

    returns ValidationResult object - holds a SampleSheetError for a missing
        [Data] section and/or for the missing data headers
    """

    # Order matters: missing headers are reported in this order
    required_headers = ("Sample_Name", "Project_ID", "File_Forward",
                        "File_Reverse")

    rows = common.get_csv_reader(sample_sheet_file)

    result = model.ValidationResult()

    seen_headers = set()
    has_data_section = False
    has_all_headers = False
    expect_header_row = False

    for row in rows:

        if "[Data]" in row:
            has_data_section = True
            expect_header_row = True  # the following row holds the headers

        elif expect_header_row:
            seen_headers.update(
                header for header in required_headers if header in row)

            # every required header has been seen
            if len(seen_headers) == len(required_headers):
                has_all_headers = True

            expect_header_row = False

    if not has_data_section:
        result.add_error(
            exceptions.SampleSheetError(
                "[Data] section not found in SampleSheet",
                sample_sheet_file))

    if not has_all_headers:
        missing = ", ".join(
            header for header in required_headers
            if header not in seen_headers)
        result.add_error(
            exceptions.SampleSheetError(
                "Missing required data header(s): " + missing,
                sample_sheet_file))

    return result