예제 #1
0
    def get_sample_sheet(directory):
        """
        Get the sample sheet file path from a given run directory.

        :param directory: path of the run directory to look in
        :return: `directory` joined with `Parser.SAMPLE_SHEET_FILE_NAME`
        :raises exceptions.DirectoryError: if `os.access(directory, os.W_OK)` fails, or if the
            sample sheet file name is not in the file list of the directory
        """
        logging.info("Looking for sample sheet in {}".format(directory))

        # Fail early when the directory does not pass the write-access check.
        if not os.access(directory, os.W_OK):
            # Build the message once so the log record and the exception carry the same text
            # (logging a (message, directory) tuple would emit the tuple's repr instead).
            error_msg = (
                "The directory is not accessible, "
                "can not parse samples from this directory {}".format(directory))
            logging.error(error_msg)
            raise exceptions.DirectoryError(error_msg, directory)

        sample_sheet_file_name = Parser.SAMPLE_SHEET_FILE_NAME
        file_list = common.get_file_list(directory)
        if sample_sheet_file_name not in file_list:
            logging.error(
                "No sample sheet file in the Directory Upload format found")
            raise exceptions.DirectoryError(
                "The directory {} has no sample sheet file in the Directory Upload format "
                "with the name {}"
                "".format(directory, sample_sheet_file_name), directory)

        logging.debug("Sample sheet found")
        return os.path.join(directory, sample_sheet_file_name)
예제 #2
0
def build_sample_list_from_sample_sheet_with_abs_path(sample_sheet_file):
    """
    Parse the sample sheet into Sample objects and attach a SequenceFile to each one.

    File names that are not already absolute are resolved against the directory that
    contains the sample sheet; absolute names are used as given.

    :param sample_sheet_file: path to the sample sheet file
    :return: the list of samples produced by `_parse_samples`, each with `sequence_file` set
    :raises exceptions.SampleSheetError: if a non-absolute file name is not in the file list
        of the sample sheet's directory
    """
    sample_list = _parse_samples(sample_sheet_file)
    # Relative file names on the sample sheet are looked up in the sample sheet's own directory
    data_dir = path.dirname(sample_sheet_file)
    sample_sheet_dir_file_list = common.get_file_list(data_dir)

    for sample in sample_list:
        upload_dict = sample.get_uploadable_dict()

        # The reverse file is only processed when the sample sheet names one (paired end read)
        file_keys = ['File_Forward']
        if len(upload_dict['File_Reverse']) > 0:
            file_keys.append('File_Reverse')

        # Forward and reverse files follow the same resolution rules
        resolved_paths = []
        for file_key in file_keys:
            file_name = upload_dict[file_key]
            if not path.isabs(file_name):
                if file_name not in sample_sheet_dir_file_list:
                    raise exceptions.SampleSheetError(
                        "Your sample sheet is malformed. {} Does not match any file in the directory {}"
                        "".format(file_name, data_dir),
                        sample_sheet_file)
                # Write the absolute path back into the dict, then use it for the file list
                file_name = path.join(path.abspath(data_dir), file_name)
                upload_dict[file_key] = file_name
            resolved_paths.append(file_name)

        # Create sequence file object and attach a copy of it to the sample
        sequence_file = model.SequenceFile(file_list=resolved_paths)
        sample.sequence_file = deepcopy(sequence_file)

    return sample_list
예제 #3
0
    def get_sequencing_run(sample_sheet, run_data_directory_file_list=None):
        """
        Validate the sample sheet locally and build a sequencing run from it.

        Raises a ValidationError with a validation result attached if the run cannot be built.

        :param sample_sheet: path to the sample sheet file
        :param run_data_directory_file_list: optional list of files in the data directory; when
            None it is read from the directory that contains the sample sheet
        :return: SequencingRun
        """

        # Collect the file list of the data directory unless the caller supplied one
        directory_result = model.ValidationResult()

        try:
            if run_data_directory_file_list is None:
                run_data_directory_file_list = common.get_file_list(
                    os.path.dirname(sample_sheet))
        except exceptions.DirectoryError as directory_error:
            directory_result.add_error(directory_error)
            logging.error("Errors occurred while parsing files")
            raise exceptions.ValidationError(
                "Errors occurred while parsing files", directory_result)

        # The sample sheet must pass validation before anything is parsed from it
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            logging.error("Errors occurred while getting sample sheet")
            raise exceptions.ValidationError(
                "Errors occurred while getting sample sheet", sheet_result)

        # Parse samples and metadata, then assemble the run; sequence file problems are
        # added to the sample sheet validation result and reported as a validation error
        try:
            samples = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory_file_list)
            metadata = sample_parser.parse_metadata(samples)
            built_run = common.build_sequencing_run_from_samples(
                samples, metadata)
        except exceptions.SequenceFileError as file_error:
            sheet_result.add_error(file_error)
            logging.error(
                "Errors occurred while building sequence run from sample sheet")
            raise exceptions.ValidationError(
                "Errors occurred while building sequence run from sample sheet",
                sheet_result)
        else:
            return built_run
예제 #4
0
    def get_sequencing_run(self,
                           sample_sheet,
                           run_data_directory=None,
                           run_data_directory_file_list=None):
        """
        Validate the run directory / sample sheet locally and build a sequencing run.

        Raises a ValidationError with a validation result attached if the run cannot be built.

        :param sample_sheet: Sample Sheet File
        :param run_data_directory: Optional: Directory (including run directory) to data files.
                                   Can be provided for bypassing os calls when developing on cloud systems
        :param run_data_directory_file_list: Optional: List of files in data directory.
                                             Can be provided for bypassing os calls when developing on cloud systems
        :return: SequencingRun
        """

        # Resolve the data directory and its file list unless the caller supplied them
        directory_result = model.ValidationResult()

        try:
            if run_data_directory is None:
                run_data_directory = Parser.get_full_data_directory(
                    sample_sheet)
            if run_data_directory_file_list is None:
                run_data_directory_file_list = common.get_file_list(
                    run_data_directory)
        except exceptions.DirectoryError as directory_error:
            directory_result.add_error(directory_error)
            logging.error("Errors occurred while parsing files")
            raise exceptions.ValidationError(
                "Errors occurred while parsing files", directory_result)

        # The sample sheet must pass validation before anything is parsed from it
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            logging.error("Errors occurred while getting sample sheet")
            raise exceptions.ValidationError(
                "Errors occurred while getting sample sheet", sheet_result)

        # A fresh validation result collects errors from the metadata and run building steps
        parse_result = model.ValidationResult()
        try:
            metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as sheet_error:
            parse_result.add_error(sheet_error)
            logging.error("Errors occurred while parsing metadata")
            raise exceptions.ValidationError(
                "Errors occurred while parsing metadata", parse_result)

        # Parse the samples and assemble the run; sequence file problems become validation errors
        try:
            samples = sample_parser.parse_sample_list(
                sample_sheet, run_data_directory, run_data_directory_file_list)
            built_run = common.build_sequencing_run_from_samples(
                samples, metadata, self.get_parser_type_name())
        except exceptions.SequenceFileError as file_error:
            parse_result.add_error(file_error)
            logging.error(
                "Errors occurred while building sequence run from sample sheet")
            raise exceptions.ValidationError(
                "Errors occurred while building sequence run from sample sheet",
                parse_result)
        else:
            return built_run