def get_sample_sheet(directory):
    """
    Gets the sample sheet file path from a given run directory.

    :param directory: path of the run directory to search for the sample sheet
    :return: `directory` joined with Parser.SAMPLE_SHEET_FILE_NAME
    :raises exceptions.DirectoryError: if the directory fails the access check, or if
        no file named Parser.SAMPLE_SHEET_FILE_NAME is listed in it
    """
    logging.info("Looking for sample sheet in {}".format(directory))

    # If we can not access the given directory, log an error and raise.
    # NOTE(review): the check is for write access (os.W_OK) even though this function
    # only lists the directory -- confirm that write access is really required here.
    if not os.access(directory, os.W_OK):
        message = ("The directory is not accessible, "
                   "can not parse samples from this directory {}".format(directory))
        # Log the formatted message itself rather than a (message, directory) tuple
        logging.error(message)
        raise exceptions.DirectoryError(message, directory)

    sample_sheet_file_name = Parser.SAMPLE_SHEET_FILE_NAME
    file_list = common.get_file_list(directory)
    if sample_sheet_file_name not in file_list:
        logging.error("No sample sheet file in the Directory Upload format found")
        raise exceptions.DirectoryError(
            "The directory {} has no sample sheet file in the Directory Upload format "
            "with the name {}".format(directory, sample_sheet_file_name),
            directory)

    logging.debug("Sample sheet found")
    return os.path.join(directory, sample_sheet_file_name)
def get_file_list(directory):
    """
    Get the list of file names in the data directory

    Only the top level of the directory is listed; sub-directories are not descended into.

    :param directory: directory to search for files
    :return: list of file names (not full paths) in data directory
    :raises exceptions.DirectoryError: if the path does not exist, or exists but can not be
        listed as a directory
    """
    # verify that directory exists
    if not os.path.exists(directory):
        raise exceptions.DirectoryError("Could not list files, as directory does not exist.", directory)

    # Create a file list of the directory, only hit the os once.
    # os.walk yields nothing when the path can not be scanned (e.g. it is a regular file),
    # so use a default instead of letting a bare StopIteration escape to the caller.
    top_level = next(os.walk(directory), None)
    if top_level is None:
        raise exceptions.DirectoryError(
            "Could not list files, as the path is not a readable directory.", directory)

    # os.walk entries are (dirpath, dirnames, filenames); index 2 is the file names
    return top_level[2]
def find_directory_list(directory):
    """Find and return all directories in the specified directory.

    Only the immediate sub-directories are returned; the search is not recursive and the
    given directory itself is not part of the result.

    Arguments:
    directory -- the directory to find directories in

    Returns: a list of paths, each one `directory` joined with a sub-directory name

    Raises: exceptions.DirectoryError if the directory is not writeable or can not be listed
    """
    # If the given directory fails the write-access check, raise.
    if not os.access(directory, os.W_OK):
        raise exceptions.DirectoryError("The directory is not writeable, "
                                        "can not upload samples from this directory {}".format(directory),
                                        directory)

    # os.walk yields nothing when the path can not be scanned (e.g. it is a writeable
    # regular file), so use a default instead of letting a bare StopIteration escape.
    top_level = next(os.walk(directory), None)
    if top_level is None:
        raise exceptions.DirectoryError(
            "Could not list directories, as the path is not a readable directory {}".format(directory),
            directory)

    # os.walk entries are (dirpath, dirnames, filenames); index 1 is the directory names
    return [os.path.join(directory, name) for name in top_level[1]]
def get_full_data_directory(sample_sheet):
    """
    Returns the path to where the sequence data files can be found, including the sample_sheet directory

    The path is built as <sample sheet dir>/Alignment_1/<first sub-directory listed>/Fastq.

    Note, this hits the os, and as such is not to be used with cloud solutions.
    For cloud solutions, use get_relative_data_directory() and solve the actual path for your cloud environment

    :param sample_sheet: Sample sheet acts as the starting point for the data directory
    :return: a string which represents the concatenated path components, as per os.path.join
    :raises exceptions.DirectoryError: if the Alignment_1 directory does not exist, can not be
        listed, or contains no sub-directory
    """
    sample_sheet_dir = os.path.dirname(sample_sheet)
    partial_data_dir = os.path.join(sample_sheet_dir, "Alignment_1")

    # Verify the partial path exists, path could not exist if there was a sequencing error
    # Also, if someone runs the miniseq parser on a miseq directory, this is the failure point
    if not os.path.exists(partial_data_dir):
        raise exceptions.DirectoryError((
            "The uploader was unable to find the data directory, Verify that the run directory is "
            "undamaged, and that it is a MiniSeq sequencing run."),
            partial_data_dir)

    # os.walk entries are (dirpath, dirnames, filenames); index 1 is the directory names.
    # A default is used because os.walk yields nothing when the path can not be scanned.
    top_level = next(os.walk(partial_data_dir), None)
    sub_directories = top_level[1] if top_level is not None else []
    if not sub_directories:
        # Previously this case escaped as a bare IndexError / StopIteration
        raise exceptions.DirectoryError((
            "The uploader was unable to find a run directory inside the data directory, Verify that "
            "the run directory is undamaged, and that it is a MiniSeq sequencing run."),
            partial_data_dir)

    # Use the first directory listed.
    # NOTE(review): if more than one sub-directory exists, which one comes first depends on
    # the order the os lists them in -- confirm only one is expected.
    return os.path.join(partial_data_dir, sub_directories[0], "Fastq")