def test_sistr(variables):
    """Check the cgMLST genome match that SISTR reports for NC_003198.

    A single sample object is built around ``NC_003198.fasta`` located in
    ``variables.sequencepath``, registered on the module-level ``method``
    object, and analysed with ``method.sistr()``.

    :param variables: object exposing a ``sequencepath`` attribute (the folder
        holding the test assembly)
    """
    sample_metadata = MetadataObject()
    # Start from an empty sample list on the module-level method object
    method.runmetadata.samples = []
    assembly = os.path.join(variables.sequencepath, 'NC_003198.fasta')
    # The sample name is the file name up to its first '.'
    sample_metadata.name = os.path.basename(assembly).split('.')[0]
    # Create the general and run categories
    sample_metadata.general = GenObject()
    sample_metadata.run = GenObject()
    sample_metadata.general.fastqfiles = []
    # Per-sample output folder: <sequencepath>/<sample name>
    sample_dir = os.path.join(variables.sequencepath, sample_metadata.name)
    make_path(sample_dir)
    # Record the output folder and the log file locations
    sample_metadata.general.outputdirectory = sample_dir
    sample_metadata.general.logout = os.path.join(sample_dir, 'out')
    sample_metadata.general.logerr = os.path.join(sample_dir, 'err')
    sample_metadata.run.outputdirectory = sample_dir
    # Superseded by the assembly path assigned a few lines below
    sample_metadata.general.bestassemblyfile = True
    # Attribute used to store commands
    sample_metadata.commands = GenObject()
    # The sample is assumed to be Salmonella
    sample_metadata.general.referencegenus = 'Salmonella'
    # Use the .fasta file as the best assembly
    sample_metadata.general.bestassemblyfile = assembly
    method.runmetadata.samples.append(sample_metadata)
    method.sistr()
    for analysed in method.runmetadata.samples:
        assert analysed.sistr.cgmlst_genome_match == 'SAL_BA2732AA'
    variable_update()
def test_sistr(variables):
    """Run SISTR on the NC_003198 assembly and verify the cgMLST match.

    The test wraps ``NC_003198.fasta`` (expected in
    ``variables.sequencepath``) in a metadata object, hands it to the
    module-level ``method`` object, calls ``method.sistr()``, and asserts the
    genome match recorded on every sample.

    :param variables: object exposing a ``sequencepath`` attribute
    """
    strain = MetadataObject()
    # Begin with an empty list of samples on the method object
    method.runmetadata.samples = []
    fasta_path = os.path.join(variables.sequencepath, 'NC_003198.fasta')
    # Name the sample after the file name, truncated at the first '.'
    strain.name = os.path.basename(fasta_path).split('.')[0]
    # Set up the general and run categories
    strain.general = GenObject()
    strain.run = GenObject()
    strain.general.fastqfiles = []
    # The results folder is named after the sample
    results_dir = os.path.join(variables.sequencepath, strain.name)
    make_path(results_dir)
    # Store the results folder in both categories
    strain.general.outputdirectory = results_dir
    strain.run.outputdirectory = results_dir
    # Overwritten with the assembly path further down
    strain.general.bestassemblyfile = True
    # Attribute used to store commands
    strain.commands = GenObject()
    # Treat the sample as Salmonella
    strain.general.referencegenus = 'Salmonella'
    # The .fasta file is the best assembly
    strain.general.bestassemblyfile = fasta_path
    method.runmetadata.samples.append(strain)
    method.sistr()
    for processed in method.runmetadata.samples:
        assert processed.sistr.cgmlst_genome_match == 'SAL_BA2732AA'
    variable_update()
def basic(self):
    """Create a metadata object for each sample with FASTQ files in ``self.path``.

    For every sample name derived from the ``*.fastq*`` files in ``self.path``
    an output folder ``<self.path>/<name>`` is created, the sample's FASTQ
    files are symlinked into it, and a populated ``MetadataObject`` is
    appended to ``self.samples``. Afterwards, samples returned by
    ``metadataReader.MetadataReader(self)`` replace ``self.samples`` when
    there are any, and ``self.readlength()`` is run.

    :raises OSError: if a symlink cannot be created for any reason other than
        the link already existing
    """
    # Substrings marking processed reads that must not be treated as raw
    # input. 'paired' also covers file names containing 'unpaired'.
    excluded_markers = ('trimmed', 'normalised', 'corrected', 'paired')
    # Grab any .fastq files in the path
    fastqfiles = glob(os.path.join(self.path, '*.fastq*'))
    # Keep only the final path component of whatever filer() returns
    # NOTE(review): filer() is defined elsewhere; presumably it collapses the
    # read files of one sample into a single name - confirm
    fastqnames = [os.path.split(entry)[1] for entry in filer(fastqfiles)]
    # Iterate through the names of the fastq files
    for fastqname in sorted(fastqnames):
        # Set the name
        metadata = MetadataObject()
        metadata.name = fastqname
        # Set and create the destination folder
        outputdir = os.path.join(self.path, fastqname)
        make_path(outputdir)
        # Get the fastq files specific to the fastqname
        specificfastq = glob(
            os.path.join(self.path, '{}*.fastq*'.format(fastqname)))
        # Link each file into the output folder. The try/except is per file so
        # that one pre-existing link does not prevent the remaining files
        # from being linked.
        for fastq in specificfastq:
            fastqfile = os.path.basename(fastq)
            try:
                # Relative link: <outputdir>/<file> -> ../<file>
                os.symlink('../{}'.format(fastqfile),
                           os.path.join(outputdir, fastqfile))
            except OSError as exception:
                # If there is an exception other than the file exists, raise it
                if exception.errno != errno.EEXIST:
                    raise
        # Initialise the general and run categories
        metadata.general = GenObject()
        metadata.run = GenObject()
        # Populate .fastqfiles with the sorted files in the output folder
        # whose path contains none of the excluded markers
        linkedfastq = sorted(
            glob(os.path.join(outputdir, '{}*.fastq*'.format(metadata.name))))
        metadata.general.fastqfiles = [
            fastq for fastq in linkedfastq
            if not any(marker in fastq for marker in excluded_markers)
        ]
        # Add the output directory and log file paths to the metadata
        metadata.general.outputdirectory = outputdir
        metadata.general.logout = os.path.join(
            self.path, metadata.name, '{}_log_out.txt'.format(metadata.name))
        metadata.general.logerr = os.path.join(
            self.path, metadata.name, '{}_log_err.txt'.format(metadata.name))
        # Append the metadata to the list of samples
        self.samples.append(metadata)
    # Grab metadata from previous runs
    previousmetadata = metadataReader.MetadataReader(self)
    # Update self.samples (if required)
    if previousmetadata.samples:
        self.samples = previousmetadata.samples
    # Run the read length method
    self.readlength()
def createobject(self):
    """Create a metadata object for each sample with FASTQ files in ``self.path``.

    For every sample name derived from the ``*.fastq*`` files in ``self.path``
    an output folder ``<self.path>/<name>`` is created, the sample's FASTQ
    files are symlinked into it with relative links, and a populated
    ``MetadataObject`` is appended to ``self.samples``.

    :raises OSError: if a symlink cannot be created for any reason other than
        the link already existing
    """
    # Grab any .fastq files in the path
    fastqfiles = glob(os.path.join(self.path, '*.fastq*'))
    # Keep only the final path component of whatever filer() returns
    # NOTE(review): filer() is defined elsewhere; presumably it collapses the
    # read files of one sample into a single name - confirm
    fastqnames = [os.path.split(entry)[1] for entry in filer(fastqfiles)]
    # Iterate through the names of the fastq files
    for fastqname in sorted(fastqnames):
        # Set the name
        metadata = MetadataObject()
        metadata.name = fastqname
        # Set and create the destination folder
        outputdir = os.path.join(self.path, fastqname)
        make_path(outputdir)
        # Get the fastq files specific to the fastqname
        specificfastq = glob(
            os.path.join(self.path, '{}*.fastq*'.format(fastqname)))
        # Make relative symlinks to the files in :self.path. The try/except is
        # per file so that one pre-existing link does not prevent the
        # remaining files from being linked.
        for fastq in specificfastq:
            # Get the basename of the file
            fastqfile = os.path.split(fastq)[-1]
            # The link lives in the destination folder under the same name
            destinationfastq = os.path.join(outputdir, fastqfile)
            try:
                os.symlink('../{}'.format(fastqfile), destinationfastq)
            except OSError as exception:
                # If there is an exception other than the file exists, raise it
                if exception.errno != errno.EEXIST:
                    raise
        # Initialise the general and run categories
        metadata.general = GenObject()
        metadata.run = GenObject()
        # Populate .fastqfiles with the untrimmed files in the output folder.
        # glob() returns files in arbitrary order, so sort to keep the read
        # order deterministic.
        metadata.general.fastqfiles = [
            fastq for fastq in sorted(
                glob(os.path.join(outputdir, '{}*.fastq*'.format(fastqname))))
            if 'trimmed' not in fastq
        ]
        # Add the output directory to the metadata
        metadata.general.outputdirectory = outputdir
        metadata.run.outputdirectory = outputdir
        metadata.general.bestassemblyfile = True
        # The same list object is shared with .fastqfiles
        metadata.general.trimmedcorrectedfastqfiles = metadata.general.fastqfiles
        metadata.general.logout = os.path.join(
            metadata.general.outputdirectory, 'logout')
        metadata.general.logerr = os.path.join(
            metadata.general.outputdirectory, 'logerr')
        # Initialise an attribute to store commands
        metadata.commands = GenObject()
        # Append the metadata to the list of samples
        self.samples.append(metadata)
# Record the time at which the run started
arguments.starttime = time.time()
# Collect every file matching *.fa* in the sequence path, in sorted order
fastas = glob(os.path.join(arguments.sequencepath, '*.fa*'))
fastas.sort()
# Container for the per-sample metadata objects
arguments.runmetadata = MetadataObject()
arguments.runmetadata.samples = []
for fasta in fastas:
    metadata = MetadataObject()
    # The sample name is the file name up to its first '.'
    metadata.name = os.path.basename(fasta).split('.')[0]
    # Create the general and run categories
    metadata.general = GenObject()
    metadata.run = GenObject()
    # Each sample gets its own folder inside the sequence path
    outputdir = os.path.join(arguments.sequencepath, metadata.name)
    make_path(outputdir)
    # Store the output folder in both categories
    metadata.general.outputdirectory = outputdir
    metadata.run.outputdirectory = outputdir
    # Overwritten with the path of the .fasta file below
    metadata.general.bestassemblyfile = True
    # Attribute used to store commands
    metadata.commands = GenObject()
    # Every sample is assumed to be Salmonella
    metadata.general.referencegenus = 'Salmonella'
    # The .fasta file itself is the best assembly
    metadata.general.bestassemblyfile = fasta
    arguments.runmetadata.samples.append(metadata)
def parsesamplesheet(self):
    """Parse the sample sheet (SampleSheet.csv) for the assembly report.

    Key/value rows outside the sample sections are stored as attributes of
    ``self.header``. Rows following the ``Sample_ID`` column line each become
    a ``MetadataObject`` appended to ``self.samples``. Rows in the
    ``[Reads]`` section set ``forwardlength`` / ``reverselength`` on
    ``self.header`` and are summed into ``self.totalreads``.
    """
    in_samples = False
    header_row = 0
    columns = []
    with open(self.samplesheet, "r") as sheet:
        for row_number, line in enumerate(sheet):
            # Strip the line ending and split the row on commas
            fields = line.rstrip().split(",")
            # Rows whose fields are all empty carry no information
            if not any(fields):
                continue
            if "[Settings]" in line:
                in_samples = False
            if not line.startswith("[") and not in_samples and fields != ['']:
                # Key/value row outside the sample sections: spaces are
                # removed from the key, remaining fields are concatenated
                setattr(self.header, fields[0].replace(" ", ""),
                        "".join(fields[1:]))
            elif "[Data]" in line or "[Reads]" in line:
                in_samples = True
            elif in_samples and "Sample_ID" in line:
                # Column names, stripped of underscores and spaces
                columns.extend(
                    name.replace("_", "").replace(' ', "") for name in fields)
                header_row = row_number
            elif columns:
                # samplenamer() derives the sample name from the row
                samplename = samplenamer(fields)
                strainmetadata = MetadataObject()
                strainmetadata.name = samplename
                # Every sample starts from a shallow copy of the header values
                strainmetadata.run = GenObject(
                    copy.copy(self.header.datastore))
                run = strainmetadata.run
                # Store each field under its column name; empty fields
                # become "NA"
                for idx, item in enumerate(fields):
                    setattr(run, columns[idx], item if item else "NA")
                # Position of the row relative to the column-name row
                run.SampleNumber = row_number - header_row
                # Create the 'general' category for the sample
                strainmetadata.general = GenObject({
                    'outputdirectory': os.path.join(self.path, samplename),
                    'pipelinecommit': self.commit
                })
                strainmetadata.general.logout = os.path.join(
                    self.path, samplename,
                    '{}_log_out.txt'.format(samplename))
                strainmetadata.general.logerr = os.path.join(
                    self.path, samplename,
                    '{}_log_err.txt'.format(samplename))
                self.samples.append(strainmetadata)
            elif in_samples:
                # The first such row is the forward length, later ones the
                # reverse length
                if 'forwardlength' not in self.header.datastore:
                    length_attr = 'forwardlength'
                else:
                    length_attr = 'reverselength'
                setattr(self.header, length_attr, fields[0])
                self.totalreads += int(fields[0])
    # Prefer the date from the sample sheet when it supplies one
    self.date = self.header.Date if "Date" in self.header.datastore else self.date
    for strain in self.samples:
        if 'InvestigatorName' not in strain.run.datastore:
            strain.run.InvestigatorName = 'NA'