Ejemplo n.º 1
0
def test_sistr(variables):
    """
    Run method.sistr() on the NC_003198 assembly, and check the reported cgMLST genome match
    :param variables: object supplying the sequencepath attribute in which NC_003198.fasta is located
    """
    sample_metadata = MetadataObject()
    # Start from an empty sample list, so only this assembly is analysed
    method.runmetadata.samples = []
    assembly = os.path.join(variables.sequencepath, 'NC_003198.fasta')
    # The sample name is the file name up to its first '.'
    sample_metadata.name = os.path.basename(assembly).split('.')[0]
    # Create the general and run categories
    sample_metadata.general = GenObject()
    sample_metadata.run = GenObject()
    sample_metadata.general.fastqfiles = []
    # The outputs go in a folder named after the sample within the sequence path
    destination = os.path.join(variables.sequencepath, sample_metadata.name)
    make_path(destination)
    # Record the output folder, and the log paths inside it
    sample_metadata.general.outputdirectory = destination
    sample_metadata.general.logout = os.path.join(destination, 'out')
    sample_metadata.general.logerr = os.path.join(destination, 'err')
    sample_metadata.run.outputdirectory = destination
    # Placeholder value; replaced by the assembly path below
    sample_metadata.general.bestassemblyfile = True
    # Category for storing commands
    sample_metadata.commands = GenObject()
    # Every sample is treated as Salmonella
    sample_metadata.general.referencegenus = 'Salmonella'
    # Use the .fasta file as the best assembly
    sample_metadata.general.bestassemblyfile = assembly
    method.runmetadata.samples.append(sample_metadata)
    method.sistr()
    for analysed in method.runmetadata.samples:
        assert analysed.sistr.cgmlst_genome_match == 'SAL_BA2732AA'
    variable_update()
Ejemplo n.º 2
0
def test_sistr(variables):
    """
    Build the metadata for the NC_003198 assembly, run method.sistr(), and check the cgMLST genome match
    :param variables: object supplying the sequencepath attribute in which NC_003198.fasta is located
    """
    strain = MetadataObject()
    # Reset the list of samples before adding this assembly
    method.runmetadata.samples = []
    fasta_path = os.path.join(variables.sequencepath, 'NC_003198.fasta')
    # Name the sample after the file name, truncated at the first '.'
    strain.name = os.path.basename(fasta_path).split('.')[0]
    # Create the general and run categories
    strain.general = GenObject()
    strain.run = GenObject()
    strain.general.fastqfiles = []
    # Create the sample-specific folder within the sequence path
    sample_folder = os.path.join(variables.sequencepath, strain.name)
    make_path(sample_folder)
    # Both categories store the same output folder
    strain.general.outputdirectory = sample_folder
    strain.run.outputdirectory = sample_folder
    # Placeholder value; replaced by the assembly path below
    strain.general.bestassemblyfile = True
    # Category for storing commands
    strain.commands = GenObject()
    # Every sample is treated as Salmonella
    strain.general.referencegenus = 'Salmonella'
    # Use the .fasta file as the best assembly
    strain.general.bestassemblyfile = fasta_path
    method.runmetadata.samples.append(strain)
    method.sistr()
    for analysed in method.runmetadata.samples:
        assert analysed.sistr.cgmlst_genome_match == 'SAL_BA2732AA'
    variable_update()
Ejemplo n.º 3
0
 def basic(self):
     """
     Create a metadata object for each name that filer() derives from the .fastq files in :self.path.

     For every name, an output folder is created in :self.path, the matching .fastq files are linked
     into it with relative symlinks, and the general/run categories, the list of FASTQ files, and the
     log file paths are populated. If metadataReader.MetadataReader returns samples from a previous
     run, they replace the newly created list. self.readlength() is run last.
     :raises OSError: if a symlink cannot be created for any reason other than the link already existing
     """
     # Grab any .fastq files in the path
     fastqfiles = glob(os.path.join(self.path, '*.fastq*'))
     # Extract the base name of each entry returned by filer for the globbed paths
     fastqnames = sorted(os.path.basename(name) for name in filer(fastqfiles))
     # Substrings that exclude a file from the list of FASTQ files (presumably the outputs of later
     # processing steps - confirm against the rest of the pipeline)
     excluded = ('trimmed', 'normalised', 'corrected', 'paired', 'unpaired')
     # Iterate through the names of the fastq files
     for fastqname in fastqnames:
         # Set the name
         metadata = MetadataObject()
         metadata.name = fastqname
         # Set the destination folder
         outputdir = os.path.join(self.path, fastqname)
         # Make the destination folder
         make_path(outputdir)
         # Get the fastq files specific to the fastqname
         specificfastq = glob(
             os.path.join(self.path, '{}*.fastq*'.format(fastqname)))
         # Link the files to the output folder. Each link is attempted on its own, so that a link
         # remaining from a previous run does not prevent the other files from being linked
         for fastq in specificfastq:
             fastqfile = os.path.basename(fastq)
             try:
                 # Relative link: :outputdir/:fastqfile -> ../:fastqfile
                 os.symlink('../{}'.format(fastqfile),
                            os.path.join(outputdir, fastqfile))
             except OSError as exception:
                 # If there is an exception other than the file exists, raise it
                 if exception.errno != errno.EEXIST:
                     raise
         # Initialise the general and run categories
         metadata.general = GenObject()
         metadata.run = GenObject()
         # Populate the .fastqfiles category with the sorted files in the output folder. The check
         # is performed against the complete path, not only the file name
         linkedfastq = sorted(
             glob(os.path.join(outputdir, '{}*.fastq*'.format(metadata.name))))
         metadata.general.fastqfiles = [
             fastq for fastq in linkedfastq
             if not any(tag in fastq for tag in excluded)
         ]
         # Add the output directory to the metadata
         metadata.general.outputdirectory = outputdir
         metadata.general.logout = os.path.join(
             self.path, metadata.name,
             '{}_log_out.txt'.format(metadata.name))
         metadata.general.logerr = os.path.join(
             self.path, metadata.name,
             '{}_log_err.txt'.format(metadata.name))
         # Append the metadata to the list of samples
         self.samples.append(metadata)
     # Grab metadata from previous runs
     previousmetadata = metadataReader.MetadataReader(self)
     # Update self.samples (if required)
     if previousmetadata.samples:
         self.samples = previousmetadata.samples
     # Run the read length method
     self.readlength()
Ejemplo n.º 4
0
 def createobject(self):
     """
     Create a metadata object for each name that filer() derives from the .fastq files in :self.path.

     For every name, an output folder is created in :self.path, the matching .fastq files are linked
     into it with relative symlinks, and the general, run, and commands categories are initialised.
     Each metadata object is appended to :self.samples.
     :raises OSError: if a symlink cannot be created for any reason other than the link already existing
     """
     # Grab any .fastq files in the path
     fastqfiles = glob(os.path.join(self.path, '*.fastq*'))
     # Extract the base name of each entry returned by filer for the globbed paths
     fastqnames = sorted(os.path.basename(name) for name in filer(fastqfiles))
     # Iterate through the names of the fastq files
     for fastqname in fastqnames:
         # Set the name
         metadata = MetadataObject()
         metadata.name = fastqname
         # Set the destination folder
         outputdir = os.path.join(self.path, fastqname)
         # Make the destination folder
         make_path(outputdir)
         # Get the fastq files specific to the fastqname
         specificfastq = glob(
             os.path.join(self.path, '{}*.fastq*'.format(fastqname)))
         # Make relative symlinks to the files in :self.path. Each link is attempted on its own, so
         # that a link remaining from a previous run does not prevent the other files from being linked
         for fastq in specificfastq:
             # Get the basename of the file
             fastqfile = os.path.basename(fastq)
             # Set the destination fastq path as the base name plus the destination folder
             destinationfastq = os.path.join(outputdir, fastqfile)
             try:
                 # Symlink the files
                 os.symlink('../{}'.format(fastqfile), destinationfastq)
             except OSError as exception:
                 # If there is an exception other than the file exists, raise it
                 if exception.errno != errno.EEXIST:
                     raise
         # Initialise the general and run categories
         metadata.general = GenObject()
         metadata.run = GenObject()
         # Populate the .fastqfiles category. The list is sorted, as glob returns files in arbitrary
         # order. The 'trimmed' check is performed against the complete path, not only the file name
         metadata.general.fastqfiles = [
             fastq for fastq in sorted(
                 glob(os.path.join(outputdir, '{}*.fastq*'.format(fastqname))))
             if 'trimmed' not in fastq
         ]
         # Add the output directory to the metadata
         metadata.general.outputdirectory = outputdir
         metadata.run.outputdirectory = outputdir
         metadata.general.bestassemblyfile = True
         # The same list object is used for the trimmed/corrected files
         metadata.general.trimmedcorrectedfastqfiles = metadata.general.fastqfiles
         metadata.general.logout = os.path.join(
             metadata.general.outputdirectory, 'logout')
         metadata.general.logerr = os.path.join(
             metadata.general.outputdirectory, 'logerr')
         # Initialise an attribute to store commands
         metadata.commands = GenObject()
         # Append the metadata to the list of samples
         self.samples.append(metadata)
Ejemplo n.º 5
0
 def setup(self):
     """
     Set up the metadata object to be passed to Vtyper()

     One metadata object is created for each .fasta file located directly in :self.sequencepath.
     :return: list of metadata objects, ordered by the sorted file paths
     """
     from glob import glob
     # Join the folder and the pattern, so that the search works whether or not :self.sequencepath
     # ends with a path separator
     files = sorted(glob(os.path.join(self.sequencepath, '*.fasta')))
     samples = list()
     # Create the metadata for each file
     for fasta in files:
         # Create a metadata object to store all metadata associated with each strain
         metadata = MetadataObject()
         metadata.general = GenObject()
         metadata.commands = GenObject()
         # Set the name: the file name up to its first '.'
         name = os.path.basename(fasta).split('.')[0]
         metadata.name = name
         metadata.general.bestassemblyfile = fasta
         metadata.general.stx = True
         metadata.general.outputdirectory = self.path
         # Only the file name is truncated at the first '.'; a '.' in a folder name must not
         # shorten the path
         metadata.general.filenoext = os.path.join(os.path.dirname(fasta), name)
         metadata.general.fastqfiles = list()
         samples.append(metadata)
     return samples
Ejemplo n.º 6
0
    # Parse the command line arguments (parser is created outside this excerpt)
    arguments = parser.parse_args()

    # Define the start time
    arguments.starttime = time.time()

    # Find the files matching *.fa* in the sequence path, sorted by path
    fastas = sorted(glob(os.path.join(arguments.sequencepath, '*.fa*')))

    # Create a run-level metadata object holding the list of per-sample metadata objects
    arguments.runmetadata = MetadataObject()
    arguments.runmetadata.samples = list()
    for fasta in fastas:
        metadata = MetadataObject()
        # The sample name is the file name up to its first '.'
        metadata.name = os.path.split(fasta)[1].split('.')[0]
        # Initialise the general and run categories
        metadata.general = GenObject()
        metadata.run = GenObject()
        # Set the destination folder: a folder named after the sample within the sequence path
        outputdir = os.path.join(arguments.sequencepath, metadata.name)
        make_path(outputdir)
        # Add the output directory to the metadata
        metadata.general.outputdirectory = outputdir
        metadata.run.outputdirectory = outputdir
        # Placeholder value; replaced by the path of the .fasta file below
        metadata.general.bestassemblyfile = True
        # Initialise an attribute to store commands
        metadata.commands = GenObject()
        # Assume that all samples are Salmonella
        metadata.general.referencegenus = 'Salmonella'
        # Set the .fasta file as the best assembly
        metadata.general.bestassemblyfile = fasta
        arguments.runmetadata.samples.append(metadata)
Ejemplo n.º 7
0
 def parsesamplesheet(self):
     """Parses the sample sheet (:self.samplesheet): key/value lines outside the [Data] and [Reads]
     sections are stored on :self.header, the lines of the [Reads] section are stored as the
     forward/reverse lengths and summed into :self.totalreads, and every line following the
     Sample_ID row yields a metadata object that is appended to :self.samples"""
     # in_samples: currently within a [Data] or [Reads] section
     in_samples = False
     # header_line: index of the line containing the Sample_ID column names
     header_line = 0
     # columns: the column names, with underscores and spaces removed
     columns = []
     # Open the sample sheet
     with open(self.samplesheet, "r") as handle:
         for lineno, line in enumerate(handle):
             # Remove trailing whitespace, and split on commas
             fields = line.rstrip().split(",")
             # Skip the lines on which every field is empty
             if not any(fields):
                 continue
             # The [Settings] section ends any sample section
             if "[Settings]" in line:
                 in_samples = False
             if not (line.startswith("[") or in_samples):
                 # Key/value line outside the sample sections: spaces are removed from the key,
                 # and all the remaining fields are joined to form the value
                 key = fields[0].replace(" ", "")
                 setattr(self.header, key, "".join(fields[1:]))
             elif "[Data]" in line or "[Reads]" in line:
                 in_samples = True
             elif in_samples and "Sample_ID" in line:
                 # Store the column names, and remember where the sample rows begin
                 columns.extend(
                     field.replace("_", "").replace(' ', "")
                     for field in fields)
                 header_line = lineno
             elif columns:
                 # Try and replicate the Illumina rules to create file names from "Sample_Name"
                 samplename = samplenamer(fields)
                 # Create an object for storing nested static variables
                 strainmetadata = MetadataObject()
                 # Set the sample name in the object
                 strainmetadata.name = samplename
                 # The run category begins as a copy of the values stored in the header
                 strainmetadata.run = GenObject(
                     copy.copy(self.header.datastore))
                 # Shorter handle for populating the run category
                 run = strainmetadata.run
                 # Store every field under its column name; empty fields are stored as "NA"
                 for idx, item in enumerate(fields):
                     setattr(run, columns[idx], item if item else "NA")
                 # Add the sample number: the offset of this line from the Sample_ID row
                 run.SampleNumber = lineno - header_line
                 # Create the 'General' category for strainmetadata
                 sampledir = os.path.join(self.path, samplename)
                 strainmetadata.general = GenObject({
                     'outputdirectory': sampledir,
                     'pipelinecommit': self.commit
                 })
                 strainmetadata.general.logout = os.path.join(
                     sampledir, '{}_log_out.txt'.format(samplename))
                 strainmetadata.general.logerr = os.path.join(
                     sampledir, '{}_log_err.txt'.format(samplename))
                 # Append the strainmetadata object to a list
                 self.samples.append(strainmetadata)
             elif in_samples:
                 # Sample section without column names: the first such value is stored as the
                 # forward length, any later value as the reverse length
                 if 'forwardlength' not in self.header.datastore:
                     lengthname = 'forwardlength'
                 else:
                     lengthname = 'reverselength'
                 setattr(self.header, lengthname, fields[0])
                 self.totalreads += int(fields[0])
     # Use the date from the sample sheet when it is present
     self.date = self.header.Date if "Date" in self.header.datastore else self.date
     # Ensure that every sample has an investigator name
     for sample in self.samples:
         if 'InvestigatorName' not in sample.run.datastore:
             sample.run.InvestigatorName = 'NA'