def query_prep(self):
    """
    Build a metadata object for every query FASTA file in self.query_path.

    Each '*.fasta' file whose base name is not 'combinedtargets' gets a MetadataObject with .general,
    .commands and .alleles GenObjects. The output directory is <query_path>/<name> and the BLAST report path
    is <output directory>/<name>.tsv; a report already at that path is deleted. The FASTA file is handed to
    relative_symlink and the value it returns is stored as general.bestassemblyfile. The object is appended
    to its own .samples list and to self.runmetadata.samples.
    """
    logging.info('Preparing query files')
    # Process the FASTA files in the query path in sorted order
    for fasta_path in sorted(glob(os.path.join(self.query_path, '*.fasta'))):
        sample_name = os.path.splitext(os.path.basename(fasta_path))[0]
        # Files named 'combinedtargets' are skipped
        if sample_name == 'combinedtargets':
            continue
        # Set up the metadata object and its attribute containers
        sample = MetadataObject()
        sample.samples = []
        sample.name = sample_name
        sample.general = GenObject()
        sample.commands = GenObject()
        sample.alleles = GenObject()
        # Each sample gets its own folder within the query path
        sample.alleles.outputdirectory = os.path.join(self.query_path, sample.name)
        # The BLAST report is named after the sample
        report_name = '{seq_id}.tsv'.format(seq_id=sample.name)
        sample.alleles.blast_report = os.path.join(sample.alleles.outputdirectory, report_name)
        # Delete any report already present at that path; a missing file is not an error
        try:
            os.remove(sample.alleles.blast_report)
        except FileNotFoundError:
            pass
        make_path(sample.alleles.outputdirectory)
        sample.general.bestassemblyfile = relative_symlink(
            src_file=fasta_path,
            output_dir=sample.alleles.outputdirectory,
            export_output=True,
        )
        # The object is stored in its own samples list as well as in the run-level list
        sample.samples.append(sample)
        self.runmetadata.samples.append(sample)
# Example #2
# 0
def test_sistr_seqsero():
    """
    Run method.sistr() and method.seqsero() on a single sample and check the reported values.

    The sample is built from 'NC_003198.fasta' and one trimmed/corrected FASTQ file under var.sequencepath.
    The test asserts the cgMLST genome match after sistr() and the predicted serotype after seqsero(), and
    finishes by calling variable_update().
    """
    strain = MetadataObject()
    # Start from an empty sample list on the shared method object
    method.runmetadata.samples = []
    assembly = os.path.join(var.sequencepath, 'NC_003198.fasta')
    # The sample name is the file name up to its first '.'
    strain.name = os.path.basename(assembly).split('.')[0]
    # Create the general and run categories
    strain.general = GenObject()
    strain.run = GenObject()
    strain.general.fastqfiles = []
    reads = os.path.join(var.sequencepath, 'seqsero', '2014-SEQ-1049_seqsero.fastq.gz')
    strain.general.trimmedcorrectedfastqfiles = [reads]
    # Create the destination folder and record it on the metadata
    sample_dir = os.path.join(var.sequencepath, strain.name)
    make_path(sample_dir)
    strain.general.outputdirectory = sample_dir
    strain.general.logout = os.path.join(sample_dir, 'out')
    strain.general.logerr = os.path.join(sample_dir, 'err')
    strain.run.outputdirectory = sample_dir
    # Placeholder value; it is replaced by the FASTA path below, before the analyses are run
    strain.general.bestassemblyfile = True
    # Attribute used to store commands
    strain.commands = GenObject()
    # Treat the sample as Salmonella
    strain.general.referencegenus = 'Salmonella'
    # Use the .fasta file as the best assembly
    strain.general.bestassemblyfile = assembly
    method.runmetadata.samples.append(strain)
    method.sistr()
    # Either of these genome matches is accepted
    accepted_matches = ('ERR586739', 'SAL_BA2732AA')
    for sample in method.runmetadata.samples:
        assert sample.sistr.cgmlst_genome_match in accepted_matches
    method.seqsero()
    for sample in method.runmetadata.samples:
        assert sample.seqsero.predicted_serotype == '- 9:f,g,t:-'
    variable_update()