# Example #1
# 0
def run_mash(seqids, output_dir):
    """
    Use MASH to determine the genus of strains when the requested analysis has a genus-specific database

    For every seqid, ``mash screen`` is run against the hard-coded RefSeq sketch and its output is written to
    ``<output_dir>/<seqid>_screen.tab``. The genus is taken from the third-from-last '/'-separated component of
    each hit's ``query_id``.

    NOTE(review): the second positional argument passed to ``mash.screen`` is ``item``, which is not defined
    anywhere in this function. Unless a module-level ``item`` exists, the first iteration raises NameError.
    The intended value is presumably the path to the FASTA file for ``seqid`` - confirm against the caller.

    :param seqids: iterable of sample identifiers; each one is used to build the screen output file name
    :param output_dir: directory in which the ``<seqid>_screen.tab`` files are written
    :return: dictionary of MASH-calculated genera, keyed by seqid. A seqid whose screen output contains no
    hits does not appear in the dictionary
    """
    # Dictionary to store the MASH results
    genus_dict = dict()
    # Run mash screen on each of the assemblies
    for seqid in seqids:
        screen_file = os.path.join(output_dir,
                                   '{seqid}_screen.tab'.format(seqid=seqid))
        # NOTE(review): `item` is undefined in this scope (see docstring)
        mash.screen('/mnt/nas2/databases/confindr/databases/refseq.msh',
                    item,
                    threads=8,
                    w='',
                    i='0.95',
                    output_file=screen_file,
                    returncmd=True)
        screen_output = mash.read_mash_screen(screen_file)
        # Determine the genus from the screen output file
        for screen in screen_output:
            # Extract the genus from the mash results
            mash_organism = screen.query_id.split('/')[-3]
            # Populate the dictionary with the seqid, and the calculated genus. The entry is overwritten on
            # every iteration, so the genus of the last hit in screen_output is the one that is kept
            genus_dict[seqid] = mash_organism
    return genus_dict
def find_genus(files, database, threads=12):
    """
    Uses MASH to find the genus of fasta files.

    ``mash screen`` is run once per entry of ``files``; the genus is read from the third-from-last
    '/'-separated component of the first hit's ``query_id``. Shigella is reported as Escherichia.

    :param files: File dictionary returned by filer method. Keys are used as the keys of the returned
    dictionary, and values are passed to mash screen as the sequence file to screen.
    :param database: Path to reduced refseq database sketch.
    :param threads: Number of threads to run mash with.
    :return: genus_dict: Dictionary of genus for each sample. Will return NA if genus could not be found.
    """
    # Imported locally so the fix does not depend on the module-level import block
    import tempfile

    genus_dict = dict()
    # mkdtemp guarantees a freshly created, uniquely named directory. The previous name (digits taken from
    # time.time()) could collide with an existing directory, which would then be reused and deleted
    tmpdir = tempfile.mkdtemp(dir=os.getcwd())
    screen_file = os.path.join(tmpdir, 'screen.tab')
    try:
        for file_name, fasta in files.items():
            mash.screen(database, fasta,
                        threads=threads,
                        w='',
                        i=0.95,
                        output_file=screen_file)
            screen_output = mash.read_mash_screen(screen_file)
            # Best-effort removal so results from one sample can never be read back for the next one
            try:
                os.remove(screen_file)
            except OSError:
                pass
            try:
                genus = screen_output[0].query_id.split('/')[-3]
            except IndexError:
                # Either there were no hits, or the query ID did not have the expected path depth
                genus = 'NA'
            else:
                if genus == 'Shigella':
                    genus = 'Escherichia'
            genus_dict[file_name] = genus
    finally:
        # Always clean up the scratch directory, even if mash raised part-way through
        shutil.rmtree(tmpdir)
    return genus_dict
# Example #3
# 0
def pointfinder_redmine(redmine_instance, issue, work_dir, description):
    """
    Run PointFinder on the assemblies requested in a Redmine issue, and upload the zipped results to the issue.

    The genus of every assembly is first estimated with ``mash screen``; PointFinder is then run, one sample
    at a time, on the samples whose genus is present in the PointFinder database. Samples that could not be
    located, could not be screened, or belong to an unsupported genus are listed in the note added to the issue.
    All sub-folders of ``work_dir`` are deleted once the results have been zipped.

    :param redmine_instance: path to a pickle file containing the Redmine instance
    :param issue: path to a pickle file containing the Redmine issue (its ``id`` attribute is used)
    :param work_dir: working directory in which all intermediate and output files are created
    :param description: path to a pickle file containing the issue description, as a sequence of strings
    """
    sentry_sdk.init(SENTRY_DSN, before_send=before_send)
    # Unpickle Redmine objects, making sure that the file handles are closed again
    # NOTE(review): pickle.load can execute arbitrary code; these files are assumed to be written by the
    # automation itself rather than supplied by a user - confirm
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)
    # Parse description to get list of SeqIDs
    seqids = list()
    for entry in description:
        entry = entry.upper()
        # Minimal check to make sure IDs provided somewhat resemble a valid sample ID: purely alphabetic
        # entries are discarded
        if not entry.isalpha():
            seqids.append(entry)

    # Record the requested SeqIDs
    with open(os.path.join(work_dir, 'seqid.txt'), 'w') as f:
        for seqid in seqids:
            f.write(seqid + '\n')
    # Drop FASTA files into workdir
    retrieve_nas_files(seqids=seqids,
                       outdir=work_dir,
                       filetype='fasta',
                       copyflag=False)
    # Create output directory (used for the mash screen outputs)
    output_dir = os.path.join(work_dir, 'output')
    make_path(output_dir)
    # Get all of the FASTA files
    fasta_list = sorted(glob.glob(os.path.join(work_dir, '*.fasta')))
    # Set the folder to store all the PointFinder outputs
    pointfinder_output_dir = os.path.join(work_dir, 'pointfinder_outputs')
    # Initialise dictionaries to store the pointfinder-formatted, and mash-calculated genus outputs for each strain
    genus_dict = dict()
    organism_dict = dict()
    # Create lists to store missing and unprocessed seqids
    unprocessed_seqs = list()
    missing_seqs = list()
    mash_fails = list()
    # Dictionary to convert the mash-calculated genus to the pointfinder format
    # (the 'Mycobacterium' key used to start with an invisible U+200E character, so it could never match)
    pointfinder_org_dict = {
        'Campylobacter': 'campylobacter',
        'Escherichia': 'e.coli',
        'Shigella': 'e.coli',
        'Mycobacterium': 'tuberculosis',
        'Neisseria': 'gonorrhoeae',
        'Salmonella': 'salmonella'
    }
    # Reverse look-up dictionary
    rev_org_dict = {
        'campylobacter': 'Campylobacter',
        'e.coli': 'Escherichia',
        'tuberculosis': 'Mycobacterium',
        'gonorrhoeae': 'Neisseria',
        'salmonella': 'Salmonella'
    }
    # Per-genus report settings: CSV header, path of the current PointFinder output, and path of the summary
    # NOTE(review): there are no entries for Mycobacterium or Neisseria, so the summary look-ups below raise
    # a KeyError for those genera after PointFinder has run, and the sample is reported as a MASH failure
    summary_dict = {
        'Salmonella': {
            'prediction': {
                'header':
                'Strain,Colitsin,Colistin,Spectinomycin,Quinolones,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'Salmonella_prediction_summary.csv')
            },
            'table': {
                'header':
                'Strain,parE,parC,gyrA,pmrB,pmrA,gyrB,16S_rrsD,23S,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'Salmonella_table_summary.csv')
            },
            'results': {
                'header':
                'Strain,Genus,Mutation,NucleotideChange,AminoAcidChange,Resistance,PMID,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'PointFinder_results_summary.csv')
            }
        },
        'Escherichia': {
            'prediction': {
                'header':
                'Strain,Colistin,GentamicinC,gentamicinC,Streptomycin,Macrolide,Sulfonamide,'
                'Tobramycin,Neomycin,Fluoroquinolones,Aminocoumarin,Tetracycline,KanamycinA,'
                'Spectinomycin,B-lactamResistance,Paromomycin,Kasugamicin,Quinolones,G418,'
                'QuinolonesAndfluoroquinolones,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'Escherichia_prediction_summary.csv')
            },
            'table': {
                'header':
                'Strain,parE,parC,folP,gyrA,pmrB,pmrA,16S_rrsB,16S_rrsH,gyrB,ampC,16S_rrsC,23S,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'Escherichia_table_summary.csv')
            },
            'results': {
                'header':
                'Strain,Genus,Mutation,NucleotideChange,AminoAcidChange,Resistance,PMID,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'PointFinder_results_summary.csv')
            }
        },
        'Campylobacter': {
            'prediction': {
                'header':
                'Strain,LowLevelIncreaseMIC,AssociatedWithT86Mutations,Macrolide,Quinolone,'
                'Streptinomycin,Erythromycin,IntermediateResistance,HighLevelResistance_'
                'nalidixic_and_ciprofloxacin,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'Campylobacter_prediction_summary.csv')
            },
            'table': {
                'header':
                'Strain,L22,rpsL,cmeR,gyrA,23S,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'Campylobacter_table_summary.csv')
            },
            'results': {
                'header':
                'Strain,Genus,Mutation,NucleotideChange,AminoAcidChange,Resistance,PMID,\n',
                'output':
                str(),
                'summary':
                os.path.join(pointfinder_output_dir,
                             'PointFinder_results_summary.csv')
            }
        }
    }

    # Run mash screen on each of the assemblies
    for item in fasta_list:
        seqid = os.path.splitext(os.path.basename(item))[0]
        screen_file = os.path.join(output_dir,
                                   '{seqid}_screen.tab'.format(seqid=seqid))
        mash.screen('/mnt/nas2/databases/confindr/databases/refseq.msh',
                    item,
                    threads=8,
                    w='',
                    i='0.95',
                    output_file=screen_file,
                    returncmd=True)
        screen_output = mash.read_mash_screen(screen_file)
        # Determine the genus from the screen output file. The dictionaries are overwritten for every hit, so
        # the last hit in screen_output is the one that is kept
        for screen in screen_output:
            # Extract the genus from the mash results
            mash_organism = screen.query_id.split('/')[-3]
            # Use the organism as a key in the pointfinder database name conversion dictionary
            try:
                mash_genus = pointfinder_org_dict[mash_organism]
            except KeyError:
                mash_genus = 'NA'
            # Populate the dictionaries with the seqid, and the calculated genus/pointfinder name
            genus_dict[seqid] = mash_genus
            organism_dict[seqid] = mash_organism
    # Delete all of the FASTA files
    for fasta_file in fasta_list:
        os.remove(fasta_file)

    # Pointfinder
    # These unfortunate hard coded paths appear to be necessary
    activate = 'source /home/ubuntu/miniconda3/bin/activate /mnt/nas2/virtual_environments/pointfinder'
    pointfinder_py = '/mnt/nas2/virtual_environments/pointfinder/pointfinder-3.0/pointfinder-3.0.py'
    # Database locations
    pointfinder_db = '/mnt/nas2/databases/assemblydatabases/0.3.4/pointfinder'
    # List of organisms in the pointfinder database
    pointfinder_list = [
        'campylobacter', 'e.coli', 'tuberculosis', 'gonorrhoeae', 'salmonella'
    ]
    try:
        os.mkdir(pointfinder_output_dir)
    except FileExistsError:
        pass
    # Pointfinder cannot handle an entire folder of sequences; each sample must be processed independently
    for seqid in sorted(seqids):
        # If the seqid isn't present in the dictionary, it is because the assembly could not be found - or because
        # MASH screen failed
        try:
            # Look up the PointFinder-formatted genus
            pointfinder_genus = genus_dict[seqid]
            # If the genus isn't in the pointfinder database, do not attempt to process it
            if pointfinder_genus in pointfinder_list:
                # Only translate back to the MASH genus once the genus is known to be supported. Doing this
                # before the check raised a KeyError for 'NA', so unsupported genera were reported as MASH
                # failures rather than as unprocessed sequences
                genus = rev_org_dict[pointfinder_genus]
                # Create folder to drop FASTA files
                assembly_folder = os.path.join(work_dir, seqid)
                make_path(assembly_folder)
                # Extract FASTA files.
                retrieve_nas_files(seqids=[seqid],
                                   outdir=assembly_folder,
                                   filetype='fasta',
                                   copyflag=False)
                fasta = os.path.join(assembly_folder,
                                     '{seqid}.fasta'.format(seqid=seqid))
                # Prepare command
                cmd = 'python {py} -i {fasta} -s {orgn} -p {db} -o {output} -m blastn -m_p {blast_path}'\
                    .format(py=pointfinder_py,
                            fasta=fasta,
                            orgn=pointfinder_genus,
                            db=pointfinder_db,
                            output=pointfinder_output_dir,
                            blast_path='/mnt/nas2/virtual_environments/pointfinder/bin/blastn'
                            )
                # Create another shell script to execute within the pointfinder conda environment
                template = "#!/bin/bash\n{} && {}".format(activate, cmd)
                pointfinder_script = os.path.join(work_dir,
                                                  'run_pointfinder.sh')
                with open(pointfinder_script, 'w+') as script_handle:
                    script_handle.write(template)
                # Modify the permissions of the script to allow it to be run on the node
                make_executable(pointfinder_script)
                # Run shell script
                os.system(pointfinder_script)
                # Find the pointfinder outputs
                # NOTE(review): an IndexError is raised here if PointFinder did not create the expected files
                summary_dict[genus]['prediction']['output'] = \
                    glob.glob(os.path.join(pointfinder_output_dir, '{seq}*prediction.txt'.format(seq=seqid)))[0]
                summary_dict[genus]['table']['output'] = \
                    glob.glob(os.path.join(pointfinder_output_dir, '{seq}*table.txt'.format(seq=seqid)))[0]
                summary_dict[genus]['results']['output'] = \
                    glob.glob(os.path.join(pointfinder_output_dir, '{seq}*results.txt'.format(seq=seqid)))[0]
                # Process the predictions
                write_report(summary_dict=summary_dict,
                             seqid=seqid,
                             genus=genus,
                             key='prediction')
                # Process the results summary
                write_report(summary_dict=summary_dict,
                             seqid=seqid,
                             genus=genus,
                             key='results')
                # Process the table summary
                write_table_report(summary_dict=summary_dict,
                                   seqid=seqid,
                                   genus=genus)
            else:
                unprocessed_seqs.append(seqid)
        except KeyError:
            # No screen file means that no assembly was found for this seqid; otherwise MASH produced no hits
            if not os.path.isfile(
                    os.path.join(output_dir,
                                 '{seq}_screen.tab'.format(seq=seqid))):
                missing_seqs.append(seqid)
            else:
                mash_fails.append(seqid)
    # Attempt to clear out the tmp folder from the pointfinder_output_dir
    try:
        shutil.rmtree(os.path.join(pointfinder_output_dir, 'tmp'))
    except FileNotFoundError:
        pass
    # Zip output
    output_filename = 'pointfinder_output'
    zip_filepath = zip_folder(results_path=pointfinder_output_dir,
                              output_dir=work_dir,
                              output_filename=output_filename)
    zip_filepath += '.zip'
    # Prepare upload
    output_list = [{
        'filename': os.path.basename(zip_filepath),
        'path': zip_filepath
    }]

    # Create a note to add to the updated Redmine issue
    notes = 'Pointfinder process complete!'
    # If there are missing, or unprocessed sequences, add details to the note
    if unprocessed_seqs:
        seq_list = ['{seqid} ({organism})'.format(seqid=sequence, organism=organism_dict[sequence])
                    for sequence in unprocessed_seqs]
        if len(unprocessed_seqs) > 1:
            notes += '\n The following sequences were not processed, as they were determined to be genera not ' \
                     'present in the pointfinder database: {seqs}'.format(seqs=', '.join(seq_list))
        else:
            notes += '\n The following sequence was not processed, as it was determined to be a genus not ' \
                     'present in the pointfinder database: {seqs}'.format(seqs=', '.join(seq_list))
    if missing_seqs:
        if len(missing_seqs) > 1:
            notes += '\n The following sequences were not processed, as they could not be located in the strain ' \
                     'database: {seqs}'.format(seqs=', '.join(missing_seqs))
        else:
            notes += '\n The following sequence was not processed, as it could not be located in the strain database:' \
                     ' {seqs}'.format(seqs=', '.join(missing_seqs))
    if mash_fails:
        if len(mash_fails) > 1:
            notes += '\n The following sequences could not be processed by MASH screen: {seqs}'\
                .format(seqs=', '.join(mash_fails))
        else:
            notes += '\n The following sequence could not be processed by MASH screen: {seqs}'\
                .format(seqs=', '.join(mash_fails))
    # Create a list of all the folders - will be used to clean up the working directory
    folders = glob.glob(os.path.join(work_dir, '*/'))
    # Remove all the folders
    for folder in folders:
        if os.path.isdir(folder):
            shutil.rmtree(folder)
    # Wrap up issue
    redmine_instance.issue.update(resource_id=issue.id,
                                  uploads=output_list,
                                  status_id=4,
                                  notes=notes)
def staramr_redmine(redmine_instance, issue, work_dir, description):
    """
    Run StarAMR on the assemblies requested in a Redmine issue, and upload the zipped results to the issue.

    The genus of every assembly is first estimated with ``mash screen``. The samples are then grouped by
    genus, and StarAMR is run once per supported genus on all of the assemblies of that genus. Samples that
    could not be located, could not be screened, or belong to an unsupported genus are listed in the note added
    to the issue. All sub-folders of ``work_dir`` are deleted once the results have been zipped.

    :param redmine_instance: path to a pickle file containing the Redmine instance
    :param issue: path to a pickle file containing the Redmine issue (its ``id`` attribute is used)
    :param work_dir: working directory in which all intermediate and output files are created
    :param description: path to a pickle file containing the issue description, as a sequence of strings
    """
    # Unpickle Redmine objects, making sure that the file handles are closed again
    # NOTE(review): pickle.load can execute arbitrary code; these files are assumed to be written by the
    # automation itself rather than supplied by a user - confirm
    with open(redmine_instance, 'rb') as handle:
        redmine_instance = pickle.load(handle)
    with open(issue, 'rb') as handle:
        issue = pickle.load(handle)
    with open(description, 'rb') as handle:
        description = pickle.load(handle)
    # Parse description to get list of SeqIDs
    seqids = list()
    for entry in description:
        entry = entry.upper()
        # Minimal check to make sure IDs provided somewhat resemble a valid sample ID: purely alphabetic
        # entries are discarded
        if not entry.isalpha():
            seqids.append(entry)

    # Record the requested SeqIDs
    with open(os.path.join(work_dir, 'seqid.txt'), 'w') as f:
        for seqid in seqids:
            f.write(seqid + '\n')
    # Drop FASTA files into workdir
    retrieve_nas_files(seqids=seqids,
                       outdir=work_dir,
                       filetype='fasta',
                       copyflag=False)
    # Create output directory (used for the mash screen outputs)
    output_dir = os.path.join(work_dir, 'output')
    make_path(output_dir)
    # Get all of the FASTA files
    fasta_list = sorted(glob.glob(os.path.join(work_dir, '*.fasta')))
    # Set the folder to store all the StarAMR outputs
    staramr_output_dir = os.path.join(work_dir, 'staramr_outputs')
    # Initialise dictionaries to store the pointfinder-formatted, and mash-calculated genus outputs for each strain
    genus_dict = dict()
    organism_dict = dict()
    # Create lists to store missing and unprocessed seqids
    unprocessed_seqs = list()
    missing_seqs = list()
    mash_fails = list()
    # Dictionary to convert the mash-calculated genus to the pointfinder format
    # (the 'Mycobacterium' key used to start with an invisible U+200E character, so it could never match)
    pointfinder_org_dict = {
        'Campylobacter': 'campylobacter',
        'Escherichia': 'e.coli',
        'Shigella': 'e.coli',
        'Mycobacterium': 'tuberculosis',
        'Neisseria': 'gonorrhoeae',
        'Salmonella': 'salmonella'
    }

    # Run mash screen on each of the assemblies
    for item in fasta_list:
        seqid = os.path.splitext(os.path.basename(item))[0]
        screen_file = os.path.join(output_dir,
                                   '{seqid}_screen.tab'.format(seqid=seqid))
        mash.screen('/mnt/nas2/databases/confindr/databases/refseq.msh',
                    item,
                    threads=8,
                    w='',
                    i='0.95',
                    output_file=screen_file,
                    returncmd=True)
        screen_output = mash.read_mash_screen(screen_file)
        # Determine the genus from the screen output file. The dictionaries are overwritten for every hit, so
        # the last hit in screen_output is the one that is kept
        for screen in screen_output:
            # Extract the genus from the mash results
            mash_organism = screen.query_id.split('/')[-3]
            # Use the organism as a key in the pointfinder database name conversion dictionary
            try:
                mash_genus = pointfinder_org_dict[mash_organism]
            except KeyError:
                mash_genus = 'NA'
            # Populate the dictionaries with the seqid, and the calculated genus/pointfinder name
            genus_dict[seqid] = mash_genus
            organism_dict[seqid] = mash_organism
    # Delete all of the FASTA files
    for fasta_file in fasta_list:
        os.remove(fasta_file)

    # StarAMR
    # These unfortunate hard coded paths appear to be necessary
    activate = 'source /home/ubuntu/miniconda3/bin/activate /mnt/nas2/virtual_environments/staramr'
    staramr_py = '/mnt/nas2/virtual_environments/staramr/bin/staramr'
    # List of pointfinder organisms for which StarAMR is run
    staramr_list = ['campylobacter', 'salmonella']
    try:
        os.mkdir(staramr_output_dir)
    except FileExistsError:
        pass

    # Group the seqids by genus, as StarAMR is run once per genus
    genus_seqid_dict = dict()
    for seqid in sorted(seqids):
        try:
            seqid_genus = genus_dict[seqid]
        except KeyError:
            # No screen file means that no assembly was found for this seqid; otherwise MASH produced no hits.
            # Same distinction as in pointfinder_redmine - previously missing_seqs was never populated here
            if not os.path.isfile(
                    os.path.join(output_dir,
                                 '{seq}_screen.tab'.format(seq=seqid))):
                missing_seqs.append(seqid)
            else:
                mash_fails.append(seqid)
        else:
            genus_seqid_dict.setdefault(seqid_genus, list()).append(seqid)

    for genus in genus_seqid_dict:
        if genus in staramr_list:
            # Collect all the assemblies of this genus in a single folder
            assembly_folder = os.path.join(work_dir, genus)
            make_path(assembly_folder)
            retrieve_nas_files(seqids=genus_seqid_dict[genus],
                               outdir=assembly_folder,
                               filetype='fasta',
                               copyflag=False)
            fastas = sorted(glob.glob(os.path.join(assembly_folder,
                                                   '*.fasta')))
            outdir = os.path.join(staramr_output_dir, genus)
            # Prepare command: the options, followed by the space-separated FASTA files
            cmd = '{py} search --pointfinder-organism {orgn} -o {output} ' \
                .format(py=staramr_py,
                        orgn=genus,
                        output=outdir,
                        )
            cmd += ''.join(fasta_path + ' ' for fasta_path in fastas)
            # Create another shell script to execute within the staramr conda environment
            template = "#!/bin/bash\n{} && {}".format(activate, cmd)
            staramr_script = os.path.join(work_dir, 'run_staramr.sh')
            with open(staramr_script, 'w+') as f:
                f.write(template)
            # Modify the permissions of the script to allow it to be run on the node
            make_executable(staramr_script)
            # Run shell script
            os.system(staramr_script)
        else:
            unprocessed_seqs.extend(genus_seqid_dict[genus])

    # Zip output
    output_filename = 'staramr_output'
    zip_filepath = zip_folder(results_path=staramr_output_dir,
                              output_dir=work_dir,
                              output_filename=output_filename)
    zip_filepath += '.zip'
    # Prepare upload
    output_list = [{
        'filename': os.path.basename(zip_filepath),
        'path': zip_filepath
    }]

    # Create a note to add to the updated Redmine issue
    notes = 'StarAMR process complete!'
    # If there are missing, or unprocessed sequences, add details to the note
    if unprocessed_seqs:
        seq_list = ['{seqid} ({organism})'.format(seqid=sequence, organism=organism_dict[sequence])
                    for sequence in unprocessed_seqs]
        if len(unprocessed_seqs) > 1:
            notes += '\n The following sequences were not processed, as they were determined to be genera not ' \
                     'present in the StarAMR database: {seqs}'.format(seqs=', '.join(seq_list))
        else:
            notes += '\n The following sequence was not processed, as it was determined to be a genus not ' \
                     'present in the StarAMR database: {seqs}'.format(seqs=', '.join(seq_list))
    if missing_seqs:
        if len(missing_seqs) > 1:
            notes += '\n The following sequences were not processed, as they could not be located in the strain ' \
                     'database: {seqs}'.format(seqs=', '.join(missing_seqs))
        else:
            notes += '\n The following sequence was not processed, as it could not be located in the strain database:' \
                     ' {seqs}'.format(seqs=', '.join(missing_seqs))
    if mash_fails:
        if len(mash_fails) > 1:
            notes += '\n The following sequences could not be processed by MASH screen: {seqs}'\
                .format(seqs=', '.join(mash_fails))
        else:
            notes += '\n The following sequence could not be processed by MASH screen: {seqs}'\
                .format(seqs=', '.join(mash_fails))
    # Create a list of all the folders - will be used to clean up the working directory
    folders = glob.glob(os.path.join(work_dir, '*/'))
    # Remove all the folders
    for folder in folders:
        if os.path.isdir(folder):
            shutil.rmtree(folder)
    # Wrap up issue
    redmine_instance.issue.update(resource_id=issue.id,
                                  uploads=output_list,
                                  status_id=4,
                                  notes=notes)
def mash_for_potential_plasmids(plasmid_db,
                                forward_reads,
                                output_dir,
                                reverse_reads=None,
                                threads=1,
                                logfile=None,
                                identity_cutoff=0.95):
    """
    Screen a set of reads against a plasmid database with mash, and report the plasmids that may be present.
    :param plasmid_db: Path to a multi-Fasta-formatted file that has plasmid sequences of interest.
    :param forward_reads: Path to forward reads.
    :param output_dir: Path to output directory where the mash sketch and screen result files are written.
    It is created if it does not exist.
    :param reverse_reads: Path to reverse reads. If not specified, the screen is run on the forward reads only.
    :param threads: Number of threads to run mash analyses on.
    :param logfile: Path to logfile. If provided, the output of each mash call is written to it.
    :param identity_cutoff: Mash screen identity cutoff. Values lower than this won't be reported.
    :return: A list with the query ID of every entry in the mash screen results, i.e. one entry per putatively
    present plasmid.
    """
    # The output directory must exist before mash can write to it
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    sketch_path = os.path.join(output_dir, 'plasmid_sketch.msh')
    screen_path = os.path.join(output_dir, 'screen_results.tsv')

    # Sketch the plasmid database
    out, err = mash.sketch(plasmid_db,
                           output_sketch=sketch_path,
                           threads=threads,
                           i='')
    if logfile:
        accessoryFunctions.write_to_logfile(out, err, logfile)

    # Paired and unpaired modes only differ in the read files handed to mash screen
    read_files = [forward_reads, reverse_reads] if reverse_reads else [forward_reads]
    out, err = mash.screen(sketch_path,
                           *read_files,
                           output_file=screen_path,
                           threads=threads,
                           i=identity_cutoff)
    if logfile:
        accessoryFunctions.write_to_logfile(out, err, logfile)

    # Every entry in the screen results is a candidate plasmid
    hits = mash.read_mash_screen(screen_result=screen_path)
    return [hit.query_id for hit in hits]