Ejemplo n.º 1
0
    def setUp(self):
        """
        Prepare the databases, the BLAST job handler and the detection object used by the tests.

        PointFinder is deliberately left out: ``self.pointfinder_database`` is ``None`` and that
        ``None`` is passed both to the job handler and to the detection object.
        """
        repos = AMRDatabasesManager.create_default_manager().get_database_repos()

        # Locations of the individual database repositories.
        self.resfinder_dir = repos.get_repo_dir('resfinder')
        self.pointfinder_dir = repos.get_repo_dir('pointfinder')
        self.plasmidfinder_dir = repos.get_repo_dir('plasmidfinder')

        # Database objects and drug tables.
        self.resfinder_database = ResfinderBlastDatabase(self.resfinder_dir)
        self.resfinder_drug_table = ARGDrugTableResfinder()
        self.pointfinder_drug_table = ARGDrugTablePointfinder()
        self.plasmidfinder_database = PlasmidfinderBlastDatabase(self.plasmidfinder_dir)
        self.pointfinder_database = None

        # The job handler writes into its own temporary directory.
        self.blast_out = tempfile.TemporaryDirectory()
        handler_databases = {
            'resfinder': self.resfinder_database,
            'pointfinder': self.pointfinder_database,
            'plasmidfinder': self.plasmidfinder_database,
        }
        self.blast_handler = JobHandler(handler_databases, 2, self.blast_out.name)

        # The detection object writes into a second temporary directory.
        self.outdir = tempfile.TemporaryDirectory()
        self.amr_detection = AMRDetectionResistance(self.resfinder_database,
                                                    self.resfinder_drug_table,
                                                    self.blast_handler,
                                                    self.pointfinder_drug_table,
                                                    self.pointfinder_database,
                                                    output_dir=self.outdir.name)

        # The 'data' directory sits next to the directory containing this file.
        this_dir = path.dirname(__file__)
        self.test_data_dir = path.join(this_dir, '..', 'data')
Ejemplo n.º 2
0
    def run(self, args):
        """
        Create the directory ``args.destination`` and build the ResFinder/PointFinder databases in it.

        :param args: The parsed command-line arguments.
        :return: None
        :raises CommandParseException: If ``args.destination`` already exists.
        """
        super(Build, self).run(args)

        destination = args.destination

        if not path.exists(destination):
            mkdir(destination)
        elif destination == self._default_dir:
            # Only the default-destination error also asks for the help text.
            raise CommandParseException(
                "Error, default destination [" + destination + "] already exists",
                self._root_arg_parser,
                print_help=True)
        else:
            raise CommandParseException(
                "Error, destination [" + destination + "] already exists",
                self._root_arg_parser)

        # The default directory gets the default manager, anything else a manager for that path.
        if destination == AMRDatabasesManager.get_default_database_directory():
            manager = AMRDatabasesManager.create_default_manager()
        else:
            manager = AMRDatabasesManager(destination)
        database_repos = manager.get_database_repos()

        commits = {
            'resfinder': args.resfinder_commit,
            'pointfinder': args.pointfinder_commit,
        }
        database_repos.build(commits)

        is_default = AMRDatabasesManager.is_database_repos_default_commits(database_repos)
        if not is_default:
            logger.warning(
                "Built non-default ResFinder/PointFinder database version. This may lead to "
                "differences in the detected AMR genes depending on how the database files are structured.")
Ejemplo n.º 3
0
    def run(self, args):
        """
        Update the databases in every directory of ``args.directories``, or the default database.

        The default database is only updated when no directory is given and
        ``args.update_default`` is set.

        :param args: The parsed command-line arguments.
        :return: None
        :raises CommandParseException: If no directory is given and ``args.update_default`` is not set.
        :raises DatabaseErrorException: Re-raised, after logging an error, when it occurs while
            updating the default database.
        """
        super(Update, self).run(args)

        if len(args.directories) > 0:
            # Explicit directories: update each one in turn.
            for directory in args.directories:
                database_repos = AMRDatabasesManager(directory).get_database_repos()
                commits = {
                    'resfinder': args.resfinder_commit,
                    'pointfinder': args.pointfinder_commit,
                    'plasmidfinder': args.plasmidfinder_commit,
                }
                database_repos.update(commits)

                if not AMRDatabasesManager.is_database_repos_default_commits(database_repos):
                    logger.warning(
                        "Updated to non-default ResFinder/PointFinder/PlasmidFinder database version [%s]. This may lead to "
                        "differences in the detected AMR genes depending on how the database files are structured.",
                        directory)
            return

        if not args.update_default:
            raise CommandParseException("Must pass at least one directory to update, or use '--update-default'",
                                        self._root_arg_parser,
                                        print_help=True)

        try:
            default_manager = AMRDatabasesManager.create_default_manager()
            database_repos = default_manager.get_database_repos(force_use_git=True)

            commits = {
                'resfinder': args.resfinder_commit,
                'pointfinder': args.pointfinder_commit,
                'plasmidfinder': args.plasmidfinder_commit,
            }
            database_repos.update(commits)

            if not AMRDatabasesManager.is_database_repos_default_commits(database_repos):
                logger.warning(
                    "Updated to non-default ResFinder/PointFinder/PlasmidFinder database version. This may lead to "
                    "differences in the detected AMR genes depending on how the database files are structured.")
        except DatabaseErrorException:
            logger.error(
                "Could not update default database. Please try restoring with 'staramr db restore-default'")
            # Let the caller see the original failure after the hint has been logged.
            raise
Ejemplo n.º 4
0
    def run(self, args):
        """
        Restore the default database, asking for confirmation unless ``args.force`` is set.

        :param args: The parsed command-line arguments.
        :return: None
        """
        super(RestoreDefault, self).run(args)

        # The manager is created before the (optional) confirmation prompt.
        database_manager = AMRDatabasesManager.create_default_manager()

        # With --force the prompt is skipped entirely (short-circuit).
        if args.force or self._confirm_restore():
            database_manager.restore_default()
Ejemplo n.º 5
0
    def run(self, args):
        """
        Write database information to standard output.

        Information is written for every directory in ``args.directories``, or for the default
        database when no directory is given. A missing database is logged as an error rather
        than raised.

        :param args: The parsed command-line arguments.
        :return: None
        """
        super(Info, self).run(args)

        drug_table = ARGDrugTable()

        if len(args.directories) > 0:
            for directory in args.directories:
                try:
                    repos = AMRDatabasesManager(directory).get_database_repos()
                    if not AMRDatabasesManager.is_database_repos_default_commits(repos):
                        logger.warning(
                            "Using non-default ResFinder/PointFinder database version [%s]. This may lead to "
                            "differences in the detected AMR genes depending on how the database files are structured.",
                            directory)

                    info = repos.info()
                    info.update(drug_table.get_resistance_table_info())
                    sys.stdout.write(get_string_with_spacing(info))
                except DatabaseNotFoundException:
                    logger.error(
                        "Database not found in [%s]. Perhaps try building with 'staramr db build --dir %s'",
                        directory, directory)
            return

        # No directory given: report on the default database.
        repos = AMRDatabasesManager.create_default_manager().get_database_repos()
        if not AMRDatabasesManager.is_database_repos_default_commits(repos):
            logger.warning(
                "Using non-default ResFinder/PointFinder database versions. This may lead to differences in the detected "
                "AMR genes depending on how the database files are structured.")

        # Only the info lookup and printing are guarded here, unlike the per-directory path above.
        try:
            info = repos.info()
            info.update(drug_table.get_resistance_table_info())
            sys.stdout.write(get_string_with_spacing(info))
        except DatabaseNotFoundException:
            logger.error(
                "No database found. Perhaps try restoring the default with 'staramr db restore-default'")
Ejemplo n.º 6
0
    def run(self, args):
        """
        Validate the command-line arguments, run the search and write every requested output.

        The steps are: check the input files and the database directory, build the
        ResFinder/PointFinder/PlasmidFinder BLAST database objects, resolve the output
        locations, load the list of genes to exclude, call ``self._generate_results`` and
        finally write each table whose output path is set.

        :param args: The parsed command-line arguments.
        :return: None
        :raises CommandParseException: If no input file is given, an input file or the database
            directory does not exist, the PointFinder organism or PlasmidFinder database type is
            not supported, the output directory already exists, the output options conflict or
            none is given, the hits directory path exists but is not a directory, or the
            exclude-genes file does not exist.
        """
        super(Search, self).run(args)

        # ---- Input files -------------------------------------------------------------------
        if (len(args.files) == 0):
            raise CommandParseException("Must pass a fasta file to process",
                                        self._root_arg_parser,
                                        print_help=True)

        for input_file in args.files:
            if not path.exists(input_file):
                raise CommandParseException(
                    'File [' + input_file + '] does not exist',
                    self._root_arg_parser)

        # ---- Database directory ------------------------------------------------------------
        if not path.isdir(args.database):
            if args.database == self._default_database_dir:
                raise CommandParseException(
                    "Default database does not exist. Perhaps try restoring with 'staramr db restore-default'",
                    self._root_arg_parser)
            else:
                # A space after "with" keeps the suggested command readable
                # (it used to be glued directly onto the preceding word).
                raise CommandParseException(
                    "Database directory [" + args.database +
                    "] does not exist. Perhaps try building with " +
                    "'staramr db build --dir " + args.database + "'",
                    self._root_arg_parser)

        if args.database == AMRDatabasesManager.get_default_database_directory():
            database_repos = AMRDatabasesManager.create_default_manager(
            ).get_database_repos()
        else:
            database_repos = AMRDatabasesManager(
                args.database).get_database_repos()

        if not AMRDatabasesManager.is_database_repos_default_commits(
                database_repos):
            logger.warning(
                "Using non-default ResFinder/PointFinder. This may lead to differences in the detected "
                +
                "AMR genes depending on how the database files are structured."
            )

        # ---- BLAST databases ---------------------------------------------------------------
        resfinder_database = database_repos.build_blast_database('resfinder')

        # PointFinder is only searched when an organism was requested.
        if (args.pointfinder_organism):
            if args.pointfinder_organism not in PointfinderBlastDatabase.get_available_organisms():
                raise CommandParseException(
                    "The only Pointfinder organism(s) currently supported are "
                    + str(PointfinderBlastDatabase.get_available_organisms()),
                    self._root_arg_parser)
            pointfinder_database = database_repos.build_blast_database(
                'pointfinder', {'organism': args.pointfinder_organism})
        else:
            logger.info(
                "No --pointfinder-organism specified. Will not search the PointFinder databases"
            )
            pointfinder_database = None

        # PlasmidFinder is always searched; the database type only narrows it.
        if (args.plasmidfinder_database_type):
            if args.plasmidfinder_database_type not in PlasmidfinderBlastDatabase.get_available_databases():
                raise CommandParseException(
                    "The only Plasmidfinder databases that are currently supported are "
                    +
                    str(PlasmidfinderBlastDatabase.get_available_databases()),
                    self._root_arg_parser)
            plasmidfinder_database = database_repos.build_blast_database(
                'plasmidfinder',
                {'database_type': args.plasmidfinder_database_type})
        else:
            logger.info(
                "No --plasmidfinder-database-type specified. Will search the entire PlasmidFinder database"
            )
            plasmidfinder_database = database_repos.build_blast_database(
                'plasmidfinder')

        # ---- Output locations --------------------------------------------------------------
        # A value left as None means "do not write this output".
        hits_output_dir = None
        output_summary = None
        output_detailed_summary = None
        output_resfinder = None
        output_pointfinder = None
        output_plasmidfinder = None
        output_mlst = None
        output_excel = None
        output_settings = None
        if args.output_dir:
            if path.exists(args.output_dir):
                raise CommandParseException(
                    "Output directory [" + args.output_dir +
                    "] already exists", self._root_arg_parser)
            elif args.output_summary or args.output_detailed_summary or args.output_resfinder or args.output_pointfinder or args.output_plasmidfinder or args.output_excel or \
                    args.hits_output_dir:
                raise CommandParseException(
                    'You cannot use --output-[type] with --output-dir',
                    self._root_arg_parser)
            else:
                mkdir(args.output_dir)

                # With --output-dir every output gets a fixed name inside that directory.
                hits_output_dir = path.join(args.output_dir, 'hits')
                output_resfinder = path.join(args.output_dir, "resfinder.tsv")
                output_pointfinder = path.join(args.output_dir,
                                               "pointfinder.tsv")
                output_plasmidfinder = path.join(args.output_dir,
                                                 "plasmidfinder.tsv")
                output_summary = path.join(args.output_dir, "summary.tsv")
                output_detailed_summary = path.join(args.output_dir,
                                                    "detailed_summary.tsv")
                output_mlst = path.join(args.output_dir, "mlst.tsv")
                output_settings = path.join(args.output_dir, "settings.txt")
                output_excel = path.join(args.output_dir, 'results.xlsx')

                mkdir(hits_output_dir)

                logger.info(
                    "--output-dir set. All files will be output to [%s]",
                    args.output_dir)
        elif args.output_summary or args.output_excel or args.output_detailed_summary:
            logger.info(
                '--output-dir not set. Files will be output to the respective --output-[type] setting'
            )
            output_resfinder = args.output_resfinder
            output_pointfinder = args.output_pointfinder
            output_plasmidfinder = args.output_plasmidfinder
            output_summary = args.output_summary
            output_detailed_summary = args.output_detailed_summary
            output_mlst = args.output_mlst
            output_settings = args.output_settings
            output_excel = args.output_excel
            hits_output_dir = args.hits_output_dir

            if hits_output_dir is not None:
                if path.exists(
                        hits_output_dir) and not path.isdir(hits_output_dir):
                    raise CommandParseException(
                        "--output-hits-dir [" + hits_output_dir +
                        "] exists and is not a directory",
                        self._root_arg_parser)
                elif path.exists(hits_output_dir):
                    logger.debug(
                        "Found --output-hits-dir [%s] and is a directory. Will write hits here",
                        hits_output_dir)
                else:
                    logger.debug("Making directory [%s]", hits_output_dir)
                    mkdir(hits_output_dir)
        else:
            raise CommandParseException(
                'You must set one of --output-dir, --output-summary, --output-detailed-summary, or --output-excel',
                self._root_arg_parser)

        # ---- Genes to exclude --------------------------------------------------------------
        if args.no_exclude_genes:
            logger.info(
                "--no-exclude-genes enabled. Will not exclude any ResFinder/PointFinder genes."
            )
            exclude_genes = []
        else:
            if not path.exists(args.exclude_genes_file):
                raise CommandParseException(
                    '--exclude-genes-file [{}] does not exist'.format(
                        args.exclude_genes_file), self._root_arg_parser)
            else:
                logger.info(
                    "Will exclude ResFinder/PointFinder genes listed in [%s]. Use --no-exclude-genes to disable",
                    args.exclude_genes_file)
                exclude_genes = ExcludeGenesList(
                    args.exclude_genes_file).tolist()

        # ---- Run the search ----------------------------------------------------------------
        results = self._generate_results(
            database_repos=database_repos,
            resfinder_database=resfinder_database,
            pointfinder_database=pointfinder_database,
            plasmidfinder_database=plasmidfinder_database,
            nprocs=args.nprocs,
            include_negatives=not args.exclude_negatives,
            include_resistances=not args.exclude_resistance_phenotypes,
            hits_output=hits_output_dir,
            pid_threshold=args.pid_threshold,
            plength_threshold_resfinder=args.plength_threshold_resfinder,
            plength_threshold_pointfinder=args.plength_threshold_pointfinder,
            plength_threshold_plasmidfinder=args.plength_threshold_plasmidfinder,
            report_all_blast=args.report_all_blast,
            genes_to_exclude=exclude_genes,
            files=args.files,
            # NOTE(review): the attribute is spelled 'ignore_valid_files' here; presumably that
            # is the argparse dest of the "ignore invalid files" option - confirm in the parser.
            ignore_invalid_files=args.ignore_valid_files,
            mlst_scheme=args.mlst_scheme,
            genome_size_lower_bound=args.genome_size_lower_bound,
            genome_size_upper_bound=args.genome_size_upper_bound,
            minimum_N50_value=args.minimum_N50_value,
            minimum_contig_length=args.minimum_contig_length,
            unacceptable_num_contigs=args.unacceptable_num_contigs)
        amr_detection = results['results']
        settings = results['settings']

        # ---- Write the outputs -------------------------------------------------------------
        if output_resfinder:
            logger.info("Writing resfinder to [%s]", output_resfinder)
            with open(output_resfinder, 'w') as fh:
                self._print_dataframe_to_text_file_handle(
                    amr_detection.get_resfinder_results(), fh)
        else:
            logger.info(
                "--output-dir or --output-resfinder unset. No resfinder file will be written"
            )

        # The PointFinder table is only written when PointFinder was actually searched.
        if args.pointfinder_organism and output_pointfinder:
            logger.info("Writing pointfinder to [%s]", output_pointfinder)
            with open(output_pointfinder, 'w') as fh:
                self._print_dataframe_to_text_file_handle(
                    amr_detection.get_pointfinder_results(), fh)
        else:
            logger.info(
                "--output-dir or --output-pointfinder unset. No pointfinder file will be written"
            )

        if output_plasmidfinder:
            logger.info("Writing plasmidfinder to [%s]", output_plasmidfinder)
            with open(output_plasmidfinder, 'w') as fh:
                self._print_dataframe_to_text_file_handle(
                    amr_detection.get_plasmidfinder_results(), fh)
        else:
            logger.info(
                "--output-dir or --output-plasmidfinder unset. No plasmidfinder file will be written"
            )

        if output_summary:
            logger.info("Writing summary to [%s]", output_summary)
            with open(output_summary, 'w') as fh:
                self._print_dataframe_to_text_file_handle(
                    amr_detection.get_summary_results(), fh)
        else:
            logger.info(
                "--output-dir or --output-summary unset. No summary file will be written"
            )

        if output_mlst:
            logger.info("Writing MLST summary to [%s]", output_mlst)
            with open(output_mlst, 'w') as fh:
                self._print_dataframe_to_text_file_handle(
                    amr_detection.get_mlst_results(), fh)
        else:
            logger.info(
                "--output-dir or --output-mlst unset. No mlst file will be written"
            )

        if output_detailed_summary:
            logger.info("Writing detailed summary to [%s]",
                        output_detailed_summary)
            with open(output_detailed_summary, 'w') as fh:
                self._print_dataframe_to_text_file_handle(
                    amr_detection.get_detailed_summary_results(), fh)
        else:
            logger.info(
                "--output-dir or --output-detailed-summary unset. No detailed summary file will be written"
            )

        if output_settings:
            logger.info("Writing settings to [%s]", output_settings)
            self._print_settings_to_file(settings, output_settings)
        else:
            logger.info(
                "--output-dir or --output-settings unset. No settings file will be written"
            )

        if output_excel:
            logger.info("Writing Excel to [%s]", output_excel)
            settings_dataframe = pd.DataFrame.from_dict(settings,
                                                        orient='index')
            settings_dataframe.index.name = 'Key'
            # Assign the column label directly: set_axis(..., inplace=True) raises a TypeError
            # on pandas >= 2.0, where the 'inplace' argument no longer exists.
            settings_dataframe.columns = ['Value']

            self._print_dataframes_to_excel(
                output_excel, amr_detection.get_summary_results(),
                amr_detection.get_resfinder_results(),
                amr_detection.get_pointfinder_results(),
                amr_detection.get_plasmidfinder_results(),
                amr_detection.get_detailed_summary_results(),
                amr_detection.get_mlst_results(), settings_dataframe,
                args.minimum_contig_length)
        else:
            logger.info(
                "--output-dir or --output-excel unset. No excel file will be written"
            )

        if hits_output_dir:
            logger.info("BLAST hits are stored in [%s]", hits_output_dir)
        else:
            logger.info(
                "--output-dir or --output-hits-dir not set. No BLAST hits will be saved."
            )