Beispiel #1
0
 def setUp(self):
     """Create an empty test database and build a SQLAlchemy engine for it."""
     test_db_utils.create_empty_test_db()
     # DB, USER, PWD are module-level test constants.
     self.alchemist = AlchemyHandler(database=DB,
                                     username=USER,
                                     password=PWD)
     self.alchemist.build_engine()
     # Expose the engine directly for convenience in the tests.
     self.engine = self.alchemist.engine
Beispiel #2
0
    def setUp(self):
        """Ensure the test database exists, then connect and keep its engine."""
        # Create the database only when it is not already present.
        db_exists = test_db_utils.check_if_exists()
        if not db_exists:
            test_db_utils.create_empty_test_db()

        handler = AlchemyHandler(username=USER, password=PWD, database=DB)
        handler.connect()
        # Only the engine is kept on the test instance.
        self.engine = handler.engine
Beispiel #3
0
    def setUp(self):
        """Build engines to two separate test databases (DB and DB2)."""
        self.alchemist1 = AlchemyHandler(database=DB, username=USER, password=PWD)
        self.alchemist1.build_engine()
        self.engine1 = self.alchemist1.engine

        # Second handler/engine pointed at the second test database.
        self.alchemist2 = AlchemyHandler(database=DB2, username=USER, password=PWD)
        self.alchemist2.build_engine()
        self.engine2 = self.alchemist2.engine
Beispiel #4
0
    def setUp(self):
        """Connect to the test database and set up the resubmit directory path."""
        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        # login_attempts=0: fail immediately rather than prompting for credentials.
        self.alchemist.connect(ask_database=True, login_attempts=0)

        self.resubmit_test_dir = self.test_dir.joinpath("resubmit_test_dir")
Beispiel #5
0
def establish_database_connection(database_name: str):
    """Connect to a MySQL database via an AlchemyHandler.

    :param database_name: Name of the database to connect to.
    :type database_name: str
    :returns: A connected AlchemyHandler object.
    :raises TypeError: If database_name is not a string.
    """
    if not isinstance(database_name, str):
        # Put the reason in the exception itself instead of printing and
        # raising a bare TypeError, so callers see it in the traceback.
        raise TypeError("establish_database_connection requires string input")
    alchemist = AlchemyHandler(database=database_name)
    alchemist.connect()

    return alchemist
Beispiel #6
0
    def setUp(self):
        """Set distinct version values in the two databases and build an engine."""
        # DB gets Version=1 and DB2 gets Version=0 so the two databases
        # can be told apart by their version values in the tests.
        stmt1 = "UPDATE version SET Version = 1"
        test_db_utils.execute(stmt1)
        stmt2 = "UPDATE version SET Version = 0"
        test_db_utils.execute(stmt2, db=DB2)

        self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
        self.alchemist.build_engine()
        self.engine = self.alchemist.engine
Beispiel #7
0
    def setUp(self):
        """Ensure the test database exists with its schema, then build an engine."""
        if not test_db_utils.check_if_exists():
            test_db_utils.create_empty_test_db()
        else:
            # Database exists but has no tables: install the schema.
            if len(test_db_utils.execute(TABLES_QUERY.format(DB))) == 0:
                test_db_utils.install_db(test_db_utils.SCHEMA_FILEPATH)

        # NOTE: no database is passed here, so the engine is server-level.
        self.alchemist = AlchemyHandler(username=USER, password=PWD)
        self.alchemist.build_engine()
        self.engine = self.alchemist.engine
Beispiel #8
0
def main(unparsed_args):
    """Runs the complete update pipeline.

    :param unparsed_args: Command line args; pipeline args start at index 2.
    :type unparsed_args: list[str]
    """
    args = parse_args(unparsed_args[2:])

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]
    alchemist = AlchemyHandler(database=args.database,
                               username=mysql_creds["user"],
                               password=mysql_creds["password"])
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the update pipeline")

    if args.version is True:
        mysqldb.change_version(engine)
        print("Database version updated.")

    if args.ticket_table is not None:
        update_table_path = basic.set_path(args.ticket_table,
                                           kind="file",
                                           expect=True)

        # Iterate through the tickets and process them sequentially.
        # NOTE: loop variable renamed from 'dict' to avoid shadowing the builtin.
        list_of_update_tickets = []
        with update_table_path.open(mode='r') as f:
            file_reader = csv.DictReader(f)
            for ticket in file_reader:
                list_of_update_tickets.append(ticket)

        # Variables to be used for end summary
        processed = 0
        succeeded = 0
        failed = 0

        for ticket in list_of_update_tickets:
            status = update_field(alchemist, ticket)

            # Every ticket counts as processed; status == 1 means success.
            processed += 1
            if status == 1:
                succeeded += 1
            else:
                failed += 1

        engine.dispose()
        print("\nDone iterating through tickets.")
        if succeeded > 0:
            print(f"{succeeded} / {processed} tickets successfully handled.")
        if failed > 0:
            print(f"{failed} / {processed} tickets failed to be handled.")
Beispiel #9
0
    def setUp(self):
        """Connect to the test database, build the graph, and set export paths."""
        self.alchemist = AlchemyHandler()
        self.alchemist.username=USER
        self.alchemist.password=PWD
        self.alchemist.database=DB
        # login_attempts=0: fail immediately rather than prompting for credentials.
        self.alchemist.connect(ask_database=True, login_attempts=0)
        self.alchemist.build_graph()

        self.db_filter = Filter(alchemist=self.alchemist)

        self.export_test_dir = self.test_dir.joinpath("export_test_dir")
Beispiel #10
0
    def setUp(self):
        """Create a populated test database and build an engine to it."""

        # Create test database that contains data for several phages.
        test_db_utils.create_filled_test_db()

        self.alchemist = AlchemyHandler(database=DB,
                                        username=USER,
                                        password=PWD)
        self.alchemist.build_engine()
        self.engine = self.alchemist.engine
        # Scratch directory used by the phamerate tests.
        self.temp_dir = "/tmp/pdm_utils_tests_phamerate"
Beispiel #11
0
class TestMySQLdbBasic3(unittest.TestCase):
    """Tests for mysqldb_basic retrieval helpers against two test databases."""

    @classmethod
    def setUpClass(self):
        # One filled database (DB) and one empty database (DB2).
        test_db_utils.create_filled_test_db()
        test_db_utils.create_empty_test_db(db=DB2)

    @classmethod
    def tearDownClass(self):
        test_db_utils.remove_db()
        test_db_utils.remove_db(db=DB2)

    def setUp(self):
        # Give the two databases distinct version values.
        stmt1 = "UPDATE version SET Version = 1"
        test_db_utils.execute(stmt1)
        stmt2 = "UPDATE version SET Version = 0"
        test_db_utils.execute(stmt2, db=DB2)

        # Engine is connected to DB specifically.
        self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
        self.alchemist.build_engine()
        self.engine = self.alchemist.engine

    def tearDown(self):
        # Close all pooled connections after each test.
        self.engine.dispose()

    def test_get_mysql_dbs_2(self):
        """Verify set of databases is retrieved when engine
        is connected to a specific database."""
        databases = mysqldb_basic.get_mysql_dbs(self.engine)
        self.assertTrue(DB in databases)

    def test_get_tables_2(self):
        """Verify set of tables is retrieved when engine
        is connected to the same database."""
        tables = mysqldb_basic.get_tables(self.engine, DB)
        self.assertTrue(TABLE in tables)

    def test_get_tables_3(self):
        """Verify set of tables is retrieved when engine
        is connected to a different database."""
        tables = mysqldb_basic.get_tables(self.engine, DB2)
        self.assertTrue(TABLE in tables)

    def test_get_columns_2(self):
        """Verify set of columns is retrieved when engine
        is not connected to the same database."""
        columns = mysqldb_basic.get_columns(self.engine, DB, TABLE)
        self.assertTrue(COLUMN in columns)

    def test_get_columns_3(self):
        """Verify set of columns is retrieved when engine
        is not connected to a different database."""
        columns = mysqldb_basic.get_columns(self.engine, DB2, TABLE)
        self.assertTrue(COLUMN in columns)
Beispiel #12
0
    def setUp(self):
        """Connect to the test database and prepare fileio output paths."""
        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        # login_attempts=0: fail immediately rather than prompting for credentials.
        self.alchemist.connect(ask_database=True, login_attempts=0)
        self.alchemist.build_graph()

        self.fileio_test_dir = self.test_dir.joinpath("fileio_test_dir")
        self.fileio_test_dir.mkdir()
        # Output files written by the fileio tests.
        self.data_dict_file = self.fileio_test_dir.joinpath("table.csv")
        self.fasta_file = self.fileio_test_dir.joinpath("translations.fasta")
Beispiel #13
0
    def setUp(self):
        """Connect to the test database and build a Filter with base conditionals."""
        self.review_test_dir = self.test_dir.joinpath("review_test_dir")

        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        # login_attempts=0: fail immediately rather than prompting for credentials.
        self.alchemist.connect(ask_database=True, login_attempts=0)

        # Filter keyed on gene.PhamID with the review pipeline's base filters.
        self.db_filter = Filter(alchemist=self.alchemist)
        self.db_filter.add(review.BASE_CONDITIONALS)
        self.db_filter.key = "gene.PhamID"
Beispiel #14
0
def install_db(database, db_filepath=None, schema_version=None):
    """Install database. If database already exists, it is first removed.

    :param database: Name of the database to (re)create.
    :param db_filepath: Optional path to a SQL file to install from.
    :param schema_version: Schema version to convert to when no file is given.
    """
    # No need to specify database yet, since it needs to first check if the
    # database exists.

    alchemist1 = AlchemyHandler(database="")
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    # Drop the database if present, then re-create it empty.
    result = mysqldb_basic.drop_create_db(engine1, database)
    if result != 0:
        print("Unable to create new, empty database.")
    else:
        # Reuse the credentials from the first (server-level) connection.
        alchemist2 = AlchemyHandler(database=database,
                                    username=engine1.url.username,
                                    password=engine1.url.password)
        alchemist2.connect(pipeline=True)
        engine2 = alchemist2.engine
        if engine2 is None:
            print(f"No connection to the {database} database due "
                  "to invalid credentials or database.")
        else:
            if db_filepath is not None:
                # Install directly from the provided SQL file.
                mysqldb_basic.install_db(engine2, db_filepath)
            else:
                # Build a schema-0 database, then convert it up to the
                # requested schema version.
                mysqldb.execute_transaction(engine2, db_schema_0.STATEMENTS)
                convert_args = [
                    "pdm_utils.run", "convert", database, "-s",
                    str(schema_version)
                ]
                convert_db.main(convert_args, engine2)
            # Close up all connections in the connection pool.
            engine2.dispose()
    # Close up all connections in the connection pool.
    engine1.dispose()
 def setUp(self):
     """Prepare the test folder, engine, and standardized phage/gene data."""
     test_folder.mkdir()
     self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
     self.alchemist.build_engine()
     # Standardize values in certain fields to define the data
     stmt1 = create_update("phage", "Status", "draft")
     test_db_utils.execute(stmt1)
     stmt2 = create_update("phage", "HostGenus", "Mycobacterium")
     test_db_utils.execute(stmt2)
     stmt3 = create_update("phage", "Accession", "")
     test_db_utils.execute(stmt3)
     stmt4 = create_update("gene", "Notes", "repressor")
     test_db_utils.execute(stmt4)
     stmt5 = "UPDATE version SET Version = 1"
     test_db_utils.execute(stmt5)
     # Baseline command-line args used by the tests.
     self.unparsed_args = get_unparsed_args()
Beispiel #16
0
    def setUp(self):
        """Connect to a test database and grab the phage.PhageID column."""
        alchemist = AlchemyHandler()
        # NOTE(review): credentials appear redacted ("******") in this
        # snippet — replace with real test credentials before running.
        alchemist.username = "******"
        alchemist.password = "******"
        alchemist.database = "test_db"
        alchemist.connect()
        alchemist.build_graph()
        self.alchemist = alchemist

        self.db_filter = Filter(alchemist=self.alchemist)

        phageid = self.alchemist.get_column("phage.PhageID")
        self.phageid = phageid
Beispiel #17
0
    def setUp(self):
        """Connect to the test database and write the FunctionReport fixture."""
        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        # login_attempts=0: fail immediately rather than prompting for credentials.
        self.alchemist.connect(ask_database=True, login_attempts=0)

        self.revise_test_dir = self.test_dir.joinpath("revise_test_dir")
        self.fr_input_file_path = self.test_dir.joinpath("FunctionReport.csv")
        self.csv_input_file_path = self.revise_test_dir.joinpath("gene.csv")

        # Write the FunctionReport file used as pipeline input.
        fileio.export_data_dict(TEST_FR_DATA,
                                self.fr_input_file_path,
                                REVIEW_HEADER,
                                include_headers=True)

        # Sanity check: the fixture file must exist before any test runs.
        self.assertTrue(self.fr_input_file_path.is_file())
Beispiel #18
0
    def setUp(self):
        """Build an engine to the test database and standardize phage fields."""

        self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
        self.alchemist.build_engine()
        test_folder.mkdir()

        # Standardize values in certain fields to define the data.
        # Collect the statements and execute them in one loop instead of
        # repeating the stmtN/execute pattern five times.
        stmts = [
            create_update("phage", "Status", "unknown"),
            create_update("phage", "HostGenus", "unknown"),
            create_update("phage", "Accession", ""),
            create_update("phage", "DateLastModified", "1900-01-01"),
            create_update("phage", "RetrieveRecord", "0"),
            ]
        for stmt in stmts:
            test_db_utils.execute(stmt)
Beispiel #19
0
    def setUp(self):
        """Standardize database data, build an engine, and prepare PDB fixtures."""
        test_folder.mkdir()

        # Standardize values in certain fields to define the data
        stmts = []
        # Global defaults for all phages.
        stmts.append(create_update("phage", "Status", "draft"))
        stmts.append(create_update("phage", "Accession", ""))
        stmts.append(create_update("phage", "AnnotationAuthor", "0"))

        # Per-phage accessions (D29 deliberately shares Trixie's accession).
        stmts.append(create_update("phage", "Accession", TRIXIE_ACC, "Trixie"))
        stmts.append(create_update("phage", "Accession", ALICE_ACC, "Alice"))
        stmts.append(create_update("phage", "Accession", L5_ACC, "L5"))
        stmts.append(create_update("phage", "Accession", TRIXIE_ACC, "D29"))

        stmts.append(create_update("phage", "Status", "final", "Trixie"))
        stmts.append(create_update("phage", "Status", "final", "Alice"))
        stmts.append(create_update("phage", "Status", "final", "L5"))
        stmts.append(create_update("phage", "Status", "final", "D29"))

        stmts.append(create_update("phage", "AnnotationAuthor", "1", "Trixie"))
        stmts.append(create_update("phage", "AnnotationAuthor", "1", "Alice"))
        stmts.append(create_update("phage", "AnnotationAuthor", "1", "L5"))
        stmts.append(create_update("phage", "AnnotationAuthor", "1", "D29"))

        for stmt in stmts:
            test_db_utils.execute(stmt)

        self.unparsed_args = get_unparsed_args()

        self.alchemist = AlchemyHandler(database=DB,
                                        username=USER,
                                        password=PWD)
        self.alchemist.build_engine()

        # Three PhagesDB-style records: two that match database phages and
        # one ("unmatched") that does not.
        self.pdb_data1 = get_pdb_dict()
        self.pdb_data2 = get_pdb_dict()
        self.pdb_data3 = get_pdb_dict()
        self.pdb_data1["phage_name"] = "Trixie"
        self.pdb_data2["phage_name"] = "L5"
        self.pdb_data3["phage_name"] = "unmatched"

        json_results = [self.pdb_data1, self.pdb_data2, self.pdb_data3]
        self.pdb_json_data = get_pdb_json_data()
        self.pdb_json_data["results"] = json_results
        self.pdb_json_results = json_results
Beispiel #20
0
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the review pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_review(unparsed_args_list)

    # Establish the database connection before doing any work.
    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(ask_database=True, pipeline=True)

    values = export_db.parse_value_input(args.input)

    # Hand everything off to the review executor.
    execute_review(alchemist, args.folder_path, args.folder_name,
                   review=args.review,
                   values=values,
                   filters=args.filters,
                   groups=args.groups,
                   sort=args.sort,
                   g_reports=args.gene_reports,
                   s_report=args.summary_report,
                   verbose=args.verbose)
Beispiel #21
0
    def setUp(self):
        """Build an engine to the test database and standardize phage fields."""

        self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
        self.alchemist.build_engine()
        test_folder.mkdir()

        # Standardize values in certain fields to define the data.
        # Collect the statements and execute them in one loop instead of
        # repeating the stmtN/execute pattern six times.
        stmts = [
            create_update("phage", "Status", "unknown"),
            create_update("phage", "HostGenus", "unknown"),
            create_update("phage", "Accession", ""),
            create_update("phage", "Cluster", "Z"),
            create_update("phage", "Subcluster", "Z1"),
            "UPDATE version SET Version = 0",
            ]
        for stmt in stmts:
            test_db_utils.execute(stmt)
Beispiel #22
0
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the resubmit pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_resubmit(unparsed_args_list)

    # Connect to the requested database (prompts for one if needed).
    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(ask_database=True, pipeline=True)

    # Each dict describes one revision ticket from the revisions file.
    revisions_data_dicts = basic.retrieve_data_dict(args.revisions_file)

    execute_resubmit(alchemist,
                     revisions_data_dicts,
                     args.folder_path,
                     args.folder_name,
                     filters=args.filters,
                     groups=args.groups,
                     verbose=args.verbose)
Beispiel #23
0
    def connect(self, alchemist=None):
        """Connect Filter object to a database with an AlchemyHandler.

        :param alchemist: An AlchemyHandler object.
        :type alchemist: AlchemyHandler
        """
        # Identity comparison for None per PEP 8 (was 'alchemist != None').
        if alchemist is not None:
            self.link(alchemist)
            return

        # Already connected; nothing to do.
        if self._connected:
            return

        # No handler supplied: build one interactively (prompts for database).
        alchemist = AlchemyHandler()
        alchemist.connect(ask_database=True)

        self._engine = alchemist.engine
        self._graph = alchemist.graph
        self._session = alchemist.session
        self._mapper = alchemist.mapper

        self._connected = True
Beispiel #24
0
def main(unparsed_args_list):
    """Run main get_gb_records pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    # Parse command line arguments
    args = parse_args(unparsed_args_list)

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filters = args.filters
    ncbi_cred_dict = ncbi.get_ncbi_creds(args.ncbi_credentials_file)
    output_folder = basic.set_path(args.output_folder, kind="dir", expect=True)
    working_dir = pathlib.Path(RESULTS_FOLDER)
    # Up to 50 attempts to create a uniquely-named results directory.
    working_path = basic.make_new_dir(output_folder, working_dir, attempt=50)
    if working_path is None:
        print(f"Invalid working directory '{working_dir}'")
        sys.exit(1)

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline")

    # Get SQLAlchemy metadata Table object
    # table_obj.primary_key.columns is a
    # SQLAlchemy ColumnCollection iterable object
    # Set primary key = 'phage.PhageID'
    alchemist.build_metadata()
    table = querying.get_table(alchemist.metadata, TARGET_TABLE)
    # NOTE(review): this keeps only the LAST primary-key column, and
    # primary_key would be unbound for a table with no primary key —
    # presumably TARGET_TABLE has a single-column primary key; confirm.
    for column in table.primary_key.columns:
        primary_key = column

    # Create filter object and then add command line filter strings
    db_filter = Filter(alchemist=alchemist, key=primary_key)
    db_filter.values = []

    # Attempt to add filters and exit if needed.
    add_filters(db_filter, filters)

    # Performs the query
    db_filter.update()

    # db_filter.values now contains list of PhageIDs that pass the filters.
    # Get the accessions associated with these PhageIDs.
    keep_set = set(db_filter.values)

    # Create data sets
    print("Retrieving accessions from the database...")
    query = construct_accession_query(keep_set)
    list_of_dicts = mysqldb_basic.query_dict_list(engine, query)
    id_acc_dict = get_id_acc_dict(list_of_dicts)
    acc_id_dict = get_acc_id_dict(id_acc_dict)
    engine.dispose()
    if len(acc_id_dict.keys()) > 0:
        get_data(working_path, acc_id_dict, ncbi_cred_dict)
    else:
        print("There are no records to retrieve.")
Beispiel #25
0
    def setUpClass(self):
        """Create the filled test database, a scratch dir, and fetch GenBank data."""
        # NOTE(review): setUpClass is conventionally decorated with
        # @classmethod — the decorator may exist outside this fragment; confirm.
        base_dir = Path(TMPDIR_BASE)
        self.test_dir = base_dir.joinpath(TMPDIR_PREFIX)
        test_db_utils.create_filled_test_db()

        # Start from a clean scratch directory.
        if self.test_dir.is_dir():
            shutil.rmtree(self.test_dir)

        self.test_dir.mkdir()

        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        # login_attempts=0: fail immediately rather than prompting for credentials.
        self.alchemist.connect(ask_database=True, login_attempts=0)
        self.acc_id_dict = get_acc_id_dict(self.alchemist)

        # Fetch feature-table records for all accessions in one Entrez call.
        accession_list = list(self.acc_id_dict.keys())
        ncbi_handle = Entrez.efetch(db="nucleotide",
                                    rettype="ft",
                                    id=",".join(accession_list),
                                    retmode="text")

        copy_gb_ft_files(ncbi_handle, self.acc_id_dict, self.test_dir)
Beispiel #26
0
    def setUp(self):
        """Connect to the database and cache commonly used tables and columns."""
        alchemist = AlchemyHandler()
        alchemist.username=user
        alchemist.password=pwd
        alchemist.database=db
        alchemist.connect()
        self.alchemist = alchemist

        self.db_filter = Filter(alchemist=self.alchemist)

        # Shortcuts to the SQLAlchemy Table objects used in the tests.
        self.phage = self.alchemist.metadata.tables["phage"]
        self.gene = self.alchemist.metadata.tables["gene"]
        self.trna = self.alchemist.metadata.tables["trna"]

        # Frequently referenced Column objects.
        self.PhageID = self.phage.c.PhageID
        self.Cluster = self.phage.c.Cluster
        self.Subcluster = self.phage.c.Subcluster

        self.Notes = self.gene.c.Notes
Beispiel #27
0
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the file export pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    #Returns after printing appropriate error message from parsing/connecting.
    args = parse_export(unparsed_args_list)

    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(ask_database=True, pipeline=True)
    alchemist.build_graph()

    # Exporting as a SQL file is not constricted by schema version.
    if args.pipeline != "sql":
        mysqldb.check_schema_compatibility(alchemist.engine, "export")

    values = []
    if args.pipeline in FILTERABLE_PIPELINES:
        values = parse_value_input(args.input)

    # Idiomatic membership test (was 'not args.pipeline in PIPELINES').
    if args.pipeline not in PIPELINES:
        print("ABORTED EXPORT: Unknown pipeline option discrepency.\n"
              "Pipeline parsed from command line args is not supported")
        sys.exit(1)

    # "I" (interactive) mode is intentionally a no-op here; the dead
    # 'else: pass' branch was removed.
    if args.pipeline != "I":
        execute_export(alchemist,
                       args.folder_path,
                       args.folder_name,
                       args.pipeline,
                       table=args.table,
                       values=values,
                       filters=args.filters,
                       groups=args.groups,
                       sort=args.sort,
                       include_columns=args.include_columns,
                       exclude_columns=args.exclude_columns,
                       sequence_columns=args.sequence_columns,
                       raw_bytes=args.raw_bytes,
                       concatenate=args.concatenate,
                       verbose=args.verbose)
Beispiel #28
0
class TestPhamerationFunctions(unittest.TestCase):
    def setUp(self):
        """Create a filled test database and build an engine for the tests."""

        # Create test database that contains data for several phages.
        test_db_utils.create_filled_test_db()

        self.alchemist = AlchemyHandler(database=DB,
                                        username=USER,
                                        password=PWD)
        self.alchemist.build_engine()
        self.engine = self.alchemist.engine
        # Scratch directory recreated by refresh_tempdir in the tests.
        self.temp_dir = "/tmp/pdm_utils_tests_phamerate"

    def tearDown(self):
        """Dispose the engine, drop the test database, and remove stray files."""
        self.engine.dispose()
        test_db_utils.remove_db()

        # blastclust sometimes leaves an error.log in the working directory.
        run_dir = Path.cwd()
        err_file = run_dir.joinpath("error.log")
        if err_file.exists():
            print("Found leftover blastclust file... removing")
            err_file.unlink()

    def test_1_get_pham_geneids(self):
        """Verify we get back a dictionary"""
        old_phams = get_pham_geneids(self.engine)
        # assertIsInstance is the idiomatic type check and produces a
        # clearer failure message than comparing type objects.
        self.assertIsInstance(old_phams, dict)

    def test_2_get_pham_colors(self):
        """Verify we get back a dictionary"""
        old_colors = get_pham_colors(self.engine)
        self.assertIsInstance(old_colors, dict)

    def test_3_get_pham_geneids_and_colors(self):
        """Verify both dictionaries have the same keys"""
        old_phams = get_pham_geneids(self.engine)
        old_colors = get_pham_colors(self.engine)

        # Can't have same keys without the same number of keys...
        with self.subTest():
            self.assertEqual(len(old_phams), len(old_colors))

        # Intersection should be equal to either set of keys - check against old_phams
        # (equal sizes + full intersection together imply identical key sets).
        with self.subTest():
            self.assertEqual(
                set(old_phams.keys()).intersection(set(old_colors.keys())),
                set(old_phams.keys()))

    def test_4_get_unphamerated_genes(self):
        """Verify we get back a set of length 0"""
        unphamerated = get_new_geneids(self.engine)
        # assertIsInstance is the idiomatic type check and produces a
        # clearer failure message than comparing type objects.
        with self.subTest():
            self.assertIsInstance(unphamerated, set)
        # pdm_test_db has 0 unphamerated genes
        with self.subTest():
            self.assertEqual(len(unphamerated), 0)

    def test_5_map_geneids_to_translations(self):
        """Verify we get back a dictionary"""
        gs_to_ts = map_geneids_to_translations(self.engine)

        command = "SELECT distinct(GeneID) FROM gene"
        results = mysqldb_basic.query_dict_list(self.engine, command)

        # assertIsInstance is the idiomatic type check and produces a
        # clearer failure message than comparing type objects.
        with self.subTest():
            self.assertIsInstance(gs_to_ts, dict)
        # gs_to_ts should have the right number of geneids
        with self.subTest():
            self.assertEqual(len(gs_to_ts), len(results))

    def test_6_map_translations_to_geneids(self):
        """Verify we get back a dictionary"""
        ts_to_gs = map_translations_to_geneids(self.engine)

        command = "SELECT distinct(CONVERT(Translation USING utf8)) FROM gene"
        results = mysqldb_basic.query_dict_list(self.engine, command)

        # ts_to_gs should be a dictionary
        with self.subTest():
            self.assertIsInstance(ts_to_gs, dict)
        # ts_to_gs should have the right number of translations
        with self.subTest():
            self.assertEqual(len(ts_to_gs), len(results))

    def test_7_refresh_tempdir_1(self):
        """Verify if no temp_dir, refresh can make one"""
        # Only create it when missing; either way it must exist afterwards.
        if not os.path.exists(self.temp_dir):
            refresh_tempdir(self.temp_dir)
        self.assertTrue(os.path.exists(self.temp_dir))

    def test_8_refresh_tempdir_2(self):
        """Verify if temp_dir with something, refresh makes new empty one"""
        filename = f"{self.temp_dir}/test.txt"
        if not os.path.exists(self.temp_dir):
            refresh_tempdir(self.temp_dir)
        # Context manager guarantees the handle is closed even if the
        # write fails (the original open/write/close could leak it).
        with open(filename, "w") as f:
            f.write("test\n")

        # Our test file should now exist
        with self.subTest():
            self.assertTrue(os.path.exists(filename))

        # Refresh temp_dir
        refresh_tempdir(self.temp_dir)

        # temp_dir should now exist, but test file should not
        with self.subTest():
            self.assertTrue(os.path.exists(self.temp_dir))
        with self.subTest():
            self.assertFalse(os.path.exists(filename))

    def test_9_write_fasta(self):
        """Verify file gets written properly"""
        filename = f"{self.temp_dir}/input.fasta"

        # refresh_tempdir
        refresh_tempdir(self.temp_dir)

        # Get translations to geneid mappings
        ts_to_gs = map_translations_to_geneids(self.engine)

        # Write fasta
        write_fasta(ts_to_gs, self.temp_dir)

        # Read fasta, make sure number of lines is 2x number of unique translations
        with open(filename, "r") as fh:
            lines = fh.readlines()

        with self.subTest():
            self.assertEqual(len(lines), 2 * len(ts_to_gs))

        # Every sequence line (odd index) should map to a key in ts_to_gs.
        # Slicing replaces the index-parity loop over range(len(lines)).
        for seq_line in lines[1::2]:
            with self.subTest():
                self.assertIn(seq_line.lstrip(">").rstrip(), ts_to_gs)

    # TODO: comment out this method if you don't have blast_2.2.14 binaries
    def test_10_create_blastdb(self):
        """Verify blastclust database gets made"""
        refresh_tempdir(self.temp_dir)
        db_file = f"{self.temp_dir}/sequenceDB"

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("blast", self.temp_dir)

        # Check that database files were made
        # (formatdb produces one file per extension).
        for ext in ["phr", "pin", "psd", "psi", "psq"]:
            with self.subTest():
                self.assertTrue(os.path.exists(f"{db_file}.{ext}"))

    def test_11_create_mmseqsdb(self):
        """Verify mmseqs database gets made"""
        refresh_tempdir(self.temp_dir)
        db_file = f"{self.temp_dir}/sequenceDB"

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("mmseqs", self.temp_dir)

        # Check that database file was made
        self.assertTrue(os.path.exists(db_file))

    def test_12_create_clusterdb(self):
        """Verify no database file gets made"""
        refresh_tempdir(self.temp_dir)
        db_file = f"{self.temp_dir}/sequenceDB"

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        # "unknown" is not a supported program, so nothing should be created.
        create_clusterdb("unknown", self.temp_dir)

        # Check that database file was not made
        self.assertFalse(os.path.exists(db_file))

    # TODO: comment out this method if you don't have blast_2.2.14 binaries
    def test_13_phamerate_blast(self):
        """Verify we can phamerate with blastclust"""
        refresh_tempdir(self.temp_dir)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("blast", self.temp_dir)

        phamerate(get_program_params("blast"), "blast", self.temp_dir)

        # Make sure clustering output file exists
        self.assertTrue(os.path.exists(f"{self.temp_dir}/output.txt"))

    def test_14_phamerate_mmseqs(self):
        """Verify we can phamerate with mmseqs2"""
        refresh_tempdir(self.temp_dir)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("mmseqs", self.temp_dir)

        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)

        # Make sure clustering output file exists
        self.assertTrue(os.path.exists(f"{self.temp_dir}/clusterDB.index"))

    def test_15_phamerate_unknown(self):
        """Verify we cannot phamerate with unknown"""
        refresh_tempdir(self.temp_dir)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        # Unsupported program: no clustering database or output expected.
        create_clusterdb("unknown", self.temp_dir)

        phamerate(get_program_params("unknown"), "unknown", self.temp_dir)

        # Make sure clustering output file does not exist
        self.assertFalse(os.path.exists(f"{self.temp_dir}/clusterDB"))

    # TODO: comment out this method if you don't have blast_2.2.14 binaries
    def test_16_parse_blast_output(self):
        """Verify we can open and parse blastclust output"""
        refresh_tempdir(self.temp_dir)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("blast", self.temp_dir)

        phamerate(get_program_params("blast"), "blast", self.temp_dir)

        phams = parse_output("blast", self.temp_dir)

        # The number of phams should be greater than 0 and less than or equal to
        # the number of distinct translations
        with self.subTest():
            # assertIsInstance is the idiomatic type check, and gives a
            # clearer failure message than comparing type() objects.
            self.assertIsInstance(phams, dict)
        with self.subTest():
            self.assertGreater(len(phams), 0)
        with self.subTest():
            self.assertLessEqual(len(phams), len(ts_to_gs))

    def test_17_parse_mmseqs_output(self):
        """Verify we can open and parse MMseqs2 output"""
        refresh_tempdir(self.temp_dir)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("mmseqs", self.temp_dir)

        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)

        phams = parse_output("mmseqs", self.temp_dir)

        # The number of phams should be greater than 0 and less than or equal to
        # the number of distinct translations
        with self.subTest():
            # assertIsInstance is the idiomatic type check, and gives a
            # clearer failure message than comparing type() objects.
            self.assertIsInstance(phams, dict)
        with self.subTest():
            self.assertGreater(len(phams), 0)
        with self.subTest():
            self.assertLessEqual(len(phams), len(ts_to_gs))

    def test_18_parse_unknown_output(self):
        """Verify we cannot open and parse unknown output"""
        refresh_tempdir(self.temp_dir)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("unknown", self.temp_dir)

        phamerate(get_program_params("unknown"), "unknown", self.temp_dir)

        phams = parse_output("unknown", self.temp_dir)

        # An unknown program yields no parseable clustering output, so the
        # result should be an empty dict.
        with self.subTest():
            # assertIsInstance is the idiomatic type check, and gives a
            # clearer failure message than comparing type() objects.
            self.assertIsInstance(phams, dict)
        with self.subTest():
            self.assertEqual(len(phams), 0)

    def test_19_reintroduce_duplicates(self):
        """Verify that we can put de-duplicated GeneIDs back together"""
        refresh_tempdir(self.temp_dir)
        gs_to_ts = map_geneids_to_translations(self.engine)

        ts_to_gs = map_translations_to_geneids(self.engine)
        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("mmseqs", self.temp_dir)
        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)

        new_phams = parse_output("mmseqs", self.temp_dir)

        re_duped_phams = reintroduce_duplicates(new_phams=new_phams,
                                                trans_groups=ts_to_gs,
                                                genes_and_trans=gs_to_ts)

        # Sum pham sizes with a generator expression rather than a manual
        # counter loop.
        geneid_total = sum(len(geneids) for geneids in re_duped_phams.values())

        # All geneids should be represented in the re_duped_phams
        self.assertEqual(geneid_total, len(gs_to_ts))

    def test_20_preserve_phams(self):
        """Verify that pham preservation seems to be working"""
        refresh_tempdir(self.temp_dir)

        old_phams = get_pham_geneids(self.engine)
        old_colors = get_pham_colors(self.engine)
        unphamerated = get_new_geneids(self.engine)

        gs_to_ts = map_geneids_to_translations(self.engine)
        ts_to_gs = map_translations_to_geneids(self.engine)

        write_fasta(ts_to_gs, self.temp_dir)

        create_clusterdb("mmseqs", self.temp_dir)

        phamerate(get_program_params("mmseqs"), "mmseqs", self.temp_dir)

        new_phams = parse_output("mmseqs", self.temp_dir)

        new_phams = reintroduce_duplicates(new_phams=new_phams,
                                           trans_groups=ts_to_gs,
                                           genes_and_trans=gs_to_ts)

        final_phams, new_colors = preserve_phams(old_phams=old_phams,
                                                 new_phams=new_phams,
                                                 old_colors=old_colors,
                                                 new_genes=unphamerated)

        # Final phams should be a dict with same number of keys as new_phams
        # since we aren't re-dimensioning, just renaming some keys
        with self.subTest():
            # assertIsInstance is the idiomatic type check.
            self.assertIsInstance(final_phams, dict)
        with self.subTest():
            self.assertEqual(len(final_phams), len(new_phams))

        # New colors should be a dict with the same number of keys as
        # final_phams
        with self.subTest():
            self.assertIsInstance(new_colors, dict)
        with self.subTest():
            self.assertEqual(len(new_colors), len(final_phams))

        # Can't compare the keys or phams since there's no guarantee that
        # any of the phams were preserved but we can make sure all genes are
        # accounted for.  Sum pham sizes with generator expressions rather
        # than manual counter loops.
        genes_1_count = len(unphamerated) + sum(
            len(geneids) for geneids in old_phams.values())
        genes_2_count = sum(len(geneids) for geneids in new_phams.values())
        with self.subTest():
            self.assertEqual(genes_1_count, genes_2_count)
# Beispiel #29
# 0
class TestUpdate(unittest.TestCase):
    """Integration tests for the update_field pipeline's main() entry point."""

    @classmethod
    def setUpClass(cls):
        # Conventional name for the classmethod parameter is `cls`, not `self`.
        # One filled database serves every test in this class.
        test_db_utils.create_filled_test_db()

    @classmethod
    def tearDownClass(cls):
        test_db_utils.remove_db()

    def setUp(self):
        """Build an engine and reset the fields each test inspects."""
        self.alchemist = AlchemyHandler(database=DB, username=USER, password=PWD)
        self.alchemist.build_engine()
        test_folder.mkdir()

        # Standardize values in certain fields to define the data.
        # Executed in order; the last statement resets the version counter.
        stmts = [create_update("phage", "Status", "unknown"),
                 create_update("phage", "HostGenus", "unknown"),
                 create_update("phage", "Accession", ""),
                 create_update("phage", "Cluster", "Z"),
                 create_update("phage", "Subcluster", "Z1"),
                 "UPDATE version SET Version = 0"]
        for stmt in stmts:
            test_db_utils.execute(stmt)

    def tearDown(self):
        shutil.rmtree(test_folder)

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_1(self, alchemy_mock):
        """Verify update runs with empty ticket table."""
        alchemy_mock.return_value = self.alchemist
        create_update_table([], update_table)
        unparsed_args = get_unparsed_args(file=update_table)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        trixie = data_dict["Trixie"]
        # Nothing should be different.
        with self.subTest():
            self.assertEqual(alice["HostGenus"], "unknown")
        with self.subTest():
            self.assertEqual(trixie["HostGenus"], "unknown")
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 0)

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_2(self, alchemy_mock):
        """Verify update runs with five tickets in ticket table."""
        alchemy_mock.return_value = self.alchemist
        host_genus = "Mycobacterium"
        cluster = "A"
        subcluster = "A2"
        status = "final"
        accession = "ABC123"
        tkt1 = get_alice_ticket("HostGenus", host_genus)
        tkt2 = get_alice_ticket("Cluster", cluster)
        tkt3 = get_alice_ticket("Subcluster", subcluster)
        tkt4 = get_alice_ticket("Status", status)
        tkt5 = get_alice_ticket("Accession", accession)
        tkts = [tkt1, tkt2, tkt3, tkt4, tkt5]
        create_update_table(tkts, update_table)
        unparsed_args = get_unparsed_args(file=update_table)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        trixie = data_dict["Trixie"]
        with self.subTest():
            self.assertEqual(alice["HostGenus"], host_genus)
        with self.subTest():
            self.assertEqual(alice["Cluster"], cluster)
        with self.subTest():
            self.assertEqual(alice["Subcluster"], subcluster)
        with self.subTest():
            self.assertEqual(alice["Accession"], accession)
        with self.subTest():
            self.assertEqual(alice["Status"], status)
        # Just confirm that only Alice data was changed.
        with self.subTest():
            self.assertEqual(trixie["HostGenus"], "unknown")
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 0)

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_3(self, alchemy_mock):
        """Verify version data is updated."""
        alchemy_mock.return_value = self.alchemist
        unparsed_args = get_unparsed_args(version=True)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 1)
        # Just confirm that only version data was changed.
        with self.subTest():
            self.assertEqual(alice["HostGenus"], "unknown")

    @patch("pdm_utils.pipelines.update_field.AlchemyHandler")
    def test_main_4(self, alchemy_mock):
        """Verify version data and phage table data are updated."""
        alchemy_mock.return_value = self.alchemist
        host_genus = "Mycobacterium"
        tkt = get_alice_ticket("HostGenus", host_genus)
        create_update_table([tkt], update_table)
        unparsed_args = get_unparsed_args(file=update_table, version=True)
        run.main(unparsed_args)
        version_table = test_db_utils.get_data(test_db_utils.version_table_query)
        phage_table = test_db_utils.get_data(test_db_utils.phage_table_query)
        data_dict = phage_id_dict(phage_table)
        alice = data_dict["Alice"]
        with self.subTest():
            self.assertEqual(alice["HostGenus"], host_genus)
        with self.subTest():
            self.assertEqual(version_table[0]["Version"], 1)
# Beispiel #30
# 0
class TestMysqldbBasic6(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Create two test databases and populate one with phage/gene rows.

        DB2 stays empty; DB receives three phage rows and four gene rows.
        """
        # Conventional name for the classmethod parameter is `cls`, not `self`.
        test_db_utils.create_empty_test_db(db=DB2)
        test_db_utils.create_empty_test_db()

        # Table-driven setup replaces long runs of repeated assignments.
        # Columns: PhageID, HostGenus, Accession, Cluster, Subcluster,
        #          Sequence, Length.
        phage_rows = [
            ("L5", "Mycobacterium", "ABC123", "A", "A1", "atcg", 6),
            ("Trixie", "Mycobacterium", "XYZ456", "B", "NULL", "AATT", 4),
            ("D29", "Gordonia", "", "NULL", "NULL", "GGCC", 5),
        ]
        for (phage_id, host_genus, accession, cluster,
                subcluster, sequence, length) in phage_rows:
            phage_data = test_data_utils.get_trixie_phage_data()
            phage_data["PhageID"] = phage_id
            phage_data["HostGenus"] = host_genus
            phage_data["Accession"] = accession
            phage_data["Cluster"] = cluster
            phage_data["Subcluster"] = subcluster
            phage_data["Sequence"] = sequence
            phage_data["Length"] = length
            phage_data["DateLastModified"] = constants.EMPTY_DATE
            test_db_utils.insert_data(PHAGE, phage_data)

        # (PhageID, GeneID) pairs: three genes on Trixie, one on D29.
        gene_rows = [
            ("Trixie", "Trixie_1"),
            ("Trixie", "Trixie_2"),
            ("Trixie", "Trixie_3"),
            ("D29", "D29_1"),
        ]
        for phage_id, gene_id in gene_rows:
            gene_data = test_data_utils.get_trixie_gene_data()
            gene_data["PhageID"] = phage_id
            gene_data["GeneID"] = gene_id
            test_db_utils.insert_data(GENE, gene_data)

    @classmethod
    def tearDownClass(cls):
        # Conventional name for the classmethod parameter is `cls`, not `self`.
        # Remove both databases created in setUpClass.
        test_db_utils.remove_db()
        test_db_utils.remove_db(db=DB2)

    def setUp(self):
        """Open one engine per test database (DB is populated, DB2 is empty)."""
        handler_main = AlchemyHandler(database=DB, username=USER, password=PWD)
        handler_main.build_engine()
        handler_empty = AlchemyHandler(database=DB2, username=USER, password=PWD)
        handler_empty.build_engine()

        self.alchemist1 = handler_main
        self.engine1 = handler_main.engine
        self.alchemist2 = handler_empty
        self.engine2 = handler_empty.engine

    def tearDown(self):
        """Release both database connection pools."""
        for engine in (self.engine1, self.engine2):
            engine.dispose()

    def test_get_distinct_1(self):
        """Retrieve a set of all distinct values when data is not present."""
        # The second database was never populated, so no values come back.
        observed = mysqldb_basic.get_distinct(self.engine2, "phage", "PhageID")
        self.assertEqual(observed, set())

    def test_get_distinct_2(self):
        """Retrieve a set of all distinct values when data is present."""
        # Each case: column name, extra keyword args, expected distinct set.
        checks = [
            ("PhageID", {}, {"L5", "Trixie", "D29"}),
            ("HostGenus", {"null": "test"}, {"Mycobacterium", "Gordonia"}),
            ("Accession", {}, {"ABC123", "XYZ456", ""}),
            ("Cluster", {"null": "Singleton"}, {"A", "B", "Singleton"}),
            ("Subcluster", {"null": "none"}, {"A1", "none"}),
        ]
        for field, extra, expected in checks:
            with self.subTest(field=field):
                observed = mysqldb_basic.get_distinct(
                                self.engine1, "phage", field, **extra)
                self.assertEqual(observed, expected)

    def test_retrieve_data_1(self):
        """Verify that a dictionary of data is retrieved for a valid PhageID."""
        results = mysqldb_basic.retrieve_data(
                        self.engine1, column="PhageID",
                        id_list=["L5"], query=PHAGE_QUERY)
        first_row = results[0]
        with self.subTest():
            self.assertEqual(len(first_row.keys()), 14)
        with self.subTest():
            self.assertEqual(first_row["PhageID"], "L5")

    def test_retrieve_data_2(self):
        """Verify that an empty list is retrieved for an invalid PhageID."""
        # "EagleEye" is not one of the inserted PhageIDs.
        results = mysqldb_basic.retrieve_data(
                        self.engine1, column="PhageID",
                        id_list=["EagleEye"], query=PHAGE_QUERY)
        self.assertEqual(len(results), 0)

    def test_retrieve_data_3(self):
        """Verify that dictionaries of data are retrieved for a list of two
        valid PhageIDs."""
        results = mysqldb_basic.retrieve_data(
                        self.engine1, column="PhageID",
                        id_list=["L5","Trixie"], query=PHAGE_QUERY)
        self.assertEqual(len(results), 2)

    def test_retrieve_data_4(self):
        """Verify that dictionaries of data are retrieved for a list of three
        valid PhageIDs and one invalid PhageID."""
        # The invalid ID is silently skipped; only three rows come back.
        results = mysqldb_basic.retrieve_data(
                        self.engine1, column="PhageID",
                        id_list=["L5","Trixie","EagleEye","D29"],
                        query=PHAGE_QUERY)
        self.assertEqual(len(results), 3)

    def test_retrieve_data_5(self):
        """Verify that dictionaries of data are retrieved for multiple
        valid PhageIDs when no list is provided."""
        # Omitting id_list returns every phage row.
        results = mysqldb_basic.retrieve_data(
                        self.engine1, query=PHAGE_QUERY)
        self.assertEqual(len(results), 3)

    def test_retrieve_data_6(self):
        """Verify that a list of CDS data is retrieved for a valid PhageID."""
        results = mysqldb_basic.retrieve_data(
                        self.engine1, column="PhageID",
                        id_list=["Trixie"], query=GENE_QUERY)
        first_row = results[0]
        with self.subTest():
            self.assertEqual(len(results), 3)
        with self.subTest():
            self.assertEqual(len(first_row.keys()), 13)
        with self.subTest():
            self.assertEqual(first_row["PhageID"], "Trixie")

    def test_retrieve_data_7(self):
        """Verify that an empty list of CDS data is retrieved
        for an invalid PhageID."""
        # L5 exists as a phage but owns no gene rows.
        results = mysqldb_basic.retrieve_data(
                        self.engine1, column="PhageID",
                        id_list=["L5"], query=GENE_QUERY)
        self.assertEqual(len(results), 0)

    def test_retrieve_data_8(self):
        """Verify that a list of all CDS data is retrieved when no
        PhageID is provided."""
        # All four inserted gene rows should be returned.
        results = mysqldb_basic.retrieve_data(
                                self.engine1, query=GENE_QUERY)
        self.assertEqual(len(results), 4)