Ejemplo n.º 1
0
    def setUp(self, is_instance_mock):
        """Build a Filter around mocked database collaborators.

        :param is_instance_mock: Mock object whose return value is forced to
            True; presumably injected by a patch decorator that is outside
            this view -- confirm against the enclosing class.
        """
        is_instance_mock.return_value = True
        self.mock_isinstance = is_instance_mock

        self.mock_alchemist = Mock(spec=AlchemyHandler)
        self.mock_engine = Mock(spec=Engine)
        self.mock_graph = Mock(spec=Graph)
        self.mock_session = Mock(spec=Session)
        self.mock_mapper = Mock(spec=DeclarativeMeta)
        self.mock_key = Mock(spec=Column)

        # PropertyMock objects only take effect when attached to the mock's
        # type, so each stubbed property is set on type(alchemist).
        handler_type = type(self.mock_alchemist)
        stubbed_properties = (
            ("engine", self.mock_engine),
            ("graph", self.mock_graph),
            ("session", self.mock_session),
            ("mapper", self.mock_mapper),
            ("connected", True),
        )
        for prop_name, prop_value in stubbed_properties:
            setattr(handler_type, prop_name,
                    PropertyMock(return_value=prop_value))

        # engine.execute(...).fetchall() yields an empty result list.
        result_proxy = Mock()
        result_proxy.fetchall.return_value = []
        self.mock_engine.execute.return_value = result_proxy

        self.db_filter = Filter(alchemist=self.mock_alchemist,
                                key=self.mock_key)
        self.assertEqual(self.db_filter.engine, self.mock_engine)
        self.assertEqual(self.db_filter.graph, self.mock_graph)
Ejemplo n.º 2
0
def get_cds_seqrecords(alchemist, values, data_cache=None, nucleotide=False,
                       verbose=False):
    """Build one record per CDS feature returned for ``values``.

    :param alchemist: Handler passed to the data-retrieval helpers and used
        to construct the Filter that looks up domain data.
    :param values: Forwarded to parse_feature_data() as ``values``.
    :param data_cache: Mapping consulted by genome id before a genome is
        fetched; a fresh dict is used when omitted.
    :type data_cache: dict
    :param nucleotide: Accepted but not read by this function.
    :param verbose: When true, a progress message is printed.
    :returns: The results of flat_files.cds_to_seqrecord(), one per CDS.
    :rtype: list
    """
    cache = {} if data_cache is None else data_cache

    features = parse_feature_data(alchemist, values=values)

    domain_filter = Filter(alchemist)
    domain_filter.key = 'gene.GeneID'

    if verbose:
        print("...Converting SQL data...")

    records = []
    for feature in features:
        # Only fetch the parent genome when the cache has no entry for it.
        genome = cache.get(feature.genome_id)
        if genome is None:
            genome = get_single_genome(alchemist, feature.genome_id,
                                       data_cache=cache)

        feature.genome_length = genome.length
        feature.set_seqfeature()

        # Re-point the shared filter at this gene before selecting its
        # domain columns.
        domain_filter.values = [feature.id]
        domains = domain_filter.select(CDD_DATA_COLUMNS)

        records.append(
            flat_files.cds_to_seqrecord(feature, genome,
                                        gene_domains=domains))

    return records
Ejemplo n.º 3
0
def main(unparsed_args_list):
    """Run main get_gb_records pipeline.

    Parses the arguments, prepares the working directory, connects to the
    MySQL database, applies the command line filters to collect PhageIDs,
    maps them to accessions and passes the result to get_data().

    :param unparsed_args_list: Command line arguments, passed unchanged to
        parse_args().
    """
    options = parse_args(unparsed_args_list)

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filter_groups = options.filters
    credentials = ncbi.get_ncbi_creds(options.ncbi_credentials_file)
    out_root = basic.set_path(options.output_folder, kind="dir", expect=True)
    working_dir = pathlib.Path(RESULTS_FOLDER)
    working_path = basic.make_new_dir(out_root, working_dir, attempt=50)
    if working_path is None:
        print(f"Invalid working directory '{working_dir}'")
        sys.exit(1)

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist = AlchemyHandler(database=options.database)
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline")

    # table.primary_key.columns is a SQLAlchemy ColumnCollection; the last
    # column it yields becomes the Filter key (intended: 'phage.PhageID').
    alchemist.build_metadata()
    target_table = querying.get_table(alchemist.metadata, TARGET_TABLE)
    for pk_column in target_table.primary_key.columns:
        primary_key = pk_column

    # Build the filter, load the command line conditionals (add_filters()
    # exits if needed) and run the query.
    db_filter = Filter(alchemist=alchemist, key=primary_key)
    db_filter.values = []
    add_filters(db_filter, filter_groups)
    db_filter.update()

    # db_filter.values now holds the PhageIDs that passed the filters;
    # look up the accessions that belong to them.
    print("Retrieving accessions from the database...")
    accession_query = construct_accession_query(set(db_filter.values))
    rows = mysqldb_basic.query_dict_list(engine, accession_query)
    acc_id_dict = get_acc_id_dict(get_id_acc_dict(rows))
    engine.dispose()

    if len(acc_id_dict.keys()) == 0:
        print("There are no records to retrieve.")
        return

    get_data(working_path, acc_id_dict, credentials)
Ejemplo n.º 4
0
    def setUp(self):
        """Connect to the test database and prepare a pham-keyed Filter."""
        self.review_test_dir = self.test_dir.joinpath("review_test_dir")

        handler = AlchemyHandler()
        handler.username = USER
        handler.password = PWD
        handler.database = DB
        handler.connect(ask_database=True, login_attempts=0)
        self.alchemist = handler

        # The base conditionals are added before the key is assigned.
        pham_filter = Filter(alchemist=handler)
        pham_filter.add(review.BASE_CONDITIONALS)
        pham_filter.key = "gene.PhamID"
        self.db_filter = pham_filter
Ejemplo n.º 5
0
    def setUp(self):
        """Connect to ``test_db``, build its graph and create a Filter."""
        handler = AlchemyHandler()
        for attr_name, attr_value in (("username", "******"),
                                      ("password", "******"),
                                      ("database", "test_db")):
            setattr(handler, attr_name, attr_value)
        handler.connect()
        handler.build_graph()

        self.alchemist = handler
        self.db_filter = Filter(alchemist=handler)

        # Column object reused by the tests in this class.
        self.phageid = handler.get_column("phage.PhageID")
Ejemplo n.º 6
0
def build_filter(alchemist, key, filters, values=None, verbose=False):
    """Applies MySQL WHERE clause filters using a Filter.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param key: Value assigned to the new Filter's ``key`` property.
    :param filters: Conditional(s) handed to ``Filter.add()``; an empty
        string skips filter loading altogether.
    :param values: Value assigned to the new Filter's ``values`` property.
    :param verbose: Not read by this function; kept so existing callers
        that pass it keep working.
    :type verbose: bool
    :returns: filter-Loaded Filter object.
    :rtype: Filter
    :raises SystemExit: With status 1 when ``Filter.add()`` raises while
        loading ``filters``.
    """
    db_filter = Filter(alchemist=alchemist)
    db_filter.key = key
    db_filter.values = values

    if filters != "":
        try:
            db_filter.add(filters)
        # Catch Exception rather than everything so that Ctrl-C and
        # interpreter exits are not reported as a filter syntax problem.
        except Exception:
            print("Please check your syntax for the conditional string: "
                  f"{filters}")
            # Raised directly because the exit() builtin is installed by the
            # site module and is not guaranteed to exist in every runtime.
            raise SystemExit(1)

        db_filter.parenthesize()

    return db_filter
Ejemplo n.º 7
0
    def setUp(self, IsInstance):
        """Create a Filter from plain Mock collaborators.

        :param IsInstance: Mock object whose return value is forced to True;
            presumably injected by a patch decorator outside this view --
            confirm against the enclosing class.
        """
        IsInstance.return_value = True
        self.mock_isinstance = IsInstance

        self.mock_alchemist = Mock()
        self.mock_engine = Mock()
        self.mock_graph = Mock()
        self.mock_key = Mock()

        # These configure what *calling* alchemist.engine(), .graph() and
        # .connected() returns; the attributes themselves stay Mock objects.
        self.mock_alchemist.engine.return_value = self.mock_engine
        self.mock_alchemist.graph.return_value = self.mock_graph
        self.mock_alchemist.connected.return_value = True

        self.db_filter = Filter(alchemist=self.mock_alchemist,
                                key=self.mock_key)
Ejemplo n.º 8
0
    def setUp(self):
        """Connect to the test database and cache tables and columns."""
        handler = AlchemyHandler()
        handler.username = user
        handler.password = pwd
        handler.database = db
        handler.connect()
        self.alchemist = handler

        self.db_filter = Filter(alchemist=handler)

        # Table objects looked up by name in the handler's metadata.
        tables = handler.metadata.tables
        self.phage = tables["phage"]
        self.gene = tables["gene"]
        self.trna = tables["trna"]

        # Column shortcuts used by the tests.
        phage_columns = self.phage.c
        self.PhageID = phage_columns.PhageID
        self.Cluster = phage_columns.Cluster
        self.Subcluster = phage_columns.Subcluster

        self.Notes = self.gene.c.Notes
Ejemplo n.º 9
0
    def setUp(self):
        """Connect to the test database, build its graph, create a Filter."""
        handler = AlchemyHandler()
        handler.username = USER
        handler.password = PWD
        handler.database = DB
        handler.connect(ask_database=True, login_attempts=0)
        handler.build_graph()
        self.alchemist = handler

        self.db_filter = Filter(alchemist=handler)

        # Directory path derived from the class-level test_dir.
        self.export_test_dir = self.test_dir.joinpath("export_test_dir")
Ejemplo n.º 10
0
def get_acc_id_dict(alchemist):
    """Test helper function to retrieve accessions of database entries.

    :param alchemist: Handler used to construct the Filter.
    :returns: Whatever ``Filter.group("phage.Accession")`` returns for all
        values built on the ``phage.PhageID`` key.
    """
    phage_filter = Filter(alchemist=alchemist)
    phage_filter.key = "phage.PhageID"

    # Load every value for the key before grouping by accession.
    phage_filter.values = phage_filter.build_values()

    return phage_filter.group("phage.Accession")
Ejemplo n.º 11
0
class TestFilter(unittest.TestCase):
    """Unit tests for Filter that run against mocked collaborators."""

    @patch("pdm_utils.classes.filter.isinstance")
    def setUp(self, is_instance_mock):
        """Build a Filter around mocked database collaborators.

        isinstance(), as looked up in pdm_utils.classes.filter, is patched
        to return True for the duration of this method.
        """
        is_instance_mock.return_value = True

        alchemist = Mock(spec=AlchemyHandler)
        engine = Mock(spec=Engine)
        graph = Mock(spec=Graph)
        session = Mock(spec=Session)
        mapper = Mock(spec=DeclarativeMeta)
        key = Mock(spec=Column)
        proxy = Mock()

        # PropertyMock objects only take effect when attached to the mock's
        # type rather than to the instance.
        type(alchemist).engine = PropertyMock(return_value=engine)
        type(alchemist).graph = PropertyMock(return_value=graph)
        type(alchemist).session = PropertyMock(return_value=session)
        type(alchemist).mapper = PropertyMock(return_value=mapper)
        type(alchemist).connected = PropertyMock(return_value=True)

        # engine.execute(...).fetchall() yields an empty result list.
        engine.execute.return_value = proxy
        proxy.fetchall.return_value = []

        self.mock_isinstance = is_instance_mock
        self.mock_alchemist = alchemist
        self.mock_engine = engine
        self.mock_graph = graph
        self.mock_session = session
        self.mock_mapper = mapper
        self.mock_key = key

        self.db_filter = Filter(alchemist=alchemist, key=key)
        self.assertEqual(self.db_filter.engine, self.mock_engine)
        self.assertEqual(self.db_filter.graph, self.mock_graph)

    def test_updated_1(self):
        """Verify that the updated property portrays Filter._updated.
        """
        self.db_filter._updated = True

        self.assertTrue(self.db_filter.updated)

    def test_updated_2(self):
        """Verify that the updated property is immutable.
        """
        with self.assertRaises(AttributeError):
            self.db_filter.updated = True

    def test_values_valid_1(self):
        """Verify that the values_valid property portrays Filter._values_valid.
        """
        self.db_filter._values_valid = True

        self.assertTrue(self.db_filter.values_valid)

    def test_values_valid_2(self):
        """Verify that the values_valid property is immutable.
        """
        with self.assertRaises(AttributeError):
            self.db_filter.values_valid = True

    def test_connected_1(self):
        """Verify that the connected property portrays Filter._connected.
        """
        self.db_filter._connected = True

        self.assertTrue(self.db_filter.connected)

    def test_connected_2(self):
        """Verify that the connected property is immutable.
        """
        with self.assertRaises(AttributeError):
            self.db_filter.connected = True

    def test_engine_1(self):
        """Verify that the engine property portrays Filter._engine.
        """
        self.db_filter._engine = self.mock_engine

        self.assertEqual(self.db_filter.engine, self.mock_engine)

    def test_graph_1(self):
        """Verify that the graph property portrays Filter._graph.
        """
        self.db_filter._graph = self.mock_graph

        self.assertEqual(self.db_filter.graph, self.mock_graph)

    def test_session_1(self):
        """Verify that the session property portrays Filter._session.
        """
        self.db_filter._session = self.mock_session

        self.assertEqual(self.db_filter.session, self.mock_session)

    def test_mapper_1(self):
        """Verify that the mapper property portrays Filter._mapper.
        """
        self.db_filter._mapper = self.mock_mapper

        self.assertEqual(self.db_filter.mapper, self.mock_mapper)

    def test_values_1(self):
        """Verify that the values property portrays Filter._values.
        """
        self.db_filter._values = ["Test1", "Test2"]

        self.assertEqual(self.db_filter.values, ["Test1", "Test2"])

    def test_values_2(self):
        """Verify that the values property can set Filter._values.
        """
        self.db_filter.values = ["Test1", "Test2"]

        self.assertEqual(self.db_filter.values, ["Test1", "Test2"])

    def test_values_3(self):
        """Verify that the values property modifies the values_valid property.
        """
        self.db_filter.values = ["Test1", "Test2"]

        self.assertFalse(self.db_filter.values_valid)

    # Previously a second "test_values_2"; the duplicate name replaced the
    # setter test above in the class namespace so it was never collected.
    def test_values_4(self):
        """Verify that the values property raises TypeError on invalid input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.values = "Hello"

    def test_key_1(self):
        """Verify that the key property modifies the Filter._key
        """
        self.db_filter.key = self.mock_key

        self.assertEqual(self.db_filter.key, self.mock_key)

    def test_key_2(self):
        """Verify that the key property raises TypeError on invalid input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.key = Mock()

    @patch("pdm_utils.classes.filter.AlchemyHandler")
    def test_connect_1(self, alchemyhandler_mock):
        """Verify that connect() returns when Filter is already connected.
        """
        alchemyhandler_mock.return_value = self.mock_alchemist

        self.db_filter._connected = True

        self.db_filter.connect()
        alchemyhandler_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.AlchemyHandler")
    def test_connect_2(self, alchemyhandler_mock):
        """Verify that the Filter creates an AlchemyHandler.
        """
        alchemyhandler_mock.return_value = self.mock_alchemist

        self.db_filter._connected = False

        self.db_filter.connect()
        alchemyhandler_mock.assert_called()

    @patch("pdm_utils.classes.filter.AlchemyHandler")
    def test_connect_3(self, alchemyhandler_mock):
        """Verify that the Filter uses an AlchemyHandler to connect.
        """
        alchemyhandler_mock.return_value = self.mock_alchemist

        self.db_filter._connected = False

        self.db_filter.connect()

        self.mock_alchemist.connect.assert_called()

    @patch("pdm_utils.classes.filter.isinstance")
    def test_link_1(self, isinstance_mock):
        """Verify link() calls isinstance().
        """
        self.db_filter.link(self.mock_alchemist)

        isinstance_mock.assert_called()

    def test_link_2(self):
        """Verify link() raises TypeError upon bad alchemist input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.link("Bad input")

    def test_link_3(self):
        """Verify function structure of link().
        """
        type(self.mock_alchemist).connected = PropertyMock(return_value=False)
        type(self.mock_alchemist).graph = PropertyMock(return_value=None)

        self.db_filter.link(self.mock_alchemist)

        self.mock_alchemist.connect.assert_called()

    @patch("pdm_utils.classes.filter.Filter.connect")
    def test_check_1(self, connect_mock):
        """Verify that the Filter will connect if not connected.
        """
        self.db_filter._connected = False

        self.db_filter.check()

        connect_mock.assert_called()

    @patch("pdm_utils.classes.filter.isinstance")
    def test_check_2(self, is_instance_mock):
        """Verify that the Filter calls isinstance() with correct parameters.
        """
        is_instance_mock.return_value = True

        self.db_filter.check()

        is_instance_mock.assert_any_call(self.mock_engine, Engine)
        is_instance_mock.assert_any_call(self.mock_graph, Graph)
        is_instance_mock.assert_any_call(self.mock_key, Column)

    def test_check_3(self):
        """Verify that the Filter raises AttributeError with an invalid engine.
        """
        self.db_filter._engine = "Not a valid engine"

        with self.assertRaises(AttributeError):
            self.db_filter.check()

    def test_check_4(self):
        """Verify that the Filter raises AttributeError with an invalid graph.
        """
        self.db_filter._graph = "Not a valid graph"

        with self.assertRaises(AttributeError):
            self.db_filter.check()

    def test_check_5(self):
        """Verify that the Filter raises AttributeError with an invalid key.
        """
        self.db_filter._key = "Not a valid key"

        with self.assertRaises(AttributeError):
            self.db_filter.check()

    @patch("pdm_utils.classes.filter.q.build_distinct")
    @patch("pdm_utils.classes.filter.isinstance")
    def test_build_values_1(self, is_instance_mock, build_distinct_mock):
        """Verify that build_distinct() is called with correct parameters.
        """
        is_instance_mock.return_value = True
        self.db_filter.build_values(where="Not a list")

        build_distinct_mock.assert_called_with(self.mock_graph,
                                               self.mock_key,
                                               where="Not a list",
                                               add_in=self.mock_key)

    @patch("pdm_utils.classes.filter.Filter.check")
    def test_transpose_1(self, check_mock):
        """Verify that transpose() returns without values.
        """
        self.db_filter.transpose("gene.Notes")

        check_mock.assert_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    def test_mass_transpose_1(self, check_mock):
        """Verify that mass_transpose() returns without values.
        """
        self.db_filter.mass_transpose("Column")

        check_mock.assert_called()

    @patch("pdm_utils.classes.filter.q.build_distinct")
    @patch("pdm_utils.classes.filter.Filter.check")
    def test_retrieve_1(self, check_mock, build_distinct_mock):
        """Verify that retrieve() returns without values.
        """
        self.db_filter.retrieve("Column")

        check_mock.assert_called()
        build_distinct_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_1(self, build_values_mock, check_mock):
        """Verify that refresh() returns without values.
        """
        self.db_filter.refresh()

        check_mock.assert_called()
        build_values_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_2(self, build_values_mock, check_mock):
        """Verify that refresh() calls build_values() and conserves values.
        """
        build_values_mock.return_value = ["Phage"]

        self.db_filter._values_valid = False
        self.db_filter.refresh()

        check_mock.assert_called()
        build_values_mock.assert_called()

        self.assertTrue(self.db_filter.values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_1(self, build_values_mock, refresh_mock, check_mock):
        """Verify update() returns without values.
        """
        self.db_filter.update()

        check_mock.assert_called()
        refresh_mock.assert_not_called()
        build_values_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_2(self, build_values_mock, refresh_mock, check_mock):
        """Verify update() refreshes values before updating.
        """
        self.db_filter._values_valid = False

        self.db_filter.update()

        check_mock.assert_called()
        refresh_mock.assert_called()
        build_values_mock.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_3(self, build_values_mock, refresh_mock, check_mock):
        """Verify function structure of update().
        """
        self.db_filter._values_valid = False
        self.db_filter._updated = False

        self.db_filter.update()

        check_mock.assert_called()
        refresh_mock.assert_called()
        build_values_mock.assert_called()

        # Both flags cleared above are expected to be set again; the
        # original asserted _values_valid twice and never checked _updated.
        self.assertTrue(self.db_filter._values_valid)
        self.assertTrue(self.db_filter._updated)

    @patch("pdm_utils.classes.filter.q.first_column")
    @patch("pdm_utils.classes.filter.q.build_select")
    @patch("pdm_utils.classes.filter.Filter.get_column")
    def test_sort_1(self, get_column_mock, build_select_mock,
                    first_column_mock):
        """Verify function structure of sort().
        """
        self.db_filter._values_valid = False

        first_column_mock.return_value = ["Phage"]

        self.db_filter.sort("column")

        get_column_mock.assert_called()
        build_select_mock.assert_called()
        first_column_mock.assert_called()

        self.assertTrue(self.db_filter._values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    def test_sort_2(self):
        """Verify that sort() raises TypeError at bad ORDER BY input.
        """
        with self.assertRaises(TypeError):
            self.db_filter.sort(None)

    def test_reset_1(self):
        """Verify that reset() clears filters.
        """
        self.db_filter._filters = [{"Expression": "Some Whereclause"}]

        self.db_filter.reset()

        self.assertEqual(self.db_filter.filters, [])

    def test_reset_2(self):
        """Verify that reset() clears values.
        """
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]

        self.db_filter.reset()

        self.assertEqual(self.db_filter.values, [])

    def test_reset_3(self):
        """Verify that reset() sets values_valid Filter property.
        """
        self.db_filter._values_valid = False

        self.db_filter.reset()

        self.assertTrue(self.db_filter.values_valid)

    def test_reset_4(self):
        """Verify that reset() sets updated Filter property.
        """
        self.db_filter._updated = False

        self.db_filter.reset()

        self.assertTrue(self.db_filter.updated)

    def test_reset_5(self):
        """Verify that reset() sets the or_index property.
        """
        # Assumes or_index is backed by _or_index, matching the _name
        # convention of the other properties tested here; the original set
        # the misspelled _on_index, which reset() could never observe.
        self.db_filter._or_index = 5

        self.db_filter.reset()

        self.assertEqual(self.db_filter.or_index, -1)

    def test_hits_1(self):
        """Verify that hits() accurately portrays no values.
        """
        self.assertEqual(self.db_filter.hits(), 0)

    def test_hits_2(self):
        """Verify that hits() accurately portrays a number of values.
        """
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]

        self.assertEqual(self.db_filter.hits(), 3)

    def test_copy_1(self):
        """Verify that copy() reflects a Filter's updated property.
        """
        self.db_filter._updated = False

        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.updated, self.db_filter.updated)

    def test_copy_2(self):
        """Verify that copy() reflects a Filter's values_valid property.
        """
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.values_valid, self.db_filter.values_valid)

    def test_copy_3(self):
        """Verify that copy() reflects a Filter's filters property.
        """
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.filters, self.db_filter.filters)

    def test_copy_4(self):
        """Verify that copy() reflects a Filter's engine property.
        """
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.engine, self.db_filter.engine)

    def test_copy_5(self):
        """Verify that copy() reflects a Filter's graph property.
        """
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.graph, self.db_filter.graph)

    def test_copy_6(self):
        """Verify that copy() reflects a Filter's key property.
        """
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.key, self.db_filter.key)

    def test_copy_7(self):
        """Verify that copy() reflects a Filter's values property.
        """
        self.db_filter._values = ["Values"]

        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.values, self.db_filter.values)

    def test_copy_8(self):
        """Verify that copy() reflects a Filter's connected property.
        """
        self.db_filter._connected = True
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter._connected, self.db_filter._connected)

    def test_copy_filters_1(self):
        """Verify that copy_filters() replicates filters.
        """
        self.db_filter._filters = [{
            "Filter1": "Filter1",
            "Filter2": "Filter2",
            "Filter3": "Filter3"
        }]

        filters_copy = self.db_filter.copy_filters()

        self.assertEqual(filters_copy, self.db_filter._filters)

    def test_copy_filters_2(self):
        """Verify that copy_filters() creates new address for copied filters.
        """
        self.db_filter._filters = [{
            "Filter1": "Filter1",
            "Filter2": "Filter2",
            "Filter3": "Filter3"
        }]

        filters_copy = self.db_filter.copy_filters()

        # Mutating the original afterwards must not leak into the copy.
        self.db_filter._filters[0].update({"Filter2": "filter2"})

        self.assertNotEqual(filters_copy, self.db_filter._filters)
Ejemplo n.º 12
0
class TestFilter(unittest.TestCase):
    """Unit tests for Filter built on plain Mock collaborators."""

    @patch("pdm_utils.classes.filter.isinstance")
    def setUp(self, IsInstance):
        """Create a Filter from Mock objects.

        isinstance(), as looked up in pdm_utils.classes.filter, is patched
        to return True for the duration of this method.
        """
        IsInstance.return_value = True

        alchemist = Mock()
        engine = Mock()
        graph = Mock()
        key = Mock()

        # These configure what *calling* alchemist.engine(), .graph() and
        # .connected() returns; the attributes themselves stay Mock objects.
        alchemist.engine.return_value = engine
        alchemist.graph.return_value = graph
        alchemist.connected.return_value = True

        self.mock_isinstance = IsInstance
        self.mock_alchemist = alchemist
        self.mock_engine = engine
        self.mock_graph = graph
        self.mock_key = key

        self.db_filter = Filter(alchemist=alchemist, key=key)

    def test_constructor_1(self):
        """Verify that Filter() can be constructed without arguments."""
        Filter()

    def test_constructor_2(self):
        """Verify the constructor type-checks the alchemist and the key."""
        self.mock_isinstance.assert_any_call(self.mock_alchemist,
                                             AlchemyHandler)
        self.mock_isinstance.assert_any_call(self.mock_key, Column)

    def test_values_1(self):
        """Verify that assigning values clears values_valid."""
        self.db_filter.values = ["Test1", "Test2"]
        self.assertFalse(self.db_filter.values_valid)

    def test_values_2(self):
        """Verify that assigning a string to values raises TypeError."""
        with self.assertRaises(TypeError):
            self.db_filter.values = "Hello"

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.q.build_distinct")
    def test_transpose_2(self, BuildDistinct, Check):
        """Verify transpose() on a fresh Filter returns [] without querying."""
        empty_values = self.db_filter.transpose("gene.Notes")

        self.assertEqual(empty_values, [])

        BuildDistinct.assert_not_called()
        Check.assert_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.transpose")
    def test_retrieve_1(self, Transpose, Check):
        """Verify retrieve() on a fresh Filter returns {} without transposing."""
        empty_data = self.db_filter.retrieve("Error")

        self.assertEqual(empty_data, {})

        Transpose.assert_not_called()
        Check.assert_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_1(self, BuildValues, Check):
        """Verify refresh() on a fresh Filter skips build_values()."""
        self.db_filter.refresh()

        Check.assert_called()
        BuildValues.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_refresh_2(self, BuildValues, Check):
        """Verify refresh() rebuilds values when values_valid is False."""
        BuildValues.return_value = ["Phage"]

        self.db_filter._values_valid = False
        self.db_filter.refresh()

        Check.assert_called()
        BuildValues.assert_called()

        self.assertTrue(self.db_filter.values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_1(self, BuildValues, Refresh, Check):
        """Verify update() on a fresh Filter neither refreshes nor rebuilds."""
        self.db_filter.update()

        Check.assert_called()
        Refresh.assert_not_called()
        BuildValues.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_2(self, BuildValues, Refresh, Check):
        """Verify update() calls refresh() when values_valid is False."""
        self.db_filter._values_valid = False

        self.db_filter.update()

        Check.assert_called()
        Refresh.assert_called()
        BuildValues.assert_not_called()

    @patch("pdm_utils.classes.filter.Filter.check")
    @patch("pdm_utils.classes.filter.Filter.refresh")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_update_3(self, BuildValues, Refresh, Check):
        """Verify update() refreshes, rebuilds and restores both flags."""
        self.db_filter._values_valid = False
        self.db_filter._updated = False

        self.db_filter.update()

        Check.assert_called()
        Refresh.assert_called()
        BuildValues.assert_called()

        # Both flags cleared above are expected to be set again; the
        # original asserted _values_valid twice and never checked _updated.
        self.assertTrue(self.db_filter._values_valid)
        self.assertTrue(self.db_filter._updated)

    @patch("pdm_utils.classes.filter.isinstance")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_sort_1(self, BuildValues, IsInstance):
        """Verify sort() type-checks its input and rebuilds the values."""
        IsInstance.return_value = True
        BuildValues.return_value = ["Phage"]

        self.db_filter._values_valid = False

        self.db_filter.sort("column")

        IsInstance.assert_called()
        BuildValues.assert_called()

        self.assertTrue(self.db_filter._values_valid)
        self.assertEqual(self.db_filter.values, ["Phage"])

    # Previously a second "test_sort_1"; the duplicate name replaced the
    # test above in the class namespace so it was never collected.
    @patch("pdm_utils.classes.filter.isinstance")
    @patch("pdm_utils.classes.filter.Filter.build_values")
    def test_sort_2(self, BuildValues, IsInstance):
        """Verify that sort() called without arguments raises TypeError."""
        IsInstance.return_value = False

        with self.assertRaises(TypeError):
            self.db_filter.sort()

    def test_reset_1(self):
        """Verify that reset() clears filters."""
        self.db_filter._filters = {"Expression": "Some Whereclause"}

        self.db_filter.reset()

        self.assertEqual(self.db_filter.filters, {})

    def test_reset_2(self):
        """Verify that reset() clears values."""
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]

        self.db_filter.reset()

        self.assertEqual(self.db_filter.values, [])

    def test_reset_3(self):
        """Verify that reset() sets values_valid back to True."""
        self.db_filter._values_valid = False

        self.db_filter.reset()

        self.assertTrue(self.db_filter.values_valid)

    def test_reset_4(self):
        """Verify that reset() sets updated back to True."""
        self.db_filter._updated = False

        self.db_filter.reset()

        self.assertTrue(self.db_filter.updated)

    def test_hits_1(self):
        """Verify that hits() is 0 on a fresh Filter."""
        self.assertEqual(self.db_filter.hits(), 0)

    def test_hits_2(self):
        """Verify that hits() counts the stored values."""
        self.db_filter._values = ["Phage1", "Phage2", "Phage3"]

        self.assertEqual(self.db_filter.hits(), 3)

    def test_copy_1(self):
        """Verify that copy() carries over the updated property."""
        self.db_filter._updated = False

        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.updated, self.db_filter.updated)

    def test_copy_2(self):
        """Verify that copy() carries over the values_valid property."""
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.values_valid, self.db_filter.values_valid)

    def test_copy_3(self):
        """Verify that copy() carries over the filters property."""
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.filters, self.db_filter.filters)

    def test_copy_4(self):
        """Verify that copy() carries over the engine property."""
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.engine, self.db_filter.engine)

    def test_copy_5(self):
        """Verify that copy() carries over the graph property."""
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.graph, self.db_filter.graph)

    def test_copy_6(self):
        """Verify that copy() carries over the key property."""
        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.key, self.db_filter.key)

    def test_copy_7(self):
        """Verify that copy() carries over the values property."""
        self.db_filter._values = ["Values"]

        copy_filter = self.db_filter.copy()

        self.assertEqual(copy_filter.values, self.db_filter.values)

    def test_copy_filters_1(self):
        """Verify that copy_filters() returns an equal filters mapping."""
        self.db_filter._filters = {
            "Filter1": ["Filter1"],
            "Filter2": ["Filter2"],
            "Filter3": ["Filter3"]
        }

        filters_copy = self.db_filter.copy_filters()

        self.assertEqual(filters_copy, self.db_filter._filters)

    def test_copy_filters_2(self):
        """Verify that copy_filters() returns an independent object."""
        self.db_filter._filters = {
            "Filter1": ["Filter1"],
            "Filter2": ["Filter2"],
            "Filter3": ["Filter3"]
        }

        filters_copy = self.db_filter.copy_filters()

        # Mutating the original afterwards must not leak into the copy.
        self.db_filter._filters.update({"Filter2": []})

        self.assertNotEqual(filters_copy, self.db_filter._filters)
Ejemplo n.º 13
0
class TestFilter(unittest.TestCase):
    """Integration tests that run Filter against the filled test database.

    The database is created once for the whole class and removed afterwards;
    every test gets a freshly connected AlchemyHandler and a new Filter.
    """

    @classmethod
    def setUpClass(cls):
        test_db_utils.create_filled_test_db()

    @classmethod
    def tearDownClass(cls):
        test_db_utils.remove_db()

    def setUp(self):
        alchemist = AlchemyHandler()
        alchemist.username = user
        alchemist.password = pwd
        alchemist.database = db
        alchemist.connect()
        self.alchemist = alchemist

        self.db_filter = Filter(alchemist=self.alchemist)

        # Table and Column handles shared by the tests below.
        self.phage = self.alchemist.metadata.tables["phage"]
        self.gene = self.alchemist.metadata.tables["gene"]
        self.trna = self.alchemist.metadata.tables["trna"]

        self.PhageID = self.phage.c.PhageID
        self.Cluster = self.phage.c.Cluster
        self.Subcluster = self.phage.c.Subcluster

        self.Notes = self.gene.c.Notes

    def tearDown(self):
        self.alchemist.clear()

    def test_and__1(self):
        """Verify that and_() creates a dictionary key as expected.
        """
        self.db_filter.and_("phage.PhageID=Myrna")

        or_block = self.db_filter.filters[0]
        self.assertIn("phage.PhageID=Myrna", or_block)

    def test_and__2(self):
        """Verify that and_() stores BinaryExpression data as expected.
        """
        self.db_filter.and_("phage.PhageID=Myrna")

        or_block = self.db_filter.filters[0]
        self.assertIsInstance(or_block, dict)

        where_clauses = or_block["phage.PhageID=Myrna"]
        self.assertIsInstance(where_clauses, BinaryExpression)

    def test_and_3(self):
        """Verify that and_() recognizes previous and_() data.
        """
        # The first two conditionals differ only in whitespace, so only
        # two distinct entries are expected.
        self.db_filter.and_("phage.PhageID =  Myrna")
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")

        or_block = self.db_filter.filters[0]
        self.assertEqual(len(or_block), 2)

    def test_remove_1(self):
        """Verify that remove() removes dictionary entry after depleted.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.remove("phage.PhageID=Myrna")
        self.assertEqual(self.db_filter.filters, [{}])

    def test_remove_2(self):
        """Verify that remove() conserves dictionary entry if not depleted.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")

        self.db_filter.remove("phage.PhageID=Myrna")

        or_block = self.db_filter.filters[0]
        where_clauses = or_block["phage.PhageID=D29"]

        self.assertEqual(where_clauses.right.value, "D29")

    def test_add_1(self):
        """Verify that add() creates a dictionary key as expected.
        """
        self.db_filter.add("phage.PhageID=Myrna")

        or_block = self.db_filter.filters[0]
        self.assertIn("phage.PhageID=Myrna", or_block)

    def test_add_2(self):
        """Verify that add() creates multiple keys as expected.
        """
        self.db_filter.add("phage.PhageID=Myrna AND phage.PhageID = Trixie")

        or_block = self.db_filter.filters[0]

        self.assertEqual(len(or_block), 2)

        self.assertIn("phage.PhageID=Myrna", or_block)
        self.assertIn("phage.PhageID=Trixie", or_block)

    def test_add_3(self):
        """Verify that add() creates multiple or blocks as expected.
        """
        self.db_filter.add("phage.PhageID=Myrna OR phage.PhageID = Trixie")

        self.assertEqual(len(self.db_filter.filters), 2)

        first_or_block = self.db_filter.filters[0]
        second_or_block = self.db_filter.filters[1]

        self.assertIn("phage.PhageID=Myrna", first_or_block)
        self.assertNotIn("phage.PhageID=Trixie", first_or_block)

        self.assertNotIn("phage.PhageID=Myrna", second_or_block)
        self.assertIn("phage.PhageID=Trixie", second_or_block)

    def test_get_column_1(self):
        """Verify that get_column() converts string column input.
        """
        self.db_filter.key = self.Cluster

        column = self.db_filter.get_column("phage.PhageID")

        self.assertEqual(column, self.PhageID)

    def test_get_column_2(self):
        """Verify that get_column() conserves Column input.
        """
        self.db_filter.key = self.Cluster

        column = self.db_filter.get_column(self.PhageID)

        self.assertEqual(column, self.PhageID)

    def test_get_column_3(self):
        """Verify that get_column() raises TypeError.
        get_column() should raise TypeError when column input is
        neither a string nor a Column.
        """
        self.db_filter.key = self.Cluster

        with self.assertRaises(TypeError):
            self.db_filter.get_column(None)

    def test_build_where_clauses_1(self):
        """Verify that build_where_clauses() forms list of expected length.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")

        queries = self.db_filter.build_where_clauses()

        self.assertEqual(len(queries[0]), 2)

    def test_build_where_clauses_2(self):
        """Verify that build_where_clauses() forms BooleanClauseList objects.
        """
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.and_("phage.PhageID=D29")

        queries = self.db_filter.build_where_clauses()

        for query in queries:
            self.assertIsInstance(query, BooleanClauseList)

    def test_build_values_1(self):
        """Verify that build_values() does not exclude values as expected.
        """
        self.db_filter.key = self.PhageID

        values = self.db_filter.build_values()

        self.assertIn("Myrna", values)
        self.assertIn("D29", values)
        self.assertIn("Alice", values)
        self.assertIn("Trixie", values)

    def test_build_values_2(self):
        """Verify that build_values() utilizes WHERE clauses as expected.
        """
        self.db_filter.key = self.PhageID

        where_clause = (self.Cluster == "A")
        values = self.db_filter.build_values(where=where_clause)

        self.assertIn("D29", values)
        self.assertIn("Trixie", values)
        self.assertNotIn("Myrna", values)
        self.assertNotIn("Alice", values)

    def test_build_values_3(self):
        """Verify that build_values() creates DISTINCT values as expected.
        """
        self.db_filter.key = self.Cluster

        where_clause = (self.Subcluster == "A2")
        values = self.db_filter.build_values(where=where_clause)

        self.assertEqual(len(values), 1)
        self.assertEqual(values, ["A"])

    def test_build_values_4(self):
        """Verify that build_values() recognizes bytes-type column data.
        """
        self.db_filter.key = self.Notes

        values = self.db_filter.build_values()

        self.assertIsInstance(values[0], str)

    def test_query_1(self):
        """Verify that query() creates instances as expected.
        """
        self.db_filter.key = "phage.PhageID"
        self.db_filter.values = ["Trixie", "D29"]
        self.db_filter.refresh()

        instances = self.db_filter.query("phage")

        instance_ids = [instance.PhageID for instance in instances]

        self.assertIn("Trixie", instance_ids)
        self.assertIn("D29", instance_ids)
        self.assertNotIn("Myrna", instance_ids)

    def test_query_2(self):
        """Verify that query() reaches related instances as expected.
        """
        self.db_filter.key = "phage.PhageID"
        self.db_filter.values = ["Trixie", "D29"]
        self.db_filter.refresh()

        instances = self.db_filter.query("gene")

        # Many genes share one phage, so collapse the IDs into a set.
        instance_ids = {instance.phage.PhageID for instance in instances}

        self.assertIn("Trixie", instance_ids)
        self.assertIn("D29", instance_ids)
        self.assertNotIn("Myrna", instance_ids)

    def test_transpose_1(self):
        """Verify that transpose() utilizes Filter values as expected.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID

        self.db_filter.refresh()

        clusters = self.db_filter.transpose("phage.Cluster")

        self.assertEqual(clusters, ["C"])

    def test_transpose_2(self):
        """Verify that transpose() can optionally create dict return value.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID

        self.db_filter.refresh()

        clusters_dict = self.db_filter.transpose(self.Cluster,
                                                 return_dict=True)

        self.assertEqual(clusters_dict["Cluster"], ["C"])

    def test_transpose_3(self):
        """Verify that transpose() can alter Filter properties as expected.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID

        self.db_filter.refresh()

        self.db_filter.transpose("phage.Cluster", set_values=True)

        self.assertEqual(self.db_filter.key, self.Cluster)
        self.assertEqual(self.db_filter.values, ["C"])

    def test_transpose_4(self):
        """Verify that transpose() filter parameter functions as expected.
        """
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.key = self.PhageID

        self.db_filter.add("gene.GeneID = Myrna_CDS_28")
        values = self.db_filter.transpose("gene.GeneID", filter=True)

        self.assertEqual(len(values), 1)
        self.assertEqual(values[0], "Myrna_CDS_28")

    def test_mass_transpose_1(self):
        """Verify that mass_transpose() returns DISTINCT values as expected.
        """
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.PhageID

        self.db_filter.refresh()

        myrna_data = self.db_filter.mass_transpose(["phage.HostGenus",
                                                    "phage.Cluster",
                                                    "gene.Notes"])

        self.assertEqual(len(myrna_data), 3)
        self.assertIsInstance(myrna_data, dict)

        self.assertEqual(myrna_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(myrna_data["Cluster"], ["C"])

    def test_mass_transpose_2(self):
        """Verify that mass_transpose() utilizes all values as expected.
        """
        self.db_filter.values = ["Myrna", "Trixie"]
        self.db_filter.key = self.PhageID

        self.db_filter.refresh()

        data = self.db_filter.mass_transpose(["phage.HostGenus",
                                              "phage.Cluster",
                                              "gene.Notes"])

        self.assertEqual(len(data), 3)
        self.assertIsInstance(data, dict)

        self.assertEqual(data["HostGenus"], ["Mycobacterium"])
        # NOTE(review): this comparison depends on the order in which the
        # clusters are returned - confirm that the order is guaranteed.
        self.assertEqual(data["Cluster"], ["C", "A"])

    def test_retrieve_1(self):
        """Verify that retrieve() separates data as expected.
        """
        self.db_filter.values = ["Myrna", "Trixie"]
        self.db_filter.key = self.PhageID

        self.db_filter.refresh()

        data = self.db_filter.retrieve(["phage.HostGenus",
                                        "phage.Cluster"])

        myrna_data = data["Myrna"]
        self.assertEqual(myrna_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(myrna_data["Cluster"], ["C"])

        trixie_data = data["Trixie"]
        self.assertEqual(trixie_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(trixie_data["Cluster"], ["A"])

    def test_retrieve_2(self):
        """Verify that retrieve() separates data by a non-unique key.
        """
        self.db_filter.values = ["A", "C"]
        self.db_filter.key = self.Cluster

        self.db_filter.refresh()

        data = self.db_filter.retrieve(["phage.Cluster", "phage.PhageID"])

        a_data = data["A"]
        self.assertEqual(a_data["Cluster"], ["A"])
        self.assertIn("Trixie", a_data["PhageID"])
        self.assertNotIn("Myrna", a_data["PhageID"])

        c_data = data["C"]
        self.assertEqual(c_data["Cluster"], ["C"])
        self.assertNotIn("Trixie", c_data["PhageID"])
        self.assertIn("Myrna", c_data["PhageID"])

    def test_refresh_1(self):
        """Verify that refresh() eliminates invalid data.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29", "Sheetz"]
        self.db_filter.refresh()

        self.assertIn("Myrna", self.db_filter.values)
        self.assertIn("D29", self.db_filter.values)
        self.assertNotIn("Sheetz", self.db_filter.values)

    def test_update_1(self):
        """Verify that update() filters out values.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.and_("phage.PhageID=Myrna")
        self.db_filter.update()

        self.assertIn("Myrna", self.db_filter.values)
        self.assertNotIn("D29", self.db_filter.values)

    def test_sort_1(self):
        """Verify that sort() orders values as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.sort(self.PhageID)

        self.assertIn("Myrna", self.db_filter.values)
        self.assertIn("D29", self.db_filter.values)
        self.assertEqual(self.db_filter.values[0], "D29")

    def test_sort_2(self):
        """Verify that sort() orders values with multiple sort columns.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29", "Alice"]
        self.db_filter.sort([self.Cluster, self.PhageID])

        self.assertIn("Myrna", self.db_filter.values)
        self.assertIn("D29", self.db_filter.values)
        self.assertIn("Alice", self.db_filter.values)

        self.assertEqual(self.db_filter.values[0], "D29")
        self.assertEqual(self.db_filter.values[1], "Alice")

    def test_group_1(self):
        """Verify that group() creates separate groups as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group(self.PhageID)

        self.assertIn("Myrna", group_results)
        self.assertIn("Myrna", group_results["Myrna"])

        self.assertIn("D29", group_results)
        self.assertIn("D29", group_results["D29"])

    def test_group_2(self):
        """Verify that group() recognizes similarities in values as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group("phage.HostGenus")

        self.assertIn("Mycobacterium", group_results)

        self.assertIn("Myrna", group_results["Mycobacterium"])
        self.assertIn("D29", group_results["Mycobacterium"])

    def test_group_3(self):
        """Verify that group() recognizes differences in values as expected.
        """
        self.db_filter.key = self.PhageID
        self.db_filter.values = ["Myrna", "D29", "Trixie"]
        group_results = self.db_filter.group("phage.Cluster")

        self.assertIn("A", group_results)
        self.assertIn("C", group_results)

        self.assertIn("Myrna", group_results["C"])
        self.assertIn("D29", group_results["A"])
        self.assertIn("Trixie", group_results["A"])
Ejemplo n.º 14
0
 def test_constructor_1(self):
     """Verify that Filter can be constructed with the test alchemist."""
     # The instance itself is not inspected; only construction is exercised.
     Filter(alchemist=self.alchemist)
Ejemplo n.º 15
0
def execute_review(alchemist, folder_path, folder_name,
                   review=True, values=None,
                   filters="", groups=None, sort=None,
                   g_reports=False, s_report=False,
                   verbose=False):
    """Executes the entirety of the pham review pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param review: A boolean to toggle filtering of phams by pham discrepancies.
    :type review: bool
    :param values: List of values to filter database results.
        Defaults to an empty list.
    :type values: list[str]
    :param filters: A conditional string passed to Filter.add(); an empty
        string adds no user filters.
    :type filters: str
    :param groups: A list of supported MySQL column names to group by.
        Defaults to an empty list.
    :type groups: list[str]
    :param sort: A list of supported MySQL column names to sort by.
        Defaults to an empty list.
    :type sort: list[str]
    :param g_reports: A boolean to toggle export of additional pham information.
    :type g_reports: bool
    :param s_report: A boolean to toggle the call to
        execute_s_report_export() for each export path.
    :type s_report: bool
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # None stands in for "empty list" so that no list object is shared
    # between calls through the default arguments.
    values = [] if values is None else values
    groups = [] if groups is None else groups
    sort = [] if sort is None else sort

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"

    if values:
        db_filter.values = values

    if verbose:
        print(f"Identified {len(values)} phams to review...")

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")
            sys.exit(1)
        else:
            # Only query the database when the conditionals were accepted.
            db_filter.update()

    # Discard the user conditionals (their effect is already reflected in
    # db_filter.values) before applying the pipeline's base conditionals.
    db_filter._filters = []
    db_filter._updated = False
    db_filter._or_index = -1

    db_filter.add(BASE_CONDITIONALS)
    db_filter.update()

    if not db_filter.values:
        print("Current settings produced no database hits.")
        sys.exit(1)

    if review:
        review_phams(db_filter, verbose=verbose)

    if sort:
        db_filter.sort(sort)

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups,
                               verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")
    original_phams = db_filter.values
    total_g_data = {}
    for mapped_path, conditionals in conditionals_map.items():
        # Restart from the full pham list, then narrow it down to the
        # phams matching the conditionals mapped to this path.
        db_filter.values = original_phams
        db_filter.values = db_filter.build_values(where=conditionals)

        pf_data = get_pf_data(alchemist, db_filter, verbose=verbose)
        write_report(pf_data, mapped_path, PF_HEADER,
                     csv_name="FunctionReport",
                     verbose=verbose)

        if g_reports:
            execute_g_report_export(alchemist, db_filter, mapped_path,
                                    total_g_data=total_g_data,
                                    verbose=verbose)

        if s_report:
            execute_s_report_export(alchemist, db_filter, conditionals,
                                    mapped_path,
                                    verbose=verbose)
Ejemplo n.º 16
0
class TestFilter(unittest.TestCase):
    """Tests that run Filter against the "test_db" test database.

    The database is set up once for the class and torn down afterwards;
    every test gets a freshly connected AlchemyHandler and a new Filter.
    """

    @classmethod
    def setUpClass(cls):
        setup_test_db()

    def setUp(self):
        alchemist = AlchemyHandler()
        alchemist.username = "******"
        alchemist.password = "******"
        alchemist.database = "test_db"
        alchemist.connect()
        alchemist.build_graph()
        self.alchemist = alchemist

        self.db_filter = Filter(alchemist=self.alchemist)

        phageid = self.alchemist.get_column("phage.PhageID")
        self.phageid = phageid

    def test_constructor_1(self):
        """Verify that Filter can be constructed with an alchemist."""
        # The instance itself is not inspected; only construction is exercised.
        Filter(alchemist=self.alchemist)

    def test_constructor_2(self):
        """Verify that the constructor exposes a Graph and an Engine."""
        self.assertIsInstance(self.db_filter.graph, Graph)
        self.assertIsInstance(self.db_filter.engine, Engine)

    def test_add_1(self):
        """Verify that add() creates a dictionary key as expected."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.assertIn("phage.PhageID=", self.db_filter.filters)

    def test_add_2(self):
        """Verify that add() stores a list of BinaryExpression data."""
        self.db_filter.add("phage.PhageID=Myrna")
        where_clauses = self.db_filter.filters["phage.PhageID="]
        self.assertIsInstance(where_clauses, list)
        self.assertIsInstance(where_clauses[0], BinaryExpression)

    def test_add_3(self):
        """Verify that add() appends to an existing dictionary entry."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")
        where_clauses = self.db_filter.filters["phage.PhageID="]
        self.assertEqual(len(where_clauses), 2)

    def test_remove_1(self):
        """Verify that remove() removes dictionary entry after depleted."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.remove("phage.PhageID=Myrna")
        self.assertEqual(self.db_filter.filters, {})

    def test_remove_2(self):
        """Verify that remove() conserves dictionary entry if not depleted."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")
        self.db_filter.remove("phage.PhageID=Myrna")
        where_clauses = self.db_filter.filters["phage.PhageID="]
        self.assertEqual(len(where_clauses), 1)
        self.assertEqual(where_clauses[0].right.value, "D29")

    def test_build_where_clauses_1(self):
        """Verify that build_where_clauses() forms list of expected length."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")

        queries = self.db_filter.build_where_clauses()

        self.assertEqual(len(queries), 2)

    def test_build_where_clauses_2(self):
        """Verify that build_where_clauses() forms BinaryExpressions."""
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.add("phage.PhageID=D29")

        queries = self.db_filter.build_where_clauses()

        for query in queries:
            self.assertIsInstance(query, BinaryExpression)

    def test_build_values_1(self):
        """Verify that build_values() returns the expected values."""
        self.db_filter.key = self.phageid

        self.db_filter.values = ["Myrna", "D29"]
        values = self.db_filter.build_values()

        self.assertIn("Myrna", values)
        self.assertIn("D29", values)

    def test_transpose_1(self):
        """Verify that transpose() utilizes Filter values as expected."""
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.phageid

        self.db_filter.refresh()

        host_genera = self.db_filter.transpose("phage.HostGenus")

        self.assertEqual(host_genera, ["Mycobacterium"])

    def test_retrieve_1(self):
        """Verify that retrieve() returns a dict with one entry per column."""
        self.db_filter.values = ["Myrna"]
        self.db_filter.key = self.phageid

        self.db_filter.refresh()

        myrna_data = self.db_filter.retrieve(
            ["phage.HostGenus", "phage.Cluster", "gene.Notes"])

        self.assertEqual(len(myrna_data), 3)
        self.assertIsInstance(myrna_data, dict)

        self.assertEqual(myrna_data["HostGenus"], ["Mycobacterium"])
        self.assertEqual(myrna_data["Cluster"], ["C"])

    def test_refresh_2(self):
        """Verify that refresh() eliminates invalid data."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29", "Sheetz"]
        self.db_filter.refresh()

        self.assertIn("Myrna", self.db_filter.values)
        self.assertIn("D29", self.db_filter.values)
        self.assertNotIn("Sheetz", self.db_filter.values)

    def test_update_1(self):
        """Verify that update() filters out values."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.add("phage.PhageID=Myrna")
        self.db_filter.update()

        self.assertIn("Myrna", self.db_filter.values)
        self.assertNotIn("D29", self.db_filter.values)

    def test_sort_1(self):
        """Verify that sort() orders values as expected."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        self.db_filter.sort(self.phageid)

        self.assertIn("Myrna", self.db_filter.values)
        self.assertIn("D29", self.db_filter.values)
        self.assertEqual(self.db_filter.values[0], "D29")

    def test_group_1(self):
        """Verify that group() creates separate groups as expected."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group(self.phageid)

        self.assertIn("Myrna", group_results)
        self.assertIn("Myrna", group_results["Myrna"])

        self.assertIn("D29", group_results)
        # Check the contents of the D29 group as well; the key check alone
        # was previously repeated here.
        self.assertIn("D29", group_results["D29"])

    def test_group_2(self):
        """Verify that group() recognizes similarities in values."""
        self.db_filter.key = self.phageid
        self.db_filter.values = ["Myrna", "D29"]
        group_results = self.db_filter.group("phage.HostGenus")

        self.assertIn("Mycobacterium", group_results)

        self.assertIn("Myrna", group_results["Mycobacterium"])
        self.assertIn("D29", group_results["Mycobacterium"])

    @classmethod
    def tearDownClass(cls):
        teardown_test_db()
Ejemplo n.º 17
0
def main(unparsed_args_list):
    """Run main freeze database pipeline.

    Parses the command line arguments, selects the genomes of the reference
    database that pass the filters, copies the reference database to a new
    database and deletes every genome that is not retained from the copy.

    :param unparsed_args_list: Command line arguments handed to parse_args().
    """
    args = parse_args(unparsed_args_list)
    ref_database = args.database
    reset = args.reset
    new_database = args.new_database_name
    prefix = args.prefix

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filters = args.filters

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist1 = AlchemyHandler(database=ref_database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    mysqldb.check_schema_compatibility(engine1, "the freeze pipeline")

    # Get SQLAlchemy metadata Table object
    # table_obj.primary_key.columns is a
    # SQLAlchemy ColumnCollection iterable object
    # Set primary key = 'phage.PhageID'
    alchemist1.build_metadata()
    table = querying.get_table(alchemist1.metadata, TARGET_TABLE)
    # NOTE(review): this keeps the last primary-key column and leaves
    # primary_key unbound if the table has none - presumably the target
    # table always has exactly one; confirm.
    for column in table.primary_key.columns:
        primary_key = column

    # Create filter object and then add command line filter strings
    db_filter = Filter(alchemist=alchemist1, key=primary_key)
    db_filter.values = []

    # Attempt to add filters and exit if needed.
    add_filters(db_filter, filters)

    # Performs the query
    db_filter.update()

    # db_filter.values now contains list of PhageIDs that pass the filters.
    # Get the number of genomes that will be retained and build the
    # MYSQL DELETE statement.
    keep_set = set(db_filter.values)
    delete_stmt = construct_delete_stmt(TARGET_TABLE, primary_key, keep_set)
    count_query = construct_count_query(TARGET_TABLE, primary_key, keep_set)
    phage_count = mysqldb_basic.scalar(alchemist1.engine, count_query)

    # Determine the name of the new database.
    if new_database is None:
        if prefix is None:
            prefix = get_prefix()
        new_database = f"{prefix}_{phage_count}"

    # Create the new database, but prevent overwriting of current database.
    if engine1.url.database != new_database:
        result = mysqldb_basic.drop_create_db(engine1, new_database)
    else:
        print(
            "Error: names of the reference and frozen databases are the same.")
        print("No database will be created.")
        result = 1

    # Copy database.
    if result == 0:
        print(f"Reference database: {ref_database}")
        print(f"New database: {new_database}")
        result = mysqldb_basic.copy_db(engine1, new_database)
        if result == 0:
            print("Deleting genomes...")
            alchemist2 = AlchemyHandler(database=new_database,
                                        username=engine1.url.username,
                                        password=engine1.url.password)
            alchemist2.connect(pipeline=True)
            engine2 = alchemist2.engine
            try:
                engine2.execute(delete_stmt)
                if reset:
                    engine2.execute(RESET_VERSION)
            finally:
                # Close up all connections in the connection pool, even
                # if one of the statements failed.
                engine2.dispose()
        else:
            print("Unable to copy the database.")
    else:
        print(f"Error creating new database: {new_database}.")

    # Close up all connections in the connection pool, regardless of
    # whether the new database could be created.
    engine1.dispose()
    print("Freeze database script completed.")
Ejemplo n.º 18
0
 def test_constructor_1(self):
     """Verify that Filter can be constructed without any arguments."""
     # The instance itself is not inspected; only construction is exercised.
     Filter()
Ejemplo n.º 19
0
class TestPhamReview(unittest.TestCase):
    """Integration tests for the review pipeline against the test database.

    The database and a scratch directory are created once for the class;
    each test exports into a "review_test_dir" sub-directory that is removed
    again after the test.
    """

    @classmethod
    def setUpClass(cls):
        test_db_utils.create_filled_test_db()

        # Start from an empty scratch directory.
        cls.test_dir = Path(TEST_DIR)
        if cls.test_dir.is_dir():
            shutil.rmtree(TEST_DIR)

        cls.test_dir.mkdir()

    @classmethod
    def tearDownClass(cls):
        test_db_utils.remove_db()
        shutil.rmtree(TEST_DIR)

    def setUp(self):
        self.review_test_dir = self.test_dir.joinpath("review_test_dir")

        self.alchemist = AlchemyHandler()
        self.alchemist.username = USER
        self.alchemist.password = PWD
        self.alchemist.database = DB
        self.alchemist.connect(ask_database=True, login_attempts=0)

        self.db_filter = Filter(alchemist=self.alchemist)
        self.db_filter.add(review.BASE_CONDITIONALS)
        self.db_filter.key = "gene.PhamID"

    def tearDown(self):
        if self.review_test_dir.is_dir():
            shutil.rmtree(str(self.review_test_dir))

    def test_execute_review_1(self):
        """Verify execute_review() creates new directory as expected.
        """
        review.execute_review(self.alchemist, self.test_dir,
                              self.review_test_dir.name)

        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_2(self):
        """Verify execute_review() filter parameter functions as expected.
        """
        review.execute_review(self.alchemist,
                              self.test_dir,
                              self.review_test_dir.name,
                              filters=("phage.Cluster='A' "
                                       "AND phage.Subcluster='A2'"))

        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_3(self):
        """Verify execute_review() group parameter functions as expected.
        """
        review.execute_review(self.alchemist,
                              self.test_dir,
                              self.review_test_dir.name,
                              groups=["phage.Cluster"])

        self.assertTrue(self.review_test_dir.is_dir())

        clusterA_dir = self.review_test_dir.joinpath("A")
        self.assertTrue(clusterA_dir.is_dir())

    def test_execute_review_4(self):
        """Verify execute_review() sort parameter functions as expected.
        """
        review.execute_review(self.alchemist,
                              self.test_dir,
                              self.review_test_dir.name,
                              sort=["gene.Name"])

        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_5(self):
        """Verify execute_review() review parameter functions as expected.
        """
        review.execute_review(self.alchemist,
                              self.test_dir,
                              self.review_test_dir.name,
                              review=False)

        self.assertTrue(self.review_test_dir.is_dir())

    def test_execute_review_6(self):
        """Verify execute_review() g_reports parameter functions as expected.
        """
        review.execute_review(self.alchemist,
                              self.test_dir,
                              self.review_test_dir.name,
                              g_reports=True)

        self.assertTrue(self.review_test_dir.is_dir())

        gene_report_dir = self.review_test_dir.joinpath("GeneReports")
        self.assertTrue(gene_report_dir.is_dir())

    def test_execute_review_7(self):
        """Verify execute_review() s_report parameter functions as expected.
        """
        review.execute_review(self.alchemist,
                              self.test_dir,
                              self.review_test_dir.name,
                              s_report=True)

        self.assertTrue(self.review_test_dir.is_dir())

        summary_report_file = self.review_test_dir.joinpath(
            "SummaryReport.txt")
        self.assertTrue(summary_report_file.is_file())

    def test_review_phams_1(self):
        """Verify review_phams() correctly identifies discrepancies.
        """
        self.db_filter.values = self.db_filter.build_values(
            where=self.db_filter.build_where_clauses())

        review.review_phams(self.db_filter)

        self.assertNotIn(39854, self.db_filter.values)
        self.assertIn(40481, self.db_filter.values)

    def test_get_pf_data_1(self):
        """Verify get_pf_data() retrieves and returns data as expected.
        """
        self.db_filter.values = [40481]

        pf_data = review.get_pf_data(self.alchemist, self.db_filter)

        self.assertIsInstance(pf_data, list)

        # Every header must be present and hold a single, non-list value.
        for header in review.PF_HEADER:
            with self.subTest(header=header):
                self.assertIn(header, pf_data[0])
                self.assertNotIsInstance(pf_data[0][header], list)

    def test_get_g_data_1(self):
        """Verify get_g_data() retrieves and returns data as expected.
        """
        self.db_filter.values = [40481]

        g_data = review.get_g_data(self.alchemist, self.db_filter, 40481)

        self.assertIsInstance(g_data, list)

        # Every header must be present and hold a single, non-list value.
        for header in review.PG_HEADER:
            with self.subTest(header=header):
                self.assertIn(header, g_data[0])
                self.assertNotIsInstance(g_data[0][header], list)
Ejemplo n.º 20
0
def execute_resubmit(alchemist,
                     revisions_data_dicts,
                     folder_path,
                     folder_name,
                     filters="",
                     groups=None,
                     verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
        Each dictionary is read for its "Pham" and "Final Call" keys.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: A conditional string added to the Filter; skipped when
        empty.  A string the Filter rejects is reported and ignored.
    :type filters: str
    :param groups: Grouping values forwarded to export_db.build_groups_map().
        Defaults to an empty list.
    :type groups: list
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # A None default avoids sharing one mutable list between calls.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Best effort: a rejected conditional string is reported and the
            # pipeline carries on with the base conditionals only.  Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    db_filter.values = [data_dict["Pham"]
                        for data_dict in revisions_data_dicts]

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    # Filled in place by build_groups_map(): export path -> where clauses.
    conditionals_map = {}
    export_db.build_groups_map(db_filter,
                               export_path,
                               conditionals_map,
                               groups=groups,
                               verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path, base_conditionals in conditionals_map.items():
        if verbose:
            print("Retreiving phage data for pham revisions...")
        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""

            # Build a fresh clause list per revision.  Appending to the
            # mapped list itself would carry this revision's Notes clause
            # into the queries of every later revision for this path.
            conditionals = list(base_conditionals)
            conditionals.append(
                querying.build_where_clause(alchemist.graph,
                                            f"gene.Notes!={final_call}"))

            query = querying.build_select(alchemist.graph,
                                          resubmit_columns,
                                          where=conditionals)

            results = querying.execute(alchemist.engine,
                                       query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")

            # Nothing to write for this group, so drop its directory.
            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts,
                               file_path,
                               RESUBMIT_HEADER,
                               include_headers=True)
Ejemplo n.º 21
0
def execute_export(alchemist,
                   output_path,
                   output_name,
                   values=[],
                   verbose=False,
                   csv_export=False,
                   ffile_export=None,
                   db_export=False,
                   table="phage",
                   filters=None,
                   groups=None):
    """Executes the entirety of the file export pipeline.

    Exactly one export runs per call, checked in this order: db_export,
    then csv_export, then ffile_export.

    :param alchemist:
        Input a handler exposing the engine and tables of the database.
    :type alchemist: AlchemyHandler
    :param output_path:
        Input a valid path to place export folder.
    :type output_path: Path
    :param output_name:
        Input a name for the export folder.
    :type output_name: str
    :param values:
        Input a list of values to assign to the Filter.
    :type values: List[str]
    :param verbose:
        Input a boolean value for verbose option.
    :type verbose: boolean
    :param csv_export:
        Input a boolean value to toggle csv_export.
    :type csv_export: boolean
    :param ffile_export:
        Input a SeqIO supported file format to toggle ffile_export.
    :type ffile_export: str
    :param db_export:
        Input a boolean value to toggle db_export.
    :type db_export: boolean
    :param table:
        Input the name of the table whose primary key keys the Filter.
    :type table: str
    :param filters:
        Input a list of lists with filter values
    :type filters: List[List[str]]
    :param groups:
        Input a list of supported group values.
    :type groups: List[str]
    """

    if verbose:
        print("Retrieving database version...")
    db_version = mysqldb.get_version_table_data(alchemist.engine)

    if verbose:
        print("Creating export folder...")

    export_path = output_path.joinpath(output_name)
    export_path = basic.make_new_dir(output_path, export_path, attempt=50)

    if db_export:
        if verbose:
            print("Writing SQL database file...")
        write_database(alchemist, db_version["Version"], export_path)

    elif csv_export or ffile_export is not None:
        table_obj = alchemist.get_table(table)
        # When the primary key spans several columns the last one is used.
        for column in table_obj.primary_key.columns:
            primary_key = column

        db_filter = Filter(alchemist=alchemist, key=primary_key)
        # `values` keeps its list default because an explicit None is
        # forwarded to the Filter unchanged; any list is copied so the
        # shared default object is never handed to the Filter.
        db_filter.values = values if values is None else list(values)

        for or_filters in filters or ():
            for conditional in or_filters:
                db_filter.add(conditional)

        db_filter.update()

        # Filters were requested but matched nothing: nothing to export.
        if filters and not db_filter.values:
            return

        values_map = {}
        if groups:
            build_groups_map(db_filter,
                             export_path,
                             groups=groups,
                             values_map=values_map,
                             verbose=verbose)
        else:
            values_map[export_path] = db_filter.values

        for mapped_path, mapped_values in values_map.items():
            if csv_export:
                execute_csv_export(alchemist,
                                   mapped_path,
                                   table=table,
                                   values=mapped_values,
                                   verbose=verbose)

            elif ffile_export is not None:
                execute_ffx_export(alchemist,
                                   mapped_path,
                                   ffile_export,
                                   db_version,
                                   table=table,
                                   values=mapped_values,
                                   verbose=verbose)