Ejemplo n.º 1
0
    def setUp(self):  # pylint: disable=C0103
        """Build the fixture: distances, ratio matrix, organism and config.

        The ratio matrix is read from 'example_data/hal/halo_ratios5.tsv' and
        passed through the nochange and center/scale filters. The membership
        is read last because __read_members() relies on self.config_params.
        """
        self.search_distances = {'upstream': (-20, 150)}
        self.scan_distances = {'upstream': (-30, 250)}

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_file = util.read_dfile(
            'example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"')
        self.ratio_matrix = factory.create_from(ratios_file)
        self.organism = make_halo(
            self.ratio_matrix, self.search_distances, self.scan_distances)

        # clusters_per_col is derived from the cluster count (2/3 of it,
        # rounded to the nearest integer).
        num_clusters = 43
        clusters_per_col = int(round(num_clusters * 2.0 / 3.0))
        self.config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'memb.clusters_per_row': 2,
            'memb.clusters_per_col': clusters_per_col,
            'num_clusters': num_clusters,
            'output_dir': 'out',
            'remap_network_nodes': False,
            'num_iterations': 2000,
            'debug': False,
        }

        # Must come after config_params has been assigned.
        self.membership = self.__read_members()
        self.iteration_result = {'iteration': 51}
Ejemplo n.º 2
0
    def test_motif_scoring(self):
        """Run the MEME motif scoring function end to end on the halo data.

        Builds the ratio matrix, a MemeSuite430 instance, the sequence
        filters, the organism and a fake membership, then calls compute()
        on a MemeScoringFunction for iteration 100.
        """
        # Distance tuples for the 'upstream' sequence type; both dicts are
        # handed to make_halo() below.
        search_distances = {'upstream': (-20, 150)}
        scan_distances = {'upstream': (-30, 250)}

        # Ratios are read from the halo example file and passed through the
        # nochange and center/scale filters.
        matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter])
        infile = util.read_dfile('example_data/hal/halo_ratios5.tsv',
                                 has_header=True, quote='\"')
        ratio_matrix = matrix_factory.create_from(infile)
        meme_suite = meme.MemeSuite430(remove_tempfiles=True)
        # Filters passed to the scoring function as sequence_filters; two of
        # them are built from the MEME suite and the upstream search distance.
        sequence_filters = [
            motif.unique_filter,
            motif.get_remove_low_complexity_filter(meme_suite),
            motif.get_remove_atgs_filter(search_distances['upstream'])]

        organism = make_halo(ratio_matrix, search_distances, scan_distances)
        membership = FakeMembership()
        config_params = {'memb.min_cluster_rows_allowed': 3,
                         'memb.max_cluster_rows_allowed': 70,
                         'multiprocessing': False,
                         'num_clusters': 1,
                         'output_dir': 'out',
                         'debug': False,
                         'num_iterations': 2000}
        # All callbacks are constants: scaling 1.0, one motif, and the
        # update/motif predicates always return True.
        func = motif.MemeScoringFunction(organism, membership, ratio_matrix,
                                         meme_suite,
                                         sequence_filters=sequence_filters,
                                         scaling_func=lambda iter: 1.0,
                                         num_motif_func=lambda iter: 1,
                                         update_in_iteration=lambda x: True,
                                         motif_in_iteration=lambda x: True,
                                         config_params=config_params)
        iteration_result = { 'iteration': 100 }
        # NOTE(review): no assertion on `matrix` is visible at this point;
        # confirm what the remainder of the method checks.
        matrix = func.compute(iteration_result)
        """
Ejemplo n.º 3
0
 def test_simple_filter(self):
     """Check a factory configured with the single `times2` filter.

     The matrix created from the `self.dfile` fixture must be 2x2, carry
     the column names H2/H3 and the row names R1/R2, and contain the rows
     [2, 4] and [6, 8].
     """
     factory = dm.DataMatrixFactory([times2])
     matrix = factory.create_from(self.dfile)
     # assertEqual replaces the assertEquals alias, which was deprecated in
     # Python 3.2 and removed in Python 3.12.
     self.assertEqual(2, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(matrix.column_names, ["H2", "H3"])
     self.assertEqual(matrix.row_names, ["R1", "R2"])
     # Row comparisons are element-wise, hence the .all() reduction.
     self.assertTrue((matrix.values[0] == [2, 4]).all())
     self.assertTrue((matrix.values[1] == [6, 8]).all())
Ejemplo n.º 4
0
 def test_no_filters(self):
     """Check a factory that was created with an empty filter list.

     The matrix created from the `self.dfile` fixture must be 2x2, carry
     the column names H2/H3 and the row names R1/R2, and contain the rows
     [1, 2] and [3, 4].
     """
     factory = dm.DataMatrixFactory([])
     matrix = factory.create_from(self.dfile)
     # assertEqual replaces the assertEquals alias, which was deprecated in
     # Python 3.2 and removed in Python 3.12.
     self.assertEqual(2, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(matrix.column_names, ["H2", "H3"])
     self.assertEqual(matrix.row_names, ["R1", "R2"])
     # Row comparisons are element-wise, hence the .all() reduction.
     self.assertTrue((matrix.values[0] == [1, 2]).all())
     self.assertTrue((matrix.values[1] == [3, 4]).all())
Ejemplo n.º 5
0
 def test_with_na_values(self):
     """Check a filter-less factory on the `self.dfile_with_na` fixture.

     The resulting 2x2 matrix must keep the column names H2/H3 and the row
     names R1/R2; the first value of each row must be NaN and the second
     values must be 2.0 and 4.0.
     """
     factory = dm.DataMatrixFactory([])
     matrix = factory.create_from(self.dfile_with_na)
     # assertEqual replaces the assertEquals alias, which was deprecated in
     # Python 3.2 and removed in Python 3.12.
     self.assertEqual(2, matrix.num_rows)
     self.assertEqual(2, matrix.num_columns)
     self.assertEqual(matrix.column_names, ["H2", "H3"])
     self.assertEqual(matrix.row_names, ["R1", "R2"])
     # NaN never compares equal to itself, so it is checked with np.isnan.
     self.assertTrue(np.isnan(matrix.values[0][0]))
     self.assertEqual(2.0, matrix.values[0][1])
     self.assertTrue(np.isnan(matrix.values[1][0]))
     self.assertEqual(4.0, matrix.values[1][1])
Ejemplo n.º 6
0
    def test_motif_scoring(self):
        """Integration run of MemeScoringFunction.compute() on the halo data.

        Loads the filtered ratio matrix, creates the organism through
        testutil.make_halo(), assembles the configuration (including the
        'MEME' and 'Motifs' sections) and computes the scores for
        iteration 100.
        """
        search_distances = {'upstream': (-20, 150)}
        scan_distances = {'upstream': (-30, 250)}

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_file = util.read_dfile(
            'example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"')
        ratio_matrix = factory.create_from(ratios_file)
        organism = testutil.make_halo(
            search_distances, scan_distances, ratio_matrix)
        membership = FakeMembership()

        # Section-specific settings, kept separate for readability and
        # embedded into the main configuration below.
        meme_section = {
            'schedule': lambda i: True,
            'version': '4.3.0',
            'global_background': False,
            'arg_mod': 'zoops',
            'nmotifs_rvec':
            'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
            'use_revcomp': 'True',
            'max_width': 24,
            'background_order': 3,
        }
        motifs_section = {
            'schedule': lambda i: True,
            'scaling': ('scaling_const', 1.0),
        }
        config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'num_clusters': 1,
            'output_dir': 'out',
            'debug': {},
            'search_distances': {'upstream': (-20, 150)},
            'num_iterations': 2000,
            'MEME': meme_section,
            'Motifs': motifs_section,
        }

        func = motif.MemeScoringFunction(
            organism, membership, ratio_matrix, config_params=config_params)
        iteration_result = {'iteration': 100}
        matrix = func.compute(iteration_result)
Ejemplo n.º 7
0
 def __read_colscores_refresult(self):
     """Load 'testdata/column_scores_refresult.tsv' as a data matrix.

     The file is read with a header and '"' as quote character; the matrix
     is created without filters and with case_sensitive=True.
     """
     reference_file = util.read_dfile(
         'testdata/column_scores_refresult.tsv', has_header=True, quote='"')
     factory = dm.DataMatrixFactory([])
     return factory.create_from(reference_file, case_sensitive=True)
Ejemplo n.º 8
0
 def __read_ratios(self):
     """Load 'testdata/row_scores_testratios.tsv' as a data matrix.

     The file is read with a header; the matrix is created without filters
     and with case_sensitive=True.
     """
     ratios_file = util.read_dfile(
         'testdata/row_scores_testratios.tsv', has_header=True)
     factory = dm.DataMatrixFactory([])
     return factory.create_from(ratios_file, case_sensitive=True)
    def setUp(self):  # pylint: disable=C0103
        """Build the fixture: distances, ratio matrix, organism and config.

        The ratio matrix is read from 'example_data/hal/halo_ratios5.tsv' and
        passed through the nochange and center/scale filters. The membership
        is read last because __read_members() relies on self.config_params.
        """
        self.search_distances = {'upstream': (-20, 150)}
        self.scan_distances = {'upstream': (-30, 250)}

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_file = util.read_dfile(
            'example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"')
        self.ratio_matrix = factory.create_from(ratios_file)
        self.organism = testutil.make_halo(
            self.search_distances, self.scan_distances, self.ratio_matrix)

        # Named sections of the configuration; each one is embedded into
        # config_params under its own key below.
        columns_section = {'schedule': lambda i: True}
        rows_section = {
            'schedule': lambda i: True,
            'scaling': ('scaling_const', 6.0),
        }
        motifs_section = {
            'schedule': lambda i: True,
            'scaling':
            ('scaling_rvec', 'seq(0, 1, length=num_iterations*3/4)'),
        }
        meme_section = {
            'version': '4.3.0',
            'global_background': False,
            'schedule': lambda i: True,
            'nmotifs_rvec':
            'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
            'max_width': 24,
            'arg_mod': 'zoops',
            'background_order': 3,
            'use_revcomp': 'True',
        }
        networks_section = {
            'schedule': lambda i: True,
            'scaling':
            ('scaling_rvec', 'seq(1e-5, 0.5, length=num_iterations*3/4)'),
        }

        # clusters_per_col is derived from the cluster count (2/3 of it,
        # rounded to the nearest integer).
        num_clusters = 43
        clusters_per_col = int(round(num_clusters * 2.0 / 3.0))
        self.config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'num_cores': None,
            'memb.clusters_per_row': 2,
            'memb.clusters_per_col': clusters_per_col,
            'num_clusters': num_clusters,
            'output_dir': 'out',
            'remap_network_nodes': False,
            'use_BSCM': False,
            'num_iterations': 2000,
            'debug': {},
            'search_distances': {'upstream': (-20, 150)},
            'Columns': columns_section,
            'Rows': rows_section,
            'Motifs': motifs_section,
            'MEME': meme_section,
            'Networks': networks_section,
        }

        # Must come after config_params has been assigned.
        self.membership = self.__read_members()
        self.iteration_result = {'iteration': 51, 'score_means': {}}
def read_matrix(filename):
    """Read `filename` into a data matrix sorted by row name.

    The file is read with a header and '"' as quote character; the matrix
    is created without filters and with case_sensitive=True.
    """
    matrix_file = util.read_dfile(filename, has_header=True, quote='\"')
    factory = dm.DataMatrixFactory([])
    matrix = factory.create_from(matrix_file, case_sensitive=True)
    return matrix.sorted_by_row_name()
Ejemplo n.º 11
0
 def __read_rowscores_refresult(self):
     """Load 'testdata/row_scores_refresult.tsv' as a data matrix.

     The file is read with a header and '"' as quote character; the matrix
     is created without filters.
     """
     reference_file = util.read_dfile(
         'testdata/row_scores_refresult.tsv', has_header=True, quote='"')
     factory = dm.DataMatrixFactory([])
     return factory.create_from(reference_file)