def read_ratios(params, args_in):
    """Read the ratios matrix named by ``args_in.ratios``.

    The matrix is loaded from a URL (``http://`` or ``https://``) or from a
    local file, then run through a ``dm.DataMatrixFactory`` whose filters
    depend on ``params['normalize_ratios']``.

    params -- configuration mapping; the keys 'normalize_ratios' and
              'case_sensitive' are read
    args_in -- parsed arguments; the attributes ``ratios`` and
               ``case_sensitive`` are read

    Returns the result of ``DataMatrixFactory.create_from``.
    """
    if params['normalize_ratios']:
        # Turn off the nochange_filter if you're resuming a run and have
        # changed the data matrix.
        # NOTE(review): the explicit "== True" is kept on purpose, the return
        # type of test_data_change is not visible from here.
        if test_data_change(params, args_in) == True:
            ratio_filters = [dm.center_scale_filter]
        else:
            ratio_filters = [dm.nochange_filter, dm.center_scale_filter]
    else:
        ratio_filters = []

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args_in.ratios

    # https:// used to fall through to the local file reader.
    # NOTE(review): assumes util.read_url can fetch https URLs - confirm.
    if matrix_filename.startswith(('http://', 'https://')):
        indata = util.read_url(matrix_filename).decode('utf-8')
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    # either source may request case sensitivity; create_from gets a real bool
    case_sensitive = bool(params['case_sensitive'] or args_in.case_sensitive)
    return matrix_factory.create_from(infile, case_sensitive)
def setUp(self):  # pylint; disable-msg=C0103
    """Fixture: distances, filtered ratio matrix, halo organism,
    membership configuration and a starting iteration result"""
    num_clusters = 43
    self.search_distances = {'upstream': (-20, 150)}
    self.scan_distances = {'upstream': (-30, 250)}

    factory = dm.DataMatrixFactory(
        [dm.nochange_filter, dm.center_scale_filter])
    ratio_file = util.read_dfile('example_data/hal/halo_ratios5.tsv',
                                 has_header=True, quote='\"')
    self.ratio_matrix = factory.create_from(ratio_file)
    self.organism = testutil.make_halo(self.search_distances,
                                       self.scan_distances,
                                       self.ratio_matrix)

    settings = {}
    settings['memb.min_cluster_rows_allowed'] = 3
    settings['memb.max_cluster_rows_allowed'] = 70
    settings['multiprocessing'] = False
    settings['memb.clusters_per_row'] = 2
    settings['memb.clusters_per_col'] = int(round(num_clusters * 2.0 / 3.0))
    settings['num_clusters'] = num_clusters
    settings['num_iterations'] = 2000
    self.config_params = settings

    # reading the members relies on config_params being in place
    self.membership = self.__read_members()
    self.iteration_result = {'iteration': 51}
def test_simple_filter(self):
    """test a factory using a single filter (times2)

    Checks dimensions, row/column names and the values [2, 4] / [6, 8]
    of the matrix created from self.dfile.
    """
    # assertEqual instead of assertEquals: the alias is deprecated and no
    # longer exists in Python 3.12
    factory = dm.DataMatrixFactory([times2])
    matrix = factory.create_from(self.dfile)
    self.assertEqual(2, matrix.num_rows)
    self.assertEqual(2, matrix.num_columns)
    self.assertEqual(matrix.column_names, ["H2", "H3"])
    self.assertEqual(matrix.row_names, ["R1", "R2"])
    self.assertTrue((matrix.values[0] == [2, 4]).all())
    self.assertTrue((matrix.values[1] == [6, 8]).all())
def test_no_filters(self):
    """test a factory without filters

    Checks dimensions, row/column names and the values [1, 2] / [3, 4]
    of the matrix created from self.dfile.
    """
    # assertEqual instead of assertEquals: the alias is deprecated and no
    # longer exists in Python 3.12
    factory = dm.DataMatrixFactory([])
    matrix = factory.create_from(self.dfile)
    self.assertEqual(2, matrix.num_rows)
    self.assertEqual(2, matrix.num_columns)
    self.assertEqual(matrix.column_names, ["H2", "H3"])
    self.assertEqual(matrix.row_names, ["R1", "R2"])
    self.assertTrue((matrix.values[0] == [1, 2]).all())
    self.assertTrue((matrix.values[1] == [3, 4]).all())
def test_with_na_values(self):
    """test a factory with a DelimitedFile containing NA values

    The first column of the matrix created from self.dfile_with_na is
    expected to be NaN, the second column to hold 2.0 and 4.0.
    """
    # assertEqual instead of assertEquals: the alias is deprecated and no
    # longer exists in Python 3.12
    factory = dm.DataMatrixFactory([])
    matrix = factory.create_from(self.dfile_with_na)
    self.assertEqual(2, matrix.num_rows)
    self.assertEqual(2, matrix.num_columns)
    self.assertEqual(matrix.column_names, ["H2", "H3"])
    self.assertEqual(matrix.row_names, ["R1", "R2"])
    self.assertTrue(np.isnan(matrix.values[0][0]))
    self.assertEqual(2.0, matrix.values[0][1])
    self.assertTrue(np.isnan(matrix.values[1][0]))
    self.assertEqual(4.0, matrix.values[1][1])
def test_motif_scoring(self):
    """integration run of the MEME motif scoring on the halo example data;
    the computed result is not inspected, the call only has to complete"""
    upstream_search = {'upstream': (-20, 150)}
    upstream_scan = {'upstream': (-30, 250)}

    factory = dm.DataMatrixFactory(
        [dm.nochange_filter, dm.center_scale_filter])
    ratio_file = util.read_dfile('example_data/hal/halo_ratios5.tsv',
                                 has_header=True, quote='\"')
    ratios = factory.create_from(ratio_file)
    halo = testutil.make_halo(upstream_search, upstream_scan, ratios)
    fake_membership = FakeMembership()

    meme_settings = {
        'schedule': lambda i: True,
        'version': '4.3.0',
        'global_background': False,
        'arg_mod': 'zoops',
        'nmotifs_rvec': 'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
        'use_revcomp': 'True',
        'max_width': 24,
        'background_order': 3,
    }
    motif_settings = {
        'schedule': lambda i: True,
        'scaling': ('scaling_const', 1.0),
    }
    settings = {
        'memb.min_cluster_rows_allowed': 3,
        'memb.max_cluster_rows_allowed': 70,
        'multiprocessing': False,
        'num_clusters': 1,
        'output_dir': 'out',
        'debug': {},
        'search_distances': {'upstream': (-20, 150)},
        'num_iterations': 2000,
        'MEME': meme_settings,
        'Motifs': motif_settings,
    }

    scoring = motif.MemeScoringFunction(halo, fake_membership, ratios,
                                        config_params=settings)
    scoring.compute({'iteration': 100})
def setUp(self):  # pylint; disable-msg=C0103
    """Fixture: distances, filtered ratio matrix, halo organism, the full
    scoring configuration, membership and a starting iteration result"""
    num_clusters = 43
    self.search_distances = {'upstream': (-20, 150)}
    self.scan_distances = {'upstream': (-30, 250)}

    factory = dm.DataMatrixFactory(
        [dm.nochange_filter, dm.center_scale_filter])
    ratio_file = util.read_dfile('example_data/hal/halo_ratios5.tsv',
                                 has_header=True, quote='\"')
    self.ratio_matrix = factory.create_from(ratio_file)
    self.organism = testutil.make_halo(self.search_distances,
                                       self.scan_distances,
                                       self.ratio_matrix)

    # one settings dictionary per scoring section
    column_settings = {'schedule': lambda i: True}
    row_settings = {
        'schedule': lambda i: True,
        'scaling': ('scaling_const', 6.0),
    }
    motif_settings = {
        'schedule': lambda i: True,
        'scaling': ('scaling_rvec', 'seq(0, 1, length=num_iterations*3/4)'),
    }
    meme_settings = {
        'version': '4.3.0',
        'global_background': False,
        'schedule': lambda i: True,
        'nmotifs_rvec': 'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
        'max_width': 24,
        'arg_mod': 'zoops',
        'background_order': 3,
        'use_revcomp': 'True',
    }
    network_settings = {
        'schedule': lambda i: True,
        'scaling': ('scaling_rvec',
                    'seq(1e-5, 0.5, length=num_iterations*3/4)'),
    }

    self.config_params = {
        'memb.min_cluster_rows_allowed': 3,
        'memb.max_cluster_rows_allowed': 70,
        'multiprocessing': False,
        'num_cores': None,
        'memb.clusters_per_row': 2,
        'memb.clusters_per_col': int(round(num_clusters * 2.0 / 3.0)),
        'num_clusters': num_clusters,
        'output_dir': 'out',
        'remap_network_nodes': False,
        'use_BSCM': False,
        'num_iterations': 2000,
        'debug': {},
        'search_distances': {'upstream': (-20, 150)},
        'Columns': column_settings,
        'Rows': row_settings,
        'Motifs': motif_settings,
        'MEME': meme_settings,
        'Networks': network_settings,
    }

    # reading the members relies on config_params being in place
    self.membership = self.__read_members()
    self.iteration_result = {'iteration': 51, 'score_means': {}}
def read_matrix(filename):
    """loads the matrix stored in filename (header line, double-quote
    quoting, case sensitive, no filters) and returns it sorted by row name"""
    dfile = util.read_dfile(filename, has_header=True, quote='\"')
    unfiltered_factory = dm.DataMatrixFactory([])
    matrix = unfiltered_factory.create_from(dfile, case_sensitive=True)
    return matrix.sorted_by_row_name()
cond_id = cond_map[ratios.column_names[col]] value = ratios.values[row][col] outfile.write("%d\t%d\t%f\n" % (gene_id, cond_id, value)) if __name__ == '__main__': description = 'addnwportal.py - adding a cMonkey/python run to the database' parser = argparse.ArgumentParser(description=description) parser.add_argument('--resultdir', required=True, help='cMonkey result directory') parser.add_argument('--exptable', help='filename of expression table to generate', default=None) args = parser.parse_args() resultdb = os.path.join(args.resultdir, 'cmonkey_run.db') ratiofile = os.path.join(args.resultdir, 'ratios.tsv.gz') # read the matrix matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter]) infile = util.read_dfile(ratiofile, has_header=True, quote='\"') ratios = matrix_factory.create_from(infile) # access the run information conn = sqlite3.connect(resultdb) cursor = conn.cursor() cursor.execute('select organism, species, num_iterations, num_clusters from run_infos') orgcode, species, num_iterations, num_clusters = cursor.fetchone() print "organism: %s species: %s iterations: %d clusters: %d" % (orgcode, species, num_iterations, num_clusters) # start populating the database microbedb, organism = make_microbe(orgcode) ncbi_code = microbedb.rsat_info.taxonomy_id
def __read_colscores_refresult(self):
    """reads the column score reference result from testdata into a
    data matrix (no filters, case sensitive)"""
    reference_file = util.read_dfile('testdata/column_scores_refresult.tsv',
                                     has_header=True, quote='"')
    unfiltered_factory = dm.DataMatrixFactory([])
    return unfiltered_factory.create_from(reference_file,
                                          case_sensitive=True)
def __read_ratios(self):
    """reads the row score test ratios from testdata into a data matrix
    (no filters, case sensitive)"""
    ratio_file = util.read_dfile('testdata/row_scores_testratios.tsv',
                                 has_header=True)
    unfiltered_factory = dm.DataMatrixFactory([])
    return unfiltered_factory.create_from(ratio_file, case_sensitive=True)
def read_matrix(filename):
    """reads the tab separated matrix file (header line, double-quote
    quoting) without filters and returns the matrix sorted by row name"""
    table = util.DelimitedFile.read(filename, has_header=True, sep='\t',
                                    quote='"')
    unfiltered_factory = dm.DataMatrixFactory([])
    matrix = unfiltered_factory.create_from(table)
    return matrix.sorted_by_row_name()