def test_motif_scoring(self): """tests the motif scoring in integration""" search_distances = {'upstream': (-20, 150)} scan_distances = {'upstream': (-30, 250)} matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter]) infile = util.read_dfile('example_data/hal/halo_ratios5.tsv', has_header=True, quote='\"') ratio_matrix = matrix_factory.create_from(infile) meme_suite = meme.MemeSuite430(remove_tempfiles=True) sequence_filters = [ motif.unique_filter, motif.get_remove_low_complexity_filter(meme_suite), motif.get_remove_atgs_filter(search_distances['upstream'])] organism = make_halo(ratio_matrix, search_distances, scan_distances) membership = FakeMembership() config_params = {'memb.min_cluster_rows_allowed': 3, 'memb.max_cluster_rows_allowed': 70, 'multiprocessing': False, 'num_clusters': 1, 'output_dir': 'out', 'debug': False, 'num_iterations': 2000} func = motif.MemeScoringFunction(organism, membership, ratio_matrix, meme_suite, sequence_filters=sequence_filters, scaling_func=lambda iter: 1.0, num_motif_func=lambda iter: 1, update_in_iteration=lambda x: True, motif_in_iteration=lambda x: True, config_params=config_params) iteration_result = { 'iteration': 100 } matrix = func.compute(iteration_result) """
def test_motif_scoring(self): """tests the motif scoring in integration""" search_distances = {'upstream': (-20, 150)} scan_distances = {'upstream': (-30, 250)} matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter]) infile = util.DelimitedFile.read('halo_ratios5.tsv', has_header=True, quote='\"') ratio_matrix = matrix_factory.create_from(infile) meme_suite = meme.MemeSuite430(remove_tempfiles=True) sequence_filters = [ motif.unique_filter, motif.get_remove_low_complexity_filter(meme_suite), motif.get_remove_atgs_filter(search_distances['upstream'])] organism = make_halo(ratio_matrix, search_distances, scan_distances) membership = FakeMembership() config_params = {'memb.min_cluster_rows_allowed': 3, 'memb.max_cluster_rows_allowed': 70, 'multiprocessing': False, 'num_clusters': 1, 'output_dir': 'out', 'num_iterations': 2000} func = motif.MemeScoringFunction(organism, membership, ratio_matrix, meme_suite, sequence_filters=sequence_filters, scaling_func=lambda iter: 1.0, num_motif_func=motif.default_nmotif_fun, config_params=config_params) iteration_result = { 'iteration': 100 } matrix = func.compute(iteration_result) """
def make_row_scoring(self):
    """Assemble the combined row scoring function.

    Row scoring and network scoring are always included. Set enrichment,
    MEME and Weeder scoring are added depending on the ADD_SET_ENRICHMENT,
    ADD_MEME and ADD_WEEDER switches. When both MEME and Weeder are
    enabled, the two are wrapped in a nested combiner whose scaling
    function is the constant 0.5; otherwise the enabled one (if any) is
    added directly.
    """
    def drop_location(seqs, feature_ids):
        # the location has to be removed from the sequence entries when
        # selecting for individual clusters: keep element 1 of each entry
        return {key: entry[1] for key, entry in seqs.items()}

    base_filters = [drop_location]
    network_scaling = scoring.get_default_network_scaling(
        self['num_iterations'])

    row_function = microarray.RowScoringFunction(
        self.membership(), self.ratio_matrix,
        lambda iteration: ROW_WEIGHT,
        config_params=self.config_params)
    functions = [row_function, self.make_network_scoring(network_scaling)]

    if ADD_SET_ENRICHMENT:
        functions.append(
            self.make_set_scoring(network_scaling, USE_SET_TYPES))

    # collect the enabled motif finders in the order MEME, Weeder
    motif_functions = []
    if ADD_MEME:
        suite = self.meme_suite('upstream')
        motif_functions.append(self.make_meme_scoring(
            'upstream', suite,
            base_filters + [motif.get_remove_low_complexity_filter(suite)]))
    if ADD_WEEDER:
        suite = self.meme_suite(WEEDER_SEQ_TYPE)
        motif_functions.append(self.make_weeder_scoring(
            WEEDER_SEQ_TYPE, suite,
            base_filters + [motif.get_remove_low_complexity_filter(suite)]))

    if len(motif_functions) == 2:
        # both finders enabled: combine them into a single entry
        functions.append(scoring.ScoringFunctionCombiner(
            self.membership(), motif_functions,
            scaling_func=lambda iteration: 0.5,
            config_params=self.config_params))
    else:
        functions.extend(motif_functions)

    return scoring.ScoringFunctionCombiner(
        self.membership(), functions, config_params=self.config_params)
def test_motif_scoring(self):
    """Check computed motif scores against testdata/motscores_fixed.tsv.

    The scores are sorted by row name and passed through
    fix_extreme_values() before the comparison.
    """
    meme_settings = {'max_width': 24,
                     'background_order': 3,
                     'use_revcomp': 'True'}
    meme_suite = meme.MemeSuite430({'MEME': meme_settings})
    # NOTE(review): meme_suite and sequence_filters are created here but
    # are not handed to MemeScoringFunction below - confirm whether they
    # are still needed.
    sequence_filters = [
        motif.unique_filter,
        motif.get_remove_low_complexity_filter(meme_suite),
        motif.get_remove_atgs_filter(self.search_distances['upstream']),
    ]

    scoring_function = motif.MemeScoringFunction(
        self.organism, self.membership, self.ratio_matrix,
        config_params=self.config_params)
    computed = scoring_function.compute(self.iteration_result)
    motscores = computed.sorted_by_row_name()
    motscores.fix_extreme_values()

    expected = read_matrix('testdata/motscores_fixed.tsv')
    self.assertTrue(check_matrix_values(motscores, expected))
def make_row_scoring(self):
    """Create the row, motif and network scoring functions and combine them.

    Each of the three functions is also stored on the instance as
    row_scoring, motif_scoring and network_scoring. The returned combiner
    is created with log_subresults=True.
    """
    # row scoring, scaled by the configured 'row_scaling' value
    row_function = microarray.RowScoringFunction(
        self.membership(), self.ratio_matrix,
        scaling_func=lambda iteration: self['row_scaling'],
        config_params=self.config_params)
    self.row_scoring = row_function

    # motif scoring through MEME
    meme_suite = meme.MemeSuite430()
    upstream_search = self['search_distances']['upstream']
    sequence_filters = [
        motif.unique_filter,
        motif.get_remove_low_complexity_filter(meme_suite),
        motif.get_remove_atgs_filter(upstream_search),
    ]
    motif_scaling = scoring.get_default_motif_scaling(self['num_iterations'])
    motif_function = motif.MemeScoringFunction(
        self.organism(), self.membership(), self.ratio_matrix, meme_suite,
        sequence_filters=sequence_filters,
        scaling_func=motif_scaling,
        num_motif_func=motif.default_nmotif_fun,
        update_in_iteration=scoring.schedule(601, 3),
        motif_in_iteration=scoring.schedule(600, 100),
        #update_in_iteration=scoring.schedule(100, 10),
        #motif_in_iteration=scoring.schedule(100, 100),
        config_params=self.config_params)
    self.motif_scoring = motif_function

    # network scoring
    network_scaling = scoring.get_default_network_scaling(
        self['num_iterations'])
    network_function = nw.ScoringFunction(
        self.organism(), self.membership(), self.ratio_matrix,
        scaling_func=network_scaling,
        run_in_iteration=scoring.schedule(1, 7),
        config_params=self.config_params)
    self.network_scoring = network_function

    return scoring.ScoringFunctionCombiner(
        self.membership(),
        [row_function, motif_function, network_function],
        config_params=self.config_params,
        log_subresults=True)
def test_motif_scoring(self):
    """Check computed motif scores against testdata/motscores_fixed.tsv.

    The MEME scoring function is configured to update and to search for
    motifs in every iteration; its result is sorted by row name before
    the comparison.
    """
    meme_suite = meme.MemeSuite430()
    low_complexity_filter = motif.get_remove_low_complexity_filter(meme_suite)
    atgs_filter = motif.get_remove_atgs_filter(
        self.search_distances['upstream'])
    sequence_filters = [motif.unique_filter, low_complexity_filter,
                        atgs_filter]
    motif_scaling = scoring.get_default_motif_scaling(2000)

    scoring_function = motif.MemeScoringFunction(
        self.organism, self.membership, self.ratio_matrix, meme_suite,
        sequence_filters=sequence_filters,
        scaling_func=motif_scaling,
        num_motif_func=motif.default_nmotif_fun,
        update_in_iteration=lambda x: True,
        motif_in_iteration=lambda x: True,
        config_params=self.config_params)
    computed = scoring_function.compute(self.iteration_result)
    motscores = computed.sorted_by_row_name()

    expected = read_matrix('testdata/motscores_fixed.tsv')
    self.assertTrue(check_matrix_values(motscores, expected))
def test_motif_scoring(self):
    """Check computed motif scores against testdata/ref_motscores.tsv.

    The scores are sorted by row name and passed through
    fix_extreme_values() before the comparison.
    """
    meme_settings = {
        'max_width': 24,
        'background_order': 3,
        'use_revcomp': 'True',
        'arg_mod': 'zoops',
    }
    meme_suite = meme.MemeSuite430({'MEME': meme_settings})
    # NOTE(review): meme_suite and sequence_filters are created here but
    # are not handed to MemeScoringFunction below - confirm whether they
    # are still needed.
    sequence_filters = [
        motif.unique_filter,
        motif.get_remove_low_complexity_filter(meme_suite),
        motif.get_remove_atgs_filter(self.search_distances['upstream']),
    ]

    scoring_function = motif.MemeScoringFunction(
        self.organism, self.membership, self.ratio_matrix,
        config_params=self.config_params)
    computed = scoring_function.compute(self.iteration_result)
    motscores = computed.sorted_by_row_name()
    motscores.fix_extreme_values()

    expected = read_matrix('testdata/ref_motscores.tsv')
    self.assertTrue(check_matrix_values(motscores, expected))
def test_motif_scoring(self):
    """Check computed motif scores against testdata/motscores_fixed.tsv.

    The MEME scoring function asks for one motif per iteration and is
    configured to update and to search for motifs in every iteration.
    Its result is sorted by row name and passed through
    fix_extreme_values() before the comparison.
    """
    meme_suite = meme.MemeSuite430()
    low_complexity_filter = motif.get_remove_low_complexity_filter(meme_suite)
    atgs_filter = motif.get_remove_atgs_filter(
        self.search_distances['upstream'])
    sequence_filters = [motif.unique_filter, low_complexity_filter,
                        atgs_filter]
    motif_scaling = get_default_motif_scaling(2000, offset=0)

    scoring_function = motif.MemeScoringFunction(
        self.organism, self.membership, self.ratio_matrix, meme_suite,
        sequence_filters=sequence_filters,
        scaling_func=motif_scaling,
        num_motif_func=lambda iteration: 1,
        update_in_iteration=lambda x: True,
        motif_in_iteration=lambda x: True,
        config_params=self.config_params)
    computed = scoring_function.compute(self.iteration_result)
    motscores = computed.sorted_by_row_name()
    motscores.fix_extreme_values()

    expected = read_matrix('testdata/motscores_fixed.tsv')
    self.assertTrue(check_matrix_values(motscores, expected))
def make_row_scoring(self):
    """Create the row, motif and network scoring functions and combine them.

    The motif function uses a constant scaling of 1.0 and the network
    function a constant scaling of 0.0. The returned combiner is created
    with log_subresults=True.
    """
    # row scoring, scaled by the configured 'row_scaling' value
    row_function = microarray.RowScoringFunction(
        self.membership(), self.ratio_matrix,
        scaling_func=lambda iteration: self['row_scaling'],
        config_params=self.config_params)

    # motif scoring through MEME
    meme_suite = meme.MemeSuite481(remove_tempfiles=True)
    upstream_search = self['search_distances']['upstream']
    sequence_filters = [
        motif.unique_filter,
        motif.get_remove_low_complexity_filter(meme_suite),
        motif.get_remove_atgs_filter(upstream_search),
    ]
    # NOTE(review): the original was collapsed onto one line; the TODO
    # marker is assumed to annotate scaling_func only - confirm.
    motif_function = motif.MemeScoringFunction(
        self.organism(), self.membership(), self.ratio_matrix, meme_suite,
        sequence_filters=sequence_filters,
        pvalue_filter=motif.MinPValueFilter(-20.0),
        scaling_func=lambda iteration: 1.0,  # TODO
        run_in_iteration=motif_iterations,
        config_params=self.config_params)

    # network scoring
    network_function = nw.ScoringFunction(
        self.organism(), self.membership(), self.ratio_matrix,
        scaling_func=lambda iteration: 0.0,
        run_in_iteration=network_iterations,
        config_params=self.config_params)

    return scoring.ScoringFunctionCombiner(
        self.membership(),
        [row_function, motif_function, network_function],
        log_subresults=True)
def make_row_scoring(self):
    """Build the combined row scoring function from the configuration.

    Row scoring is always included. Motif scoring is added when
    self['domotifs'] is set and network scoring when self['donetworks']
    is set.

    Raises:
        Exception: if motif scoring is enabled and self['meme_version']
            is neither '4.3.0' nor a version starting with '4.8' or '4.9'.
    """
    row_function = microarray.RowScoringFunction(
        self.organism(), self.membership(), self.ratio_matrix,
        scaling_func=scoring.get_scaling(self, 'row_'),
        schedule=self["row_schedule"],
        config_params=self.config_params)
    functions = [row_function]

    if self['domotifs']:
        # optional global background file, built for the first
        # configured sequence type
        if self['global_background']:
            background_file = meme.global_background_file(
                self.organism(), self.ratio_matrix.row_names,
                self['sequence_types'][0])
        else:
            background_file = None

        # pick the suite class matching the configured MEME version
        version = self['meme_version']
        if version == '4.3.0':
            suite_class = meme.MemeSuite430
        elif version and version.startswith(('4.8', '4.9')):
            suite_class = meme.MemeSuite481
        else:
            logging.error("MEME version %s currently not supported !",
                          version)
            raise Exception("unsupported MEME version")
        meme_suite = suite_class(background_file=background_file)

        upstream_search = self['search_distances']['upstream']
        sequence_filters = [
            motif.unique_filter,
            motif.get_remove_low_complexity_filter(meme_suite),
            motif.get_remove_atgs_filter(upstream_search),
        ]
        motif_scaling = scoring.get_scaling(self, 'motif_')
        num_motifs = motif.num_meme_motif_fun(self)
        functions.append(motif.MemeScoringFunction(
            self.organism(), self.membership(), self.ratio_matrix,
            meme_suite,
            sequence_filters=sequence_filters,
            scaling_func=motif_scaling,
            num_motif_func=num_motifs,
            update_in_iteration=self['motif_schedule'],
            motif_in_iteration=self['meme_schedule'],
            config_params=self.config_params))

    if self['donetworks']:
        network_scaling = scoring.get_scaling(self, 'network_')
        functions.append(nw.ScoringFunction(
            self.organism(), self.membership(), self.ratio_matrix,
            scaling_func=network_scaling,
            schedule=self['network_schedule'],
            config_params=self.config_params))

    return scoring.ScoringFunctionCombiner(
        self.organism(), self.membership(), functions,
        config_params=self.config_params)
def make_row_scoring(self):
    """Build the combined row scoring function from the configuration.

    The result always contains the row scoring function; the motif
    scoring function is added when self['domotifs'] is set and the
    network scoring function when self['donetworks'] is set.

    Raises:
        Exception: if motif scoring is enabled and self['meme_version']
            is neither '4.3.0' nor a version starting with '4.8' or '4.9'.
    """
    def make_meme_suite():
        """Create the MEME suite object for the configured version."""
        background_file = None
        if self['global_background']:
            background_file = meme.global_background_file(
                self.organism(), self.ratio_matrix.row_names,
                self['sequence_types'][0])

        if self['meme_version'] == '4.3.0':
            return meme.MemeSuite430(background_file=background_file)
        if (self['meme_version'] and
                (self['meme_version'].startswith('4.8') or
                 self['meme_version'].startswith('4.9'))):
            return meme.MemeSuite481(background_file=background_file)

        logging.error("MEME version %s currently not supported !",
                      self['meme_version'])
        raise Exception("unsupported MEME version")

    def make_motif_scoring():
        """Create the MEME based motif scoring function."""
        meme_suite = make_meme_suite()
        sequence_filters = [
            motif.unique_filter,
            motif.get_remove_low_complexity_filter(meme_suite),
            motif.get_remove_atgs_filter(
                self['search_distances']['upstream']),
        ]
        motif_scaling = scoring.get_scaling(self, 'motif_')
        num_motifs = motif.num_meme_motif_fun(self)
        return motif.MemeScoringFunction(
            self.organism(), self.membership(), self.ratio_matrix,
            meme_suite,
            sequence_filters=sequence_filters,
            scaling_func=motif_scaling,
            num_motif_func=num_motifs,
            update_in_iteration=self['motif_schedule'],
            motif_in_iteration=self['meme_schedule'],
            config_params=self.config_params)

    def make_network_scoring():
        """Create the network scoring function."""
        network_scaling = scoring.get_scaling(self, 'network_')
        return nw.ScoringFunction(
            self.organism(), self.membership(), self.ratio_matrix,
            scaling_func=network_scaling,
            schedule=self['network_schedule'],
            config_params=self.config_params)

    row_scaling = scoring.get_scaling(self, 'row_')
    functions = [microarray.RowScoringFunction(
        self.organism(), self.membership(), self.ratio_matrix,
        scaling_func=row_scaling,
        schedule=self["row_schedule"],
        config_params=self.config_params)]

    if self['domotifs']:
        functions.append(make_motif_scoring())
    if self['donetworks']:
        functions.append(make_network_scoring())

    return scoring.ScoringFunctionCombiner(
        self.organism(), self.membership(), functions,
        config_params=self.config_params)