def fit_sample(graphs, random_state=random.random()):
    '''
    Fit an rna.AbstractSampler on the given entries and sample from it.

    graphs: iterable of 2-item entries.  The complete entries are handed
        to sampler.fit; only the second item of each entry is handed to
        sampler.sample as a seed.
    random_state: not used anywhere in this function.  Note that the
        default value is computed once, when the function is defined.

    Returns a single flat list containing every item of every list that
    sampler.sample produced (original note: a list [('',sequ),..]).
    '''
    training_entries = list(graphs)

    estimator = estimatorwrapper(nu=.33, cv=2, n_jobs=-1)
    preprocessor = rna.PreProcessor(base_thickness_list=[1],
                                    ignore_inserts=True)
    postprocessor = rna.PostProcessor()
    # No feasibility_checker is passed to the sampler.
    sampler = rna.AbstractSampler(radius_list=[0, 1],
                                  thickness_list=[2],
                                  min_cip_count=1,
                                  min_interface_count=2,
                                  preprocessor=preprocessor,
                                  postprocessor=postprocessor,
                                  estimator=estimator)
    sampler.fit(training_entries, grammar_n_jobs=4, grammar_batch_size=1)

    # The grammar statistics are still fetched (and must unpack into four
    # values), but they are currently not logged.
    n_instances, n_interfaces, n_cores, n_cips = sampler.grammar().size()

    sampling_options = dict(
        n_samples=3,
        batch_size=1,
        n_steps=50,
        n_jobs=4,
        quick_skip_orig_cip=True,
        probabilistic_core_choice=True,
        burnin=10,
        improving_threshold=0.9,
        improving_linear_start=0.3,
        max_size_diff=20,
        accept_min_similarity=0.65,
        select_cip_max_tries=30,
        keep_duplicates=False,
        include_seed=False,
        backtrack=10,
        monitor=False,
    )
    seeds = [seed for _, seed in training_entries]
    sampled_batches = sampler.sample(seeds, **sampling_options)

    # Flatten the per-seed result lists into one list.
    return [item for batch in sampled_batches for item in batch]
def fit_sample_noabstr(sequences, argz, random_state=random.random()):
    '''
    Fit a sampler on (NAME, SEQUENCE)-style entries and sample from it.

    sequences: iterable of 2-item entries, described by the original
        author as (NAME, SEQUENCE).  The complete entries are used for
        fitting; only the second item of each entry seeds the sampling.
    argz: mapping that must provide the keys 'radius_list',
        'thickness_list', 'mincip_count' and 'min_interfacecount'.
    random_state: not used anywhere in this function.  Note that the
        default value is computed once, when the function is defined.

    Returns a list with the item at index 1 of every sampled entry.
    '''
    training_entries = list(sequences)

    # with .5 it also works for the fewer ones..
    estimator = estimatorwrapper(nu=.5, cv=2, n_jobs=1)

    radius_list = argz['radius_list']        # previously [0, 1, 2] (was 0, 1)
    thickness_list = argz['thickness_list']  # previously [1] (was 2)
    min_cip_count = argz['mincip_count']
    min_interface_count = argz['min_interfacecount']
    preprocessor = rnana.PreProcessor(base_thickness_list=[1],
                                      ignore_inserts=True)
    postprocessor = rna.PostProcessor()
    # No feasibility_checker is passed to the sampler.
    sampler = rna.AbstractSampler(radius_list=radius_list,
                                  thickness_list=thickness_list,
                                  min_cip_count=min_cip_count,
                                  min_interface_count=min_interface_count,
                                  preprocessor=preprocessor,
                                  postprocessor=postprocessor,
                                  estimator=estimator)
    sampler.fit(training_entries, grammar_n_jobs=1, grammar_batch_size=1)

    # The grammar statistics are still fetched (and must unpack into four
    # values), but they are currently not logged.
    n_instances, n_interfaces, n_cores, n_cips = sampler.grammar().size()

    sampling_options = dict(
        n_samples=5,
        batch_size=1,
        n_steps=55,
        n_jobs=1,
        quick_skip_orig_cip=True,
        probabilistic_core_choice=False,
        burnin=6,
        improving_threshold=0.5,
        improving_linear_start=0.15,
        max_size_diff=6,
        accept_min_similarity=0.55,
        select_cip_max_tries=30,
        keep_duplicates=False,
        include_seed=False,
        backtrack=2,
        monitor=False,
    )
    seeds = [seed for _, seed in training_entries]
    sampled_batches = sampler.sample(seeds, **sampling_options)

    # First flatten everything, then keep only the second field of each
    # sampled entry.
    flattened = [entry for batch in sampled_batches for entry in batch]
    return [entry[1] for entry in flattened]
def fit_sample(graphs, random_state=random.random()):
    '''
    Fit an rna.AbstractSampler, log its grammar size and sample from it.

    graphs: iterable of 2-item entries.  The complete entries are handed
        to sampler.fit; only the second item of each entry is handed to
        sampler.sample as a seed.
    random_state: not used anywhere in this function.  Note that the
        default value is computed once, when the function is defined.

    Returns a single flat list containing every item of every list that
    sampler.sample produced (original note: a list [('',sequ),..]).
    '''
    training_entries = list(graphs)

    estimator = estimatorwrapper(nu=.5, cv=2, n_jobs=-1)
    preprocessor = rna.PreProcessor(base_thickness_list=[1],
                                    ignore_inserts=True)
    postprocessor = rna.PostProcessor()
    # No feasibility_checker is passed to the sampler.
    sampler = rna.AbstractSampler(radius_list=[0, 1],
                                  thickness_list=[2],
                                  min_cip_count=1,
                                  min_interface_count=2,
                                  preprocessor=preprocessor,
                                  postprocessor=postprocessor,
                                  estimator=estimator)
    sampler.fit(training_entries, grammar_n_jobs=4, grammar_batch_size=1)

    # Report the four counts returned by the fitted grammar.
    logger.info('graph grammar stats:')
    dataset_size, interface_counts, core_counts, cip_counts = \
        sampler.grammar().size()
    stats_message = (
        '#instances:%d #interfaces: %d #cores: %d #core-interface-pairs: %d'
        % (dataset_size, interface_counts, core_counts, cip_counts))
    logger.info(stats_message)

    sampling_options = dict(
        n_samples=3,
        batch_size=1,
        n_steps=50,
        n_jobs=4,
        quick_skip_orig_cip=True,
        probabilistic_core_choice=True,
        burnin=10,
        improving_threshold=0.9,
        improving_linear_start=0.3,
        max_size_diff=20,
        accept_min_similarity=0.65,
        select_cip_max_tries=30,
        keep_duplicates=False,
        include_seed=False,
        backtrack=10,
        monitor=False,
    )
    seeds = [seed for _, seed in training_entries]
    sampled_batches = sampler.sample(seeds, **sampling_options)

    # Flatten the per-seed result lists into one list.
    return [item for batch in sampled_batches for item in batch]
def fit_sample(graphs, random_state=random.random()):
    '''
    Fit an rna.AbstractSampler and sample from it, driven by module state.

    Most sampler settings are read from the module-level `arguments`
    mapping and the job count from the module-level `NJOBS`; both are
    defined outside this function.

    graphs: iterable of 2-item entries.  The complete entries are handed
        to sampler.fit; only the second item of each entry is handed to
        sampler.sample as a seed.
    random_state: not used anywhere in this function.  Note that the
        default value is computed once, when the function is defined.

    Returns a single flat list containing every item of every list that
    sampler.sample produced (original note: a list [('',sequ),..]).
    '''
    training_entries = list(graphs)

    estimator = estimatorwrapper(nu=.5, cv=2, n_jobs=NJOBS)

    min_cip_count = arguments['mincipcount']
    min_interface_count = arguments['mininterfacecount']
    preprocessor = rna.PreProcessor(base_thickness_list=[1],
                                    ignore_inserts=True)
    postprocessor = rna.PostProcessor()
    # No feasibility_checker is passed to the sampler.
    sampler = rna.AbstractSampler(radius_list=[0, 1],
                                  thickness_list=[2],
                                  min_cip_count=min_cip_count,
                                  min_interface_count=min_interface_count,
                                  preprocessor=preprocessor,
                                  postprocessor=postprocessor,
                                  estimator=estimator)
    sampler.fit(training_entries, grammar_n_jobs=NJOBS, grammar_batch_size=1)

    seeds = [seed for _, seed in training_entries]

    # Collected after the seeds are extracted so that the lookups in
    # `arguments` happen in the same order as the keyword list below.
    sampling_options = dict(
        n_samples=arguments['n_samples'],
        batch_size=1,
        n_steps=arguments['n_steps'],
        n_jobs=NJOBS,
        quick_skip_orig_cip=arguments['quick_skip'],
        probabilistic_core_choice=arguments['core_choice'],
        burnin=arguments['burnin'],
        improving_threshold=arguments['imp_thresh'],
        improving_linear_start=arguments['imp_lin_start'],
        max_size_diff=arguments['maxsizediff'],
        accept_min_similarity=arguments['acc_min_sim'],
        select_cip_max_tries=30,
        keep_duplicates=False,
        include_seed=False,
        backtrack=2,
        monitor=False,
    )
    sampled_batches = sampler.sample(seeds, **sampling_options)

    # Flatten the per-seed result lists into one list.
    return [item for batch in sampled_batches for item in batch]