def oneclasstest_fraction(fraction=0.1, repeats=2):
    # choose a random fraction of the graphs and keep arrays to save the scores
    for i in range(repeats):
        badscores = []
        goodscores = []
        graphs = get_sequences_with_names(size=923)
        graphs, not_used = random_bipartition_iter(
            graphs, fraction, random_state=random.random() * i * 1000)

        # fit the one-class estimator on the selected fraction
        estimator = Wrapper(nu=.27, cv=3, n_jobs=-1)
        sampler = rna.AbstractSampler(radius_list=[0, 1],
                                      thickness_list=[2],
                                      min_cip_count=1,
                                      min_interface_count=2,
                                      preprocessor=rna.PreProcessor(
                                          base_thickness_list=[1],
                                          ignore_inserts=True),
                                      postprocessor=rna.PostProcessor(),
                                      estimator=estimator)
        sampler.preprocessor.set_param(sampler.vectorizer)
        graphmanagers = sampler.preprocessor.fit_transform(graphs)
        sampler.estimatorobject.fit(graphmanagers,
                                    vectorizer=sampler.vectorizer,
                                    random_state=sampler.random_state)

        # test: score every instance and split the scores by abstract shape
        for graphman in graphmanagers:
            struct = evaltools.dotbracket_to_shape(graphman.structure,
                                                   shapesversion=SHAPEVERSION)
            score = sampler.estimatorobject.score(graphman)
            if struct == "[[][][]]":
                goodscores.append(score)
            else:
                badscores.append(score)

        print "afraction=%f , instances=%f, good=%d , bad=%d" % (
            fraction, fraction * 923, len(goodscores), len(badscores))
        a = numpy.array(badscores)
        print 'bad:mean/std ', numpy.mean(a, axis=0), ' ', numpy.std(a, axis=0)
        a = numpy.array(goodscores)
        print 'cgood:mean/std ', numpy.mean(a, axis=0), ' ', numpy.std(a, axis=0)
        a = numpy.array(goodscores + badscores)
        print 'dbad+good:mean/std ', numpy.mean(a, axis=0), ' ', numpy.std(a, axis=0)
        print ''
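
# Usage sketch (assumption, not part of the original code): one way to drive
# oneclasstest_fraction over several training fractions; the fraction values
# below are illustrative.
def oneclasstest_fraction_sweep(fractions=(0.05, 0.1, 0.2, 0.5), repeats=2):
    for fraction in fractions:
        oneclasstest_fraction(fraction=fraction, repeats=repeats)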
def fit_sample(graphs, random_state=random.random()):
    '''
    graphs -> more graphs
    '''
    # note: the default for random_state is evaluated once at definition time
    # and the argument is not used below
    graphs = list(graphs)
    estimator = estimatorwrapper(nu=.5, cv=2, n_jobs=-1)
    sampler = rna.AbstractSampler(radius_list=[0, 1],
                                  thickness_list=[2],
                                  min_cip_count=1,
                                  min_interface_count=2,
                                  preprocessor=rna.PreProcessor(
                                      base_thickness_list=[1],
                                      ignore_inserts=True),
                                  postprocessor=rna.PostProcessor(),
                                  estimator=estimator
                                  # feasibility_checker=feasibility
                                  )
    sampler.fit(graphs, grammar_n_jobs=4, grammar_batch_size=1)

    # report grammar statistics
    logger.info('graph grammar stats:')
    dataset_size, interface_counts, core_counts, cip_counts = sampler.grammar().size()
    logger.info(
        '#instances:%d #interfaces: %d #cores: %d #core-interface-pairs: %d' %
        (dataset_size, interface_counts, core_counts, cip_counts))

    # keep only the sequence part of the (name, sequence) tuples
    graphs = [b for a, b in graphs]
    graphs = sampler.sample(graphs,
                            n_samples=3,
                            batch_size=1,
                            n_steps=50,
                            n_jobs=4,
                            quick_skip_orig_cip=True,
                            probabilistic_core_choice=True,
                            burnin=10,
                            improving_threshold=0.9,
                            improving_linear_start=0.3,
                            max_size_diff=20,
                            accept_min_similarity=0.65,
                            select_cip_max_tries=30,
                            keep_duplicates=False,
                            include_seed=False,
                            backtrack=10,
                            monitor=False)

    # flatten the per-seed sample lists; note that the result is a list [('', sequ), ..]
    result = []
    for graphlist in graphs:
        result += graphlist
    return result
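
# Usage sketch (assumption, not part of the original code): fit_sample expects an
# iterable of (name, sequence) tuples such as the output of get_sequences_with_names
# used above, and returns a flat list of ('', sequence) tuples.
def fit_sample_demo(size=50):
    seqs = get_sequences_with_names(size=size)
    sampled = fit_sample(seqs)
    print 'generated %d sequences' % len(sampled)
    return sampled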
def fit_sample(graphs, random_state=random.random()):
    '''
    graphs -> more graphs
    sampler parameters are taken from the global arguments dict generated above
    '''
    # note: the default for random_state is evaluated once at definition time
    # and the argument is not used below
    global arguments
    graphs = list(graphs)
    estimator = estimatorwrapper(nu=.5, cv=2, n_jobs=NJOBS)
    sampler = rna.AbstractSampler(
        radius_list=[0, 1],
        thickness_list=[2],
        min_cip_count=arguments['mincipcount'],
        min_interface_count=arguments['mininterfacecount'],
        preprocessor=rna.PreProcessor(base_thickness_list=[1],
                                      ignore_inserts=True),
        postprocessor=rna.PostProcessor(),
        estimator=estimator
        # feasibility_checker=feasibility
    )
    sampler.fit(graphs, grammar_n_jobs=NJOBS, grammar_batch_size=1)

    # keep only the sequence part of the (name, sequence) tuples
    graphs = [b for a, b in graphs]
    graphs = sampler.sample(graphs,
                            n_samples=arguments['n_samples'],
                            batch_size=1,
                            n_steps=arguments['n_steps'],
                            n_jobs=NJOBS,
                            quick_skip_orig_cip=arguments['quick_skip'],
                            probabilistic_core_choice=arguments['core_choice'],
                            burnin=arguments['burnin'],
                            improving_threshold=arguments['imp_thresh'],
                            improving_linear_start=arguments['imp_lin_start'],
                            max_size_diff=arguments['maxsizediff'],
                            accept_min_similarity=arguments['acc_min_sim'],
                            select_cip_max_tries=30,
                            keep_duplicates=False,
                            include_seed=False,
                            backtrack=2,
                            monitor=False)

    # flatten the per-seed sample lists; note that the result is a list [('', sequ), ..]
    result = []
    for graphlist in graphs:
        result += graphlist
    return result
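
# Sketch (assumption, not part of the original code) of the globals this fit_sample
# variant reads; NJOBS is expected to be an integer job count, and the dict values
# below simply mirror the hard-coded settings of the fit_sample variant above, so
# they are purely illustrative.
def example_arguments():
    return {'mincipcount': 1,
            'mininterfacecount': 2,
            'n_samples': 3,
            'n_steps': 50,
            'quick_skip': True,
            'core_choice': True,
            'burnin': 10,
            'imp_thresh': 0.9,
            'imp_lin_start': 0.3,
            'maxsizediff': 20,
            'acc_min_sim': 0.65}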