def _run_pytorch(self, config: BenchmarkConfig) -> Benchmark:
    """Run the model in PyTorch eager mode.

    :return: a Benchmark populated with per-call latencies
    """
    LOGGER.info("Running PyTorch Eager benchmark")
    benchmark = Benchmark()

    dummy_inputs = self._get_dummy_inputs(
        batch_size=config.batch_size,
        seq_len=(config.sequence_length - self.tokenizer.num_special_tokens_to_add(pair=False))
    )

    inputs = self.tokenizer(
        dummy_inputs,
        is_split_into_words=True,
        return_tensors=TensorType.PYTORCH,
    )
    inputs = inputs.to(config.device)
    self.model = self.model.to(config.device)

    # Warmup
    for _ in trange(config.warmup_runs, desc="Warming up"):
        self.model(**inputs)

    # Run benchmark
    benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
    while sum(benchmark.latencies) < benchmark_duration_ns:
        with benchmark.track():
            self.model(**inputs)

    benchmark.finalize(benchmark_duration_ns)
    return benchmark
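# For reference: a minimal, hypothetical sketch of the latency-tracking
# Benchmark helper that _run_pytorch relies on. The track()/finalize()/
# latencies names come from the snippet above; this implementation is an
# assumption, not the original class.
from contextlib import contextmanager
from time import perf_counter_ns


class Benchmark:
    def __init__(self):
        self.latencies = []

    @contextmanager
    def track(self):
        # record the wall-clock duration of the wrapped call in nanoseconds
        start = perf_counter_ns()
        yield
        self.latencies.append(perf_counter_ns() - start)

    def finalize(self, duration_ns):
        # derived throughput: completed calls per second of wall-clock budget
        self.throughput = len(self.latencies) / (duration_ns / 1e9)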
def __init__(self, suite='slideshow'):
    if self._SUITES.has_key(suite.lower()):
        self.suite = suite
    else:
        raise WebMarkException("Unsupported suite %s, "
                               "should be one of 'slideshow', 'zoom', "
                               "'grayscale', 'fancyshow'." % suite)
    Benchmark.__init__(self)
def __init__(self, version='0.9.1'):
    if self._VERSIONS.has_key(version):
        self.version = version
    else:
        raise WebMarkException("Unsupported version %s, "
                               "should be one of '0.9.1', '0.9'." % version)
    Benchmark.__init__(self)
def __init__(self, m=50, p=40, d=20, lb=-100, ub=100, f=7):
    self.G = 10
    self.rpercent = 0.2
    self.hpercent = 0.6
    self.mpercent = 0.1
    self.m = m
    self.pop = p
    self.dim = d
    self.rNum = int(p * self.rpercent)
    self.hNum = int(p * self.hpercent)
    self.cNum = self.pop - self.rNum - self.hNum
    self.mNum = int(p * self.mpercent)
    self.lowb = lb
    self.upb = ub
    self.fit = np.zeros(self.pop)
    self.pfit = np.zeros(self.pop)
    self.gbest = np.zeros(self.dim)
    self.f = f
    self.gfit_list = np.zeros(0)
    xi = np.zeros(self.dim)
    # initialize the chickens' positions
    self.x = pd.DataFrame(-1 + 2 * np.random.rand(self.pop, self.dim)) * 100
    self.px = self.x
    self.test = Benchmark(self.dim, self.f)
    for i in np.arange(self.pop):
        # initialize each chicken's fitness
        xi = self.x.iloc[i]
        self.fit[i] = self.test.Fitness(xi)
    self.pfit = pd.DataFrame(self.fit)  # fitness before the update
    self.gfit = self.pfit.min()         # global best fitness
    self.ind = self.pfit.idxmin()
    self.gbest = self.x.iloc[self.ind]  # global best position
def tweak_memory(benchmark: Benchmark):
    short = benchmark.get_model_short_name()
    params = benchmark.get_parameter_values_string()
    instance = short + "." + params
    prop = benchmark.get_property_name()
    full = short + "." + params + "." + prop
    tweak = ""
    if (instance == "beb.5-16-15"
            or instance == "coupon.15-4-5"
            or short == "egl"
            or short == "exploding-blocksworld"
            or short == "triangle-tireworld"
            or instance == "philosophers.20-1"
            or instance == "pnueli-zuck.10"
            or short == "rabin"
            or short == "tireworld"
            or short == "ftpp"
            or instance == "hecs.3-2"):
        tweak += " -S Hybrid --store-compress None"
    else:
        tweak += " -S Memory"
    return tweak
def __init__(self, suite='text'):
    if self._SUITES.has_key(suite.lower()):
        self.suite = suite
    else:
        raise WebMarkException("Unsupported suite %s, "
                               "should be one of 'text', 'list', 'table'." % suite)
    Benchmark.__init__(self)
def run_mini_ds2_benchmark(max_length, nbands, str_w, batch_size, max_iter, skip_iter,
                           nfilters, filter_width, depth, hidden_size, batch_norm,
                           device_id, device, transformer, visualize=False):
    inputs, train_set, eval_set = generate_ds2_data(max_length, str_w, nbands,
                                                    batch_size, max_iter)
    model_out = get_mini_ds2(inputs, nfilters, filter_width, str_w, nbands,
                             depth, hidden_size, batch_norm, device_id)

    fprop_computation_op = ng.computation(model_out, "all")

    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                transformer, device)
    Benchmark.print_benchmark_results(
        benchmark_fprop.time(max_iter, skip_iter, 'ds2_fprop', visualize))
def __init__(self, style='medium'):
    if self._STYLES.has_key(style.lower()):
        self.style = style
    else:
        raise WebMarkException("Unsupported style %s, "
                               "should be one of 'low', 'medium', 'high'." % style)
    Benchmark.__init__(self)
def __init__(self, suite='All JavaScript Tests'):
    if self._SUITES.has_key(suite.lower()):
        self.suite = suite
    else:
        raise WebMarkException("Unsupported suite %s, "
                               "should be one of 'All JavaScript Tests', "
                               "'All DOM Tests'." % suite)
    Benchmark.__init__(self)
def __init__(self, version='v7'):
    if version in self._VERSIONS:
        self.version = version
    else:
        raise WebMarkException("Unsupported version %s, "
                               "should be one of ('v1', 'v2', 'v3', 'v4', "
                               "'v5', 'v6', 'v7')" % version)
    Benchmark.__init__(self)
def __init__(self, suite='transform'):
    if suite in self._SUITES:
        self.suite = suite
    else:
        raise WebMarkException("Unsupported suite %s, "
                               "should be one of 'transform', 'transition', "
                               "'animation'." % suite)
    Benchmark.__init__(self)
def __init__(self, suite='fullscreen'):
    if suite in self._SUITES:
        self.suite = suite
    else:
        raise WebMarkException("Unsupported suite %s, "
                               "should be one of 'fullscreen', 'non-fullscreen'." % suite)
    Benchmark.__init__(self)
def __init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs):
    Benchmark.__init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs)
    # self._dirHyriseDB = os.path.join(os.getcwd(), "hyrise")
    os.environ["HYRISE_DB_PATH"] = self._dirHyriseDB
    self.setUserClass(MixedWLUser)
    self._queryDict = self.loadQueryDict()
def __init__(self, fishNumber=50):
    if fishNumber in self.fishes:
        self.fishNumber = fishNumber
    else:
        raise WebMarkException("Unsupported fish number %d, "
                               "should be one of (1, 10, 50, 100, 250, 500, 1000)"
                               % fishNumber)
    Benchmark.__init__(self)
def __init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs):
    Benchmark.__init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs)
    # self._dirHyriseDB = os.path.join(os.getcwd(), "hyrise")
    os.environ['HYRISE_DB_PATH'] = self._dirHyriseDB
    self.setUserClass(MixedWLUser)
    self._queryDict = self.loadQueryDict()
def __init__(self, suite="bitmap"): if self._SUITES.has_key(suite.lower()): self.suite = suite else: raise WebMarkException( "Unsupported suite %s, " "should be one of 'bitmap', 'bitmap cache', 'vector'." % suite ) Benchmark.__init__(self)
def __init__(self):
    # self.actions = ['toB+tree', 'toHash']
    self.structure = ['hash', 'b+tree']
    self.preIndStructure = 'b+tree'
    self.db = Database()
    self.bm = Benchmark(BENCHMARK_FILE_DATAPATH)
    self.qlines = self.bm.query_read()
    with open("./testdata/data.txt", 'r') as f:
        self.labellines = [line.rstrip('\n') for line in f]
def run_sequential_benchmark(benchmark: Benchmark):
    ###
    # Deploy cloudstash
    ###
    log("----- Create Infrastructure")
    # deploy cloudstash using serverless, get the api gateway url of the deployment
    gateway_url, deployed = deploy_cloudstash(benchmark.stage)
    # set gateway_url in benchmark object
    benchmark.gateway_url = gateway_url

    # make sure everything is ready before starting benchmark
    log(f"Waiting {config.ORCHESTRATION_DELAY} seconds before starting benchmark")
    sleep(config.ORCHESTRATION_DELAY)

    ###
    # Run the benchmark
    ###
    log("----- Run Benchmark")
    # run benchmark only if the deployment succeeded
    benchmark_ran = False
    if deployed:
        benchmark_ran, benchmark_data = run_benchmark(benchmark)
        # save when the experiment finished running
        benchmark.log_experiment_stop_time()

    ###
    # Parse benchmark output
    ###
    log("----- Parse Benchmark results")
    if benchmark_ran:
        benchmark_output_file = (
            f"{config.BENCHMARK_OUTPUT_PATH}/"
            f"{benchmark.stage}-{benchmark.benchmark}-{benchmark.number_of_artefacts}.csv"
        )
        wrote_file = write_benchmark_results_csv_file(benchmark, benchmark_output_file, benchmark_data)

    ###
    # Teardown cloudstash instance
    ###
    log("----- Remove Benchmark Infrastructure")
    # remove the cloudstash deployment
    removed = remove_deployment(benchmark.stage)

    ###
    # End Benchmark orchestration
    ###
    log("-----")
    log("Benchmark orchestration finished.")
    log(f"Benchmark running time: {benchmark.running_time}")
def __init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs):
    Benchmark.__init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs)
    self.scalefactor = kwargs["scalefactor"] if kwargs.has_key("scalefactor") else 1
    self.warehouses = kwargs["warehouses"] if kwargs.has_key("warehouses") else 4
    self.driverClass = createDriverClass("hyrise")
    self.driver = self.driverClass(os.path.join(os.getcwd(), "pytpcc", "tpcc.sql"))
    self.scaleParameters = scaleparameters.makeWithScaleFactor(self.warehouses, self.scalefactor)
    self.regenerate = False
    self.noLoad = kwargs["noLoad"] if kwargs.has_key("noLoad") else False
    self.table_dir = kwargs["tabledir"] if kwargs.has_key("tabledir") else None
    self.onlyNeworders = kwargs["onlyNeworders"] if kwargs.has_key("onlyNeworders") else False
    self.setUserClass(TPCCUser)
def is_benchmark_supported(benchmark: Benchmark):
    """Returns True if the provided benchmark is supported by the tool
    and if the given benchmark should appear on the generated benchmark list."""
    short = benchmark.get_model_short_name()
    prop = benchmark.get_property_name()
    prop_type = benchmark.get_short_property_type()
    if (short == "bluetooth"                  # multiple initial states
            or short == "herman"              # multiple initial states
            or short == "oscillators"         # model file too large, cannot be parsed
            or short == "repudiation_malicious"):  # open clock constraints
        return False
    return True
def __init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs):
    Benchmark.__init__(self, benchmarkGroupId, benchmarkRunId, buildSettings, **kwargs)
    self._dirHyriseDB = os.path.join(os.getcwd(), "hyrise")
    os.environ['HYRISE_DB_PATH'] = self._dirHyriseDB
    self.scalefactor = kwargs["scalefactor"] if kwargs.has_key("scalefactor") else 1
    self.warehouses = kwargs["warehouses"] if kwargs.has_key("warehouses") else 4
    self.driverClass = createDriverClass("hyrise")
    self.driver = self.driverClass(os.path.join(os.getcwd(), "pytpcc", "tpcc.sql"))
    self.scaleParameters = scaleparameters.makeWithScaleFactor(self.warehouses, self.scalefactor)
    self.regenerate = False
    self.noLoad = kwargs["noLoad"] if kwargs.has_key("noLoad") else False
    self.setUserClass(TPCCUser)
def run_mini_ds2_benchmark(args, **kwargs):
    device_id = kwargs.get('device_id')
    inputs, train_set, eval_set = generate_ds2_data(args.max_length, args.str_w,
                                                    args.nout, args.nbands,
                                                    args.batch_size, args.num_iterations)
    model_out = get_mini_ds2(inputs, args.nfilters, args.filter_width, args.str_w,
                             args.nbands, args.depth, args.hidden_size,
                             args.batch_norm, args.hetr_device, device_id)

    if args.bprop:
        with ng.metadata(device=args.hetr_device, device_id=device_id, parallel=ax.N):
            loss = ng.ctc(model_out,
                          ng.flatten(inputs["char_map"]),
                          inputs["audio_length"],
                          inputs["trans_length"])

            optimizer = GradientDescentMomentum(learning_rate=2e-5,
                                                momentum_coef=0.99,
                                                gradient_clip_norm=400,
                                                nesterov=args.nesterov)

            updates = optimizer(loss)
            mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])
            bprop_computation_op = ng.computation(mean_cost, "all")

        benchmark = Benchmark(bprop_computation_op, train_set, inputs,
                              args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark.time(args.num_iterations, args.skip_iter, 'ds2_bprop',
                           args.visualize, preprocess=True))
    else:
        fprop_computation_op = ng.computation(model_out, "all")
        benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                    args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark_fprop.time(args.num_iterations, args.skip_iter, 'ds2_fprop',
                                 args.visualize, preprocess=True))
def main():
    # XXX Deduce benchmark scope from commandline arguments

    # Topology names
    if len(sys.argv) == 2:
        names = sys.argv[1:]
    else:
        names = get_names('rocketfuel-*-r0.cch')
        #names.extend(get_names('rocketfuelbb-*-r0.cch'))
    topologies = [Topology(name) for name in names]

    patterns = [p() for p in MobilityPattern.factory_list().values()]

    tasks = list()
    for topology in topologies:
        for pattern in patterns:
            tasks.extend(Benchmark(topology, pattern).get_tasks())

    if PARALLEL:
        pool = multiprocessing.Pool(processes=NUM_PROCESSES)
        pool.map(Task.virtual_run, tasks)
        pool.close()
        pool.join()
    else:
        for task in tasks:
            task.run()
def test_benchmark(self):
    # Test creating a benchmark with all the fields filled out.
    b1 = Benchmark(
        'b1_test',             # name
        'octane',              # test_name
        '',                    # test_args
        3,                     # iterations
        False,                 # rm_chroot_tmp
        'record -e cycles',    # perf_args
        'telemetry_Crosperf',  # suite
        True)                  # show_all_results
    self.assertTrue(b1.suite, 'telemetry_Crosperf')

    # Test creating a benchmark with the default fields left out.
    b2 = Benchmark(
        'b2_test',             # name
        'octane',              # test_name
        '',                    # test_args
        3,                     # iterations
        False,                 # rm_chroot_tmp
        'record -e cycles')    # perf_args
    self.assertEqual(b2.suite, '')
    self.assertFalse(b2.show_all_results)

    # Test explicitly creating 'suite=telemetry' and 'show_all_results=False'
    # and see what happens.
    b3 = Benchmark(
        'b3_test',             # name
        'octane',              # test_name
        '',                    # test_args
        3,                     # iterations
        False,                 # rm_chroot_tmp
        'record -e cycles',    # perf_args
        'telemetry',           # suite
        False)                 # show_all_results
    self.assertTrue(b3.show_all_results)

    # Check to see if the args to Benchmark have changed since the last time
    # this test was updated.
    args_list = [
        'self', 'name', 'test_name', 'test_args', 'iterations', 'rm_chroot_tmp',
        'perf_args', 'suite', 'show_all_results', 'retries', 'run_local'
    ]
    arg_spec = inspect.getargspec(Benchmark.__init__)
    self.assertEqual(len(arg_spec.args), len(args_list))
    for arg in args_list:
        self.assertIn(arg, arg_spec.args)
def execute_all(method, template, show_images):
    """Start a benchmark of the given method for every available file."""
    benchmark = Benchmark(method(), template)
    benchmark.execute_all(show_images)
    print "Recall: " + str(benchmark.recall())
    print "Precision: " + str(benchmark.precision())
    print "F-Measure: " + str(benchmark.fmeasure())
def closure():
    print "reading file..."
    lines = [l.rstrip() for l in file(os.path.abspath(__file__))]
    print "finished"

    @worker
    def _worker(kvargs):
        if lines[0] == 1:
            return 0
        else:
            return -1

    return _worker

# With worker_num = 1 the computational throughput is already quite good;
# if the workload is IO-heavy, increase the number of threads.
config = {
    "worker_num": 1,
    "time": 20,
    "max_qps": 1000000,
    "step": 1,  # the smaller the step, the more precisely QPS is controlled
}

# quick test
with Timer(True):
    # b = Benchmark(test_worker, **config)
    # b = Benchmark(worker_class, **config)
    b = Benchmark(closure(), **config)
    b.loop()
argument_parser.add_argument('--plot', dest='plot', help="Enable plotting",
                             nargs='?', const=1)


def generate_dataset(n_classes=5, n_samples=300, n_features=100,
                     center_box=(5.0, 10.0), cluster_std=3.0):
    print '''Dataset parameters:
        Number of classes: {}
        Number of samples: {}
        Number of features: {}
        The box of centers of classes: {}
        Standard deviation of class elements: {}
    '''.format(n_classes, n_samples, n_features, center_box, cluster_std)
    return make_blobs(n_samples, n_features, n_classes,
                      center_box=center_box, cluster_std=cluster_std)


if __name__ == '__main__':
    add_arguments(argument_parser)
    args = argument_parser.parse_args()
    benchmark = Benchmark(plot=args.plot, logging=args.log, average=args.average)
    n_classes, n_samples, n_features, cluster_std = \
        args.classes, args.samples, args.features, args.std
    try:
        center_box = tuple(float(coord) for coord in args.box)
    except ValueError:
        center_box = (5.0, 10.0)
    dataset = generate_dataset(n_classes, n_samples, n_features, center_box, cluster_std)
    benchmark.alpha_experiment(dataset)
    benchmark.benchmark_experiment(dataset)
def seed(self, track, locked=False):
    """Calculate relations based on track as seed."""
    if not locked:
        self.acquire()
    benchmark = Benchmark()
    timestamp = now()

    seed_track = set()
    seed_artist = set()
    if track:
        seed_track.add(track)
        seed_artist.add(track.artist)
        self.lookup(track, True)

    # check artist relations
    cnt = 0
    benchmark.start()
    tt = []
    for seed_a in seed_artist.union(self._seed_artists):
        self._logger.info(u'check artist relations for {}'.format(seed_a))
        for artist_relation in ArtistRelationFactory.by_artist(seed_a):
            cnt += 1
            other_artist = artist_relation.artistA
            if artist_relation.artistA.name == seed_a.name:
                other_artist = artist_relation.artistB
            other_artist.relation_sum += artist_relation.rating
            other_artist.relation_cnt += 1
            other_artist.relation = (other_artist.relation_sum /
                                     other_artist.relation_cnt)
            top_ten(tt, u'artist related with {}({}/{}={}) to {}'.format(
                scale_rating(artist_relation.rating),
                scale_rating(other_artist.relation_sum),
                scale_rating(other_artist.relation_cnt),
                scale_rating(other_artist.relation),
                other_artist), artist_relation.rating)
            artist_relation.lastused = timestamp
    top_ten_dump(tt, self._logger.info)
    self._logger.info(u"update ranking: check artist took %s" % benchmark)
    self._logger.info(u"updated %d artist(s)" % cnt)

    # check track relations
    cnt = 0
    benchmark.start()
    tt = []
    for seed_t in seed_track.union(self._seed_tracks):
        self._logger.info(u'check track relations for {}'.format(seed_t))
        for track_relation in TrackRelationFactory.by_track(seed_t):
            other_track = track_relation.trackA
            if track_relation.trackA.title == seed_t.title and \
               track_relation.trackA.artist.name == seed_t.artist.name:
                other_track = track_relation.trackB
            cnt += 1
            if not track.ban:
                other_track.relation_sum += track_relation.rating
                other_track.relation_cnt += 1
                other_track.relation = (other_track.relation_sum /
                                        other_track.relation_cnt)
                top_ten(tt, u'track related with {} to {}'.format(
                    scale_rating(track_relation.rating),
                    other_track), track_relation.rating)
            track_relation.lastused = timestamp
    top_ten_dump(tt, self._logger.info)
    self._logger.info(u"update ranking: check track took %s" % benchmark)
    self._logger.info(u"updated %d track(s)" % cnt)

    if not locked:
        self.release()
from benchmark import Benchmark
import helper

helper.load_backends()

code = """
(begin
  (define (grow)
    (set! s (string-append "123" s "456" s "789"))
    (set! s (string-append
             (substring s (quotient (string-length s) 2) (string-length s))
             (substring s 0 (+ 1 (quotient (string-length s) 2)))))
    s)
  (define (trial n)
    (do ((i 0 (+ i 1)))
        ((> (string-length s) n) (string-length s))
      (grow))))
"""

def call_trial(vm):
    vm.eval(vm.compile('(define s "abcdef")'))
    scm = vm.eval(vm.compile('(trial 1000000)'))
    assert vm.fromscheme(scm) == 1048566

bm = Benchmark(title="string-append and substring performance", repeat=10)
for backend in helper.BACKENDS:
    vm = helper.VM(backend=backend)
    vm.eval(vm.compile(code))
    bm.measure(backend, call_trial, vm)

helper.report(bm.report())
from benchmark import Benchmark

modules = ['numpy', 'Numeric', 'numarray']
b = Benchmark(modules, runs=3, reps=100)

N = 10000
b.title = 'Sorting %d elements' % N
b['numarray'] = ('a=np.array(None,shape=%d,typecode="i");a.sort()' % N, '')
b['numpy'] = ('a=np.empty(shape=%d, dtype="i");a.sort()' % N, '')
b['Numeric'] = ('a=np.empty(shape=%d, typecode="i");np.sort(a)' % N, '')
b.run()

N1, N2 = 100, 100
b.title = 'Sorting (%d,%d) elements, last axis' % (N1, N2)
b['numarray'] = ('a=np.array(None,shape=(%d,%d),typecode="i");a.sort()' % (N1, N2), '')
b['numpy'] = ('a=np.empty(shape=(%d,%d), dtype="i");a.sort()' % (N1, N2), '')
b['Numeric'] = ('a=np.empty(shape=(%d,%d),typecode="i");np.sort(a)' % (N1, N2), '')
b.run()

N1, N2 = 100, 100
b.title = 'Sorting (%d,%d) elements, first axis' % (N1, N2)
b['numarray'] = ('a=np.array(None,shape=(%d,%d), typecode="i");a.sort(0)' % (N1, N2), '')
b['numpy'] = ('a=np.empty(shape=(%d,%d),dtype="i");np.sort(a,0)' % (N1, N2), '')
b['Numeric'] = ('a=np.empty(shape=(%d,%d),typecode="i");np.sort(a,0)' % (N1, N2), '')
b.run()
def do_convert(vm, values):
    for value in values:
        scm = vm.toscheme(value)
        # we don't assert the equality because sometimes the time to
        # test the equality may be longer than the conversion (e.g.
        # when testing two big chunks of string).
        vm.fromscheme(scm)

class Foo(object):
    pass

BIG_TEXT = open(__file__).read()

cases = [("integers", [1, 10, -5]),
         ("float numbers", [0.5, -3.2, 0.0]),
         ("big numbers", [2**33, -2**34, 10**10]),
         ("bool values", [True, False, False]),
         ("strings", ["foo", "", "baz"]),
         ("big string", [BIG_TEXT, BIG_TEXT, BIG_TEXT]),
         ("symbols", [helper.Symbol("foo"), helper.Symbol(""), helper.Symbol("bar")]),
         ("cons pairs", [helper.Cons(1, 2), helper.Cons([], []),
                         helper.Cons(1, helper.Cons(2, []))]),
         ("lists", [[1, 2, 3], [1, 2, 3, 4], []]),
         ("dicts", [{1: 1, 2: 2}, {}, {1: 10, 10: 1}]),
         ("callables", [__import__, do_convert, list.sort]),
         ("objects", [Foo(), Foo(), object()])]

for case in cases:
    bm = Benchmark(title="performance of converting " + case[0], repeat=1000)
    for backend in helper.BACKENDS:
        vm = helper.VM(backend=backend)
        bm.measure(backend, do_convert, vm, case[1])
    helper.report(bm.report())
    return output


if __name__ == "__main__":
    from benchmark import Benchmark, BenchmarkSuite, BenchmarkRunner

    setup = ""
    statement = "lst = ['c'] * 100000"
    bench = Benchmark(statement, setup, name='list with "*"')

    statement = "lst = ['c' for x in xrange(100000)]"
    bench2 = Benchmark(statement, setup, name="list with xrange")

    statement = "lst = ['c' for x in range(100000)]"
    bench3 = Benchmark(statement, setup, name="list with range")

    results = bench3.run()
    rst_text = bench3.to_rst(results)
    with open("teste.rst", "w") as f:
        f.write(rst_text)

    suite = BenchmarkSuite()
    suite.append(bench)
    suite.append(bench2)
    suite.append(bench3)

    runner = BenchmarkRunner(suite, ".", "List Creation")
    n_benchs, results = runner.run()
    # print results
    # fig = runner.plot_relative(results, horizontal=True)
    # plt.savefig('%s_r.png' % runner.name, bbox_inches='tight')
from benchmark import Benchmark

modules = ["numpy", "Numeric", "numarray"]
b = Benchmark(modules, runs=3, reps=100)

N = 10000
b.title = "Sorting %d elements" % N
b["numarray"] = ('a=np.array(None,shape=%d,typecode="i");a.sort()' % N, "")
b["numpy"] = ('a=np.empty(shape=%d, dtype="i");a.sort()' % N, "")
b["Numeric"] = ('a=np.empty(shape=%d, typecode="i");np.sort(a)' % N, "")
b.run()

N1, N2 = 100, 100
b.title = "Sorting (%d,%d) elements, last axis" % (N1, N2)
b["numarray"] = ('a=np.array(None,shape=(%d,%d),typecode="i");a.sort()' % (N1, N2), "")
b["numpy"] = ('a=np.empty(shape=(%d,%d), dtype="i");a.sort()' % (N1, N2), "")
b["Numeric"] = ('a=np.empty(shape=(%d,%d),typecode="i");np.sort(a)' % (N1, N2), "")
b.run()

N1, N2 = 100, 100
b.title = "Sorting (%d,%d) elements, first axis" % (N1, N2)
b["numarray"] = ('a=np.array(None,shape=(%d,%d), typecode="i");a.sort(0)' % (N1, N2), "")
b["numpy"] = ('a=np.empty(shape=(%d,%d),dtype="i");np.sort(a,0)' % (N1, N2), "")
b["Numeric"] = ('a=np.empty(shape=(%d,%d),typecode="i");np.sort(a,0)' % (N1, N2), "")
b.run()
def main():
    args = parse_args()

    property_files_dir = args.property_files_dir
    alignment_dir = args.alignment_dir
    psipred_dir = args.psipred_dir
    netsurfp_dir = args.netsurfp_dir
    mi_dir = args.mi_dir
    omes_dir = args.omes_dir
    braw_dir = args.braw_dir
    qij_dir = args.qij_dir
    evaluation_dir = args.evaluation_dir
    n_proteins = args.n_proteins
    n_threads = args.n_threads
    sequence_separation = args.sequence_separation
    contact_threshold = args.contact_threshold
    evaluate_likelihood = args.evaluate_likelihood
    evaluate_bayes_factor = args.evaluate_bayes_factor
    contact_prior_model_file = args.contact_prior_model_file
    coupling_prior_parameters_file = args.coupling_prior_parameters_file
    method_name = args.name

    # debugging
    # evaluation_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/evaluation/"
    # property_files_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/dataset/dataset_properties/"
    # alignment_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/psicov/"
    # psipred_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/psipred/hhfilter_results_n5e01/"
    # netsurfp_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/netsurfp/"
    # mi_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/contact_prediction/local_methods/mi_pc/"
    # omes_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/contact_prediction/local_methods/omes_fodoraldrich/"
    #
    # method_name = "pLL_3comp_reg100prec01mu_100k_ncthr8"
    # braw_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/contact_prediction/ccmpred-pll-centerv/braw/"
    # qij_dir = "/home/vorberg/work/data/benchmarkset_cathV4.1/contact_prediction/ccmpred-pll-centerv/qij/"
    # contact_prior_model_file = "/home/vorberg/work/data/bayesian_framework/contact_prior/random_forest/new_pipeline_5folds/random_forest/classweightNone_noncontactthr8/100000contacts_500000noncontacts_5window_8noncontactthreshold_maxfeatures030/random_forest_nestimators1000_maxfeatures0.3_maxdepth100_minsamplesleaf10_75features.pkl"
    # coupling_prior_parameters_file = "/home/vorberg/work/data//bayesian_framework/mle_for_couplingPrior_cath4.1/ccmpred-pll-centerv/3/reg_prec100_mu01/diagonal_100000_nrcomponents3_noncontactthr8/parameters"
    # sequence_separation = 8
    # contact_threshold = 8
    # n_threads = 8
    # n_proteins = 50

    ########### Setup dataset_id
    dataset_properties = pd.DataFrame()
    for id, property_file in enumerate(sorted(glob.glob(property_files_dir + "/*"))):
        properties = pd.read_table(property_file)
        properties['id'] = id + 1
        properties.columns = ['protein', 'resol', 'CATH-topology', 'domlength', 'alilength', 'dataset_id']
        dataset_properties = dataset_properties.append(properties, ignore_index=True)

    ########## Setup Benchmark framework
    b = Benchmark(evaluation_dir)

    ######### Load contact prior model
    rf_clf, rf_meta = BayesianContactPredictor.load_contact_prior_model(contact_prior_model_file)

    ######### Load coupling prior parameters
    coupling_prior_parameters = BayesianContactPredictor.load_coupling_prior_hyperparameters(coupling_prior_parameters_file)

    # get all existing braw files
    benchmark_dataset_id = [6, 7, 8]
    proteins_in_testset = dataset_properties.query('dataset_id in @benchmark_dataset_id')['protein'].values
    braw_files = [braw_dir + "/" + protein.strip() + ".filt.braw.gz" for protein in proteins_in_testset]
    braw_files_existing = [braw_file for braw_file in braw_files if os.path.exists(braw_file)]
    braw_files_shuff = random.sample(braw_files_existing[:n_proteins], len(braw_files_existing[:n_proteins]))

    print("Start processing {0} braw files...".format(len(braw_files_shuff)))

    ########## Iterate over proteins
    for braw_file in braw_files_shuff:
        #braw_file = '/home/vorberg/work/data/benchmarkset_cathV4.1/contact_prediction/ccmpred-pll-centerv/braw//2j5yA00.filt.braw.gz'
        protein = braw_file.split("/")[-1].split(".")[0]

        alignment_file = alignment_dir + "/" + protein + ".filt.psc"
        psipred_file = psipred_dir + "/" + protein + ".filt.withss.a3m.ss2"
        netsurfp_file = netsurfp_dir + "/" + protein + ".filt.netsurfp"
        mi_file = mi_dir + "/" + protein + ".filt.mi.pc.mat"
        omes_file = omes_dir + "/" + protein + ".filt.omes.fodoraldrich.mat"
        qij_file = qij_dir + "/" + protein + ".filt.bqij.gz"

        if not os.path.exists(alignment_file):
            print("Alignment file {0} does not exist. Skip this protein.".format(alignment_file))
            continue
        if not os.path.exists(braw_file):
            print("Binary raw file {0} does not exist. Skip this protein.".format(braw_file))
            continue
        if not os.path.exists(qij_file):
            print("Qij file {0} does not exist. Skip this protein.".format(qij_file))
            continue

        print("Compute posterior probabilities for contact with Bayesian model for protein {0}".format(protein))

        BCP = BayesianContactPredictor(alignment_file)
        BCP.set_contact_prior_model(rf_clf, rf_meta)
        BCP.set_coupling_prior_parameters(coupling_prior_parameters)
        BCP.set_n_threads(n_threads)
        BCP.set_sequence_separation(sequence_separation)
        BCP.set_contact_threshold(contact_threshold)

        BCP.contact_prior(psipred_file, netsurfp_file, mi_file, omes_file)
        BCP.contact_likelihood(braw_file, qij_file)
        BCP.contact_posterior()

        contact_prior_mat = BCP.get_contact_prior(contact=1)
        contact_posterior_mat = BCP.get_contact_posterior(contact=1)
        posterior_meta = BCP.get_meta()

        b.add_method(protein.strip(), method_name, contact_posterior_mat, posterior_meta, apc=False, update=True)
        b.add_method(protein.strip(), "rf_contact_prior", contact_prior_mat, posterior_meta, apc=False, update=True)

        if evaluate_bayes_factor:
            contact_likelihood_mat = BCP.get_contact_likelihood(contact=1, normalized=False, bayes_factor=True)
            b.add_method(protein.strip(), method_name + "_logbf", contact_likelihood_mat, posterior_meta, apc=False, update=True)

        if evaluate_likelihood:
            contact_likelihood_mat = BCP.get_contact_likelihood(contact=1, normalized=True, bayes_factor=False)
            b.add_method(protein.strip(), method_name + "_llik", contact_likelihood_mat, posterior_meta, apc=False, update=True)
from benchmark import Benchmark

modules = ['numpy', 'Numeric', 'numarray']
# note: the setups below build float arrays and the statements cast them
# to integer, so the title reflects a float-to-integer cast
b = Benchmark(modules,
              title='Casting a (10,10) float array to integer.',
              runs=3, reps=10000)

N = [10, 10]
b['numpy'] = ('b = a.astype(int)',
              'a=numpy.zeros(shape=%s,dtype=float)' % N)
b['Numeric'] = ('b = a.astype("l")',
                'a=Numeric.zeros(shape=%s,typecode="d")' % N)
b['numarray'] = ("b = a.astype('l')",
                 "a=numarray.zeros(shape=%s,typecode='d')" % N)
b.run()
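# A minimal, hypothetical sketch of the cross-module Benchmark harness the
# sorting and casting scripts above assume: each entry maps a module name to
# a (statement, setup) pair timed with timeit. It imports each module both
# under its own name and as "np", since the scripts use both spellings; the
# real harness may differ.
import timeit


class Benchmark(dict):
    def __init__(self, modules, title='', runs=3, reps=100):
        dict.__init__(self)
        self.modules = modules
        self.title = title
        self.runs = runs
        self.reps = reps

    def run(self):
        print(self.title)
        for mod in self.modules:
            if mod not in self:
                continue
            stmt, setup = self[mod]
            full_setup = 'import %s\nimport %s as np\n%s' % (mod, mod, setup)
            try:
                times = timeit.repeat(stmt, setup=full_setup,
                                      repeat=self.runs, number=self.reps)
                # report the best of `runs` repetitions of `reps` executions
                print('  %-10s %.4f s' % (mod, min(times)))
            except ImportError:
                print('  %-10s not installed' % mod)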
from benchmark import Benchmark
import helper

helper.load_backends()

code = """
(lambda (n)
  (define (iter sum i)
    (if (= i 0)
        sum
        (iter (+ sum i) (- i 1))))
  (iter 0 n))
"""

def call_sum(proc, vm):
    scm = vm.apply(proc, [vm.toscheme(10000)])
    assert vm.fromscheme(scm) == 50005000

bm = Benchmark(title="Tail call performance", repeat=10)
for backend in helper.BACKENDS:
    vm = helper.VM(backend=backend)
    proc = vm.eval(vm.compile(code))
    bm.measure(backend, call_sum, proc, vm)

helper.report(bm.report())
"""\ This benchmark test the loading time of each backend. But it seems to be hard to make this benchmark accurate. Because it is only slow for the first time when a VM is to be loaded. Later it will be very fast since all related stuffs are already in memory. So repeating here just makes no help. """ from benchmark import Benchmark import helper def load_backend(backend): vm = helper.VM(backend=backend) bm = Benchmark(title="Time to load the VM", repeat=1) for backend in helper.BACKENDS: bm.measure(backend, load_backend, backend) helper.report(bm.report())
def __init__(self, driver, logf):
    Benchmark.__init__(self, driver, logf)
def main():
    args = parse_args()

    property_files_dir = args.property_files_dir
    alignment_dir = args.alignment_dir
    psipred_dir = args.psipred_dir
    netsurfp_dir = args.netsurfp_dir
    mi_dir = args.mi_dir
    omes_dir = args.omes_dir
    model_file = args.model_file
    evaluation_dir = args.evaluation_dir
    method_name = args.method
    n_proteins = args.n_proteins
    n_threads = args.n_threads
    sequence_separation = args.sequence_separation
    contact_threshold = args.contact_threshold

    pll_braw_dir = args.pll_braw
    cd_braw_dir = args.cd_braw
    pcd_braw_dir = args.pcd_braw
    bayposterior_mat_dir = args.bayposterior_mat
    bayesfactor_mat_dir = args.bayesfactor_mat

    print("Add evaluation files for method {0} to {1}".format(method_name, evaluation_dir))
    print("\nPaths to data:\n")
    print("Alignment dir: \t\t {0}".format(alignment_dir))
    print("Psipred dir: \t\t {0}".format(psipred_dir))
    print("Netsurfp dir: \t\t {0}".format(netsurfp_dir))
    print("MI dir: \t\t {0}".format(mi_dir))
    print("OMES dir: \t\t {0}".format(omes_dir))
    print("Modelfile dir: \t\t {0}".format(model_file))
    print("\nPaths to additional data:\n")
    print("pLL Braw dir: \t\t {0}".format(pll_braw_dir))
    print("CD Braw dir: \t\t {0}".format(cd_braw_dir))
    print("PCD Braw dir: \t\t {0}".format(pcd_braw_dir))
    print("BayPost Mat dir: \t\t {0}".format(bayposterior_mat_dir))
    print("BayFactor Mat dir: \t\t {0}".format(bayesfactor_mat_dir))

    # update existing files?
    update = False

    ########### Setup dataset_id
    dataset_properties = pd.DataFrame()
    for id, property_file in enumerate(sorted(glob.glob(property_files_dir + "/*"))):
        properties = pd.read_table(property_file)
        properties['id'] = id + 1
        properties.columns = ['protein', 'resol', 'CATH-topology', 'domlength', 'alilength', 'dataset_id']
        dataset_properties = dataset_properties.append(properties, ignore_index=True)

    ########## Setup Benchmark framework
    b = Benchmark(evaluation_dir)

    ########## Benchmark on these datasets
    benchmark_dataset_id = [6, 7, 8]

    ######### Load model
    rf_clf, rf_meta = BayesianContactPredictor.load_contact_prior_model(model_file)

    # get all existing alignment files
    proteins_in_testset = dataset_properties.query('dataset_id in @benchmark_dataset_id')['protein'].values

    print("Start processing alignment files...")

    ########## Iterate over proteins
    counter = 0
    it = -1
    while counter < n_proteins:
        it += 1
        proteins_subset = proteins_in_testset[(it * n_proteins):((it + 1) * n_proteins)]
        np.random.shuffle(proteins_subset)
        for protein in proteins_subset:
            if counter >= n_proteins:
                break

            protein = protein.strip()
            alignment_file = alignment_dir + "/" + protein + ".filt.psc"
            psipred_file = psipred_dir + "/" + protein + ".filt.withss.a3m.ss2"
            netsurfp_file = netsurfp_dir + "/" + protein + ".filt.netsurfp"
            mi_file = mi_dir + "/" + protein + ".filt.mi.pc.mat"
            omes_file = omes_dir + "/" + protein + ".filt.omes.fodoraldrich.mat"
            eval_file = evaluation_dir + "/" + protein.strip() + "." + method_name

            pll_braw_file = None
            cd_braw_file = None
            pcd_braw_file = None
            bayposterior_mat_file = None
            bayfactor_mat_file = None

            if os.path.exists(eval_file) and not update:
                print("Evaluation file {0} already exists. Skip this protein.".format(eval_file))
                continue
            if not os.path.exists(alignment_file):
                print("Alignment file {0} does not exist. Skip this protein.".format(alignment_file))
                continue
            if not os.path.exists(psipred_file):
                print("Psipred file {0} does not exist. Skip protein {1}!".format(psipred_file, protein))
                continue
            if not os.path.exists(netsurfp_file):
                print("NetsurfP file {0} does not exist. Skip protein {1}!".format(netsurfp_file, protein))
                continue

            if pll_braw_dir is not None:
                pll_braw_file = pll_braw_dir + "/" + protein.strip() + ".filt.braw.gz"
                if not os.path.exists(pll_braw_file):
                    print("pLL braw file {0} does not exist. Skip protein {1}!".format(pll_braw_file, protein))
                    continue
            if cd_braw_dir is not None:
                cd_braw_file = cd_braw_dir + "/" + protein.strip() + ".filt.braw.gz"
                if not os.path.exists(cd_braw_file):
                    print("CD braw file {0} does not exist. Skip protein {1}!".format(cd_braw_file, protein))
                    continue
            if pcd_braw_dir is not None:
                pcd_braw_file = pcd_braw_dir + "/" + protein.strip() + ".filt.braw.gz"
                if not os.path.exists(pcd_braw_file):
                    print("PCD braw file {0} does not exist. Skip protein {1}!".format(pcd_braw_file, protein))
                    continue
            if bayposterior_mat_dir is not None:
                bayposterior_mat_file = bayposterior_mat_dir + "/" + protein.strip() + ".bayesian_3comp_pLL.mat"
                if not os.path.exists(bayposterior_mat_file):
                    print("Bayesian posterior mat file {0} does not exist. Skip protein {1}!".format(bayposterior_mat_file, protein))
                    continue
            if bayesfactor_mat_dir is not None:
                bayfactor_mat_file = bayesfactor_mat_dir + "/" + protein.strip() + ".bayesian_3comp_pLL.mat"
                if not os.path.exists(bayfactor_mat_file):
                    print("Bayes factor mat file {0} does not exist. Skip protein {1}!".format(bayfactor_mat_file, protein))
                    continue

            BCP = BayesianContactPredictor(alignment_file)
            BCP.set_contact_prior_model(rf_clf, rf_meta)
            BCP.set_n_threads(n_threads)
            BCP.set_sequence_separation(sequence_separation)
            BCP.set_contact_threshold(contact_threshold)

            BCP.contact_prior(
                psipred_file, netsurfp_file, mi_file, omes_file,
                pll_braw_file, cd_braw_file, pcd_braw_file,
                bayposterior_mat_file, bayfactor_mat_file
            )

            contact_prior_mat = BCP.get_contact_prior(contact=1)
            meta = {
                'opt_code': 1,
                'rf': rf_meta
            }

            b.add_method(protein, method_name, contact_prior_mat, meta, apc=False, update=update)
            counter += 1
def __init__(self):
    Benchmark.__init__(self)
def _update_ranking(self, locked=False):
    benchmark = Benchmark()
    if not locked:
        self.acquire()

    seed_track = None
    if self._lastplayed_track:
        seed_track = self._lastplayed_track
    elif self._playing_track:
        seed_track = self._playing_track

    if not self._relation_resetted:
        benchmark.start()
        self._relation_resetted = True
        for track in TrackFactory.active_tracks():
            track.relation_old = track.relation
            track.relation_sum = 0.0
            track.relation_cnt = 0
        for artist in ArtistFactory.active_artists():
            artist.relation_old = artist.relation
            artist.relation_sum = 0.0
            artist.relation_cnt = 0
        self._logger.info(u"update ranking: resetting took %s" % benchmark)

    has_active_tracks = False
    for track in TrackFactory.active_tracks():
        has_active_tracks = True
        break

    benchmark.start()
    # new relation = old relation * decay
    # e.g. for decay = 0.5 (0.75)
    #   decfacA = 0.5 * 0.5 = 0.25              (0.75 * 0.5 = 0.375)
    #   decfacB = 1.0 - 0.5 = 0.5               (1.0 - 0.75 = 0.25)
    #   relation_old=0.75 -> 0.25+0.5*0.75=0.625  (0.375+0.25*0.75=0.5625)
    decfacA = self._relation_decay * 0.5
    decfacB = 1.0 - self._relation_decay
    for track in TrackFactory.active_tracks():
        if (track.relation_old > 0.501) or (track.relation_old < 0.499):
            track.relation = (decfacA + decfacB * track.relation_old)
        else:
            track.relation = 0.5
    for artist in ArtistFactory.active_artists():
        if (artist.relation_old > 0.501) or (artist.relation_old < 0.499):
            artist.relation = (decfacA + decfacB * artist.relation_old)
        else:
            artist.relation = 0.5
    self._logger.info(u"update ranking: set old relation + decay took %s" % benchmark)

    if has_active_tracks:
        self._ranking_updated = True
        self.seed(seed_track, True)

        benchmark.start()
        at = []
        tt = []
        for track in TrackFactory.active_tracks():
            """ so we have:
                    ranking  [0-1.0] (old)
                    rating   [0-1.0]
                    relation [0-1.0]
                    random   [0-1.0]
                and:
                    factor min(track_lastplayed/started,
                               artist_lastplayed/started) [0-1.0]
                    ? moved to next_file()
            """
            artist = track.artist
            r = random()

            # calculate new ranking
            if track.boost:
                self._logger.info(u"pre boost: %s" % track)
                track.relation = (track.relation + 99.0) / 100.0
                self._logger.info(u"post boost: %s" % track)
            elif track.ban:
                track.relation = 0.0

            # mix with artist relation if we don't have a track relation
            if track.relation_cnt == 0:
                if artist.relation_cnt > 0:
                    track.relation = (0.75 * track.relation +
                                      0.25 * artist.relation)
                    top_ten(at, u'relation cnt = {} with {} now {} to {}'.format(
                        artist.relation_cnt,
                        scale_rating(artist.relation),
                        scale_rating(track.relation),
                        artist), track.relation)
            else:
                top_ten(tt, u'relation cnt = {} with {} to {}'.format(
                    track.relation_cnt,
                    scale_rating(track.relation),
                    track), track.relation)

            track.ranking = (
                self._factor_ranking['rating'] * track.get_rating() +
                self._factor_ranking['relation'] * track.relation +
                self._factor_ranking['random'] * r
            )
        self._logger.info(u"update ranking: took %s" % benchmark)

    self._update_queued = False
    if not locked:
        self.release()
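# Standalone check of the decay arithmetic documented in _update_ranking above:
# new = decay * 0.5 + (1 - decay) * old, which pulls every relation toward the
# neutral value 0.5. The figures match the worked example in the comment; this
# helper is illustrative and not part of the original module.
def decayed(old, decay):
    decfacA = decay * 0.5
    decfacB = 1.0 - decay
    return decfacA + decfacB * old

assert abs(decayed(0.75, 0.5) - 0.625) < 1e-9
assert abs(decayed(0.75, 0.75) - 0.5625) < 1e-9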