def user_relation(self, user):
    self.logger.info('========> relation')
    try:
        uri = '/uplus-api/meow/import/relation'
        er_list = []
        for er in self.usa_session.execute('SELECT * FROM cb.cb_er_dt WHERE follower_id=%s;' % user[0]):
            er_list.append({
                "fromUserId": str(user[0]),
                "toUserId": str(er.followee_id),
                "isLiked": "1",
                # first 19 characters: 'YYYY-MM-DD HH:MM:SS'
                "createTime": time.mktime(time.strptime(str(er.dt)[0:19], '%Y-%m-%d %H:%M:%S')) * 1000
            })
        if 0 < len(er_list):  # skip the request when there are no relations
            # process in chunks
            s_list = util.split_list(er_list, 100)
            for sublist in s_list:
                payload = {
                    "list": sublist,
                    "uid": str(user[0]),
                    "type": "0"
                }
                self.logger.info(simplejson.dumps(payload))
                self.api_request(uri=uri, body=simplejson.dumps(payload))
    except Exception as ex:
        self.logger.warn('Exception %s' % str(ex))
    try:
        uri = '/uplus-api/meow/import/relation'
        ee_list = []
        for ee in self.usa_session.execute('SELECT * FROM cb.cb_ee_dt WHERE followee_id=%s;' % user[0]):
            ee_list.append({
                "fromUserId": str(ee.follower_id),
                "toUserId": str(user[0]),
                "isLiked": "1",
                "createTime": time.mktime(time.strptime(str(ee.dt)[0:19], '%Y-%m-%d %H:%M:%S')) * 1000
            })
        if 0 < len(ee_list):  # skip the request when there are no relations
            # process in chunks
            s_ee_list = util.split_list(ee_list, 100)
            for sublist in s_ee_list:
                payload = {
                    "list": sublist,
                    "uid": str(user[0]),
                    "type": "1"
                }
                self.logger.info(simplejson.dumps(payload))
                self.api_request(uri=uri, body=simplejson.dumps(payload))
    except Exception as ex:
        self.logger.warn('Exception %s' % str(ex))
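# Note: the util.split_list helper used above is not shown in this section.
# A minimal sketch consistent with this call site (batching the relation list
# into chunks of at most 100 items per API request) could look like the
# following; this is an assumption, not the project's actual helper.
def split_list(items, size):
    """Split `items` into consecutive chunks of at most `size` elements."""
    return [items[i:i + size] for i in range(0, len(items), size)]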
def is_safe_cfgval(key, cfg_str, new, safe):
    new = list(set(split_list(new)))  # remove list duplicates
    keys = CTK.cfg.keys(key)
    cfg = [CTK.cfg.get_val(cfg_str % key) for key in keys]
    old = [x for sublist in map(split_list, cfg) for x in sublist]
    old.sort()
    dupes = list(set(old) & set(new))
    if safe:
        # Do not worry about safe values
        safe = split_list(safe)
        dupes = list(set(dupes) - set(safe))
    if dupes:
        raise ValueError('%s: %s' % (_('Already in use'), ', '.join(dupes)))
    return ','.join(new)
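# Note: the split_list used by is_safe_cfgval and by the is_*_list validators
# further down operates on a configuration string, not a list. A minimal
# sketch matching that usage (turning a comma/whitespace separated string into
# a list of non-empty entries) is given here as an assumption; the real helper
# may differ.
import re

def split_list(value):
    """Split a comma/whitespace separated string into a list of entries."""
    return [entry for entry in re.split(r'[\s,]+', value or '') if entry]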
def build_classifier(self, extract_features_func):
    self.__parse_learn_data()
    random.shuffle(self.learn_data_array)
    # percentage split for train/test data
    train_data, test_data = util.split_list(self.learn_data_array, 75)
    self.features = self.__get_word_features(train_data)
    self.bi_features = self.__get_bigram_features(train_data)
    train_set = nltk.classify.apply_features(
        functools.partial(extract_features_func, self.features,
                          self.bi_features), train_data)
    test_set = nltk.classify.apply_features(
        functools.partial(extract_features_func, self.features,
                          self.bi_features), test_data)
    print "Total set length", len(self.learn_data_array)
    print "Train set length", len(train_set)
    print "Test set length", len(test_set)
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    print "Test set accuracy", \
        nltk.classify.accuracy(classifier, test_set)
    self.classifier = classifier
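# Note: here util.split_list is called with a percentage (75) and is expected
# to return a (train, test) pair. A minimal sketch with that behaviour,
# assuming the second argument is the share of items placed in the first part:
def split_list(items, percentage):
    """Split `items` into two lists; the first gets `percentage` percent."""
    cut = int(len(items) * percentage / 100.0)
    return items[:cut], items[cut:]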
def _create_filter_ev_mb(self):
    self.show_object = {}
    self.obj_cat_list = {}
    self.filter_ev_mb.delete(1, END)
    for obj_type in self.object_types:
        self.show_object[obj_type] = IntVar()
        self.obj_cat_list[obj_type] = util.split_list(
            util.grep_list(self.ev_object_listing, obj_type + ':'), ':', 1)
        self.filter_ev_mb.add_checkbutton(label=obj_type,
                                          variable=self.show_object[obj_type],
                                          command=self.apply_filter)
def get_score(rets):
    by_opt = lambda ret: ret['optimizee_name']
    splits, opt_names = util.split_list(rets, by_opt)
    scores = {}
    for opt_name in opt_names:
        losses = [ret['loss'] for ret in splits[opt_name]]
        scores[opt_name] = -np.mean(losses)
    return scores
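# Note: in this snippet (and in plot_training_results below) util.split_list
# groups a list of result dicts by a key function and returns both the groups
# and the distinct keys. A minimal sketch of such a helper, assuming it
# returns ({key: [items]}, [keys]):
def split_list(items, key_func):
    """Group `items` by `key_func`; return (groups dict, list of keys)."""
    groups = {}
    for item in items:
        groups.setdefault(key_func(item), []).append(item)
    return groups, list(groups.keys())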
def get_mine_count_in_neighboord(self, xCoord, yCoord):
    boxes_matrix = list(util.split_list(self.boxes, self.level.column_count))
    neighboord = util.get_neighboord(
        xCoord, yCoord, len(boxes_matrix), len(boxes_matrix[0]))
    mine_count = 0
    for neighbor in neighboord:
        box = self.__get_box(neighbor[0], neighbor[1])
        if box is not None and box.get_is_mine():
            mine_count += 1
    return mine_count
def convert_to_table(boxes, level):
    table = []
    table_header = [" "]
    for x in range(1, level.column_count + 1):
        table_header.append(str(x))
    table.append(table_header)
    boxes_matrix = list(util.split_list(boxes, level.row_count))
    for row in range(len(boxes_matrix)):
        row_data = [str(row + 1)]
        for col in range(len(boxes_matrix[0])):
            row_data.append(box_text(boxes_matrix[row][col]))
        table.append(row_data)
    return table
def get_full_cmd_embeds(self):
    embeds = []
    description = "Prefix: {}\nCommand names are not case sensitive.\n" \
                  "'()' means optional.\n'(-letter/letter)' means optional options, " \
                  "can be combined if specified, slash isn't included in the command.\n" \
                  "'[]' means required.".format(self.parent_client.prefix)
    for fields in util.split_list(self.handler.get_cmd_inlines(), 25):
        embeds.append(
            util.make_embed(colour=util.colour_royal_purple,
                            description=description,
                            author_name="Help",
                            author_icon_url=util.image_question_mark,
                            fields=fields))
    return embeds
async def get_full_birthday_embeds(self, message: discord.Message):
    embeds = []
    description = ""
    if len(self.get_admin_inlines(message)) == 0:
        embeds.append(
            util.make_embed(colour=util.colour_admin,
                            description="No one is in this list!",
                            author_name="Admin List",
                            author_icon_url=util.image_admin_lock))
    if self.has_wanted_argument(message, "c"):
        description = self.get_count_description(message)
    for fields in util.split_list(self.get_admin_inlines(message), 25):
        embeds.append(
            util.make_embed(colour=util.colour_admin,
                            description=description,
                            author_name="Admin List",
                            author_icon_url=util.image_admin_lock,
                            fields=fields))
    return embeds
def is_list(value):
    tmp = split_list(value)
    if not tmp:
        return ''
    return ','.join(tmp)
def plot_training_results(flags, experiment_path, results):
    by_opt = lambda ret: ret['optimizee_name']
    train_results, test_results = results
    train_results_splits, opts = util.split_list(train_results, by_opt)
    test_results_splits, _ = util.split_list(test_results, by_opt)
    for opt_name, rets in train_results_splits.items():
        print("{}: {} iterations".format(opt_name, len(rets)))
    fig, axes = plt.subplots(nrows=len(opts), figsize=(15, 12))
    if len(opts) == 1:
        axes = (axes, )
    alpha = 1.0
    if flags.plot_moving:
        alpha = 0.5
    for i, opt_name in enumerate(opts):
        ax = axes[i]
        losses_train = [ret['loss'] for ret in train_results_splits[opt_name]]
        try:
            losses_test = [ret['loss'] for ret in test_results_splits[opt_name]]
        except:
            losses_test = []
        l_train = int(len(losses_train) * (1. - flags.frac))
        l_test = int(len(losses_test) * (1. - flags.frac))
        if flags.plot_moving:
            moving_train = util.get_moving(losses_train, mu=0.95)[l_train:]
            try:
                moving_test = util.get_moving(losses_test, mu=0.95)[l_test:]
            except:
                moving_test = []
        losses_train = losses_train[l_train:]
        losses_test = losses_test[l_test:]
        if len(losses_test):
            s = len(losses_train) // len(losses_test)
            lt = list(range(0, len(losses_train), s))
            lt = lt[:len(losses_test)]
        else:
            lt = []
        p_train = ax.plot(losses_train, label='train', alpha=alpha)
        p_test = ax.plot(lt, losses_test, label='test', alpha=alpha)
        if flags.plot_moving:
            ax.plot(moving_train, label='moving train', color=p_train[-1].get_color())
            ax.plot(lt, moving_test, label='moving test', color=p_test[-1].get_color())
        ax.set_title(opt_name)
        ax.set_ylabel('loss')
        ax.set_xlabel('iteration number')
        ax.legend(loc='best')
    fig.tight_layout()
    save_figure(fig, filename=experiment_path / 'training')
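# Note: util.get_moving is used above to smooth the loss curves. Its code is
# not shown; the call pattern (get_moving(losses, mu=0.95)) suggests an
# exponential moving average with decay `mu`. A minimal sketch under that
# assumption:
def get_moving(values, mu=0.95):
    """Return an exponentially smoothed copy of `values` with decay `mu`."""
    smoothed, running = [], None
    for v in values:
        running = v if running is None else mu * running + (1 - mu) * v
        smoothed.append(running)
    return smoothed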
def is_ip_or_netmask_list(value):
    re = []
    for e in split_list(value):
        re.append(is_ip_or_netmask(e))
    return ','.join(re)
def main():
    global parent_pid
    parent_pid = os.getpid()
    args_info = ArgsProcessing(sys.argv[1:])  # process command-line arguments
    work_mode = args_info.current_mode  # working mode
    log_file_list = args_info.log_file_list  # all log files
    signal.signal(signal.SIGINT, signal_handler)  # SIGINT is the signal sent by Ctrl+C (value 2)
    start_time = time.time()
    # Mode 1: analyze a single transaction  Mode 2: analyze all transactions
    # Mode 3: analyze a single block        Mode 4: analyze all blocks
    if work_mode == 1:
        tx_hash = args_info.tx_hash  # transaction hash
        # fetch all transaction data as dicts
        all_tx_dict_list = get_all_log_dict(log_file_list, 'transaction')
        overall_earliest_msg, overall_latest_msg = retrieve_earliest_latest_msg(
            all_tx_dict_list, tx_hash)
        if overall_earliest_msg and overall_latest_msg:
            print('Earliest: %s' % overall_earliest_msg)
            print('Latest: %s' % overall_latest_msg)
            interval_time = millisecond2time_format(
                calc_millisecond_interval(overall_latest_msg[0], overall_earliest_msg[0]))
            print('Interval: %s' % interval_time)
        else:
            print('The transaction %s was not found in log file!' % tx_hash)
    elif work_mode == 2:
        all_tx_dict_list = get_all_log_dict(log_file_list, 'transaction')
        all_tx_hash = []  # all transaction hashes
        for tx_dict in all_tx_dict_list:
            all_tx_hash.extend(list(tx_dict.keys()))
        # remove duplicate elements
        all_tx_hash = list(set(all_tx_hash))
        broadcasting_time_queue = Queue()  # queue holding the broadcast times
        processes = []
        # get the number of CPU cores
        processor_num = cpu_count()
        # split all transaction hashes into as many roughly equal parts as there are CPU cores
        split_all_tx_hash = split_list(all_tx_hash, processor_num)
        for work_list in split_all_tx_hash:
            # spawn one child process per part (one per CPU core) to compute broadcast times
            p = Process(target=calc_broadcasting_time,
                        args=(work_list, broadcasting_time_queue, all_tx_dict_list))
            p.start()
            processes.append(p)
        for process in processes:
            # wait for all child processes to finish
            process.join()
        broadcasting_time_list = []
        while True:
            # collect each child's results into a list in the parent process
            broadcasting_time_list.append(broadcasting_time_queue.get())
            if broadcasting_time_queue.empty():
                break
        # merge the sorted lists produced by the children with a min-heap
        broadcasting_time_list = list(heapq.merge(*broadcasting_time_list))
        # compute the mean and median of the broadcast-time list
        average, median = get_average_median(broadcasting_time_list)
        print('Shortest time: %s' % millisecond2time_format(broadcasting_time_list[0]))
        print('Longest time: %s' % millisecond2time_format(broadcasting_time_list[-1]))
        print('Mean: %s' % millisecond2time_format(average))
        print('Median: %s' % millisecond2time_format(median))
    elif work_mode == 3:
        height = args_info.height  # block height
        all_block_dict_list = get_all_log_dict(log_file_list, 'block')
        overall_earliest_msg, overall_latest_msg = retrieve_earliest_latest_msg(
            all_block_dict_list, height)
        if overall_earliest_msg and overall_latest_msg:
            print('Earliest: %s' % overall_earliest_msg)
            print('Latest: %s' % overall_latest_msg)
            interval_time = millisecond2time_format(
                calc_millisecond_interval(overall_latest_msg[0], overall_earliest_msg[0]))
            print('Interval: %s' % interval_time)
        else:
            print('The block height %s was not found in log file!' % height)
    elif work_mode == 4:
        all_block_dict_list = get_all_log_dict(log_file_list, 'block')
        all_block_height = []
        for block_dict in all_block_dict_list:
            all_block_height.extend(list(block_dict.keys()))
        all_block_height = list(set(all_block_height))
        broadcasting_time_queue = Queue()
        processes = []
        processor_num = cpu_count()
        split_all_block_height = split_list(all_block_height, processor_num)
        for work_list in split_all_block_height:
            p = Process(target=calc_broadcasting_time,
                        args=(work_list, broadcasting_time_queue, all_block_dict_list))
            p.start()
            processes.append(p)
        for process in processes:
            process.join()
        broadcasting_time_list = []
        while True:
            broadcasting_time_list.append(broadcasting_time_queue.get())
            if broadcasting_time_queue.empty():
                break
        broadcasting_time_list = list(heapq.merge(*broadcasting_time_list))
        average, median = get_average_median(broadcasting_time_list)
        print('Shortest time: %s' % millisecond2time_format(broadcasting_time_list[0]))
        print('Longest time: %s' % millisecond2time_format(broadcasting_time_list[-1]))
        print('Mean: %s' % millisecond2time_format(average))
        print('Median: %s' % millisecond2time_format(median))
    print('Analysis took:', time.time() - start_time)
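# Note: main() above fans work out to one child process per CPU core, so its
# split_list is assumed to divide a list into `n` roughly equal consecutive
# parts. A minimal sketch of that behaviour; the real helper may distribute
# the remainder differently.
def split_list(items, n):
    """Split `items` into `n` roughly equal consecutive parts."""
    quotient, remainder = divmod(len(items), n)
    parts, start = [], 0
    for i in range(n):
        end = start + quotient + (1 if i < remainder else 0)
        parts.append(items[start:end])
        start = end
    return parts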
def is_safe_id_list(value):
    ids = []
    for id in split_list(value):
        ids.append(is_safe_id(id))
    return ','.join(ids)
def is_ip_list(value):
    re = []
    for e in split_list(value):
        re.append(is_ip(e))
    return ','.join(re)
def eval_mpi(args, evaldir, modelfile, model_index):
    from mpi4py import MPI as mpi
    rank = mpi.COMM_WORLD.Get_rank()
    all = mpi.COMM_WORLD.Get_size()
    if modelfile is None:
        model = None
    else:
        if args.model_type == "ppo2":
            model = PPO2.load(modelfile)
        elif args.model_type == "ppo1":
            model = PPO1.load(modelfile)
    env = ProofEnv.ProofEnv(args)
    env.set_model(model)
    dirparts = evaldir.split("/")
    if dirparts[-1] == "":
        dirname = dirparts[-2]
    else:
        dirname = dirparts[-1]
    evalprefix = "eval_{}_{}_{}_{}".format(model_index, dirname, args.evaltype, args.evalcount)
    proofs_found = 0
    det_proofs_found = 0
    proofs_tried = 0
    len_sum = 0.0
    attempts_sum = 0.0
    prove.guidance_time = 0
    filenames_original = sorted(util.list_problems(evaldir))
    chunks = util.split_list(filenames_original, all, extensible=False)
    chunk = chunks[rank]
    for index in range(len(chunks[0])):
        if index >= len(chunk):
            tried, success, prooflen, attempts, det_success = (0, 0, 0, 0, 0)
        else:
            filepath = chunk[index]
            print("\n\nTrying to find proof for {}".format(filepath))
            success, prooflen, attempts, evaltype = find_one_proof(args, model, env, filepath)
            tried = 1
            det_success = success * (evaltype == "det")
        results = mpi.COMM_WORLD.gather(
            (tried, success, prooflen, attempts, det_success), root=0)
        if rank == 0:
            # print(results)
            for i in range(len(results)):
                proofs_tried += results[i][0]
                succ = results[i][1]
                if succ == 1:
                    proofs_found += 1
                    len_sum += results[i][2]
                    attempts_sum += results[i][3]
                det_succ = results[i][4]
                if det_succ:
                    det_proofs_found += 1
            logger.record_tabular("update_no", proofs_tried)
            logger.record_tabular("{}_proofs_found".format(evalprefix), proofs_found)
            logger.record_tabular("{}_det_proofs_found".format(evalprefix), det_proofs_found)
            logger.record_tabular("{}_found".format(evalprefix),
                                  util.safediv(proofs_found, proofs_tried))
            logger.record_tabular("{}_avg_prooflen".format(evalprefix),
                                  util.safediv(len_sum, proofs_found))
            logger.record_tabular("{}_avg_attempts".format(evalprefix),
                                  util.safediv(attempts_sum, proofs_found))
            logger.dumpkvs()
            print("Found: {}({})/{} proofs".format(proofs_found, det_proofs_found, proofs_tried))
            sys.stdout.flush()
    print("\n\nEVALUATION {}".format(rank))
    print(" evaltime: {}".format(args.evaltime))
    print(" evaldir: {}".format(dirname))
    print(" model_index: {}".format(model_index))
    print(" evaltype: {}".format(args.evaltype))
    print(" evalcount: {}".format(args.evalcount))
    print(" FOUND: {}/{}".format(proofs_found, proofs_tried))
    print(" DET FOUND: {}/{}".format(det_proofs_found, proofs_tried))
    print(" Avg proof length: {}".format(util.safediv(len_sum, proofs_found)))
    print(" Avg attempts: {}".format(util.safediv(attempts_sum, proofs_found)))
    print(" Avg step time: {}".format(env.backend.step_time / env.backend.step_count))
    env.close()
    del env
    del model
def collect_data(db):
    """should be ideally run in background"""
    # the names in interested_clusters are not arbitrary, it has to match the
    # clusternames in static/xml/clusters.xml, e.g. scinet is not the same as
    # Scinet, SciNet, or Sci Net
    # be sure to use config var INTERESTED_CLUSTERS on heroku
    # here, just use scinet is for local testing,
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca").split()
    # interested_clusters = ["orcinus"]

    # sort of variables initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS

    while True:
        for ic in interested_clusters:  # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            created = datetime.datetime.now()
            if raw_xml:
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # having such error for scinet and nestor,
                # MemcachedError: error 37 from memcached_set: SUCCESS
                # guess those xml data may be too big for memcached,
                # using system memory instead for now 2012-06-12
                # MEMC.set("RAW_XML", raw_xml_cache)

                # rcu, qcu: running & queuing core usages
                rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

                # 1. generate reports and cache it
                reports = MEMC.get("REPORTS")
                if not reports:  # meaning: first time
                    reports = {}
                report = ic_obj.gen_report(rcu, qcu, usermap, created)
                reports[ic_obj.clustername] = report
                MEMC.set("REPORTS", reports)

                # 2. insert to database
                insert2db(rcu, qcu, ic_obj, created, db)

                # 3. cache usage data for later plotting
                # dur_queries = [last_day_data, last_week_data, last_month_data,
                #                last_year_data]
                dur_queries = prepare_data_for_plotting(ic, created, db)
                # this is for /.json kind of url
                figs_data_dict[ic] = {i: j for i, j in zip(durations, dur_queries)}
                MEMC.set("FIGS_DATA", figs_data_dict)

                # ldd:last_day_data; lwd:last_week_data
                # lmd:last_month_data; lyd:last_year_data
                ldd, lwd, lmd, lyd = dur_queries
                total_sec_to_now = (lyd[0][-1] - THE_VERY_BEGINNING).total_seconds()
                # inte_coresec: integrate core seconds
                usage_frac_dict[ic] = {
                    'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                    'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                    'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                    'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
                }
                MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. Now let's do the real plotting, first: usage vs. time, then: histogram
        # 1). usage vs. time
        keys = sorted(figs_data_dict.keys())
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur,
                                      figs_data_dict, usage_frac_dict)
                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)  # clear up the memory
                # figure naming pattern should be systematically redesigned
                # when # gets large
                ident = str('_'.join([dur, str(index)]))
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
                db.session.commit()

        # 2). histogram plotting
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]
        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.  # the width of the bars
            ind = np.arange(0, N, width)  # the x locations for the groups
            keys = usage_frac_dict_by_dur[dur].keys()
            # make sure the order is right
            keys.sort(key=lambda k: usage_frac_dict_by_dur[dur][k], reverse=True)
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]

            fig = plt.figure(figsize=(16, 10))
            fig.subplots_adjust(bottom=0.2)  # otherwise, xticklabels cannot be shown fully
            ax = fig.add_subplot(111)
            for i, d in zip(ind, durMeans):
                # 'g': green; 'r': red
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)
            ylim = list(ax.get_ylim())
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05
            ax.plot([0, 100], [1, 1], 'k--')
            ax.set_xlim([0, N * width])
            ax.set_ylim(ylim)
            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur, size=40, family="monospace",
                         bbox={'facecolor': 'red', 'alpha': 0.5})
            ax.title.set_y(1.02)  # offset title position
            ax.set_xticks(ind + width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")

            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)
            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()
            db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # when at last, maybe 10min is too frequent, think about 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)  # sleep 10 min
        else:
            time.sleep(float(dt))
def is_path_list(value):
    re = []
    for p in split_list(value):
        re.append(is_path(p))
    return ','.join(re)
def is_extension_list(value):
    re = []
    for p in split_list(value):
        re.append(is_extension(p))
    return ','.join(re)
def main():
    args = get_configuration()
    args.state_dim = util.get_state_dim(args)
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir, exist_ok=True)

    if args.graph_embedding:
        class MyPolicy(EmbeddingPolicy):
            def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                         reuse=True, **_kwargs):
                super().__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                 args, reuse=reuse, **_kwargs)
    else:
        class MyPolicy(EnigmaPolicy):
            def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                         reuse=True, **_kwargs):
                super().__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                 args, reuse=reuse, **_kwargs)

    t0 = time.time()

    from mpi4py import MPI as mpi
    comm = mpi.COMM_WORLD
    rank = comm.Get_rank()
    all = comm.Get_size()
    gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
    gpu_count = len(gpus)
    gpu = gpus[rank % gpu_count]
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    print("My rank is {} out of {}, using GPU {}".format(rank, all, gpu))

    if args.model_type == "ppo2":
        from stable_baselines import PPO2 as PPO
        env = SubprocVecEnv([(lambda: ProofEnv.ProofEnv(args))
                             for _ in range(args.parallel_envs)])  # , start_method="spawn")
    elif args.model_type == "ppo1":
        args.parallel_envs = 1
        env = DummyVecEnv([lambda: ProofEnv.ProofEnv(args)])
        # from stable_baselines import PPO1 as PPO
        from ppo import PPO1 as PPO

    if args.saved_model is None:
        myPolicy = MyPolicy
        if args.model_type == "ppo2":
            model = PPO(policy=myPolicy,
                        env=env,
                        n_steps=args.actorbatch,
                        # nminibatches=args.optim_stepsize,
                        lam=0.95,
                        gamma=args.gamma,
                        noptepochs=4,
                        ent_coef=args.entcoeff,
                        learning_rate=lambda f: f * 2.5e-4,
                        cliprange=lambda f: f * 0.1,
                        verbose=1)
        elif args.model_type == "ppo1":
            model = PPO(myPolicy, env,
                        verbose=2,
                        timesteps_per_actorbatch=args.actorbatch,
                        schedule=args.lr_schedule,
                        optim_stepsize=args.optim_stepsize,
                        entcoeff=args.entcoeff,
                        optim_batchsize=args.optim_batchsize,
                        gamma=args.gamma)
    else:
        print("Loading model from {}".format(args.saved_model))
        model = PPO.load(args.saved_model)
        model.set_env(env)

    counter = 0
    for ind in range(args.parallel_envs):
        env.env_method("set_model", model, indices=list(range(args.parallel_envs)))

    modelfiles = []
    for train_timestep, train_dir in zip(args.train_timesteps, args.train_dirs):
        problem_files = sorted(util.list_problems(train_dir))
        problem_files = util.split_list(problem_files, all)[rank]
        problem_files_splitted = util.split_list(problem_files, args.parallel_envs,
                                                 extensible=False)
        if args.add_repeating_pretraining:
            for ind in range(args.parallel_envs):
                env.env_method("set_source", problem_files_splitted[ind],
                               indices=[ind], generator_type="repeating")
            # all_thread_timestep = train_timestep * all
            print("PRETRAINING")
            model.learn(total_timesteps=train_timestep)
            print("Pretraining on {} finished in {}".format(
                train_dir, util.format_time(time.time() - t0)))
        for ind in range(args.parallel_envs):
            env.env_method("set_source", problem_files_splitted[ind], indices=[ind])
        # all_thread_timestep = train_timestep * all
        model.learn(total_timesteps=train_timestep)
        modelfile = "{}/ppo1_fcop_train_{}".format(args.outdir, counter)
        modelfiles.append(modelfile)
        if rank == 0:
            model.save(modelfile)
        # logger.logkv("finished_train_problems", counter)
        counter += 1
        print("Training on {} finished in {}".format(
            train_dir, util.format_time(time.time() - t0)))

        statistics_list = env.get_attr("statistics", indices=list(range(args.parallel_envs)))
        blacklist_list = env.get_attr("blacklist", indices=list(range(args.parallel_envs)))
        for i, statistics in enumerate(statistics_list):
            print("ENV {} - {} - blacklist: {}\n".format(rank, i, blacklist_list[i]))
            util.print_problemdict(statistics, rank)
            # for f in statistics:
            #     statistics[f]["mcts"].display_tree([0])

    # util.print_problemdict(env.envs[0].statistics)

    if len(args.train_dirs) > 0 and len(args.train_timesteps) > 0:  # we did training
        print("We have finished training, rank {}".format(rank))
        # for p in problem_files:
        #     vis_policy.vis_policy(env.envs[0], model, p)
        env.close()
        del env
        del model
        # here we wait for everyone
        comm.Barrier()

    print("We have started evaluation, rank {}".format(rank))

    # evaluation without training
    if (args.saved_model is not None) and (len(args.train_dirs) == 0):
        # no training, just evaluation
        modelfiles = [args.saved_model]
    for evaldir in args.evaldirs:
        for model_index, modelfile in enumerate(modelfiles):
            eval.eval_mpi(args, evaldir, modelfile, model_index)
            # here we wait for everyone
            comm.Barrier()
def generateHeaderForCppFile(fn):
    cur_mtime = os.path.getmtime(fn)
    old_mtime = file_data_cache[fn]['mtime'] if fn in file_data_cache else 0
    if cur_mtime > old_mtime:
        members = []
        logging.debug(fn)
        tfn = fn.replace('Private', 'Public').replace('.cpp', '.h')
        if not os.path.isdir(os.path.dirname(tfn)):
            os.makedirs(os.path.dirname(tfn))
        className = re.search('([A-Z0-9a-z]+)\\.cpp', fn).group(1)
        extends = []
        isClassFile = False
        classMods = ''
        isInFunction = False
        currentFunction = None
        is_enum_file = False
        enum_values = []
        try:
            with open(fn) as f:
                for l in f:
                    m = re.match('extends\\s*\\(([^)]+)\\)', l)
                    if m:
                        if m.group(1)[0] in ['U', 'E', 'F', 'T', 'A']:
                            className = m.group(1)[0] + className
                        extends = m.group(1).split(' ')
                        isClassFile = True
                    if isClassFile:
                        if re.match("\\S.+", l):
                            newMember = ClassMember.createFromLine(l)
                            if newMember:
                                members.append(newMember)
                                if newMember.type == 'FUNCTION':
                                    currentFunction = newMember
                        if l.startswith('}'):
                            logging.debug("End of function")
                            currentFunction = None
                        if 'Super::' in l and currentFunction:
                            logging.debug("Found Super::")
                            currentFunction.isOverride = True
                    #
                    m = re.match('blueprintEvent\\((?P<event>[^()]+)(\\((?P<args>[^)]+)\\))?\\)', l)
                    if m:
                        members.append(ClassMember('FUNCTION', cppType='void',
                                                   name=m.group('event'), access='public',
                                                   isConst=False,
                                                   mods='BlueprintImplementableEvent',
                                                   args=m.group('args')))
                    m = re.match('classMods\\((?P<mods>.+)\\)', l)
                    if m:
                        classMods = util.join_list(util.split_list(m.group('mods')))
                    m = re.match('enumValue\\((?P<value>[^)]+)\\)', l)
                    if m:
                        is_enum_file = True
                        enum_values.append(m.group('value'))
        except Exception as ex:
            logging.error("Error parsing CPP: " + str(ex) + '\n' + traceback.format_exc())
        file_data_cache[fn] = {
            'members': members,
            'className': className,
            'extends': extends,
            'classMods': classMods,
            'enum_values': enum_values,
            'mtime': cur_mtime,
            'isClassFile': isClassFile,
            'is_enum_file': is_enum_file,
            'tfn': tfn,
        }
    d = file_data_cache[fn]
    if d['isClassFile']:
        return classfileheadergen.generate_class_file_header(
            fn, list(d['members']), d['tfn'], d['className'], d['extends'], d['classMods'])
    if d['is_enum_file']:
        return enumfileheadergen.generate_enum_file_header(
            fn, list(d['enum_values']), d['tfn'], d['className'], d['classMods'])
    logging.debug("Cannot generate header for " + fn)
    return None