def random_draw(self, event=None):
    if debug:
        print('predictor_ss')
    self.update("random", "draw")
    b = predictor_ss()
    b.init_data()
    rs = b.print_random()
    top_line, bottom_line = utils.format_str(rs, "RAND")
    self.update(top_line, bottom_line)
def best_draw(self, event=None):
    if debug:
        print('predictor_ss')
    self.update("best", "draw")
    b = predictor_ss()
    b.init_data()
    rs = b.print_best_number()
    print('best', rs)
    top_line, bottom_line = utils.format_str(rs, "MAX")
    self.update(top_line, bottom_line)
def test_format_str_max(self):
    rs = [11, 12, 13, 14, 15, 16, 17]
    top_line, bottom_line = utils.format_str(rs, "MAX")
    print(str(top_line))
    print(str(bottom_line))
    print(len(str(top_line)))
    print(len(str(bottom_line)))
    self.assertTrue(len(top_line) > 0 and len(top_line) < 17)
    self.assertTrue(len(bottom_line) > 0 and len(bottom_line) < 17)
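# A minimal hypothetical sketch of the (values, tag) call shape exercised by the
# test above; the real utils.format_str is not shown in this excerpt. The
# assertions cap both lines at 16 characters, which matches a 16x2 character
# display, so the sketch assumes the tag fills the top line and the numbers are
# joined and clipped on the bottom.
def format_str_sketch(values, tag, width=16):
    # Top line: the tag, clipped to the display width.
    top_line = tag[:width]
    # Bottom line: space-joined numbers, clipped to the display width.
    bottom_line = ' '.join(str(v) for v in values)[:width]
    return top_line, bottom_line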
def remove_duplicate_word_from_software(content_line_software):
    # Drop consecutive repeated words, keeping the first occurrence of each run.
    word_list = content_line_software.split()
    new_word_list = []
    for idx, word in enumerate(word_list):
        if idx == 0 or word_list[idx - 1] != word:
            new_word_list.append(word)
    new_software = utils.format_str(new_word_list, sep=' ')
    # if content_line_software != new_software:
    #     print(new_software, '|', content_line_software)
    return new_software
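# Example of the consecutive-duplicate collapse above, assuming
# utils.format_str(words, sep=' ') joins the list like ' '.join:
#   remove_duplicate_word_from_software('apache apache http server server')
#   -> 'apache http server'
# Non-adjacent repeats are intentionally kept: 'gnu libc gnu' is unchanged.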
def get_format_result(accuracy_gt, precision_gt, recall_gt, accuracy_all,
                      precision_all, recall_all, pd_dict, pd_dict_complete,
                      gt_dict, gt_dict_complete, cc):
    str_write_one_cat = utils.format_str([
        cc, 'gt', float(accuracy_gt), float(precision_gt), float(recall_gt),
        'all', float(accuracy_all), float(precision_all), float(recall_all)
    ])
    str_write_one_cat_loose, db_match_rate_str_loose = get_format_result_loose(
        pd_dict, pd_dict_complete, gt_dict, gt_dict_complete, cc, loose=True)
    str_write_one_cat_strict, db_match_rate_str_strict = get_format_result_loose(
        pd_dict, pd_dict_complete, gt_dict, gt_dict_complete, cc, loose=False)
    return str_write_one_cat + '\t' + str_write_one_cat_loose + '\t' + str_write_one_cat_strict, \
        db_match_rate_str_loose + '\t' + db_match_rate_str_strict
def get_format_result_loose(pd_dict, pd_dict_complete, gt_dict,
                            gt_dict_complete, cc, loose=True):
    pd_match_rate, _ = clean_version_dict(pd_dict, cc, loose=loose)
    print(1, _)
    gt_match_rate, _ = clean_version_dict(gt_dict, cc, loose=loose)
    print(2, _)
    pd_match_rate_complete, db_match_rate_str = clean_version_dict(
        pd_dict_complete, cc, loose=loose)
    print(3, db_match_rate_str)
    gt_match_rate_complete, _ = clean_version_dict(gt_dict_complete, cc, loose=loose)
    print(4, _)
    deviation = '-'
    if not isinstance(pd_match_rate, str) and not isinstance(gt_match_rate, str):
        deviation = pd_match_rate - gt_match_rate
    normalized_deviation = '-'
    if gt_match_rate != 0 and deviation != '-':
        normalized_deviation = abs(deviation / gt_match_rate)
    deviation_complete = '-'
    if not isinstance(pd_match_rate_complete, str) and not isinstance(gt_match_rate_complete, str):
        deviation_complete = pd_match_rate_complete - gt_match_rate_complete
    normalized_deviation_complete = '-'
    if gt_match_rate_complete != 0 and deviation_complete != '-':
        normalized_deviation_complete = abs(deviation_complete / gt_match_rate_complete)
    str_write_one_cat = utils.format_str([
        'loose' if loose else 'strict', 'rate', pd_match_rate, deviation,
        normalized_deviation, 'complete_rate', pd_match_rate_complete,
        deviation_complete, normalized_deviation_complete
    ])
    return str_write_one_cat, db_match_rate_str
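# Worked example of the deviation fields computed above: if clean_version_dict
# returns pd_match_rate = 0.8 and gt_match_rate = 0.5, then
#   deviation            = 0.8 - 0.5 = 0.3
#   normalized_deviation = abs(0.3 / 0.5) = 0.6
# Either field stays '-' when a rate comes back as a string placeholder or the
# ground-truth rate is 0, so the formatting step never divides by zero.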
def test_re():
    parser = argparse.ArgumentParser()
    parser.add_argument('--transfer', type=utils.str2bool)
    parser.add_argument('--duplicate', type=utils.str2bool)
    parser.add_argument('--gru', type=int)
    parser.add_argument('--neroutput', type=utils.str2bool)
    # parser.add_argument('--model_notes', type=str)
    parser.add_argument('--labeled', type=utils.str2bool)
    parser.add_argument('--operation', type=str)
    parser.add_argument('--gazetteer', type=utils.str2bool)
    parser.add_argument('--re', type=utils.str2bool)
    parser.add_argument('--category', type=int)
    parser.add_argument('--case_idx', type=str)
    args = parser.parse_args()

    # transfer = args.transfer
    test_data_duplicate = args.duplicate
    test_gru = args.gru
    test_ner_output_not_gt = args.neroutput
    # model_notes = args.model_notes
    labeled = args.labeled
    operation = args.operation
    gazetteer = args.gazetteer
    re = args.re
    category_ = config.num_cat_dict[args.category]
    case_idx = args.case_idx
    transfer = not (category_ == 'memc')

    logging.info('transfer: ' + str(transfer))
    logging.info('test_data_duplicate: ' + str(test_data_duplicate))
    logging.info('test_gru: ' + str(test_gru))
    logging.info('test_ner_output_not_gt: ' + str(test_ner_output_not_gt))
    # logging.info('model_notes: ' + str(model_notes))
    logging.info('labeled: ' + str(labeled))
    logging.info('operation: ' + str(operation))
    logging.info('gazetteer: ' + str(gazetteer))
    logging.info('re: ' + str(re))
    logging.info('category: ' + str(category_))
    logging.info('case_idx: ' + str(case_idx))

    # configure ner output path
    generate_from_ner_output_path = None
    # if labeled:
    #     if test_ner_output_not_gt:
    #         generate_from_ner_output_path = config.labeled_ner_data_output_before_transfer_path
    #         if transfer:
    #             generate_from_ner_output_path = config.labeled_ner_data_output_after_transfer_path
    # else:
    #     generate_from_ner_output_path = config.unlabeled_ner_data_output_path
    #     if os.path.exists(commons.unlabeled_version_dict_file_path_and_name):
    #         copy(commons.unlabeled_version_dict_file_path_and_name,
    #              commons.unlabeled_version_dict_file_path_and_name.replace('.py', '_bkp.py'))
    #         os.remove(commons.unlabeled_version_dict_file_path_and_name)
    if test_ner_output_not_gt:
        generate_from_ner_output_path = config.labeled_ner_data_output_before_transfer_path
        if transfer:
            generate_from_ner_output_path = config.labeled_ner_data_output_after_transfer_path

    # configure model path
    model_path = config.re_model_path
    # if transfer:
    #     model_path = config.re_model_path_after_transfer
    # else:
    #     model_path = config.re_model_path_before_transfer

    # configure test cats, output path
    test_cat_list = None
    output_path = None
    test_flg = None
    # if labeled:
    test_flg = '_test'
    test_cat_list = config.cat_list
    # test_cat_list = ['memc']
    output_path = config.labeled_re_data_output_before_transfer_path
    if transfer:
        test_cat_list = config.cat_list[1:]
        output_path = config.labeled_re_data_output_after_transfer_path
    # else:
    #     test_flg = '_full'
    #     output_path = commons.unlabeled_re_data_output_path
    if test_data_duplicate:
        test_flg += '_duplicate'
    if gazetteer:
        test_flg += '_gaze'
    # if not labeled:
    #     if cat_num is None:
    #         test_cat_list = get_separate_cat_file_list(generate_from_ner_output_path,
    #             commons.cat_list[1:8] + commons.cat_list[9:], test_flg)
    #     else:
    #         test_cat_list = get_separate_cat_file_list(generate_from_ner_output_path,
    #             [commons.cat_list[cat_num]], test_flg)
    if generate_from_ner_output_path is not None:
        test_flg += '_neroutput'
    # f_test_result_name = output_path + test_flg + '.txt'
    result_name = utils.get_f_result_name(case_idx, operation, transfer,
                                          labeled, test_data_duplicate, gazetteer,
                                          neroutput=test_ner_output_not_gt, re=re)
    f_test_result_name = config.sh_output_path + result_name
    logging.info('test result is saved in ' + f_test_result_name)

    title_list = [
        'category', 'gt', 'gt_acc', 'gt_precision', 'gt_recall',
        'all', 'all_acc', 'all_precision', 'all_recall',
        'rate', 'pd_rate', 'deviation', 'norm_deviation',
        'complete_rate', 'pd_complete_rate', 'deviation_complete',
        'norm_deviation_complete'
    ]
    str_write = utils.format_str(title_list) + '\n'
    all_pd_dictionary_complete, merged_pd_dictionary_complete = dict(), dict()
    measurement_performance_str_all_cat = ''
    # for category in test_cat_list:
    for category in ['dirtra']:
        file_idx = None
        if isinstance(category, list):
            category, file_idx = category
        model_id = utils_RE.get_model_list_from_re_model_dir(
            model_path, category, transfer)
        logging.info('category: ' + category)
        logging.info('model id: ' + model_id)
        test_result = main_test(
            model_id,
            category=category,
            duplicate=test_data_duplicate,
            model_path=model_path,
            save_prediction=False,
            test_gru=test_gru,
            return_pd_and_gt_version_dict=labeled,
            return_pd_version_dict=not labeled,
            output_path=output_path,
            generate_from_ner_output_path=generate_from_ner_output_path,
            labeled=labeled,
            category_separate_idx=file_idx,
            gaze=gazetteer,
            transfer=transfer,
        )
        if test_result is None:
            logging.info(category + ' re data is none!')
            continue
        pd_dictionary = None
        if labeled:
            pd_dictionary, gt_dictionary, \
                prec_gt_, rec_gt_, f1_gt_, acc_gt_, \
                prec_all_, rec_all_, f1_all_, acc_all_ = test_result
            # todo: prepare a sample data
            '''
            pd_dictionary, pd_dictionary_complete = complete_version_dict_from_dict(category, pd_dictionary)
            gt_dictionary, gt_dictionary_complete = complete_version_dict_from_dict(category, gt_dictionary)
            re_performance_str, measurement_performance_str = get_format_result(
                acc_gt_, prec_gt_, rec_gt_, acc_all_, prec_all_, rec_all_,
                pd_dictionary, pd_dictionary_complete, gt_dictionary, gt_dictionary_complete, category)
            str_write += re_performance_str + '\n'
            measurement_performance_str_all_cat += measurement_performance_str + '\n'
            print(5, measurement_performance_str)
            '''
        # else:
        #     pd_dictionary = test_result
        #     _, pd_dictionary_complete = complete_version_dict_from_dict(category, pd_dictionary)
        #     write_version_dict_to_py_file(category, pd_dictionary_complete, labeled_data=False,
        #                                   category_separate_idx=file_idx, gaze=gazetteer)
        #     if file_idx is not None:
        #         all_pd_dictionary_complete[category + '_' + str(file_idx)] = pd_dictionary_complete
        #     else:
        #         all_pd_dictionary_complete[category] = pd_dictionary_complete
        #     all_pd_dictionary_complete[category] = pd_dictionary_complete
        #     merged_pd_dictionary_complete = utils.merge_dict_to_write(merged_pd_dictionary_complete, pd_dictionary_complete)

    # merged_pd_dictionary, merged_pd_dictionary_complete = complete_version_dict_from_dict(None, merged_pd_dictionary, all_data_name='all')
    # measurement_performance_str_all_cat += '\n' + measure_by_year_os_software(all_pd_dictionary_complete,
    #     merged_pd_dictionary_complete, 50, debug_mode=False, by_year=True, by_os=True, by_software=True) + '\n'
    with open(f_test_result_name, 'a') as f_test_result:
        f_test_result.write('\n\nmodel name: ' + model_id + '\n')
        f_test_result.write('model notes: ' + result_name + '\n')
        # f_test_result.write('model notes: ' + model_notes + '\n')
        if generate_from_ner_output_path is not None:
            f_test_result.write('ner output path: ' + generate_from_ner_output_path + '\n')
    logging.info('\n\nmodel name: ' + model_id + '\nmodel notes: ' + result_name + '\n')
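# Note on the header row above: utils.format_str(title_list) presumably renders
# the list as one delimiter-separated row (likely tab-separated, matching the
# '\t' used to join the per-category pieces in get_format_result), so the
# titles line up with the value columns when the result file is read as TSV.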
def test_ner():
    category = config.num_cat_dict[category_]
    word_index, word_cnt = create_word_index([config.hash_file])
    TRAIN_DATA = config.labeled_ner_data_input_path + category + '_train' + config.data_suffix
    # TRAIN_DATA = include_valid_set()
    TEST_DATA = config.labeled_ner_data_input_path + category + '_test' + config.data_suffix
    wx, y, m = read_data(TRAIN_DATA, word_index)
    twx, ty, tm = read_data(TEST_DATA, word_index)
    char_index, char_cnt = create_char_index([config.hash_file])
    x, cm = read_char_data(TRAIN_DATA, char_index)
    tx, tcm = read_char_data(TEST_DATA, char_index)
    gaze, tgaze = None, None
    transfer = not (category == 'memc')
    model_name = get_ner_model_from_dir(config.ner_model_path, category)
    logging.info('test model is ' + model_name)
    test_cat_list = [category]
    # if transfer:
    #     test_cat_list = config.cat_list[1:]
    # save_path = config.ner_model_path_before_transfer
    # if transfer:
    #     save_path = config.ner_model_path_after_transfer
    test_flg = None
    ner_data_input_path = None
    output_path = None
    labeled = True
    if labeled:
        test_flg = '_test'
        ner_data_input_path = config.labeled_ner_data_input_path
        output_path = config.labeled_ner_data_output_after_transfer_path
        if not transfer:
            output_path = config.labeled_ner_data_output_before_transfer_path
        if duplicate:
            test_flg += '_duplicate'
    # elif not labeled:
    #     test_flg = '_full'
    #     if duplicate:
    #         test_flg += '_duplicate'
    #     ner_data_input_path = commons.unlabeled_ner_data_input_path
    #     output_path = commons.unlabeled_ner_data_output_path
    #     # test_cat_list = commons.cat_list[9:]
    #     if cat_num is None:
    #         test_cat_list = separate_unlabeled_ner_input_data(ner_data_input_path,
    #             commons.cat_list[1:8] + commons.cat_list[9:], test_flg)
    #     else:
    #         test_cat_list = separate_unlabeled_ner_input_data(ner_data_input_path,
    #             [commons.cat_list[cat_num]], test_flg)
    # else:
    #     print('ERROR')
    #     return
    # f_test_result_name = output_path + test_flg + '.txt'
    '''
    result_name = utils.get_f_result_name(case_idx, 'test', transfer, labeled, duplicate, gazetteer,
                                          neroutput=False, ner=True)
    f_test_result_name = config.sh_output_path + result_name
    logging.info('test result is saved in ' + f_test_result_name)
    with open(f_test_result_name, 'a') as f_test_result:
        f_test_result.write('\n\nmodel name: ' + model_name + '\n')
        # f_test_result.write('model notes: ' + str(model_notes) + '\n')
        f_test_result.write('model notes: ' + result_name + '\n')
        f_test_result.write(utils.format_str(
            ['category', 'acc', 'prec_version', 'recall_version', 'prec_software', 'recall_software']) + '\n')
    '''
    logging.info(test_cat_list)
    for category in test_cat_list:
        # for category in ['csrf', 'xss']:
        logging.info(category)
        file_idx = None
        if isinstance(category, list):
            category, file_idx = category
        SMALL_DATA_NAME = category + test_flg + config.data_suffix
        if file_idx is not None:
            SMALL_DATA_NAME += '_' + str(file_idx)
        SMALL_DATA = ner_data_input_path + SMALL_DATA_NAME
        test_data_dic = generate_line_dict_from_ner_data(SMALL_DATA)
        SMALL_DATA_PD_NOGAZE = output_path + SMALL_DATA_NAME
        SMALL_DATA_PD_GAZE = SMALL_DATA_PD_NOGAZE.replace(config.data_suffix, '_gaze' + config.data_suffix)
        SMALL_DATA_PD = SMALL_DATA_PD_GAZE if gazetteer else SMALL_DATA_PD_NOGAZE
        tx, tcm = read_char_data(SMALL_DATA, char_index)
        twx, ty, tm = read_data(SMALL_DATA, word_index)
        py = None
        if not gazetteer:
            logging.info('reading data from ' + SMALL_DATA)
            # tx, tcm = read_char_data(SMALL_DATA, char_index)
            # twx, ty, tm = read_data(SMALL_DATA, word_index)
            logging.info('building model ...')
            model = network_NER.cnn_rnn(char_cnt, len(config.labels),
                                        word_cnt, config.ner_model_path, category, use_crf=True)
            model.add_data(x, y, m, wx, cm, gaze, tx, ty, tm, twx, tcm, tgaze, test_data_dic)
            model.build()
            model.load_params(config.ner_model_path + model_name)
            # model_bkp = deepcopy(model)
            logging.info('finish loading, predicting starts...')
            py, py_score = model.predict(tx, tm, twx, tcm, tgaze=None,
                                         reload_model_path=config.ner_model_path + model_name)
            word2embedding = read_word2embedding()
            logging.info("set word embeddings...")
            model.set_embedding(word2embedding, word_index)
            # if 'crf' not in str(model_notes):
            #     ttt = 0.5
            #     py = fast(py_score, axis=1, threshold=float(ttt))
            # logging.info('prediction is saved in ' + SMALL_DATA_PD)
        else:
            logging.info('applying gaze on no-gaze prediction result path :' + SMALL_DATA_PD)
            _, py, _ = read_data(SMALL_DATA_PD_NOGAZE, word_index)
        extracted_f = open(SMALL_DATA_PD, 'w')
        # pdb.set_trace()
        acc, f1, metric_result = evaluate_each_class(py, test_data_dic, ty, tm,
                                                     apply_rule=gazetteer,
                                                     file_result=extracted_f, debug=False)
        extracted_f.close()
        logging.info(str(acc) + ' ' + str(f1) + ' ' + str(metric_result))
        str_write = utils.format_str(
            [category, float(acc), metric_result['V'][3], metric_result['V'][4],
             metric_result['N'][3], metric_result['N'][4]])
        logging.info(str_write)
def remove_last_dot_word(company):
    company = company.lower()
    company_split = company.split()
    # Guard against empty input before indexing the last word.
    if company_split and company_split[-1].endswith('.'):
        company = utils.format_str(company_split[:-1], sep=' ')
    return company
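# Example, assuming utils.format_str(words, sep=' ') joins the list like ' '.join:
#   remove_last_dot_word('Apache Software Foundation Inc.') -> 'apache software foundation'
#   remove_last_dot_word('Red Hat') -> 'red hat'   (no trailing dot; only lowercased)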
save_and_eval = evaluate.start_eval_worker(state, env)
save_and_eval(policy, 0)  # eval untrained policy

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True

while total_timesteps < state.max_timesteps:
    if done:
        if total_timesteps > 0:
            # episode_reward / episode_timesteps are updated by the environment
            # stepping code elided from this excerpt.
            logging.info(
                utils.format_str(total=total_timesteps, episode=episode_num,
                                 episode_T=episode_timesteps, reward=episode_reward))
            # if args.policy_name == "TD3":
            policy.train(replay_buffer, episode_timesteps, state.batch_size,
                         state.discount, state.tau, state.policy_noise,
                         state.noise_clip, state.policy_freq)
            # else:
            #     policy.train(replay_buffer, episode_timesteps, args.batch_size, args.discount, args.tau)

        # Evaluate episode
        if timesteps_since_eval >= state.eval.freq:
            timesteps_since_eval %= state.eval.freq
            save_and_eval(policy, total_timesteps)

        # Reset environment
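# The modulo in the eval throttle above keeps evaluations roughly state.eval.freq
# steps apart even though episodes rarely end exactly on the boundary: e.g. with
# freq = 5000 and timesteps_since_eval = 5321 at an episode end, the reduction
# to 321 carries the overshoot forward so the next eval lands ~5000 steps later.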
# PONDS
# density: number of ponds/km^2
DENSITY = 0.4
# minimum distance: used to buffer out from existing ponds to create territories
MINIMUM_DISTANCE = 1000
POND_ABANDONMENT_PROBABILITY = 10
DAM_HEIGHT = 9

# GARDENS
PROXIMITY_BUFFER = 500
PER_CAPITA_GARDEN_AREA = 15
POPULATION_VARIATION = range(-5, 6)
ABANDONMENT_PROBABILITY = 5

# INTERNAL CONFIGURATION
trial = utils.format_str(TRIAL_NAME)[:20]
TRIAL_DIR = os.path.join(DATA_DIR, '{}_{}'.format(REGION, trial))
INPUT_DIR = os.path.join(TRIAL_DIR, 'inputs')
OUTPUT_DIR = os.path.join(TRIAL_DIR, 'outputs')
TEMP_DIR = os.path.join(DATA_DIR, 'temp')
FIRE_DIR = os.path.join(INPUT_DIR, 'fire')
WTR_DIR = os.path.join(INPUT_DIR_FULL, 'tables', 'fire', 'wtr')

# region-specific spatial inputs created by initiate_disturbance_inputs or manual farsite manipulation
# ecocommunities lifecycle:
#   initial full extent ec: ECOCOMMUNITIES_FE
#   modified full extent ec (initiate_disturbance_inputs): s.TEMP_DIR, 'ecocommunities_fe.tif'
#   initial region ec (initiate_disturbance_inputs): ecocommunities below
#   yearly output ecs (disturbance scripts): s.OUTPUT_DIR, self._ecocommunities_filename % self.year
ecocommunities = os.path.join(INPUT_DIR, 'ecocommunities.tif')
reference_ascii = os.path.join(INPUT_DIR, 'reference_grid.asc')
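# Illustrative directory layout, assuming hypothetical values REGION = 'bk' and
# a TRIAL_NAME that utils.format_str sanitizes to 'baseline' (then truncated to
# 20 characters):
#   <DATA_DIR>/bk_baseline/inputs/   <- INPUT_DIR (spatial inputs, fire/)
#   <DATA_DIR>/bk_baseline/outputs/  <- OUTPUT_DIR (yearly ecocommunities rasters)
#   <DATA_DIR>/temp/                 <- TEMP_DIR (shared scratch space)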