Esempio n. 1
0
    def random_draw(self, event=None):
        """Fetch a random draw from a fresh ``predictor_ss`` and display it.

        ``self.update`` is called twice: first with the literal pair
        ``"random"``/``"draw"``, then with the two lines produced by
        ``utils.format_str(..., "RAND")``.

        ``event`` is accepted but never read (presumably so the method can
        be used as an event callback -- TODO confirm).
        """
        if debug:
            print('predictor_ss')
        self.update("random", "draw")

        predictor = predictor_ss()
        predictor.init_data()
        drawn = predictor.print_random()

        # format_str must yield exactly two items: the top and bottom line.
        top_line, bottom_line = utils.format_str(drawn, "RAND")
        self.update(top_line, bottom_line)
Esempio n. 2
0
 def best_draw(self, event=None):
     """Fetch the "best number" result from a fresh ``predictor_ss`` and display it.

     ``self.update`` is called twice: first with the literal pair
     ``"best"``/``"draw"``, then with the two lines produced by
     ``utils.format_str(..., "MAX")``. The raw result is also printed to
     stdout.

     ``event`` is accepted but never read (presumably so the method can be
     used as an event callback -- TODO confirm).
     """
     if debug:
         print('predictor_ss')
     self.update("best", "draw")

     predictor = predictor_ss()
     predictor.init_data()
     best_numbers = predictor.print_best_number()
     print('best', best_numbers)

     # format_str must yield exactly two items: the top and bottom line.
     top_line, bottom_line = utils.format_str(best_numbers, "MAX")
     self.update(top_line, bottom_line)
 def test_format_str_max(self):
     """Check that "MAX" formatting of seven numbers yields two lines of 1-16 characters."""
     numbers = list(range(11, 18))
     top_line, bottom_line = utils.format_str(numbers, "MAX")
     lines = (top_line, bottom_line)

     # Echo both lines, then both lengths, to help when the test fails.
     for line in lines:
         print(str(line))
     for line in lines:
         print(len(str(line)))

     for line in lines:
         self.assertTrue(0 < len(line) < 17)
Esempio n. 4
0
def remove_duplicate_word_from_software(content_line_software):
    """Collapse runs of identical consecutive words in a whitespace-separated string.

    The input is split on whitespace; a word is kept only if it is the first
    word or differs from the word immediately before it. The surviving words
    are joined through ``utils.format_str(..., sep=' ')``.
    """
    words = content_line_software.split()
    deduplicated = [
        current
        for position, current in enumerate(words)
        if position == 0 or words[position - 1] != current
    ]
    return utils.format_str(deduplicated, sep=' ')
def get_format_result(accuracy_gt, precision_gt, recall_gt, accuracy_all,
                      precision_all, recall_all, pd_dict, pd_dict_complete,
                      gt_dict, gt_dict_complete, cc):
    """Build the result row and the db-match-rate string for one category.

    Returns a 2-tuple:
      * the accuracy/precision/recall row followed by the loose and the
        strict rows from ``get_format_result_loose``, tab-separated;
      * the loose and strict db-match-rate strings, tab-separated.
    """
    metric_fields = [
        cc,
        'gt',
        float(accuracy_gt),
        float(precision_gt),
        float(recall_gt),
        'all',
        float(accuracy_all),
        float(precision_all),
        float(recall_all),
    ]
    metrics_row = utils.format_str(metric_fields)

    # Same dictionaries, evaluated once in loose mode and once in strict mode.
    dict_args = (pd_dict, pd_dict_complete, gt_dict, gt_dict_complete, cc)
    loose_row, loose_db_rate = get_format_result_loose(*dict_args, loose=True)
    strict_row, strict_db_rate = get_format_result_loose(*dict_args,
                                                         loose=False)

    combined_row = metrics_row + '\t' + loose_row + '\t' + strict_row
    combined_db_rate = loose_db_rate + '\t' + strict_db_rate
    return combined_row, combined_db_rate
def get_format_result_loose(pd_dict,
                            pd_dict_complete,
                            gt_dict,
                            gt_dict_complete,
                            cc,
                            loose=True):
    """Format match rates and their deviation from ground truth for one category.

    ``clean_version_dict`` is run on the predicted and ground-truth
    dictionaries and on their "complete" counterparts. The second value of
    each call is printed with a numeric tag (1-4).

    Returns ``(formatted_row, db_match_rate_str)`` where
    ``db_match_rate_str`` is the second value returned for
    ``pd_dict_complete``.
    """

    def _deviation_pair(pd_rate, gt_rate):
        # '-' stands for "not computable": either rate came back as a str,
        # or (for the normalised value) the ground-truth rate is zero.
        difference = '-'
        if type(pd_rate) != str and type(gt_rate) != str:
            difference = pd_rate - gt_rate
        normalized = '-'
        if gt_rate != 0 and difference != '-':
            normalized = abs(difference / gt_rate)
        return difference, normalized

    pd_match_rate, pd_extra = clean_version_dict(pd_dict, cc, loose=loose)
    print(1, pd_extra)
    gt_match_rate, gt_extra = clean_version_dict(gt_dict, cc, loose=loose)
    print(2, gt_extra)
    pd_match_rate_complete, db_match_rate_str = clean_version_dict(
        pd_dict_complete, cc, loose=loose)
    print(3, db_match_rate_str)
    gt_match_rate_complete, gt_extra_complete = clean_version_dict(
        gt_dict_complete, cc, loose=loose)
    print(4, gt_extra_complete)

    deviation, normalized_deviation = _deviation_pair(pd_match_rate,
                                                      gt_match_rate)
    deviation_complete, normalized_deviation_complete = _deviation_pair(
        pd_match_rate_complete, gt_match_rate_complete)

    mode_label = 'loose' if loose else 'strict'
    row_fields = [
        mode_label,
        'rate',
        pd_match_rate,
        deviation,
        normalized_deviation,
        'complete_rate',
        pd_match_rate_complete,
        deviation_complete,
        normalized_deviation_complete,
    ]
    return utils.format_str(row_fields), db_match_rate_str
Esempio n. 7
0
def test_re():
    """Run the RE test pass from command-line flags and append a header to the result file.

    Flags parsed: ``--transfer``, ``--duplicate``, ``--gru``, ``--neroutput``,
    ``--labeled``, ``--operation``, ``--gazetteer``, ``--re``, ``--category``
    and ``--case_idx``. The parsed ``--transfer`` value is not used:
    ``transfer`` is derived from the category instead (true unless the
    category is ``'memc'``).

    ``main_test`` is currently called for the hard-coded category
    ``'dirtra'`` only; the loop over ``test_cat_list`` is commented out.
    The per-category result formatting is disabled (wrapped in a bare string
    literal), so ``str_write`` and ``measurement_performance_str_all_cat``
    are built but never written. What is written, in append mode, is the
    model name, the result name and (when set) the NER output path.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--transfer', type=utils.str2bool)
    parser.add_argument('--duplicate', type=utils.str2bool)
    parser.add_argument('--gru', type=int)
    parser.add_argument('--neroutput', type=utils.str2bool)
    # parser.add_argument('--model_notes', type=str)
    parser.add_argument('--labeled', type=utils.str2bool)

    parser.add_argument('--operation', type=str)
    parser.add_argument('--gazetteer', type=utils.str2bool)
    parser.add_argument('--re', type=utils.str2bool)
    parser.add_argument('--category', type=int)

    parser.add_argument('--case_idx', type=str)

    args = parser.parse_args()

    # transfer = args.transfer
    test_data_duplicate = args.duplicate
    test_gru = args.gru
    test_ner_output_not_gt = args.neroutput
    # model_notes = args.model_notes
    labeled = args.labeled
    operation = args.operation
    gazetteer = args.gazetteer
    # NOTE: this local shadows the name `re` for the rest of the function.
    re = args.re
    # --category is an int that is looked up in config.num_cat_dict.
    category_ = config.num_cat_dict[args.category]
    case_idx = args.case_idx

    # Derived from the category; the --transfer flag value is ignored.
    transfer = not (category_ == 'memc')

    logging.info('transfer: ' + str(transfer))
    logging.info('test_data_duplicate: ' + str(test_data_duplicate))
    logging.info('test_gru: ' + str(test_gru))
    logging.info('test_ner_output_not_gt: ' + str(test_ner_output_not_gt))
    # logging.info('model_notes: ' + str(model_notes))
    logging.info('labeled: ' + str(labeled))
    logging.info('operation: ' + str(operation))
    logging.info('gazetteer: ' + str(gazetteer))
    logging.info('re: ' + str(re))
    logging.info('category: ' + str(category_))
    logging.info('case_idx: ' + str(case_idx))

    # configure ner output path
    generate_from_ner_output_path = None
    # if labeled:
    #     if test_ner_output_not_gt:
    #         generate_from_ner_output_path = config.labeled_ner_data_output_before_transfer_path
    #         if transfer:
    #             generate_from_ner_output_path = config.labeled_ner_data_output_after_transfer_path
    # else:
    #     generate_from_ner_output_path = config.unlabeled_ner_data_output_path

    # if os.path.exists(commons.unlabeled_version_dict_file_path_and_name):
    #     copy(commons.unlabeled_version_dict_file_path_and_name,
    #          commons.unlabeled_version_dict_file_path_and_name.replace('.py', '_bkp.py'))
    #     os.remove(commons.unlabeled_version_dict_file_path_and_name)

    # Stays None unless --neroutput is truthy.
    if test_ner_output_not_gt:
        generate_from_ner_output_path = config.labeled_ner_data_output_before_transfer_path
        if transfer:
            generate_from_ner_output_path = config.labeled_ner_data_output_after_transfer_path

    # configure model path
    model_path = config.re_model_path
    # if transfer:
    #     model_path = config.re_model_path_after_transfer
    # else:
    #     model_path = config.re_model_path_before_transfer

    # configure test cats, output path
    test_cat_list = None
    output_path = None
    test_flg = None

    # if labeled:
    test_flg = '_test'
    test_cat_list = config.cat_list
    # test_cat_list = ['memc']
    output_path = config.labeled_re_data_output_before_transfer_path

    if transfer:
        test_cat_list = config.cat_list[1:]
        output_path = config.labeled_re_data_output_after_transfer_path

    # else:
    #     test_flg = '_full'
    #     output_path = commons.unlabeled_re_data_output_path

    if test_data_duplicate:
        test_flg += '_duplicate'

    if gazetteer:
        test_flg += '_gaze'

    # if not labeled:
    #     if cat_num is None:
    #         test_cat_list = get_separate_cat_file_list(generate_from_ner_output_path, commons.cat_list[1:8] + commons.cat_list[9:], test_flg)
    #     else:
    #         test_cat_list = get_separate_cat_file_list(generate_from_ner_output_path, [commons.cat_list[cat_num]], test_flg)

    if generate_from_ner_output_path is not None:
        test_flg += '_neroutput'

    # f_test_result_name = output_path + test_flg + '.txt'
    result_name = utils.get_f_result_name(case_idx,
                                          operation,
                                          transfer,
                                          labeled,
                                          test_data_duplicate,
                                          gazetteer,
                                          neroutput=test_ner_output_not_gt,
                                          re=re)
    f_test_result_name = config.sh_output_path + result_name
    logging.info('test result is saved in ' + f_test_result_name)

    title_list = [
        'category', 'gt', 'gt_acc', 'gt_precision', 'gt_recall', 'all',
        'all_acc', 'all_precision', 'all_recall', 'rate', 'pd_rate',
        'deviation', 'norm_deviation', 'complete_rate', 'pd_complete_rate',
        'deviation_complete', 'norm_deviation_complete'
    ]
    # NOTE(review): str_write is only extended inside the disabled block
    # below and is never written to the result file.
    str_write = utils.format_str(title_list) + '\n'

    all_pd_dictionary_complete, merged_pd_dictionary_complete = dict(), dict()
    measurement_performance_str_all_cat = ''

    # for category in test_cat_list:
    # Hard-coded single category; test_cat_list computed above is not used.
    for category in ['dirtra']:
        file_idx = None
        # A [category, file_idx] pair is unpacked; never taken for 'dirtra'.
        if type(category) == list:
            category, file_idx = category
        model_id = utils_RE.get_model_list_from_re_model_dir(
            model_path, category, transfer)
        logging.info('category: ' + category)
        logging.info('model id: ' + model_id)
        test_result = main_test(
            model_id,
            category=category,
            duplicate=test_data_duplicate,
            model_path=model_path,
            save_prediction=False,
            test_gru=test_gru,
            return_pd_and_gt_version_dict=labeled,
            return_pd_version_dict=not labeled,
            output_path=output_path,
            generate_from_ner_output_path=generate_from_ner_output_path,
            labeled=labeled,
            category_separate_idx=file_idx,
            gaze=gazetteer,
            transfer=transfer,
        )
        if test_result is None:
            logging.info(category + ' re data is none!')
            continue
        pd_dictionary = None
        if labeled:
            # With labeled data main_test's result is unpacked as 10 values.
            pd_dictionary, gt_dictionary, \
            prec_gt_, rec_gt_, f1_gt_, acc_gt_, \
            prec_all_, rec_all_, f1_all_, acc_all_ = test_result
            # todo: prepare a sample data
            # The bare string below is disabled code, not documentation.
            '''
            pd_dictionary, pd_dictionary_complete = complete_version_dict_from_dict(category, pd_dictionary)
            gt_dictionary, gt_dictionary_complete = complete_version_dict_from_dict(category, gt_dictionary)

            re_performance_str, measurement_performance_str = get_format_result(acc_gt_, prec_gt_, rec_gt_, acc_all_, prec_all_, rec_all_, pd_dictionary, pd_dictionary_complete, gt_dictionary, gt_dictionary_complete, category)
            str_write += re_performance_str + '\n'
            measurement_performance_str_all_cat += measurement_performance_str + '\n'
            print(5, measurement_performance_str)
            '''

        # else:
        #     pd_dictionary = test_result
        #     _, pd_dictionary_complete = complete_version_dict_from_dict(category, pd_dictionary)
        #     write_version_dict_to_py_file(category, pd_dictionary_complete, labeled_data=False, category_separate_idx=file_idx, gaze=gazetteer)

        # if file_idx is not None:
        #     all_pd_dictionary_complete[category + '_' + str(file_idx)] = pd_dictionary_complete
        # else:
        #     all_pd_dictionary_complete[category] = pd_dictionary_complete

        # all_pd_dictionary_complete[category] = pd_dictionary_complete
        # merged_pd_dictionary_complete = utils.merge_dict_to_write(merged_pd_dictionary_complete, pd_dictionary_complete)

    # merged_pd_dictionary, merged_pd_dictionary_complete = complete_version_dict_from_dict(None, merged_pd_dictionary, all_data_name='all')
    # measurement_performance_str_all_cat += '\n' + measure_by_year_os_software(all_pd_dictionary_complete, merged_pd_dictionary_complete, 50, debug_mode=False, by_year=True, by_os=True, by_software=True) + '\n'

    # Append mode: earlier content of the result file is kept.
    # model_id is the value left over from the last loop iteration.
    with open(f_test_result_name, 'a') as f_test_result:
        f_test_result.write('\n\nmodel name: ' + model_id + '\n')
        f_test_result.write('model notes: ' + result_name + '\n')

        # f_test_result.write('model notes: ' + model_notes + '\n')

        if generate_from_ner_output_path is not None:
            f_test_result.write('ner output path: ' +
                                generate_from_ner_output_path + '\n')
        logging.info('\n\nmodel name: ' + model_id + '\nmodel notes: ' +
                     result_name + '\n')
Esempio n. 8
0
def test_ner():
    """Evaluate the saved NER model for one category and log its metrics.

    Reads ``category_``, ``duplicate`` and ``gazetteer``, none of which is
    assigned in this function, so they must exist at module level.

    Steps visible here:
      * build word/char indices from ``config.hash_file`` and read the
        category's train and test files;
      * pick the model file via ``get_ner_model_from_dir``;
      * for each entry of ``test_cat_list`` (just the one category): when
        ``gazetteer`` is falsy, build the ``cnn_rnn`` model, load its
        parameters and predict; otherwise read the labels back from the
        no-gaze prediction file;
      * run ``evaluate_each_class``, which is handed a file opened for
        writing at the prediction path, and log accuracy, F1 and the
        formatted metrics row.

    Writing the result-file header is disabled (wrapped in a bare string
    literal).
    """
    category = config.num_cat_dict[category_]

    word_index, word_cnt = create_word_index([config.hash_file])
    TRAIN_DATA = config.labeled_ner_data_input_path + category + '_train' + config.data_suffix
    # TRAIN_DATA = include_valid_set()
    TEST_DATA = config.labeled_ner_data_input_path + category + '_test' + config.data_suffix

    wx, y, m = read_data(TRAIN_DATA, word_index)
    twx, ty, tm = read_data(TEST_DATA, word_index)
    char_index, char_cnt = create_char_index([config.hash_file])

    x, cm = read_char_data(TRAIN_DATA, char_index)
    tx, tcm = read_char_data(TEST_DATA, char_index)
    gaze, tgaze = None, None

    transfer = not (category == 'memc')

    model_name = get_ner_model_from_dir(config.ner_model_path, category)
    logging.info('test model is ' + model_name)

    test_cat_list = [category]
    # if transfer:
    #     test_cat_list = config.cat_list[1:]

    # save_path = config.ner_model_path_before_transfer
    # if transfer:
    #     save_path = config.ner_model_path_after_transfer

    test_flg = None
    ner_data_input_path = None
    output_path = None

    # Hard-coded: only the labeled-data branch is live.
    labeled = True
    if labeled:
        test_flg = '_test'
        ner_data_input_path = config.labeled_ner_data_input_path
        output_path = config.labeled_ner_data_output_after_transfer_path
        if not transfer:
            output_path = config.labeled_ner_data_output_before_transfer_path
        if duplicate:
            test_flg += '_duplicate'
    # elif not labeled:
    #     test_flg = '_full'
    #     if duplicate:
    #         test_flg += '_duplicate'
    #     ner_data_input_path = commons.unlabeled_ner_data_input_path
    #     output_path = commons.unlabeled_ner_data_output_path
    #     # test_cat_list = commons.cat_list[9:]
    #     if cat_num is None:
    #         test_cat_list = separate_unlabeled_ner_input_data(ner_data_input_path,
    #                                                           commons.cat_list[1:8] + commons.cat_list[9:], test_flg)
    #     else:
    #         test_cat_list = separate_unlabeled_ner_input_data(ner_data_input_path, [commons.cat_list[cat_num]],
    #                                                           test_flg)
    # else:
    #     print('ERROR')
    #     return

    # f_test_result_name = output_path + test_flg + '.txt'
    # The bare string below is disabled code, not documentation.
    '''
    result_name = utils.get_f_result_name(case_idx, 'test', transfer, labeled, duplicate, gazetteer,
                                            neroutput=False, ner=True)
    f_test_result_name = config.sh_output_path + result_name

    logging.info('test result is saved in ' + f_test_result_name)
    with open(f_test_result_name, 'a') as f_test_result:
        f_test_result.write('\n\nmodel name: ' + model_name + '\n')
        # f_test_result.write('model notes: ' + str(model_notes) + '\n')
        f_test_result.write('model notes: ' + result_name + '\n')
        f_test_result.write(utils.format_str(
            ['category', 'acc', 'prec_version', 'recall_version', 'prec_software', 'recall_software']) + '\n')
    '''
    logging.info(test_cat_list)
    for category in test_cat_list:
        # for category in ['csrf', 'xss']:
        logging.info(category)

        file_idx = None
        # A [category, file_idx] pair is unpacked into its two parts.
        if type(category) == list:
            category, file_idx = category

        SMALL_DATA_NAME = category + test_flg + config.data_suffix

        if file_idx is not None:
            SMALL_DATA_NAME += '_' + str(file_idx)

        SMALL_DATA = ner_data_input_path + SMALL_DATA_NAME
        test_data_dic = generate_line_dict_from_ner_data(SMALL_DATA)
        # Prediction file paths: the gaze variant inserts '_gaze' before the suffix.
        SMALL_DATA_PD_NOGAZE = output_path + SMALL_DATA_NAME
        SMALL_DATA_PD_GAZE = SMALL_DATA_PD_NOGAZE.replace(config.data_suffix, '_gaze' + config.data_suffix)
        SMALL_DATA_PD = SMALL_DATA_PD_GAZE if gazetteer else SMALL_DATA_PD_NOGAZE

        tx, tcm = read_char_data(SMALL_DATA, char_index)
        twx, ty, tm = read_data(SMALL_DATA, word_index)

        py = None

        if not gazetteer:
            logging.info('reading data from ' + SMALL_DATA)

            # tx, tcm = read_char_data(SMALL_DATA, char_index)
            # twx, ty, tm = read_data(SMALL_DATA, word_index)
            logging.info('building model ...')
            model = network_NER.cnn_rnn(char_cnt, len(config.labels), word_cnt, config.ner_model_path, category, use_crf=True)
            model.add_data(x, y, m, wx, cm, gaze, tx, ty, tm, twx, tcm, tgaze, test_data_dic)
            model.build()

            model.load_params(config.ner_model_path + model_name)
            # model_bkp = deepcopy(model)
            logging.info('finish loading, predicting starts...')

            py, py_score = model.predict(tx, tm, twx, tcm, tgaze=None, reload_model_path=config.ner_model_path + model_name)

            # NOTE(review): embeddings are set after predict() has already
            # run -- confirm this ordering is intended.
            word2embedding = read_word2embedding()

            logging.info("set word embeddings...")
            model.set_embedding(word2embedding, word_index)

            # if 'crf' not in str(model_notes):
            #     ttt = 0.5
            #     py = fast(py_score, axis=1, threshold=float(ttt))
            #     logging.info('prediction is saved in ' + SMALL_DATA_PD)
        else:
            # Gazetteer mode reads labels back from the no-gaze prediction file.
            logging.info('applying gaze on no-gaze prediction result path :' + SMALL_DATA_PD)
            _, py, _ = read_data(SMALL_DATA_PD_NOGAZE, word_index)
        # Opened with 'w': an existing prediction file is truncated.
        extracted_f = open(SMALL_DATA_PD, 'w')
        # pdb.set_trace()
        acc, f1, metric_result = evaluate_each_class(py, test_data_dic, ty, tm, apply_rule=gazetteer,
                                                     file_result=extracted_f, debug=False)
        extracted_f.close()
        logging.info(str(acc) + ' ' + str(f1) + ' ' + str(metric_result))

        # Row fields: category, accuracy, then items 3 and 4 of the 'V' and 'N' entries.
        str_write = utils.format_str(
            [category, float(acc), metric_result['V'][3], metric_result['V'][4],
             metric_result['N'][3], metric_result['N'][4]])
        logging.info(str_write)
        '''
Esempio n. 9
0
def remove_last_dot_word(company):
    """Lower-case ``company`` and drop its last word if that word ends with '.'.

    When the last whitespace-separated word ends with a dot, the remaining
    words are re-joined through ``utils.format_str(..., sep=' ')``.
    Otherwise the lower-cased string is returned with its original
    whitespace untouched.

    Empty or whitespace-only input is returned lower-cased as is; it
    previously raised ``IndexError``.
    """
    company = company.lower()
    company_split = company.split()
    # split() returns [] for empty/whitespace-only input, so check that a
    # last word exists before indexing it.
    if company_split and company_split[-1].endswith('.'):
        company = utils.format_str(company_split[:-1], sep=' ')
    return company
Esempio n. 10
0
File: main.py Progetto: SsnL/TD3
    save_and_eval = evaluate.start_eval_worker(state, env)
    save_and_eval(policy, 0)  # eval untrained policy

    total_timesteps = 0
    timesteps_since_eval = 0
    episode_num = 0
    done = True

    while total_timesteps < state.max_timesteps:

        if done:
            if total_timesteps > 0:
                logging.info(
                    utils.format_str(total=total_timesteps,
                                     episode=episode_num,
                                     episode_T=episode_timesteps,
                                     reward=episode_reward))
                # if args.policy_name == "TD3":
                policy.train(replay_buffer, episode_timesteps,
                             state.batch_size, state.discount, state.tau,
                             state.policy_noise, state.noise_clip,
                             state.policy_freq)
                # else:
                #     policy.train(replay_buffer, episode_timesteps, args.batch_size, args.discount, args.tau)

            # Evaluate episode
            if timesteps_since_eval >= state.eval.freq:
                timesteps_since_eval %= state.eval.freq
                save_and_eval(policy, total_timesteps)

            # Reset environment
# PONDS
# density: number of ponds/km^2
DENSITY = 0.4
# minimum distance: used to buffer out from existing ponds to create territories
MINIMUM_DISTANCE = 1000
# NOTE(review): units/scale of the next two values are not stated here -- confirm
POND_ABANDONMENT_PROBABILITY = 10
DAM_HEIGHT = 9

# GARDENS
# NOTE(review): units of the buffer, area and probability values are not stated here -- confirm
PROXIMITY_BUFFER = 500
PER_CAPITA_GARDEN_AREA = 15
# integers from -5 to 5 inclusive
POPULATION_VARIATION = range(-5, 6)
ABANDONMENT_PROBABILITY = 5

# INTERNAL CONFIGURATION
# trial label: TRIAL_NAME passed through utils.format_str, cut to at most 20 characters
trial = utils.format_str(TRIAL_NAME)[:20]
# per-trial directory named '<REGION>_<trial>' under DATA_DIR
TRIAL_DIR = os.path.join(DATA_DIR, '{}_{}'.format(REGION, trial))
INPUT_DIR = os.path.join(TRIAL_DIR, 'inputs')
OUTPUT_DIR = os.path.join(TRIAL_DIR, 'outputs')
# TEMP_DIR sits directly under DATA_DIR, not under the per-trial directory
TEMP_DIR = os.path.join(DATA_DIR, 'temp')
FIRE_DIR = os.path.join(INPUT_DIR, 'fire')
# NOTE: built from INPUT_DIR_FULL (not assigned in this section), not INPUT_DIR
WTR_DIR = os.path.join(INPUT_DIR_FULL, 'tables', 'fire', 'wtr')

# region-specific spatial inputs created by initiate_disturbance_inputs or manual farsite manipulation
# ecocommunities lifecycle:
# initial full extent ec: ECOCOMMUNITIES_FE
# modified full extent ec (initiate_disturbance_inputs): s.TEMP_DIR, 'ecocommunities_fe.tif'
# initial region ec (initiate_disturbance_inputs): ecocommunities below
# yearly output ecs (disturbance scripts): s.OUTPUT_DIR, self._ecocommunities_filename % self.year
ecocommunities = os.path.join(INPUT_DIR, 'ecocommunities.tif')
reference_ascii = os.path.join(INPUT_DIR, 'reference_grid.asc')