예제 #1
0
def main():
    """Parse CLI arguments and run product tagging on the source file.

    Output is written under ``<user_dir>/<sub_dir>``.
    """
    arg_parser = argparse.ArgumentParser()
    # Three required positional inputs, registered from one table.
    for pos_name, pos_help in (
            ('source_file', 'the file you want to tag with product'),
            ('product_file', 'the file contain product name'),
            ('base_word_file', 'the file contain base word')):
        arg_parser.add_argument(pos_name, help=pos_help, action='store')

    arg_parser.add_argument('--user_dir',
                            dest='user_dir',
                            action='store',
                            help='user dir path',
                            default=os.path.join(root_dir, 'usr'))

    parsed = arg_parser.parse_args()
    tag_output_dir = os.path.join(parsed.user_dir, sub_dir)
    # Ensure the output directory exists before the tagger writes into it.
    hf.check_dir_exist(tag_output_dir)
    start_tag(parsed.source_file, parsed.product_file, parsed.base_word_file,
              tag_output_dir)
예제 #2
0
def main():
    """Attach reference sentences to the input file.

    The merged result is always written to
    ``<user_dir>/<sub_dir>/recognition_reference.txt``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('input_file',
                            help='the file you want to add reference',
                            action='store')
    arg_parser.add_argument('reference_file',
                            help='the reference sentence file',
                            action='store')
    arg_parser.add_argument('--user_dir',
                            dest='user_dir',
                            action='store',
                            help='user dir path',
                            default=os.path.join(root_dir, 'usr'))
    parsed = arg_parser.parse_args()

    work_dir = os.path.join(parsed.user_dir, sub_dir)
    # Make sure the destination directory exists before writing.
    hf.check_dir_exist(work_dir)
    result_path = os.path.join(work_dir, 'recognition_reference.txt')
    add_reference(parsed.input_file, parsed.reference_file, result_path)
예제 #3
0
def main():
    """Run the word-discovery algorithm over the detail-information file.

    The defaults of ``--input``/``--output-result``/``--output-filter`` are
    derived from ``--user-dir``, so that option must be parsed first.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--user-dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    # BUG FIX: the original called parse_args() here, which aborts with
    # "unrecognized arguments" whenever any option registered below
    # (--input, --output-result, --output-filter) appears on the command
    # line.  parse_known_args() tolerates the not-yet-registered options.
    default_user_dir = parser.parse_known_args()[0].user_dir
    parser.add_argument('--input',
                        help="the detail information file' path",
                        action='store',
                        dest='detail_information_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'detail_information.txt'))
    parser.add_argument('--output-result',
                        help="output result path",
                        action='store',
                        dest='all_alg_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'all_alg.txt'))
    parser.add_argument('--output-filter',
                        help='output the filtered result path',
                        action='store',
                        dest='filtered_alg_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'filtered_alg.txt'))
    args = parser.parse_args()
    # Ensure the working directory exists before the algorithm writes to it.
    hf.check_dir_exist(os.path.join(args.user_dir, sub_dir))
    run_alg(args.detail_information_path, args.all_alg_path,
            args.filtered_alg_path)
def set_up_kcws(char_vector_p, source_tag_p, source_seg_p, tag_scheme_f,
                output_dir):
    """Build the dictionary of file paths used by the kcws pipeline.

    Creates the ``kcws_temp`` working directory under *output_dir* and
    returns a dict mapping logical names to the paths of every input,
    intermediate, and output artifact.
    """
    temp_dir = os.path.abspath(os.path.join(output_dir, 'kcws_temp'))
    hf.check_dir_exist(temp_dir)

    paths = {
        'kcws_temp_dir': temp_dir,
        'char_vector_p': os.path.abspath(char_vector_p),
        'tag_scheme_f': os.path.abspath(tag_scheme_f),
        'source_tag_p': os.path.abspath(source_tag_p),
        'source_seg_p': os.path.abspath(source_seg_p),
        'output_dir': os.path.abspath(output_dir),
        'word_vec_p': os.path.join(temp_dir, 'word_vec.txt'),
        'save_vocab': os.path.join(temp_dir, 'pre_word_vec.txt'),
        'save_unk_p': os.path.join(temp_dir, 'source_lines_with_unk.txt'),
    }

    # Used while training the model; these two files are defined in
    # generate_ner_train.py.  NOTE: 'test_data_P' keeps its original
    # capital 'P' so existing consumers of the dict keep working.
    paths['train_data_p'] = os.path.join(temp_dir, 'train_for_train.txt')
    paths['test_data_P'] = os.path.join(temp_dir, 'train_for_test.txt')
    paths['log_dir_p'] = os.path.join(temp_dir, 'log')

    # Used while freezing the model.  output_graph_p is deliberately built
    # from the raw output_dir argument (not the absolutized copy), matching
    # the original behavior.
    paths['graph_path'] = os.path.join(paths['log_dir_p'], 'graph.pb')
    paths['checkpoint'] = os.path.join(paths['log_dir_p'], 'model.ckpt')
    paths['output_graph_p'] = os.path.join(output_dir, 'product_model.pbtxt')

    # Used while dumping the vocabulary.
    paths['dump_char_vector_p'] = os.path.join(paths['output_dir'],
                                               'char_vec_index.txt')
    paths['dump_word_vector_p'] = os.path.join(paths['output_dir'],
                                               'word_vec_index.txt')
    return paths
def main():
    """Decompose the input file into base words under the user directory."""
    cli = argparse.ArgumentParser()
    cli.add_argument('input_file',
                     help='the file you want to decompose',
                     action='store')
    cli.add_argument('--user_dir',
                     dest='user_dir',
                     action='store',
                     help='user dir path',
                     default=os.path.join(root_dir, 'usr'))
    options = cli.parse_args()

    target_dir = os.path.join(options.user_dir, sub_dir)
    # Create the output directory if it is missing.
    hf.check_dir_exist(target_dir)
    decompose(options.input_file, os.path.join(target_dir, 'base_word.txt'))
def main():
    """Split the input file into product-name and plain-word files."""
    cli = argparse.ArgumentParser()
    cli.add_argument('input_file',
                     help='the file you want to seperate',
                     action='store')
    cli.add_argument('--user_dir',
                     dest='user_dir',
                     action='store',
                     help='user dir path',
                     default=os.path.join(root_dir, 'usr'))
    options = cli.parse_args()

    target_dir = os.path.join(options.user_dir, sub_dir)
    # Create the output directory if it is missing.
    hf.check_dir_exist(target_dir)
    seperate(options.input_file,
             os.path.join(target_dir, 'product.txt'),
             os.path.join(target_dir, 'word.txt'))
def main():
    """Kick off kcws training from the segmented/tagged corpus files."""
    cli = argparse.ArgumentParser()
    # Four required positional inputs, registered from one table.
    positionals = (
        ('source_segment_file', 'the file had been segmented'),
        ('source_tag_file',
         'the file had been tagged accroding to product name'),
        ('tag_scheme_file', 'the file contain tag scheme'),
        ('char_vector_path', 'the char vector path'),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help, action='store')

    cli.add_argument('--user_dir',
                     dest='user_dir',
                     action='store',
                     help='user dir path',
                     default=os.path.join(root_dir, 'usr'))

    options = cli.parse_args()
    kcws_output_dir = os.path.join(options.user_dir, sub_dir)
    # Ensure the output directory exists before training starts.
    hf.check_dir_exist(kcws_output_dir)
    run_kcws(options.source_segment_file, options.source_tag_file,
             options.tag_scheme_file, options.char_vector_path,
             kcws_output_dir)
예제 #8
0
def main():
    """Find new-word candidates and write entropy/detail reports.

    The defaults of every ``--*`` path option are derived from
    ``--user_dir``, so that option must be parsed first.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    # BUG FIX: the original called parse_args() here, which aborts with
    # "unrecognized arguments" whenever any option registered below appears
    # on the command line.  parse_known_args() tolerates the
    # not-yet-registered options.
    default_user_dir = parser.parse_known_args()[0].user_dir
    parser.add_argument('--source',
                        help='the file you want to find new word in',
                        action='store',
                        dest='source_file',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'source.txt'))
    parser.add_argument('--MI_entropy',
                        help='output Mutual entropy file path',
                        action='store',
                        dest='MI_entropy_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'word_mutual_entropy.txt'))
    parser.add_argument('--neighbor_entropy',
                        help='output neighbor entropy file path',
                        action='store',
                        dest='neighbor_entropy_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'word_entropy.txt'))
    # BUG FIX: the flag was misspelled '--detail_inforamtion'.  The correct
    # spelling is registered first; the old one is kept as an alias so
    # existing scripts keep working.
    parser.add_argument(
        '--detail_information',
        '--detail_inforamtion',
        help='output neightbor entropy and mutual entropy file path',
        action='store',
        dest='detail_information_path',
        default=os.path.join(default_user_dir, sub_dir,
                             'detail_information.txt'))
    args = parser.parse_args()
    # Ensure the working directory exists before writing reports to it.
    hf.check_dir_exist(os.path.join(args.user_dir, sub_dir))
    find_frequency_pattern(args.source_file, args.MI_entropy_path,
                           args.neighbor_entropy_path,
                           args.detail_information_path)
def main():
    """Run product-name recognition over a segmented-sentence file.

    Loads the frozen model plus its vocab index files, recognizes product
    names in every input sentence, and writes both the full recognition
    result and the extracted product-name set.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'test_file',
        help=
        'the segmented sentence file you want to recognize product name in it',
        action='store')

    # The model and its vocab indexes all live in one directory.
    model_dir = os.path.join(root_dir, 'usr', 'training_model')
    cli.add_argument('--recognize_result',
                     dest='output_recognize_result',
                     action='store',
                     help='output path of recongnized result',
                     default=os.path.join(sub_dir_path,
                                          'recognize_result.txt'))
    cli.add_argument('--product_name_result',
                     dest='output_product_name_result',
                     action='store',
                     help='output path of recongnized product_name',
                     default=os.path.join(sub_dir_path, 'discover_word.txt'))
    cli.add_argument('--frozen_graph',
                     dest='model_path',
                     action='store',
                     help='recognization model path',
                     default=os.path.join(model_dir, 'product_model.pbtxt'))
    cli.add_argument('--char_vec_index',
                     dest='char_index_path',
                     help='char vector index file path',
                     default=os.path.join(model_dir, 'char_vec_index.txt'))
    cli.add_argument('--word_vec_index',
                     dest='word_index_path',
                     help='word vector index file path',
                     default=os.path.join(model_dir, 'word_vec_index.txt'))
    cli.add_argument('--tag_vob',
                     dest='tag_vob_path',
                     help='tag scheme index path',
                     default=os.path.join(model_dir, 'tag_vocab.txt'))

    options = cli.parse_args()
    # Make sure the output directory exists before writing results.
    hf.check_dir_exist(sub_dir_path)

    sentences = load_unrecongnition_file(options.test_file)
    recognized = run_product_recognition(sentences, options.model_path,
                                         options.char_index_path,
                                         options.word_index_path,
                                         options.tag_vob_path)

    print('input len :{} output len :{}'.format(len(sentences),
                                                len(recognized)))
    names = fetch_product_name(recognized)

    hf.write_data(options.output_product_name_result, names)
    write_recongnition_result(options.output_recognize_result, recognized)