def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'source_file',
        help='the file you want to tag with product names',
        action='store',
    )
    parser.add_argument(
        'product_file',
        help='the file containing product names',
        action='store',
    )
    parser.add_argument(
        'base_word_file',
        help='the file containing base words',
        action='store',
    )
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    args = parser.parse_args()
    output_dir = os.path.join(args.user_dir, sub_dir)
    hf.check_dir_exist(output_dir)
    start_tag(args.source_file, args.product_file, args.base_word_file,
              output_dir)
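# Every entry point below leans on hf.check_dir_exist to make sure the output
# directory exists before anything is written into it. The helper is defined
# elsewhere in the repo; a minimal sketch, assuming it simply creates the
# directory tree on demand, could look like this:
import os


def check_dir_exist(dir_path):
    """Create dir_path (including missing parents) if it does not exist."""
    os.makedirs(dir_path, exist_ok=True)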
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_file',
        help='the file you want to add references to',
        action='store',
    )
    parser.add_argument(
        'reference_file',
        help='the reference sentence file',
        action='store',
    )
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    args = parser.parse_args()
    output_path = os.path.join(args.user_dir, sub_dir,
                               'recognition_reference.txt')
    hf.check_dir_exist(os.path.join(args.user_dir, sub_dir))
    add_reference(args.input_file, args.reference_file, output_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    # Peek at --user_dir with parse_known_args() so the remaining defaults
    # can be derived from it; parse_args() would abort on options that have
    # not been registered yet.
    default_user_dir = parser.parse_known_args()[0].user_dir
    parser.add_argument('--input',
                        help="the detail information file's path",
                        action='store',
                        dest='detail_information_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'detail_information.txt'))
    parser.add_argument('--output-result',
                        help='output result path',
                        action='store',
                        dest='all_alg_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'all_alg.txt'))
    parser.add_argument('--output-filter',
                        help='output path of the filtered result',
                        action='store',
                        dest='filtered_alg_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'filtered_alg.txt'))
    args = parser.parse_args()
    hf.check_dir_exist(os.path.join(args.user_dir, sub_dir))
    run_alg(args.detail_information_path, args.all_alg_path,
            args.filtered_alg_path)
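# The two-pass parsing above is the reason --user_dir is registered first:
# parse_known_args() peeks at --user_dir while ignoring options that have not
# been added yet, so later defaults can be derived from it, whereas a plain
# parse_args() would abort on the unrecognized options. A self-contained
# sketch of the same pattern, with hypothetical option names:
import argparse
import os


def parse_with_derived_defaults(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('--user_dir', default=os.path.join('.', 'usr'))
    # First pass: only --user_dir is known; everything else is left alone.
    known, _ = parser.parse_known_args(argv)
    # Second pass: this default now depends on the first-pass value.
    parser.add_argument('--input',
                        default=os.path.join(known.user_dir, 'input.txt'))
    return parser.parse_args(argv)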
def set_up_kcws(char_vector_p, source_tag_p, source_seg_p, tag_scheme_f,
                output_dir):
    kcws_temp_dir = os.path.join(output_dir, 'kcws_temp')
    kcws_temp_dir = os.path.abspath(kcws_temp_dir)
    hf.check_dir_exist(kcws_temp_dir)
    path_dict = {}
    path_dict['kcws_temp_dir'] = kcws_temp_dir
    path_dict['char_vector_p'] = os.path.abspath(char_vector_p)
    path_dict['tag_scheme_f'] = os.path.abspath(tag_scheme_f)
    path_dict['source_tag_p'] = os.path.abspath(source_tag_p)
    path_dict['source_seg_p'] = os.path.abspath(source_seg_p)
    path_dict['output_dir'] = os.path.abspath(output_dir)
    path_dict['word_vec_p'] = os.path.join(kcws_temp_dir, 'word_vec.txt')
    path_dict['save_vocab'] = os.path.join(kcws_temp_dir, 'pre_word_vec.txt')
    path_dict['save_unk_p'] = os.path.join(kcws_temp_dir,
                                           'source_lines_with_unk.txt')
    # Used when training the model; these two files are defined in
    # generate_ner_train.py.
    path_dict['train_data_p'] = os.path.join(kcws_temp_dir,
                                             'train_for_train.txt')
    path_dict['test_data_P'] = os.path.join(kcws_temp_dir,
                                            'train_for_test.txt')
    path_dict['log_dir_p'] = os.path.join(kcws_temp_dir, 'log')
    # Used when freezing the model.
    log_dir_p = path_dict['log_dir_p']
    path_dict['graph_path'] = os.path.join(log_dir_p, 'graph.pb')
    path_dict['checkpoint'] = os.path.join(log_dir_p, 'model.ckpt')
    path_dict['output_graph_p'] = os.path.join(output_dir,
                                               'product_model.pbtxt')
    # Used when dumping the vocabulary.
    output_dir = path_dict['output_dir']
    path_dict['dump_char_vector_p'] = os.path.join(output_dir,
                                                   'char_vec_index.txt')
    path_dict['dump_word_vector_p'] = os.path.join(output_dir,
                                                   'word_vec_index.txt')
    return path_dict
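# set_up_kcws only assembles paths; the actual KCWS steps consume the dict
# afterwards. A hedged sketch of how a caller might thread it through;
# train_model and freeze_model are hypothetical stand-ins for whatever the
# repo actually invokes at each step:
def run_kcws_pipeline_sketch(path_dict):
    # Training reads the prepared train/test splits and writes checkpoints
    # under the log directory.
    train_model(path_dict['train_data_p'], path_dict['test_data_P'],
                path_dict['log_dir_p'])
    # Freezing combines the graph definition with the latest checkpoint into
    # the deployable product_model.pbtxt.
    freeze_model(path_dict['graph_path'], path_dict['checkpoint'],
                 path_dict['output_graph_p'])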
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_file',
        help='the file you want to decompose',
        action='store',
    )
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    args = parser.parse_args()
    output_dir = os.path.join(args.user_dir, sub_dir)
    hf.check_dir_exist(output_dir)
    output_bw = os.path.join(output_dir, 'base_word.txt')
    decompose(args.input_file, output_bw)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_file',
        help='the file you want to separate',
        action='store',
    )
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    args = parser.parse_args()
    output_dir = os.path.join(args.user_dir, sub_dir)
    hf.check_dir_exist(output_dir)
    output_product = os.path.join(output_dir, 'product.txt')
    output_word = os.path.join(output_dir, 'word.txt')
    seperate(args.input_file, output_product, output_word)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'source_segment_file',
        help='the file that has been segmented',
        action='store',
    )
    parser.add_argument(
        'source_tag_file',
        help='the file that has been tagged according to product names',
        action='store',
    )
    parser.add_argument(
        'tag_scheme_file',
        help='the file containing the tag scheme',
        action='store',
    )
    parser.add_argument(
        'char_vector_path',
        help='the char vector path',
        action='store',
    )
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    args = parser.parse_args()
    output_dir = os.path.join(args.user_dir, sub_dir)
    hf.check_dir_exist(output_dir)
    run_kcws(args.source_segment_file, args.source_tag_file,
             args.tag_scheme_file, args.char_vector_path, output_dir)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--user_dir',
                        help='user dir path',
                        action='store',
                        dest='user_dir',
                        default=os.path.join(root_dir, 'usr'))
    # Peek at --user_dir with parse_known_args() so the remaining defaults
    # can be derived from it; parse_args() would abort on options that have
    # not been registered yet.
    default_user_dir = parser.parse_known_args()[0].user_dir
    parser.add_argument('--source',
                        help='the file you want to find new words in',
                        action='store',
                        dest='source_file',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'source.txt'))
    parser.add_argument('--MI_entropy',
                        help='output mutual information file path',
                        action='store',
                        dest='MI_entropy_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'word_mutual_entropy.txt'))
    parser.add_argument('--neighbor_entropy',
                        help='output neighbor entropy file path',
                        action='store',
                        dest='neighbor_entropy_path',
                        default=os.path.join(default_user_dir, sub_dir,
                                             'word_entropy.txt'))
    parser.add_argument(
        '--detail_information',
        help='output path for both neighbor entropy and mutual information',
        action='store',
        dest='detail_information_path',
        default=os.path.join(default_user_dir, sub_dir,
                             'detail_information.txt'))
    args = parser.parse_args()
    hf.check_dir_exist(os.path.join(args.user_dir, sub_dir))
    find_frequency_pattern(args.source_file, args.MI_entropy_path,
                           args.neighbor_entropy_path,
                           args.detail_information_path)
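# find_frequency_pattern scores candidate words with mutual information and
# neighbor entropy; its implementation lives elsewhere in the repo. As a
# rough illustration only (not the repo's actual formula), pointwise mutual
# information for a two-character candidate can be estimated like this:
import math
from collections import Counter


def pmi(text, candidate):
    """PMI of a 2-char candidate: log(p(xy) / (p(x) * p(y)))."""
    if len(text) < 2 or len(candidate) != 2:
        return float('-inf')
    chars = Counter(text)
    pairs = Counter(text[i:i + 2] for i in range(len(text) - 1))
    p_xy = pairs[candidate] / sum(pairs.values())
    p_x = chars[candidate[0]] / len(text)
    p_y = chars[candidate[1]] / len(text)
    if min(p_xy, p_x, p_y) == 0:
        return float('-inf')
    return math.log(p_xy / (p_x * p_y))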
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'test_file',
        help='the segmented sentence file you want to recognize product '
        'names in',
        action='store')
    parser.add_argument('--recognize_result',
                        help='output path of the recognized result',
                        action='store',
                        dest='output_recognize_result',
                        default=os.path.join(sub_dir_path,
                                             'recognize_result.txt'))
    parser.add_argument('--product_name_result',
                        help='output path of the recognized product names',
                        action='store',
                        dest='output_product_name_result',
                        default=os.path.join(sub_dir_path,
                                             'discover_word.txt'))
    parser.add_argument('--frozen_graph',
                        help='recognition model path',
                        action='store',
                        dest='model_path',
                        default=os.path.join(root_dir, 'usr',
                                             'training_model',
                                             'product_model.pbtxt'))
    parser.add_argument('--char_vec_index',
                        help='char vector index file path',
                        dest='char_index_path',
                        default=os.path.join(root_dir, 'usr',
                                             'training_model',
                                             'char_vec_index.txt'))
    parser.add_argument('--word_vec_index',
                        help='word vector index file path',
                        dest='word_index_path',
                        default=os.path.join(root_dir, 'usr',
                                             'training_model',
                                             'word_vec_index.txt'))
    parser.add_argument('--tag_vob',
                        help='tag scheme index path',
                        dest='tag_vob_path',
                        default=os.path.join(root_dir, 'usr',
                                             'training_model',
                                             'tag_vocab.txt'))
    args = parser.parse_args()
    hf.check_dir_exist(sub_dir_path)
    input_list = load_unrecongnition_file(args.test_file)
    recognition_result_list = run_product_recognition(input_list,
                                                      args.model_path,
                                                      args.char_index_path,
                                                      args.word_index_path,
                                                      args.tag_vob_path)
    print('input len :{} output len :{}'.format(len(input_list),
                                                len(recognition_result_list)))
    product_name_set = fetch_product_name(recognition_result_list)
    hf.write_data(args.output_product_name_result, product_name_set)
    write_recongnition_result(args.output_recognize_result,
                              recognition_result_list)
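# Each of these scripts presumably ends with the standard entry-point guard,
# so the module can be imported without side effects and still run from the
# command line:
if __name__ == '__main__':
    main()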