def _run_training_classifier(tempo_merged_file, filename, feature_path):
    """Run the TRAIN_CLASSIFIER pipeline and collect the feature files.

    Calls run_tarsqi on the merged training file with the TRAIN_CLASSIFIER
    pipeline, then copies every ``*.train.EE`` / ``*.train.ET`` file found
    in TEMPO_DATA_PATH into the ``ee`` / ``et`` sub-folder of
    ``feature_path``. Each copy is named ``<filename>.<original name>``.

    Args:
        tempo_merged_file: merged training input file passed to tarsqi.
        filename: base name used for the tarsqi output file
            (``<filename>.cla.trained.xml`` under OUTPUT_DATA_PATH) and as
            the prefix of every copied feature file.
        feature_path: folder holding the ``ee`` and ``et`` sub-folders.
            They are not created here, so they must already exist.
    """
    # Run tarsqi again with the merged input training file, this time with
    # TRAIN_CLASSIFIER, just to extract the features.
    args = [
        'timebank',
        'pipeline=%s' % TRAIN_CLASSIFIER,
        tempo_merged_file,
        os.path.join(OUTPUT_DATA_PATH, filename + '.cla.trained.xml'),
    ]
    run_tarsqi(args)

    # Copy the two kinds of feature files to the training features folder.
    for extension, subfolder in (('EE', 'ee'), ('ET', 'et')):
        target_folder = os.path.join(feature_path, subfolder)
        pattern = os.path.join(TEMPO_DATA_PATH, '*.train.' + extension)
        for fragment_file in glob.glob(pattern):
            # basename() copes with any separator the platform uses and
            # never raises when the path contains no separator at all.
            fragment_name = os.path.basename(fragment_file)
            shutil.copyfile(
                fragment_file,
                os.path.join(target_folder, filename + '.' + fragment_name))
def _run_preprocessing(filename, full_filename):
    """Run tarsqi with the PREPROCESSOR pipeline on a single file.

    Args:
        filename: name of the output file, created under OUTPUT_DATA_PATH.
        full_filename: path of the input file handed to tarsqi.
    """
    destination = os.path.join(OUTPUT_DATA_PATH, filename)
    pipeline_option = 'pipeline=%s' % PREPROCESSOR
    # Argument order expected by run_tarsqi here: input type, options,
    # input path, output path.
    run_tarsqi(['timebank', pipeline_option, full_filename, destination])
def get_date_time_all_posts():
    """Run tarsqi over the whole ``fb_post_files`` input location.

    Passes ``fb_post_files/in_files/`` as input and
    ``fb_post_files/out_files/`` as output to ``tarsqi.run_tarsqi`` with the
    TOKENIZER, TAGGER, CHUNKER and GUTIME components selected.
    """
    # NOTE(review): both paths are relative, so the result presumably depends
    # on the current working directory - confirm with the caller.
    pipeline = "TOKENIZER,TAGGER,CHUNKER,GUTIME"
    tarsqi.run_tarsqi([
        "--pipeline", pipeline,
        "fb_post_files/in_files/",
        "fb_post_files/out_files/",
    ])
def _run_training_classifier(tempo_merged_file, filename, feature_path):
    """Run the TRAIN_CLASSIFIER pipeline and collect the feature files.

    Calls run_tarsqi on the merged training file with the TRAIN_CLASSIFIER
    pipeline (and ``trap_errors=True``), then copies every ``*.train.EE`` /
    ``*.train.ET`` file found in TEMPO_DATA_PATH into the ``ee`` / ``et``
    sub-folder of ``feature_path``. Each copy is named
    ``<filename>.<original name>``.

    Args:
        tempo_merged_file: merged training input file passed to tarsqi.
        filename: base name used for the tarsqi output file
            (``<filename>.cla.trained.xml`` under OUTPUT_DATA_PATH) and as
            the prefix of every copied feature file.
        feature_path: folder holding the ``ee`` and ``et`` sub-folders.
            They are not created here, so they must already exist.

    Raises:
        Exception: whatever run_tarsqi raised, unless it is the known
            harmless "missing ``.cla.o.xml`` output file" IOError.
    """
    # Run tarsqi again with the merged input training file, this time with
    # TRAIN_CLASSIFIER, just to extract the features.
    logger.info('Training classifier')
    args = [
        'timebank',
        'pipeline=%s' % TRAIN_CLASSIFIER,
        'trap_errors=True',
        tempo_merged_file,
        os.path.join(OUTPUT_DATA_PATH, filename + '.cla.trained.xml'),
    ]

    try:
        run_tarsqi(args)
    except Exception as e:
        # When switching from normal features to svm features, tarsqi does
        # not create the classifier output file and fails with e.g.
        #   [Errno 2] No such file or directory:
        #   '.../code/data/tmp/fragment_001.cla.o.xml'
        # Only the feature files are needed here, so that error is harmless.
        message = str(e)
        # isinstance (not an exact type comparison) so that IOError
        # subclasses such as FileNotFoundError are recognised as well.
        is_missing_classifier_output = (
            isinstance(e, IOError)
            and message.startswith('[Errno 2] No such file or directory')
            and message.endswith('.cla.o.xml\''))
        if not is_missing_classifier_output:
            # A real failure: log it with its traceback and let the original
            # exception propagate so the cause is not lost.
            logger.exception("Some significant exception happened.")
            raise
        logger.warning('That\'s an insignificant error so I don\'t care')

    # Copy the two kinds of feature files to the training features folder.
    for extension, subfolder in (('EE', 'ee'), ('ET', 'et')):
        target_folder = os.path.join(feature_path, subfolder)
        pattern = os.path.join(TEMPO_DATA_PATH, '*.train.' + extension)
        for fragment_file in glob.glob(pattern):
            # basename() copes with any separator the platform uses and
            # never raises when the path contains no separator at all.
            fragment_name = os.path.basename(fragment_file)
            logger.info(extension + ' File to be copied ' + fragment_name)
            shutil.copyfile(
                fragment_file,
                os.path.join(target_folder, filename + '.' + fragment_name))
def get_date_time(in_file, out_file):
    """Run tarsqi on one file of the ``fb_post_files`` tree.

    Args:
        in_file: name appended to ``fb_post_files/in_files/`` to form the
            input path.
        out_file: name appended to ``fb_post_files/out_files/`` to form the
            output path.
    """
    source_path = "fb_post_files/in_files/" + in_file
    target_path = "fb_post_files/out_files/" + out_file
    # Selects the TOKENIZER, TAGGER and GUTIME components only.
    tarsqi.run_tarsqi(
        ["--pipeline", "TOKENIZER,TAGGER,GUTIME", source_path, target_path])