Esempio n. 1
0
def _run_training_classifier(tempo_merged_file, filename, feature_path):
    """Extract classifier training features for one merged input file.

    Runs tarsqi on ``tempo_merged_file`` with the TRAIN_CLASSIFIER pipeline
    (used here only to produce the feature files), then copies every
    ``*.train.EE`` / ``*.train.ET`` file found in TEMPO_DATA_PATH into the
    ``ee`` / ``et`` sub-folder of ``feature_path``. Each copy is named
    ``<filename>.<original base name>``.

    Args:
        tempo_merged_file: Path of the merged training input given to tarsqi.
        filename: Base name used for the tarsqi output file
            (``<filename>.cla.trained.xml`` in OUTPUT_DATA_PATH) and as the
            prefix of the copied feature files.
        feature_path: Folder holding the ``ee`` and ``et`` sub-folders. They
            are not created here, so they must already exist.
    """
    # Run tarsqi again with the merged input training file, this time with
    # TRAIN_CLASSIFIER, just to extract the features.
    args = [
        'timebank',
        'pipeline=%s' % TRAIN_CLASSIFIER, tempo_merged_file,
        os.path.join(OUTPUT_DATA_PATH, filename + '.cla.trained.xml')
    ]
    run_tarsqi(args)

    # Collect both feature-file lists before copying anything, so the copy
    # step cannot influence what the second glob sees.
    fragment_groups = [
        ('ee', glob.glob(os.path.join(TEMPO_DATA_PATH, '*.train.EE'))),
        ('et', glob.glob(os.path.join(TEMPO_DATA_PATH, '*.train.ET'))),
    ]

    for subfolder, fragment_files in fragment_groups:
        target_folder = os.path.join(feature_path, subfolder)
        for fragment_file in fragment_files:
            # os.path.basename copes with whatever separator glob returned,
            # unlike slicing at the last occurrence of a fixed character.
            base_name = os.path.basename(fragment_file)
            shutil.copyfile(
                fragment_file,
                os.path.join(target_folder, filename + '.' + base_name))
Esempio n. 2
0
def _run_preprocessing(filename, full_filename):
    """Run the tarsqi PREPROCESSOR pipeline on a single input file.

    Args:
        filename: Name of the output file, created inside OUTPUT_DATA_PATH.
        full_filename: Path of the input file handed to tarsqi.
    """
    destination = os.path.join(OUTPUT_DATA_PATH, filename)
    pipeline_option = 'pipeline=%s' % PREPROCESSOR
    run_tarsqi(['timebank', pipeline_option, full_filename, destination])
def get_date_time_all_posts():
    """Run tarsqi over the ``fb_post_files/in_files/`` folder.

    Uses the TOKENIZER, TAGGER, CHUNKER and GUTIME pipeline components and
    directs the output to ``fb_post_files/out_files/``.
    """
    pipeline = "TOKENIZER,TAGGER,CHUNKER,GUTIME"
    source_dir = "fb_post_files/in_files/"
    target_dir = "fb_post_files/out_files/"
    tarsqi.run_tarsqi(["--pipeline", pipeline, source_dir, target_dir])
Esempio n. 4
0
def _run_training_classifier(tempo_merged_file, filename, feature_path):
    """Extract classifier training features, tolerating one known tarsqi error.

    Runs tarsqi on ``tempo_merged_file`` with the TRAIN_CLASSIFIER pipeline
    (used here only to produce the feature files), then copies every
    ``*.train.EE`` / ``*.train.ET`` file found in TEMPO_DATA_PATH into the
    ``ee`` / ``et`` sub-folder of ``feature_path``. Each copy is named
    ``<filename>.<original base name>``.

    Args:
        tempo_merged_file: Path of the merged training input given to tarsqi.
        filename: Base name used for the tarsqi output file
            (``<filename>.cla.trained.xml`` in OUTPUT_DATA_PATH) and as the
            prefix of the copied feature files.
        feature_path: Folder holding the ``ee`` and ``et`` sub-folders. They
            are not created here, so they must already exist.

    Raises:
        Exception: Whatever ``run_tarsqi`` raised, re-raised unchanged, unless
            it is the known harmless "missing ``.cla.o.xml`` output file"
            error, which is only logged as a warning.
    """
    # Run tarsqi again with the merged input training file, this time with
    # TRAIN_CLASSIFIER, just to extract the features.
    logger.info('Training classifier')
    args = [
        'timebank',
        'pipeline=%s' % TRAIN_CLASSIFIER, 'trap_errors=True',
        tempo_merged_file,
        os.path.join(OUTPUT_DATA_PATH, filename + '.cla.trained.xml')
    ]

    try:
        run_tarsqi(args)
    except Exception as e:
        # When switching from normal features to svm features, tarsqi does
        # not create the classifier output file and fails with e.g.
        #   [Errno 2] No such file or directory:
        #   '/home/l/tuandn/tarsqi/ttk-1.0/ttk-1.0/code/data/tmp/fragment_001.cla.o.xml'
        # Only that specific failure is tolerated.
        message = str(e)
        # isinstance (not an exact type comparison) so that subclasses such
        # as FileNotFoundError are recognised as well.
        insignificant = (
            isinstance(e, IOError)
            and message.startswith('[Errno 2] No such file or directory')
            and message.endswith('.cla.o.xml\'')
        )
        if not insignificant:
            # logger.exception records the message together with the
            # traceback of the active exception; the bare raise then keeps
            # the original type and traceback for the caller.
            logger.exception("Some significant exception happened.")
            raise
        logger.warning('That\'s an insignificant error so I don\'t care')

    # The tolerated error above is deliberately ignored because it only
    # concerns an output file that is not needed here. The feature files are
    # then copied as usual; if that copy fails, tarsqi had a severe problem
    # and the resulting exception propagates to the caller.
    fragment_groups = [
        ('EE', 'ee', glob.glob(os.path.join(TEMPO_DATA_PATH, '*.train.EE'))),
        ('ET', 'et', glob.glob(os.path.join(TEMPO_DATA_PATH, '*.train.ET'))),
    ]

    for label, subfolder, fragment_files in fragment_groups:
        target_folder = os.path.join(feature_path, subfolder)
        for fragment_file in fragment_files:
            # os.path.basename copes with whatever separator glob returned,
            # unlike slicing at the last occurrence of a fixed character.
            base_name = os.path.basename(fragment_file)
            logger.info('%s File to be copied %s', label, base_name)
            shutil.copyfile(
                fragment_file,
                os.path.join(target_folder, filename + '.' + base_name))
def get_date_time(in_file, out_file):
    """Run tarsqi on one file with the TOKENIZER, TAGGER, GUTIME pipeline.

    Args:
        in_file: Input file name, appended to ``fb_post_files/in_files/``.
        out_file: Output file name, appended to ``fb_post_files/out_files/``.
    """
    source_path = "fb_post_files/in_files/" + in_file
    target_path = "fb_post_files/out_files/" + out_file
    tarsqi.run_tarsqi(
        ["--pipeline", "TOKENIZER,TAGGER,GUTIME", source_path, target_path])