예제 #1
0
def train_expression_classifier():
    """Build the CRF training data for opinion expressions and train the model.

    Steps, all driven by paths/parameters read from ``my_config_manager``:

    1. Read the feature description file.
    2. Convert every ``*.feat`` file of the feature folder into a CRF file
       (same base name, inside the CRF expression folder) by calling
       ``extract_features_to_crf``. A file whose conversion raises is
       reported on stderr and left out of the training set.
    3. Concatenate the CRF files that were produced into the training
       dataset file.
    4. Call ``run_crfsuite`` with the configured parameters, the training
       dataset and the model filename.

    Returns None.
    """
    # 1) Create the training file from all the features
    # Load the feature description
    with open(my_config_manager.get_feature_desc_filename()) as fic:
        fields = fic.read().strip()
    separator = '\t'
    feat_folder = my_config_manager.get_feature_folder_name()
    crf_folder = my_config_manager.get_crf_expression_folder()
    templates_exp = my_config_manager.get_templates_expr()

    ##FOR MPQA
    # The target class is hard-coded to 'dse'. The former
    # [OPINION_EXPRESSION] assignment was overwritten before any use and
    # has been dropped; the configured values
    # (my_config_manager.get_possible_expression_values()) are not used.
    possible_classes = ['dse']

    # Create all the CRF files calling to the crfutils.extract_features_to_crf
    # Only set the target class for the tokens of possible_classes
    # For others, it's set to O (out sequence)
    crf_out_files = []
    for feat_file in glob.glob(feat_folder+'/*.feat'):
        # Strip the '.feat' extension to name the CRF file
        base_name = os.path.splitext(os.path.basename(feat_file))[0]
        out_crf = os.path.join(crf_folder, base_name)
        logging.debug('Creating crf file in --> '+out_crf)

        try:
            extract_features_to_crf(feat_file, out_crf, fields, separator,
                                    templates_exp, possible_classes)
        except Exception:
            # Best effort: report the file and go on with the rest.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate. The write reproduces the output of the
            # former ``print >>sys.stderr`` statement and is valid on both
            # Python 2 and Python 3.
            sys.stderr.write('%s %s\n' % (
                'Failed conversion to tab-expression -> CRF: ', feat_file))
        else:
            crf_out_files.append(out_crf)
    ###########################################################################################

    # Concatenate all the crf files just created
    training_file = my_config_manager.get_training_dataset_exp()
    with open(training_file, 'w') as out_f:
        for crf_file in crf_out_files:
            with open(crf_file) as f:
                out_f.write(f.read())
    logging.debug('Created training data for crf, op.exp '+training_file)
    #############################################

    #Train the model
    crf_params = my_config_manager.get_crfsuite_params()
    model_file = my_config_manager.get_filename_model_expression()
    logging.debug('Training the classifier for opinion expressions (could take a while)')
    run_crfsuite(crf_params, training_file, model_file)
예제 #2
0
def train_expression_classifier():
    """Build the CRF training data for opinion expressions and train the model.

    Every path and parameter is obtained from ``my_config_manager``. The
    function:

    1. reads the feature description file;
    2. converts each ``*.feat`` file of the feature folder into a CRF file
       with ``extract_features_to_crf``, using the configured expression
       templates and possible expression values (a file whose conversion
       raises is reported on stderr and skipped);
    3. concatenates the produced CRF files into the training dataset file;
    4. calls ``run_crfsuite`` on that dataset to write the model file.

    Returns None.
    """
    # 1) Create the training file from all the features
    # Load the feature description
    with open(my_config_manager.get_feature_desc_filename()) as fic:
        fields = fic.read().strip()
    separator = '\t'
    feat_folder = my_config_manager.get_feature_folder_name()
    crf_folder = my_config_manager.get_crf_expression_folder()

    templates_exp = my_config_manager.get_templates_expr()
    possible_classes = my_config_manager.get_possible_expression_values()

    # Create all the CRF files calling to the crfutils.extract_features_to_crf
    # Only set the target class for the tokens of possible_classes
    # For others, it's set to O (out sequence)
    crf_out_files = []
    for feat_file in glob.glob(feat_folder + '/*.feat'):
        # CRF file name = feature file name without its '.feat' extension
        base_name = os.path.splitext(os.path.basename(feat_file))[0]
        out_crf = os.path.join(crf_folder, base_name)
        logging.debug('Creating crf file in --> ' + out_crf)

        try:
            extract_features_to_crf(feat_file, out_crf, fields, separator,
                                    templates_exp, possible_classes)
        except Exception:
            # Keep going with the remaining files, but do not trap
            # KeyboardInterrupt/SystemExit as a bare ``except`` would.
            # Same text as the former ``print >> sys.stderr`` statement,
            # written in a form that also runs on Python 3.
            sys.stderr.write('%s %s\n' % (
                'Failed conversion to tab-expression -> CRF: ', feat_file))
        else:
            crf_out_files.append(out_crf)
    ###########################################################################################

    # Concatenate all the crf files just created
    training_file = my_config_manager.get_training_dataset_exp()
    with open(training_file, 'w') as out_f:
        for crf_file in crf_out_files:
            with open(crf_file) as f:
                out_f.write(f.read())
    logging.debug('Created training data for crf, op.exp ' + training_file)
    #############################################

    #Train the model
    crf_params = my_config_manager.get_crfsuite_params()
    model_file = my_config_manager.get_filename_model_expression()
    logging.debug(
        'Training the classifier for opinion expressions (could take a while)')
    run_crfsuite(crf_params, training_file, model_file)
def convert_to_crf(input_file,templates):
    """Convert a feature file into CRF format, written to a new temp file.

    Parameters:
        input_file: forwarded to ``extract_features_to_crf`` as the file to
            convert.
        templates: forwarded to ``extract_features_to_crf`` as the templates.

    Returns:
        The path of the temporary file given to ``extract_features_to_crf``
        as its output. The file is created with ``delete=False``, so it is
        not removed automatically; removing it is left to the caller.

    NOTE(review): ``separator`` is not defined in this function, so it has
    to exist at module level -- confirm.
    """
    ##Load description of features
    # Read before creating the temporary file so that a failure here does
    # not leave an orphaned file behind.
    with open(my_config_manager.get_feature_desc_filename()) as fic:
        fields = fic.read().strip()
    ####

    # Only the name of the temporary file is needed; the handle is closed
    # right away and the file is kept on disk (delete=False).
    with NamedTemporaryFile(delete=False) as out_desc:
        out_crf = out_desc.name

    extract_features_to_crf(input_file,out_crf,fields,separator,templates,possible_classes=None)
    return out_crf
def convert_to_crf(input_file,templates):
    """Run ``extract_features_to_crf`` on ``input_file`` into a temp file.

    Parameters:
        input_file: the file handed to ``extract_features_to_crf`` as input.
        templates: the templates handed to ``extract_features_to_crf``.

    Returns:
        The name of the temporary file used as the conversion output. It is
        created with ``delete=False`` and therefore stays on disk until the
        caller removes it.

    NOTE(review): ``separator`` is read from outside this function
    (presumably a module-level value) -- confirm it is defined.
    """
    ##Load description of features
    # Done first: if the description cannot be read, no temporary file has
    # been created yet, so nothing is left behind.
    path_feat_desc = my_config_manager.get_feature_desc_filename()
    with open(path_feat_desc) as fic:
        fields = fic.read().strip()
    ####

    # Reserve a file name; the handle itself is not used, so it is closed
    # immediately while the file is kept (delete=False).
    with NamedTemporaryFile(delete=False) as out_desc:
        out_crf = out_desc.name

    extract_features_to_crf(input_file,out_crf,fields,separator,templates,possible_classes=None)
    return out_crf
예제 #5
0
def train_holder_classifier():
    """Build the CRF training data for opinion holders and train the model.

    Steps, all driven by paths/parameters read from ``my_config_manager``:

    1. Read the feature description file.
    2. Convert every ``*.feat`` file of the feature folder into a CRF file
       (same base name, inside the CRF holder folder) by calling
       ``extract_features_to_crf`` with the holder templates and the single
       class ``'holder'``. A file whose conversion raises is reported on
       stderr and left out of the training set.
    3. Concatenate the CRF files that were produced into the holder
       training dataset file.
    4. Call ``run_crfsuite`` with the configured parameters, the training
       dataset and the holder model filename.

    Returns None.
    """
    # 1) Create the training file from all the features
    # Load the feature description
    with open(my_config_manager.get_feature_desc_filename()) as fic:
        fields = fic.read().strip()
    separator = '\t'
    feat_folder = my_config_manager.get_feature_folder_name()
    crf_folder = my_config_manager.get_crf_holder_folder()
    templates_holder = my_config_manager.get_templates_holder()
    possible_classes = ['holder']

    # Create all the CRF files calling to the crfutils.extract_features_to_crf
    crf_out_files = []
    for feat_file in glob.glob(feat_folder+'/*.feat'):
        # Strip the '.feat' extension to name the CRF file
        base_name = os.path.splitext(os.path.basename(feat_file))[0]
        out_crf = os.path.join(crf_folder, base_name)
        logging.debug('Creating crf file in --> '+out_crf)

        try:
            extract_features_to_crf(feat_file, out_crf, fields, separator,
                                    templates_holder, possible_classes)
        except Exception:
            # Best effort: report the file and continue with the others.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate. The write reproduces the output of the
            # former ``print >>sys.stderr`` statement and is valid on both
            # Python 2 and Python 3.
            sys.stderr.write('%s %s\n' % (
                'Failed conversion to tab-holder -> CRF: ', feat_file))
        else:
            crf_out_files.append(out_crf)
    ###########################################################################################

    # Concatenate all the crf files just created
    training_file = my_config_manager.get_training_dataset_holder()
    with open(training_file, 'w') as out_f:
        for crf_file in crf_out_files:
            with open(crf_file) as f:
                out_f.write(f.read())
    # The message used to say 'op.exp', copied from the expression trainer;
    # this dataset is the holder one.
    logging.debug('Created training data for crf, op.holder '+training_file)
    #############################################

    #Train the model
    crf_params = my_config_manager.get_crfsuite_params()
    model_file = my_config_manager.get_filename_model_holder()
    logging.debug('Training the classifier for opinion holder (could take a while)')
    run_crfsuite(crf_params, training_file, model_file)