def train_expression_classifier():
    """Build the CRF training set for opinion expressions and train the model.

    Steps:
      1. Read the feature description file named by the configuration.
      2. Convert every ``*.feat`` file of the feature folder into a CRF file
         in the CRF expression folder through ``extract_features_to_crf``.
         A file whose conversion fails is reported on stderr and skipped.
      3. Concatenate the converted files into the expression training dataset.
      4. Call ``run_crfsuite`` with the configured parameters, the training
         dataset and the expression model filename.

    All paths and parameters are read from the global ``my_config_manager``.
    """
    # 1) Create the training file from all the features
    # Load the feature description
    path_feat_desc = my_config_manager.get_feature_desc_filename()
    with open(path_feat_desc) as fic:
        fields = fic.read().strip()
    separator = '\t'

    feat_folder = my_config_manager.get_feature_folder_name()
    crf_folder = my_config_manager.get_crf_expression_folder()

    # Create all the CRF files calling to the crfutils.extract_features_to_crf
    crf_out_files = []
    templates_exp = my_config_manager.get_templates_expr()

    #possible_classes = my_config_manager.get_possible_expression_values()
    possible_classes = [OPINION_EXPRESSION]
    ##FOR MPQA
    # NOTE(review): the assignment below overrides the one above, so only
    # 'dse' is used as target class -- confirm this MPQA-specific override is
    # still intended.
    possible_classes = ['dse']

    # Only set the target class for the tokens of possible_classes
    # For others, it's set to O (out sequence)
    # glob returns names in arbitrary order; sorting keeps the generated
    # training file reproducible between runs.
    for feat_file in sorted(glob.glob(feat_folder + '/*.feat')):
        # The glob pattern guarantees the '.feat' extension being stripped.
        base_name = os.path.splitext(os.path.basename(feat_file))[0]
        out_crf = os.path.join(crf_folder, base_name)
        logging.debug('Creating crf file in --> ' + out_crf)
        try:
            extract_features_to_crf(feat_file, out_crf, fields, separator,
                                    templates_exp, possible_classes)
        except Exception:
            # Best effort: report the file and go on with the others, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            # Writes the same text as the former print-to-stderr statement.
            sys.stderr.write('%s %s\n' % (
                'Failed conversion to tab-expression -> CRF: ', feat_file))
            logging.debug('Conversion error for ' + feat_file, exc_info=True)
        else:
            crf_out_files.append(out_crf)

    ###########################################################################
    # Concatenate all the crf files just created
    training_file = my_config_manager.get_training_dataset_exp()
    with open(training_file, 'w') as out_f:
        for crf_file in crf_out_files:
            with open(crf_file) as f:
                out_f.write(f.read())
    logging.debug('Created training data for crf, op.exp ' + training_file)

    #############################################
    # Train the model
    crf_params = my_config_manager.get_crfsuite_params()
    model_file = my_config_manager.get_filename_model_expression()
    logging.debug('Training the classifier for opinion expressions (could take a while)')
    run_crfsuite(crf_params, training_file, model_file)
def train_expression_classifier():
    """Build the CRF training set for opinion expressions and train the model.

    Steps:
      1. Read the feature description file named by the configuration.
      2. Convert every ``*.feat`` file of the feature folder into a CRF file
         in the CRF expression folder through ``extract_features_to_crf``,
         using the expression classes returned by the configuration.
         A file whose conversion fails is reported on stderr and skipped.
      3. Concatenate the converted files into the expression training dataset.
      4. Call ``run_crfsuite`` with the configured parameters, the training
         dataset and the expression model filename.

    All paths and parameters are read from the global ``my_config_manager``.
    """
    # 1) Create the training file from all the features
    # Load the feature description
    path_feat_desc = my_config_manager.get_feature_desc_filename()
    with open(path_feat_desc) as fic:
        fields = fic.read().strip()
    separator = '\t'

    feat_folder = my_config_manager.get_feature_folder_name()
    crf_folder = my_config_manager.get_crf_expression_folder()

    # Create all the CRF files calling to the crfutils.extract_features_to_crf
    crf_out_files = []
    templates_exp = my_config_manager.get_templates_expr()
    possible_classes = my_config_manager.get_possible_expression_values()

    # Only set the target class for the tokens of possible_classes
    # For others, it's set to O (out sequence)
    # glob returns names in arbitrary order; sorting keeps the generated
    # training file reproducible between runs.
    for feat_file in sorted(glob.glob(feat_folder + '/*.feat')):
        # The glob pattern guarantees the '.feat' extension being stripped.
        base_name = os.path.splitext(os.path.basename(feat_file))[0]
        out_crf = os.path.join(crf_folder, base_name)
        logging.debug('Creating crf file in --> ' + out_crf)
        try:
            extract_features_to_crf(feat_file, out_crf, fields, separator,
                                    templates_exp, possible_classes)
        except Exception:
            # Best effort: report the file and go on with the others, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            # Writes the same text as the former print-to-stderr statement.
            sys.stderr.write('%s %s\n' % (
                'Failed conversion to tab-expression -> CRF: ', feat_file))
            logging.debug('Conversion error for ' + feat_file, exc_info=True)
        else:
            crf_out_files.append(out_crf)

    ###########################################################################
    # Concatenate all the crf files just created
    training_file = my_config_manager.get_training_dataset_exp()
    with open(training_file, 'w') as out_f:
        for crf_file in crf_out_files:
            with open(crf_file) as f:
                out_f.write(f.read())
    logging.debug('Created training data for crf, op.exp ' + training_file)

    #############################################
    # Train the model
    crf_params = my_config_manager.get_crfsuite_params()
    model_file = my_config_manager.get_filename_model_expression()
    logging.debug(
        'Training the classifier for opinion expressions (could take a while)')
    run_crfsuite(crf_params, training_file, model_file)
def convert_to_crf(input_file, templates):
    """Convert a feature file into a temporary CRF file and return its path.

    The feature description is read from the file named by
    ``my_config_manager.get_feature_desc_filename()`` and handed, together
    with ``input_file`` and ``templates``, to ``extract_features_to_crf``
    (called with ``possible_classes=None``).

    :param input_file: feature file passed on to ``extract_features_to_crf``
    :param templates: templates passed on to ``extract_features_to_crf``
    :return: path of the CRF file; it is created with ``delete=False``, so it
        is not removed automatically and the caller has to delete it.
    """
    # Only a unique file name is needed here, so the handle is closed at once
    # and the path is given to the converter as its output location.
    out_desc = NamedTemporaryFile(delete=False)
    out_desc.close()
    out_crf = out_desc.name

    ## Load description of features
    path_feat_desc = my_config_manager.get_feature_desc_filename()
    with open(path_feat_desc) as fic:
        fields = fic.read().strip()

    ####
    # NOTE(review): ``separator`` is not defined inside this function;
    # presumably it is a module-level global ('\t' in the training
    # functions) -- confirm it exists, otherwise this raises NameError.
    extract_features_to_crf(input_file, out_crf, fields, separator,
                            templates, possible_classes=None)
    return out_crf
def train_holder_classifier():
    """Build the CRF training set for opinion holders and train the model.

    Steps:
      1. Read the feature description file named by the configuration.
      2. Convert every ``*.feat`` file of the feature folder into a CRF file
         in the CRF holder folder through ``extract_features_to_crf``, with
         ``['holder']`` as the only possible class.
         A file whose conversion fails is reported on stderr and skipped.
      3. Concatenate the converted files into the holder training dataset.
      4. Call ``run_crfsuite`` with the configured parameters, the training
         dataset and the holder model filename.

    All paths and parameters are read from the global ``my_config_manager``.
    """
    # 1) Create the training file from all the features
    # Load the feature description
    path_feat_desc = my_config_manager.get_feature_desc_filename()
    with open(path_feat_desc) as fic:
        fields = fic.read().strip()
    separator = '\t'

    feat_folder = my_config_manager.get_feature_folder_name()
    crf_folder = my_config_manager.get_crf_holder_folder()

    # Create all the CRF files calling to the crfutils.extract_features_to_crf
    crf_out_files = []
    templates_holder = my_config_manager.get_templates_holder()
    possible_classes = ['holder']

    # glob returns names in arbitrary order; sorting keeps the generated
    # training file reproducible between runs.
    for feat_file in sorted(glob.glob(feat_folder + '/*.feat')):
        # The glob pattern guarantees the '.feat' extension being stripped.
        base_name = os.path.splitext(os.path.basename(feat_file))[0]
        out_crf = os.path.join(crf_folder, base_name)
        logging.debug('Creating crf file in --> ' + out_crf)
        try:
            extract_features_to_crf(feat_file, out_crf, fields, separator,
                                    templates_holder, possible_classes)
        except Exception:
            # Best effort: report the file and go on with the others, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would.
            # Writes the same text as the former print-to-stderr statement.
            sys.stderr.write('%s %s\n' % (
                'Failed conversion to tab-holder -> CRF: ', feat_file))
            logging.debug('Conversion error for ' + feat_file, exc_info=True)
        else:
            crf_out_files.append(out_crf)

    ###########################################################################
    # Concatenate all the crf files just created
    training_file = my_config_manager.get_training_dataset_holder()
    with open(training_file, 'w') as out_f:
        for crf_file in crf_out_files:
            with open(crf_file) as f:
                out_f.write(f.read())
    # The message used to say 'op.exp' (copied from the expression trainer),
    # although the file written here is the holder dataset.
    logging.debug('Created training data for crf, op.holder ' + training_file)

    #############################################
    # Train the model
    crf_params = my_config_manager.get_crfsuite_params()
    model_file = my_config_manager.get_filename_model_holder()
    logging.debug('Training the classifier for opinion holder (could take a while)')
    run_crfsuite(crf_params, training_file, model_file)