def train_classifier_relation_exp_hol():
    """Train the SVM-light model for the RELATION(expression, holder) classifier.

    Steps, all driven by paths/options obtained from ``my_config_manager``:

    1. Load the human-readable training feature file.
    2. Encode it into the index-based format used by svm-light and write
       the result to the training-index file.
    3. Save the feature index built during encoding.
    4. Run the svm-light learner on the encoded file.

    Takes no arguments and returns nothing; its effects are the files it
    writes and the model produced by ``run_svmlight_learn``.
    """
    # Load the human readable training file
    train_filename = my_config_manager.get_relation_exp_hol_training_filename()
    feature_file_obj = Cfeature_file(train_filename)

    # Convert it into an index-based feature file, for svm-light.
    # The ``with`` block guarantees the output handle is closed (and its
    # buffer flushed) even if the encoding step raises.
    feature_index = Cfeature_index()
    feat_bin_filename = my_config_manager.get_rel_exp_hol_training_idx_filename()
    with open(feat_bin_filename, 'w') as fic_out:
        feature_index.encode_feature_file_to_svm(feature_file_obj, fic_out)

    # Save the feature index
    feat_index_filename = my_config_manager.get_index_features_exp_hol_filename()
    feature_index.save_to_file(feat_index_filename)

    # Train the model
    example_file = my_config_manager.get_rel_exp_hol_training_idx_filename()
    model = my_config_manager.get_filename_model_exp_hol()
    svm_opts = my_config_manager.get_svm_params()
    # Lazy %-formatting: same message text, but only built when DEBUG is on.
    logging.debug(
        'Training SVMlight classifier for RELATION(expression,holder) in %s(could take a while)',
        model)
    run_svmlight_learn(example_file, model, svm_opts)
def classify(kaf_obj, term_ids, index_filename, model_filename, svm_path):
    """Classify the polarity of the given terms with a trained SVM model.

    Parameters:
        kaf_obj: document object passed to ``get_tokens_for_terms``.
        term_ids: identifiers of the terms whose tokens are classified.
        index_filename: path of the saved feature index to load.
        model_filename: path of the trained model handed to the classifier.
        svm_path: first argument forwarded to ``run_svm_classify``
            (presumably the location of the svm-light binaries; confirm
            against that helper).

    Returns:
        ``'positive'`` when the first classifier result is >= 0,
        ``'negative'`` otherwise.
    """
    index_features = Cfeature_index()
    index_features.load_from_file(index_filename)

    # Extract the features before creating the temporary file so a failure
    # here cannot leave a stray file behind.
    tokens = get_tokens_for_terms(kaf_obj, term_ids)
    features = extract_features_polarity_classifier_from_tokens(tokens)

    # delete=False: the file has to survive close() so the external
    # classifier can read it by name; we remove it ourselves below.
    test_file = NamedTemporaryFile(delete=False)
    try:
        index_features.encode_example_for_classification(features, test_file)
        test_file.close()
        results = run_svm_classify(svm_path, test_file.name, model_filename)
    finally:
        # Always clean up, even when encoding or classification fails.
        test_file.close()  # no-op if already closed
        os.remove(test_file.name)

    if results[0] >= 0:
        return 'positive'
    return 'negative'
def train_polarity_classifier():
    """Train the SVM-light polarity classifier.

    Reads the feature file named by ``my_config_manager``, writes its
    svm-encoded version next to it (same name plus ``.svm_encoded``), saves
    the feature index built during encoding, and runs the learner to
    produce the model file.

    Takes no arguments and returns nothing.
    """
    feature_filename = my_config_manager.get_filename_features_polarity_classifier()
    encoded_filename = feature_filename + '.svm_encoded'
    index_filename = my_config_manager.get_filename_index_polarity_classifier()
    model_filename = my_config_manager.get_filename_model_polarity_classifier()

    feature_file_obj = Cfeature_file(feature_filename)
    index_features = Cfeature_index()

    # ``with`` guarantees the encoded file is closed and flushed before the
    # learner reads it, and also when the encoding step raises.
    with open(encoded_filename, 'w') as fd_encoded:
        index_features.encode_feature_file_to_svm(feature_file_obj, fd_encoded)

    index_features.save_to_file(index_filename)

    # Fixed learner options for this classifier.
    # NOTE(review): presumably svm_learn's leave-one-out (-x) and
    # trade-off (-c) flags -- confirm against the svm-light documentation.
    params = '-x 1 -c 0.1'
    run_svmlight_learn(encoded_filename, model_filename, params)
def train_classifier_relation_exp_hol():
    """Build the training data and train the RELATION(expression, holder) model.

    Every path and the learner options come from ``my_config_manager``.
    The function loads the human-readable training file, encodes it into
    the index-based svm-light format, stores the resulting feature index,
    and finally invokes ``run_svmlight_learn``.

    Takes no arguments and returns nothing.
    """
    # Load the human readable training file
    train_filename = my_config_manager.get_relation_exp_hol_training_filename()
    feature_file_obj = Cfeature_file(train_filename)

    # Convert it into an index-based feature file, for svm-light.
    feature_index = Cfeature_index()
    feat_bin_filename = my_config_manager.get_rel_exp_hol_training_idx_filename()
    # Context manager: the handle is released even if encoding fails, and
    # the data is flushed to disk before the learner opens the file.
    with open(feat_bin_filename, 'w') as fic_out:
        feature_index.encode_feature_file_to_svm(feature_file_obj, fic_out)

    # Save the feature index
    feat_index_filename = my_config_manager.get_index_features_exp_hol_filename()
    feature_index.save_to_file(feat_index_filename)

    # Train the model
    example_file = my_config_manager.get_rel_exp_hol_training_idx_filename()
    model = my_config_manager.get_filename_model_exp_hol()
    svm_opts = my_config_manager.get_svm_params()
    # Lazy %-formatting keeps the message text identical while deferring
    # string construction until the record is actually emitted.
    logging.debug(
        'Training SVMlight classifier for RELATION(expression,holder) in %s(could take a while)',
        model)
    run_svmlight_learn(example_file, model, svm_opts)
def link_exp_tar_all(expressions, targets, knaf_obj, threshold,
                     use_dependencies=True, use_tokens=True, use_lemmas=True):
    """Link each expression to every target whose SVM score reaches ``threshold``.

    Parameters:
        expressions: iterable of ``(exp_ids, exp_type)`` pairs.
        targets: sequence of target id collections.
        knaf_obj: document object forwarded to ``extract_feats_exp_tar``.
        threshold: minimum classifier score (inclusive) for a link.
        use_dependencies, use_tokens, use_lemmas: feature switches forwarded
            to ``extract_feats_exp_tar``.

    Returns:
        A list of ``(exp_ids, exp_type, target)`` tuples. An expression with
        no target at or above the threshold (or when there are no targets at
        all) contributes a single tuple whose target is ``[]``.
    """
    if not targets:
        # Nothing to score: every expression is left without a target.
        return [(exp_ids, exp_type, []) for exp_ids, exp_type in expressions]

    feat_index_filename = config_manager.get_index_features_exp_tar_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False so the external classifier can open the file by name
    # after we close it; removal is handled in the ``finally`` below.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        # One example per (expression, target) pair, expression-major order.
        for exp_ids, _exp_type in expressions:
            for tar_ids in targets:
                feats = extract_feats_exp_tar(
                    exp_ids, tar_ids, knaf_obj,
                    use_dependencies=use_dependencies,
                    use_tokens=use_tokens,
                    use_lemmas=use_lemmas)
                feat_index.encode_example_for_classification(
                    feats, examples_file, my_class='0')
        examples_file.close()

        model_file = config_manager.get_filename_model_exp_tar()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        # Remove the temporary file even if feature extraction or the
        # classifier raised.
        examples_file.close()  # no-op if already closed
        os.remove(examples_file.name)

    pairs = []
    idx = 0  # walks ``results`` in the same order the examples were written
    for exp, exp_type in expressions:
        at_least_one = False
        for tar in targets:
            value = results[idx]
            idx += 1
            if value >= threshold:
                pairs.append((exp, exp_type, tar))
                at_least_one = True
        if not at_least_one:
            pairs.append((exp, exp_type, []))
    return pairs
def link_exp_hol(expressions, holders, knaf_obj, threshold_hol,
                 use_dependencies=True, use_tokens=True, use_lemmas=True):
    """Assign to each expression its best-scoring holder, if good enough.

    Parameters:
        expressions: iterable of expression id collections.
        holders: sequence of holder id collections.
        knaf_obj: document object forwarded to ``extract_feats_exp_hol``.
        threshold_hol: minimum classifier score (inclusive) for the best
            holder to be accepted.
        use_dependencies, use_tokens, use_lemmas: feature switches forwarded
            to ``extract_feats_exp_hol``.

    Returns:
        A list with one entry per expression, in order: the holder with the
        highest score when that score is >= ``threshold_hol``, otherwise
        ``[]``. With no holders every entry is ``[]``.
    """
    if not holders:
        return [[] for _ in expressions]

    feat_index_filename = config_manager.get_index_features_exp_hol_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False so the external classifier can read the file by name
    # once it is closed; it is removed in the ``finally`` below.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        # The examples are written expression-major:
        #   exp1 --> hol1, exp1 --> hol2, ..., exp2 --> hol1, exp2 --> hol2, ...
        for exp_ids in expressions:
            for hol_ids in holders:
                feats = extract_feats_exp_hol(
                    exp_ids, hol_ids, knaf_obj,
                    use_dependencies=use_dependencies,
                    use_tokens=use_tokens,
                    use_lemmas=use_lemmas)
                feat_index.encode_example_for_classification(
                    feats, examples_file, my_class='0')
        examples_file.close()

        model_file = config_manager.get_filename_model_exp_hol()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        # Clean up the temporary file on success and on failure alike.
        examples_file.close()  # no-op if already closed
        os.remove(examples_file.name)

    num_holders = len(holders)
    assigned_holders = []
    offset = 0  # start of the current expression's scores in ``results``
    for _ in expressions:
        # Seed the search with the first real score instead of a fixed
        # sentinel: with a sentinel of -1, scores that were all <= -1 left
        # the index at -1 and, for thresholds <= -1, wrongly selected the
        # last holder with a fabricated score.
        best_idx = 0
        best_value = results[offset]
        for num_hol in range(1, num_holders):
            value = results[offset + num_hol]
            if value > best_value:  # strict: ties keep the earliest holder
                best_value = value
                best_idx = num_hol
        offset += num_holders

        if best_value >= threshold_hol:
            assigned_holders.append(holders[best_idx])
        else:
            assigned_holders.append([])
    return assigned_holders
def link_exp_tar_all(expressions, targets, knaf_obj, threshold,
                     use_dependencies=True, use_tokens=True, use_lemmas=True):
    """Link each expression to every target scoring at least ``threshold``.

    Parameters:
        expressions: iterable of ``(exp_ids, exp_type)`` pairs.
        targets: sequence of target id collections.
        knaf_obj: document object forwarded to ``extract_feats_exp_tar``.
        threshold: minimum classifier score (inclusive) for a link. This
            function used to overwrite the argument with ``-0.75``
            unconditionally; the caller's value is now honoured and
            ``-0.75`` is only the fallback when ``None`` is passed.
        use_dependencies, use_tokens, use_lemmas: feature switches forwarded
            to ``extract_feats_exp_tar``.

    Returns:
        A list of ``(exp_ids, exp_type, target)`` tuples. An expression with
        no target at or above the threshold (or when there are no targets)
        contributes one tuple whose target is ``[]``.
    """
    if not targets:
        return [(exp_ids, exp_type, []) for exp_ids, exp_type in expressions]

    if threshold is None:
        threshold = -0.75  # historical hard-coded value, kept as fallback

    feat_index_filename = config_manager.get_index_features_exp_tar_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False so the external classifier can open the file by name
    # after it is closed; removal is handled in the ``finally`` below.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        # One example per (expression, target) pair, expression-major order.
        for exp_ids, _exp_type in expressions:
            for tar_ids in targets:
                feats = extract_feats_exp_tar(
                    exp_ids, tar_ids, knaf_obj,
                    use_dependencies=use_dependencies,
                    use_tokens=use_tokens,
                    use_lemmas=use_lemmas)
                feat_index.encode_example_for_classification(
                    feats, examples_file, my_class='0')
        examples_file.close()

        model_file = config_manager.get_filename_model_exp_tar()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        # Remove the temporary file whether or not scoring succeeded.
        examples_file.close()  # no-op if already closed
        os.remove(examples_file.name)

    pairs = []
    idx = 0  # walks ``results`` in the order the examples were written
    for exp, exp_type in expressions:
        at_least_one = False
        for tar in targets:
            value = results[idx]
            idx += 1
            if value >= threshold:
                pairs.append((exp, exp_type, tar))
                at_least_one = True
        if not at_least_one:
            pairs.append((exp, exp_type, []))
    return pairs
def link_exp_tar(expressions, targets, knaf_obj,
                 use_dependencies=True, use_tokens=True, use_lemmas=True):
    """Assign to each expression the single best-scoring target.

    Parameters:
        expressions: iterable of expression id collections.
        targets: sequence of target id collections.
        knaf_obj: document object forwarded to ``extract_feats_exp_tar``.
        use_dependencies, use_tokens, use_lemmas: feature switches forwarded
            to ``extract_feats_exp_tar``.

    Returns:
        A list with one entry per expression, in order:
        ``[]`` when there are no targets; the only target when there is
        exactly one (no classification is run); otherwise the target with
        the highest classifier score for that expression.
    """
    if not targets:
        return [[] for _ in expressions]
    if len(targets) == 1:
        # A single candidate needs no scoring.
        return [targets[0] for _ in expressions]

    feat_index_filename = config_manager.get_index_features_exp_tar_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False so the external classifier can read the file by name
    # once it is closed; it is removed in the ``finally`` below.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        # The examples are written expression-major:
        #   exp1 --> tar1, exp1 --> tar2, ..., exp2 --> tar1, exp2 --> tar2, ...
        for exp_ids in expressions:
            for tar_ids in targets:
                feats = extract_feats_exp_tar(
                    exp_ids, tar_ids, knaf_obj,
                    use_dependencies=use_dependencies,
                    use_tokens=use_tokens,
                    use_lemmas=use_lemmas)
                feat_index.encode_example_for_classification(
                    feats, examples_file, my_class='0')
        examples_file.close()

        model_file = config_manager.get_filename_model_exp_tar()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        # Clean up the temporary file on success and on failure alike.
        examples_file.close()  # no-op if already closed
        os.remove(examples_file.name)

    num_targets = len(targets)
    assigned_targets = []
    offset = 0  # start of the current expression's scores in ``results``
    for _ in expressions:
        # Seed with the first real score rather than a fixed sentinel: the
        # former -100 sentinel left the index at -100 when no score beat
        # it, which then mis-indexed ``targets``.
        best_idx = 0
        best_value = results[offset]
        for num_tar in range(1, num_targets):
            value = results[offset + num_tar]
            if value > best_value:  # strict: ties keep the earliest target
                best_value = value
                best_idx = num_tar
        offset += num_targets
        assigned_targets.append(targets[best_idx])
    return assigned_targets