コード例 #1
0
ファイル: train.py プロジェクト: AshBT/opinion_miner_deluxe
def train_classifier_relation_exp_hol():
    """Train the SVMlight model for the RELATION(expression, holder) classifier.

    Every filename and the SVM options are obtained from ``my_config_manager``.
    The steps are:

    1. Load the human readable training file into a ``Cfeature_file``.
    2. Encode it into the index based feature file used by svm-light.
    3. Save the feature index to its configured file.
    4. Call ``run_svmlight_learn`` on the encoded file.
    """
    # Load the human readable training file
    train_filename = my_config_manager.get_relation_exp_hol_training_filename()
    feature_file_obj = Cfeature_file(train_filename)

    # Convert it into index based feature file, for svm-light. The context
    # manager guarantees the output file is closed even if encoding raises.
    feature_index = Cfeature_index()
    feat_bin_filename = my_config_manager.get_rel_exp_hol_training_idx_filename()
    with open(feat_bin_filename, 'w') as fic_out:
        feature_index.encode_feature_file_to_svm(feature_file_obj, fic_out)

    # Save the feature index
    feat_index_filename = my_config_manager.get_index_features_exp_hol_filename()
    feature_index.save_to_file(feat_index_filename)

    # Train the model
    example_file = my_config_manager.get_rel_exp_hol_training_idx_filename()
    model = my_config_manager.get_filename_model_exp_hol()
    svm_opts = my_config_manager.get_svm_params()
    # Lazy logging arguments: the emitted message is the same as the original
    # concatenation, but it is only built when DEBUG logging is enabled.
    logging.debug('Training SVMlight classifier for RELATION(expression,holder) in %s(could take a while)', model)
    run_svmlight_learn(example_file, model, svm_opts)
コード例 #2
0
def classify(kaf_obj,term_ids,index_filename,model_filename, svm_path):
    """Classify the polarity of a group of terms.

    The feature index is loaded from ``index_filename``. The features
    extracted from the tokens of ``term_ids`` in ``kaf_obj`` are encoded into
    a temporary file, and ``run_svm_classify`` is called with ``svm_path``,
    that file and ``model_filename``.

    Returns:
        'positive' if the first value returned by ``run_svm_classify`` is
        >= 0, otherwise 'negative'.
    """
    index_features = Cfeature_index()
    index_features.load_from_file(index_filename)

    # Extract the features before creating the temporary file, so that a
    # failure here leaves nothing behind on disk.
    tokens = get_tokens_for_terms(kaf_obj, term_ids)
    features = extract_features_polarity_classifier_from_tokens(tokens)

    # delete=False because the file is re-read by name after it is closed;
    # it is therefore removed explicitly, also when something raises.
    test_file = NamedTemporaryFile(delete=False)
    try:
        with test_file:
            index_features.encode_example_for_classification(features, test_file)
        results = run_svm_classify(svm_path, test_file.name, model_filename)
    finally:
        os.remove(test_file.name)

    if results[0] >= 0:
        return 'positive'
    return 'negative'
コード例 #3
0
ファイル: train.py プロジェクト: AshBT/opinion_miner_deluxe
def train_polarity_classifier():
    """Train the SVMlight model for the polarity classifier.

    The feature, index and model filenames come from ``my_config_manager``.
    The feature file is encoded into ``<feature_filename>.svm_encoded``, the
    feature index is saved to the index file, and ``run_svmlight_learn`` is
    called on the encoded file with the parameters ``'-x 1 -c 0.1'``.
    """
    feature_filename = my_config_manager.get_filename_features_polarity_classifier()
    encoded_filename = feature_filename + '.svm_encoded'
    index_filename = my_config_manager.get_filename_index_polarity_classifier()
    model_filename = my_config_manager.get_filename_model_polarity_classifier()

    feature_file_obj = Cfeature_file(feature_filename)
    index_features = Cfeature_index()
    # The context manager closes the encoded file even if encoding raises.
    with open(encoded_filename, 'w') as fd_encoded:
        index_features.encode_feature_file_to_svm(feature_file_obj, fd_encoded)

    index_features.save_to_file(index_filename)

    params = '-x 1 -c 0.1'
    run_svmlight_learn(encoded_filename, model_filename, params)
コード例 #4
0
def train_classifier_relation_exp_hol():
    """Train the SVMlight model for the RELATION(expression, holder) classifier.

    Every filename and the SVM options are obtained from ``my_config_manager``.
    The human readable training file is loaded, encoded into the index based
    feature file used by svm-light, the feature index is saved, and finally
    ``run_svmlight_learn`` is called on the encoded file.
    """
    # Load the human readable training file
    train_filename = my_config_manager.get_relation_exp_hol_training_filename()
    feature_file_obj = Cfeature_file(train_filename)

    # Convert it into index based feature file, for svm-light. Using ``with``
    # ensures the output file is closed even when the encoding step fails.
    feature_index = Cfeature_index()
    feat_bin_filename = (
        my_config_manager.get_rel_exp_hol_training_idx_filename())
    with open(feat_bin_filename, 'w') as fic_out:
        feature_index.encode_feature_file_to_svm(feature_file_obj, fic_out)

    # Save the feature index
    feat_index_filename = (
        my_config_manager.get_index_features_exp_hol_filename())
    feature_index.save_to_file(feat_index_filename)

    # Train the model
    example_file = my_config_manager.get_rel_exp_hol_training_idx_filename()
    model = my_config_manager.get_filename_model_exp_hol()
    svm_opts = my_config_manager.get_svm_params()
    # Lazy logging arguments: same emitted text as the original concatenation.
    logging.debug(
        'Training SVMlight classifier for RELATION(expression,holder) in '
        '%s(could take a while)', model)
    run_svmlight_learn(example_file, model, svm_opts)
コード例 #5
0
def link_exp_tar_all(expressions,targets, knaf_obj,threshold, use_dependencies=True,use_tokens=True, use_lemmas=True):
    """Link every expression with all the targets scoring at least ``threshold``.

    Args:
        expressions: iterable of ``(exp_ids, exp_type)`` pairs; it is iterated
            twice, so it must be re-iterable.
        targets: sequence of target id groups.
        knaf_obj: object passed through to ``extract_feats_exp_tar``.
        threshold: minimum classifier value for an (expression, target) pair
            to be kept.
        use_dependencies, use_tokens, use_lemmas: forwarded to
            ``extract_feats_exp_tar``.

    Returns:
        A list of ``(exp_ids, exp_type, target)`` tuples. An expression with
        no target reaching the threshold (or when ``targets`` is empty)
        contributes one tuple whose target is ``[]``.
    """
    if len(targets) == 0:
        return [(exp_ids, exp_type, []) for exp_ids, exp_type in expressions]

    feat_index_filename = config_manager.get_index_features_exp_tar_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False because the classifier re-reads the file by name after it
    # has been closed; the finally clause removes it even on failure.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        with examples_file:
            # One example per (expression, target) pair, expression-major.
            for exp_ids, _exp_type in expressions:
                for tar_ids in targets:
                    feats = extract_feats_exp_tar(exp_ids,tar_ids,knaf_obj, use_dependencies=use_dependencies,use_tokens=use_tokens,use_lemmas=use_lemmas)
                    feat_index.encode_example_for_classification(feats, examples_file,my_class='0')

        model_file = config_manager.get_filename_model_exp_tar()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        os.remove(examples_file.name)

    # results is read in the same order the examples were written
    # (assumes run_svm_classify returns one value per example, in order).
    pairs = []
    idx = 0
    for exp, exp_type in expressions:
        at_least_one = False
        for tar in targets:
            value = results[idx]
            idx += 1
            if value >= threshold:
                pairs.append((exp, exp_type, tar))
                at_least_one = True

        if not at_least_one:
            pairs.append((exp, exp_type, []))

    return pairs
コード例 #6
0
def link_exp_hol(expressions,holders, knaf_obj,threshold_hol,use_dependencies=True,use_tokens=True,use_lemmas=True):
    """Assign to every expression its best scoring holder, if good enough.

    Args:
        expressions: iterable of expression id groups; it is iterated twice,
            so it must be re-iterable.
        holders: sequence of holder id groups.
        knaf_obj: object passed through to ``extract_feats_exp_hol``.
        threshold_hol: minimum classifier value for the best holder to be
            assigned.
        use_dependencies, use_tokens, use_lemmas: forwarded to
            ``extract_feats_exp_hol``.

    Returns:
        A list with one entry per expression: the holder with the highest
        classifier value when that value is >= ``threshold_hol``, otherwise
        ``[]`` (also when ``holders`` is empty).
    """
    if len(holders) == 0:
        return [[] for _ in expressions]

    feat_index_filename = config_manager.get_index_features_exp_hol_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False because the classifier re-reads the file by name after it
    # has been closed; the finally clause removes it even on failure.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        with examples_file:
            ## The format in the example file will be:
            # exp1 --> hol1
            # exp1 --> hol2
            # exp1 --> hol3
            # exp2 --> hol1
            # exp2 --> hol2
            # exp2 --> hol3
            for exp_ids in expressions:
                for hol_ids in holders:
                    feats = extract_feats_exp_hol(exp_ids,hol_ids,knaf_obj, use_dependencies=use_dependencies,use_tokens=use_tokens,use_lemmas=use_lemmas)
                    feat_index.encode_example_for_classification(feats,examples_file,my_class='0')

        model_file = config_manager.get_filename_model_exp_hol()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        os.remove(examples_file.name)

    assigned_holders = []
    idx = 0         # This idx will iterate from 0 to num_exp X num_hol
    for _exp in expressions:
        # Start from -inf so the real best score is always found. The former
        # sentinel of -1 left best_idx at -1 when every score was below -1,
        # which made holders[-1] (the last holder) win for thresholds <= -1.
        best_value = float('-inf')
        best_idx = -1
        for num_hol in range(len(holders)):
            value = results[idx]
            idx += 1
            # Strict comparison: on ties the first holder is kept.
            if value > best_value:
                best_value = value
                best_idx = num_hol

        if best_idx >= 0 and best_value >= threshold_hol:
            assigned_holders.append(holders[best_idx])
        else:
            assigned_holders.append([])

    return assigned_holders
コード例 #7
0
def link_exp_tar_all(expressions,targets, knaf_obj,threshold, use_dependencies=True,use_tokens=True, use_lemmas=True):
    """Link every expression with all the targets scoring at least ``threshold``.

    Args:
        expressions: iterable of ``(exp_ids, exp_type)`` pairs; it is iterated
            twice, so it must be re-iterable.
        targets: sequence of target id groups.
        knaf_obj: object passed through to ``extract_feats_exp_tar``.
        threshold: minimum classifier value for an (expression, target) pair
            to be kept. The value given by the caller is honoured; it is no
            longer overwritten with a hard-coded -0.75 inside the function.
        use_dependencies, use_tokens, use_lemmas: forwarded to
            ``extract_feats_exp_tar``.

    Returns:
        A list of ``(exp_ids, exp_type, target)`` tuples. An expression with
        no target reaching the threshold (or when ``targets`` is empty)
        contributes one tuple whose target is ``[]``.
    """
    pairs = []

    if len(targets) == 0:
        for exp_ids, exp_type in expressions:
            pairs.append((exp_ids, exp_type, []))
        return pairs

    feat_index_filename = config_manager.get_index_features_exp_tar_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False because the classifier re-reads the file by name after it
    # has been closed; the finally clause removes it even on failure.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        with examples_file:
            # One example per (expression, target) pair, expression-major.
            for exp_ids, _exp_type in expressions:
                for tar_ids in targets:
                    feats = extract_feats_exp_tar(exp_ids,tar_ids,knaf_obj, use_dependencies=use_dependencies,use_tokens=use_tokens,use_lemmas=use_lemmas)
                    feat_index.encode_example_for_classification(feats, examples_file,my_class='0')

        model_file = config_manager.get_filename_model_exp_tar()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        os.remove(examples_file.name)

    # results is read in the same order the examples were written
    # (assumes run_svm_classify returns one value per example, in order).
    idx = 0
    for exp, exp_type in expressions:
        at_least_one = False
        for tar in targets:
            value = results[idx]
            idx += 1
            if value >= threshold:
                pairs.append((exp, exp_type, tar))
                at_least_one = True

        if not at_least_one:
            pairs.append((exp, exp_type, []))

    return pairs
コード例 #8
0
def link_exp_tar(expressions,targets, knaf_obj,use_dependencies=True,use_tokens=True, use_lemmas=True):
    """Assign to every expression its best scoring target.

    Args:
        expressions: iterable of expression id groups; it is iterated twice
            when there are two or more targets, so it must be re-iterable.
        targets: sequence of target id groups.
        knaf_obj: object passed through to ``extract_feats_exp_tar``.
        use_dependencies, use_tokens, use_lemmas: forwarded to
            ``extract_feats_exp_tar``.

    Returns:
        A list with one entry per expression: ``[]`` when there are no
        targets, the only target when there is exactly one (the classifier
        is not run), otherwise the target with the highest classifier value.
    """
    if len(targets) == 0:
        return [[] for _ in expressions]
    if len(targets) == 1:
        return [targets[0] for _ in expressions]

    feat_index_filename = config_manager.get_index_features_exp_tar_filename()
    feat_index = Cfeature_index()
    feat_index.load_from_file(feat_index_filename)

    # delete=False because the classifier re-reads the file by name after it
    # has been closed; the finally clause removes it even on failure.
    examples_file = NamedTemporaryFile(delete=False)
    try:
        with examples_file:
            ## The format in the example file will be:
            # exp1 --> tar1
            # exp1 --> tar2
            # exp1 --> tar3
            # exp2 --> tar1
            # exp2 --> tar2
            # exp2 --> tar3
            for exp_ids in expressions:
                for tar_ids in targets:
                    feats = extract_feats_exp_tar(exp_ids,tar_ids,knaf_obj, use_dependencies=use_dependencies,use_tokens=use_tokens,use_lemmas=use_lemmas)
                    feat_index.encode_example_for_classification(feats, examples_file,my_class='0')

        model_file = config_manager.get_filename_model_exp_tar()
        results = run_svm_classify(examples_file.name, model_file)
    finally:
        os.remove(examples_file.name)

    assigned_targets = []
    idx = 0         # This idx will iterate from 0 to num_exp X num_tar
    for _exp in expressions:
        # Start from -inf so any score, however negative, can win. The former
        # -100 sentinel left best_idx at -100 when every score was <= -100,
        # which then indexed targets[-100].
        best_value = float('-inf')
        best_idx = 0
        for num_tar in range(len(targets)):
            value = results[idx]
            idx += 1
            # Strict comparison: on ties the first target is kept.
            if value > best_value:
                best_value = value
                best_idx = num_tar
        assigned_targets.append(targets[best_idx])

    return assigned_targets