import numpy
import networkx as nx

if __name__ == "__main__":
    # NOTE(review): this fragment is corrupted — the `for vect in data:` loop
    # below has no body (only commented-out lines), so the dedented print
    # statements raise an IndentationError. The expansion logic that belonged
    # inside the loop, and the definitions of the accuracy counters
    # (pos_correct, neg_correct, neut_correct, *_total, total), were lost.
    # NOTE(review): `ex`, `c`, and `u` are never imported in this file —
    # presumably project-local modules (expansion, config, utils); confirm.

    # Method variables
    # n (path length) should be either 1 or 2
    # note: tried 3, but didn't give good results, so now code is limited to 1 or 2
    n = 1
    input_file = "test-full-new.txt"
    output_file = "test-local-path-1.txt"

    # Generate graph from edges file
    G = ex.read_graph(c.output_dir + "edges-directed-6.txt")

    # Get feature space and target words
    features_index = u.read_features_file(c.output_dir + "feat-space.txt")
    target_words = u.get_target_words()

    # Get sentences/vectors of data to expand
    sentences, data = ex.get_expansion_data(input_file, features_index)

    # Get matrix of weight vectors
    print "generating weight matrix..."
    W, b_arr = u.get_weight_matrix(target_words)

    # Iterate over instances and expand them
    print "expanding feature vectors..."
    i = 0
    for vect in data:
        # NOTE(review): loop body missing — only this disabled debug guard
        # survives; the next statement is dedented, which is a syntax error.
        #if i == 3:
        #    break
    # NOTE(review): the counters below are never assigned in this fragment.
    print "Accuracy:", (
        (pos_correct + neg_correct + neut_correct) / float(total)) * 100
    print "Pos accuracy:", (pos_correct / float(pos_total)) * 100
    print "Neg accuracy:", (neg_correct / float(neg_total)) * 100
    print "Neut accuracy:", (neut_correct / float(neut_total)) * 100
    print ""


if __name__ == "__main__":
    # NOTE(review): corrupted fragment — it ends with `return incorrect`,
    # which is illegal at module level (SyntaxError). This text was almost
    # certainly spliced together from a script preamble and the tail of an
    # evaluation function (`title`, pos_correct/neg_correct/total and
    # `incorrect` are never defined here).
    train_data_file = "train-output1.txt"
    #test_data_file = "wordnet-expanded-test.txt"
    #test_data_file = "output-700/expanded-data/test-expand-short-path1.txt"
    test_data_file = "test-output1.txt"

    # Get feature space
    # NOTE(review): `u` is not imported in this file — presumably a utils module.
    features_index = u.read_features_file("feat-space-sent-prefix.txt")
    feature_size = len(features_index)

    # Get idf values for features
    # Each line of term-idf.txt is expected to be "<term> <idf-value>".
    term_idf = {}
    with open("term-idf.txt") as file:
        for line in file:
            line = line.strip().split()
            term_idf[line[0]] = float(line[1])

    # Get train data
    print "Getting train data..."
    train_data = []
    train_labels = []
    f = 0.2  # factor to weight feature values
    threshold = 0.65
    # NOTE(review): everything between here and the results printout is missing.
    print "# Results for:", title
    print "Accuracy:", ((pos_correct + neg_correct) / float(total))*100
    print "Pos accuracy:", (pos_correct / float(pos_total))*100
    print "Neg accuracy:", (neg_correct / float(neg_total))*100
    print ""
    
    # NOTE(review): `return` outside a function — invalid at module level.
    return incorrect


if __name__ == "__main__":
    # NOTE(review): truncated fragment — the script sets up its inputs and
    # hyperparameters and then cuts off (the next lines in the file are a
    # fresh set of imports from yet another pasted file).
    train_data_file = "train-6000.txt"
    #test_data_file = "wordnet-expanded-test.txt"
    test_data_file = "output-700/expanded-data/test-threshold.txt"
    
    # Get feature space
    # NOTE(review): `u` is not imported in this file — presumably a utils module.
    features_index = u.read_features_file("feat-space-sent-prefix.txt")
    feature_size = len(features_index)
    
    # Get idf values for features
    # Each line of term-idf.txt is expected to be "<term> <idf-value>".
    term_idf = {}
    with open("term-idf.txt") as file:
        for line in file:
            line = line.strip().split()
            term_idf[line[0]] = float(line[1])
    
    # Get train data
    print "Getting train data..."
    train_data = []
    train_labels = []
    f = 0.2 # factor to weight feature values
    threshold = 0.65
import numpy
import networkx as nx


if __name__ == "__main__":
    # NOTE(review): corrupted fragment — the `for vect in data:` loop at the
    # bottom has no body (only a disabled debug guard); the expansion logic
    # that belonged inside it is lost, and the file degrades into pasted junk
    # ("Beispiel #5") immediately after.
    # NOTE(review): `ex`, `c`, and `u` are never imported in this file —
    # presumably project-local modules (expansion, config, utils); confirm.

    # Method variables
    #predict_threshold = 0.9 # prediction threshold
    
    input_file = "test-full.txt"
    output_file = "test-expanded-all-neighb.txt"
    
    # Generate graph from edges file
    G = ex.read_graph(c.output_dir + "edges-directed-6.txt")
    
    # Get feature space and target words
    features_index = u.read_features_file(c.output_dir + "feat-space.txt")
    target_words = u.get_target_words()
    
    # Get sentences/vectors of data to expand
    sentences, data = ex.get_expansion_data(input_file, features_index)
    
    # Get matrix of weight vectors
    print "generating weight matrix..."
    W, b_arr = u.get_weight_matrix(target_words)
    
    
    print "expanding feature vectors..."
    i = 0
    for vect in data:
        # NOTE(review): loop body missing — only this disabled guard survives.
        #if i == 5:
        #    break
# Beispiel #5 (scraping artifact — snippet separator from the pasted source)
# 0 (scraping artifact)
            # NOTE(review): orphaned fragment — the enclosing `def`, the outer
            # loop over file lines, and the loop over `i` pairs are missing
            # above. From what survives: each pair `i` looks like
            # (word, value); the value is written into `vect` at the word's
            # feature index, the label (line[0]) is appended to `l`, and the
            # vector to `d`; the function returns (d, l) — data and labels.
            value = i[1]
            #print word, value
            try:
                vect[features_index[word]] = float(value)
            # NOTE(review): bare except silently drops words that are not in
            # the feature space (KeyError) or have non-numeric values
            # (ValueError) — presumably intentional best-effort; confirm.
            except:
                pass
        l.append(line[0])
        d.append(vect)
    
    return d, l


if __name__ == "__main__":
    # Get feature vectors
    print "generating feature vectors..."
    features_index = u.read_features_file("output-700/feat-space.txt")
    feature_size = len(features_index)
    target_words = u.get_target_words()
    
    # Now let's do SCL
    print "getting targets..."
    targets = u.get_target_words()
    
    # Load target word weight vectors as rows into matrix W
    print "creating W..."
    W, _ = u.get_weight_matrix(targets)
    
    # Perform SVD on W
    print "performing SVD(W)..."
    WT = W.T
    # SVD returns U, S, V.T