Esempio n. 1
0
def shuffle_examples(labels, sentences, pct_train=0.5):
    """
    Randomly partition a labelled data set into training and validation parts.

    The example order is shuffled with NumPy's global random generator,
    which is re-seeded with the fixed value 1987 on every call (a side
    effect on the global random state).

    labels, sentences - File names of the label and sentence data sets;
                        read with dp.labels_as_ints and dp.import_sentences.
    pct_train - Fraction of the examples assigned to the training set.
                The training set receives int(pct_train*N + 1) examples,
                where N is the number of labels; the rest become
                validation data.

    Returns the tuple
    (train_labels, train_sentences, validation_labels, validation_sentences),
    each element being a list in shuffled order.
    """

    # load the complete data set in its original order
    all_labels = dp.labels_as_ints(labels)
    all_sentences = dp.import_sentences(sentences)
    n_examples = len(all_labels)
    n_train = int(pct_train*n_examples + 1)

    # draw the shuffled ordering of the example indices
    np.random.seed(1987)
    order = np.arange(0, n_examples, dtype='int')
    np.random.shuffle(order)
    train_idx, validation_idx = order[:n_train], order[n_train:]

    # gather the examples belonging to each part
    train_labels = [all_labels[i] for i in train_idx]
    train_sentences = [all_sentences[i] for i in train_idx]
    validation_labels = [all_labels[i] for i in validation_idx]
    validation_sentences = [all_sentences[i] for i in validation_idx]

    return train_labels, train_sentences, validation_labels, validation_sentences
Esempio n. 2
0
    dummy = 0

# load weights
# Three weight files are read per method (suffixes 0p25, 0p5 and 0p75).
# When method is 'dummy', the files saved for 'collins' are loaded instead.
weights_prefix = 'training_output/' + (method if method != 'dummy' else 'collins')
weights_0p25 = np.load(weights_prefix + '_w_0p25.npy')
weights_0p5 = np.load(weights_prefix + '_w_0p5.npy')
weights_0p75 = np.load(weights_prefix + '_w_0p75.npy')
weights = [weights_0p25, weights_0p5, weights_0p75]

# load the test data
test_labels = dp.labels_as_ints('dataset/testLabels.txt')
test_sentences = dp.import_sentences('dataset/testSentences.txt')

# Choose the scoring routine once, up front, instead of re-testing
# scoretype for every weight vector.
if scoretype == 'word':
    scorer = sr.score_by_word
elif scoretype == 'sentence':
    scorer = sr.score_by_sentence
elif scoretype == 'mark':
    scorer = sr.score_by_mark
else:
    # Parenthesised so the line parses under both Python 2 and Python 3.
    print('Not a valid method!\n')
    # Leave with a non-zero status so the calling shell can detect the
    # failure. SystemExit is raised directly because the exit() helper is
    # installed by the site module and is not intended for use in programs.
    raise SystemExit(1)

# calculate the scores: one entry per weight vector, in the order of `weights`
score = []
for w in weights:
    score.append(scorer(w, test_labels, test_sentences, dummy))