def shuffle_examples(labels, sentences, pct_train=0.5):
    """
    Randomly partition the labelled examples into a training set and a
    validation set.

    labels, sentences - File names of the label and sentence data sets. They
                        are read with dp.labels_as_ints and
                        dp.import_sentences respectively.
    pct_train - Fraction of the examples to use for training. The training
                set receives int(pct_train*N + 1) examples, where N is the
                number of labels read; whatever is left over becomes the
                validation set.

    Returns the tuple (train_labels, train_sentences, validation_labels,
    validation_sentences); each element is a list.

    Note: the global numpy random generator is re-seeded with 1987 on every
    call, so the permutation is reproducible from run to run.
    """
    # read the full, ordered data set
    all_labels = dp.labels_as_ints(labels)
    all_sentences = dp.import_sentences(sentences)

    n_examples = len(all_labels)
    n_train = int(pct_train*n_examples + 1)

    # fixed seed -> the same permutation of example indices each time
    np.random.seed(1987)
    order = np.arange(0, n_examples, dtype='int')
    np.random.shuffle(order)

    # the first n_train shuffled indices train, the remainder validate
    train_idx = order[:n_train]
    valid_idx = order[n_train:]

    train_labels = [all_labels[i] for i in train_idx]
    train_sentences = [all_sentences[i] for i in train_idx]
    validation_labels = [all_labels[i] for i in valid_idx]
    validation_sentences = [all_sentences[i] for i in valid_idx]

    return train_labels, train_sentences, validation_labels, validation_sentences
else: dummy = 0 # load weights if method != 'dummy': weights_0p25 = np.load('training_output/'+method+'_w_0p25.npy') weights_0p5 = np.load('training_output/'+method+'_w_0p5.npy') weights_0p75 = np.load('training_output/'+method+'_w_0p75.npy') else: weights_0p25 = np.load('training_output/collins_w_0p25.npy') weights_0p5 = np.load('training_output/collins_w_0p5.npy') weights_0p75 = np.load('training_output/collins_w_0p75.npy') weights = [weights_0p25, weights_0p5, weights_0p75] # load the test data test_labels = dp.labels_as_ints('dataset/testLabels.txt') test_sentences = dp.import_sentences('dataset/testSentences.txt') # calculate the scores score = [] for w in weights: if scoretype == 'word': score.append(sr.score_by_word(w,test_labels,test_sentences,dummy)) elif scoretype == 'sentence': score.append(sr.score_by_sentence(w,test_labels,test_sentences,dummy)) elif scoretype == 'mark': score.append(sr.score_by_mark(w,test_labels,test_sentences,dummy)) else: print 'Not a valid method!\n' exit(0)