def list_samplers(model, maximize=False): node = model.dummy() models.align(node, model) samplers = get_samplers('dummy', node, maximize) node.model.display() print for s in samplers: print s
def fit_model(structure, data_matrix, old_root=None, gibbs_steps=200):
    """Fit a model with the given structure to *data_matrix*.

    When *old_root* is None, a Gaussian root is bootstrapped from latent
    values sampled at zero mean with unit variance.  The root is then
    initialized to the target structure, aligned with the model, and
    refined with Gibbs sweeps (finishing with one maximization sweep).
    Returns the fitted root node.
    """
    if old_root is None:
        # Bootstrap: sample latent values around zero and wrap them in a
        # scalar Gaussian root node with unit variance.
        latent = data_matrix.sample_latent_values(
            np.zeros((data_matrix.m, data_matrix.n)), 1.)
        old_root = GaussianNode(latent, 'scalar', 1.)

    fitted_root = initialization.initialize(
        data_matrix, old_root, old_root.structure(), structure,
        num_iter=gibbs_steps)
    target_model = models.get_model(
        structure, fixed_noise_variance=data_matrix.fixed_variance())
    models.align(fitted_root, target_model)

    # Stochastic sweeps first, then a single maximizing sweep to finish.
    dumb_samplers.sweep(data_matrix, fitted_root, num_iter=gibbs_steps)
    dumb_samplers.sweep(data_matrix, fitted_root, maximize=True, num_iter=1)
    return fitted_root
if __name__ == '__main__':
    # Assemble the ensemble of similarity models.
    # The bag-of-words model needs a pre-trained word2vec/GloVe vector
    # space in binary format, e.g.:
    #   md.bow("/Users/fa/workspace/repos/_codes/MODELS/Rob/word2vec_100_6/vectorsW.bin")
    #   md.bow("/home/ds/STS/GoogleNews-vectors-negative300.bin")
    # The feature-based model is standalone and needs no external
    # resource:
    #   md.featureBased()
    ensemble = []
    # Word-alignment model based on Sultan et al.
    ensemble.append(md.align())

    # Load the standard SICK dataset splits.
    train_set, dev_set, test_set = load_data_SICK('../data/SICK/')

    # Fit a classifier on the train/dev subsets.
    sick_classifier = train(ensemble, train_set, dev_set)

    # Evaluate on SICK test data and persist the predictions.
    test(ensemble, sick_classifier, test_set).to_csv('../data/local/SICK-trained_SICK-test.csv')

    # Save the trained classifier for later use
    # (the only save/load variant that works).
    model_path = '../data/local/SICK-Classifier.h5'
    sick_classifier.save(model_path)
    # --- tail of a function whose `def` line is not visible in this view ---
    # NOTE(review): the indentation below is reconstructed from the flattened
    # source -- confirm against the full file.  Sanity prints comparing the
    # full set size against the sum of the three splits.
    print len(allA)
    print len(trainA) + len(devA) + len(testA)
    print len(trainA), len(devA), len(testA)
    # Return (train, dev, test) triples of [sentencesA, sentencesB, scores].
    return [trainA, trainB, trainS], [devA, devB, devS], [testA, testB, testS]


if __name__ == '__main__':
    # Per-model score columns collected during training.
    df = pd.DataFrame(columns=['bow', 'fb', 'aligner'])
    ensemble = list()
    ## Load some data for training (standard SICK dataset)
    trainSet, devSet, testSet = load_data_SICK('../data/SICK/')
    # Build the three ensemble members: bag-of-words (needs a pre-trained
    # word2vec binary), feature-based, and word-alignment models.
    bowm = md.bow("/home/ds/STS/GoogleNews-vectors-negative300.bin")
    fbm = md.featureBased()
    alignm = md.align()
    ensemble.append(bowm)
    ensemble.append(fbm)
    ensemble.append(alignm)
    classifiers = list()
    ## Train the different models in the ensemble using train and development subsets
    for index, model in enumerate(ensemble):
        classifier, df = train([model], trainSet, devSet, df)
        classifiers.append(classifier)
        # NOTE(review): the per-index filename ('final0.csv', 'final1.csv', ...)
        # suggests this write belongs inside the loop -- confirm, since the
        # flattened source does not show the original indentation.
        print 'writing the final DataFrame'
        df.to_csv('final' + str(index) + '.csv', sep='\t')
    # Reload the bag-of-words score column from the first written file.
    bow = pd.read_csv("final0.csv", sep='\t', engine='python')['bow']