Example #1
def list_samplers(model, maximize=False):
    # Build a dummy node aligned with the model and print the samplers available for it.
    node = model.dummy()
    models.align(node, model)
    samplers = get_samplers('dummy', node, maximize)
    node.model.display()
    print()
    for s in samplers:
        print(s)
def fit_model(structure, data_matrix, old_root=None, gibbs_steps=200):
    # Requires numpy as np; GaussianNode, initialization, models, and dumb_samplers
    # are assumed to be provided by the surrounding project.
    if old_root is None:
        # Start from a trivial Gaussian model over latent values sampled around zero.
        X = data_matrix.sample_latent_values(np.zeros((data_matrix.m, data_matrix.n)), 1.)
        old_root = GaussianNode(X, 'scalar', 1.)
    # Initialize the new structure from the previous fit, then align it with the model.
    root = initialization.initialize(data_matrix, old_root, old_root.structure(), structure, num_iter=gibbs_steps)
    model = models.get_model(structure, fixed_noise_variance=data_matrix.fixed_variance())
    models.align(root, model)
    # Run full Gibbs sweeps, followed by a single maximization sweep.
    dumb_samplers.sweep(data_matrix, root, num_iter=gibbs_steps)
    dumb_samplers.sweep(data_matrix, root, maximize=True, num_iter=1)
    return root
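
fit_model alternates many Gibbs sampling sweeps with a single final maximization sweep. The snippet below is a minimal, self-contained sketch of that sample-then-maximize pattern on a toy 1-D Gaussian; every name in it is hypothetical, and it does not use the project's models, dumb_samplers, or GaussianNode.

import numpy as np

def toy_fit_model(data, gibbs_steps=200):
    """Toy sketch of the sweep-then-maximize pattern used by fit_model above:
    Gibbs-sample the mean and variance of a 1-D Gaussian, then finish with a
    pass that plugs in point estimates.  Hypothetical and purely illustrative."""
    n = len(data)
    mu, var = 0.0, 1.0
    for _ in range(gibbs_steps):
        # Resample the mean given the current variance (flat prior on mu).
        mu = np.random.normal(data.mean(), np.sqrt(var / n))
        # Resample the variance given the current mean (Jeffreys-style prior).
        sse = np.sum((data - mu) ** 2)
        var = sse / np.random.chisquare(n)
    # "Maximize" pass: replace the final samples with maximum-likelihood values.
    mu = data.mean()
    var = np.sum((data - mu) ** 2) / n
    return mu, var

mu_hat, var_hat = toy_fit_model(np.random.normal(3.0, 2.0, size=500))
print(mu_hat, var_hat)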
Example #3


if __name__ == '__main__':
    
    ensemble = list()
    
    ## The BoW model requires the path to a pre-trained word2vec or GloVe vector space in binary format
    #model = md.bow("/Users/fa/workspace/repos/_codes/MODELS/Rob/word2vec_100_6/vectorsW.bin")
    #ensemble.append(md.bow("/home/ds/STS/GoogleNews-vectors-negative300.bin"))
    
    ## The FeatureBased model is standalone and does not need any pre-trained or external resources
    #ensemble.append(md.featureBased())
    
    # Word-alignment model based on Sultan et al.
    ensemble.append(md.align())
    
    ## Load some data for training (standard SICK dataset)
    trainSet, devSet, testSet = load_data_SICK('../data/SICK/')

    ## Train a classifier using train and development subsets
    classifier = train(ensemble, trainSet, devSet)
    
    ## Test the classifier on test data of the same type (coming from SICK)
    test(ensemble, classifier, testSet).to_csv('../data/local/SICK-trained_SICK-test.csv')

    ## File name under which to save the trained classifier for later use
    fileName = '../data/local/SICK-Classifier.h5'
    
    ## VERSION THREE SAVE / LOAD (the only one that works)
    classifier.save(fileName)
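
The .h5 extension and the classifier.save(...) call above suggest, but do not confirm, that the classifier is a Keras model; under that assumption it could later be reloaded roughly like this:

from keras.models import load_model

# Assumption: the classifier saved above is a Keras model (implied by .h5 / .save()).
fileName = '../data/local/SICK-Classifier.h5'
classifier = load_model(fileName)

# The reloaded classifier can then be reused like the freshly trained one, e.g.:
# test(ensemble, classifier, testSet)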
Example #4
    # Sanity check: the three splits together should cover the full dataset.
    print(len(allA))
    print(len(trainA) + len(devA) + len(testA))
    print(len(trainA), len(devA), len(testA))
    return [trainA, trainB, trainS], [devA, devB, devS], [testA, testB, testS]


if __name__ == '__main__':

    df = pd.DataFrame(columns=['bow', 'fb', 'aligner'])
    ensemble = list()
    ## Load some data for training (standard SICK dataset)
    trainSet, devSet, testSet = load_data_SICK('../data/SICK/')

    bowm = md.bow("/home/ds/STS/GoogleNews-vectors-negative300.bin")
    fbm = md.featureBased()
    alignm = md.align()

    ensemble.append(bowm)
    ensemble.append(fbm)
    ensemble.append(alignm)

    classifiers = list()

    ## Train a separate classifier for each model in the ensemble, using the train and development subsets
    for index, model in enumerate(ensemble):
        classifier, df = train([model], trainSet, devSet, df)
        classifiers.append(classifier)
        print('writing the final DataFrame')
        df.to_csv('final' + str(index) + '.csv', sep='\t')

    bow = pd.read_csv("final0.csv", sep='\t', engine='python')['bow']
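
The excerpt stops after reading back the 'bow' column; a hypothetical continuation (file and column names inferred from the loop above, not taken from the original) might collect the other per-model prediction columns and place them side by side:

import pandas as pd

# Hypothetical continuation: the column/file names follow the loop above but are assumptions.
fb = pd.read_csv("final1.csv", sep='\t', engine='python')['fb']
aligner = pd.read_csv("final2.csv", sep='\t', engine='python')['aligner']

combined = pd.concat([bow, fb, aligner], axis=1)
print(combined.head())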