def LModel(eta, batchsize, dSize, relSize, updatewords):
    """Configure hyperparameters, load embeddings/relations/training data,
    build the relation model and train it.

    eta         -- learning rate
    batchsize   -- minibatch size
    dSize       -- word-embedding dimensionality (also used as layer size)
    relSize     -- relation-matrix dimensionality
    updatewords -- whether word embeddings are updated during training

    NOTE(review): relies on a module-level `params` object plus the helpers
    getWordmap / getRelation / getData / theano_word_model defined elsewhere
    in this project.
    """
    trainSize = [50]                 # only trainSize[0] is used (file-name tag)
    acti = ['relu', 'tanh']
    evaT = ['sum', 'max', 'cause']
    layersize = dSize

    params.frac = 1.0
    params.outfile = ('Model_FA' + '_eta_' + str(eta) + '_dSize_' + str(dSize)
                      + '_batchsize_' + str(batchsize) + '_relSize_' + str(relSize)
                      + '_trainSize_' + str(trainSize[0])
                      + '_updatewords_' + str(updatewords))
    #params.dataf = '../data/conceptnet/AddModelData/omcs_train_new'+str(trainSize[0])+'.txt'
    #params.dataf = '../data/conceptnet/AddModelData/causes_omcs.txt'
    params.dataf = '../data/conceptnet/AddModelData/new_omcs100.txt'
    params.batchsize = batchsize
    params.hiddensize = 25
    params.type = "MAX"
    # BUG FIX: the original assigned params.save = True twice; once suffices.
    params.save = True
    params.constraints = False
    params.embedsize = dSize
    params.relsize = relSize
    params.activation = acti[0]
    params.evaType = evaT[0]
    params.usepeep = True
    params.LC = 0.00001              # regularization weight on relation matrices
    params.Lw = 0.01                 # regularization weight on word embeddings
    params.eta = eta
    params.margin = 1

    (words, We) = getWordmap('../data/conceptnet/embeddings/embeddings.skip.newtask.en.d'
                             + str(dSize) + '.m1.w5.s0.it20.txt')
    #print We.shape
    rel = getRelation('../data/conceptnet/rel.txt')
    params.outfile = ("../models/" + params.outfile + "_" + str(params.LC)
                      + "_" + str(params.Lw) + ".txt")

    #examples are shuffled data
    examples = getData(params.dataf)
    params.data = examples[0:int(params.frac * len(examples))]
    #print "Using Training Data"+params.dataf
    #print "Using Word Embeddings with Dimension "+str(dSize[0])
    #print "Training on "+str(len(params.data))
    #print "Saving models to: "+params.outfile

    # Relation tensors: 35 matrices initialized near-identity, with uniform
    # noise in [-0.2, 0.2] everywhere and 1 added on the diagonal.
    Rel_init = np.zeros((35, params.relsize, params.relsize))
    for k in range(35):
        for i in range(params.relsize):
            for j in range(params.relsize):
                if i == j:
                    Rel_init[k][i][j] = 1 + random.uniform(-0.2, 0.2)
                else:
                    Rel_init[k][i][j] = random.uniform(-0.2, 0.2)

    tm = theano_word_model(We, words, layersize, params.embedsize, rel,
                           params.relsize, Rel_init, params.LC, params.Lw,
                           params.eta, params.margin, params.usepeep, updatewords)
    tm.train(params.data, params)
params.outfile = 'Bilinear_Hinge' + 'trainSize100dSize' + str( sys.argv[1]) + 'relSize' + str(sys.argv[2]) + 'acti' + str(sys.argv[3]) params.dataf = '../commonsendata/Training/new_omcs100.txt' #if you want to save the model, just change this to 'True' params.save = False params.constraints = False params.activation = 'tanh' params.evaType = 'cause' params.usepeep = True params.margin = 1 # (words, We) = getWordmap('../commonsendata/embeddings/tuples/embeddings.skip.newtask.en.d'+str(sys.argv[1])+'.m1.w5.s0.it20.txt') # print We.shape # if downloading data from http://ttic.uchicago.edu/~kgimpel/commonsense.html (words, We) = getWordmap('../commonsendata/embeddings/embeddings.txt') rel = getRelation('../commonsendata/Training/rel.txt') params.outfile = "../models/" + params.outfile + "." + str( params.lam) + "." + str( params.batchsize ) + "." + params.type + "." + params.activation + "." + str( params.frac) + ".txt" #Examples are shuffled data examples = getData(params.dataf) params.data = examples[0:int(params.frac * len(examples))] print "Using Training Data" + params.dataf print "Using Word Embeddings with Dimension " + str(sys.argv[1]) print "Training on " + str(len(params.data)) + " examples using lambda=" + str(
binaryScore = [] Exp_S_sorted = sorted(Exp_S) for j in xrange(len(Exp_S)): temp_thr = Exp_S_sorted[j] for j1 in xrange(int(len(Exp_S) / 2)): if (Exp_S[j1] >= temp_thr): right = right + 1 else: wrong = wrong + 1 for j2 in xrange(int(len(Exp_S) / 2), int(len(Exp_S)), 1): if (Exp_S[j2] <= temp_thr): right = right + 1 else: wrong = wrong + 1 if ((right / (len(Exp_S))) > accurancy): accurancy = (1.0 * right / (len(Exp_S))) threshold = temp_thr right = 0 wrong = 0 #print 'Dev1-Accurancy',accurancy return threshold, accurancy if __name__ == "__main__": (words, We) = getWordmap('../data/conceptnet/embeddings.txt') tm = theano_word_model(We) rel = getRelation('../data/conceptnet/rel.txt') Rel = tm.getRel() evaluate_adagrad(We, words, Rel, rel)