def main3():
    # Merge clusters whose entity sets are identical and remember which original keys were merged.
    ent = need.load_pickle("new-mentions-sum-test.p")
    old_keys_m = collections.defaultdict(list)
    mentions_sum = collections.defaultdict(list)
    old_keys = need.load_pickle("new-mentions-sum-keys.p")
    duplicate_keys1 = []
    duplicate_keys2 = []  # only used by the commented-out variant below
    j = 0
    for key in ent.keys():
        if key not in duplicate_keys1:
            mentions_sum[j] = ent[key]
            old_keys_m[j].append(old_keys[key])
            key_list = delete_or_not(set(ent[key]), ent, key)
            for x in key_list:
                duplicate_keys1.append(x)
                old_keys_m[j].append(old_keys[x])
            j = j + 1
    '''for key in ent.keys():
        if key not in duplicate_keys2:
            mentions_sum[j]=ent[key]
            old_keys_m[j].append(old_keys[key])
            key_list1=delete_or_not1(set(ent[key]),ent,key)
            duplicate_keys2.append(key_list1)
            #if key_list!=-1:
            #    old_keys_m[j].append(old_keys[key_list])
            j=j+1'''
    need.save_pickle(old_keys_m, "old-keys-m.p")
    need.save_pickle(mentions_sum, "new_reduced_test.p")
    print_dict(old_keys_m)
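# delete_or_not() is defined elsewhere in this project. A minimal sketch of the
# behaviour main3() relies on, assuming it returns every *other* key whose entity
# set equals the given one (those clusters are then treated as duplicates and merged).
# The name delete_or_not_sketch is hypothetical and only for illustration.
def delete_or_not_sketch(entity_set, ent, key):
    duplicates = []
    for other in ent.keys():
        if other != key and set(ent[other]) == entity_set:
            duplicates.append(other)
    return duplicates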
def main():
    '''ent=need.load_pickle("mentions1-sum-test.p")
    test_keys=need.load_pickle("test-keys.p")
    new_test_keys=collections.defaultdict(list)
    mentions_sum=collections.defaultdict(list)
    #mentions_sum={}
    i=0
    j=0
    duplicate_keys=[]
    for key in ent.keys():
        if key not in duplicate_keys:
            #print j,":",key
            #print key
            mentions_sum[j]=ent[key]
            new_test_keys[j].append(test_keys[key])
            key_list=delete_or_not(ent[key],ent,key)
            #print key_list
            for x in key_list:
                duplicate_keys.append(x)
                new_test_keys[j].append(test_keys[x])
            #print duplicate_keys
            j=j+1'''
    '''for key in ent.keys():
        if key.find("test")!=-1 and key.find("train")==-1:
            mentions_sum[i]=find_clusters(key,ent)
            #print i,":",key.replace("test","")
            i=i+1'''
    '''need.save_pickle(mentions_sum,"reduced1-sum-test.p")
    print_dict(mentions_sum)
    need.save_pickle(new_test_keys,"new-keys.p")'''
    #print_dict(new_test_keys)
    #print_dict(need.load_pickle("entities-overlap.p"))
    ent = need.load_pickle("new-entities-overlap.p")
    mentions_sum_train = collections.defaultdict(list)
    mentions_sum = {}
    i = 0
    #for key in ent.keys():
    #    if key.find("test")!=-1 and key.find("train")==-1:
    #        #print key
    #        ind=len(key)-4
    #        #print ind
    #        #mentions_sum[i]=key.replace("test","",ind)
    #        mentions_sum[i]=''.join(key.rsplit('test', 1))
    #        #print i,": ",mentions_sum[i]
    #        i=i+1
    for key in ent.keys():
        if key.find("test") != -1:
            # cluster the overlapping mentions and record the mention name (key with the
            # trailing "test" suffix removed)
            mentions_sum_train[i] = find_clusters(key, ent)
            mentions_sum[i] = ''.join(key.rsplit('test', 1))
            i = i + 1
    #unique_mentions=set()
    #for key in ent.keys():
    #    if key.find("train")!=-1:
    #        unique_mentions.add(key.replace("train",""))
    #        for val in ent[key]:
    #            unique_mentions.add(val)
    #print_dict(mentions_sum)
    # NOTE: mentions_sum_train (the clusters themselves) is not saved here; main3()
    # reads the clusters from "new-mentions-sum-test.p", which is presumably written
    # by another variant of this function.
    need.save_pickle(mentions_sum, "new-mentions-sum-keys.p")
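# find_clusters() is a project helper that is not shown in this section. The overlap
# dictionary maps keys such as "<mention>test" / "<mention>train" to the list of other
# mentions whose entity sets overlap with it. A heavily hedged sketch, assuming the
# helper simply gathers the mention itself plus the mentions listed under the key
# (the real implementation may follow the overlap graph transitively). The name
# find_clusters_sketch is hypothetical.
def find_clusters_sketch(key, ent):
    cluster = [''.join(key.rsplit('test', 1))]
    for other in ent[key]:
        if other not in cluster:
            cluster.append(other)
    return cluster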
def main():
    #mentions=np.load("../preprocess/short_list1.npy")
    #mentions=mentions[0:210]
    f = open("test-ch.txt", "r")
    entities = collections.defaultdict(list)
    mentions = []
    for line in f:
        line = line.strip()
        line = line.split()
        # join every token except the last column into the mention string
        mention = line[0]
        for i in range(1, len(line) - 1):
            mention = mention + " " + line[i]
        mentions.append(mention)
    f.close()
    total_y = 0
    cou_ov = 0
    cou_trov = 0
    #entities=collections.defaultdict(list)
    entity_ov = collections.defaultdict(list)
    for i in range(0, len(mentions)):
        print i
        print mentions[i]
        train_file = path4 + mentions[i] + "-train-y.dat"
        test_file = path3 + mentions[i] + "-test-data.dat"
        #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
        '''try:
            y_train=need.load_pickle(train_file)
            X_test,y_test=load_svmlight_file(test_file)
            print "loading done..."
            new_y=sort_prior(mentions[i].lower(),y_train)
            entities[mentions[i]+'train'] = new_y
            entities[mentions[i]+'test'] = y_test
        except:
            entities[mentions[i]+'train'] = []
            entities[mentions[i]+'test'] = []'''
        #print "Training set: ", new_y
        #print "Testing set: ", Set(y_test)
        # count how many mentions share entities with at least one other mention
        other_mentions = np.delete(mentions, i)
        ovcount, trovcount = entity_overlap(other_mentions, mentions[i], entity_ov)
        if ovcount > 0:
            cou_ov = cou_ov + 1
            print "ovcount is greater than zero: ", mentions[i]
        if trovcount > 0:
            cou_trov = cou_trov + 1
            print "trovcount is greater than zero: ", mentions[i]
    print len(mentions)
    print "cou_ov :", cou_ov
    print "cou_trov :", cou_trov
    need.save_pickle(entity_ov, "new-entities-overlap.p")
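# The parsing loop above keeps all whitespace-separated tokens of a test-ch.txt line
# except the last column (assumed to be a count or label). A self-contained sketch of
# that step; parse_mention_line_sketch is a hypothetical name used only here.
def parse_mention_line_sketch(line):
    tokens = line.strip().split()
    # equivalent to: keep tokens[0] and append tokens[1 .. len-2]
    return " ".join(tokens[:-1]) if len(tokens) > 1 else tokens[0]

# e.g. parse_mention_line_sketch("New York 37") -> "New York"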
def main():
    mentions = np.load("short_list1.npy")
    for i in range(int(sys.argv[1]), int(sys.argv[2])):
        print i
        print mentions[i]
        whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
        test_file = path3 + mentions[i] + "-test-data.dat"
        start_time = time.time()
        X_train, y_train, X_test, y_test = load_data(whole_file, test_file, mentions[i])
        need.save_pickle(X_train, path4 + mentions[i] + "-train-x.dat")
        need.save_pickle(y_train, path4 + mentions[i] + "-train-y.dat")
        need.save_pickle(X_test, path4 + mentions[i] + "-test-x.dat")
        need.save_pickle(y_test, path4 + mentions[i] + "-test-y.dat")
        #X_train,y_train=load_svmlight_file(path4+mentions[i]+"-train-x-1.dat")
        #X_test,y_test=load_svmlight_file(path4+mentions[i]+"-test-x-1.dat")
        print X_train.shape
        print time.time() - start_time
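# load_data() is defined elsewhere in this codebase (it also receives the mention name
# and may do additional alignment). A minimal sketch of the assumed behaviour, reading
# svmlight-format train/test files as the commented-out load_svmlight_file calls above
# suggest; load_data_sketch is a hypothetical name.
from sklearn.datasets import load_svmlight_file

def load_data_sketch(whole_file, test_file):
    X_train, y_train = load_svmlight_file(whole_file)
    # force the test matrix to the same feature dimension as the training matrix
    X_test, y_test = load_svmlight_file(test_file, n_features=X_train.shape[1])
    return X_train, y_train, X_test, y_test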
def main1():
    sel_mention = need.load_pickle("reduced1-sum-test.p")
    test_keys = need.load_pickle("test-keys.p")
    agg_keys = collections.defaultdict(list)
    train_entities = collections.defaultdict(list)
    test_entities = collections.defaultdict(list)
    for key in sel_mention.keys():
        agg_keys[test_keys[key]] = sel_mention[key]
    need.save_pickle(agg_keys, "agg-keys.p")
    for key in sel_mention.keys():
        mentions = sel_mention[key]
        for x in mentions:
            try:
                y_train = need.load_pickle(path2 + x + "-train-y.dat")
                new_y = sort_prior(x, y_train)
                y_test = need.load_pickle(path2 + x + "-test-y.dat")
                test = set(y_test)
                for y in new_y:
                    #print int(y)
                    y = int(y)
                    ins = map_ind_name(y)
                    if ins not in train_entities[key]:
                        train_entities[key].append(ins)
                for z in test:
                    z = int(z)
                    ins = map_ind_name(z)
                    if ins not in test_entities[key]:
                        test_entities[key].append(ins)
            except:
                print "error on, ", x
        #print train_entities[key]
    print "Train entities"
    #print_dict(train_entities)
    need.save_pickle(train_entities, "train-entities.p")
    print "Test entities"
    print_dict(test_entities)
    need.save_pickle(test_entities, "test-entities.p")
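# sort_prior() is a project helper used throughout these scripts; the callers only rely
# on it returning a mention's candidate labels ordered by prior probability. A minimal
# stand-in, assuming the prior is approximated by label frequency in the training labels
# (the real helper presumably uses a mention-specific candidate prior keyed by the
# lowercased mention string); sort_prior_sketch is a hypothetical name.
import collections

def sort_prior_sketch(mention, y_train):
    # `mention` is unused in this stand-in
    counts = collections.Counter(list(y_train))
    return [lab for lab, _ in counts.most_common()]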
def main():
    sel_mention = mention_list()
    mentions = sel_mention[str(sys.argv[1])]
    total_y = 0
    lens = []
    # Stage 1: stack the training data of all mentions in the cluster into one matrix.
    for i in range(0, len(mentions)):
        print i
        whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
        test_file = path3 + mentions[i] + "-test-data.dat"
        X_train = need.load_pickle(path4 + mentions[i] + "-train-x.dat")
        y_train = need.load_pickle(path4 + mentions[i] + "-train-y.dat")
        print "loading done..."
        new_y = sort_prior(mentions[i].lower(), y_train)
        total_y = total_y + len(new_y)
        #print y_train.shape[0],y_test.shape[0]
        if i == 0:
            X_train1, y_train1 = select_data_point(X_train, y_train, new_y)
            print X_train1.shape
            lens.append(X_train1.shape[0])
        if i >= 1:
            X_train2, y_train2 = select_data_point(X_train, y_train, new_y)
            print X_train2.shape
            X_train1, y_train1 = aggregate(X_train1, X_train2, y_train1, y_train2)
            lens.append(X_train2.shape[0])
    max_col = len(Set(y_train1))
    print max_col
    y, classes = make_y(y_train1, max_col)
    need.save_pickle(classes, "classes/" + "ind_classes_" + str(sys.argv[1]) + ".p")
    #classes=need.load_pickle("ind_classes_1.p")
    print "preprocessing done..."
    x = X_train1
    # Stage 2: mini-batch training, one pass per mention per iteration.
    for a in range(0, int(sys.argv[2])):
        print a
        one_time = time.time()
        count = 0
        ind = 0
        for i in range(0, len(mentions)):
            print mentions[i]
            whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
            test_file = path3 + mentions[i] + "-test-data.dat"
            y_train = need.load_pickle(path4 + mentions[i] + "-train-y.dat")
            new_y = sort_prior(mentions[i].lower(), y_train)
            print "loading done..."
            ind_list = []
            for cl in new_y:
                if classes[cl] not in ind_list:
                    ind_list.append(classes[cl])
            mini_batch_size = 1000
            start = 0
            for j in range(0, int(lens[ind] / mini_batch_size) + 1):
                start_time = time.time()
                if sum(lens[0:ind + 1]) - count < mini_batch_size:
                    batch_x, batch_y = x[count:count + (sum(lens[0:ind + 1]) - count)], y[count:count + (sum(lens[0:ind + 1]) - count)]
                    count = sum(lens[0:ind + 1])
                else:
                    batch_x, batch_y = x[count:count + mini_batch_size], y[count:count + mini_batch_size]
                    count = count + mini_batch_size
                col_list = unique_col(batch_x)
                model.cost = model.train(batch_x[:, col_list], batch_y[:, ind_list], col_list, ind_list)
            ind = ind + 1
        print "Iter time: ", time.time() - one_time
    # Stage 3: evaluate each mention on its test split.
    for i in range(0, len(mentions)):
        #print i
        print mentions[i]
        whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
        test_file = path3 + mentions[i] + "-test-data.dat"
        #X_train,y_train,x_t,y_temp=load_data(whole_file,test_file,mentions[i])
        y_train = need.load_pickle(path4 + mentions[i] + "-train-y.dat")
        x_t = need.load_pickle(path4 + mentions[i] + "-test-x.dat")
        #print x_t.shape
        y_temp = need.load_pickle(path4 + mentions[i] + "-test-y.dat")
        ys = Set(y_train).union(Set(y_temp))
        new_y = sort_prior(mentions[i].lower(), y_train)
        ind_list = []
        fw = open("multitask-results-full-1/" + mentions[i] + ".txt", "w")
        for x in new_y:
            ind_list.append(classes[x])
        #indices=need.load_pickle("ind-classes.p")
        #print len(ind_list)
        if len(ys) > 1:
            y_t = make_y_test(y_temp, max_col, classes)
            # mask is built here but not passed to model.predict below
            mask = np.zeros((y_t.shape))
            for j in range(0, y_t.shape[0]):
                for k in range(0, len(ind_list)):
                    mask[j, ind_list[k]] = 1
            col_list = unique_col(x_t)
            prediction1 = model.predict(x_t[:, col_list], col_list, ind_list)
            prediction = []
            for j in range(0, len(prediction1)):
                prediction.append(ind_list[prediction1[j]])
            print x_t.shape, len(ys)
            print np.mean(np.argmax(y_t, axis=1) == prediction)
            test = (np.argmax(y_t, axis=1) == prediction)
            miss = []
            for j in range(0, len(test)):
                if test[j] == False:
                    miss.append(j)
            for j in range(0, len(test)):
                if j in miss:
                    fw.write("prediction: " + str(prediction[j]) + ' ')
                    fw.write("actual: " + str(np.argmax(y_t, axis=1)[j]) + '\n')
        else:
            print x_t.shape, len(ys)
            print "1.0"
def main():
    sel_mention = need.load_pickle("new_reduced_test.p")
    mentions = sel_mention[int(sys.argv[1])]
    #mentions.remove('British-based')
    total_y = 0
    lens = []
    i = 0
    # Stage 1: stack the training data of all mentions in the cluster.
    for x in mentions:
        print x
        whole_file = path2 + "whole-data-" + x + ".dat"
        test_file = path3 + x + "-test-data.dat"
        #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
        try:
            X_train = need.load_pickle(path4 + x + "-train-x.dat")
            y_train = need.load_pickle(path4 + x + "-train-y.dat")
        except:
            sys.exit(1)
        print "loading done..."
        new_y = sort_prior(x.lower(), y_train)
        total_y = total_y + len(new_y)
        #print y_train.shape[0],y_test.shape[0]
        if i == 0:
            X_train1, y_train1 = select_data_point(X_train, y_train, new_y)
            print X_train1.shape
            lens.append(X_train1.shape[0])
        if i >= 1:
            X_train2, y_train2 = select_data_point(X_train, y_train, new_y)
            print X_train2.shape
            X_train1, y_train1 = aggregate(X_train1, X_train2, y_train1, y_train2)
            lens.append(X_train2.shape[0])
        i = i + 1
    #need.save_pickle(X_train1,path2+"short_data-x.dat")
    #need.save_pickle(y_train1,path2+"short_data-y.dat")
    #x=need.load_pickle(path2+"short_data-x.dat")
    #full_y=need.load_pickle(path2+"short_data-y.dat")
    max_col = len(set(y_train1))
    print max_col
    y, classes = make_y(y_train1, max_col)
    need.save_pickle(classes, "classes/" + "ind_classes_" + str(sys.argv[1]) + ".p")
    #classes=need.load_pickle("ind_classes_1.p")
    print "preprocessing done..."
    x = X_train1
    prev_norm = 10000  # not used below; left over from an earlier stopping criterion
    prev_cost = 10000
    stop = int(sys.argv[2])
    flag = 0
    # Stage 2: mini-batch training with a decaying learning rate; after iteration
    # `stop + 1` the weights are averaged across iterations and saved to disk.
    for a in range(0, stop + 30):
        print a
        one_time = time.time()
        count = 0
        ind = 0
        #lr=0.5
        lr = 1 / float(np.sqrt(a) + 1)
        tot_cost = 0
        full_gradient = np.zeros((x.shape[1], max_col))
        for m in mentions:
            print m
            whole_file = path2 + "whole-data-" + m + ".dat"
            test_file = path3 + m + "-test-data.dat"
            #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
            y_train = need.load_pickle(path4 + m + "-train-y.dat")
            if len(set(y_train)) > 1:
                new_y = sort_prior(m.lower(), y_train)
                print "loading done..."
                #x,y_temp=select_data_point(X_train,y_train,new_y)
                #y = make_y_test(y_temp,57,classes)
                ind_list = []
                for cl in new_y:
                    if classes[cl] not in ind_list:
                        ind_list.append(classes[cl])
                mini_batch_size = 1000
                start = 0
                #full_x=x[count:count+lens[ind]]
                #full_y=y[count:count+lens[ind]]
                for j in range(0, int(lens[ind] / mini_batch_size) + 1):
                    start_time = time.time()
                    if sum(lens[0:ind + 1]) - count < mini_batch_size:
                        batch_x, batch_y = x[count:count + (sum(lens[0:ind + 1]) - count)], y[count:count + (sum(lens[0:ind + 1]) - count)]
                        count = sum(lens[0:ind + 1])
                    else:
                        batch_x, batch_y = x[count:count + mini_batch_size], y[count:count + mini_batch_size]
                        count = count + mini_batch_size
                    # mask is built here but not passed to model.train below
                    mask = np.zeros((batch_y.shape))
                    for k in range(0, batch_y.shape[0]):
                        for l in range(0, len(ind_list)):
                            mask[k, ind_list[l]] = 1
                    col_list = unique_col(batch_x)
                    model.cost = model.train(batch_x[:, col_list], batch_y[:, ind_list], col_list, ind_list, lr)
                    #full_cost = model.cost_print(full_x,full_y[:,ind_list],ind_list)
                    #gradient=model.gradient_print(batch_x[:,col_list],batch_y[:,ind_list],col_list,ind_list)
                    #for k in range(0,len(col_list)):
                    #    for l in range(0,len(ind_list)):
                    #        full_gradient[col_list[k],ind_list[l]]=full_gradient[col_list[k],ind_list[l]]+gradient[k,l]
                    tot_cost = tot_cost + model.cost
                    #print time.time()-start_time
            else:
                count = count + lens[ind]
            ind = ind + 1
        if a == (stop + 1):
            weights = model.weight_val()
            np.save("weights/" + str(sys.argv[1]) + ".npy", weights)
        if a > (stop + 1):
            w_o = np.load("weights/" + str(sys.argv[1]) + ".npy")
            weights = model.weight_val()
            w_n = (w_o + weights) / 2
            np.save("weights/" + str(sys.argv[1]) + ".npy", w_n)
            # never triggers with the range above (a stops at stop + 29)
            if a - (stop + 1) == 31:
                break
        #gradient_norm=LA.norm(full_gradient)
        print tot_cost
        print "Iter time: ", time.time() - one_time
    # Stage 3: evaluate with the averaged weights (x and y are reused as loop
    # variables here; training has already finished).
    fw1 = open("results-news/" + str(sys.argv[1]) + "-" + str(sys.argv[3]) + ".txt", "w")
    for x in mentions:
        print x
        whole_file = path2 + "whole-data-" + x + ".dat"
        test_file = path3 + x + "-test-data.dat"
        #X_train,y_train,x_t,y_temp=load_data(whole_file,test_file,mentions[i])
        y_train = need.load_pickle(path4 + x + "-train-y.dat")
        x_t = need.load_pickle(path4 + x + "-test-x.dat")
        #print x_t.shape
        y_temp = need.load_pickle(path4 + x + "-test-y.dat")
        ys = set(y_train).union(set(y_temp))
        new_y = sort_prior(x.lower(), y_train)
        ind_list = []
        fw = open("multitask-results-full-testb/" + x + "-" + str(sys.argv[1]) + "-" + str(sys.argv[3]) + ".txt", "w")
        for y in new_y:
            #print y
            ind_list.append(classes[y])
        #print ind_list
        w_o = np.load("weights/" + str(sys.argv[1]) + ".npy")
        #print w_o.shape
        #indices=need.load_pickle("ind-classes.p")
        #print len(ind_list)
        if len(ys) > 1:
            y_t = make_y_test(y_temp, max_col, classes)
            mask = np.zeros((y_t.shape))
            for j in range(0, y_t.shape[0]):
                for k in range(0, len(ind_list)):
                    mask[j, ind_list[k]] = 1
            col_list = unique_col(x_t)
            prediction1 = model.predict(x_t[:, col_list], w_o, col_list, ind_list)
            #print prediction1
            prediction = []
            for j in range(0, len(prediction1)):
                prediction.append(ind_list[prediction1[j]])
            print x_t.shape, len(ys)
            acc = np.mean(np.argmax(y_t, axis=1) == prediction)
            print acc
            fw1.write(x + " " + str(acc) + " " + str(x_t.shape[0]) + '\n')
            test = (np.argmax(y_t, axis=1) == prediction)
            miss = []
            for j in range(0, len(test)):
                if test[j] == False:
                    miss.append(j)
            for j in range(0, len(test)):
                if j in miss:
                    fw.write("prediction: " + str(prediction[j]) + ' ')
                    fw.write("actual: " + str(np.argmax(y_t, axis=1)[j]) + '\n')
        else:
            fw1.write(x + " " + str(1.0) + " " + str(len(ys)) + '\n')
            print x_t.shape, len(ys)
            print "1.0"
    os.remove("weights/" + str(sys.argv[1]) + ".npy")
def main():
    # reduced-sum-test.p maps a cluster number to the list of mentions in that cluster
    sel_mention = need.load_pickle("reduced-sum-test.p")
    cluster_num = int(sys.argv[1])
    iteration_num = int(sys.argv[2])
    it_weight_save = int(sys.argv[3])
    mentions = sel_mention[cluster_num]
    total_y = 0
    lens = []
    # reduced-sizes.p maps a cluster number to the total number of training examples in that cluster
    sizes = need.load_pickle("reduced-sizes.p")
    #print sizes
    #Xf_train=lil_matrix((51879,600000))
    Xf_train = lil_matrix((sizes[str(sys.argv[1]) + "train"], 1000000 * len(mentions)))
    Xf_test = lil_matrix((sizes[str(sys.argv[1]) + "test"], 1000000 * len(mentions)))
    #Xf_test=lil_matrix((371,600000))
    print Xf_train.shape
    st = 0
    i = 0
    for x in mentions:
        whole_file = path2 + "whole-data-" + x + ".dat"
        test_file = path3 + x + "-test-data.dat"
        #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
        X_train = need.load_pickle(path4 + x + "-train-x.dat")
        y_train = need.load_pickle(path4 + x + "-train-y.dat")
        print "loading done..."
        new_y = sort_prior(x.lower(), y_train)
        print new_y
        total_y = total_y + len(new_y)
        if i == 0:
            X_train1, y_train1 = select_data_point(X_train, y_train, new_y)
            # place this mention's features in its own column block so the weight
            # matrices learnt in the first layer are not shared across mentions
            Xf_train = need.make_features(X_train1, Xf_train, i + 1, X_train1.shape[1], st)
            st = st + X_train1.shape[0]
            print X_train1.shape
            lens.append(X_train1.shape[0])
        if i >= 1:
            X_train2, y_train2 = select_data_point(X_train, y_train, new_y)
            Xf_train = need.make_features(X_train2, Xf_train, i + 1, X_train2.shape[1], st)
            y_train1 = aggregate_y(y_train1, y_train2)
            st = st + X_train2.shape[0]
            print X_train2.shape
            lens.append(X_train2.shape[0])
        i = i + 1
    #print st
    #need.save_pickle(X_train1,path2+"short_data-x.dat")
    #need.save_pickle(y_train1,path2+"short_data-y.dat")
    #x=need.load_pickle(path2+"short_data-x.dat")
    #full_y=need.load_pickle(path2+"short_data-y.dat")
    max_col = len(Set(y_train1))
    print max_col
    y, classes = make_y(y_train1, max_col)
    need.save_pickle(classes, "men-classes/" + "ind_classes_" + str(sys.argv[1]) + ".p")
    #classes=need.load_pickle("ind_classes_1.p")
    print "preprocessing done..."
    need.save_pickle(Xf_train, "mention-specific-data/" + sys.argv[1] + ".dat")
    #Xf_train=need.load_pickle("mention-specific-data/"+sys.argv[1]+".dat")
    x = Xf_train.tocsr()
    for a in range(0, iteration_num):
        print a
        one_time = time.time()
        ind = 0
        lr = 1 / float(np.sqrt(a) + 1)
        #lr=1
        tot_cost = 0
        count = 0
        for m in mentions:
            print m
            whole_file = path2 + "whole-data-" + m + ".dat"
            test_file = path3 + m + "-test-data.dat"
            #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
            y_train = need.load_pickle(path4 + m + "-train-y.dat")
            if len(set(y_train)) > 1:
                new_y = sort_prior(m.lower(), y_train)
                print "loading done..."
                #x,y_temp=select_data_point(X_train,y_train,new_y)
                #y = make_y_test(y_temp,57,classes)
                ind_list = []
                for cl in new_y:
                    if classes[cl] not in ind_list:
                        ind_list.append(classes[cl])
                mini_batch_size = 1000
                start = 0
                #full_x=x[count:count+lens[ind]]
                #full_y=y[count:count+lens[ind]]
                for j in range(0, int(lens[ind] / mini_batch_size) + 1):
                    if sum(lens[0:ind + 1]) - count < mini_batch_size:
                        batch_x, batch_y = x[count:count + (sum(lens[0:ind + 1]) - count)], y[count:count + (sum(lens[0:ind + 1]) - count)]
                        count = sum(lens[0:ind + 1])
                    else:
                        batch_x, batch_y = x[count:count + mini_batch_size], y[count:count + mini_batch_size]
                        count = count + mini_batch_size
                    '''mask=np.zeros((batch_y.shape))
                    for k in range(0,batch_y.shape[0]):
                        for l in range(0,len(ind_list)):
                            mask[k,ind_list[l]]=1'''
                    # col_list selects the features with nonzero entries in batch_x;
                    # ind_list restricts the output layer to this mention's candidate entities
                    col_list = unique_col(batch_x)
                    start_time = time.time()
                    print batch_x.shape
                    model.cost = model.train(batch_x[:, col_list], batch_y[:, ind_list], col_list, ind_list, lr)
                    tot_cost = tot_cost + model.cost
                    #print time.time()-start_time
            else:
                count = count + lens[ind]
            ind = ind + 1
        if a == it_weight_save:
            weights1, weights2 = model.weight_val()
            np.savez("weights/" + str(sys.argv[1]) + ".npz", weights1, weights2)
        if a > it_weight_save:
            w_o = np.load("weights/" + str(sys.argv[1]) + ".npz")
            weights1, weights2 = model.weight_val()
            w_n1 = (w_o['arr_0'] + weights1) / 2
            w_n2 = (w_o['arr_1'] + weights2) / 2
            np.savez("weights/" + str(sys.argv[1]) + ".npz", w_n1, w_n2)
        print tot_cost
        print "Iter time: ", time.time() - one_time
    # Build the mention-specific test matrix the same way as the training matrix.
    st = 0
    i = 0
    for m in mentions:
        x_test = need.load_pickle(path4 + m + "-test-x.dat")
        Xf_test = need.make_features(x_test, Xf_test, i + 1, x_test.shape[1], st)
        st = st + x_test.shape[0]
        i = i + 1
    cut = 0
    for m in mentions:
        #print i
        print m
        whole_file = path2 + "whole-data-" + m + ".dat"
        test_file = path3 + m + "-test-data.dat"
        #X_train,y_train,x_t,y_temp=load_data(whole_file,test_file,mentions[i])
        y_train = need.load_pickle(path4 + m + "-train-y.dat")
        #print x_t.shape
        y_temp = need.load_pickle(path4 + m + "-test-y.dat")
        x_t = Xf_test[cut:cut + len(y_temp)]
        cut = cut + len(y_temp)
        ys = Set(y_train).union(Set(y_temp))
        new_y = sort_prior(m.lower(), y_train)
        ind_list = []
        fw = open("multitask-results-full-men/" + m + ".txt", "w")
        w_o = np.load("weights/" + str(sys.argv[1]) + ".npz")
        for x in new_y:
            ind_list.append(classes[x])
        #indices=need.load_pickle("ind-classes.p")
        #print len(ind_list)
        if len(ys) > 1:
            y_t = make_y_test(y_temp, max_col, classes)
            mask = np.zeros((y_t.shape))
            for j in range(0, y_t.shape[0]):
                for k in range(0, len(ind_list)):
                    mask[j, ind_list[k]] = 1
            col_list = unique_col(x_t)
            prediction1 = model.predict(x_t[:, col_list], w_o['arr_0'], w_o['arr_1'], col_list, ind_list)
            prediction = []
            for j in range(0, len(prediction1)):
                prediction.append(ind_list[prediction1[j]])
            print x_t.shape, len(ys)
            print np.mean(np.argmax(y_t, axis=1) == prediction)
            test = (np.argmax(y_t, axis=1) == prediction)
            miss = []
            for j in range(0, len(test)):
                if test[j] == False:
                    miss.append(j)
            for j in range(0, len(test)):
                fw.write("prediction: " + str(prediction[j]) + ' ')
                fw.write("actual: " + str(np.argmax(y_t, axis=1)[j]) + '\n')
        else:
            print x_t.shape, len(ys)
            print "1.0"
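# need.make_features() (from the project's `need` helper module) places a mention's
# feature matrix into its own block of columns of the big lil_matrix, which is what
# keeps the first-layer weights mention-specific. A sketch of the assumed behaviour,
# with a block width of 1,000,000 columns per mention as implied by the matrix
# allocation above; make_features_sketch is a hypothetical name.
def make_features_sketch(X, Xf, mention_index, n_cols, row_offset):
    col_offset = (mention_index - 1) * 1000000
    X = X.tocoo()
    for r, c, v in zip(X.row, X.col, X.data):
        Xf[row_offset + r, col_offset + c] = v
    return Xf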