Example #1
def main3():
    ent = need.load_pickle("new-mentions-sum-test.p")
    old_keys_m = collections.defaultdict(list)
    mentions_sum = collections.defaultdict(list)
    old_keys = need.load_pickle("new-mentions-sum-keys.p")

    duplicate_keys1 = []
    duplicate_keys2 = []
    j = 0
    for key in ent.keys():
        if key not in duplicate_keys1:
            mentions_sum[j] = ent[key]
            old_keys_m[j].append(old_keys[key])
            key_list = delete_or_not(set(ent[key]), ent, key)
            for x in key_list:
                duplicate_keys1.append(x)
                old_keys_m[j].append(old_keys[x])
            j = j + 1
    '''for key in ent.keys():
		if key not in duplicate_keys2:
			mentions_sum[j]=ent[key]
			old_keys_m[j].append(old_keys[key])
			key_list1=delete_or_not1(set(ent[key]),ent,key)
			duplicate_keys2.append(key_list1)
	#		if key_list!=-1:
	#			old_keys_m[j].append(old_keys[key_list])
			j=j+1'''
    need.save_pickle(old_keys_m, "old-keys-m.p")
    need.save_pickle(mentions_sum, "new_reduced_test.p")
    print_dict(old_keys_m)
Example #2
def main2():
    agg_keys = need.load_pickle("new-keys.p")
    cluster = need.load_pickle("reduced1-sum-test.p")
    train_entities = need.load_pickle("train-entities.p")
    #test_entities=need.load_pickle("test-entities.p")
    classes = need.load_pickle("classes.p")
    with open('tripartite-new.csv', 'w') as csvfile:
        fieldnames = ['cluster-id', 'cluster-mentions', 'entity']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        i = 0
        flag = 0
        for key in agg_keys.keys():
            print i
            sort_cl = sorted(cluster[key])
            for x in sort_cl:
                try:
                    y_train = need.load_pickle(path2 + x + "-train-y.dat")
                    new_y = sort_prior(x, y_train)
                    for y in train_entities[key]:
                        if float(classes[y]) in new_y:
                            writer.writerow({
                                'cluster-id': 'C' + str(i),
                                'cluster-mentions': x,
                                'entity': y
                            })
                except:
                    print "error on: ", key
                    flag = 1
            if flag == 0:
                i = i + 1
            else:
                flag = 0
Example #3
def main():
    '''ent=need.load_pickle("mentions1-sum-test.p")
	test_keys=need.load_pickle("test-keys.p")
	new_test_keys=collections.defaultdict(list)
	mentions_sum=collections.defaultdict(list)
	#mentions_sum={}
	i=0
	j=0	
	duplicate_keys=[]
	for key in ent.keys():
		if key not in duplicate_keys:
			#print j,":",key
			#print key
			mentions_sum[j]=ent[key]
			new_test_keys[j].append(test_keys[key])
			key_list=delete_or_not(ent[key],ent,key)
			#print key_list
			for x in key_list:
				duplicate_keys.append(x)
				new_test_keys[j].append(test_keys[x])
			#print duplicate_keys
			j=j+1'''
    '''for key in ent.keys():
		if key.find("test")!=-1 and key.find("train")==-1:
			mentions_sum[i]=find_clusters(key,ent)
			#print i,":",key.replace("test","")
			i=i+1'''
    '''need.save_pickle(mentions_sum,"reduced1-sum-test.p")
	print_dict(mentions_sum)
	need.save_pickle(new_test_keys,"new-keys.p")'''
    #print_dict(new_test_keys)

    #print_dict(need.load_pickle("entities-overlap.p"))
    ent = need.load_pickle("new-entities-overlap.p")
    mentions_sum_train = collections.defaultdict(list)
    mentions_sum = {}
    i = 0
    #for key in ent.keys():
    #	if key.find("test")!=-1 and key.find("train")==-1:
    #print key
    #		ind=len(key)-4
    #print ind
    #mentions_sum[i]=key.replace("test","",ind)
    #		mentions_sum[i]=''.join(key.rsplit('test', 1))
    #print i,": ",mentions_sum[i]
    #		i=i+1
    for key in ent.keys():
        if key.find("test") != -1:
            mentions_sum_train[i] = find_clusters(key, ent)
            mentions_sum[i] = ''.join(key.rsplit('test', 1))
            i = i + 1

    #unique_mentions=set()
    #for key in ent.keys():
    #	if key.find("train")!=-1:
    #		unique_mentions.add(key.replace("train",""))
    #		for val in ent[key]:
    #			unique_mentions.add(val)
    #print_dict(mentions_sum)
    need.save_pickle(mentions_sum, "new-mentions-sum-keys.p")
Example #4
def main1():
    sel_mention = need.load_pickle("reduced1-sum-test.p")
    test_keys = need.load_pickle("test-keys.p")
    agg_keys = collections.defaultdict(list)
    train_entities = collections.defaultdict(list)
    test_entities = collections.defaultdict(list)
    for key in sel_mention.keys():
        agg_keys[test_keys[key]] = sel_mention[key]
    need.save_pickle(agg_keys, "agg-keys.p")
    for key in sel_mention.keys():
        mentions = sel_mention[key]
        for x in mentions:
            try:
                y_train = need.load_pickle(path2 + x + "-train-y.dat")
                new_y = sort_prior(x, y_train)
                y_test = need.load_pickle(path2 + x + "-test-y.dat")
                test = set(y_test)
                for y in new_y:
                    #print int(y)
                    y = int(y)
                    ins = map_ind_name(y)
                    if ins not in train_entities[key]:
                        train_entities[key].append(ins)
                for z in test:
                    z = int(z)
                    ins = map_ind_name(z)
                    if ins not in test_entities[key]:
                        test_entities[key].append(ins)

            except:
                print "error on, ", x
        #print train_entities[key]
    print "Train entities"
    #print_dict(train_entities)
    need.save_pickle(train_entities, "train-entities.p")
    print "Test entities"
    print_dict(test_entities)
    need.save_pickle(test_entities, "test-entities.p")
Example #5
def sort_prior(mention, y):
    prior_dict = need.load_pickle(path1 + mention.lower() + "-pind.p")
    #print prior_dict.keys()[0]
    ys = set(y)
    #print len(ys)
    new_y = []
    sorted_prior_dict = sorted(prior_dict.items(),
                               key=operator.itemgetter(1),
                               reverse=True)
    if len(ys) <= 30:
        #print mention
        return ys
    i = 0
    while len(new_y) < 30:
        if int(sorted_prior_dict[i][0]) in ys:
            new_y.append(int(sorted_prior_dict[i][0]))
        i = i + 1
    return new_y
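
# Usage sketch (not part of the original snippet; the mention name and path are hypothetical):
# sort_prior returns the label set unchanged when it contains at most 30 distinct classes,
# otherwise the 30 in-set classes with the highest prior from the mention's "-pind.p" file.
#   y_train = need.load_pickle(path2 + "obama-train-y.dat")
#   top_candidates = sort_prior("obama", y_train)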
Example #6
def entity_overlap(other_mentions, mention, entity_ov):
    entities = need.load_pickle("entities.p")
    test_count = 0
    train_count = 0
    mention_overlap = []
    train_mention_overlap = []
    for i in range(0, len(other_mentions)):
        test_overlap = Set(entities[mention + 'test']).intersection(
            Set(entities[other_mentions[i] + 'train']))
        train_overlap = Set(entities[mention + 'train']).intersection(
            Set(entities[other_mentions[i] + 'train']))
        if len(test_overlap) > 0:
            test_count = test_count + 1
            entity_ov[mention + 'test'].append(other_mentions[i])
            mention_overlap.append(other_mentions[i])
        if len(train_overlap) > 0:
            train_count = train_count + 1
            #train_mention_overlap.append(other_mentions[i])
            entity_ov[mention + 'train'].append(other_mentions[i])
            train_mention_overlap.append(other_mentions[i])
    #print mention_overlap
    #print train_mention_overlap
    #need.save_pickle(entity_overlap,"entity-overlap.p")
    return test_count, train_count
Example #7
def map_ind_name(ind):
    prior_name = need.load_pickle("classes.p")
    for key in prior_name.keys():
        if prior_name[key] == str(ind):
            return key
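
# Illustration with hypothetical values: classes.p is assumed to map entity names to class
# indices stored as strings, so map_ind_name performs a reverse lookup, e.g.
#   prior_name = {"Barack_Obama": "17", ...}  ->  map_ind_name(17) returns "Barack_Obama"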
Example #8
def main():
    sel_mention = mention_list()
    mentions = sel_mention[str(sys.argv[1])]
    total_y = 0
    lens = []
    for i in range(0, len(mentions)):
        print i
        whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
        test_file = path3 + mentions[i] + "-test-data.dat"

        X_train = need.load_pickle(path4 + mentions[i] + "-train-x.dat")
        y_train = need.load_pickle(path4 + mentions[i] + "-train-y.dat")
        print "loading done..."
        new_y = sort_prior(mentions[i].lower(), y_train)
        total_y = total_y + len(new_y)
        #print y_train.shape[0],y_test.shape[0]
        if i == 0:
            X_train1, y_train1 = select_data_point(X_train, y_train, new_y)
            print X_train1.shape
            lens.append(X_train1.shape[0])
        if i >= 1:
            X_train2, y_train2 = select_data_point(X_train, y_train, new_y)
            print X_train2.shape
            X_train1, y_train1 = aggregate(X_train1, X_train2, y_train1,
                                           y_train2)
            lens.append(X_train2.shape[0])

    max_col = len(Set(y_train1))
    print max_col

    y, classes = make_y(y_train1, max_col)
    need.save_pickle(classes,
                     "classes/" + "ind_classes_" + str(sys.argv[1]) + ".p")

    #classes=need.load_pickle("ind_classes_1.p")
    print "preprocessing done..."

    x = X_train1

    for a in range(0, int(sys.argv[2])):
        print a
        one_time = time.time()
        count = 0
        ind = 0
        for i in range(0, len(mentions)):
            print mentions[i]

            whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
            test_file = path3 + mentions[i] + "-test-data.dat"

            y_train = need.load_pickle(path4 + mentions[i] + "-train-y.dat")
            new_y = sort_prior(mentions[i].lower(), y_train)

            print "loading done..."

            ind_list = []
            for cl in new_y:
                if classes[cl] not in ind_list:
                    ind_list.append(classes[cl])
            mini_batch_size = 1000

            start = 0

            for j in range(0, int(lens[ind] / mini_batch_size) + 1):
                start_time = time.time()
                # rows [count, sum(lens[0:ind+1])) belong to the current mention:
                # take a full mini-batch, or whatever remains of that block
                remaining = sum(lens[0:ind + 1]) - count
                if remaining < mini_batch_size:
                    batch_x, batch_y = x[count:count + remaining], y[count:count + remaining]
                    count = sum(lens[0:ind + 1])
                else:
                    batch_x, batch_y = x[count:count + mini_batch_size], y[count:count + mini_batch_size]
                    count = count + mini_batch_size

                col_list = unique_col(batch_x)

                model.cost = model.train(batch_x[:, col_list], batch_y[:, ind_list],
                                         col_list, ind_list)

            ind = ind + 1
        print "Iter time: ", time.time() - one_time
    for i in range(0, len(mentions)):
        #print i
        print mentions[i]
        whole_file = path2 + "whole-data-" + mentions[i] + ".dat"
        test_file = path3 + mentions[i] + "-test-data.dat"
        #X_train,y_train,x_t,y_temp=load_data(whole_file,test_file,mentions[i])
        y_train = need.load_pickle(path4 + mentions[i] + "-train-y.dat")
        x_t = need.load_pickle(path4 + mentions[i] + "-test-x.dat")
        #print x_t.shape
        y_temp = need.load_pickle(path4 + mentions[i] + "-test-y.dat")
        ys = Set(y_train).union(Set(y_temp))
        new_y = sort_prior(mentions[i].lower(), y_train)
        ind_list = []
        fw = open("multitask-results-full-1/" + mentions[i] + ".txt", "w")
        for x in new_y:
            ind_list.append(classes[x])
        #indices=need.load_pickle("ind-classes.p")
        #print len(ind_list)
        if len(ys) > 1:
            y_t = make_y_test(y_temp, max_col, classes)
            mask = np.zeros((y_t.shape))
            for j in range(0, y_t.shape[0]):
                for k in range(0, len(ind_list)):
                    mask[j, ind_list[k]] = 1
            col_list = unique_col(x_t)
            prediction1 = model.predict(x_t[:, col_list], col_list, ind_list)
            prediction = []
            for j in range(0, len(prediction1)):
                prediction.append(ind_list[prediction1[j]])
            print x_t.shape, len(ys)
            print np.mean(np.argmax(y_t, axis=1) == prediction)
            test = (np.argmax(y_t, axis=1) == prediction)
            miss = []
            for j in range(0, len(test)):
                if test[j] == False:
                    miss.append(j)
            for j in range(0, len(test)):
                if j in miss:
                    fw.write("prediction: " + str(prediction[j]) + ' ')
                    fw.write("actual: " + str(np.argmax(y_t, axis=1)[j]) +
                             '\n')
        else:
            print x_t.shape, len(ys)
            print "1.0"
Example #9
def main():
    sel_mention = need.load_pickle("new_reduced_test.p")

    mentions = sel_mention[int(sys.argv[1])]
    #mentions.remove('British-based')
    total_y = 0
    lens = []
    i = 0
    for x in mentions:
        print x
        whole_file = path2 + "whole-data-" + x + ".dat"
        test_file = path3 + x + "-test-data.dat"
        #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
        try:
            X_train = need.load_pickle(path4 + x + "-train-x.dat")
            y_train = need.load_pickle(path4 + x + "-train-y.dat")
        except:
            exit(1)
        print "loading done..."
        new_y = sort_prior(x.lower(), y_train)
        total_y = total_y + len(new_y)
        #print y_train.shape[0],y_test.shape[0]
        if i == 0:
            X_train1, y_train1 = select_data_point(X_train, y_train, new_y)
            print X_train1.shape
            lens.append(X_train1.shape[0])
        if i >= 1:
            X_train2, y_train2 = select_data_point(X_train, y_train, new_y)
            print X_train2.shape

            X_train1, y_train1 = aggregate(X_train1, X_train2, y_train1,
                                           y_train2)
            lens.append(X_train2.shape[0])
        i = i + 1
    #need.save_pickle(X_train1,path2+"short_data-x.dat")
    #need.save_pickle(y_train1,path2+"short_data-y.dat")

    #x=need.load_pickle(path2+"short_data-x.dat")
    #full_y=need.load_pickle(path2+"short_data-y.dat")
    max_col = len(set(y_train1))
    print max_col

    y, classes = make_y(y_train1, max_col)
    need.save_pickle(classes,
                     "classes/" + "ind_classes_" + str(sys.argv[1]) + ".p")

    #classes=need.load_pickle("ind_classes_1.p")
    print "preprocessing done..."

    x = X_train1
    prev_norm = 10000
    prev_cost = 10000
    stop = int(sys.argv[2])
    flag = 0
    for a in range(0, stop + 30):
        print a
        one_time = time.time()
        count = 0
        ind = 0
        #lr=0.5
        lr = 1 / float(np.sqrt(a) + 1)
        tot_cost = 0
        full_gradient = np.zeros((x.shape[1], max_col))
        for m in mentions:
            print m
            #X_train=need.load_pickle(path4+mentions[i]+"-train-x.dat")
            #y_train=need.load_pickle(path4+mentions[i]+"-train-y.dat")
            whole_file = path2 + "whole-data-" + m + ".dat"
            test_file = path3 + m + "-test-data.dat"
            #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
            #X_train=need.load_pickle(path4+mentions[i]+"-train-x.dat")
            y_train = need.load_pickle(path4 + m + "-train-y.dat")
            if len(set(y_train)) > 1:
                new_y = sort_prior(m.lower(), y_train)
                #print y_train.shape[0],y_test.shape[0]

                print "loading done..."

                #x,y_temp=select_data_point(X_train,y_train,new_y)
                #print x.shape
                #y = make_y_test(y_temp,57,classes)
                ind_list = []
                for cl in new_y:
                    if classes[cl] not in ind_list:
                        ind_list.append(classes[cl])
                mini_batch_size = 1000

                start = 0
                #full_x=x[count:count+lens[ind]]
                #full_y=y[count:count+lens[ind]]
                #print full_x.shape
                #print count
                for j in range(0, int(lens[ind] / mini_batch_size) + 1):
                    start_time = time.time()
                    remaining = sum(lens[0:ind + 1]) - count
                    if remaining < mini_batch_size:
                        batch_x, batch_y = x[count:count + remaining], y[count:count + remaining]
                        count = sum(lens[0:ind + 1])
                    else:
                        batch_x, batch_y = x[count:count + mini_batch_size], y[count:count + mini_batch_size]
                        count = count + mini_batch_size
                    mask = np.zeros((batch_y.shape))
                    for k in range(0, batch_y.shape[0]):
                        for l in range(0, len(ind_list)):
                            mask[k, ind_list[l]] = 1
                    col_list = unique_col(batch_x)
                    #print col_list
                    #print len(col_list)
                    #print batch_x[:,col_list].shape
                    model.cost = model.train(batch_x[:, col_list],
                                             batch_y[:, ind_list], col_list,
                                             ind_list, lr)
                    #full_cost = model.cost_print(full_x,full_y[:,ind_list],ind_list)

                    #print model.cost
                    #gradient=model.gradient_print(batch_x[:,col_list],batch_y[:,ind_list],col_list,ind_list)
                    #for k in range(0,len(col_list)):
                    #	for l in range(0,len(ind_list)):
                    #		full_gradient[col_list[k],ind_list[l]]=full_gradient[col_list[k],ind_list[l]]+gradient[k,l]

                    #tot_norm=tot_norm+np

                    tot_cost = tot_cost + model.cost

                #print time.time()-start_time
            else:
                count = count + lens[ind]
            ind = ind + 1
        if a == (stop + 1):
            weights = model.weight_val()
            np.save("weights/" + str(sys.argv[1]) + ".npy", weights)
        if a > (stop + 1):
            w_o = np.load("weights/" + str(sys.argv[1]) + ".npy")
            weights = model.weight_val()
            #print weights.shape
            w_n = (w_o + weights) / 2
            #print w_n.shape
            np.save("weights/" + str(sys.argv[1]) + ".npy", w_n)
        if a - (stop + 1) == 31:
            break
        #gradient_norm=LA.norm(full_gradient)
        print tot_cost

        print "Iter time: ", time.time() - one_time
    fw1 = open(
        "results-news/" + str(sys.argv[1]) + "-" + str(sys.argv[3]) + ".txt",
        "w")
    for x in mentions:
        #print i
        print x
        whole_file = path2 + "whole-data-" + x + ".dat"
        test_file = path3 + x + "-test-data.dat"
        #X_train,y_train,x_t,y_temp=load_data(whole_file,test_file,mentions[i])
        y_train = need.load_pickle(path4 + x + "-train-y.dat")
        x_t = need.load_pickle(path4 + x + "-test-x.dat")
        #print x_t.shape
        y_temp = need.load_pickle(path4 + x + "-test-y.dat")
        ys = set(y_train).union(set(y_temp))
        new_y = sort_prior(x.lower(), y_train)
        ind_list = []
        fw = open(
            "multitask-results-full-testb/" + x + "-" + str(sys.argv[1]) +
            "-" + str(sys.argv[3]) + ".txt", "w")
        for y in new_y:
            #print y
            ind_list.append(classes[y])
        #print ind_list
        w_o = np.load("weights/" + str(sys.argv[1]) + ".npy")
        #print w_o.shape
        #indices=need.load_pickle("ind-classes.p")
        #print len(ind_list)
        if len(ys) > 1:
            y_t = make_y_test(y_temp, max_col, classes)
            mask = np.zeros((y_t.shape))
            for j in range(0, y_t.shape[0]):
                for k in range(0, len(ind_list)):
                    mask[j, ind_list[k]] = 1
            col_list = unique_col(x_t)
            prediction1 = model.predict(x_t[:, col_list], w_o, col_list,
                                        ind_list)
            #print prediction1
            prediction = []
            for j in range(0, len(prediction1)):
                prediction.append(ind_list[prediction1[j]])
            print x_t.shape, len(ys)
            acc = np.mean(np.argmax(y_t, axis=1) == prediction)

            print acc
            fw1.write(x + " " + str(acc) + " " + str(x_t.shape[0]) + '\n')
            test = (np.argmax(y_t, axis=1) == prediction)
            miss = []
            for j in range(0, len(test)):
                if test[j] == False:
                    miss.append(j)
            for j in range(0, len(test)):
                if j in miss:
                    fw.write("prediction: " + str(prediction[j]) + ' ')
                    fw.write("actual: " + str(np.argmax(y_t, axis=1)[j]) +
                             '\n')
        else:
            fw1.write(x + " " + str(1.0) + " " + str(len(ys)) + '\n')
            print x_t.shape, len(ys)
            print "1.0"
    os.remove("weights/" + str(sys.argv[1]) + ".npy")
Example #10
    # per-example multi-class hinge loss: max(0, delta + best non-target score - target score)
    corrects = predictions[targets.nonzero()]
    rest = theano.tensor.reshape(predictions[(1 - targets).nonzero()], (-1, num_cls - 1))
    rest = theano.tensor.max(rest, axis=1)
    return theano.tensor.nnet.relu(rest - corrects + delta)
X = sparse.csr_matrix(name='X', dtype='float64')
Y = T.fmatrix()
mask = T.fmatrix()
ind = T.ivector()
col = T.ivector()
lr = T.fscalar()
w_p1 = T.fmatrix()
w_p2 = T.fmatrix()
#save_flag=T.bscalar()
#w_h = init_weights((16074140, 100))
#w_h2 = init_weights((625, 625))
sel_mention = need.load_pickle("new_reduced_test.p")
mentions = sel_mention[int(sys.argv[1])]
num_class = need.load_pickle("num-classes.p")
sel_class = num_class[int(sys.argv[1])]
print sel_class
w_h = init_weights((1000000 * len(mentions), 100))
#w_o = init_weights((16074140, 102))
w_o = init_weights((100, sel_class))
print "Weight initialization done..."
#w_o2=w_o[col]
#print w_o.get_value().shape
#print w_o2.get_value().shape
w_h1 = w_h[col]
w_o1 = w_o[:, ind]
#noise_h, noise_py_x = model(X, w_h, w_h2, w_o, 0.2, 0.5)
lambda_reg = float(sys.argv[4])  # renamed from 'lambda', which is a reserved keyword in Python
Example #11
def model(X, w_o, p_drop_input, p_drop_hidden):
    #X = dropout(X, p_drop_input)
    #h = rectify(sparse.basic.dot(X, w_h))
    #h = T.nnet.sigmoid(sparse.basic.dot(X, w_h))

    #h = dropout(h, p_drop_hidden)
    #h2 = T.dot(h, w_h2)

    #h2 = dropout(h2, p_drop_hidden)
    py_x = sparse.basic.dot(X, w_o)
    return py_x


#trX, teX, trY, teY = mnist(onehot=True)
num_class = need.load_pickle("num-classes.p")
sel_class = num_class[int(sys.argv[1])]
print sel_class
X = sparse.csr_matrix(name='X', dtype='float64')
Y = T.fmatrix()
mask = T.fmatrix()
ind = T.ivector()
col = T.ivector()
w_p = T.fmatrix()
lr = T.fscalar()
#w_h = init_weights((16074140, 100))
#w_h2 = init_weights((625, 625))
w_o = init_weights((1000000, sel_class))
#w_o = init_weights((16074140, 107))
print "Weight initialization done..."
#w_o2=w_o[col]
def main():
    # reduced-sum-test.p is a dictionary mapping each cluster number to the list of mentions in that cluster
    sel_mention = need.load_pickle("reduced-sum-test.p")

    cluster_num = int(sys.argv[1])
    iteration_num = int(sys.argv[2])
    it_weight_save = int(sys.argv[3])
    mentions = sel_mention[cluster_num]
    total_y = 0
    lens = []
    # reduced-sizes.p is a dictionary mapping each cluster number to the total number of training examples in that cluster
    sizes = need.load_pickle("reduced-sizes.p")
    #print sizes
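    # Illustrative layouts only (hypothetical values, not from the source), assuming the
    # cluster id is passed as sys.argv[1]:
    #   sel_mention -> {0: ["EU", "European Union"], 1: [...]}
    #   sizes       -> {"0train": 51879, "0test": 371}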
    #Xf_train=lil_matrix((51879,600000))
    Xf_train = lil_matrix(
        (sizes[str(sys.argv[1]) + "train"], 1000000 * len(mentions)))
    Xf_test = lil_matrix(
        (sizes[str(sys.argv[1]) + "test"], 1000000 * len(mentions)))
    #Xf_test=lil_matrix((371,600000))
    print Xf_train.shape
    st = 0
    i = 0
    for x in mentions:
        whole_file = path2 + "whole-data-" + x + ".dat"
        test_file = path3 + x + "-test-data.dat"
        #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
        X_train = need.load_pickle(path4 + x + "-train-x.dat")
        y_train = need.load_pickle(path4 + x + "-train-y.dat")
        print "loading done..."
        new_y = sort_prior(x.lower(), y_train)
        print new_y
        total_y = total_y + len(new_y)
        #print y_train.shape[0],y_test.shape[0]
        if i == 0:
            X_train1, y_train1 = select_data_point(X_train, y_train, new_y)
            # build a mention-specific feature block so the first-layer weights are not shared across mentions
            Xf_train = need.make_features(X_train1, Xf_train, i + 1, X_train1.shape[1], st)
            st = st + X_train1.shape[0]
            print X_train1.shape
            lens.append(X_train1.shape[0])
        if i >= 1:
            X_train2, y_train2 = select_data_point(X_train, y_train, new_y)
            # build a mention-specific feature block so the first-layer weights are not shared across mentions
            Xf_train = need.make_features(X_train2, Xf_train, i + 1, X_train2.shape[1], st)
            y_train1 = aggregate_y(y_train1, y_train2)
            st = st + X_train2.shape[0]
            print X_train2.shape
            lens.append(X_train2.shape[0])
        i = i + 1
        #print st
    #need.save_pickle(X_train1,path2+"short_data-x.dat")
    #need.save_pickle(y_train1,path2+"short_data-y.dat")

    #x=need.load_pickle(path2+"short_data-x.dat")
    #full_y=need.load_pickle(path2+"short_data-y.dat")
    max_col = len(Set(y_train1))
    print max_col

    y, classes = make_y(y_train1, max_col)
    need.save_pickle(classes,
                     "men-classes/" + "ind_classes_" + str(sys.argv[1]) + ".p")

    #classes=need.load_pickle("ind_classes_1.p")
    print "preprocessing done..."
    need.save_pickle(Xf_train, "mention-specific-data/" + sys.argv[1] + ".dat")
    #Xf_train=need.load_pickle("mention-specific-data/"+sys.argv[1]+".dat")

    x = Xf_train.tocsr()
    for a in range(0, iteration_num):
        print a
        one_time = time.time()
        ind = 0
        lr = 1 / float(np.sqrt(a) + 1)
        #lr=1
        tot_cost = 0
        count = 0
        for m in mentions:
            print m
            #X_train=need.load_pickle(path4+mentions[i]+"-train-x.dat")
            #y_train=need.load_pickle(path4+mentions[i]+"-train-y.dat")
            whole_file = path2 + "whole-data-" + m + ".dat"
            test_file = path3 + m + "-test-data.dat"
            #X_train,y_train,X_test,y_test=load_data(whole_file,test_file,mentions[i])
            #X_train=need.load_pickle(path4+mentions[i]+"-train-x.dat")
            y_train = need.load_pickle(path4 + m + "-train-y.dat")
            if len(set(y_train)) > 1:

                new_y = sort_prior(m.lower(), y_train)
                #print y_train.shape[0],y_test.shape[0]

                print "loading done..."

                #x,y_temp=select_data_point(X_train,y_train,new_y)
                #print x.shape
                #y = make_y_test(y_temp,57,classes)
                ind_list = []
                for cl in new_y:
                    if classes[cl] not in ind_list:
                        ind_list.append(classes[cl])
                mini_batch_size = 1000

                start = 0
                #full_x=x[count:count+lens[ind]]
                #full_y=y[count:count+lens[ind]]
                #print full_x.shape
                #print count
                for j in range(0, int(lens[ind] / mini_batch_size) + 1):
                    remaining = sum(lens[0:ind + 1]) - count
                    if remaining < mini_batch_size:
                        batch_x, batch_y = x[count:count + remaining], y[count:count + remaining]
                        count = sum(lens[0:ind + 1])
                    else:
                        batch_x, batch_y = x[count:count + mini_batch_size], y[count:count + mini_batch_size]
                        count = count + mini_batch_size
                    '''mask=np.zeros((batch_y.shape))
					for k in range(0,batch_y.shape[0]):
					for l in range(0,len(ind_list)):
						mask[k,ind_list[l]]=1'''
                    col_list = unique_col(batch_x)
                    #print col_list
                    #print len(col_list)
                    #print batch_x[:,col_list].shape
                    start_time = time.time()
                    print batch_x.shape
                    # col_list selects the features with nonzero entries in batch_x;
                    # ind_list restricts the output classes to this mention's candidates
                    model.cost = model.train(batch_x[:, col_list], batch_y[:, ind_list],
                                             col_list, ind_list, lr)
                    tot_cost = tot_cost + model.cost
                    #print time.time()-start_time
            else:
                count = count + lens[ind]
            ind = ind + 1
        if a == it_weight_save:
            weights1, weights2 = model.weight_val()
            np.savez("weights/" + str(sys.argv[1]) + ".npz", weights1,
                     weights2)
        if a > it_weight_save:
            w_o = np.load("weights/" + str(sys.argv[1]) + ".npz")
            weights1, weights2 = model.weight_val()
            w_n1 = (w_o['arr_0'] + weights1) / 2
            w_n2 = (w_o['arr_1'] + weights2) / 2

            np.savez("weights/" + str(sys.argv[1]) + ".npz", w_n1, w_n2)
        print tot_cost
        print "Iter time: ", time.time() - one_time
    st = 0
    i = 0
    for m in mentions:

        x_test = need.load_pickle(path4 + m + "-test-x.dat")
        Xf_test = need.make_features(x_test, Xf_test, i + 1, x_test.shape[1],
                                     st)
        st = st + x_test.shape[0]
        i = i + 1
    cut = 0
    for m in mentions:
        #print i
        print m
        whole_file = path2 + "whole-data-" + m + ".dat"
        test_file = path3 + m + "-test-data.dat"
        #X_train,y_train,x_t,y_temp=load_data(whole_file,test_file,mentions[i])
        y_train = need.load_pickle(path4 + m + "-train-y.dat")
        #print x_t.shape
        y_temp = need.load_pickle(path4 + m + "-test-y.dat")
        x_t = Xf_test[cut:cut + len(y_temp)]
        cut = cut + len(y_temp)

        ys = Set(y_train).union(Set(y_temp))
        new_y = sort_prior(m.lower(), y_train)
        ind_list = []
        fw = open("multitask-results-full-men/" + m + ".txt", "w")
        w_o = np.load("weights/" + str(sys.argv[1]) + ".npz")
        for x in new_y:
            ind_list.append(classes[x])
        #indices=need.load_pickle("ind-classes.p")
        #print len(ind_list)
        if len(ys) > 1:
            y_t = make_y_test(y_temp, max_col, classes)
            mask = np.zeros((y_t.shape))
            for j in range(0, y_t.shape[0]):
                for k in range(0, len(ind_list)):
                    mask[j, ind_list[k]] = 1
            col_list = unique_col(x_t)
            prediction1 = model.predict(x_t[:, col_list], w_o['arr_0'],
                                        w_o['arr_1'], col_list, ind_list)
            prediction = []
            for j in range(0, len(prediction1)):
                prediction.append(ind_list[prediction1[j]])
            print x_t.shape, len(ys)
            print np.mean(np.argmax(y_t, axis=1) == prediction)
            test = (np.argmax(y_t, axis=1) == prediction)
            miss = []
            for j in range(0, len(test)):
                if test[j] == False:
                    miss.append(j)
            for j in range(0, len(test)):
                fw.write("prediction: " + str(prediction[j]) + ' ')
                fw.write("actual: " + str(np.argmax(y_t, axis=1)[j]) + '\n')
        else:
            print x_t.shape, len(ys)
            print "1.0"