# Parameter-search script: for the selected dataset index, build features for
# several values of the prepare_data_div parameter, split them into train/test
# parts and define the gamma candidates to try.
isTr = 1  # forwarded to read_data; presumably selects the training files -- TODO confirm
for i in range(2, 3):
    X = read_data("Xtr" + str(i), isTr)
    Y = read_data("Ytr" + str(i), isTr)
    # Best-result trackers; not updated in the visible part of the script.
    max_info = ""
    max_predic = 0
    # Remap label 0 to -1 with a single .loc assignment. The previous chained
    # form (Y['Bound'][mask] = -1) may write to a temporary copy and leave the
    # labels unchanged, depending on the pandas version.
    Y.loc[Y['Bound'] == 0, 'Bound'] = -1
    # Disabled log-file handles kept from the original script:
    #f= open("/Users/noch/Documents/workspace/data_challenge/result/console_svm_ker_gaussi_C_big.txt","a+")
    #f= open("/home/jibril/Desktop/data_challenge/result/console_svm_ker_gaussi.txt","a+")
    print("\n testing on Xtr" + str(i) + ", Ytr" + str(i))
    for k in range(2, 5):
        print("\n number of char:" + str(k + 1))
        data_new = prepare_data_div(X, k + 1)
        # The label goes last so that iloc[:, :-1] below selects only features.
        data_new['Bound'] = Y['Bound']
        data_train, data_test = split_data(data_new, 70)
        X_train = data_train.iloc[:, :-1]
        # DataFrame.as_matrix was removed in pandas 1.0; .values returns the
        # same underlying array and is available in every pandas release.
        Y_tr = data_train['Bound'].values.astype(float).tolist()
        X_te = data_test.iloc[:, :-1].values
        Y_te = data_test['Bound'].values.astype(float).tolist()
        print("\n finished preparing number of char:" + str(k + 1))
        # Candidate gamma values, largest first.
        gamma_arr = [100, 20, 10, 1, 0.1, 0.01]
# Final training script: for each of the three datasets, train Pegasos on the
# full training set with that dataset's hyper-parameters and run the resulting
# model on the matching test set.
# Per-dataset settings, indexed by dataset number.
nm_char = [6, 6, 5]
lmda = [10**(-5), 0.0001, 10**(-5)]
epoch = [400000, 300000, 300000]
for i in range(3):
    isTr = 1
    Xtr = read_data("Xtr" + str(i), isTr)
    Ytr = read_data("Ytr" + str(i), isTr)
    # Remap label 0 to -1 with a single .loc assignment. The previous chained
    # form (Ytr['Bound'][mask] = -1) may write to a temporary copy and leave
    # the labels unchanged, depending on the pandas version.
    Ytr.loc[Ytr['Bound'] == 0, 'Bound'] = -1
    isTr = 0
    Xte = read_data("Xte" + str(i), isTr)
    # Test ids are consecutive blocks of 1000 per dataset.
    Xte['Id'] = pd.DataFrame({'Id': range(i * 1000, (i + 1) * 1000)})
    print("preparing data:" + str(i))
    Xtr_p = prepare_data_div(Xtr, nm_char[i])
    # The label goes last so that iloc[:, :-1] below selects only features.
    Xtr_p['Bound'] = Ytr['Bound']
    Xte_p = prepare_data_div(pd.DataFrame(Xte['DNA']), nm_char[i])
    Xte_p['Id'] = Xte['Id']
    # Shuffle the training rows (features and labels together).
    Xtr_p = Xtr_p.sample(frac=1)
    # DataFrame.as_matrix was removed in pandas 1.0; .values returns the same
    # underlying array and is available in every pandas release.
    X_tr = Xtr_p.iloc[:, :-1].values
    Y_tr = Xtr_p['Bound'].values.astype(float).tolist()
    print("training on data:" + str(i))
    w, b = pegasos_(X_tr, Y_tr, lmda[i], epoch[i])
    print("testing on data:" + str(i))
    result = test_with_id(w, b, Xte_p)
# Trains a sequence of Pegasos classifiers, each with its own hyper-parameters,
# and reports the training accuracy of each one.
# Accumulators filled outside the visible part of the script; presumably
# per-classifier weights and predictions for an ensemble -- TODO confirm.
Alpha = []
Y_all_te = []
Y_all_tr = []
D = 0
# Per-classifier settings, indexed by round number.
epoch = [500000, 400000, 400000, 300000, 200000, 500000]
lmda = [1e-05, 1e-05, 0.0001, 0.0001, 0.0001, 1e-05]
num_char = [5, 5, 5, 5, 5, 6]
# NOTE(review): the lists above hold 6 entries but only 5 rounds are run, so
# the last configuration is never used -- confirm this is intended.
for t in range(5):
    epsl = 0
    Z = 0
    print("... is training on classifier pegasos" + str(t))
    X_tr = prepare_data_div(X_train, num_char[t])
    # DataFrame.as_matrix was removed in pandas 1.0; .values returns the same
    # underlying array and is available in every pandas release.
    X_tr = X_tr.values
    # 1.0 keeps this a true division even under Python 2 integer semantics.
    D = 1.0 / len(X_tr)
    X_te = prepare_data_div(X_test, num_char[t])
    X_te = X_te.values
    # base classifier
    w, b = pegasos_(X_tr, Y_tr, lmda[t], epoch[t])
    Y_pre_tr = predict_pegasos(w, b, X_tr)
    Y_pre_te = predict_pegasos(w, b, X_te)
    # Count of matching labels divided by the sample count.
    predicted_sco_tr = accuracy_score(Y_pre_tr, Y_tr, normalize=False) / len(Y_pre_tr)
    print("predicted_score_tr:" + str(predicted_sco_tr))