def main():
    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 5
    LR = 0.15          # 0.15 good for ~5000
    KERN = 'lin'       # lin or rbf or poly
    NUM_RUNS = 1       # number of test runs
    SUBSET_VAL = 300   # number of subset instances for self reported dataset
    LIN_C = 0.90       # hyperparameter for linear kernel

    run = 0

    print("starting dictionary creation.............................")
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, SUBSET_VAL, run)
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    n_train = dictionary.get_n_train_instances()
    n_test = dictionary.get_n_manual_instances()

    X_train = dictionary.get_trainset()
    X_test = dictionary.get_manual_testset()

    print()
    print("starting optimization")
    #coef = kernel_mean_matching(X_train, X_test, n_train, n_test, kern='lin', B=10)
    coef = kernel_mean_matching(X_test, X_train[0], LIN_C, kern='lin', B=10)
    print(coef)
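
# kernel_mean_matching() is defined elsewhere in this repo, so its exact
# signature is not visible here. For reference, below is a minimal sketch of
# the standard KMM quadratic program (Huang et al., 2007) with a linear
# kernel: it finds per-instance weights beta so the weighted training mean
# matches the test mean in feature space. The function name, the dense-array
# assumption, and the SLSQP solver are illustrative choices, not the repo's
# actual implementation.
import numpy as np
from scipy.optimize import minimize

def kernel_mean_matching_sketch(X_train, X_test, B=10.0, eps=None):
    """Estimate importance weights beta for the training points (linear kernel)."""
    n_tr, n_te = X_train.shape[0], X_test.shape[0]
    if eps is None:
        eps = B / np.sqrt(n_tr)  # common default from the KMM paper
    K = X_train @ X_train.T                                   # train/train Gram matrix
    kappa = (n_tr / n_te) * (X_train @ X_test.T).sum(axis=1)  # train/test term

    # Objective: 0.5 * beta^T K beta - kappa^T beta,
    # subject to 0 <= beta_i <= B and |sum(beta) - n_tr| <= eps.
    objective = lambda beta: 0.5 * beta @ K @ beta - kappa @ beta
    constraints = (
        {'type': 'ineq', 'fun': lambda beta: eps - (beta.sum() - n_tr)},
        {'type': 'ineq', 'fun': lambda beta: eps + (beta.sum() - n_tr)},
    )
    result = minimize(objective, np.ones(n_tr), method='SLSQP',
                      bounds=[(0.0, B)] * n_tr, constraints=constraints)
    return result.x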
###################################################################

WORDGRAMS = 3
MINCOUNT = 2
BUCKET = 1000000

print("starting dictionary creation.............................")
dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET)
X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
n_train = dictionary.get_n_train_instances()
n_test = dictionary.get_n_manual_instances()

X_train = dictionary.get_trainset()
X_test = dictionary.get_manual_testset()

B = n_train
#sigma = np.std(X_train)  # compute standard deviation ????
sigma = 0.25

b = (0.0, B)
bounds = tuple(b for _ in range(n_train))  # one (0, B) bound per coefficient, matching beta0
beta0 = np.zeros(n_train)

print("creating gram matrix")
K = create_K()
k = create_k()
print(K.shape, k.shape)
print("done creating gram matrix")
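
# create_K() / create_k() above build the Gram matrix K over the training
# points and the vector kappa against the test set for the KMM quadratic
# program; their bodies live elsewhere in this file. Below is a hedged sketch
# of what RBF-kernel versions could look like, assuming dense numpy arrays
# and the sigma defined above; the _sketch names and signatures are
# illustrative, not the repo's actual functions.
import numpy as np
from scipy.spatial.distance import cdist

def create_K_sketch(X_train, sigma):
    # K[i, j] = exp(-||x_i - x_j||^2 / (2 * sigma^2)) over training points
    sq_dists = cdist(X_train, X_train, 'sqeuclidean')
    return np.exp(-sq_dists / (2.0 * sigma ** 2))

def create_k_sketch(X_train, X_test, sigma):
    # kappa_i = (n_train / n_test) * sum_j exp(-||x_i - x'_j||^2 / (2 * sigma^2))
    sq_dists = cdist(X_train, X_test, 'sqeuclidean')
    n_tr, n_te = X_train.shape[0], X_test.shape[0]
    return (n_tr / n_te) * np.exp(-sq_dists / (2.0 * sigma ** 2)).sum(axis=1)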