# Hyperparameter values recorded from earlier runs (kept for reference):
#   1000-sample training set: C = 1.0,                 gamma = 0.1
#   1M-sample training set:   C = 0.10000000000000001, gamma = 0.0001
train_msg = "Train SVC with C: %f, gamma: %f" % (C, gamma)
logging.info(train_msg)

# probability=True is requested because predict_proba is called on this
# model for every test part below.
svc = SVC(C=C, gamma=gamma, probability=True, verbose=True)
svc = svc.fit(X_train, y_train)

# The test data is read in five parts, numbered 1 through 5.
field = ["id", "click"]
for part in range(1, 6):
    test_filepath = test_filepattern % part
    logging.info("Loading test set [%s]..." % test_filepath)
    X_test, ids_test = p.load_test_data(test_filepath)

    # Log the shapes and the first example as a quick sanity check.
    shape_msg = "Shape X = %r, ids =%r" % (X_test.shape, ids_test.shape)
    logging.info(shape_msg)
    example_msg = "example X = %s\nids =%r" % (X_test[0], ids_test[0])
    logging.info(example_msg)

    # Per the original note, each row is [prob of 0, prob of 1].
    svc_probs = svc.predict_proba(X_test)
    logging.info("prob of test: %s" % svc_probs[:10])

    # The output name encodes the test file, sample size and hyperparameters.
    name_parts = (test_filepath, SAMPLE, C, gamma)
    out_filepath = "%s-svc-t1M-s%d-c%f-g%f.csv" % name_parts
    logging.info("Writing out file %s" % out_filepath)

    # Only write when there is exactly one prediction row per test id.
    counts_match = len(ids_test) == len(svc_probs)
    if not counts_match:
        logging.error("Test case count don:t match")
    else:
        # Append mode: existing content in the output file is preserved.
        with open(out_filepath, "a") as ofile:
            writer = csv.DictWriter(ofile, field)
            # NOTE(review): the header is emitted for part 1 only, although
            # out_filepath differs per part -- presumably the part files are
            # concatenated afterwards; confirm.
            if part == 1:
                writer.writeheader()
#XXX skip test #exit() #Load test data for 5 times test_filepattern = 'data/test_%d_M.out' field = ['id', 'click'] for part in range(1, 6): test_filepath = test_filepattern % part logging.info("Loading test set [%s]..." % test_filepath) #X_test, ids_test= p.load_test_data(test_filepath) #Load data with category width = 10000 for a_slice in range(100): logging.info("+++Doing slice %d+++" %(a_slice)) X_test, ids_test= p.load_test_data(test_filepath, enc = enc, map_dict = map_dict, start_line_no = a_slice*width) if X_test == None: break logging.info("Shape X = %r, ids =%r" %(X_test.shape, ids_test.shape )) logging.info("example X = %s\nids =%r" %(X_test[0], ids_test[0])) learner_probs = gs_learner.predict_proba(X_test) #[prob of 0, prob of 1] logging.info("prob of test: %s" % learner_probs[:10]) out_filepath = "%s-knn-s%d-n%d-w-%s-a-%s.csv" %(test_filepath, n_subsamples, gs_learner.best_params_['n_neighbors'], gs_learner.best_params_['weights'], gs_learner.best_params_['algorithm']) logging.info("Writing out file %s" % out_filepath) if len(ids_test) != len(learner_probs): logging.error("Test case count don:t match") else : with open(out_filepath, 'a') as ofile: writer = csv.DictWriter(ofile, field) if part == 1 and a_slice == 0: