# NOTE(review): the next two statements may be the tail of a conditional that
# starts before this section -- confirm against the full file. Placed at top
# level, they terminate the script before anything below runs.
import sys
sys.exit()

f_data = options.filename
# Flag handed to prep() below; set whenever the input path contains 'test'.
# NOTE: the name shadows the builtin eval(); kept because other parts of the
# script may read it.
eval = 'test' in f_data
wesave = options.save

#--------------------------------------------------------#
# Prepare data
#--------------------------------------------------------#

# prep() returns the processed frame together with the list of columns to
# keep (its exact contract is defined elsewhere).
data = pd.read_csv(f_data)
data, keeps = prep(data, eval)

# NOTE(review): this looks like a leftover debugging stop -- confirm. While it
# remains, the method selection below is never reached.
import sys
sys.exit()

#--------------------------------------------------------#
# Choose a method
#--------------------------------------------------------#

# Both trainers receive every kept column but the last as their first
# argument and the last kept column as their second (presumably features and
# target -- verify against prep()).

# K-nn training and testing
if opt == 0:
    from KNN import KNN_test_train
    KNN_test_train(data[keeps[:-1]], data[keeps[-1]], wesave)

# BDT
elif opt == 1:
    from BDT import BDT_test_train
    BDT_test_train(data[keeps[:-1]], data[keeps[-1]])
# NOTE(review): if this exit is unconditional at top level, the chunked
# evaluation below is unreachable -- confirm whether it belongs inside the
# preceding branch or is a leftover.
sys.exit()

# --------------------------------------------------------#
# Prepare data differently. Load in chunks since it
# takes so much memory to process
# --------------------------------------------------------#

# The evaluation input path is hard-coded; it is read lazily in blocks of
# 50000 rows instead of all at once.
f_data = "data/test.csv"
data = pd.read_csv(f_data, chunksize=50000)

# Loop over each chunk and run options
probs = None
prob_chunks = []  # per-chunk results, joined once after the loop
counter = 0
for chunk in data:
    # Single formatted string so the output is byte-identical under both the
    # Python 2 print statement and the Python 3 print function.
    print("Working on  %s ..." % counter)
    chunk, keeps = prep(chunk, True)

    # Run KNN and get probs
    if opt == 0:
        # Collecting the pieces avoids both the ambiguous `probs == None`
        # comparison on a NumPy array and re-copying the accumulated array
        # on every iteration.
        prob_chunks.append(KNN_evaluate(chunk[keeps]))

    counter += 1

# probs stays None when nothing was evaluated (no chunks, or opt != 0).
if prob_chunks:
    probs = np.concatenate(prob_chunks)

# Now we are done, write the output
# Build the index -> category-name lookup. Every slot starts as its own index
# rendered as a string, then is overwritten with the category name that maps
# to that index. An object array is used so names are stored whole: a
# fixed-width string array seeded from integers would truncate longer names.
outcat = np.array([str(i) for i in range(len(categories))], dtype=object)
for key in categories:
    outcat[categories[key]] = key