def main(argv):
    """
    function: main
    --------------
    KDD process for Reuter article database.

    Runs preprocessing, then clustering on the value preprocessing returns,
    and finally prints the total wall-clock running time. The classification
    step is currently disabled (see the commented-out call below).

    :param argv: command-line arguments (not used by this function)
    """
    start_time = time.time()

    # Preprocessing =
    #   1. Text Extraction
    #   2. Feature Selection
    #   3. Feature Vector Generation
    print('Step 1: Preprocessing')
    fv = preprocessing.begin()

    # Classification = 3x2 Experiment Set
    #   K-Nearest-Neighbors, Decision-Tree, Naive Bayes
    #   Standard & Pared-Down Feature Vector
    print('\nStep 2: Classification (Skipped); Uncomment to Run')
    # classification.begin(fv)

    # Clustering = 2x2x2 Experiment Set
    print('\nStep 3: Clustering')
    clustering.begin(fv)

    # Report Total Running Time
    # A single pre-formatted string is passed to print(...) so the line is
    # valid on Python 3 and still prints plain text (not a tuple) on
    # Python 2, matching the call form used by the prints above.
    elapsed = time.time() - start_time
    print('\nProcess finished in %s seconds!' % elapsed)
def main(argv):
    """
    function: main
    --------------
    KDD process for Reuter article database.

    Runs preprocessing, then k-minwise hashing on the value preprocessing
    returns, and finally prints the total wall-clock running time.

    :param argv: command-line arguments (not used by this function)
    """
    start_time = time.time()

    # Preprocessing =
    #   1. Text Extraction
    #   2. Feature Selection
    #   3. Feature Vector Generation
    print("Step 1: Preprocessing")
    fv = preprocessing.begin()

    # Minhash for k = 16, 32, 64, 128, 256
    print("\nStep 2: K-Minwise Hashing:")
    minwisehash.begin(fv)

    # Report Total Running Time
    # A single pre-formatted string is passed to print(...) so the line is
    # valid on Python 3 and still prints plain text (not a tuple) on
    # Python 2, matching the call form used by the prints above.
    elapsed = time.time() - start_time
    print("\nProcess finished in %s seconds!" % elapsed)