def preprocess(lower_threshold, numlines_train, numlines_test, only_trainingset):
    '''Preprocess the training data and optionally a separate test data file.

    Creates features and extracts targets. The work is done in three steps:

    1. derive all file names with create_names(..., 1),
    2. import the data from the csv files (prep.import_csv),
    3. create the features and save them to new csv files
       (prep.create_new_csvs).

    lower_threshold, numlines_train, numlines_test and only_trainingset are
    forwarded unchanged to both create_names and the pp constructor.
    Nothing is returned; the results are the files written by prep.

    NOTE: upper_threshold is not a parameter of this function. It is looked
    up as a module-level name at call time, so it has to be defined in the
    enclosing module before preprocess is called.
    '''
    csv_train, storage_train, features_train, targets_train, csv_test, \
        storage_test, features_test = create_names(
            lower_threshold, upper_threshold, numlines_train, numlines_test,
            only_trainingset, 1)

    prep = pp(lower_threshold, upper_threshold, numlines_train, numlines_test,
              only_trainingset)

    # import data from csv files:
    prep.import_csv(csv_train, csv_test, storage_train, storage_test)

    # create features and save to new csv files:
    prep.create_new_csvs(features_train, targets_train, features_test,
                         storage_train, storage_test)
''' from NaiveBayes import NaiveBayes from create_filenames import create_names import numpy as np from slice_merge import slice_csv, merge_csvs lower_threshold = 7 upper_threshold = 100 numlines_train = 5000 # 0 will be interpreted as all lines numlines_test = 0 # 0 will be interpreted as all lines slice_size = 1000 use_only_trainingset_and_split = True features_file_train, targets_file_train, features_file_test, \ predictions_file_test = create_names(lower_threshold, upper_threshold, numlines_train, numlines_test, 0, 0) def main(): features_train = None IDs_train = None all_targets_train = None features_file_test = None try: print 'load training features...' features_train, IDs_train = load_features(features_file_train, 1) print 'load training targets...' all_targets_train = load_targets(targets_file_train) print 'load test features...' features_file_test = load_features(features_file_test, 1) except IOError: print "The corresponding files have not been created yet."
'''
from NaiveBayes import NaiveBayes
from create_filenames import create_names
import numpy as np
from slice_merge import slice_csv, merge_csvs

# Parameters handed to create_names below to derive the file names.
lower_threshold = 5
upper_threshold = 100
numlines_train = 100 # 0 will be interpreted as all lines
# presumably the fraction at which the training set is split -- confirm
split_at = 0.7

# File names for the preprocessed data, the probabilities and the predictions.
features_file_train, targets_file_train, features_file_test, \
    probabilities_filename, predictions_filename = create_names(lower_threshold,
        upper_threshold, numlines_train, 0, 1, 0)

def main():
    # Initialise the results before attempting to load them.
    features = None
    IDs = None
    all_targets = None
    try:
        print 'load training features...'
        features, IDs = load_features(features_file_train, 1)
        print 'load training targets...'
        all_targets = load_targets(targets_file_train)
    except IOError:
        # A file could not be opened: tell the user how to create it and
        # stop the script with exit status 0.
        print "The corresponding files have not been created yet."
        print "Please run preprocessing with the same parameters and try again."
        raise SystemExit(0)