import sys
import os
import numpy

# Partitioning script: loads a CSV data set, normalizes and shuffles it, and
# splits it into a training and a test partition.
#
# NOTE(review): ERROR, files, prep and the upper-case settings used below
# (NUM_TARGETS, NUM_ATTRS, INPUT_DELIMITER, HAS_HEADER, NORMALIZE_TARGETS,
# TRAIN_RATIO) are not defined in the visible part of this script; presumably
# they come from project modules imported elsewhere -- confirm.

# The only command-line argument is the path of the CSV file to partition.
if len(sys.argv) < 2:
    print(ERROR + " Please provide the target CSV file containing the data.\n")
    # Exit with a non-zero status so that callers can detect the failure.
    sys.exit(1)

file_name = sys.argv[1]

if not os.path.isfile(file_name):
    print(ERROR + " File " + file_name + " not found.\n")
    sys.exit(1)

# load everything into a matrix (not very scalable I think)
data_matrix = files.load_into_matrix(
    file_name,
    num_targets=NUM_TARGETS,
    num_attributes=NUM_ATTRS,
    input_delimiter=INPUT_DELIMITER,
    skip_first=HAS_HEADER,
)

# normalizing and shuffling
data_matrix = prep.normalize(
    data_matrix, NUM_ATTRS, NUM_TARGETS, NORMALIZE_TARGETS
)
data_matrix = prep.shuffle(data_matrix)

# Both partitions are taken from the same shuffled matrix using TRAIN_RATIO.
# (Original note: "training set is twice as large as test set" -- that only
# holds for a matching TRAIN_RATIO value; confirm against the configuration.)
train_set_matrix = prep.take_train_set(data_matrix, TRAIN_RATIO)
test_set_matrix = prep.take_test_set(data_matrix, TRAIN_RATIO)

# finding out the target directory where i should save the partitions
current_dir = os.path.dirname(os.path.realpath(__file__))
partitions_directory = files.get_partitions_dir_from_file_name(file_name)
constants.py\n""" sys.exit() dir_name = sys.argv[1].lstrip('.').rstrip('/').strip(" ") if not os.path.isfile(dir_name+'/train_set.csv'): print ERROR+" File "+dir_name+"/train_set.csv not found.\n" sys.exit() if not os.path.isfile(dir_name+'/test_set.csv'): print ERROR+" File "+dir_name+"/test_set.csv not found.\n" sys.exit() train_set_file = dir_name+'/train_set.csv' train_set = files.load_into_matrix(train_set_file,skip_first=False) # we won't load the targets because they're what we're trying to predict prediction_set = files.load_into_matrix(dir_name+'/test_set.csv',skip_first=False,num_attributes=NUM_ATTRS,load_targets=False,num_targets=NUM_TARGETS) #some attributes may be ignored if user has set config option EXCLUDE_ATTRS attributes_to_use = filter(lambda x: False if x in EXCLUDE_ATTRS else True,np.arange(NUM_ATTRS)) #training the network #this is where the weights (between the input and the hidden layers) are kept wij = ann.init_input_weights(len(attributes_to_use),NUM_NEURONS_HIDDEN_LAYER,NUM_DIGITS) #these are the weights between the hidden layer and the output neuron (one per neuron in the hidden layer) wj = ann.init_output_weights(NUM_NEURONS_HIDDEN_LAYER,NUM_DIGITS)