def get_dataset(dir, win_size=5, is_negful=True):
    """Build a CrossValidation object from the data found under ``dir``.

    The data is loaded through ``InputData(dir, window_size=win_size)`` and
    its ``getData()`` call, which yields four values: features, labels,
    extra negatives and the labels of those extra negatives.

    Args:
        dir: Location handed to ``InputData``.
        win_size: Forwarded to ``InputData`` as ``window_size``.
        is_negful: When true, the extra negatives and their labels are
            passed to ``CrossValidation`` as ``extra_negatives`` and
            ``extra_labels``; otherwise they are dropped.

    Returns:
        The ``cross_validation.CrossValidation`` instance.
    """
    loader = InputData(dir, window_size=win_size)
    features, labels, neg_features, neg_labels = loader.getData()

    # Guard clause: the plain variant ignores the extra negatives entirely.
    if not is_negful:
        return cross_validation.CrossValidation(features, labels)

    return cross_validation.CrossValidation(
        features,
        labels,
        extra_negatives=neg_features,
        extra_labels=neg_labels)
def test_writeFolds(self):
    """Check that writeFolds leaves non-empty, disjoint test/training dirs.

    For each of the three folds, ``writeFolds`` is called with fixed test
    and training file lists; the fold's ``testset`` and ``trainingset``
    directories must then be non-empty and share no file name.
    """
    fold_count = 3
    output_root = '/staf/amir/robotica/Brain/data/hog_test/cswrite/'

    splitter = cross_validation.CrossValidation(self.raddress, self.waddress,
                                                fold_count)
    splitter.readFolder()
    splitter.writeFolder()

    held_out_files = [
        '/staf/amir/robotica/Brain/data/hog_test/cswrite/1.jpg',
        '/staf/amir/robotica/Brain/data/hog_test/cswrite/1320757889.jpg',
    ]
    fitting_files = [
        '/staf/amir/robotica/Brain/data/hog_test/cswrite/4.jpg',
        '/staf/amir/robotica/Brain/data/hog_test/cswrite/5.jpg',
    ]

    for fold in range(fold_count):
        splitter.writeFolds(fold, held_out_files, fitting_files)

        # Each fold gets its own numbered directory under the output root.
        fold_dir = output_root + str(fold)
        test_names = os.listdir(fold_dir + "/testset")
        train_names = os.listdir(fold_dir + "/trainingset")

        self.assertNotEqual(test_names, [], "Test directory is empty")
        self.assertNotEqual(train_names, [], "Training directory is empty")

        # No file name may appear on both sides of the split.
        for test_name in test_names:
            for train_name in train_names:
                self.assertNotEqual(
                    test_name, train_name,
                    "The file in the test set is equal to the file in train set"
                )
def get_test_dateset(dir, site='Y', win_size=5, is_context=False):
    """Load test data and wrap its features/labels for cross validation.

    Args:
        dir: Location handed to ``TestData``.
        site: Second positional argument of ``TestData``.
        win_size: Forwarded to ``TestData`` as ``window_size``.
        is_context: Forwarded to ``TestData`` as ``is_context``.

    Returns:
        A 4-tuple ``(ids, seqs, cv, labels)`` where ``ids`` and ``seqs``
        come straight from ``TestData.getData()``, ``cv`` is a
        ``CrossValidation`` built from the features and labels, and
        ``labels`` is the label sequence concatenated along axis 0.
    """
    loader = TestData(dir, site, window_size=win_size, is_context=is_context)
    ids, seqs, feature, label = loader.getData()

    # Build the CrossValidation object before concatenating, as the
    # original expression evaluated its tuple elements left to right.
    folds = cross_validation.CrossValidation(feature, label)
    merged_labels = np.concatenate(label, axis=0)
    return ids, seqs, folds, merged_labels
def test_writeFolder(self):
    """Check that writeFolder returns 9 after the folder has been read."""
    splitter = cross_validation.CrossValidation(self.raddress, self.waddress,
                                                self.k_fold)
    splitter.readFolder()
    written = splitter.writeFolder()

    expected = 9
    self.assertEqual(
        expected, written,
        "The number of files are not equal to the actual number of files in folders"
    )
def test_readFolder(self):
    """Check readFolder's result size and that 'data-label.dat' is written.

    ``readFolder`` must return 9 entries, and afterwards a file named
    ``data-label.dat`` must exist directly under ``self.raddress``.
    """
    splitter = cross_validation.CrossValidation(self.raddress, self.waddress,
                                                self.k_fold)
    classes = splitter.readFolder()
    self.assertEqual(
        len(classes), 9,
        "The number of classes is not equal to the number of folders")

    # Only probe the filesystem once the size check has passed.
    label_file = os.path.join(self.raddress, 'data-label.dat')
    label_file_present = os.path.isfile(label_file)
    self.assertEqual(label_file_present, True,
                     "The data-label file is not written.")
def test_dataShuffler(self):
    """Check that dataShuffler returns as many items as it was given."""
    sample_paths = [
        '/staf/amir/test/1.png',
        '/staf/amir/test/2.png',
        '/staf/amir/test/3.png',
        '/staf/amir/test/2.png',
        '/staf/amir/test/3.png',
        '/staf/amir/test/2.png',
        '/staf/amir/test/3.png',
    ]
    # Record the size up front, before the list is handed to the shuffler.
    expected_count = len(sample_paths)

    splitter = cross_validation.CrossValidation(self.raddress, self.waddress,
                                                self.k_fold)
    shuffled = splitter.dataShuffler(sample_paths)

    self.assertEqual(expected_count, len(shuffled),
                     "The lenght of the input and output differs")
def test_setFolds(self):
    """Check that setFolds leaves no file in both test and training dirs.

    After ``setFolds`` runs on three single-file slices, the ``testset``
    and ``trainingset`` directories of each of the three folds are listed
    and must not share any file name.
    """
    fold_count = 3
    output_root = '/staf/amir/robotica/Brain/data/hog_test/cswrite/'
    slices = [
        ['/staf/amir/robotica/Brain/data/hog_test/cswrite/1.jpg'],
        ['/staf/amir/robotica/Brain/data/hog_test/cswrite/4.jpg'],
        ['/staf/amir/robotica/Brain/data/hog_test/cswrite/5.jpg'],
    ]

    splitter = cross_validation.CrossValidation(self.raddress, self.waddress,
                                                fold_count)
    splitter.readFolder()
    splitter.writeFolder()
    # The return value is not inspected; only the directories are checked.
    splitter.setFolds(slices)

    for fold in range(fold_count):
        fold_dir = output_root + str(fold)
        test_names = os.listdir(fold_dir + "/testset")
        train_names = os.listdir(fold_dir + "/trainingset")

        for test_name in test_names:
            for train_name in train_names:
                self.assertNotEqual(
                    test_name, train_name,
                    "The file in the test set is equal to the file in train set"
                )
def test_dataOrganizer(self):
    """Run readFolder, writeFolder and dataOrganizer in sequence.

    The test makes no assertions; it passes as long as none of the three
    calls raises.
    """
    splitter = cross_validation.CrossValidation(self.raddress, self.waddress,
                                                self.k_fold)
    for step in (splitter.readFolder, splitter.writeFolder):
        step()
    splitter.dataOrganizer()
else: x_source_tf = x_source transformed = False # Scale data scaler = StandardScaler() scale_data = True if scale_data: print 'scale data' scaler.fit(x_source_tf) x_source_tf = scaler.transform(x_source_tf) # Cross validation data_cv = np.hstack((ids, y_source, x_source_tf)) cross_validation = cv.CrossValidation(data_cv, 3) # switch on/off if cross validation or test data prediction cross_validate = True if cross_validate: print 'Doing Cross Validation' param_manager = ParameterManager() myrange = [5 * x for x in range(1, 5)] # Define parameters here: (parameter_name, [parameter_values]) parameter_settings = [('alpha', myrange), ('layer_size', [(100, 100, 100, 100, 100, 100, 100, 100), (50)]),
if __name__ == "__main__":
    # Script entry point: load two-column numeric rows from the CSV file
    # named on the command line, then, for every training example of every
    # cross-validation fold, tally whether the kernel or the uniform model
    # gives the higher log likelihood.
    if len(sys.argv) < NUM_ARGS + 1:
        print(_usage())
        sys.exit(1)

    data_file = sys.argv[1]
    data = []
    # `with` guarantees the file is closed, even if a row fails to parse.
    with open(data_file, 'r') as handle:
        # The first line is read and discarded (presumably a header row --
        # confirm against the data files).
        handle.readline()
        for line in csv.reader(handle):
            # Only the first two columns are kept, as floats.
            data.append(tuple(map(float, line[0:2])))

    # Both tallies start at 1 rather than 0.
    weight_kernel = 1
    weight_uniform = 1
    cv = cross_validation.CrossValidation(NUM_FOLDS, data, True)
    for i in xrange(NUM_FOLDS):
        N = cv.num_training_examples(i)
        # NOTE(review): this rebinds `data` to the bound method itself, not
        # to the result of calling it -- confirm kernel_log_likelihood
        # expects a callable here.
        data = cv.training_examples
        for t in cv.training_examples(i):
            k_log_likelihood = kernel_log_likelihood(t, i, data, N)
            u_log_likelihood = uniform_log_likelihood(t)
            if k_log_likelihood > u_log_likelihood:
                weight_kernel += 1
            else:
                weight_uniform += 1
            # Trace the running tallies and this example's likelihoods.
            # The single formatted argument prints the same space-separated
            # text as the Python 2 `print a, b, c, d` statement.
            print("%s %s %s %s" % (weight_kernel, weight_uniform,
                                   k_log_likelihood, u_log_likelihood))
y_source = source_tf[:, 0].reshape(source_tf.shape[0], 1) else: x_source_tf = feature_transform(feature_vec, x_source) else: x_source_tf = x_source else: x_source_tf = x_source print ids.shape print y_source.shape print x_source_tf.shape data_cv = np.hstack((ids, y_source, x_source_tf)) cross_validation = cv.CrossValidation(data_cv, int(20)) # lin_reg = lr.LinearRegression() # cross validate over lamda in ridge regression cross_validate = False if cross_validate: print 'Doing Cross Validation' results = [] scale = 0.02 scale_h = 1. for i in range(10, 20, 1): print i for j in [1]:
ga.evolve(freq_stats=1) print ga.bestIndividual() best_chromosome = ga.bestIndividual() return best_chromosome if __name__ == "__main__": arguments = docopt(__doc__) method = arguments['<method>'] bindres_file = arguments['<binding_residue_file>'] pssms_file = arguments['<pssms_file>'] log_file = arguments['<log_file>'] output_file = arguments['<output_file>'] crossValidation = cross_validation.CrossValidation(bindres_file, pssms_file, log_file, method) best_chromosome = run_ga(crossValidation) with open(output_file, "w") as fp: if crossValidation.method == "neuralNetwork": fp.write( "#method\tnode_num\tlearning_rate\twindow_size\tdecision_value\n" ) elif crossValidation.method == "randomForest": fp.write( "#method\tn_estimators\tmax_features\twindow_size\tdecision_value\n" ) elif crossValidation.method == "SVM": fp.write("#method\tcost\tgamma\twindow_size\tdecision_value\n") gene1, gene2, gene3 = crossValidation.decode_chromosome( best_chromosome)