Ejemplo n.º 1
0
def get_dataset(dir, win_size=5, is_negful=True):
    """Load data through ``InputData`` and wrap it in a ``CrossValidation``.

    Args:
        dir: Location handed to ``InputData`` as its first argument.
        win_size: Forwarded to ``InputData`` as ``window_size``.
        is_negful: When truthy, the third and fourth values returned by
            ``InputData.getData`` are passed on as ``extra_negatives`` and
            ``extra_labels``; otherwise they are discarded.

    Returns:
        A ``cross_validation.CrossValidation`` instance.
    """
    loader = InputData(dir, window_size=win_size)
    features, labels, neg_features, neg_labels = loader.getData()

    # Plain variant first: only the cross-validation features/labels are used.
    if not is_negful:
        return cross_validation.CrossValidation(features, labels)

    return cross_validation.CrossValidation(
        features,
        labels,
        extra_negatives=neg_features,
        extra_labels=neg_labels)
Ejemplo n.º 2
0
    def test_writeFolds(self):
        # Writes the same hand-picked test/training split into folds 0..2 and
        # checks, per fold, that both output directories are populated and
        # that no file name appears in both of them.
        cs = cross_validation.CrossValidation(self.raddress, self.waddress, 3)
        cs.readFolder()
        cs.writeFolder()

        testSet = [
            '/staf/amir/robotica/Brain/data/hog_test/cswrite/1.jpg',
            '/staf/amir/robotica/Brain/data/hog_test/cswrite/1320757889.jpg'
        ]
        trainingSet = [
            '/staf/amir/robotica/Brain/data/hog_test/cswrite/4.jpg',
            '/staf/amir/robotica/Brain/data/hog_test/cswrite/5.jpg'
        ]

        for fold in range(3):
            cs.writeFolds(fold, testSet, trainingSet)

            # Both listings live under the same per-fold directory.
            fold_dir = ('/staf/amir/robotica/Brain/data/hog_test/cswrite/' +
                        str(fold))
            written_tests = os.listdir(fold_dir + "/testset")
            written_trains = os.listdir(fold_dir + "/trainingset")

            self.assertNotEqual(written_tests, [], "Test directory is empty")
            self.assertNotEqual(written_trains, [],
                                "Training directory is empty")

            # Compare every test file name against every training file name.
            name_pairs = [(test_name, train_name)
                          for test_name in written_tests
                          for train_name in written_trains]
            for test_name, train_name in name_pairs:
                self.assertNotEqual(
                    test_name, train_name,
                    "The file in the test set is equal to the file in train set"
                )
Ejemplo n.º 3
0
def get_test_dateset(dir, site='Y', win_size=5, is_context=False):
    """Load test data through ``TestData`` and prepare it for evaluation.

    Args:
        dir: Location handed to ``TestData`` as its first argument.
        site: Passed positionally to ``TestData`` as its second argument.
        win_size: Forwarded to ``TestData`` as ``window_size``.
        is_context: Forwarded to ``TestData`` as ``is_context``.

    Returns:
        A 4-tuple ``(ids, seqs, cv, labels)`` where ``ids`` and ``seqs`` come
        straight from ``TestData.getData``, ``cv`` is a
        ``cross_validation.CrossValidation`` built from the features and
        labels, and ``labels`` is the label sequence concatenated along
        axis 0.
    """
    loader = TestData(
        dir, site, window_size=win_size, is_context=is_context)
    ids, seqs, feature, label = loader.getData()

    folds = cross_validation.CrossValidation(feature, label)
    flat_labels = np.concatenate(label, axis=0)
    return ids, seqs, folds, flat_labels
Ejemplo n.º 4
0
 def test_writeFolder(self):
     # After reading the input folder, writeFolder() is expected to return 9
     # for the fixture data.
     validator = cross_validation.CrossValidation(
         self.raddress, self.waddress, self.k_fold)
     validator.readFolder()
     written = validator.writeFolder()
     self.assertEqual(
         9, written,
         "The number of files are not equal to the actual number of files in folders"
     )
Ejemplo n.º 5
0
    def test_readFolder(self):
        # readFolder() is expected to return 9 entries for the fixture data
        # and to leave a 'data-label.dat' file in the read directory.
        cs = cross_validation.CrossValidation(self.raddress, self.waddress,
                                              self.k_fold)
        data = cs.readFolder()

        self.assertEqual(
            len(data), 9,
            "The number of classes is not equal to the number of folders")
        # assertTrue states the intent directly instead of comparing a
        # boolean against True.
        self.assertTrue(
            os.path.isfile(os.path.join(self.raddress, 'data-label.dat')),
            "The data-label file is not written.")
Ejemplo n.º 6
0
 def test_dataShuffler(self):
     # dataShuffler() must hand back as many entries as it was given; only
     # the length is compared, not the order or the contents.
     samples = ['/staf/amir/test/1.png'] + [
         '/staf/amir/test/2.png', '/staf/amir/test/3.png'
     ] * 3
     # Captured before the call so the comparison does not depend on whether
     # the input list is modified.
     expected_len = len(samples)

     shuffler = cross_validation.CrossValidation(
         self.raddress, self.waddress, self.k_fold)
     shuffled = shuffler.dataShuffler(samples)

     self.assertEqual(expected_len, len(shuffled),
                      "The lenght of the input and output differs")
Ejemplo n.º 7
0
    def test_setFolds(self):
        # Feeds three single-file slices to setFolds() and checks, for each of
        # the three folds, that the written test and training directories are
        # populated and share no file name.
        testSliced = [
            ['/staf/amir/robotica/Brain/data/hog_test/cswrite/1.jpg'],
            ['/staf/amir/robotica/Brain/data/hog_test/cswrite/4.jpg'],
            ['/staf/amir/robotica/Brain/data/hog_test/cswrite/5.jpg']
        ]
        cs = cross_validation.CrossValidation(self.raddress, self.waddress, 3)
        cs.readFolder()
        cs.writeFolder()
        # The return value is deliberately not bound: the assertions below
        # only inspect what was written to disk, and binding it to a local
        # called `set` would shadow the builtin.
        cs.setFolds(testSliced)

        for i in range(3):
            testList = os.listdir(
                '/staf/amir/robotica/Brain/data/hog_test/cswrite/' + str(i) +
                "/testset")
            trainingList = os.listdir(
                '/staf/amir/robotica/Brain/data/hog_test/cswrite/' + str(i) +
                "/trainingset")
            # Without these checks an empty directory would make the overlap
            # loop below pass without comparing anything.
            self.assertNotEqual(testList, [], "Test directory is empty")
            self.assertNotEqual(trainingList, [],
                                "Training directory is empty")
            for tests in testList:
                for trains in trainingList:
                    self.assertNotEqual(
                        tests, trains,
                        "The file in the test set is equal to the file in train set"
                    )
Ejemplo n.º 8
0
 def test_dataOrganizer(self):
     # Smoke test: runs read -> write -> organize in sequence and passes as
     # long as none of the calls raises; nothing is asserted.
     organizer = cross_validation.CrossValidation(
         self.raddress, self.waddress, self.k_fold)
     organizer.readFolder()
     organizer.writeFolder()
     organizer.dataOrganizer()
Ejemplo n.º 9
0
else:
    x_source_tf = x_source
    transformed = False

# Scale data
# The scaler is fitted on the source features and then applied to that same
# array (presumably scikit-learn's StandardScaler - confirm against the
# file's imports).
scaler = StandardScaler()

# Hard-coded switch: set to False to skip the scaling step below.
scale_data = True
if scale_data:
    print 'scale data'
    scaler.fit(x_source_tf)
    x_source_tf = scaler.transform(x_source_tf)

# Cross validation
# ids, y_source and the features are stacked horizontally into one array.
# NOTE(review): the second argument (3) is presumably the number of folds -
# confirm against cv.CrossValidation.
data_cv = np.hstack((ids, y_source, x_source_tf))
cross_validation = cv.CrossValidation(data_cv, 3)

# switch on/off if cross validation or test data prediction
cross_validate = True

if cross_validate:
    print 'Doing Cross Validation'

    param_manager = ParameterManager()

    myrange = [5 * x for x in range(1, 5)]

    # Define parameters here: (parameter_name, [parameter_values])
    parameter_settings = [('alpha', myrange),
                          ('layer_size', [(100, 100, 100, 100, 100, 100, 100,
                                           100), (50)]),
Ejemplo n.º 10
0
if __name__ == "__main__":
    if len(sys.argv) < NUM_ARGS + 1:
        print _usage()
        sys.exit(1)

    data_file = sys.argv[1]
    handle = open(data_file, 'r')
    handle.readline()
    csv_file = csv.reader(handle)
    data = []

    for line in csv_file:
        l = tuple(map(lambda x: float(x), line[0:2]))
        data.append(l)

    weight_kernel = 1
    weight_uniform = 1
    cv = cross_validation.CrossValidation(NUM_FOLDS, data, True)
    for i in xrange(NUM_FOLDS):
        N = cv.num_training_examples(i)
        data = cv.training_examples
        for t in cv.training_examples(i):
            k_log_likelihood = kernel_log_likelihood(t, i, data, N)
            u_log_likelihood = uniform_log_likelihood(t)
            if k_log_likelihood > u_log_likelihood:
                weight_kernel += 1
            else:
                weight_uniform += 1
            print weight_kernel, weight_uniform, k_log_likelihood, u_log_likelihood

Ejemplo n.º 11
0
            y_source = source_tf[:, 0].reshape(source_tf.shape[0], 1)

        else:
            x_source_tf = feature_transform(feature_vec, x_source)
    else:
        x_source_tf = x_source

else:
    x_source_tf = x_source

# Debug output: shapes of the three arrays that are stacked below.
print ids.shape
print y_source.shape
print x_source_tf.shape
# ids, y_source and the features are stacked horizontally into one array.
data_cv = np.hstack((ids, y_source, x_source_tf))

# NOTE(review): 20 is presumably the number of folds - confirm against
# cv.CrossValidation. The int() around the literal has no effect.
cross_validation = cv.CrossValidation(data_cv, int(20))

# lin_reg = lr.LinearRegression()

# Cross-validate over lambda in ridge regression
cross_validate = False

if cross_validate:
    print 'Doing Cross Validation'

    results = []
    scale = 0.02
    scale_h = 1.
    for i in range(10, 20, 1):
        print i
        for j in [1]:
Ejemplo n.º 12
0
    ga.evolve(freq_stats=1)

    print ga.bestIndividual()
    best_chromosome = ga.bestIndividual()
    return best_chromosome


if __name__ == "__main__":
    # Script entry point: parse the command line with docopt, run the genetic
    # algorithm on a CrossValidation object, then open the output file, write
    # the header line matching the chosen method and decode the best
    # chromosome into its three genes.
    arguments = docopt(__doc__)
    method = arguments['<method>']
    bindres_file = arguments['<binding_residue_file>']
    pssms_file = arguments['<pssms_file>']
    log_file = arguments['<log_file>']
    output_file = arguments['<output_file>']

    crossValidation = cross_validation.CrossValidation(
        bindres_file, pssms_file, log_file, method)
    best_chromosome = run_ga(crossValidation)

    # Header line per supported method; any other method writes no header.
    header_by_method = {
        "neuralNetwork":
        "#method\tnode_num\tlearning_rate\twindow_size\tdecision_value\n",
        "randomForest":
        "#method\tn_estimators\tmax_features\twindow_size\tdecision_value\n",
        "SVM": "#method\tcost\tgamma\twindow_size\tdecision_value\n",
    }

    with open(output_file, "w") as fp:
        header = header_by_method.get(crossValidation.method)
        if header is not None:
            fp.write(header)
        gene1, gene2, gene3 = crossValidation.decode_chromosome(
            best_chromosome)