Example #1
0
    def run(self, argv):       
        """Start logging, then repeatedly split and encode the data.

        For each pass of the loop the visible code draws a train/test split
        with a 0.75 training fraction, optionally shuffles the two ``*_y``
        lists, prints the sizes of the four split parts and encodes the
        training inputs.

        Args:
            argv: Indexable sequence; only ``argv[0]`` is read here and it is
                forwarded to ``self.start_logging``.

        Raises:
            AssertionError: If ``self.data`` is None (when assertions are
                enabled).

        NOTE(review): this excerpt ends inside the ``while`` loop. The
        increment of ``i`` and any use of ``param_grid``, ``results`` and
        ``sum_acc`` are not visible here.
        """
        # The data must have been loaded before run() is called.
        assert(self.data is not None)

        # Accumulators; none of them is used in the visible part of the method.
        param_grid = {}
        results = []
        sum_acc = 0

        # Logging is keyed by kernel type, the randomize flag rendered as
        # "0"/"1", and the first command-line argument.
        # (What start_logging does with these is not shown here.)
        self.start_logging([self.kernel_type, 
                            str(int(self.randomize_data)), argv[0]])        
        
        i = 0
        while i < self.n_runs:
            # Progress indicator: run index on one line, flushed immediately.
            sys.stdout.write('%i ' % i)
            sys.stdout.flush()

            # Optionally merge the classes listed in set1/set2 before
            # splitting; either way 0.75 is passed as the training fraction.
            if self.limit_sets:
                new_sets = make_d.merge_multiclasses(self.data, self.set1, self.set2)
                sets = make_d.make_set(new_sets, training_fraction=0.75)
            else:
                sets = make_d.make_set(self.data, training_fraction=0.75)
            train_y, train_x, test_y, test_x = sets

            # Shuffle both *_y lists in place while the *_x lists keep their
            # order. Presumably a label-permutation control run -- TODO confirm.
            if self.randomize_data:
                random.shuffle(train_y)
                random.shuffle(test_y)
                pass
        
            # Python 2 print statement: sizes of the four split parts.
            print [len(x) for x in sets]

            # Encode every training input with the module-level table
            # make_d.encode_dic.
            train_x = [make_d.encode(x, make_d.encode_dic) for x in train_x]
Example #2
0
    def run(self, argv):       
        """Start logging under the 'RFOREST' tag, then split and encode the data.

        For each pass of the loop the visible code draws a split with a
        training fraction of 1.0, optionally shuffles the two ``*_y`` lists,
        prints the sizes of the four split parts and encodes the training
        inputs with a local nucleotide table.

        Args:
            argv: Indexable sequence; only ``argv[0]`` is read here and it is
                forwarded to ``self.start_logging``.

        Raises:
            AssertionError: If ``self.data`` is None (when assertions are
                enabled).

        NOTE(review): this excerpt ends inside the ``while`` loop. The
        increment of ``i`` and any use of ``recog_rates`` are not visible
        here.
        """
        # The data must have been loaded before run() is called.
        assert(self.data is not None)

        # Local encoding table: each of A, C, G, U maps to a one-element list
        # holding an integer code 0-3.
        encode_dic = {'A': [0], 'C': [1], 'G': [2], 'U': [3]}
        # Not used in the visible part of the method.
        recog_rates = []

        
        # The first log field is the fixed tag 'RFOREST', followed by the
        # randomize flag rendered as "0"/"1" and the first command-line
        # argument. (What start_logging does with these is not shown here.)
        self.start_logging(['RFOREST',
                            str(int(self.randomize_data)), argv[0]])        
        
        i = 0
        while i < self.n_runs:
            # Progress indicator: run index on one line, flushed immediately.
            sys.stdout.write('%i ' % i)
            sys.stdout.flush()

            # Optionally merge the classes listed in set1/set2 before
            # splitting. training_fraction=1.0 presumably puts every sample
            # in the training part -- TODO confirm against make_d.make_set.
            if self.limit_sets:
                new_sets = make_d.merge_multiclasses(self.data, 
                                                     self.set1, self.set2)
                sets = make_d.make_set(new_sets, training_fraction=1.0)
            else:
                sets = make_d.make_set(self.data, training_fraction=1.0)
            train_y, train_x, test_y, test_x = sets

            # Shuffle both *_y lists in place while the *_x lists keep their
            # order. Presumably a label-permutation control run -- TODO confirm.
            if self.randomize_data:
                random.shuffle(train_y)
                random.shuffle(test_y)
                pass
        
            # Python 2 print statement: sizes of the four split parts.
            print [len(x) for x in sets]

            # Encode every training input with the local table defined above.
            train_x = [make_d.encode(x, encode_dic) for x in train_x]
Example #3
0
def main(argv):
    """Load a dataset, configure an SVM parameter object and start the run loop.

    The visible code reads the file named by ``argv[0]``, builds the ``data``
    mapping, picks a linear or RBF kernel from ``KERNEL_TYPE``, opens a CSV
    log file, and then enters a loop that draws a train/test split with a
    0.75 training fraction on each pass.

    Args:
        argv: Indexable sequence. ``argv[0]`` is the input file name and
            ``argv[1:]`` is handed to ``init``.

    NOTE(review): this excerpt ends inside the ``while`` loop. The increment
    of ``i``, any write to or close of ``logfile``, and any use of
    ``param_grid``, ``results``, ``sum_acc``, ``cvfunc`` and ``n_cv`` are not
    visible here.
    """

    # Declared global so assignments below rebind the module-level names.
    # Only GAMMA_RANGE is actually assigned in the visible part.
    global C_RANGE
    global GAMMA_RANGE
    global SET1
    global SET2

    # Loop counter and accumulators; the accumulators are unused in view.
    i = 0
    param_grid = {}
    results = []
    sum_acc = 0

    # Presumably parses the remaining arguments into the module-level
    # settings (SET1, SET2, ...) -- TODO confirm against init().
    init(argv[1:])    
    print SET1, SET2

    # NOTE(review): the handle returned by open(fn) is never closed
    # explicitly in the visible code.
    fn = argv[0]
    dataset = make_d.read_data(open(fn))
    dataset = make_d.assign_classes(dataset)
    data = make_d.prepare_data(dataset)
    # Python 2 print statement: the keys of data and the length of each value.
    print data.keys(), [len(v) for v in data.values()]

    # '-b 1' looks like the LIBSVM option enabling probability estimates
    # (assuming svm is the LIBSVM Python binding -- TODO confirm).
    param = svm.svm_parameter('-b 1')
    if KERNEL_TYPE == 'LINEAR':
        param.kernel_type = svm.LINEAR
        # Rebinds the module-level GAMMA_RANGE to the tuple (1, 0, -2).
        # Presumably this collapses the gamma search for the linear kernel --
        # TODO confirm where GAMMA_RANGE is consumed.
        GAMMA_RANGE = 1, 0, -2
    else:
        param.kernel_type = svm.RBF

    cvfunc = svmfun.leave_one_out
    n_cv = None

    # Class merging is active only when both SET1 and SET2 are set.
    limit_sets = not SET1 is None and not SET2 is None

    # Derive the log-file suffix from the input file name with the
    # '.fasta' / '.fas' substrings removed.
    outfile = os.path.basename(fn)
    outfile = outfile.replace('.fasta', '')
    outfile = outfile.replace('.fas', '')
    if limit_sets:
        # When sets are limited, the file-based name is replaced entirely by
        # "<SET1 members as ints>vs<SET2 members as ints>".
        outfile = ''.join(map(str, map(int, SET1))) + 'vs'
        outfile += ''.join(map(str, map(int, SET2)))

    # Log name layout: <TIMESTAMP>-<KERNEL_TYPE>-<0|1 randomize flag>-<outfile>.csv
    log_name = '%s-%s-%i-%s.csv' % (TIMESTAMP, 
                                    KERNEL_TYPE,
                                    int(RANDOMIZE_DATA),
                                    outfile)
    # Opened for writing (truncates an existing file of the same name).
    logfile = open(log_name, 'w')                                    

    while i < N_RUNS:
        # Progress indicator: run index on one line, flushed immediately.
        sys.stdout.write('%i ' % i)
        sys.stdout.flush()

        # Optionally merge the classes listed in SET1/SET2 before splitting;
        # either way 0.75 is passed as the training fraction.
        if limit_sets:
            new_sets = make_d.merge_multiclasses(data, SET1, SET2)
            sets = make_d.make_set(new_sets, training_fraction=0.75)
        else:
            sets = make_d.make_set(data, training_fraction=0.75)
        train_y, train_x, test_y, test_x = sets

        # Shuffle both *_y lists in place while the *_x lists keep their
        # order. Presumably a label-permutation control run -- TODO confirm.
        if RANDOMIZE_DATA:
            random.shuffle(train_y)
            random.shuffle(test_y)
            pass
        
        # Sizes of the four split parts.
        print [len(x) for x in sets]