def run(self, argv):
    """Run the SVM experiment: repeat `n_runs` random 75/25 train/test splits
    of `self.data`, logging under a tag built from the kernel type and the
    randomization flag.

    argv[0] is the input filename, used only as a logging tag.
    NOTE(review): this block appears truncated — the visible loop body never
    increments `i`, trains, or returns; the remainder presumably lies outside
    this chunk.
    """
    assert(self.data is not None)  # data must be loaded before run() is called
    param_grid = {}  # not used in the visible portion — presumably filled further down
    results = []     # accumulators for the (unseen) per-run results
    sum_acc = 0
    # Log-file tag: kernel type, whether labels are shuffled (0/1), input filename.
    self.start_logging([self.kernel_type, str(int(self.randomize_data)), argv[0]])
    i = 0
    while i < self.n_runs:
        # Progress indicator on one line: "0 1 2 ..." (Python 2 stdout).
        sys.stdout.write('%i ' % i)
        sys.stdout.flush()
        if self.limit_sets:
            # Restrict to a two-group comparison: merge the multi-class data
            # into set1-vs-set2 before splitting.
            new_sets = make_d.merge_multiclasses(self.data, self.set1, self.set2)
            sets = make_d.make_set(new_sets, training_fraction=0.75)
        else:
            sets = make_d.make_set(self.data, training_fraction=0.75)
        train_y, train_x, test_y, test_x = sets
        if self.randomize_data:
            # Permutation control: shuffle labels independently of the
            # features to estimate a chance-level baseline.
            random.shuffle(train_y)
            random.shuffle(test_y)
            pass
        print [len(x) for x in sets]  # sizes of train_y/train_x/test_y/test_x (Python 2 print)
        # Encode sequences numerically using the module-level encoding table.
        train_x = [make_d.encode(x, make_d.encode_dic) for x in train_x]
        # NOTE(review): visible body ends here — `i` is never incremented in
        # this chunk, so the loop continuation must be elsewhere; confirm.
def run(self, argv):
    """Run the random-forest experiment ('RFOREST' log tag): repeat `n_runs`
    splits of `self.data` with training_fraction=1.0 (all data goes to the
    training partition; the test partition is presumably empty or produced
    by cross-validation downstream — TODO confirm).

    argv[0] is the input filename, used only as a logging tag.
    NOTE(review): this block appears truncated — the visible loop body never
    increments `i`, trains, or returns; the remainder presumably lies outside
    this chunk.
    """
    assert(self.data is not None)  # data must be loaded before run() is called
    # Local nucleotide -> numeric-feature encoding (RNA alphabet, U not T).
    encode_dic = {'A': [0], 'C': [1], 'G': [2], 'U': [3]}
    recog_rates = []  # accumulator for the (unseen) per-run recognition rates
    # Log-file tag: classifier name, whether labels are shuffled (0/1), input filename.
    self.start_logging(['RFOREST', str(int(self.randomize_data)), argv[0]])
    i = 0
    while i < self.n_runs:
        # Progress indicator on one line: "0 1 2 ..." (Python 2 stdout).
        sys.stdout.write('%i ' % i)
        sys.stdout.flush()
        if self.limit_sets:
            # Restrict to a two-group comparison: merge the multi-class data
            # into set1-vs-set2 before splitting.
            new_sets = make_d.merge_multiclasses(self.data, self.set1, self.set2)
            sets = make_d.make_set(new_sets, training_fraction=1.0)
        else:
            sets = make_d.make_set(self.data, training_fraction=1.0)
        train_y, train_x, test_y, test_x = sets
        if self.randomize_data:
            # Permutation control: shuffle labels independently of the
            # features to estimate a chance-level baseline.
            random.shuffle(train_y)
            random.shuffle(test_y)
            pass
        print [len(x) for x in sets]  # sizes of train_y/train_x/test_y/test_x (Python 2 print)
        # Encode sequences numerically using the local encoding table.
        train_x = [make_d.encode(x, encode_dic) for x in train_x]
        # NOTE(review): visible body ends here — `i` is never incremented in
        # this chunk, so the loop continuation must be elsewhere; confirm.
def main(argv):
    """Script entry point: parse options, load a FASTA dataset, configure the
    libsvm parameters, and run N_RUNS repeated 75/25 train/test experiments,
    logging results to a timestamped CSV.

    argv[0] is the input FASTA filename; argv[1:] are option flags consumed
    by init().
    NOTE(review): this block appears truncated — the visible loop body never
    increments `i` or performs the grid search; the remainder presumably
    lies outside this chunk.
    """
    # Module-level configuration mutated by init() / this function.
    global C_RANGE
    global GAMMA_RANGE
    global SET1
    global SET2
    i = 0
    param_grid = {}  # not used in the visible portion — presumably filled further down
    results = []     # accumulators for the (unseen) per-run results
    sum_acc = 0
    init(argv[1:])   # parse command-line flags into the globals above
    print SET1, SET2
    fn = argv[0]
    # Load sequences, derive class labels, and group them for splitting.
    dataset = make_d.read_data(open(fn))
    dataset = make_d.assign_classes(dataset)
    data = make_d.prepare_data(dataset)
    print data.keys(), [len(v) for v in data.values()]
    # libsvm parameters; '-b 1' enables probability estimates.
    param = svm.svm_parameter('-b 1')
    if KERNEL_TYPE == 'LINEAR':
        param.kernel_type = svm.LINEAR
        # Linear kernel has no gamma; collapse the search range to one value.
        GAMMA_RANGE = 1, 0, -2
    else:
        param.kernel_type = svm.RBF
    cvfunc = svmfun.leave_one_out  # cross-validation strategy for model selection
    n_cv = None
    # Two-group mode is active only when BOTH class sets were given.
    limit_sets = not SET1 is None and not SET2 is None
    # Build the log-file stem from the input filename (strip FASTA suffixes),
    # or from the class-set pair, e.g. "01vs23", in two-group mode.
    outfile = os.path.basename(fn)
    outfile = outfile.replace('.fasta', '')
    outfile = outfile.replace('.fas', '')
    if limit_sets:
        outfile = ''.join(map(str, map(int, SET1))) + 'vs'
        outfile += ''.join(map(str, map(int, SET2)))
    log_name = '%s-%s-%i-%s.csv' % (TIMESTAMP, KERNEL_TYPE, int(RANDOMIZE_DATA), outfile)
    logfile = open(log_name, 'w')  # NOTE(review): never closed in the visible portion
    while i < N_RUNS:
        # Progress indicator on one line: "0 1 2 ..." (Python 2 stdout).
        sys.stdout.write('%i ' % i)
        sys.stdout.flush()
        if limit_sets:
            # Restrict to a two-group comparison: merge the multi-class data
            # into SET1-vs-SET2 before splitting.
            new_sets = make_d.merge_multiclasses(data, SET1, SET2)
            sets = make_d.make_set(new_sets, training_fraction=0.75)
        else:
            sets = make_d.make_set(data, training_fraction=0.75)
        train_y, train_x, test_y, test_x = sets
        if RANDOMIZE_DATA:
            # Permutation control: shuffle labels independently of the
            # features to estimate a chance-level baseline.
            random.shuffle(train_y)
            random.shuffle(test_y)
            pass
        print [len(x) for x in sets]  # sizes of train_y/train_x/test_y/test_x (Python 2 print)
        # NOTE(review): visible body ends here — `i` is never incremented in
        # this chunk, so the loop continuation must be elsewhere; confirm.