def sparseAdaboost(svmfilepath="combinedsvm.dat", learners=100, factor=1):
    """Train and evaluate AdaBoost on a dataset stored in svmlight format.

    The file is loaded and densified, and every row gets its label appended
    as the last element.  Rows labelled ``1`` and ``-1`` are collected into
    positive and negative example lists and handed to ``splitData``.  One
    ``AdaBoostClassifier`` is fitted per train/test pair, the confusion
    counts are summed over the pairs, and the aggregate metrics are printed.

    Args:
        svmfilepath: Path passed to ``load_svmlight_file``.
        learners: ``n_estimators`` for each ``AdaBoostClassifier``.
        factor: Forwarded to ``splitData`` as ``factor`` and reported as the
            oversampling factor.

    Returns:
        None.  All results are written to standard output.
    """
    sparsedata, target = load_svmlight_file(svmfilepath)
    rows = sparsedata.toarray().tolist()
    labels = target.tolist()

    posExamples = []
    negExamples = []
    for row, label in zip(rows, labels):
        # An example is its feature list with the label appended last.
        row.append(label)
        if label == 1:
            posExamples.append(row)
        elif label == -1:
            negExamples.append(row)
        # Rows carrying any other label end up in neither list.

    trainSets, testSets = splitData(posExamples, negExamples, factor=factor)
    sum1 = sum([example[-1] for example in trainSets[0] if example[-1] == 1])

    # Each print receives one pre-formatted string so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    print('oversampling by factor of %s' % (factor,))
    print('%s  positive  %s  negative in trainsets'
          % (sum1, len(trainSets[0]) - sum1))

    TP = TN = FP = FN = 0
    # NOTE(review): assumes splitData returns at least three train/test
    # pairs -- confirm against its definition.
    for i in range(3):
        clf = AdaBoostClassifier(n_estimators=learners)
        training = trainSets[i]
        testing = testSets[i]
        trIn = [x[:-1] for x in training]
        trOut = [x[-1] for x in training]
        tsIn = [x[:-1] for x in testing]
        expected = [x[-1] for x in testing]

        clf.fit(trIn, trOut)
        actual = clf.predict(tsIn).tolist()

        tp, tn, fp, fn = PDneuralnets.confusionMatrix(expected, actual, neg=-1.0)
        TP += tp
        TN += tn
        FP += fp
        FN += fn

    print('%s %s %s %s' % (TP, TN, FP, FN))
    results = PDneuralnets.evaluate(TP, TN, FP, FN)
    recall = results['r']
    precision = results['p']
    # The F-score is undefined when precision and recall are both zero;
    # report 0.0 in that case rather than raising ZeroDivisionError.
    denominator = recall + precision
    f = (2 * recall * precision) / denominator if denominator else 0.0
    print('F SCORE %s' % (f,))
    print(results)
    return
def runAdaboost(dataset_name="Parkinsons", learners=1000, k=10):
    """Run k-fold cross-validation of AdaBoost on a named dataset.

    The dataset is loaded through ``dataparser.DataSet``; the first element
    of every example is dropped, the last element is used as the integer
    target, and the remaining elements are used as float features.  The
    examples are shuffled, dealt into ``k`` interleaved buckets, and each
    bucket is used once as the test set for a classifier trained on the
    other buckets.  Confusion counts are summed over the folds and the
    aggregate metrics are printed.

    Args:
        dataset_name: Name passed to ``dataparser.DataSet``.
        learners: ``n_estimators`` for each ``AdaBoostClassifier``.
        k: Number of cross-validation folds.  Defaults to 10, the value that
            used to be hard-coded.

    Returns:
        None.  All results are written to standard output.
    """
    d = dataparser.DataSet(name=dataset_name)
    examples = [example[1:] for example in d.examples]
    n_samples = len(examples)
    n_features = len(examples[0]) - 1

    # Builtin int/float replace the deprecated np.int/np.float aliases
    # (removed in NumPy 1.24); the resulting dtypes are the same.
    data = np.empty((n_samples, n_features))
    target = np.empty((n_samples,), dtype=int)
    for i, row in enumerate(examples):
        data[i] = np.asarray(row[:-1], dtype=float)
        target[i] = np.asarray(row[-1], dtype=int)

    # Disabled alternative: AdaBoost over an SVC weak learner.
    # weak_learner = SVC(C=10000,kernel='poly',degree=1,probability=True)
    # clf = AdaBoostClassifier(base_estimator=weak_learner, n_estimators=learners)

    # Re-attach each label as the last element so shuffling keeps features
    # and label together.
    inputsList = data.tolist()
    outputsList = target.tolist()
    for inputs, output in zip(inputsList, outputsList):
        inputs.append(output)

    random.shuffle(inputsList)
    buckets = [inputsList[i::k] for i in range(k)]

    TP = TN = FP = FN = 0
    for i in range(k):
        clf = AdaBoostClassifier(n_estimators=learners)
        testing = buckets[i]
        # Flatten every bucket except the held-out one into the training set.
        training = [datum
                    for bucket in buckets[:i] + buckets[i + 1:]
                    for datum in bucket]
        trIn = [x[:-1] for x in training]
        trOut = [x[-1] for x in training]
        tsIn = [x[:-1] for x in testing]
        expected = [x[-1] for x in testing]

        clf.fit(trIn, trOut)
        actual = clf.predict(tsIn)

        tp, tn, fp, fn = PDneuralnets.confusionMatrix(expected, actual)
        TP += tp
        TN += tn
        FP += fp
        FN += fn

    # One pre-formatted string keeps the output the same under the Python 2
    # print statement and the Python 3 print function.
    print('%s %s %s %s' % (TP, TN, FP, FN))
    results = PDneuralnets.evaluate(TP, TN, FP, FN)
    print(results)
    return
def combinedAdaboost(dataset_name="Parkinsons", learners=100, factor=180):
    """Train and evaluate AdaBoost on a dataset combined with PDregression.

    ``combineDatasets`` is called with the named dataset and
    ``dataparser.PDregression`` and returns two example lists.  The first is
    divided into positives (last element ``1``) and negatives (last element
    ``0``); the second is shuffled and passed to ``splitData`` as
    ``additional``.  One ``AdaBoostClassifier`` is fitted per train/test
    pair, the confusion counts are summed over the pairs, and the aggregate
    metrics are printed.

    Args:
        dataset_name: Name passed to ``dataparser.DataSet``.
        learners: ``n_estimators`` for each ``AdaBoostClassifier``.
        factor: Forwarded to ``splitData`` as ``factor`` and reported as the
            oversampling factor.

    Returns:
        None.  All results are written to standard output.
    """
    d = dataparser.DataSet(name=dataset_name)
    examples1, examples2 = combineDatasets(d, dataparser.PDregression)
    posExamples = [example for example in examples1 if example[-1] == 1]
    negExamples = [example for example in examples1 if example[-1] == 0]
    random.shuffle(examples2)
    trainSets, testSets = splitData(posExamples, negExamples,
                                    factor=factor, additional=examples2)

    # Each print receives one pre-formatted string so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    print('oversampling by factor of %s' % (factor,))
    # With labels of 0 and 1 the sum of the labels is the positive count.
    sum1 = sum([example[-1] for example in trainSets[0]])
    print('%s  positive  %s  negative in trainsets'
          % (sum1, len(trainSets[0]) - sum1))

    # Disabled alternative: AdaBoost over an SVC weak learner.
    # weak_learner = SVC(C=10000,kernel='poly',degree=1,probability=True)
    # clf = AdaBoostClassifier(base_estimator=weak_learner, n_estimators=learners)

    TP = TN = FP = FN = 0
    # NOTE(review): assumes splitData returns at least three train/test
    # pairs -- confirm against its definition.
    for i in range(3):
        # A fresh classifier per split; nothing is carried over between fits.
        clf = AdaBoostClassifier(n_estimators=learners)
        training = trainSets[i]
        testing = testSets[i]
        trIn = [x[:-1] for x in training]
        trOut = [x[-1] for x in training]
        tsIn = [x[:-1] for x in testing]
        expected = [x[-1] for x in testing]

        clf.fit(trIn, trOut)
        actual = clf.predict(tsIn)

        tp, tn, fp, fn = PDneuralnets.confusionMatrix(expected, actual)
        TP += tp
        TN += tn
        FP += fp
        FN += fn

    print('%s %s %s %s' % (TP, TN, FP, FN))
    results = PDneuralnets.evaluate(TP, TN, FP, FN)
    recall = results['r']
    precision = results['p']
    # The F-score is undefined when precision and recall are both zero;
    # report 0.0 in that case rather than raising ZeroDivisionError.
    denominator = recall + precision
    f = (2 * recall * precision) / denominator if denominator else 0.0
    print('F SCORE %s' % (f,))
    print(results)
    return