# Load the first SIZE_DATA rows of covtype.data.  The file is read once and
# closed deterministically; every row is split on ',' a single time and then
# reused for both the feature vectors and the labels.
with open('covtype.data') as _data_file:
    _rows = [line.split(',')
             for line in _data_file.read().splitlines()[:SIZE_DATA]]

# All fields except the last are parsed as integer features; the last field
# is kept as the raw (string) class label.
X = [list(map(int, fields[:-1])) for fields in _rows]
_Y = [fields[-1] for fields in _rows]

larg = largestClass(_Y)
# treat the largest class as positive, the rest as negative
Y = [1 if x == larg else -1 for x in _Y]

# NOTE(review): 5000 / len(X) is only a fraction in (0, 1) under true
# division and when len(X) > 5000 -- confirm SIZE_DATA guarantees that.
xTrain, xTest, yTrain, yTest = cv.train_test_split(
    X, Y, train_size=5000 / len(X))


# In[2]:

import Classifiers as clfs

# Run each classifier routine on the same train/test split.
clfs.KNN(xTrain, xTest, yTrain, yTest)
clfs.RandomForest(xTrain, xTest, yTrain, yTest)
clfs.BoostedDecisionTree(xTrain, xTest, yTrain, yTest)
clfs.NeuralNets(xTrain, xTest, yTrain, yTest)
#clfs.SVM(xTrain, xTest, yTrain, yTest)
clfs.linearSVC(xTrain, xTest, yTrain, yTest)

clfs.XGBoost(xTrain, xTest, yTrain, yTest)


# In[ ]:
# Command-line dispatch: sys.argv[1] selects which routine of `clf` is run on
# the TF-IDF-converted train/test data.  The table maps each accepted mode
# string to the name of the `clf` attribute that is called for it.  An
# unrecognised mode matches nothing and is silently ignored.
_clf_routine_by_mode = {
    "lr": "lr",
    "svm": "SVM_predict",
    "rfc": "randomforest_predict",
    "gbc": "XGBoost",
    "abc": "ADABoost",
    "nn": "NN",
    "dt": "Decision_tree",
}

if sys.argv[1] in _clf_routine_by_mode:
    Traindata, TrainLabels, testdata, testlabels = convert_to_tfidf(
        Traindata, TrainLabels, testdata, testlabels)
    # The routine is looked up by name only after the conversion, and only
    # for the selected mode, so routines that are not selected are never
    # touched.
    getattr(clf, _clf_routine_by_mode[sys.argv[1]])(
        Traindata, TrainLabels, testdata, testlabels)

# "automl" performs the same conversion; no classifier call for this mode is
# visible here, so the branch is left exactly as it was.
if sys.argv[1] == "automl":
    Traindata, TrainLabels, testdata, testlabels = convert_to_tfidf(
        Traindata, TrainLabels, testdata, testlabels)