def main(): print("We're going to do some regression now") # read data print("Reading the data from ../data/ ; run me from /src") fft_dict, fft_labels, ffts = utils.read_features(feature='fft') mfc_dict, mfc_labels, mfcs = utils.read_features(feature='mfc') # fit classifiers lrc_fft = LogisticRegressionClassifier(ffts, fft_labels, fft_dict) lrc_mfc = LogisticRegressionClassifier(mfcs, mfc_labels, mfc_dict) # cross validate print("training the first fft model with 10-fold CV") lrc_fft.cross_validate(k=10) # get high variance features and retrain print("extracting features from that model/the fft data") from sklearn.feature_selection import VarianceThreshold sel = VarianceThreshold(0.01150) fft_variance = sel.fit_transform(ffts) lr_fftvar = LogisticRegressionClassifier(fft_variance, fft_labels, fft_dict) print("training the first reduced fft model with 10-fold CV") lr.fftvar.cross_validate() # try with fscores print("training the mfcs with 10-fold CV") lrc_mfc.cross_validate(10)
def main():
    # Get the arguments
    args = parse_arguments()

    # Get the training parameters
    class_size = args.class_size if args.class_size is not None else DEFAULT_CLASS_SIZE

    # Get the train and dev feature files
    train_files = args.train_files
    dev_files = args.dev_files

    # Create the list of models to train
    models = []
    if args.svm:
        models.append(svm.SVM())
    if args.mlp:
        models.append(mlp.MLP(10))

    # If there are no development files, perform cross-validation
    if dev_files is None:
        train_data, train_labels = utils.read_features(train_files)
        models = train_cross_validation(train_data, train_labels, models, class_size)
    # Otherwise use the development files
    else:
        train_data, train_labels = utils.read_features(train_files)
        dev_data, dev_labels = utils.read_features(dev_files)
        train(train_data, train_labels, dev_data, dev_labels)
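# One plausible shape for the train_cross_validation helper used above,
# assuming each model exposes sklearn-style fit/predict; the repo's own
# helper (and how it uses class_size) may differ.
import numpy as np
from sklearn.model_selection import StratifiedKFold

def train_cross_validation(train_data, train_labels, models, class_size, n_splits=10):
    data = np.asarray(train_data)
    labels = np.asarray(train_labels)
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
    for model in models:
        accuracies = []
        for train_idx, dev_idx in folds.split(data, labels):
            model.fit(data[train_idx], labels[train_idx])
            predictions = model.predict(data[dev_idx])
            accuracies.append(float(np.mean(predictions == labels[dev_idx])))
        print("%s: mean accuracy %.3f" % (type(model).__name__, np.mean(accuracies)))
    return models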
def main(): """ Parsing command lines, creating target matrix, fitting BANE and saving the embedding. """ args = parameter_parser() tab_printer(args) P = read_graph(args) X = read_features(args) model = BANE(args, P, X) model.fit() model.save_embedding()
def __init__(self, args):
    """
    Initializing the training object.
    :param args: Arguments parsed from the command line.
    """
    self.args = args
    self.graph = read_graph(self.args.edge_path)
    self.features = read_features(self.args.feature_path)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.initialize_model()
    self.simulate_walks()
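# A hedged guess at the read_graph helper used by the trainer above,
# assuming the edge list is stored as a two-column CSV of node ids;
# the repo's actual loader may differ.
import pandas as pd
import networkx as nx

def read_graph(edge_path):
    edges = pd.read_csv(edge_path).values.tolist()
    return nx.from_edgelist(edges)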
def execute_factorization():
    """
    Reading the target matrix, running the optimization, and saving the factors to disk.
    """
    args = parameter_parser()
    tab_printer(args)
    X = read_features(args.input_path)
    print("\nTraining started.\n")
    model = ADMM_NMF(X, args)
    model.optimize()
    print("\nFactors saved.\n")
    model.save_user_factors()
    model.save_item_factors()
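# For context on what the factorization computes: ADMM_NMF approximates a
# nonnegative matrix X with low-rank factors U, V such that X ≈ U V^T.
# This toy snippet demonstrates the same objective with scikit-learn's NMF
# solver purely as an illustration; the ADMM optimizer above is the repo's
# own implementation.
import numpy as np
from sklearn.decomposition import NMF

X_demo = np.abs(np.random.rand(20, 12))        # stand-in target matrix
nmf = NMF(n_components=4, init="random", random_state=0, max_iter=500)
U = nmf.fit_transform(X_demo)                  # "user" factors, shape (20, 4)
V = nmf.components_.T                          # "item" factors, shape (12, 4)
print("reconstruction error:", np.linalg.norm(X_demo - U @ V.T))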
def main():
    # Read the arguments
    args = parse_arguments()

    # Get the class size
    class_size = args.class_size if args.class_size is not None else DEFAULT_CLASS_SIZE

    # Read the features from the test files
    test_files = args.test_files

    # Ensure at least one test file is passed in
    if test_files is None:
        print('Error. Please provide testing feature files')
        exit(1)
    test_data, test_labels = utils.read_features(test_files)
    test_data, test_labels, label_map = utils.partition(test_data, test_labels, class_size)

    # Read and load the model
    model = None
    if args.svm:
        model = svm.SVM()
        model.load(args.model)
    if args.mlp:
        model = mlp.MLP(10)
        model.load(args.model)

    # Ensure a model was created
    if model is None:
        print('Error. Model invalid')
        exit(1)

    # Test the model
    predictions = model.predict(test_data)
    accuracy = sum(1 for label, predicted in zip(test_labels, predictions)
                   if label == predicted) / len(predictions)

    # Output results
    print('Accuracy is:', accuracy)
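# The hand-rolled accuracy ratio above is equivalent to scikit-learn's
# helper, shown here only as a cross-check:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(test_labels, predictions)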
import naive_bayes_classifier as lib

clf = lib.NaiveBayesClassifier("hi")
# X = [[1, 2], [3, 4], [5, 6]]
# y = [1, 2, 3]
# clf.train(X, y)
# category = clf.predict([1, 2, 3, 4, 5, 6])
# print(category)

# import impl as nb
import utils
import numpy as np

path = '20news-bydate-matlab/matlab'

features = utils.read_features("expanded.txt")
label_array = utils.read_label(path, 'train.label')
print(len(label_array))

answer_label_array = utils.read_label(path, 'test.label')
test_features = utils.read_features("test_expanded.txt")
vocab = utils.read_vocab("vocabulary.txt")

# remove stop words
from stop_words import get_stop_words
stop_words = get_stop_words('en')
# vocab = set(vocab)
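# A sketch of the stop-word filtering the commented-out line hints at:
# drop vocabulary entries that are English stop words, and drop the
# matching feature columns. The assumption that vocab indexes align with
# feature-matrix columns is mine, not confirmed by this script.
stop_set = set(stop_words)
keep_idx = [i for i, word in enumerate(vocab) if word not in stop_set]
vocab = [vocab[i] for i in keep_idx]
features = features[:, keep_idx]            # assumes 2-D numpy arrays
test_features = test_features[:, keep_idx]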