コード例 #1
0
def main():
    """Cross-validate logistic-regression classifiers on FFT and MFC features.

    Reads the 'fft' and 'mfc' feature sets through ``utils.read_features``,
    builds one ``LogisticRegressionClassifier`` per feature set, and runs
    10-fold cross-validation on:

    1. the full FFT features,
    2. the FFT features left after a ``VarianceThreshold(0.01150)`` filter,
    3. the MFC features.

    Progress messages are printed to stdout; nothing is returned.
    """
    print("We're going to do some regression now")
    # read data
    print("Reading the data from ../data/ ; run me from /src")
    fft_dict, fft_labels, ffts = utils.read_features(feature='fft')
    mfc_dict, mfc_labels, mfcs = utils.read_features(feature='mfc')
    # fit classifiers
    lrc_fft = LogisticRegressionClassifier(ffts, fft_labels, fft_dict)
    lrc_mfc = LogisticRegressionClassifier(mfcs, mfc_labels, mfc_dict)
    # cross validate
    print("training the first fft model with 10-fold CV")
    lrc_fft.cross_validate(k=10)
    # keep only the FFT features that pass the variance threshold and retrain
    print("extracting features from that model/the fft data")
    from sklearn.feature_selection import VarianceThreshold
    sel = VarianceThreshold(0.01150)
    fft_variance = sel.fit_transform(ffts)
    lr_fftvar = LogisticRegressionClassifier(fft_variance, fft_labels,
                                             fft_dict)
    print("training the first reduced fft model with 10-fold CV")
    # Was `lr.fftvar.cross_validate()`, which raised NameError because no
    # `lr` exists. k=10 is passed explicitly to match the message above.
    lr_fftvar.cross_validate(k=10)

    # cross-validate the MFC model
    print("training the mfcs with 10-fold CV")
    lrc_mfc.cross_validate(k=10)
コード例 #2
0
ファイル: train.py プロジェクト: javierlores/CloudID
def main():
    """Train the requested models from the command-line arguments.

    Parses the arguments, builds the list of models selected by the
    ``svm`` / ``mlp`` flags, and reads the training features. When no
    development files are given the models go through
    ``train_cross_validation``; otherwise the development features are
    read as well and ``train`` is called with both data sets.
    """
    # Get the arguments
    args = parse_arguments()

    # Fall back to the default class size when none was supplied
    class_size = DEFAULT_CLASS_SIZE if args.class_size is None else args.class_size

    # Create the list of models to train
    models = []
    if args.svm:
        models.append(svm.SVM())
    if args.mlp:
        models.append(mlp.MLP(10))

    # Both paths need the training features, so read them once up front
    train_data, train_labels = utils.read_features(args.train_files)

    if args.dev_files is None:
        # No development files: perform cross-validation
        models = train_cross_validation(train_data, train_labels, models,
                                        class_size)
    else:
        # Otherwise evaluate against the development files
        dev_data, dev_labels = utils.read_features(args.dev_files)

        # NOTE(review): neither `models` nor `class_size` is passed to
        # train(); confirm against its definition that this is intended.
        train(train_data, train_labels, dev_data, dev_labels)
コード例 #3
0
ファイル: train.py プロジェクト: javierlores/CloudID
def main():
    """Train the requested models from the command-line arguments.

    Parses the arguments, builds the list of models selected by the
    ``svm`` / ``mlp`` flags, and reads the training features. When no
    development files are given the models go through
    ``train_cross_validation``; otherwise the development features are
    read as well and ``train`` is called with both data sets.
    """
    # Get the arguments
    args = parse_arguments()

    # Fall back to the default class size when none was supplied
    class_size = DEFAULT_CLASS_SIZE if args.class_size is None else args.class_size

    # Create the list of models to train
    models = []
    if args.svm:
        models.append(svm.SVM())
    if args.mlp:
        models.append(mlp.MLP(10))

    # Both paths need the training features, so read them once up front
    train_data, train_labels = utils.read_features(args.train_files)

    if args.dev_files is None:
        # No development files: perform cross-validation
        models = train_cross_validation(train_data, train_labels, models, class_size)
    else:
        # Otherwise evaluate against the development files
        dev_data, dev_labels = utils.read_features(args.dev_files)

        # NOTE(review): neither `models` nor `class_size` is passed to
        # train(); confirm against its definition that this is intended.
        train(train_data, train_labels, dev_data, dev_labels)
コード例 #4
0
ファイル: main.py プロジェクト: mraihan19/BANE
def main():
    """
    Entry point: parse the command line, print the settings, load the graph
    and feature inputs, then fit a BANE model and save its embedding.
    """
    settings = parameter_parser()
    tab_printer(settings)

    # Both readers receive the full parsed-argument object.
    graph_input = read_graph(settings)
    feature_input = read_features(settings)

    bane = BANE(settings, graph_input, feature_input)
    bane.fit()
    bane.save_embedding()
コード例 #5
0
ファイル: sine.py プロジェクト: mosaddek-hossain/SINE
 def __init__(self, args):
     """
     Set up the training object: store the arguments, load the graph and
     the features, pick the torch device, then build the model and run the
     walk simulation.
     :param args: Arguments parsed from command line.
     """
     self.args = args

     # Inputs are read from the paths carried by the arguments.
     self.graph = read_graph(args.edge_path)
     self.features = read_features(args.feature_path)

     # Use CUDA when torch reports it as available, otherwise the CPU.
     if torch.cuda.is_available():
         device_name = "cuda"
     else:
         device_name = "cpu"
     self.device = torch.device(device_name)

     self.initialize_model()
     self.simulate_walks()
コード例 #6
0
ファイル: main.py プロジェクト: oliveai/NMFADMM
def execute_factorization():
    """
    Load the target matrix from the input path, run the ADMM NMF
    optimization and write the user and item factors out.
    """
    settings = parameter_parser()
    tab_printer(settings)

    target = read_features(settings.input_path)

    print("\nTraining started.\n")
    factorizer = ADMM_NMF(target, settings)
    factorizer.optimize()

    # NOTE(review): this message is printed before the two save calls run.
    print("\nFactors saved.\n")
    factorizer.save_user_factors()
    factorizer.save_item_factors()
コード例 #7
0
def main():
    """Evaluate a saved model on the test feature files and print its accuracy.

    Parses the command-line arguments, reads and partitions the test
    features, loads the model selected by the ``svm`` / ``mlp`` flags from
    ``args.model``, predicts on the test data, and prints the fraction of
    predictions equal to their labels.

    Exits with status 1 (after printing an error) when no test files are
    given or when neither model flag is set.
    """
    # Read the arguments
    args = parse_arguments()

    # Fall back to the default class size when none was supplied
    class_size = DEFAULT_CLASS_SIZE if args.class_size is None else args.class_size

    # Ensure at least 1 test file is passed in
    test_files = args.test_files
    if test_files is None:
        print('Error. Please provide testing feature files')
        raise SystemExit(1)

    test_data, test_labels = utils.read_features(test_files)
    # The third value returned by partition() is unused here; binding it to
    # `_` also avoids shadowing the builtin `map`.
    test_data, test_labels, _ = utils.partition(test_data, test_labels,
                                                class_size)

    # Read and load the model. Start from None so the check below works when
    # neither flag is set; previously `model` was unbound in that case and
    # the check itself raised UnboundLocalError.
    model = None
    if args.svm:
        model = svm.SVM()
        model.load(args.model)
    if args.mlp:
        model = mlp.MLP(10)
        model.load(args.model)

    # Ensure a model was created
    if model is None:
        print('Error. Model invalid')
        raise SystemExit(1)

    # Test the model
    predictions = model.predict(test_data)
    correct = sum(1 for label, predict in zip(test_labels, predictions)
                  if label == predict)
    accuracy = float(correct) / len(predictions)

    # Output results. Single-argument print gives the same output as the
    # former `print 'Accuracy is: ', accuracy` (two spaces included) and is
    # valid on both Python 2 and Python 3.
    print('Accuracy is:  %s' % accuracy)
0
ファイル: test.py プロジェクト: javierlores/CloudID
def main():
    """Evaluate a saved model on the test feature files and print its accuracy.

    Parses the command-line arguments, reads and partitions the test
    features, loads the model selected by the ``svm`` / ``mlp`` flags from
    ``args.model``, predicts on the test data, and prints the fraction of
    predictions equal to their labels.

    Exits with status 1 (after printing an error) when no test files are
    given or when neither model flag is set.
    """
    # Read the arguments
    args = parse_arguments()

    # Fall back to the default class size when none was supplied
    class_size = DEFAULT_CLASS_SIZE if args.class_size is None else args.class_size

    # Ensure at least 1 test file is passed in
    test_files = args.test_files
    if test_files is None:
        print('Error. Please provide testing feature files')
        raise SystemExit(1)

    test_data, test_labels = utils.read_features(test_files)
    # The third value returned by partition() is unused here; binding it to
    # `_` also avoids shadowing the builtin `map`.
    test_data, test_labels, _ = utils.partition(test_data, test_labels, class_size)

    # Read and load the model. Start from None so the check below works when
    # neither flag is set; previously `model` was unbound in that case and
    # the check itself raised UnboundLocalError.
    model = None
    if args.svm:
        model = svm.SVM()
        model.load(args.model)
    if args.mlp:
        model = mlp.MLP(10)
        model.load(args.model)

    # Ensure a model was created
    if model is None:
        print('Error. Model invalid')
        raise SystemExit(1)

    # Test the model
    predictions = model.predict(test_data)
    correct = sum(1 for label, predict in zip(test_labels, predictions)
                  if label == predict)
    accuracy = float(correct) / len(predictions)

    # Output results. Single-argument print gives the same output as the
    # former `print 'Accuracy is: ', accuracy` (two spaces included) and is
    # valid on both Python 2 and Python 3.
    print('Accuracy is:  %s' % accuracy)
コード例 #9
0
ファイル: runner.py プロジェクト: eggie5/UCSD-MAS-DSE210
# Runner script: builds a NaiveBayesClassifier and loads the train/test
# features, the train/test labels, the vocabulary and the English stop-word
# list. Written for Python 2 (uses the print statement).
import naive_bayes_classifier as lib

# NOTE(review): the meaning of the "hi" constructor argument is not visible
# here, and `clf` is not used in the visible part of the script.
clf =  lib.NaiveBayesClassifier("hi")
# Earlier smoke test of train/predict, left disabled:
# X=[[1,2],[3,4],[5,6]]
# y=[1, 2, 3]
# clf.train(X,y)
# category = clf.predict([1,2,3,4,5,6])
# print category

# import impl as nb
import utils as utils
import numpy as np


# Directory that holds the label files read below.
path = '20news-bydate-matlab/matlab'
# Training features and their labels.
features = utils.read_features("expanded.txt")
label_array = utils.read_label(path, 'train.label')
# Print how many training labels were read.
print len(label_array)




# Test labels (the expected answers) and the test features.
answer_label_array = utils.read_label(path, 'test.label')
test_features = utils.read_features("test_expanded.txt")

vocab = utils.read_vocab("vocabulary.txt")

# Load the English stop-word list; the removal step itself is not in the
# visible part of the script.
from stop_words import get_stop_words
stop_words = get_stop_words('en')
# vocab = set(vocab)