Example #1
def train():
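    """Log features from Elasticsearch for each judged query, then train and store a set of RankLib models."""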
    from judgments import judgments_from_file, judgments_by_qid

    es = elastic_connection(timeout=1000)
    # Load features into Elasticsearch
    init_default_store()
    load_features(FEATURE_SET_NAME)
    # Parse judgments, grouped by query id
    movieJudgments = judgments_by_qid(
        judgments_from_file(filename=JUDGMENTS_FILE))
    # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set
    # output as "sample_judgments_wfeatures.txt"
    log_features(es, judgments_dict=movieJudgments, search_index=INDEX_NAME)
    build_features_judgments_file(movieJudgments,
                                  filename=JUDGMENTS_FILE_FEATURES)
    # Train each ranklib model type
    for modelType in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        # 0, MART
        # 1, RankNet
        # 2, RankBoost
        # 3, AdaRank
        # 4, Coordinate Ascent
        # 5, LambdaRank (not listed in RankLib's CLI help)
        # 6, LambdaMART
        # 7, ListNet
        # 8, Random Forests
        # 9, Linear Regression
        Logger.logger.info("*** Training %s " % modelType)
        train_model(judgments_with_features_file=JUDGMENTS_FILE_FEATURES,
                    model_output='model.txt',
                    which_model=modelType)
        save_model(script_name="gsearch_model_%s" % modelType,
                   feature_set=FEATURE_SET_NAME,
                   model_fname='model.txt')
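A minimal driver for this pipeline might look like the sketch below; the constants it relies on (JUDGMENTS_FILE, FEATURE_SET_NAME, INDEX_NAME, JUDGMENTS_FILE_FEATURES) are module-level settings in the original script and are not defined in this excerpt.

if __name__ == "__main__":
    # Assumes Elasticsearch is reachable and the LTR feature store is initialized by train().
    train()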
Example #2
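The imports are not shown in this excerpt; a probable set, assuming load_features, get_skip_interval, and kpca_preprocess_features are project-local helpers, would be:

import gc
import pickle

from numpy import vstack
from sklearn.decomposition import KernelPCA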
def gogo_kpca(fxpath, mpath):
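    """Fit an RBF kernel PCA per subject over its combined feature matrices and pickle the transformer."""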

    kpca_params = {
        'n_components': 256,
        'kernel': 'rbf',
        'gamma': None,
        'degree': 3,
        'coef0': 1,
        'kernel_params': None,
        'alpha': 1.0,
        'fit_inverse_transform': False,
        'eigen_solver': 'auto',
        'tol': 0,
        'max_iter': None,
        'remove_zero_eig': True
    }

    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath

    for i in range(7):
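        # i = 0..4 -> dog subjects 1..5; i = 5..6 -> human subjects 1..2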
        if i < 5:
            nbreed = 1
            sbreed = 'dog'
            nsubject = i + 1
        else:
            nbreed = 2
            sbreed = 'human'
            nsubject = 1 + abs(5 - i)

        print('breed%d.subject%d..' % (nbreed, nsubject))

        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)

        X = vstack((X_inter, X_ictal))
        del X_inter, X_ictal
        gc.collect()

        X_test = load_features(fxpath, nbreed, nsubject, 3)

        X = vstack((X, X_test))
        del X_test
        gc.collect()

        kpca = KernelPCA(**kpca_params)
        skip_interval = get_skip_interval(X)
        X = kpca_preprocess_features(X)
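        # Fit on a strided subsample (every skip_interval-th row); kernel PCA's kernel matrix grows quadratically with sample count.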
        kpca.fit(X[::skip_interval])
        with open(kpca_fname.format(sbreed, nsubject), 'wb') as f:
            pickle.dump(kpca, f)

        del X, kpca
        gc.collect()
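The pickled transformers written here are read back by the bagged SVM in Example #9 below (same kpca_rbf_{0}_{1}.pkl naming).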
Example #4
def main():
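    # Load labeled features, split each label's samples into train/test, train the network, and report accuracy.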

    data = load_data()
    features_cnt, label_features = load_features.load_features(data["inputFile"])
    # for label in label_features:
    #     random.shuffle(label_features[label])  # shuffles in place; shuffle() returns None
        
    training_data = {}
    for label in label_features:
        training_data.update({label: label_features[label][:data["traininSamplesCnt"]]})

    testing_data = {}
    for label in label_features:
        testing_data.update({label: label_features[label][data["traininSamplesCnt"]:]})
    
    network = NeuralNetwork(features_cnt,
                            data["hiddenLayers"]["cnt"],
                            data["hiddenLayers"]["layersNeuronsCnt"],
                            data["hiddenLayers"]["layersActivFns"],
                            data["outputLayer"]["neorunsCnt"],
                            data["outputLayer"]["activFn"],
                            data["withBias"])

    model, loss_curve = network.train(training_data,
                                      data["eta"],
                                      data["epochsNo"],
                                      data["stopMSE"],
                                      data["MSE"])

    draw_result.draw_training(loss_curve)

    accuracy = network.test(testing_data, data["outputLayer"]["neorunsCnt"]) * 100
    print("Accuracy: %f%%" % accuracy)
Example #5
def training_pipeline():
    from utils import elastic_connection
    es = elastic_connection()
    file_judgments = parse_data_and_get_judgement()
    print(file_judgments)
    init_default_store()
    load_features(FEATURE_SET_NAME)
    log_features(es, judgments_dict=file_judgments, search_index=INDEX_NAME)
    build_features_judgments_file(file_judgments,
                                  filename=JUDGMENTS_FILE_FEATURES)

    # Train only LambdaMART (6), ListNet (7), and Linear Regression (9); Example #1 lists all model types.
    # for modelType in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
    for modelType in [6, 7, 9]:
        Logger.logger.info("*** Training %s " % modelType)
        train_model(judgments_with_features_file=JUDGMENTS_FILE_FEATURES,
                    model_output='model.txt',
                    which_model=modelType)
        save_model(script_name="test_%s" % modelType,
                   feature_set=FEATURE_SET_NAME,
                   model_fname='model.txt')
Example #6
def upload_model():
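    # Re-uploads a previously trained RankLib model ('model.txt') to the LTR feature store without retraining.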
    init_default_store()
    load_features(FEATURE_SET_NAME)
    save_model(script_name="test_9",
               feature_set=FEATURE_SET_NAME,
               model_fname='model.txt')
Example #7
                             data=json.dumps(model_payload),
                             headers=head,
                             auth=ES_AUTH,
                             verify=False)
        Logger.logger.info(resp.status_code)
        if resp.status_code >= 300:
            Logger.logger.error(resp.text)


if __name__ == "__main__":
    from judgments import judgments_from_file, judgments_by_qid

    es = elastic_connection(timeout=1000)
    # Load features into Elasticsearch
    init_default_store()
    load_features(FEATURE_SET_NAME)
    # Parse judgments, grouped by query id
    movieJudgments = judgments_by_qid(
        judgments_from_file(filename=JUDGMENTS_FILE))
    # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set
    # output as "sample_judgments_wfeatures.txt"
    log_features(es, judgments_dict=movieJudgments, search_index=INDEX_NAME)
    build_features_judgments_file(movieJudgments,
                                  filename=JUDGMENTS_FILE_FEATURES)
    # Train each ranklib model type
    # for modelType in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
    for modelType in [6]:
        # 0, MART
        # 1, RankNet
        # 2, RankBoost
        # 3, AdaRank
        # 4, Coordinate Ascent
        # 6, LambdaMART
        # 7, ListNet
        # 8, Random Forests
        # 9, Linear Regression
Example #8
# For small data, change n_train and n_test in utils.py, and n_components in fisher_feature_extractor.py
folder_name = 'data/'
# folder_name = 'data_small/'

nclasses = 10
classifier = 'svm_ovo'
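# 'svm_ovo' selects a one-vs-one SVM over the nclasses classes; 'cross_entropy' selects the branch further below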
do_validation = True
validation = 0.2
do_prediction = False

svm_kernel = LinearKernel()
#svm_kernel = LaplacianRBFKernel(1.6)
C = 1
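# feature_extractor, overwrite_features, overwrite_kpca, kernel_pca,
# kernel_pca_kernel, and cut_percentage are assumed to be set earlier in the original script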

Xtrain, Ytrain, Xtest = load_features(feature_extractor, overwrite_features,
                                      overwrite_kpca, kernel_pca,
                                      kernel_pca_kernel, cut_percentage,
                                      folder_name)
#Xtrain = numpy.reshape(Xtrain, (Xtrain.shape[0], -1))
#Xtest = numpy.reshape(Xtest, (Xtest.shape[0], -1))
print(Xtrain.shape)
print(Xtest.shape)
assert Xtrain.ndim == 2 and Xtrain.shape[1] == Xtest.shape[1]

print("Fitting on training data")
if classifier == 'cross_entropy':
    Xtrain = concat_bias(Xtrain)
    Xtest = concat_bias(Xtest)

    model = CrossEntropyClassifier(nclasses)
    iterations = 500
    lr = 0.01
Example #9
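As above, the imports are not shown; a probable set, assuming BC is scikit-learn's BaggingClassifier (its constructor parameters match that API) and the kpca_* helpers are project-local, would be:

import gc
import sys
import pickle

import pandas as pd
from numpy import array, vstack
from sklearn.svm import LinearSVC
from sklearn.ensemble import BaggingClassifier as BC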
def gogo_bagged_svm(fxpath, mpath, spath):
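    """Per subject: load features, apply the pickled KPCA, fit a bagged linear SVM, and write test-set probabilities to CSV."""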

    transform = True

    svc_params = {'penalty':'l2',
                  'loss':'l2', 
                  'dual':False,
                  'C':33.0, 
                  'intercept_scaling':1e4, 
                  'class_weight':'auto',
                  'random_state':42}
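    # NOTE: loss='l2' and class_weight='auto' are older scikit-learn spellings;
    #  recent releases renamed them to 'squared_hinge' and 'balanced'.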

    bc_params = {'base_estimator':LinearSVC(**svc_params),
                 'n_estimators':96, 
                 'max_samples':0.1, 
                 'max_features':0.8,  
                 'oob_score':False,
                 
                 # if you have tons of memory (i.e. 32gb ram + 32gb swap)
                 #  incresaing this parameter may help performance.  else,
                 #  increasing it may cause "out of memory" errors.
                 'n_jobs':1,
                 #'n_jobs':8,

                 'verbose':1,
                 'random_state':42}

    '''
    lr_params = {'C':1e6,#tr(-3,3,7),
                 'penalty':'l2',
                 'class_weight':'auto',
                 'intercept_scaling':1e6}#tr(-1,6,7)}
    '''

    preds = []

    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath
    s_fname = '%s/kpca_linear_svm{0}_{1}_preds.csv' % spath

    for i in range(7):
        if i < 5:
            nbreed = 1
            sbreed = 'dog'
            nsubject = i+1
        else:
            nbreed = 2
            sbreed = 'human'
            nsubject = 1 + abs(5-i)

        print('breed%d.subject%d..' % (nbreed, nsubject))

        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)

        X_train = vstack((X_inter, X_ictal))
        Y = [0 for x in X_inter] + [1 for x in X_ictal]
        wi = 1.0 / len(X_inter) * 1000
        wp = 1.0 / len(X_ictal) * 1000
        W = array([wp if y else wi for y in Y])
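        # NOTE: W (per-class sample weights) is computed but never passed to bc.fit() below.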
    
        del X_inter, X_ictal
        gc.collect()

        with open(kpca_fname.format(sbreed, nsubject), 'rb') as f:
            kpca = pickle.load(f)

        if transform:
            X_train = kpca_preprocess_features(X_train)
            X_train = kpca_incremental_transform(kpca, X_train)
            gc.collect()

        X_test = load_features(fxpath, nbreed, nsubject, 3)
        if transform:
            X_test = kpca_preprocess_features(X_test)
            X_test = kpca_incremental_transform(kpca, X_test)
            gc.collect()
            
        bc = BC(**bc_params)
        bc.fit(X_train, Y)

        # print('oob_score: ', bc.oob_score_)
        subject_preds = bc.predict_proba(X_test)[:, 1]

        preds.append(subject_preds)
        subject_preds = pd.DataFrame(subject_preds)

        subject_preds.to_csv(s_fname.format(sbreed, nsubject), index=False, header=None)

        del X_train, X_test
        gc.collect()
        sys.stdout.flush()