Code Example #1
def main():

	data_folder = 'data/sources/wikipedia'
	models_folder = 'classifier/models'
	save_loc = '/usr/share/nginx/html/wiki'

	if not os.path.exists(data_folder):
		os.makedirs(data_folder)

	if next(os.walk(data_folder))[1]:
		
		retrain = True
		if retrain:
			input, target, classes = data.sample(data_folder)
			model = classifier.build(input.shape, target.shape)
			classifier.train(model, input, target)
			classifier.save(models_folder, model, classes)

		else:
			model, classes = classifier.load(models_folder, sorted(os.listdir(models_folder))[-1])
		
		for root, dirs, files in os.walk(data_folder):
			for file in files:
				if not file.startswith('.'):
					with open(root+'/'+file) as f:
						input = data.str2mat(f.read())
						output = classifier.run(model, input)
						data.backtest(save_loc+'/'+file, classes, input, output)
	else:
		print("""\nNo data found.\nPut subfolders of files by class, within the 'data' folder.""")
Code Example #2
def perform_experiment(percentages, model_config, loader_config, method):
    accuracy_list = []

    for percentage in percentages:
        # print(f"Training of model based on {percentage*100}% deletion of pixels.")

        model_config.set_model('VGG-11')
        model_config.set_optimizer()

        data_dir = f'dataset/roar_{method}/'
        datasetname = f'cifar-{model_config.num_classes}-{percentage*100}%-removed/'

        percentage_loader = DataLoaderConfiguration(path=loader_config.path, data_dir=data_dir, datasetname=datasetname)

        model_config.model_dir = f'saved-models/VGG-11-ROAR-{method}-{percentage*100}.pth'

        if not os.path.exists(model_config.model_dir):
            print(f"Model for {percentage*100}% will be trained now.")
            train(model_config, percentage_loader)
        else:
            model_config.load_model()

        eval_accuracy = parse_epoch(percentage_loader.testloader, model_config.model, model_config.optimizer, model_config.criterion, model_config.device, train=False)
        accuracy_list.append(eval_accuracy)
        
        # print("Eval accur:", eval_accuracy)
        # print("----------------------------------------------")

    return accuracy_list
Code Example #3
def experiment(model_config, loader_config, percentages = [0.1, 0.3, 0.5, 0.7, 0.9]):
    if not os.path.exists(model_config.model_dir):
        print("Cifar-10 model will be trained which is used for data preparation.")
        train(model_config, loader_config)
        

    # If the adjusted data has not been created yet, create it.
    if not os.path.exists(loader_config.path + 'dataset/roar_full_grad/'):
        print("The data for full grad is not found in dataset/roar_full_grad")
        print("Creating it can take a long time, please abort this run and download it from github")
        create_data(percentages, model_config, loader_config, salience_method="full_grad")

    if not os.path.exists(loader_config.path + 'dataset/roar_input_grad/'):
        print("The data for input grad is not found in dataset/roar_input_grad")
        print("Creating it can take a long time, please abort this run and download it from github")
        create_data(percentages, model_config, loader_config, salience_method="input_grad")

    if not os.path.exists(loader_config.path + 'dataset/roar_random/'):
        print("The data for random is not found in dataset/roar_random")
        print("Creating it can take a long time, please abort this run and download it from github")
        create_data(percentages, model_config, loader_config, salience_method="random")

    # Train a model on each variant of the adjusted data
    accuracy_list = []
    accuracy_list.append(perform_experiment(percentages, model_config, loader_config, "full_grad"))
    accuracy_list.append(perform_experiment(percentages, model_config, loader_config, "input_grad"))
    accuracy_list.append(perform_experiment(percentages, model_config, loader_config, "random"))
    return accuracy_list
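A likely next step with the three accuracy lists returned above is to plot one ROAR curve per salience method. The helper below is only a sketch and is not part of the original code; matplotlib, the function name, and the label names are assumptions.

import matplotlib.pyplot as plt

def plot_roar_curves(percentages, accuracy_lists, labels=("full_grad", "input_grad", "random")):
    # One curve per salience method: accuracy after retraining vs. fraction of pixels removed.
    for accuracies, label in zip(accuracy_lists, labels):
        plt.plot([p * 100 for p in percentages], accuracies, marker='o', label=label)
    plt.xlabel("Pixels removed (%)")
    plt.ylabel("Test accuracy")
    plt.legend()
    plt.show()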
Code Example #4
def main():
    if len(sys.argv) != 2:
        usage()

    net = sys.argv[1]
    if net == "classifier":
        classifier.train()
    else:
        print("unknown net")
Code Example #5
def evaluate(train_set, test_set, classifier):
    classifier.train(train_set)
    predictions_list = classifier.predict(test_set)
    acc = get_acc(test_set, predictions_list)
    sens = get_sensivity(test_set, predictions_list)
    spec = get_specifity(test_set, predictions_list)
    prec = get_precision(test_set, predictions_list)
    fmeas = get_fmeas(test_set, predictions_list)
    return acc, sens, spec, prec, fmeas
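The metric helpers used above (get_acc, get_sensivity, get_specifity, get_precision, get_fmeas) are defined elsewhere. Below is a minimal sketch of the underlying formulas, assuming the true and predicted labels are parallel lists of binary values; the names and signatures are illustrative, not the original API.

def confusion_counts(y_true, y_pred, positive=1):
    # Tally true/false positives and negatives for a binary problem.
    tp = sum(1 for t, p in zip(y_true, y_pred) if t == positive and p == positive)
    tn = sum(1 for t, p in zip(y_true, y_pred) if t != positive and p != positive)
    fp = sum(1 for t, p in zip(y_true, y_pred) if t != positive and p == positive)
    fn = sum(1 for t, p in zip(y_true, y_pred) if t == positive and p != positive)
    return tp, tn, fp, fn

def basic_metrics(y_true, y_pred):
    tp, tn, fp, fn = confusion_counts(y_true, y_pred)
    acc = (tp + tn) / len(y_true)
    sens = tp / (tp + fn) if tp + fn else 0.0   # sensitivity (recall)
    spec = tn / (tn + fp) if tn + fp else 0.0   # specificity
    prec = tp / (tp + fp) if tp + fp else 0.0   # precision
    fmeas = 2 * prec * sens / (prec + sens) if prec + sens else 0.0  # F1 score
    return acc, sens, spec, prec, fmeas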
Code Example #6
    def add_buttons(self):
#        self.add_button('Deploy', self.OnDeploy)
#        self.add_button('Save Team', self.OnSave1)
#        self.add_button('Load Team', self.OnLoad1)
        self.add_button('First Person Agent', self.OnFPS)
        self.add_button('Snapshot', self.OnSnapshot)
        from classifier import ObjectClassifier
        classifier = ObjectClassifier()
        classifier.train()
Code Example #7
def retrain(model_folder_name, model_type):
    model_dir = os.path.join(Globals.model_path, model_folder_name)
    processed_dir = os.path.join(model_dir, "data")

    classifier.train(data_dir=processed_dir,
                     session=MyGraph(),
                     classifier_filename=os.path.join(model_dir,
                                                      "classifier.pkl"),
                     model_type=model_type)

    return True, ""
Code Example #8
def main(transductive: bool = False):
    try:
        from classifier import pretrain
    except ImportError:
        part2xy = load_dataset_fast('FILIMDB', parts=SCORED_PARTS)
        train_ids, train_texts, train_labels = part2xy['train']
        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        st = time()
        params = train(train_texts, train_labels)
        print('Classifier trained in %.2fs' % (time() - st))
    else:
        part2xy = load_dataset_fast('FILIMDB',
                                    parts=SCORED_PARTS + ('train_unlabeled', ))
        train_ids, train_texts, train_labels = part2xy['train']
        _, train_unlabeled_texts, _ = part2xy['train_unlabeled']

        st = time()

        if transductive:
            all_texts = list(text for _, text, _ in part2xy.values())
        else:
            all_texts = [train_texts, train_unlabeled_texts]

        total_texts = sum(len(text) for text in all_texts)
        print('\nPretraining classifier on %d examples' % total_texts)
        params = pretrain(all_texts)
        print('Classifier pretrained in %.2fs' % (time() - st))
        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        st = time()
        params = train(train_texts, train_labels, params)
        print('Classifier trained in %.2fs' % (time() - st))
        del part2xy["train_unlabeled"]

    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))

        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)

    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
Code Example #9
File: testing.py Project: Offroads/Group-Assignment
def test_classify_test_dataset():
    """
	This will attempt to classify the test dataset
	"""
    start_time = time.time()
    classifier.train()
    number_of_reviews = classifier.negative_review_count + classifier.positive_review_count
    results = classifier.predict_reviews()
    print(results)
    print(
        str(results["correct_predictions"] / number_of_reviews * 100) +
        "% is the accuracy ")
    final_time = time.time() - start_time
    print("It took: " f'{final_time:.2f}' " seconds to run\n")
Code Example #10
File: testing.py Project: Offroads/Group-Assignment
def test_classify_train_dataset_with_testing_data_with_stopwords():
    """
	This will attempt to classify the training dataset, using the testing dataset to train - with stop-words
	"""
    start_time = time.time()
    classifier.train(use_testing_data=True)
    number_of_reviews = classifier.negative_review_count + classifier.positive_review_count
    results = classifier.predict_reviews(use_stop_words=True,
                                         classify_training_data=True)
    print(results)
    print(
        str(results["correct_predictions"] / number_of_reviews * 100) +
        "% is the accuracy ")
    final_time = time.time() - start_time
    print("It took: " f'{final_time:.2f}' " seconds to run\n")
Code Example #11
def test_predict_test_dataset_with_stopwords():
    """
	This test will attempt to classify the test dataset while using stopwords
	"""
    start_time = time.time()
    use_stop_words = True
    predict.train()
    number_of_reviews = predict.negative_review_count + predict.positive_review_count
    results = predict.predict_test_reviews(use_stop_words=use_stop_words)
    print(results)
    print(
        str(results["correct_predictions"] / number_of_reviews * (100)) +
        "% is the accuracy ")
    final_time = time.time() - start_time
    print("It took: " f'{final_time:.2f}' " seconds to run\n")
Code Example #12
def train(input_folder_path, model_folder_name, model_type):
    print("Input Folder Path:", input_folder_path)
    print("Model Folder Name:", model_folder_name)

    print("Checking Directories...")
    if not os.path.exists(input_folder_path):
        return False, "Invalid input folder!"

    model_dir = os.path.join(Globals.model_path, model_folder_name)

    if os.path.exists(model_dir):
        return False, "Model already exists!"

    print("Aligning faces...")
    processed_dir = os.path.join(model_dir, "data")

    my_graph = MyGraph()

    align.align_faces(AlignOptions(input_folder_path, processed_dir, my_graph))

    directories = os.listdir(processed_dir)

    # SVC's don't seem to be able to handle only having 1 image for training, so let's create a duplicate
    if model_type == "svc":
        for d in directories:
            subdir = os.path.join(processed_dir, d)

            if os.path.isdir(subdir):
                files = os.listdir(subdir)

                if len(files) == 1:
                    file_name_split = os.path.splitext(files[0])
                    file_path_from = os.path.join(subdir, files[0])
                    file_path_to = os.path.join(
                        subdir, file_name_split[0] + "_2" + file_name_split[1])
                    print("Only 1 image found for training... Duplicating ",
                          file_path_from)
                    copyfile(file_path_from, file_path_to)

    print("Training...")

    classifier.train(data_dir=processed_dir,
                     session=my_graph,
                     classifier_filename=os.path.join(model_dir,
                                                      "classifier.pkl"),
                     model_type=model_type)

    return True, ""
Code Example #13
def main():
    part2xy = load_dataset_fast('FILIMDB')
    train_ids, train_texts, train_labels = part2xy['train']

    print('\nTraining classifier on %d examples from train set ...' %
          len(train_texts))
    st = time()
    params = train(train_texts, train_labels)
    print('Classifier trained in %.2fs' % (time() - st))

    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))

        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)

    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
Code Example #14
File: evaluate.py Project: hd898/filimdb_evaluation
def main():
    try:
        from classifier import pretrain
    except ImportError:
        part2xy = load_dataset_fast('FILIMDB')
        train_ids, train_texts, train_labels = part2xy['train']
        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        st = time()
        params = train(train_texts, train_labels)
        print('Classifier trained in %.2fs' % (time() - st))
    else:
        part2xy = load_dataset_fast('FILIMDB',
                                    parts=('train', 'dev', 'test',
                                           'train_unlabeled'))
        train_ids, train_texts, train_labels = part2xy['train']
        _, train_unlabeled_texts, _ = part2xy['train_unlabeled']
        all_texts = train_texts + train_unlabeled_texts

        print('\nPretraining classifier on %d examples' % len(all_texts))
        st = time()
        params = pretrain(all_texts)
        print('Classifier pretrained in %.2fs' % (time() - st))
        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        st = time()
        params = train(train_texts, train_labels, params)
        print('Classifier trained in %.2fs' % (time() - st))
        del part2xy["train_unlabeled"]

    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))

        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)

    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
Code Example #15
File: class_runner.py Project: floodric/BigPoker
def train(classifier=c):
  print("training classifier, please wait\n")

  trainingData = parsedata.readAllGames(False,exclusions)

  counter = 0
  # lazy iterator since trainingdata can be HUUUUUUUUUGE 
  for k in iter(trainingData):
    counter += 1
    #print(k)
    if len(k.split(',')) != 7:
      continue
    for score in trainingData[k]:
      classifier.train(k,score > 0)
    if counter % 25000 == 0:
      print("{}, ".format(counter), end='', flush=True)
    if counter % 100000 == 0:
      print()

  print("\n")
Code Example #16
def train(classifier=c):
    print("training classifier, please wait\n")

    trainingData = parsedata.readAllGames(False, exclusions)

    counter = 0
    # lazy iterator since trainingdata can be HUUUUUUUUUGE
    for k in iter(trainingData):
        counter += 1
        #print(k)
        if len(k.split(',')) != 7:
            continue
        for score in trainingData[k]:
            classifier.train(k, score > 0)
        if counter % 25000 == 0:
            print("{}, ".format(counter), end='', flush=True)
        if counter % 100000 == 0:
            print()

    print("\n")
Code Example #17
def main():

    # Instantiate the console arguments function
    args = arg_parser()

    print("GPU setting: {}".format(args.gpu))

    # Define normalization for transforms
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # Define transformations for training, validation and test sets
    data_transforms = create_transforms(30, 224, 256, normalize)

    # Load the datasets from the image folders
    datasets = image_datasets(data_transforms)

    # Define the dataloaders using the image datasets
    loaders = data_loaders(datasets, 32)

    # Instantiate a new model
    model = create_model(arch=args.arch)

    output_units = len(datasets['training'].classes)

    # Create new classifier
    model.classifier = create_classifier(model, args.hidden_layers,
                                         output_units, args.dropout)

    device = check_gpu(args.gpu)
    print(device)
    model.to(device)

    learning_rate = args.learning_rate
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    epochs = args.epochs
    print_every = args.print_every
    steps = 0
    trainloader = loaders['training']
    validloader = loaders['validation']

    # trained_model = train(model, epochs, learning_rate, criterion, optimizer, loaders['training'], loaders['validation'], device)
    trained_model = train(model, trainloader, validloader, device, criterion,
                          optimizer, epochs, print_every, steps)

    print("Training has completed")

    test_model(trained_model, loaders['testing'], device)

    initial_checkpoint(trained_model, args.checkpoint_dir,
                       datasets['training'])
Code Example #18
def train_classifier():
    #1. prepare data
    print "-- Prepare Data"
    train_sentences = conll2002.iob_sents('esp.train')
    test_sentences = conll2002.iob_sents('esp.testa')

    #2. extract features
    train_df, test_df = extract_classif_features(train_sentences,
                                                 test_sentences)
    #3. train
    clf = classifier.train(train_df, test_df)
Code Example #19
def standalone():
    auc1, t1, TP1, FP1, TN1, FN1 = classifier.train(X_train, Y_train, X_test,Y_test, "svm")
    auc2, t2, TP2, FP2, TN2, FN2 = classifier.train(X_train, Y_train, X_test,Y_test, "pct")
    auc3, t3, TP3, FP3, TN3, FN3 = classifier.train(X_train, Y_train, X_test,Y_test, "nct")
    auc4, t4, TP4, FP4, TN4, FN4 = classifier.train(X_train, Y_train, X_test,Y_test, "lr")


    data_save = np.asarray([data_index, input_dim, balance_rate, 
                            auc1,1000 * t1,TP1, FP1, TN1, FN1,
                            auc2,1000 * t2,TP2, FP2, TN2, FN2,
                            auc3,1000 * t3, TP3, FP3, TN3, FN3 ,
                            auc4,1000 * t4, TP4, FP4, TN4, FN4])   
    data_save = np.reshape(data_save, (-1,27))


    if os.path.isfile("Results/RF_AUC_DIF/AUC_Input.csv"): #
        auc = np.genfromtxt('Results/RF_AUC_DIF/AUC_Input.csv', delimiter=',') 
        auc = np.reshape(auc,(-1,27))
        data_save = np.concatenate((auc, data_save), axis = 0)
        np.savetxt("Results/RF_AUC_DIF/AUC_Input.csv", data_save,delimiter = ",",fmt = "%f")
    
    else:
        np.savetxt("Results/RF_AUC_DIF/AUC_Input.csv", data_save,delimiter = ",",fmt = "%f")
Code Example #20
def main():
    """
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        help="Increase verbosity",
                        action='store_true')

    parser.add_argument("-d",
                        "--postset",
                        dest="postset",
                        help="Dataset file path. A CSV file is required.",
                        type=str)

    parser.add_argument("-min",
                        "--ngrammin",
                        dest="ngram_min",
                        help="Minimum number of ngrams",
                        type=int)

    parser.add_argument("-max",
                        "--ngrammax",
                        dest="ngram_max",
                        help="Maximum number of ngrams",
                        type=int)

    parser.add_argument("-s",
                        "--save",
                        dest="save",
                        help="Save",
                        action='store_true')

    options = parser.parse_args()

    if not options.verbose:
        warnings.filterwarnings("ignore")

    with open(options.postset) as f:
        postset = json.load(f)
    df = pd.read_json(options.postset, orient='columns')
    df.columns = ['samples']

    posts = preprocess(df['samples'])
    scores = cls.train(posts,
                       options.ngram_min,
                       options.ngram_max,
                       save=options.save)
Code Example #21
def get_model(car_features, non_car_features, filename):
    if os.path.exists(filename):
        with open(filename, mode='rb') as f:
            data = pickle.load(f)
            clf = data['clf']
            scaler = data['scaler']
            return clf, scaler
    clf, scaler = train(car_features, non_car_features)
    with open(filename, mode='wb') as f:
        pickle.dump({
            'clf': clf,
            'scaler': scaler,
        }, f)
    return clf, scaler
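A short usage sketch of the caching pattern above: the first call trains and pickles the model, later calls load it from disk. The feature variables and file name here are placeholders, not values from the original project.

clf, scaler = get_model(car_features, non_car_features, "svc_model.p")
scaled = scaler.transform(sample_features)   # apply the same scaling used during training
predictions = clf.predict(scaled)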
Code Example #22
def trainModels(dataDict, models=[]):
    notesMdls = []
    velMdls = []
    timeMdls = []
    if (len(models) == 0):
        notesMdl = train(dataDict['dataNotes'], dataDict['targetNotes'])
        velMdl = train(dataDict['dataNotes'], dataDict['targetVelocity'])
        timeMdl = train(dataDict['dataNotes'], dataDict['targetTime'])
        notesMdls.append(notesMdl)
        velMdls.append(velMdl)
        timeMdls.append(timeMdl)
    elif (len(models) == 1):
        notesMdl = train(dataDict['dataNotes'],
                         dataDict['targetNotes'],
                         model=models[0])
        velMdl = train(dataDict['dataNotes'],
                       dataDict['targetVelocity'],
                       model=models[0])
        timeMdl = train(dataDict['dataNotes'],
                        dataDict['targetTime'],
                        model=models[0])
        notesMdls.append(notesMdl)
        velMdls.append(velMdl)
        timeMdls.append(timeMdl)
    else:
        for model in models:
            print("Training notes model for " + model)
            notesMdl = train(dataDict['dataNotes'],
                             dataDict['targetNotes'],
                             model=model)
            print("Training velocity model for " + model)
            velMdl = train(dataDict['dataNotes'],
                           dataDict['targetVelocity'],
                           model=model)
            print("Training time model for " + model)
            timeMdl = train(dataDict['dataNotes'],
                            dataDict['targetTime'],
                            model=model)
            notesMdls.append(notesMdl)
            velMdls.append(velMdl)
            timeMdls.append(timeMdl)
    return notesMdls, velMdls, timeMdls
Code Example #23
File: core.py Project: tomknappramos/Audissey
def get_data_and_retrain():
    global user_data_path

    # if user training data unavailable, throw error. This means user didn't bother training.
    if(not os.path.exists(user_data_path + user + ".csv")):
        print("NO TRAINING DATA AVAILABLE. PLEASE TRAIN SOME CLASSES")
        return

    # refresh user data
    classifier.get_data_from(user_data_path + user + ".csv")

    # block training from happening --> the svm breaks with a single class
    if( len(classifier.c_classes) <= 1):
        print("Please train more classes: at least 2 classes required")
        return

    # use normalization (have to do this for the svm for some reason)
    classifier.use_normalization_and_normalize_training_data()

    # set classifier type
    classifier.set_classifier(classifier.c_svm_rbf)

     # train classifier (no inputs no offsets/biases)
    classifier.train()
Code Example #24
def sensitive_transparency(model_config, data_config):
    saliency_dir = data_config.path + 'dataset/saliency/'
    dataset = datasets.ImageFolder(root=saliency_dir, transform=data_config.transform)
    saliencyloader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=2)
    # model_config.set_model()
    if not os.path.exists(model_config.model_dir):
        train(model_config, data_config)
    else:    
        model_config.load_model()

    simple_fullgrad = SimpleFullGrad(model_config.model)
    # model
    # fullgrad = FullGrad(model_config.model, im_size=(1,3,32,32), device=model_config.device)

    if os.path.exists(saliency_dir):
        compute_save_fullgrad_saliency(saliencyloader, data_config.unnormalize, data_config.save_path, model_config.device, simple_fullgrad)

    else:
        print("Add pictures to: " + saliency_dir)
        print("Saliency maps will be shown")
        
    csv_dir = data_config.data_dir + '/' + data_config.dataset_name + '/test/PPB-2017-metadata.csv'

    etnic_acc(data_config.testloader, model_config.model, model_config.optimizer, model_config.criterion, model_config.device, csv_dir)
Code Example #25
def main():
    s = time.time()
    train = pd.read_csv("train.csv")
    test = pd.read_csv("test.csv")
    clean(train)
    clean(test)

    print("Data processed after " + str(time.time() - s) + " sec")

    #-----------Predictor-------------------------------
    tfidf_bag = bag(train.tweet)
    predictor.predict(tfidf_bag, train.label)
    #---------------------------------------------------

    #-----------Clustering------------------------------
    hateful = train.copy(deep=True)
    get_hateful(hateful)
    kmeans_model, vectorizer = classifier.train(hateful, False)
    print("Clusters Found after " + str(time.time() - s) + " sec")
Code Example #26
File: rest_service.py Project: Cytryn31/uwrMachine
def train():
    classfier_name = request.forms.get('classfier_name')
    classfier_type = request.forms.get('classfier_type')
    classfier_params = request.forms.get('classfier_params')
    cross_validation_type = request.forms.get('cross_validation_type')
    learning_curve_params = request.forms.get('learning_curve_params')
    train_size = request.forms.get('train_size')
    clf = classifier.configure_classifier(classfier_type,classfier_params)
    cv = classifier.configure_cross_validation(cross_validation_type,classfier_params)
    features_train, labels_train = wtf.getArrays()

    clf, train_sizes, train_scores, test_scores = classifier.train(clf,
                        train_sizes = np.linspace(.1, 1.0,train_size),
                        cv = cv,
                        params = " ",
                        features = features_train,
                        labels = labels_train )
    data = classfier_to_send(classfier_name, clf, train_sizes, train_scores, test_scores)
    post.send("http://naos-software.com/dataprocessing/rest-api","/classifiers","",data)

    return data
Code Example #27
def test_features2(features, num_rounds, file):
    """
    Tests the (homemade) classifier on a set of features, returning its precision
    """
    correct = {}
    for a in authors:
        correct[a] = 0
    runs = 0
    print "number of features:" + str(len(features))
    print "testfeature:" + str(features)
    for i in range(0, num_rounds):

        start = time()
        data = split_train_test_data(authors, corp, 45)
        testdata = data["test"]
        traindata = data["train"]
        if file == "":
            trained_model = train(traindata, authors, features)
            print "model trained in:" + str(time() - start) + "seconds"
        else:
            trained_model = getfromfile(file)[1]
            writetofile((features, trained_model), "classifier2.c")
            print "trained model extracted from" + file
        print "number of runs:" + str(len(testdata))
        winsound.Beep(2000, 500)
        print "starting with classifications..."
        for j in range(0, len(testdata)):
            start = time()
            if classify(testdata[j][0], trained_model, features, authors, traindata) == testdata[j][1]:
                correct[testdata[j][1]] += 1
                runs += 1
            else:
                runs += 1
            print "runtime:" + str(time() - start)
    print "runs:" + str(runs)
    totalcorrect = 0
    for a in authors:
        totalcorrect += correct[a]
    print "correct:" + str(totalcorrect)
    return float(totalcorrect) / runs
Code Example #28
def old_user():
    if request.method == 'POST':
        id = request.form['id']
        print id

        UPLOAD_FOLDER = '/home/ankush/openface/training-images/' + id
        app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
        file = request.files['file']
        filename = secure_filename(file.filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        for i in range(20):
            shutil.copy(os.path.join(app.config['UPLOAD_FOLDER'], filename),
                        os.path.join(app.config['UPLOAD_FOLDER'], filename.split('.')[0] + str(i) + '.jpg'))

        import alignImages
        output = alignImages.alignMain("align")

        import creatingcsv
        creatingcsv.csv()

        import classifier
        output = classifier.train('/home/ankush/openface/generated-embeddings')
        return jsonify(output)
Code Example #29
def training_image():
    if request.method == 'POST':
        user = request.form['user']
        print user
        cursor = db.cursor()
        sql = "INSERT INTO data (user_name) VALUES ('%s')" % (user)
        sql1 = "SELECT user_id FROM data WHERE user_name = ('%s')" % (user)
        cursor.execute(sql)
        db.commit()
        cursor.execute(sql1)
        results = cursor.fetchall()
        for row in results:
            ids = int(row[0])
            print ids

        path = os.makedirs('/home/ankush/openface/training-images/' + str(ids))
        UPLOAD_FOLDER = '/home/ankush/openface/training-images/' + str(ids)
        app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

        file = request.files['file']
        filename = secure_filename(file.filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        for i in range(30):
            shutil.copy(os.path.join(app.config['UPLOAD_FOLDER'], filename),
                        os.path.join(app.config['UPLOAD_FOLDER'], filename.split('.')[0] + str(i) + '.jpg'))

        import alignImages
        output = alignImages.alignMain("align")

        import creatingcsv
        creatingcsv.csv()

        import classifier

        output = classifier.train('/home/ankush/openface/generated-embeddings')
        return jsonify(output)
Code Example #30
File: SVMExercise.py Project: Cytryn31/uwrMachine
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.externals import joblib

#features_train,  labels_train,  = wtf.getArrays()


features_train, labels_train, features_test, labels_test = prep_terrain_data.makeTerrainData()
#for (i, feature) in enumerate(features_test):
 #   print (i, len(feature))
#labels_train[0] = 1
# fit the model
cv = cross_validation.ShuffleSplit(len(features_train), n_iter=50, test_size=0.1, random_state=0)

clf = train("svm", train_sizes=np.linspace(.1, 1.0, 20),cv = cv, params = " ", features= features_train, labels=labels_train )


#pred, accuracy, recall, precision = test(clf, features_test, labels_test)

plt.show()

#clf.fit(features_train, labels_train)

#pred = clf.predict(features_test)

from sklearn.metrics import accuracy_score
#acc = accuracy_score(pred, labels_test)
#print (" accuracy: ", acc)

#print(classification_report(labels_test, pred))
Code Example #31
File: analysis.py Project: mster/fruit-classifier
import numpy as np
import matplotlib.pyplot as pyplot
import h5py
import scipy
from PIL import Image
from scipy import ndimage
import cv2
import random
import json

# importing classifier resources
import classifier

# fetch data from model after training
result = classifier.train()


def format_results():
    # static array variants
    APPLE = [1, 0, 0]
    ORANGE = [0, 1, 0]
    BANANA = [0, 0, 1]

    # misc printing data
    correct_counter = 0
    show_prediction = True

    for i in range(result["prediction_training_labels"].shape[1]):
        guess_array = result["prediction_training_labels"][:, i].astype(int)
        correct_label = result["original_training_labels"][i]
Code Example #32
                        dest='non_overlap_chunk_size',
                        action='store',
                        default=10)

    parser.add_argument('-umm',
                        '--use-min-max',
                        dest='use_min_max',
                        action='store_true',
                        default=False)

    parser.add_argument('features_dir',
                        action='store',
                        type=str,
                        help='Path to directory where feature files are stored')

    parser.add_argument('output_dir',
                        action='store',
                        type=str,
                        help='Path to directory where output files will be stored')

    parser.add_argument('fold_num',
                        action='store',
                        type=int,
                        help='Fold ordinal to train/test with')

    return vars(parser.parse_args())

if __name__ == '__main__':
    args = parse_arguments()
    train(**args)
Code Example #33
def learn():
    ds_x, ds_y = dataset.load_dataset()
    clsfr = classifier.create_classifier(verbose=True, layer_sizes=(100,25))
    classifier.train(clsfr, ds_x[:9000], ds_y[:9000])
    print(classifier.rate(clsfr, ds_x[9000:], ds_y[9000:]))
    classifier.dump_classifier(clsfr, '100x25')
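The classifier module called above is not shown. One plausible shape for it, sketched with scikit-learn; these implementations are assumptions, not the original module.

from sklearn.neural_network import MLPClassifier
import joblib

def create_classifier(verbose=False, layer_sizes=(100, 25)):
    # A multi-layer perceptron with the requested hidden-layer sizes.
    return MLPClassifier(hidden_layer_sizes=layer_sizes, verbose=verbose)

def train(clf, ds_x, ds_y):
    clf.fit(ds_x, ds_y)

def rate(clf, ds_x, ds_y):
    # Mean accuracy on held-out data.
    return clf.score(ds_x, ds_y)

def dump_classifier(clf, name):
    joblib.dump(clf, name + ".joblib")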
Code Example #34
File: rest_service.py Project: patryk1992/angular
def train():
    callback = request.GET.get('callback')
    classifier_name = request.GET.get('classifier_name')
    classifier_id = request.GET.get('classifier_id')
    user_id = request.GET.get('user_id')
    classifier_type = request.GET.get('classifier_type')
    classifier_params = request.GET.get('classifier_params')
    cross_validation_type = request.GET.get('cross_validation_type')
    cross_validation_params = request.GET.get('cross_validation_params')
    result_test_classifiers_id = request.GET.get('result_test_classifiers_id')
    collection_id = request.GET.get('collection_id')
    vectorized_document_collection_id = request.GET.get('vectorized_document_collection_id')
    train_size = request.GET.get('train_size')
    #data = classifier_to_send(user_id, classifier_name, classifier_params, "", "", 1)
    #post.send("http://localhost:8080/dataprocessing/rest-api/classifiers/",data)
    print("Params :")
    print(classifier_name)
    print(classifier_type)
    print(classifier_params)
    print(cross_validation_type)
    print(cross_validation_params)
    print(collection_id)
    print(train_size)
    clf = classifier.configure_classifier(classifier_type,classifier_params)
    #features_train, labels_train = makeTerrainData(n_points=200)
    features_train, labels_train, features_test, labels_test = makeTerrainData()
    #print(features_train)
    if(cross_validation_type == 'None') :
            cross_validation_type = None

    cv = classifier.configure_cross_validation(cross_validation_type,cross_validation_params, n = len(features_train))
	
    print("features_train :")
    print(len(features_train))
    print("labels_train :")
    print(len(labels_train))
    clf.fit(features_train, labels_train)

    fig = classifier.train(clf,
                        train_sizes = np.linspace(.1, 1.0,train_size),
                        cv = cv,
                        params = " ",
                        features = features_train,
                        labels = labels_train )
    imgdata = StringIO()
    fig.savefig(imgdata, format='svg')
    imgdata.seek(0)  # rewind the data

    svg_dta = imgdata.getvalue()  # this is svg data
    import pickle
    s = pickle.dumps(clf)
    print("classifier dump:")
    #print(s)
    data = classifier_to_send(user_id = user_id, name = classifier_name, vectorizedDocumentCollectionId= vectorized_document_collection_id, parameter = classifier_params, learningCurve = svg_dta, content = s, flag = 1)
    put.send("http://localhost:8080/dataprocessing/rest-api/classifiers/",classifier_id, data)
    pred = clf.predict(features_train)
    from sklearn.metrics import accuracy_score
    acc = accuracy_score(labels_train, pred)
    precision = precision_score(labels_train, pred)
    recall = recall_score(labels_train, pred)

    print("result_test_classifiers_id: " + result_test_classifiers_id )
    
    data = test_data_to_send(id_result_test_classifier = result_test_classifiers_id, user_id = user_id, classifierId = classifier_id, vectorizedDocumentCollectionId = vectorized_document_collection_id, parameter = " ", precision = precision, accuracy = acc, recall = recall)
    print(data)
    put.send("http://localhost:8080/dataprocessing/rest-api/resultTestClassifiers/", result_test_classifiers_id,  data)

    return '{0}({1})'.format(callback, {'a':1, 'b':2})
Code Example #35
File: main.py Project: darxsys/ML
def main(args):
    """Calls training and other functions. Needs 5 arguments.
    """

    word_file = args[0]
    training_file = args[1]
    validation_file = args[2]
    test_file = args[3]
    out_path = args[4]

    words = {}
    with open(word_file, "r") as f:
        i = 0
        for line in f:
            line = line.split()
            words[i] = line[0]
            i += 1

    dimension = len(words)
    training_set = parse_input(training_file, dimension)
    validation_set = parse_input(validation_file, dimension)
    test_set = parse_input(test_file, dimension)

    # first part
    w, w0, error = train(training_set, dimension, 0.)
    emp_error = calc_num_wrong(w, w0, training_set, dimension)
    print_weights(out_path + "tezine1.dat", w, w0, error, emp_error)

    # second part
    l = [0.1, 1., 5., 10., 100., 1000.]

    with open(out_path + "optimizacija.dat", "w") as f:
        best_error = error
        best_w = w
        best_w0 = w0

        num_wrong = calc_num_wrong(w, w0, validation_set, dimension)
        optimal = 0.
        f.write("\u03BB" + " = " + str(0) + ", " + str(num_wrong) + "\n")

        for lambda_ in l:
            w, w0, error = train(training_set, dimension, lambda_)
            num = calc_num_wrong(w, w0, validation_set, dimension)
            # if lambda_ == 1.:
            #     output_predictions(out_path + "pred_proba.dat", validation_set, w, w0)
            #     top_five = w.argsort()[-5:][::-1]
            #     with open(out_path + "rijeci_proba.txt", "w") as f2:
            #         for x in top_five:
            #             f2.write(words[x] + "\n")

            f.write("\u03BB" + " = " + str(lambda_) + ", " + str(num) + "\n")
            if num <= num_wrong:
                num_wrong = num
                optimal = lambda_

        f.write("optimalno: " + "\u03BB = " + str(optimal) + "\n")

    # third part
    training_set.extend(validation_set)
    w, w0, error = train(training_set, dimension, optimal)
    emp_error = calc_num_wrong(w, w0, training_set, dimension)
    print_weights(out_path + "tezine2.dat", w, w0, error, emp_error)

    top_twenty = w.argsort()[-20:][::-1]
    with open(out_path + "rijeci.txt", "w") as f:
        for x in top_twenty:
            f.write(words[x] + "\n")

    output_predictions(out_path + "ispitni-predikcije.dat", test_set, w, w0)
Code Example #36
File: statistics.py Project: HaohanWang/naiveBayes
import classifier as c
import operator
import math

nb, vocab = c.train()
prob = nb[1]
wordprob=[[],[]]
wordprob[0] = sorted(prob[0].iteritems(), key=operator.itemgetter(1))
wordprob[1] = sorted(prob[1].iteritems(), key=operator.itemgetter(1))
wordprob[0].reverse()
wordprob[1].reverse()

#for i in range(0, 2):
#	print "------------"
#	for j in range(1, 20):
#		print wordprob[i][j]

logratio = [{},{}]
for word in vocab:
	for i in range(0, 2):
		logratio[0][word]=math.log(prob[0][word])-math.log(prob[1][word])
		logratio[1][word]=math.log(prob[1][word])-math.log(prob[0][word])
logrank = [[],[]]
logrank[0] = sorted(logratio[0].iteritems(), key=operator.itemgetter(1))
logrank[1] = sorted(logratio[1].iteritems(), key=operator.itemgetter(1))
for i in range(0, 2):
	print "--------------"
	logrank[i].reverse()
	for j in range(1, 21):
		print logrank[i][j]
Code Example #37
def main(train_timeout=5 * 60, eval_timeout=5 * 60):
    results = {}
    try:
        import classifier
        importlib.reload(classifier)
    except Exception as e:
        print(e)
        results["exception"] = str(e)
        if sys.modules.get("classifier"):
            del sys.modules['classifier']
        return results

    part2xy = load_dataset_fast('FILIMDB_hidden', SCORED_PARTS)
    train_ids, train_texts, train_labels = part2xy['train']

    print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
    st = time()

    try:
        with time_limit(train_timeout):
            params = classifier.train(train_texts, train_labels)
    except (TimeoutException, ValueError, Exception) as e:
        del sys.modules['classifier']
        print(e)
        if isinstance(e, TimeoutException):
            results["train_time"] = train_timeout
        results["exception"] = str(e)
        return results

    train_time = time() - st
    results["train_time"] = train_time

    print('Classifier trained in %.2fs' % train_time)

    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        try:
            with time_limit(eval_timeout):
                preds = classifier.classify(x, params)
        except (TimeoutException, ValueError) as e:
            del sys.modules['classifier']
            if isinstance(e, TimeoutException):
                print("Timeout on evaluating %s set!" % part)
                results["eval_on_%s_set_time" % part] = eval_timeout
            else:
                print(e)
            results["exception"] = str(e)
            return results

        eval_time = time() - st
        results["eval_on_%s_set_time" % part] = eval_time
        print('%s set classified in %.2fs' % (part, eval_time))
        allpreds.extend(zip(ids, preds))

        if y is None:
            print('no labels for %s set' % part)
        else:
            acc = score(preds, y)
            results["eval_on_%s_set_acc" % part] = acc
    del sys.modules['classifier']
    return results
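time_limit and TimeoutException are used in the snippet above but defined elsewhere. One common way to implement such a guard on Unix is with signal.alarm; this is an assumption about the helper, not the original definition.

import signal
from contextlib import contextmanager

class TimeoutException(Exception):
    pass

@contextmanager
def time_limit(seconds):
    # Interrupt the wrapped block with TimeoutException after `seconds` seconds (Unix only).
    def handler(signum, frame):
        raise TimeoutException("Timed out after %d seconds" % seconds)
    old_handler = signal.signal(signal.SIGALRM, handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)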
Code Example #38
def command(command):
    """
	This function will take a command and execute the command given, or else it will tell the user the command doesn't exist
	:param command: a string with your command
	"""
    help = """				Available commands are:
		predict	        - Attempts to predict a review by the user 
		run             - allow you to run different parts of the program
		exit            - exits the program
		clear           - clears the window
		help            - shows the different commands available
		wordcount       - Will show how many times a word shows up in the trainingdata, type in the word in the next input
		stopwords       - learn more about stopwords
		setpath         - allows you to set the path to the directory that contains the data
		topwords        - this will list the most common positive or negative words
		candidates      - list the people that contributed to the assignment and how they contributed
		"""

    stop_words_info = "Stop words are words that don't have any negative or positive meaning.\n"\
                      "It can be helpful to use stopwords to remove data that shouldn't impact the prediction.\n"\
                      "It can help performance and has an impact on the result."

    stop_word_commands = """				Available commands are:
		help / commands - lists the commands
		info            - lists info about stopwords
		back            - go back to previous section
		clear           - clear the window
		listwords       - lists the stopwords
		"""

    if command == "exit" or command == "close" or command == "stop":
        quit()

    elif command == "topwords":

        is_a_number = False
        number_of_words = None
        while not is_a_number:
            number_of_words = input("How many words do you want to see?\n")
            try:
                number_of_words = int(number_of_words)
                is_a_number = True
            except Exception as e:
                print("Please enter a number.")
                pass

        classifier.train()
        common_pos_words = data_handler.get_common_words(
            classifier.pos_words_dict, number_of_words)
        print("\nPositive words...")
        for item in common_pos_words:
            print(item)
        print("\nNegative words...")
        common_neg_words = data_handler.get_common_words(
            classifier.neg_words_dict, number_of_words)
        for item in common_neg_words:
            print(item)

    elif command == "wordcount":
        done = False
        while not done:  # You can keep trying different words until you type back
            word = input("Type in the word: ")
            if word == "back":
                done = True
                return  # return to "main menu"
            data = classifier.train()
            pos_fr = data["pos_words_dict"]
            neg_fr = data["neg_words_dict"]
            print(word, " was found ",
                  data_handler.get_specific_word(pos_fr, word),
                  " times in the positive reviews\n")
            print(word, " was found ",
                  data_handler.get_specific_word(neg_fr, word),
                  " times in the negative reviews\n")

    elif command == "setpath":
        main.set_path()

    elif command == "run":
        done = False
        clear_window()
        while not done:  #TODO finish commands
            user_input = input(
                "Which function do you want to run? Use the numbers to select. Type back to return\n"
                "1  - train                              - This will attempt to load the preprocessed training data from the file, if it can't it will process it and save it as a file \n"
                "2  - load test data                     - this will load the test data from the file test.data if possible, if it can't it will process the test data and save it as test.data\n"
                "3  - predict the test reviews           - This will attempt to predict the test reviews\n"
                "4  - Predict test review with stopwords - This will attempt to predict the test reviews while using stopwords\n"
                "5  - cleanup                            - This will remove all files created by this program\n"
                "back                                    - Return back to main menu\n"
            )
            user_input = user_input.lower()
            print("Running ", user_input)
            if user_input == "1":
                classifier.train()
                print("Classifier is ready.")
            elif user_input == "2":
                classifier.load_test_dataset()
                print("Test data is ready.")
            elif user_input == "3":
                testing.test_predict_test_dataset()

            elif user_input == "4":
                testing.test_predict_test_dataset_with_stopwords()

            elif user_input == "5":
                data_handler.cleanup_files()

            elif user_input == "back":
                done = True
                print("Returning to previous section...")

            else:
                print("Couldn't run ", user_input,
                      " Maybe you spelled it wrong?\n")
    elif command == "predict":
        done = False
        while not done:
            user_input = input("Enter your review or back to return: ")
            if user_input.lower() == "back":
                done = True
                return
            print("Attempting to predict...")
            print("Your input was: " + user_input + "\n")
            result = classifier.predict_input(user_input)
            print(result[0])
            print(result[1])
            print(result[2])

    elif command == "help":
        print(help)

    elif command == "stopwords":
        done = False
        print(stop_word_commands)
        while not done:
            user_input = input(
                "Type a command. Type help for a list of options: ")

            if user_input.lower() == "info":
                print(stop_words_info)

            elif user_input == "back":
                print("Going back...")
                done = True

            elif user_input == "help" or user_input == "commands":
                print(stop_word_commands)

            elif user_input == "clear":
                clear_window()

            elif user_input == "listwords":
                stop_words = get_stop_words('english')
                for word in stop_words:
                    print(word)
            else:
                print(
                    "Did not recognize that command, type help to show a list of commands"
                )

    elif command == "clear":
        clear_window()

    elif command == "candidates":
        print("The candidates are:\n")
        print("110 - wrote all the code")
        print("21  - minor testing of the code")

    else:  # if a command that doesnt exist is typed in.
        print(help)
Code Example #39
File: twss_classifier.py Project: Underflow/twss
def train(file_path, classification):
  for line in lines(file_path):
    classifier.train(line, classification)
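The lines helper is not included in the snippet; a minimal sketch, assuming it simply yields the non-empty lines of the file.

def lines(file_path):
    # Yield each non-empty line with surrounding whitespace stripped.
    with open(file_path) as f:
        for line in f:
            line = line.strip()
            if line:
                yield line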
Code Example #40
File: server.py Project: samster25/greylockHackfest
	4 : "Left",
	5 : "Up",
	6 : "Down",
	7 : "Clockwise Circle",
	8 : "Counter Circle"
}

NOTES_LOOKUP = { 
	3 : G,
	5 : C,
	6 : A,
	7 : B,
	8 : D
}

classy = classifier.train()
sock.settimeout(.5)
def handle_data(data,log):
	float_dat = map(lambda x: float(x), data.split(","))
	return [float_dat[0],float_dat[1],-float_dat[2]]

log_data = False
data_stream_x = []
data_stream_y = []
data_stream_z = []
count_less_than_theta = 0
print "SERVER IS LISTENING"

midiout = rtmidi.MidiOut()
midiout.open_virtual_port("test1")
while True:
Code Example #41
    'data/lists/train_list.mat')
data_frame = create_dataframe(filename_list, labels_list, annotation_list,
                              'data/annotation/')
pickle_file(data_frame=data_frame, file_to_save='train_data.pickle')

x_train = to_numpy_array(data_frame,
                         image_shape=(224, 224),
                         data_path='data/images/')
y_train = labels_to_logical(labels_list)

print('COMPILE and TRAIN MODEL')
model = Models()
model = model.TransferFine(top_layers=True)
history, model = train(model,
                       x_train,
                       y_train,
                       split=0.8,
                       early_stopping=True,
                       epochs=5)

print('Saving: Model Architecture and Weights')
model.save('save_architecture.h5')
model.save_weights('save_model_weights.h5')

print('TEST DATA PREP')
filename_list, labels_list, annotation_list = load_matfile(
    'data/lists/test_list.mat')
data_frame = create_dataframe(filename_list, labels_list, annotation_list,
                              'data/annotation/')
pickle_file(data_frame=data_frame, file_to_save='test_data.pickle')

x_test = to_numpy_array(data_frame,
Code Example #42
File: SVMExercise.py Project: patryk1992/angular
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn import svm
from sklearn.externals import joblib

#features_train,  labels_train,  = wtf.getArrays()


features_train, labels_train, features_test, labels_test = prep_terrain_data.makeTerrainData()
#for (i, feature) in enumerate(features_test):
 #   print (i, len(feature))
#labels_train[0] = 1
# fit the model
cv = cross_validation.ShuffleSplit(len(features_train), n_iter=50, test_size=0.1, random_state=0)

train(svm.SVC(), train_sizes=np.linspace(.1, 1.0, 10),cv = cv, params = " ", features= features_train, labels=labels_train )


#pred, accuracy, recall, precision = test(clf, features_test, labels_test)

plt.show()

#clf.fit(features_train, labels_train)

#pred = clf.predict(features_test)

from sklearn.metrics import accuracy_score
#acc = accuracy_score(pred, labels_test)
#print (" accuracy: ", acc)

#print(classification_report(labels_test, pred))
Code Example #43
File: cli.py Project: Offroads/Group-Assignment
def command(command):
    """
	This function will take a command and execute the command given, or else it will tell the user the command doesn't exist
	:param command: a string with your command
	"""
    commands = """				Available commands are:
		classify        - Attempts to classify a review that is written by the user 
		run             - allow you to run different parts of the program
		exit            - exits the program
		clear           - clears the window
		commands / help - shows the different commands available
		wordcount       - Will show how many times a word shows up in the trainingdata, type in the word in the next input
		stopwords       - learn more about stopwords
		setpath         - allows you to set the path to the directory that contains the data
		topwords        - this will list the most common positive or negative words
		candidates      - list the people that contributed to the assignment and how they contributed
		"""

    stop_words_info = "\nStop words are words that don't have any negative or positive meaning.\n"\
                      "These words can have a negative impact on the accuracy if they are used more in one of the types of reviews.\n"\
                      "The way we handle stop-words is to simply skip words that are found in the list of stop-words.\n"

    stop_word_commands = """				Available commands are:
		commands / help - lists the commands
		info            - lists info about stopwords
		back            - go back to previous section
		clear           - clear the window
		listwords       - lists the stopwords
		"""

    if command == "exit" or command == "close" or command == "stop":
        quit()

    elif command == "topwords":

        is_a_number = False
        number_of_words = None
        while not is_a_number:
            number_of_words = input("How many words do you want to see?\n")
            try:
                number_of_words = int(number_of_words)
                is_a_number = True
            except Exception:
                print("Please enter a number.")
                pass

        classifier.train()
        common_pos_words = data_handler.get_common_words(
            classifier.pos_words_dict, number_of_words)
        print("\nPositive words...")
        for item in common_pos_words:
            print(item)
        print("\nNegative words...")
        common_neg_words = data_handler.get_common_words(
            classifier.neg_words_dict, number_of_words)
        for item in common_neg_words:
            print(item)

    elif command == "wordcount":
        data = classifier.train()
        pos_fr = data["pos_words_dict"]
        neg_fr = data["neg_words_dict"]
        while True:  # You can keep trying different words until you type back
            word = input("Type in the word: ")
            if word == "back":
                return  # return to "main menu"
            print(word, " was found ",
                  data_handler.get_specific_word(pos_fr, word),
                  " times in the positive reviews\n")
            print(word, " was found ",
                  data_handler.get_specific_word(neg_fr, word),
                  " times in the negative reviews\n")

    elif command == "setpath":
        main.set_path()

    elif command == "run":
        done = False
        clear_window()
        while not done:
            user_input = input(
                "Which function do you want to run? Use the numbers to select. Type back to return\n"
                "1  - classify the test reviews                                          - This will classify the test reviews\n"
                "2  - classify test review with stopwords                                - This will classify the test reviews while using stopwords\n"
                "3  - cleanup                                                            - This will remove all files created by this program\n"
                "4  - classify training reviews                                          - This will classify the training data\n"
                "5  - classify training reviews with stopwords                           - This will classify the training data with stopwords\n"
                "6  - classify training reviews with testing dataset                     - This will classify the training reviews, using the testing data for the classifier\n"
                "7  - classify training reviews with testing dataset, using stopwords    - This will classify the training reviews, using the testing data for the classifier and stopwords\n"
                "8  - classify testing reviews with testing dataset                      - This will classify the testing reviews, using the testing data for the classifier\n"
                "9  - classify testing reviews with testing dataset, using stopwords     - This will classify the testing reviews, using the testing data for the classifier and stop-words\n"
                "10 - all                                                                - This will run all the tests\n"
                "back                                                                    - Return back to main menu\n"
            )
            user_input = user_input.lower()
            print("Running ", user_input)
            if user_input == "1":
                print(
                    "Attempting to classify the test reviews. This may take a while."
                )
                testing.test_classify_test_dataset()
            elif user_input == "2":
                print(
                    "Attempting to classify the test reviews while using stop-words. This may take a while."
                )
                testing.test_classify_test_dataset_with_stopwords()

            elif user_input == "3":
                data_handler.cleanup_files()

            elif user_input == "4":
                print(
                    "Attempting to classify the training reviews. This may take a while."
                )
                testing.test_classify_train_dataset()

            elif user_input == "5":
                print(
                    "Attempting to classify the training reviews with stopwords. This may take a while."
                )
                testing.test_classify_train_dataset_with_stopwords()

            elif user_input == "6":
                print(
                    "Attempting to classify training dataset while using the testing dataset for the classifier. This may take a while."
                )
                testing.test_classify_train_dataset_with_testing_data()
            elif user_input == "7":
                print(
                    "Attempting to classify training dataset while using the testing dataset for the classifier, while using stop-words. This may take a while."
                )
                testing.test_classify_train_dataset_with_testing_data_with_stopwords(
                )

            elif user_input == "8":
                print(
                    "Attempting to classify testing dataset while using the testing dataset for the classifier. This may take a while."
                )
                testing.test_classify_test_dataset_with_testing_data()

            elif user_input == "9":
                print(
                    "Attempting to classify testing dataset while using the testing dataset for the classifier, while using stop-words. This may take a while."
                )
                testing.test_classify_test_dataset_with_testing_data_using_stopwords(
                )

            elif user_input == "10":
                line = "_____________________________________________________________________________________________________________________________"
                print(
                    "Attempting to classify the test reviews. This may take a while."
                )
                testing.test_classify_test_dataset()
                print(line)

                print(
                    "Attempting to classify the test reviews while using stop-words. This may take a while."
                )
                testing.test_classify_test_dataset_with_stopwords()
                print(line)

                print(
                    "Attempting to classify the training reviews. This may take a while."
                )
                testing.test_classify_train_dataset()
                print(line)

                print(
                    "Attempting to classify the training reviews with stopwords. This may take a while."
                )
                testing.test_classify_train_dataset_with_stopwords()
                print(line)

                print(
                    "Attempting to classify training dataset while using the testing dataset for the classifier. This may take a while."
                )
                testing.test_classify_train_dataset_with_testing_data()
                print(line)

                print(
                    "Attempting to classify training dataset while using the testing dataset for the classifier, while using stop-words. This may take a while."
                )
                testing.test_classify_train_dataset_with_testing_data_with_stopwords(
                )
                print(line)

                print(
                    "Attempting to classify testing dataset while using the testing dataset for the classifier. This may take a while."
                )
                testing.test_classify_test_dataset_with_testing_data()
                print(line)

                print(
                    "Attempting to classify testing dataset while using the testing dataset for the classifier, while using stop-words. This may take a while."
                )
                testing.test_classify_test_dataset_with_testing_data_using_stopwords(
                )
                print(line)

            elif user_input == "back":
                done = True
                print("Returning to previous section...")

            else:
                print(f"Couldn't run {user_input}. Maybe you spelled it wrong?\n")
    elif command == "classify":
        classifier.train()  # prepare the classifier
        while True:
            user_input = input("\nEnter your review or back to return: ")
            if user_input.lower() == "back":
                return
            print("Attempting to classify the review: " + user_input + "\n")
            result = classifier.predict_input(user_input)
            print(result[0])
            print(result[1])
            print(result[2])

    elif command == "commands" or command == "help":
        print(commands)

    elif command == "stopwords":
        done = False
        print(stop_word_commands)
        while not done:
            user_input = input(
                "Type a command. Type help for a list of options: ").strip().lower()

            if user_input == "info":
                print(stop_words_info)

            elif user_input == "back":
                print("Going back...")
                done = True

            elif user_input == "commands" or user_input == "help":
                print(stop_word_commands)

            elif user_input == "clear":
                clear_window()

            elif user_input == "listwords":
                stop_words = get_stop_words('english')
                for word in stop_words:
                    print(word)
            else:
                print(
                    "Did not recognize that command, type commands to show a list of commands"
                )

    elif command == "clear":
        clear_window()

    elif command == "candidates":
        print("The candidates are:\n")
        print("110 - wrote all the code and worked on the report")
        print("21  - testing of the code and worked on the report")

    else:  # if a command that doesn't exist is typed in.
        print(commands)
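
# The "topwords" branch above relies on data_handler.get_common_words, which is
# defined elsewhere in this project and not shown in this excerpt. As a hedged
# sketch only: a minimal stand-in with the same shape (a {word: count} dict in,
# the n most frequent (word, count) pairs out) could be built on
# collections.Counter. The name and return format here are assumptions for
# illustration, not the project's actual helper.
from collections import Counter

def get_common_words_sketch(word_freq_dict, number_of_words):
    """Return the `number_of_words` most frequent (word, count) pairs."""
    return Counter(word_freq_dict).most_common(number_of_words)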
コード例 #44
0
import classifier
import neuralpy
import grapher

# Feed-forward network: 2 inputs, one hidden layer of 8 units, 1 output.
net = neuralpy.Network(2, 8, 1)

# Training data: the XML files miller_xml/1.xml through miller_xml/12.xml.
uris = ["miller_xml/" + str(i) + ".xml" for i in range(1, 13)]

epochs = 200
learning_rate = 0.05

# Fraction of the data held out for validation during training.
validation_percentage = 0.32

ps = []

classifier.stand = "L"

# Ten random restarts: re-randomize the weights, train, and keep the score
# returned by classifier.train for each run.
for i in range(10):
	net.randomize_parameters()
	p = classifier.train(net, uris, epochs, learning_rate, validation_percentage, save_file='results/miller_' + str(i) + '.txt')
	neuralpy.output(p)
	ps.append(p)

# Report the best-scoring run and graph its saved results file.
i = ps.index(max(ps))
neuralpy.output("\n\n" + str(max(ps)) + " at " + str(i))

grapher.graph(filepath='results/miller_' + str(i) + '.txt')


# grapher.graph(filepath='results/miller_4.txt')
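
# The loop in this example keeps every restart's score and then picks the best
# index by hand. A hedged sketch of the same random-restart pattern, factored
# into a reusable helper, is shown below; train_once is a stand-in for the
# classifier.train call above and is an assumption, not part of the original.
def best_restart(train_once, restarts=10):
    """Run `restarts` training attempts and return (best_index, best_score)."""
    best_i, best_score = 0, float("-inf")
    for i in range(restarts):
        score = train_once(i)  # each call should re-randomize, train, and return a numeric score
        if score > best_score:
            best_i, best_score = i, score
    return best_i, best_score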