# URL checker: train the model on first run, then classify a single link.
# `model_filename` and `csv_filename` are defined elsewhere in the source
# module; the joblib import is an assumption about how the model was saved.
from os.path import isfile
from joblib import load
import utility as ut


def main():
    if not isfile(model_filename):
        print('No saved model found, training one now...')
        ut.train_model(csv_filename, model_filename)
    clf = load(model_filename)
    link = ut.tokenize_link(input('Enter link to see if it is malicious: '))
    prediction = clf.predict(link)
    print(f'This URL is {prediction[0]}')
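The snippet leans on two helpers from its own `utility` module. A minimal sketch of what they might look like, assuming a scikit-learn pipeline saved with joblib (the CSV column names `url` and `label` are hypothetical):

# utility.py -- hypothetical sketch, not the original module
import pandas as pd
from joblib import dump
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline


def train_model(csv_filename, model_filename):
    # Assumed CSV layout: a `url` column of links and a `label` column.
    data = pd.read_csv(csv_filename)
    clf = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
    clf.fit(data['url'], data['label'])
    dump(clf, model_filename)


def tokenize_link(link):
    # The pipeline's vectorizer expects an iterable of strings,
    # so wrap the single URL in a list.
    return [link]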
Example #2
# Spam detector: train the model on first run, then classify a piece of text.
# `model_filename` and `csv_filename` are defined elsewhere in the source
# module; the saved object is a dict holding a fitted vectorizer and classifier.
from os.path import isfile
from joblib import load
import utility as ut


def main():
    if not isfile(model_filename):
        print('No saved model found, training one now...')
        ut.train_model(csv_filename, model_filename=model_filename)
    clf = load(model_filename)
    vectorizer = clf['vectorizer']
    guess_str = ut.stem_string(input('Enter text to see if it is spam: '))
    features = vectorizer.transform([guess_str])
    result = clf['classifier'].predict(features)
    print(f'This text is {result[0]}')
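`stem_string` is another helper from the same module. A plausible sketch, assuming NLTK's PorterStemmer is what it wraps:

# utility.py -- hypothetical sketch of stem_string
from nltk.stem import PorterStemmer


def stem_string(text):
    # Lowercase, split on whitespace, and stem each token so the input
    # matches the preprocessing applied at training time.
    stemmer = PorterStemmer()
    return ' '.join(stemmer.stem(word) for word in text.lower().split())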
Example #3
# From: https://campus.datacamp.com
# Import the libraries
import utility as u

train, test = u.load_data()
train = u.prepare_data(train)

# Create train_two with the newly defined feature
train_two = train.copy()
train_two["Family_size"] = train_two["SibSp"] + train_two["Parch"] + 1
train_two["Family_size"] = train_two["Family_size"].fillna(1)
print(train_two)

# Create a new feature set and add the new feature
feature_list = ["Pclass", "Sex", "Age", "Fare", "SibSp", "Parch", "Family_size"]

# Control overfitting by setting max_depth to 10 and min_samples_split to 5: my_tree
max_depth = 10
min_samples_split = 5
my_tree, features, target = u.train_model(train_two, feature_list, max_depth, min_samples_split)

# Look at the importance and score of the included features
print(my_tree.feature_importances_)
print(my_tree.score(features, target))

u.plot_result(feature_list, my_tree, features, target)
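Both Titanic examples delegate the actual fitting to `u.train_model`. A minimal sketch under the assumption that it wraps a scikit-learn DecisionTreeClassifier and uses the `Survived` column as the target (Example #4 calls it without the depth arguments, hence the defaults):

# utility.py -- hypothetical sketch of train_model
from sklearn.tree import DecisionTreeClassifier


def train_model(train, feature_list, max_depth=None, min_samples_split=2):
    # Slice out the requested feature columns and the Survived target,
    # then fit a tree with the given complexity limits.
    features = train[feature_list].values
    target = train["Survived"].values
    tree = DecisionTreeClassifier(max_depth=max_depth,
                                  min_samples_split=min_samples_split)
    tree = tree.fit(features, target)
    return tree, features, target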
Example #4

import matplotlib.pyplot as plt
import utility as u


def print_features(data, f_x, f_y, title, index=111, color_opt='bo'):
    # Plot one feature against another in the subplot slot `index`.
    plt.subplot(index)
    plt.xlabel(f_x)
    plt.ylabel(f_y)
    plt.title(title)
    plt.plot(data[f_x], data[f_y], color_opt)


train, test = u.load_data()
train = u.prepare_data(train)

# Print the train data to see the available features
#print(train)
plt.figure(1)
print_features(train, 'Sex', 'Survived', 'Sex vs. Survived', 411, 'ro')
print_features(train, 'Age', 'Survived', 'Age vs. Survived', 412, 'go')
print_features(train, 'Fare', 'Survived', 'Fare vs. Survived', 413, 'bo')
print_features(train, 'Pclass', 'Survived', 'Pclass vs. Survived', 414, 'yo')
#plt.show()

feature_list = ["Pclass", "Sex", "Age", "Fare"]
my_tree, features, target = u.train_model(train, feature_list)

# Look at the importance and score of the included features
print(my_tree.feature_importances_)
print(my_tree.score(features, target))

u.plot_result(feature_list, my_tree, features, target, 2)
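Both of the Titanic snippets also run the raw data through `u.prepare_data` first. The exact cleaning steps are not shown anywhere on this page; a plausible sketch of what that helper does:

# utility.py -- hypothetical sketch of prepare_data
def prepare_data(df):
    # Encode Sex as 0/1 and fill missing Age/Fare values so the
    # decision tree receives purely numeric columns.
    df = df.copy()
    df["Sex"] = df["Sex"].map({"male": 0, "female": 1})
    df["Age"] = df["Age"].fillna(df["Age"].median())
    df["Fare"] = df["Fare"].fillna(df["Fare"].median())
    return df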
Example #5
# Tail end of a check_arguments() helper; the earlier validations are
# truncated in this snippet.
        sys.exit()
    if number_of_epochs <= 0 or number_of_epochs > 10000:
        print("The number of epochs value should be between 1 and 10000")
        sys.exit()


check_arguments()
print("The training will be processed using", device)

# data processing
datasets, loaders = utility.data_processing(data_dir)
# building model, criterion, and optimizer
model = utility.model_build(arch, hidden_size, output)
criterion, optimizer = utility.crit_optim(model, lr)
# training the model
model = utility.train_model(loaders["train"], loaders["valid"], model,
                            criterion, optimizer, device, number_of_epochs, 40)
# print accuracy
__, test_accuracy = utility.loss_accuracy(loaders["test"], model, criterion,
                                          device)
print("The model accuracy on the test set is: {:.2f}%".format(test_accuracy *
                                                              100))
# save the model
model_name = utility.save_model(model,
                                datasets["train"],
                                optimizer,
                                arch,
                                output,
                                hidden_size,
                                model_dir="/")
print("The model was saved in:", model_name)
Example #6
    n_features = 1024  # classifier input size; the preceding architecture branch is truncated

# Freeze the pretrained backbone so only the new classifier head trains.
for param in model.parameters():
    param.requires_grad = False

# New head: two hidden layers ending in LogSoftmax over 102 classes,
# which pairs with the NLLLoss criterion below.
model.classifier = nn.Sequential(nn.Linear(n_features, hidden_units),
                                 nn.ReLU(), nn.Dropout(0.2),
                                 nn.Linear(hidden_units, 50), nn.ReLU(),
                                 nn.Linear(50, 102), nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()

optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
print('starting')
model = utility.train_model(model, n_epochs, criterion, optimizer, scheduler,
                            device, dataloaders)

model.class_to_idx = image_datasets['train'].class_to_idx
model.cpu()
checkpoint = {
    'arch': architecture,
    'hidden_units': hidden_units,
    'class_to_idx': model.class_to_idx,
    'state_dict': model.state_dict()
}
checkpoint_file_path = save_dir + checkpoint_file
torch.save(checkpoint, checkpoint_file_path)

print("Congrats! Your model has been successfully trained")
                action="store",
                default=0.001)
ap.add_argument('--dropout', dest="dropout", action="store", default=0.5)
ap.add_argument('--epochs', dest="epochs", action="store", type=int, default=1)
ap.add_argument('--arch',
                dest="arch",
                action="store",
                default="vgg16",
                type=str)
ap.add_argument('--hidden_units',
                type=int,
                dest="hidden_units",
                action="store",
                default=4096)

pa = ap.parse_args()
directory = pa.data_dir
checkpoint = pa.save_dir
lrate = pa.learning_rate
architecture = pa.arch
dropout = pa.dropout
hidden_units = pa.hidden_units
power = pa.gpu
epochs = pa.epochs

model, optimizer, criterion = utility.design_model(architecture, dropout,
                                                   lrate, hidden_units)

# Note: the snippet hard-codes a single epoch here; the parsed `epochs`
# value is only used when saving the checkpoint below.
utility.train_model(model, criterion, optimizer, 1, power)

utility.save_checkpoint(model, optimizer, epochs)
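A plausible sketch of `save_checkpoint`, mirroring the checkpoint dict that Example #6 builds by hand (the default filename is an assumption):

# utility.py -- hypothetical sketch of save_checkpoint
import torch


def save_checkpoint(model, optimizer, epochs, path='checkpoint.pth'):
    # Persist enough state to resume training or reload for inference.
    torch.save({
        'epochs': epochs,
        'state_dict': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'class_to_idx': getattr(model, 'class_to_idx', None),
    }, path)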
Example #8
# `path`, `transform`, the two Classifier_* classes and train_model are
# defined earlier in the source file this snippet was taken from.
# Download and load the training data
trainset = datasets.FashionMNIST(path + 'input/trainset/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST(path + 'input/testset/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

# Baseline run: the same architecture without dropout.
model = Classifier_noDropOut()

criterion = torch.nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.003)

# another optimizer:
# optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

epochs = 200

train_losses_ndp, test_losses_ndp, test_accuracy_ndp = train_model(model, optimizer, criterion, trainloader, testloader, epochs)

# Comparison run: the same architecture with dropout enabled.
model = Classifier_DropOut()

criterion = torch.nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.003)

train_losses_dp, test_losses_dp, test_accuracy_dp = train_model(model, optimizer, criterion, trainloader, testloader, epochs)

print('end')
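The two runs exist to compare overfitting with and without dropout. A small sketch of how the collected loss curves could be overlaid (assumes matplotlib; the variable names come from the snippet above):

import matplotlib.pyplot as plt

# Overlay the train/test losses from both runs to visualize the dropout effect.
plt.plot(train_losses_ndp, label='train (no dropout)')
plt.plot(test_losses_ndp, label='test (no dropout)')
plt.plot(train_losses_dp, label='train (dropout)')
plt.plot(test_losses_dp, label='test (dropout)')
plt.xlabel('epoch')
plt.ylabel('NLL loss')
plt.legend()
plt.show()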