def train_classifier(train_data, train_labels, dev_data, dev_labels, seed):
    """Train an ml.Classifier with per-example updates and dev-set early stopping.

    Args:
        train_data: sequence of training examples (4-dim feature vectors,
            judging by ``dim=4`` — confirm upstream).
        train_labels: labels aligned index-for-index with ``train_data``.
        dev_data: held-out examples scored after every epoch.
        dev_labels: labels aligned with ``dev_data``.
        seed: random seed forwarded to the classifier.

    Returns:
        Tuple ``(classifier, current_acc)``: the trained classifier and its
        dev-set accuracy from the last completed epoch.
    """
    learning_rate = 0.005
    classifier = ml.Classifier(dim=4, lr=learning_rate, seed=seed)
    last_acc = None
    for epoch in range(100):  # number of epochs
        for data, label in zip(train_data, train_labels):
            classifier.learn(data, label)
        current_acc = classifier.calculate_acc(dev_data, dev_labels)
        print(current_acc)
        # Early stopping: quit once dev accuracy fails to improve by at
        # least 0.001 over the previous epoch. Compare with `is not None`
        # rather than truthiness so a first-epoch accuracy of exactly 0.0
        # does not silently disable the check.
        if last_acc is not None and last_acc > current_acc - 0.001:
            break
        last_acc = current_acc
    return classifier, current_acc
def train_classifier(train, dev, seed):
    """Train an ml.Classifier from dataset objects, with dev-set early stopping.

    NOTE(review): this redefines ``train_classifier`` with a different
    signature than the earlier version in this file; the later definition
    wins at import time. Consider removing one of the two.

    Args:
        train: dataset whose ``get_data()`` yields examples exposing
            ``get_data()`` / ``get_label()`` (assumed — confirm against
            the dataset class).
        dev: dataset exposing ``get_data_batch()`` / ``get_label_batch()``.
        seed: random seed forwarded to the classifier.

    Returns:
        Tuple ``(classifier, current_acc)``: the trained classifier and its
        dev-set accuracy from the last completed epoch.
    """
    learning_rate = 0.005
    classifier = ml.Classifier(dim=4, lr=learning_rate, seed=seed)
    last_acc = None
    for epoch in range(100):  # number of epochs
        for example in train.get_data():
            classifier.learn(example.get_data(), example.get_label())
        current_acc = classifier.calculate_acc(dev.get_data_batch(), dev.get_label_batch())
        # Early stopping once the epoch-over-epoch improvement drops below
        # 0.001. `is not None` (not truthiness) so a 0.0 accuracy after the
        # first epoch still participates in the comparison.
        if last_acc is not None and last_acc > current_acc - 0.001:
            break
        last_acc = current_acc
    return classifier, current_acc
# NOTE(review): this fragment continues a data-loading loop whose `for`
# header is outside this chunk; `datapoint` is assumed to be one raw data
# row with fields [?, numeric, ?, state, smoking-status] — confirm against
# the enclosing loop.
if datapoint[3] not in ['Alaska', 'Wyoming']:  # not legally allowed to perform experiment in these states
    # Binary target: 1 for smoking rows, 0 otherwise.
    smoking_label = 1 if datapoint[4] == "smoking" else 0
    dev_data.append((datapoint[0], int(datapoint[1]), datapoint[2], datapoint[3]))
    dev_labels.append(smoking_label)

## Whew, we're finished loading the data. Let's train a classifier five times with different random seeds.
## We use the learn function to update our parameters
accs = []
for seed in range(5):
    print("Seed", seed)
    learning_rate = 0.005
    classifier = ml.Classifier(dim=4, lr=learning_rate, seed=seed)
    last_acc = None
    # Epoch variable renamed from `i`, which shadowed the seed-loop index.
    for epoch in range(100):  # number of epochs
        for data, label in zip(train_data, train_labels):
            classifier.learn(data, label)
        current_acc = classifier.calculate_acc(dev_data, dev_labels)
        print(current_acc)
        # Early stopping once improvement drops below 0.001; `is not None`
        # so a 0.0 first-epoch accuracy doesn't disable the check.
        if last_acc is not None and last_acc > current_acc - 0.001:
            break
        last_acc = current_acc
    accs.append(current_acc)

## Finally, let's print out our predictions on the dev set:
# NOTE(review): `classifier` here is whichever survived the last seed
# iteration (seed 4), not the best of the five — confirm that's intended.
dev_predictions = classifier.predict_batch(dev_data)
num_printed = 0