)
# TODO: it would be better not to copy and to work with indices instead.
# NOTE(review): `[:]` copies a list but only creates a view of a NumPy array;
# if disc_scores is an ndarray, the shuffle below also reorders disc_scores
# in place -- confirm which type it is.
scores = disc_scores[:]
# Sanity check: exactly one score per row of train_data[0].
assert len(scores) == train_data[0].shape[0]

# Shuffle images, targets and scores, restoring the same saved RNG state
# before each shuffle. Presumably this is meant to apply one identical
# permutation to all three, which relies on them having the same length
# along the first axis -- confirm.
rng_state = np.random.get_state()
np.random.shuffle(images)
np.random.set_state(rng_state)
np.random.shuffle(targets)
np.random.set_state(rng_state)
np.random.shuffle(scores)

# Sample the initial training set: INITIAL_TRAINING_SIZE distinct positions
# drawn at random from range(train_data[0].shape[0]).
daal_set_indices = random.sample(range(train_data[0].shape[0]), INITIAL_TRAINING_SIZE)

# Train the classifier with the initial set, then record its accuracy on
# test_data as the first entry of this run.
daal_train_data = (images[daal_set_indices], targets[daal_set_indices])
# NOTE(review): daal_train_data is a 2-tuple, so len(daal_train_data) is
# always 2 (not the number of samples); confirm that MAX_ITER / 2 is really
# the intended second argument of train().
classifier_daal.train(daal_train_data, MAX_ITER / len(daal_train_data))
acc = classifier_daal.evaluate(test_data)
#arr_range = np.arange(train_data[0].shape[0])
daal_accuracies.append(acc)

for step in range(ACTIVE_LEARNING_STEPS):
    print "step", step
    # Predict on the whole image set and compute the entropies of the
    # predictions (original comment: "predicted probabilities of the
    # unlabelled set"; note that all of `images` is passed here).
    predictions = classifier_daal.predict(images)
    entropies = get_entropy(predictions) # original note: lower is better
    entropies = normalize(entropies)
    # Original note: entropies -> lower is better, in [0-1]. The range
    # presumably comes from normalize() -- confirm. The value is flipped
    # here so that larger means better.
    entropies = 1 - entropies
    # Weight the flipped entropies element-wise by the per-sample scores.
    weighted_entropies = entropies * scores
    # We would like to select the largest weighted values: argsort sorts in
    # increasing order, so the result is reversed to put the largest first.
    argsorted = np.argsort( weighted_entropies)[::-1] # increasing sort, then reversed
# Shuffle images, targets and scores, restoring the same saved RNG state
# before each shuffle. Presumably this is meant to apply one identical
# permutation to all three, which relies on them having the same length
# along the first axis -- confirm.
rng_state = np.random.get_state()
np.random.shuffle(images)
np.random.set_state(rng_state)
np.random.shuffle(targets)
np.random.set_state(rng_state)
np.random.shuffle(scores)

# Sample the initial training sets, both of size INITIAL_TRAINING_SIZE:
#   - train_set_indices: distinct positions drawn at random;
#   - daal_set_indices: positions of the largest scores (argsort is
#     ascending, so the last INITIAL_TRAINING_SIZE entries are the largest).
train_set_indices = random.sample(range(train_data[0].shape[0]), INITIAL_TRAINING_SIZE)
score_argsort = np.argsort(scores[:])
daal_set_indices = score_argsort[-INITIAL_TRAINING_SIZE:].tolist()
print 'TRAIN_SET_INDICES', len(train_set_indices)
print 'DAAL_SET_INDICES', len(daal_set_indices)

# Train each classifier on its own initial set, then record both starting
# accuracies on test_data.
my_train_data = (images[train_set_indices], targets[train_set_indices])
daal_train_data = (images[daal_set_indices], targets[daal_set_indices])
classifier_daal.train(daal_train_data)
classifier_entropy.train(my_train_data)
acc = classifier_entropy.evaluate(test_data)
entropy_accuracies.append(acc)
acc = classifier_daal.evaluate(test_data)
daal_accuracies.append(acc)

for step in range(ACTIVE_LEARNING_STEPS):
    print "step", step
    ## Entropy:
    # Original comment: get predicted probabilities of the unlabelled set
    # and compute the entropies.
    # NOTE(review): the prediction below runs on images[train_set_indices],
    # i.e. the samples classifier_entropy was trained on, and the positions
    # in `argsorted` therefore index into that subset rather than into
    # `images` -- confirm this is intended.
    print "making entropy decisions"
    predictions = classifier_entropy.predict(images[train_set_indices])
    entropies = get_entropy(predictions)# original note: lower is better
    # argsort is ascending; the reversal puts the highest-entropy entry first.
    argsorted = np.argsort(entropies)[::-1]# increasing sort, then reversed
    # Start from the beginning and find the first 10 samples that are not
    # already labelled.