Example #1
def train_nn(train_x, train_y, arch, LAMBDA):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        nn = NeuralNet(train_x, train_y, architecture=arch, LAMBDA=LAMBDA, maxiter=10000)
        print(nn._architecture)
        nn.train()
    return nn
Example #2
    def plot(self, eval_value):
        # plt.hide()
        X, Y = sklearn.utils.shuffle(self.x_data, self.y_data, random_state=0)
        # print("eval in plot : ", eval_value)

        self.brain = NeuralNet(eval_value)

        cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
        cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
        # cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])
        # cmap_bold = ListedColormap(['#FF0000', '#00FF00'])

        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

        h = 0.05
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        self.calculate_decision_boundry(X, Y, eval_value)

        Z = self.brain.evaluate(np.c_[xx.ravel(), yy.ravel()])
        # print(xx)
        # print(yy)

        Z = np.reshape(Z, xx.shape)

        plt.figure()
        plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

        plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=cmap_bold)
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())

        plt.show()
Example #3
	def load_latest_network(self):
		file_counter = 0

		self.net = NeuralNet()

		if not os.path.exists('models') or not os.path.isfile("models/best_0.h5"):
			if not os.path.exists('models'):
				os.makedirs('models')

			self.net.net.new_network()
			self.net.save('models', "best_0.h5")
			self.model_string = "best_0.h5"
			self.model_counter = 0
			return

		string_prefix = 'models/best_'
		file_string = string_prefix + str(file_counter) + '.h5'

		while os.path.isfile(file_string):
			file_counter += 1
			file_string = string_prefix + str(file_counter) + '.h5'

		# decrement to find existing one

		file_counter -= 1
		file_string = string_prefix + str(file_counter) + '.h5'

		print("loaded best network " + file_string[7:-3])

		self.net.load("models", file_string[7:])
		self.model_string = file_string[7:-3]
		self.model_counter = file_counter
Example #4
 def create_FR_after_retrain(self,
                             mode,
                             acc_loss,
                             retrain=True,
                             epochs=50,
                             lr=0.01):
     final_rec = self.create_FR_with_different_acc_loss(mode, acc_loss)
     if retrain:
         self.retrain_with_mask(final_rec, epochs=epochs, lr=lr)
     retrain_nn = NeuralNet(ckp_name_prefix=final_rec.get_retrain_prefix())
     retrain_nn.net.initialize_spatial_layers(dat.shape(), cfg.BATCH_SIZE,
                                              self.ps)
     retrain_nn.net.reset_spatial()
     retrain_nn.net.strict_mask_update(update_ids=list(
         range(len(final_rec.mask))),
                                       masks=final_rec.mask)
     if INNAS_COMP:
         test_acc = 100
         ops_saved = 100
         ops_total = 100
     else:
         _, test_acc, _ = retrain_nn.test(self.test_gen)
         ops_saved, ops_total = retrain_nn.net.num_ops()
     final_rec.retrain_update(test_acc, ops_saved, ops_total, epochs, lr)
     print(final_rec)
     save_to_file(final_rec, path=cfg.RESULTS_DIR)
Example #5
def main():

    data_from_file = np.array(read_file("heart_disease_dataset.csv", ';',
                                        True))
    nbEpochs = int(input("Enter the number of epochs: "))
    neuron = NeuralNet(data_from_file, 2, 8, 5)
    print("Enter \n\"r\" for running tests (", nbEpochs,
          "epochs ) or \n\"b\" for the best accuracy result of last testing")

    option = input()
    while option == 'r' or option == 'b':
        if option == 'r':
            neuron.train(nbEpochs)
        elif option == 'b':
            print('Best result achieved during last tests: ' +
                  str(neuron.bestResult))
            #print(neuron.W1)
        print(
            "Enter \n\"r\" for running tests (", nbEpochs,
            "epochs ) or \n\"b\" for the best accuracy result of last testing")
        option = input()

    #print(neuron.W1)
    print("Writing best weights output file")
    filename_weights = 'best_weights.npz'
    np.savez(filename_weights,
             name1=neuron.W1,
             name2=neuron.W2,
             name3=neuron.b1,
             name4=neuron.b2)
    data = np.load(filename_weights)  # reading for future use
    print(data['name1'])
    print(data['name2'])
    print(data['name3'])
    print(data['name4'])
Example #6
def ops_saved_summery(net_name=cfg.NET.__name__,
                      dataset_name=cfg.DATA.name(),
                      mode=Mode.ALL_MODES,
                      ps='*',
                      ones_range=('*', '*'),
                      acc_loss='*',
                      gran_thresh='*',
                      init_acc='*',
                      batch_size=cfg.BATCH_SIZE,
                      max_samples=cfg.TEST_SET_SIZE):
    rec_finder = RecordFinder(net_name, dataset_name, ps, ones_range,
                              gran_thresh, acc_loss, init_acc)
    final_rec_fn = rec_finder.find_rec_filename(mode,
                                                RecordType.FINAL_RESULT_REC)
    if final_rec_fn is None:
        print('No Record found')
        return
    rec = load_from_file(final_rec_fn, '')
    print(rec)

    base_fn = 'ops_summery_' + rec.filename
    summery_fn_pkl = os.path.join(cfg.RESULTS_DIR, base_fn + '.pkl')
    if os.path.exists(summery_fn_pkl):
        arr = load_from_file(summery_fn_pkl, path='')
    else:
        nn = NeuralNet()
        data = Datasets.get(dataset_name, cfg.DATASET_DIR)
        nn.net.initialize_spatial_layers(data.shape(), cfg.BATCH_SIZE,
                                         rec.patch_size)
        test_gen, _ = data.testset(batch_size=batch_size,
                                   max_samples=max_samples)

        arr = [None] * len(rec.mask)
        for idx, layer in enumerate(rec.mask):
            nn.net.reset_spatial()
            print(
                f"----------------------------------------------------------------"
            )

            nn.net.strict_mask_update(update_ids=[idx], masks=[layer])
            _, test_acc, _ = nn.test(test_gen)
            ops_saved, ops_total = nn.net.num_ops()

            arr[idx] = (ops_saved, ops_total, test_acc)
            nn.net.print_ops_summary()

        print(
            f"----------------------------------------------------------------"
        )
        nn.net.reset_spatial()
        save_to_file(arr, use_default=False, path='', filename=summery_fn_pkl)

    out_path = os.path.join(cfg.RESULTS_DIR, base_fn + ".csv")
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['layer', 'ops_saved', 'ops_total'])
        for idx, r in enumerate(arr):
            writer.writerow([idx, r[0], r[1]])

    return arr
Example #7
 def get_master_parent(self, num_inputs, num_outputs, data):
     if path.exists("neural_net.txt"):
         old_net = pickle.load(open("neural_net.txt", "rb"))
         old_net.master_reset_innov()
         return old_net
     net = NeuralNet(num_inputs, num_outputs, data)
     net.reset_neural_net()
     return net
Example #8
    def run(self):
        for hyperparameters in tqdm(
                self.dict_product(self.hyperparameters_lists)):
            model = NeuralNet(hyperparameters)
            log_dict = model.run()
            self.log(log_dict, hyperparameters)

        self.log_to_file(self.log_file)
Example #9
    def __init__(self, num_input, batch_size=100, num_epochs=10, display=False,
                 blacklist=[], whitelist=[], normalize=False, display_step=1):
        """Init classifier"""

        # Network parameters
        self.l_rate = 0.001
        self.dropout_prob = 0.5
        self.reg_param = 0.01
        self.std_param = 5
        self.training_epochs = num_epochs
        self.display_step = display_step
        self.batch_size = batch_size
        self.display = display
        self.normalize = normalize

        self.blacklist = blacklist
        self.whitelist = whitelist

        assert not (self.blacklist and self.whitelist), (
            'Both whitelist and blacklist are defined'
        )

        ############################
        # TensorFlow Variables below
        ############################

        # Placeholders
        self.X = tf.placeholder('float', [None, num_input], name='X')
        self.Y = tf.placeholder('int32', [None], name='Y')
        self.keep_prob = tf.placeholder('float')

        # Cost threshold for anomaly detection
        self.cost_threshold = tf.Variable(0, dtype=tf.float32)

        # for normalization
        self.feature_min = tf.Variable(np.zeros(num_input), dtype=tf.float32)
        self.feature_max = tf.Variable(np.zeros(num_input), dtype=tf.float32)

        # Create Network
        network_sizes = [num_input, 25, 2, 25, num_input]
        activations = [tf.nn.relu, tf.nn.sigmoid, tf.nn.relu, tf.nn.sigmoid]

        self.neural_net = NeuralNet(network_sizes, activations)

        prediction = self.neural_net.create_network(self.X, self.keep_prob)

        self.cost = tf.reduce_mean(tf.square(prediction - self.X))
        self.cost += self.reg_param * self.neural_net.get_l2_loss()
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.l_rate)
        self.optimizer = self.optimizer.minimize(self.cost)

        self.init_op = tf.initialize_all_variables()
        self.saver = tf.train.Saver()

        # for gpu
        self.config = tf.ConfigProto(log_device_placement=False)
        self.config.gpu_options.allow_growth = True
Example #10
def test_triangle(n, norm_vect):
    sym = []

    sym.append((norm_vect, 0))
    for i in range(n):
        sym.append((-norm_vect, 10 * 2**(n - i - 1)))

    folding_net = NeuralNet.folding_net(sym, optimize=True)
    layers = []
    layers.append(
        Layer(2,
              2,
              weights=compute_rot(norm_vect),
              bias=nd.zeros(2),
              function=nd.identity))
    layers.append(
        Layer(1,
              2,
              weights=nd.array([[-10, 1]]),
              bias=nd.array([0]),
              function=echelon))
    compute_net = NeuralNet([2, 2, 1], layers)

    size = 2**(n + 12)
    inputs = nd.zeros((2, size))
    inputs[0] = nd.random.uniform(-2**(n + 1), 2**(n + 1), size)
    inputs[1] = nd.random.uniform(-2**(n + 1), 2**(n + 1), size)

    outputs = compute_net.compute(folding_net.compute(inputs))

    x = list(inputs[0].asnumpy())
    y = list(inputs[1].asnumpy())
    results = list(outputs.asnumpy()[0])

    #    def triangle(x, y):
    #        x = nd.abs(x)
    #        x_floor = nd.floor(x)
    #        x = nd.where(nd.modulo(x_floor, 2), 1 - x + x_floor, x - x_floor)
    #        return y - x > 0

    #    true_outputs = list(triangle(inputs[0], inputs[1]).asnumpy())

    colors = ['red', 'green']

    plt.scatter(x,
                y,
                c=results,
                cmap=matplotlib.colors.ListedColormap(colors),
                marker='.')
    plt.show()

    #    plt.scatter(x,y, c = true_outputs, cmap=matplotlib.colors.ListedColormap(colors), marker = '.')
    #    plt.show()

    return sym, folding_net, compute_net
Example #11
def main():
    """ Kieran Ringel
    For each data set, three lines are run in main.
    The first creates an instance of Org with the arguments being the file name, the location of the
    header so it can be removed ([-1] if there is no header), the location of the class column so it
    can be moved to the last column, and the column locations of any categorical features so that
    one-hot encoding can be applied.
    The next line opens the file and returns its dataframe.
    The final line creates an instance of NeuralNet with the arguments being the dataframe, the number
    of hidden layers, the number of hidden nodes per layer, whether classification or regression is to
    be performed, how the weights are trained (GA, DE, BP, PSO), and the population size (1 for
    algorithms that don't need a population).
    """

    print('Breast Cancer')
    print("generations: 5 \n"
          "pop = 25 \n"
          "ts = 4 \n"
          "weight = 4 \n"
          "mutations = 10")
    #print("generations: 40 \n"
    #      "beta = 1.7 \n"
    #     "pop = 25 \n"
    #    "pr = .7")
    cancer = Org('Data/breast-cancer-wisconsin.data', [-1], -1, [-1])
    df = cancer.open()
    # ##NN(file, number hidden layers, number hidden nodes per layer)
    NeuralNet(df, 0, 12, 'classification', 'DE', 20)

    #print('glass')
    #glass = Org('Data/glass.data', [-1], -1, [-1])
    #df = glass.open()
    #NeuralNet(df, 2, 6, 'classification', "DE", 5)

    #print('soybean')
    #soybean = Org('Data/soybean-small.data', [-1], -1, [-1])
    #df = soybean.open()
    #NeuralNet(df, 0, 17, 'classification', "DE", 30)

    #print('abalone')
    #print("generations: 20 \n"
    #        "pop = 25 \n"
    #       "ts = 4 \n"
    #      "weight = 4 \n"
    #     "mutations = 30")
    #abalone = Org('Data/abalone.data', [-1], -1, [0])
    #df = abalone.open()
    #NeuralNet(df, 2, 1, 'regression', 'GA', 25)

    print('machine')
    machine = Org('Data/machine.data', [-1], -1, [-1])
    df = machine.open()
    NeuralNet(df, 2, 3, 'regression')
    print(df)
Example #12
def train_nn(train_x, train_y, arch, LAMBDA):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        nn = NeuralNet(train_x,
                       train_y,
                       architecture=arch,
                       LAMBDA=LAMBDA,
                       maxiter=10000)
        print(nn._architecture)
        nn.train()
    return nn
Example #13
def train():
    nn = NeuralNet()
    # dataset = Datasets.get('MNIST', cfg.DATASET_DIR)
    dataset = Datasets.get('FashionMNIST', cfg.DATASET_DIR)

    test_gen, _ = dataset.testset(batch_size=cfg.BATCH_SIZE,
                                  max_samples=cfg.TEST_SET_SIZE,
                                  specific_label=target)
    (train_gen, _), (_, _) = dataset.trainset(batch_size=cfg.BATCH_SIZE,
                                              valid_size=0.0,
                                              specific_label=target)

    # Not the cleanest code ever, but I reuse the MNIST generation code of ClassificationDataset class
    # to take MNIST dataset
    mnist_train_dataset = FeatureClassification(train_gen.dataset,
                                                target_label=target,
                                                transforms=cfg.NUM_TRANS)
    mnist_test_dataset = FeatureClassification(test_gen.dataset,
                                               target_label=target,
                                               transforms=cfg.NUM_TRANS)
    mnist_eval_dataset = FeatureClassification(test_gen.dataset,
                                               target_label=None,
                                               transforms=cfg.NUM_TRANS)

    train_loader = torch.utils.data.DataLoader(
        mnist_train_dataset,
        batch_size=cfg.BATCH_SIZE,
        sampler=torch.utils.data.RandomSampler(mnist_train_dataset),
        num_workers=4,
        pin_memory=1)
    test_loader = torch.utils.data.DataLoader(mnist_test_dataset,
                                              batch_size=cfg.BATCH_SIZE,
                                              sampler=None,
                                              num_workers=4,
                                              pin_memory=1)
    eval_loader = torch.utils.data.DataLoader(mnist_eval_dataset,
                                              batch_size=cfg.BATCH_SIZE,
                                              sampler=None,
                                              num_workers=4,
                                              pin_memory=1)

    nn.train(train_loader,
             test_loader,
             epochs=epochs,
             lr=0.01,
             lr_plan={
                 0: 0.001,
                 10: 0.0001
             })
    score_func, labels = nn.evaluate(eval_loader)

    pickle_out = open('dump.pickle', 'wb')
    pickle.dump({'score_func': score_func, 'labels': labels}, pickle_out)
    pickle_out.close()
Example #14
def testUpdateWeights():
    neuralNet = NeuralNet()
    gTrain = [[1, 1, 1, 1, 1, 1, 1, 1, 1], 
            [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
            [1, 1, 1, 0.5, 0.5, 0.5, 1, 1, 1]]
    cTrain = [[0.5, 0.5, 0.5], [0.4, 0.4, 0.4], [0.3, 0.3, 0.3]]
    w = neuralNet.miniBatch(gTrain, cTrain)
    print(w)
    print("------------------------------")
    neuralNet.updateDerivatives(w)
    print(neuralNet.layers[1][1].weights)
Example #15
class Server(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.us = ''
        #Sender IP is changed after every received data packet
        self.them = ''
        self.port = 50006
        #Create a socket on port 50007
        self.s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.s.bind((self.us, 50007))
        #Create a neural net, with weights from data file.
        self.net = NeuralNet(3, 2, 1, 8)
        self.net.readWeights("weights.dat")
        #Set run flag, and execute run() in separate thread.
        self.runFlag = True
        self.start()

    #The server loop
    def run(self):
        while self.runFlag:
            #Wait for a datapacket, and save data + sender
            data, addr = self.s.recvfrom(1024)  # buffer size is 1024 bytes
            self.them = addr[0]
            #Allow for remote server stop
            stop = (data.decode('utf-8') == "Stop!")
            if stop:
                self.stop()
            #Decode datapacket into string
            data = data.decode('utf-8').strip().split()
            #If data is missing, skip
            if len(data) != 3:
                continue
            #Else, parse into numbers, and feed into neural network
            inp = []
            for x in data:
                inp.append(float(x))
            otp = self.net.processInput(inp)
            #Send back the output.
            returnString = str(otp[0]) + " " + str(otp[1])
            self.send(returnString)

    #Function that sends back message to self.them, which is last sender of
    #a datapacket
    def send(self, message):
        self.s.sendto(message.encode('utf-8'), (self.them, self.port))

    #Sets flag to exit server loop, and closes socket. Causes exception
    #if currently in recvfrom().
    def stop(self):
        self.runFlag = False
        self.s.close()
        print("Server thread ending, socket closed.")
Example #16
def run_kl_neural_net():
    hyperparameters = {
        "middle_size": 20,
        "L2_reg": 0.1,
        "step_size": 0.01,
        "activation": "None",
        "y_type": "heatmap",
        "w_var": 0.05,
        "loss_type": "kl"
    }
    neural_net = NeuralNet(hyperparameters)
    neural_net.run()
Example #17
def train_pred_layers(train_gen,
                      test_gen,
                      arch,
                      dataset,
                      model_chkp=None,
                      mask_list=None,
                      epochs=5,
                      pred_list=None):
    """
    Train ZAPs of a pretrained model using the Adam optimizer.
    Each ZAP is trained in isolation.
    :param train_gen: the training set used to train the predictors
    :param test_gen: the test set to evaluate the predictor performance
    :param arch: a string that represents the model architecture, e.g., 'alexnet'
    :param dataset: a string that represents the dataset, e.g., 'cifar100'
    :param model_chkp: a model checkpoint path to be loaded (default: None)
    :param mask_list: specific mask list to train with (default: [6, 5, 4, 3])
    :param epochs: number of epochs to train each ZAP (default: 5)
    :param pred_list: specific prediction layers to train (default: all)
    """
    # Set default masks values
    if mask_list is None:
        mask_list = [6, 5, 4, 3]

    for mask in mask_list:
        cfg.LOG.start_new_log(
            name='{}-{}_zap-train_mask-{}'.format(arch, dataset, mask))

        mask = int(mask)
        nn = NeuralNet(arch, dataset, model_chkp=model_chkp)

        for pred_idx, pred_layer in enumerate(nn.model.pred_layers):
            if pred_list is not None:
                # Skip ZAPs that are not in the list
                if str(pred_idx) not in pred_list:
                    continue

            cfg.LOG.write_title('ZAP #{}'.format(pred_idx),
                                pad_width=50,
                                pad_symbol='=')
            pred_layer.set_pattern(mask)
            # Threshold is set here for test purposes only, i.e., it does not affect the training process
            pred_layer.threshold = 0.0
            nn.train_pred(train_gen,
                          test_gen,
                          epochs,
                          pred_idx=pred_idx,
                          lr=0.01)

        cfg.LOG.close_log()
        nn = None
        torch.cuda.empty_cache()
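
A minimal, hedged call of train_pred_layers might look like the sketch below. The 'alexnet' and 'cifar100' values are the examples named in the docstring; the checkpoint path and the way the data generators are obtained are placeholders, since they depend on the surrounding project.

# Hedged usage sketch: 'checkpoints/alexnet_cifar100.pt' and get_loaders() are
# illustrative placeholders, not names taken from the original project.
train_gen, test_gen = get_loaders('cifar100', batch_size=cfg.BATCH_SIZE)  # hypothetical helper
train_pred_layers(train_gen,
                  test_gen,
                  arch='alexnet',
                  dataset='cifar100',
                  model_chkp='checkpoints/alexnet_cifar100.pt',
                  mask_list=[6, 5],       # train ZAPs for two mask patterns only
                  epochs=5,
                  pred_list=['0', '1'])   # restrict training to the first two ZAPs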
Example #18
	def train_and_eval_new_network(self):
		#training_data = self.generate_training_data(params.train_game_size)

		if self.count_num_saved_games() < params.train_game_size:
			print("Not enough data to train")
			return

		num_games, training_data = self.load_training_data()

		print("Loaded " + str(num_games) + " games")

		self.net.save('models', "temp.h5")
		new_net = NeuralNet()
		new_net.load('models', "temp.h5")

		new_net.train(training_data)

		# compare 2 nets

		result = DuelManager().play_games(SaltZeroAgent(new_net), SaltZeroAgent(self.net), params.duel_game_count, debug = True, \
			use_gating = True, gating_threshold = params.gating_threshold)

		print("finished dueling, result is " + str(result[0]) + " to " + str(result[1]) + " new to old")

		if (result[0] / (result[0] + result[1]) >= params.gating_threshold):
			# Passed!

			print("Passed! saving new net")
			new_net.save("models", "best_" + str(self.model_counter + 1) + ".h5")
			self.model_counter += 1
			self.net = new_net
Example #19
class NNDriver:
    """Class which gets the data ready to be used in the neural net
    and calls the train and predict functions with the parameters specified by the user"""
    def __init__(self, train, test, valid, p):
        # get the data as ndarrays for training, testing, and validation
        train_data = np.asarray(train)
        test_data = np.asarray(test)
        valid_data = np.asarray(valid)

        # Create the samples ndarray matrices with only the columns for the features
        # (leaving off the last column, which holds the classifications)
        self.X_train = train_data[:, :-1]
        self.X_valid = valid_data[:, :-1]
        self.X_test = test_data[:, :-1]

        # Create the classification ndarray vectors from the last column of the data
        self.Y_train = (train_data[:, -1]).astype(int)
        self.Y_valid = (valid_data[:, -1]).astype(int)
        self.Y_test = (test_data[:, -1]).astype(int)

        # get the parameters needed to build the neural net
        num_train_samples, num_features = self.X_train.shape
        num_classifications = max(train.classes)
        layers, activation, alpha, decay = p  # get the user's choices for parameters
        self.NN = NeuralNet(num_features, num_classifications + 1, alpha,
                            decay, layers)

    def build(self, params=None, Report=None):
        """Calls the neural net's train function using the formatted training and validation data"""
        epoch = self.NN.train(self.X_train, self.Y_train, self.X_valid,
                              self.Y_valid)

        # record the number of epochs run during training
        if Report != None:
            Report['netTrainCycle'] = str(epoch)

    def predict(self):
        """Calls the neural net's predict function using the formatted test data"""
        Y_test_predict, test_loss = self.NN.predict(self.X_test, self.Y_test)
        return Y_test_predict

    def params(self, p=None):
        """Sets the parameters for the neural net based on the user's choices"""
        if p != None:
            self.NN.alpha = p[2]
            self.NN.activation = p[1]
            self.NN.layers = p[0]
            self.NN.decay = p[3]
        return None
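
A rough usage sketch for the driver above follows. NNDriver and its NeuralNet come from the surrounding project; the split objects and parameter tuple here are made-up stand-ins (each split is row data with the class label in the last column, and the training split carries the classes attribute the constructor reads).

import numpy as np

class Split(list):
    """Hypothetical stand-in for the project's dataset splits."""
    def __init__(self, rows, classes):
        super().__init__(rows)
        self.classes = classes

rng = np.random.default_rng(0)
rows = [list(rng.random(4)) + [int(rng.integers(0, 3))] for _ in range(40)]
train = Split(rows[:30], classes=[0, 1, 2])
test = Split(rows[30:35], classes=[0, 1, 2])
valid = Split(rows[35:], classes=[0, 1, 2])

p = ([16, 8], 'relu', 0.01, 1e-4)   # layers, activation, alpha, decay
driver = NNDriver(train, test, valid, p)
report = {}
driver.build(Report=report)         # trains and records the epoch count
predictions = driver.predict()      # predictions for the held-out test split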
Example #20
 def construct_non_jit(self, jit):
     child = NeuralNet(self.num_inputs, self.num_outputs)
     for i in range(self.networks[0].master_innov[0] - 1):
         res = self.determine_connection(i, jit, jit)
         child.handle_new_connection(res[0], res[1])
     child.revalidate_node_order()
     child.set_next_innov(self.networks[0].master_innov[0])
     return child
Example #21
 def __init__(self, num_inputs, num_outputs, pop_size, data):
     self.num_inputs = num_inputs
     self.num_outputs = num_outputs
     self.pop_size = pop_size
     self.data = data
     self.reference = NeuralNet(num_inputs, num_outputs, data)
     self.reference.reset_neural_net()
     master_parent = self.get_master_parent(num_inputs, num_outputs, data)
     self.networks = [master_parent]
     for i in range(pop_size):
         net = copy.deepcopy(self.reference)
         net.randomize()
         net.mutate()
         self.networks.append(net)
     self.species = self.assign_pop_to_species(self.networks, [])
Example #22
def main():
    model = Doc2Vec.load('400_pvdm_doc2vec.d2v')
    model_dbow = Doc2Vec.load('400_pvdbow_doc2vec.d2v')
    #mistake pvdm is actually pv-dbow
    path = 'datasets/'

    files = [f for f in listdir(path) if isfile(join(path,f))]
    files.pop(0)

    data_loader = DataLoader(path)

    domains = data_loader.csv_files


    names = {1: 'title', 4: 'abstract', 5: 'mesh', 'y': 6}

    domain_features = data_loader.get_feature_matrix(names)

    #get size
    n_total_documents = 0

    for domain in domain_features:
        n_total_documents+=len(domain[0])

    all_features = numpy.zeros(shape=(n_total_documents, 800))
    all_labels = numpy.asarray([])
    i = 0

    for domain in domain_features:
        features, labels = domain
        all_labels = numpy.hstack((all_labels, labels))
        for feature_vector in features:
            preprocessed_line = list(preprocess(feature_vector))
            all_features[i, 0:400] = numpy.float_(model.infer_vector(preprocessed_line))
            all_features[i, 400:] = numpy.float_(model_dbow.infer_vector(preprocessed_line))
            i+=1
    all_labels = numpy.asarray(all_labels)
    all_labels[all_labels == -1] = 0
    all_labels = numpy.intc(all_labels)
    train, test = data_loader.create_random_samples(all_features, all_labels)
    train_x, train_y = train
    test_x, test_y = test

    classifier = NeuralNet(n_hidden_units=[200], output_size=2, batch_size=20, n_epochs=200, dropout=True,
                                   activation_function='relu', learning_rate=.3, momentum=True, momentum_term=.5)

    classifier.train(train_x, train_y)
    classifier.test(test_x, test_y)
Example #23
def test_triangle_horiz(n):
    norm_vect = nd.array([1, 0])
    sym = []

    sym.append((norm_vect, 0))
    for i in range(n):
        sym.append((-norm_vect, 2**(n - i - 1)))

    folding_net = NeuralNet.folding_net(sym, optimize=True)
    layers = []
    layers.append(
        Layer(2,
              2,
              weights=compute_rot(norm_vect),
              bias=nd.zeros(2),
              function=nd.identity))
    layers.append(
        Layer(1,
              2,
              weights=nd.array([[-1, 1]]),
              bias=nd.array([0]),
              function=echelon))
    compute_net = NeuralNet([2, 2, 1], layers)

    size = 2**(n + 12)
    inputs = nd.zeros((2, size))
    inputs[0] = nd.random.uniform(-2**(n), 2**(n), size)
    inputs[1] = nd.random.uniform(-0.1, 1.1, size)

    outputs = compute_net.compute(folding_net.compute(inputs))

    def triangle(x, y):
        x = nd.abs(x)
        x_floor = nd.floor(x)
        x = nd.where(nd.modulo(x_floor, 2), 1 - x + x_floor, x - x_floor)
        return y - x > 0

    true_outputs = triangle(inputs[0], inputs[1])

    errors = nd.sum(nd.abs(true_outputs - outputs))

    print("MODEL PROPERTY :")
    print("--------------------------------------")
    print("Number of layers :",
          folding_net.layersNumber + compute_net.layersNumber)
    print("Number of parameters :", folding_net.size() + compute_net.size())
    print("Errors :", errors, "/", size, "=", errors / size)
    print("--------------------------------------")
Example #24
def hidden_layer_crossvalidation(datatuple):
    """ Cross validation to choose the number of hidden layers"""
    FLAGS.create_dir()
    # save configuration
    configure_name = "configure.py"
    copyfile(configure_name, FLAGS.model_dir + configure_name)

    trainaccs = []
    validaccs = []

    for i in [3, 4, 5, 6]:
        FLAGS.n_nodes = [32] * i + [1]
        FLAGS.n_layer = len(FLAGS.n_nodes)
        print(FLAGS.n_nodes)
        nn_model = NeuralNet(FLAGS.n_layer, FLAGS.n_nodes, FLAGS.n_feat,
                             FLAGS.func_num)
        nn_model = Adamdelta(datatuple, nn_model, FLAGS.adadelta_gamma)
        trainaccs.append(nn_model.train_acc)
        validaccs.append(nn_model.valid_acc)
        nnu.save_model(
            nn_model,
            FLAGS.model_dir + "Adamdelta_" + str(FLAGS.max_iteration) + ".p")

    print "final round training accuracy: ", trainaccs
    print "final round validation accuracy: ", validaccs
    results = tuple([trainaccs, validaccs])
    np.save(FLAGS.model_dir + "results.npy", results)
Example #25
 def __init__(self):
     threading.Thread.__init__(self)
     self.us = ''
     #Sender IP is changed after every received data packet
     self.them = ''
     self.port = 50006
     #Create a socket on port 50007
     self.s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
     self.s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     self.s.bind((self.us, 50007))
     #Create a neural net, with weights from data file.
     self.net = NeuralNet(3, 2, 1, 8)
     self.net.readWeights("weights.dat")
     #Set run flag, and execute run() in separate thread.
     self.runFlag = True
     self.start()
Example #26
def processDirList(outPath, parent, dirs, layer, gpuNum):
    net = NeuralNet('vgg19', 224, True, gpuNum)

    for directory in dirs:
        onlyFiles = [
            f for f in os.listdir(os.path.join(parent, directory))
            if os.path.isfile(os.path.join(parent, directory, f))
        ]

        # Try to make the output directory
        try:
            os.makedirs(os.path.join(outPath, directory))
        except:
            pass

        for fileName in onlyFiles:
            _, ext = os.path.splitext(fileName)

            # Only process images
            if 'jpg' in ext.lower() or 'png' in ext.lower():
                inFullPath = os.path.join(parent, directory, fileName)
                outFile = fileName + '_l' + layer + '_vgg.bin'
                outFullPath = os.path.join(outPath, directory, outFile)

                # Does the file already exist? If so, let's skip.
                if not os.path.isfile(outFullPath):
                    rep = getRep(inFullPath, outFullPath, net)
                    if rep is not None:
                        matio.save_mat(outFullPath, rep)
                    else:
                        pass
                else:
                    pass
Example #27
def info_tutorial():
    nn = NeuralNet()
    x_shape = dat.shape()
    test_gen, _ = dat.testset(batch_size=cfg.BATCH_SIZE,
                              max_samples=cfg.TEST_SET_SIZE)
    nn.test(test_gen, print_it=True)
    nn.net.initialize_spatial_layers(x_shape, cfg.BATCH_SIZE, PATCH_SIZE)
    nn.summary(x_shape, print_it=True)
    nn.print_weights()
    print(nn.output_size(x_shape))

    # Spatial Operations, defined on the net itself. Remember that after enabling a layer, ops are affected
    assert nn.net.num_spatial_layers() != 0
    nn.net.print_spatial_status()
    # nn.train(epochs=1, set_size=5000, lr=0.1, batch_size=cfg.BATCH_SIZE)  # Train to see fully disabled performance
    nn.net.print_ops_summary()
    nn.net.print_ops_summary(
        use_conv=True)  # Count convolution operations instead of MAC
    print(nn.net.num_ops())  # (ops_saved, total_ops)

    # Given x, we generate all spatial layer requirement sizes:
    spat_sizes = nn.net.generate_spatial_sizes(x_shape)
    print(spat_sizes)
    p_spat_sizes = nn.net.generate_padded_spatial_sizes(x_shape, PATCH_SIZE)
    print(p_spat_sizes)

    # Generate a constant 1 value mask over all spatial nets
    print(nn.net.enabled_layers())
    nn.net.fill_masks_to_val(1)
    print(nn.net.enabled_layers())
    print(nn.net.disabled_layers())
    nn.net.print_spatial_status()  # Now all are enabled, since the masks were set
    nn.train(epochs=1, set_size=5000, lr=0.1,
             batch_size=cfg.BATCH_SIZE)  # Train to see all-layers-enabled performance
    nn.net.print_ops_summary()
    nn.net.print_ops_summary(
        use_conv=True)  # Count convolution operations instead of MAC
    nn.net.reset_spatial()  # Disables layers as well
    nn.net.print_ops_summary()
    nn.net.print_ops_summary(use_conv=True)
    # Turns on 3 ids and turns off all others
    chosen_victims = random.sample(range(nn.net.num_spatial_layers()), 4)
    nn.net.strict_mask_update(update_ids=chosen_victims[0:3],
                              masks=[
                                  torch.zeros(p_spat_sizes[chosen_victims[0]]),
                                  torch.zeros(p_spat_sizes[chosen_victims[1]]),
                                  torch.zeros(p_spat_sizes[chosen_victims[2]])
                              ])

    # Turns on one additional id and *does not* turn off all others
    nn.net.lazy_mask_update(
        update_ids=[chosen_victims[3]],
        masks=[torch.zeros(p_spat_sizes[chosen_victims[3]])])
    nn.net.print_spatial_status()  #
    print(nn.net.enabled_layers())
    nn.train(epochs=1, set_size=5000, lr=0.1,
             batch_size=cfg.BATCH_SIZE)  # Run with 4 layers on
    nn.net.print_ops_summary()
    nn.net.print_ops_summary(use_conv=True)
Example #28
    def __init__(self,
                 parent=None,
                 width=4,
                 height=3,
                 dpi=100,
                 x_data=[],
                 y_data=[]):
        fig = Figure(figsize=(width, height), dpi=dpi)

        FigureCanvas.__init__(self, fig)
        self.setParent(parent)
        self.x_data = []
        self.ax = self.figure.add_subplot(111)
        self.y_data = []
        FigureCanvas.setSizePolicy(self, QSizePolicy.Expanding,
                                   QSizePolicy.Expanding)
        FigureCanvas.updateGeometry(self)
        self.brain = NeuralNet()
Example #29
def train():
    """
    Train the network on the specific label given as a command line argument.
    The function dumps its evaluation data for further analysis.
    """
    nn = NeuralNet()
    test_import = Datasets.get(dataset, cfg.DATASET_DIR)._test_importer()
    train_import = Datasets.get(dataset, cfg.DATASET_DIR)._train_importer()

    # Not the cleanest code ever, but I reuse the MNIST generation code of ClassificationDataset class
    # to take MNIST dataset
    test_dataset = FeatureClassification(test_import,
                                         target_label=None,
                                         transforms=cfg.NUM_TRANS)
    train_dataset = FeatureClassification(train_import,
                                          target_label=label,
                                          transforms=cfg.NUM_TRANS)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg.BATCH_SIZE,
        sampler=torch.utils.data.RandomSampler(train_dataset),
        num_workers=4,
        pin_memory=1)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=cfg.BATCH_SIZE,
                                              sampler=None,
                                              num_workers=4,
                                              pin_memory=1)

    nn.train(train_loader,
             test_loader,
             epochs=int(epochs / cfg.NUM_TRANS),
             lr=0.01,
             lr_plan={
                 5: 0.001,
                 10: 0.0001
             })
    score_func, labels = nn.evaluate(test_loader)

    pickle_out = open('dump.pickle', 'wb')
    pickle.dump({'score_func': score_func, 'labels': labels}, pickle_out)
    pickle_out.close()
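
The dump produced above can be read back later for analysis. A minimal sketch, assuming the script has already written dump.pickle to the working directory:

import pickle

# Load the evaluation data dumped by train() above.
with open('dump.pickle', 'rb') as pickle_in:
    dump = pickle.load(pickle_in)

score_func = dump['score_func']   # per-sample scores returned by nn.evaluate()
labels = dump['labels']           # the matching labels
print(len(labels), 'evaluation samples loaded')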
Example #30
    def __init__(self,
                 patch_size,
                 ones_range,
                 gran_thresh,
                 max_acc_loss,
                 init_acc=None,
                 test_size=cfg.TEST_SET_SIZE,
                 patterns_idx=None):
        self.ps = patch_size
        self.max_acc_loss = max_acc_loss
        self.gran_thresh = gran_thresh

        if patterns_idx is None:
            self.ones_range = ones_range
            self.input_patterns = None
        else:
            patterns_rec = load_from_file(
                f'all_patterns_ps{self.ps}_cluster{patterns_idx}.pkl',
                path=cfg.RESULTS_DIR)
            self.ones_range = (patterns_rec[1], patterns_rec[1] + 1)
            self.input_patterns = patterns_rec[2]

        self.full_net_run_time = None
        self.total_ops = None

        self.nn = NeuralNet()
        self.nn.net.initialize_spatial_layers(dat.shape(), cfg.BATCH_SIZE,
                                              self.ps)
        self.test_gen, _ = dat.testset(batch_size=cfg.BATCH_SIZE,
                                       max_samples=cfg.TEST_SET_SIZE)
        self.test_set_size = cfg.TEST_SET_SIZE
        if INNAS_COMP:
            init_acc = DEBUG_INIT_ACC
        if init_acc is None:
            _, test_acc, correct = self.nn.test(self.test_gen)
            print(f'==> Asserted test-acc of: {test_acc} [{correct}]\n ')
            self.init_acc = test_acc  # TODO - Fix initialize bug
        else:
            self.init_acc = init_acc
        self.record_finder = RecordFinder(cfg.NET.__name__, dat.name(),
                                          patch_size, ones_range, gran_thresh,
                                          max_acc_loss, self.init_acc)
Example #31
 def __init__(self, sess, observation_space, action_space, LEARNING_RATE,
              NET_SIZE, TAU, action_scale):
     self.sess = sess
     self.observation_space = observation_space
     self.action_space = action_space
     self.LEARNING_RATE = LEARNING_RATE
     self.input_pl, self.gradients_pl = self.placeholder_inputs()
     self.network = NeuralNet('Actor', self.observation_space,
                              self.action_space, NET_SIZE, TAU)
     self.prediction = self.network.inference_actor(self.input_pl,
                                                    action_scale)
     self.target_network = NeuralNet('Target_Actor', self.observation_space,
                                     self.action_space, NET_SIZE, TAU)
     self.target_prediction = self.target_network.inference_actor(
         self.input_pl, action_scale)
     variables = self.network.get_variables()
     self.gradients = self.gradients(self.prediction, variables,
                                     self.gradients_pl)
     self.optimizer = self.optimizer(self.gradients,
                                     self.network.get_variables())
Example #32
def training():
    # dat.data_summary(show_sample=False)
    nn = NeuralNet(resume=True)  # Spatial layers are by default, disabled
    nn.summary(dat.shape())
    nn.train(epochs=50, lr=0.01)
    test_gen, _ = dat.testset(batch_size=cfg.BATCH_SIZE,
                              max_samples=cfg.TEST_SET_SIZE)
    test_loss, test_acc, count = nn.test(test_gen)
    print(
        f'==> Final testing results: test acc: {test_acc:.3f} with {count}, test loss: {test_loss:.3f}'
    )
Example #33
class Tank(GameObject):

	maxspeed = 5
	maxturnspeed = 0.3
	size = 15
	color = (255, 0, 0)# Red

	def __init__(self, game, x, y, brain_weights=[]):
		GameObject.__init__(self, game, x, y)
		self.speed = 0
		self.direction = 90
		self.score = 0
		self.brain = NeuralNet(brain_weights)


	def get_nearest_food(self, game):

		# Find the nearest food instance
		nearestfood = -1
		for food in game.entitylist:
			if isinstance(food, Food):# If the entity is a food entity
				if nearestfood == -1:
					nearestfood = food
					distance = math.hypot(self.x-food.x, self.y-food.y)
				elif math.hypot(self.x-food.x, self.y-food.y) < distance:
					nearestfood = food
					distance = math.hypot(self.x-food.x, self.y-food.y)

		return nearestfood


	def step(self, game):

		nearestfood = self.get_nearest_food(game)

		# Get the values needed by the brain
		looking_at = (math.cos(self.direction), math.sin(self.direction))# Where am I looking at?

		nearestfood_vector_length = math.hypot(nearestfood.x-self.x, nearestfood.y-self.y)
		nearestfood_vector = ((nearestfood.x-self.x)/nearestfood_vector_length, (nearestfood.y-self.y)/nearestfood_vector_length)# In what direction is the nearest food?

		# Run the brain
		motorleft, motorright = self.brain.run((looking_at, nearestfood_vector))

		# Turn the tank
		rotation = motorleft-motorright # Get the direction changes
		rotation = min(self.maxturnspeed, max(self.maxturnspeed*(-1), rotation)) # Clamp to the max rotation
		self.direction = (self.direction+rotation) % 360 # Add the rotation to the self.direction

		# Update the speed
		self.speed = min(self.maxspeed, max(self.maxspeed*(-1), motorleft + motorright))

		# Check collision with the nearest food.
		if math.hypot(self.x-nearestfood.x, self.y-nearestfood.y) < self.size:
			self.score += 1
			nearestfood.destroy(game)

		# Move the tanks according to the speed
		self.x += math.cos(self.direction)*self.speed
		self.y += math.sin(self.direction)*self.speed

		# Make the borders of the game go in one-another
		if self.x < 0:# Exceeded left border
			self.x = game.surface.get_width()-self.x
		elif self.x > game.surface.get_width():# Exceeded right border
			self.x = self.x-game.surface.get_width()

		if self.y < 0:# Exceeded top border
			self.y = game.surface.get_height()-self.y
		elif self.y > game.surface.get_height():# Exceeded bottom border
			self.y = self.y-game.surface.get_height()


	def draw(self, game):
		rect = pygame.Rect(0, 0, self.size, self.size)
		rect.center = (self.x, self.y)
		if self in game.get_highscores():
			color = (255, 255, 0)# Yellow
		else:
			color = self.color# Red

		pygame.draw.rect(game.surface, color, rect)
		pygame.draw.line(game.surface, color, (self.x, self.y), (self.x+math.cos(self.direction)*(self.size+10), self.y+math.sin(self.direction)*(self.size+10)))


	def destroy(self, game):
		self.brain = 0
		GameObject.destroy(self, game)
Example #34
	def __init__(self, game, x, y, brain_weights=[]):
		GameObject.__init__(self, game, x, y)
		self.speed = 0
		self.direction = 90
		self.score = 0
		self.brain = NeuralNet(brain_weights)
Example #35
"""
############
Main program
############
"""
cmd = ArgParser()


"""
##########################
Neural net: initialization
##########################
"""
seed(0)

if cmd.inFile:
    NN = NeuralNet()
    NN.read(cmd.inFile)
else:
    NN = NeuralNet(cmd.Nvars, cmd.Nperceptrons, cmd.Nneurons)
NN.printParams()


"""
###############
Hyperparameters
###############
"""
Ntraining  = 300000
Nruntest   = 10000
Nminibatch = 8
toScramble = {2:[5]}
Example #36
class Classifier:
    """Unary classifier to detect anomalous behavior

    Args:
        num_input (int): Number of input for classifier
        batch_size (int = 100): Batch size
        num_epochs (int = 10): Number of training epochs
        display (bool = False): Flag to print output
        blacklist (list = []): List of features to ignore,
            cannot be used if whitelist is being used
        whitelist (list = []): List of features to use,
            cannot be used if blacklist is being used
        normalize (bool = False): Flag to determine if data is normalized
        display_step (int = 1): How often to display epoch data during training
    """

    def __init__(self, num_input, batch_size=100, num_epochs=10, display=False,
                 blacklist=[], whitelist=[], normalize=False, display_step=1):
        """Init classifier"""

        # Network parameters
        self.l_rate = 0.001
        self.dropout_prob = 0.5
        self.reg_param = 0.01
        self.std_param = 5
        self.training_epochs = num_epochs
        self.display_step = display_step
        self.batch_size = batch_size
        self.display = display
        self.normalize = normalize

        self.blacklist = blacklist
        self.whitelist = whitelist

        assert not (self.blacklist and self.whitelist), (
            'Both whitelist and blacklist are defined'
        )

        ############################
        # TensorFlow Variables below
        ############################

        # Placeholders
        self.X = tf.placeholder('float', [None, num_input], name='X')
        self.Y = tf.placeholder('int32', [None], name='Y')
        self.keep_prob = tf.placeholder('float')

        # Cost threshold for anomaly detection
        self.cost_threshold = tf.Variable(0, dtype=tf.float32)

        # for normalization
        self.feature_min = tf.Variable(np.zeros(num_input), dtype=tf.float32)
        self.feature_max = tf.Variable(np.zeros(num_input), dtype=tf.float32)

        # Create Network
        network_sizes = [num_input, 25, 2, 25, num_input]
        activations = [tf.nn.relu, tf.nn.sigmoid, tf.nn.relu, tf.nn.sigmoid]

        self.neural_net = NeuralNet(network_sizes, activations)

        prediction = self.neural_net.create_network(self.X, self.keep_prob)

        self.cost = tf.reduce_mean(tf.square(prediction - self.X))
        self.cost += self.reg_param * self.neural_net.get_l2_loss()
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.l_rate)
        self.optimizer = self.optimizer.minimize(self.cost)

        self.init_op = tf.initialize_all_variables()
        self.saver = tf.train.Saver()

        # for gpu
        self.config = tf.ConfigProto(log_device_placement=False)
        self.config.gpu_options.allow_growth = True

    def train(self, train_file='', reset_weights=False):
        """Trains classifier

        Args:
            train_file (str = ''): Training file location csv formatted,
                must consist of only regular behavior
            reset_weights (bool = False): Flag to reset weights
        """

        trX, trY = grab_data(train_file, self.blacklist, self.whitelist)
        training_size = len(trX)

        # normalize X
        if self.normalize:
            _min = trX.min(axis=0)
            _max = trX.max(axis=0)
            trX = normalize(trX, _min, _max)

        assert self.batch_size < training_size, (
            'batch size is larger than training_size'
        )

        with tf.Session(config=self.config) as sess:
            sess.run(self.init_op)

            if reset_weights:
                sess.run(self.neural_net.reset_weights())

            costs = []

            for epoch in range(self.training_epochs):
                cost = 0
                num_costs = 0
                for i in range(0, training_size, self.batch_size):
                    # for batch training
                    upper_bound = i + self.batch_size
                    if upper_bound >= training_size:
                        upper_bound = training_size - 1

                    feed_dict = {self.X: np.atleast_2d(trX[i:upper_bound]),
                                 self.Y: np.atleast_1d(trY[i:upper_bound]),
                                 self.keep_prob: self.dropout_prob}
                    _, c = sess.run([self.optimizer, self.cost],
                                    feed_dict=feed_dict)

                    cost += c
                    num_costs += 1

                    # calculate average cost on last epoch for threshold
                    if epoch == self.training_epochs - 1:
                        costs.append(c)

                if epoch % self.display_step == 0:
                    display_str = 'Epoch {0:04} with cost={1:.9f}'
                    display_str = display_str.format(epoch+1, cost/num_costs)
                    self.print(display_str)

            # assign cost threshold
            cost_threshold = np.mean(costs) + self.std_param * np.std(costs)
            sess.run(self.cost_threshold.assign(cost_threshold))

            self.print('Threshold: ' + str(cost_threshold))

            # assign normalization values
            if self.normalize:
                sess.run(self.feature_min.assign(_min))
                sess.run(self.feature_max.assign(_max))

            self.print('Optimization Finished')

            # save model
            save_path = self.saver.save(sess, './model.ckpt')
            self.print('Model saved in file: {}'.format(save_path))

    def test(self, test_file=''):
        """Tests classifier

        Args:
            test_file (str = ''): Testing file location csv formatted

        Returns:
            (dict): Dictionary containing the following fields
                accuracy
        """

        teX, teY = grab_data(test_file, self.blacklist, self.whitelist)

        testing_size = len(teX)

        rtn_dict = {
            'num_acc': 0,
            'num_fp': 0,
            'num_tn': 0,
            'num_fn': 0,
            'num_tp': 0
        }

        with tf.Session(config=self.config) as sess:
            self.saver.restore(sess, './model.ckpt')

            # normalize data
            if self.normalize:
                _min = self.feature_min.eval()
                _max = self.feature_max.eval()

                teX = normalize(teX, _min, _max)

            for i in range(testing_size):
                cost = sess.run(self.cost,
                                feed_dict={self.X: np.atleast_2d(teX[i]),
                                           self.keep_prob: 1.0})

                t = self.cost_threshold.eval()

                class_guess = 1 if float(cost) < t else -1

                if teY[i] == 1:
                    if class_guess == 1:
                        rtn_dict['num_tn'] += 1
                    else:
                        rtn_dict['num_fp'] += 1
                else:
                    if class_guess == -1:
                        rtn_dict['num_tp'] += 1
                    else:
                        rtn_dict['num_fn'] += 1

                if teY[i] == class_guess:
                    rtn_dict['num_acc'] += 1

            rtn_dict['accuracy'] = rtn_dict['num_acc'] / testing_size
            rtn_dict['fp_rate'] = rtn_dict['num_fp'] / (rtn_dict['num_tn'] +
                                                        rtn_dict['num_fp'])
            rtn_dict['fn_rate'] = rtn_dict['num_fn'] / (rtn_dict['num_tp'] +
                                                        rtn_dict['num_fn'])

            rtn_dict['accuracy'] *= 100
            rtn_dict['fp_rate'] *= 100
            rtn_dict['fn_rate'] *= 100
            rtn_dict['tp_rate'] = 100 - rtn_dict['fn_rate']
            rtn_dict['tn_rate'] = 100 - rtn_dict['fp_rate']

        self.print(rtn_dict)

        return rtn_dict

    def print(self, val):
        """Internal function for printing"""

        if self.display:
            print(val)
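
A hedged end-to-end usage sketch of the classifier above; the feature count and CSV file names are assumptions, and the files are expected in whatever format grab_data parses in the original project.

# Hypothetical usage of the unary anomaly classifier defined above.
# 'normal_traffic.csv' and 'mixed_traffic.csv' are placeholder file names.
clf = Classifier(num_input=42,        # assumed number of features per sample
                 batch_size=100,
                 num_epochs=10,
                 display=True,
                 normalize=True)

clf.train(train_file='normal_traffic.csv')         # train on regular behavior only
results = clf.test(test_file='mixed_traffic.csv')  # evaluate on mixed traffic
print('accuracy: {:.2f}%'.format(results['accuracy']))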
Example #37
from __future__ import print_function
import numpy as np

from NeuralNet import NeuralNet

file_lines = []

# read in the file and save it line by line as a list of strings
with open('optdigits_train.txt', 'r') as training_file:
    file_lines = training_file.readlines()

inputs = [[float(x) for x in line.strip().split(',')[0:-1]] for line in file_lines]
answers = [int(line.strip().split(',')[-1]) for line in file_lines]

for array in inputs:  # add the bias node to each input
    array.append(1.0)



net = NeuralNet()
net.train(inputs[0:10], answers[0:10])
Example #38
def transfer_learning(print_output=True):
    path = 'datasets/'
    data_loader = DataLoader(path)
    names = {1: 'title', 4: 'abstract', 5: 'mesh', 'y': 6}
    transformed_data_sets = []

    path = 'datasets/'

    files = [f for f in listdir(path) if isfile(join(path,f))]
    files.pop(0)
    data_loader = DataLoader(path)
    domains = data_loader.csv_files
    all_domains = copy.deepcopy(domains)
    training_domains = data_loader.csv_files
    all_domains_svm_wda_metrics_list = []
    all_domains_svm_metrics_list = []
    all_domains_svm_bow_mlp_list = []
    all_domains_mlp_fold_scores = []

    for i, held_out_domain in enumerate(domains):
        training_domains.pop(i)
        names = {1: 'title', 4: 'abstract', 5: 'mesh', 'y': 6}
        svm_wda_metrics_list = []
        svm_metrics_list = []
        svm_bow_mlp_list = []

        folder_name = '/' + files[i]
        domain_name = files[i].__str__()
        domain_name = domain_name.split('.')[0]
        folder_name = 'output' + '/' + domain_name

        output = "Dataset: {}".format(files[i])
        if print_output:
            print(output)

        #shuffle(data_loader.csv_files)
        data_loader.csv_files = training_domains
        data_sets = data_loader.csv_files
        domains = data_loader.get_feature_matrix(names)

        #Get one file out of the csv files in the dataloader; use this as the held-out domain

        #Get the feature representation of the held out data
        held_out_x, held_out_y = data_loader.get_feature_matrix(names, held_out_domain)
        #Create the folds for the held out data in this case the default 5
        folds = data_loader.cross_fold_valdation(held_out_x, held_out_y)
        #Get the total number of domains i.e., the number of files with documents
        n_source_domains = len(data_sets)
        os.makedirs(folder_name)

        #Must convert the data type of the matrix for theano
        feature_engineer = Feature_Engineer()

        #Start the 5 fold cross validation
        for n_fold, fold in enumerate(folds):
            output = "Fold {}: \n".format(n_fold)
            if print_output:
                print(output)
            output = '{}/{}/fold_{}.csv'.format(os.getcwd(), folder_name, (n_fold + 1))
            file = open(output, 'w')
            csv_writer = csv.writer(file)

            #Each sample is a list that contains the x and y for the classifier
            #Typically fold[0] would be the train sample, but it is switched here to
            #test the effectiveness of the domain adaptation
            train_sample = fold[1]
            test_sample = fold[0]

            #These are the original copies to be copied over the augmented feature matrix
            #Each sample contains the text and y labels from the data before it is put into the sklearn count vectorizer
            train_x, train_y = train_sample
            test_x, test_y = test_sample

            train_y[train_y == 0] = 2
            train_y[train_y == 1] = 3
            test_y[test_y == 0] = 2
            test_y[test_y == 1] = 3


            #Get the bag of words representation of the small 20% target source data and transform the other 80%
            #of the data.
            train_x = data_loader.get_transformed_features(train_x, True, False, True)
            test_x = data_loader.transform(test_x, True, True)

            transformed_domains = []

            #Transform the domains with respect to the training data
            for domain in domains:
                domain_x, domain_y = domain
                transformed_domain_x = data_loader.transform(domain_x, True, True)
                transformed_domain_x, domain_y = data_loader.underSample(transformed_domain_x, domain_y)
                transformed_domains.append([transformed_domain_x, domain_y])

            augmented_feature_matrix_train, augmented_y_train = feature_engineer.augmented_feature_matrix(transformed_domains,
                                                                                              [train_x, train_y])
            augmented_feature_matrix_test, augmented_y_test = feature_engineer.augmented_feature_matrix(held_out_domain=[test_x, test_y],
                                                                                                        train_or_test=False,
                                                                                                        n_source_domains=len(transformed_domains))
            augmented_y_test[augmented_y_test == 2] = 0
            augmented_y_test[augmented_y_test == 3] = 1
            #SVM with the augmented feature matrix for domain adaptation
            svm_wda = SVM()
            svm_wda.train(augmented_feature_matrix_train, augmented_y_train)
            svm_wda.test(augmented_feature_matrix_test, augmented_y_test)
            output = "\nSVM with domain adaptation metrics:"
            csv_writer.writerow([output])
            if print_output:
                print(output)
                print(svm_wda)
                print("\n")
            svm_wda_metrics_list.append(svm_wda.metrics)

            classifier = NeuralNet(n_hidden_units=[250], output_size=4, batch_size=20, n_epochs=200, dropout=True,
                                   activation_function='relu', learning_rate=.3, momentum=True, momentum_term=.5)
            write_to_csv(svm_wda.metrics, csv_writer)


            y_for_mlp = []
            #Set up the x and y data for the MLP
            for p, domain in enumerate(transformed_domains):
                domain_x, domain_y = domain
                domain_x = domain_x.todense()
                y_for_mlp.append(domain_y)

                if p == 0:
                    neural_net_x_train = domain_x
                    neural_net_y_train = domain_y
                else:
                    neural_net_x_train = numpy.vstack((neural_net_x_train, domain_x))
                    neural_net_y_train = numpy.hstack((neural_net_y_train, domain_y))

            #Must convert the data type of the matrix to float for theano
            neural_net_x_train = numpy.float_(neural_net_x_train)


            classifier.train(neural_net_x_train, neural_net_y_train)

            #Map the labels back to 0/1 for the binary SVM baselines below
            test_y[test_y == 2] = 0
            test_y[test_y == 3] = 1
            svm_y_train = neural_net_y_train
            svm_y_train[svm_y_train == 2] = 0
            svm_y_train[svm_y_train == 3] = 1

            #SVM without the domain adaptation
            svm = SVM()
            svm.train(sparse.coo_matrix(neural_net_x_train), svm_y_train)
            svm.test(test_x, test_y)
            output = "\nSVM without domain adaptation"
            if print_output:
                print(output)
                print(svm)
                print("\n")
            csv_writer.writerow([output])
            svm_metrics_list.append(svm.metrics)
            write_to_csv(svm.metrics, csv_writer)


            #Transform the feature vectors of the held-out data into the learned hidden-layer features of the
            #MLP trained above with all n-1 datasets

            #Wrap the matrices as Theano shared variables before handing them to the trained MLP
            perceptron_train_x = theano.shared(neural_net_x_train)
            perceptron_test_x = theano.shared(test_x.todense())

            transformed_perceptron_train_x = classifier.transfer_learned_weights(perceptron_train_x)
            transformed_perceptron_test_x = classifier.transfer_learned_weights(perceptron_test_x)

            modified_transformed_perceptron_train_x = numpy.hstack((transformed_perceptron_train_x,
                                                                    neural_net_x_train))
            modified_transformed_perceptron_test_x = numpy.hstack((transformed_perceptron_test_x,
                                                                   test_x.todense()))

            output = "\nSVM with BoW and transformed features"
            csv_writer.writerow([output])
            if print_output:
                print(output)
            svm_mlp_bow = SVM()
            svm_mlp_bow.train(sparse.coo_matrix(modified_transformed_perceptron_train_x), svm_y_train)
            svm_mlp_bow.test(sparse.coo_matrix(modified_transformed_perceptron_test_x), test_y)
            write_to_csv(svm_mlp_bow.metrics, csv_writer)
            if print_output:
                print(svm_mlp_bow)
            svm_bow_mlp_list.append(svm_mlp_bow.metrics)


            output = "*********** End of fold {} ***********".format(n_fold)

            if print_output:
                print(output)


        training_domains = copy.deepcopy(all_domains)
        file_name = '{}/{}/fold_averages.csv'.format(os.getcwd(), folder_name)
        file = open(file_name, 'w+')
        csv_writer = csv.writer(file)

        if print_output:
            output = "----------------------------------------------------------------------------------------" \
                     "\nFold Scores\n " \
                     "SVM with domain adaptation"
            print_write_output(output, svm_wda_metrics_list, all_domains_svm_wda_metrics_list, csv_writer)

            output = "\nSVM without domain adaptation"
            print_write_output(output, svm_metrics_list, all_domains_svm_metrics_list, csv_writer)

            output = "SVM with BoW and transformed features"
            print_write_output(output, svm_bow_mlp_list, all_domains_svm_bow_mlp_list, csv_writer)



    file_name = '{}/output/all_fold_averages.csv'.format(os.getcwd())
    file = open(file_name, 'w+')
    csv_writer = csv.writer(file)
    if print_output:
        output = "*******************************************************************************************" \
                 "\nAll domain macro metric scores\n " \
                 "SVM with domain adaptation"
        print_macro_scores("SVM with domain adaptation", all_domains_svm_wda_metrics_list, csv_writer)

        output = "\nSVM without domain adaptation"
        print_macro_scores(output, all_domains_svm_metrics_list, csv_writer)

        output = "SVM with BoW and transformed features"
        print_macro_scores(output, all_domains_svm_bow_mlp_list, csv_writer)
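The domain adaptation in this example hinges on the augmented feature matrices built by feature_engineer.augmented_feature_matrix. The Feature_Engineer class itself is not shown here, so the snippet below is only a minimal sketch of one common way to build such a matrix, Daume-style feature augmentation; the helper name augment and the exact block layout are assumptions for illustration, not this example's actual implementation.

# A minimal sketch of Daume-style feature augmentation (an assumption about what
# Feature_Engineer may be doing; its real implementation is not shown above).
import numpy
from scipy import sparse

def augment(x, domain_index, n_domains):
    """Return [shared copy | block 0 | ... | block n-1], zero everywhere except
    the shared block and the block belonging to this row's own domain."""
    blocks = [x]  # shared copy, seen by every domain
    for d in range(n_domains):
        blocks.append(x if d == domain_index else sparse.csr_matrix(x.shape))
    return sparse.hstack(blocks).tocsr()

# Example: two source domains plus the held-out target domain (index 2).
source_a = sparse.csr_matrix(numpy.random.rand(4, 5))
target = sparse.csr_matrix(numpy.random.rand(2, 5))
augmented_a = augment(source_a, domain_index=0, n_domains=3)  # shape (4, 20)
augmented_t = augment(target, domain_index=2, n_domains=3)    # shape (2, 20)

With this layout a linear classifier can learn one set of weights shared across domains plus a per-domain correction, which is the usual motivation for training the "SVM with domain adaptation" on the augmented matrix.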
Example #39
0
def main():
    """
    :description: Main method to create and run learning on neural network. Handles k fold validation.
    :return: void
    """
    filename = sys.argv[1]

    # Default values
    hold_back = 0.2
    node_count = 5

    # Check command line arguments
    try:
        if len(sys.argv) > 2:
            if sys.argv[2] == 'h':
                node_count = int(sys.argv[3])  # Get hidden nodes
                if len(sys.argv) > 4 and sys.argv[4] == 'p':
                    hold_back = float(sys.argv[5])
            elif sys.argv[2] == 'p':
                hold_back = float(sys.argv[3])  # Get hold back percentage
        if node_count <= 0 or hold_back <= 0 or hold_back >= 1:
            raise ValueError
    except (IndexError, TypeError, ValueError):
        print('Usage: ann.py <filename> [h <number of hidden nodes>] [p <holdout proportion>]')
        exit()

    print('Using %d hidden nodes and %1.0f%% data holdout' % (node_count, hold_back * 100))

    points, expec_output = read_data(filename)

    net = NeuralNet(2, node_count, 1)

    # Initialize
    valid_size = int(len(points) * hold_back)  # Size of list of examples that will be validation data
    test_errors = []
    validation_errors = []
    best_epoch = 1
    best_error = 1
    epoch = 0
    best_net = net

    # Run epochs and k fold validation
    while True:
        epoch += 1

        print("Epoch %d, %d epochs since best" % (epoch, epoch - best_epoch))

        test_error = 0
        test_tests = 0
        validation_error = 0
        validation_tests = 0

        # k-fold validation: rotate through each training/validation split of the example set
        for k in range(int(len(points) / valid_size)):
            # Validation data is the k-th slice of the examples, sized by the hold-back percentage
            valid_data = points[k * valid_size:(k + 1) * valid_size]
            valid_out_data = expec_output[k * valid_size:(k + 1) * valid_size]

            # Training data is the set of examples that are not in validation data
            train_data = points[:k * valid_size] + points[(k + 1) * valid_size:]
            train_out_data = expec_output[:k * valid_size] + expec_output[(k + 1) * valid_size:]

            # Neural net learn
            net.learn(0.1, train_data, train_out_data)

            # Calculate errors in training data
            for i in range(len(train_data)):
                test_tests += 1
                if train_out_data[i][0] != round(net.classify(train_data[i])[0]):
                    test_error += 1

            # Calculate errors in validation data
            for i in range(len(valid_data)):
                validation_tests += 1
                if valid_out_data[i][0] != round(net.classify(valid_data[i])[0]):
                    validation_error += 1

        # Add current errors to list of all errors
        test_errors.append(test_error / test_tests)
        validation_errors.append(validation_error / validation_tests)

        # Keep track of best error, otherwise break if neural net has not improved in the last 200 epochs
        if validation_errors[-1] < best_error:
            best_epoch = epoch
            best_error = validation_errors[-1]
            best_net = net.copy()
            print("Best error: %1.2f%%" % (best_error * 100))
        elif epoch - best_epoch >= 200:
            print("Error did not improve in 200 epochs, stopping training")
            break

    # Error outputs
    print("Best error: %1.2f%% at epoch %d" % (best_error * 100, best_epoch))
    print("Best network: %s" % best_net.weights)

    # Plot of errors
    test_plot, = plt.plot(test_errors, label="Test Error")
    validation_plot, = plt.plot(validation_errors, label="Validation Error")
    plt.legend(handles=[test_plot, validation_plot])
    plt.show()
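Example #39 implements early stopping by hand: it remembers the network with the lowest validation error and stops once 200 epochs pass without improvement. The helper below is a minimal sketch of that same patience pattern on a fixed train/validation split; it assumes only a model exposing the learn, classify and copy methods used above, and is not part of the original example.

def train_with_patience(net, train_x, train_y, valid_x, valid_y, rate=0.1, patience=200):
    """Train until the validation error has not improved for `patience` epochs."""
    best_net, best_error, best_epoch, epoch = net, 1.0, 0, 0
    while epoch - best_epoch < patience:
        epoch += 1
        net.learn(rate, train_x, train_y)
        # Fraction of validation points whose rounded output misses the label
        misses = sum(1 for x, y in zip(valid_x, valid_y)
                     if y[0] != round(net.classify(x)[0]))
        error_rate = misses / len(valid_x)
        if error_rate < best_error:
            best_error, best_epoch, best_net = error_rate, epoch, net.copy()
    return best_net, best_error, best_epoch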