Ejemplo n.º 1
0
def split_dataset(dataset, train_percent=None):
    """Split *dataset* into train and test partitions at random.

    :param dataset: DataSet exposing num_records, text, labels, isVectorized.
    :param train_percent: percentage (1-100) of records used for training;
        falls back to 80 when missing or out of range.
    :return: (train_dataset, test_dataset) tuple of DataSet objects.
    """
    # Fall back to the default split when the value is missing or invalid
    # (also rejects zero/negative percentages, which the original allowed).
    if not train_percent or not 0 < int(train_percent) <= 100:
        print("Train percent Invalid, using default")
        train_percent = 80

    # Shuffle / randomize the indices.  BUG FIX: the original called the
    # non-existent `rage` and `np.shuffe`; note np.random.shuffle works
    # in place and returns None, so shuffle first, then iterate the list.
    shuffled_indices = list(range(dataset.num_records))
    np.random.shuffle(shuffled_indices)

    # How many training records do we need?
    num_train_records = int(train_percent) * dataset.num_records // 100

    # Init train and test partitions.
    train_text, train_labels = [], []
    test_text, test_labels = [], []

    for index in shuffled_indices:
        # The first num_train_records index values land in train; since the
        # visiting order is shuffled, assignment is effectively random.
        if index < num_train_records:
            train_labels.append(dataset.labels[index])
            train_text.append(dataset.text[index])
        else:
            test_labels.append(dataset.labels[index])
            test_text.append(dataset.text[index])

    train_dataset = DataSet(None, train_text, train_labels, dataset.isVectorized)
    test_dataset = DataSet(None, test_text, test_labels, dataset.isVectorized)

    return train_dataset, test_dataset
Ejemplo n.º 2
0
def plot_stations(filename):
    """Load a previously saved stations file and display it."""
    stations = DataSet()
    stations.read(filename)
    stations.plot()
Ejemplo n.º 3
0
    def __init__(self, target_file):
        '''
        Supersedes data_set.DataSet with methods for reading / writing from / to CSV files

        :param target_file: path of the CSV file to read from or write to
        '''
        # Initialize base DataSet state before recording the file target.
        # NOTE(review): the base class is invoked explicitly rather than via
        # super(); confirm against the class header (not visible here).
        DataSet.__init__(self)
        self.target_file = target_file
Ejemplo n.º 4
0
    def input_fn(data_set: DataSet, size):
        """Build the (features, labels) pair from one batch of *size* records."""
        # Draw a single batch; every field is wrapped in a TF constant.
        batch = data_set.get_batch(size)

        input_dict = {
            'inputs': tf.constant(np.array(batch.inputs())),
            'sequence_length': tf.constant(batch.lengths()),
            'mask': tf.constant(batch.masks()),
        }
        labels = tf.constant(batch.labels())

        return input_dict, labels
Ejemplo n.º 5
0
def create_data_set(key_items, value_items, name=""):
    """Pair keys with values into DataItems and wrap them in a DataSet.

    Keys without a matching value (when value_items is shorter) are paired
    with None; surplus values beyond len(key_items) are ignored.

    :param key_items: sequence of keys, one per resulting DataItem.
    :param value_items: sequence of values aligned positionally with keys.
    :param name: optional name for the resulting DataSet.
    :return: a DataSet populated with the paired DataItems.
    """
    # enumerate() replaces the original range(len(...)) index loop.
    data_items = []
    for index, key in enumerate(key_items):
        value = value_items[index] if index < len(value_items) else None
        data_items.append(DataItem(key, value))

    ds = DataSet(name)
    ds.add_data_items(data_items)
    return ds
Ejemplo n.º 6
0
 def get_dataset(self, is_train=True):
     """Load speech data, wrap it in a DataSet, and wire up the TF iterators.

     :param is_train: when True load the default training data and normalize
         its labels; otherwise load the source-speaker data configured in
         ``config``.
     """
     ld = LoadedData()
     if is_train:
         ld.load_data()
         ld.label_normalize()
     else:
         # load source speaker data
         ld.load_data(filename=config.src_data_dir,
                      test_set_size=config.src_test_size,
                      vali_set_size=config.src_vali_size)
     # sort the data
     # ld.sort_data()
     ld.print_info()
     dataset = DataSet(ld)
     # Keep handles to the raw data, the DataSet, and its three iterators.
     self.ds = dataset
     self.loaded_data = ld
     self.train_dataset_iter = dataset.train_iterator
     self.vali_dataset_iter = dataset.vali_iterator
     self.test_dataset_iter = dataset.test_iterator
     # Feedable iterator: self.dataset_handle selects which of the three
     # splits is consumed at session-run time.
     self.dataset_iter = tf.data.Iterator.from_string_handle(
         self.dataset_handle, dataset.train_set.output_types,
         dataset.train_set.output_shapes)
     with tf.name_scope('batch_data'):
         self.batch_features, \
         self.batch_labels, \
         self.batch_lengths, \
         self.batch_uttids = self.dataset_iter.get_next()
Ejemplo n.º 7
0
    def load(self, dataPath, numTrain, numValid, numTest):
        """Read a CSV of uint8 rows and split it into train/validation/test."""
        print("Loading data from " + dataPath + "...")

        data = np.genfromtxt(dataPath, delimiter=",", dtype="uint8")

        # The last numTest instances ALWAYS comprise the test set; only the
        # train/validation portion gets shuffled.
        split_at = numTrain + numValid
        train = data[:split_at]
        test = data[split_at:]
        shuffle(train)

        # Carve validation off the shuffled training portion.
        valid = train[numTrain:]
        train = train[:numTrain]

        self.trainingSet = DataSet(train)
        self.validationSet = DataSet(valid)
        self.testSet = DataSet(test)

        print("Data loaded.")
def split_dataset(dataset, ratio=None):
    """Randomly split *dataset* into (train, test) DataSets by *ratio*."""
    size = dataset.size
    if ratio is None:
        ratio = _choose_optimal_train_ratio(size)

    # Mark int(size * ratio) positions as training rows, then shuffle the
    # mask so the selection is random.
    train_mask = np.zeros(size, dtype=np.bool_)
    train_mask[:int(size * ratio)] = True
    np.random.shuffle(train_mask)

    train_x = dataset.x[train_mask, :]
    train_y = dataset.y[train_mask]

    # Everything not in train goes to test.
    test_mask = ~train_mask
    test_x = dataset.x[test_mask, :]
    test_y = dataset.y[test_mask]

    return DataSet(train_x, train_y), DataSet(test_x, test_y)
Ejemplo n.º 9
0
def main():
    """Smoke-test Observations against a DataSet built from fixed hyper-params."""
    from data_set import DataSet
    from collections import namedtuple

    # Hyper-parameters: history window length and training-set size.
    params = {
        'encode_step': 5,  # number of historical data points
        'train_data_num': 100000,  # number of training samples
        }
    hps = namedtuple("HParams", params.keys())(**params)

    data_set = DataSet(hps)
    obs = Observations(0, 0, 0, 0)
    print(obs.values(data_set.history_data, hps.encode_step).shape)
    return
Ejemplo n.º 10
0
    def input_fn(labeled: DataSet, unlabeled: DataSet, labeled_size,
                 unlabeled_size):
        """Assemble (features, labels) from a labeled and an unlabeled batch."""
        # The labeled batch supplies features and the supervised targets.
        labeled_batch = labeled.get_batch(labeled_size)
        input_dict = {
            'labeled_inputs': tf.constant(np.array(labeled_batch.inputs())),
            'labeled_sequence_length': tf.constant(labeled_batch.lengths()),
            'labeled_mask': tf.constant(labeled_batch.masks()),
        }
        labels = tf.constant(labeled_batch.labels())

        # The unlabeled batch contributes features only (no targets).
        unlabeled_batch = unlabeled.get_batch(unlabeled_size)
        input_dict['unlabeled_inputs'] = tf.constant(
            np.array(unlabeled_batch.inputs()))
        input_dict['unlabeled_sequence_length'] = tf.constant(
            unlabeled_batch.lengths())
        input_dict['unlabeled_mask'] = tf.constant(unlabeled_batch.masks())

        return input_dict, labels
Ejemplo n.º 11
0
    def predict_img(self, image_path):
        """
        Run the classifier on one image and return ranked predictions.

        :param image_path: path of the image to classify
        :return: dict (JSON-serializable) with the number of classes, the
            ranked results, two sample images by other users, and the
            re-saved original image.
        """
        # Resize the input image; keep the original path for re-saving later.
        ori_path = image_path
        image_path = resize(image_path)

        # Run the model and collect per-class probabilities.
        res = []
        predict = tf.reshape(self.output, [-1, CATEGORY_COUNT])
        pred = self.sess.run(predict,
                             feed_dict={
                                 INPUT: [DataSet.read_image(image_path)],
                                 DROPOUT_RATE: 0.
                             })

        # class_path maps line i -> class name; prob is scaled to 0-10000.
        with open(class_path, "r") as f:
            contents = f.readlines()
            for i, content in enumerate(contents):
                res.append({
                    "name": content.split()[0],
                    "prob": int(pred[0][i] * 10000)
                })

        # NOTE(review): the lambda parameter shadows `res`; harmless here
        # but confusing to read.
        res = sorted(res, key=lambda res: float(res['prob']), reverse=True)

        # Pick two sample images drawn by other users for the top class.
        nums = random.sample(range(0, 10), 2)
        otherpics = []
        for num in nums:
            otherpics.append(
                rf"..\static\dist\img\sp\{res[0]['name']}-{num}.png")
        # Re-save a 200x200 grayscale copy of the original upload so the
        # front end can show it alongside the predictions.
        file_name = image_path.split(r"\received")[1]
        ori_img = cv2.imread(ori_path, cv2.IMREAD_GRAYSCALE)
        ori_img = cv2.resize(ori_img, (200, 200))
        if not os.path.exists(rf"{BUTING_PATH}\code\static\dist\img\sh"):
            os.makedirs(rf"{BUTING_PATH}\code\static\dist\img\sh")
        plt.imsave(rf"{BUTING_PATH}\code\static\dist\img\sh" + file_name,
                   ori_img,
                   cmap='gray')
        oripics = [r"..\static\dist\img\sh" + file_name]
        return {
            "size": len(pred[0]),
            "res": res,
            "otherpic": otherpics,
            "oripic": oripics
        }
def to_dataset(df, k, target_column, with_bias):
    """Convert a time-ordered DataFrame into sliding-window (x, y) pairs.

    Each row of x is a flattened window of k consecutive rows (optionally
    prefixed with a constant bias 1); the label y is *target_column* at the
    row immediately after the window.

    :param df: DataFrame with a 'date' column (dropped) plus feature columns.
    :param k: window length in rows.
    :param target_column: column providing the prediction target.
    :param with_bias: when truthy, prepend 1 to every flattened window.
    :return: DataSet built from the windowed features and targets.
    """
    # Drop the first row and the date column; keep the target series.
    df = df[1:].reset_index(drop=True)
    df = df.drop(['date'], axis=1)
    target = df[target_column]

    n, cols = df.shape
    windows_num = n - k  # effective window size, including the label, is k + 1

    x = np.empty([windows_num, k * cols + int(with_bias)])
    y = np.empty([windows_num])

    # FIX: range() replaces the Python-2-only xrange(), and .values replaces
    # DataFrame.as_matrix(), which was removed from pandas.
    for i in range(windows_num):
        window = df[i:i + k]
        row = window.values.reshape((-1, ))
        if with_bias:
            row = np.insert(row, 0, 1)
        x[i] = row
        y[i] = target[i + k]

    debug('data set: x=%s y=%s' % (x.shape, y.shape))
    return DataSet(x, y)
 def get_data_set(self, name, directory='/data_sets'):
     """Return the saved DataSet called *name*, or build (and optionally save) a new one.

     A fresh DataSet is created when no saved one exists or when
     ``self.reset`` is set; otherwise the saved instance is returned.
     """
     data_set = self.get_saved_data_set(name)[0]

     # Use the cached data set unless a rebuild was explicitly requested.
     if data_set and not self.reset:
         return data_set

     if not data_set and not self.reset:
         # Missing-but-not-reset is unexpected, so report it.
         print('Could not find dataset. Creating new one')
         self.captains_log.error(
             'Could not find saved dataset. Creating new one')

     # Build (and optionally persist) a fresh data set.  This single path
     # replaces the creation logic that was duplicated in both branches of
     # the original if/else.
     data = DataSet(name, self.save, directory)
     for message in data.set_up():
         self._log_message(message[0], message[1])
     if self.save:
         self.save_data_set(data)
     return data
Ejemplo n.º 14
0
def main():
    """Collect trajectory data with a fixed action, then train and evaluate the price model."""
    hps = get_hps()
    data_set = DataSet(hps)
    env = Env(hps, data_set)
    model = Model(hps, env.observations_dim, env.actions_dim)

    # Seed the data set with the initial observation.
    obs = env.reset()
    data_set.add_data(obs, 0, 0)

    # Roll the environment forward under a constant action, logging progress.
    data_size = hps.train_data_num
    for step in range(data_size):
        print('\r{}/{}'.format(step, data_size), end='')
        obs, reward, _ = env.step(obs, Actions([0.3, 0.3, 0.4]))
        data_set.add_data(obs, 0, 0)

    # Alternate training with periodic evaluation.
    total_iters = hps.train_iter
    for it in range(total_iters):
        print('\n\n{}/{}'.format(it, total_iters))
        model.price_train(1, data_set)
        if it % hps.eval_interval == 0:
            print('-' * 50)
            model.price_test(1, data_set)
            print('-' * 50)

    return
Ejemplo n.º 15
0
def aggregate_demo(args):
    """
    Merge per-episode demonstration samples into one replay memory.

    python3 aggregate_demo.py pong --range-start=0 --range-end=5

    :param args: parsed CLI namespace (env, replay sizes, folder, ranges).
    """
    if args.demo_memory_folder is not None:
        demo_memory_folder = args.demo_memory_folder
    else:
        demo_memory_folder = "{}_demo_samples".format(args.env)

    game_state = game.GameState(game=args.env)
    D = DataSet(args.resized_height, args.resized_width, RandomState(),
                args.replay_memory, args.phi_len, game_state.n_actions)

    data_file = '{}-dqn.pkl'.format(args.env)
    img_file = '{}-dqn-images.h5'.format(args.env)
    for index in range(args.range_start, args.range_end):
        print("Demonstration sample #{num:03d}".format(num=index + 1))
        sample_prefix = demo_memory_folder + '/{0:03d}/'.format(index + 1)
        # FIX: the with-block closes the file (the original leaked the
        # handle), and the narrowed except no longer swallows
        # KeyboardInterrupt / SystemExit like the bare `except:` did.
        try:
            with open(sample_prefix + data_file, 'rb') as pkl_file:
                data = pickle.load(pkl_file)
        except (OSError, IOError, EOFError, pickle.UnpicklingError):
            print("Check demo folder if it exist!")
            return
        actions = data['D.actions']
        rewards = data['D.rewards']
        terminal = data['D.terminal']

        imgs = get_compressed_images(sample_prefix + img_file + '.gz')
        print("\tMemory size: {}".format(data['D.size']))
        for mem_index in range(data['D.size']):
            D.add_sample(imgs[mem_index], actions[mem_index],
                         rewards[mem_index], terminal[mem_index])
        print("\tTotal Memory size: {}".format(D.size))

    D.resize()
    D.create_validation_set(percent=args.validation_set_percent)

    # Everything needed to resume training from the aggregated memory.
    data = {
        'D.width': D.width,
        'D.height': D.height,
        'D.max_steps': D.max_steps,
        'D.phi_length': D.phi_length,
        'D.num_actions': D.num_actions,
        'D.actions': D.actions,
        'D.rewards': D.rewards,
        'D.terminal': D.terminal,
        'D.bottom': D.bottom,
        'D.top': D.top,
        'D.size': D.size,
        'D.validation_set_markers': D.validation_set_markers,
        'D.validation_indices': D.validation_indices,
        'epsilon': args.init_epsilon,
        't': 0
    }
    images = D.imgs

    # FIX: dump through a with-block instead of a leaked open() handle.
    with open(demo_memory_folder + '/' + args.env + '-dqn-all.pkl',
              'wb') as pkl_file:
        pickle.dump(data, pkl_file, pickle.HIGHEST_PROTOCOL)
    print("Saving and compressing replay memory...")
    save_compressed_images(
        demo_memory_folder + '/' + args.env + '-dqn-images-all.h5', images)
    print("Saved and compressed replay memory")
            minibatch_index,
            inputs,
            outputs
        )
        self.test_eval_function = self.compiled_test_function(
            self.classifier,
            minibatch_index, 
            inputs,
            outputs
        )


from data_set import DataSet

if __name__ == '__main__':
    dataset = DataSet()
    dataset.load()
    dbn = DeepBeliefNetworkTrainer(dataset)
    dbn.initialize()

    start_time = time.clock()
    layer_epoch_costs = dbn.pretrain()
    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    start_time = time.clock()
    epoch_losses, best_validation_loss, best_iter, test_score = dbn.train()
    end_time = time.clock()
    print >> sys.stderr, ('The fine tuning code for file ' +
Ejemplo n.º 17
0
# Paths to the shared ATIS evaluation splits and slot vocabulary.
common = {
    'dev': './data/atis.pkl.dev',
    'test': './data/atis.pkl.test',
    'slot': './data/atis.pkl.slot',
}

if __name__ == '__main__':

    config = config_plain
    # experiments = experiments[5:6]

    if not os.path.exists('./out'):
        os.mkdir('./out')

    # for vocab size
    # NOTE(review): these two constructors are called for their side
    # effects only (presumably building the vocabulary) — the returned
    # DataSets are discarded; confirm against the DataSet implementation.
    DataSet('./data/atis.pkl.slot', './data/atis.pkl.train')
    DataSet('./data/atis.pos.slot', './data/atis.pos.train')

    slot = common['slot']
    validation_set = DataSet(slot, common['dev'])
    test_set = DataSet(slot, common['test'])

    print('# Experiments (%d)' % len(experiments))
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())

    # Optional POS pre-training set, only when the chosen config asks for it.
    pos_model = None
    if 'pos_model' in config:
        pos_set = DataSet('./data/atis.pos.slot', './data/atis.pos.train')
        print('# Pre-training')
        print('# POS training set (%d)' % pos_set.size())
Ejemplo n.º 18
0
    # Set up axes
    ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'],
                       rotation=90)
    ax.set_yticklabels([''] + output_words)

    # Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


if __name__ == "__main__":

    # train()
    # Build the dataset (and its English/French vocabularies) from the
    # configured corpus file.
    data_set = DataSet(opt.filename, opt.max_len, opt.min_count, device)

    model = build_model(data_set.english_vocab, data_set.french_vocab)
    # evaluate()

    # optim
    optimizer = build_optimizer(model)

    # loss function
    # NOTE(review): reduction='elementwise_mean' is the pre-1.0 PyTorch
    # spelling of 'mean' — confirm which torch version this targets.
    criterion = nn.NLLLoss(ignore_index=PAD_id, reduction='elementwise_mean')

    # Loading checkpoint (optional resume from a saved state dict).
    checkpoint = None
    if opt.checkpoint:
        checkpoint = load_checkpoint(opt.checkpoint)
        model.load_state_dict(checkpoint['state_dict'])
Ejemplo n.º 19
0
    def run(cls, dev, test, labeled_slot, labeled_train, unlabeled_slot,
            unlabeled_train, steps, gpu_memory):
        """Train the slot-filling estimator and evaluate it on the test set.

        :param dev: validation corpus.
        :param test: test corpus.
        :param labeled_slot: slot vocabulary for the labeled data.
        :param labeled_train: labeled training corpus.
        :param unlabeled_slot: slot vocabulary for the unlabeled data.
        :param unlabeled_train: unlabeled training corpus.
        :param steps: number of training steps.
        :param gpu_memory: fraction of GPU memory to reserve.
        :return: dict with overall accuracy and slot match/mismatch counts.
        """
        training_set = DataSet(labeled_slot, labeled_train)
        validation_set = DataSet(labeled_slot, dev)
        test_set = DataSet(labeled_slot, test)
        unlabeled_set = DataSet(unlabeled_slot, unlabeled_train)

        print('# training_set (%d)' % training_set.size())
        print('# validation_set (%d)' % validation_set.size())
        print('# test_set (%d)' % test_set.size())
        print('# unlabeled_set (%d)' % unlabeled_set.size())

        classifier = tf.contrib.learn.Estimator(
            model_fn=SlotFilling.rnn_model_fn,
            params={
                'num_slot': training_set.num_classes(),
                'num_pos': unlabeled_set.num_classes(),
                'drop_out': DROP_OUT,
                'embedding_dimension': EMBEDDING_DIMENSION,
                'vocab_size': DataSet.vocab_size(),
                'unlabeled': unlabeled_set.size() > 0
            },
            config=tf.contrib.learn.RunConfig(
                gpu_memory_fraction=gpu_memory,
                save_checkpoints_secs=30,
            ),
            model_dir='./model')

        validation_metrics = {
            "accuracy":
            tf.contrib.learn.MetricSpec(
                metric_fn=tf.contrib.metrics.streaming_accuracy,
                prediction_key='predictions',
                weight_key='labeled_mask')
        }

        # Early stopping on validation loss, evaluated every 50 steps.
        monitor = tf.contrib.learn.monitors.ValidationMonitor(
            input_fn=lambda: SlotFilling.input_fn(
                validation_set, unlabeled_set, validation_set.size(), 1),
            eval_steps=1,
            every_n_steps=50,
            metrics=validation_metrics,
            early_stopping_metric="loss",
            early_stopping_metric_minimize=True,
            early_stopping_rounds=300)

        classifier.fit(input_fn=lambda: SlotFilling.input_fn(
            training_set, unlabeled_set, training_set.size(), 500),
                       monitors=[monitor],
                       steps=steps)

        predictions = classifier.predict(input_fn=lambda: SlotFilling.input_fn(
            test_set, unlabeled_set, test_set.size(), 1))

        # Per-slot tallies over the test predictions.
        slot_correct = 0
        slot_no_match = 0
        slot_mismatch = 0
        slot_over_match = 0

        for i, p in enumerate(predictions):
            target = test_set.labels()[i][:test_set.lengths()[i]]
            prediction = list(p['predictions'])[:test_set.lengths()[i]]
            for expected, actual in zip(target, prediction):
                actual = int(actual)
                # BUG FIX: the original compared with `is`, which tests
                # object identity and only works for small interned ints and
                # strings by accident; value equality is what's intended.
                if expected == actual:
                    slot_correct += 1
                elif test_set.get_slot(actual) == 'o':
                    slot_no_match += 1
                elif test_set.get_slot(expected) == 'o':
                    slot_over_match += 1
                else:
                    slot_mismatch += 1

        return {
            'accuracy': slot_correct / sum(test_set.lengths()),
            'correct': slot_correct,
            'no_match': slot_no_match,
            'mismatch': slot_mismatch,
            'over_match': slot_over_match,
        }
            self.classifier, minibatch_index, inputs, outputs, learning_rate)


from data_set import DataSet

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(
        description='Demonstrate Multilayer Perceptron')
    argparser.add_argument(
        '--training-epochs',
        dest='epochs',
        type=int,
        default='1000',
        help='number of epochs to run the training (default: 1000)')

    dataset = DataSet()
    dataset.load()
    trainer = MultilayerPerceptronTrainer(
        dataset, n_epochs=argparser.parse_args().epochs)
    trainer.initialize()
    state = trainer.start_training(patience=10000,
                                   patience_increase=2,
                                   improvement_threshold=0.995)
    start_time = time.clock()
    while (trainer.continue_training(state)):
        print('epoch %d, validation error %f%%' %
              (state.epoch, state.epoch_losses[-1][0] * 100.0))
    end_time = time.clock()
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print(('Optimization complete. Best validation score of %f%% '
def main(_):
  """Train a CNN on pickled T1-GD image data, logging validation loss and ROC AUCs."""
  # Import data
  ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

  # NOTE(review): each pickle.load(open(...)) leaks its file handle — should
  # use `with open(...)` blocks.
  print("starting to load data...")
  x2 = pickle.load(open( "T1_GD_all_x_no_normalization.p", "rb" ))
  print("x2 loaded.")
  y2 = pickle.load(open( "T1_GD_all_y_no_normalization.p", "rb" ))
  print("y2 loaded.")
  validate_x2 = pickle.load(open( "T1_GD_validation_x_no_normalization_aggregated_.p", "rb" ))
  print("validate_x2 loaded.")
  validate_y2 = pickle.load(open( "T1_GD_validation_y_no_normalization_aggregated.p", "rb" ))
  print("validate_y2 loaded.")
  validate_x2_nonaggregated = pickle.load(open( "T1_GD_all__validation_x_no_normalization.p", "rb" ))
  print("validate_x2 loaded.")
  validate_y2_nonaggregated = pickle.load(open( "T1_GD_all__validation_y_no_normalization.p", "rb" ))
  print("validate_y2 loaded.")

  # NOTE(review): sys.argv[0] is the script path, not an epoch count; also
  # `number_epochs` is never used, while the undefined `epoch_size` is used
  # below — this raises NameError when the output file is opened.
  number_epochs = sys.argv[0]
  kernal_size = sys.argv[2]


  data_set_all = DataSet(x2,y2, fake_data=False)
  validation_set_all = DataSet(validate_x2_nonaggregated, validate_y2_nonaggregated, fake_data=False)


  # Create the convolutional model
  x = tf.placeholder(tf.float32, [None, 65536])

  # Define loss and optimizer
  y_ = tf.placeholder(tf.float32, [None, 3])

  # Build the graph for the deep net
  # with tf.device('/gpu:2'):
  y_conv, keep_prob, saver = deepnn(x)
  print(keep_prob)


  #plt.imshow(mnist.test.images[0].reshape(28,28))
  #print(type(mnist.test.images))
  #print(mnist.test.images.shape)
  #plt.show()
  cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
  train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
  correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))




  #grads = new_optimizer.compute_gradients(cross_entropy)
  # Running tallies for train/validation loss curves.
  data_points = []
  avg_loss = 0
  total_loss = 0
  avg_validation_loss = 0
  total_validation_loss = 0
  # NOTE(review): batch_size stays a *string* from argv — next_batch below
  # presumably needs an int; confirm against DataSet.next_batch.
  batch_size = sys.argv[1]
  batches_completed = 0
  validation_batches_completed = 0
  config = tf.ConfigProto(allow_soft_placement=True)
  config.gpu_options.allow_growth = True
  output_file= open("T1_GD_validation_loss_file_no_normalization_"+epoch_size+"_epochs_"+kernal_size+"_kernalsize_"+batch_size+"_batchsize.txt","w+")

  with tf.Session(config = config) as sess:

    sess.run(tf.global_variables_initializer())


    # sess.graph.finalize()

    for i in range(5000):
      batch_x, batch_y = data_set_all.next_batch(batch_size)
      # NOTE(review): rebinding the loop variable has no effect on batch_x —
      # the intended augmentation (random_alteration) is silently dropped.
      for batch_slice in batch_x:
        batch_slice = numpy.reshape(batch_slice, (256, 256))
        batch_slice = random_alteration(batch_slice)
        batch_slice = numpy.reshape(batch_slice, 65536)


      batches_completed += 1
      loss = sess.run(cross_entropy, feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
      total_loss += loss
      new_avg_loss = total_loss/batches_completed

      # NOTE(review): this guard is a no-op — avg_loss is overwritten
      # unconditionally two lines below.
      if(new_avg_loss>avg_loss and batches_completed != 1):
        avg_loss = new_avg_loss
      # break

      avg_loss = new_avg_loss

      data_points.append(loss)

      # Every 1000 steps: evaluate on a validation batch and log ROC AUCs.
      if i % 1000 == 0:
        validation_batch_x, validation_batch_y = validation_set_all.next_batch(batch_size)
        validation_batches_completed+=1
        train_accuracy = accuracy.eval(feed_dict={x: validation_batch_x, y_: validation_batch_y, keep_prob: 1.0})
        validation_loss = cross_entropy.eval(feed_dict={x: validation_batch_x, y_: validation_batch_y, keep_prob: 1.0})
        total_validation_loss += validation_loss
        new_avg_validation_loss = total_validation_loss/validation_batches_completed

        # NOTE(review): same no-op guard pattern as the training loss above.
        if(new_avg_validation_loss>avg_validation_loss and batches_completed!=1):
          avg_validation_loss = new_avg_validation_loss


        avg_validation_loss = new_avg_validation_loss

        output_file.write("Validation loss at i = %d is %g\n" % (i, avg_validation_loss))


        # Per-sample accuracy plus class-averaged probabilities for ROC.
        total_times = 0.0
        total_accuracy = 0.0
        prediction=tf.argmax(y_conv,1)
        probabilities=tf.nn.softmax(y_conv)
        probs_array = []
        condensed_y = []

        for j in range(len(validate_x2)):
          #print(test_x2[i])
          #print(test_y2[i])
          temp3 = accuracy.eval(feed_dict={x: validate_x2[j], y_: validate_y2[j], keep_prob: 1.0})
          print('test accuracy %g' % temp3)
          total_accuracy = total_accuracy + temp3
          total_times = total_times+1
          temp4 = prediction.eval(feed_dict={x: validate_x2[j], keep_prob: 1.0}, session=sess)
          print("predictions", temp4)
          probability = probabilities.eval(feed_dict={x: validate_x2[j], keep_prob: 1.0}, session=sess)
          print(probability)
          if j==0:
            probs_array = probability.mean(axis=0)
            condensed_y = validate_y2[j].mean(axis=0)
            continue
          probs_array = numpy.vstack([probs_array, probability.mean(axis=0)])
          condensed_y = numpy.vstack([condensed_y, validate_y2[j].mean(axis=0)])


        # One ROC curve per class, plus the micro-average.
        fpr = dict()
        tpr = dict()
        roc_auc = dict()

        for j in range(3):
          fpr[j], tpr[j], _ = roc_curve(condensed_y[:, j], probs_array[:, j])
          roc_auc[j] = auc(fpr[j], tpr[j])

          # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(condensed_y.ravel(), probs_array.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        output_file.write("ROCs at i = %d are "%i)
        for j in range(3):
          plt.plot(fpr[j], tpr[j], label='ROC curve of class {0} (area = {1:0.2f})'''.format(j, roc_auc[j]))
          output_file.write(str(roc_auc[j])+", ")

        output_file.write("\n")
        output_file.flush()
        print('step %d, training accuracy %g' % (i, train_accuracy))
        name = 'T1_GD_testing_with_intermediateROC_no_normalization_epoch_' + str(i)
        save_path = saver.save(sess, name)

      train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})


    #testing
    print(avg_loss)
    output_file.close()
    save_path = saver.save(sess, 'T1_GD_testing_with_intermediateROC_no_normalization_final')
Ejemplo n.º 22
0
 def setUpClass(self):
     # Load a small 100-record DataSet shared by the whole test class.
     # NOTE(review): presumably decorated with @classmethod above (not
     # visible here), so `self` would actually be the class object.
     self.dataset = DataSet()
     self.dataset.load(100)
Ejemplo n.º 23
0
class TestTutorials(unittest.TestCase):
    """docstring for TestTutorials"""
    @classmethod
    def setUpClass(self):
        # Load a small 100-record DataSet once, shared by every test below.
        # NOTE: `self` here is the class object (classmethod); `cls` would be
        # the conventional name.
        self.dataset = DataSet()
        self.dataset.load(100)

    def test_convolutional_multilayer_perceptron(self):
        # Train LeNet-5 for one epoch on the shared dataset and pin the
        # exact reported epoch loss and test score.
        lenet5 = ConvolutionalMultilayerPerceptronTrainer(self.dataset,
                                                          n_epochs=1,
                                                          batch_size=2)
        lenet5.initialize(nkerns=[2, 5])
        epoch_losses, best_validation_loss, best_iter, test_score = lenet5.train(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.52000000000000002, 49]])
        self.assertEqual(test_score, 0.45000000000000001)

    def test_convolutional_multilayer_perceptron_incremental(self):
        # Same run as above, but driven through the incremental
        # start_training / continue_training API; results must match.
        lenet5 = ConvolutionalMultilayerPerceptronTrainer(self.dataset,
                                                          n_epochs=1,
                                                          batch_size=2)
        lenet5.initialize(nkerns=[2, 5])
        state = lenet5.start_training(patience=10000,
                                      patience_increase=2,
                                      improvement_threshold=0.995)
        while lenet5.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.52000000000000002, 49]])
        self.assertEqual(state.test_score, 0.45000000000000001)

    def test_deep_belief_network(self):
        # One pretraining epoch per layer, then one fine-tuning epoch; the
        # layer costs are checked against tight reference intervals.
        dbn = DeepBeliefNetworkTrainer(self.dataset,
                                       batch_size=2,
                                       pretraining_epochs=1,
                                       training_epochs=1)
        dbn.initialize()

        layer_epoch_costs = dbn.pretrain()
        self.assertTrue(layer_epoch_costs[0][0] > -229.574659742916
                        and layer_epoch_costs[0][0] < -229.574659742915)
        self.assertTrue(layer_epoch_costs[1][0] > -724.564076667859
                        and layer_epoch_costs[1][0] < -724.564076667856)
        self.assertTrue(layer_epoch_costs[2][0] > -237.068920458976
                        and layer_epoch_costs[2][0] < -237.068920458975)

        epoch_losses, best_validation_loss, best_iter, test_score = dbn.train()
        self.assertEqual(best_validation_loss, 0.79)
        self.assertEqual(best_iter, 49)
        self.assertEqual(test_score, 0.76)

    def test_deep_belief_network_incremental(self):
        # Incremental variant of the DBN test: stepwise pretraining and
        # training must reproduce the same costs and scores.
        dbn = DeepBeliefNetworkTrainer(self.dataset,
                                       batch_size=2,
                                       pretraining_epochs=1,
                                       training_epochs=1)
        dbn.initialize()
        state = dbn.start_pretraining()
        while dbn.continue_pretraining(state):
            pass
        self.assertTrue(state.layer_epoch_costs[0] > -229.574659742916
                        and state.layer_epoch_costs[0] < -229.574659742915)
        self.assertTrue(state.layer_epoch_costs[1] > -724.564076667859
                        and state.layer_epoch_costs[1] < -724.564076667856)
        self.assertTrue(state.layer_epoch_costs[2] > -237.068920458976
                        and state.layer_epoch_costs[2] < -237.068920458975)

        state = dbn.start_training()
        while dbn.continue_training(state):
            pass
        self.assertEqual(state.best_validation_loss, 0.79)
        self.assertEqual(state.best_iter, 49)
        self.assertEqual(state.test_score, 0.76)

    def test_denoising_autoencoder(self):
        # One training epoch without corruption, then again at 30%
        # corruption; both costs are pinned to reference values.
        da = DenoisingAutoencoderTrainer(self.dataset,
                                         training_epochs=1,
                                         batch_size=2)
        da.initialize()
        uncorrupt_costs = da.train()
        self.assertEqual(uncorrupt_costs, [149.16503228187111])
        da.initialize(corruption_level=0.3)
        corrupt_costs = da.train()
        self.assertTrue(corrupt_costs[0] > 173.6649940882978
                        and corrupt_costs[0] < 173.6649940882979)

    def test_denoising_autoencoder_incremental(self):
        # Incremental variant of the dA test; stepwise training must match
        # the one-shot costs for both corruption levels.
        da = DenoisingAutoencoderTrainer(self.dataset,
                                         training_epochs=1,
                                         batch_size=2)
        da.initialize()
        state = da.start_training()
        while da.continue_training(state):
            pass
        self.assertEqual(state.costs, [149.16503228187111])
        da.initialize(corruption_level=0.3)
        state = da.start_training()
        while da.continue_training(state):
            pass
        self.assertTrue(state.costs[0] > 173.6649940882978
                        and state.costs[0] < 173.6649940882979)

    def test_logistic(self):
        # One-epoch logistic regression; exact loss and score are pinned.
        lc = LogisticTrainer(self.dataset, batch_size=2, n_epochs=1)
        lc.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = lc.train(
            patience=5000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.40000000000000002, 49]])
        self.assertEqual(test_score, 0.30)

    def test_logistic_incremental(self):
        # Incremental variant of the logistic test; must match the
        # one-shot train() results.
        lc = LogisticTrainer(self.dataset, batch_size=2, n_epochs=1)
        lc.initialize()
        state = lc.start_training(patience=5000,
                                  patience_increase=2,
                                  improvement_threshold=0.995)
        while lc.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.40000000000000002, 49]])
        self.assertEqual(state.test_score, 0.30)

    def test_multilayer_perceptron(self):
        # One-epoch MLP; exact epoch loss and test score are pinned.
        mp = MultilayerPerceptronTrainer(self.dataset,
                                         n_epochs=1,
                                         batch_size=2)
        mp.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = mp.train(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.54, 49]])
        self.assertEqual(test_score, 0.52)

    def test_multilayer_perceptron_incremental(self):
        # Incremental variant of the MLP test; must match one-shot results.
        mp = MultilayerPerceptronTrainer(self.dataset,
                                         n_epochs=1,
                                         batch_size=2)
        mp.initialize()
        state = mp.start_training(patience=10000,
                                  patience_increase=2,
                                  improvement_threshold=0.995)
        while mp.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.54, 49]])
        self.assertEqual(state.test_score, 0.52)

    def test_restricted_boltzmann_machine(self):
        """RBM trainer reproduces the recorded epoch cost."""
        trainer = RestrictedBoltzmannMachineTrainer(
            self.dataset, training_epochs=1, batch_size=2)
        trainer.initialize(n_chains=2, n_samples=2, n_hidden=5)
        self.assertEqual(trainer.train(), [-174.86070176730175])

    def test_restricted_boltzmann_machine_incremental(self):
        """Incremental RBM training matches the one-shot epoch cost."""
        trainer = RestrictedBoltzmannMachineTrainer(
            self.dataset, training_epochs=1, batch_size=2)
        trainer.initialize(n_chains=2, n_samples=2, n_hidden=5)
        state = trainer.start_training()
        # Drive the incremental API until it reports completion.
        while trainer.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [-174.86070176730175])

    def test_stacked_denoising_autoencoder(self):
        """SdA pre-training and fine-tuning reproduce the reference metrics."""
        trainer = StackedDenoisingAutoencoderTrainer(
            self.dataset, pretraining_epochs=1, n_epochs=1, batch_size=2)
        trainer.preinitialize()
        self.assertEqual(
            trainer.pretrain(),
            [[328.15852933515004], [771.56755018914123], [661.65193991637716]])
        trainer.initialize()
        (epoch_losses, best_validation_loss,
         best_iter, test_score) = trainer.train(None)
        self.assertEqual(epoch_losses, [[0.73, 49]])
        self.assertEqual(best_validation_loss, 0.73)
        self.assertEqual(best_iter, 49)
        self.assertEqual(test_score, 0.67)

    def test_stacked_denoising_autoencoder_incremental(self):
        """Incremental SdA pre-training and fine-tuning match train()."""
        trainer = StackedDenoisingAutoencoderTrainer(
            self.dataset, pretraining_epochs=1, n_epochs=1, batch_size=2)
        trainer.preinitialize()
        state = trainer.start_pretraining()
        while trainer.continue_pretraining(state):
            pass
        self.assertEqual(
            state.layer_epoch_costs,
            [[328.15852933515004], [771.56755018914123], [661.65193991637716]])
        trainer.initialize()
        state = trainer.start_training()
        while trainer.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.73, 49]])
        self.assertEqual(state.best_validation_loss, 0.73)
        self.assertEqual(state.best_iter, 49)
        self.assertEqual(state.test_score, 0.67)
Ejemplo n.º 24
0
    def input_fn(labeled: DataSet, unlabeled: DataSet = None, size: int = BATCH_SIZE):
        """Build the (features, labels) pair fed to the estimator.

        :param labeled: labeled DataSet; a batch of ``size`` records is drawn
        :param unlabeled: optional unlabeled DataSet; the labeled batch is
            reused when it is absent or empty
        :param size: number of labeled records per batch
        :return: tuple of (feature dict of tf constants, label tensor)
        """
        input_dict = {}

        # Treat an empty unlabeled set the same as no unlabeled set at all.
        if unlabeled is not None and unlabeled.size() == 0:
            unlabeled = None

        # labeled data
        labeled = labeled.get_batch(size)
        input_dict['labeled_inputs'] = tf.constant(np.array(labeled.inputs()))
        input_dict['labeled_sequence_length'] = tf.constant(labeled.lengths())
        input_dict['labeled_mask'] = tf.constant(labeled.masks())
        labels = tf.constant(labeled.labels())

        # Unlabeled data: reuse the labeled batch when none was provided.
        # An explicit conditional replaces the fragile ``cond and a or b``
        # idiom, which would misbehave if the labeled batch were ever falsy.
        if unlabeled is None:
            unlabeled = labeled
        else:
            unlabeled = unlabeled.get_batch(labeled.size())
        input_dict['unlabeled_inputs'] = tf.constant(np.array(unlabeled.inputs()))
        input_dict['unlabeled_sequence_length'] = tf.constant(unlabeled.lengths())
        input_dict['unlabeled_mask'] = tf.constant(unlabeled.masks())
        input_dict['unlabeled_size'] = tf.constant(unlabeled.size())
        input_dict['unlabeled_target'] = tf.constant(unlabeled.labels())

        return input_dict, labels
Ejemplo n.º 25
0
 def load_data(self, imu_file_name: str, att_file_name: str):
     """Load a pixhawk log into a fresh DataSet and parse its IMU data."""
     data_set = DataSet(imu_file_name, att_file_name)
     data_set.load_imu_data()
     self._data_set = data_set
Ejemplo n.º 26
0
class attitude():
    """Base class for attitude-estimation strategies on pixhawk logs.

    Subclasses override :meth:`calculate_att` to fill ``self._att`` with
    ``[pitch, roll, yaw]`` triples (radians), one per IMU sample.
    """

    # Radians-to-degrees factor used by the plots (approximately 180/pi);
    # kept at the original literal so output is unchanged.
    _RAD_TO_DEG = 57.2957795

    def __init__(self):
        self._strategy = "none"   # human-readable strategy name (plot titles)
        self._data_set = None     # DataSet holding the loaded log
        self._att = []            # computed [pitch, roll, yaw] triples (rad)

    def load_data(self, imu_file_name: str, att_file_name: str):
        '''read pixhawk log file'''
        self._data_set = DataSet(imu_file_name, att_file_name)
        self._data_set.load_imu_data()
        # self._data_set.load_px4_att()
        # self._data_set.load_open_imu_data()

    def remove_allresults(self):
        """Discard all previously computed attitude samples."""
        self._att.clear()

    def calculate_att(self):
        '''implement in subclass'''
        self.remove_allresults()

    def add_pitch_roll_yaw(self, pitch: float, roll: float, yaw: float):
        """Append one attitude sample (angles in radians)."""
        self._att.append([pitch, roll, yaw])

    def _plot_component(self, position, index, label, times,
                        title=None, show_xlabel=True):
        """Draw one subplot for the attitude component at ``index``."""
        plt.subplot(position)
        degrees = [sample[index] * self._RAD_TO_DEG for sample in self._att]
        plt.plot(times, degrees, label=label)
        plt.ylabel('{}(deg)'.format(label))
        if title is not None:
            plt.title(title)
        if show_xlabel:
            plt.xlabel('time(s)')
        plt.legend()
        plt.grid(True)
        plt.grid(linestyle='--')

    def show_fig(self):
        '''show fig of calculated attitude and pixhawk attitude'''
        if not self._att:
            print('no result')
            return
        # ekf_times, ekf_pitchs, ekf_rolls, ekf_yaws = self._data_set.get_ekf_attitude()
        imu_times = self._data_set.get_imu_times()

        plt.figure(self._strategy)
        self._plot_component(311, 0, 'pitch', imu_times,
                             title=self._strategy, show_xlabel=False)
        self._plot_component(312, 1, 'roll', imu_times)
        self._plot_component(313, 2, 'yaw', imu_times)
        plt.show()

    def test(self):
        '''main test'''
        sensorfile = r'test\09_26_14_sensor_combined_0.csv'
        attfile = r'test\09_26_14_vehicle_attitude_0.csv'
        self.load_data(sensorfile, attfile)
        self.calculate_att()
        self.show_fig()
Ejemplo n.º 27
0
def main():
    """Entry point: run DQN training for the robot navigation task.

    Restores a saved network and replay memory when the persisted config
    requests it, then loops: act in the environment, store transitions,
    train on random minibatches, and periodically decay epsilon and
    checkpoint everything.
    """
    sys.setrecursionlimit(2000)

    config = Configuration()

    # config.txt persists (epsilon, step counter) across runs.
    with open(config.DATA_FOLDER + '/config.txt', 'r') as f:
        configFile = f.read().split(',')

    print('Parameters', configFile)
    config.EPSILON_START = float(configFile[0])
    config.LOAD_NET_NUMBER = int(float(configFile[1]))

    agentTF = AgentTF(config.STATE_SIZE, config.PHI_LENGTH, config.ACTION_SIZE,
                      config.HIDDEN_LAYERS, config.BATCH_SIZE, config.TAU,
                      config.GAMMA)

    if config.LOAD_NET_NUMBER > 0:
        # Resume: restore the replay memory and the model weights.
        dataSet = loadDataSet(config.DATA_FOLDER, config.LOAD_NET_NUMBER)
        agentTF.restore_model(config.DATA_FOLDER)
        countTotalSteps = config.LOAD_NET_NUMBER
    else:
        # Initialize DataSet
        dataSet = DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE,
                          config.PHI_LENGTH, config.RNG)
        countTotalSteps = 0

        openLearningFile(config.DATA_FOLDER)

    eC = environmentControl(config.PATH_ROBOT, config.PATH_GOAL,
                            config.PATH_LAUNCHFILE)
    eC.spawn(config.ROBOT_NAME)
    eC.spawnGoal()
    eC.setRandomModelState(config.ROBOT_NAME)
    #eC.pause()

    dP = dataProcessor(eC, config.ROBOT_NAME, config.PHI_LENGTH,
                       config.STATE_SIZE, config.NUM_SENSOR_VAL,
                       config.SENSOR_RANGE_MAX, config.SENSOR_RANGE_MIN,
                       config.VEL, config.VEL_CURVE, config.UPDATE_TIME,
                       config.SPEED_UP)

    lastState = np.zeros((1, config.STATE_SIZE))
    lastReward = 0
    lastAction = 0

    countSteps = 0          # steps within the current attempt
    batchCount = 0          # number of minibatch trainings performed
    lossAverages = np.empty([0])
    epochCount = 0

    epsilon = max(config.EPSILON_START, config.EPSILON_MIN)
    epsilonRate = config.EPSILON_DECAY

    quit = False

    try:
        # Seed the replay memory / state history with a few random actions.
        for i in range(4):
            action = np.random.randint(config.ACTION_SIZE)
            dP.action(action)

            state, reward = dP.getStateReward()
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)
            countTotalSteps += 1
            countSteps += 1
            lastState = state
        # NOTE(review): `-0` equals 0, so this fires only for a negative
        # EPSILON_START -- presumably used as a "stop immediately" sentinel.
        if config.EPSILON_START < -0:
            quit = True
        while not quit:
            if countTotalSteps % 1000 == 0:
                # Periodically flush accumulated losses to the learning log.
                updateLearningFile(config.DATA_FOLDER, lossAverages,
                                   countTotalSteps)
                lossAverages = np.empty([0])
                print(countTotalSteps)

            # Epsilon-greedy action from the stacked recent-state history.
            phi = dataSet.phi(lastState)
            action = agentTF.getAction(phi, epsilon)
            #action=userAction()
            eC.unpause()
            dP.action(action)
            state, reward = dP.getStateReward()
            eC.pause()

            if dP.isGoal:
                # Goal reached: respawn robot and goal at random poses.
                print('The goal was reached in ', countSteps, ' steps')
                countSteps = 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                dP.isGoal = False

            if dP.flipped:
                # Robot tipped over: respawn it at a random pose.
                eC.setRandomModelState(config.ROBOT_NAME)
                dP.flipped = False

            # After NUM_STEPS the chance is over
            if countSteps % config.NUM_STEPS == 0:
                countSteps = 1
                reward -= 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                print('Your chance is over! Try again ...')

            #print(reward)

            dataSet.addSample(lastState, action, reward, state, dP.isGoal)

            # Training
            if countTotalSteps > config.REPLAY_START_SIZE and countTotalSteps % 5 == 0:
                batchStates, batchActions, batchRewards, batchNextStates, batchTerminals= \
                          dataSet.randomBatch(config.BATCH_SIZE)
                loss = agentTF.train(batchStates, batchActions, batchRewards,
                                     batchNextStates, batchTerminals)
                #print('Loss', loss)
                # count How many trainings had been done
                batchCount += 1
                # add loss to lossAverages
                lossAverages = np.append(lossAverages, loss)

            #Update Epsilon save dataSet, network
            if countTotalSteps % config.SIZE_EPOCH == 0:
                # Number of Epochs
                epochCount += 1

                # Update Epsilon
                if (epsilon - epsilonRate) < config.EPSILON_MIN - 0.01:
                    quit = True
                epsilon = max(epsilon - epsilonRate, config.EPSILON_MIN)
                print('Epsilon updated to: ', epsilon)

                agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
                saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
            lastState = state
            countTotalSteps += 1
            countSteps += 1

    except rospy.exceptions.ROSException:
        # On ROS shutdown, checkpoint everything so the run can resume.
        agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
        saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)
        agentTF.close()
        eC.close()

        with open(config.DATA_FOLDER + '/config.txt', 'w') as f:
            out = "{},{}".format(epsilon, countTotalSteps)
            f.write(out)
Ejemplo n.º 28
0
def main(_):
    """Train the deep CNN on pickled, L2-normalized image data.

    Loads pickled training/validation arrays, builds the network via
    ``deepnn``, then runs Adam for 1M minibatches, evaluating on a
    validation batch and checkpointing every 10000 steps.
    """
    # Import data
    ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    print("starting to load data...")
    # Context managers so the pickle file handles are closed promptly
    # (the original left them open).
    with open("all_x_l2normalization.p", "rb") as f:
        x2 = pickle.load(f)
    print("x2 loaded.")
    with open("all_y_l2normalization.p", "rb") as f:
        y2 = pickle.load(f)
    print("y2 loaded.")
    with open("all__validation_x_l2normalization.p", "rb") as f:
        validate_x2 = pickle.load(f)
    print("validate_x2 loaded.")
    with open("all__validation_y_l2normalization.p", "rb") as f:
        validate_y2 = pickle.load(f)
    print("validate_y2 loaded.")

    data_set_all = DataSet(x2, y2, fake_data=False)
    validation_set_all = DataSet(validate_x2, validate_y2, fake_data=False)

    # Create the convolutional model: 256x256 images flattened to 65536.
    x = tf.placeholder(tf.float32, [None, 65536])

    # Define loss and optimizer: 3 output classes.
    y_ = tf.placeholder(tf.float32, [None, 3])

    # Build the graph for the deep net
    y_conv, keep_prob, saver = deepnn(x)
    print(keep_prob)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    data_points = []
    avg_loss = 0
    total_loss = 0
    avg_validation_loss = 0
    total_validation_loss = 0
    batch_size = 10
    batches_completed = 0
    validation_batches_completed = 0
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    output_file = open("validation_loss_file_l2normalization.txt", "w+")

    with tf.Session(config=config) as sess:

        sess.run(tf.global_variables_initializer())

        for i in range(1000000):
            batch_x, batch_y = data_set_all.next_batch(batch_size)
            # BUG FIX: the original loop rebound the loop variable, so the
            # random alterations were computed and then discarded.  Write
            # the augmented slice back into the batch instead.
            for j, batch_slice in enumerate(batch_x):
                altered = numpy.reshape(batch_slice, (256, 256))
                altered = random_alteration(altered)
                batch_x[j] = numpy.reshape(altered, 65536)

            batches_completed += 1
            loss = sess.run(cross_entropy,
                            feed_dict={
                                x: batch_x,
                                y_: batch_y,
                                keep_prob: 0.5
                            })
            total_loss += loss
            new_avg_loss = total_loss / batches_completed

            # NOTE(review): leftover early-stopping scaffold -- the branch
            # is immediately overwritten by the assignment below, so it has
            # no effect; kept to preserve behavior.
            if (new_avg_loss > avg_loss and batches_completed != 1):
                avg_loss = new_avg_loss
            # break

            avg_loss = new_avg_loss

            data_points.append(loss)

            if i % 10000 == 0:
                # Evaluate one validation batch and checkpoint the model.
                validation_batch_x, validation_batch_y = validation_set_all.next_batch(
                    batch_size)
                validation_batches_completed += 1
                train_accuracy = accuracy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                validation_loss = cross_entropy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                total_validation_loss += validation_loss
                new_avg_validation_loss = total_validation_loss / validation_batches_completed

                if (new_avg_validation_loss > avg_validation_loss
                        and batches_completed != 1):
                    avg_validation_loss = new_avg_validation_loss

                avg_validation_loss = new_avg_validation_loss

                output_file.write("Validation loss at i = %d is %g\n" %
                                  (i, avg_validation_loss))
                output_file.flush()
                print('step %d, training accuracy %g' % (i, train_accuracy))
                name = 'my-model_testing_l2normalization_epoch_' + str(i)
                save_path = saver.save(sess, name)
            train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

        #testing
        print(avg_loss)
        output_file.close()
        save_path = saver.save(sess, 'my-model_testing_l2normalization_final')
Ejemplo n.º 29
0
class TestTutorials(unittest.TestCase):
    """Regression tests pinning every tutorial trainer to reference values.

    Each trainer runs for a single epoch on the same 100-record dataset and
    its losses/scores are compared against previously recorded numbers, for
    both the one-shot ``train()`` API and the incremental
    ``start_training``/``continue_training`` API.
    """

    @classmethod
    def setUpClass(cls):
        # Classmethod receives the class, so name the parameter `cls`.
        # Load the shared dataset once; the tests use it read-only.
        cls.dataset = DataSet()
        cls.dataset.load(100)

    def test_convolutional_multilayer_perceptron(self):
        lenet5 = ConvolutionalMultilayerPerceptronTrainer(
            self.dataset, n_epochs=1, batch_size=2)
        lenet5.initialize(nkerns=[2, 5])
        epoch_losses, best_validation_loss, best_iter, test_score = lenet5.train(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.52000000000000002, 49]])
        self.assertEqual(test_score, 0.45000000000000001)

    def test_convolutional_multilayer_perceptron_incremental(self):
        lenet5 = ConvolutionalMultilayerPerceptronTrainer(
            self.dataset, n_epochs=1, batch_size=2)
        lenet5.initialize(nkerns=[2, 5])
        state = lenet5.start_training(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        while lenet5.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.52000000000000002, 49]])
        self.assertEqual(state.test_score, 0.45000000000000001)

    def test_deep_belief_network(self):
        dbn = DeepBeliefNetworkTrainer(
            self.dataset, batch_size=2, pretraining_epochs=1,
            training_epochs=1)
        dbn.initialize()

        # Pre-training costs are floats; compare against tight intervals.
        layer_epoch_costs = dbn.pretrain()
        self.assertTrue(layer_epoch_costs[0][0] > -229.574659742916
                        and layer_epoch_costs[0][0] < -229.574659742915)
        self.assertTrue(layer_epoch_costs[1][0] > -724.564076667859
                        and layer_epoch_costs[1][0] < -724.564076667856)
        self.assertTrue(layer_epoch_costs[2][0] > -237.068920458976
                        and layer_epoch_costs[2][0] < -237.068920458975)

        epoch_losses, best_validation_loss, best_iter, test_score = dbn.train()
        self.assertEqual(best_validation_loss, 0.79)
        self.assertEqual(best_iter, 49)
        self.assertEqual(test_score, 0.76)

    def test_deep_belief_network_incremental(self):
        dbn = DeepBeliefNetworkTrainer(
            self.dataset, batch_size=2, pretraining_epochs=1,
            training_epochs=1)
        dbn.initialize()
        state = dbn.start_pretraining()
        while dbn.continue_pretraining(state):
            pass
        self.assertTrue(state.layer_epoch_costs[0] > -229.574659742916
                        and state.layer_epoch_costs[0] < -229.574659742915)
        self.assertTrue(state.layer_epoch_costs[1] > -724.564076667859
                        and state.layer_epoch_costs[1] < -724.564076667856)
        self.assertTrue(state.layer_epoch_costs[2] > -237.068920458976
                        and state.layer_epoch_costs[2] < -237.068920458975)

        state = dbn.start_training()
        while dbn.continue_training(state):
            pass
        self.assertEqual(state.best_validation_loss, 0.79)
        self.assertEqual(state.best_iter, 49)
        self.assertEqual(state.test_score, 0.76)

    def test_denoising_autoencoder(self):
        da = DenoisingAutoencoderTrainer(self.dataset, training_epochs=1,
                                         batch_size=2)
        da.initialize()
        uncorrupt_costs = da.train()
        self.assertEqual(uncorrupt_costs, [149.16503228187111])
        da.initialize(corruption_level=0.3)
        corrupt_costs = da.train()
        self.assertTrue(corrupt_costs[0] > 173.6649940882978
                        and corrupt_costs[0] < 173.6649940882979)

    def test_denoising_autoencoder_incremental(self):
        da = DenoisingAutoencoderTrainer(self.dataset, training_epochs=1,
                                         batch_size=2)
        da.initialize()
        state = da.start_training()
        while da.continue_training(state):
            pass
        self.assertEqual(state.costs, [149.16503228187111])
        da.initialize(corruption_level=0.3)
        state = da.start_training()
        while da.continue_training(state):
            pass
        self.assertTrue(state.costs[0] > 173.6649940882978
                        and state.costs[0] < 173.6649940882979)

    def test_logistic(self):
        lc = LogisticTrainer(self.dataset, batch_size=2, n_epochs=1)
        lc.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = lc.train(
            patience=5000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.40000000000000002, 49]])
        self.assertEqual(test_score, 0.30)

    def test_logistic_incremental(self):
        lc = LogisticTrainer(self.dataset, batch_size=2, n_epochs=1)
        lc.initialize()
        state = lc.start_training(patience=5000, patience_increase=2,
                                  improvement_threshold=0.995)
        while lc.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.40000000000000002, 49]])
        self.assertEqual(state.test_score, 0.30)

    def test_multilayer_perceptron(self):
        mp = MultilayerPerceptronTrainer(self.dataset, n_epochs=1,
                                         batch_size=2)
        mp.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = mp.train(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.54, 49]])
        self.assertEqual(test_score, 0.52)

    def test_multilayer_perceptron_incremental(self):
        mp = MultilayerPerceptronTrainer(self.dataset, n_epochs=1,
                                         batch_size=2)
        mp.initialize()
        state = mp.start_training(patience=10000, patience_increase=2,
                                  improvement_threshold=0.995)
        while mp.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.54, 49]])
        self.assertEqual(state.test_score, 0.52)

    def test_restricted_boltzmann_machine(self):
        rbm = RestrictedBoltzmannMachineTrainer(self.dataset,
                                                training_epochs=1,
                                                batch_size=2)
        rbm.initialize(n_chains=2, n_samples=2, n_hidden=5)
        epoch_costs = rbm.train()
        self.assertEqual(epoch_costs, [-174.86070176730175])

    def test_restricted_boltzmann_machine_incremental(self):
        rbm = RestrictedBoltzmannMachineTrainer(self.dataset,
                                                training_epochs=1,
                                                batch_size=2)
        rbm.initialize(n_chains=2, n_samples=2, n_hidden=5)
        state = rbm.start_training()
        while rbm.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [-174.86070176730175])

    def test_stacked_denoising_autoencoder(self):
        sda = StackedDenoisingAutoencoderTrainer(self.dataset,
                                                 pretraining_epochs=1,
                                                 n_epochs=1,
                                                 batch_size=2)
        sda.preinitialize()
        layer_epoch_costs = sda.pretrain()
        self.assertEqual(
            layer_epoch_costs,
            [[328.15852933515004], [771.56755018914123], [661.65193991637716]])
        sda.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = sda.train(
            None)
        self.assertEqual(epoch_losses, [[0.73, 49]])
        self.assertEqual(best_validation_loss, 0.73)
        self.assertEqual(best_iter, 49)
        self.assertEqual(test_score, 0.67)

    def test_stacked_denoising_autoencoder_incremental(self):
        sda = StackedDenoisingAutoencoderTrainer(self.dataset,
                                                 pretraining_epochs=1,
                                                 n_epochs=1,
                                                 batch_size=2)
        sda.preinitialize()
        state = sda.start_pretraining()
        while sda.continue_pretraining(state):
            pass
        self.assertEqual(
            state.layer_epoch_costs,
            [[328.15852933515004], [771.56755018914123], [661.65193991637716]])
        sda.initialize()
        state = sda.start_training()
        while sda.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.73, 49]])
        self.assertEqual(state.best_validation_loss, 0.73)
        self.assertEqual(state.best_iter, 49)
        self.assertEqual(state.test_score, 0.67)
Ejemplo n.º 30
0
 def to_dataset(self, data):
     """Wrap raw rows in a DataSet, split off the friend column, return X/Y."""
     split_set = DataSet(data, self.header).split(self.friend_index)
     return split_set.X, split_set.Y
Ejemplo n.º 31
0
def main(path):
    """Run the staged analysis pipeline selected by the settings file.

    Stages (chosen via ``settings.stages``): 'mask' segments the raw frames,
    'seg' splits the data by segment, 'set' builds the data sets, 'net'
    trains one network per sample and maps losses onto frames/segments,
    'show_vis' plots the saved visualizations.
    """
    settings = Settings(path)
    # mask seg set net los vis
    if 'mask' in settings.stages:
        # need v,h raw and frames
        square = 'square' in settings.flags
        max_seg = settings.max_seg
        print(square)
        print(max_seg)
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        mask = segment.vert_horiz_seg(v[:, settings.frames, :],
                                      h[:, settings.frames, :],
                                      square=square,
                                      max_seg=max_seg)
        data_io.save_to(mask, settings.files['mask'], 'mask')

    if 'seg' in settings.stages:
        # Split both vertical and horizontal data along the mask segments.
        mask = data_io.read_from_file(settings.files['mask'], 'mask')
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        seg_v = segment.divide_data_to_segments(mask, v[:, settings.frames, :])
        seg_h = segment.divide_data_to_segments(mask, h[:, settings.frames, :])
        data_io.save_to([seg_v, seg_h], settings.files['seg'], 'seg')

    if 'set' in settings.stages:
        [seg_v, seg_h] = data_io.read_from_file(settings.files['seg'], 'seg')
        # NOTE(review): `cv` is computed here but never used in this stage.
        cv = 'cv' in settings.flags
        normalize = 'norm' in settings.flags
        sizes = settings.sizes
        data_sets = create_data_set.get_data(seg_v,
                                             seg_h,
                                             n_new_train=sizes['train'],
                                             normalize=normalize)
        data_io.save_to(data_sets, settings.files['set'], 'set')

    if 'net' in settings.stages:
        cv = 'cv' in settings.flags
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        data_sets = data_io.read_from_file(settings.files['set'], 'set')
        mask = data_io.read_from_file(settings.files['mask'], 'mask')

        tx, ty, vx, vy = data_sets
        D_in = vx.shape[1]

        ty = ty.astype(np.float64)
        vy = vy.astype(np.float64)

        n_data_sets = len(tx)
        n_frames = len(settings.frames)
        # NOTE(review): 'nubmers' is a pre-existing typo, kept as-is.
        # Background label 0 is excluded from the segment count.
        mask_nubmers = np.unique(mask)
        n_seg = len(mask_nubmers) - 1 if mask_nubmers[0] == 0 else len(
            mask_nubmers)

        frames_loss_maps = np.zeros([n_data_sets, n_frames])
        seg_loss_maps = np.zeros([n_data_sets, n_seg])
        all_train_losses = []
        all_test_losses = []
        all_acc = []
        # Leave-one-out style loop: train on one_tx, validate on one_vx.
        for idx, (one_tx, one_ty, one_vx,
                  one_vy) in enumerate(zip(tx, ty, vx, vy)):
            one_train = DataSet(torch.from_numpy(one_tx),
                                torch.from_numpy(one_ty))
            one_test = DataSet(torch.from_numpy(one_vx.reshape([1, -1])),
                               torch.from_numpy(one_vy.reshape([
                                   1,
                               ])))
            # Normalize the test sample with the training statistics.
            mean_t, std_t = one_train.calc_mean_std()
            one_train = one_train.normalize(mean_t, std_t)
            one_test = one_test.normalize(mean_t, std_t)
            print(idx)
            net = dense_net.get_model(D_in)
            training_parameters = run_nn.get_train_params(net)

            net, train_losses, valid_losses, valid_accuracies = run_nn.train(
                net, [one_train, one_test], training_parameters)
            all_acc.append(valid_accuracies[-1])
            if valid_losses[-1] > 0.6:
                # Flag samples the network failed to fit well.
                print('\n{}\n'.format(idx))
            all_train_losses.append(train_losses)
            all_test_losses.append(valid_losses)
            # Ablation: measure loss when frames / segments are removed.
            frames_loss_maps[idx, :] = np.asarray(
                run_nn.run_with_missing_parts(net,
                                              mask,
                                              one_test,
                                              False,
                                              len(settings.frames),
                                              part_type='frames',
                                              zero_all=zero_all,
                                              value_type=value_type))
            seg_loss_maps[idx, :] = run_nn.run_with_missing_parts(
                net,
                mask,
                one_test,
                False,
                len(settings.frames),
                part_type='segments',
                zero_all=zero_all,
                value_type=value_type)

        print('acc: {}'.format(np.mean(np.asarray(all_acc))))
        frame_loss = np.mean(frames_loss_maps, axis=0)
        seg_loss = segment.recreate_image(mask, np.mean(seg_loss_maps, axis=0))
        data_io.save_to(frame_loss, settings.files['vis_frame'], 'vis')
        data_io.save_to(seg_loss, settings.files['vis_seg'], 'vis')
        visualize_res.plot_losses(all_train_losses, all_test_losses, [],
                                  n_data_sets)

    if 'show_vis' in settings.stages:
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        zero_all_str = 'Present' if zero_all else 'Missing'
        value_type_str = 'Accuracy' if value_type == 'acc' else 'Loss'
        title_seg = 'Average {} per {} Segment'.format(value_type_str,
                                                       zero_all_str)
        title_frame = 'Average {} per {} Frame'.format(value_type_str,
                                                       zero_all_str)

        # images = data_io.read_from_file(settings.files['vis_both'], 'vis')
        # visualize_res.plot_spatial(images, settings.frame_groups_string, n_frames=len(images))

        loss_map = data_io.read_from_file(settings.files['vis_frame'], 'vis')
        visualize_res.plot_temporal(
            loss_map, [x + 1 for x in settings.frames],
            title=title_frame,
            ylabel=value_type
        )  # counting starts from 0, so the relevant frames are +1

        image = data_io.read_from_file(settings.files['vis_seg'], 'vis')
        visualize_res.plot_spatial(image, title=title_seg)
Ejemplo n.º 32
0
# Input file glob patterns for the GFDL-CM3 historical climate runs.
# (Variable meanings inferred from CMIP naming -- sic/sit/tas/clt are
# presumably sea-ice concentration/thickness, air temperature and cloud
# cover; confirm against the data source.)
SIC_PATH = 'sic_day_GFDL-CM3_historical*'
SIT_PATH = 'sit_day_GFDL-CM3_historical*'
TAS_PATH = 'tas_3hr_GFDL-CM3_historical_r1i1p1_*.nc'
CLT_PATH = 'tcdc.eatm.gauss.19[89]*.nc'

BEGIN_DATE = datetime(1979, 1, 1, 0, 0, 0)  # first forcing start date
NUM_YEARS = 20  # one start date per year from BEGIN_DATE

DELTA_T = 150  # window passed to get_radiative_forcing; units not shown here

if __name__ == '__main__':
    print('Creating DataSet')
    data_set = DataSet(
        sic_path=SIC_PATH,
        sit_path=SIT_PATH,
        tas_path=TAS_PATH,
        clt_path=CLT_PATH,
        sic_scale=.01,
        clt_scale=.01,
    )
    print('Getting Albedos')
    albedos = Albedos()
    year = dateutil.relativedelta.relativedelta(years=1)
    rad_start_dates = [BEGIN_DATE + year * n for n in range(NUM_YEARS)]
    forcings = []
    for rad_start_date in rad_start_dates:
        forcing = get_radiative_forcing(
            start_date=rad_start_date,
            delta_t=DELTA_T,
            data_set=data_set,
            albedos=albedos,
        )
        probability = float(E**epsilon) / float(1 + (E**epsilon))

        for i in range(run_times):
            result.append(self.do_randomize(dataset, probability))

        return result

    def get_qD(self, D):
        """Return the fraction of records matching the target predicate.

        A record matches when column 4 (education-num in the adult data set,
        presumably) is greater than 13 and column 14 equals ``'<=50K'``.

        :param D: data set exposing ``records``, a sized iterable of rows
        :return: matching fraction as a float; 0.0 for an empty data set
        """
        records = D.records
        if not records:
            # Guard: the original raised ZeroDivisionError on empty data.
            return 0.0
        matches = sum(1 for row in records
                      if row[4] > 13 and row[14] == '<=50K')
        return float(matches) / float(len(records))

    def compute_accuacy(self, D, N=100, e=0.5, beta=0.05):
        """Return the empirical errors of the randomized-response estimates
        together with the theoretical accuracy bound ``alpha``."""
        exp_e = math.exp(e)
        p = (1 + exp_e) / (exp_e - 1)
        q = 1 / (1 + exp_e)
        alpha = p * math.sqrt(math.log(2 / beta) / (2 * N))

        true_fraction = self.get_qD(D)
        estimates = self.do_randomized_mechenism(D, N, e)
        errors = [p * (estimate - q) - true_fraction for estimate in estimates]

        return errors, alpha


D0 = DataSet()
D0.create_from_csv('./adult.csv')
rr = RandomizedResponse()
errors, alpha = rr.compute_accuacy(D0)
Ejemplo n.º 34
0
                              self.input_x: xs,
                              self.label_y: labels,
                              self.diag_x: diags,
                              self.keep_prob: 0.4
                          })

    def predict(self, dataset):
        """Run the trained network on *dataset* and return predicted labels.

        :param dataset: object exposing ``xs``, ``diags`` and ``labels``
            arrays fed into the graph placeholders
        :returns: result of evaluating ``self.predict_y_label``
        """
        return self.sess.run(self.predict_y_label,
                             feed_dict={
                                 self.input_x: dataset.xs,
                                 self.diag_x: dataset.diags,
                                 self.label_y: dataset.labels,
                                 # Bug fix: dropout must be disabled at
                                 # inference. Feeding the training value
                                 # 0.4 would randomly zero activations and
                                 # make predictions non-deterministic.
                                 self.keep_prob: 1.0
                             })


if __name__ == "__main__":
    # Train and evaluate NaiveNet on 10 data-set splits, then report the
    # averaged metrics across all runs.
    # NOTE(review): accuracies/precisions/recalls/F1s are appended to but
    # not initialized in this view — presumably module-level lists defined
    # above; verify they exist before this loop runs.
    for i in range(10):
        print("start with dataset: ", i)
        net = NaiveNet()
        test_set = DataSet(i, prefix="test")
        net.train(1000, DataSet(i), test_set)
        # Collect the best metrics recorded during training for this split.
        accuracies.append(net.best_accu)
        precisions.append(net.best_precision)
        recalls.append(net.best_recall)
        F1s.append(net.best_f1)
    print("Final Average Accuracy: ", np.average(accuracies))
    print("Final Average Precisions: ", np.average(precisions))
    print("Final Average Recalls: ", np.average(recalls))
    print("Final Average F1s: ", np.average(F1s))
Ejemplo n.º 35
0
 @classmethod
 def setUpClass(cls):
     """Load the shared DataSet fixture once for the whole test class.

     Bug fix: unittest invokes setUpClass on the class itself, so it must
     be decorated with @classmethod and receive ``cls``; as a plain method
     the framework's ``cls.setUpClass()`` call raises TypeError. The
     fixture attribute remains class-level, so tests reading
     ``self.dataset`` still resolve it.
     """
     cls.dataset = DataSet()
     cls.dataset.load(100)
Ejemplo n.º 36
0
    def estimate_initial_conditions(self, max_comp=128, max_iter=5000):
        """Estimate a component count and initial conditions via BEM runs.

        Repeatedly fits BEM mixtures to the pooled data from all data sets
        across 16 random seeds, shrinking the component count until no
        near-maximal-likelihood run suggests fewer components, then
        re-clusters the combined data with the best run's settings and
        extracts per-data-set mixture weights plus the final mus/sigmas.

        NOTE(review): this is Python 2 code (print statements, and the
        ``r['log_like'] > max_log_like`` comparison against the initial
        ``None`` relies on Py2 mixed-type ordering — it raises TypeError
        under Python 3).

        :param max_comp: starting (maximum) number of mixture components
        :param max_iter: iteration budget for each BEM run
        :returns: tuple (component_count, initial_conditions) where
            initial_conditions is a dict with keys 'pis', 'mus', 'sigmas'
        """
        # now run bem on the combined data set to get initial conditions
        max_log_like = None  # the highest value for all runs
        converged = False
        component_count = max_comp
        iteration_count = max_iter

        results = []  # will be a list of dicts to convert to a DataFrame

        # NOTE(review): the pool is never closed/joined in this method —
        # worker processes live until interpreter exit.
        cpu_count = multiprocessing.cpu_count()
        bem_pool = multiprocessing.Pool(processes=cpu_count)

        # Pool every blob from every data set into one array for clustering.
        data = np.vstack(
            [np.vstack(ds.blobs.values()) for ds in self.data_sets]
        )

        while not converged:
            print component_count

            new_comp_counts = []

            # set of dictionaries for this comp run, one for each seed
            input_dicts = [
                {
                    'data': data,
                    'component_count': component_count,
                    'iteration_count': iteration_count,
                    'random_seed': seed
                } for seed in range(1, 17)
            ]

            # Run the 16 seeds in parallel across the pool.
            tmp_results_list = bem_pool.map(bem_cluster, input_dicts)

            # First pass: track the best log-likelihood seen so far.
            for r in tmp_results_list:
                if r['log_like'] > max_log_like:
                    max_log_like = r['log_like']

            for r in tmp_results_list:
                # if the new log_like is close to the max (within 1%),
                # see if there are any empty components (pi < 0.0001)

                if abs(max_log_like - r['log_like']) < abs(max_log_like * 0.01):

                    new_comp_counts.append(r['true_comp'])

                    # save good run to our results
                    results.append(r)

            # Shrink the component count toward the smallest count any
            # good run actually used; stop when it cannot shrink further.
            if len(new_comp_counts) > 0:
                if int(np.mean(new_comp_counts)) < component_count:
                    component_count = int(np.min(new_comp_counts))
                else:
                    converged = True
            else:
                converged = True

        results_df = pd.DataFrame(
            results,
            columns=['comp', 'true_comp', 'seed', 'log_like']
        )

        # Among runs with the fewest components, pick the best likelihood.
        min_comp = results_df.comp.min()
        best_index = results_df[results_df.comp == min_comp].log_like.argmax()

        best_run = results[best_index]

        # create a data set that's the combination of all data sets
        prelim_ds = DataSet(parameter_count=self._parameter_count)

        for i, ds in enumerate(self.data_sets):
            # start blob labels at 1 (i + 1)
            prelim_ds.add_blob(i + 1, np.vstack(ds.blobs.values()))

        # Re-cluster the combined data using the winning run's settings.
        prelim_ds.cluster(
            component_count=best_run['comp'],
            burn_in=0,
            iteration_count=iteration_count,
            random_seed=best_run['seed'],
            model='bem'
        )

        log_like = prelim_ds.get_log_likelihood_trace()[0]
        print log_like

        # get classifications to calculate weights for each data set
        pis = []
        for label in sorted(prelim_ds.labels):
            label_classes = prelim_ds.get_classifications(0, [label])

            ds_pis = []

            # Fraction of this data set's events assigned to component c.
            for c in range(best_run['comp']):
                ds_pis.append(np.sum(label_classes == c) / float(len(label_classes)))

            pis.append(ds_pis)  # list of lists

        # convert LoL pis to numpy array
        pis = np.array(pis)

        prelim_ds.plot_classifications(0)

        # Re-run a chain using the initial conditions from the last iteration
        last_iter = prelim_ds.raw_results.get_iteration(0)

        initial_conditions = {
            'pis': pis,
            'mus': last_iter.mus,
            'sigmas': last_iter.sigmas
        }

        return best_run['comp'], initial_conditions