Ejemplo n.º 1
0
    def __train_merge(self):
        """
        Train on the union of all training datasets.

        Every dataset in ``self.datasets_train`` is combined into one set of
        arrays via ``utils.combine_datasets``; mini-batches produced by
        ``utils.batch_factory`` then drive ``self.__train_op`` for
        ``self.epochs`` epochs. On every 100th epoch (starting with epoch 0)
        a banner is printed and ``self.__print_loss_ci()`` is called.

        The TensorFlow session is created here and kept on ``self.__sess``.
        """
        # Collapse the individual training datasets into one.
        features, durations, events = utils.combine_datasets(
            self.datasets_train)

        # Batch sampler plus the number of batches drawn per epoch.
        draw_batch, batches_per_epoch = utils.batch_factory(
            features, durations, events, self.batch_size)

        # Fresh session with freshly initialised variables.
        self.__sess = tf.Session()
        self.__sess.run(tf.global_variables_initializer())

        separator = '-' * 20
        for epoch in range(self.epochs):
            for _step in range(batches_per_epoch):
                batch_features, batch_durations, batch_events = draw_batch()
                feed = {
                    self.__X: batch_features,
                    self.__time: batch_durations,
                    self.__event: batch_events,
                    # 1 = Keras training phase
                    K.learning_phase(): 1,
                }
                self.__sess.run(self.__train_op, feed_dict=feed)

            # Only report on every 100th epoch.
            if epoch % 100 != 0:
                continue
            print(separator + 'Epoch: {0}'.format(epoch) + separator)
            self.__print_loss_ci()
Ejemplo n.º 2
0
    def __train_merge(self):
        """
        Train the two-input model on the merged training datasets.

        ``self.datasets_train`` and ``self.datasets_train_1`` are each merged
        with ``utils.combine_datasets``. The two feature arrays are
        concatenated along the last axis so that a single ``datagen.flow``
        call yields them together (presumably so both inputs receive the same
        augmentation -- confirm against the ``datagen`` definition). Each
        batch is split back into the two inputs before the train op runs.

        After every epoch the training metrics are printed through
        ``self.__print_loss_ci_yh_dsnet``; on every second epoch the
        validation and test metrics are printed as well.

        The TensorFlow session is created here, registered with Keras and
        kept on ``self.__sess``.
        """
        # NOTE: loading pretrained weights is currently disabled; the
        # variables start from tf.global_variables_initializer().

        ## Merge training datasets (only the features of the second
        ## collection are used; its time/event arrays are discarded).
        X_train, time_train, event_train = utils.combine_datasets(
            self.datasets_train)
        X_train_1, _, _ = utils.combine_datasets(self.datasets_train_1)

        ## start training
        self.__sess = tf.Session()
        K.set_session(self.__sess)
        self.__sess.run(tf.global_variables_initializer())

        print('pre epoch train log:')

        # These arrays do not change between epochs, so build them once
        # instead of re-allocating them at the top of every epoch.
        X_merge = np.concatenate([X_train, X_train_1], axis=-1)
        # Pack time and event side by side so they travel through
        # datagen.flow as a single label array of shape (N, 2).
        time_event = np.hstack((time_train[..., None], event_train[..., None]))
        steps_per_epoch = X_train.shape[0] // self.batch_size

        for epoch in range(self.epochs):
            batch_stream = datagen.flow(X_merge,
                                        time_event,
                                        batch_size=self.batch_size)
            # The flow is consumed for exactly `steps_per_epoch` batches, so
            # it is bounded here. Previously the counter was checked *before*
            # the train step, which skipped one batch per epoch (and skipped
            # training entirely when fewer than two batches were available).
            # `range` is the first zip argument so that no extra batch is
            # pulled from the stream once the step budget is exhausted.
            for _, (X_batch_merge, Y_batch) in zip(range(steps_per_epoch),
                                                   batch_stream):
                time_batch = Y_batch[:, 0]
                event_batch = Y_batch[:, 1]
                # Undo the concatenation: the first 3 channels feed the
                # first input, the remaining channels feed the second.
                X_batch = X_batch_merge[..., :3]
                X_batch1 = X_batch_merge[..., 3:]

                self.__sess.run(self.__train_op,
                                feed_dict={
                                    self.__X: X_batch,
                                    self.__X1: X_batch1,
                                    self.__time: time_batch,
                                    self.__event: event_batch,
                                    K.learning_phase(): 1
                                })

            print(f'epoch {epoch} train log:')
            self.__print_loss_ci_yh_dsnet(self.datasets_train,
                                          self.datasets_train_1)
            if epoch % 2 == 0:
                print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
                print(f'epoch {epoch} val log:')
                self.__print_loss_ci_yh_dsnet(self.datasets_val,
                                              self.datasets_val_1)
                print(f'epoch {epoch} test log:')
                self.__print_loss_ci_yh_dsnet(self.datasets_test,
                                              self.datasets_test_1)
Ejemplo n.º 3
0
    def __train_merge(self):
        """
        Train on the merged training datasets and log metrics per epoch.

        All datasets in ``self.datasets_train`` are merged with
        ``utils.combine_datasets`` and mini-batches are drawn through
        ``datagen.flow``. Before the first epoch and after every epoch,
        ``self.__print_loss_ci_yh`` is evaluated on the train, validation and
        test datasets; the two values it returns (unpacked here as CI and
        loss) are appended as one ``"<ci> <loss>"`` line to
        ``train_log.txt``, ``val_log.txt`` and ``test_log.txt`` respectively.

        The TensorFlow session is created here, registered with Keras and
        kept on ``self.__sess``.

        Raises:
            RuntimeError: after an epoch with index greater than 300 has been
                trained and logged. This is the hard stop the original code
                triggered with a bare ``raise``; it is kept so callers see
                the same exception type.
        """
        # NOTE: loading pretrained weights and saving checkpoints are both
        # currently disabled in this method.

        ## Merge training datasets
        X_train, time_train, event_train = utils.combine_datasets(
            self.datasets_train)

        ## start training
        self.__sess = tf.Session()
        K.set_session(self.__sess)
        self.__sess.run(tf.global_variables_initializer())

        # Loop-invariant: pack time and event side by side so they travel
        # through datagen.flow as a single label array of shape (N, 2).
        time_event = np.hstack((time_train[..., None], event_train[..., None]))
        steps_per_epoch = X_train.shape[0] // self.batch_size

        def write_metrics(handle, ci_value, loss_value):
            # One "<ci> <loss>" line per evaluation.
            handle.write(f"{ci_value!s} {loss_value!s}\n")

        # The context manager closes all three logs on every exit path
        # (normal completion, the epoch hard stop, or an unexpected error).
        with open("train_log.txt", "w") as file_train, \
                open("val_log.txt", "w") as file_val, \
                open("test_log.txt", "w") as file_test:

            print('pre epoch train log:')
            ci_train, loss_train = self.__print_loss_ci_yh(self.datasets_train)
            ci_val, loss_val = self.__print_loss_ci_yh(self.datasets_val)
            ci_test, loss_test = self.__print_loss_ci_yh(self.datasets_test)

            write_metrics(file_train, ci_train, loss_train)
            write_metrics(file_val, ci_val, loss_val)
            write_metrics(file_test, ci_test, loss_test)

            for epoch in range(self.epochs):
                batch_stream = datagen.flow(X_train,
                                            time_event,
                                            batch_size=self.batch_size)
                # Run exactly `steps_per_epoch` train steps. Previously the
                # counter was checked before the train step, which skipped
                # one batch per epoch (and all training when fewer than two
                # batches were available). `range` comes first in zip so no
                # extra batch is pulled once the budget is exhausted.
                for _, (X_batch, Y_batch) in zip(range(steps_per_epoch),
                                                 batch_stream):
                    time_batch = Y_batch[:, 0]
                    event_batch = Y_batch[:, 1]

                    self.__sess.run(self.__train_op,
                                    feed_dict={
                                        self.__X: X_batch,
                                        self.__time: time_batch,
                                        self.__event: event_batch,
                                        K.learning_phase(): 1
                                    })

                print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
                print(f'epoch {epoch} train log:')
                ci_train, loss_train = self.__print_loss_ci_yh(
                    self.datasets_train)
                write_metrics(file_train, ci_train, loss_train)

                print(f'epoch {epoch} val log:')
                ci_val, loss_val = self.__print_loss_ci_yh(self.datasets_val)
                write_metrics(file_val, ci_val, loss_val)

                print(f'epoch {epoch} test log:')
                ci_test, loss_test = self.__print_loss_ci_yh(
                    self.datasets_test)
                write_metrics(file_test, ci_test, loss_test)

                # NOTE(review): the message is printed whenever the test CI
                # exceeds the fixed 0.62 threshold, but the actual
                # save_weights call is disabled, so nothing is written.
                if ci_test > 0.62:
                    print("model saved!")

                # Hard stop; an explicit exception replaces the bare `raise`
                # that relied on "No active exception to reraise".
                if epoch > 300:
                    raise RuntimeError(
                        "training stopped: hard limit of 300 epochs exceeded")
Ejemplo n.º 4
0
    def __train_merge(self):
        """
        Train on the merged training datasets and log metrics per epoch.

        All datasets in ``self.datasets_train`` are merged with
        ``utils.combine_datasets`` and mini-batches are drawn through
        ``datagen.flow``. Before the first epoch and after every epoch,
        ``self.__print_loss_ci`` is evaluated on the train, validation and
        test datasets; the two values it returns (unpacked here as CI and
        loss) are appended as one ``"<ci> <loss>"`` line to
        ``train_log.txt``, ``val_log.txt`` and ``test_log.txt`` respectively.

        The TensorFlow session is created here, registered with Keras and
        kept on ``self.__sess``.

        Raises:
            RuntimeError: after an epoch with index greater than 500 has been
                trained and logged. This is the hard stop the original code
                triggered with a bare ``raise``; it is kept so callers see
                the same exception type.
        """
        ## Merge training datasets
        X_train, time_value_train, event_train = utils.combine_datasets(
            self.datasets_train)

        ## start training
        self.__sess = tf.Session()
        K.set_session(self.__sess)
        self.__sess.run(tf.global_variables_initializer())

        # Loop-invariant: pack time value and event side by side so they
        # travel through datagen.flow as one label array of shape (N, 2).
        time_value_event = np.hstack(
            (time_value_train[..., None], event_train[..., None]))
        steps_per_epoch = X_train.shape[0] // self.batch_size

        def write_metrics(handle, ci_value, loss_value):
            # One "<ci> <loss>" line per evaluation.
            handle.write(f"{ci_value!s} {loss_value!s}\n")

        # The context manager closes all three logs on every exit path
        # (normal completion, the epoch hard stop, or an unexpected error).
        with open("train_log.txt", "w") as file_train, \
                open("val_log.txt", "w") as file_val, \
                open("test_log.txt", "w") as file_test:

            print('pre epoch train log:')
            ci_train, loss_train = self.__print_loss_ci(self.datasets_train)
            ci_val, loss_val = self.__print_loss_ci(self.datasets_val)
            ci_test, loss_test = self.__print_loss_ci(self.datasets_test)

            write_metrics(file_train, ci_train, loss_train)
            write_metrics(file_val, ci_val, loss_val)
            write_metrics(file_test, ci_test, loss_test)

            # Best test CI seen so far.
            best_ci = 0

            for epoch in range(self.epochs):
                batch_stream = datagen.flow(X_train,
                                            time_value_event,
                                            batch_size=self.batch_size)
                # Run exactly `steps_per_epoch` train steps. Previously the
                # counter was checked before the train step, which skipped
                # one batch per epoch (and all training when fewer than two
                # batches were available). `range` comes first in zip so no
                # extra batch is pulled once the budget is exhausted.
                for _, (X_batch, Y_batch) in zip(range(steps_per_epoch),
                                                 batch_stream):
                    time_value_batch = Y_batch[:, 0]
                    event_batch = Y_batch[:, 1]

                    self.__sess.run(self.__train_op,
                                    feed_dict={
                                        self.__X: X_batch,
                                        self.__time_value: time_value_batch,
                                        self.__event: event_batch,
                                        K.learning_phase(): 1
                                    })

                print(f'epoch {epoch} train log:')
                ci_train, loss_train = self.__print_loss_ci(
                    self.datasets_train)
                write_metrics(file_train, ci_train, loss_train)

                print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
                print(f'epoch {epoch} val log:')
                ci_val, loss_val = self.__print_loss_ci(self.datasets_val)
                write_metrics(file_val, ci_val, loss_val)

                print(f'epoch {epoch} test log:')
                ci_test, loss_test = self.__print_loss_ci(self.datasets_test)
                write_metrics(file_test, ci_test, loss_test)

                # NOTE(review): the message is printed on every new best test
                # CI, but the actual save_weights call is disabled, so
                # nothing is written.
                if ci_test > best_ci:
                    best_ci = ci_test
                    print("model saved!")

                # Hard stop; an explicit exception replaces the bare `raise`
                # that relied on "No active exception to reraise".
                if epoch > 500:
                    raise RuntimeError(
                        "training stopped: hard limit of 500 epochs exceeded")
Ejemplo n.º 5
0
    parser = argparse.ArgumentParser(description="Combine multiple datasets to make one submission or a new feature")
    parser.add_argument("dataset_files", type=argparse.FileType('r'), nargs="+")
    parser.add_argument("--output_type", type=str, default="submission")
    parser.add_argument("outfile", type=argparse.FileType('w'))
#    parser.add_argument("")
    args = parser.parse_args()
    
    is_submission = False
    if args.output_type == "dataset":
        is_submission = False
    elif args.output_type == "submission":
        is_submission = True
    else:
        raise ValueError("Must specify output type to be submission or dataset.")
    
    trainf, trainl, testf, test_ids, feature_names = utils.combine_datasets(args.dataset_files)
    

    shift = 200
    y = np.log(trainl + shift)
    ids = test_ids
    
    RANDOM_STATE = 2016
    params = {
        'min_child_weight': 1,
        'eta': 0.01,
        'colsample_bytree': 0.5,
        'max_depth': 12,
        'subsample': 0.8,
        'alpha': 1,
        'gamma': 1,