Code Example #1
    def run(self, conf_data):
        """
        executed on cluster run
        :param conf_data:
        :return:
        """
        try:
            # get related nodes
            net_node = self.get_prev_node(grp='netconf')
            data_node = self.get_prev_node(grp='preprocess')
            self._init_node_parm(conf_data['node_id'])

            # set result info cls
            result = TrainSummaryInfo(type=self.eval_result_type)
            result.set_nn_wf_ver_id(conf_data['wf_ver'])
            result.set_nn_id(conf_data['nn_id'])

            # run eval for each network
            result = net_node[0].eval(conf_data['node_id'], conf_data, data=data_node[0], result=result)

            # set parms for db store
            input_data = TrainSummaryInfo.save_result_info(self, result)
            input_data['accuracy'] = result.get_accuracy()
            return input_data
        except Exception as e:
            logging.error(e)
            raise Exception(e)
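
For reference, a minimal sketch of the conf_data dictionary this run() method reads. Only the keys actually accessed in the examples on this page ('nn_id', 'wf_ver', 'node_id', 'cls_pool') are shown; the concrete values and the eval_node variable are hypothetical placeholders.

# Hypothetical input for run(); the key names come from the examples above,
# the values are placeholders.
conf_data = {
    "nn_id": "nn00001",                  # network id (placeholder)
    "wf_ver": "1",                       # workflow version (placeholder)
    "node_id": "nn00001_1_eval_node",    # node being executed (placeholder)
    "cls_pool": {},                      # data-feeder instances keyed by node name
}

# eval_node = ...                        # an instance of the node class shown above
# summary = eval_node.run(conf_data)     # returns the dict stored to the DB
# print(summary.get("accuracy"))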
Code Example #2
    def eval(self, node_id, conf_data, data=None, result=None):
        logging.info("run NeuralNetNodeCnn eval")
        if data is None:
            self.eval_flag = "T"
        else:
            self.eval_flag = "E"

        # eval
        config = {
            "type": self.netconf["config"]["eval_type"],
            "labels": self.netconf["labels"],
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        self.eval_data = TrainSummaryInfo(conf=config)

        # get data & dataconf
        test_data, dataconf = self.get_input_data(self.feed_node,
                                                  self.cls_pool,
                                                  self.eval_feed_name)

        self.eval_run(test_data)

        return self.eval_data
Code Example #3
    def eval(self, node_id, conf_data, data=None, result=None):
        logging.info("run NeuralNetNodeCnn eval")
        if data is None:
            self.eval_flag = "T"
        else:
            self.eval_flag = "E"

        # eval
        self.batch = self.get_eval_batch(node_id)
        config = {
            "type": self.netconf["config"]["eval_type"],
            "labels": self.netconf["labels"],
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        self.eval_data = TrainSummaryInfo(conf=config)

        # get data & dataconf
        test_data, dataconf = self.get_input_data(self.feed_node,
                                                  self.cls_pool,
                                                  self.eval_feed_name)

        with tf.Session() as sess:
            sess = self.get_saver_model(sess)
            sess.run(tf.global_variables_initializer())

            self.eval_run(sess, test_data)

        return self.eval_data
Code Example #4
    def eval(self, node_id, conf_data, data=None, result=None):
        logging.info("run NeuralNetNodeCnn eval")
        self._init_train_parm(conf_data)
        if data is None:
            self.eval_flag = "T"
        else:
            self.eval_flag = "E"

        #eval
        self.batch = self.get_eval_batch(node_id)
        config = {
            "type": self.netconf["config"]["eval_type"],
            "labels": self.netconf["labels"],
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        self.eval_data = TrainSummaryInfo(conf=config)

        # config = {"type": self.netconf["config"]["eval_type"], "labels": self.netconf["labels"]}
        # self.eval_data = TrainSummaryInfo(conf=config)
        # self.eval_data.set_nn_id(self.nn_id)
        # self.eval_data.set_nn_wf_ver_id(self.wf_ver)

        # get data & dataconf
        test_data, dataconf = self.get_input_data(self.feed_node,
                                                  self.cls_pool,
                                                  self.eval_feed_name)

        with tf.Session() as sess:
            if self.net_type == "resnet":
                self.get_model_resnet(sess)
                sess.run(tf.global_variables_initializer())
            else:
                sess, saver = self.get_model_cnn(sess, "T")
                sess.run(tf.global_variables_initializer())

            self.eval_run(sess, test_data)

        # keras.backend.clear_session()
        if self.eval_flag == "E":
            keras.backend.clear_session()

        return self.eval_data
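
For context, the usual TensorFlow 1.x pattern for restoring weights inside a session looks like the sketch below. This is a generic illustration, not the project's get_saver_model/get_model_cnn helpers; the checkpoint directory is hypothetical.

import tensorflow as tf  # TensorFlow 1.x style API

saver = tf.train.Saver()  # assumes the model's variables are already built in the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())          # give every variable an initial value
    ckpt = tf.train.latest_checkpoint("/tmp/model_dir")  # hypothetical checkpoint directory
    if ckpt:
        saver.restore(sess, ckpt)                        # overwrite initial values with saved weights
    # ... run the evaluation ops with sess here ...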
Code Example #5
File: eval_node_extra.py  Project: mchoimis/tensormsa
    def run(self, conf_data):
        """
        executed on cluster run
        :param conf_data:
        :return:
        """
        try:
            # get related nodes
            net_node = self.get_prev_node(grp='netconf')
            data_node = self.get_prev_node(grp='preprocess')
            self._init_node_parm(conf_data['node_id'])

            # set result info cls
            result = TrainSummaryInfo(type=self.eval_result_type)
            result.set_nn_wf_ver_id(conf_data['wf_ver'])
            result.set_nn_id(conf_data['nn_id'])

            # run eval for each network
            result = net_node[0].eval(conf_data['node_id'],
                                      conf_data,
                                      data=data_node[0],
                                      result=result)

            if result is None or result == '':
                return {}
            # set parms for db store
            input_data = TrainSummaryInfo.save_result_info(self, result)
            input_data['accuracy'] = result.get_accuracy()

            condition_data = {}
            condition_data['nn_wf_ver_id'] = conf_data['wf_ver']
            condition_data[
                'condition'] = "3"  # 1 Pending, 2 Progress, 3 Finish, 4 Error
            # Net Version create
            NNCommonManager().update_nn_wf_info(conf_data['nn_id'],
                                                condition_data)

            return input_data
        except Exception as e:
            condition_data = {}
            condition_data['nn_wf_ver_id'] = conf_data['wf_ver']
            condition_data[
                'condition'] = "4"  # 1 Pending, 2 Progress, 3 Finish, 4 Error
            # Net Version create
            NNCommonManager().update_nn_wf_info(conf_data['nn_id'],
                                                condition_data)
            logging.error(e)
            raise Exception(e)
Code Example #6
    def run(self, conf_data):
        """
        executed on cluster run
        :param conf_data:
        :return:
        """
        try:
            # get related nodes
            net_node = self.get_prev_node(grp='netconf')
            data_node = self.get_prev_node(grp='preprocess')
            self._init_node_parm(conf_data['node_id'])

            # set result info cls
            result = TrainSummaryInfo(type=self.eval_result_type)
            result.set_nn_wf_ver_id(conf_data['wf_ver'])
            result.set_nn_id(conf_data['nn_id'])

            # run eval for each network
            result = net_node[0].eval(conf_data['node_id'], conf_data, data=data_node[0], result=result)

            if result is None:
                return {}
            # set parms for db store
            input_data = TrainSummaryInfo.save_result_info(self, result)
            input_data['accuracy'] = result.get_accuracy()

            condition_data = {}
            condition_data['nn_wf_ver_id'] = conf_data['wf_ver']
            condition_data['condition'] = "3"  # 1 Pending, 2 Progress, 3 Finish, 4 Error
            # Net Version create
            NNCommonManager().update_nn_wf_info(conf_data['nn_id'], condition_data)

            return input_data
        except Exception as e:
            condition_data = {}
            condition_data['nn_wf_ver_id'] = conf_data['wf_ver']
            condition_data['condition'] = "4"  # 1 Pending, 2 Progress, 3 Finish, 4 Error
            # Net Version create
            NNCommonManager().update_nn_wf_info(conf_data['nn_id'], condition_data)
            logging.error(e)
            raise Exception(e)
Code Example #7
    def eval(self, node_id, conf_data, data=None, result=None):
        """

        :param node_id:
        :param parm:
        :return:
        """
        logging.info("eval_data")

        self._init_node_parm(node_id.split('_')[0] + "_" + node_id.split('_')[1]+ "_" + "netconf_node")
        self.cls_pool_all = conf_data['cls_pool']  # Data feeder

        config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
        train = TrainSummaryInfo(conf=config)
        print(config)
        self.batch = self.get_eval_batch(node_id)
        #print(train)
        self.model_eval_path = ''.join([self.model_path + '/' + self.batch])


        for _k, _v in self.cls_pool_all.items():
            if 'test' in _k:
                self.cls_pool = _v

            if 'evaldata' in _k:
                self.multi_node_flag = _v.multi_node_flag

        #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
        print("model_path : " + str(self.model_path))
        print("hidden_layers : " + str(self.hidden_layers))
        print("activation_function : " + str(self.activation_function))
        print("batch_size : " + str(self.batch_size))
        print("epoch : " + str(self.epoch))
        print("model_type : " + str(self.model_type))

        # data_store_path = WorkFlowDataFrame(conf_data['nn_id']+"_"+conf_data['wf_ver']+"_"+ "data_node").step_store
        data_conf_info = self.data_conf

        # make wide & deep model
        wdnn = NeuralCommonWdnn()
        wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'], self.hidden_layers,
                                     str(self.activation_function), data_conf_info, str(self.model_eval_path))

        # feed
        # TODO: what should happen if there are multiple files?
        # get prev node for load data
        #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
        #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
        train_data_set = self.cls_pool  # get filename
        file_queue = str(train_data_set.input_paths[0])  # get file_name

        # Loop over the files and load every row; need a way to get the total record count of the tfrecord

        _batch_size = self.batch_size
        _num_tfrecords_files = 0

        # multi Feeder modified
        multi_read_flag = self.multi_read_flag

        # TODO: H5
        # train once per file in the folder (for h5)
        # if the multi_file flag is "no", the default is h5
        try:
            results = dict()
            ori_list = list()
            pre_list = list()

            while (train_data_set.has_next()):
                print("h5")
                # every time one file is processed:
                # loop using the batch size and the total file count -> figure out what remains at the end
                # the file has an iterator fn that fetches one batch at a time; put that into __itemd
                # the input function checks the multi flag and separates the col and ca columns (does this need to be done every batch?)
                # -> and fit along the way
                #
                # # Iteration is to improve for Model Accuracy

                # Per Line in file
                # eval should be one line predict
                #self.batch_size = 2

                for i in range(0, train_data_set.data_size(), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]
                    #if i == 0:
                    #eval_data_Set = data_set
                    # input_fn2(self, mode, data_file, df, nnid, dataconf):
                    predict_value = wdnn_model.predict(
                        input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                  data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                    data_set['predict_label'] = predict_val_list #list(predict_value)
                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])


                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    # model fitting
                    print(len(ori_list))
                    print(len(pre_list))
                    #logging.error("wdnn eval ori list  : {0}".format(ori_list) )
                    logging.info("wdnn eval ori list  : {0}".format(len(ori_list)) )
                    #logging.info("wdnn eval ori list  : {0}".format('info'))
                    #logging.debug("wdnn eval ori list  : {0}".format('debug'))
                    #logging.critical("wdnn eval ori list  : {0}".format('critical'))
                    #print("model fitting h5 " + str(data_set))
                # #Select Next file
                train_data_set.next()

            # TODO: move this earlier
            train.set_nn_batch_ver_id(self.batch)
            if self.model_type == "regression":
                results['ori'] = ori_list
                results['pre'] = pre_list
                train.set_result_info(ori_list, pre_list)

            if self.model_type == "category":
                # For tfrecord, the label is converted here; there is no good way later because it comes out as a Tensor object. For H5, let the feeder do the conversion.
                le = LabelEncoder()
                le.fit(self.label_values)

                for _i, _ori in enumerate(ori_list):
                    #return_value = self.labels[np.argmax(model.predict(X_train))]
                    train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
            #return self.batch
        except Exception as e:
            print("eval error")
            print(e)
            raise Exception(e)

        logging.info("eval end")
        return train
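
The category branch above maps numeric predictions back to label strings with scikit-learn's LabelEncoder. A standalone sketch of that round trip, with a hypothetical label set:

from sklearn.preprocessing import LabelEncoder

label_values = ["bird", "cat", "dog"]        # hypothetical label set
le = LabelEncoder()
le.fit(label_values)                         # classes_ are sorted: ['bird', 'cat', 'dog']

encoded = le.transform(["dog", "cat"])       # -> array([2, 1])
decoded = le.inverse_transform(encoded)      # -> array(['dog', 'cat'], dtype='<U4')
print(encoded, decoded)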
Code Example #8
File: ml_node.py  Project: yyf013932/tensormsa
    def eval(self, node_id, conf_data, data=None, result=None):
        """
            Tensorflow Wide and Deep Network Eval Method
        :param node_id:
        :param parm:
        :return: None
        """
        logging.info("eval_starting ------> {0}".format(node_id))
        try:
            self._init_node_parm(conf_data.get('nn_id') + "_" + conf_data.get('wf_ver')+ "_" + "netconf_node")
            self.cls_pool_all = conf_data['cls_pool']  # Data feeder


            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
            train = TrainSummaryInfo(conf=config)
            print(config)
            self.batch_eval = self.get_eval_batch(node_id)
            self.model_eval_path = ''.join([self.model_path + '/' + self.batch])

            for _k, _v in self.cls_pool_all.items():
                if 'test' in _k:
                    self.cls_pool = _v

                if 'evaldata' in _k:
                    self.multi_node_flag = _v.multi_node_flag

            logging.info("model_path : {0}".format(self.model_path))
            logging.info("ml_class : {0}".format(self.ml_class))
            logging.info("config : {0}".format(self.config))

            config_acc = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": conf_data.get('wf_ver'),
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config_acc)

            data_conf_info = self.data_conf

            # make ML model
            clf = joblib.load(self.model_path+'/model.pkl')

            # feed
            # TODO: what should happen if there are multiple files?
            # get prev node for load data
            train_data_set = self.cls_pool  # get filename
            file_queue = str(train_data_set.input_paths[0])  # get file_name

            # Loop over the files and load every row; need a way to get the total record count of the tfrecord

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # TODO: H5
            # train once per file in the folder (for h5)
            # if the multi_file flag is "no", the default is h5
            try:
                results = dict()
                ori_list = list()
                pre_list = list()

                while (train_data_set.has_next()):
                    logging.info("Wdnn eval process from h5")
                    # every time one file is processed:
                    # loop using the batch size and the total file count -> figure out what remains at the end
                    # the file has an iterator fn that fetches one batch at a time; put that into __itemd
                    # the input function checks the multi flag and separates the col and ca columns (does this need to be done every batch?)
                    # -> and fit along the way
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    # eval should be one line predict

                    for i in range(0, train_data_set.data_size(), self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]
                        keys = list(data_conf_info['cell_feature'].keys())
                        keys.remove(data_conf_info['label'])
                        keys = np.asarray(keys)
                        data = data_set[keys].values
                        label = data_set[data_conf_info['label']].values
                        acc = cross_val_score(clf, data, label, scoring='accuracy').mean()
                        loss = cross_val_score(clf, data, label, scoring='neg_log_loss').mean()
                        # acc = eval_result['accuracy']
                        # loss = eval_result['loss']
                        acc_result.loss_info["loss"].append(str(loss))
                        acc_result.acc_info["acc"].append(str(acc))
                        iris = load_iris()
                        predict_val_list = list()

                        for row in data :
                            row = [row]
                            predict_value = clf.predict(row)
                            predict_val_list.append(predict_value)

                        # predict_value = clf.predict(
                        #     input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                        #                                               data_set, data_conf_info))

                        data_set_count = len(data_set.index)
                        #predict_val_list = [_pv for _pv in predict_value]
                        predict_val_count = len(predict_val_list)

                        if (data_set_count != predict_val_count):
                            logging.error("ML eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                            raise ValueError(
                                'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                        data_set['predict_label'] = predict_val_list
                        predict_y = list(data_set['predict_label'])


                        ori_list.extend(data_set[self.label].values.tolist())
                        pre_list.extend(list(data_set['predict_label']))

                        # model fitting
                        logging.info("ML eval ori list  : {0}".format(len(ori_list)) )
                        logging.info("ML eval pre list  : {0}".format(len(pre_list)) )

                    train_data_set.next()

                # TODO: move this earlier
                train.set_nn_batch_ver_id(self.batch_eval)
                if self.model_type == "regression":
                    results['ori'] = ori_list
                    results['pre'] = pre_list
                    train.set_result_info(ori_list, pre_list)

                if (self.model_type == "category" or self.model_type == "deep"):
                    # For tfrecord, the label is converted here; there is no good way later because it comes out as a Tensor object. For H5, let the feeder do the conversion.
                    le = LabelEncoder()
                    le.fit(self.label_values)

                    for _i, _ori in enumerate(ori_list):
                        #return_value = self.labels[np.argmax(model.predict(X_train))]
                        #train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
                        train.set_result_info(str(_ori), str(pre_list[_i][0]))
                #return self.batch
            except Exception as e:
                print("eval error")
                print(e)
                raise Exception(e)

            logging.info("eval end")
        except Exception as oe:
            logging.error(oe)
            raise Exception(oe)
        return train
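
Example #8 scores each batch with scikit-learn's cross_val_score before predicting row by row. Below is a minimal standalone version of that scoring call using the iris dataset the example already loads; the choice of classifier here is arbitrary.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

iris = load_iris()
clf = LogisticRegression(max_iter=200)   # any estimator with predict_proba works for neg_log_loss
                                         # cross_val_score clones and fits the estimator internally

acc = cross_val_score(clf, iris.data, iris.target, scoring='accuracy').mean()
loss = cross_val_score(clf, iris.data, iris.target, scoring='neg_log_loss').mean()
print("accuracy:", acc, "neg_log_loss:", loss)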
Code Example #9
class NeuralNetNodeReNet(NeuralNetNode):
    """
    """
    def _init_train_parm(self, conf_data):
        # get initial value
        self.conf_data = conf_data
        self.cls_pool = conf_data["cls_pool"]
        self.nn_id = conf_data["nn_id"]
        self.wf_ver = conf_data["wf_ver"]
        self.node_id = conf_data["node_id"]
        self.node = WorkFlowSimpleManager().get_train_node()

        # get feed name
        self.train_feed_name = self.nn_id + "_" + self.wf_ver + "_" + WorkFlowSimpleManager(
        ).get_train_feed_node()
        self.eval_feed_name = self.nn_id + "_" + self.wf_ver + "_" + WorkFlowSimpleManager(
        ).get_eval_feed_node()
        self.feed_node = self.get_prev_node()

    def _init_value(self):
        self.g_train_cnt = 0
        self.file_end = '.bin'
        self.train_return_data = {}
        self.train_return_arr = [
            "Trainning .................................................."
        ]

    ####################################################################################################################
    def _set_netconf_parm(self):
        netconf = WorkFlowNetConfCNN().get_view_obj(self.node_id)
        try:
            netconf = WorkFlowNetConfCNN().set_num_classes_predcnt(
                self.nn_id, self.wf_ver, self.node, self.node_id, netconf)
        except Exception:
            pass
        self.netconf = netconf

        try:
            self.train_cnt = self.netconf["param"]["traincnt"]
            self.epoch = self.netconf["param"]["epoch"]
            self.batch_size = self.netconf["param"]["batch_size"]
            self.model_path = self.netconf["modelpath"]
            self.modelname = self.netconf["modelname"]
        except Exception as e:
            logging.info("NetConf is not exist.")
            logging.info(e)

    def _set_dataconf_parm(self, dataconf):
        self.dataconf = dataconf

    ####################################################################################################################
    def set_saver_model(self):
        self.save_path = self.model_path + "/" + str(self.batch) + str(
            self.file_end)
        keras.models.save_model(self.model, self.save_path)

        loss = round(self.loss * 100, 2)
        accR = round(self.acc * 100, 2)
        val_loss = round(self.val_loss * 100, 2)
        val_acc = round(self.val_acc * 100, 2)
        msg = "Global Step: " + str(self.g_train_cnt)
        msg += ", Training Loss: " + str(
            loss) + "%" + ", Training Accuracy: " + str(accR) + "%"
        msg += ", Test Loss: " + str(
            val_loss) + "%" + ", Test Accuracy: " + str(val_acc) + "%"
        logging.info(msg)

        config = {
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        result = TrainSummaryAccLossInfo(config)
        result.loss_info["loss"] = str(val_loss)
        result.acc_info["acc"] = str(val_acc)
        self.save_accloss_info(result)

        result = [msg]

        # self.model_file_delete(self.model_path, self.modelname)

        self.train_return_arr.append(result)

        self.eval(self.node_id, self.conf_data, None, None)

    def get_model_resnet(self):
        try:
            keras.backend.tensorflow_backend.clear_session()
            self.lr_reducer = ReduceLROnPlateau(monitor='val_loss',
                                                factor=np.sqrt(0.1),
                                                cooldown=0,
                                                patience=5,
                                                min_lr=0.5e-6)
            self.early_stopper = EarlyStopping(monitor='val_acc',
                                               min_delta=0.001,
                                               patience=10)
            self.csv_logger = CSVLogger('resnet.csv')
            num_classes = self.netconf["config"]["num_classes"]
            numoutputs = self.netconf["config"]["layeroutputs"]
            x_size = self.dataconf["preprocess"]["x_size"]
            y_size = self.dataconf["preprocess"]["y_size"]
            channel = self.dataconf["preprocess"]["channel"]
            optimizer = self.netconf["config"]["optimizer"]

            filelist = os.listdir(self.model_path)
            filelist.sort(reverse=True)
            last_chk_path = self.model_path + "/" + self.load_batch + self.file_end

            try:
                self.model = keras.models.load_model(last_chk_path)
                logging.info("Train Restored checkpoint from:" + last_chk_path)
            except Exception as e:
                if numoutputs == 18:
                    self.model = resnet.ResnetBuilder.build_resnet_18(
                        (channel, x_size, y_size), num_classes)
                elif numoutputs == 34:
                    self.model = resnet.ResnetBuilder.build_resnet_34(
                        (channel, x_size, y_size), num_classes)
                elif numoutputs == 50:
                    self.model = resnet.ResnetBuilder.build_resnet_50(
                        (channel, x_size, y_size), num_classes)
                elif numoutputs == 101:
                    self.model = resnet.ResnetBuilder.build_resnet_101(
                        (channel, x_size, y_size), num_classes)
                elif numoutputs == 152:
                    self.model = resnet.ResnetBuilder.build_resnet_152(
                        (channel, x_size, y_size), num_classes)
                elif numoutputs == 200:
                    self.model = resnet.ResnetBuilder.build_resnet_200(
                        (channel, x_size, y_size), num_classes)
                logging.info(
                    "No checkpoint to restore; initializing variables instead. "
                    + last_chk_path)
                logging.info(e)

            self.model.compile(loss='categorical_crossentropy',
                               optimizer=optimizer,
                               metrics=['accuracy'])
        except Exception as e:
            logging.error(
                "===Error on Residualnet build model : {0}".format(e))

    ####################################################################################################################
    def train_run_resnet(self, input_data, test_data):
        data_augmentation = self.netconf["param"]["augmentation"]
        try:
            if data_augmentation == "N" or data_augmentation == "n":
                logging.info('Not using data augmentation.')
            else:
                logging.info('Using real-time data augmentation.')

            while (input_data.has_next()):
                data_set = input_data[0:input_data.data_size()]
                x_batch, y_batch, n_batch = self.get_batch_img_data(
                    data_set, "T")

                test_set = test_data[0:test_data.data_size()]
                x_tbatch, y_tbatch, n_tbatch = self.get_batch_img_data(
                    test_set, "T")

                for i in range(self.train_cnt):
                    if data_augmentation == "N" or data_augmentation == "n":
                        history = self.model.fit(x_batch,
                                                 y_batch,
                                                 batch_size=self.batch_size,
                                                 epochs=self.epoch,
                                                 validation_data=(x_tbatch,
                                                                  y_tbatch),
                                                 shuffle=True,
                                                 callbacks=[
                                                     self.lr_reducer,
                                                     self.early_stopper,
                                                     self.csv_logger
                                                 ])
                    else:
                        # This will do preprocessing and realtime data augmentation:
                        datagen = ImageDataGenerator(
                            featurewise_center=
                            False,  # set input mean to 0 over the dataset
                            samplewise_center=False,  # set each sample mean to 0
                            featurewise_std_normalization=
                            False,  # divide inputs by std of the dataset
                            samplewise_std_normalization=
                            False,  # divide each input by its std
                            zca_whitening=False,  # apply ZCA whitening
                            rotation_range=
                            0,  # randomly rotate images in the range (degrees, 0 to 180)
                            width_shift_range=0.1,
                            # randomly shift images horizontally (fraction of total width)
                            height_shift_range=0.1,
                            # randomly shift images vertically (fraction of total height)
                            horizontal_flip=True,  # randomly flip images
                            vertical_flip=False)  # randomly flip images

                        # Compute quantities required for featurewise normalization
                        # (std, mean, and principal components if ZCA whitening is applied).
                        datagen.fit(x_batch)

                        # Fit the model on the batches generated by datagen.flow().
                        history = self.model.fit_generator(
                            datagen.flow(x_batch,
                                         y_batch,
                                         batch_size=self.batch_size),
                            steps_per_epoch=x_batch.shape[0] //
                            self.batch_size,
                            validation_data=(x_tbatch, y_tbatch),
                            epochs=self.epoch,
                            verbose=1,
                            max_q_size=100,
                            callbacks=[
                                self.lr_reducer, self.early_stopper,
                                self.csv_logger
                            ])

                    self.loss = history.history["loss"][0]
                    self.acc = history.history["acc"][0]
                    self.val_loss = history.history["val_loss"][0]
                    self.val_acc = history.history["val_acc"][0]

                    self.g_train_cnt += 1
                    logging.info("Save Train Count=" + str(self.g_train_cnt))
                    self.set_saver_model()

                input_data.next()
        except Exception as e:
            logging.info(
                "Error[400] ..............................................")
            logging.info(e)

    def run(self, conf_data):
        try:
            logging.info("run NeuralNetNodeResnet Train")
            # init data setup
            self._init_train_parm(conf_data)
            self._init_value()

            # get data & dataconf
            test_data, dataconf = self.get_input_data(self.feed_node,
                                                      self.cls_pool,
                                                      self.eval_feed_name)
            input_data, dataconf = self.get_input_data(self.feed_node,
                                                       self.cls_pool,
                                                       self.train_feed_name)

            # set netconf, dataconf
            self._set_netconf_parm()
            self._set_dataconf_parm(dataconf)

            # set batch
            self.load_batch = self.get_eval_batch(self.node_id)
            if self.epoch != 0 and self.train_cnt != 0:
                self.train_batch, self.batch = self.make_batch(self.node_id)
            else:
                self.batch = self.load_batch

            self.get_model_resnet()

            self.train_run_resnet(input_data, test_data)

            self.train_return_data["TrainResult"] = self.train_return_arr

            if self.epoch == 0 or self.train_cnt == 0:
                self.eval(self.node_id, self.conf_data, None, None)

            return self.train_return_data
        except Exception as e:
            logging.info("===Error on running residualnet : {0}".format(e))

    ####################################################################################################################
    def eval_run(self, input_data):
        self.batch_size = self.netconf["param"]["batch_size"]
        labels = self.netconf["labels"]
        pred_cnt = self.netconf["param"]["predictcnt"]
        try:
            predlog = self.netconf["param"]["predictlog"]
        except:
            predlog = "N"
        # logging.info(labels)
        t_cnt_arr = []
        f_cnt_arr = []
        for i in range(len(labels)):
            t_cnt_arr.append(0)
            f_cnt_arr.append(0)

        input_data.pointer = 0
        # eval
        config = {
            "type": self.netconf["config"]["eval_type"],
            "labels": self.netconf["labels"],
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }

        self.eval_data = TrainSummaryInfo(conf=config)

        while (input_data.has_next()):
            data_set = input_data[0:input_data.data_size()]
            x_batch, y_batch, n_batch = self.get_batch_img_data(data_set, "E")

            try:
                logits = self.model.predict(x_batch)

                for i in range(len(logits)):
                    true_name = y_batch[i]
                    file_name = n_batch[i]

                    logit = []
                    logit.append(logits[i])
                    #
                    idx = labels.index(true_name)
                    retrun_data = self.set_predict_return_cnn_img(
                        labels, logit, pred_cnt)
                    pred_name = retrun_data["key"][0]

                    if self.eval_flag == "E":
                        if true_name == pred_name:
                            t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                            strLog = "[True] : "
                            if (predlog == "TT"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        else:
                            f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                            strLog = "[False] : "
                            if (predlog == "FF"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        if (predlog == "AA"):
                            logging.info(strLog + true_name + " FileName=" +
                                         file_name)
                            logging.info(retrun_data["key"])
                            logging.info(retrun_data["val"])
                    else:
                        try:
                            listTF = retrun_data["key"].index(true_name)
                            t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                            strLog = "[True] : "
                            if (predlog == "T"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        except:
                            f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                            strLog = "[False] : "
                            if (predlog == "F"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        if (predlog == "A"):
                            logging.info(strLog + true_name + " FileName=" +
                                         file_name)
                            logging.info(retrun_data["key"])
                            logging.info(retrun_data["val"])

                    self.eval_data.set_result_info(true_name, pred_name)

            except Exception as e:
                logging.info(e)
                logging.info(
                    "No checkpoint to restore; initializing variables instead."
                )

            input_data.next()

        # set parms for db store
        input_data = TrainSummaryInfo.save_result_info(self, self.eval_data)

        self.eval_print(labels, t_cnt_arr, f_cnt_arr)

    def eval_print(self, labels, t_cnt_arr, f_cnt_arr):
        logging.info(
            "####################################################################################################"
        )
        result = []
        strResult = "['Eval ......................................................']"
        result.append(strResult)
        totCnt = 0
        tCnt = 0
        fCnt = 0
        for i in range(len(labels)):
            strResult = "Category : " + self.spaceprint(labels[i], 15) + " "
            strResult += "TotalCnt=" + self.spaceprint(
                str(t_cnt_arr[i] + f_cnt_arr[i]), 8) + " "
            strResult += "TrueCnt=" + self.spaceprint(str(t_cnt_arr[i]),
                                                      8) + " "
            strResult += "FalseCnt=" + self.spaceprint(str(f_cnt_arr[i]),
                                                       8) + " "
            if t_cnt_arr[i] + f_cnt_arr[i] != 0:
                strResult += "True Percent(TrueCnt/TotalCnt*100)=" + str(
                    round(t_cnt_arr[i] /
                          (t_cnt_arr[i] + f_cnt_arr[i]) * 100)) + "%"
            totCnt += t_cnt_arr[i] + f_cnt_arr[i]
            tCnt += t_cnt_arr[i]
            fCnt += f_cnt_arr[i]
            logging.info(strResult)
            result.append(strResult)
        strResult = "---------------------------------------------------------------------------------------------------"
        logging.info(strResult)
        strResult = "Total Category=" + self.spaceprint(str(len(labels)),
                                                        11) + " "
        strResult += "TotalCnt=" + self.spaceprint(str(totCnt), 8) + " "
        strResult += "TrueCnt=" + self.spaceprint(str(tCnt), 8) + " "
        strResult += "FalseCnt=" + self.spaceprint(str(fCnt), 8) + " "
        if totCnt != 0:
            strResult += "True Percent(TrueCnt/TotalCnt*100)=" + str(
                round(tCnt / totCnt * 100)) + "%"
        logging.info(strResult)
        result.append(strResult)
        logging.info(
            "###################################################################################################"
        )

    def eval(self, node_id, conf_data, data=None, result=None):
        logging.info("run NeuralNetNodeCnn eval")

        if data is None:
            self.eval_flag = "T"
        else:
            self.eval_flag = "E"

        # get data & dataconf
        test_data, dataconf = self.get_input_data(self.feed_node,
                                                  self.cls_pool,
                                                  self.eval_feed_name)

        self.eval_run(test_data)

        return self.eval_data

    ####################################################################################################################
    def predict(self, node_id, filelist):
        """
        """
        logging.info("run NeuralNetNodeCnn Predict")
        self.node_id = node_id
        self._init_value()
        # net, data config setup
        data_node_name = self._get_backward_node_with_type(node_id, 'data')
        dataconf = WorkFlowNetConfCNN().get_view_obj(data_node_name[0])
        self._set_netconf_parm()
        self._set_dataconf_parm(dataconf)

        # data shape change MultiValuDict -> nd array
        filename_arr, filedata_arr = self.change_predict_fileList(
            filelist, dataconf)

        # get unique key
        self.load_batch = self.get_active_batch(self.node_id)
        unique_key = '_'.join([node_id, self.load_batch])

        logging.info("getModelPath:" + self.model_path + "/" +
                     self.load_batch + self.file_end)

        ## create tensorflow graph
        if (NeuralNetModel.dict.get(unique_key)):
            self = NeuralNetModel.dict.get(unique_key)
            graph = NeuralNetModel.graph.get(unique_key)
        else:
            self.get_model_resnet()

            NeuralNetModel.dict[unique_key] = self
            NeuralNetModel.graph[unique_key] = tf.get_default_graph()
            graph = tf.get_default_graph()
        pred_return_data = {}
        for i in range(len(filename_arr)):
            file_name = filename_arr[i]
            file_data = filedata_arr[i]

            logits = self.model.predict(file_data)

            labels = self.netconf["labels"]
            pred_cnt = self.netconf["param"]["predictcnt"]
            retrun_data = self.set_predict_return_cnn_img(
                labels, logits, pred_cnt)
            pred_return_data[file_name] = retrun_data
            logging.info("Return Data.......................................")
            logging.info(pred_return_data)

        return pred_return_data
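
get_model_resnet above wires three standard Keras callbacks (ReduceLROnPlateau, EarlyStopping, CSVLogger) into training. A minimal sketch of the same callback setup on a toy model follows; the model and the random data are hypothetical stand-ins.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, CSVLogger

# same callback parameters as get_model_resnet()
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1),
                               cooldown=0, patience=5, min_lr=0.5e-6)
early_stopper = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10)
csv_logger = CSVLogger('resnet.csv')

# toy stand-in for the ResNet model
model = Sequential([Dense(10, activation='softmax', input_shape=(32,))])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

x = np.random.rand(100, 32)
y = np.eye(10)[np.random.randint(0, 10, 100)]    # one-hot labels
model.fit(x, y, batch_size=16, epochs=2, validation_split=0.2,
          callbacks=[lr_reducer, early_stopper, csv_logger])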
Code Example #10
    def eval_run(self, input_data):
        self.batch_size = self.netconf["param"]["batch_size"]
        labels = self.netconf["labels"]
        pred_cnt = self.netconf["param"]["predictcnt"]
        try:
            predlog = self.netconf["param"]["predictlog"]
        except:
            predlog = "N"
        # logging.info(labels)
        t_cnt_arr = []
        f_cnt_arr = []
        for i in range(len(labels)):
            t_cnt_arr.append(0)
            f_cnt_arr.append(0)

        input_data.pointer = 0
        # eval
        config = {
            "type": self.netconf["config"]["eval_type"],
            "labels": self.netconf["labels"],
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }

        self.eval_data = TrainSummaryInfo(conf=config)

        while (input_data.has_next()):
            data_set = input_data[0:input_data.data_size()]
            x_batch, y_batch, n_batch = self.get_batch_img_data(data_set, "E")

            try:
                logits = self.model.predict(x_batch)

                for i in range(len(logits)):
                    true_name = y_batch[i]
                    file_name = n_batch[i]

                    logit = []
                    logit.append(logits[i])
                    #
                    idx = labels.index(true_name)
                    retrun_data = self.set_predict_return_cnn_img(
                        labels, logit, pred_cnt)
                    pred_name = retrun_data["key"][0]

                    if self.eval_flag == "E":
                        if true_name == pred_name:
                            t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                            strLog = "[True] : "
                            if (predlog == "TT"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        else:
                            f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                            strLog = "[False] : "
                            if (predlog == "FF"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        if (predlog == "AA"):
                            logging.info(strLog + true_name + " FileName=" +
                                         file_name)
                            logging.info(retrun_data["key"])
                            logging.info(retrun_data["val"])
                    else:
                        try:
                            listTF = retrun_data["key"].index(true_name)
                            t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                            strLog = "[True] : "
                            if (predlog == "T"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        except:
                            f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                            strLog = "[False] : "
                            if (predlog == "F"):
                                logging.info(strLog + true_name +
                                             " FileName=" + file_name)
                                logging.info(retrun_data["key"])
                                logging.info(retrun_data["val"])
                        if (predlog == "A"):
                            logging.info(strLog + true_name + " FileName=" +
                                         file_name)
                            logging.info(retrun_data["key"])
                            logging.info(retrun_data["val"])

                    self.eval_data.set_result_info(true_name, pred_name)

            except Exception as e:
                logging.info(e)
                logging.info(
                    "No checkpoint to restore; initializing variables instead."
                )

            input_data.next()

        # set parms for db store
        input_data = TrainSummaryInfo.save_result_info(self, self.eval_data)

        self.eval_print(labels, t_cnt_arr, f_cnt_arr)
Code Example #11
    def eval(self, node_id, conf_data, data=None, result=None):
        """

        :param node_id:
        :param parm:
        :return:
        """
        logging.info("eval_starting ------> {0}".format(node_id))

        self._init_node_parm(
            node_id.split('_')[0] + "_" + node_id.split('_')[1] + "_" +
            "netconf_node")
        self.cls_pool_all = conf_data['cls_pool']  # Data feeder

        config = {
            "type": self.model_type,
            "labels": self.label_values,
            "nn_id": conf_data.get('nn_id'),
            "nn_wf_ver_id": conf_data.get('wf_ver')
        }
        train = TrainSummaryInfo(conf=config)
        print(config)
        self.batch = self.get_eval_batch(node_id)
        #print(train)
        self.model_eval_path = ''.join([self.model_path + '/' + self.batch])

        for _k, _v in self.cls_pool_all.items():
            if 'test' in _k:
                self.cls_pool = _v

            if 'evaldata' in _k:
                self.multi_node_flag = _v.multi_node_flag

        #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
        logging.info("model_path : {0}".format(self.model_path))
        print("hidden_layers : {0}".format(self.hidden_layers))
        print("activation_function : {0}".format(self.activation_function))
        print("batch_size : {0}".format(self.batch_size))
        print("epoch : {0}".format(self.epoch))
        print("model_type : {0}".format(self.model_type))

        # data_store_path = WorkFlowDataFrame(conf_data['nn_id']+"_"+conf_data['wf_ver']+"_"+ "data_node").step_store
        data_conf_info = self.data_conf

        # make wide & deep modelnot
        wdnn = NeuralCommonWdnn()
        wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'],
                                     self.hidden_layers,
                                     str(self.activation_function),
                                     data_conf_info, str(self.model_eval_path))

        # feed
        # TODO: what should happen if there are multiple files?
        # get prev node for load data
        #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
        #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
        train_data_set = self.cls_pool  # get filename
        file_queue = str(train_data_set.input_paths[0])  # get file_name

        # Loop over the files and load every row; need a way to get the total record count of the tfrecord

        _batch_size = self.batch_size
        _num_tfrecords_files = 0

        # multi Feeder modified
        multi_read_flag = self.multi_read_flag

        # TODO: H5
        # train once per file in the folder (for h5)
        # if the multi_file flag is "no", the default is h5
        try:
            results = dict()
            ori_list = list()
            pre_list = list()

            while (train_data_set.has_next()):
                print("h5")
                # every time one file is processed:
                # loop using the batch size and the total file count -> figure out what remains at the end
                # the file has an iterator fn that fetches one batch at a time; put that into __itemd
                # the input function checks the multi flag and separates the col and ca columns (does this need to be done every batch?)
                # -> and fit along the way
                #
                # # Iteration is to improve for Model Accuracy

                # Per Line in file
                # eval should be one line predict
                #self.batch_size = 2

                for i in range(0, train_data_set.data_size(), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]
                    #if i == 0:
                    #eval_data_Set = data_set
                    # input_fn2(self, mode, data_file, df, nnid, dataconf):
                    predict_value = wdnn_model.predict(
                        input_fn=lambda: train_data_set.input_fn2(
                            tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                            data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error(
                            "wdnn eval error check : dataframe count({0}) predict count({1})"
                            .format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)'
                        )

                    data_set[
                        'predict_label'] = predict_val_list  #list(predict_value)
                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])

                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    # model fitting
                    print(len(ori_list))
                    print(len(pre_list))
                    #logging.error("wdnn eval ori list  : {0}".format(ori_list) )
                    logging.info("wdnn eval ori list  : {0}".format(
                        len(ori_list)))
                    #logging.info("wdnn eval ori list  : {0}".format('info'))
                    #logging.debug("wdnn eval ori list  : {0}".format('debug'))
                    #logging.critical("wdnn eval ori list  : {0}".format('critical'))
                    #print("model fitting h5 " + str(data_set))
                # #Select Next file
                train_data_set.next()

            # TODO: move this earlier
            train.set_nn_batch_ver_id(self.batch)
            if self.model_type == "regression":
                results['ori'] = ori_list
                results['pre'] = pre_list
                train.set_result_info(ori_list, pre_list)

            if self.model_type == "category":
                # For tfrecord, the label is converted here; there is no good way later because it comes out as a Tensor object. For H5, let the feeder do the conversion.
                le = LabelEncoder()
                le.fit(self.label_values)

                for _i, _ori in enumerate(ori_list):
                    #return_value = self.labels[np.argmax(model.predict(X_train))]
                    train.set_result_info(
                        str(_ori), str(le.inverse_transform(pre_list[_i])))
            #return self.batch
        except Exception as e:
            print("eval error")
            print(e)
            raise Exception(e)

        logging.info("eval end")
        return train
Code Example #12
    def predict(self, node_id, ver, parm, data=None, result=None):
        """ Wdnn predict 
            batchlist info에서 active flag가 Y인 Model을 가져와서 예측을 함 

        Args:
          params: 
            * node_id
            * conf_data

        Returns:
            none

        Raises:

        Example

        """
        try:
            logging.info("wdnn predict_start nnid : {0}".format(node_id))
            _node_id = node_id + "_" + ver + "_" + "netconf_node"

            _data_conf_id = node_id + "_" + ver + "_dataconf_node"
            self._init_node_parm(_node_id)
            #self.cls_pool_all = conf_data['cls_pool']  # Data feeder

            config = {
                "type": self.model_type,
                "labels": self.label_values,
                "nn_id": node_id,
                "nn_wf_ver_id": ver
            }
            train = TrainSummaryInfo(conf=config)
            #print(config)
            self.batch = self.get_active_batch(_node_id)
            #print(train)
            self.model_predict_path = ''.join(
                [self.model_path + '/' + self.batch])
            self.multi_node_flag = False

            conf_data = {}
            conf_data['node_id'] = _node_id

            #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
            print("model_path : " + str(self.model_path))
            print("hidden_layers : " + str(self.hidden_layers))
            print("activation_function : " + str(self.activation_function))
            print("batch_size : " + str(self.batch_size))
            print("epoch : " + str(self.epoch))
            print("model_type : " + str(self.model_type))

            # data_store_path = WorkFlowDataFrame(conf_data['nn_id']+"_"+conf_data['wf_ver']+"_"+ "data_node").step_store
            data_conf_info = self.data_conf

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, node_id,
                                         self.hidden_layers,
                                         str(self.activation_function),
                                         data_conf_info,
                                         str(self.model_predict_path))

            # feed
            # TODO: what should happen if there are multiple files?
            filelist = sorted(parm.items())
            #train_data_set = self.cls_pool  # get filename
            #file_queue = str(train_data_set.input_paths[0])  # get file_name

            # Iterate over the files and load all rows; a way to get the total record count of a tfrecord is needed.

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # Todo H5
            # train per file in the folder (H5 case)
            # if multi_file flag is "no", H5 is the default

            results = dict()
            ori_list = list()
            pre_list = list()
            #self.batch_size = 5
            for filename in filelist:
                print("h5")
                #feeder = PreNodeFeedFr2Wdnn().set_for_predict(_data_conf_id)
                feeder = PreNodeFeedFr2Wdnn()
                #_data_conf_id
                #set_for_predict
                feeder.set_for_predict(_data_conf_id)
                data_node = DataNodeFrame()
                train_data_set = data_node.load_csv_by_pandas(
                    self.predict_path + "/" + filename[1].name)

                #feeder.set_input_paths([self.predict_path + "/" + filename[1].name])
                #train_data_set = feeder
                #_size = train_data_set
                # Each time a file is processed,
                # loop over it using the batch size and the total row count -> work out what remains in the last batch.
                # Iterating the file, there is a fn that fetches one batch at a time; plug that into __itemd,
                # and the input function checks the multi flag to split continuous and categorical columns (does this need to happen every batch?)
                # -> and fit along the way
                #
                # # Iteration is to improve for Model Accuracy

                # Per Line in file
                # eval should be one line predict
                #self.batch_size = 2

                # The question is how to fetch the train data

                result_df = pd.DataFrame()

                for i in range(0, len(train_data_set.index), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]
                    #if i == 0:
                    #eval_data_Set = data_set
                    # input_fn2(self, mode, data_file, df, nnid, dataconf):
                    predict_value = wdnn_model.predict(
                        input_fn=lambda: feeder.input_fn2(
                            tf.contrib.learn.ModeKeys.TRAIN, filename,
                            data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error(
                            "wdnn eval error check : dataframe count({0}) predict count({1})"
                            .format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)'
                        )

                    data_set[
                        'predict_label'] = predict_val_list  #list(predict_value)
                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])
                    #pd.concat(result_df, data_set)
                    result_df = result_df.append(data_set)
                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    # model fitting
                    print(len(ori_list))
                    print(len(pre_list))
                    #logging.error("wdnn eval ori list  : {0}".format(ori_list) )
                    logging.info("wdnn eval ori list  : {0}".format(
                        len(ori_list)))
                    #logging.info("wdnn eval ori list  : {0}".format('info'))
                    #logging.debug("wdnn eval ori list  : {0}".format('debug'))
                    #logging.critical("wdnn eval ori list  : {0}".format('critical'))
                    #print("model fitting h5 " + str(data_set))
                # #Select Next file

                #train_data_set.next()

            predict_result_dir = utils.make_and_exist_directory(
                self.predict_path + "/" + "result" + "/")
            predict_result_filename = predict_result_dir + "result_" + strftime(
                "%Y-%m-%d-%H:%M:%S", gmtime()) + ".csv"
            result_df.to_csv(predict_result_filename)
            #os.remove(self.predict_path + "/" + filename[1].name)

            # #TODO : move this earlier
            # train.set_nn_batch_ver_id(self.batch)
            # if self.model_type == "regression":
            #     results['ori'] = ori_list
            #     results['pre'] = pre_list
            #     train.set_result_info(ori_list, pre_list)
            #
            # if self.model_type == "category":
            #     # For tfrecord, convert the label here; pulling it out later is hopeless because it comes back as a Tensor object. For H5, do the conversion in the feeder instead.
            #     le = LabelEncoder()
            #     le.fit(self.label_values)
            #
            #     for _i, _ori in enumerate(ori_list):
            #         #return_value = self.labels[np.argmax(model.predict(X_train))]
            #         train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
            #return self.batch

            logging.info("eval end")
            return train
        except Exception as e:
            logging.error("Wdnn predict error {0}".format(e))

            raise Exception(e)
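The loop above walks the input DataFrame in batch_size slices, predicts each slice, verifies that the number of predictions matches the number of rows, and appends everything into result_df before writing the CSV. A self-contained sketch of the same pattern (predict_in_batches and predict_fn are illustrative names, not part of the project API):

import pandas as pd

def predict_in_batches(df, predict_fn, batch_size=100):
    """predict_fn takes a DataFrame slice and returns one prediction per row."""
    chunks = []
    for start in range(0, len(df.index), batch_size):
        chunk = df.iloc[start:start + batch_size].copy()
        preds = list(predict_fn(chunk))
        if len(preds) != len(chunk.index):   # same sanity check as above
            raise ValueError("prediction count does not match dataframe count")
        chunk["predict_label"] = preds
        chunks.append(chunk)
    return pd.concat(chunks)

# usage sketch:
# result_df = predict_in_batches(test_df, lambda c: my_model.predict(c), batch_size=100)
# result_df.to_csv("result.csv")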
コード例 #13
0
    def run(self, conf_data):
        """
        XGBoost training
        :param conf_data:
        :return: None
        """
        logging.info("NeuralNetNode Xgboost Run called") #nodeid 필요

        try:

            self._init_train_parm(conf_data)
            #self._init_value()
            train, test= self.get_input_data()

            spec = importlib.util.spec_from_file_location("data_preprocess", "/hoya_src_root/data_preprocess.py")
            foo = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(foo)
            _label, _label_info, _label_values = foo.label_info()

            y_train = train[_label].ravel()
            x_train = train.drop([_label,"id"], axis=1)

            y_test = test[_label].ravel()
            x_test = test.drop([_label,"id"], axis=1)

            #x_train = train.values  # Creates an array of the train data
            #x_test = test.values  # Creates an array of the test data

            self.load_batch = self.get_eval_batch(self.node_id)  # fetch the one whose Train flag is Y / whose Eval flag is Y
            self.train_batch, self.batch = self.make_batch(self.node_id)

            logging.info("Xgboost Train get batch -> {0}".format(self.batch))
            logging.info("Xgboost Train get batch -> {0}".format(self.load_batch))
            if self.train_batch == None:
                self.model_train_path = ''.join([self.model_path + '/' + self.batch + '.bin'])
            else:
                self.model_train_path = ''.join([self.model_path + '/' + self.train_batch + '.bin'])

            xgb_params = self.get_xgboost_paramter()

            num_rounds = self.conf.get("epoch")
            dtrain = xgb.DMatrix(x_train, y_train)  # training data
            dvalid = xgb.DMatrix(x_test, y_test)  # validation data
            eval_result= {}
            gbm = xgb.train(xgb_params, dtrain, num_rounds,
                            [(dtrain, 'train'),(dvalid,"test")],
                            evals_result= eval_result
                            )

            gbm.save_model(self.model_train_path )
            predictions = gbm.predict(dvalid)
            train_prediction = gbm.predict(dvalid)

            #trainprediction_xgb = pd.DataFrame({'id': test,
            #                                    'predict': train_prediction})

            #trainprediction_xgb_merge = train_results_xgb.merge(trainprediction_xgb, how='left', on='id')
            # Todo: fix this to check the Eval flag
            #    "nn_wf_ver_id": self.wf_ver, "nn_batch_ver_id": self.batch}
            config = {"nn_id": self.nn_id, "nn_wf_ver_id": self.wf_ver,
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config)
            acc_result.loss_info["loss"].extend(eval_result['test']['rmse'])
            acc_result.acc_info["acc"].extend(eval_result['test']['rmse'])
            self.save_accloss_info(acc_result)

            config = {"type": self.model_type, "labels": _label_values, "nn_id":self.nn_id, "nn_wf_ver_id":self.wf_ver}
            eval_result = TrainSummaryInfo(conf=config)
            eval_result.set_nn_batch_ver_id(self.batch)

            eval_result.set_result_info(y_test, train_prediction)

            input_data = TrainSummaryInfo.save_result_info(self, eval_result)
            input_data['accuracy'] = eval_result.get_accuracy()

            return input_data
        except Exception as e:
            logging.info("NeuralNetNodeXgboost Run Exception : {0}".format(e))
            raise Exception(e)
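The run above trains XGBoost with an evals list plus an evals_result dict, saves the booster, and then copies the per-round test metric into the accuracy/loss summary. A small, self-contained sketch of that train/evaluate/save pattern on random data (all parameter values are illustrative only):

import numpy as np
import xgboost as xgb

x, y = np.random.rand(100, 5), np.random.rand(100)
dtrain = xgb.DMatrix(x[:80], y[:80])   # training data
dvalid = xgb.DMatrix(x[80:], y[80:])   # validation data

eval_result = {}
booster = xgb.train({"objective": "reg:squarederror", "eval_metric": "rmse"},
                    dtrain, num_boost_round=10,
                    evals=[(dtrain, "train"), (dvalid, "test")],
                    evals_result=eval_result)
booster.save_model("model.bin")
print(eval_result["test"]["rmse"])     # per-round rmse, the list consumed above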
コード例 #14
0
    def eval(self, node_id, conf_data, data=None, result=None):
        """
            Tensorflow Wide and Deep Network Eval Method
        :param node_id:
        :param parm:
        :return: None
        """
        logging.info("eval_starting ------> {0}".format(node_id))
        try:
            #self._init_node_parm(conf_data['node_id'])
            #conf_data['node_id']
            #self._init_node_parm(conf_data['node_id'])
            self._init_node_parm(conf_data.get('nn_id') + "_" + conf_data.get('wf_ver')+ "_" + "netconf_node")
            self.cls_pool_all = conf_data['cls_pool']  # Data feeder


            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
            train = TrainSummaryInfo(conf=config)
            print(config)
            self.batch_eval = self.get_eval_batch(node_id)
            #print(train)
            self.model_eval_path = ''.join([self.model_path + '/' + self.batch])


            for _k, _v in self.cls_pool_all.items():
                if 'test' in _k:
                    self.cls_pool = _v

                if 'evaldata' in _k:
                    self.multi_node_flag = _v.multi_node_flag

            #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
            logging.info("model_path : {0}".format(self.model_path))
            logging.info("hidden_layers : {0}".format(self.hidden_layers))
            logging.info("activation_function : {0}".format(self.activation_function))
            logging.info("batch_size : {0}".format(self.batch_size))
            logging.info("epoch : {0}".format(self.epoch))
            logging.info("model_type : {0}".format(self.model_type))
            logging.info("auto_demension : {0}".format(self.auto_demension))

            config_acc = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": conf_data.get('wf_ver'),
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config_acc)

            data_conf_info = self.data_conf

            #validation_monitor = _LossCheckerHook(acc_result)

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'], self.hidden_layers,
                                         str(self.activation_function), data_conf_info, str(self.model_eval_path),
                                         self.train, self.auto_demension)

            #, self.train, self.auto_demension

            # feed
            # TODO: what if there are multiple files?
            # get prev node for load data
            #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
            #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
            train_data_set = self.cls_pool  # get filename
            file_queue = str(train_data_set.input_paths[0])  # get file_name

            # Iterate over the files and load all rows; a way to get the total record count of a tfrecord is needed.

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # Todo H5
            # train per file in the folder (H5 case)
            # if multi_file flag is "no", H5 is the default
            try:
                results = dict()
                ori_list = list()
                pre_list = list()

                while (train_data_set.has_next()):
                    logging.info("Wdnn eval process from h5")
                    # Each time a file is processed,
                    # loop over it using the batch size and the total row count -> work out what remains in the last batch.
                    # Iterating the file, there is a fn that fetches one batch at a time; plug that into __itemd,
                    # and the input function checks the multi flag to split continuous and categorical columns (does this need to happen every batch?)
                    # -> and fit along the way
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    # eval should be one line predict
                    #self.batch_size = 2

                    for i in range(0, train_data_set.data_size(), self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]

                        eval_result = wdnn_model.evaluate(
                           input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                     data_set, data_conf_info), steps=200)
                        #print("model fitting h5 " + str(data_set))

                        acc = eval_result['accuracy']
                        loss = eval_result['loss']
                        acc_result.loss_info["loss"].append(str(eval_result['loss']))
                        acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                        predict_value = wdnn_model.predict(
                            input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                      data_set, data_conf_info))

                        data_set_count = len(data_set.index)
                        predict_val_list = [_pv for _pv in predict_value]
                        predict_val_count = len(predict_val_list)

                        if (data_set_count != predict_val_count):
                            logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                            raise ValueError(
                                'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                        data_set['predict_label'] = predict_val_list #list(predict_value)
                        predict_y = list(data_set['predict_label'])


                        ori_list.extend(data_set[self.label].values.tolist())
                        pre_list.extend(list(data_set['predict_label']))

                        # model fitting
                        logging.info("wdnn eval ori list  : {0}".format(len(ori_list)) )
                        logging.info("wdnn eval pre list  : {0}".format(len(pre_list)) )

                    train_data_set.next()

                #TODO : move this earlier
                train.set_nn_batch_ver_id(self.batch_eval)
                if self.model_type == "regression":
                    results['ori'] = ori_list
                    results['pre'] = pre_list
                    train.set_result_info(ori_list, pre_list)

                if (self.model_type == "category" or self.model_type == "deep"):
                    # For tfrecord, convert the label here; pulling it out later is hopeless because it comes back as a Tensor object. For H5, do the conversion in the feeder instead.
                    le = LabelEncoder()
                    le.fit(self.label_values)

                    for _i, _ori in enumerate(ori_list):
                        #return_value = self.labels[np.argmax(model.predict(X_train))]
                        train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
                #return self.batch
            except Exception as e:
                print("eval error")
                print(e)
                raise Exception(e)

            logging.info("eval end")
        except Exception as oe:
            logging.error(oe)
            raise Exception(oe)
        return train
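Each pass through the batch loop above appends the loss (and, for classification, the accuracy) returned by evaluate() to the summary object as strings. A tiny sketch of that accumulation using plain dictionaries (the metric values are invented):

batch_metrics = [{"loss": 0.41, "accuracy": 0.83}, {"loss": 0.37, "accuracy": 0.86}]
summary = {"loss": [], "acc": []}
for eval_result in batch_metrics:
    summary["loss"].append(str(eval_result["loss"]))
    summary["acc"].append(str(eval_result["accuracy"]))
print(summary)   # {'loss': ['0.41', '0.37'], 'acc': ['0.83', '0.86']}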
コード例 #15
0
class NeuralNetNodeCnn(NeuralNetNode):
    """
    """
    def one_hot_encoded(self, num_classes):
        one = np.zeros((num_classes, num_classes))

        for i in range(num_classes):
            for j in range(num_classes):
                if i == j:
                    one[i][j] = 1
        return one
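    # Note: the nested loops above simply build an identity matrix, so
    # np.eye(num_classes) would produce the same one-hot lookup table in one call;
    # row i is the one-hot vector for class index i.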

    ########################################################################
    def spaceprint(self, val, cnt):
        leng = len(str(val))
        cnt = cnt - leng
        restr = ""
        for i in range(cnt):
            restr += " "
        restr = restr + str(val)
        return restr

    ########################################################################
    def model_file_delete(self, model_path, modelname):
        existcnt = 10
        filelist = os.listdir(model_path)

        flist = []
        i = 0
        for filename in filelist:
            filetime = datetime.datetime.fromtimestamp(
                os.path.getctime(model_path + '/' +
                                 filename)).strftime('%Y%m%d%H%M%S')
            tmp = [filename, filetime]
            if filename.find(modelname) > -1:
                flist.append(tmp)
            i += 1
        flistsort = sorted(flist, key=operator.itemgetter(1), reverse=True)

        for i in range(len(flistsort)):
            if i > existcnt * 3:
                os.remove(model_path + "/" + flistsort[i][0])

    ########################################################################

    def _init_train_parm(self, conf_data):
        # get initial value
        self.conf_data = conf_data
        self.cls_pool = conf_data["cls_pool"]
        self.nn_id = conf_data["nn_id"]
        self.wf_ver = conf_data["wf_ver"]
        self.node_id = conf_data["node_id"]
        self.node = WorkFlowSimpleManager().get_train_node()

        # get feed name
        self.train_feed_name = self.nn_id + "_" + self.wf_ver + "_" + WorkFlowSimpleManager(
        ).get_train_feed_node()
        self.eval_feed_name = self.nn_id + "_" + self.wf_ver + "_" + WorkFlowSimpleManager(
        ).get_eval_feed_node()
        self.feed_node = self.get_prev_node()

    ########################################################################
    def _init_predict_parm(self, node_id):
        self.node_id = node_id

    ########################################################################
    def _init_value(self):
        self.g_ffile_print = "N"
        self.g_train_cnt = 0
        self.g_epoch_cnt = 0
        self.step_gap = 1
        self.file_end = '.h5'
        self.train_return_data = {}
        self.train_return_arr = [
            "Trainning .................................................."
        ]
        self.pred_return_data = {}

    ########################################################################

    def _set_netconf_parm(self):
        netconf = WorkFlowNetConfCNN().get_view_obj(self.node_id)
        try:
            netconf = WorkFlowNetConfCNN().set_num_classes_predcnt(
                self.nn_id, self.wf_ver, self.node, self.node_id, netconf)
        except:
            pass
        self.netconf = netconf

        self.net_type = self.netconf["config"]["net_type"]
        self.train_cnt = self.netconf["param"]["traincnt"]
        self.epoch = self.netconf["param"]["epoch"]
        self.batch_size = self.netconf["param"]["batch_size"]
        self.model_path = self.netconf["modelpath"]
        self.modelname = self.netconf["modelname"]

    ########################################################################
    def _set_dataconf_parm(self, dataconf):
        self.dataconf = dataconf

    ########################################################################
    def get_batch_img_data(self, data_set, type):
        num_classes = self.netconf["config"]["num_classes"]
        labels = self.netconf["labels"]
        x_size = self.dataconf["preprocess"]["x_size"]
        y_size = self.dataconf["preprocess"]["y_size"]
        channel = self.dataconf["preprocess"]["channel"]

        labelsHot = self.one_hot_encoded(num_classes)

        name_data_batch = data_set[2]
        label_data_batch = data_set[1]
        img_data_batch = data_set[0]

        if type == "T":
            r = 0
            y_batch = np.zeros((len(label_data_batch), num_classes))
            for j in label_data_batch:
                j = j.decode('UTF-8')
                k = labels.index(j)
                y_batch[r] = labelsHot[k]
                r += 1
        else:
            y_batch = []
            for j in label_data_batch:
                j = j.decode('UTF-8')
                y_batch.append(j)

        n_batch = []
        for j in name_data_batch:
            j = j.decode('UTF-8')
            n_batch.append(j)

        try:
            x_batch = np.zeros((len(img_data_batch), len(img_data_batch[0])))
        except Exception as e:
            println(e)
        r = 0
        for j in img_data_batch:
            j = j.tolist()
            x_batch[r] = j
            r += 1

        x_batch = np.reshape(x_batch, (-1, x_size, y_size, channel))

        # println("Image Label ////////////////////////////////////////////////")
        # println(label_data_batch)
        # println(y_batch)
        # println("Image /////////////////////////////////////////////////")
        # println(x_batch)

        return x_batch, y_batch, n_batch
        ########################################################################

    ########################################################################

    def get_model_cnn(self, sess, type=None):
        prenumoutputs = 1
        num_classes = self.netconf["config"]["num_classes"]
        learnrate = self.netconf["config"]["learnrate"]
        numoutputs = self.netconf["config"]["layeroutputs"]
        optimizer = self.netconf["config"]["optimizer"]
        node_out = self.netconf["out"]["node_out"]

        x_size = self.dataconf["preprocess"]["x_size"]
        y_size = self.dataconf["preprocess"]["y_size"]
        channel = self.dataconf["preprocess"]["channel"]
        ################################################################
        X = tf.placeholder(tf.float32,
                           shape=[None, x_size, y_size, channel],
                           name='x')
        Y = tf.placeholder(tf.float32, shape=[None, num_classes], name='y')
        ################################################################
        stopper = 1
        model = X

        while True:
            try:
                layer = self.netconf["layer" + str(stopper)]
            except Exception as e:
                if stopper == 1:
                    return "Error[100] layer is None ..............................."
                break
            stopper += 1

            try:
                layercnt = layer["layercnt"]
                for i in range(layercnt):
                    # println(layer)
                    if prenumoutputs == 1:
                        prenumoutputs = numoutputs
                    else:
                        numoutputs = prenumoutputs * 2
                        prenumoutputs = numoutputs
                    active = str(layer["active"])
                    convkernelsize = [
                        int((layer["cnnfilter"][0])),
                        int((layer["cnnfilter"][1]))
                    ]
                    maxpkernelsize = [
                        int((layer["maxpoolmatrix"][0])),
                        int((layer["maxpoolmatrix"][1]))
                    ]
                    stride = [
                        int((layer["maxpoolstride"][0])),
                        int((layer["maxpoolstride"][1]))
                    ]
                    padding = str((layer["padding"]))

                    if active == 'relu':
                        activitaion = tf.nn.relu
                    else:
                        activitaion = tf.nn.relu

                    if str(layer["droprate"]) is not "":
                        droprate = float((layer["droprate"]))
                    else:
                        droprate = 0.0

                    model = tf.contrib.layers.conv2d(
                        inputs=model,
                        num_outputs=numoutputs,
                        kernel_size=convkernelsize,
                        activation_fn=activitaion,
                        weights_initializer=tf.contrib.layers.
                        xavier_initializer_conv2d(),
                        padding=padding)

                    model = tf.contrib.layers.max_pool2d(
                        inputs=model,
                        kernel_size=maxpkernelsize,
                        stride=stride,
                        padding=padding)

                    if droprate > 0.0 and type == "T":
                        model = tf.nn.dropout(model, droprate)

                    # println(model)
            except Exception as e:
                println("Error[200] Model Create Fail.")
                println(e)

        reout = int(model.shape[1]) * int(model.shape[2]) * int(model.shape[3])
        model = tf.reshape(model, [-1, reout])
        # println(model)
        W1 = tf.Variable(tf.truncated_normal([reout, node_out], stddev=0.1))
        model = tf.nn.relu(tf.matmul(model, W1))

        W5 = tf.Variable(
            tf.truncated_normal([node_out, num_classes], stddev=0.1))
        model = tf.matmul(model, W5)
        # println(model)
        if type == "P":
            model = tf.nn.softmax(model)
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=model, labels=Y))
        if optimizer == "AdamOptimizer":
            optimizer = tf.train.AdamOptimizer(
                learning_rate=learnrate).minimize(cost)
        else:
            optimizer = tf.train.RMSPropOptimizer(learnrate,
                                                  0.9).minimize(cost)
        y_pred_cls = tf.argmax(model, 1)
        check_prediction = tf.equal(y_pred_cls, tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(check_prediction, tf.float32))

        self.model = model
        self.X = X
        self.Y = Y
        self.optimizer = optimizer
        self.y_pred_cls = y_pred_cls
        self.accuracy = accuracy
        self.cost = cost

        self.model_path = self.netconf["modelpath"]
        self.modelname = self.netconf["modelname"]
        last_chk_path = tf.train.latest_checkpoint(
            checkpoint_dir=self.model_path)

        try:
            step = last_chk_path.split("-")
            self.step_gap = int(step[1]) + 1
            saver = tf.train.Saver()
            saver.restore(sess, save_path=last_chk_path)
            println("Train Restored checkpoint from:" + last_chk_path)
        except:
            self.step_gap = 1
            println(
                "None to restore checkpoint. Initializing variables instead.")

        self.save_path = self.model_path + "/" + self.modelname + "-" + str(
            self.step_gap)

        return sess, saver
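    # Checkpoint paths written by the Saver look like "<model_path>/<modelname>-<step>",
    # so the split("-") above recovers the last global step and training resumes from
    # step + 1; when no checkpoint can be restored, the step counter starts at 1.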

    ########################################################################
    def get_model_resnet(self, sess):
        self.lr_reducer = ReduceLROnPlateau(monitor='val_loss',
                                            factor=np.sqrt(0.1),
                                            cooldown=0,
                                            patience=5,
                                            min_lr=0.5e-6)
        self.early_stopper = EarlyStopping(monitor='val_acc',
                                           min_delta=0.001,
                                           patience=10)
        self.csv_logger = CSVLogger('resnet.csv')
        num_classes = self.netconf["config"]["num_classes"]
        numoutputs = self.netconf["config"]["layeroutputs"]
        x_size = self.dataconf["preprocess"]["x_size"]
        y_size = self.dataconf["preprocess"]["y_size"]
        channel = self.dataconf["preprocess"]["channel"]
        self.data_augmentation = self.dataconf["preprocess"]["augmentation"]

        filelist = os.listdir(self.model_path)

        try:
            for filename in filelist:
                step1 = filename.split("-")
                step2 = step1[1].split(".")
                if self.step_gap < int(step2[0]):
                    self.step_gap = int(step2[0])
            last_chk_path = self.model_path + "/" + self.modelname + "-" + str(
                self.step_gap) + str(self.file_end)
            println(last_chk_path)

            self.model = keras.models.load_model(last_chk_path)
            self.model.compile(loss='categorical_crossentropy',
                               optimizer='adam',
                               metrics=['accuracy'])
            self.step_gap = int(step2[0]) + 1
            println("Train Restored checkpoint from:" + last_chk_path)
        except:
            println(
                "None to restore checkpoint. Initializing variables instead.")

            if numoutputs == 18:
                self.model = resnet.ResnetBuilder.build_resnet_18(
                    (channel, x_size, y_size), num_classes)
            elif numoutputs == 34:
                self.model = resnet.ResnetBuilder.build_resnet_34(
                    (channel, x_size, y_size), num_classes)
            elif numoutputs == 50:
                self.model = resnet.ResnetBuilder.build_resnet_50(
                    (channel, x_size, y_size), num_classes)
            elif numoutputs == 101:
                self.model = resnet.ResnetBuilder.build_resnet_101(
                    (channel, x_size, y_size), num_classes)
            elif numoutputs == 152:
                self.model = resnet.ResnetBuilder.build_resnet_152(
                    (channel, x_size, y_size), num_classes)
            elif numoutputs == 200:
                self.model = resnet.ResnetBuilder.build_resnet_200(
                    (channel, x_size, y_size), num_classes)
            self.model.compile(loss='categorical_crossentropy',
                               optimizer='adam',
                               metrics=['accuracy'])

        self.save_path = self.model_path + "/" + self.modelname + "-" + str(
            self.step_gap) + str(self.file_end)

    ########################################################################
    def set_saver_model(self, sess):
        saver = tf.train.Saver()
        saver.save(sess, save_path=self.save_path)

        batch_accR = round(self.batch_acc * 100, 2)
        msg = "Global Step: " + str(
            self.step_gap) + ", Training Batch Accuracy: " + str(
                batch_accR) + "%" + ", Cost: " + str(self.i_cost)
        println(msg)

        config = {
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        result = TrainSummaryAccLossInfo(config)
        result.loss_info["loss"] = str(self.i_cost)
        result.acc_info["acc"] = str(batch_accR)
        self.save_accloss_info(result)

        result = [msg]

        self.step_gap = self.step_gap + self.g_epoch_cnt
        self.save_path = self.model_path + "/" + self.modelname + "-" + str(
            self.step_gap)

        self.model_file_delete(self.model_path, self.modelname)

        self.train_return_arr.append(result)

        self.eval(self.node_id, self.conf_data, None, None)

    def set_saver_model_keras(self):
        keras.models.save_model(self.model, self.save_path)

        loss = round(self.loss * 100, 2)
        accR = round(self.acc * 100, 2)
        val_loss = round(self.val_loss * 100, 2)
        val_acc = round(self.val_acc * 100, 2)
        msg = "Global Step: " + str(self.step_gap)
        msg += ", Training Loss: " + str(
            loss) + "%" + ", Training Accuracy: " + str(accR) + "%"
        msg += ", Test Loss: " + str(
            val_loss) + "%" + ", Test Accuracy: " + str(val_acc) + "%"
        println(msg)

        config = {
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        result = TrainSummaryAccLossInfo(config)
        result.loss_info["loss"] = str(val_loss)
        result.acc_info["acc"] = str(val_acc)
        self.save_accloss_info(result)

        result = [msg]

        self.step_gap = self.step_gap + self.g_epoch_cnt
        self.save_path = self.model_path + "/" + self.modelname + "-" + str(
            self.step_gap) + str(self.file_end)

        self.model_file_delete(self.model_path, self.modelname)

        self.train_return_arr.append(result)

        self.eval(self.node_id, self.conf_data, None, None)

    def run(self, conf_data):
        println("run NeuralNetNodeCnn Train")
        # init data setup
        self._init_train_parm(conf_data)
        self._init_value()
        # set batch
        self.train_batch, self.batch = self.make_batch(self.node_id)

        # get data & dataconf
        test_data, dataconf = self.get_input_data(self.feed_node,
                                                  self.cls_pool,
                                                  self.eval_feed_name)
        input_data, dataconf = self.get_input_data(self.feed_node,
                                                   self.cls_pool,
                                                   self.train_feed_name)

        # set netconf, dataconf
        self._set_netconf_parm()
        self._set_dataconf_parm(dataconf)

        # train
        with tf.Session() as sess:
            if self.net_type == "resnet":
                self.get_model_resnet(sess)
                sess.run(tf.global_variables_initializer())
                self.train_run_resnet(input_data, test_data)
            else:
                sess, saver = self.get_model_cnn(sess, "T")
                sess.run(tf.global_variables_initializer())
                self.train_run_cnn(sess, input_data, test_data)

        self.train_return_data["TrainResult"] = self.train_return_arr

        if self.epoch == 0 or self.train_cnt == 0:
            self.eval(self.node_id, self.conf_data, None, None)

        return self.train_return_data

    def train_run_resnet(self, input_data, test_data):
        try:
            if self.data_augmentation == "N" or self.data_augmentation == "n":
                println('Not using data augmentation.')
            else:
                println('Using real-time data augmentation.')

            while (input_data.has_next()):
                data_set = input_data[0:input_data.data_size()]
                x_batch, y_batch, n_batch = self.get_batch_img_data(
                    data_set, "T")

                test_set = test_data[0:test_data.data_size()]
                x_tbatch, y_tbatch, n_tbatch = self.get_batch_img_data(
                    test_set, "T")

                for i in range(self.train_cnt):
                    if self.data_augmentation == "N" or self.data_augmentation == "n":
                        history = self.model.fit(x_batch,
                                                 y_batch,
                                                 batch_size=self.batch_size,
                                                 epochs=self.epoch,
                                                 validation_data=(x_tbatch,
                                                                  y_tbatch),
                                                 shuffle=True,
                                                 callbacks=[
                                                     self.lr_reducer,
                                                     self.early_stopper,
                                                     self.csv_logger
                                                 ])
                    else:
                        # This will do preprocessing and realtime data augmentation:
                        datagen = ImageDataGenerator(
                            featurewise_center=
                            False,  # set input mean to 0 over the dataset
                            samplewise_center=False,  # set each sample mean to 0
                            featurewise_std_normalization=
                            False,  # divide inputs by std of the dataset
                            samplewise_std_normalization=
                            False,  # divide each input by its std
                            zca_whitening=False,  # apply ZCA whitening
                            rotation_range=
                            0,  # randomly rotate images in the range (degrees, 0 to 180)
                            width_shift_range=0.1,
                            # randomly shift images horizontally (fraction of total width)
                            height_shift_range=0.1,
                            # randomly shift images vertically (fraction of total height)
                            horizontal_flip=True,  # randomly flip images
                            vertical_flip=False)  # randomly flip images

                        # Compute quantities required for featurewise normalization
                        # (std, mean, and principal components if ZCA whitening is applied).
                        datagen.fit(x_batch)

                        # Fit the model on the batches generated by datagen.flow().
                        history = self.model.fit_generator(
                            datagen.flow(x_batch,
                                         y_batch,
                                         batch_size=self.batch_size),
                            steps_per_epoch=x_batch.shape[0] //
                            self.batch_size,
                            validation_data=(x_tbatch, y_tbatch),
                            epochs=self.epoch,
                            verbose=1,
                            max_q_size=100,
                            callbacks=[
                                self.lr_reducer, self.early_stopper,
                                self.csv_logger
                            ])

                    self.loss = history.history["loss"][0]
                    self.acc = history.history["acc"][0]
                    self.val_loss = history.history["val_loss"][0]
                    self.val_acc = history.history["val_acc"][0]

                    self.g_train_cnt += 1
                    self.g_epoch_cnt = self.g_train_cnt
                    println("Save Train Count=" + str(self.g_train_cnt))
                    self.set_saver_model_keras()

                input_data.next()
        except Exception as e:
            println(
                "Error[400] ..............................................")
            println(e)

    def train_run_cnn(self, sess, input_data, test_data):
        try:
            while (input_data.has_next()):
                for i in range(self.train_cnt):
                    for i in range(0, input_data.size(), self.batch_size):
                        data_set = input_data[i:i + self.batch_size]
                        x_batch, y_batch, n_batch = self.get_batch_img_data(
                            data_set, "T")

                        for i in range(self.epoch):
                            feed_dict_train = {
                                self.X: x_batch,
                                self.Y: y_batch
                            }

                            _, self.i_cost, self.batch_acc = sess.run(
                                [self.optimizer, self.cost, self.accuracy],
                                feed_dict=feed_dict_train)

                            self.g_epoch_cnt += 1
                            println("Epoch Count=" + str(self.g_epoch_cnt))

                        self.g_train_cnt += 1
                        println("Save Train Count=" + str(self.g_train_cnt))
                        self.set_saver_model(sess)

                input_data.next()
        except Exception as e:
            println(
                "Error[400] ..............................................")
            println(e)

        return self.train_return_data

    ########################################################################
    def eval(self, node_id, conf_data, data=None, result=None):
        println("run NeuralNetNodeCnn eval")
        self._init_train_parm(conf_data)
        if data == None:
            self.eval_flag = "T"
        else:
            self.eval_flag = "E"

        #eval
        self.batch = self.get_eval_batch(node_id)
        config = {
            "type": self.netconf["config"]["eval_type"],
            "labels": self.netconf["labels"],
            "nn_id": self.nn_id,
            "nn_wf_ver_id": self.wf_ver,
            "nn_batch_ver_id": self.batch
        }
        self.eval_data = TrainSummaryInfo(conf=config)

        # config = {"type": self.netconf["config"]["eval_type"], "labels": self.netconf["labels"]}
        # self.eval_data = TrainSummaryInfo(conf=config)
        # self.eval_data.set_nn_id(self.nn_id)
        # self.eval_data.set_nn_wf_ver_id(self.wf_ver)

        # get data & dataconf
        test_data, dataconf = self.get_input_data(self.feed_node,
                                                  self.cls_pool,
                                                  self.eval_feed_name)

        with tf.Session() as sess:
            if self.net_type == "resnet":
                self.get_model_resnet(sess)
                sess.run(tf.global_variables_initializer())
            else:
                sess, saver = self.get_model_cnn(sess, "T")
                sess.run(tf.global_variables_initializer())

            self.eval_run(sess, test_data)

        # keras.backend.clear_session()
        if self.eval_flag == "E":
            keras.backend.clear_session()

        return self.eval_data

    def eval_run(self, sess, input_data):
        self.batch_size = self.netconf["param"]["batch_size"]
        labels = self.netconf["labels"]
        pred_cnt = self.netconf["param"]["predictcnt"]
        try:
            predlog = self.netconf["param"]["predlog"]
        except:
            predlog = "N"
        # println(labels)
        t_cnt_arr = []
        f_cnt_arr = []
        for i in range(len(labels)):
            t_cnt_arr.append(0)
            f_cnt_arr.append(0)

        input_data.pointer = 0
        while (input_data.has_next()):
            for i in range(0, input_data.size(), self.batch_size):
                data_set = input_data[i:i + self.batch_size]
                x_batch, y_batch, n_batch = self.get_batch_img_data(
                    data_set, "E")

                try:
                    if self.net_type == "cnn":
                        logits = sess.run([self.model],
                                          feed_dict={self.X: x_batch})
                        logits = logits[0]
                    elif self.net_type == "resnet":
                        logits = self.model.predict(x_batch)

                    for i in range(len(logits)):
                        true_name = y_batch[i]
                        file_name = n_batch[i]

                        logit = []
                        logit.append(logits[i])
                        #
                        idx = labels.index(true_name)
                        retrun_data = self.set_predict_return_cnn_img(
                            labels, logit, pred_cnt)
                        pred_name = retrun_data["key"][0]

                        if self.eval_flag == "E":
                            if true_name == pred_name:
                                t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                                strLog = "[True] : "
                                if (predlog == "TT"):
                                    println(strLog + true_name + " FileName=" +
                                            file_name)
                                    println(retrun_data["key"])
                                    println(retrun_data["val"])
                            else:
                                f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                                strLog = "[False] : "
                                if (predlog == "FF"):
                                    println(strLog + true_name + " FileName=" +
                                            file_name)
                                    println(retrun_data["key"])
                                    println(retrun_data["val"])
                            if (predlog == "AA"):
                                println(strLog + true_name + " FileName=" +
                                        file_name)
                                println(retrun_data["key"])
                                println(retrun_data["val"])
                        else:
                            try:
                                listTF = retrun_data["key"].index(true_name)
                                t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                                strLog = "[True] : "
                                if (predlog == "T"):
                                    println(strLog + true_name + " FileName=" +
                                            file_name)
                                    println(retrun_data["key"])
                                    println(retrun_data["val"])
                            except:
                                f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                                strLog = "[False] : "
                                if (predlog == "F"):
                                    println(strLog + true_name + " FileName=" +
                                            file_name)
                                    println(retrun_data["key"])
                                    println(retrun_data["val"])
                            if (predlog == "A"):
                                println(strLog + true_name + " FileName=" +
                                        file_name)
                                println(retrun_data["key"])
                                println(retrun_data["val"])

                        self.eval_data.set_result_info(true_name, pred_name)

                except Exception as e:
                    println(e)
                    println(
                        "None to restore checkpoint. Initializing variables instead."
                    )

            input_data.next()

        self.eval_print(labels, t_cnt_arr, f_cnt_arr)

    def eval_print(self, labels, t_cnt_arr, f_cnt_arr):
        println(
            "####################################################################################################"
        )
        result = []
        strResult = "['Eval ......................................................']"
        result.append(strResult)
        totCnt = 0
        tCnt = 0
        fCnt = 0
        for i in range(len(labels)):
            strResult = "Category : " + self.spaceprint(labels[i], 15) + " "
            strResult += "TotalCnt=" + self.spaceprint(
                str(t_cnt_arr[i] + f_cnt_arr[i]), 8) + " "
            strResult += "TrueCnt=" + self.spaceprint(str(t_cnt_arr[i]),
                                                      8) + " "
            strResult += "FalseCnt=" + self.spaceprint(str(f_cnt_arr[i]),
                                                       8) + " "
            if t_cnt_arr[i] + f_cnt_arr[i] != 0:
                strResult += "True Percent(TrueCnt/TotalCnt*100)=" + str(
                    round(t_cnt_arr[i] /
                          (t_cnt_arr[i] + f_cnt_arr[i]) * 100)) + "%"
            totCnt += t_cnt_arr[i] + f_cnt_arr[i]
            tCnt += t_cnt_arr[i]
            fCnt += f_cnt_arr[i]
            println(strResult)
            result.append(strResult)
        strResult = "---------------------------------------------------------------------------------------------------"
        println(strResult)
        strResult = "Total Category=" + self.spaceprint(str(len(labels)),
                                                        11) + " "
        strResult += "TotalCnt=" + self.spaceprint(str(totCnt), 8) + " "
        strResult += "TrueCnt=" + self.spaceprint(str(tCnt), 8) + " "
        strResult += "FalseCnt=" + self.spaceprint(str(fCnt), 8) + " "
        if totCnt != 0:
            strResult += "True Percent(TrueCnt/TotalCnt*100)=" + str(
                round(tCnt / totCnt * 100)) + "%"
        println(strResult)
        result.append(strResult)
        println(
            "###################################################################################################"
        )

    def predict(self, node_id, filelist):
        """
        """
        println("run NeuralNetNodeCnn Predict")
        # init data setup
        self._init_predict_parm(node_id)
        self._init_value()
        # net, data config setup
        data_node_name = self._get_backward_node_with_type(node_id, 'data')
        dataconf = WorkFlowNetConfCNN().get_view_obj(data_node_name[0])
        self._set_netconf_parm()
        self._set_dataconf_parm(dataconf)
        self.net_type = self.netconf["config"]["net_type"]

        # data shape change MultiValuDict -> nd array
        filename_arr, filedata_arr = self.change_predict_fileList(
            filelist, dataconf)

        # get unique key
        unique_key = '_'.join([node_id, self.get_eval_batch(node_id)])

        # prepare net conf
        tf.reset_default_graph()

        ## create tensorflow graph
        if (NeuralNetModel.dict.get(unique_key)):
            self = NeuralNetModel.dict.get(unique_key)
            graph = NeuralNetModel.graph.get(unique_key)
        else:
            if self.net_type == "cnn":
                self.get_model_cnn("P")
            elif self.net_type == "resnet":
                self.get_model_resnet()

            NeuralNetModel.dict[unique_key] = self
            NeuralNetModel.graph[unique_key] = tf.get_default_graph()
            graph = tf.get_default_graph()

        # predict
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())

            for i in range(len(filename_arr)):
                file_name = filename_arr[i]
                file_data = filedata_arr[i]

                if self.net_type == "cnn":
                    sess, saver = self.get_saver_model(sess)
                    logits = sess.run([self.model],
                                      feed_dict={self.X: file_data})
                    logits = logits[0]
                elif self.net_type == "resnet":
                    logits = self.model.predict(file_data)

                labels = self.netconf["labels"]
                pred_cnt = self.netconf["param"]["predictcnt"]
                retrun_data = self.set_predict_return_cnn_img(
                    labels, logits, pred_cnt)
                self.pred_return_data[file_name] = retrun_data
                println("Return Data.......................................")
                println(self.pred_return_data)

        return self.pred_return_data
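set_predict_return_cnn_img (defined elsewhere in the project) is used above to turn the raw logits into the top pred_cnt labels and scores for each file. Judging from how its "key"/"val" fields are read, a hedged, self-contained sketch of that kind of top-k selection looks like this (top_k_labels is an illustrative name, not the project API):

import numpy as np

def top_k_labels(labels, logits, pred_cnt):
    order = np.argsort(logits)[::-1][:pred_cnt]          # highest score first
    return {"key": [labels[i] for i in order],
            "val": [float(logits[i]) for i in order]}

print(top_k_labels(["cat", "dog", "bird"], np.array([0.1, 2.3, 0.7]), 2))
# -> {'key': ['dog', 'bird'], 'val': [2.3, 0.7]}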
コード例 #16
0
    def eval(self, node_id, conf_data, data=None, result=None):
        """
            Tensorflow Wide and Deep Network Eval Method
        :param node_id:
        :param parm:
        :return: None
        """
        logging.info("eval_starting ------> {0}".format(node_id))
        try:
            #self._init_node_parm(conf_data['node_id'])
            #conf_data['node_id']
            #self._init_node_parm(conf_data['node_id'])
            self._init_node_parm(conf_data.get('nn_id') + "_" + conf_data.get('wf_ver')+ "_" + "netconf_node")
            self.cls_pool_all = conf_data['cls_pool']  # Data feeder


            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
            train = TrainSummaryInfo(conf=config)
            print(config)
            self.batch_eval = self.get_eval_batch(node_id)
            #print(train)
            self.model_eval_path = ''.join([self.model_path + '/' + self.batch])


            for _k, _v in self.cls_pool_all.items():
                if 'test' in _k:
                    self.cls_pool = _v

                if 'evaldata' in _k:
                    self.multi_node_flag = _v.multi_node_flag

            #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
            logging.info("model_path : {0}".format(self.model_path))
            logging.info("hidden_layers : {0}".format(self.hidden_layers))
            logging.info("activation_function : {0}".format(self.activation_function))
            logging.info("batch_size : {0}".format(self.batch_size))
            logging.info("epoch : {0}".format(self.epoch))
            logging.info("model_type : {0}".format(self.model_type))
            logging.info("auto_demension : {0}".format(self.auto_demension))

            config_acc = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": conf_data.get('wf_ver'),
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config_acc)

            data_conf_info = self.data_conf

            #validation_monitor = _LossCheckerHook(acc_result)

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'], self.hidden_layers,
                                         str(self.activation_function), data_conf_info, str(self.model_eval_path),
                                         self.train, self.auto_demension)

            #, self.train, self.auto_demension

            # feed
            # TODO: what if there are multiple files?
            # get prev node for load data
            #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
            #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
            train_data_set = self.cls_pool  # get filename
            file_queue = str(train_data_set.input_paths[0])  # get file_name

            # Iterate over the files and load all rows; a way to get the total record count of a tfrecord is needed.

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # Todo H5
            # train per file in the folder (H5 case)
            # if multi_file flag is "no", H5 is the default
            try:
                results = dict()
                ori_list = list()
                pre_list = list()

                while (train_data_set.has_next()):
                    logging.info("Wdnn eval process from h5")
                    # Each time a file is processed,
                    # loop over it using the batch size and the total row count -> work out what remains in the last batch.
                    # Iterating the file, there is a fn that fetches one batch at a time; plug that into __itemd,
                    # and the input function checks the multi flag to split continuous and categorical columns (does this need to happen every batch?)
                    # -> and fit along the way
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    # eval should be one line predict
                    #self.batch_size = 2

                    for i in range(0, train_data_set.data_size(), self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]

                        eval_result = wdnn_model.evaluate(
                           input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                     data_set, data_conf_info), steps=200)
                        #print("model fitting h5 " + str(data_set))

                        if self.model_type == 'regression':
                            acc = eval_result['loss']
                            loss = eval_result['loss']
                            acc_result.loss_info["loss"].append(str(eval_result['loss']))
                            acc_result.acc_info["acc"].append(str(eval_result['loss']))
                        else:
                            acc = eval_result['accuracy']
                            loss = eval_result['loss']
                            acc_result.loss_info["loss"].append(str(eval_result['loss']))
                            acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                        # acc = eval_result['accuracy']
                        # loss = eval_result['loss']
                        # acc_result.loss_info["loss"].append(str(eval_result['loss']))
                        # acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                        predict_value = wdnn_model.predict(
                            input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                      data_set, data_conf_info))

                        data_set_count = len(data_set.index)
                        predict_val_list = [_pv for _pv in predict_value]
                        predict_val_count = len(predict_val_list)

                        if (data_set_count != predict_val_count):
                            logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                            raise ValueError(
                                'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                        data_set['predict_label'] = predict_val_list #list(predict_value)
                        predict_y = list(data_set['predict_label'])


                        ori_list.extend(data_set[self.label].values.tolist())
                        pre_list.extend(list(data_set['predict_label']))

                        # model fitting
                        logging.info("wdnn eval ori list  : {0}".format(len(ori_list)) )
                        logging.info("wdnn eval pre list  : {0}".format(len(pre_list)) )

                    train_data_set.next()

                # TODO: move this earlier in the flow
                train.set_nn_batch_ver_id(self.batch_eval)
                if self.model_type == "regression":
                    results['ori'] = ori_list
                    results['pre'] = pre_list
                    train.set_result_info(ori_list, pre_list)

                if (self.model_type == "category" or self.model_type == "deep"):
                    # For tfrecord the label is converted here, because later it is only available as a Tensor object; for H5, convert it in the feeder instead.
                    le = LabelEncoder()
                    le.fit(self.label_values)

                    for _i, _ori in enumerate(ori_list):
                        #return_value = self.labels[np.argmax(model.predict(X_train))]
                        train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
                #return self.batch
            except Exception as e:
                print("eval error")
                print(e)
                raise Exception(e)

            logging.info("eval end")
        except Exception as oe:
            logging.error(oe)
            raise Exception(oe)
        return train
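A minimal standalone sketch of the per-batch evaluate/predict loop used in the eval above. The names estimator, dataset, make_input_fn and label_col are hypothetical stand-ins for wdnn_model, train_data_set, input_fn2 and the configured label column; this illustrates the pattern, not the project's API.

import logging

def eval_batches(estimator, dataset, make_input_fn, label_col, batch_size=100):
    """Sketch: slice an H5-backed dataset and run evaluate/predict per slice."""
    ori_list, pre_list = [], []
    for start in range(0, dataset.data_size(), batch_size):
        batch = dataset[start:start + batch_size]                # pandas DataFrame slice
        metrics = estimator.evaluate(input_fn=lambda: make_input_fn(batch), steps=200)
        logging.info("batch loss=%s", metrics['loss'])
        preds = list(estimator.predict(input_fn=lambda: make_input_fn(batch)))
        if len(preds) != len(batch.index):                       # same sanity check as above
            raise ValueError("prediction count does not match batch size")
        ori_list.extend(batch[label_col].values.tolist())
        pre_list.extend(preds)
    return ori_list, pre_list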
コード例 #17
0
class NeuralNetNodeImage(NeuralNetNode):
    def lr_schedule(self, epoch):
        """Learning Rate Schedule
        Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
        Called automatically every epoch as part of callbacks during training.
        # Arguments
            epoch (int): The number of epochs
        # Returns
            lr (float32): learning rate
        """
        lr = 1e-3
        if epoch > 180:
            lr *= 0.5e-3
        elif epoch > 160:
            lr *= 1e-3
        elif epoch > 120:
            lr *= 1e-2
        elif epoch > 80:
            lr *= 1e-1

        return lr
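    # Illustrative values produced by lr_schedule above (a reference sketch, not extra logic):
    #   epoch <= 80    -> 1e-3
    #   81  .. 120     -> 1e-4
    #   121 .. 160     -> 1e-5
    #   161 .. 180     -> 1e-6
    #   epoch  > 180   -> 5e-7
    # It is handed to Keras as a callback in train_run_image below:
    #   LearningRateScheduler(self.lr_schedule)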

    def keras_get_model(self):
        # keras.backend.tensorflow_backend.clear_session()
        backendK.clear_session()
        # if settings.GPU_FLAG == True:
        #     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        #     sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        #     backendK.set_session(sess)

        try:
            self.model = keras.models.load_model(self.last_chk_path)
            logging.info("Train Restored checkpoint from:" + self.last_chk_path)
        except Exception as e:
            logging.info("None to restore checkpoint. Initializing variables instead." + self.last_chk_path)
            logging.info(e)

            if self.optimizer == 'sgd':
                self.optimizer = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
            elif self.optimizer == 'rmsprop':
                self.optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=1e-6)
            elif self.optimizer == 'adagrad':
                self.optimizer = optimizers.Adagrad(lr=0.01, epsilon=1e-08, decay=1e-6)
            elif self.optimizer == 'adadelta':
                self.optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=1e-6)
            elif self.optimizer == 'adam':
                self.optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)
                # self.optimizer = optimizers.Adam(lr=self.lr_schedule(0))
            elif self.optimizer == 'adamax':
                self.optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)
            elif self.optimizer == 'nadam':
                self.optimizer = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)

            if self.net_type == 'inceptionv4':
                # self.labels_cnt = 1001
                self.model = inception_v4_model(self.labels_cnt, 0.2, self.pretrain_model_path)
            # elif self.net_type == 'nasnet':
            #     self.model = NASNetLarge(input_shape=(331, 331, 3))
            elif self.net_type == 'resnet':
                numoutputs = self.netconf["config"]["layeroutputs"]

                if numoutputs == 18:
                    self.model = resnet.ResnetBuilder.build_resnet_18((self.channel, self.x_size, self.y_size), self.labels_cnt)
                elif numoutputs == 34:
                    self.model = resnet.ResnetBuilder.build_resnet_34((self.channel, self.x_size, self.y_size), self.labels_cnt)
                elif numoutputs == 50:
                    self.model = resnet.ResnetBuilder.build_resnet_50((self.channel, self.x_size, self.y_size), self.labels_cnt)
                elif numoutputs == 101:
                    self.model = resnet.ResnetBuilder.build_resnet_101((self.channel, self.x_size, self.y_size), self.labels_cnt)
                elif numoutputs == 152:
                    self.model = resnet.ResnetBuilder.build_resnet_152((self.channel, self.x_size, self.y_size), self.labels_cnt)

            # if settings.GPU_FLAG == True:
            #     self.model = multi_gpu_model(self.model, gpus=1)
            self.model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
            # self.model.summary()
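    # keras_get_model above follows a load-or-build pattern (a reference sketch in comments;
    # pick_optimizer and build_network are hypothetical placeholders for the optimizer branch
    # and the inception_v4 / resnet builders used above):
    #   try:
    #       model = keras.models.load_model(checkpoint_path)     # resume from the last saved batch
    #   except Exception:
    #       opt = pick_optimizer(netconf)                        # SGD / RMSprop / Adam / ... as above
    #       model = build_network(netconf)                       # e.g. inception_v4_model or ResnetBuilder
    #       model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])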

    def train_run_image(self, input_data, test_data):
        '''
        Train Run
        :param input_data:
        :param test_data:
        :return:
        '''
        self.epoch = self.netconf["param"]["epoch"]
        self.data_augmentation = self.netconf["param"]["augmentation"]
        try:
            self.fit_size = self.netconf["param"]["fit_size"]
        except Exception:
            self.fit_size = 9999999999

        self.lr_scheduler = LearningRateScheduler(self.lr_schedule)
        self.lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
        self.early_stopper = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10)

        try:
            while_cnt = 0
            self.loss = 0
            self.acc = 0
            self.val_loss = 0
            self.val_acc = 0

            input_data.reset_pointer()
            test_data.reset_pointer()

            test_set = test_data[0:test_data.data_size()]
            x_tbatch = self.get_convert_img_x(test_set[0], self.x_size, self.y_size, self.channel) # img_data_batch
            y_tbatch = self.get_convert_img_y(test_set[1], self.labels, self.labels_cnt) # label_data_batch

            while (input_data.has_next()):
                run_size = 0
                while( run_size < input_data.data_size()):
                    if run_size + self.fit_size > input_data.data_size():
                        input_set = input_data[run_size:input_data.data_size()]
                    else:
                        input_set = input_data[run_size:run_size + self.fit_size]
                    run_size += self.fit_size
                    x_batch = self.get_convert_img_x(input_set[0], self.x_size, self.y_size, self.channel)  # img_data_batch
                    y_batch = self.get_convert_img_y(input_set[1], self.labels, self.labels_cnt)  # label_data_batch

                    if len(x_batch) < self.batch_size:
                        self.batch_size = len(x_batch)

                    # # Normalize data.
                    # x_batch = x_batch.astype('float32') / 255
                    # x_tbatch = x_tbatch.astype('float32') / 255

                    # # If subtract pixel mean is enabled
                    # if self.subtract_pixel_mean:
                    #     x_train_mean = np.mean(x_batch, axis=0)
                    #     x_batch -= x_train_mean
                    #     x_tbatch -= x_train_mean

                    if self.data_augmentation == "N" or self.data_augmentation == "n":
                        history = self.model.fit(x_batch, y_batch,
                                                 batch_size=self.batch_size,
                                                 epochs=self.epoch,
                                                 validation_data=(x_tbatch, y_tbatch),
                                                 shuffle=True,
                                                 callbacks=[self.lr_reducer, self.early_stopper, self.lr_scheduler])
                    else:
                        # This will do preprocessing and realtime data augmentation:
                        datagen = ImageDataGenerator(
                            featurewise_center=False,  # set input mean to 0 over the dataset
                            samplewise_center=False,  # set each sample mean to 0
                            featurewise_std_normalization=False,  # divide inputs by std of the dataset
                            samplewise_std_normalization=False,  # divide each input by its std
                            zca_whitening=False,  # apply ZCA whitening
                            rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
                            width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)
                            height_shift_range=0.1, # randomly shift images vertically (fraction of total height)
                            horizontal_flip=True,  # randomly flip images
                            vertical_flip=False)  # randomly flip images

                        # Compute quantities required for featurewise normalization
                        # (std, mean, and principal components if ZCA whitening is applied).
                        datagen.fit(x_batch)

                        # Fit the model on the batches generated by datagen.flow().
                        history = self.model.fit_generator(
                            datagen.flow(x_batch, y_batch, batch_size=self.batch_size),
                            validation_data=(x_tbatch, y_tbatch),
                            epochs=self.epoch, verbose=1, workers=5,
                            steps_per_epoch=x_batch.shape[0] // self.batch_size,
                            callbacks=[self.lr_reducer, self.early_stopper, self.lr_scheduler])

                    self.loss += history.history["loss"][len(history.history["loss"])-1]
                    self.acc += history.history["acc"][len(history.history["acc"])-1]
                    self.val_loss += history.history["val_loss"][len(history.history["val_loss"])-1]
                    self.val_acc += history.history["val_acc"][len(history.history["val_acc"])-1]

                    while_cnt += 1
                input_data.next()

            if while_cnt > 0:
                self.loss =self.loss/while_cnt
                self.acc = self.acc / while_cnt
                self.val_loss = self.val_loss / while_cnt
                self.val_acc = self.val_acc / while_cnt

        except Exception as e:
            logging.info("Error[400] ..............................................")
            logging.info(e)

    def run(self, conf_data):
        '''
        Train run init
        :param conf_data: 
        :return: 
        '''
        try :
            logging.info("run NeuralNetNodeImage Train")
            # Common Start #############################################################################################
            # init value
            self = NeuralNetNode()._init_node_parm(self, conf_data)

            # netconf
            self.netconf = WorkFlowNetConf().get_view_obj(self.node_id)

            # dataconf & get data
            input_data, self.dataconf = self.get_input_data(self.feed_node, self.cls_pool, self.train_feed_name)
            test_data, self.dataconf_eval = self.get_input_data(self.feed_node, self.cls_pool, self.eval_feed_name)
            # Common End ###############################################################################################

            # Label Setup (1: HDF label row)
            self.labels, self.labels_cnt = self._get_netconf_labels(self.netconf, input_data, 1)

            self.channel = self.dataconf["preprocess"]["channel"]
            self.x_size = self.dataconf["preprocess"]["x_size"]
            self.y_size = self.dataconf["preprocess"]["y_size"]
            self.train_cnt = self.netconf["param"]["traincnt"]
            self.batch_size = self.netconf["param"]["batch_size"]
            self.predlog = self.netconf["param"]["predictlog"]
            self.optimizer = self.netconf["config"]["optimizer"]
            # Subtracting pixel mean improves accuracy
            self.subtract_pixel_mean = True

            # get model
            self.keras_get_model()

            if self.train_cnt == 0:
                self.train_batch = self.load_batch
                # Eval & Result Save
                self.eval(self.node_id, self.conf_data, test_data, None)

                # Eval Result Print
                self.eval_result_print(self.eval_data, self.predlog)
            else:
                _, self.train_batch = self.make_batch(self.node_id)
                self.save_path = self.model_path + "/" + str(self.train_batch) + self.file_end

                # Acc & Loss Init
                config = {"nn_id": self.nn_id, "nn_wf_ver_id": self.nn_wf_ver_id,
                          "nn_batch_ver_id": self.train_batch}
                self.acc_loss_result = TrainSummaryAccLossInfo(config)

                for i in range(self.train_cnt):
                    # Train
                    self.train_run_image(input_data, test_data)

                    # Model Save :  _init_node_parm : self.save_path
                    keras.models.save_model(self.model, self.save_path)

                    # Acc Loss Save : _init_node_parm : self.acc_loss_result
                    self.set_acc_loss_result(self.acc_loss_result, self.loss, self.acc, self.val_loss, self.val_acc)

                    # Eval & Result Save
                    self.eval(self.node_id, self.conf_data, test_data, None)

                    # Eval Result Print
                    self.eval_result_print(self.eval_data, self.predlog)

            return self.eval_data
        except Exception as e :
            logging.info("===Error on Train  : {0}".format(e))

    ####################################################################################################################
    def eval(self, node_id, conf_data, data=None, result=None):
        '''
        eval run init
        :param node_id: 
        :param conf_data: 
        :param data: 
        :param result: 
        :return: 
        '''
        try :
            logging.info("run NeuralNetNodeImage eval")

            pred_cnt = self.netconf["param"]["predictcnt"]
            eval_type = self.netconf["config"]["eval_type"]

            # eval result
            config = {"type": eval_type, "labels": self.labels,
                      "nn_id": self.nn_id,
                      "nn_wf_ver_id": self.nn_wf_ver_id, "nn_batch_ver_id": self.train_batch}
            self.eval_data = TrainSummaryInfo(conf=config)

            if data is None:
                return self.eval_data

            data.reset_pointer()

            while (data.has_next()):
                data_set = data[0:data.data_size()]
                x_batch = self.get_convert_img_x(data_set[0], self.x_size, self.y_size, self.channel)  # img_data_batch

                # # Normalize data.
                # x_batch = x_batch.astype('float32') / 255

                # # If subtract pixel mean is enabled
                # if self.subtract_pixel_mean:
                #     x_train_mean = np.mean(x_batch, axis=0)
                #     x_batch -= x_train_mean

                logits = self.model.predict(x_batch)

                y_batch = self.get_convert_img_y_eval(data_set[1])
                n_batch = self.get_convert_img_y_eval(data_set[2]) # File Name

                for i in range(len(logits)):
                    true_name = y_batch[i]

                    logit = []
                    logit.append(logits[i])
                    return_data = self.set_predict_return_cnn_img(self.labels, logit, pred_cnt)
                    pred_name = return_data["key"]
                    pred_value = return_data["val"]
                    # Predictions come back as an array of top candidates; count it as correct if any one of them matches the true label (see the top-k sketch after this example).
                    t_pred_name = pred_name[0]
                    for p_cnt in range(pred_cnt):
                        if pred_name[p_cnt] == true_name:
                            t_pred_name = pred_name[p_cnt]

                    # eval result
                    self.eval_data.set_result_info(true_name, t_pred_name)

                    # Called to write the per-row log.
                    file_name = n_batch[i]
                    self.eval_data.set_tf_log(file_name, true_name, pred_name, pred_value)

                data.next()

            # eval result
            if self.train_cnt != 0:
                TrainSummaryInfo.save_result_info(self, self.eval_data)

            return self.eval_data

        except Exception as e :
            logging.info("===Error on Eval  : {0}".format(e))

    ####################################################################################################################
    def predict(self, nn_id, ver, filelist):
        '''
        predict
        :param node_id: 
        :param filelist: 
        :return: 
        '''
        logging.info("run NeuralNetNodeImage Predict")
        self.subtract_pixel_mean = True
        self = NeuralNetNode()._init_pred_parm(self, nn_id, ver)
        # net   config setup
        self.netconf = WorkFlowNetConf().get_node_info(nn_id, ver, self.netconf_name)
        self.dataconf = WorkFlowNetConf().get_node_info(nn_id, ver, self.dataconf_name)

        # data shape change MultiValuDict -> nd array
        filename_arr, filedata_arr = self.change_predict_fileList(filelist, self.dataconf)

        # get unique key
        unique_key = '_'.join([str(nn_id), str(ver), self.load_batch])

        logging.info("getModelPath:"+self.model_path + "/" + self.load_batch+self.file_end)

        ## create tensorflow graph
        if (NeuralNetModel.dict.get(unique_key)):
            self = NeuralNetModel.dict.get(unique_key)
            # graph = NeuralNetModel.graph.get(unique_key)
        else:
            self.keras_get_model()

            NeuralNetModel.dict[unique_key] = self
            NeuralNetModel.graph[unique_key] = tf.get_default_graph()
            # graph = tf.get_default_graph()

        pred_return_data = {}
        for i in range(len(filename_arr)):
            file_name = filename_arr[i]
            file_data = filedata_arr[i]

            # # Normalize data.
            # file_data = file_data.astype('float32') / 255

            # # If subtract pixel mean is enabled
            # if self.subtract_pixel_mean:
            #     x_train_mean = np.mean(file_data, axis=0)
            #     file_data -= x_train_mean

            try:
                logits = self.model.predict(file_data)
            except Exception as e:
                self.keras_get_model()

                NeuralNetModel.dict[unique_key] = self
                NeuralNetModel.graph[unique_key] = tf.get_default_graph()
                # graph = tf.get_default_graph()
                logits = self.model.predict(file_data)

            labels = self.netconf["labels"]
            pred_cnt = self.netconf["param"]["predictcnt"]
            return_data = self.set_predict_return_cnn_img(labels, logits, pred_cnt)
            pred_return_data[file_name] = return_data
            logging.info("Return Data.......................................")
            logging.info(pred_return_data)

        return pred_return_data
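A minimal sketch of the top-k match check performed in the eval method above. The helper name topk_match is hypothetical, and it assumes set_predict_return_cnn_img essentially returns the labels and scores of the top pred_cnt logits, as the snippet suggests.

import numpy as np

def topk_match(labels, logits_row, true_name, pred_cnt):
    """Sketch: take the top pred_cnt predictions; report the true label if any of them matches."""
    order = np.argsort(logits_row)[::-1][:pred_cnt]        # indices of the highest scores
    pred_names = [labels[i] for i in order]
    pred_values = [float(logits_row[i]) for i in order]
    # Count as correct when the true label appears anywhere in the top-k candidates.
    matched = true_name if true_name in pred_names else pred_names[0]
    return matched, pred_names, pred_values

The matched value plays the role of t_pred_name passed to set_result_info above.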
コード例 #18
0
    def eval(self, node_id, conf_data, data=None, result=None):
        '''
        eval run init
        :param node_id: 
        :param conf_data: 
        :param data: 
        :param result: 
        :return: 
        '''
        try :
            logging.info("run NeuralNetNodeImage eval")

            pred_cnt = self.netconf["param"]["predictcnt"]
            eval_type = self.netconf["config"]["eval_type"]

            # eval result
            config = {"type": eval_type, "labels": self.labels,
                      "nn_id": self.nn_id,
                      "nn_wf_ver_id": self.nn_wf_ver_id, "nn_batch_ver_id": self.train_batch}
            self.eval_data = TrainSummaryInfo(conf=config)

            if data is None:
                return self.eval_data

            data.reset_pointer()

            while (data.has_next()):
                data_set = data[0:data.data_size()]
                x_batch = self.get_convert_img_x(data_set[0], self.x_size, self.y_size, self.channel)  # img_data_batch

                # # Normalize data.
                # x_batch = x_batch.astype('float32') / 255

                # # If subtract pixel mean is enabled
                # if self.subtract_pixel_mean:
                #     x_train_mean = np.mean(x_batch, axis=0)
                #     x_batch -= x_train_mean

                logits = self.model.predict(x_batch)

                y_batch = self.get_convert_img_y_eval(data_set[1])
                n_batch = self.get_convert_img_y_eval(data_set[2]) # File Name

                for i in range(len(logits)):
                    true_name = y_batch[i]

                    logit = []
                    logit.append(logits[i])
                    return_data = self.set_predict_return_cnn_img(self.labels, logit, pred_cnt)
                    pred_name = return_data["key"]
                    pred_value = return_data["val"]
                    # Predictions come back as an array of top candidates; count it as correct if any one of them matches the true label.
                    t_pred_name = pred_name[0]
                    for p_cnt in range(pred_cnt):
                        if pred_name[p_cnt] == true_name:
                            t_pred_name = pred_name[p_cnt]

                    # eval result
                    self.eval_data.set_result_info(true_name, t_pred_name)

                    # Called to write the per-row log.
                    file_name = n_batch[i]
                    self.eval_data.set_tf_log(file_name, true_name, pred_name, pred_value)

                data.next()

            # eval result
            if self.train_cnt != 0:
                TrainSummaryInfo.save_result_info(self, self.eval_data)

            return self.eval_data

        except Exception as e :
            logging.info("===Error on Eval  : {0}".format(e))
コード例 #19
0
File: ml_node.py  Project: yyf013932/tensormsa
    def eval(self, node_id, conf_data, data=None, result=None):
        """
            Tensorflow Wide and Deep Network Eval Method
        :param node_id:
        :param parm:
        :return: None
        """
        logging.info("eval_starting ------> {0}".format(node_id))
        try:
            self._init_node_parm(
                conf_data.get('nn_id') + "_" + conf_data.get('wf_ver') + "_" +
                "netconf_node")
            self.cls_pool_all = conf_data['cls_pool']  # Data feeder

            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'],
                                                   conf_data['wf_ver'],
                                                   netconf_node)

            config = {
                "type": self.model_type,
                "labels": self.label_values,
                "nn_id": conf_data.get('nn_id'),
                "nn_wf_ver_id": conf_data.get('wf_ver')
            }
            train = TrainSummaryInfo(conf=config)
            print(config)
            self.batch_eval = self.get_eval_batch(node_id)
            self.model_eval_path = ''.join(
                [self.model_path + '/' + self.batch])

            for _k, _v in self.cls_pool_all.items():
                if 'test' in _k:
                    self.cls_pool = _v

                if 'evaldata' in _k:
                    self.multi_node_flag = _v.multi_node_flag

            logging.info("model_path : {0}".format(self.model_path))
            logging.info("ml_class : {0}".format(self.ml_class))
            logging.info("config : {0}".format(self.config))

            config_acc = {
                "nn_id": conf_data['node_id'],
                "nn_wf_ver_id": conf_data.get('wf_ver'),
                "nn_batch_ver_id": self.batch
            }
            acc_result = TrainSummaryAccLossInfo(config_acc)

            data_conf_info = self.data_conf

            # make ML model
            clf = joblib.load(self.model_path + '/model.pkl')

            # feed
            # TODO: what if there are multiple files?
            # get prev node for load data
            train_data_set = self.cls_pool  # get filename
            file_queue = str(train_data_set.input_paths[0])  # get file_name

            # Iterate over the files and load every row; need a way to get the total record count from the tfrecord.

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # TODO: H5
            # train per file in the folder (H5 path)
            # if the multi_file flag is 'no', H5 is the default
            try:
                results = dict()
                ori_list = list()
                pre_list = list()

                while (train_data_set.has_next()):
                    logging.info("Wdnn eval process from h5")
                    # Each time one file is consumed:
                    # loop over it with the batch size and the total count, and handle whatever is left at the end.
                    # Iterating the file yields batch-sized chunks (via __getitem__);
                    # the input function then checks the multi flag and splits continuous vs. categorical columns
                    # (does this need to happen for every batch?) and fits as it goes.
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    # eval should be one line predict

                    for i in range(0, train_data_set.data_size(),
                                   self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]
                        keys = list(data_conf_info['cell_feature'].keys())
                        keys.remove(data_conf_info['label'])
                        keys = np.asarray(keys)
                        data = data_set[keys].values
                        label = data_set[data_conf_info['label']].values
                        acc = cross_val_score(clf,
                                              data,
                                              label,
                                              scoring='accuracy').mean()
                        loss = cross_val_score(clf,
                                               data,
                                               label,
                                               scoring='neg_log_loss').mean()
                        # acc = eval_result['accuracy']
                        # loss = eval_result['loss']
                        acc_result.loss_info["loss"].append(str(loss))
                        acc_result.acc_info["acc"].append(str(acc))
                        predict_val_list = list()

                        for row in data:
                            row = [row]
                            predict_value = clf.predict(row)
                            predict_val_list.append(predict_value)

                        # predict_value = clf.predict(
                        #     input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                        #                                               data_set, data_conf_info))

                        data_set_count = len(data_set.index)
                        #predict_val_list = [_pv for _pv in predict_value]
                        predict_val_count = len(predict_val_list)

                        if (data_set_count != predict_val_count):
                            logging.error(
                                "ML eval error check : dataframe count({0}) predict count({1})"
                                .format(data_set_count, predict_val_count))
                            raise ValueError(
                                'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)'
                            )

                        data_set['predict_label'] = predict_val_list
                        predict_y = list(data_set['predict_label'])

                        ori_list.extend(data_set[self.label].values.tolist())
                        pre_list.extend(list(data_set['predict_label']))

                        # model fitting
                        logging.info("ML eval ori list  : {0}".format(
                            len(ori_list)))
                        logging.info("ML eval pre list  : {0}".format(
                            len(pre_list)))

                    train_data_set.next()

                # TODO: move this earlier in the flow
                train.set_nn_batch_ver_id(self.batch_eval)
                if self.model_type == "regression":
                    results['ori'] = ori_list
                    results['pre'] = pre_list
                    train.set_result_info(ori_list, pre_list)

                if (self.model_type == "category"
                        or self.model_type == "deep"):
                    # For tfrecord the label is converted here, because later it is only available as a Tensor object; for H5, convert it in the feeder instead.
                    le = LabelEncoder()
                    le.fit(self.label_values)

                    for _i, _ori in enumerate(ori_list):
                        #return_value = self.labels[np.argmax(model.predict(X_train))]
                        #train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
                        train.set_result_info(str(_ori), str(pre_list[_i][0]))
                #return self.batch
            except Exception as e:
                print("eval error")
                print(e)
                raise Exception(e)

            logging.info("eval end")
        except Exception as oe:
            logging.error(oe)
            raise Exception(oe)
        return train
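A minimal sketch of the scikit-learn cross-validation metrics gathered per batch in the eval above. The helper name batch_metrics is hypothetical, and it assumes a classifier clf whose class supports predict_proba so that the neg_log_loss scorer is available.

from sklearn.model_selection import cross_val_score

def batch_metrics(clf, batch_df, feature_cols, label_col):
    """Sketch: mean cross-validated accuracy and log loss for one evaluation batch."""
    X = batch_df[list(feature_cols)].values
    y = batch_df[label_col].values
    acc = cross_val_score(clf, X, y, scoring='accuracy').mean()
    # neg_log_loss is negated by convention, so flip the sign to report a positive loss.
    loss = -cross_val_score(clf, X, y, scoring='neg_log_loss').mean()
    return acc, loss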