Beispiel #1
0
    def predict2(self, nn_id, conf_data, parm={}):

        # model build
        #
        # self._init_node_parm(nn_id)

        # data_conf_info = WorkflowDataConfFrame(nn_id + "_" + ver + "_" + "dataconf_node").data_conf
        try:
            node_id = conf_data['node_id']
            netconf = conf_data['net_conf']
            dataconf = conf_data['data_conf']

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            # wdnn_model = wdnn.wdnn_predict_build('wdnn', nn_id, netconf['hidden_layers'], netconf['activation_function'], '', netconf['model_path'], False)

            wdnn_model = wdnn.wdnn_build('wdnn', nn_id,
                                         netconf['hidden_layers'],
                                         netconf['activation_function'],
                                         dataconf['data_conf'],
                                         netconf['model_path'], False)

            label_column = list(dataconf['data_conf']["label"].keys())[0]

            # data -> csv (pandas)
            df = pd.read_csv(
                tf.gfile.Open('/hoya_src_root/adultest.data'),
                # names=COLUMNS,
                skipinitialspace=True,
                engine="python")

            # df['label'] = (df[label_column].apply(lambda x: "Y" in x)).astype(int)
            # df['label'] = (df['income_bracket'].apply(lambda x: '>50K' in x)).astype(int)

            # predict
            #    def input_fn(self, df, nnid, dataconf ):
            predict_results = wdnn_model.predict(
                input_fn=lambda: wdnn.input_fn(df, nn_id, dataconf['data_conf']
                                               ))
            df['label'] = list(predict_results)

            return None
        except Exception as e:
            raise Exception(e)
        return None
    def predict(self, nn_id, conf_data, parm = {}):

        # model build
        #
        # self._init_node_parm(nn_id)

        # data_conf_info = WorkflowDataConfFrame(nn_id + "_" + ver + "_" + "dataconf_node").data_conf
        try:
            node_id = conf_data['node_id']
            netconf = conf_data['net_conf']
            dataconf = conf_data['data_conf']

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            # wdnn_model = wdnn.wdnn_predict_build('wdnn', nn_id, netconf['hidden_layers'], netconf['activation_function'], '', netconf['model_path'], False)

            wdnn_model = wdnn.wdnn_build('wdnn', nn_id, netconf['hidden_layers'],netconf['activation_function'],dataconf['data_conf'], netconf['model_path'], False)

            label_column = list(dataconf['data_conf']["label"].keys())[0]

            # data -> csv (pandas)
            df = pd.read_csv( tf.gfile.Open('/hoya_src_root/adultest.data'),
                             # names=COLUMNS,
                              skipinitialspace=True,
                              engine="python")

            # df['label'] = (df[label_column].apply(lambda x: "Y" in x)).astype(int)
            # df['label'] = (df['income_bracket'].apply(lambda x: '>50K' in x)).astype(int)


            # predict
            #    def input_fn(self, df, nnid, dataconf ):
            predict_results = wdnn_model.predict(input_fn=lambda: wdnn.input_fn( df, nn_id, dataconf['data_conf']))
            df['label'] = list(predict_results)

            return None
        except Exception as e:
            raise Exception(e)
        return None
    def eval(self, node_id, conf_data, data=None, result=None):
        """

        :param node_id:
        :param parm:
        :return:
        """
        logging.info("eval_data")

        self._init_node_parm(node_id.split('_')[0] + "_" + node_id.split('_')[1]+ "_" + "netconf_node")
        self.cls_pool_all = conf_data['cls_pool']  # Data feeder

        config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
        train = TrainSummaryInfo(conf=config)
        print(config)
        self.batch = self.get_eval_batch(node_id)
        #print(train)
        self.model_eval_path = ''.join([self.model_path + '/' + self.batch])


        for _k, _v in self.cls_pool_all.items():
            if 'test' in _k:
                self.cls_pool = _v

            if 'evaldata' in _k:
                self.multi_node_flag = _v.multi_node_flag

        #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
        print("model_path : " + str(self.model_path))
        print("hidden_layers : " + str(self.hidden_layers))
        print("activation_function : " + str(self.activation_function))
        print("batch_size : " + str(self.batch_size))
        print("epoch : " + str(self.epoch))
        print("model_type : " + str(self.model_type))

        # data_store_path = WorkFlowDataFrame(conf_data['nn_id']+"_"+conf_data['wf_ver']+"_"+ "data_node").step_store
        data_conf_info = self.data_conf

        # make wide & deep model
        wdnn = NeuralCommonWdnn()
        wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'], self.hidden_layers,
                                     str(self.activation_function), data_conf_info, str(self.model_eval_path))

        # feed
        # TODO file이 여러개면 어떻하지?
        # get prev node for load data
        #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
        #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
        train_data_set = self.cls_pool  # get filename
        file_queue = str(train_data_set.input_paths[0])  # get file_name

        # file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요

        _batch_size = self.batch_size
        _num_tfrecords_files = 0

        # multi Feeder modified
        multi_read_flag = self.multi_read_flag

        # Todo H5
        # train per files in folder h5용
        # if multi_file flag = no이면 기본이 h5임
        try:
            results = dict()
            ori_list = list()
            pre_list = list()

            while (train_data_set.has_next()):
                print("h5")
                # 파일이 하나 돌때마다
                # for 배치사이즈와 파일의 총갯수를 가져다가 돌린다. -> 마지막에 뭐가 있을지 구분한다.
                # 파일에 iter를 넣으면 배치만큼 가져오는 fn이 있음 그걸 __itemd에 넣고
                # Input 펑션에서 multi를 vk판단해서 col와 ca를 구분한다.(이걸 배치마다 할 필요가 있나?)
                # -> 그러면서 피팅
                #
                # # Iteration is to improve for Model Accuracy

                # Per Line in file
                # eval should be one line predict
                #self.batch_size = 2

                for i in range(0, train_data_set.data_size(), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]
                    #if i == 0:
                    #eval_data_Set = data_set
                    # input_fn2(self, mode, data_file, df, nnid, dataconf):
                    predict_value = wdnn_model.predict(
                        input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                  data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                    data_set['predict_label'] = predict_val_list #list(predict_value)
                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])


                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    # model fitting
                    print(len(ori_list))
                    print(len(pre_list))
                    #logging.error("wdnn eval ori list  : {0}".format(ori_list) )
                    logging.info("wdnn eval ori list  : {0}".format(len(ori_list)) )
                    #logging.info("wdnn eval ori list  : {0}".format('info'))
                    #logging.debug("wdnn eval ori list  : {0}".format('debug'))
                    #logging.critical("wdnn eval ori list  : {0}".format('critical'))
                    #print("model fitting h5 " + str(data_set))
                # #Select Next file
                train_data_set.next()

            #TODO : 앞으로 옮기자
            train.set_nn_batch_ver_id(self.batch)
            if self.model_type == "regression":
                results['ori'] = ori_list
                results['pre'] = pre_list
                train.set_result_info(ori_list, pre_list)

            if self.model_type == "category":
                # tfrecord는 여기서 Label을 변경한다. 나중에 꺼낼때 답이 없음 Tensor 객체로 추출되기 때문에 그러나 H5는 feeder에서 변환해주자
                le = LabelEncoder()
                le.fit(self.label_values)

                for _i, _ori in enumerate(ori_list):
                    #return_value = self.labels[np.argmax(model.predict(X_train))]
                    train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
            #return self.batch
        except Exception as e:
            print("eval error")
            print(e)
            raise Exception(e)

        logging.info("eval end")
        return train
Beispiel #4
0
    def predict(self, node_id, ver, parm, data=None, result=None):
        """ Wdnn predict 
            batchlist info에서 active flag가 Y인 Model을 가져와서 예측을 함 

        Args:
          params: 
            * node_id
            * conf_data

        Returns:
            none

        Raises:

        Example

        """
        try:
            logging.info("wdnn predict_start nnid : {0}".format(node_id))
            _node_id = node_id + "_" + ver + "_" + "netconf_node"

            _data_conf_id = node_id + "_" + ver + "_dataconf_node"
            self._init_node_parm(_node_id)
            #self.cls_pool_all = conf_data['cls_pool']  # Data feeder

            config = {
                "type": self.model_type,
                "labels": self.label_values,
                "nn_id": node_id,
                "nn_wf_ver_id": ver
            }
            train = TrainSummaryInfo(conf=config)
            #print(config)
            self.batch = self.get_active_batch(_node_id)
            #print(train)
            self.model_predict_path = ''.join(
                [self.model_path + '/' + self.batch])
            self.multi_node_flag = False

            conf_data = {}
            conf_data['node_id'] = _node_id

            #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
            print("model_path : " + str(self.model_path))
            print("hidden_layers : " + str(self.hidden_layers))
            print("activation_function : " + str(self.activation_function))
            print("batch_size : " + str(self.batch_size))
            print("epoch : " + str(self.epoch))
            print("model_type : " + str(self.model_type))

            # data_store_path = WorkFlowDataFrame(conf_data['nn_id']+"_"+conf_data['wf_ver']+"_"+ "data_node").step_store
            data_conf_info = self.data_conf

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, node_id,
                                         self.hidden_layers,
                                         str(self.activation_function),
                                         data_conf_info,
                                         str(self.model_predict_path))

            # feed
            # TODO file이 여러개면 어떻하지?
            filelist = sorted(parm.items())
            #train_data_set = self.cls_pool  # get filename
            #file_queue = str(train_data_set.input_paths[0])  # get file_name

            # file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # Todo H5
            # train per files in folder h5용
            # if multi_file flag = no이면 기본이 h5임

            results = dict()
            ori_list = list()
            pre_list = list()
            #self.batch_size = 5
            for filename in filelist:
                print("h5")
                #feeder = PreNodeFeedFr2Wdnn().set_for_predict(_data_conf_id)
                feeder = PreNodeFeedFr2Wdnn()
                #_data_conf_id
                #set_for_predict
                feeder.set_for_predict(_data_conf_id)
                data_node = DataNodeFrame()
                train_data_set = data_node.load_csv_by_pandas(
                    self.predict_path + "/" + filename[1].name)

                #feeder.set_input_paths([self.predict_path + "/" + filename[1].name])
                #train_data_set = feeder
                #_size = train_data_set
                # 파일이 하나 돌때마다
                # for 배치사이즈와 파일의 총갯수를 가져다가 돌린다. -> 마지막에 뭐가 있을지 구분한다.
                # 파일에 iter를 넣으면 배치만큼 가져오는 fn이 있음 그걸 __itemd에 넣고
                # Input 펑션에서 multi를 vk판단해서 col와 ca를 구분한다.(이걸 배치마다 할 필요가 있나?)
                # -> 그러면서 피팅
                #
                # # Iteration is to improve for Model Accuracy

                # Per Line in file
                # eval should be one line predict
                #self.batch_size = 2

                #train_date를 어떻게 가져오냐가 문제

                result_df = pd.DataFrame()

                for i in range(0, len(train_data_set.index), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]
                    #if i == 0:
                    #eval_data_Set = data_set
                    # input_fn2(self, mode, data_file, df, nnid, dataconf):
                    predict_value = wdnn_model.predict(
                        input_fn=lambda: feeder.input_fn2(
                            tf.contrib.learn.ModeKeys.TRAIN, filename,
                            data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error(
                            "wdnn eval error check : dataframe count({0}) predict count({1})"
                            .format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)'
                        )

                    data_set[
                        'predict_label'] = predict_val_list  #list(predict_value)
                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])
                    #pd.concat(result_df, data_set)
                    result_df = result_df.append(data_set)
                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    # model fitting
                    print(len(ori_list))
                    print(len(pre_list))
                    #logging.error("wdnn eval ori list  : {0}".format(ori_list) )
                    logging.info("wdnn eval ori list  : {0}".format(
                        len(ori_list)))
                    #logging.info("wdnn eval ori list  : {0}".format('info'))
                    #logging.debug("wdnn eval ori list  : {0}".format('debug'))
                    #logging.critical("wdnn eval ori list  : {0}".format('critical'))
                    #print("model fitting h5 " + str(data_set))
                # #Select Next file

                #train_data_set.next()

            predict_result_dir = utils.make_and_exist_directory(
                self.predict_path + "/" + "result" + "/")
            predict_result_filename = predict_result_dir + "result_" + strftime(
                "%Y-%m-%d-%H:%M:%S", gmtime()) + ".csv"
            result_df.to_csv(predict_result_filename)
            #os.remove(self.predict_path + "/" + filename[1].name)

            # #TODO : 앞으로 옮기자
            # train.set_nn_batch_ver_id(self.batch)
            # if self.model_type == "regression":
            #     results['ori'] = ori_list
            #     results['pre'] = pre_list
            #     train.set_result_info(ori_list, pre_list)
            #
            # if self.model_type == "category":
            #     # tfrecord는 여기서 Label을 변경한다. 나중에 꺼낼때 답이 없음 Tensor 객체로 추출되기 때문에 그러나 H5는 feeder에서 변환해주자
            #     le = LabelEncoder()
            #     le.fit(self.label_values)
            #
            #     for _i, _ori in enumerate(ori_list):
            #         #return_value = self.labels[np.argmax(model.predict(X_train))]
            #         train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
            #return self.batch

            logging.info("eval end")
            return train
        except Exception as e:
            logging.error("Wdnn predict error {0}".format(e))

            raise Exception(e)
Beispiel #5
0
    def run(self, conf_data):
        logging.info("NeuralNetNodeWdnn Run called")
        #return None
        #return None
        """
                Wide & Deep Network Training
                :param nnid : network id in tfmsacore_nninfo
                :return: acturacy
        """
        try:
            self._init_node_parm(conf_data['node_id'])
            if self.train == False:
                return None
            self.cls_pool = conf_data['cls_pool']  # Data feeder

            self.train_batch, self.batch = self.make_batch(
                conf_data['node_id'])  #makebatch

            self.before_train_batch = self.get_before_make_batch(
                conf_data['node_id'], self.batch)  #before train batch

            if self.before_train_batch != None:
                self.model_train_before_path = ''.join([
                    self.model_path + '/' +
                    str(self.before_train_batch.nn_batch_ver_id)
                ])

            if self.train_batch == None:
                self.model_train_path = ''.join(
                    [self.model_path + '/' + self.batch])
            else:
                self.model_train_path = ''.join(
                    [self.model_path + '/' + self.train_batch])

            #model file copy
            if self.before_train_batch != None:
                src = self.model_train_before_path
                dst = self.model_train_path
                utils.copy_all(src, dst)

            logging.info("model_path : {0} ".format(self.model_path))
            logging.info("hidden_layers : {0} ".format(self.hidden_layers))
            logging.info("activation_function : {0} ".format(
                self.activation_function))
            logging.info("batch_size : {0} ".format(self.batch_size))
            logging.info("epoch : {0} ".format(self.epoch))
            logging.info("model_type : {0} ".format(self.model_type))

            data_conf_info = self.data_conf

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'],
                                         self.hidden_layers,
                                         str(self.activation_function),
                                         data_conf_info,
                                         str(self.model_train_path),
                                         self.train, self.auto_demension)

            #feed
            # TODO file이 여러개면 어떻하지?
            # get prev node for load data
            data_node_name = self._get_backward_node_with_type(
                conf_data['node_id'], 'preprocess')
            train_data_set = self.cls_pool[data_node_name[0]]  #get filename
            file_queue = str(train_data_set.input_paths[0])  #get file_name

            #file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요

            _batch_size = self.batch_size
            _num_tfrecords_files = 0
            #_batch_size = 2

            #multi Feeder modified
            multi_read_flag = self.multi_read_flag
            if multi_read_flag == True:
                for index, fn in enumerate(train_data_set.input_paths):
                    _num_tfrecords_files += self.generator_len(
                        tf.python_io.tf_record_iterator(
                            fn))  # get length of generators
                print("total loop " +
                      str(math.ceil(_num_tfrecords_files / _batch_size)))

                for index in range(
                        int(math.ceil(_num_tfrecords_files / _batch_size))):
                    print("number of for loop " + str(index))
                    wdnn_model.fit(input_fn=lambda: train_data_set.input_fn(
                        tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                        _batch_size),
                                   steps=self.epoch)

                results = wdnn_model.evaluate(
                    input_fn=lambda: train_data_set.input_fn(
                        tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                        _batch_size),
                    steps=1)
            else:
                #Todo H5
                # train per files in folder h5용
                #if multi_file flag = no이면 기본이 h5임
                while (train_data_set.has_next()):
                    print("h5")
                    #파일이 하나 돌때마다
                    #for 배치사이즈와 파일의 총갯수를 가져다가 돌린다. -> 마지막에 뭐가 있을지 구분한다.
                    #파일에 iter를 넣으면 배치만큼 가져오는 fn이 있음 그걸 __itemd에 넣고
                    # Input 펑션에서 multi를 vk판단해서 col와 ca를 구분한다.(이걸 배치마다 할 필요가 있나?)
                    # -> 그러면서 피팅
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    for i in range(0, train_data_set.data_size(),
                                   self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]
                        if i == 0:
                            eval_data_Set = data_set
                        #input_fn2(self, mode, data_file, df, nnid, dataconf):
                        wdnn_model.fit(
                            input_fn=lambda: train_data_set.input_fn2(
                                tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                data_set, data_conf_info),
                            steps=200)
                        #model fitting
                        print("model fitting h5 " + str(data_set))
                    # #Select Next file
                    train_data_set.next()
                results = dict()

                #results = wdnn_model.evaluate(
                #input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                #                                         eval_data_Set, data_conf_info), steps=200)

            for key in sorted(results):
                print("%s: %s" % (key, results[key]))
                logging.info("train data eval result : {0} : {1}".format(
                    key, results[key]))

            #feature_map, target = train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue, 128)
            print("end")
            #with tf.Session() as sess:
            return None
        except Exception as e:
            logging.info("[Wide and Deep Train Process] : {0}".format(e))
            raise Exception(e)
Beispiel #6
0
    def eval(self, node_id, conf_data, data=None, result=None):
        """

        :param node_id:
        :param parm:
        :return:
        """
        logging.info("eval_starting ------> {0}".format(node_id))

        self._init_node_parm(
            node_id.split('_')[0] + "_" + node_id.split('_')[1] + "_" +
            "netconf_node")
        self.cls_pool_all = conf_data['cls_pool']  # Data feeder

        config = {
            "type": self.model_type,
            "labels": self.label_values,
            "nn_id": conf_data.get('nn_id'),
            "nn_wf_ver_id": conf_data.get('wf_ver')
        }
        train = TrainSummaryInfo(conf=config)
        print(config)
        self.batch = self.get_eval_batch(node_id)
        #print(train)
        self.model_eval_path = ''.join([self.model_path + '/' + self.batch])

        for _k, _v in self.cls_pool_all.items():
            if 'test' in _k:
                self.cls_pool = _v

            if 'evaldata' in _k:
                self.multi_node_flag = _v.multi_node_flag

        #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
        logging.info("model_path : {0}".format(self.model_path))
        print("hidden_layers : {0}".format(self.hidden_layers))
        print("activation_function : {0}".format(self.activation_function))
        print("batch_size : {0}".format(self.batch_size))
        print("epoch : {0}".format(self.epoch))
        print("model_type : {0}".format(self.model_type))

        # data_store_path = WorkFlowDataFrame(conf_data['nn_id']+"_"+conf_data['wf_ver']+"_"+ "data_node").step_store
        data_conf_info = self.data_conf

        # make wide & deep modelnot
        wdnn = NeuralCommonWdnn()
        wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'],
                                     self.hidden_layers,
                                     str(self.activation_function),
                                     data_conf_info, str(self.model_eval_path))

        # feed
        # TODO file이 여러개면 어떻하지?
        # get prev node for load data
        #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
        #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
        train_data_set = self.cls_pool  # get filename
        file_queue = str(train_data_set.input_paths[0])  # get file_name

        # file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요

        _batch_size = self.batch_size
        _num_tfrecords_files = 0

        # multi Feeder modified
        multi_read_flag = self.multi_read_flag

        # Todo H5
        # train per files in folder h5용
        # if multi_file flag = no이면 기본이 h5임
        try:
            results = dict()
            ori_list = list()
            pre_list = list()

            while (train_data_set.has_next()):
                print("h5")
                # 파일이 하나 돌때마다
                # for 배치사이즈와 파일의 총갯수를 가져다가 돌린다. -> 마지막에 뭐가 있을지 구분한다.
                # 파일에 iter를 넣으면 배치만큼 가져오는 fn이 있음 그걸 __itemd에 넣고
                # Input 펑션에서 multi를 vk판단해서 col와 ca를 구분한다.(이걸 배치마다 할 필요가 있나?)
                # -> 그러면서 피팅
                #
                # # Iteration is to improve for Model Accuracy

                # Per Line in file
                # eval should be one line predict
                #self.batch_size = 2

                for i in range(0, train_data_set.data_size(), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]
                    #if i == 0:
                    #eval_data_Set = data_set
                    # input_fn2(self, mode, data_file, df, nnid, dataconf):
                    predict_value = wdnn_model.predict(
                        input_fn=lambda: train_data_set.input_fn2(
                            tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                            data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error(
                            "wdnn eval error check : dataframe count({0}) predict count({1})"
                            .format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)'
                        )

                    data_set[
                        'predict_label'] = predict_val_list  #list(predict_value)
                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])

                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    # model fitting
                    print(len(ori_list))
                    print(len(pre_list))
                    #logging.error("wdnn eval ori list  : {0}".format(ori_list) )
                    logging.info("wdnn eval ori list  : {0}".format(
                        len(ori_list)))
                    #logging.info("wdnn eval ori list  : {0}".format('info'))
                    #logging.debug("wdnn eval ori list  : {0}".format('debug'))
                    #logging.critical("wdnn eval ori list  : {0}".format('critical'))
                    #print("model fitting h5 " + str(data_set))
                # #Select Next file
                train_data_set.next()

            #TODO : 앞으로 옮기자
            train.set_nn_batch_ver_id(self.batch)
            if self.model_type == "regression":
                results['ori'] = ori_list
                results['pre'] = pre_list
                train.set_result_info(ori_list, pre_list)

            if self.model_type == "category":
                # tfrecord는 여기서 Label을 변경한다. 나중에 꺼낼때 답이 없음 Tensor 객체로 추출되기 때문에 그러나 H5는 feeder에서 변환해주자
                le = LabelEncoder()
                le.fit(self.label_values)

                for _i, _ori in enumerate(ori_list):
                    #return_value = self.labels[np.argmax(model.predict(X_train))]
                    train.set_result_info(
                        str(_ori), str(le.inverse_transform(pre_list[_i])))
            #return self.batch
        except Exception as e:
            print("eval error")
            print(e)
            raise Exception(e)

        logging.info("eval end")
        return train
    def predict(self, node_id,ver, parm, data=None, result=None):

        """ Wdnn predict 
            batchlist info에서 active flag가 Y인 Model을 가져와서 예측을 함 

        Args:
          params: 
            * node_id
            * conf_data

        Returns:
            none

        Raises:INFO

        Example

        """
        try:
            logging.info("wdnn predict_start nnid : {0}".format(node_id))
            if ver == 'active':
                self.batch, wf_ver = self.get_active_batch2(node_id)
                #self.model_predict_path = ''.join([self.model_path + '/' + self.batch])
            else:
                wf_ver = ver
                _nn_ver_id= node_id + "_" + wf_ver + "_" + "netconf_node"
                self.batch = self.get_active_batch(_nn_ver_id)  # Train이 Y인것 가져오기 Eval Flag가 Y인거 가져오기

            _node_id = node_id + "_" + wf_ver+ "_" + "netconf_node"
            _data_conf_id = node_id + "_" + wf_ver + "_dataconf_node"
            self._init_node_parm(_node_id)


            graph = NNCommonManager().get_nn_node_name(node_id)
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(node_id, wf_ver, netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":node_id, "nn_wf_ver_id":ver}
            #train = TrainSummaryInfo(conf=config)
            self.model_predict_path = ''.join([self.model_path + '/' + self.batch])
            self.multi_node_flag = False
            self.predict_path = ''.join(['/hoya_src_root/'+node_id+'/common/predict'])

            conf_data = {}
            conf_data['node_id'] = _node_id

            logging.info("model_path : " + str(self.model_path))
            logging.info("hidden_layers : " + str(self.hidden_layers))
            logging.info("activation_function : " + str(self.activation_function))
            logging.info("batch_size : " + str(self.batch_size))
            logging.info("epoch : " + str(self.epoch))
            logging.info("model_type : " + str(self.model_type))

            data_conf_info = self.data_conf
            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, node_id, self.hidden_layers,
                                         str(self.activation_function), data_conf_info, str(self.model_predict_path),dememsion_auto_flag = self.auto_demension)
            # feed
            le = LabelEncoder()
            le.fit(self.label_values)

            file_cnt = len(parm.FILES.keys())
            dir = 'predict'
            filelist = list()
            if file_cnt > 0:
                for key, requestSingleFile in parm.FILES.items():
                    filelist.append(str(requestSingleFile._name))

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            results = dict()
            ori_list = list()
            pre_list = list()
            #self.batch_size = 5
            for filename in filelist:
                print("h5")
                #feeder = PreNodeFeedFr2Wdnn().set_for_predict(_data_conf_id)
                feeder = PreNodeFeedFr2Wdnn()

                feeder.set_for_predict(_data_conf_id)
                data_node = DataNodeFrame()
                train_data_set = data_node.load_csv_by_pandas(self.predict_path + "/" + filename)

                result_df = pd.DataFrame()

                for i in range(0, len(train_data_set.index), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]

                    predict_value = wdnn_model.predict(
                        input_fn=lambda: feeder.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, filename,
                                                                  data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                    data_set['predict_label'] = list(le.inverse_transform(predict_val_list)) #list(predict_value)

                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])
                    #pd.concat(result_df, data_set)
                    result_df = result_df.append(data_set)
                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    logging.info("wdnn eval ori list  : {0}".format(len(ori_list)))
                    logging.info("wdnn eval ori list  : {0}".format(len(pre_list)))
                #train_data_set.next()

            predict_result_dir = utils.make_and_exist_directory(self.predict_path + "/" + "result" + "/")
            predict_result_filename = predict_result_dir + "result_" + strftime("%Y-%m-%d-%H:%M:%S",
                                                                                gmtime()) + ".csv"
            result_df.to_csv(predict_result_filename)

            logging.info("eval end")
            return json.loads(result_df.to_json())
        except Exception as e:
            logging.error("Wdnn predict error {0}".format(e))

            raise Exception(e)
    def run(self, conf_data):
        """
        Wide and Deep Network Training 
        :param : conf_data
        :return: None
        """
        logging.info("NeuralNetNodeWdnn Run called")

        try:
            self._init_node_parm(conf_data['node_id'])

            if self.train == False: #Train Value 가 False면 훈련 안함
                 return None

            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)
            #Model Path
            #self.model_path = utils.get_model_path(nnid, ver, netconf_node)
            #Set Data Feeder
            self.cls_pool = conf_data['cls_pool'] # Data feeder

            # set batch
            self.load_batch = self.get_eval_batch(conf_data['node_id']) #Train이 Y인것 가져오기 Eval Flag가 Y인거 가져오기
            self.train_batch, self.batch = self.make_batch(conf_data['node_id'])
            #self.before_train_batch = self.get_before_make_batch(conf_data['node_id'], self.batch)  # before train batch

            #if self.before_train_batch != None:
            #    self.model_train_before_path = ''.join([self.model_path+'/'+str(self.before_train_batch.nn_batch_ver_id)])
            logging.info("Wdnn Train get batch -> {0}".format(self.batch))
            logging.info("Wdnn Train get batch -> {0}".format(self.load_batch))
            if self.train_batch == None :
                self.model_train_path = ''.join([self.model_path+'/'+self.batch])
            else :
                self.model_train_path = ''.join([self.model_path + '/' + self.train_batch])

            #Todo Eval flag 보도록 고치고
            #    "nn_wf_ver_id": self.wf_ver, "nn_batch_ver_id": self.batch}
            config = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": self.net_ver,
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config)

            if self.load_batch  != self.batch:
                src = ''.join([self.model_path+'/'+self.load_batch])
                dst =  self.model_train_path
                utils.copy_all(src, dst)


            #Optimizer and learning rate
            #self._optimizer_type = self.optimizer_type
            #self._learning_rates = self.learning_rate
            logging.info("model_path : {0} ".format(self.model_path))
            logging.info("hidden_layers : {0} ".format(self.hidden_layers))
            logging.info("activation_function : {0} ".format(self.activation_function))
            logging.info("batch_size : {0} ".format(self.batch_size))
            logging.info("epoch : {0} ".format(self.epoch))
            logging.info("model_type : {0} ".format(self.model_type))
            logging.info("optimizer_type : {0} ".format(self.optimizer_type))
            logging.info("learning_rates : {0} ".format(self.learning_rates))


            data_conf_info = self.data_conf

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'],self.hidden_layers,str(self.activation_function),data_conf_info, str(self.model_train_path),self.train, self.auto_demension)

            #feed
            # TODO file이 여러개면 어떻하지?
            # get prev node for load data
            data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
            train_data_set = self.cls_pool[data_node_name[0]] #get filename
            file_queue  = str(train_data_set.input_paths[0]) #get file_name

            #file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요
            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            #multi Feeder modified
            multi_read_flag = self.multi_read_flag

            #validation_monitor = _LossCheckerHook(acc_result)
            train_cnt = 5



            #customsMonitor = EarlyStoppingHook()
            if multi_read_flag == True:
                logging.info("Reading tfrecord")
                for index, fn in enumerate(train_data_set.input_paths):
                    _num_tfrecords_files += self.generator_len(
                        tf.python_io.tf_record_iterator(fn))  # get length of generators
                logging.info("total loop " + str(math.ceil(_num_tfrecords_files/_batch_size)) )

                for index in range(int(math.ceil(_num_tfrecords_files/_batch_size))):
                    for i in range(train_cnt):
                        logging.info("number of for loop " + str(index))
                        train_result = wdnn_model.fit(input_fn=lambda: train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue,_batch_size), steps=self.epoch)
                        #train_result = wdnn_model.fit(
                        #    input_fn=lambda: train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                        #                                             _batch_size), steps=self.epoch)
                        eval_result = wdnn_model.evaluate(
                            input_fn=lambda: train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                     _batch_size), steps=200)
                        acc = eval_result['accuracy']
                        loss = eval_result['loss']
                        acc_result.loss_info["loss"].append(str(eval_result['loss']))
                        acc_result.acc_info["acc"].append(str(eval_result['accuracy']))


            else:
                #Todo H5
                # train per files in folder h5용
                logging.info("Training Wide and Deep from Reading hdf5")
                while(train_data_set.has_next()) :

                    for i in range(0, train_data_set.data_size(), self.batch_size): #크게 한번 도는거
                        logging.info("Training WDNN Total Count {0} out of {1}".format(i+self.batch_size, train_data_set.data_size()))
                        data_set = train_data_set[i:i + self.batch_size]

                        for t_i in range(train_cnt):
                            logging.info(
                                "Training WDNN Train Count {0} out of {1}".format(t_i, train_cnt))
                            #data_set = train_data_set[i:i + self.batch_size]
                            if i == 0:
                                eval_data_Set = data_set
                            train_result = wdnn_model.fit(
                                input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                          data_set,data_conf_info), steps=self.epoch)
                            eval_result = wdnn_model.evaluate(
                                input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                          data_set, data_conf_info), steps=200)
                            logging.info("wdnn training complete count from h5 : {0} ".format(len(data_set)))

                            acc = eval_result['accuracy']
                            loss = eval_result['loss']
                            acc_result.loss_info["loss"].append(str(eval_result['loss']))
                            acc_result.acc_info["acc"].append(str(eval_result['accuracy']))
                            #train_result = wdnn_model.fit(
                            #    input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                            #                                              data_set, data_conf_info), steps=self.epoch)

                            logging.info("Traing Result -> {0}".format(train_result))

                    train_data_set.next()

                print("end")
                self.save_accloss_info(acc_result)
            return None
        except Exception as e:
            logging.info("[Wide and Deep Train Process] : {0}".format(e))
            raise Exception(e)
    def eval(self, node_id, conf_data, data=None, result=None):
        """
            Tensorflow Wide and Deep Network Eval Method
        :param node_id:
        :param parm:
        :return: None
        """
        logging.info("eval_starting ------> {0}".format(node_id))
        try:
            #self._init_node_parm(conf_data['node_id'])
            #conf_data['node_id']
            #self._init_node_parm(conf_data['node_id'])
            self._init_node_parm(conf_data.get('nn_id') + "_" + conf_data.get('wf_ver')+ "_" + "netconf_node")
            self.cls_pool_all = conf_data['cls_pool']  # Data feeder


            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
            train = TrainSummaryInfo(conf=config)
            print(config)
            self.batch_eval = self.get_eval_batch(node_id)
            #print(train)
            self.model_eval_path = ''.join([self.model_path + '/' + self.batch])


            for _k, _v in self.cls_pool_all.items():
                if 'test' in _k:
                    self.cls_pool = _v

                if 'evaldata' in _k:
                    self.multi_node_flag = _v.multi_node_flag

            #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
            logging.info("model_path : {0}".format(self.model_path))
            logging.info("hidden_layers : {0}".format(self.hidden_layers))
            logging.info("activation_function : {0}".format(self.activation_function))
            logging.info("batch_size : {0}".format(self.batch_size))
            logging.info("epoch : {0}".format(self.epoch))
            logging.info("model_type : {0}".format(self.model_type))
            logging.info("auto_demension : {0}".format(self.auto_demension))

            config_acc = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": conf_data.get('wf_ver'),
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config_acc)

            data_conf_info = self.data_conf

            #validation_monitor = _LossCheckerHook(acc_result)

            # make wide & deep modelnot
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'], self.hidden_layers,
                                         str(self.activation_function), data_conf_info, str(self.model_eval_path),
                                         self.train, self.auto_demension)

            #, self.train, self.auto_demension

            # feed
            # TODO file이 여러개면 어떻하지?
            # get prev node for load data
            #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
            #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
            train_data_set = self.cls_pool  # get filename
            file_queue = str(train_data_set.input_paths[0])  # get file_name

            # file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # Todo H5
            # train per files in folder h5용
            # if multi_file flag = no이면 기본이 h5임
            try:
                results = dict()
                ori_list = list()
                pre_list = list()

                while (train_data_set.has_next()):
                    logging.info("Wdnn eval process from h5")
                    # 파일이 하나 돌때마다
                    # for 배치사이즈와 파일의 총갯수를 가져다가 돌린다. -> 마지막에 뭐가 있을지 구분한다.
                    # 파일에 iter를 넣으면 배치만큼 가져오는 fn이 있음 그걸 __itemd에 넣고
                    # Input 펑션에서 multi를 vk판단해서 col와 ca를 구분한다.(이걸 배치마다 할 필요가 있나?)
                    # -> 그러면서 피팅
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    # eval should be one line predict
                    #self.batch_size = 2

                    for i in range(0, train_data_set.data_size(), self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]

                        eval_result = wdnn_model.evaluate(
                           input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                     data_set, data_conf_info), steps=200)
                        #print("model fitting h5 " + str(data_set))

                        acc = eval_result['accuracy']
                        loss = eval_result['loss']
                        acc_result.loss_info["loss"].append(str(eval_result['loss']))
                        acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                        predict_value = wdnn_model.predict(
                            input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                      data_set, data_conf_info))

                        data_set_count = len(data_set.index)
                        predict_val_list = [_pv for _pv in predict_value]
                        predict_val_count = len(predict_val_list)

                        if (data_set_count != predict_val_count):
                            logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                            raise ValueError(
                                'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                        data_set['predict_label'] = predict_val_list #list(predict_value)
                        predict_y = list(data_set['predict_label'])


                        ori_list.extend(data_set[self.label].values.tolist())
                        pre_list.extend(list(data_set['predict_label']))

                        # model fitting
                        logging.info("wdnn eval ori list  : {0}".format(len(ori_list)) )
                        logging.info("wdnn eval pre list  : {0}".format(len(pre_list)) )

                    train_data_set.next()

                #TODO : 앞으로 옮기자
                train.set_nn_batch_ver_id(self.batch_eval)
                if self.model_type == "regression":
                    results['ori'] = ori_list
                    results['pre'] = pre_list
                    train.set_result_info(ori_list, pre_list)

                if (self.model_type == "category" or self.model_type == "deep"):
                    # tfrecord는 여기서 Label을 변경한다. 나중에 꺼낼때 답이 없음 Tensor 객체로 추출되기 때문에 그러나 H5는 feeder에서 변환해주자
                    le = LabelEncoder()
                    le.fit(self.label_values)

                    for _i, _ori in enumerate(ori_list):
                        #return_value = self.labels[np.argmax(model.predict(X_train))]
                        train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
                #return self.batch
            except Exception as e:
                print("eval error")
                print(e)
                raise Exception(e)

            logging.info("eval end")
        except Exception as oe:
            logging.info(oe)
            raise Exception(e)
        return train
Beispiel #10
0
    def predict(self, node_id,ver, parm, data=None, result=None):

        """ Wdnn predict 
            batchlist info에서 active flag가 Y인 Model을 가져와서 예측을 함 

        Args:
          params: 
            * node_id
            * conf_data

        Returns:
            none

        Raises:INFO

        Example

        """
        try:
            logging.info("wdnn predict_start nnid : {0}".format(node_id))
            if ver == 'active':
                self.batch, wf_ver = self.get_active_batch2(node_id)
                #self.model_predict_path = ''.join([self.model_path + '/' + self.batch])
            else:
                wf_ver = ver
                _nn_ver_id= node_id + "_" + wf_ver + "_" + "netconf_node"
                self.batch = self.get_active_batch(_nn_ver_id)  # Train이 Y인것 가져오기 Eval Flag가 Y인거 가져오기

            _node_id = node_id + "_" + wf_ver+ "_" + "netconf_node"
            _data_conf_id = node_id + "_" + wf_ver + "_dataconf_node"
            self._init_node_parm(_node_id)


            graph = NNCommonManager().get_nn_node_name(node_id)
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(node_id, wf_ver, netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":node_id, "nn_wf_ver_id":ver}
            #train = TrainSummaryInfo(conf=config)
            self.model_predict_path = ''.join([self.model_path + '/' + self.batch])
            self.multi_node_flag = False
            self.predict_path = ''.join(['/hoya_src_root/'+node_id+'/common/predict'])

            conf_data = {}
            conf_data['node_id'] = _node_id

            logging.info("model_path : " + str(self.model_path))
            logging.info("hidden_layers : " + str(self.hidden_layers))
            logging.info("activation_function : " + str(self.activation_function))
            logging.info("batch_size : " + str(self.batch_size))
            logging.info("epoch : " + str(self.epoch))
            logging.info("model_type : " + str(self.model_type))

            data_conf_info = self.data_conf
            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, node_id, self.hidden_layers,
                                         str(self.activation_function), data_conf_info, str(self.model_predict_path),dememsion_auto_flag = self.auto_demension)
            # feed
            le = LabelEncoder()
            le.fit(self.label_values)

            file_cnt = len(parm.FILES.keys())
            dir = 'predict'
            filelist = list()
            if file_cnt > 0:
                for key, requestSingleFile in parm.FILES.items():
                    filelist.append(str(requestSingleFile._name))

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            results = dict()
            ori_list = list()
            pre_list = list()
            #self.batch_size = 5
            for filename in filelist:
                print("h5")
                #feeder = PreNodeFeedFr2Wdnn().set_for_predict(_data_conf_id)
                feeder = PreNodeFeedFr2Wdnn()

                feeder.set_for_predict(_data_conf_id)
                data_node = DataNodeFrame()
                train_data_set = data_node.load_csv_by_pandas(self.predict_path + "/" + filename)

                result_df = pd.DataFrame()

                for i in range(0, len(train_data_set.index), self.batch_size):

                    data_set = train_data_set[i:i + self.batch_size]

                    predict_value = wdnn_model.predict(
                        input_fn=lambda: feeder.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, filename,
                                                                  data_set, data_conf_info))

                    data_set_count = len(data_set.index)
                    predict_val_list = [_pv for _pv in predict_value]
                    predict_val_count = len(predict_val_list)

                    if (data_set_count != predict_val_count):
                        logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                        raise ValueError(
                            'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                    data_set['predict_label'] = list(le.inverse_transform(predict_val_list)) #list(predict_value)

                    #_predict = list(predict_value)
                    predict_y = list(data_set['predict_label'])
                    #pd.concat(result_df, data_set)
                    result_df = result_df.append(data_set)
                    ori_list.extend(data_set[self.label].values.tolist())
                    pre_list.extend(list(data_set['predict_label']))

                    logging.info("wdnn eval ori list  : {0}".format(len(ori_list)))
                    logging.info("wdnn eval ori list  : {0}".format(len(pre_list)))
                #train_data_set.next()

            predict_result_dir = utils.make_and_exist_directory(self.predict_path + "/" + "result" + "/")
            predict_result_filename = predict_result_dir + "result_" + strftime("%Y-%m-%d-%H:%M:%S",
                                                                                gmtime()) + ".csv"
            result_df.to_csv(predict_result_filename)

            logging.info("eval end")
            return json.loads(result_df.to_json())
        except Exception as e:
            logging.error("Wdnn predict error {0}".format(e))

            raise Exception(e)
Beispiel #11
0
    def run(self, conf_data):
        """
        Wide and Deep Network Training 
        :param : conf_data
        :return: None
        """
        logging.info("NeuralNetNodeWdnn Run called")

        try:
            self._init_node_parm(conf_data['node_id'])

            if self.train == False: #Train Value 가 False면 훈련 안함
                 return None

            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)
            #Model Path
            #self.model_path = utils.get_model_path(nnid, ver, netconf_node)
            #Set Data Feeder
            self.cls_pool = conf_data['cls_pool'] # Data feeder

            # set batch
            self.load_batch = self.get_eval_batch(conf_data['node_id']) #Train이 Y인것 가져오기 Eval Flag가 Y인거 가져오기
            self.train_batch, self.batch = self.make_batch(conf_data['node_id'])
            #self.before_train_batch = self.get_before_make_batch(conf_data['node_id'], self.batch)  # before train batch

            #if self.before_train_batch != None:
            #    self.model_train_before_path = ''.join([self.model_path+'/'+str(self.before_train_batch.nn_batch_ver_id)])
            logging.info("Wdnn Train get batch -> {0}".format(self.batch))
            logging.info("Wdnn Train get batch -> {0}".format(self.load_batch))
            if self.train_batch == None :
                self.model_train_path = ''.join([self.model_path+'/'+self.batch])
            else :
                self.model_train_path = ''.join([self.model_path + '/' + self.train_batch])

            #Todo Eval flag 보도록 고치고
            #    "nn_wf_ver_id": self.wf_ver, "nn_batch_ver_id": self.batch}
            config = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": self.net_ver,
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config)

            if self.load_batch  != self.batch:
                src = ''.join([self.model_path+'/'+self.load_batch])
                dst =  self.model_train_path
                utils.copy_all(src, dst)


            #Optimizer and learning rate
            #self._optimizer_type = self.optimizer_type
            #self._learning_rates = self.learning_rate
            logging.info("model_path : {0} ".format(self.model_path))
            logging.info("hidden_layers : {0} ".format(self.hidden_layers))
            logging.info("activation_function : {0} ".format(self.activation_function))
            logging.info("batch_size : {0} ".format(self.batch_size))
            logging.info("epoch : {0} ".format(self.epoch))
            logging.info("model_type : {0} ".format(self.model_type))
            logging.info("optimizer_type : {0} ".format(self.optimizer_type))
            logging.info("learning_rates : {0} ".format(self.learning_rates))


            data_conf_info = self.data_conf

            # make wide & deep model
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'],self.hidden_layers,str(self.activation_function),data_conf_info, str(self.model_train_path),self.train, self.auto_demension)

            #feed
            # TODO file이 여러개면 어떻하지?
            # get prev node for load data
            data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
            train_data_set = self.cls_pool[data_node_name[0]] #get filename
            file_queue  = str(train_data_set.input_paths[0]) #get file_name

            #file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요
            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            #multi Feeder modified
            multi_read_flag = self.multi_read_flag

            #validation_monitor = _LossCheckerHook(acc_result)
            train_cnt = 5



            #customsMonitor = EarlyStoppingHook()
            if multi_read_flag == True:
                logging.info("Reading tfrecord")
                for index, fn in enumerate(train_data_set.input_paths):
                    _num_tfrecords_files += self.generator_len(
                        tf.python_io.tf_record_iterator(fn))  # get length of generators
                logging.info("total loop " + str(math.ceil(_num_tfrecords_files/_batch_size)) )

                for index in range(int(math.ceil(_num_tfrecords_files/_batch_size))):
                    for i in range(train_cnt):
                        logging.info("number of for loop " + str(index))
                        train_result = wdnn_model.fit(input_fn=lambda: train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue,_batch_size), steps=self.epoch)
                        #train_result = wdnn_model.fit(
                        #    input_fn=lambda: train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                        #                                             _batch_size), steps=self.epoch)
                        eval_result = wdnn_model.evaluate(
                            input_fn=lambda: train_data_set.input_fn(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                     _batch_size), steps=200)
                        acc = eval_result['accuracy']
                        loss = eval_result['loss']
                        acc_result.loss_info["loss"].append(str(eval_result['loss']))
                        acc_result.acc_info["acc"].append(str(eval_result['accuracy']))


            else:
                #Todo H5
                # train per files in folder h5용
                logging.info("Training Wide and Deep from Reading hdf5")
                while(train_data_set.has_next()) :

                    for i in range(0, train_data_set.data_size(), self.batch_size): #크게 한번 도는거
                        logging.info("Training WDNN Total Count {0} out of {1}".format(i+self.batch_size, train_data_set.data_size()))
                        data_set = train_data_set[i:i + self.batch_size]

                        for t_i in range(train_cnt):
                            logging.info(
                                "Training WDNN Train Count {0} out of {1}".format(t_i, train_cnt))
                            #data_set = train_data_set[i:i + self.batch_size]
                            if i == 0:
                                eval_data_Set = data_set
                            train_result = wdnn_model.fit(
                                input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                          data_set,data_conf_info), steps=self.epoch)
                            eval_result = wdnn_model.evaluate(
                                input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                          data_set, data_conf_info), steps=200)
                            logging.info("wdnn training complete count from h5 : {0} ".format(len(data_set)))
                            if self.model_type == 'regression':
                                acc = eval_result['loss']
                                loss = eval_result['loss']
                                acc_result.loss_info["loss"].append(str(eval_result['loss']))
                                acc_result.acc_info["acc"].append(str(eval_result['loss']))
                            else:
                                acc = eval_result['accuracy']
                                loss = eval_result['loss']
                                acc_result.loss_info["loss"].append(str(eval_result['loss']))
                                acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                            #acc_result.loss_info["loss"].append(str(eval_result['loss']))
                            #acc_result.acc_info["acc"].append(str(eval_result['accuracy']))
                            #train_result = wdnn_model.fit(
                            #    input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                            #                                              data_set, data_conf_info), steps=self.epoch)

                            logging.info("Traing Result -> {0}".format(train_result))

                    train_data_set.next()

                print("end")
                self.save_accloss_info(acc_result)
            return None
        except Exception as e:
            logging.info("[Wide and Deep Train Process] : {0}".format(e))
            raise Exception(e)
Beispiel #12
0
    def eval(self, node_id, conf_data, data=None, result=None):
        """
            Tensorflow Wide and Deep Network Eval Method
        :param node_id:
        :param parm:
        :return: None
        """
        logging.info("eval_starting ------> {0}".format(node_id))
        try:
            #self._init_node_parm(conf_data['node_id'])
            #conf_data['node_id']
            #self._init_node_parm(conf_data['node_id'])
            self._init_node_parm(conf_data.get('nn_id') + "_" + conf_data.get('wf_ver')+ "_" + "netconf_node")
            self.cls_pool_all = conf_data['cls_pool']  # Data feeder


            graph = NNCommonManager().get_nn_node_name(conf_data['nn_id'])
            for net in graph:
                if net['fields']['graph_node'] == 'netconf_node':
                    netconf_node = net['fields']['graph_node_name']
            self.model_path = utils.get_model_path(conf_data['nn_id'], conf_data['wf_ver'], netconf_node)

            config = {"type": self.model_type, "labels": self.label_values, "nn_id":conf_data.get('nn_id'), "nn_wf_ver_id":conf_data.get('wf_ver')}
            train = TrainSummaryInfo(conf=config)
            print(config)
            self.batch_eval = self.get_eval_batch(node_id)
            #print(train)
            self.model_eval_path = ''.join([self.model_path + '/' + self.batch])


            for _k, _v in self.cls_pool_all.items():
                if 'test' in _k:
                    self.cls_pool = _v

                if 'evaldata' in _k:
                    self.multi_node_flag = _v.multi_node_flag

            #conf_data['cls_pool'].get('nn00001_1_pre_feed_fr2wdnn_test')
            logging.info("model_path : {0}".format(self.model_path))
            logging.info("hidden_layers : {0}".format(self.hidden_layers))
            logging.info("activation_function : {0}".format(self.activation_function))
            logging.info("batch_size : {0}".format(self.batch_size))
            logging.info("epoch : {0}".format(self.epoch))
            logging.info("model_type : {0}".format(self.model_type))
            logging.info("auto_demension : {0}".format(self.auto_demension))

            config_acc = {"nn_id": conf_data['node_id'], "nn_wf_ver_id": conf_data.get('wf_ver'),
                      "nn_batch_ver_id": self.batch}
            acc_result = TrainSummaryAccLossInfo(config_acc)

            data_conf_info = self.data_conf

            #validation_monitor = _LossCheckerHook(acc_result)

            # make wide & deep modelnot
            wdnn = NeuralCommonWdnn()
            wdnn_model = wdnn.wdnn_build(self.model_type, conf_data['node_id'], self.hidden_layers,
                                         str(self.activation_function), data_conf_info, str(self.model_eval_path),
                                         self.train, self.auto_demension)

            #, self.train, self.auto_demension

            # feed
            # TODO file이 여러개면 어떻하지?
            # get prev node for load data
            #data_node_name = self._get_backward_node_with_type(conf_data['node_id'], 'preprocess')
            #train_data_set = self.cls_pool[data_node_name[0]]  # get filename
            train_data_set = self.cls_pool  # get filename
            file_queue = str(train_data_set.input_paths[0])  # get file_name

            # file을 돌면서 최대 Row를 전부 들고 옴 tfrecord 총 record갯수 가져오는 방법필요

            _batch_size = self.batch_size
            _num_tfrecords_files = 0

            # multi Feeder modified
            multi_read_flag = self.multi_read_flag

            # Todo H5
            # train per files in folder h5용
            # if multi_file flag = no이면 기본이 h5임
            try:
                results = dict()
                ori_list = list()
                pre_list = list()

                while (train_data_set.has_next()):
                    logging.info("Wdnn eval process from h5")
                    # 파일이 하나 돌때마다
                    # for 배치사이즈와 파일의 총갯수를 가져다가 돌린다. -> 마지막에 뭐가 있을지 구분한다.
                    # 파일에 iter를 넣으면 배치만큼 가져오는 fn이 있음 그걸 __itemd에 넣고
                    # Input 펑션에서 multi를 vk판단해서 col와 ca를 구분한다.(이걸 배치마다 할 필요가 있나?)
                    # -> 그러면서 피팅
                    #
                    # # Iteration is to improve for Model Accuracy

                    # Per Line in file
                    # eval should be one line predict
                    #self.batch_size = 2

                    for i in range(0, train_data_set.data_size(), self.batch_size):

                        data_set = train_data_set[i:i + self.batch_size]

                        eval_result = wdnn_model.evaluate(
                           input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                     data_set, data_conf_info), steps=200)
                        #print("model fitting h5 " + str(data_set))

                        if self.model_type == 'regression':
                            acc = eval_result['loss']
                            loss = eval_result['loss']
                            acc_result.loss_info["loss"].append(str(eval_result['loss']))
                            acc_result.acc_info["acc"].append(str(eval_result['loss']))
                        else:
                            acc = eval_result['accuracy']
                            loss = eval_result['loss']
                            acc_result.loss_info["loss"].append(str(eval_result['loss']))
                            acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                        # acc = eval_result['accuracy']
                        # loss = eval_result['loss']
                        # acc_result.loss_info["loss"].append(str(eval_result['loss']))
                        # acc_result.acc_info["acc"].append(str(eval_result['accuracy']))

                        predict_value = wdnn_model.predict(
                            input_fn=lambda: train_data_set.input_fn2(tf.contrib.learn.ModeKeys.TRAIN, file_queue,
                                                                      data_set, data_conf_info))

                        data_set_count = len(data_set.index)
                        predict_val_list = [_pv for _pv in predict_value]
                        predict_val_count = len(predict_val_list)

                        if (data_set_count != predict_val_count):
                            logging.error("wdnn eval error check : dataframe count({0}) predict count({1})".format(data_set_count, predict_val_count))
                            raise ValueError(
                                'eval data validation check error : dataframe and predict count is different(neuralnet_node_wdnn.eval)')

                        data_set['predict_label'] = predict_val_list #list(predict_value)
                        predict_y = list(data_set['predict_label'])


                        ori_list.extend(data_set[self.label].values.tolist())
                        pre_list.extend(list(data_set['predict_label']))

                        # model fitting
                        logging.info("wdnn eval ori list  : {0}".format(len(ori_list)) )
                        logging.info("wdnn eval pre list  : {0}".format(len(pre_list)) )

                    train_data_set.next()

                #TODO : 앞으로 옮기자
                train.set_nn_batch_ver_id(self.batch_eval)
                if self.model_type == "regression":
                    results['ori'] = ori_list
                    results['pre'] = pre_list
                    train.set_result_info(ori_list, pre_list)

                if (self.model_type == "category" or self.model_type == "deep"):
                    # tfrecord는 여기서 Label을 변경한다. 나중에 꺼낼때 답이 없음 Tensor 객체로 추출되기 때문에 그러나 H5는 feeder에서 변환해주자
                    le = LabelEncoder()
                    le.fit(self.label_values)

                    for _i, _ori in enumerate(ori_list):
                        #return_value = self.labels[np.argmax(model.predict(X_train))]
                        train.set_result_info(str(_ori), str(le.inverse_transform(pre_list[_i])))
                #return self.batch
            except Exception as e:
                print("eval error")
                print(e)
                raise Exception(e)

            logging.info("eval end")
        except Exception as oe:
            logging.info(oe)
            raise Exception(e)
        return train