def __init__(self, normalized=False):
    # Read dataset parameters from the INI file next to this module.
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'DataSetConfig.ini'))
    self._tags = ast.literal_eval(config['Diagnosis']['tags'])
    self._sample_rate = int(config['Diagnosis']['sample_rate'])
    self._frame_size = int(config['Diagnosis']['frame_size'])
    self._step = int(config['Diagnosis']['step'])
    self._npy_path = config['path']['npy']
    if not os.path.exists(self._npy_path):
        os.makedirs(self._npy_path)
    ddb = Query()
    self._normalized = normalized
    if normalized:
        self._normalize_param = ddb.get_normalize_parm(self._tags)
    # Collect valid undisrupted shots, plus disruptive shots with a
    # current-quench time of at least 0.05 s and flat-top current >= 110.
    my_query = {'IsValidShot': True, 'IsDisrupt': False}
    self._shots = ddb.query(my_query)
    my_query = {
        'IsValidShot': True,
        'IsDisrupt': True,
        'CqTime': {'$gte': 0.05},
        'IpFlat': {'$gte': 110}
    }
    self._shots += ddb.query(my_query)
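# A hypothetical DataSetConfig.ini matching the keys read above; the values
# are illustrative, not the project's actual configuration. The tag list
# must be a Python literal, since it is parsed with ast.literal_eval:
#
# [Diagnosis]
# tags = ['\\ip', '\\Bt', '\\axuv_ca_01']
# sample_rate = 1000
# frame_size = 100
# step = 10
#
# [path]
# npy = ./npy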
def get(self):
    """
    Write the list of shots in the dataset to log/ShotsInDataset.txt,
    marking whether each shot was used for training (1/0) and whether
    it is disruptive ('d') or undisrupted ('u').
    """
    train_test_shots = list()
    with open(os.path.join('log', 'ShotsUsed4Training.txt'), 'r') as f:
        for shot in f.readlines():
            train_test_shots.append(int(shot))
    train_test_shots.sort(reverse=False)
    ddb = Query()
    shots = list()
    with open(os.path.join('log', 'ShotsInDataset.txt'), 'w') as f:
        # Disruptive shots: current-quench time >= 0.15 s and
        # flat-top current >= 110.
        my_query = {
            'IsValidShot': True,
            'IsDisrupt': True,
            'CqTime': {'$gte': 0.15},
            'IpFlat': {'$gte': 110}
        }
        for shot in ddb.query(my_query):
            if os.path.exists(os.path.join(self._npy_path, '{}'.format(shot))):
                shots.append(shot)
        shots.sort(reverse=False)
        for shot in shots:
            if shot in train_test_shots:
                print('{} 1 d'.format(shot), file=f)
            else:
                print('{} 0 d'.format(shot), file=f)
        shots.clear()
        # Undisrupted shots: flat-top current >= 110.
        my_query = {
            'IsValidShot': True,
            'IsDisrupt': False,
            'IpFlat': {'$gte': 110}
        }
        for shot in ddb.query(my_query):
            if os.path.exists(os.path.join(self._npy_path, '{}'.format(shot))):
                shots.append(shot)
        shots.sort(reverse=False)
        for shot in shots:
            print('{} 0 u'.format(shot), file=f)
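# A minimal sketch of reading the list written by get() back in. The
# three-column format (shot number, used-for-training flag, 'd' for
# disruptive / 'u' for undisrupted) follows the print calls above:
import os

shot_table = []
with open(os.path.join('log', 'ShotsInDataset.txt'), 'r') as f:
    for line in f:
        shot, used, kind = line.split()
        shot_table.append((int(shot), used == '1', kind))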
import os
import numpy as np
from DDB.Service import Query

# Assumed setup: `db` is a DDB Query handle and `valid_query` selects all
# valid shots; neither was defined in the original snippet.
db = Query()
valid_query = {'IsValidShot': True}

break_query = {
    'IsValidShot': True,
    'IsDisrupt': True,
    'IpFlat': {'$gt': 100},
    'CqTime': {'$gt': 0.2}
}
normal_query = {
    'IsValidShot': True,
    'IsDisrupt': False,
    'IpFlat': {'$gt': 100},
    'RampDownTime': {'$gt': 0.2}
}
validshot = db.query(valid_query)
breakshot = db.query(break_query)
normalshot = db.query(normal_query)

# Cache the three shot lists next to this file.
root_path = os.path.join(os.path.dirname(__file__), 'data')
if not os.path.exists(root_path):
    os.makedirs(root_path)
np.save(os.path.join(root_path, 'Valid.npy'), validshot)
np.save(os.path.join(root_path, 'Break.npy'), breakshot)
np.save(os.path.join(root_path, 'Normal.npy'), normalshot)
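# Reading the cached lists back: if the query results are dicts rather than
# plain numbers, np.save stores them as an object array and np.load needs
# allow_pickle=True (a sketch under that assumption):
breakshot = np.load(os.path.join(root_path, 'Break.npy'), allow_pickle=True).tolist()
normalshot = np.load(os.path.join(root_path, 'Normal.npy'), allow_pickle=True).tolist()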
import DDB
from pymongo import MongoClient
from DDB.Service import Query
from DDB.Data import Reader
import random
from scipy import signal

ddb = Query()
# Valid undisrupted shots, plus disruptive shots with a current-quench time
# of at least 0.05 s and flat-top current >= 110.
my_query = {'IsValidShot': True, 'IsDisrupt': False}
shots = ddb.query(my_query)
my_query = {
    'IsValidShot': True,
    'IsDisrupt': True,
    'CqTime': {'$gte': 0.05},
    'IpFlat': {'$gte': 110}
}
shots += ddb.query(my_query)

config = DDB.get_config()
database = config['output']
client = MongoClient(database['host'], int(database['port']))
db = client[database['database']]
# Collection holding the per-tag normalization parameters
# ('归一化参数' means 'normalization parameters').
param = db[database['collection'] + '归一化参数']

# tags = [r'\Bt', r'\Ihfp', r'\Ivfp', r'\MA_POL_CA01T', r'\MA_POL_CA02T',
#         r'\MA_POL_CA03T', r'\MA_POL_CA05T', r'\MA_POL_CA06T', r'\MA_POL_CA07T',
#         r'\MA_POL_CA19T', r'\MA_POL_CA20T', r'\MA_POL_CA21T', r'\MA_POL_CA22T',
#         r'\MA_POL_CA23T', r'\MA_POL_CA24T', r'\axuv_ca_01', r'\ip',
#         r'\sxr_cb_024', r'\sxr_cc_049', r'\vs_c3_aa001', r'\vs_ha_aa001']
# reader = Reader(root_path='/nas/hdf5_new')
# result = dict()
# for tag in tags:
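# The loop above is truncated in the source. One plausible completion (an
# assumption, not the original code) would gather each tag over a random
# sample of shots and store per-tag statistics in the `param` collection;
# the Reader API used here is assumed:
# for tag in tags:
#     values = np.concatenate(
#         [reader.read(shot, tag) for shot in random.sample(shots, 100)])
#     result[tag] = {'mean': float(np.mean(values)),
#                    'std': float(np.std(values))}
#     param.insert_one({'tag': tag, **result[tag]})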
def load(self):
    """
    Load the .npy frames into tf.data.Dataset objects.
    :return: training set, test set
    """
    examples_und = list()
    examples_dis = list()
    labels_und = list()
    labels_dis = list()
    ddb = Query()
    shots = list()
    # Undisrupted shots are not used for training:
    # my_query = {'IsValidShot': True, 'IsDisrupt': False}
    # for shot in ddb.query(my_query):
    #     if os.path.exists(os.path.join(self.npy_path, '{}'.format(shot))):
    #         shots.append(shot)
    #     if len(shots) >= self.shots/2:
    #         break
    my_query = {
        'IsValidShot': True,
        'IsDisrupt': True,
        'CqTime': {'$gte': 0.15},
        'IpFlat': {'$gte': 110}
    }
    for shot in ddb.query(my_query):
        if os.path.exists(os.path.join(self._npy_path, '{}'.format(shot))):
            shots.append(shot)
        if len(shots) >= self._shots_len:
            break
    # shots = np.random.choice(shots, self.shots_len)
    # Archive any previous shot list with a timestamp before writing anew.
    if not os.path.exists('log'):
        os.mkdir('log')
    if os.path.exists(os.path.join('log', 'ShotsUsed4Training.txt')):
        path = os.path.join(
            'log',
            'ShotsUsed4Training_{}.txt'.format(
                time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))))
        os.rename(os.path.join('log', 'ShotsUsed4Training.txt'), path)
    with open(os.path.join('log', 'ShotsUsed4Training.txt'), 'w') as f:
        for shot in shots:
            print(shot, file=f)
    # Each shot directory holds paired files: '...x...' for the input frames
    # and '...y...' for the labels; the last label element marks disruption.
    for shot in shots:
        file_names = [
            i for i in os.listdir(os.path.join(self._npy_path, '{}'.format(shot)))
            if 'x' in i
        ]
        for file in file_names:
            x = np.load(os.path.join(self._npy_path, '{}'.format(shot), file))
            y = np.load(os.path.join(self._npy_path, '{}'.format(shot),
                                     file.replace('x', 'y')))
            if y[-1] > 0:
                examples_dis.append(x)
                labels_dis.append(y[-1])
            else:
                examples_und.append(x)
                labels_und.append(y[-1])
    len_und = len(labels_und)
    len_dis = len(labels_dis)
    print('Length un_disruption: ', len_und,
          '\nLength disruption: ', len_dis)
    # ----------------------------------------------------------------------
    # Balancing strategy 1: oversample disruption; leave un_disruption as-is.
    # ----------------------------------------------------------------------
    # dataset_und = tf.data.Dataset.from_tensor_slices((examples_und, labels_und))
    # dataset_dis = tf.data.Dataset.from_tensor_slices((examples_dis, labels_dis))
    #
    # split_point_und = (int(len(labels_und)*self.train), int(len(labels_und)*self.test))
    # split_point_dis = (int(len(labels_dis)*self.train), int(len(labels_dis)*self.test))
    #
    # train_dataset_und = dataset_und.take(split_point_und[0])
    # test_dataset_und = dataset_und.skip(split_point_und[0]).take(split_point_und[1])
    # train_dataset_dis = dataset_dis.take(split_point_dis[0])
    # test_dataset_dis = dataset_dis.skip(split_point_dis[0]).take(split_point_dis[1])
    #
    # train_dataset_dis = train_dataset_dis.repeat(int(split_point_und[0]/split_point_dis[0]))
    #
    # train_dataset = train_dataset_und.concatenate(train_dataset_dis)
    # test_dataset = test_dataset_und.concatenate(test_dataset_dis)
    # ----------------------------------------------------------------------
    # Balancing strategy 2: repeat disruption twice and randomly subsample
    # un_disruption, giving a ratio of und/dis = 6/4.
    # ----------------------------------------------------------------------
    dataset_und = tf.data.Dataset.from_tensor_slices((examples_und, labels_und))
    dataset_dis = tf.data.Dataset.from_tensor_slices((examples_dis, labels_dis))
    dataset_und = dataset_und.shuffle(buffer_size=len_und).take(3 * len_dis)
    dataset_dis = dataset_dis.repeat(2)
    dataset = dataset_und.concatenate(dataset_dis)
    dataset = dataset.shuffle(5 * len_dis)
    train_dataset = dataset.take(int(5 * len_dis * self._train_per))
    test_dataset = dataset.skip(int(5 * len_dis * self._train_per)).take(
        int(5 * len_dis * self._test_per))
    return train_dataset, test_dataset
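# A minimal sketch of consuming the splits returned by load(); the batch
# size is an illustrative choice, not taken from the original code:
def prepare(train_dataset, test_dataset, batch_size=32):
    # load() already shuffles the combined dataset, so only batching is
    # needed before handing the splits to a model.
    return train_dataset.batch(batch_size), test_dataset.batch(batch_size)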