def __init__(self, variables, nEpoch, nFeatures, nLabels, nDomains, nWeigts, ptReweight, signalDataSets, backgroundDataSets):
    """Build the input pipeline: a shuffled TF queue fed by per-dataset readers.

    Args:
        variables: feature-variable specification forwarded to each DataSample.
        nEpoch: number of epochs each DataSample should iterate its files.
        nFeatures / nLabels / nDomains / nWeigts: per-example tensor widths
            used for the queue's fixed shapes. (NOTE(review): 'nWeigts' is a
            typo for 'nWeights', kept to preserve the call signature.)
        ptReweight: flag forwarded to each DataSample — presumably enables
            pt-spectrum reweighting; semantics live in DataSample.
        signalDataSets / backgroundDataSets: dataset descriptors exposing
            xsec, rescale, kFactor, Nevts and domain attributes.
    """
    self.nEpoch = nEpoch

    # Define the shared input queue all DataSample readers enqueue into.
    # '//' keeps the sizes integral under Python 3 ('/' would yield floats).
    self.inputDataQueue = tf.RandomShuffleQueue(
        capacity=65536 * 2,
        min_after_dequeue=65536 * 2 - 65536 // 2,
        shapes=[[nFeatures], [nLabels], [nDomains], [nWeigts]],
        dtypes=[tf.float32, tf.float32, tf.float32, tf.float32])

    # Signal normalization: sum of per-dataset cross-section weights,
    # shared by every signal DataSample so samples are weighted consistently.
    self.sigScaleSum = 0
    for dataSet in signalDataSets:
        self.sigScaleSum += dataSet.xsec * dataSet.rescale * dataSet.kFactor / dataSet.Nevts

    batchSize = 65536 // 4  # integer batch size ('/' gives a float on Python 3)

    # Add DataSample objects for each signal data set used.
    self.sigDataSamples = []
    for dataSet in signalDataSets:
        self.sigDataSamples.append(
            DataSample(dataSet, nEpoch, batchSize, variables, self.inputDataQueue,
                       self.sigScaleSum, signal=True, background=False,
                       domain=dataSet.domain, ptReweight=ptReweight))

    # BUG FIX: this sum previously iterated signalDataSets, so the background
    # samples were normalized with the *signal* scale. Iterate the background
    # data sets, mirroring the signal loop above.
    self.bgScaleSum = 0
    for dataSet in backgroundDataSets:
        self.bgScaleSum += dataSet.xsec * dataSet.rescale * dataSet.kFactor / dataSet.Nevts

    # Add DataSample objects for each background data set used.
    self.bgDataSamples = []
    for dataSet in backgroundDataSets:
        self.bgDataSamples.append(
            DataSample(dataSet, nEpoch, batchSize, variables, self.inputDataQueue,
                       self.bgScaleSum, signal=False, background=True,
                       domain=dataSet.domain, ptReweight=ptReweight))
def read_all_labeled_samples():
    """Load every row of the data_samples table as DataSample objects.

    Returns:
        list: one DataSample per row, built from the row's story_id,
        sentence, and its whitespace-split fps field.
    """
    rows = sqlHelper.select('select * from data_samples')
    return [
        DataSample(row['story_id'], row['sentence'], row['fps'].split())
        for row in rows
    ]
def parse(self, files):
    """Parse each file into DataSample objects, skipping ignored lines.

    Args:
        files: iterable of file paths to read.

    Returns:
        list: one entry per input file, each a list of DataSample built
        from the whitespace-split tokens of every non-ignored line.
    """
    parsed = []
    for path in files:
        with open(path, 'r') as handle:
            tokenized = (line.split() for line in handle.readlines())
            file_samples = [
                DataSample(tokens)
                for tokens in tokenized
                if not self._is_ignored_line(tokens, path)
            ]
        parsed.append(file_samples)
    return parsed
def read_all_labeled_samples_by_story():
    """Load labeled samples grouped by story.

    Rows whose sentence equals fps, or whose fps is the literal 'null ',
    are excluded by the query itself.

    Returns:
        dict: story_id -> [DataSample, DataSample, ...]
    """
    sql = 'select * from data_samples where sentence != fps and fps != \'null \''
    grouped = {}
    for row in sqlHelper.select(sql):
        key = row['story_id']
        sample = DataSample(key, row['sentence'], str(row['fps']).split())
        # setdefault replaces the original if/else bucket creation.
        grouped.setdefault(key, []).append(sample)
    return grouped
def read_all_unlabeled_samples():
    """Fetch all unlabeled samples to form the initial data set.

    Returns:
        list: DataSample objects (story_id + sentence only); an empty
        list when the query yields no result set.
    """
    sql = 'select * from unlabeled_data_samples_all ' \
          'where source =\'summary\' and team in ' \
          '(select distinct project_id from FP.data_samples_with_project) '
    rows = sqlHelper.select(sql)
    if rows is None:
        return []
    return [
        DataSample(story_id=row['story_id'], sentence=row['sentence'])
        for row in rows
    ]
def prepare_edf_to_txt(patient_id='00000068',
                       session='s01_2012_02_09',
                       edf_name='00000068_s01_a00.edf'):
    """Convert an EDF recording to a text file via DataSample.save_to_file().

    Generalized: the previously hard-coded recording identifiers are now
    parameters whose defaults reproduce the original behavior exactly, so
    existing no-argument callers are unaffected.

    Args:
        patient_id: patient identifier (default: the original hard-coded id).
        session: session/date identifier.
        edf_name: EDF file name within that session.

    Returns:
        Whatever DataSample.save_to_file() returns — presumably a path or
        status; semantics live in DataSample (not visible here).
    """
    ds = DataSample(patient_id, session, edf_name)
    return ds.save_to_file()