Beispiel #1
0
    def __init__(self, variables, nEpoch, nFeatures, nLabels, nDomains,
                 nWeigts, ptReweight, signalDataSets, backgroundDataSets):
        """Create the shared input queue and one DataSample per data set.

        Args:
            variables: Forwarded unchanged to every ``DataSample``.
            nEpoch: Stored on the instance and forwarded to every
                ``DataSample``.
            nFeatures: Length of the first queue component.
            nLabels: Length of the second queue component.
            nDomains: Length of the third queue component.
            nWeigts: Length of the fourth queue component (spelling kept
                for caller compatibility).
            ptReweight: Forwarded unchanged to every ``DataSample``.
            signalDataSets: Data sets wrapped as signal samples. Iterated
                twice, so it must be re-iterable (e.g. a list).
            backgroundDataSets: Data sets wrapped as background samples.
                Iterated twice as well.
        """
        self.nEpoch = nEpoch

        # Define input data queue.
        # Floor division keeps the sizes integral on Python 3 as well; true
        # division would hand floats to the queue and to the batch size.
        queueCapacity = 65536 * 2
        self.inputDataQueue = tf.RandomShuffleQueue(
            capacity=queueCapacity,
            min_after_dequeue=queueCapacity - 65536 // 2,
            shapes=[[nFeatures], [nLabels], [nDomains], [nWeigts]],
            dtypes=[tf.float32, tf.float32, tf.float32, tf.float32])

        batchSize = 65536 // 4

        # Add DataSample objects for each data set used.
        # Each group gets the sum of its own per-data-set scale factors.
        self.sigScaleSum = sum(
            dataSet.xsec * dataSet.rescale * dataSet.kFactor / dataSet.Nevts
            for dataSet in signalDataSets)

        self.sigDataSamples = [
            DataSample(dataSet,
                       nEpoch,
                       batchSize,
                       variables,
                       self.inputDataQueue,
                       self.sigScaleSum,
                       signal=True,
                       background=False,
                       domain=dataSet.domain,
                       ptReweight=ptReweight)
            for dataSet in signalDataSets
        ]

        # The background sum must be taken over the background data sets;
        # it was previously accumulated over the signal data sets.
        self.bgScaleSum = sum(
            dataSet.xsec * dataSet.rescale * dataSet.kFactor / dataSet.Nevts
            for dataSet in backgroundDataSets)

        self.bgDataSamples = [
            DataSample(dataSet,
                       nEpoch,
                       batchSize,
                       variables,
                       self.inputDataQueue,
                       self.bgScaleSum,
                       signal=False,
                       background=True,
                       domain=dataSet.domain,
                       ptReweight=ptReweight)
            for dataSet in backgroundDataSets
        ]
Beispiel #2
0
def read_all_labeled_samples():
    """Load every row of the ``data_samples`` table as a ``DataSample``.

    Returns:
        A list of ``DataSample(story_id, sentence, fps)`` objects, one per
        row, where ``fps`` is the row's ``fps`` value split on whitespace.
        The list is empty when the query hands back ``None``.
    """
    sql = 'select * from data_samples'
    db_results = sqlHelper.select(sql)

    # Guard against a missing result set instead of failing with a
    # TypeError when iterating None.
    if db_results is None:
        return []

    return [
        DataSample(row['story_id'], row['sentence'], row['fps'].split())
        for row in db_results
    ]
    def parse(self, files):
        """Turn each input file into a list of ``DataSample`` objects.

        Every line is split on whitespace; lines for which
        ``self._is_ignored_line(tokens, path)`` is true are skipped, and
        the remaining token lists are wrapped in ``DataSample``.

        Args:
            files: Iterable of paths to open in text read mode.

        Returns:
            A list with one entry per input file, in input order; each
            entry is the list of samples built from that file.
        """
        parsed = []

        for path in files:
            with open(path, 'r') as handle:
                token_rows = (raw.split() for raw in handle.readlines())
                kept = [
                    DataSample(tokens)
                    for tokens in token_rows
                    if not self._is_ignored_line(tokens, path)
                ]
            parsed.append(kept)

        return parsed
Beispiel #4
0
def read_all_labeled_samples_by_story():
    """Load labeled samples grouped by their story id.

    Only rows whose ``sentence`` differs from ``fps`` and whose ``fps`` is
    not the literal ``'null '`` are selected (see the query below).

    Returns:
        A dict mapping each ``story_id`` to the list of
        ``DataSample(story_id, sentence, fps)`` objects built from its rows,
        in the order the rows were returned. ``fps`` is the row's ``fps``
        value converted to ``str`` and split on whitespace. The dict is
        empty when the query hands back ``None``.
    """
    # key: story_id   value: [DataSample1, DataSample2, ...]
    samples = {}

    sql = 'select * from data_samples  where  sentence != fps and fps != \'null \''
    db_results = sqlHelper.select(sql)

    # Guard against a missing result set instead of failing with a
    # TypeError when iterating None.
    if db_results is None:
        return samples

    for row in db_results:
        story_id = row['story_id']
        sample = DataSample(story_id, row['sentence'], str(row['fps']).split())
        samples.setdefault(story_id, []).append(sample)

    return samples
Beispiel #5
0
def read_all_unlabeled_samples():
    '''
    Fetch all unlabeled samples to form the initial data.

    :return: list of ``DataSample`` objects built from the ``story_id`` and
        ``sentence`` of each selected row; empty when the query result
        is ``None``.
    '''
    sql = 'select * from unlabeled_data_samples_all ' \
          'where source =\'summary\' and team in ' \
          '(select distinct project_id from FP.data_samples_with_project) '
    rows = sqlHelper.select(sql)

    # Nothing came back from the helper: report no samples.
    if rows is None:
        return []

    return [
        DataSample(story_id=row['story_id'], sentence=row['sentence'])
        for row in rows
    ]
Beispiel #6
0
def prepare_edf_to_txt():
    """Build a ``DataSample`` for one hard-coded recording and save it.

    Returns:
        Whatever ``DataSample.save_to_file()`` returns.
    """
    # Positional arguments passed to DataSample, in order; the last one is
    # an .edf file name.
    sample_args = ('00000068', 's01_2012_02_09', '00000068_s01_a00.edf')
    return DataSample(*sample_args).save_to_file()