コード例 #1
0
class ScrabbleInterface(FrameworkInterface):
    """docstring for ScrabbleInterface"""
    def __init__(
        self,
        target_building,
        exp_id='none',
        conf={
            'source_buildings': ['ebu3b'],
            'source_samples_list': [5],
            'logger_postfix': 'temp',
            'seed_num': 5
        }):
        super(ScrabbleInterface, self).__init__(conf, exp_id, 'scrabble')
        self.target_building = target_building
        self.source_buildings = conf['source_buildings']
        self.sample_num_list = conf['source_samples_list']
        self.seed_num = conf['seed_num']
        if self.target_building not in self.source_buildings:
            self.source_buildings = self.source_buildings + [
                self.target_building
            ]
            self.sample_num_list = self.sample_num_list + [self.seed_num]
        conf['use_cluster_flag'] = True
        conf['use_brick_flag'] = True
        conf['negative_flag'] = True
        self.logger_postfix = conf['logger_postfix']

        column_names = ['VendorGivenName', 'BACnetName', 'BACnetDescription']

        self.building_sentence_dict = dict()
        self.building_label_dict = dict()
        self.building_tagsets_dict = dict()
        for building in self.source_buildings:
            true_tagsets = {}
            label_dict = {}
            for labeled in LabeledMetadata.objects(building=building):
                srcid = labeled.srcid
                true_tagsets[srcid] = labeled.tagsets
                fullparsing = None
                for clm in column_names:
                    one_fullparsing = [i[1] for i in labeled.fullparsing[clm]]
                    if not fullparsing:
                        fullparsing = one_fullparsing
                    else:
                        fullparsing += ['O'] + one_fullparsing
                        #  This format is alinged with the sentence
                        #  conformation rule.
                label_dict[srcid] = fullparsing

            self.building_tagsets_dict[building] = true_tagsets
            self.building_label_dict[building] = label_dict
            sentence_dict = dict()
            for raw_point in RawMetadata.objects(building=building):
                srcid = raw_point.srcid
                if srcid in true_tagsets:
                    metadata = raw_point['metadata']
                    sentence = None
                    for clm in column_names:
                        if not sentence:
                            sentence = [c for c in metadata[clm].lower()]
                        else:
                            sentence += ['\n'] + \
                                        [c for c in metadata[clm].lower()]
                    sentence_dict[srcid] = sentence
            self.building_sentence_dict[building] = sentence_dict

        # Validation of the dataset
        for building in self.source_buildings:
            for srcid, label_pairs in self.building_label_dict[building]\
                                          .items():
                assert len(label_pairs) == \
                           len(self.building_sentence_dict[building][srcid])

        self.scrabble = Scrabble(self.source_buildings, self.target_building,
                                 self.sample_num_list,
                                 self.building_sentence_dict,
                                 self.building_label_dict,
                                 self.building_tagsets_dict, conf)

    @exec_measurement
    def learn_auto(self, iter_num=1):
        params = (self.source_buildings, self.sample_num_list,
                  self.target_building)
        self.learned_srcids = []
        params = {
            'use_cluster_flag': True,
            'use_brick_flag': True,
            'negative_flag': True,
            'target_building': self.target_building,
            'building_list': self.source_buildings,
            'sample_num_list': self.scrabble.sample_num_list
        }
        #self.scrabble.char2tagset_iteration(iter_num, self.logger_postfix, *params)
        step_data = {
            'iter_num':
            0,
            'next_learning_srcids':
            self.scrabble.get_random_srcids(self.scrabble.building_srcid_dict,
                                            self.source_buildings,
                                            self.sample_num_list),
            'model_uuid':
            None
        }
        step_datas = [step_data]
        step_datas.append(
            self.scrabble.char2tagset_onestep(step_data, **params))
        pdb.set_trace()

    @exec_measurement
    def learn_auto2(self, iter_num=1):
        num_sensors_in_gray = 10000
        while num_sensors_in_gray > 0:
            new_srcids = self.zodiac.select_informative_samples_only(10)
            self.update_model(new_srcids)
            num_sensors_in_gray = self.zodiac.get_num_sensors_in_gray()
            pred_point_tagsets = self.zodiac.predict(self.target_srcids)
            for i, srcid in enumerate(self.target_srcids):
                self.pred['tagsets'][srcid] = set([pred_point_tagsets[i]])
            print(num_sensors_in_gray)
            self.evaluate()
        pdb.set_trace()
コード例 #2
0
class ScrabbleInterface(Inferencer):
    """docstring for ScrabbleInterface"""
    def __init__(self,
                 target_building,
                 target_srcids,
                 source_buildings,
                 config=None
                 ):
        super(ScrabbleInterface, self).__init__(
            target_building=target_building,
            target_srcids=target_srcids,
            source_buildings=source_buildings,
            config=config,
            framework_name='scrabble')

        if not config:
            config = {}

        # Prepare config for Scrabble object
        if 'sample_num_list' in config:
            sample_num_list = config['sample_num_list']
        else:
            sample_num_list = [0] * (len(source_buildings) + 1) # +1 for target

        if self.target_building not in self.source_buildings:
            self.source_buildings = self.source_buildings + [self.target_building]
        if len(self.source_buildings) > len(sample_num_list):
            sample_num_list.append(0)
        if 'use_cluster_flag' not in config:
            config['use_cluster_flag'] = True
        if 'use_brick_flag' not in config:
            config['use_brick_flag'] = True
        if 'negative_flag' not in config:
            config['negative_flag'] = True

        column_names = ['VendorGivenName',
                         'BACnetName',
                         'BACnetDescription']

        self.building_sentence_dict = dict()
        self.building_label_dict = dict()
        self.building_tagsets_dict = dict()
        for building in self.source_buildings:
            true_tagsets = {}
            label_dict = {}
            for labeled in self.query_labels(building=building):
                srcid = labeled.srcid
                true_tagsets[srcid] = labeled.tagsets
                fullparsing = None
                for clm in column_names:
                    one_fullparsing = [i[1] for i in labeled.fullparsing[clm]]
                    if not fullparsing:
                        fullparsing = one_fullparsing
                    else:
                        fullparsing += ['O'] + one_fullparsing
                        #  This format is alinged with the sentence 
                        #  conformation rule.
                label_dict[srcid] = fullparsing

            self.building_tagsets_dict[building] = true_tagsets
            self.building_label_dict[building] = label_dict
            sentence_dict = dict()
            for raw_point in RawMetadata.objects(building=building):
                srcid = raw_point.srcid
                if srcid in true_tagsets:
                    metadata = raw_point['metadata']
                    sentence = None
                    for clm in column_names:
                        if not sentence:
                            sentence = [c for c in metadata[clm].lower()]
                        else:
                            sentence += ['\n'] + \
                                        [c for c in metadata[clm].lower()]
                    sentence_dict[srcid]  = sentence
            self.building_sentence_dict[building] = sentence_dict

        # Validation of the dataset
        for building in self.source_buildings:
            for srcid, label_pairs in self.building_label_dict[building]\
                                          .items():
                assert len(label_pairs) == \
                           len(self.building_sentence_dict[building][srcid])

        self.scrabble = Scrabble(
            target_building=self.target_building,
            target_srcids=self.target_srcids,
            building_label_dict=self.building_label_dict,
            building_sentence_dict=self.building_sentence_dict,
            building_tagsets_dict=self.building_tagsets_dict,
            source_buildings=self.source_buildings,
            source_sample_num_list=sample_num_list,
            conf=config,
            learning_srcids=[])

    def learn_auto(self, iter_num=1):
        params = (self.source_buildings,
                  self.sample_num_list,
                  self.target_building)
        self.learned_srcids = []
        params = {
            'use_cluster_flag': True,
            'use_brick_flag': True,
            'negative_flag': True,
            'target_building': self.target_building,
            'building_list': self.source_buildings,
            'sample_num_list': self.scrabble.sample_num_list
            }
        #self.scrabble.char2tagset_iteration(iter_num, self.logger_postfix, *params)
        step_data = {'iter_num':0,
                     'next_learning_srcids': self.scrabble.get_random_srcids(
                                            self.scrabble.building_srcid_dict,
                                            self.source_buildings,
                                            self.sample_num_list),
                     'model_uuid': None}
        step_datas = [step_data]
        step_datas.append(self.scrabble.char2tagset_onestep(step_data,
                                                            **params))

    def update_model(self, srcids):
        self.scrabble.update_model(srcids)

    def predict(self, target_srcids=None):
        return self.scrabble.predict(target_srcids)

    def predict_proba(self, target_srcids=None):
        return self.scrabble.predict_proba(target_srcids)

    def select_informative_samples(self, sample_num=10):
        return self.scrabble.select_informative_samples_only(sample_num)