def default_setting(self):
    """
    A default setting for data loading
    :return: the assembled data-setting dict (also stored on self.data_dict)
    """
    # Keep original labels: no zeroing of unknown labels (e.g., w.r.t.
    # semi-supervised datasets) and no binarisation of relevance.
    unknown_as_zero = False
    binary_rele = False
    # Presort every split; process one query per batch.
    train_presort, validation_presort, test_presort = True, True, True
    train_batch_size, validation_batch_size, test_batch_size = 1, 1, 1
    scale_data, scaler_id, scaler_level = get_default_scaler_setting(data_id=self.data_id)

    # Data settings that are rarely changed.
    self.data_dict = {
        'data_id': self.data_id, 'dir_data': self.dir_data,
        'min_docs': 10, 'min_rele': 1,
        'scale_data': scale_data, 'scaler_id': scaler_id, 'scaler_level': scaler_level,
        'train_presort': train_presort,
        'validation_presort': validation_presort,
        'test_presort': test_presort,
        'train_batch_size': train_batch_size,
        'validation_batch_size': validation_batch_size,
        'test_batch_size': test_batch_size,
        'unknown_as_zero': unknown_as_zero,
        'binary_rele': binary_rele,
    }

    # Merge in dataset meta-information.
    self.data_dict.update(get_data_meta(data_id=self.data_id))
    return self.data_dict
def default_setting(self):
    """
    A default setting for data loading when performing adversarial ltr
    :return: the assembled data-setting dict (also stored on self.data_dict)
    """
    # Use the original relevance values: neither zeroing of unknown labels
    # nor binarisation is applied.
    unknown_as_zero = False
    binary_rele = False
    # Presort every split; one query per batch.
    train_presort, validation_presort, test_presort = True, True, True
    train_batch_size, validation_batch_size, test_batch_size = 1, 1, 1
    scale_data, scaler_id, scaler_level = get_default_scaler_setting(data_id=self.data_id)

    # Data settings that are rarely changed.
    self.data_dict = {
        'data_id': self.data_id, 'dir_data': self.dir_data,
        'min_docs': 10, 'min_rele': 1,
        'unknown_as_zero': unknown_as_zero,
        'binary_rele': binary_rele,
        'train_presort': train_presort,
        'validation_presort': validation_presort,
        'test_presort': test_presort,
        'train_batch_size': train_batch_size,
        'validation_batch_size': validation_batch_size,
        'test_batch_size': test_batch_size,
        'scale_data': scale_data, 'scaler_id': scaler_id, 'scaler_level': scaler_level,
    }

    # Merge in dataset meta-information.
    self.data_dict.update(get_data_meta(data_id=self.data_id))
    return self.data_dict
def default_setting(self):
    """
    A default setting for data loading when running lambdaMART
    :return: the assembled data-setting dict (also stored on self.data_dict)
    """
    # lambdaMART is a supervised method, so for semi-supervised datasets
    # unlabeled documents are treated as non-relevant (label zero).
    # (Fixed: the membership test is already a bool; the redundant
    # `True if ... else False` ternary was removed.)
    unknown_as_zero = self.data_id in MSLETOR_SEMI
    binary_rele = False  # using the original relevance values
    # Presorting makes no difference for lambdaMART, but it can be altered
    # to reuse buffered data that was prepared with presort.
    presort = True
    scale_data, scaler_id, scaler_level = get_default_scaler_setting(data_id=self.data_id)

    # more data settings that are rarely changed
    self.data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                          min_docs=10, min_rele=1, sample_rankings_per_q=1,
                          unknown_as_zero=unknown_as_zero, binary_rele=binary_rele,
                          presort=presort,
                          scale_data=scale_data, scaler_id=scaler_id,
                          scaler_level=scaler_level)

    data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
    self.data_dict.update(data_meta)
    return self.data_dict
def default_setting(self):
    """
    A default setting for data loading when running lambdaMART
    :return: the assembled data-setting dict (also stored on self.data_dict)
    """
    # lambdaMART is a supervised method, so for semi-supervised datasets
    # unlabeled documents are treated as non-relevant (label zero).
    # (Fixed: the membership test is already a bool; the redundant
    # `True if ... else False` ternary was removed.)
    unknown_as_zero = self.data_id in MSLETOR_SEMI
    binary_rele = False  # using the original relevance values
    # No presorting for any split; one query per batch.
    train_presort, validation_presort, test_presort = False, False, False
    train_batch_size, validation_batch_size, test_batch_size = 1, 1, 1
    scale_data, scaler_id, scaler_level = get_default_scaler_setting(data_id=self.data_id)

    # more data settings that are rarely changed
    self.data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                          min_docs=10, min_rele=1,
                          unknown_as_zero=unknown_as_zero, binary_rele=binary_rele,
                          train_presort=train_presort,
                          validation_presort=validation_presort,
                          test_presort=test_presort,
                          train_batch_size=train_batch_size,
                          validation_batch_size=validation_batch_size,
                          test_batch_size=test_batch_size,
                          scale_data=scale_data, scaler_id=scaler_id,
                          scaler_level=scaler_level)

    data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
    self.data_dict.update(data_meta)
    return self.data_dict
def default_setting(self):
    """
    A default setting for data loading
    :return: the assembled data-setting dict (also stored on self.data_dict)
    """
    # Treat unlabeled documents as non-relevant for semi-supervised datasets.
    # (Fixed: the membership test is already a bool; the redundant
    # `True if ... else False` ternary was removed.)
    unknown_as_zero = self.data_id in MSLETOR_SEMI
    binary_rele = False  # using the original relevance values
    presort = True  # a default setting
    scale_data, scaler_id, scaler_level = get_default_scaler_setting(data_id=self.data_id)

    # more data settings that are rarely changed
    self.data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                          min_docs=10, min_rele=1, sample_rankings_per_q=1,
                          unknown_as_zero=unknown_as_zero, binary_rele=binary_rele,
                          presort=presort,
                          scale_data=scale_data, scaler_id=scaler_id,
                          scaler_level=scaler_level)

    data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
    self.data_dict.update(data_meta)
    return self.data_dict
def grid_search(self):
    """
    Iterator of settings for data loading when performing adversarial ltr.
    :yield: one data-setting dict per grid point (also stored on self.data_dict)
    """
    ''' common settings without grid-search '''
    binary_rele, unknown_as_zero = False, False
    common_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                            min_docs=10, min_rele=1,
                            unknown_as_zero=unknown_as_zero, binary_rele=binary_rele)
    data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
    common_data_dict.update(data_meta)

    ''' some settings for grid-search '''
    # Fixed: these were written as `[True] if self.debug else [True]` and
    # `[1] if self.debug else [1]` — both branches identical, so the dead
    # conditionals were removed without changing behavior.
    choice_presort = [True]
    choice_sample_rankings_per_q = [1]  # number of sample rankings per query
    choice_scale_data, choice_scaler_id, choice_scaler_level = \
        get_default_scaler_setting(data_id=self.data_id, grid_search=True)

    for scale_data, scaler_id, scaler_level, presort, sample_rankings_per_q in product(
            choice_scale_data, choice_scaler_id, choice_scaler_level,
            choice_presort, choice_sample_rankings_per_q):
        self.data_dict = dict(presort=presort,
                              sample_rankings_per_q=sample_rankings_per_q,
                              scale_data=scale_data, scaler_id=scaler_id,
                              scaler_level=scaler_level)
        # common settings take precedence over grid values on key collision
        self.data_dict.update(common_data_dict)
        yield self.data_dict
def grid_search(self):
    """
    Iterate over data-setting dicts spanning the grid of loading options.

    Choice lists come from the json configuration when self.data_json is
    given, otherwise from hard-coded defaults.
    :yield: one data-setting dict per grid point (also stored on self.data_dict)
    """
    if self.data_json is not None:  # using json file
        choice_presort = self.json_dict['presort']
        choice_min_docs = self.json_dict['min_docs']
        choice_min_rele = self.json_dict['min_rele']
        choice_binary_rele = self.json_dict['binary_rele']
        choice_unknown_as_zero = self.json_dict['unknown_as_zero']
        choice_sample_rankings_per_q = self.json_dict['sample_rankings_per_q']
        base_data_dict = dict(data_id=self.data_id, dir_data=self.json_dict["dir_data"])
    else:
        choice_min_docs = [10]
        choice_min_rele = [1]
        choice_presort = [True]
        choice_binary_rele = [False]
        choice_unknown_as_zero = [True]
        choice_sample_rankings_per_q = [1]  # number of sample rankings per query
        base_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data)

    base_data_dict.update(get_data_meta(data_id=self.data_id))  # add meta-information

    choice_scale_data, choice_scaler_id, choice_scaler_level = \
        get_default_scaler_setting(data_id=self.data_id, grid_search=True)

    # One flat product over all nine choice lists; argument order matches
    # the original nested-loop nesting, so the iteration order is identical.
    grid = product(choice_min_docs, choice_min_rele, choice_sample_rankings_per_q,
                   choice_binary_rele, choice_unknown_as_zero, choice_presort,
                   choice_scale_data, choice_scaler_id, choice_scaler_level)
    for (min_docs, min_rele, sample_rankings_per_q,
         binary_rele, unknown_as_zero, presort,
         scale_data, scaler_id, scaler_level) in grid:
        setting = dict(base_data_dict)
        setting.update(min_docs=min_docs, min_rele=min_rele,
                       sample_rankings_per_q=sample_rankings_per_q,
                       binary_rele=binary_rele, unknown_as_zero=unknown_as_zero,
                       presort=presort,
                       scale_data=scale_data, scaler_id=scaler_id,
                       scaler_level=scaler_level)
        self.data_dict = setting
        yield self.data_dict
def load_setting(self):
    """
    Load a single data-setting dict: the first combination of the configured
    choice lists (json-driven when self.use_json, defaults otherwise).
    :return: the data-setting dict, or None if any choice list is empty
    """
    if self.use_json:
        choice_min_docs = self.json_dict['min_docs']
        choice_min_rele = self.json_dict['min_rele']
        choice_binary_rele = self.json_dict['binary_rele']
        choice_unknown_as_zero = self.json_dict['unknown_as_zero']
        choice_train_presort = self.json_dict['train_presort']
        choice_train_batch_size = self.json_dict['train_batch_size']
        # hard-coding for rarely changed settings
        base_data_dict = dict(data_id=self.data_id, dir_data=self.json_dict["dir_data"],
                              test_presort=True, validation_presort=True,
                              validation_batch_size=1, test_batch_size=1)
    else:
        choice_min_docs = [10]
        choice_min_rele = [1]
        choice_binary_rele = [False]
        choice_unknown_as_zero = [False]
        choice_train_presort = [True]
        choice_train_batch_size = [1]  # number of sample rankings per query
        base_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                              test_presort=True, validation_presort=True,
                              validation_batch_size=1, test_batch_size=1)

    base_data_dict.update(get_data_meta(data_id=self.data_id))  # add meta-information

    choice_scale_data, choice_scaler_id, choice_scaler_level = \
        get_default_scaler_setting(data_id=self.data_id, grid_search=True)

    # A single flat product replaces the original nested loops; returning on
    # the first tuple picks the head element of every choice list, exactly as
    # the original inner `return` did.
    grid = product(choice_min_docs, choice_min_rele, choice_train_batch_size,
                   choice_binary_rele, choice_unknown_as_zero, choice_train_presort,
                   choice_scale_data, choice_scaler_id, choice_scaler_level)
    for (min_docs, min_rele, train_batch_size,
         binary_rele, unknown_as_zero, train_presort,
         scale_data, scaler_id, scaler_level) in grid:
        setting = dict(base_data_dict)
        setting.update(min_docs=min_docs, min_rele=min_rele,
                       train_batch_size=train_batch_size,
                       binary_rele=binary_rele, unknown_as_zero=unknown_as_zero,
                       train_presort=train_presort,
                       scale_data=scale_data, scaler_id=scaler_id,
                       scaler_level=scaler_level)
        self.data_dict = setting
        return self.data_dict
def grid_search(self):
    """
    Iterator of settings for data loading when performing adversarial ltr.

    Choice lists come from the json configuration when self.ad_data_json is
    given, otherwise from hard-coded defaults.
    :yield: one data-setting dict per grid point (also stored on self.data_dict)
    """
    if self.ad_data_json is not None:  # using json file
        choice_min_docs = self.json_dict['min_docs']
        choice_min_rele = self.json_dict['min_rele']
        choice_binary_rele = self.json_dict['binary_rele']
        choice_unknown_as_zero = self.json_dict['unknown_as_zero']
        choice_train_presort = self.json_dict['train_presort']
        choice_train_batch_size = self.json_dict['train_batch_size']
        base_data_dict = dict(data_id=self.data_id, dir_data=self.json_dict["dir_data"],
                              test_presort=True, validation_presort=True,
                              validation_batch_size=1, test_batch_size=1)
    else:
        choice_min_docs = [10]
        choice_min_rele = [1]
        choice_binary_rele = [False]
        choice_unknown_as_zero = [False]
        choice_train_presort = [True]
        choice_train_batch_size = [1]  # number of sample rankings per query
        base_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                              test_presort=True, validation_presort=True,
                              validation_batch_size=1, test_batch_size=1)

    base_data_dict.update(get_data_meta(data_id=self.data_id))  # add meta-information

    choice_scale_data, choice_scaler_id, choice_scaler_level = \
        get_default_scaler_setting(data_id=self.data_id, grid_search=True)

    # One flat product over all nine choice lists; argument order matches
    # the original nested-loop nesting, so the iteration order is identical.
    grid = product(choice_min_docs, choice_min_rele, choice_train_batch_size,
                   choice_binary_rele, choice_unknown_as_zero, choice_train_presort,
                   choice_scale_data, choice_scaler_id, choice_scaler_level)
    for (min_docs, min_rele, train_batch_size,
         binary_rele, unknown_as_zero, train_presort,
         scale_data, scaler_id, scaler_level) in grid:
        setting = dict(base_data_dict)
        setting.update(min_docs=min_docs, min_rele=min_rele,
                       train_batch_size=train_batch_size,
                       binary_rele=binary_rele, unknown_as_zero=unknown_as_zero,
                       train_presort=train_presort,
                       scale_data=scale_data, scaler_id=scaler_id,
                       scaler_level=scaler_level)
        self.data_dict = setting
        yield self.data_dict