Example 1
    def default_setting(self):
        """
        Build the default configuration for data loading.

        :return: the assembled data_dict, including dataset meta-information.
        """
        # keep the original labels as-is (e.g. w.r.t. a semi-supervised dataset)
        unknown_as_zero, binary_rele = False, False

        scale_data, scaler_id, scaler_level = get_default_scaler_setting(
            data_id=self.data_id)

        # settings that are rarely changed
        self.data_dict = dict(
            data_id=self.data_id, dir_data=self.dir_data,
            min_docs=10, min_rele=1,
            scale_data=scale_data, scaler_id=scaler_id, scaler_level=scaler_level,
            train_presort=True, validation_presort=True, test_presort=True,
            train_batch_size=1, validation_batch_size=1, test_batch_size=1,
            unknown_as_zero=unknown_as_zero, binary_rele=binary_rele)

        # fold in the dataset's meta-information
        self.data_dict.update(get_data_meta(data_id=self.data_id))

        return self.data_dict
Example 2
    def default_setting(self):
        """
        Build the default configuration for data loading when performing
        adversarial learning-to-rank.

        :return: the assembled data_dict, including dataset meta-information.
        """
        # keep the original relevance values
        unknown_as_zero = False
        binary_rele = False

        presorts = dict(train_presort=True, validation_presort=True,
                        test_presort=True)
        batch_sizes = dict(train_batch_size=1, validation_batch_size=1,
                           test_batch_size=1)

        scale_data, scaler_id, scaler_level = get_default_scaler_setting(
            data_id=self.data_id)

        # settings that are rarely changed
        self.data_dict = dict(data_id=self.data_id, dir_data=self.dir_data,
                              min_docs=10, min_rele=1,
                              unknown_as_zero=unknown_as_zero,
                              binary_rele=binary_rele,
                              scale_data=scale_data, scaler_id=scaler_id,
                              scaler_level=scaler_level)
        self.data_dict.update(presorts)
        self.data_dict.update(batch_sizes)

        data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
        self.data_dict.update(data_meta)

        return self.data_dict
    def default_setting(self):
        """
        A default setting for data loading when running LambdaMART.

        :return: the assembled data_dict, including dataset meta-information.
        """
        # LambdaMART is a supervised method, so unlabeled docs in
        # semi-supervised datasets are treated as zero-relevance.
        # (was: `True if ... else False` — the condition is already a bool)
        unknown_as_zero = self.data_id in MSLETOR_SEMI
        binary_rele = False  # using the original values
        # presort leads to no difference for LambdaMART, but it can be altered
        # to reuse buffered data
        presort = True

        scale_data, scaler_id, scaler_level = get_default_scaler_setting(
            data_id=self.data_id)

        # more data settings that are rarely changed
        self.data_dict = dict(data_id=self.data_id,
                              dir_data=self.dir_data,
                              min_docs=10,
                              min_rele=1,
                              sample_rankings_per_q=1,
                              unknown_as_zero=unknown_as_zero,
                              binary_rele=binary_rele,
                              presort=presort,
                              scale_data=scale_data,
                              scaler_id=scaler_id,
                              scaler_level=scaler_level)

        data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
        self.data_dict.update(data_meta)

        return self.data_dict
Example 4
    def default_setting(self):
        """
        A default setting for data loading when running LambdaMART.

        :return: the assembled data_dict, including dataset meta-information.
        """
        # LambdaMART is a supervised method, so unlabeled docs in
        # semi-supervised datasets are treated as zero-relevance.
        # (was: `True if ... else False` — the condition is already a bool)
        unknown_as_zero = self.data_id in MSLETOR_SEMI
        binary_rele = False  # using the original values
        train_presort, validation_presort, test_presort = False, False, False
        train_batch_size, validation_batch_size, test_batch_size = 1, 1, 1

        scale_data, scaler_id, scaler_level = get_default_scaler_setting(
            data_id=self.data_id)

        # more data settings that are rarely changed
        self.data_dict = dict(data_id=self.data_id,
                              dir_data=self.dir_data,
                              min_docs=10,
                              min_rele=1,
                              unknown_as_zero=unknown_as_zero,
                              binary_rele=binary_rele,
                              train_presort=train_presort,
                              validation_presort=validation_presort,
                              test_presort=test_presort,
                              train_batch_size=train_batch_size,
                              validation_batch_size=validation_batch_size,
                              test_batch_size=test_batch_size,
                              scale_data=scale_data,
                              scaler_id=scaler_id,
                              scaler_level=scaler_level)

        data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
        self.data_dict.update(data_meta)

        return self.data_dict
Example 5
    def default_setting(self):
        """
        A default setting for data loading.

        :return: the assembled data_dict, including dataset meta-information.
        """
        # treat unlabeled docs in semi-supervised datasets as zero-relevance.
        # (was: `True if ... else False` — the condition is already a bool)
        unknown_as_zero = self.data_id in MSLETOR_SEMI
        binary_rele = False  # using the original values
        presort = True  # a default setting

        scale_data, scaler_id, scaler_level = get_default_scaler_setting(
            data_id=self.data_id)

        # more data settings that are rarely changed
        self.data_dict = dict(data_id=self.data_id,
                              dir_data=self.dir_data,
                              min_docs=10,
                              min_rele=1,
                              sample_rankings_per_q=1,
                              unknown_as_zero=unknown_as_zero,
                              binary_rele=binary_rele,
                              presort=presort,
                              scale_data=scale_data,
                              scaler_id=scaler_id,
                              scaler_level=scaler_level)

        data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
        self.data_dict.update(data_meta)

        return self.data_dict
Example 6
	def grid_search(self):
		"""
		Iterator of settings for data loading when performing adversarial ltr.

		Yields one data_dict per combination of scaler / presort /
		sample-rankings settings, each merged with the common settings and
		the dataset's meta-information.
		"""
		''' common settings without grid-search '''
		binary_rele, unknown_as_zero = False, False
		common_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data, min_docs=10, min_rele=1,
								unknown_as_zero=unknown_as_zero, binary_rele=binary_rele)

		data_meta = get_data_meta(data_id=self.data_id)  # add meta-information
		common_data_dict.update(data_meta)

		''' some settings for grid-search '''
		# NOTE: these were `[True] if self.debug else [True]` and
		# `[1] if self.debug else [1]` — both branches identical, so the
		# debug conditionals were dead code and have been removed.
		choice_presort = [True]
		choice_sample_rankings_per_q = [1]  # number of sample rankings per query
		choice_scale_data, choice_scaler_id, choice_scaler_level = get_default_scaler_setting(data_id=self.data_id, grid_search=True)

		for scale_data, scaler_id, scaler_level, presort, sample_rankings_per_q in product(choice_scale_data,
																						   choice_scaler_id,
																						   choice_scaler_level,
																						   choice_presort,
																						   choice_sample_rankings_per_q):

			self.data_dict = dict(presort=presort, sample_rankings_per_q=sample_rankings_per_q,
								  scale_data=scale_data, scaler_id=scaler_id, scaler_level=scaler_level)
			self.data_dict.update(common_data_dict)
			yield self.data_dict
Example 7
    def grid_search(self):
        """
        Iterator of data-loading settings for grid search.

        Reads the candidate value lists from the JSON spec when one was
        provided, otherwise falls back to hard-coded defaults; yields one
        data_dict per combination.
        """
        if self.data_json is not None:  # using json file
            jd = self.json_dict
            choice_presort = jd['presort']
            choice_min_docs = jd['min_docs']
            choice_min_rele = jd['min_rele']
            choice_binary_rele = jd['binary_rele']
            choice_unknown_as_zero = jd['unknown_as_zero']
            choice_sample_rankings_per_q = jd['sample_rankings_per_q']
            base_data_dict = dict(data_id=self.data_id,
                                  dir_data=jd["dir_data"])
        else:  # hard-coded defaults
            choice_min_docs = [10]
            choice_min_rele = [1]
            choice_presort = [True]
            choice_binary_rele = [False]
            choice_unknown_as_zero = [True]
            # number of sample rankings per query
            choice_sample_rankings_per_q = [1]
            base_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data)

        # add meta-information
        base_data_dict.update(get_data_meta(data_id=self.data_id))

        choice_scale_data, choice_scaler_id, choice_scaler_level = get_default_scaler_setting(
            data_id=self.data_id, grid_search=True)

        # one flat product over all choice lists; the ordering of the lists
        # matches the previous nested loops, so iteration order is unchanged
        for (min_docs, min_rele, sample_rankings_per_q,
             binary_rele, unknown_as_zero, presort,
             scale_data, scaler_id, scaler_level) in product(
                 choice_min_docs, choice_min_rele, choice_sample_rankings_per_q,
                 choice_binary_rele, choice_unknown_as_zero, choice_presort,
                 choice_scale_data, choice_scaler_id, choice_scaler_level):
            self.data_dict = dict(base_data_dict,
                                  min_docs=min_docs,
                                  min_rele=min_rele,
                                  sample_rankings_per_q=sample_rankings_per_q,
                                  binary_rele=binary_rele,
                                  unknown_as_zero=unknown_as_zero,
                                  presort=presort,
                                  scale_data=scale_data,
                                  scaler_id=scaler_id,
                                  scaler_level=scaler_level)
            yield self.data_dict
Example 8
	def load_setting(self):
		"""
		Build and return a single data-loading setting.

		Candidate value lists come from the JSON spec when self.use_json is
		set, otherwise from hard-coded defaults; the FIRST combination is
		returned. Returns None if any candidate list is empty (matching the
		previous fall-through behaviour).
		"""
		if self.use_json:
			jd = self.json_dict
			choice_min_docs = jd['min_docs']
			choice_min_rele = jd['min_rele']
			choice_binary_rele = jd['binary_rele']
			choice_unknown_as_zero = jd['unknown_as_zero']
			choice_train_presort = jd['train_presort']
			choice_train_batch_size = jd['train_batch_size']
			# hard-coding for rarely changed settings
			base_data_dict = dict(data_id=self.data_id, dir_data=jd["dir_data"], test_presort=True,
								  validation_presort=True, validation_batch_size=1, test_batch_size=1)
		else:
			choice_min_docs = [10]
			choice_min_rele = [1]
			choice_binary_rele = [False]
			choice_unknown_as_zero = [False]
			choice_train_presort = [True]
			choice_train_batch_size = [1]  # number of sample rankings per query
			base_data_dict = dict(data_id=self.data_id, dir_data=self.dir_data, test_presort=True,
								  validation_presort=True, validation_batch_size=1, test_batch_size=1)

		base_data_dict.update(get_data_meta(data_id=self.data_id))  # add meta-information

		choice_scale_data, choice_scaler_id, choice_scaler_level = get_default_scaler_setting(data_id=self.data_id,
																							  grid_search=True)

		# a single flat product replaces the previous three nested loops;
		# returning inside the loop picks the first combination
		for (min_docs, min_rele, train_batch_size,
			 binary_rele, unknown_as_zero, train_presort,
			 scale_data, scaler_id, scaler_level) in product(
				 choice_min_docs, choice_min_rele, choice_train_batch_size,
				 choice_binary_rele, choice_unknown_as_zero, choice_train_presort,
				 choice_scale_data, choice_scaler_id, choice_scaler_level):
			self.data_dict = dict(base_data_dict,
								  min_docs=min_docs, min_rele=min_rele, train_batch_size=train_batch_size,
								  binary_rele=binary_rele, unknown_as_zero=unknown_as_zero,
								  train_presort=train_presort,
								  scale_data=scale_data, scaler_id=scaler_id, scaler_level=scaler_level)
			return self.data_dict
Example 9
    def grid_search(self):
        """
        Iterator of settings for data loading when performing adversarial ltr.

        Reads candidate value lists from the JSON spec when one was provided,
        otherwise uses hard-coded defaults; yields one data_dict per
        combination.
        """
        if self.ad_data_json is not None:  # using json file
            jd = self.json_dict
            choice_min_docs = jd['min_docs']
            choice_min_rele = jd['min_rele']
            choice_binary_rele = jd['binary_rele']
            choice_unknown_as_zero = jd['unknown_as_zero']
            choice_train_presort = jd['train_presort']
            choice_train_batch_size = jd['train_batch_size']
            base_data_dict = dict(data_id=self.data_id,
                                  dir_data=jd["dir_data"],
                                  test_presort=True,
                                  validation_presort=True,
                                  validation_batch_size=1,
                                  test_batch_size=1)
        else:  # hard-coded defaults
            choice_min_docs = [10]
            choice_min_rele = [1]
            choice_binary_rele = [False]
            choice_unknown_as_zero = [False]
            choice_train_presort = [True]
            # number of sample rankings per query
            choice_train_batch_size = [1]
            base_data_dict = dict(data_id=self.data_id,
                                  dir_data=self.dir_data,
                                  test_presort=True,
                                  validation_presort=True,
                                  validation_batch_size=1,
                                  test_batch_size=1)

        # add meta-information
        base_data_dict.update(get_data_meta(data_id=self.data_id))

        choice_scale_data, choice_scaler_id, choice_scaler_level = get_default_scaler_setting(
            data_id=self.data_id, grid_search=True)

        # one flat product over all choice lists; list ordering matches the
        # previous nested loops, so iteration order is unchanged
        for (min_docs, min_rele, train_batch_size,
             binary_rele, unknown_as_zero, train_presort,
             scale_data, scaler_id, scaler_level) in product(
                 choice_min_docs, choice_min_rele, choice_train_batch_size,
                 choice_binary_rele, choice_unknown_as_zero,
                 choice_train_presort,
                 choice_scale_data, choice_scaler_id, choice_scaler_level):
            self.data_dict = dict(base_data_dict,
                                  min_docs=min_docs,
                                  min_rele=min_rele,
                                  train_batch_size=train_batch_size,
                                  binary_rele=binary_rele,
                                  unknown_as_zero=unknown_as_zero,
                                  train_presort=train_presort,
                                  scale_data=scale_data,
                                  scaler_id=scaler_id,
                                  scaler_level=scaler_level)
            yield self.data_dict