def test_ndarray_to_instances(self):
        """
        Tests the ndarray_to_instances method.

        Three conversions of a 2x3 float64 matrix are checked: default
        attribute names, names generated from an attribute template, and
        names taken from an explicit attribute list.
        """
        int_rows = [[1, 2, 3], [4, 5, 6]]
        float_rows = [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]

        # (matrix rows, extra keyword arguments, expected name of attribute #0,
        #  expected value at row 0 / column 0)
        scenarios = [
            (int_rows, {}, "Att-1", 1),
            (float_rows, {"att_template": "@-!"}, "test-0", 1.1),
            (float_rows, {"att_list": ["a", "b", "c"]}, "a", 1.1),
        ]

        for rows, extra, first_name, first_value in scenarios:
            x = numpy.array(rows, numpy.float64)
            inst = converters.ndarray_to_instances(x, "test", **extra)
            self.assertIsNotNone(inst, msg="Should not be None!")
            self.assertEqual(first_name, inst.attribute(0).name, msg="Attribute name differs at #0")
            self.assertEqual(3, inst.num_attributes, msg="# of columns differ")
            self.assertEqual(2, inst.num_instances, msg="# of rows differ")
            self.assertEqual(first_value, inst.get_instance(0).get_value(0), msg="value differs at 0,0")
Example #2
0
    def exposed_evaluate(self, X, d, task, i_model, i_evl):
        """
        Cross-validates a model on the supplied data and returns one metric.

        The data is rebuilt from ``X`` as a 2-D array with ``d`` rows (C order)
        and the last column is used as the class attribute. Weka models are
        scored with a 5-fold cross-validation seeded with ``Random(0)``.

        :param X: string containing a Python expression that evaluates to the data
        :param d: number of rows of the reshaped data array
        :param task: 'regression' or 'classification'
        :param i_model: 'LR' or 'RF' for regression; 'RF' for classification
            ('LR' and 'SVM' are instantiated for classification but no
            evaluation is implemented for them)
        :param i_evl: 'mae', 'mse' or '1-rae' for regression; 'f_score' for
            classification
        :return: the requested metric, or None when the task or the metric name
            is not recognised
        :raises ValueError: if a known metric is requested for a model that has
            no evaluation
        """
        # NOTE(security): eval() executes arbitrary code. If X can originate
        # from an untrusted caller, replace this with ast.literal_eval or a
        # structured format such as JSON.
        data = np.reshape(eval(X), [d, -1], order='C')

        # Stays None unless one of the branches below actually runs a
        # cross-validation.
        evl = None

        if task == 'regression':
            regressors = {
                'LR': 'weka.classifiers.functions.LinearRegression',
                'RF': 'weka.classifiers.trees.RandomForest',
            }
            if i_model in regressors:
                data = converters.ndarray_to_instances(data, relation='tmp')
                data.class_is_last()
                model = Classifier(classname=regressors[i_model])
                evl = Evaluation(data)
                evl.crossvalidate_model(model, data, 5, Random(0))

            if i_evl in ('mae', 'mse', '1-rae'):
                if evl is None:
                    raise ValueError(
                        'No evaluation available for regression model '
                        '{!r}'.format(i_model))
                if i_evl == 'mae':
                    return evl.mean_absolute_error
                if i_evl == 'mse':
                    # NOTE(review): confirm that Evaluation exposes
                    # mean_square_error; python-weka-wrapper documents
                    # root_mean_squared_error.
                    return evl.mean_square_error
                # relative_absolute_error is divided by 100 here, i.e. it is
                # treated as a percentage.
                return 1 - evl.relative_absolute_error / 100

        elif task == 'classification':
            le = LabelEncoder()
            data[:, -1] = le.fit_transform(data[:, -1])
            if i_model == 'RF':
                raw_instances = converters.ndarray_to_instances(
                    data, relation='tmp')
                # The encoded class column is numeric; convert it to nominal
                # before using it as the class attribute.
                to_nominal = Filter(
                    classname=
                    "weka.filters.unsupervised.attribute.NumericToNominal",
                    options=["-R", "last"])
                to_nominal.inputformat(raw_instances)
                data = to_nominal.filter(raw_instances)
                data.class_is_last()
                model = Classifier(
                    classname='weka.classifiers.trees.RandomForest')
                evl = Evaluation(data)
                evl.crossvalidate_model(model, data, 5, Random(0))
            elif i_model == 'LR':
                # Instantiated only; no evaluation is implemented for it.
                model = LogisticRegression(multi_class='ovr')
            elif i_model == 'SVM':
                # Instantiated only; no evaluation is implemented for it.
                model = svm.SVC()

            if i_evl == 'f_score':
                if evl is None:
                    raise ValueError(
                        'No evaluation available for classification model '
                        '{!r}'.format(i_model))
                fscore = evl.weighted_f_measure
                # Anything outside [0.01, 1.01), including NaN, is replaced
                # by the floor value.
                if not (0.01 <= fscore < 1.01):
                    fscore = 0.01
                return fscore

        return None
Example #3
0
	def _sklearn2weka(self, features, labels=None):
		"""
		Converts a feature matrix (and optional labels) into a Weka dataset.

		The labels are ordinal-encoded and written into an extra nominal
		attribute named 'tag' that is inserted after the feature columns.
		The first call that receives labels stores the mapping from encoded
		value to original label in ``self._dict``; later calls reuse it.

		:param features: 2-D array-like of feature values
		:param labels: optional sequence with one label per row of ``features``
		:return: the Weka dataset with the additional 'tag' attribute
		"""
		labels_nominal = None

		if labels is not None:
			encoder = CategoricalEncoder(encoding='ordinal')
			labels_nominal = encoder.fit_transform(np.array(labels).reshape(-1, 1))

			# Only the first labelled call defines the mapping. The attribute
			# that is checked must be the one that is assigned (``_dict``).
			if not hasattr(self, '_dict'):
				code_to_label = {}
				for label, nominal in zip(labels, labels_nominal):
					code_to_label.setdefault(nominal.item(0), label)
				self._dict = code_to_label

		tag_index = features.shape[1]

		# np.float64 instead of np.float_: the alias was removed in NumPy 2.0.
		weka_dataset = ndarray_to_instances(np.ascontiguousarray(features, dtype=np.float64), 'weka_dataset')
		weka_dataset.insert_attribute(Attribute.create_nominal('tag', [str(float(i)) for i in range(len(self._dict))]), tag_index)

		if labels_nominal is not None:
			for index, inst in enumerate(weka_dataset):
				# Pass a plain float rather than a 1-element array.
				inst.set_value(tag_index, float(labels_nominal[index, 0]))
				weka_dataset.set_instance(index, inst)

		return weka_dataset
    def test_ndarray_to_instances(self):
        """
        Tests the ndarray_to_instances method.

        Converts a 2x3 float64 matrix with default attribute names, with an
        attribute name template and with an explicit attribute name list.
        """

        def verify(dataset, first_name, first_value):
            # Shared checks: name of attribute #0, the 2x3 shape and the value
            # in the top-left cell.
            self.assertIsNotNone(dataset, msg="Should not be None!")
            self.assertEqual(first_name,
                             dataset.attribute(0).name,
                             msg="Attribute name differs at #0")
            self.assertEqual(3, dataset.num_attributes, msg="# of columns differ")
            self.assertEqual(2, dataset.num_instances, msg="# of rows differ")
            self.assertEqual(first_value,
                             dataset.get_instance(0).get_value(0),
                             msg="value differs at 0,0")

        # default attribute names
        matrix = numpy.array([[1, 2, 3], [4, 5, 6]], numpy.float64)
        verify(converters.ndarray_to_instances(matrix, "test"), "Att-1", 1)

        # attribute names generated from a template
        matrix = numpy.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], numpy.float64)
        verify(converters.ndarray_to_instances(matrix, "test", att_template="@-!"),
               "test-0", 1.1)

        # attribute names supplied explicitly
        matrix = numpy.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], numpy.float64)
        verify(converters.ndarray_to_instances(matrix, "test", att_list=["a", "b", "c"]),
               "a", 1.1)
Example #5
0
def classify(train, test, name="RF", tuning=False):
    """
    Trains a Weka classifier on ``train`` and predicts every instance of ``test``.

    ``train`` and ``test`` are handled in one of three ways:

    * both are lists: each is passed through ``weka_instance`` and the result
      is loaded with ``converters.load_any_file``;
    * both are paths of existing files: they are loaded directly;
    * otherwise: each is read with ``csv_as_ndarray`` and converted with
      ``converters.ndarray_to_instances``.

    The last attribute of both datasets is used as the class.

    :param train: training data (list, file path, or input for csv_as_ndarray)
    :param test: test data, in the same form as ``train``
    :param name: key into ``classifiers`` (looked up lower-cased)
    :param tuning: when True the options come from ``tune(train)``, otherwise
        ``default_opt`` is used
    :return: tuple ``(preds, distr)``; ``preds`` holds the predicted class of
        each test instance and ``distr`` the element at index 1 of each
        instance's class distribution
    """
    jvm.start()
    # The JVM must be stopped even when loading, training or prediction fails.
    try:
        if isinstance(train, list) and isinstance(test, list):
            train = weka_instance(train)
            trn_data = converters.load_any_file(train)
            test = weka_instance(test)
            tst_data = converters.load_any_file(test)

        elif os.path.isfile(train) and os.path.isfile(test):
            trn_data = converters.load_any_file(train)
            tst_data = converters.load_any_file(test)

        else:
            trn = csv_as_ndarray(train)
            tst = csv_as_ndarray(test)

            trn_data = converters.ndarray_to_instances(trn, relation="Train")
            tst_data = converters.ndarray_to_instances(tst, relation="Test")

        trn_data.class_is_last()
        tst_data.class_is_last()

        opt = tune(train) if tuning else default_opt

        clf = Classifier(classname=classifiers[name.lower()], options=opt)
        clf.build_classifier(trn_data)

        distr = [clf.distribution_for_instance(inst)[1] for inst in tst_data]
        preds = [clf.classify_instance(inst) for inst in tst_data]

        return preds, distr
    finally:
        jvm.stop()
Example #6
0
def SMOreg(obj):
    """
    Predicts a value for ``obj`` with the serialized model
    "new_models/SMOreg.model".

    A single-row dataset is built from twelve attributes of ``obj`` followed
    by a trailing 0 and handed to the loaded classifier.

    :param obj: object providing screen_num_7, show_num_7, money_num_7,
        audience_num_7, director_effect, distributor_effect, month,
        nationality, before_grade, after_grade, age and actor_effect
    :return: the value returned by ``classify_instance`` for that row
    """
    jvm.start(packages=True)
    # The JVM must be stopped even when loading the model or predicting fails.
    try:
        # load model
        model = Classifier(jobject=serialization.read("new_models/SMOreg.model"))

        # NOTE(review): the trailing 0 presumably stands in for the unknown
        # target column - confirm against the training data layout.
        features = np.array([[obj.screen_num_7, obj.show_num_7, obj.money_num_7, obj.audience_num_7,
                              obj.director_effect, obj.distributor_effect, obj.month, obj.nationality,
                              obj.before_grade, obj.after_grade, obj.age, obj.actor_effect, 0]])
        instances = ndarray_to_instances(features, relation="input")

        # The dataset holds exactly one row, so this yields a single prediction.
        for inst in instances:
            audience_num = model.classify_instance(inst)

        return audience_num
    finally:
        jvm.stop()
Example #7
0
    def _sklearn2weka(self, features, labels=None):
        """
        Converts a feature matrix (and optional labels) into a Weka dataset.

        The labels are ordinal-encoded and written into an extra nominal
        attribute named 'tag' that is inserted after the feature columns.
        The first call that receives labels stores the mapping from encoded
        value to original label in ``self._dict``; later calls reuse it.

        :param features: 2-D array-like of feature values
        :param labels: optional sequence with one label per row of ``features``
        :return: the Weka dataset with the additional 'tag' attribute
        :raises TypeError: if iterating over the Weka dataset fails
        """
        # All weka datasets have to be a zero-based coding for the column of labels
        # We can use non-aligned labels for training and testing because the labels
        # in testing phase are only used to obtain performance, but not for preds.
        # We compute performance off-line.
        labels_nominal = None

        if labels is not None:
            labels_encoder = OrdinalEncoder()
            labels_nominal = labels_encoder.fit_transform(np.array(labels).reshape(-1, 1))

            # TODO: find another way to do the same
            # The following is used to assign the value of _dict only in training phase
            if not hasattr(self, '_dict'):
                code_to_label = {}
                for label, nominal in zip(labels, labels_nominal):
                    code_to_label.setdefault(nominal.item(0), label)
                self._dict = code_to_label

        tag_index = features.shape[1]

        # np.float64 instead of np.float_: the alias was removed in NumPy 2.0.
        weka_dataset = ndarray_to_instances(np.ascontiguousarray(features, dtype=np.float64), 'weka_dataset')
        weka_dataset.insert_attribute(Attribute.create_nominal('tag', [str(float(i)) for i in range(len(self._dict))]),
                                      tag_index)

        if labels_nominal is not None:
            try:
                for index, inst in enumerate(weka_dataset):
                    # Pass a plain float rather than a 1-element array.
                    inst.set_value(tag_index, float(labels_nominal[index, 0]))
                    weka_dataset.set_instance(index, inst)
            except TypeError:
                print('Error: it seems InstanceIterator does not implement a valid iterator.')
                print('Please, check the class definition in lib/python3.7/site-packages/weka/core/dataset.py.')
                print('This error could be due to the next() method: it should be declared as __next__().')
                # Re-raise instead of exit(): exit() would end the process with
                # status 0 and discard the traceback.
                raise
        return weka_dataset