예제 #1
0
        def _train(train_raw):
            """Train one ``Supervised`` model per topic.

            For every topic the feature vectors and labels are also dumped as
            JSON to ``<tmp>/ltr-features-<topic>`` before training.

            Args:
                train_raw: mapping of topic -> sequence of instances. Each
                    instance is indexed positionally: ``inst[0]`` and
                    ``inst[1]`` are handed to every extractor in
                    ``self.extractors`` and ``inst[2]`` is used as the label.

            Returns:
                A ``(models, all_docs)`` tuple. ``models`` maps the
                lower-cased topic to its trained model; ``all_docs`` maps the
                topic (as given) to the list of ``inst[1]`` values.
            """
            models = {}
            all_docs = {}
            for topic in train_raw:
                x_train = []
                y_train = []
                for inst in train_raw[topic]:
                    # One feature per extractor, in extractor order.
                    x_train.append([
                        ext.extract(inst[0], inst[1])
                        for ext in self.extractors
                    ])
                    y_train.append(inst[2])

                features_path = (constants.get_path()['tmp'] +
                                 '/ltr-features-%s' % topic)
                # Text mode: json.dump emits str, which a binary-mode file
                # rejects on Python 3.
                with open(features_path, 'w') as mf:
                    json.dump({
                        'x_train': x_train,
                        'y_train': y_train
                    },
                              mf,
                              indent=2)

                model = Supervised(self.args, self.opts)
                model.train(x_train, y_train)
                models[topic.lower()] = model
                all_docs[topic] = [inst[1] for inst in train_raw[topic]]
            # Return only after every topic has been processed; returning
            # from inside the loop would stop after the first topic.
            return models, all_docs
예제 #2
0
        def _train(train_raw):
            """Train a single ``Supervised`` model on all topics combined.

            The combined feature vectors and labels are dumped as JSON to
            ``<tmp>/ltr-features-all`` unless that file already exists.

            Args:
                train_raw: mapping of topic -> sequence of instances. Each
                    instance is indexed positionally: ``inst[0]`` and
                    ``inst[1]`` are handed to every extractor in
                    ``self.extractors`` and ``inst[2]`` is used as the label.

            Returns:
                A ``(model, all_docs)`` tuple where ``all_docs`` is the flat
                list of ``inst[1]`` values across every topic.
            """
            # Accumulate across topics: initialising these inside the loop
            # would discard everything but the last topic.
            x_train = []
            y_train = []
            for topic in train_raw:
                for inst in train_raw[topic]:
                    # One feature per extractor, in extractor order.
                    x_train.append([
                        ext.extract(inst[0], inst[1])
                        for ext in self.extractors
                    ])
                    y_train.append(inst[2])

            features_path = constants.get_path()['tmp'] + '/ltr-features-all'
            if not os.path.exists(features_path):
                # Text mode: json.dump emits str, which a binary-mode file
                # rejects on Python 3.
                with open(features_path, 'w') as mf:
                    json.dump({
                        'x_train': x_train,
                        'y_train': y_train
                    },
                              mf,
                              indent=2)

            svm = Supervised(self.args, self.opts)
            svm.train(x_train, y_train)
            all_docs = [
                inst[1] for topic in train_raw for inst in train_raw[topic]
            ]
            return svm, all_docs
예제 #3
0
        def _train(train_raw):
            """Fit a separate ``Supervised`` model for each topic.

            Each topic's features and labels are written as JSON to
            ``<tmp>/ltr-features-<topic>`` before its model is trained.

            Args:
                train_raw: mapping of topic -> sequence of instances, where
                    ``inst[0]`` and ``inst[1]`` are passed to each extractor
                    in ``self.extractors`` and ``inst[2]`` is the label.

            Returns:
                ``(models, all_docs)``: ``models`` is keyed by lower-cased
                topic, ``all_docs`` is keyed by the topic as given and holds
                the ``inst[1]`` values of that topic.
            """
            models = {}
            all_docs = {}
            for topic in train_raw:
                x_train = []
                y_train = []
                for inst in train_raw[topic]:
                    feature_vector = [
                        ext.extract(inst[0], inst[1]) for ext in self.extractors]
                    x_train.append(feature_vector)
                    y_train.append(inst[2])

                dump_path = constants.get_path()['tmp'] + '/ltr-features-%s' % topic
                # 'w', not 'wb': json.dump writes text, and Python 3 raises
                # TypeError when str is written to a binary file.
                with open(dump_path, 'w') as mf:
                    json.dump(
                        {'x_train': x_train, 'y_train': y_train}, mf, indent=2)

                svm = Supervised(self.args, self.opts)
                svm.train(x_train, y_train)
                models[topic.lower()] = svm
                all_docs[topic] = [inst[1] for inst in train_raw[topic]]
            # The return sits outside the loop so every topic gets a model
            # (and an empty input yields two empty dicts instead of None).
            return models, all_docs
예제 #4
0
        def _train(train_raw):
            """Fit one ``Supervised`` model on the instances of every topic.

            The pooled features and labels are written as JSON to
            ``<tmp>/ltr-features-all``; an existing file is left untouched.

            Args:
                train_raw: mapping of topic -> sequence of instances, where
                    ``inst[0]`` and ``inst[1]`` are passed to each extractor
                    in ``self.extractors`` and ``inst[2]`` is the label.

            Returns:
                ``(model, all_docs)`` with ``all_docs`` being the flat list
                of ``inst[1]`` values over all topics.
            """
            # These lists must outlive the topic loop; resetting them per
            # topic would train on the last topic only.
            x_train = []
            y_train = []
            for topic in train_raw:
                for inst in train_raw[topic]:
                    feature_vector = [
                        ext.extract(inst[0], inst[1]) for ext in self.extractors]
                    x_train.append(feature_vector)
                    y_train.append(inst[2])

            dump_path = constants.get_path()['tmp'] + '/ltr-features-all'
            if not os.path.exists(dump_path):
                # 'w', not 'wb': json.dump writes text, and Python 3 raises
                # TypeError when str is written to a binary file.
                with open(dump_path, 'w') as mf:
                    json.dump(
                        {'x_train': x_train, 'y_train': y_train}, mf, indent=2)

            svm = Supervised(self.args, self.opts)
            svm.train(x_train, y_train)
            all_docs = [inst[1]
                        for topic in train_raw for inst in train_raw[topic]]
            return svm, all_docs