コード例 #1
0
    def __init__(self,
                 dataset,
                 estimators: [(str, BaseEstimator)],
                 logger=None,
                 voting='hard',
                 weights=None,
                 *args,
                 **kwargs):
        """
        Wrap a ``VotingClassifier`` built from the given named estimators.

        Parameters
        ----------
        dataset:
            Forwarded unchanged to ``Classifier.__init__``.
        estimators: list of (str, estimator) tuples
            Named estimators handed to ``VotingClassifier``.
        logger: optional (default=None)
            Forwarded unchanged to ``Classifier.__init__``.
        voting: str, {'hard', 'soft'} (default='hard')
            With 'hard', the predicted class labels are combined by majority
            rule. With 'soft', the class label is the argmax of the summed
            predicted probabilities, which is recommended for an ensemble of
            well-calibrated classifiers.
        weights: array-like, shape = [n_classifiers], optional (default=None)
            Sequence of weights (float or int) applied to the occurrences of
            predicted class labels (hard voting) or to the class probabilities
            before averaging (soft voting). Uniform weights are used if None.
        *args, **kwargs:
            Accepted but not used by this constructor.
        """
        # Collect the ensemble settings first, then build the estimator once.
        voting_options = {'voting': voting, 'weights': weights}
        self.classifier = VotingClassifier(estimators, **voting_options)

        # Both base initialisers are called explicitly, in this order.
        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger)
コード例 #2
0
    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=500,
                 max_leaf_nodes=16,
                 verbose=0,
                 model: RandomForestClassifier = None,
                 logger=None):
        """
        Set up a random-forest classifier and its tuning configuration.

        Parameters
        ----------
        dataset: DataSet
            Forwarded to ``Classifier.__init__``.
        n_estimators, max_leaf_nodes, verbose:
            Currently unused by this constructor; the default model is built
            with the fixed hyper-parameters below.
        model: RandomForestClassifier, optional (default=None)
            Pre-built estimator to wrap. When None, a default forest is created.
        logger: optional (default=None)
            Forwarded to ``Classifier.__init__``.
        """
        # Scoring names and hyper-parameter grid exposed as attributes
        # (presumably consumed by a parameter search elsewhere - not visible here).
        self.scores = [
            'recall_weighted', 'precision_micro', 'precision_weighted'
        ]
        self.tuned_parameters = {
            'criterion': ['gini', 'entropy'],
            'class_weight': ['balanced', None],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None],
            'bootstrap': [True, False]
        }

        # Identity check: `== None` would go through the model's __eq__.
        if model is None:
            self.classifier = RandomForestClassifier(n_estimators=150,
                                                     max_depth=5,
                                                     criterion='gini',
                                                     class_weight=None,
                                                     bootstrap=True)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
コード例 #3
0
    def __init__(self,
                 dataset: "DataSet",
                 verbose=0,
                 model: SGDClassifier = None,
                 logger=None):
        """
        Set up an SGD-based classifier and its tuning configuration.

        Parameters
        ----------
        dataset: DataSet
            Forwarded to ``Classifier.__init__``.
        verbose:
            Currently unused by this constructor.
        model: SGDClassifier, optional (default=None)
            Pre-built estimator to wrap. When None, ``SGDClassifier()`` with
            default settings is created.
        logger: optional (default=None)
            Forwarded to ``Classifier.__init__``.
        """
        self.scores = ['precision_weighted']

        # Keys use the `step__param` form, presumably for a pipeline with
        # 'vect', 'tfidf' and 'clf' steps - TODO confirm against the caller.
        self.tuned_parameters_tf = {
            'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
            'tfidf__use_idf': [True, False],
            'tfidf__smooth_idf': [True, False],
            'tfidf__sublinear_tf': [True, False],
            'clf__loss': ['hinge', 'modified_huber'],
            'clf__penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'clf__class_weight': ['balanced', None],
            'clf__fit_intercept': [True, False]
        }
        # Same classifier options without the step prefix.
        self.tuned_parameters = {
            'loss': ['hinge', 'modified_huber'],
            'penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'class_weight': ['balanced', None],
            'fit_intercept': [True, False]
        }

        # Identity check: `== None` would go through the model's __eq__.
        if model is None:
            self.classifier = SGDClassifier()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
コード例 #4
0
    def __init__(self, dataset: "DataSet", loss='squared_hinge', max_iter=1000, verbose=0, model=None,
                 logger: "Logger" = None):
        """
        Set up an SVC-based classifier and its tuning configuration.

        Parameters
        ----------
        dataset: DataSet
            Forwarded to ``Classifier.__init__``.
        loss, max_iter, verbose:
            Currently unused by this constructor.
        model: optional (default=None)
            Pre-built estimator to wrap. When None, ``SVC()`` with default
            settings is created.
        logger: Logger, optional (default=None)
            Forwarded to ``Classifier.__init__``.
        """
        self.scores = ['precision_weighted']
        self.tuned_parameters = {'C': [1, 10, 100, 1000], 'gamma': [1e-3, 1e-4, 'auto'],
                                 'kernel': ['rbf', 'linear', 'poly'], 'class_weight': ['balanced', None],
                                 'degree': [3, 4, 5]}

        # Identity check: `== None` would go through the model's __eq__.
        if model is None:
            self.classifier = SVC()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
コード例 #5
0
    def __init__(self, dataset: "DataSet", verbose=0, model: MultinomialNB = None, logger=None):
        """
        Set up a multinomial naive Bayes classifier and its tuning configuration.

        Parameters
        ----------
        dataset: DataSet
            Forwarded to ``Classifier.__init__``.
        verbose:
            Currently unused by this constructor.
        model: MultinomialNB, optional (default=None)
            Pre-built estimator to wrap. When None, ``MultinomialNB()`` with
            default settings is created.
        logger: optional (default=None)
            Forwarded to ``Classifier.__init__``.
        """
        self.scores = ['precision_weighted']
        # Keys use the `step__param` form, presumably for a pipeline with
        # 'vect', 'tfidf' and 'clf' steps - TODO confirm against the caller.
        self.tuned_parameters = {'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
                                 'tfidf__use_idf': [True, False],
                                 'tfidf__smooth_idf': [True, False],
                                 'tfidf__sublinear_tf': [True, False],
                                 'clf__alpha': [1e-2, 1e-3, 1e-4],
                                 'clf__fit_prior': [True, False]}

        # Identity check: `== None` would go through the model's __eq__.
        if model is None:
            self.classifier = MultinomialNB()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
コード例 #6
0
    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=120,
                 verbose=0,
                 model=None,
                 logger: "Logger" = None):
        """
        Set up a gradient-boosting classifier and its tuning configuration.

        Parameters
        ----------
        dataset: DataSet
            Forwarded to ``Classifier.__init__``.
        n_estimators, verbose:
            Currently unused by this constructor; the default model is built
            with the fixed hyper-parameters below.
        model: optional (default=None)
            Pre-built estimator to wrap. When None, a default
            ``GradientBoostingClassifier`` is created.
        logger: Logger, optional (default=None)
            Forwarded to ``Classifier.__init__``.
        """
        self.scores = ['recall_weighted', 'precision_weighted']
        self.tuned_parameters = {
            'loss': ['deviance'],
            'learning_rate': [0.3, 0.1, 0.03, 0.01, 0.003, 0.001],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None]
        }

        # Identity check: `== None` would go through the model's __eq__.
        if model is None:
            self.classifier = GradientBoostingClassifier(n_estimators=100,
                                                         max_depth=5,
                                                         loss='deviance',
                                                         learning_rate=0.1)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
コード例 #7
0
    def populate_features(self):
        """
        Build ``self.pipeline`` from the base steps plus a soft-voting ensemble.

        The step list returned by ``Classifier.populate_features`` is extended
        with a ``'clf'`` step holding a ``VotingClassifier`` over logistic
        regression, a random forest and a linear SVC.
        """
        steps = Classifier.populate_features(self)

        # Ensemble members, in the same order as the weights given below.
        members = [
            ('lr', LogisticRegression(solver='lbfgs',
                                      multi_class='multinomial',
                                      max_iter=100,
                                      random_state=1)),
            ('rf', RandomForestClassifier(n_estimators=100,
                                          criterion='entropy')),
            # probability=True is set on the SVC used in the soft-voting ensemble.
            ('svm', SVC(kernel='linear', probability=True)),
        ]
        ensemble = VotingClassifier(estimators=members,
                                    voting='soft',
                                    weights=[4, 2, 5])

        steps.append(('clf', ensemble))
        self.pipeline = Pipeline(steps)
コード例 #8
0
ファイル: main.py プロジェクト: manisero/SemViii
if __name__ == "__main__":
    # --- Wiring -----------------------------------------------------------
    # `configuration_file` is not defined in this snippet; it must come from
    # elsewhere in the module.
    file_handler = FileHandler()
    configuration_provider = ConfigurationProvider(configuration_file)
    tree_browser = HTMLTreeBrowser()
    specification_registry = SpecificationRegistry(configuration_provider, tree_browser)
    content_downloader = WebPageContentDownloader()
    tree_builder = HTMLTreeBuilder()

    # The generator and the classifier receive the same four collaborators.
    shared_collaborators = (configuration_provider,
                            specification_registry,
                            content_downloader,
                            tree_builder)

    # --- Step 1: generate configuration from the classified input ----------
    configuration_generator = ConfigurationGenerator(*shared_collaborators)
    classified_input_name = configuration_provider.get_classified_input_file_name()
    url_map = file_handler.get_url_map(classified_input_name)
    configuration_generator.generate_configuration(url_map)

    # --- Step 2: classify the URLs of the unclassified input ---------------
    unclassified_input_name = configuration_provider.get_unclassified_input_file_name()
    url_map_to_classify = file_handler.get_url_map(unclassified_input_name)
    classifier = Classifier(*shared_collaborators)
    classification = classifier.classify(url_map_to_classify.keys())

    # --- Step 3: write the result and print a summary ----------------------
    output_name = configuration_provider.get_output_file_name()
    file_handler.write_classification(output_name, classification)
    SummaryPrinter().print_summary(classification, url_map_to_classify)
コード例 #9
0
 def _classify(self):
     """
     Run a ``Classifier`` over the preprocessed contents and record the result.

     Stores the classifier's ``is_webpage_topic_plural()`` answer in
     ``self.webpage_topic_is_plural``.
     """
     contents = self.preprocessed_contents
     page_limit = self.max_pages
     title = self._website.get_title()

     topic_classifier = Classifier(contents,
                                   max_pages=page_limit,
                                   webpage_title=title)
     topic_classifier.classify()
     self.webpage_topic_is_plural = topic_classifier.is_webpage_topic_plural()
コード例 #10
0
 def __init__(self, path, train_df, test_df, features):
     """Initialise the instance by passing every argument, unchanged, to ``Classifier.__init__``."""
     base_arguments = (path, train_df, test_df, features)
     Classifier.__init__(self, *base_arguments)
コード例 #11
0
    mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    featuresNames = ["heartrate", "temperature", "steps", "activity"]
    allFeaturesNames = [
        "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min",
        "temperature_std", "temperature_max", "temperature_avg", "temperature_min",
        "steps", "activity_score", "activity_minutes"

    ]
    mobileData = getData(mobileUrl)
    sensorData = getData(sensorUrl)
    groups = groupData(mobileData, sensorData)
    features, labels = extractFeatures(groups, featuresNames)
    clf = Classifier()
    clf.train(features, labels)
    rules = clf.getDangerousRules(allFeaturesNames)
    print rules
    maxSeconds = 1
    seconds = maxSeconds
    previousSensorData = None
    while True:
        if seconds > 0:
            print "\nWaiting for "+str(seconds)+" seconds."
            sleep(1)
            seconds -= 1
        else:
            print "\nStarting again..."
            latestSensorData = getLatestSensorData(sensorUrl)
            if previousSensorData is None or latestSensorData["datetime"] != previousSensorData["datetime"]:
コード例 #12
0
from classification.classifier import Classifier
from classification.text_processor import TextProcessor
from preprocess.stemmer import Stemmer
from preprocess.tokenizer import Tokenizer

text_processor = TextProcessor(Stemmer(), Tokenizer())
classifier = Classifier(text_processor)

# Interactive loop: classify every line read from standard input and print
# the result, until the input ends.
while True:
    try:
        user_input = input()
    except (EOFError, KeyboardInterrupt):
        # input() raises EOFError once stdin is exhausted (Ctrl-D, closed
        # pipe); Ctrl-C raises KeyboardInterrupt. Stop cleanly instead of
        # terminating with a traceback.
        break
    print(classifier.classify(user_input))
コード例 #13
0
 def populate_features(self):
     """
     Build ``self.pipeline`` from the base steps plus a random-forest step.

     The step list returned by ``Classifier.populate_features`` is extended
     with a ``'clf'`` step holding a ``RandomForestClassifier``.
     """
     steps = Classifier.populate_features(self)
     forest = RandomForestClassifier(n_estimators=100, criterion='entropy')
     steps.append(('clf', forest))
     self.pipeline = Pipeline(steps)
コード例 #14
0
if __name__ == "__main__":
    mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    featuresNames = ["heartrate", "temperature", "steps", "activity"]
    allFeaturesNames = [
        "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min",
        "temperature_std", "temperature_max", "temperature_avg",
        "temperature_min", "steps", "activity_score", "activity_minutes"
    ]
    mobileData = getData(mobileUrl)
    sensorData = getData(sensorUrl)
    groups = groupData(mobileData, sensorData)
    features, labels = extractFeatures(groups, featuresNames)
    clf = Classifier()
    clf.train(features, labels)
    rules = clf.getDangerousRules(allFeaturesNames)
    print rules
    maxSeconds = 1
    seconds = maxSeconds
    previousSensorData = None
    while True:
        if seconds > 0:
            print "\nWaiting for " + str(seconds) + " seconds."
            sleep(1)
            seconds -= 1
        else:
            print "\nStarting again..."
            latestSensorData = getLatestSensorData(sensorUrl)
            if previousSensorData is None or latestSensorData[
コード例 #15
0
from statistics.analyzer import Analyzer
from classification.classifier import Classifier

# Raw string: every backslash is literal. The original literal mixed escaped
# backslashes with '\R', which is an invalid escape sequence; the resulting
# path value is unchanged.
file_path = r'C:\Users\Roman\Documents\lab02\statistics\default_data.csv'

analyzer = Analyzer(file_path)

analyzer.execute()

# Tables read back through the analyzer.
# NOTE(review): `statistics_dictionary` is read from 'statistics.csv' while
# `freq_statistics` is read from 'statistics_dictionary.csv' - confirm the
# file names are not swapped.
statistics_dictionary = analyzer.read_csv_table('statistics.csv')
freq_statistics = analyzer.read_csv_table('statistics_dictionary.csv')
statistics_unnormal = analyzer.read_csv_table(
    'statistics_dictionary_unnormal_probability.csv')

# `classifier` is bound to the return value of calc_normal_probability(),
# not to the Classifier instance itself.
classifier = Classifier(statistics_dictionary, statistics_unnormal,
                        freq_statistics).calc_normal_probability()
コード例 #16
0
        for i in range(len(hp.lr_scheduler_step_size)):
            hp.lr_scheduler_step_size[i] = int(hp.lr_scheduler_step_size[i])
    elif hp.optimizer == 'adam' or hp.optimizer == 'sgdr' or hp.optimizer == 'adamw':
        hp.lr_scheduler_step_size = int(hp.lr_scheduler_step_size)

    hp.manual_seed = random.randint(1, 10000)  # fix seed
    random.seed(hp.manual_seed)
    np.random.seed(hp.manual_seed)
    torch.manual_seed(hp.manual_seed)

    if hp.name == 'modelnet':
        hp.num_class = 40
        if not os.path.exists("modelnet_log"):
            os.makedirs("modelnet_log")
        hp.name = os.path.join("modelnet_log", hp.name + hp.index)
        trainer = Classifier(hp)
    elif hp.name == 'modelnet10':
        hp.num_class = 10
        if not os.path.exists("modelnet10_log"):
            os.makedirs("modelnet10_log")
        hp.name = os.path.join("modelnet10_log", hp.name + hp.index)
        trainer = Classifier10(hp)
    elif hp.name == 'part_shapenet':
        if not os.path.exists("part_shapenet_log"):
            os.makedirs("part_shapenet_log")
        hp.name = os.path.join("part_shapenet_log", hp.name + hp.index)
        trainer = PartSegmentor(hp)
    elif hp.name == 's3dis':
        hp.num_class = 13
        if not os.path.exists("s3dis_log"):
            os.makedirs("s3dis_log")
コード例 #17
0
 def populate_features(self):
     """
     Build ``self.pipeline`` from the base steps plus a linear SVM step.

     The step list returned by ``Classifier.populate_features`` is extended
     with a ``'clf'`` step holding a default ``LinearSVC``.
     """
     steps = Classifier.populate_features(self)
     classifier_step = ('clf', LinearSVC())
     steps.append(classifier_step)
     self.pipeline = Pipeline(steps)