Ejemplo n.º 1
0
    def __init__(self,
                 dataset,
                 estimators: [(str, BaseEstimator)],
                 logger=None,
                 voting='hard',
                 weights=None,
                 *args,
                 **kwargs):
        """
        Parameters
        ----------
        voting: str, {‘hard’, ‘soft’} (default=’hard’) 
            If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, 
            predicts the class label based on the argmax of the sums of the predicted probabilities, 
            which is recommended for an ensemble of well-calibrated classifiers.
        weights: array-like, shape = [n_classifiers], optional (default=`None`)
            Sequence of weights (float or int) to weight the occurrences of predicted class labels (hard voting) 
            or class probabilities before averaging (soft voting). Uses uniform weights if None.
        """

        self.classifier = VotingClassifier(estimators,
                                           voting=voting,
                                           weights=weights)

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger)
Ejemplo n.º 2
0
    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=500,
                 max_leaf_nodes=16,
                 verbose=0,
                 model: RandomForestClassifier = None,
                 logger=None):
        self.scores = scores = [
            'recall_weighted', 'precision_micro', 'precision_weighted'
        ]
        self.tuned_parameters = {
            'criterion': ['gini', 'entropy'],
            'class_weight': ['balanced', None],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None],
            'bootstrap': [True, False]
        }
        if model == None:
            self.classifier = RandomForestClassifier(n_estimators=150,
                                                     max_depth=5,
                                                     criterion='gini',
                                                     class_weight=None,
                                                     bootstrap=True)
            # self.classifier = RandomForestClassifier(n_estimators=n_estimators, max_leaf_nodes=16, n_jobs=-1,
            #                                         verbose=verbose)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 3
0
    def __init__(self,
                 dataset: "DataSet",
                 verbose=0,
                 model: SGDClassifier = None,
                 logger=None):
        self.scores = scores = ['precision_weighted']
        self.tuned_parameters_tf = {
            'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
            'tfidf__use_idf': [True, False],
            'tfidf__smooth_idf': [True, False],
            'tfidf__sublinear_tf': [True, False],
            'clf__loss': ['hinge', 'modified_huber'],
            'clf__penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'clf__class_weight': ['balanced', None],
            'clf__fit_intercept': [True, False]
        }
        self.tuned_parameters = {
            'loss': ['hinge', 'modified_huber'],
            'penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'class_weight': ['balanced', None],
            'fit_intercept': [True, False]
        }
        if model == None:
            self.classifier = SGDClassifier()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 4
0
    def __init__(self, dataset: "DataSet", loss='squared_hinge', max_iter=1000, verbose=0, model=None,
                 logger: "Logger" = None):
        # self.scores = scores = ['recall_weighted', 'precision_weighted']
        self.scores = scores = ['precision_weighted']
        self.tuned_parameters = {'C': [1, 10, 100, 1000], 'gamma': [1e-3, 1e-4, 'auto'],
                                 'kernel': ['rbf', 'linear', 'poly'], 'class_weight': ['balanced', None],
                                 'degree': [3, 4, 5]}
        if model == None:
            self.classifier = SVC()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 5
0
    def __init__(self, dataset: "DataSet", verbose=0, model: MultinomialNB = None, logger=None):
        self.scores = scores = ['precision_weighted']
        self.tuned_parameters = {'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
                                 'tfidf__use_idf': [True, False],
                                 'tfidf__smooth_idf': [True, False],
                                 'tfidf__sublinear_tf': [True, False],
                                 'clf__alpha': [1e-2, 1e-3, 1e-4],
                                 'clf__fit_prior': [True, False]}
        if model == None:
            self.classifier = MultinomialNB()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 6
0
    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=120,
                 verbose=0,
                 model=None,
                 logger: "Logger" = None):
        self.scores = scores = ['recall_weighted', 'precision_weighted']
        self.tuned_parameters = {
            'loss': ['deviance'],
            'learning_rate': [0.3, 0.1, 0.03, 0.01, 0.003, 0.001],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None]
        }
        if model == None:
            self.classifier = GradientBoostingClassifier(n_estimators=100,
                                                         max_depth=5,
                                                         loss='deviance',
                                                         learning_rate=0.1)
            # self.classifier = GradientBoostingClassifier(max_depth=2, n_estimators=n_estimators, verbose=verbose)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
    def populate_features(self):
        tasks = Classifier.populate_features(self)

        # Add classifier tasks
        clf1 = LogisticRegression(solver='lbfgs',
                                  multi_class='multinomial',
                                  max_iter=100,
                                  random_state=1)
        clf2 = RandomForestClassifier(n_estimators=100, criterion='entropy')
        clf3 = SVC(kernel='linear', probability=True)

        tasks.append(('clf',
                      VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                                   ('svm', clf3)],
                                       voting='soft',
                                       weights=[4, 2, 5])))
        self.pipeline = Pipeline(tasks)
Ejemplo n.º 8
0
if __name__ == "__main__":

    file_handler = FileHandler()

    configuration_provider = ConfigurationProvider(configuration_file)
    tree_browser = HTMLTreeBrowser()

    specification_registry = SpecificationRegistry(configuration_provider, tree_browser)
    content_downloader = WebPageContentDownloader()
    tree_builder = HTMLTreeBuilder()

    configuration_generator = ConfigurationGenerator(configuration_provider,
                                                     specification_registry,
                                                     content_downloader,
                                                     tree_builder)

    url_map = file_handler.get_url_map(configuration_provider.get_classified_input_file_name())

    configuration_generator.generate_configuration(url_map)

    url_map_to_classify = file_handler.get_url_map(configuration_provider.get_unclassified_input_file_name())

    classifier = Classifier(configuration_provider, specification_registry, content_downloader, tree_builder)

    classification = classifier.classify(url_map_to_classify.keys())

    file_handler.write_classification(configuration_provider.get_output_file_name(), classification)

    SummaryPrinter().print_summary(classification, url_map_to_classify)
Ejemplo n.º 9
0
 def _classify(self):
     classifier = Classifier(self.preprocessed_contents,
                             max_pages=self.max_pages,
                             webpage_title=self._website.get_title())
     classifier.classify()
     self.webpage_topic_is_plural = classifier.is_webpage_topic_plural()
 def __init__(self, path, train_df, test_df, features):
     Classifier.__init__(self, path, train_df, test_df, features)
Ejemplo n.º 11
0
    mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    featuresNames = ["heartrate", "temperature", "steps", "activity"]
    allFeaturesNames = [
        "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min",
        "temperature_std", "temperature_max", "temperature_avg", "temperature_min",
        "steps", "activity_score", "activity_minutes"

    ]
    mobileData = getData(mobileUrl)
    sensorData = getData(sensorUrl)
    groups = groupData(mobileData, sensorData)
    features, labels = extractFeatures(groups, featuresNames)
    clf = Classifier()
    clf.train(features, labels)
    rules = clf.getDangerousRules(allFeaturesNames)
    print rules
    maxSeconds = 1
    seconds = maxSeconds
    previousSensorData = None
    while True:
        if seconds > 0:
            print "\nWaiting for "+str(seconds)+" seconds."
            sleep(1)
            seconds -= 1
        else:
            print "\nStarting again..."
            latestSensorData = getLatestSensorData(sensorUrl)
            if previousSensorData is None or latestSensorData["datetime"] != previousSensorData["datetime"]:
Ejemplo n.º 12
0
from classification.classifier import Classifier
from classification.text_processor import TextProcessor
from preprocess.stemmer import Stemmer
from preprocess.tokenizer import Tokenizer

text_processor = TextProcessor(Stemmer(), Tokenizer())
classifier = Classifier(text_processor)

while True:
    user_input = input()
    print(classifier.classify(user_input))
Ejemplo n.º 13
0
 def populate_features(self):
     tasks = Classifier.populate_features(self)
     tasks.append(('clf',
                   RandomForestClassifier(n_estimators=100,
                                          criterion='entropy')))
     self.pipeline = Pipeline(tasks)
Ejemplo n.º 14
0
if __name__ == "__main__":
    mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \
                "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww"
    featuresNames = ["heartrate", "temperature", "steps", "activity"]
    allFeaturesNames = [
        "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min",
        "temperature_std", "temperature_max", "temperature_avg",
        "temperature_min", "steps", "activity_score", "activity_minutes"
    ]
    mobileData = getData(mobileUrl)
    sensorData = getData(sensorUrl)
    groups = groupData(mobileData, sensorData)
    features, labels = extractFeatures(groups, featuresNames)
    clf = Classifier()
    clf.train(features, labels)
    rules = clf.getDangerousRules(allFeaturesNames)
    print rules
    maxSeconds = 1
    seconds = maxSeconds
    previousSensorData = None
    while True:
        if seconds > 0:
            print "\nWaiting for " + str(seconds) + " seconds."
            sleep(1)
            seconds -= 1
        else:
            print "\nStarting again..."
            latestSensorData = getLatestSensorData(sensorUrl)
            if previousSensorData is None or latestSensorData[
Ejemplo n.º 15
0
from statistics.analyzer import Analyzer
from classification.classifier import Classifier

file_path = 'C:\\Users\Roman\\Documents\\lab02\\statistics\\default_data.csv'

analyzer = Analyzer(file_path)

analyzer.execute()

statistics_dictionary = analyzer.read_csv_table('statistics.csv')
freq_statistics = analyzer.read_csv_table('statistics_dictionary.csv')
statistics_unnormal = analyzer.read_csv_table(
    'statistics_dictionary_unnormal_probability.csv')
classifier = Classifier(statistics_dictionary, statistics_unnormal,
                        freq_statistics).calc_normal_probability()
Ejemplo n.º 16
0
        for i in range(len(hp.lr_scheduler_step_size)):
            hp.lr_scheduler_step_size[i] = int(hp.lr_scheduler_step_size[i])
    elif hp.optimizer == 'adam' or hp.optimizer == 'sgdr' or hp.optimizer == 'adamw':
        hp.lr_scheduler_step_size = int(hp.lr_scheduler_step_size)

    hp.manual_seed = random.randint(1, 10000)  # fix seed
    random.seed(hp.manual_seed)
    np.random.seed(hp.manual_seed)
    torch.manual_seed(hp.manual_seed)

    if hp.name == 'modelnet':
        hp.num_class = 40
        if not os.path.exists("modelnet_log"):
            os.makedirs("modelnet_log")
        hp.name = os.path.join("modelnet_log", hp.name + hp.index)
        trainer = Classifier(hp)
    elif hp.name == 'modelnet10':
        hp.num_class = 10
        if not os.path.exists("modelnet10_log"):
            os.makedirs("modelnet10_log")
        hp.name = os.path.join("modelnet10_log", hp.name + hp.index)
        trainer = Classifier10(hp)
    elif hp.name == 'part_shapenet':
        if not os.path.exists("part_shapenet_log"):
            os.makedirs("part_shapenet_log")
        hp.name = os.path.join("part_shapenet_log", hp.name + hp.index)
        trainer = PartSegmentor(hp)
    elif hp.name == 's3dis':
        hp.num_class = 13
        if not os.path.exists("s3dis_log"):
            os.makedirs("s3dis_log")
Ejemplo n.º 17
0
 def populate_features(self):
     tasks = Classifier.populate_features(self)
     tasks.append(('clf', LinearSVC()))
     self.pipeline = Pipeline(tasks)