# needs: from typing import List, Tuple
def __init__(self, dataset, estimators: List[Tuple[str, BaseEstimator]], logger=None,
             voting='hard', weights=None, *args, **kwargs):
    """
    Parameters
    ----------
    voting : str, {'hard', 'soft'} (default='hard')
        If 'hard', uses predicted class labels for majority rule voting.
        Else if 'soft', predicts the class label based on the argmax of the
        sums of the predicted probabilities, which is recommended for an
        ensemble of well-calibrated classifiers.
    weights : array-like, shape = [n_classifiers], optional (default=None)
        Sequence of weights (float or int) to weight the occurrences of
        predicted class labels (hard voting) or class probabilities before
        averaging (soft voting). Uses uniform weights if None.
    """
    self.classifier = VotingClassifier(estimators, voting=voting, weights=weights)
    SklearnClassifier.__init__(self, self.classifier)
    Classifier.__init__(self, dataset, logger)
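# A minimal, self-contained sketch of the hard/soft voting distinction using
# the underlying sklearn VotingClassifier directly. The wrapper class around
# the __init__ above is not shown in the snippet, so it is bypassed here, and
# the toy data is made up for illustration.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

X, y = make_classification(n_samples=200, random_state=0)
estimators = [
    ('lr', LogisticRegression(max_iter=1000)),
    ('rf', RandomForestClassifier(n_estimators=50, random_state=0)),
    ('svm', SVC(probability=True, random_state=0)),  # soft voting needs predict_proba
]
ensemble = VotingClassifier(estimators, voting='soft', weights=[2, 1, 2])
ensemble.fit(X, y)
print(ensemble.predict(X[:5]))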
def __init__(self, dataset: "DataSet", n_estimators=500, max_leaf_nodes=16, verbose=0, model: RandomForestClassifier = None, logger=None): self.scores = scores = [ 'recall_weighted', 'precision_micro', 'precision_weighted' ] self.tuned_parameters = { 'criterion': ['gini', 'entropy'], 'class_weight': ['balanced', None], 'n_estimators': [10, 30, 50, 100, 150, 200], 'max_depth': [2, 3, 4, 5, 6, 7, None], 'bootstrap': [True, False] } if model == None: self.classifier = RandomForestClassifier(n_estimators=150, max_depth=5, criterion='gini', class_weight=None, bootstrap=True) # self.classifier = RandomForestClassifier(n_estimators=n_estimators, max_leaf_nodes=16, n_jobs=-1, # verbose=verbose) else: self.classifier = model SklearnClassifier.__init__(self, self.classifier) Classifier.__init__(self, dataset, logger=logger)
def __init__(self, dataset: "DataSet", verbose=0, model: SGDClassifier = None, logger=None): self.scores = scores = ['precision_weighted'] self.tuned_parameters_tf = { 'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)], 'tfidf__use_idf': [True, False], 'tfidf__smooth_idf': [True, False], 'tfidf__sublinear_tf': [True, False], 'clf__loss': ['hinge', 'modified_huber'], 'clf__penalty': ['none', 'l1', 'l2', 'elasticnet'], 'clf__class_weight': ['balanced', None], 'clf__fit_intercept': [True, False] } self.tuned_parameters = { 'loss': ['hinge', 'modified_huber'], 'penalty': ['none', 'l1', 'l2', 'elasticnet'], 'class_weight': ['balanced', None], 'fit_intercept': [True, False] } if model == None: self.classifier = SGDClassifier() else: self.classifier = model SklearnClassifier.__init__(self, self.classifier) Classifier.__init__(self, dataset, logger=logger)
def __init__(self, dataset: "DataSet", loss='squared_hinge', max_iter=1000, verbose=0, model=None, logger: "Logger" = None): # self.scores = scores = ['recall_weighted', 'precision_weighted'] self.scores = scores = ['precision_weighted'] self.tuned_parameters = {'C': [1, 10, 100, 1000], 'gamma': [1e-3, 1e-4, 'auto'], 'kernel': ['rbf', 'linear', 'poly'], 'class_weight': ['balanced', None], 'degree': [3, 4, 5]} if model == None: self.classifier = SVC() else: self.classifier = model SklearnClassifier.__init__(self, self.classifier) Classifier.__init__(self, dataset, logger=logger)
def __init__(self, dataset: "DataSet", verbose=0, model: MultinomialNB = None, logger=None): self.scores = scores = ['precision_weighted'] self.tuned_parameters = {'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)], 'tfidf__use_idf': [True, False], 'tfidf__smooth_idf': [True, False], 'tfidf__sublinear_tf': [True, False], 'clf__alpha': [1e-2, 1e-3, 1e-4], 'clf__fit_prior': [True, False]} if model == None: self.classifier = MultinomialNB() else: self.classifier = model SklearnClassifier.__init__(self, self.classifier) Classifier.__init__(self, dataset, logger=logger)
def __init__(self, dataset: "DataSet", n_estimators=120, verbose=0, model=None, logger: "Logger" = None): self.scores = scores = ['recall_weighted', 'precision_weighted'] self.tuned_parameters = { 'loss': ['deviance'], 'learning_rate': [0.3, 0.1, 0.03, 0.01, 0.003, 0.001], 'n_estimators': [10, 30, 50, 100, 150, 200], 'max_depth': [2, 3, 4, 5, 6, 7, None] } if model == None: self.classifier = GradientBoostingClassifier(n_estimators=100, max_depth=5, loss='deviance', learning_rate=0.1) # self.classifier = GradientBoostingClassifier(max_depth=2, n_estimators=n_estimators, verbose=verbose) else: self.classifier = model SklearnClassifier.__init__(self, self.classifier) Classifier.__init__(self, dataset, logger=logger)
def populate_features(self):
    tasks = Classifier.populate_features(self)
    # Add classifier tasks
    clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                              max_iter=100, random_state=1)
    clf2 = RandomForestClassifier(n_estimators=100, criterion='entropy')
    clf3 = SVC(kernel='linear', probability=True)
    tasks.append(('clf', VotingClassifier(
        estimators=[('lr', clf1), ('rf', clf2), ('svm', clf3)],
        voting='soft', weights=[4, 2, 5])))
    self.pipeline = Pipeline(tasks)
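# How the weighted soft vote above resolves a single prediction, sketched by
# hand: each estimator's predict_proba row is scaled by its weight, the rows
# are summed per class, and the argmax wins. The probabilities below are made
# up for illustration.
import numpy as np

proba = np.array([
    [0.60, 0.40],   # lr
    [0.30, 0.70],   # rf
    [0.55, 0.45],   # svm
])
weights = np.array([4, 2, 5])
combined = (weights[:, None] * proba).sum(axis=0)  # [5.75, 5.25]
predicted_class = combined.argmax()                # class 0 wins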
if __name__ == "__main__": file_handler = FileHandler() configuration_provider = ConfigurationProvider(configuration_file) tree_browser = HTMLTreeBrowser() specification_registry = SpecificationRegistry(configuration_provider, tree_browser) content_downloader = WebPageContentDownloader() tree_builder = HTMLTreeBuilder() configuration_generator = ConfigurationGenerator(configuration_provider, specification_registry, content_downloader, tree_builder) url_map = file_handler.get_url_map(configuration_provider.get_classified_input_file_name()) configuration_generator.generate_configuration(url_map) url_map_to_classify = file_handler.get_url_map(configuration_provider.get_unclassified_input_file_name()) classifier = Classifier(configuration_provider, specification_registry, content_downloader, tree_builder) classification = classifier.classify(url_map_to_classify.keys()) file_handler.write_classification(configuration_provider.get_output_file_name(), classification) SummaryPrinter().print_summary(classification, url_map_to_classify)
def _classify(self):
    classifier = Classifier(self.preprocessed_contents, max_pages=self.max_pages,
                            webpage_title=self._website.get_title())
    classifier.classify()
    self.webpage_topic_is_plural = classifier.is_webpage_topic_plural()
def __init__(self, path, train_df, test_df, features):
    Classifier.__init__(self, path, train_df, test_df, features)
mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \ "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww" sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \ "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww" featuresNames = ["heartrate", "temperature", "steps", "activity"] allFeaturesNames = [ "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min", "temperature_std", "temperature_max", "temperature_avg", "temperature_min", "steps", "activity_score", "activity_minutes" ] mobileData = getData(mobileUrl) sensorData = getData(sensorUrl) groups = groupData(mobileData, sensorData) features, labels = extractFeatures(groups, featuresNames) clf = Classifier() clf.train(features, labels) rules = clf.getDangerousRules(allFeaturesNames) print rules maxSeconds = 1 seconds = maxSeconds previousSensorData = None while True: if seconds > 0: print "\nWaiting for "+str(seconds)+" seconds." sleep(1) seconds -= 1 else: print "\nStarting again..." latestSensorData = getLatestSensorData(sensorUrl) if previousSensorData is None or latestSensorData["datetime"] != previousSensorData["datetime"]:
from classification.classifier import Classifier
from classification.text_processor import TextProcessor
from preprocess.stemmer import Stemmer
from preprocess.tokenizer import Tokenizer

text_processor = TextProcessor(Stemmer(), Tokenizer())
classifier = Classifier(text_processor)

while True:
    user_input = input()
    print(classifier.classify(user_input))
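# `TextProcessor` is imported from the project, so its internals are not shown
# here. A plausible minimal sketch, assuming it tokenizes and then stems each
# token; the class body and the `process` method name are assumptions made for
# illustration only.
class TextProcessor:
    def __init__(self, stemmer, tokenizer):
        self.stemmer = stemmer
        self.tokenizer = tokenizer

    def process(self, text):
        # Split the text into tokens, then reduce each token to its stem.
        return [self.stemmer.stem(token) for token in self.tokenizer.tokenize(text)]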
def populate_features(self):
    tasks = Classifier.populate_features(self)
    tasks.append(('clf', RandomForestClassifier(n_estimators=100, criterion='entropy')))
    self.pipeline = Pipeline(tasks)
if __name__ == "__main__": mobileUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \ "mobileData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww" sensorUrl = "http://sensing-ms-api.mybluemix.net/api/Patients/0/" \ "sensorData?access_token=QCOI7AjXi7Is90f9hK0BQsOQuKxoU2ISnBa9HLt6Bmsg0nvQbOqPAbELCzTsl2ww" featuresNames = ["heartrate", "temperature", "steps", "activity"] allFeaturesNames = [ "heartrate_std", "heartrate_max", "heartrate_avg", "heartrate_min", "temperature_std", "temperature_max", "temperature_avg", "temperature_min", "steps", "activity_score", "activity_minutes" ] mobileData = getData(mobileUrl) sensorData = getData(sensorUrl) groups = groupData(mobileData, sensorData) features, labels = extractFeatures(groups, featuresNames) clf = Classifier() clf.train(features, labels) rules = clf.getDangerousRules(allFeaturesNames) print rules maxSeconds = 1 seconds = maxSeconds previousSensorData = None while True: if seconds > 0: print "\nWaiting for " + str(seconds) + " seconds." sleep(1) seconds -= 1 else: print "\nStarting again..." latestSensorData = getLatestSensorData(sensorUrl) if previousSensorData is None or latestSensorData[
from statistics.analyzer import Analyzer
from classification.classifier import Classifier

file_path = 'C:\\Users\\Roman\\Documents\\lab02\\statistics\\default_data.csv'

analyzer = Analyzer(file_path)
analyzer.execute()
statistics_dictionary = analyzer.read_csv_table('statistics.csv')
freq_statistics = analyzer.read_csv_table('statistics_dictionary.csv')
statistics_unnormal = analyzer.read_csv_table(
    'statistics_dictionary_unnormal_probability.csv')

classifier = Classifier(statistics_dictionary, statistics_unnormal,
                        freq_statistics).calc_normal_probability()
# The opening branch of this optimizer check is cut off in the fragment; the
# guard below is an assumption added so the `elif` parses. Optimizers with a
# multi-step scheduler take a list of milestones, the others a single int.
if hp.optimizer == 'sgd':  # assumed branch (truncated in the source)
    for i in range(len(hp.lr_scheduler_step_size)):
        hp.lr_scheduler_step_size[i] = int(hp.lr_scheduler_step_size[i])
elif hp.optimizer == 'adam' or hp.optimizer == 'sgdr' or hp.optimizer == 'adamw':
    hp.lr_scheduler_step_size = int(hp.lr_scheduler_step_size)

hp.manual_seed = random.randint(1, 10000)  # fix seed
random.seed(hp.manual_seed)
np.random.seed(hp.manual_seed)
torch.manual_seed(hp.manual_seed)

if hp.name == 'modelnet':
    hp.num_class = 40
    if not os.path.exists("modelnet_log"):
        os.makedirs("modelnet_log")
    hp.name = os.path.join("modelnet_log", hp.name + hp.index)
    trainer = Classifier(hp)
elif hp.name == 'modelnet10':
    hp.num_class = 10
    if not os.path.exists("modelnet10_log"):
        os.makedirs("modelnet10_log")
    hp.name = os.path.join("modelnet10_log", hp.name + hp.index)
    trainer = Classifier10(hp)
elif hp.name == 'part_shapenet':
    if not os.path.exists("part_shapenet_log"):
        os.makedirs("part_shapenet_log")
    hp.name = os.path.join("part_shapenet_log", hp.name + hp.index)
    trainer = PartSegmentor(hp)
elif hp.name == 's3dis':
    hp.num_class = 13
    if not os.path.exists("s3dis_log"):
        os.makedirs("s3dis_log")
def populate_features(self):
    tasks = Classifier.populate_features(self)
    tasks.append(('clf', LinearSVC()))
    self.pipeline = Pipeline(tasks)