Python SvmClassifier Examples

Programming Language: Python

Namespace/Package Name: classify.classifier

Class/Type: SvmClassifier

Examples at hotexamples.com: 4

Python SvmClassifier - 4 examples found. These are the top-rated real-world Python examples of classify.classifier.SvmClassifier extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

evaluation(4)

normalize(4)

testing(4)

training(4)

Example #1

Show file

File: run.py Project: persistforever/KnowledgeableSVM

 def run_classify(self, train_path, test_path, train_set, test_set):
     loader = PickleMarket()
     # read train
     feature_names = loader.load_market(train_path)[0]
     train_articles = list()
     for type in train_set.split("#"):
         path = train_path.replace(u"car", type)
         train_articles.extend(loader.load_market(path)[1:])
     train_dataset = np.array([np.array(article[1:-1], dtype=float) for article in train_articles])
     print train_dataset.shape
     train_label = np.array([np.array(int(article[-1])) for article in train_articles])
     # read test
     test_articles = list()
     for type in test_set.split("#"):
         path = test_path.replace(u"car", type)
         test_articles.extend(loader.load_market(path)[1:])
     test_dataset = np.array([np.array(article[1:-1]) for article in test_articles])
     print test_dataset.shape
     test_label = np.array([np.array(int(article[-1])) for article in test_articles])
     # train cls
     classifier = SvmClassifier()
     train_dataset = classifier.normalize(train_dataset, method="mapminmax")
     test_dataset = classifier.normalize(test_dataset, method="mapminmax")
     classifier.training(train_dataset, train_label, c=10, kernel="linear")
     # test cls
     test_prob = classifier.testing(test_dataset, type="prob")
     test_class = classifier.testing(test_dataset, type="label")
     print "performance is", classifier.evaluation(test_label, test_prob, test_class)
     print "finish"

Example #2

Show file

File: run.py Project: persistforever/KnowledgableArticle

 def run_classify(self, train_path, test_path) :
     loader = PickleMarket()
     articles = list()
     # for type in [u'car', u'finance', u'web'] :
     #     path = train_path.replace(u'all', type)
     articles.extend(loader.load_market(train_path))
     train_dataset = np.array([np.array(article[1:-1]) for article in articles])
     print train_dataset.shape
     train_label = np.array([np.array(int(article[-1])) for article in articles])
     articles = loader.load_market(test_path)
     test_dataset = np.array([np.array(article[1:-1]) for article in articles])
     test_label = np.array([np.array(int(article[-1])) for article in articles])
     classifier = SvmClassifier()
     train_dataset = classifier.normalize(train_dataset, method='mapminmax')
     test_dataset = classifier.normalize(test_dataset, method='mapminmax')
     classifier.training(train_dataset, train_label, cset=range(10, 100, 10), kernel='linear')
     test_prob = classifier.testing(test_dataset, type='prob')
     test_class = classifier.testing(test_dataset, type='label')
     print 'performance is', classifier.evaluation(test_label, test_prob, test_class)
     print 'finish'

Example #3

Show file

File: run.py Project: persistforever/KnowledgableArticle

 def run_optimize_params(self, train_article_market_path, test_article_market_path, \
     pos_path, punc_path, klword_path, logger_path) :
     loader = PickleMarket()
     logger = list()
     logger.append(['w', 'combined', 'weight', 'kernel', 'c', 'norm', 'car_car', \
         'car_finance', 'car_web', 'finance_car', 'finance_finance', 'finance_web', \
         'web_car', 'web_fiannce', 'web_web', 'merge_car', 'merge_finance', 'merge_web'])
     domains = [u'car', u'finance', u'web']
     wset = [5, 10, 15, 20]
     combinedset = [True, False]
     weightset = [1, 2, 5]
     kernelset = ['linear', 'poly', 'rbf']
     cset = [range(10, 100, 10), range(100, 1000, 100)]
     normset = ['mapminmax', 'zscore']
     token_selector = selector.TokenExtractor(punc_path)
     for w in wset :
         for combined in combinedset :
             pos_selector = selector.PosExtractor(pos_path, w=w, combined=combined)
             for weight in weightset :
                 word_selector = selector.WordExtractor(klword_path, weight=weight)
                 train_featuresets, test_featuresets = list(), list()
                 for step in range(len(domains)) :
                     train_featuresets.append(list())
                     test_featuresets.append(list())
                 for index, domain in enumerate(domains) :
                     train_articles = loader.load_market(train_article_market_path.replace(u'all', domain))
                     test_articles = loader.load_market(test_article_market_path.replace(u'all', domain))
                     # train
                     length = len(train_articles) - 1
                     for idx, article in enumerate(train_articles) :
                         article['features'] = list()
                         article['features'].extend(pos_selector.extract_feature(article['participle_content']))
                         article['features'].extend(token_selector.extract_feature(article['title'], article['content'], \
                                                                                     article['participle_title'], \
                                                                                     article['participle_content']))
                         article['features'].extend(word_selector.extract_feature(article['participle_title'], \
                                                                                     article['participle_content']))
                         print 'finish rate is %.2f%%\r' % (100.0*idx/length),
                     print 'finish rate is %.2f%%\r' % (100.0*idx/length)
                     train_featuresets[index] = [[article['id']] + article['features'] + [article['label']] for article in train_articles]
                     # test
                     length = len(test_articles) - 1
                     for idx, article in enumerate(test_articles) :
                         article['features'] = list()
                         article['features'].extend(pos_selector.extract_feature(article['participle_content']))
                         article['features'].extend(token_selector.extract_feature(article['title'], article['content'], \
                                                                                     article['participle_title'], \
                                                                                     article['participle_content']))
                         article['features'].extend(word_selector.extract_feature(article['participle_title'], \
                                                                                     article['participle_content']))
                         print 'finish rate is %.2f%%\r' % (100.0*idx/length),
                     print 'finish rate is %.2f%%\r' % (100.0*idx/length)
                     test_featuresets[index] = [[article['id']] + article['features'] + [article['label']] for article in test_articles]
                 for kernel in kernelset :
                     for c in cset :
                         for norm in normset :
                             evl = list()
                             for train_idx in range(0, len(domains)) :
                                 for test_idx in range(0, len(domains)) :
                                     train_dataset = np.array([np.array(article[1:-1]) for article in train_featuresets[train_idx]])
                                     train_label = np.array([np.array(int(article[-1])) for article in train_featuresets[train_idx]])
                                     test_dataset = np.array([np.array(article[1:-1]) for article in test_featuresets[test_idx]])
                                     test_label = np.array([np.array(int(article[-1])) for article in test_featuresets[test_idx]])
                                     classifier = SvmClassifier()
                                     train_dataset = classifier.normalize(train_dataset, method=norm)
                                     test_dataset = classifier.normalize(test_dataset, method=norm)
                                     classifier.training(train_dataset, train_label, cset=c, kernel=kernel)
                                     test_prob = classifier.testing(test_dataset, type='prob')
                                     test_class = classifier.testing(test_dataset, type='label')
                                     evl.append(classifier.evaluation(test_label, test_prob, test_class)[1])
                             print 'single finished ...'
                             # merge
                             articles = list()
                             for train_idx in range(0, len(domains)) :
                                 articles.extend(train_featuresets[train_idx])
                             train_dataset = np.array([np.array(article[1:-1]) for article in articles])
                             train_label = np.array([np.array(int(article[-1])) for article in articles])
                             for test_idx in range(0, len(domains)) :
                                 test_dataset = np.array([np.array(article[1:-1]) for article in test_featuresets[test_idx]])
                                 test_label = np.array([np.array(int(article[-1])) for article in test_featuresets[test_idx]])
                                 classifier = SvmClassifier()
                                 train_dataset = classifier.normalize(train_dataset, method=norm)
                                 test_dataset = classifier.normalize(test_dataset, method=norm)
                                 classifier.training(train_dataset, train_label, cset=c, kernel=kernel)
                                 test_prob = classifier.testing(test_dataset, type='prob')
                                 test_class = classifier.testing(test_dataset, type='label')
                                 evl.append(classifier.evaluation(test_label, test_prob, test_class)[1])
                             print 'merge finished ...'
                             print 'performance is', 1.0*sum(evl)/len(evl)
                             log = [w, combined, weight, kernel, c[0], norm]
                             log.extend(evl)
                             logger.append(log)
     file_operator = TextFileOperator()
     file_operator.writing(logger, logger_path)
     print 'finish'

Example #4

Show file

File: run.py Project: persistforever/KnowledgeableSVM

 def run_optimize_params(
     self, train_article_market_path, test_article_market_path, pos_path, punc_path, klword_path, logger_path
 ):
     loader = PickleMarket()
     logger = list()
     logger.append(
         [
             "w",
             "combined",
             "weight",
             "kernel",
             "c",
             "norm",
             "car_car",
             "car_finance",
             "car_web",
             "finance_car",
             "finance_finance",
             "finance_web",
             "web_car",
             "web_fiannce",
             "web_web",
             "merge_car",
             "merge_finance",
             "merge_web",
         ]
     )
     domains = [u"car", u"finance", u"web"]
     wset = [5, 10, 15, 20]
     combinedset = [True, False]
     weightset = [1, 2, 5]
     kernelset = ["linear", "poly", "rbf"]
     cset = [range(10, 100, 10), range(100, 1000, 100)]
     normset = ["mapminmax", "zscore"]
     token_selector = selector.TokenExtractor(punc_path)
     for w in wset:
         for combined in combinedset:
             pos_selector = selector.PosExtractor(pos_path, w=w, combined=combined)
             for weight in weightset:
                 word_selector = selector.WordExtractor(klword_path, weight=weight)
                 train_featuresets, test_featuresets = list(), list()
                 for step in range(len(domains)):
                     train_featuresets.append(list())
                     test_featuresets.append(list())
                 for index, domain in enumerate(domains):
                     train_articles = loader.load_market(train_article_market_path.replace(u"all", domain))
                     test_articles = loader.load_market(test_article_market_path.replace(u"all", domain))
                     # train
                     length = len(train_articles) - 1
                     for idx, article in enumerate(train_articles):
                         article["features"] = list()
                         article["features"].extend(pos_selector.extract_feature(article["participle_content"]))
                         article["features"].extend(
                             token_selector.extract_feature(
                                 article["title"],
                                 article["content"],
                                 article["participle_title"],
                                 article["participle_content"],
                             )
                         )
                         article["features"].extend(
                             word_selector.extract_feature(
                                 article["participle_title"], article["participle_content"]
                             )
                         )
                         print "finish rate is %.2f%%\r" % (100.0 * idx / length),
                     print "finish rate is %.2f%%\r" % (100.0 * idx / length)
                     train_featuresets[index] = [
                         [article["id"]] + article["features"] + [article["label"]] for article in train_articles
                     ]
                     # test
                     length = len(test_articles) - 1
                     for idx, article in enumerate(test_articles):
                         article["features"] = list()
                         article["features"].extend(pos_selector.extract_feature(article["participle_content"]))
                         article["features"].extend(
                             token_selector.extract_feature(
                                 article["title"],
                                 article["content"],
                                 article["participle_title"],
                                 article["participle_content"],
                             )
                         )
                         article["features"].extend(
                             word_selector.extract_feature(
                                 article["participle_title"], article["participle_content"]
                             )
                         )
                         print "finish rate is %.2f%%\r" % (100.0 * idx / length),
                     print "finish rate is %.2f%%\r" % (100.0 * idx / length)
                     test_featuresets[index] = [
                         [article["id"]] + article["features"] + [article["label"]] for article in test_articles
                     ]
                 for kernel in kernelset:
                     for c in cset:
                         for norm in normset:
                             evl = list()
                             for train_idx in range(0, len(domains)):
                                 for test_idx in range(0, len(domains)):
                                     train_dataset = np.array(
                                         [np.array(article[1:-1]) for article in train_featuresets[train_idx]]
                                     )
                                     train_label = np.array(
                                         [np.array(int(article[-1])) for article in train_featuresets[train_idx]]
                                     )
                                     test_dataset = np.array(
                                         [np.array(article[1:-1]) for article in test_featuresets[test_idx]]
                                     )
                                     test_label = np.array(
                                         [np.array(int(article[-1])) for article in test_featuresets[test_idx]]
                                     )
                                     classifier = SvmClassifier()
                                     train_dataset = classifier.normalize(train_dataset, method=norm)
                                     test_dataset = classifier.normalize(test_dataset, method=norm)
                                     classifier.training(train_dataset, train_label, cset=c, kernel=kernel)
                                     test_prob = classifier.testing(test_dataset, type="prob")
                                     test_class = classifier.testing(test_dataset, type="label")
                                     evl.append(classifier.evaluation(test_label, test_prob, test_class)[1])
                             print "single finished ..."
                             # merge
                             articles = list()
                             for train_idx in range(0, len(domains)):
                                 articles.extend(train_featuresets[train_idx])
                             train_dataset = np.array([np.array(article[1:-1]) for article in articles])
                             train_label = np.array([np.array(int(article[-1])) for article in articles])
                             for test_idx in range(0, len(domains)):
                                 test_dataset = np.array(
                                     [np.array(article[1:-1]) for article in test_featuresets[test_idx]]
                                 )
                                 test_label = np.array(
                                     [np.array(int(article[-1])) for article in test_featuresets[test_idx]]
                                 )
                                 classifier = SvmClassifier()
                                 train_dataset = classifier.normalize(train_dataset, method=norm)
                                 test_dataset = classifier.normalize(test_dataset, method=norm)
                                 classifier.training(train_dataset, train_label, cset=c, kernel=kernel)
                                 test_prob = classifier.testing(test_dataset, type="prob")
                                 test_class = classifier.testing(test_dataset, type="label")
                                 evl.append(classifier.evaluation(test_label, test_prob, test_class)[1])
                             print "merge finished ..."
                             print "performance is", 1.0 * sum(evl) / len(evl)
                             log = [w, combined, weight, kernel, c[0], norm]
                             log.extend(evl)
                             logger.append(log)
     file_operator = TextFileOperator()
     file_operator.writing(logger, logger_path)
     print "finish"