def classifier_chain(self):
    # initialize a Classifier Chains multi-label classifier
    # with a random forest base classifier
    print("build classifier...")
    classifier = ClassifierChain(RandomForestClassifier())
    # classifier = LabelPowerset(RandomForestClassifier())
    print("end...")

    print("start training...")
    classifier.fit(self.X_train, self.y_train)
    print("end...")

    # predict
    print("start test...")
    predictions = classifier.predict(self.X_test)
    print("end...")

    print("result as following:")
    result = hamming_loss(self.y_test, predictions)
    print("hamming_loss: ", result)
    print("accuracy score: ", accuracy_score(self.y_test, predictions))
    result = f1_score(self.y_test, predictions, average='micro')
    print("micro-f1_score: ", result)
def buildCCClassifier(xTrain, yTrain):
    # initialize a Classifier Chains multi-label classifier
    # with a Gaussian naive Bayes base classifier
    classifier = ClassifierChain(GaussianNB())
    # train
    classifier.fit(xTrain, yTrain)
    return classifier
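A minimal usage sketch for buildCCClassifier, assuming xTest and yTest are held-out feature/label matrices prepared the same way as the training data (hypothetical names):

from sklearn.metrics import hamming_loss

# Hypothetical held-out data; xTest and yTest are assumed to exist.
cc = buildCCClassifier(xTrain, yTrain)
predictions = cc.predict(xTest)          # sparse multi-label predictions
print("hamming loss:", hamming_loss(yTest, predictions))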
def check(request):
    vect = TfidfVectorizer(max_features=40000, stop_words='english')
    target = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]
    data = pd.read_csv('train.csv')
    test_data = pd.read_csv('D:/T.Y.BTECH/BML/Project/test.csv')
    X = data.comment_text
    test_X = test_data.comment_text
    xt = vect.fit_transform(X)
    yt = vect.transform(test_X)
    y_trans = data.iloc[:, 2:8]
    X_train, X_test, y_train, y_test = train_test_split(xt, y_trans, test_size=0.3)

    input_comment = ''
    output_class = None
    toxic = None
    severe_toxic = None
    obscene = None
    threat = None
    insult = None
    identity_hate = None

    posts = Post.objects.all()
    for post in posts:
        cmnt = post
        input_comment1 = str(cmnt)
        input_comment1 = [input_comment1]
        input_comment1 = vect.transform(input_comment1)

        from skmultilearn.problem_transform import ClassifierChain
        classifier = ClassifierChain(LogisticRegression(), require_dense=[False, True])
        classifier.fit(X_train, y_train)
        output_class = classifier.predict_proba(input_comment1).toarray()
        # load_model = joblib.load('knn.pkl')
        # load_model = joblib.load('lr.pkl')
        # output_class = load_model.predict_proba(input_comment1).toarray()
        # output_class = output_class.tolist()
        output_class = list(chain.from_iterable(output_class))
        toxic = output_class[0]
        severe_toxic = output_class[1]
        obscene = output_class[2]
        threat = output_class[3]
        insult = output_class[4]
        identity_hate = output_class[5]
        print(output_class)

    context = dict()
    context['input_comment'] = input_comment
    context['output_class1'] = toxic
    context['output_class2'] = severe_toxic
    context['output_class3'] = obscene
    context['output_class4'] = threat
    context['output_class5'] = insult
    context['output_class6'] = identity_hate
    return render(request, 'polls/comment_details.html', context)
def classify(self):
    from skmultilearn.problem_transform import ClassifierChain
    from sklearn.svm import SVC, LinearSVC
    import sklearn.metrics as metrics

    # =============================
    # ClassifierChain
    # =============================
    from sklearn.multiclass import OneVsRestClassifier
    # from sklearn.multioutput import ClassifierChain
    from sklearn.linear_model import LogisticRegression
    # cc = ClassifierChain(LogisticRegression())
    self.cc = ClassifierChain(LinearSVC())
    self.cc.fit(self.train_data, self.train_labels)
    # y_pred = self.cc.predict(self.test_data)
    # cc_art_f1 = metrics.f1_score(self.test_labels, y_pred, average='micro')

    # # initialize Classifier Chain multi-label classifier
    # # with an SVM classifier
    # # SVM in scikit only supports the X matrix in sparse representation
    # classifier = ClassifierChain(
    #     classifier=SVC(),
    #     require_dense=[False, True]
    # )
    # # train
    # classifier.fit(self.train_data, self.train_labels)
    # # predict
    # predictions = classifier.predict(self.test_data)
    # print(predictions)
    # art_f1 = metrics.f1_score(self.test_labels, predictions, average='macro')
    # return art_f1

    # =============================
    # KNeighborsClassifier
    # =============================
    from sklearn.neighbors import KNeighborsClassifier
    knc = KNeighborsClassifier()
    knc.fit(self.train_data, self.train_labels)
    # Y_pred = knc.predict(self.test_data)
    # knc_art_f1 = metrics.f1_score(self.test_labels, Y_pred, average='micro')

    # =============================
    # SGDClassifier
    # =============================
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.linear_model import SGDClassifier
    sgd = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                        random_state=0, max_iter=6, tol=None)
    clf = OneVsRestClassifier(sgd)
    clf.fit(self.train_data, self.train_labels)
def test_if_order_is_set(self):
    classifier = ClassifierChain(
        classifier=GaussianNB(),
        require_dense=[True, True],
        order=None
    )
    X, y = self.get_multilabel_data_for_tests(sparsity_indicator='sparse')[0]
    classifier.fit(X, y)
    self.assertEqual(classifier._order(), list(range(y.shape[1])))
def train(self):
    classifier = ClassifierChain(LogisticRegression())
    classifier.fit(self.x_data, self.y_data)
    predictions = classifier.predict(self.x_test)
    return {
        'accuracy': accuracy_score(self.y_test, predictions),
        'f1_score': f1_score(self.y_test, predictions, average='micro')
    }
def test_if_order_is_set_when_explicitly_given(self):
    X, y = self.get_multilabel_data_for_tests(sparsity_indicator='sparse')[0]
    reversed_chain = list(reversed(range(y.shape[1])))
    classifier = ClassifierChain(
        classifier=GaussianNB(),
        require_dense=[True, True],
        order=reversed_chain
    )
    classifier.fit(X, y)
    self.assertEqual(classifier._order(), reversed_chain)
class ClassifierChains:
    def __init__(self):
        self.model = ClassifierChain(LGBMClassifier())

    def set_grow_step(self, new_step):
        self.grow_boost_round = new_step

    def fit(self, X_train, y_train):
        self.model.fit(X_train, y_train)

    def predict(self, X_test):
        # .A converts the sparse prediction matrix to a dense ndarray
        return self.model.predict(X_test).A
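A brief usage sketch for ClassifierChains, assuming X_train, y_train, and X_test are prepared multi-label matrices (hypothetical names):

# Hypothetical usage; X_train, y_train, X_test are assumed to exist.
chains = ClassifierChains()
chains.fit(X_train, y_train)
dense_predictions = chains.predict(X_test)   # dense ndarray of 0/1 labels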
def classifiers(X_train, Y_train, X_test):
    classifier1 = BinaryRelevance(GaussianNB())
    classifier2 = ClassifierChain(GaussianNB())
    classifier3 = LabelPowerset(GaussianNB())
    classifier1.fit(X_train, Y_train)
    classifier2.fit(X_train, Y_train)
    classifier3.fit(X_train, Y_train)
    predictions1 = classifier1.predict(X_test)
    predictions2 = classifier2.predict(X_test)
    predictions3 = classifier3.predict(X_test)
    return predictions1, predictions2, predictions3
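A sketch of how the three prediction sets returned by classifiers might be compared, assuming Y_test holds the true label matrix for X_test (hypothetical name):

from sklearn.metrics import hamming_loss

# Hypothetical comparison of the three problem-transformation strategies.
p_br, p_cc, p_lp = classifiers(X_train, Y_train, X_test)
for name, preds in [("binary relevance", p_br),
                    ("classifier chain", p_cc),
                    ("label powerset", p_lp)]:
    print(name, hamming_loss(Y_test, preds))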
def majority_voting_multilabel_classification(train_filename, dev_filename, test_filename, attribute):
    df_train = pd.read_csv(train_filename)
    df_dev = pd.read_csv(dev_filename)
    df_test = pd.read_csv(test_filename)

    mlb = MultiLabelBinarizer()
    X_train = df_train.tweet.apply(clean_text)
    y_train_text = df_train[attribute].apply(lambda x: x.split('_'))
    y_train = mlb.fit_transform(y_train_text)

    X_dev = df_dev.tweet.apply(clean_text)
    y_dev_text = df_dev[attribute].apply(lambda x: x.split('_'))
    y_dev = mlb.transform(y_dev_text)  # reuse the binarizer fitted on the training labels

    X_test = df_test.tweet.apply(clean_text)
    y_test_text = df_test[attribute].apply(lambda x: x.split('_'))
    y_test = mlb.transform(y_test_text)

    # NOTE: the counts/tfidf below are unused; the Pipeline refits its own vectorizer
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(X_train)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
    Y = mlb.fit_transform(y_train_text)

    classifier = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', ClassifierChain(DummyClassifier()))])
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print('Accuracy %s' % accuracy_score(y_test, y_pred))
    print('Test macro F1 score is %s' % f1_score(y_test, y_pred, average='macro'))
    print('Test micro F1 score is %s' % f1_score(y_test, y_pred, average='micro'))
def train(classifier, X_train, X_test, y_train, y_test, strategy):
    """Computes a multi-label classification.

    This approach is used by the `one-vs-the-rest`, `classifier-chains`,
    and `label-powerset` strategies. For each classifier, the classes are
    fitted at the same time or in sequence. Since all the classes are
    represented by one and only one classifier, it is possible to gain
    knowledge about the classes by inspecting this unique classifier.

    Args:
        classifier: An instance of a scikit-learn classifier.
        X_train: A matrix containing features for training.
        X_test: A matrix containing features for testing.
        y_train: A dataframe containing labels for training.
        y_test: A dataframe containing labels for testing.
        strategy: A string defining which of the three strategies will be used.

    Returns:
        A classification model and its performance report.
    """
    if strategy == 'one-vs-the-rest':
        model = OneVsRestClassifier(classifier)
    elif strategy == 'classifier-chains':
        model = ClassifierChain(classifier)
    elif strategy == 'label-powerset':
        model = LabelPowerset(classifier)
    else:
        raise ValueError("Unknown strategy: %s" % strategy)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True,
                                   target_names=y_train.columns)
    return model, report
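A short usage sketch for train, assuming LogisticRegression as the base classifier and pre-split data where y_train and y_test are label DataFrames (hypothetical variable names):

from sklearn.linear_model import LogisticRegression

# Hypothetical call; X_train, X_test, y_train, y_test are assumed to exist.
model, report = train(LogisticRegression(), X_train, X_test, y_train, y_test,
                      strategy='classifier-chains')
print(report['micro avg'])   # micro-averaged precision/recall/f1 from the report dict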
def RecommendByClassifierChain(train_data, train_data_y, test_data, test_data_y, recommendNum=5):
    """Classifier chain recommender."""
    classifier = ClassifierChain(RandomForestClassifier(oob_score=True, max_depth=10,
                                                        min_samples_split=20))
    classifier.fit(train_data, train_data_y)
    predictions = classifier.predict_proba(test_data)
    predictions = predictions.todense().getA()
    recommendList = DataProcessUtils.getListFromProbable(predictions,
                                                         range(1, train_data_y.shape[1] + 1),
                                                         recommendNum)
    answerList = test_data_y
    print(predictions)
    print(test_data_y)
    print(recommendList)
    print(answerList)
    return [recommendList, answerList]
def Classifier_Chain(ytrain, yvalid, ytest, base_model):
    """
    Fits a Classifier Chain model with the given base classifier (e.g. LinearSVC),
    specifying either themes or subthemes for Y.
    Appends a row of results to results_dict with train, validation, and test accuracy,
    and recall, precision, and f1 scores for the validation and test data.
    """
    classifier_chain = ClassifierChain(base_model)
    model = classifier_chain.fit(X_train, ytrain)
    train = model.score(X_train, np.array(ytrain))
    valid = model.score(X_valid, np.array(yvalid))
    test = model.score(X_test, np.array(ytest))

    # validation scores
    predictions = model.predict(X_valid)
    recall = recall_score(np.array(yvalid), predictions, average='micro')
    precision = precision_score(np.array(yvalid), predictions, average='micro')
    f1 = f1_score(np.array(yvalid), predictions, average='micro')

    # test scores
    predictions_test = model.predict(X_test)
    recall_test = recall_score(np.array(ytest), predictions_test, average='micro')
    precision_test = precision_score(np.array(ytest), predictions_test, average='micro')
    f1_test = f1_score(np.array(ytest), predictions_test, average='micro')

    # all rounded to 3 decimal places
    case = {
        'Model': "TF-IDF + LinearSVC",
        'Train Accuracy': round(train, 3),
        'Validation Accuracy': round(valid, 3),
        'Test Accuracy': round(test, 3),
        'Valid Recall': round(recall, 3),
        'Valid Precision': round(precision, 3),
        'Valid F1': round(f1, 3),
        'Test Recall': round(recall_test, 3),
        'Test Precision': round(precision_test, 3),
        'Test F1': round(f1_test, 3)
    }
    results_dict.append(case)
def fit(self, X, y):
    """
    Fit the model to the data and train the classifier.

    Note: You should use the zodiac.classifier.cleaner on all the texts
    before you fit the data.

    :param X: (list) list of clean text (you can use zodiac.cleaner.TextCleaner)
    :param y: (numpy.array) array of labels
    """
    self.x_vec_ = self.vectorizer_.fit_transform(X)
    # initialize a classifier chains multi-label classifier
    self.classifier_ = ClassifierChain(SVC(probability=True))
    # train the SVC-based chain on the vectorized training data
    self.classifier_.fit(self.x_vec_, y)
def ClassifierChain_method(X_train, y_train, samples_leaf, samples_split):
    """
    Problem transformation --> classifier chain method.
    :param X_train: input data
    :param y_train: corresponding label data
    :return: the fitted classifier, or None on failure
    """
    try:
        classifier = ClassifierChain(
            DecisionTreeClassifier(min_samples_leaf=int(samples_leaf),
                                   min_samples_split=int(samples_split)))
        classifier.fit(X_train, y_train)
        return classifier
    except Exception as e:
        print("warning----classifier chain|ClassifierChain_method----" + str(e))
        return None
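A usage sketch for ClassifierChain_method, assuming X_train and y_train are prepared matrices and X_test is held-out data (hypothetical names and leaf/split values):

# Hypothetical call with a decision tree constrained by leaf/split sizes.
clf = ClassifierChain_method(X_train, y_train, samples_leaf=5, samples_split=10)
if clf is not None:
    predictions = clf.predict(X_test)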
def build_MajorityVoting(X_train, y_train, X_test, y_test):
    classifier = MajorityVotingClassifier(
        clusterer=FixedLabelSpaceClusterer(
            clusters=[[1, 2, 3], [0, 2, 5], [4, 5]]),
        classifier=ClassifierChain(classifier=GaussianNB()))
    classifier.fit(X_train, y_train)
    prediction = classifier.predict(X_test)
    print('Test accuracy is {}'.format(accuracy_score(y_test, prediction)))
def __init__(
        self,
        rdm_state=84,
        params={"classifier__C": [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]},
        niterations=5):
    self.model = ClassifierChain(
        LogisticRegression(random_state=rdm_state))
    self.params = params
    self.niterations = niterations
def train_model(X, y, strategy):
    X = np.array(X)
    y = np.array(y)
    clf = lightgbm.sklearn.LGBMClassifier(max_depth=9, num_leaves=500,
                                          n_estimators=50, n_jobs=-1)  # 0.8
    print(clf)
    if strategy == 'ovr':
        # OneVsRest strategy, also known as the BinaryRelevance strategy
        ovr = OneVsRestClassifier(clf)
        ovr.fit(X, y)
        save_model(ovr, "model/flow/ovr")
        return ovr
    elif strategy == 'classifier_chains':
        cc = ClassifierChain(clf)
        cc.fit(X, y)
        save_model(cc, "model/flow/cc")
        return cc
    else:
        raise Exception("Correct strategies: ovr or classifier_chains")
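A minimal usage sketch for train_model, assuming X is a list of feature rows, y a 0/1 label matrix, and X_new a hypothetical held-out feature matrix:

# Hypothetical call; returns the fitted chain after saving it under model/flow/cc.
cc_model = train_model(X, y, strategy='classifier_chains')
new_predictions = cc_model.predict(X_new)   # X_new: assumed held-out feature matrix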
def __init__(self):
    self.total_data_df = pd.read_csv(os.path.join("data", "cleaned_data.csv"),
                                     encoding="ISO-8859-1")
    self.data_df = self.total_data_df[~self.total_data_df.Tags.isnull()]
    self.total_records = len(self.data_df.index)
    self.train_df = self.data_df.tail(int(self.total_records * .67))
    self.test_df = self.data_df.head(int(self.total_records * .23))
    self.total_tag_list = self.get_tag_list()
    self.total_word_list = self.get_word_list()
    self.modified_train_df = pd.DataFrame()
    self.modified_test_df = pd.DataFrame()
    self.classifier = BernoulliNB()
    self.classifier_multilabel = ClassifierChain(BernoulliNB())
    self.classifier_dt = DecisionTreeRegressor(max_depth=2000)
    self.classifier_random_forest = RandomForestRegressor(max_depth=100)
    self.classifier_svm = svm.SVC(kernel='linear')
    self.test_tags = pd.DataFrame()
def main():
    print("Welcome to SVM text classifier. Please choose a dataset: \n"
          "Press 'b' for BBC news dataset\n"
          "Press 'r' for Reuters-21578 dataset\n"
          "Press 'g' for 20 News group\n"
          "Press 'q' for exit\n \n"
          "Enter your decision: ")
    model = input()
    if model == 'b':
        train_X, train_Y, test_X, test_Y = bbc()
    elif model == 'r':
        train_X, train_Y, test_X, test_Y = reut()
    elif model == 'g':
        train_X, train_Y, test_X, test_Y = tng()
    elif model == 'q':
        print("Program is closing...")
        sys.exit(0)
    else:
        print("Please choose one of described options")
        return

    # OVO
    print("\n--------------\nOVO")
    if (model == 'b') or (model == 'g'):
        classifier = OneVsOneClassifier(LinearSVC(random_state=42))
        classifier.fit(train_X, train_Y)
        predictions_SVM = classifier.predict(test_X)
        evaluate(test_Y, predictions_SVM)
        print_confm(test_Y, predictions_SVM, model)

        # OVA
        print("\n--------------\nOVA")
        classifier = OneVsRestClassifier(LinearSVC(random_state=42))
        classifier.fit(train_X, train_Y)
        predictions_SVM = classifier.predict(test_X)
        evaluate(test_Y, predictions_SVM)
        print_confm(test_Y, predictions_SVM, model)

    if model == 'r':
        # OVA (one-vs-all via a classifier chain over LinearSVC)
        print("\n--------------\nOVA")
        # classifier = OneVsRestClassifier(LinearSVC(random_state=42))
        classifier = ClassifierChain(classifier=LinearSVC(),
                                     require_dense=[False, True])
        classifier.fit(train_X, train_Y)
        predictions_SVM = classifier.predict(test_X)
        evaluate(test_Y, predictions_SVM)
        print_confm(test_Y, predictions_SVM, model)
def randomForestClassifierChain():
    print("Random forest classifier chain")
    start = time.time()
    classifier = ClassifierChain(classifier=RandomForestClassifier(),
                                 require_dense=[False, True])
    filename = "randomForestClassifierChain"
    # classifier.fit(train_x, train_y)
    # save
    # pickle.dump(classifier, open(filename, 'wb'))
    # load the model from disk
    classifier = pickle.load(open(filename, 'rb'))
    print('training time taken: ', round(time.time() - start, 0), 'seconds')
    predictions_new = classifier.predict(test_x)
    accuracy(test_y, predictions_new)
def knnClassifierChain():
    print("knn classifier chain")
    start = time.time()
    classifier = ClassifierChain(KNeighborsClassifier())
    filename = "knnChain"
    classifier.fit(train_x, train_y)
    # save
    pickle.dump(classifier, open(filename, 'wb'))
    # load the model from disk
    classifier = pickle.load(open(filename, 'rb'))
    print('training time taken: ', round(time.time() - start, 0), 'seconds')
    predictions_new = classifier.predict(test_x)
    accuracy(test_y, predictions_new)
def __init__(
        self,
        random_state=84,
        params={
            'classifier__C': [1, 10, 100, 1000],
            'classifier__gamma': [0.001, 0.0001],
            'classifier__kernel': ['rbf', 'linear']
        },
        niterations=10):
    self.model = ClassifierChain(SVC(random_state=random_state))
    self.params = params
    self.niterations = niterations
def gaussianNaiveBayes():
    print("Gaussian naive bayes")
    start = time.time()
    classifier = ClassifierChain(GaussianNB())
    filename = "gaussianNaiveBayes"
    classifier.fit(train_x, train_y)
    # save
    pickle.dump(classifier, open(filename, 'wb'))
    # load the model from disk
    classifier = pickle.load(open(filename, 'rb'))
    print('training time taken: ', round(time.time() - start, 0), 'seconds')
    predictions_new = classifier.predict(test_x)
    accuracy(test_y, predictions_new)
def supportVectorMachineChain():
    print("Support vector machine")
    start = time.time()
    classifier = ClassifierChain(classifier=svm.SVC(),
                                 require_dense=[False, True])
    filename = "SupportVectorMachine"
    classifier.fit(train_x, train_y)
    # save
    pickle.dump(classifier, open(filename, 'wb'))
    # load the model from disk
    classifier = pickle.load(open(filename, 'rb'))
    print('training time taken: ', round(time.time() - start, 0), 'seconds')
    predictions_new = classifier.predict(test_x)
    accuracy(test_y, predictions_new)
def train_model(X, y, strategy):
    X = np.array(X)
    y = np.array(y)
    # clf = SVC(C=1, kernel='rbf', probability=True, gamma='scale')  # svc without class_weight
    # clf = SVC(C=10, kernel='rbf', class_weight='balanced', probability=True, gamma='scale')  # svc with class_weight
    clf = XGBClassifier(subsample=0.8, colsample_bytree=0.8)
    # clf = XGBClassifier(learning_rate=0.1, n_estimators=150, max_depth=5,
    #                     min_child_weight=1, gamma=0.1, subsample=0.8, colsample_bytree=0.8,
    #                     objective='binary:logistic', nthread=4, scale_pos_weight=1)
    print(clf)
    if strategy == 'ovr':
        # OneVsRest strategy, also known as the BinaryRelevance strategy
        ovr = OneVsRestClassifier(clf)
        ovr.fit(X, y)
        save_model(ovr, "model/ovr")
        return ovr
    elif strategy == 'classifier_chains':
        cc = ClassifierChain(clf)
        cc.fit(X, y)
        save_model(cc, "model/cc")
        return cc
    else:
        raise Exception("Correct strategies: ovr or classifier_chains")
def __init__(
        self,
        random_state=84,
        n_estimators=20,
        params={
            'classifier__n_estimators': [250, 500, 1000, 1500],
            'classifier__min_samples_split': [2, 4, 8]
        },
        niterations=10):
    self.model = ClassifierChain(
        ExtraTreesClassifier(random_state=random_state,
                             n_estimators=n_estimators))
    self.params = params
    self.niterations = niterations
def build_Mklnn(X_train, y_train):
    parameters = {
        'classifier': [LabelPowerset(), ClassifierChain()],
        'classifier__classifier': [RandomForestClassifier()],
        'classifier__classifier__n_estimators': [10, 20, 50],
    }
    clf = GridSearchCV(LabelSpacePartitioningClassifier(), parameters, scoring='f1_macro')
    clf.fit(X_train, y_train)
    print(clf.best_params_, clf.best_score_)
def ClassifierChain():
    # Train-Test Split =======================================================
    print("setting up a classifier chain model...")
    from sklearn.model_selection import train_test_split
    train, test = train_test_split(df, test_size=0.33, shuffle=True)
    train_text = train['Book_Text']
    test_text = test['Book_Text']

    # TF-IDF ==================================================================
    from sklearn.feature_extraction.text import TfidfVectorizer
    vectorizer = TfidfVectorizer(strip_accents='unicode', analyzer='word',
                                 ngram_range=(1, 3), norm='l2')
    # fit the vectorizer on the training text only, to avoid leaking test data
    vectorizer.fit(train_text)
    x_train = vectorizer.transform(train_text)
    y_train = train.drop(labels=['Book_Text'], axis=1)
    x_test = vectorizer.transform(test_text)
    y_test = test.drop(labels=['Book_Text'], axis=1)

    # using classifier chains
    from skmultilearn.problem_transform import ClassifierChain
    from sklearn.linear_model import LogisticRegression
    # initialize classifier chains multi-label classifier
    classifier = ClassifierChain(LogisticRegression())
    # training logistic regression chain on train data
    classifier.fit(x_train, y_train)
    # predict
    predictions = classifier.predict(x_test)
    # accuracy
    print("Accuracy = ", accuracy_score(y_test, predictions))
    print("\n")
def __init__(
        self,
        random_state=84,
        n_estimators=20,
        params={
            "classifier__max_depth": [3, None],
            "classifier__max_features": [1, 3, 10],
            "classifier__min_samples_leaf": [1, 3, 10]
        },
        niterations=10):
    self.model = ClassifierChain(
        GradientBoostingClassifier(random_state=random_state,
                                   n_estimators=n_estimators))
    self.params = params
    self.niterations = niterations