예제 #1
0
    cols = input()
    cols_list.append(cols)
# Scale every collected column name except the final entry.
# NOTE(review): presumably the last entry is a terminator typed by the user
# in the loop above -- confirm.
data.scale_data(cols_list[:-1])

# From here on cols_list holds whatever data.get_cols() reports.
cols_list = data.get_cols()
y_column_name = input("enter y column name: ")
X_train, X_test, y_train, y_test = data.spilt_data(y_column_name)

model_type = input("Enter R for Regression and C for Classification: ")

if model_type == "C":
    # TODO: add mode list
    print(f"Your options are: {Classifier_list!s}")
    modelname = input("Enter model to be used: ")
    classifier = Classification(X_train, X_test, y_train, y_test, modelname)
    classifier.predict()
    classifier.accuracy()
    classifier.save_model()

elif model_type == 'R':
    # TODO: add mode list
    print(f"Your options are: {Regressor_list!s}")
    modelname = input("Enter model to be used, use A for all")
    if modelname != "A":
        # Single named model: predict and report accuracy.
        # NOTE(review): unlike the other paths, save_model() is not called
        # here -- confirm whether that is intentional.
        regressor = Regression(X_train, X_test, y_train, y_test, modelname)
        regressor.predict()
        regressor.accuracy()
    else:
        # "A" runs, scores and saves every regressor in Regressor_list.
        for modelname in Regressor_list:
            regressor = Regression(X_train, X_test, y_train, y_test, modelname)
            regressor.predict()
            regressor.accuracy()
            regressor.save_model()
예제 #2
0
def main():
    """Parse command-line options and run the requested classifier actions.

    A ``Classification`` object is built from the stemmer/stopword options.
    A classifier is then loaded (``--load-classifier``) or, failing that,
    created and trained (``--create-classifier``); loading takes precedence
    when both options are given. Afterwards each of the following steps runs
    if its option is set, in this order: dump the classifier
    (``--dump-classifier``), calculate accuracy (``-a``) and classify a text
    (``--classify``).
    """
    parser = OptionParser()
    parser.add_option("--stemmer-language", dest="stemmer_language", help="Language for SnowballStemmer", default="english")
    parser.add_option('-i', action="store_true", dest="ignore_stopwords_stemmer", help="Ignore stopwords in stemmer, default false", default=False)
    parser.add_option("--stopwords-language", dest="stopwords_language", help="Language for stopwords")
    parser.add_option("-k", action="store_true", dest="keep_stopwords", help="Keep stopwords, default remove", default=False)
    parser.add_option('--load-classifier', dest="load_classifier_file_path", help="Specify load classifiers file")
    parser.add_option('--create-classifier', dest="create_classifier", help="File for training set")
    # type="int" keeps a user-supplied row count the same type as the integer
    # default; without it optparse would hand over the raw string.
    parser.add_option('--row-training-set', dest="row_training_set", type="int", help="Number of row for training set", default=1000)
    parser.add_option('-r', action="store_true", dest="random_row_training_set", help="Get random row from training set file", default=False)
    parser.add_option('--text-field', dest="text_field", help="text field in json file", default="text")
    parser.add_option('--word-tokenize-language', dest="word_tokenize_language", help="Word tokenize language", default="english")
    parser.add_option('--classification-field', dest="classification_field", help="Classification field in json data", default="category")
    parser.add_option('--dump-classifier', dest="dump_classifier", help="Dump classifier file", default=False)
    parser.add_option('-a', action="store_true", dest="calculate_accuracy", help="Calculate accuracy", default=False)
    parser.add_option('--test-file-path', dest="test_file_path", help="Test file path")
    # Same reasoning as --row-training-set: keep the value an int.
    parser.add_option('--row-test-set', dest="row_test_set", type="int", help="Number of row for test set", default=500)
    parser.add_option('--random-row-test-set', action="store_true", dest="random_row_test_set", help="Get random row from test set file", default=False)
    parser.add_option('--test-text-field', dest="test_text_field", help="text field in json test file", default="text")
    parser.add_option('--test-classification-field', dest="test_classification_field", help="classificaion field in json test file", default="category")
    parser.add_option('--classify', dest="classify_text", help="classify text", default=False)
    # parse_args() defaults to sys.argv[1:], so the program name is no longer
    # fed in as a positional argument. Positional arguments are not used.
    options, _ = parser.parse_args()

    cl = Classification(
        stemmer_language=options.stemmer_language,
        stopwords_language=options.stopwords_language,
        ignore_stopwords_stemmer=options.ignore_stopwords_stemmer,
    )

    # Loading an existing classifier wins over training a new one.
    if options.load_classifier_file_path:
        cl.load_classifier(load_classifier_file_path=options.load_classifier_file_path)
    elif options.create_classifier:
        cl.create_and_train_classifier(
            training_file_path=options.create_classifier,
            keep_stopwords=options.keep_stopwords,
            row_training_set=options.row_training_set,
            random_row_training_set=options.random_row_training_set,
            text_field=options.text_field,
            word_tokenize_language=options.word_tokenize_language,
            classification_field=options.classification_field,
        )

    # The remaining steps are independent of each other; each runs only when
    # its option was supplied.
    if options.dump_classifier:
        cl.dump_classifier(options.dump_classifier)

    if options.calculate_accuracy:
        cl.accuracy(
            test_file_path=options.test_file_path,
            keep_stopwords=options.keep_stopwords,
            row_test_set=options.row_test_set,
            random_row_test_set=options.random_row_test_set,
            text_field=options.test_text_field,
            word_tokenize_language=options.word_tokenize_language,
            classification_field=options.test_classification_field,
        )

    if options.classify_text:
        cl.classify(
            text=options.classify_text,
            keep_stopwords=options.keep_stopwords,
            word_tokenize_language=options.word_tokenize_language,
        )