def main(argv):
    """Drive the Sentiment140 Doc2Vec pipeline from the command line.

    Flags:
      -h / --help      print usage and exit
      -s / --save ARG  save the trained model to path ARG
      -t / --test      evaluate the trained model after training
      -v / --verbose   verbose corpus loading

    Exits with status 2 on bad arguments; exits early if neither
    --save nor --test was requested (training alone would be wasted work).
    """
    try:
        long_flags = ["help", "save", "test", "verbose"]
        opts, args = getopt.getopt(argv, "hs:tv", long_flags)
    except getopt.GetoptError:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; catch only argument-parsing failures.
        usage()
        sys.exit(2)

    model_name = None
    testing = False
    verbose = False
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-s", "--save"):
            print('Saving model to %s' % arg)
            model_name = arg
        elif opt in ("-t", "--test"):
            testing = True
        elif opt in ("-v", "--verbose"):
            verbose = True

    output_format = '%(asctime)s : %(levelname)s : %(message)s'
    logging.basicConfig(format=output_format, level=logging.INFO)

    # Prevents user from running script without saving
    # or testing the model
    if not (model_name or testing):
        logging.critical("Sentiment140_Pipeline script is neither saving or testing the model built")
        sys.exit()

    logging.info("Opening CSV file...")
    all_data = sentiment140.load_data(verbose=verbose)
    model = train_d2v_model(all_data, epoch_num=10)

    # Saves memory: freezes the vectors in place; the model cannot be
    # trained further after this call.
    model.init_sims(replace=True)

    if model_name:
        model.save(model_name)

    if testing:
        test_model(model)
def main(argv):
    """Train and evaluate a Naive Bayes sentiment classifier on tweet data.

    Flags:
      -h / --help     print usage and exit
      -f PATH         load pre-extracted feature dicts from PATH instead of
                      re-extracting them from the Sentiment140 CSV
      -v              verbose mode (also configures INFO-level logging)
      --bernoulli / --multinomial / --gaussian
                      use the corresponding scikit-learn Naive Bayes variant
                      (via NLTK's SklearnClassifier) instead of NLTK's own
                      NaiveBayesClassifier

    Exits with status 2 on bad arguments.
    """
    # Initial local path for Stanford Twitter Data Features is None
    FEAT_PATH = None
    verbose = False

    # Parse command line arguments
    try:
        long_flags = ["help", "bernoulli", "multinomial", "gaussian"]
        opts, args = getopt.getopt(argv, "hf:v", long_flags)
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Classifier variable. Used for training on tweet features below
    classifier = NaiveBayesClassifier

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt == '-f':
            FEAT_PATH = arg
        elif opt == '-v':
            verbose = True
            output_format = '%(asctime)s : %(levelname)s : %(message)s'
            logging.basicConfig(format=output_format, level=logging.INFO)
        # BUG FIX: getopt yields long options with their leading dashes
        # (e.g. '--bernoulli'), so the previous comparison against the bare
        # names ("bernoulli", ...) could never match and this branch was
        # unreachable dead code.
        elif opt in ("--bernoulli", "--multinomial", "--gaussian"):
            '''
            This section allows you to use scikit-learn packages for
            text classification.

            NLTKs SklearnClassifier makes the process much easier,
            since you dont have to convert feature dictionaries to
            numpy arrays yourself, or keep track of all known features.
            The Scikits classifiers also tend to be more memory efficient
            than the standard NLTK classifiers, due to their use of sparse
            arrays.

            Credit to "Jacob" and his post on Steamhacker.com
            '''
            pipeline = None

            if opt == "--bernoulli":
                pipeline = Pipeline([('nb', BernoulliNB())])
            elif opt == "--multinomial":
                pipeline = Pipeline([('nb', MultinomialNB())])
            elif opt == "--gaussian":
                pipeline = Pipeline([('nb', GaussianNB())])

            classifier = SklearnClassifier(pipeline)

    # Perform tweet parsing and learning
    logging.info("Opening CSV file...")
    logging.info("Extracting Features...")

    all_data = list()

    # Checks if all_data has already been set
    if FEAT_PATH is not None:
        # `with` ensures the feature file is closed (it previously leaked).
        # SECURITY NOTE: eval() executes arbitrary code from the feature
        # file — only use -f with trusted files. Consider ast.literal_eval.
        with open(FEAT_PATH, 'r') as tweet_feats:
            all_data = [eval(line) for line in tweet_feats]
    else:
        all_data = sentiment140.load_data(feat_extractor=word_feats,
                                          verbose=verbose)

    logging.info("CSV file opened and features extracted")
    train_set, dev_set, test_set = split_data(all_data, train=.9,
                                              dev=0, test=.1, shuffle=True)

    logging.info("Data split into sets")
    classifier = classifier.train(train_set)
    logging.info("Classifier trained")

    logging.info("Evaluating accuracy and other features\n")
    test_model(classifier, test_set)