from pyspark import SparkContext
from pyspark.streaming import StreamingContext
# (imports of the Tweet model, analysis helpers, and config values are omitted in this excerpt)


def createStreamingContext():
    # Create a StreamingContext against the standalone master with a 2-second batch interval
    sc = SparkContext("spark://%s:7077" % MASTER_NAME,
                      appName="GlutenTweet", pyFiles=PYFILES)
    ssc = StreamingContext(sc, 2)

    # Create a DStream of raw data from the socket feed
    raw = ssc.socketTextStream(MASTER_IP, 9999)

    # Convert into models
    tweets = raw.map(lambda r: Tweet(raw_json=r))

    # Store models
    tweets.foreachRDD(storeTweetsRDD)

    # Sliding window analysis: 20-minute window, sliding every 30 seconds
    window = tweets.window(20 * 60, 30)
    hashtagCounts = analysisHahtagCount(window)
    streamTop(hashtagCounts).pprint()

    # Keyword extraction - note tweets is immutable
    tweetsKeyword = tweets.map(lambda t: keywordExtraction(t))

    # Update models
    tweetsKeyword.foreachRDD(updateTweetsRDD)

    # Sliding window analysis over the keyword-enriched stream
    window2 = tweetsKeyword.window(20 * 60, 30)
    keywordCounts = analysisKeywordCount(window2)
    streamTop(keywordCounts).pprint()

    ssc.checkpoint(CHECKPOINT_DIR)
    return ssc
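# A minimal driver sketch (not part of the original listing) showing how a
# createStreamingContext factory is typically consumed: StreamingContext.getOrCreate
# recovers the context from the checkpoint directory if one exists, otherwise it
# calls the factory. CHECKPOINT_DIR is assumed to come from config, as above.
if __name__ == "__main__":
    ssc = StreamingContext.getOrCreate(CHECKPOINT_DIR, createStreamingContext)
    ssc.start()              # start receiving data on the socket stream
    ssc.awaitTermination()   # block until the streaming job is stopped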
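# A hedged sketch of the streamTop helper used above; the real helper is defined
# elsewhere in the streaming module and is not shown here. It assumes the input is
# a DStream of (key, count) pairs and uses DStream.transform to keep only the top
# n pairs of each micro-batch, so that .pprint() prints a short leaderboard.
def streamTopSketch(counts, n=10):
    return counts.transform(
        lambda rdd: rdd.context.parallelize(rdd.top(n, key=lambda p: p[1])))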
from pyspark import SparkContext

from models import Tweet
from database import db_session
from analysis import keywordExtraction, analysisHahtagCount, analysisKeywordCount
from config import *

PYFILES = ['batch.py'] + PYFILES

# Create a SparkContext against the standalone master for the batch job
# sc = SparkContext("spark://%s:7077" % MASTER, "GlutenTweetBatch", pyFiles=PYFILES)
sc = SparkContext("spark://%s:7077" % 'hadoop-m-unoa',
                  appName="GlutenTweetBatch", pyFiles=PYFILES)

# Load all stored tweets from the database and distribute them as an RDD
dbTweets = db_session.query(Tweet).all()
tweets = sc.parallelize(dbTweets)

# Hashtag analysis
hashtagCounts = analysisHahtagCount(tweets)
print(hashtagCounts.top(10, key=lambda p: p[1]))

# Keyword extraction - note tweets is immutable
tweetsKeyword = tweets.map(lambda t: keywordExtraction(t))

# Update models
# (foreachRDD is a DStream operation, so it is not applicable to a plain RDD here)
# tweetsKeyword.foreachRDD(updateTweetsRDD)

# Keyword analysis
keywordCounts = analysisKeywordCount(tweetsKeyword)
print(keywordCounts.top(10, key=lambda p: p[1]))
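# A hedged sketch of what analysisHahtagCount could look like; the real
# implementation lives in analysis.py and is not shown here. It assumes a
# hypothetical Tweet.hashtags() helper returning a list of hashtag strings,
# and yields (hashtag, count) pairs so that .top(10, key=lambda p: p[1]) works
# on the result, matching its use in both the batch and streaming jobs.
from operator import add

def analysisHahtagCountSketch(tweets):
    return (tweets.flatMap(lambda t: t.hashtags())    # one record per hashtag
                  .map(lambda tag: (tag.lower(), 1))  # normalise case, pair with 1
                  .reduceByKey(add))                  # sum occurrences per hashtag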