def get(self):
    """Return tweets whose keyword matches the 'name' query parameter.

    Responds 400 when the query params fail validation, otherwise 200 with
    the matching tweet documents.
    """
    params = request.args
    if not paramChecker(params):
        return "Invalid URL/Params.", 400
    store = MongodbInteracter(dbName='tsa', collectionName='tweets')
    return store.fetchContentsViaKeyword(keyword=params['name']), 200
def get(self):
    """Count tweets per keyword for keywords matching the 'name' param.

    Runs a case-insensitive regex match on `keyword`, then groups by keyword
    with a document count. Responds 400 on invalid params, otherwise 200.
    """
    params = request.args
    if not paramChecker(params):
        return "Invalid URL/Params.", 400
    store = MongodbInteracter(dbName='tsa', collectionName='tweets')
    # NOTE(review): params['name'] is interpolated into a $regex unescaped;
    # a hostile pattern could be slow to evaluate — confirm inputs are trusted.
    stages = [
        {'$match': {'keyword': {'$regex': params['name'], '$options': 'im'}}},
        {'$group': {'_id': '$keyword', 'count': {'$sum': 1}}},
    ]
    return store.aggregation(pipeline=stages), 200
class AutoTweetExtraction:
    """Fetch recent tweets for a fixed keyword and persist them to Mongo."""

    def __init__(self):
        # Persistent handles reused across runs.
        self.db = MongodbInteracter(dbName='tsa', collectionName='twitter')
        self.twitterHandle = TwitterHandle()

    def run(self):
        """Search tweets since yesterday and store them."""
        # Start the window one day back so a daily run never misses a day.
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        amy_tweets = self.twitterHandle.searchByKeyword("amy klobuchar", since=yesterday)
        self.db.postContents(amy_tweets)
def runAll(self):
    """Classify every stored tweet and write the result back to the database."""
    store = MongodbInteracter("tsa", "twitter")
    for tweet in store.fetchContents():
        print("processing tweet: ", tweet['_id'])
        # Clean the raw text before classification, then persist the verdict
        # on the same document.
        cleaned = self.preprocessor.clean(tweet['tweetText'])
        tweet['classification'] = self.classifier.classify(cleaned, verbose=False)
        store.replaceOnce(tweet)
    print("---- Batch run Complete ----")
def visualizer(self):
    """Render a bar chart of daily positive/negative/neutral tweet counts.

    Loads classified tweets from Mongo, buckets them by calendar day, counts
    each 'wo_split' sentiment label per day, and plots three bar traces with
    plotly. Returns the value of ``pyo.plot(figure)``.
    """
    dbHandle = MongodbInteracter(dbName='tsa', collectionName='twitter')
    df = pd.DataFrame(dbHandle.fetchContents())

    # Normalize timestamps to a plain YYYY-MM-DD day bucket.
    df['created_at'] = pd.to_datetime(df['created_at']).dt.strftime('%Y-%m-%d')

    # classification['wo_split']['result'] carries the sentiment label
    # ('pos' / 'neg' / 'neutral').
    df['wo_split'] = df['classification'].apply(lambda c: c.get('wo_split'))
    df['wo_split'] = df['wo_split'].apply(lambda c: c.get('result'))

    # BUG FIX: the original paired the (sorted) groupby counts with
    # df.created_at.unique(), whose appearance order can differ from the
    # sorted group order, misaligning dates and counts. Building all three
    # count columns on the shared groupby index keeps them aligned by
    # construction.
    grouped = df.groupby('created_at')['wo_split']
    data = pd.DataFrame({
        'positive': grouped.apply(lambda s: (s == 'pos').sum()),
        'negative': grouped.apply(lambda s: (s == 'neg').sum()),
        'neutral': grouped.apply(lambda s: (s == 'neutral').sum()),
    })

    # Create traces — one bar series per sentiment, indexed by day.
    traces = [
        go.Bar(x=data.index, y=data[label], name=label)
        for label in ('positive', 'negative', 'neutral')
    ]
    figure = go.Figure(data=traces)
    return pyo.plot(figure)
def get(self):
    """Run the classification pipeline and return its result as JSON."""
    # Param validation is commented out upstream; every request is accepted.
    # queryParams = request.args
    # paramCheck = paramChecker(queryParams)
    accepted = True
    if not accepted:
        return "Invalid URL/Params.", 400
    self.db = MongodbInteracter(dbName='tsa', collectionName='tweets')
    self.classifier = Classifier()
    try:
        outcome = self.runPipeLine()
        # print("\n,<==RESULT==>\n", outcome)
        # dumps/loads round-trip converts BSON documents to plain JSON.
        return json.loads(dumps(outcome)), 200
    except Exception as err:
        print("\n<==***Error In Classification Pipeline***==>\n", err)
        return "ERROR: Classification Pipeline encountered error. View Log for details", 500
def run(self):
    """Classify the last week's tweets and append results to twitter_result."""
    today = datetime.date.today()
    # Window spans from midnight seven days ago up to (but excluding)
    # tomorrow's midnight, so all of today is included.
    window_start = datetime.datetime.combine(today - datetime.timedelta(days=7), datetime.time())
    window_end = datetime.datetime.combine(today + datetime.timedelta(days=1), datetime.time())
    source = MongodbInteracter(dbName='tsa', collectionName='twitter')
    daily_tweets = source.fetchContents(
        query={'created_at': {'$gte': window_start, '$lt': window_end}})
    sink = MongodbInteracter(dbName='tsa', collectionName='twitter_result')
    existing_tweets = sink.fetchContents()
    # Skip tweets already present in the result collection, then classify
    # and store the remainder.
    processed = self.__processTweets(daily_tweets=daily_tweets,
                                     existing_tweets=existing_tweets)
    sink.postContents(self.__classifyTweets(processed))
class ClassificationPipelineResource(Resource):
    """REST resource that classifies a sample of stored tweets on demand."""

    def get(self):
        """Run the classification pipeline and return its result as JSON."""
        # Param validation is currently disabled; accept every request.
        # queryParams = request.args
        # paramCheck = paramChecker(queryParams)
        valid = True
        if not valid:
            return "Invalid URL/Params.", 400
        self.db = MongodbInteracter(dbName='tsa', collectionName='tweets')
        self.classifier = Classifier()
        try:
            payload = self.runPipeLine()
            # print("\n,<==RESULT==>\n", payload)
            # dumps/loads round-trip turns BSON documents into plain JSON.
            return json.loads(dumps(payload)), 200
        except Exception as err:
            print("\n<==***Error In Classification Pipeline***==>\n", err)
            return "ERROR: Classification Pipeline encountered error. View Log for details", 500

    def runPipeLine(self):
        """Classify the first 100 tweets matching keyword 'amy'."""
        sample = list(self.db.fetchContentsViaKeyword('amy'))[0:100]
        return [self.classifier.classify(tweet['tweetText']) for tweet in sample]
from processing.preprocess import Preprocessor
from processing.sentenceSplitter.sentenceSplitter import splitSentence, splitSentenceVerbose
from processing.sentenceSplitter.coreferenceresolver import resolveCoreference
from processing.aspectsIdentifier import AspectIdentifier

# TODO: Add logic to switch between handle in a user friendly way. Read keys from config.
if __name__ == '__main__':
    # Interactive smoke test for the word-split pipeline: read a line,
    # clean it, and print the sentence-split output until interrupted.
    print('inside word split pipeline test')
    # set verbose to true for all debug
    verbose = True
    preprocessor = Preprocessor()
    dbHandle = MongodbInteracter(dbName='tsa', collectionName='tweets')
    tweets = list(dbHandle.fetchContents({}))
    while True:
        print("\n" * 4)
        print("input please: ", end="")
        cleaned = preprocessor.clean(input())
        print("\nTweet text: ", cleaned)
        # print("\n",classifier.classify(cleaned))
        print("-" * 100)
        sents = splitSentenceVerbose(cleaned) if verbose else splitSentence(cleaned)
def __init__(self):
    """Set up the persistent Mongo collection handle and Twitter client."""
    self.db = MongodbInteracter(dbName='tsa', collectionName='twitter')
    self.twitterHandle = TwitterHandle()