Example #1
0
 def get(self):
     """Return stored tweets whose keyword matches the ``name`` query parameter.

     Responds with a 400 message when ``paramChecker`` rejects the request
     arguments; otherwise returns the keyword lookup result from the
     ``tsa.tweets`` collection together with status 200.
     """
     args = request.args
     if not paramChecker(args):
         return "Invalid URL/Params.", 400

     tweetStore = MongodbInteracter(dbName='tsa', collectionName='tweets')
     matches = tweetStore.fetchContentsViaKeyword(keyword=args['name'])
     return matches, 200
Example #2
0
 def get(self):
     """Return per-keyword tweet counts for keywords matching ``name``.

     Responds with a 400 message when ``paramChecker`` rejects the request
     arguments. Otherwise runs a two-stage aggregation on ``tsa.tweets``:
     a regex ``$match`` on the ``keyword`` field followed by a ``$group``
     that counts documents per keyword, and returns it with status 200.
     """
     args = request.args
     if not paramChecker(args):
         return "Invalid URL/Params.", 400

     tweetStore = MongodbInteracter(dbName='tsa', collectionName='tweets')
     # NOTE(review): the raw `name` value is used as the regex pattern, so
     # callers can send arbitrary regular expressions -- confirm this is
     # intended, or escape the value before matching.
     matchStage = {'$match': {'keyword': {'$regex': args['name'], '$options': 'im'}}}
     groupStage = {'$group': {'_id': '$keyword', 'count': {'$sum': 1}}}
     return tweetStore.aggregation(pipeline=[matchStage, groupStage]), 200
Example #3
0
class AutoTweetExtraction:
    """Fetch recent tweets for a keyword and store them in ``tsa.twitter``."""

    # Keyword searched when ``run`` is called without arguments.
    DEFAULT_KEYWORD = "amy klobuchar"

    def __init__(self):
        """Create the database handle and the Twitter search handle."""
        self.db = MongodbInteracter(dbName='tsa', collectionName='twitter')
        self.twitterHandle = TwitterHandle()

    def run(self, keyword=DEFAULT_KEYWORD, days=1):
        """Search Twitter for ``keyword`` and post the results to the database.

        Args:
            keyword: Search phrase handed to ``searchByKeyword``. Defaults to
                the phrase this job originally hard-coded.
            days: How many days before today the ``since`` date passed to the
                search lies. Defaults to 1 (yesterday).
        """
        since = datetime.date.today() - datetime.timedelta(days=days)
        tweets = self.twitterHandle.searchByKeyword(keyword, since=since)
        self.db.postContents(tweets)
 def runAll(self):
     '''
     Run every stored tweet through cleaning and classification and write
     the result back to the database under the tweet's 'classification' key.
     '''
     store = MongodbInteracter("tsa", "twitter")
     for record in store.fetchContents():
         print("processing tweet: ", record['_id'])
         cleaned = self.preprocessor.clean(record['tweetText'])
         record['classification'] = self.classifier.classify(cleaned, verbose=False)
         # Persist the updated document before moving on to the next one.
         store.replaceOnce(record)
     print("---- Batch run Complete ----")
    def visualizer(self):
        """Plot per-day counts of positive, negative and neutral tweets.

        Loads every document from the ``tsa.twitter`` collection, reads each
        tweet's ``classification['wo_split']['result']`` label, counts the
        'pos' / 'neg' / 'neutral' labels per calendar day of ``created_at``
        and builds one bar trace per label.

        Returns:
            Whatever ``pyo.plot`` returns for the assembled figure.
        """
        dbHandle = MongodbInteracter(dbName='tsa', collectionName='twitter')
        df = pd.DataFrame(dbHandle.fetchContents())

        # Collapse timestamps to 'YYYY-MM-DD' strings so tweets bucket by day.
        df['created_at'] = pd.to_datetime(df['created_at'])
        df['created_at'] = df.created_at.map(lambda ts: ts.strftime('%Y-%m-%d'))
        df['wo_split'] = df['classification'].apply(
            lambda c: c.get('wo_split').get('result'))

        # Count labels per day in a single groupby. The day labels come from
        # the groupby index itself, so every count stays attached to its own
        # date. Pairing sorted groupby output with `unique()` (first-seen
        # order) mislabels days whenever tweets are not stored
        # chronologically.
        labelFlags = pd.DataFrame({
            'positive': df['wo_split'] == 'pos',
            'negative': df['wo_split'] == 'neg',
            'neutral': df['wo_split'] == 'neutral',
        })
        data = labelFlags.groupby(df['created_at']).sum()

        # One bar trace per sentiment, sharing the day axis.
        traces = [
            go.Bar(x=data.index, y=data[column], name=column)
            for column in ('positive', 'negative', 'neutral')
        ]
        # layout = go.Layout(title = 'Positive vs Negative vs Neutral')
        figure = go.Figure(data=traces)
        return pyo.plot(figure)
 def get(self):
     """Run the classification pipeline and return its results as JSON data.

     Returns the classified results with status 200, or an error message
     with status 500 when the pipeline (or its serialisation) raises.
     """
     # Request validation is currently switched off; every call is accepted.
     # queryParams = request.args
     # paramCheck = paramChecker(queryParams)
     paramCheck = True
     if not paramCheck:
         return "Invalid URL/Params.", 400

     self.db = MongodbInteracter(dbName='tsa', collectionName='tweets')
     self.classifier = Classifier()
     try:
         classified = self.runPipeLine()
         # Round-trip through dumps/json.loads to obtain plain JSON data.
         payload = json.loads(dumps(classified))
     except Exception as err:
         print("\n<==***Error In Classification Pipeline***==>\n", err)
         return "ERROR: Classification Pipeline encountered error. View Log for details", 500
     return payload, 200
 def run(self):
     """Classify the last week's tweets and store the results.

     Reads tweets from ``tsa.twitter`` whose ``created_at`` falls between
     midnight seven days ago (inclusive) and midnight tomorrow (exclusive),
     processes them against the documents already in ``tsa.twitter_result``,
     classifies the processed tweets and posts the outcome to
     ``tsa.twitter_result``.
     """
     today = datetime.date.today()
     midnight = datetime.time()
     windowStart = datetime.datetime.combine(today - datetime.timedelta(days=7), midnight)
     windowEnd = datetime.datetime.combine(today + datetime.timedelta(days=1), midnight)

     sourceDb = MongodbInteracter(dbName='tsa', collectionName='twitter')
     daily_tweets = sourceDb.fetchContents(
         query={'created_at': {'$gte': windowStart, '$lt': windowEnd}})

     resultDb = MongodbInteracter(dbName='tsa', collectionName='twitter_result')
     existing_tweets = resultDb.fetchContents()
     processedTweets = self.__processTweets(daily_tweets=daily_tweets,
                                            existing_tweets=existing_tweets)
     resultDb.postContents(self.__classifyTweets(processedTweets))
class ClassificationPipelineResource(Resource):
    """Resource whose GET handler classifies stored tweets and returns the results."""

    def get(self):
        """Run the classification pipeline and return its results as JSON data.

        Returns the classified results with status 200, or an error message
        with status 500 when the pipeline (or its serialisation) raises.
        """
        # Request validation is currently switched off; every call is accepted.
        # queryParams = request.args
        # paramCheck = paramChecker(queryParams)
        paramCheck = True
        if not paramCheck:
            return "Invalid URL/Params.", 400

        self.db = MongodbInteracter(dbName='tsa', collectionName='tweets')
        self.classifier = Classifier()
        try:
            classified = self.runPipeLine()
            # Round-trip through dumps/json.loads to obtain plain JSON data.
            payload = json.loads(dumps(classified))
        except Exception as err:
            print("\n<==***Error In Classification Pipeline***==>\n", err)
            return "ERROR: Classification Pipeline encountered error. View Log for details", 500
        return payload, 200

    def runPipeLine(self):
        """Classify the text of the first 100 tweets fetched for keyword 'amy'."""
        matches = list(self.db.fetchContentsViaKeyword('amy'))
        results = []
        for tweet in matches[:100]:
            results.append(self.classifier.classify(tweet['tweetText']))
        return results
Example #9
0
from processing.preprocess import Preprocessor
from processing.sentenceSplitter.sentenceSplitter import splitSentence, splitSentenceVerbose
from processing.sentenceSplitter.coreferenceresolver import resolveCoreference
from processing.aspectsIdentifier import AspectIdentifier



# TODO: Add logic to switch between handles in a user-friendly way. Read keys from config.


if __name__ == '__main__':
    print('inside word split pipeline test')
    # Verbose mode selects the verbose sentence splitter below.
    verbose = True
    preprocessor = Preprocessor()
    dbHandle = MongodbInteracter(dbName='tsa', collectionName='tweets')
    tweets = list(dbHandle.fetchContents({}))

    # Interactive loop: read a line, clean it, echo it, then split it.
    while True:
        print("\n"*4)
        print("input please: ", end="")
        x = preprocessor.clean(input())
        print("\nTweet text: ", x)
        # print("\n",classifier.classify(x))
        print("-"*100)
        sents = splitSentenceVerbose(x) if verbose else splitSentence(x)
Example #10
0
 def __init__(self):
     """Create the ``tsa.twitter`` database handle and the Twitter handle."""
     self.db = MongodbInteracter(collectionName='twitter', dbName='tsa')
     self.twitterHandle = TwitterHandle()