예제 #1
0
def extract_years(snippet, output):
    """
    Extract date strings from *snippet* with SUTime and rewrite output['Y']
    with the matching dates and the confidence scores computed by
    extract_entities_textrazor.

    :param snippet: English text to scan for temporal expressions
    :param output: dict whose 'Y' entry holds
                   {'entity': [...], 'confidenceScore': [...]}
    :return: *output* with output['Y'] replaced by
             {'entity': [date strings], 'confidenceScore': [scores]}
    """
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    # Read the 'value' field of each annotation directly instead of scanning
    # the json.dumps() text character by character: the old scan broke on
    # escaped quotes, non-ASCII (\uXXXX) output and non-string values.
    dates_list = [str(annotation['value'])
                  for annotation in sutime.parse(snippet)
                  if 'value' in annotation]

    dic_year = output['Y']
    dates_list_new = {'entity': [], 'confidenceScore': []}

    # Keep only SUTime dates containing a year already found by the NER step,
    # carrying over that year's confidence score; de-duplicate entries.
    for i, year_entity in enumerate(dic_year['entity']):
        for ele in dates_list:
            if year_entity[0] in ele and ele not in dates_list_new['entity']:
                dates_list_new['entity'].append(ele)
                dates_list_new['confidenceScore'].append(
                    dic_year['confidenceScore'][i])

    output['Y'] = dates_list_new
    return output
예제 #2
0
파일: proiect.py 프로젝트: paulsem/TILN
def sutime_function(text):
    """
    Translate Romanian *text* to English, run SUTime over the translation and
    group the recognised temporal expressions (translated back to Romanian)
    by annotation type.

    :param text: input text in Romanian
    :return: dict mapping annotation type -> list of annotation texts
    """
    translator = Translator()
    traducere = translator.translate(text, src='ro', dest="en").text

    java_target = "java\\target"
    jar_files = os.path.join(os.path.dirname(__file__), java_target)
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    ttext = []
    ttype = []
    tmpdictionar = {}

    # Each annotation is a dict; collect its (re-translated) text and type.
    # NOTE: the original iterated "for value, key in x.items()" -- the names
    # were swapped; the logic is the same with them in the right order.
    for annotation in sutime.parse(traducere):
        for key, value in annotation.items():
            if key == "text":
                ttext.append(convert_to_romana(value))
            elif key == "type":
                ttype.append(convert_to_romana(value))

    # Group texts by type. setdefault replaces the bare try/except, which
    # silently swallowed every exception rather than just the expected
    # KeyError on a first-seen type.
    for tip, valoare in zip(ttype, ttext):
        tmpdictionar.setdefault(tip, []).append(valoare)

    return tmpdictionar
class NLUWrapper(object):
    """Thin client for a remote NLU annotation service.

    SUTime runs locally (via the bundled jars); every other module is
    delegated to the service's /annotate endpoint.
    """

    def __init__(self, host='localhost', port=5001, **kwargs):
        self.host, self.port = host, port
        self.sutime = SUTime(jars=os.path.join(os.path.dirname(__file__),
                                               'python-sutime', 'jars'),
                             mark_time_ranges=True)
        # print() call works on both Python 2 and 3 (the original used a
        # Python 2 print statement, a SyntaxError on Python 3).
        print('Initialized with {}:{}'.format(self.host, self.port))

    def annotate(self, in_utterance, modules=()):
        """Annotate *in_utterance* with the requested *modules*.

        'SUTime' is handled locally and stripped from the module list before
        the remote call. Returns {} when the service times out.
        """
        sutime_response = None
        try:
            if 'SUTime' in modules:
                sutime_response = self.sutime.parse(in_utterance)
                modules = [module for module in modules if module != 'SUTime']
            response = requests.post('http://{}:{}/annotate'.format(
                self.host, self.port),
                                     json={
                                         'state': {
                                             'utterance': in_utterance
                                         },
                                         'modules': modules
                                     },
                                     timeout=5)
        except requests.Timeout:
            return {}
        # NOTE(review): assert is stripped under ``python -O``; if callers
        # rely on this check, replace it with an explicit raise.
        assert response.status_code == 200, 'Error calling the NLU service'
        result = response.json()
        if sutime_response is not None:
            result['annotations']['SUTime'] = sutime_response
        return result

    def annotate_sentiment(self, in_utterance):
        """Return the sentiment annotation for *in_utterance*."""
        response = self.annotate(in_utterance,
                                 modules=['Preprocessor', 'VaderNLTK'])
        return response['annotations']['sentiment']

    def annotate_ner(self, in_utterance):
        """Return the named-entity annotation, or {} if absent."""
        response = self.annotate(in_utterance,
                                 modules=['Preprocessor', 'StanfordNER'])
        return response['annotations'].get('ner', {})

    def annotate_pos(self, in_utterance):
        """Return the POS-tag annotation, or [] if absent."""
        response = self.annotate(in_utterance,
                                 modules=['Preprocessor', 'MorphoTagger'])
        return response['annotations'].get('postag', [])

    def annotate_abuse(self, in_utterance):
        """Return the abuse-detection annotation, or {} if absent."""
        response = self.annotate(
            in_utterance, modules=['Preprocessor', 'AlanaAbuseDetector'])
        return response['annotations'].get('abuse', {})
예제 #4
0
class DateLinker(BasePipeline):
    """Annotator that links SUTime DATE mentions to xsd:dateTime-style URIs
    and appends them to the document's entities."""

    def __init__(self, resource_folder=None):
        self.annotator_name = 'Date_Linker'
        # Bug fix: previously a caller-supplied resource_folder was ignored
        # and self.resource_folder was never assigned, so the next line
        # raised AttributeError whenever resource_folder was not None.
        if resource_folder is None:
            resource_folder = os.path.join(os.path.dirname(__file__),
                                           '../resources/sutime/')
        self.resource_folder = resource_folder
        self.sutime = SUTime(jars=self.resource_folder)

    @staticmethod
    def _standard_form(val):
        """Pad a SUTime DATE value (possibly negative, i.e. BC) to the
        ``...T00:00:00Z^^...dateTime`` form used by the linker.

        The padding depends only on the number of date digits, so the two
        copy-pasted branches of the original collapse into one.
        """
        digits = val[1:] if val.startswith('-') else val
        if len(digits) == 4:    # YYYY
            return val + '-00-00T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime'
        if len(digits) == 7:    # YYYY-MM
            return val + '-00T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime'
        if len(digits) == 10:   # YYYY-MM-DD
            return val + 'T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime'
        # Fallback keeps the original angle-bracketed datatype notation.
        return val + '^^<http://www.w3.org/2001/XMLSchema#dateTime>'

    def run(self, document):
        """Parse document.text with SUTime, adding one Entity per DATE."""
        dates = self.sutime.parse(document.text)

        # Only plain (possibly negative) numeric date values are linked.
        pattern = re.compile(r"^-*\d*-*\d*-*\d*-*$")

        for date in dates:
            if date["type"] == "DATE" and pattern.match(date["value"]):
                stdform = self._standard_form(date["value"])

                start = date["start"]
                end = date["end"]

                entity = Entity(uri=stdform,
                                boundaries=(start, end),
                                surfaceform=document.text[start:end],
                                annotator=self.annotator_name)

                document.entities.append(entity)

        return document
예제 #5
0
class timeDelta:
    """Small wrapper around SUTime exposing the time values found in a text."""

    def __init__(self, path):
        # SUTime jars live in the 'jars' folder next to *path*.
        jar_folder = os.path.join(os.path.dirname(path), 'jars')
        self.sutime = SUTime(jars=jar_folder,
                             mark_time_ranges=False,
                             include_range=True)

    def get_times(self, text):
        """Return the 'value' field of every annotation SUTime finds in *text*."""
        return [annotation['value'] for annotation in self.sutime.parse(text)]
예제 #6
0
def extract_entitites(snippet):
    """
    Extract person names, organization names and years from an English snippet.

    :param snippet: a snippet in English
    :return: dict with keys 'RN' (person names), 'U' (organization names) and
             'Y' (temporal values found by SUTime)
    """
    nlp = StanfordCoreNLP('http://localhost:9000')
    res = nlp.annotate(snippet,
                       properties={
                           'annotators': 'ner',  # 'sutime'
                           'outputFormat': 'json',
                           # 'timeout': 1000,
                       })

    output = {'RN': [], 'U': [], 'Y': []}

    # Person / organization names from the NER annotations
    # (iterate sentences directly instead of range(len(...))).
    for sentence in res['sentences']:
        for element in sentence['tokens']:
            if element['ner'] == 'PERSON':
                output['RN'].append(element['word'])
            if element['ner'] == 'ORGANIZATION':  # or element['ner'] == 'LOCATION'
                output['U'].append(element['word'])

    # Years: read the 'value' field of each SUTime annotation directly
    # instead of scanning the json.dumps() text character by character
    # (the old scan broke on escaped quotes and non-string values).
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    for annotation in sutime.parse(snippet):
        if 'value' in annotation:
            output['Y'].append(str(annotation['value']))

    return output
예제 #7
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words,
    when tweets found look for datetime and room, if present save tweet
    to OutgoingTweet model.
    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)
        self.slacker = Slacker(s.SLACK_TOKEN)

    def setup_auth(self):
        """Set up auth stuff for api and return tweepy api object"""
        auth = tweepy.OAuthHandler(s.openspaces["CONSUMER_KEY"],
                                   s.openspaces["CONSUMER_SECRET"])
        auth.set_access_token(s.openspaces["ACCESS_TOKEN"],
                              s.openspaces["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start stream, when matching tweet found on_status method called.
        search_list arg is a list of terms that will be looked for in tweets
        """
        # identity check (`is None`) instead of `== None` (PEP 8)
        if search_list is None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name):
        """Mention a user in a tweet from bot letting them know that
        their tweet has been recieved and that we will send out reminders
        about their event.
        """
        hours_mins = time_utils.get_local_clock_time()

        mention = "@{} just saw your Open Spaces tweet at {}."
        mention += " Pending approval we'll retweet a reminder before your event!"
        mention = mention.format(screen_name, hours_mins)

        # `except Exception` instead of a bare except so KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            self.api.update_status(status=mention)
        except Exception:
            # if same user tweets valid openspaces tweet at exact same clock time
            # it causes a duplicate tweet which bot can't send
            loggly.info(
                "duplicate tweet by openspaces bot in send_mention_tweet")

    def send_slack_message(self, channel, message):
        """Send a slack message a channel

        channel options:
        #outgoing_tweets
        #need_review
        #event_conflict
        """
        self.slacker.chat.post_message(channel, message)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def value_check(self, time_room_obj):
        """Returns a tuple with the counts of values extracted from a tweet
        in the parse_time_room method. This tuple is used to decide how bot
        will respond to tweet.
        """
        num_room_values = len(time_room_obj["room"])
        num_time_values = len(time_room_obj["date"])

        return (num_room_values, num_time_values)

    def retweet_logic(self, tweet, tweet_id, screen_name, user_id):
        """Use SUTime to try to parse a datetime out of a tweet, if successful
        save tweet to OutgoingTweet to be retweeted
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # make sure both time and room extracted and only one val each
        val_check = self.value_check(time_room)

        if val_check == (1, 1):
            room = time_room["room"][0]
            date_mention = tweet_utils.check_date_mention(tweet)
            converted_time = time_utils.convert_to_utc(time_room["date"][0],
                                                       date_mention)

            # check for a time and room conflict, only 1 set of retweets per event
            # default time range that a room is resrved for is -15 +30 mins
            conflict = db_utils.check_time_room_conflict(converted_time, room)

            if not conflict:
                # send message to slack when a tweet is scheduled to go out
                slack_message = "{} From: {}, id: {}".format(
                    tweet, screen_name, user_id)
                self.send_slack_message('#outgoing_tweets', slack_message)

                self.send_mention_tweet(screen_name)

                # This record lets us check that retweets not for same event
                db_utils.create_event(description=tweet,
                                      start=converted_time,
                                      location=room,
                                      creator=screen_name)

                tweet_utils.schedule_tweets(screen_name, tweet, tweet_id,
                                            converted_time)
                loggly.info(
                    "scheduled this tweet for retweet: {}".format(tweet))

            else:
                message = """Tweet recived for an event bot is already scheduled
                    to retweet about. Sender: {}, room: {}, time: {}, 
                    tweet: {} tweet_id: {}
                    """
                message = message.format(screen_name, room, converted_time,
                                         tweet, tweet_id)
                self.send_slack_message("#event_conflict", message)
                loggly.info(message)

        elif val_check == (0, 0):
            # tweet found but without valid time or room extracted, ignore
            pass

        else:
            # tweet with relevant information but not exactly 1 time & 1 room
            message = """Tweet found that needs review: {}  tweet_id: {}
                screen_name: {}, user_id: {}
                """
            message = message.format(tweet, tweet_id, screen_name, user_id)
            self.send_slack_message("#need_review", message)
예제 #8
0
import os
import json
from sutime import SUTime

if __name__ == '__main__':
    # Quick manual check of the SUTime wrapper.
    test_case = u'I need a desk for tomorrow from 2pm to 3pm'

    # Raw string: '\U' in a plain literal is an invalid unicode escape and a
    # SyntaxError on Python 3. The first (immediately overwritten, dead)
    # jar_files assignment from the original has been dropped.
    jar_files = r'C:\Users\Leandra\Documents\Fall2016\NLP\carpool-search\jars'
    print(jar_files)
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    print(json.dumps(sutime.parse(test_case), sort_keys=True, indent=4))
예제 #9
0
class RetweetBot:
    """Search Twitter for candidate tweets, score them by relevance, filter
    abusive content, and extract time/room information with SUTime."""

    def __init__(self):
        # Twitter API setup
        auth = tweepy.OAuthHandler(os.environ.get('CONSUMER_KEY'),
                                   os.environ.get('CONSUMER_SECRET'))
        auth.set_access_token(os.environ.get('ACCESS_TOKEN'),
                              os.environ.get('ACCESS_TOKEN_SECRET'))
        self.api = tweepy.API(auth)
        self.tweet_list = []
        self.relevance_scores = []

        # bad words
        response = requests.get(BAD_WORDS_URL)
        self.bad_words = response.text.split('\n')

        # stop words
        self.stopwords = list(stopwords.words('english'))

        # sutime
        jar_files = os.environ.get('JAR_FILES', '../python-sutime/jars')
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)

        # nltk data append
        nltk.data.path.append(
            os.environ.get('NLTK_CORPUS', '/webapps/hackor/hackor/nltk_data'))

    def get_tweets(self,
                   topic="#pycon",
                   quantity=10,
                   result_type="recent,popular"):
        """Fetch up to *quantity* English tweets matching *topic* and append
        them to self.tweet_list."""
        tweet_list = self.api.search(q=topic,
                                     count=quantity,
                                     lang='en',
                                     result_type=result_type)
        print("Retrieved {} candidate tweets.".format(len(tweet_list)))
        self.tweet_list += tweet_list

    def clear_tweets(self):
        """Reset the collected tweets and their scores."""
        self.tweet_list = []
        self.relevance_scores = []

    def score(self, tweet):
        """Relevance score of *tweet* (MAX_NEGATIVE for unsafe tweets/RTs).

        Relevance is meant to reflect the importance of the user tweeting:
        follower/friend ratio, hashtag density, favourites.
        """
        if not self.isSafe(tweet.text):
            return MAX_NEGATIVE

        if tweet.text.startswith('RT'):
            return MAX_NEGATIVE

        # influencer ratio
        influencer_ratio = 0
        if tweet.user.friends_count:
            influencer_ratio = tweet.user.followers_count / tweet.user.friends_count

        # number of hashtags
        hashtags = tweet.text.count('#')

        # hashtag word length
        hashtagcount = 0
        for word in tweet.text.split():
            if word.startswith('#'):
                hashtagcount += len(word)

        final_score = influencer_ratio * (hashtagcount / 140) * 1.0 / (
            1 + hashtags) * tweet.favorite_count
        # NOTE(review): this override discards the computed score and marks
        # every safe, non-RT tweet as equally relevant -- it looks like a
        # debug leftover; kept to preserve behavior. Remove to re-enable
        # the formula above.
        final_score = 1.0
        return final_score

    def compute_relevance_scores(self):
        """Score every collected tweet and keep the strictly positive ones,
        sorted by descending relevance."""
        for _id, tweet in enumerate(self.tweet_list):
            tweet_score = self.score(tweet)  # score once, not twice per tweet
            if tweet_score > 0.0:
                self.relevance_scores.append((_id, tweet_score))
        self.relevance_scores.sort(key=lambda tup: tup[1], reverse=True)

    def compose_relevant_slack_messages(self, count=1):
        """Build Slack-formatted RT suggestions for the top *count* tweets."""
        messages = []
        if self.relevance_scores:
            message = ''
            for score in self.relevance_scores[0:count]:
                tweet_score = score[1]
                # print() works on both Python 2 and 3 (was a py2 statement)
                print(tweet_score)
                tweet = self.tweet_list[score[0]]
                message = "RT <https://twitter.com/" + tweet.user.screen_name + "|" + tweet.user.screen_name + ">" + " " + tweet.text
                message += "\n <https://twitter.com/" + tweet.user.screen_name + "/status/" + str(
                    tweet.id) + "|Original Tweet>"
                messages.append(message)
        return messages

    def isSafe(self, tweet):
        """Return False when *tweet* contains any configured bad word."""
        result = True
        ret = tweet.replace('#', '')
        for word in self.bad_words:
            regex = r"\b(?=\w)" + re.escape(word) + r"\b(?!\w)"
            if re.search(regex, ret, re.IGNORECASE):
                result = False
                break
        return result

    def get_time_and_room(self, tweet):
        """Get time and room number from a tweet.

        Returns {'date': [SUTime values], 'room': [room codes]}.
        """
        result = {}
        result['date'] = []
        result['room'] = []

        time_slots = self.sutime.parse(tweet)
        tweet_without_time = tweet

        for time_slot in time_slots:
            tweet_without_time = tweet_without_time.replace(
                time_slot.get('text'), '')
            result['date'].append(time_slot.get('value'))

        # Build the vocabulary set once: the original re-evaluated
        # self.stopwords + nltk.corpus.words.words() for EVERY token, and
        # list membership is O(n) -- this was quadratic-and-worse per tweet.
        known_words = set(self.stopwords) | set(nltk.corpus.words.words())
        filter_known_words = [
            word.lower() for word in word_tokenize(tweet_without_time)
            if word.lower() not in known_words
        ]

        # regular expression for room (raw string: \d in a plain literal is
        # a deprecated invalid escape)
        room_re = re.compile(r'([a-zA-Z](\d{3})[-+]?(\d{3})?)')

        for word in filter_known_words:
            match = room_re.match(word)  # match once, not twice
            if match:
                result['room'].append(match.group())

        return result
예제 #10
0
class Streambot:
    """
    Stream Twitter and look for tweets that contain targeted words, 
    when tweets found look for datetime and room, if present save tweet to
    OutgoingTweet model.  

    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"]) 
    """
    def __init__(self):
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        self.tz = pytz.timezone('US/Pacific')

        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    def setup_auth(self):
        """
        Set up auth stuff for api and return tweepy api object
        """
        auth = tweepy.OAuthHandler(s.listener["CONSUMER_KEY"],
                                   s.listener["CONSUMER_SECRET"])
        auth.set_access_token(s.listener["ACCESS_TOKEN"],
                              s.listener["ACCESS_TOKEN_SECRET"])
        api = tweepy.API(auth)

        return api

    def run_stream(self, search_list=None):
        """
        Start stream, when matching tweet found on_status in StreamListener called. 
        search_list arg is a list of terms that will be looked for in tweets
        """
        # None default instead of a mutable [] default (shared across calls);
        # `not search_list` rejects both None and [], as before.
        if not search_list:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def convert_to_utc(self, talk_time):
        """
        Convert the datetime string we get from SUTime to utcnow
        """
        # get correct local year, month, day
        local_date = datetime.now(self.tz)
        local_date_str = datetime.strftime(local_date, "%Y %m %d")
        year, month, day = local_date_str.split(" ")

        # get SUTime parsed talk time and extract hours, mins
        dt_obj = parse(talk_time)
        local_time_str = datetime.strftime(dt_obj, "%H %M")
        hours, mins = local_time_str.split(" ")

        # build up correct datetime obj, normalize & localize, switch to utc
        correct_dt = datetime(int(year), int(month), int(day), int(hours),
                              int(mins))
        tz_aware_local = self.tz.normalize(self.tz.localize(correct_dt))
        local_as_utc = tz_aware_local.astimezone(pytz.utc)

        return local_as_utc

    def schedule_tweets(self, screen_name, tweet, tweet_id, talk_time):
        """
        Take tweet and datetime, schedule num of reminder tweets at set intervals 
        """
        # check config table to see if autosend on
        config_obj = models.AppConfig.objects.latest("id")
        approved = 1 if config_obj.auto_send else 0

        tweet_url = "https://twitter.com/{name}/status/{tweet_id}"
        embeded_tweet = tweet_url.format(name=screen_name, tweet_id=tweet_id)

        # set num of reminder tweets and interval in mins that tweets sent
        # num_tweets = 2 & interval = 15 sends 2 tweets 30 & 15 mins before
        num_tweets = 2
        interval = 1

        for mins in range(interval, (num_tweets * interval + 1), interval):
            remind_time = talk_time - timedelta(minutes=mins)

            message = "Coming up in {} minutes! {}".format(mins, embeded_tweet)

            print("message should be saved!!!")

            # saving the tweet to the OutgoingTweet table triggers celery stuff
            tweet_obj = models.Tweets(tweet=message,
                                      approved=approved,
                                      scheduled_time=remind_time)
            tweet_obj.save()

    def retweet_logic(self, tweet, tweet_id, screen_name):
        """
        Use SUTime to try to parse a datetime out of a tweet, if successful
        save tweet to OutgoingTweet to be retweeted
        """
        print(tweet, tweet_id)
        time_room = self.get_time_and_room(tweet)

        # check to make sure both time and room extracted and only one val for each
        val_check = [val for val in time_room.values() if len(val) == 1]

        if len(val_check) == 2:
            # way to mention a user after a valid tweet is recieved
            # time_stamp = datetime.datetime.utcnow()

            # mention = "@{} We saw your openspaces tweet!{}".format(screen_name, time_stamp)

            # self.api.update_status(status=mention)

            # need to make time from SUTime match time Django is using
            sutime_stuff = time_room["date"][0]
            print("sutime_stuff: {}".format(sutime_stuff))
            talk_time = self.convert_to_utc(time_room["date"][0])
            # typo fixed in the debug message below ("reult from convet")
            print("result from convert to utc: {}".format(talk_time))

            self.schedule_tweets(screen_name, tweet, tweet_id, talk_time)

    def get_time_and_room(self, tweet):
        """
        Get time and room number from a tweet
        Written by Santi @ https://github.com/adavanisanti
        """
        result = {}
        result["date"] = []
        result["room"] = []

        time_slots = self.sutime.parse(tweet)
        tweet_without_time = tweet

        for time_slot in time_slots:
            tweet_without_time = tweet_without_time.replace(
                time_slot.get("text"), "")
            result["date"].append(time_slot.get("value"))

        # stopword/vocabulary filtering deliberately disabled here (see the
        # original commented-out version) -- every token is kept
        filter_known_words = [
            word.lower() for word in word_tokenize(tweet_without_time)
        ]

        # regular expression for room (raw string: \d in a plain literal is
        # a deprecated invalid escape)
        room_re = re.compile(r"([a-zA-Z](\d{3})[-+]?(\d{3})?)")

        for word in filter_known_words:
            match = room_re.match(word)  # match once, not twice
            if match:
                result["room"].append(match.group())

        return result
예제 #11
0
 # NOTE(review): fragment of a larger loop -- `line`, `sutime`, `set_neg`
 # and the enclosing loop are defined outside this view; Python 2 syntax
 # (print statements). Left byte-identical; comments only.
 #line = line.encode('utf-8')
 # drop any non-ASCII characters before sentence splitting
 line = line.encode('ascii', 'ignore')
 sent_tokenize_list = sent_tokenize(line)
 #print sent_tokenize_list
 print "-------------------------------"
 res = []
 dt = []
 for sent in sent_tokenize_list:
     # further split each NLTK sentence on sentence-final punctuation that
     # sent_tokenize may have missed (abbreviation-aware lookbehinds)
     sent_list = re.split(
         '(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)(\s|[A-Z].*)', sent)
     print sent_list
     for sent_new in sent_list:
         #print sent_new
         #print sent,type(sent)
         #dt = list(json.dumps(sutime.parse(sent), sort_keys=True))
         # SUTime annotations for this sub-sentence
         dt = sutime.parse(sent_new)
         #print dt
         #irrelev if no timeframe info
         if len(dt) == 0:
             #res.append(('',z))
             continue
         else:
             word_neg_rel = ''
             word_pos_rel = ''
             #print dt
             tokens = nltk.word_tokenize(sent_new.lower())
             # flag the sentence if it contains any negative-relevance token
             # (presumably set_neg is a set of negation/negative words --
             # defined outside this view, TODO confirm)
             neg_flag = 0
             for token in tokens:
                 if token.lower() in set_neg:
                     neg_flag = 1
                     break
예제 #12
0
class NLUModule:
    """Natural-language-understanding helper for a weather chatbot.

    Combines a Stanford NER tagger (LOCATION extraction) with SUTime
    (DATE/TIME/DURATION extraction) to map an utterance to an intent
    plus a small entity dict.
    """

    def __init__(self, classifier_path=None, ner_path=None, sutime_jar_path=None):
        """Create the NER tagger and the SUTime wrapper.

        :param classifier_path: Stanford NER CRF classifier; defaults to a
            hard-coded Windows install location.
        :param ner_path: path to stanford-ner.jar.
        :param sutime_jar_path: directory containing the CoreNLP/SUTime jars.
        """
        # Raw strings: the previous plain literals only worked because
        # invalid escapes such as "\s" are passed through with a
        # DeprecationWarning -- that becomes a SyntaxError in future
        # Python versions. The byte content is unchanged.
        if classifier_path is None:
            classifier_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\classifiers\english.muc.7class.distsim.crf.ser.gz"

        if ner_path is None:
            ner_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\stanford-ner.jar"

        if sutime_jar_path is None:
            sutime_jar_path = r"C:\stanford_corenlp\stanford-corenlp-full-2018-02-27\stanford-corenlp-full-2018-02-27"

        self.stanford_classifier = classifier_path
        self.stanford_ner_path = ner_path
        self.sutime_path = sutime_jar_path

        # Tagger objects: CRF-based NER plus SUTime with range support.
        self.st = StanfordNERTagger(self.stanford_classifier, self.stanford_ner_path)
        self.su = SUTime(jars=self.sutime_path, mark_time_ranges=True, include_range=True)

        # NOTE(review): these vocab lists are never read below -- the
        # lookups use the module-level WEATHER_TERMS / GREET_TERMS / ...
        # constants instead. Kept for backward compatibility; confirm
        # whether they can be removed.
        self.weather_terms = ["weather", "climate", "precipitation", "sun", "rain", "cloud","snow", "hot", "humid", "cold", "sunny", "windy","cloudy",
                              "rainy", "snowy", "misty", "foggy", "colder","hotter", "warmer", "pleasant"]
        self.greet_terms= ["hello","hey","howdy","hello","hi", "yo", "yaw"]
        self.closure_terms = ["no", "nope", "thank you", "bye", "tata", "thanks", "that will be all", "that's it", "that'll be all"]
        self.day_terms = ["dawn", "dusk", "morning", "evening", "noon","afternoon", "night", "tonight", "midnight", "midday"] #, "hours"]
        self.date_terms = ["today", "tomorrow", "yesterday"]

    def DiscoverIntentAndEntities(self, text):
        """Classify *text* into an intent and extract entities.

        :param text: raw user utterance.
        :returns: ``{"intent": ..., "entities": {"LOCATION": str,
            "DATE": str, "TIME": str}}`` where DATE/TIME stay "" when
            nothing was detected.
        """
        text = text.strip()
        tokenized_text = text
        if ' ' in text:
            tokenized_text = word_tokenize(text)

        classified_text = self.st.tag(tokenized_text)
        time_tags = self.su.parse(text)
        queryDate = None
        queryTime = None

        detectionResults = {"intent":INTENT_TYPES.UNK, "entities":{"LOCATION":"", "DATE":[], "TIME":[], "DURATION":[], "QUERIES":[]}}
        returnIntentAndEnt = {"intent":INTENT_TYPES.UNK, "entities":{"LOCATION":"", "DATE":"", "TIME":""}}

        # LOCATION words are concatenated; weather vocabulary promotes
        # the utterance to a weather query.
        for word, tag in classified_text:
            if 'LOCATION' in tag:
                detectionResults["entities"]["LOCATION"] += word + " "
            elif 'O' in tag and word in WEATHER_TERMS:
                detectionResults["entities"]["QUERIES"].append(word)
                detectionResults["intent"] = INTENT_TYPES.WTH_QU

        # Bucket SUTime hits by type. Guard against types we have no
        # bucket for (e.g. "SET"), which previously raised KeyError.
        for tag in time_tags:
            typeKey = tag["type"]
            if typeKey in detectionResults["entities"]:
                detectionResults["entities"][typeKey].append(tag["value"])

        # Last DATE wins; strip SUTime's ISO week marker ("2018-W19").
        # NOTE(review): assumes the value is a string -- with
        # include_range=True SUTime can emit dicts; confirm upstream.
        for dateVal in detectionResults["entities"]["DATE"]:
            queryDate = dateVal.replace('W', '') if 'W' in dateVal else dateVal

        for timeVal in detectionResults["entities"]["TIME"]:
            timeTokens = timeVal.split('T')
            if queryDate is None:
                queryDate = timeTokens[0]
            # Guard: a TIME value without a 'T' separator previously
            # caused an IndexError.
            if len(timeTokens) < 2:
                continue
            timePart = timeTokens[1]

            if timePart in TIME_ABRV:
                # Map SUTime abbreviations (MO, AF, ...) to generic
                # day-part words; deliberately overwrites earlier times.
                queryTime = DAY_TERMS[TIME_ABRV[timePart]]
            elif queryTime is None:
                # Do not overwrite a concrete time detected earlier.
                queryTime = timePart

        # Fall back to canned phrase matching when nothing else fired.
        if detectionResults["intent"] == INTENT_TYPES.UNK:
            if text in GREET_TERMS:
                detectionResults["intent"] = INTENT_TYPES.GRT
            elif text in CLOSURE_TERMS:
                detectionResults["intent"] = INTENT_TYPES.CLS
            elif text in YES_TERMS:
                detectionResults["intent"] = INTENT_TYPES.ANS_YES
            elif text in NO_TERMS:
                detectionResults["intent"] = INTENT_TYPES.ANS_NO

        # Any extracted slot value turns the utterance into a slot answer.
        entities = detectionResults["entities"]
        if entities["LOCATION"] or entities["DATE"] or entities["TIME"] or entities["DURATION"]:
            detectionResults["intent"] = INTENT_TYPES.ANS_SLT

        returnIntentAndEnt["intent"] = detectionResults["intent"]
        returnIntentAndEnt["entities"]["LOCATION"] = entities["LOCATION"]
        if queryDate is not None:
            returnIntentAndEnt["entities"]["DATE"] = queryDate
        if queryTime is not None:
            returnIntentAndEnt["entities"]["TIME"] = queryTime

        return returnIntentAndEnt

# nlu = NLUModule()
# print(nlu.DiscoverIntentAndEntities("Tomorrow afternoon"))
# print(nlu.DiscoverIntentAndEntities("How is the weather on Fifth March."))
# print(nlu.DiscoverIntentAndEntities("How is the weather in March."))
# print(nlu.DiscoverIntentAndEntities("What is it like on Tuesday."))
# print(nlu.DiscoverIntentAndEntities("How does it look like tomorrow?"))
# print(nlu.DiscoverIntentAndEntities("What is the weather like next week?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow between 3 to 4pm ?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow afternoon?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow at noon?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow evening?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow morning?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow night?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow at midnight?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow during noon?"))
# print(nlu.DiscoverIntentAndEntities("What is it like tomorrow around 11:00 in the morning?"))
# print(nlu.DiscoverIntentAndEntities("What is it like next Tuesday around 11:00 in the morning?"))
# print(nlu.DiscoverIntentAndEntities("What is it like next Tuesday between 10:00 and 11:00 in the night?"))

# from nltk import word_tokenize, pos_tag, ne_chunk
#
# import os
# import collections
#
# from nltk.stem.snowball import SnowballStemmer
# from nltk.chunk import conlltags2tree, tree2conlltags
# import string
#
#
# def features(tokens, index, history):
#     """
#     `tokens`  = a POS-tagged sentence [(w1, t1), ...]
#     `index`   = the index of the token we want to extract features for
#     `history` = the previous predicted IOB tags
#     """
#
#     # init the stemmer
#     stemmer = SnowballStemmer('english')
#
#     # Pad the sequence with placeholders
#     tokens = [('[START2]', '[START2]'), ('[START1]', '[START1]')] + list(tokens) + [('[END1]', '[END1]'),
#                                                                                     ('[END2]', '[END2]')]
#     history = ['[START2]', '[START1]'] + list(history)
#
#     # shift the index with 2, to accommodate the padding
#     index += 2
#
#     word, pos = tokens[index]
#     prevword, prevpos = tokens[index - 1]
#     prevprevword, prevprevpos = tokens[index - 2]
#     nextword, nextpos = tokens[index + 1]
#     nextnextword, nextnextpos = tokens[index + 2]
#     previob = history[index - 1]
#     contains_dash = '-' in word
#     contains_dot = '.' in word
#     allascii = all([True for c in word if c in string.ascii_lowercase])
#
#     allcaps = word == word.capitalize()
#     capitalized = word[0] in string.ascii_uppercase
#
#     prevallcaps = prevword == prevword.capitalize()
#     prevcapitalized = prevword[0] in string.ascii_uppercase
#
#     nextallcaps = prevword == prevword.capitalize()
#     nextcapitalized = prevword[0] in string.ascii_uppercase
#
#     return {
#         'word': word,
#         'lemma': stemmer.stem(word),
#         'pos': pos,
#         'all-ascii': allascii,
#
#         'next-word': nextword,
#         'next-lemma': stemmer.stem(nextword),
#         'next-pos': nextpos,
#
#         'next-next-word': nextnextword,
#         'nextnextpos': nextnextpos,
#
#         'prev-word': prevword,
#         'prev-lemma': stemmer.stem(prevword),
#         'prev-pos': prevpos,
#
#         'prev-prev-word': prevprevword,
#         'prev-prev-pos': prevprevpos,
#
#         'prev-iob': previob,
#
#         'contains-dash': contains_dash,
#         'contains-dot': contains_dot,
#
#         'all-caps': allcaps,
#         'capitalized': capitalized,
#
#         'prev-all-caps': prevallcaps,
#         'prev-capitalized': prevcapitalized,
#
#         'next-all-caps': nextallcaps,
#         'next-capitalized': nextcapitalized,
#     }
#
# def to_conll_iob(annotated_sentence):
#     """
#     `annotated_sentence` = list of triplets [(w1, t1, iob1), ...]
#     Transform a pseudo-IOB notation: O, PERSON, PERSON, O, O, LOCATION, O
#     to proper IOB notation: O, B-PERSON, I-PERSON, O, O, B-LOCATION, O
#     """
#     proper_iob_tokens = []
#     for idx, annotated_token in enumerate(annotated_sentence):
#         tag, word, ner = annotated_token
#
#         if ner != 'O':
#             if idx == 0:
#                 ner = "B-" + ner
#             elif annotated_sentence[idx - 1][2] == ner:
#                 ner = "I-" + ner
#             else:
#                 ner = "B-" + ner
#         proper_iob_tokens.append((tag, word, ner))
#     return proper_iob_tokens
#
#
# def read_gmb(corpus_root):
#     for root, dirs, files in os.walk(corpus_root):
#         for filename in files:
#             if filename.endswith(".tags"):
#                 with open(os.path.join(root, filename), 'rb') as file_handle:
#                     file_content = file_handle.read().decode('utf-8').strip()
#                     annotated_sentences = file_content.split('\n\n')
#                     for annotated_sentence in annotated_sentences:
#                         annotated_tokens = [seq for seq in annotated_sentence.split('\n') if seq]
#
#                         standard_form_tokens = []
#
#                         for idx, annotated_token in enumerate(annotated_tokens):
#                             annotations = annotated_token.split('\t')
#                             word, tag, ner = annotations[0], annotations[1], annotations[3]
#
#                             if ner != 'O':
#                                 ner = ner.split('-')[0]
#
#                             if tag in ('LQU', 'RQU'):  # Make it NLTK compatible
#                                 tag = "``"
#
#                             standard_form_tokens.append((word, tag, ner))
#
#                         conll_tokens = to_conll_iob(standard_form_tokens)
#
#                         # Make it NLTK Classifier compatible - [(w1, t1, iob1), ...] to [((w1, t1), iob1), ...]
#                         # Because the classfier expects a tuple as input, first item input, second the class
#                         yield [((w, t), iob) for w, t, iob in conll_tokens]
#
#
# import pickle
# from collections import Iterable
# from nltk.tag import ClassifierBasedTagger
# from nltk.chunk import ChunkParserI
#
#
# class NamedEntityChunker(ChunkParserI):
#     def __init__(self, train_sents, **kwargs):
#         assert isinstance(train_sents, Iterable)
#
#         self.feature_detector = features
#         self.tagger = ClassifierBasedTagger(
#             train=train_sents,
#             feature_detector=features,
#             **kwargs)
#
#     def parse(self, tagged_sent):
#         chunks = self.tagger.tag(tagged_sent)
#
#         # Transform the result from [((w1, t1), iob1), ...]
#         # to the preferred list of triplets format [(w1, t1, iob1), ...]
#         iob_triplets = [(w, t, c) for ((w, t), c) in chunks]
#
#         # Transform the list of triplets to nltk.Tree format
#         return conlltags2tree(iob_triplets)
#
#
# corpus_root = "gmb-2.2.0/gmb-2.2.0"  # Make sure you set the proper path to the unzipped corpus
# reader = read_gmb(corpus_root)
# training_samples = list(reader)
# chunker = NamedEntityChunker(training_samples[:2000])
# print(chunker.parse(pos_tag(word_tokenize("What's the weather like in Pittsburgh this Monday."))))
예제 #13
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words,
    when tweets found look for datetime and room, if present save tweet
    to OutgoingTweet model.

    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        # SUTime needs the jar directory shipped with python-sutime
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)
        self.slacker = Slacker(s.SLACK_TOKEN)

    def setup_auth(self):
        """Set up auth stuff for api and return tweepy api object"""
        auth = tweepy.OAuthHandler(s.listener["CONSUMER_KEY"],
                                   s.listener["CONSUMER_SECRET"])
        auth.set_access_token(s.listener["ACCESS_TOKEN"],
                              s.listener["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start stream, when matching tweet found on_status method called.
        search_list arg is a list of terms that will be looked for in tweets

        :raises ValueError: when no search terms are supplied
        """
        # `is None` is the correct identity check (PEP 8 E711); the
        # previous `== None` invokes __eq__ and is a lint defect.
        if search_list is None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name):
        """Mention a user in a tweet from bot letting them know that
        their tweet has been received and that we will send out reminders
        about their event.
        """
        time = datetime.datetime.now()
        mention = "@{} We saw your openspaces tweet! {}".format(
            screen_name, time)
        self.api.update_status(status=mention)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def retweet_logic(self, tweet, tweet_id, screen_name, user_id):
        """Use SUTime to try to parse a datetime out of a tweet, if successful
        save tweet to OutgoingTweet to be retweeted
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # make sure both time and room extracted and only one val each
        val_check = [val for val in time_room.values() if len(val) == 1]

        if len(val_check) == 2:
            talk_room = time_room["room"][0]
            parsed_time = time_room["date"][0]
            talk_time = time_utils.convert_to_utc(parsed_time)

            # check for a time and room conflict, only one set of retweets per event
            conflict = db_utils.check_time_room_conflict(talk_time, talk_room)

            if not conflict:
                # send message to slack when a tweet is scheduled to go out
                slack_message = "{} From: {}, id: {}".format(
                    tweet, screen_name, user_id)
                self.slacker.chat.post_message('#outgoing_tweets',
                                               slack_message)

                self.send_mention_tweet(screen_name)

                # This record lets us check that retweets not for same event
                db_utils.create_event(description=tweet,
                                      start=talk_time,
                                      location=talk_room,
                                      creator=screen_name)

                # schedules reminder tweets to be sent out before event
                tweet_utils.schedule_tweets(screen_name, tweet, tweet_id,
                                            talk_time)
예제 #14
0
# Build the SUTime wrapper from the bundled jars and prepare NLTK helpers.
jar_files = os.path.join(os.path.dirname(__file__), 'jars')
sutime = SUTime(jars=jar_files, mark_time_ranges=True, include_range=True)
lemmatizer = nltk.WordNetLemmatizer()
#-----------------------------------------------------------------------------------------------------------------------
# LOAD THE SENTENCES: one entry per input line, truncated after the
# last full stop on that line.
filepath = 'kolbuszowa.txt'
list_sentences = []
with open(filepath, encoding="utf8") as infile:  # renamed: `file` shadowed a builtin
    for raw_line in infile:
        list_sentences.append([raw_line[:raw_line.rfind(".") + 1]])

# PREPROCESSING + DEPENDENCY PARSING
for entry in list_sentences:
    sentence = entry[0]

    # PREPROCESSING: sutime.parse already returns a plain list of dicts;
    # the previous json.dumps/json.loads round-trip added only overhead.
    parsed_times = sutime.parse(sentence)

    # Replace the first temporal expression with its resolved value
    # (e.g. "yesterday" -> an ISO date).
    # NOTE(review): with include_range=True the 'value' can be a dict,
    # in which case str.replace fails -- same limitation as before.
    if parsed_times:
        sentence = sentence.replace(parsed_times[0]['text'],
                                    parsed_times[0]['value'])

    # Apply the project's synonym substitutions.
    for key in synonyms:
        sentence = sentence.replace(key, synonyms[key])

    # CREATE DEPENDENCY TREE via the Stanford dependency parser.
    result = list(sdp.raw_parse(sentence))
    tree = get_tree(result[0], 4)
    dep_tree_dot_repr = tree.to_dot()
    #source = Source(dep_tree_dot_repr, filename="dep_tree", format="png")
    #source.view()
예제 #15
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words,
    when tweets found look for datetime and room, if present save tweet
    to OutgoingTweet model.
    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        db_utils.setup_outgoing_config(
        )  # needs an outgoing config obj to check against
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        # SUTime needs the jar directory shipped with python-sutime
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)
        self.slacker = Slacker(s.SLACK_TOKEN)

    def setup_auth(self):
        """Set up auth stuff for api and return tweepy api object"""
        auth = tweepy.OAuthHandler(s.test_bot["CONSUMER_KEY"],
                                   s.test_bot["CONSUMER_SECRET"])
        auth.set_access_token(s.test_bot["ACCESS_TOKEN"],
                              s.test_bot["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start stream, when matching tweet found on_status method called.
        search_list arg is a list of terms that will be looked for in tweets

        :raises ValueError: when no search terms are supplied
        """
        # `is None` is the correct identity check (PEP 8 E711); the
        # previous `== None` invokes __eq__ and is a lint defect.
        if search_list is None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name, room, time):
        """Mention a user in a tweet from bot letting them know that
        their tweet has been received and that we will send out reminders
        about their event.
        """
        mention = "@{} saw your openspaces tweet for: room {} at {}. Times should be relative to US/Pacific"
        mention = mention.format(screen_name, room, time)
        self.api.update_status(status=mention)

    def value_check(self, time_room_obj):
        """Returns a tuple with the counts of values extracted from a tweet
        in the parse_time_room method. This tuple is used to decide how bot
        will respond to tweet.
        """
        num_room_values = len(time_room_obj["room"])
        num_time_values = len(time_room_obj["date"])

        return (num_room_values, num_time_values)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def retweet_logic(self, tweet, tweet_id, screen_name, user_id):
        """Use SUTime to try to parse a datetime out of a tweet, if successful
        save tweet to OutgoingTweet to be retweeted
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # make sure both time and room extracted and only one val each
        val_check = self.value_check(time_room)

        if val_check == (1, 1):
            room = time_room["room"][0]
            date_mention = tweet_utils.check_date_mention(tweet)
            converted_time = time_utils.convert_to_utc(time_room["date"][0],
                                                       date_mention)

            # check for a time and room conflict, only 1 set of retweets per event
            # default time range that a room is reserved for is -15 +30 mins
            conflict = db_utils.check_time_room_conflict(converted_time, room)

            if not conflict:
                event_obj = db_utils.create_event(description=tweet,
                                                  start=converted_time,
                                                  location=room,
                                                  creator=screen_name)

                tweet_utils.schedule_tweets(screen_name, tweet, tweet_id,
                                            converted_time, event_obj)

                send_slack_message(user_id=user_id,
                                   tweet_id=tweet_id,
                                   screen_name=screen_name,
                                   tweet_created=True,
                                   tweet=tweet,
                                   slack_msg=tweet)

                self.send_mention_tweet(screen_name, room, converted_time)

            else:
                message = f"Tweet found for an already scheduled event: {tweet}"
                send_slack_message(user_id=user_id,
                                   tweet_id=tweet_id,
                                   screen_name=screen_name,
                                   tweet_created=False,
                                   tweet=tweet,
                                   slack_msg=message,
                                   channel="conflict")

        elif val_check == (0, 0):
            # tweet found but without valid time or room extracted, ignore
            pass

        else:
            # tweet with relevant information but not exactly 1 time & 1 room
            slack_msg = """Tweet found that needs review: {}  tweet_id: {} screen_name: {}, user_id: {}"""
            slack_msg = slack_msg.format(tweet, tweet_id, screen_name, user_id)

            send_slack_message(user_id=user_id,
                               tweet_id=tweet_id,
                               screen_name=screen_name,
                               tweet_created=False,
                               tweet=tweet,
                               slack_msg=slack_msg)

    def loadtest_logic(self, tweet, tweet_id, screen_name, user_id):
        """Logic similar to what is being used in the real bot so that we can
        load test how much volume it can handle before twitter kicks it off
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # fake time in the future that imitates a event's start time
        local_tz = pytz.timezone('US/Eastern')
        sample_time = datetime.datetime.now(local_tz) + datetime.timedelta(
            minutes=10)
        sample_time = sample_time.strftime("%Y-%m-%d %H:%M:%S")

        event_time = time_utils.convert_to_utc(sample_time)
        room = random.randint(0, 3000)

        # check for a time and room conflict, only 1 set of retweets per event
        conflict = db_utils.check_time_room_conflict(event_time, room)

        if not conflict:
            # This record lets us check that retweets not for same event
            event_obj = db_utils.create_event(description=tweet,
                                              start=event_time,
                                              location=room,
                                              creator=screen_name)

            tweet_utils.loadtest_schedule_tweets(screen_name=screen_name,
                                                 tweet=tweet,
                                                 tweet_id=tweet_id,
                                                 event_time=event_time,
                                                 event_obj=event_obj)

            print("tweet scheduled for retweet: {}".format(tweet))

            slack_msg = "{} From: {}, id: {}".format(tweet, screen_name,
                                                     user_id)

            send_slack_message(user_id=user_id,
                               tweet_id=tweet_id,
                               screen_name=screen_name,
                               tweet_created=True,
                               tweet=tweet,
                               slack_msg=slack_msg,
                               event_obj=event_obj)

        else:
            print("conflict when scheduling the tweet")
예제 #16
0
import re
import sys
import codecs
from sutime import SUTime

# Texas archaeological site trinomials look like "41WM1234": state
# prefix 41, a two-letter county code, then a 1-4 digit site number.
trinomials = re.compile(r"41[a-zA-Z]{2}[0-9]{1,4}")

ascii_file = str(sys.argv[1])

# SUTime.parse is an instance method; calling it on the class (as the
# original `SUTime.parse(line, ...)` did) raises a TypeError. Build one
# shared instance up front instead.
# NOTE(review): assumes a python-sutime version whose constructor works
# without an explicit jars path -- confirm against the installed version.
sutime = SUTime(mark_time_ranges=True)

line_num = 0
with codecs.open(ascii_file, 'r', encoding='utf-8', errors='ignore') as f:
    for line in f:
        # findall always returns a list (possibly empty), never None,
        # so the previous `is not None` check was dead code.
        for item in trinomials.findall(line):
            print("Site " + str(item) + ", line " + str(line_num))
        print(sutime.parse(line, reference_date=''))
        line_num += 1
예제 #17
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words,
    when tweets found look for datetime and room, if present save tweet
    to OutgoingTweet model.
    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        # SUTime needs the jar directory shipped with python-sutime
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)
        self.slacker = Slacker(s.SLACK_TOKEN)

    def setup_auth(self):
        """Set up auth stuff for api and return tweepy api object"""
        auth = tweepy.OAuthHandler(s.sender["CONSUMER_KEY"],
                                   s.sender["CONSUMER_SECRET"])
        auth.set_access_token(s.sender["ACCESS_TOKEN"],
                              s.sender["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start stream, when matching tweet found on_status method called.
        search_list arg is a list of terms that will be looked for in tweets

        :raises ValueError: when no search terms are supplied
        """
        # `is None` is the correct identity check (PEP 8 E711); the
        # previous `== None` invokes __eq__ and is a lint defect.
        if search_list is None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name, room, time):
        """Mention a user in a tweet from bot letting them know that
        their tweet has been received and that we will send out reminders
        about their event.
        """
        mention = "@{} saw your openspaces tweet for: room {} at {}. Times should be relative to US/Pacific"
        mention = mention.format(screen_name, room, time)
        self.api.update_status(status=mention)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def loadtest_logic(self, tweet, tweet_id, screen_name, user_id):
        """Logic similar to what is being used in the real bot so that we can
        load test how much volume it can handle before twitter kicks it off
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # fake time in the future that imitates a event's start time
        local_tz = pytz.timezone('US/Pacific')
        sample_time = datetime.datetime.now(local_tz) + datetime.timedelta(
            minutes=10)
        sample_time = sample_time.strftime("%Y-%m-%d %H:%M:%S")

        converted_time = time_utils.convert_to_utc(sample_time)
        room = "r123"

        # check for a time and room conflict, only 1 set of retweets per event
        conflict = db_utils.check_time_room_conflict(converted_time, room)

        # send message to slack when a tweet is scheduled to go out
        slack_message = "{} From: {}, id: {}".format(tweet, screen_name,
                                                     user_id)
        self.slacker.chat.post_message('#loadtest_tweets', slack_message)

        # This record lets us check that retweets not for same event
        db_utils.create_event(description=tweet,
                              start=converted_time,
                              location=room,
                              creator=screen_name)

        tweet_utils.loadtest_schedule_tweets(screen_name, tweet, tweet_id,
                                             converted_time)
        print("tweet scheduled for retweet: {}".format(tweet))
예제 #18
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words;
    when a tweet is found, look for a datetime and room, and if both are
    present save the tweet to the OutgoingTweet model.

    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        # Authenticated tweepy API handle used for all outgoing calls.
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        # SUTime needs the path to its bundled jar files to start the JVM.
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    def setup_auth(self):
        """Set up OAuth for the Twitter API and return a tweepy API object."""
        auth = tweepy.OAuthHandler(s.listener["CONSUMER_KEY"],
                                   s.listener["CONSUMER_SECRET"])
        auth.set_access_token(s.listener["ACCESS_TOKEN"],
                              s.listener["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start the stream; on_status is called for each matching tweet.

        :param search_list: list of terms to track in the Twitter stream
        :raises ValueError: if no search terms are supplied
        """
        # `is None` (identity), not `== None` -- PEP 8 idiom fix.
        if search_list is None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name, room, time):
        """Mention a user in a tweet from the bot letting them know that
        their tweet has been received and that we will send out reminders
        about their event.
        """
        mention = "@{} saw your openspaces tweet for: room {} at {}. Times should be relative to US/Pacific"
        mention = mention.format(screen_name, room, time)
        self.api.update_status(status=mention)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def retweet_logic(self, tweet, tweet_id, screen_name):
        """Use SUTime to try to parse a datetime out of a tweet; if successful
        save the tweet to OutgoingTweet to be retweeted.

        :param tweet: tweet text to process
        :param tweet_id: id of the originating tweet
        :param screen_name: author's Twitter handle
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # make sure both time and room extracted and only one val each --
        # ambiguous tweets (multiple times or rooms) are ignored
        val_check = [val for val in time_room.values() if len(val) == 1]

        if len(val_check) == 2:
            room = time_room["room"][0]
            converted_time = time_utils.convert_to_utc(time_room["date"][0])

            # check for a time and room conflict, only 1 set of retweets per event
            conflict = db_utils.check_time_room_conflict(converted_time, room)

            if not conflict:
                self.send_mention_tweet(screen_name, room, converted_time)

                # This record lets us check that retweets not for same event
                db_utils.create_event(description=tweet,
                                      start=converted_time,
                                      location=room,
                                      creator=screen_name)

                tweet_utils.schedule_tweets(screen_name, tweet, tweet_id,
                                            converted_time)
                loggly.info(
                    "scheduled this tweet for retweet: {}".format(tweet))

            else:
                # typo fix in logged message: "recived" -> "received"
                message = """
                            Tweet received for an event bot is already scheduled
                            to retweet about. Sender: {}, room: {}, time: {},
                            tweet: {}
                          """
                message = message.format(screen_name, room, converted_time,
                                         tweet)
                loggly.info(message)

        else:
            # tweet found but without valid time or room extracted, ignore
            pass