Ejemplo n.º 1
0
def sutime_function(text):
    """Translate Romanian *text* to English, run SUTime over it, and group
    the recognized temporal expressions by type.

    :param text: input string in Romanian
    :return: dict mapping each temporal type (translated back to Romanian)
        to the list of matched text fragments (also back in Romanian)
    """
    translator = Translator()
    traducere = translator.translate(text, src='ro', dest="en").text

    java_target = "java\\target"
    jar_files = os.path.join(os.path.dirname(__file__), java_target)
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    ttext = []
    ttype = []
    tmpdictionar = {}

    # Collect the "text" and "type" fields of every SUTime annotation,
    # translated back to Romanian.  (dict.items() yields (key, value)
    # pairs; the original code named them the other way around.)
    for annotation in sutime.parse(traducere):
        for key, value in annotation.items():
            if key == "text":
                ttext.append(convert_to_romana(value))
            elif key == "type":
                ttype.append(convert_to_romana(value))

    # Group the matched fragments by their temporal type.  setdefault
    # replaces the original bare try/except-KeyError grouping pattern.
    for tip, valoare in zip(ttype, ttext):
        tmpdictionar.setdefault(tip, []).append(valoare)

    return tmpdictionar
Ejemplo n.º 2
0
def extract_years(snippet, output):
    """
    Extract date values from *snippet* with SUTime and rebuild output['Y']
    from them, carrying over the confidence scores previously computed by
    extract_entities_textrazor.

    :param snippet: a text snippet in English
    :param output: dict whose 'Y' entry holds
        {'entity': [...], 'confidenceScore': [...]}
    :return: *output* with output['Y'] replaced by the matched SUTime dates
    """
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    res = json.dumps(sutime.parse(snippet), sort_keys=True, indent=4)

    # Scan the serialized JSON for every  "value": "<date>"  field.
    # (i + 9 skips over the 'value": "' prefix that follows the key.)
    dates_list = []
    for i in range(len(res)):
        if res[i:i + 5] == 'value':
            j = i + 9
            while res[j] != '"':
                j += 1
            dates_list.append(res[i + 9:j])

    dic_year = output['Y']
    dates_list_new = {'entity': [], 'confidenceScore': []}

    # Keep each SUTime date containing one of the previously extracted year
    # entities, pairing it with that entity's confidence score.  ('in'
    # replaces the original explicit __contains__ call.)
    for entity, score in zip(dic_year['entity'], dic_year['confidenceScore']):
        for ele in dates_list:
            if entity[0] in ele and ele not in dates_list_new['entity']:
                dates_list_new['entity'].append(ele)
                dates_list_new['confidenceScore'].append(score)

    output['Y'] = dates_list_new
    return output
Ejemplo n.º 3
0
    def __init__(self, path):
        """Initialize SUTime, loading the jars from '<dirname(path)>/jars'."""
        jars = os.path.join(os.path.dirname(path), 'jars')
        self.sutime = SUTime(
            jars=jars, mark_time_ranges=False, include_range=True)
Ejemplo n.º 4
0
    def __init__(self):
        """Set up the Twitter API, stream listener, timezone and SUTime."""
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        self.tz = pytz.timezone('US/Pacific')
        sutime_jars = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=sutime_jars, mark_time_ranges=True)
Ejemplo n.º 5
0
    def __init__(self, classifier_path=None, ner_path=None, sutime_jar_path=None):
        """Set up the Stanford NER tagger, SUTime, and the keyword lists.

        :param classifier_path: path to the CRF classifier model; defaults
            to a local Stanford NER install (adjust for your system)
        :param ner_path: path to stanford-ner.jar
        :param sutime_jar_path: directory containing the SUTime jars
        """
        # Raw strings: the original plain strings relied on '\s', '\c', ...
        # not being recognized escapes — a DeprecationWarning in Python 3
        # and a latent bug if a path component ever starts with n, t, etc.
        if classifier_path is None:
            classifier_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\classifiers\english.muc.7class.distsim.crf.ser.gz"

        if ner_path is None:
            ner_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\stanford-ner.jar"

        if sutime_jar_path is None:
            sutime_jar_path = r"C:\stanford_corenlp\stanford-corenlp-full-2018-02-27\stanford-corenlp-full-2018-02-27"

        self.stanford_classifier = classifier_path
        self.stanford_ner_path = ner_path
        self.sutime_path = sutime_jar_path

        # Creating Tagger and SUTime objects
        self.st = StanfordNERTagger(self.stanford_classifier, self.stanford_ner_path)
        self.su = SUTime(jars=self.sutime_path, mark_time_ranges=True, include_range=True)

        # Keyword lists used to classify user utterances.
        self.weather_terms = ["weather", "climate", "precipitation", "sun", "rain", "cloud", "snow", "hot", "humid", "cold", "sunny", "windy", "cloudy",
                              "rainy", "snowy", "misty", "foggy", "colder", "hotter", "warmer", "pleasant"]
        # NOTE: the original listed "hello" twice; the duplicate is dropped.
        self.greet_terms = ["hello", "hey", "howdy", "hi", "yo", "yaw"]
        self.closure_terms = ["no", "nope", "thank you", "bye", "tata", "thanks", "that will be all", "that's it", "that'll be all"]
        self.day_terms = ["dawn", "dusk", "morning", "evening", "noon", "afternoon", "night", "tonight", "midnight", "midday"]  # , "hours"]
        self.date_terms = ["today", "tomorrow", "yesterday"]
Ejemplo n.º 6
0
 def __init__(self):
     """Wire up config check, Twitter auth, stream listener, SUTime, Slack."""
     # needs an outgoing config obj to check against
     db_utils.setup_outgoing_config()
     self.api = self.setup_auth()
     self.stream_listener = StreamListener(self)
     self.sutime = SUTime(
         jars=os.path.join(BASE_DIR, "python-sutime", "jars"),
         mark_time_ranges=True)
     self.slacker = Slacker(s.SLACK_TOKEN)
Ejemplo n.º 7
0
	def __init__(self):
		"""Set up the Twitter API, word lists, SUTime and NLTK data paths."""
		# Twitter API setup
		auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
		auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
		self.api = tweepy.API(auth)
		self.tweet_list = []
		self.relevance_scores = []

		# word blacklist fetched over HTTP
		self.bad_words = requests.get(BAD_WORDS_URL).text.split('\n')

		# english stop words
		self.stopwords = list(stopwords.words('english'))

		# SUTime jars (path overridable via the JAR_FILES env var)
		self.sutime = SUTime(
			jars=os.environ.get('JAR_FILES',
			                    '/webapps/hackor/hackor/python-sutime/jars'),
			mark_time_ranges=True)

		# extra NLTK data location (overridable via NLTK_CORPUS)
		nltk.data.path.append(
			os.environ.get('NLTK_CORPUS', '/webapps/hackor/hackor/nltk_data'))
Ejemplo n.º 8
0
def extract_entitites(snippet):
    """
    Run Stanford CoreNLP NER and SUTime over a snippet.

    :param snippet: a snippet in English
    :return: dict with keys 'RN' (person name tokens), 'U' (organization
        tokens) and 'Y' (year/date values extracted by SUTime)
    """
    nlp = StanfordCoreNLP('http://localhost:9000')
    res = nlp.annotate(snippet,
                       properties={
                           'annotators': 'ner',  # 'sutime'
                           'outputFormat': 'json',
                           # 'timeout': 1000,
                       })

    output = {'RN': [], 'U': [], 'Y': []}

    # Extract the university and person names from every sentence,
    # iterating the sentences directly rather than by index.
    for sentence in res['sentences']:
        for element in sentence['tokens']:
            if element['ner'] == 'PERSON':
                output['RN'].append(element['word'])
            if element['ner'] == 'ORGANIZATION':  # or element['ner'] == 'LOCATION'
                output['U'].append(element['word'])

    # Extract the years: serialize the SUTime annotations and scan the JSON
    # text for every  "value": "<date>"  field (i + 9 skips 'value": "').
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    res = json.dumps(sutime.parse(snippet), sort_keys=True, indent=4)

    for i in range(len(res)):
        if res[i:i + 5] == 'value':
            j = i + 9
            while res[j] != '"':
                j += 1
            output['Y'].append(res[i + 9:j])

    return output
class NLUWrapper(object):
    """Thin client for a remote NLU annotation service.

    SUTime annotations are computed locally via the bundled jars; every
    other module is requested over HTTP from host:port.

    NOTE(review): ``__init__`` uses a Python 2 ``print`` statement, so this
    class is Python 2 only as written.
    """

    def __init__(self, host='localhost', port=5001, **kwargs):
        # kwargs is accepted for call-site compatibility but otherwise ignored.
        self.host, self.port = host, port
        self.sutime = SUTime(jars=os.path.join(os.path.dirname(__file__),
                                               'python-sutime', 'jars'),
                             mark_time_ranges=True)
        print 'Initialized with {}:{}'.format(self.host, self.port)

    def annotate(self, in_utterance, modules=()):
        """POST *in_utterance* to the NLU service and return its JSON reply.

        'SUTime' in *modules* is handled locally and spliced into the reply
        under result['annotations']['SUTime'].  Returns {} when the service
        times out; raises AssertionError on any non-200 response.
        """
        sutime_response = None
        try:
            if 'SUTime' in modules:
                sutime_response = self.sutime.parse(in_utterance)
                # Strip the locally handled module before calling the service.
                modules = [module for module in modules if module != 'SUTime']
            response = requests.post('http://{}:{}/annotate'.format(
                self.host, self.port),
                                     json={
                                         'state': {
                                             'utterance': in_utterance
                                         },
                                         'modules': modules
                                     },
                                     timeout=5)
        except requests.Timeout:
            return {}
        assert response.status_code == 200, 'Error calling the NLU service'
        result = response.json()
        if sutime_response is not None:
            result['annotations']['SUTime'] = sutime_response
        return result

    def annotate_sentiment(self, in_utterance):
        """Return the Vader sentiment annotation for *in_utterance*."""
        response = self.annotate(in_utterance,
                                 modules=['Preprocessor', 'VaderNLTK'])
        return response['annotations']['sentiment']

    def annotate_ner(self, in_utterance):
        """Return Stanford NER annotations, or {} when absent."""
        response = self.annotate(in_utterance,
                                 modules=['Preprocessor', 'StanfordNER'])
        return response['annotations'].get('ner', {})

    def annotate_pos(self, in_utterance):
        """Return POS tags from the morphological tagger, or [] when absent."""
        response = self.annotate(in_utterance,
                                 modules=['Preprocessor', 'MorphoTagger'])
        return response['annotations'].get('postag', [])

    def annotate_abuse(self, in_utterance):
        """Return the abuse-detector annotation, or {} when absent."""
        response = self.annotate(
            in_utterance, modules=['Preprocessor', 'AlanaAbuseDetector'])
        return response['annotations'].get('abuse', {})
Ejemplo n.º 10
0
class DateLinker(BasePipeline):
    """Pipeline annotator that links SUTime DATE values to xsd:dateTime URIs
    and appends the resulting entities to the document."""

    def __init__(self, resource_folder=None):
        """
        :param resource_folder: directory holding the SUTime jars; defaults
            to '../resources/sutime/' relative to this file.
        """
        self.annotator_name = 'Date_Linker'
        if resource_folder is None:
            resource_folder = os.path.join(os.path.dirname(__file__),
                                           '../resources/sutime/')
        # BUG FIX: the original assigned self.resource_folder only in the
        # default branch, so passing an explicit folder raised
        # AttributeError on the SUTime construction below.
        self.resource_folder = resource_folder
        self.sutime = SUTime(jars=self.resource_folder)

    @staticmethod
    def _stdform(val):
        """Pad a (possibly negative) dash-separated date out to a full
        xsd:dateTime literal; values of unexpected length get the bracketed
        type suffix, matching the original per-length branches."""
        digits = val[1:] if val.startswith('-') else val
        if len(digits) == 4:
            return val + '-00-00T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime'
        if len(digits) == 7:
            return val + '-00T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime'
        if len(digits) == 10:
            return val + 'T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime'
        return val + '^^<http://www.w3.org/2001/XMLSchema#dateTime>'

    def run(self, document):
        """Parse document.text with SUTime and append an Entity for every
        DATE annotation whose value looks like a numeric date."""
        dates = self.sutime.parse(document.text)

        # Matches (possibly negative) dash-separated numeric dates,
        # e.g. "1984", "1984-06", "1984-06-02", "-0044".
        pattern = re.compile(r"^-*\d*-*\d*-*\d*-*$")

        for date in dates:
            if date["type"] == "DATE" and pattern.match(date["value"]):
                stdform = self._stdform(date["value"])

                start = date["start"]
                end = date["end"]

                entity = Entity(uri=stdform,
                                boundaries=(start, end),
                                surfaceform=document.text[start:end],
                                annotator=self.annotator_name)

                document.entities.append(entity)

        return document
Ejemplo n.º 11
0
class timeDelta:
    """Small SUTime wrapper that exposes only the parsed time values."""

    def __init__(self, path):
        """Initialize SUTime, loading jars from '<dirname(path)>/jars'."""
        self.sutime = SUTime(jars=os.path.join(os.path.dirname(path), 'jars'),
                             mark_time_ranges=False,
                             include_range=True)

    def get_times(self, text):
        """Return the 'value' field of every temporal expression in *text*."""
        return [match['value'] for match in self.sutime.parse(text)]
Ejemplo n.º 12
0
def sutime_with_mark_time_ranges():
    """Build a SUTime (jars two levels up) with time ranges marked."""
    jars_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                            'jars')
    return SUTime(jars=jars_dir, mark_time_ranges=True)
def sutime_with_mark_time_ranges():
    """Build a SUTime with mark_time_ranges enabled (default jar location)."""
    return SUTime(mark_time_ranges=True)
Ejemplo n.º 14
0
# Minimal Flask service exposing SUTime parsing over HTTP.
from flask import Flask
from flask import request
import os
import json
from sutime import SUTime
import sys
import json  # NOTE(review): duplicate import; json is already imported above
app = Flask(__name__)
# SUTime jars are expected in a 'jars' directory next to this file;
# the instance is shared by all requests.
jar_files = os.path.join(os.path.dirname(__file__), 'jars')
sutime = SUTime(jars=jar_files, mark_time_ranges=False)
@app.route('/')
def homepage():
  # GET /?q=<text>  ->  JSON-encoded list of SUTime annotations for <text>
  q = request.args.get('q')
  return json.dumps(parse(q))
def parse(s):
  # Run the module-level SUTime instance over *s*.
  return sutime.parse(s)
if __name__ == '__main__':
  # Development server only; use a proper WSGI server in production.
  app.run(debug=True, use_reloader=True)
def loadSUtime(base_path="/Users/harsha/Documents/cse635_AIR/Project/Main/Code/python-sutime-master/"):
    """Create a SUTime instance whose jars live under *base_path*/jars.

    :param base_path: directory of the python-sutime checkout; defaults to
        the original hard-coded location.  (The original achieved this by
        rebinding ``__file__`` to that path, which is avoided here.)
    :return: a configured SUTime instance with mark_time_ranges enabled
    """
    jar_files = os.path.join(os.path.dirname(base_path), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    return sutime
Ejemplo n.º 16
0
def get_sutime():
    """Lazily construct and return the module-level SUTime singleton."""
    global sutime
    if sutime is not None:
        return sutime
    sutime = SUTime(jars=jar_path, mark_time_ranges=True)
    return sutime
Ejemplo n.º 17
0
def sutime_with_jvm_flags():
    """Build a SUTime (jars two levels up), passing -Xms256m to the JVM."""
    jars_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                            "jars")
    return SUTime(jars=jars_dir, jvm_flags=("-Xms256m",))
Ejemplo n.º 18
0
# --- Task 4: sentence-level temporal tagging (Python 2 script) ---
# NOTE(review): this excerpt is truncated — the body of the innermost
# 'for sent_new in sent_list:' loop continues beyond this chunk.
past = ['was', 'had', 'did']
set_past = set(past)

# NOTE(review): 'file' shadows the Py2 builtin and the handle is never closed.
file = open('task4_cases.txt', 'r').read().split('\n')
# fname = 'task4_cases.txt'
# with open(fname) as f:
#     content = f.readlines()

# classification labels
x = "relevant positive"
y = "relevant negative"
z = "not relevant"

stemmer = PorterStemmer()
#path_jar = "/home/kanv/python-sutime/"
jar_files = os.path.join(os.path.dirname(__file__), 'jars')
sutime = SUTime(jars=jar_files, mark_time_ranges=True)

for line in file:
    #line = line.encode('utf-8')
    line = line.encode('ascii', 'ignore')  # drop non-ASCII characters
    sent_tokenize_list = sent_tokenize(line)
    #print sent_tokenize_list
    print "-------------------------------"
    res = []
    dt = []
    for sent in sent_tokenize_list:
        # Re-split on sentence-final punctuation that is not part of an
        # abbreviation (e.g. "U.S.") or an initial.
        sent_list = re.split(
            '(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)(\s|[A-Z].*)', sent)
        print sent_list
        for sent_new in sent_list:
            #print sent_new
Ejemplo n.º 19
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words,
    when tweets found look for datetime and room, if present save tweet
    to OutgoingTweet model.
    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        # Twitter client, stream listener, SUTime parser and Slack client.
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)
        self.slacker = Slacker(s.SLACK_TOKEN)

    def setup_auth(self):
        """Set up auth stuff for api and return tweepy api object"""
        auth = tweepy.OAuthHandler(s.openspaces["CONSUMER_KEY"],
                                   s.openspaces["CONSUMER_SECRET"])
        auth.set_access_token(s.openspaces["ACCESS_TOKEN"],
                              s.openspaces["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start stream, when matching tweet found on_status method called. 
        search_list arg is a list of terms that will be looked for in tweets
        """
        # NOTE(review): prefer 'search_list is None' over '== None'.
        if search_list == None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name):
        """Mention a user in a tweet from bot letting them know that
        their tweet has been recieved and that we will send out reminders
        about their event.
        """
        hours_mins = time_utils.get_local_clock_time()

        mention = "@{} just saw your Open Spaces tweet at {}."
        mention += " Pending approval we'll retweet a reminder before your event!"
        mention = mention.format(screen_name, hours_mins)

        try:
            self.api.update_status(status=mention)
        # NOTE(review): bare except swallows every exception (including
        # SystemExit/KeyboardInterrupt), not only the duplicate-tweet error
        # described below; a narrower tweepy error type would be safer.
        except:
            # if same user tweets valid openspaces tweet at exact same clock time
            # it causes a duplicate tweet which bot can't send
            loggly.info(
                "duplicate tweet by openspaces bot in send_mention_tweet")

    def send_slack_message(self, channel, message):
        """Send a slack message a channel

        channel options:
        #outgoing_tweets
        #need_review
        #event_conflict
        """
        self.slacker.chat.post_message(channel, message)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def value_check(self, time_room_obj):
        """Returns a tuple with the counts of values extracted from a tweet
        in the parse_time_room method. This tuple is used to decide how bot
        will respond to tweet. 
        """
        num_room_values = len(time_room_obj["room"])
        num_time_values = len(time_room_obj["date"])

        # (rooms, times); (1, 1) is the only combination that is retweeted.
        return (num_room_values, num_time_values)

    def retweet_logic(self, tweet, tweet_id, screen_name, user_id):
        """Use SUTime to try to parse a datetime out of a tweet, if successful
        save tweet to OutgoingTweet to be retweeted
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # make sure both time and room extracted and only one val each
        val_check = self.value_check(time_room)

        if val_check == (1, 1):
            room = time_room["room"][0]
            date_mention = tweet_utils.check_date_mention(tweet)
            converted_time = time_utils.convert_to_utc(time_room["date"][0],
                                                       date_mention)

            # check for a time and room conflict, only 1 set of retweets per event
            # default time range that a room is resrved for is -15 +30 mins
            conflict = db_utils.check_time_room_conflict(converted_time, room)

            if not conflict:
                # send message to slack when a tweet is scheduled to go out
                slack_message = "{} From: {}, id: {}".format(
                    tweet, screen_name, user_id)
                self.send_slack_message('#outgoing_tweets', slack_message)

                self.send_mention_tweet(screen_name)

                # This record lets us check that retweets not for same event
                db_utils.create_event(description=tweet,
                                      start=converted_time,
                                      location=room,
                                      creator=screen_name)

                tweet_utils.schedule_tweets(screen_name, tweet, tweet_id,
                                            converted_time)
                loggly.info(
                    "scheduled this tweet for retweet: {}".format(tweet))

            else:
                # conflicting event: notify Slack instead of scheduling
                message = """Tweet recived for an event bot is already scheduled
                    to retweet about. Sender: {}, room: {}, time: {}, 
                    tweet: {} tweet_id: {}
                    """
                message = message.format(screen_name, room, converted_time,
                                         tweet, tweet_id)
                self.send_slack_message("#event_conflict", message)
                loggly.info(message)

        elif val_check == (0, 0):
            # tweet found but without valid time or room extracted, ignore
            pass

        else:
            # tweet with relevant information but not exactly 1 time & 1 room
            message = """Tweet found that needs review: {}  tweet_id: {}
                screen_name: {}, user_id: {}
                """
            message = message.format(tweet, tweet_id, screen_name, user_id)
            self.send_slack_message("#need_review", message)
def sutime_spanish():
    """Build a SUTime configured for Spanish (default jar location)."""
    return SUTime(language='spanish')
Ejemplo n.º 21
0
class Streambot:
    """Stream Twitter and look for tweets that contain targeted words,
    when tweets found look for datetime and room, if present save tweet
    to OutgoingTweet model.
    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"])
    """
    def __init__(self):
        # Twitter client, stream listener, SUTime parser and Slack client.
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)
        self.slacker = Slacker(s.SLACK_TOKEN)

    def setup_auth(self):
        """Set up auth stuff for api and return tweepy api object"""
        auth = tweepy.OAuthHandler(s.sender["CONSUMER_KEY"],
                                   s.sender["CONSUMER_SECRET"])
        auth.set_access_token(s.sender["ACCESS_TOKEN"],
                              s.sender["ACCESS_TOKEN_SECRET"])

        api = tweepy.API(auth)
        return api

    def run_stream(self, search_list=None):
        """Start stream, when matching tweet found on_status method called. 
        search_list arg is a list of terms that will be looked for in tweets
        """
        # NOTE(review): prefer 'search_list is None' over '== None'.
        if search_list == None:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def send_mention_tweet(self, screen_name, room, time):
        """Mention a user in a tweet from bot letting them know that
        their tweet has been recieved and that we will send out reminders
        about their event.
        """
        mention = "@{} saw your openspaces tweet for: room {} at {}. Times should be relative to US/Pacific"
        mention = mention.format(screen_name, room, time)
        self.api.update_status(status=mention)

    def parse_time_room(self, tweet):
        """Get time and room number from a tweet using SUTime and tweet_utils"""
        extracted_time = self.sutime.parse(tweet)
        time_and_room = tweet_utils.get_time_and_room(tweet, extracted_time)
        return time_and_room

    def loadtest_logic(self, tweet, tweet_id, screen_name, user_id):
        """Logic similar to what is being used in the real bot so that we can 
        load test how much volume it can handle before twitter kicks it off 
        """
        # use SUTime to parse a datetime out of tweet
        time_room = self.parse_time_room(tweet)

        # fake time in the future that imitates a event's start time
        local_tz = pytz.timezone('US/Pacific')
        sample_time = datetime.datetime.now(local_tz) + datetime.timedelta(
            minutes=10)
        sample_time = sample_time.strftime("%Y-%m-%d %H:%M:%S")

        converted_time = time_utils.convert_to_utc(sample_time)
        room = "r123"

        # check for a time and room conflict, only 1 set of retweets per event
        # NOTE(review): 'conflict' is computed but never used below.
        conflict = db_utils.check_time_room_conflict(converted_time, room)

        # send message to slack when a tweet is scheduled to go out
        slack_message = "{} From: {}, id: {}".format(tweet, screen_name,
                                                     user_id)
        self.slacker.chat.post_message('#loadtest_tweets', slack_message)

        # This record lets us check that retweets not for same event
        db_utils.create_event(description=tweet,
                              start=converted_time,
                              location=room,
                              creator=screen_name)

        tweet_utils.loadtest_schedule_tweets(screen_name, tweet, tweet_id,
                                             converted_time)
        print("tweet scheduled for retweet: {}".format(tweet))
Ejemplo n.º 22
0
 def __init__(self, resource_folder=None):
     """Set up the Date_Linker annotator and its SUTime instance.

     :param resource_folder: directory holding the SUTime jars; defaults to
         '../resources/sutime/' relative to this file.
     """
     self.annotator_name = 'Date_Linker'
     if resource_folder is None:
         resource_folder = os.path.join(os.path.dirname(__file__),
                                        '../resources/sutime/')
     # BUG FIX: the original assigned self.resource_folder only in the
     # default branch, so passing an explicit folder raised AttributeError
     # on the SUTime construction below.
     self.resource_folder = resource_folder
     self.sutime = SUTime(jars=self.resource_folder)
Ejemplo n.º 23
0
def sutime():
    """Build a SUTime instance with all defaults."""
    return SUTime()
Ejemplo n.º 24
0
class Streambot:
    """
    Stream Twitter and look for tweets that contain targeted words, 
    when tweets found look for datetime and room, if present save tweet to
    OutgoingTweet model.  

    Ex.
    bot = Streambot()
    # to run a stream looking for tweets about PyCon
    bot.run_stream(["PyCon"]) 
    """
    def __init__(self):
        # Twitter client, stream listener, local timezone and SUTime parser.
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        self.tz = pytz.timezone('US/Pacific')

        jar_files = os.path.join(BASE_DIR, "python-sutime", "jars")
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    def setup_auth(self):
        """
        Set up auth stuff for api and return tweepy api object
        """
        auth = tweepy.OAuthHandler(s.listener["CONSUMER_KEY"],
                                   s.listener["CONSUMER_SECRET"])
        auth.set_access_token(s.listener["ACCESS_TOKEN"],
                              s.listener["ACCESS_TOKEN_SECRET"])
        api = tweepy.API(auth)

        return api

    # NOTE(review): mutable default argument; harmless here because the list
    # is never mutated, but a None sentinel would be the safer idiom.
    def run_stream(self, search_list=[]):
        """
        Start stream, when matching tweet found on_status in StreamListener called. 
        search_list arg is a list of terms that will be looked for in tweets
        """
        if search_list == []:
            raise ValueError(
                "Need a list of search terms as arg to run_stream")

        stream = tweepy.Stream(auth=self.api.auth,
                               listener=self.stream_listener)
        stream.filter(track=search_list)

    def convert_to_utc(self, talk_time):
        """
        Convert the datetime string we get from SUTime to utcnow
        """
        # get correct local year, month, dat
        local_date = datetime.now(self.tz)
        local_date_str = datetime.strftime(local_date, "%Y %m %d")
        year, month, day = local_date_str.split(" ")

        # get SUTime parsed talk time and extract hours, mins
        dt_obj = parse(talk_time)
        local_time_str = datetime.strftime(dt_obj, "%H %M")
        hours, mins = local_time_str.split(" ")

        # build up correct datetime obj, normalize & localize, switch to utc
        correct_dt = datetime(int(year), int(month), int(day), int(hours),
                              int(mins))
        tz_aware_local = self.tz.normalize(self.tz.localize(correct_dt))
        local_as_utc = tz_aware_local.astimezone(pytz.utc)

        return local_as_utc

    def schedule_tweets(self, screen_name, tweet, tweet_id, talk_time):
        """
        Take tweet and datetime, schedule num of reminder tweets at set intervals 
        """
        # check config table to see if autosend on
        config_obj = models.AppConfig.objects.latest("id")
        approved = 1 if config_obj.auto_send else 0

        tweet_url = "https://twitter.com/{name}/status/{tweet_id}"
        embeded_tweet = tweet_url.format(name=screen_name, tweet_id=tweet_id)

        # set num of reminder tweets and interval in mins that tweets sent
        # num_tweets = 2 & interval = 15 sends 2 tweets 30 & 15 mins before
        # NOTE(review): interval is 1 here, so the reminders actually go out
        # 2 and 1 minutes before the talk, not 30 and 15.
        num_tweets = 2
        interval = 1

        for mins in range(interval, (num_tweets * interval + 1), interval):
            remind_time = talk_time - timedelta(minutes=mins)

            message = "Coming up in {} minutes! {}".format(mins, embeded_tweet)

            print("message should be saved!!!")

            # saving the tweet to the OutgoingTweet table triggers celery stuff
            tweet_obj = models.Tweets(tweet=message,
                                      approved=approved,
                                      scheduled_time=remind_time)
            tweet_obj.save()

    def retweet_logic(self, tweet, tweet_id, screen_name):
        """
        Use SUTime to try to parse a datetime out of a tweet, if successful
        save tweet to OutgoingTweet to be retweeted
        """
        print(tweet, tweet_id)
        time_room = self.get_time_and_room(tweet)

        # check to make sure both time and room extracted and only one val for each
        val_check = [val for val in time_room.values() if len(val) == 1]

        if len(val_check) == 2:
            # way to mention a user after a valid tweet is recieved
            # time_stamp = datetime.datetime.utcnow()

            # mention = "@{} We saw your openspaces tweet!{}".format(screen_name, time_stamp)

            # self.api.update_status(status=mention)

            # need to make time from SUTime match time Django is using
            sutime_stuff = time_room["date"][0]
            print("sutime_stuff: {}".format(sutime_stuff))
            talk_time = self.convert_to_utc(time_room["date"][0])
            print("reult from convet to utc: {}".format(talk_time))

            self.schedule_tweets(screen_name, tweet, tweet_id, talk_time)

    def get_time_and_room(self, tweet):
        """
        Get time and room number from a tweet
        Written by Santi @ https://github.com/adavanisanti
        """
        result = {}
        result["date"] = []
        result["room"] = []

        time_slots = self.sutime.parse(tweet)
        tweet_without_time = tweet

        # remove the time expressions from the tweet text, keep their values
        for time_slot in time_slots:
            tweet_without_time = tweet_without_time.replace(
                time_slot.get("text"), "")
            result["date"].append(time_slot.get("value"))

        # filter_known_words = [word.lower() for word in word_tokenize(tweet_without_time) if word.lower() not in (self.stopwords + nltk.corpus.words.words())]
        filter_known_words = [
            word.lower() for word in word_tokenize(tweet_without_time)
        ]

        # regular expression for room
        room_re = re.compile("([a-zA-Z](\d{3})[-+]?(\d{3})?)")

        for word in filter_known_words:
            if room_re.match(word):
                result["room"].append(room_re.match(word).group())

        return result
Ejemplo n.º 25
0
def sutime_spanish():
    """Build a Spanish-language SUTime (jars two levels up)."""
    jars_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                            "jars")
    return SUTime(jars=jars_dir, language="spanish")
Ejemplo n.º 26
0
def sutime():
    """Build a SUTime using the jars directory two levels up."""
    jars_dir = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                            'jars')
    return SUTime(jars=jars_dir)
Ejemplo n.º 27
0
 def __init__(self):
     """Set up Twitter auth, stream listener, SUTime and the Slack client."""
     self.api = self.setup_auth()
     self.stream_listener = StreamListener(self)
     self.sutime = SUTime(
         jars=os.path.join(BASE_DIR, "python-sutime", "jars"),
         mark_time_ranges=True)
     self.slacker = Slacker(s.SLACK_TOKEN)
Ejemplo n.º 28
0
class RetweetBot:
    """Searches Twitter for relevant tweets, scores them, filters profanity,
    and composes Slack messages suggesting retweets."""

    def __init__(self):
        # Twitter API setup
        auth = tweepy.OAuthHandler(os.environ.get('CONSUMER_KEY'),
                                   os.environ.get('CONSUMER_SECRET'))
        auth.set_access_token(os.environ.get('ACCESS_TOKEN'),
                              os.environ.get('ACCESS_TOKEN_SECRET'))
        self.api = tweepy.API(auth)
        self.tweet_list = []
        # (index into tweet_list, score) pairs, sorted best-first after
        # compute_relevance_scores() runs.
        self.relevance_scores = []

        # bad words: one word per line, used by isSafe()
        response = requests.get(BAD_WORDS_URL)
        self.bad_words = response.text.split('\n')

        # stop words
        self.stopwords = list(stopwords.words('english'))

        # sutime: needs the directory holding its Java jars
        jar_files = os.environ.get('JAR_FILES', '../python-sutime/jars')
        self.sutime = SUTime(jars=jar_files, mark_time_ranges=True)

        # nltk data append
        nltk.data.path.append(
            os.environ.get('NLTK_CORPUS', '/webapps/hackor/hackor/nltk_data'))

    def get_tweets(self,
                   topic="#pycon",
                   quantity=10,
                   result_type="recent,popular"):
        """Search Twitter for `topic` and append the results to self.tweet_list."""
        tweet_list = self.api.search(q=topic,
                                     count=quantity,
                                     lang='en',
                                     result_type=result_type)
        print("Retrieved {} candidate tweets.".format(len(tweet_list)))
        self.tweet_list += tweet_list

    def clear_tweets(self):
        """Drop all collected tweets and their scores."""
        self.tweet_list = []
        self.relevance_scores = []

    def score(self, tweet):
        """
        Relevance score: importance of the user tweeting.

        Features: follower/friend ratio, hashtag density (fewer is better),
        favorite count. Retweets and tweets containing bad words score
        MAX_NEGATIVE so they are filtered out.
        """
        if not self.isSafe(tweet.text):
            return MAX_NEGATIVE

        if tweet.text.startswith('RT'):
            return MAX_NEGATIVE

        # influencer ratio: followers per friend
        influencer_ratio = 0
        if tweet.user.friends_count:
            influencer_ratio = tweet.user.followers_count / tweet.user.friends_count

        # number of hashtags
        hashtags = tweet.text.count('#')

        # total character length of hashtag words
        hashtagcount = 0
        for word in tweet.text.split():
            if word.startswith('#'):
                hashtagcount += len(word)

        final_score = influencer_ratio * (hashtagcount / 140) * 1.0 / (
            1 + hashtags) * tweet.favorite_count
        # NOTE(review): the computed score is immediately overridden — every
        # safe, non-RT tweet scores 1.0. Presumably a debugging shortcut;
        # remove this line to restore the weighted score. Kept to preserve
        # current behavior.
        final_score = 1.0
        return final_score

    def compute_relevance_scores(self):
        """Score every collected tweet, keep the positive ones, best first."""
        for _id, tweet in enumerate(self.tweet_list):
            # Score once per tweet instead of twice (the original called
            # self.score() both in the test and in the append).
            tweet_score = self.score(tweet)
            if tweet_score > 0.0:
                self.relevance_scores.append((_id, tweet_score))
        self.relevance_scores.sort(key=lambda tup: tup[1], reverse=True)

    def compose_relevant_slack_messages(self, count=1):
        """Build Slack-formatted RT suggestions for the top `count` tweets."""
        messages = []
        if self.relevance_scores:
            message = ''
            for score in self.relevance_scores[0:count]:
                tweet_score = score[1]
                # Fixed: `print tweet_score` is Python 2 syntax and a
                # SyntaxError on Python 3.
                print(tweet_score)
                tweet = self.tweet_list[score[0]]
                message = "RT <https://twitter.com/" + tweet.user.screen_name + "|" + tweet.user.screen_name + ">" + " " + tweet.text
                message += "\n <https://twitter.com/" + tweet.user.screen_name + "/status/" + str(
                    tweet.id) + "|Original Tweet>"
                messages.append(message)
        return messages

    def isSafe(self, tweet):
        """Return False if the tweet (hashes stripped) contains any bad word."""
        result = True
        ret = tweet.replace('#', '')
        for word in self.bad_words:
            # Whole-word, case-insensitive match.
            regex = r"\b(?=\w)" + re.escape(word) + r"\b(?!\w)"
            if re.search(regex, ret, re.IGNORECASE):
                result = False
                break
        return result

    def get_time_and_room(self, tweet):
        """
        Extract talk times and room numbers from a tweet.

        :param tweet: raw tweet text
        :return: dict with "date" (SUTime values) and "room" lists
        """
        result = {'date': [], 'room': []}

        time_slots = self.sutime.parse(tweet)
        tweet_without_time = tweet

        # Strip recognized time expressions so their digits are not
        # mistaken for room numbers below.
        for time_slot in time_slots:
            tweet_without_time = tweet_without_time.replace(
                time_slot.get('text'), '')
            result['date'].append(time_slot.get('value'))

        filter_known_words = [
            word.lower() for word in word_tokenize(tweet_without_time)
            if word.lower() not in (self.stopwords + nltk.corpus.words.words())
        ]

        # Room pattern: letter + 3 digits, optional "-"/"+" + 3 digits.
        # Raw string avoids the invalid "\d" escape warning.
        room_re = re.compile(r'([a-zA-Z](\d{3})[-+]?(\d{3})?)')

        for word in filter_known_words:
            match = room_re.match(word)  # match once, reuse the result
            if match:
                result['room'].append(match.group())

        return result
Ejemplo n.º 29
0
from sutime import SUTime
from collections import defaultdict
import numpy as np
debug = True

#location global vars
# Stanford NER model and jars are expected in ./stanfordjars beside this file.
stanford_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'stanfordjars')
st = StanfordNERTagger(os.path.join(stanford_dir, 'ner-model.ser.gz'), os.path.join(stanford_dir, 'stanford-ner.jar'))
# Widen the classpath to every jar in the directory (overrides a private
# attribute of the NLTK tagger wrapper).
st._stanford_jar = os.path.join(stanford_dir, '*')
place_to_coords = {}  # cache: place name -> coordinates; presumably filled by Places lookups — TODO confirm
url_base = 'https://maps.googleapis.com/maps/api/place/textsearch/json'
# SECURITY NOTE(review): Google API key hardcoded in source — rotate it and
# load from an environment variable or secrets store instead.
api_key = 'AIzaSyAVat82-OUFKC9GpyOi3LNyQKwxE2KWY9U'

#time global vars
# SUTime jars are expected in ./sutimejars beside this file.
jar_files = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'sutimejars')
sutime = SUTime(jars=jar_files, mark_time_ranges=True)

#FB api global vars
app_id = "1696549057338916"
# SECURITY NOTE(review): app secret committed to source — move out of the repo.
app_secret = "21090405ac37194a1d4578aeb2371845" # DO NOT SHARE WITH ANYONE!
# "app_id|app_secret" is Facebook's app access-token format.
access_token = app_id + "|" + app_secret

#classifier global vars
def unpickle():
    """
    Load the three pre-trained classifiers from ./pickles beside this file.

    :return: tuple ``(clf_driver, clf_roundtrip, clf_relevant)``

    NOTE: the original version loaded the classifiers into locals and then
    discarded them (no return, no globals). Returning them makes the
    function usable; callers that ignored the previous ``None`` return are
    unaffected.

    SECURITY: ``pickle.load`` can execute arbitrary code — only load pickle
    files from a trusted source.
    """
    pickle_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'pickles')

    def _load(filename):
        # Unpickle one classifier file from pickle_dir.
        with open(os.path.join(pickle_dir, filename), 'rb') as fid:
            return pickle.load(fid)

    clf_driver = _load('clf_driver.pkl')
    clf_roundtrip = _load('clf_roundtrip.pkl')
    clf_relevant = _load('clf_relevant.pkl')
    return clf_driver, clf_roundtrip, clf_relevant
Ejemplo n.º 30
0
import os
import json
from sutime import SUTime

if __name__ == '__main__':
    # Smoke test: parse a sentence containing a date and a time range.
    test_case = u'I need a desk for tomorrow from 2pm to 3pm'

    # Raw string is required: '\U' in a plain literal is a SyntaxError on
    # Python 3 (it starts a \UXXXXXXXX escape). The original also assigned
    # jar_files twice; the dead Anaconda path is kept as a comment:
    # jar_files = r'C:\Users\Leandra\Anaconda2\lib\site-packages\sutime\jars'
    jar_files = r'C:\Users\Leandra\Documents\Fall2016\NLP\carpool-search\jars'
    print(jar_files)
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    print(json.dumps(sutime.parse(test_case), sort_keys=True, indent=4))