コード例 #1
0
    def __init__(self, path):
        """Create the SUTime tagger from the ``jars`` folder beside *path*.

        :param path: a file path; the jar directory is looked up as ``jars``
            inside the directory that contains this path.
        """
        base_dir = os.path.dirname(path)
        self.sutime = SUTime(
            jars=os.path.join(base_dir, 'jars'),
            mark_time_ranges=False,
            include_range=True,
        )
コード例 #2
0
ファイル: proiect.py プロジェクト: paulsem/TILN
def sutime_function(text):
    """Extract temporal expressions from Romanian text, grouped by type.

    The text is translated from Romanian ('ro') to English, tagged with
    SUTime, and each tagged expression and its type are passed through
    ``convert_to_romana`` before being grouped.

    :param text: input text in Romanian.
    :return: dict mapping each converted type to the list of converted
        expression texts, in the order SUTime reported them.
    """
    translator = Translator()
    traducere = translator.translate(text, src='ro', dest="en").text

    # Join the path components separately so the jar directory also resolves
    # on platforms whose separator is not a backslash.
    jar_files = os.path.join(os.path.dirname(__file__), "java", "target")
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    tmpdictionar = {}
    for item in sutime.parse(traducere):
        # Take text and type from the same result so they cannot drift out
        # of step when a result is missing one of the two keys.
        if "text" not in item or "type" not in item:
            continue
        valoare = convert_to_romana(item["text"])
        tip = convert_to_romana(item["type"])
        tmpdictionar.setdefault(tip, []).append(valoare)

    return tmpdictionar
コード例 #3
0
def extract_years(snippet, output):
    """
    Replace the year entities in ``output['Y']`` with the time values SUTime
    finds in the snippet, carrying over the existing confidence scores.

    :param snippet: text to run SUTime on
    :param output: dict whose 'Y' entry holds parallel 'entity' and
        'confidenceScore' lists; it is updated in place
    :return: the same ``output`` dict, with 'Y' replaced by a dict holding the
        matching SUTime values and the confidence score of the entity that
        matched each of them
    """
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)

    # Read the 'value' field of each parsed result directly. Scanning the
    # serialized JSON for the substring "value" also matched that word inside
    # the tagged text and produced garbage for non-string (range) values.
    dates_list = [
        item['value'] for item in sutime.parse(snippet)
        if isinstance(item.get('value'), str)
    ]

    dic_year = output['Y']
    dates_list_new = {'entity': [], 'confidenceScore': []}

    for i, entity in enumerate(dic_year['entity']):
        for ele in dates_list:
            # Keep each SUTime value once, the first time an entity matches it.
            if entity[0] in ele and ele not in dates_list_new['entity']:
                dates_list_new['entity'].append(ele)
                dates_list_new['confidenceScore'].append(
                    dic_year['confidenceScore'][i])

    output['Y'] = dates_list_new
    return output
コード例 #4
0
    def __init__(self, classifier_path=None, ner_path = None, sutime_jar_path = None):
        """Create the NER tagger and SUTime tagger and define the term lists.

        :param classifier_path: path to the Stanford NER classifier model;
            falls back to a fixed Windows install location when None.
        :param ner_path: path to ``stanford-ner.jar``; falls back to a fixed
            Windows install location when None.
        :param sutime_jar_path: directory passed to SUTime as ``jars``; falls
            back to a fixed Windows install location when None.
        """
        # Change the default paths according to your system.
        # Raw strings keep the backslashes literal; the previous plain strings
        # relied on invalid escape sequences such as "\s" and "\c".
        if classifier_path is None:
            classifier_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\classifiers\english.muc.7class.distsim.crf.ser.gz"

        if ner_path is None:
            ner_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\stanford-ner.jar"

        if sutime_jar_path is None:
            sutime_jar_path = r"C:\stanford_corenlp\stanford-corenlp-full-2018-02-27\stanford-corenlp-full-2018-02-27"

        self.stanford_classifier = classifier_path
        self.stanford_ner_path = ner_path
        self.sutime_path = sutime_jar_path

        # Creating Tagger Object
        self.st = StanfordNERTagger(self.stanford_classifier, self.stanford_ner_path)
        self.su = SUTime(jars=self.sutime_path, mark_time_ranges=True, include_range=True)

        # Keyword lists, grouped by topic.
        self.weather_terms = ["weather", "climate", "precipitation", "sun", "rain", "cloud","snow", "hot", "humid", "cold", "sunny", "windy","cloudy",
                              "rainy", "snowy", "misty", "foggy", "colder","hotter", "warmer", "pleasant"]
        self.greet_terms= ["hello","hey","howdy","hello","hi", "yo", "yaw"]
        self.closure_terms = ["no", "nope", "thank you", "bye", "tata", "thanks", "that will be all", "that's it", "that'll be all"]
        self.day_terms = ["dawn", "dusk", "morning", "evening", "noon","afternoon", "night", "tonight", "midnight", "midday"] #, "hours"]
        self.date_terms = ["today", "tomorrow", "yesterday"]
コード例 #5
0
    def __init__(self):
        """Store the setup_auth() result, a stream listener, the US/Pacific
        timezone and a SUTime tagger on the instance."""
        self.api = self.setup_auth()
        self.stream_listener = StreamListener(self)
        self.tz = pytz.timezone('US/Pacific')

        # SUTime jars live under BASE_DIR/python-sutime/jars.
        self.sutime = SUTime(
            jars=os.path.join(BASE_DIR, "python-sutime", "jars"),
            mark_time_ranges=True,
        )
コード例 #6
0
ファイル: streambot.py プロジェクト: totalgood/openchat
 def __init__(self):
     """Prepare the outgoing config, then build the API handle, stream
     listener, SUTime tagger and Slack client."""
     # An outgoing config object must exist to check against.
     db_utils.setup_outgoing_config()
     self.api = self.setup_auth()
     self.stream_listener = StreamListener(self)
     self.sutime = SUTime(
         jars=os.path.join(BASE_DIR, "python-sutime", "jars"),
         mark_time_ranges=True,
     )
     self.slacker = Slacker(s.SLACK_TOKEN)
コード例 #7
0
	def __init__(self):
		"""Set up Twitter access, the word lists, SUTime and the NLTK data path."""
		# Twitter API setup
		oauth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
		oauth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
		self.api = tweepy.API(oauth)
		self.tweet_list = []
		self.relevance_scores = []

		# bad words: the downloaded text is split into one entry per line
		self.bad_words = requests.get(BAD_WORDS_URL).text.split('\n')

		# stop words
		self.stopwords = list(stopwords.words('english'))

		# sutime: the jar directory comes from JAR_FILES, with a fixed fallback
		self.sutime = SUTime(
			jars=os.environ.get('JAR_FILES', '/webapps/hackor/hackor/python-sutime/jars'),
			mark_time_ranges=True,
		)

		# nltk data append: NLTK_CORPUS overrides the fixed fallback directory
		nltk_dir = os.environ.get('NLTK_CORPUS', '/webapps/hackor/hackor/nltk_data')
		nltk.data.path.append(nltk_dir)
コード例 #8
0
def extract_entitites(snippet):
    """
    Extract person names, organization names and time values from a snippet.

    :param snippet: a snippet in English
    :return: dict ``output`` with keys 'RN' (words of tokens tagged PERSON),
        'U' (words of tokens tagged ORGANIZATION) and 'Y' (string values
        reported by SUTime)
    """

    nlp = StanfordCoreNLP('http://localhost:9000')
    res = nlp.annotate(snippet,
                       properties={
                           'annotators': 'ner',
                           'outputFormat': 'json',
                       })

    output = {'RN': [], 'U': [], 'Y': []}

    # Person and organization names, collected token by token.
    for sentence in res['sentences']:
        for element in sentence['tokens']:
            if element['ner'] == 'PERSON':
                output['RN'].append(element['word'])
            if element['ner'] == 'ORGANIZATION':
                output['U'].append(element['word'])

    # Time values. Read the 'value' field of each parsed result directly:
    # scanning the serialized JSON for the substring "value" also matched that
    # word inside the tagged text and produced garbage for non-string (range)
    # values.
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    output['Y'].extend(
        item['value'] for item in sutime.parse(snippet)
        if isinstance(item.get('value'), str)
    )

    return output
コード例 #9
0
 def __init__(self, resource_folder=None):
     """Create the annotator and its SUTime tagger.

     :param resource_folder: directory passed to SUTime as ``jars``; when
         None, ``../resources/sutime/`` relative to this file is used.
     """
     self.annotator_name = 'Date_Linker'
     if resource_folder is None:
         resource_folder = os.path.join(os.path.dirname(__file__),
                                        '../resources/sutime/')
     # Always store the folder. It used to be set only in the default case,
     # so passing an explicit folder raised AttributeError on the next line.
     self.resource_folder = resource_folder
     self.sutime = SUTime(jars=self.resource_folder)
コード例 #10
0
def sutime_with_mark_time_ranges():
    """Return a SUTime instance created with ``mark_time_ranges=True``."""
    tagger = SUTime(mark_time_ranges=True)
    return tagger
コード例 #11
0
ファイル: app.py プロジェクト: mohitmun/sutime
from flask import Flask
from flask import request
import os
import json
from sutime import SUTime
import sys
import json
# Flask application object; the route below is registered on it.
app = Flask(__name__)
# SUTime jars are looked up in a 'jars' directory next to this file.
jar_files = os.path.join(os.path.dirname(__file__), 'jars')
# Module-level tagger, created once at import time and used by parse().
sutime = SUTime(jars=jar_files, mark_time_ranges=False)
@app.route('/')
def homepage():
  """Parse the ``q`` query parameter and return the result as JSON text."""
  query = request.args.get('q')
  tagged = parse(query)
  return json.dumps(tagged)
def parse(s):
  """Run the module-level SUTime tagger on *s* and return its result."""
  result = sutime.parse(s)
  return result
if __name__ == "__main__":
  # Start the Flask server with debugging and the reloader enabled when this
  # file is run as a script.
  app.run(use_reloader=True, debug=True)
def loadSUtime(base_dir="/Users/harsha/Documents/cse635_AIR/Project/Main/Code/python-sutime-master/"):
    """Create a SUTime tagger from the ``jars`` folder inside *base_dir*.

    :param base_dir: directory that contains the ``jars`` folder. The default
        is the previously hard-coded location, so existing no-argument calls
        behave as before; pass another directory to run elsewhere.
    :return: a SUTime instance created with ``mark_time_ranges=True``.
    """
    # The original rebound the local name __file__ to this path and took its
    # dirname; joining directly yields the same jar directory for the default.
    jar_files = os.path.join(base_dir, 'jars')
    return SUTime(jars=jar_files, mark_time_ranges=True)
コード例 #13
0
def get_sutime():
    """Return the module-level SUTime instance, creating it on first use."""
    global sutime
    if sutime is not None:
        return sutime
    sutime = SUTime(jars=jar_path, mark_time_ranges=True)
    return sutime
コード例 #14
0
ファイル: parsing_args.py プロジェクト: yayuanzi8/SPARQA
from common.bert_args import BertArgs
from sutime import SUTime
from parsing.nltk_nlp_utils import NLTK_NLP
from common import globals_args
from common import hand_files

# Module-level configuration and shared resources, all built at import time
# from values held in globals_args.
parser_mode = globals_args.parser_mode
# Question words and multi-word question phrases.
wh_words_set = {
    "what", "which", "whom", "who", "when", "where", "why", "how", "how many",
    "how large", "how big"
}
bert_args = BertArgs(globals_args.root, globals_args.q_mode)
# NLP helper built from the configured ip_port value.
nltk_nlp = NLTK_NLP(globals_args.argument_parser.ip_port)
# Shared SUTime tagger; the jar location comes from the parsed arguments.
sutime = SUTime(jars=globals_args.argument_parser.sutime_jar_files,
                mark_time_ranges=True)
# Word and phrase resources read from the files named in the parsed arguments.
unimportantwords = hand_files.read_set(
    globals_args.argument_parser.unimportantwords)
unimportantphrases = hand_files.read_list(
    globals_args.argument_parser.unimportantphrases)
stopwords_dict = hand_files.read_set(
    globals_args.argument_parser.stopwords_dir)
ordinal_lines_dict = hand_files.read_ordinal_file(
    globals_args.argument_parser.ordinal_fengli
)  #2 {'second', '2ndis_equal_wh_word'}

# Surface phrases related to counting, in several capitalizations.
count_phrases = [
    'Count', 'How many', 'how many', 'the number of', 'the count of',
    'the amount of', 'total number of', 'count'
]
count_ner_tags = ['count']
dayu_phrases = [
コード例 #15
0
ファイル: test_sutime.py プロジェクト: mohitmun/sutime
def sutime_with_mark_time_ranges():
    """Return a SUTime instance with ``mark_time_ranges=True``, using the
    ``jars`` directory two levels above this file."""
    jars_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, os.pardir, 'jars')
    return SUTime(jars=jars_dir, mark_time_ranges=True)
コード例 #16
0
def sutime_with_jvm_flags():
    """Return a SUTime instance started with the ``-Xms256m`` JVM flag."""
    return SUTime(
        # The trailing comma makes this a one-element tuple; without it the
        # parentheses are only grouping and a bare string is passed instead of
        # a sequence of flags.
        jvm_flags=('-Xms256m', ),
    )
コード例 #17
0
def sutime_spanish():
    """Return a SUTime instance created with ``language='spanish'``."""
    tagger = SUTime(language='spanish')
    return tagger
コード例 #18
0
def sutime():
    """Return a SUTime instance created with default arguments."""
    tagger = SUTime()
    return tagger
コード例 #19
0
def sutime_with_jvm_flags():
    """Return a SUTime instance started with the ``-Xms256m`` JVM flag, using
    the ``jars`` directory two levels above this file."""
    jars_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, os.pardir, "jars")
    flags = ("-Xms256m", )
    return SUTime(jars=jars_dir, jvm_flags=flags)
コード例 #20
0
 def __init__(self, host='localhost', port=5001, **kwargs):
     """Store the host and port and create the SUTime tagger.

     :param host: stored as ``self.host``.
     :param port: stored as ``self.port``.
     :param kwargs: accepted but not used by this method.
     """
     self.host, self.port = host, port
     self.sutime = SUTime(jars=os.path.join(os.path.dirname(__file__),
                                            'python-sutime', 'jars'),
                          mark_time_ranges=True)
     # Call form of print: it outputs the same text on Python 2 and also
     # parses on Python 3, where the print statement is a syntax error.
     print('Initialized with {}:{}'.format(self.host, self.port))
コード例 #21
0
def sutime_spanish():
    """Return a Spanish-language SUTime instance, using the ``jars`` directory
    two levels above this file."""
    jars_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, os.pardir, "jars")
    return SUTime(jars=jars_dir, language="spanish")
コード例 #22
0
ファイル: test_sutime.py プロジェクト: mohitmun/sutime
def sutime():
    """Return a SUTime instance using the ``jars`` directory two levels above
    this file."""
    jars_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, os.pardir, 'jars')
    return SUTime(jars=jars_dir)
コード例 #23
0
ファイル: load_test_bot.py プロジェクト: kwhaler/openchat
 def __init__(self):
     """Build the API handle, stream listener, SUTime tagger and Slack
     client and store them on the instance."""
     self.api = self.setup_auth()
     self.stream_listener = StreamListener(self)
     self.sutime = SUTime(
         jars=os.path.join(BASE_DIR, "python-sutime", "jars"),
         mark_time_ranges=True,
     )
     self.slacker = Slacker(s.SLACK_TOKEN)
コード例 #24
0
                        type=str,
                        help='LC-QuAD',
                        default='DSN=knowledgebase;UID=dba;PWD=dba')
    parser.add_argument('--dbpedia_sparql_html',
                        type=str,
                        help='LC-QuAD',
                        default="http://114.212.84.164:8890/sparql")
    parser.add_argument('--freebase_pyodbc',
                        type=str,
                        help='CWQ, GraphQ',
                        default='DSN=freebaselatest;UID=dba;PWD=dba')
    parser.add_argument('--freebase_sparql_html',
                        type=str,
                        help='CWQ, GraphQ',
                        default="http://114.212.81.7:8894/sparql")
    return parser.parse_args()


# Parse the command-line arguments once at import time; everything below is
# derived from them.
argument_parser = get_args()

# File-name helpers, each rooted at the configured root directory.
fn_lcquad_file = LCQuADFileName(root=argument_parser.root)
fn_graph_file = GraphqFileName(root=argument_parser.root)
fn_cwq_file = CWQFileName(root=argument_parser.root)

# Knowledge-base helpers, also rooted at the configured root directory.
kb_dbpedia_201604_file = KB_DBpedia_201604(root=argument_parser.root)
kb_freebase_en_2013 = KB_Freebase_en_2013(root=argument_parser.root)
kb_freebase_latest_file = KB_Freebase_Latest(root=argument_parser.root)

# Shared NLP helper and SUTime tagger, configured from the parsed arguments.
nltk_nlp = NLTK_NLP(argument_parser.corenlp_ip_port)
sutime = SUTime(jars=argument_parser.sutime, mark_time_ranges=True)
コード例 #25
0
from sutime import SUTime
from collections import defaultdict
import numpy as np
debug = True

#location global vars
# Stanford NER model and jars live in 'stanfordjars' next to this file.
stanford_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'stanfordjars')
st = StanfordNERTagger(os.path.join(stanford_dir, 'ner-model.ser.gz'), os.path.join(stanford_dir, 'stanford-ner.jar'))
st._stanford_jar = os.path.join(stanford_dir, '*')
place_to_coords = {}
url_base = 'https://maps.googleapis.com/maps/api/place/textsearch/json'
# NOTE(review): credentials are committed in source. Each one can now be
# overridden from the environment; the literals remain only as fallbacks for
# backward compatibility and should be rotated and removed.
api_key = os.environ.get('GOOGLE_PLACES_API_KEY', 'AIzaSyAVat82-OUFKC9GpyOi3LNyQKwxE2KWY9U')

#time global vars
# SUTime jars live in 'sutimejars' next to this file.
jar_files = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'sutimejars')
sutime = SUTime(jars=jar_files, mark_time_ranges=True)

#FB api global vars
app_id = os.environ.get('FB_APP_ID', "1696549057338916")
app_secret = os.environ.get('FB_APP_SECRET', "21090405ac37194a1d4578aeb2371845") # DO NOT SHARE WITH ANYONE!
# The token is the app id and the app secret joined by "|".
access_token = app_id + "|" + app_secret

#classifier global vars
def unpickle():
    """Load the three pickled classifiers from the 'pickles' directory next
    to this file.

    NOTE(review): the visible body neither returns nor stores the loaded
    objects; the definition may continue beyond this excerpt - confirm.
    NOTE(review): pickle.load executes arbitrary code from the file; these
    files must come from a trusted source.
    """
    pickle_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'pickles')
    # Each file is opened in binary mode and closed as soon as it is loaded.
    with open(os.path.join(pickle_dir, 'clf_driver.pkl'), 'rb') as fid:
        clf_driver = pickle.load(fid)
    with open(os.path.join(pickle_dir, 'clf_roundtrip.pkl'), 'rb') as fid:
        clf_roundtrip = pickle.load(fid)
    with open(os.path.join(pickle_dir, 'clf_relevant.pkl'), 'rb') as fid:
        clf_relevant = pickle.load(fid)
コード例 #26
0
    except (ValueError, TypeError):
        pass

    if type(text) == dict and 'begin' in text and 'end' in text:
        if get_datetime_type(text['begin']) in [
                'time', 'datetime'
        ] and get_datetime_type(text['end']) in ['time', 'datetime']:
            return 'dict'

    return 'other'


# Instantiate SUTime object
# The jar path is relative, so it resolves against the current working
# directory of the process.
jar_files = "../../packages/python-sutime/jars/"
try:
    sutime = SUTime(jars=jar_files, mark_time_ranges=True, include_range=True)
except OSError:
    # Retry with jvm_started=True when the first attempt raises OSError.
    # NOTE(review): presumably this covers a JVM that is already running in
    # this process - confirm against the SUTime version in use.
    sutime = SUTime(jars=jar_files,
                    jvm_started=True,
                    mark_time_ranges=True,
                    include_range=True)

# MIT rooms list: the first space-separated field of each line, kept when it
# contains a '-' and is shorter than 15 characters, ordered longest first.
with open('../listings/mit_rooms.csv', 'r') as f:
    rooms = sorted(
        (token for token in (line.split(' ')[0] for line in f)
         if '-' in token and len(token) < 15),
        key=len,
        reverse=True,
    )