def __init__(self, path):
    """Initialize the SUTime wrapper.

    The jar files are expected in a 'jars' directory that sits next
    to *path* on disk.

    :param path: a file path whose parent directory contains 'jars'
    """
    jar_dir = os.path.join(os.path.dirname(path), 'jars')
    # include_range=True: range expressions come back as begin/end dicts.
    self.sutime = SUTime(jars=jar_dir, mark_time_ranges=False,
                         include_range=True)
def sutime_function(text):
    """Extract temporal expressions from Romanian *text*.

    The text is translated to English, parsed with SUTime, and each
    annotation's "text" and "type" fields are translated back to
    Romanian and grouped by type.

    :param text: Romanian input string
    :return: dict mapping temporal type -> list of Romanian expressions
    """
    translator = Translator()
    translated = translator.translate(text, src='ro', dest="en").text
    # SUTime jars live under java\target relative to this module.
    jar_files = os.path.join(os.path.dirname(__file__), "java\\target")
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    texts = []
    types = []
    for annotation in sutime.parse(translated):
        # .items() yields (key, value); the original had the names
        # reversed, which obscured the logic.
        for key, value in annotation.items():
            if key == "text":
                texts.append(convert_to_romana(value))
            elif key == "type":
                types.append(convert_to_romana(value))
    # Group expression texts by their type. setdefault replaces the
    # original bare except, which could mask unrelated errors.
    grouped = {}
    for t_type, t_text in zip(types, texts):
        grouped.setdefault(t_type, []).append(t_text)
    return grouped
def extract_years(snippet, output):
    """
    function extracts the dates and fill them with the computed
    confidence score of function extract_entities_textrazor
    :param snippet: English text to scan for temporal expressions
    :param output: dict with a 'Y' entry holding {'entity', 'confidenceScore'}
    :return: *output* with 'Y' replaced by matched SUTime date values
    """
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    # Read the 'value' field from each annotation directly instead of
    # character-scanning a json.dumps() string (the original approach
    # broke on any non-string value and on stray occurrences of "value").
    dates_list = []
    for annotation in sutime.parse(snippet):
        value = annotation.get('value')
        if isinstance(value, str):
            dates_list.append(value)
    dic_year = output['Y']
    dates_list_new = {'entity': [], 'confidenceScore': []}
    # Keep each SUTime value that contains a previously extracted year,
    # carrying over that year's confidence score.
    for i, entity in enumerate(dic_year['entity']):
        for ele in dates_list:
            if entity[0] in ele and ele not in dates_list_new['entity']:
                dates_list_new['entity'].append(ele)
                dates_list_new['confidenceScore'].append(
                    dic_year['confidenceScore'][i])
    output['Y'] = dates_list_new
    return output
def __init__(self, classifier_path=None, ner_path=None, sutime_jar_path=None):
    """Set up the Stanford NER tagger, SUTime, and intent keyword lists.

    Default paths point at a local Windows Stanford CoreNLP install;
    pass explicit paths on other systems.

    :param classifier_path: path to the NER classifier model
    :param ner_path: path to the Stanford NER jar
    :param sutime_jar_path: directory holding the CoreNLP/SUTime jars
    """
    # Raw strings: these Windows paths contain backslash sequences
    # (\s, \c, \e) that are invalid escapes in ordinary string literals.
    if classifier_path is None:
        classifier_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\classifiers\english.muc.7class.distsim.crf.ser.gz"
    if ner_path is None:
        ner_path = r"C:\stanford_corenlp\stanford-ner-2018-02-27\stanford-ner-2018-02-27\stanford-ner.jar"
    if sutime_jar_path is None:
        sutime_jar_path = r"C:\stanford_corenlp\stanford-corenlp-full-2018-02-27\stanford-corenlp-full-2018-02-27"
    self.stanford_classifier = classifier_path
    self.stanford_ner_path = ner_path
    self.sutime_path = sutime_jar_path
    # Creating Tagger Object
    self.st = StanfordNERTagger(self.stanford_classifier, self.stanford_ner_path)
    self.su = SUTime(jars=self.sutime_path, mark_time_ranges=True,
                     include_range=True)
    # Keyword lists used for simple intent detection.
    self.weather_terms = ["weather", "climate", "precipitation", "sun",
                          "rain", "cloud", "snow", "hot", "humid", "cold",
                          "sunny", "windy", "cloudy", "rainy", "snowy",
                          "misty", "foggy", "colder", "hotter", "warmer",
                          "pleasant"]
    # "hello" appeared twice in the original list; the duplicate adds
    # nothing for membership tests and is removed.
    self.greet_terms = ["hello", "hey", "howdy", "hi", "yo", "yaw"]
    self.closure_terms = ["no", "nope", "thank you", "bye", "tata", "thanks",
                          "that will be all", "that's it", "that'll be all"]
    self.day_terms = ["dawn", "dusk", "morning", "evening", "noon",
                      "afternoon", "night", "tonight", "midnight",
                      "midday"]  # , "hours"]
    self.date_terms = ["today", "tomorrow", "yesterday"]
def __init__(self):
    """Authenticate with Twitter, attach a stream listener, and
    prepare the US/Pacific timezone and SUTime parser."""
    self.api = self.setup_auth()
    self.stream_listener = StreamListener(self)
    self.tz = pytz.timezone('US/Pacific')
    sutime_jars = os.path.join(BASE_DIR, "python-sutime", "jars")
    self.sutime = SUTime(jars=sutime_jars, mark_time_ranges=True)
def __init__(self):
    """Set up outgoing config, Twitter auth, stream listener,
    SUTime, and the Slack client."""
    # needs an outgoing config obj to check against
    db_utils.setup_outgoing_config()
    self.api = self.setup_auth()
    self.stream_listener = StreamListener(self)
    sutime_jars = os.path.join(BASE_DIR, "python-sutime", "jars")
    self.sutime = SUTime(jars=sutime_jars, mark_time_ranges=True)
    self.slacker = Slacker(s.SLACK_TOKEN)
def __init__(self):
    """Authenticate with Twitter and load word lists, SUTime jars,
    and extra NLTK data paths from the environment."""
    # Twitter API setup
    handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    self.api = tweepy.API(handler)
    self.tweet_list = []
    self.relevance_scores = []
    # bad words
    self.bad_words = requests.get(BAD_WORDS_URL).text.split('\n')
    # stop words
    self.stopwords = list(stopwords.words('english'))
    # sutime
    jar_dir = os.environ.get(
        'JAR_FILES', '/webapps/hackor/hackor/python-sutime/jars')
    self.sutime = SUTime(jars=jar_dir, mark_time_ranges=True)
    # nltk data append
    nltk.data.path.append(
        os.environ.get('NLTK_CORPUS', '/webapps/hackor/hackor/nltk_data'))
def extract_entitites(snippet):
    """Extract person names, organizations, and temporal values.

    :param snippet: a snippet in English
    :return: dict with keys 'RN' (person names), 'U' (organizations),
             and 'Y' (SUTime temporal values)
    """
    nlp = StanfordCoreNLP('http://localhost:9000')
    res = nlp.annotate(snippet, properties={
        'annotators': 'ner',  # 'sutime'
        'outputFormat': 'json',
        # 'timeout': 1000,
    })
    output = {'RN': [], 'U': [], 'Y': []}
    # Person and organization names from CoreNLP NER tags.
    for sentence in res['sentences']:
        for token in sentence['tokens']:
            if token['ner'] == 'PERSON':
                output['RN'].append(token['word'])
            if token['ner'] == 'ORGANIZATION':  # or 'LOCATION'
                output['U'].append(token['word'])
    # Years: read the 'value' field of each SUTime annotation directly
    # instead of character-scanning a json.dumps() string (the original
    # scan broke on non-string values and stray "value" substrings).
    jar_files = os.path.join(os.path.dirname(__file__), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    for annotation in sutime.parse(snippet):
        value = annotation.get('value')
        if isinstance(value, str):
            output['Y'].append(value)
    return output
def __init__(self, resource_folder=None):
    """Create the date-linker annotator.

    :param resource_folder: directory with the SUTime jars; defaults to
        ../resources/sutime/ relative to this module
    """
    self.annotator_name = 'Date_Linker'
    if resource_folder is None:
        resource_folder = os.path.join(os.path.dirname(__file__),
                                       '../resources/sutime/')
    # Bug fix: a caller-supplied resource_folder was previously never
    # stored, so self.resource_folder raised AttributeError below.
    self.resource_folder = resource_folder
    self.sutime = SUTime(jars=self.resource_folder)
def sutime_with_mark_time_ranges():
    """Return a SUTime instance with time-range marking enabled."""
    return SUTime(mark_time_ranges=True)
from flask import Flask
from flask import request
import os
import json
import sys

from sutime import SUTime

app = Flask(__name__)

# One shared SUTime instance; the jar files live next to this module.
# (The duplicate `import json` from the original has been removed.)
jar_files = os.path.join(os.path.dirname(__file__), 'jars')
sutime = SUTime(jars=jar_files, mark_time_ranges=False)


@app.route('/')
def homepage():
    """Parse the 'q' query parameter and return SUTime output as JSON."""
    q = request.args.get('q')
    return json.dumps(parse(q))


def parse(s):
    """Run SUTime over *s* and return the raw annotation list."""
    return sutime.parse(s)


if __name__ == '__main__':
    app.run(debug=True, use_reloader=True)
def loadSUtime():
    """Build a SUTime instance from the hard-coded python-sutime checkout.

    NOTE: the base path is machine-specific.

    :return: a configured SUTime parser
    """
    # The original reassigned the module-level __file__ to smuggle the
    # path into os.path.dirname; use a plain local instead.
    base_path = ("/Users/harsha/Documents/cse635_AIR/Project/Main/Code/"
                 "python-sutime-master/")
    jar_files = os.path.join(os.path.dirname(base_path), 'jars')
    sutime = SUTime(jars=jar_files, mark_time_ranges=True)
    return sutime
def get_sutime():
    """Return the shared SUTime instance, constructing it on first use."""
    global sutime
    if sutime is not None:
        return sutime
    sutime = SUTime(jars=jar_path, mark_time_ranges=True)
    return sutime
from common.bert_args import BertArgs
from sutime import SUTime
from parsing.nltk_nlp_utils import NLTK_NLP
from common import globals_args
from common import hand_files

# Module-level NLP resources and question-parsing word lists.
parser_mode = globals_args.parser_mode

# Interrogative words/phrases used for question-type detection.
wh_words_set = {
    "what", "which", "whom", "who", "when", "where", "why", "how",
    "how many", "how large", "how big"
}

bert_args = BertArgs(globals_args.root, globals_args.q_mode)
nltk_nlp = NLTK_NLP(globals_args.argument_parser.ip_port)
sutime = SUTime(jars=globals_args.argument_parser.sutime_jar_files,
                mark_time_ranges=True)

# Word/phrase sets loaded from hand-curated resource files.
unimportantwords = hand_files.read_set(
    globals_args.argument_parser.unimportantwords)
unimportantphrases = hand_files.read_list(
    globals_args.argument_parser.unimportantphrases)
stopwords_dict = hand_files.read_set(
    globals_args.argument_parser.stopwords_dir)
ordinal_lines_dict = hand_files.read_ordinal_file(
    globals_args.argument_parser.ordinal_fengli
)  #2 {'second', '2ndis_equal_wh_word'}

# Phrases that signal a counting question.
count_phrases = [
    'Count', 'How many', 'how many', 'the number of', 'the count of',
    'the amount of', 'total number of', 'count'
]
count_ner_tags = ['count']
# NOTE(review): this list is truncated at the chunk boundary.
dayu_phrases = [
def sutime_with_mark_time_ranges():
    """SUTime fixture with range marking, using the repository's jars."""
    jars_dir = os.path.join(os.path.dirname(__file__),
                            os.pardir, os.pardir, 'jars')
    return SUTime(jars=jars_dir, mark_time_ranges=True)
def sutime_with_jvm_flags():
    """SUTime fixture with a custom JVM heap flag.

    Bug fix: ('-Xms256m') without a trailing comma is just a
    parenthesised string; jvm_flags must be an iterable of flag
    strings, so pass a one-element tuple (as the sibling fixture does).
    """
    return SUTime(jvm_flags=('-Xms256m',))
def sutime_spanish():
    """SUTime fixture configured for Spanish input."""
    lang = 'spanish'
    return SUTime(language=lang)
def sutime():
    """Default SUTime fixture with no custom configuration."""
    instance = SUTime()
    return instance
def sutime_with_jvm_flags():
    """SUTime fixture passing an explicit JVM heap flag."""
    jars_dir = os.path.join(os.path.dirname(__file__),
                            os.pardir, os.pardir, "jars")
    return SUTime(jars=jars_dir, jvm_flags=("-Xms256m",))
def __init__(self, host='localhost', port=5001, **kwargs):
    """Store the endpoint address and build the SUTime parser.

    :param host: hostname to report
    :param port: port number to report
    """
    self.host, self.port = host, port
    self.sutime = SUTime(jars=os.path.join(os.path.dirname(__file__),
                                           'python-sutime', 'jars'),
                         mark_time_ranges=True)
    # print() call form: valid under both Python 2 and 3 (the original
    # used the Python-2-only print statement).
    print('Initialized with {}:{}'.format(self.host, self.port))
def sutime_spanish():
    """SUTime fixture for Spanish, using the repository's bundled jars."""
    jars_dir = os.path.join(os.path.dirname(__file__),
                            os.pardir, os.pardir, "jars")
    return SUTime(jars=jars_dir, language="spanish")
def sutime():
    """Default SUTime fixture pointed at the repository's jars directory."""
    jars_dir = os.path.join(os.path.dirname(__file__),
                            os.pardir, os.pardir, 'jars')
    return SUTime(jars=jars_dir)
def __init__(self):
    """Authenticate, attach the stream listener, and set up SUTime
    and the Slack client."""
    self.api = self.setup_auth()
    self.stream_listener = StreamListener(self)
    sutime_jars = os.path.join(BASE_DIR, "python-sutime", "jars")
    self.sutime = SUTime(jars=sutime_jars, mark_time_ranges=True)
    self.slacker = Slacker(s.SLACK_TOKEN)
type=str, help='LC-QuAD', default='DSN=knowledgebase;UID=dba;PWD=dba') parser.add_argument('--dbpedia_sparql_html', type=str, help='LC-QuAD', default="http://114.212.84.164:8890/sparql") parser.add_argument('--freebase_pyodbc', type=str, help='CWQ, GraphQ', default='DSN=freebaselatest;UID=dba;PWD=dba') parser.add_argument('--freebase_sparql_html', type=str, help='CWQ, GraphQ', default="http://114.212.81.7:8894/sparql") return parser.parse_args() argument_parser = get_args() fn_lcquad_file = LCQuADFileName(root=argument_parser.root) fn_graph_file = GraphqFileName(root=argument_parser.root) fn_cwq_file = CWQFileName(root=argument_parser.root) kb_dbpedia_201604_file = KB_DBpedia_201604(root=argument_parser.root) kb_freebase_en_2013 = KB_Freebase_en_2013(root=argument_parser.root) kb_freebase_latest_file = KB_Freebase_Latest(root=argument_parser.root) nltk_nlp = NLTK_NLP(argument_parser.corenlp_ip_port) sutime = SUTime(jars=argument_parser.sutime, mark_time_ranges=True)
from sutime import SUTime
from collections import defaultdict
import numpy as np

debug = True

# location global vars
# Stanford NER model and jar live under ./stanfordjars next to this file.
stanford_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'stanfordjars')
st = StanfordNERTagger(os.path.join(stanford_dir, 'ner-model.ser.gz'),
                       os.path.join(stanford_dir, 'stanford-ner.jar'))
# Put every jar in the directory on the tagger's classpath.
st._stanford_jar = os.path.join(stanford_dir, '*')
place_to_coords = {}  # cache: place name -> coordinates
url_base = 'https://maps.googleapis.com/maps/api/place/textsearch/json'
# SECURITY NOTE(review): hard-coded Google API key checked into source;
# should be rotated and moved to configuration.
api_key = 'AIzaSyAVat82-OUFKC9GpyOi3LNyQKwxE2KWY9U'

# time global vars
jar_files = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         'sutimejars')
sutime = SUTime(jars=jar_files, mark_time_ranges=True)

# FB api global vars
# SECURITY NOTE(review): app secret committed to source.
app_id = "1696549057338916"
app_secret = "21090405ac37194a1d4578aeb2371845"  # DO NOT SHARE WITH ANYONE!
access_token = app_id + "|" + app_secret

# classifier global vars
def unpickle():
    # Load the three pre-trained classifiers from ./pickles.
    # NOTE(review): no return statement is visible in this chunk -- the
    # function may continue beyond the boundary of this view.
    pickle_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'pickles')
    with open(os.path.join(pickle_dir, 'clf_driver.pkl'), 'rb') as fid:
        clf_driver = pickle.load(fid)
    with open(os.path.join(pickle_dir, 'clf_roundtrip.pkl'), 'rb') as fid:
        clf_roundtrip = pickle.load(fid)
    with open(os.path.join(pickle_dir, 'clf_relevant.pkl'), 'rb') as fid:
        clf_relevant = pickle.load(fid)
except (ValueError, TypeError): pass if type(text) == dict and 'begin' in text and 'end' in text: if get_datetime_type(text['begin']) in [ 'time', 'datetime' ] and get_datetime_type(text['end']) in ['time', 'datetime']: return 'dict' return 'other' # Instantiate SUTime object jar_files = "../../packages/python-sutime/jars/" try: sutime = SUTime(jars=jar_files, mark_time_ranges=True, include_range=True) except OSError: sutime = SUTime(jars=jar_files, jvm_started=True, mark_time_ranges=True, include_range=True) # MIT rooms list with open('../listings/mit_rooms.csv', 'r') as f: rooms = [ line.split(' ')[0] for line in f.readlines() if '-' in line.split(' ')[0] and len(line.split(' ')[0]) < 15 ] rooms.sort(key=len, reverse=True)