def setUp(self):
    # self.serializer = semantria.XmlSerializer()
    self.serializer = semantria.JsonSerializer()
    self.session = semantria.Session(consumerKey, consumerSecret, self.serializer, use_compression=True)
    self.session.Request += onRequest
    self.session.Response += onResponse
    self.session.Error += onError
    self.session.DocsAutoResponse += onDocsAutoResponse
    self.session.CollsAutoResponse += onCollsAutoResponse
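# The setUp above assumes credentials and callback handlers defined at module
# scope. A minimal sketch of what those might look like; the real definitions
# are not shown in this snippet, and the (sender, result) signatures below
# follow the convention used by the Semantria SDK's event hooks.
consumerKey = "<your-consumer-key>"        # placeholder, not a real key
consumerSecret = "<your-consumer-secret>"  # placeholder, not a real secret

def onRequest(sender, result):
    pass  # e.g. log the outgoing request

def onResponse(sender, result):
    pass  # e.g. log the raw response

def onError(sender, result):
    print("Semantria error:", result)

def onDocsAutoResponse(sender, result):
    pass  # invoked when processed documents arrive via auto-response

def onCollsAutoResponse(sender, result):
    pass  # invoked when processed collections arrive via auto-response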
def analyze2(tweets):
    consumerKey = "7bba1e0b-3a0a-4c27-823d-0a06ab8d27f4"
    consumerSecret = "335156f6-a161-490c-a9c2-203ec44c0cbd"

    def onRequest(sender, result):
        pass  # print(result)

    def onResponse(sender, result):
        pass  # print(result)

    def onError(sender, result):
        pass  # print(result)

    def onDocsAutoResponse(sender, result):
        pass  # print(result)

    def onCollsAutoResponse(sender, result):
        pass  # print(result)

    serializer = semantria.JsonSerializer()
    session = semantria.Session(consumerKey, consumerSecret, serializer)
    # print(session.getConfigurations())
    session.Error += onError

    analyzedTweets = []
    for tweet in tweets:
        doc = {"id": str(uuid.uuid1()).replace("-", ""), "text": tweet[1]}
        status = session.queueDocument(doc)
        time.sleep(0.2)
        status = session.getProcessedDocuments()
        if isinstance(status, list):
            for item in status:  # renamed from `object`, which shadows the builtin
                # print(item)
                analyzedTweet = AnalyzedTweet(tweet[0], tweet[2], tweet[3],
                                              item["sentiment_polarity"], 1, 1, 1)
                if analyzedTweet.location:
                    analyzedTweets.append(analyzedTweet)
                    # print(analyzedTweet)
    print(len(analyzedTweets))
    return analyzedTweets
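# A hypothetical sketch of the AnalyzedTweet container used above; the real
# class is not shown in this snippet. Only its shape is implied by the call
# site: seven positional constructor arguments and a `location` attribute that
# is checked before appending. The field names below are guesses.
class AnalyzedTweet:
    def __init__(self, author, location, created_at, polarity, w1, w2, w3):
        self.author = author
        self.location = location
        self.created_at = created_at
        self.polarity = polarity
        self.w1, self.w2, self.w3 = w1, w2, w3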
def analyse(txtInput):
    serializer = semantria.JsonSerializer()
    session = semantria.Session(key, secret, serializer, use_compression=True)

    # Note: the document id is derived from the input text itself, so very
    # long or duplicate inputs may be rejected by the API.
    doc = {"id": str(txtInput).replace("-", ""), "text": txtInput}
    status = session.queueDocument(doc)
    if status == 202:
        print("\"", doc["id"], "\" document queued successfully.", "\r\n")

    results = []
    while len(results) != 1:
        print("Retrieving your processed results...", "\r\n")
        time.sleep(0.1)
        # get processed documents
        status = session.getProcessedDocuments()
        results.extend(status)

    for data in results:
        response = data["sentiment_score"]
        return response

    # for data in results:
    #     # print document sentiment score
    #     print("Document ", data["id"], " Sentiment score: ", data["sentiment_score"], "\r\n")
    #     # print document themes
    #     if "themes" in data:
    #         print("Document themes:", "\r\n")
    #         for theme in data["themes"]:
    #             print("  ", theme["title"], " (sentiment: ", theme["sentiment_score"], ")", "\r\n")
    #     # print document entities
    #     if "entities" in data:
    #         print("Entities:", "\r\n")
    #         for entity in data["entities"]:
    #             print("\t", entity["title"], " : ", entity["entity_type"], " (sentiment: ", entity["sentiment_score"], ")", "\r\n")
    # return str(results)

#####################################################################################
# print(str(analyse("my work is irrelevant me. F**k life!")))
def get_res():
    serializer = semantria.JsonSerializer()
    session = semantria.Session("59e4e96b-f19b-48b5-910a-a2b5d9d2bfc7",
                                "0cee133a-889e-4d1f-9d99-2749677bcfdd",
                                serializer, use_compression=True)

    for fi in os.listdir(data_dir):
        print(fi)
        data = load_data(data_dir + '/%s' % fi)
        # initialTexts.append(data)
        initialTexts[int(fi)] = data
    del initialTexts[0]

    for i, text in enumerate(initialTexts):
        doc = {"id": str(i + 1), "text": text}
        status = session.queueDocument(doc)
        if status == 202:
            print("\"", doc["id"], "\" document queued successfully.", "\r\n")

    length = len(initialTexts)
    results = []
    while len(results) < length:
        print("Retrieving your processed results...", "\r\n")
        time.sleep(2)
        # get processed documents
        status = session.getProcessedDocuments()
        results.extend(status)

    for data in results:
        if "entities" in data:
            print("Entities:", "\r\n")
            if len(data["entities"]) == 20:
                # 20 presumably marks the configured entity cap, hence the
                # 'limit' suffix flagging possibly truncated output
                with open('n_semantria_labels/%s' % data["id"] + 'limit', 'w') as fb:
                    json.dump(data["entities"], fb)
            else:
                with open('n_semantria_labels/%s' % data["id"], 'w') as fb:
                    json.dump(data["entities"], fb)
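# A minimal sketch of the load_data helper assumed above (its definition is
# not shown in this snippet): it presumably just reads a file's contents as text.
def load_data(path):
    with open(path, 'r') as f:
        return f.read()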
from __future__ import print_function
import semantria
import uuid
import time

serializer = semantria.JsonSerializer()
session = semantria.Session("98b2c1f2-318f-4de6-a0f3-27bffd811737",
                            "a90aa5cb-d515-4548-985c-735454a8a9a7",
                            serializer, use_compression=True)

initialTexts = [
    "Lisa - there's 2 Skinny cow coupons available $5 skinny cow ice cream coupons on special k boxes and Printable FPC from facebook - a teeny tiny cup of ice cream. I printed off 2 (1 from my account and 1 from dh's). I couldn't find them instore and i'm not going to walmart before the 19th. Oh well sounds like i'm not missing much ...lol",
    "In Lake Louise - a guided walk for the family with Great Divide Nature Tours rent a canoe on Lake Louise or Moraine Lake go for a hike to the Lake Agnes Tea House. In between Lake Louise and Banff - visit Marble Canyon or Johnson Canyon or both for family friendly short walks. In Banff a picnic at Johnson Lake rent a boat at Lake Minnewanka hike up Tunnel Mountain walk to the Bow Falls and the Fairmont Banff Springs Hotel visit the Banff Park Museum. The \"must-do\" in Banff is a visit to the Banff Gondola and some time spent on Banff Avenue - think candy shops and ice cream.",
    "On this day in 1786 - In New York City commercial ice cream was manufactured for the first time."
]

for text in initialTexts:
    doc = {"id": str(uuid.uuid4()).replace("-", ""), "text": text}
    status = session.queueDocument(doc)
    if status == 202:
        print("\"", doc["id"], "\" document queued successfully.", "\r\n")

length = len(initialTexts)
results = []
while len(results) < length:
    print("Retrieving your processed results...", "\r\n")
    time.sleep(2)
    # get processed documents (loop body completed to match the identical
    # polling pattern used by the other snippets in this file)
    status = session.getProcessedDocuments()
    results.extend(status)
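# A minimal continuation sketch: once the loop above has collected all
# processed documents, their sentiment scores can be printed in the same
# style as the other snippets in this file.
for data in results:
    print("Document", data["id"], "Sentiment score:", data["sentiment_score"], "\r\n")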
def endpoint_name():
    print("endpoint hit!")
    rawtext = request.get_json()['text']

    # Process the text using the Semantria API.
    # Creates JSON serializer instance
    serializer = semantria.JsonSerializer()
    # Initializes new session with the serializer object and the keys.
    session = semantria.Session(consumerKey, consumerSecret, serializer, use_compression=True)
    subscription = session.getSubscription()

    initialTexts = []
    results = []
    tracker = {}
    documents = []

    # Split the raw text into chunks below Semantria's per-document size limit.
    n = 975
    textchunks = [rawtext[i:i + n] for i in range(0, len(rawtext), n)]

    for text in textchunks:
        # Creates a sample document to be processed on Semantria:
        # a unique document ID plus the source text.
        doc_id = str(uuid.uuid4())
        documents.append({'id': doc_id, 'text': text})
        tracker[doc_id] = TASK_STATUS_QUEUED
        res = session.queueBatch(documents)
        if res in [200, 202]:
            print("{0} documents queued successfully.".format(len(documents)))
            documents = []

    # Queue anything left over from a failed batch above.
    if len(documents):
        res = session.queueBatch(documents)
        if res not in [200, 202]:
            print("Unexpected error!")
            sys.exit(1)
        print("{0} documents queued successfully.".format(len(documents)))
    print("")

    # fix this too
    while len(list(filter(lambda x: x == TASK_STATUS_QUEUED, tracker.values()))):
        time.sleep(0.5)
        print("Retrieving your processed results...")
        response = session.getProcessedDocuments()
        for item in response:
            if item['id'] in tracker:
                tracker[item['id']] = item['status']
                results.append(item)
    print("")

    # Print and populate JSON to return.
    resultDict = {}
    for data in results:
        dataDict = {}
        # Printing of document sentiment score
        print("Document {0} / Sentiment score: {1}".format(
            data['id'], data['sentiment_score']))
        # Printing of document themes
        if "themes" in data:
            print("Document themes:")
            for theme in data["themes"]:
                print("\t {0} (sentiment: {1})".format(
                    theme['title'], theme['sentiment_score']))
        # Printing of document entities
        if "entities" in data:
            print("Entities:")
            dataDict["entities"] = data["entities"]
            for entity in data["entities"]:
                print("\t {0}: {1} (sentiment: {2})".format(
                    entity['title'], entity['entity_type'], entity['sentiment_score']))
        # Printing the summary
        if "summary" in data:
            print("Summary:")
            dataDict["summary"] = data["summary"]
            print(data["summary"])
        if "relations" in data:
            print("Relationships:")
            dataDict["relationships"] = data["relations"]
            for relation in data["relations"]:
                print("\t {0}: {1}".format(relation['type'], relation['extra']))
        resultDict[data['id']] = dataDict
    print("")
    print("Done!")
    return jsonify(resultDict)
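# A hypothetical client call for the endpoint above, assuming it is registered
# under a Flask route such as "/analyze" (the decorator is not shown in this
# snippet) and that the server runs locally on port 5000.
import requests

resp = requests.post("http://localhost:5000/analyze",
                     json={"text": "Semantria makes sentiment analysis simple."})
print(resp.json())  # {doc_id: {"entities": ..., "summary": ..., ...}, ...}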
def parse_sentiment(self, input_texts, expected_lang):
    SentimentProvider.parse_sentiment(self, input_texts, expected_lang)
    if len(input_texts) > 100:
        raise SatException("Too many inputs. Input documents are limited to 100 per API call!")

    # Parse messages from the JSON file
    docs_less140 = []
    docs_more140 = []
    id_map = {}
    for comment in input_texts:
        # Generate a unique id
        comment_id = str(uuid.uuid4()).replace("-", "")
        while comment_id in id_map:
            comment_id = str(uuid.uuid4()).replace("-", "")
        # Map the generated id to the original id of the comment
        id_map[comment_id] = comment["id"]
        # Clean the text of any URLs
        comment["text"] = re.sub(r'https?://www\.[a-z\.0-9]+', '', comment["text"])
        comment["text"] = re.sub(r'www\.[a-z\.0-9]+', '', comment["text"])
        # Split comments by length: over/under 140 characters
        if len(comment["text"]) > 140:
            docs_more140.append({"id": comment_id, "text": comment["text"]})
        else:
            docs_less140.append({"id": comment_id, "text": comment["text"]})

    # Initialise the JSON serialiser and create a Semantria session
    serializer = semantria.JsonSerializer()
    session = semantria.Session(semantria_key, semantria_secret, serializer, use_compression=True)

    # Use the configuration for the requested language
    print("Setting Language: " + expected_lang)
    if expected_lang != "German":
        raise SatException("Only 'German' is supported!")
    lang_id_less140 = german_conf_twitter_active
    lang_id_more140 = german_conf

    # Send messages as batches to Semantria
    if len(docs_more140) > 0:
        session.queueBatch(docs_more140, lang_id_more140)
    if len(docs_less140) > 0:
        session.queueBatch(docs_less140, lang_id_less140)

    # Retrieve results
    length_more140 = len(docs_more140)
    results_more140 = []
    length_less140 = len(docs_less140)
    results_less140 = []
    while (len(results_more140) < length_more140) or (len(results_less140) < length_less140):
        print("Retrieving processed results...", "\r\n")
        time.sleep(2)
        # Get processed documents, keep only those we queued, and translate
        # their ids back to the original comment ids. Filtering into a new
        # list avoids mutating a list while iterating over it.
        status_more140 = [data for data in session.getProcessedDocuments(lang_id_more140)
                          if data["id"] in id_map]
        for data in status_more140:
            data["id"] = id_map[data["id"]]
        print("Added " + str(len(status_more140)) + " entries to result_more140")
        results_more140.extend(status_more140)

        status_less140 = [data for data in session.getProcessedDocuments(lang_id_less140)
                          if data["id"] in id_map]
        for data in status_less140:
            data["id"] = id_map[data["id"]]
        print("Added " + str(len(status_less140)) + " entries to result_less140")
        results_less140.extend(status_less140)

    results = results_more140 + results_less140
    responses = []
    for result in results:
        responses.append(SentimentResponse(result['id'], result['sentiment_score'], None))
    return responses
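# A hypothetical sketch of the SentimentResponse container returned above; the
# real class is not shown in this snippet. The constructor is called with
# (id, sentiment_score, None), so the third field is assumed to carry
# optional extra detail.
class SentimentResponse:
    def __init__(self, response_id, sentiment_score, details=None):
        self.response_id = response_id
        self.sentiment_score = sentiment_score
        self.details = details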
def getDocumentThemes(textSubmitted):
    print("Semantria Detailed mode demo ...")
    print("")

    # The consumer key and secret
    key = "NONE"
    secret = "NONE"

    # Task statuses
    TASK_STATUS_UNDEFINED = 'UNDEFINED'
    TASK_STATUS_FAILED = 'FAILED'
    TASK_STATUS_QUEUED = 'QUEUED'
    TASK_STATUS_PROCESSED = 'PROCESSED'

    # Creates JSON serializer instance
    serializer = semantria.JsonSerializer()
    # Initializes new session with the serializer object and the keys.
    session = semantria.Session(key, secret, serializer, use_compression=True)

    # Initialize session callback handlers
    # session.Request += onRequest
    # session.Response += onResponse
    session.Error += onError
    # session.DocsAutoResponse += onDocsAutoResponse
    # session.CollsAutoResponse += onCollsAutoResponse

    subscription = session.getSubscription()

    initialTexts = []
    results = []
    tracker = {}
    documents = []

    doc_id = str(uuid.uuid4())
    documents.append({'id': doc_id, 'text': textSubmitted})
    tracker[doc_id] = TASK_STATUS_QUEUED

    res = session.queueBatch(documents)
    if res in [200, 202]:
        print("{0} documents queued successfully.".format(len(documents)))
        documents = []

    if len(documents):
        res = session.queueBatch(documents)
        if res not in [200, 202]:
            print("Unexpected error!")
            sys.exit(1)
        print("{0} documents queued successfully.".format(len(documents)))
    print("")

    while len(list(filter(lambda x: x == TASK_STATUS_QUEUED, tracker.values()))):
        time.sleep(0.5)
        print("Retrieving your processed results...")
        response = session.getProcessedDocuments()
        for item in response:
            if item['id'] in tracker:
                tracker[item['id']] = item['status']
                results.append(item)
    print("")

    # print(textSubmitted)
    for data in results:
        # Printing of document sentiment score
        print("Document {0} / Sentiment score: {1}".format(
            data['id'], data['sentiment_score']))
        print(data)
        if "auto_categories" in data:
            for auto_category in data["auto_categories"]:
                if "categories" in auto_category:
                    for category in auto_category["categories"]:
                        if category["sentiment_score"] == data["sentiment_score"]:
                            return category["title"]
    return "Nothing was found"
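# Example invocation; note that key and secret above are placeholders ("NONE")
# and must be replaced with real Semantria credentials before this will run.
# theme = getDocumentThemes("The service was slow but the food was excellent.")
# print(theme)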
def __init__(self, consumer_key, consumer_secret):
    self.name = 'semantria'
    serializer = semantria.JsonSerializer()
    self.session = semantria.Session(consumer_key, consumer_secret, serializer)
    self.session.Error += onError
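# A hypothetical usage sketch for the wrapper above, assuming the enclosing
# class is named something like SemantriaProvider; the class name and the
# surrounding module are not shown in this snippet.
# provider = SemantriaProvider("<consumer-key>", "<consumer-secret>")
# status = provider.session.queueDocument({"id": "doc1", "text": "Great product!"})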