Exemple #1
0
    def setUp(self):
        """Open a compressed, JSON-serialized Semantria session and attach all callbacks."""
        # JSON is the active wire format; semantria.XmlSerializer() is the alternative.
        self.serializer = semantria.JsonSerializer()
        self.session = semantria.Session(
            consumerKey,
            consumerSecret,
            self.serializer,
            use_compression=True,
        )

        # Subscribe the handlers (defined outside this method) to each session event.
        session = self.session
        session.Request += onRequest
        session.Response += onResponse
        session.Error += onError
        session.DocsAutoResponse += onDocsAutoResponse
        session.CollsAutoResponse += onCollsAutoResponse
Exemple #2
0
def analyze2(tweets):
    """Run Semantria sentiment analysis over tweets and return the analysed ones.

    Every tweet is queued as its own document. Processed documents are matched
    back to the tweet that produced them through the document id, so a result
    that arrives late is still attributed to the right tweet.

    Args:
        tweets: iterable of indexable records. ``tweet[1]`` is sent as the
            document text; ``tweet[0]``, ``tweet[2]`` and ``tweet[3]`` are
            passed through to ``AnalyzedTweet`` unchanged.

    Returns:
        list of ``AnalyzedTweet`` objects whose ``location`` is truthy.
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    consumerKey = "7bba1e0b-3a0a-4c27-823d-0a06ab8d27f4"
    consumerSecret = "335156f6-a161-490c-a9c2-203ec44c0cbd"

    # Pause between polls, and how many extra polls to spend on documents that
    # were still outstanding once every tweet had been queued.
    poll_interval = 0.2
    max_drain_polls = 25

    def onError(sender, result):
        # Errors reported by the session are deliberately ignored.
        pass

    serializer = semantria.JsonSerializer()
    session = semantria.Session(consumerKey, consumerSecret, serializer)
    session.Error += onError

    analyzedTweets = []
    pending = {}  # document id -> tweet that was queued under that id

    def collect_processed():
        """Fetch processed documents and convert the ones queued by this call."""
        processed = session.getProcessedDocuments()
        if not isinstance(processed, list):
            # Anything other than a list carries no documents.
            return
        for item in processed:
            source = pending.pop(item.get("id"), None)
            if source is None:
                # Not queued by this call (or already handled): skip it.
                continue
            analyzedTweet = AnalyzedTweet(source[0], source[2], source[3],
                                          item["sentiment_polarity"], 1,
                                          1, 1)
            if analyzedTweet.location:
                analyzedTweets.append(analyzedTweet)

    for tweet in tweets:
        doc = {"id": str(uuid.uuid1()).replace("-", ""), "text": tweet[1]}
        pending[doc["id"]] = tweet
        session.queueDocument(doc)
        time.sleep(poll_interval)
        collect_processed()

    # Give documents that were not ready yet a bounded amount of extra time.
    for _ in range(max_drain_polls):
        if not pending:
            break
        time.sleep(poll_interval)
        collect_processed()

    print(len(analyzedTweets))
    return analyzedTweets
Exemple #3
0
def analyse(txtInput):
    """Queue ``txtInput`` on Semantria and return its sentiment score.

    The document id is the text itself with dashes removed. The function polls
    every 0.1 s until at least one processed document is returned. The score
    of the document whose id matches the queued one is preferred; if none
    matches, the score of the last document received is returned.

    Args:
        txtInput: text to analyse.

    Returns:
        The ``sentiment_score`` value of the selected processed document.
    """
    serializer = semantria.JsonSerializer()
    session = semantria.Session(key, secret, serializer, use_compression=True)
    doc = {"id": str(txtInput).replace("-", ""), "text": txtInput}
    status = session.queueDocument(doc)
    if status == 202:
        print("\"", doc["id"], "\" document queued successfully.", "\r\n")

    results = []

    # Stop as soon as anything arrives: waiting for *exactly* one result would
    # spin forever if a single poll returned two or more documents.
    while not results:
        print("Retrieving your processed results...", "\r\n")
        time.sleep(0.1)
        # get processed documents
        processed = session.getProcessedDocuments()
        if isinstance(processed, list):
            results.extend(processed)

    # Prefer the document queued above over any other one that came back.
    for data in results:
        if data.get("id") == doc["id"]:
            return data["sentiment_score"]
    return results[-1]["sentiment_score"]

    # for data in results:
    #     # print document sentiment score
    #     print("Document ", data["id"], " Sentiment score: ", data["sentiment_score"], "\r\n")

    #     # print document themes
    #     if "themes" in data:
    #         print("Document themes:", "\r\n")
    #         for theme in data["themes"]:
    #             print("     ", theme["title"], " (sentiment: ", theme["sentiment_score"], ")", "\r\n")

    #     # print document entities
    #     if "entities" in data:
    #         print("Entities:", "\r\n")
    #         for entity in data["entities"]:
    #             print("\t", entity["title"], " : ", entity["entity_type"]," (sentiment: ", entity["sentiment_score"], ")", "\r\n")

    # return str(results)


#####################################################################################

#print (str(analyse("my work is irrelevant me. F**k life!")))
Exemple #4
0
def get_res():
    """Queue the texts held in ``initialTexts`` and write each result's entities to disk.

    Every file name in ``data_dir`` is read with ``load_data`` and stored in
    ``initialTexts`` at the integer value of that file name. Slot 0 is removed,
    the remaining texts are queued under ids "1", "2", ..., and the function
    polls every two seconds until as many results as texts have been gathered.
    For each result that has entities, they are dumped as JSON to
    ``n_semantria_labels/<id>``, with ``limit`` appended to the file name when
    there are exactly 20 of them.
    """
    json_serializer = semantria.JsonSerializer()
    session = semantria.Session("59e4e96b-f19b-48b5-910a-a2b5d9d2bfc7",
                                "0cee133a-889e-4d1f-9d99-2749677bcfdd",
                                json_serializer,
                                use_compression=True)

    # The file name doubles as the index of the text inside initialTexts.
    for file_name in os.listdir(data_dir):
        print(file_name)
        initialTexts[int(file_name)] = load_data(data_dir + '/%s' % file_name)

    del initialTexts[0]
    for position, text in enumerate(initialTexts, start=1):
        doc = {"id": str(position), "text": text}
        if session.queueDocument(doc) == 202:
            print("\"", doc["id"], "\" document queued successfully.", "\r\n")

    expected = len(initialTexts)
    results = []

    while len(results) < expected:
        print("Retrieving your processed results...", "\r\n")
        time.sleep(2)
        # Append whatever has been processed since the previous poll.
        results.extend(session.getProcessedDocuments())

    for data in results:
        if "entities" not in data:
            continue
        print("Entities:", "\r\n")
        entities = data["entities"]
        # Exactly 20 entities gets a 'limit' marker in the file name
        # (presumably the service's cap -- TODO confirm).
        suffix = 'limit' if len(entities) == 20 else ''
        with open('n_semantria_labels/%s' % data["id"] + suffix, 'w') as fb:
            json.dump(entities, fb)
from __future__ import print_function
import semantria
import uuid
import time

# Serializer handed to the session created below.
serializer = semantria.JsonSerializer()

# NOTE(review): the key and secret are hard-coded in source -- confirm they
# are not live credentials.
session = semantria.Session("98b2c1f2-318f-4de6-a0f3-27bffd811737",
                            "a90aa5cb-d515-4548-985c-735454a8a9a7",
                            serializer,
                            use_compression=True)

# Sample texts; each one is queued below as a separate document.
initialTexts = [
    "Lisa - there's 2 Skinny cow coupons available $5 skinny cow ice cream coupons on special k boxes and Printable FPC from facebook - a teeny tiny cup of ice cream. I printed off 2 (1 from my account and 1 from dh's). I couldn't find them instore and i'm not going to walmart before the 19th. Oh well sounds like i'm not missing much ...lol",
    "In Lake Louise - a guided walk for the family with Great Divide Nature Tours rent a canoe on Lake Louise or Moraine Lake  go for a hike to the Lake Agnes Tea House. In between Lake Louise and Banff - visit Marble Canyon or Johnson Canyon or both for family friendly short walks. In Banff  a picnic at Johnson Lake rent a boat at Lake Minnewanka  hike up Tunnel Mountain  walk to the Bow Falls and the Fairmont Banff Springs Hotel  visit the Banff Park Museum. The \"must-do\" in Banff is a visit to the Banff Gondola and some time spent on Banff Avenue - think candy shops and ice cream.",
    "On this day in 1786 - In New York City  commercial ice cream was manufactured for the first time."
]

# Queue every text under a fresh UUID4 id with the dashes stripped.
for text in initialTexts:
    doc = {"id": str(uuid.uuid4()).replace("-", ""), "text": text}

    status = session.queueDocument(doc)
    # 202 is the only status that is reported as a success here.
    if status == 202:
        print("\"", doc["id"], "\" document queued successfully.", "\r\n")

length = len(initialTexts)
results = []

# Poll until one result per queued text has been collected.
# NOTE(review): nothing visible in this loop adds to `results`, so as written
# it never terminates; the rest of the body appears to be missing -- confirm
# against the original script.
while len(results) < length:
    print("Retrieving your processed results...", "\r\n")
    # get processed documents
Exemple #6
0
def endpoint_name():
    """Analyse the posted text with Semantria and return the findings as JSON.

    Reads the ``text`` field of the JSON request body, cuts it into chunks of
    at most 975 characters and queues each chunk as its own document. It then
    polls every 0.5 s until no tracked document is still marked as queued.

    Returns:
        ``jsonify`` of a dict keyed by document id. Each value may hold
        ``entities``, ``summary`` and ``relationships`` when the processed
        document contained them.
    """
    print("endpoint hit!")
    rawtext = request.get_json()['text']

    # Creates JSON serializer instance
    serializer = semantria.JsonSerializer()
    # Initializes new session with the serializer object and the keys.
    session = semantria.Session(consumerKey,
                                consumerSecret,
                                serializer,
                                use_compression=True)
    # The reply is not used; the call is kept because the original made it.
    # NOTE(review): confirm whether this round trip is still needed.
    session.getSubscription()

    results = []
    tracker = {}  # document id -> last known task status
    documents = []  # documents not yet accepted by the service

    n = 975
    textchunks = [rawtext[i:i + n] for i in range(0, len(rawtext), n)]
    for text in textchunks:
        # Each chunk becomes one document with its own unique id.
        doc_id = str(uuid.uuid4())
        documents.append({'id': doc_id, 'text': text})
        tracker[doc_id] = TASK_STATUS_QUEUED

        # A rejected batch stays in `documents` and is sent again together
        # with the next chunk.
        res = session.queueBatch(documents)
        if res in [200, 202]:
            print("{0} documents queued successfully.".format(len(documents)))
            documents = []

    if documents:
        # Last attempt for whatever is still unsent.
        res = session.queueBatch(documents)
        if res not in [200, 202]:
            print("Unexpected error!")
            # NOTE(review): exiting the process from inside a request handler
            # is drastic; consider returning an error response instead.
            sys.exit(1)
        print("{0} documents queued successfully.".format(len(documents)))

    print("")

    # Keep polling while any tracked document is still marked as queued.
    while any(status == TASK_STATUS_QUEUED for status in tracker.values()):
        time.sleep(0.5)
        print("Retrieving your processed results...")

        response = session.getProcessedDocuments()
        for item in response:
            if item['id'] in tracker:
                tracker[item['id']] = item['status']
                results.append(item)

    print("")

    # Print each result and build the payload that is returned.
    resultDict = {}

    for data in results:
        dataDict = {}

        # Printing of document sentiment score
        print("Document {0} / Sentiment score: {1}".format(
            data['id'], data['sentiment_score']))

        # Printing of document themes
        if "themes" in data:
            print("Document themes:")
            for theme in data["themes"]:
                print("\t {0} (sentiment: {1})".format(
                    theme['title'], theme['sentiment_score']))

        # Printing of document entities
        if "entities" in data:
            print("Entities:")
            dataDict["entities"] = data["entities"]
            for entity in data["entities"]:
                print("\t {0}: {1} (sentiment: {2})".format(
                    entity['title'], entity['entity_type'],
                    entity['sentiment_score']))

        # Printing the summary
        if "summary" in data:
            print("Summary:")
            dataDict["summary"] = data["summary"]
            print(data["summary"])

        # The service calls them "relations"; the payload key is "relationships".
        if "relations" in data:
            print("Relationships:")
            dataDict["relationships"] = data["relations"]
            for relation in data["relations"]:
                print("\t {0}: {1}".format(relation['type'],
                                           relation['extra']))

        resultDict[data['id']] = dataDict
        print("")

    print("Done!")

    return jsonify(resultDict)
Exemple #7
0
    def parse_sentiment(self, input_texts, expected_lang):
        """Score the sentiment of up to 100 German comments with Semantria.

        Comments are split by length: texts longer than 140 characters are
        queued under ``german_conf``, the others under
        ``german_conf_twitter_active``. Each comment is queued under a
        generated id, and the original comment id is restored on the results.

        Args:
            input_texts: sequence of dicts with ``"id"`` and ``"text"`` keys.
                The ``"text"`` value is rewritten in place with ``www.`` URLs
                removed.
            expected_lang: must be ``"German"``.

        Returns:
            list of ``SentimentResponse(id, sentiment_score, None)``, results
            for the longer texts first.

        Raises:
            SatException: if more than 100 inputs are given or the language is
                not ``"German"``.
        """
        SentimentProvider.parse_sentiment(self, input_texts, expected_lang)

        if len(input_texts) > 100:
            raise SatException("Too many inputs. Input documents limited at 100 per API call!")

        # Split the comments by length and remember which generated id belongs
        # to which original comment id.
        docs_less140 = []
        docs_more140 = []
        id_map = {}
        for comment in input_texts:
            # generate unique id
            comment_id = str(uuid.uuid4()).replace("-", "")
            while comment_id in id_map:
                comment_id = str(uuid.uuid4()).replace("-", "")

            # Map id to original id of the comment
            id_map[comment_id] = comment["id"]

            # clean the text of any url
            comment["text"] = re.sub(r'https?://www\.[a-z\.0-9]+', '', comment["text"])
            comment["text"] = re.sub(r'www\.[a-z\.0-9]+', '', comment["text"])

            # sort the comment into the bucket for its length
            if len(comment["text"]) > 140:
                docs_more140.append({"id": comment_id, "text": comment["text"]})
            else:
                docs_less140.append({"id": comment_id, "text": comment["text"]})

        # Initialise JSON serialiser and create semantria Session
        serializer = semantria.JsonSerializer()
        session = semantria.Session(semantria_key, semantria_secret, serializer, use_compression=True)

        # Use Configuration for specific language
        print("Setting Language: " + expected_lang)

        if expected_lang != "German":
            raise SatException("Only 'German' is supported!")

        lang_id_less140 = german_conf_twitter_active
        lang_id_more140 = german_conf

        # Send messages as batch to semantria
        if len(docs_more140) > 0:
            session.queueBatch(docs_more140, lang_id_more140)
        if len(docs_less140) > 0:
            session.queueBatch(docs_less140, lang_id_less140)

        def fetch_own_results(config_id):
            """Return processed documents queued by this call, with original ids restored."""
            processed = session.getProcessedDocuments(config_id)
            if not isinstance(processed, list):
                # A non-list reply carries no documents.
                return []
            own = []
            # Build a new list rather than removing from `processed` while
            # iterating it, which would skip the element after each removal.
            for data in processed:
                if data["id"] in id_map:
                    data["id"] = id_map[data["id"]]
                    own.append(data)
            return own

        # Retrieve results
        length_more140 = len(docs_more140)
        results_more140 = []
        length_less140 = len(docs_less140)
        results_less140 = []

        while (len(results_more140) < length_more140) or (len(results_less140) < length_less140):
            print("Retrieving processed results...", "\r\n")
            time.sleep(2)
            # get processed documents
            status_more140 = fetch_own_results(lang_id_more140)
            print("Added " + str(len(status_more140)) + " entries to result_more140")
            results_more140.extend(status_more140)

            status_less140 = fetch_own_results(lang_id_less140)
            print("Added " + str(len(status_less140)) + " entries to result_less140")
            results_less140.extend(status_less140)

        results = results_more140 + results_less140
        return [
            SentimentResponse(result['id'], result['sentiment_score'], None)
            for result in results
        ]
Exemple #8
0
def getDocumentThemes(textSubmitted):
    """Return the title of the auto-category whose sentiment matches the document's.

    The text is queued as a single document and the function polls every
    0.5 s until that document is no longer marked as queued. Every processed
    result is then searched for an auto-category sub-category whose
    ``sentiment_score`` equals the document's own ``sentiment_score``.

    Args:
        textSubmitted: text to analyse.

    Returns:
        The ``title`` of the first matching category, or the string
        ``"Nothing was found"`` when no result contains a match.
    """
    print("Semantria Detailed mode demo ...")
    print("")

    # the consumer key and secret (placeholders)
    key = "NONE"
    secret = "NONE"

    # Status given to a document until the service reports something else.
    TASK_STATUS_QUEUED = 'QUEUED'

    # Creates JSON serializer instance
    serializer = semantria.JsonSerializer()
    # Initializes new session with the serializer object and the keys.
    session = semantria.Session(key, secret, serializer, use_compression=True)

    # Only the error callback is attached.
    session.Error += onError

    # The reply is not used; the call is kept because the original made it.
    # NOTE(review): confirm whether this round trip is still needed.
    session.getSubscription()

    results = []
    tracker = {}  # document id -> last known task status

    doc_id = str(uuid.uuid4())
    documents = [{'id': doc_id, 'text': textSubmitted}]
    tracker[doc_id] = TASK_STATUS_QUEUED

    res = session.queueBatch(documents)

    if res in [200, 202]:
        print("{0} documents queued successfully.".format(len(documents)))
        documents = []

    if documents:
        # First attempt was rejected: retry once, give up on a second failure.
        res = session.queueBatch(documents)
        if res not in [200, 202]:
            print("Unexpected error!")
            sys.exit(1)
        print("{0} documents queued successfully.".format(len(documents)))

    print("")

    # Keep polling while the document is still marked as queued.
    while any(status == TASK_STATUS_QUEUED for status in tracker.values()):
        time.sleep(0.5)
        print("Retrieving your processed results...")

        response = session.getProcessedDocuments()
        for item in response:
            if item['id'] in tracker:
                tracker[item['id']] = item['status']
                results.append(item)

    print("")

    for data in results:
        # Printing of document sentiment score
        print("Document {0} / Sentiment score: {1}".format(
            data['id'], data['sentiment_score']))

        print(data)
        if "auto_categories" in data:
            for auto_categories in data["auto_categories"]:
                if "categories" in auto_categories:
                    for categories in auto_categories["categories"]:
                        # NOTE(review): exact equality on scores -- confirm
                        # this is the intended matching rule.
                        if categories["sentiment_score"] == data[
                                "sentiment_score"]:
                            return (categories["title"])

    # Reached only after every result has been inspected without a match.
    return ("Nothing was found")
Exemple #9
0
 def __init__(self, consumer_key, consumer_secret):
     """Set the name to 'semantria' and open a JSON-serialized session with an error handler."""
     self.name = 'semantria'
     json_serializer = semantria.JsonSerializer()
     self.session = semantria.Session(
         consumer_key,
         consumer_secret,
         json_serializer,
     )
     # Route session errors to the handler defined outside this method.
     session = self.session
     session.Error += onError