Beispiel #1
0
def analyzeText(text, model, num_cats):
    """Classify ``text`` with the MeaningCloud Deep Categorization API.

    Sends ``text`` to the API using ``model`` and collects the labels of the
    first ``num_cats`` categories found in the response.

    Args:
        text: Text to classify; sent as the request's ``txt``.
        model: Deep Categorization model passed to the request.
        num_cats: Maximum number of category labels to keep. Shorter results
            are padded with empty strings up to this length.

    Returns:
        A ``pd.Series`` of category labels padded with ``""`` up to
        ``num_cats`` entries. When the request fails with a non-blocking
        error, the first entry is ``"ERROR (<code>): <message>"`` instead.

    Raises:
        ValueError: If the request is not successful and
            ``isBlockingErrorType`` flags its status code.

    Note:
        Reads the module-level ``license_key`` and ``server``. The global
        ``index_count`` is incremented only when no exception is raised.
        Exceptions raised while sending the request propagate unchanged.
    """
    global index_count
    print("Classifying text #%s" % str(index_count))

    # We are going to make a request to the Deep Categorization API
    request = meaningcloud.DeepCategorizationRequest(
        license_key, model=model, txt=text, server=server
    )
    setRequestSource(request)
    response = meaningcloud.DeepCategorizationResponse(request.sendReq())

    if response.isSuccessful():
        formatted_categories = [
            response.getCategoryLabel(cat)
            for cat in response.getCategories()[:num_cats]
        ]
    else:
        # %-formatting keeps the messages intact even if the status code is
        # not a string.
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: (%s) %s"
                % (status_code, status_msg)
            )
        print(
            "Oops! The request to Deep Categorization for text #%s"
            " was not succesful: (%s) %s"
            % (str(index_count), status_code, status_msg)
        )
        # The error is reported in the first result column.
        formatted_categories = ["ERROR (%s): %s" % (status_code, status_msg)]

    index_count += 1

    # Pad so the result has at least num_cats entries (a non-positive
    # difference yields no padding).
    padding = [""] * (num_cats - len(formatted_categories))
    return pd.Series(formatted_categories + padding)
def analyzeText(text):
    """Identify the language of ``text`` with the Language Identification API.

    Args:
        text: Text to analyze; sent as the request's ``txt``.

    Returns:
        A two-element ``pd.Series``: ``[language_code, language_name]``.
        Both are ``""`` when the response reports no first language. When
        the request fails with a non-blocking error, ``language_code`` holds
        ``"ERROR (<code>): <message>"`` and ``language_name`` stays ``""``.

    Raises:
        ValueError: If the request is not successful and
            ``isBlockingErrorType`` flags its status code.

    Note:
        Reads the module-level ``license_key`` and ``server``. The global
        ``index_count`` is incremented only when no exception is raised.
        Exceptions raised while sending the request propagate unchanged.
    """
    global index_count
    print("Identifying language for text #%s" % str(index_count))

    # this is where we are going to store our results
    language_code = ""
    language_name = ""

    # We are going to make a request to the Language Identification API
    request = meaningcloud.LanguageRequest(license_key, txt=text, server=server)
    setRequestSource(request)
    response = meaningcloud.LanguageResponse(request.sendReq())

    if response.isSuccessful():
        lang = response.getFirstLanguage()
        if lang:
            language_code = response.getLanguageCode(lang)
            language_name = response.getLanguageName(lang)
    else:
        # %-formatting keeps the messages intact even if the status code is
        # not a string.
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: (%s) %s"
                % (status_code, status_msg)
            )
        print(
            "Oops! The request to Language Identification for text #%s"
            " was not succesful: (%s) %s"
            % (str(index_count), status_code, status_msg)
        )
        # The error is reported in the language code column.
        language_code = "ERROR (%s): %s" % (status_code, status_msg)

    index_count += 1

    return pd.Series([language_code, language_name])
Beispiel #3
0
def analyzeText(text, lang):
    """Analyze the sentiment of ``text`` with the Sentiment Analysis API.

    Args:
        text: Text to analyze; sent as the request's ``txt``.
        lang: Language value passed to the request as ``lang``.

    Returns:
        A five-element ``pd.Series``:
        ``[polarity, subjectivity, irony, agreement, confidence]``.
        When the request fails with a non-blocking error, ``polarity`` holds
        ``"ERROR (<code>): <message>"`` and the other entries are ``""``.

    Raises:
        ValueError: If the request is not successful and
            ``isBlockingErrorType`` flags its status code.

    Note:
        Reads the module-level ``license_key`` and ``server``. The global
        ``index_count`` is incremented only when no exception is raised.
        Exceptions raised while sending the request propagate unchanged.
    """
    global index_count
    print("Analyzing sentiment for text #%s" % str(index_count))

    # this is where we are going to store our results
    polarity = ""
    subjectivity = ""
    irony = ""
    agreement = ""
    confidence = ""

    # We are going to make a request to the Sentiment Analysis API
    request = meaningcloud.SentimentRequest(
        license_key, lang=lang, txt=text, server=server
    )
    setRequestSource(request)
    response = meaningcloud.SentimentResponse(request.sendReq())

    if response.isSuccessful():
        polarity = response.scoreTagToString(response.getGlobalScoreTag())
        subjectivity = response.getSubjectivity()
        irony = response.getIrony()
        agreement = response.getGlobalAgreement()
        confidence = response.getGlobalConfidence()
    else:
        # %-formatting keeps the messages intact even if the status code is
        # not a string.
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: (%s) %s"
                % (status_code, status_msg)
            )
        print(
            "Oops! The request to Sentiment Analysis for text #%s"
            " was not succesful: (%s) %s"
            % (str(index_count), status_code, status_msg)
        )
        # The error is reported in the polarity column.
        polarity = "ERROR (%s): %s" % (status_code, status_msg)

    index_count += 1

    return pd.Series([polarity, subjectivity, irony, agreement, confidence])
Beispiel #4
0
def analyzeText(text):
    """Summarize ``text`` with the MeaningCloud Summarization API.

    Args:
        text: Text to summarize; sent as the request's ``txt``.

    Returns:
        A one-element ``pd.Series`` holding the summary. When the request
        fails with a non-blocking error, the entry is
        ``"ERROR (<code>): <message>"`` instead.

    Raises:
        ValueError: If the request is not successful and
            ``isBlockingErrorType`` flags its status code.

    Note:
        Reads the module-level ``license_key``, ``server`` and ``sentences``
        (passed to the request as ``sentences``). The global ``index_count``
        is incremented only when no exception is raised. Exceptions raised
        while sending the request propagate unchanged.
    """
    global index_count
    print("Extracting summary for text #%s" % str(index_count))

    # We are going to make a request to the Summarization API
    request = meaningcloud.SummarizationRequest(
        license_key, sentences=sentences, txt=text, server=server
    )
    setRequestSource(request)
    response = meaningcloud.SummarizationResponse(request.sendReq())

    if response.isSuccessful():
        summary = response.getSummary()
    else:
        # %-formatting keeps the messages intact even if the status code is
        # not a string.
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: (%s) %s"
                % (status_code, status_msg)
            )
        print(
            "Oops! The request to Summarization for text #%s"
            " was not succesful: (%s) %s"
            % (str(index_count), status_code, status_msg)
        )
        # The error is reported in place of the summary.
        summary = "ERROR (%s): %s" % (status_code, status_msg)

    index_count += 1

    return pd.Series([summary])
def analyzeText(text, language, threshold, tt, ud):
    """Extract topics from ``text`` with the MeaningCloud Topics Extraction API.

    Entities and concepts whose relevance is at least ``threshold`` are
    grouped by the lower-cased first node of their ontology type; types that
    do not match a known group go to ``"other"``. Money and quantity
    expressions are both collected under ``"quantity"``, with no relevance
    filter.

    Args:
        text: Text to analyze; sent as the request's ``txt``.
        language: Language value passed to the request as ``lang``.
        threshold: Minimum relevance (compared against ``int(relevance)``)
            for an entity or concept to be kept.
        tt: Topic types, passed to the request as ``topicType``. Membership
            of ``"e"``, ``"c"``, ``"m"`` and ``"n"`` selects entities,
            concepts, money expressions and quantity expressions.
        ud: Value sent as the extra request parameter ``ud``
            (presumably a user dictionary name -- confirm against the API).

    Returns:
        A ``pd.Series`` indexed by ``person``, ``organization``,
        ``location``, ``product``, ``id``, ``event``, ``other`` and
        ``quantity``. On success each value is a list filled through
        ``insertInList``. When the request fails with a non-blocking error,
        ``person`` holds ``"ERROR (<code>): <message>"`` and every other
        value is ``""``.

    Raises:
        ValueError: If the request is not successful and
            ``isBlockingErrorType`` flags its status code.

    Note:
        Reads the module-level ``license_key`` and ``server``. The global
        ``index_count`` is incremented only when no exception is raised.
        Exceptions raised while sending the request propagate unchanged.
    """
    global index_count
    print("Extracting topics for text #%s" % str(index_count))

    # this is where we are going to store our results
    topics = {
        "person": [],
        "organization": [],
        "location": [],
        "product": [],
        "id": [],
        "event": [],
        "other": [],
        "quantity": [],
    }

    # We are going to make a request to the Topics Extraction API
    request = meaningcloud.TopicsRequest(
        license_key,
        txt=text,
        lang=language,
        topicType=tt,
        server=server,
        otherparams={"ud": ud},
    )
    setRequestSource(request)
    response = meaningcloud.TopicsResponse(request.sendReq())

    if response.isSuccessful():
        # Entities and concepts are filtered and bucketed the same way, so
        # they share one loop; entities are still processed first.
        typed_sources = (
            ("e", response.getEntities),
            ("c", response.getConcepts),
        )
        for flag, fetch in typed_sources:
            if flag not in tt:
                continue
            for topic in fetch() or []:
                if int(response.getTopicRelevance(topic)) < threshold:
                    continue
                first_node = response.getTypeFirstNode(
                    response.getOntoType(topic)
                ).lower()
                form = str(response.getTopicForm(topic))
                bucket = topics.get(first_node)
                if bucket is None:
                    # Unknown ontology types are grouped under "other".
                    bucket = topics["other"]
                insertInList(bucket, form)

        # Money and quantity expressions both land in "quantity".
        quantity_sources = (
            ("m", response.getMoneyExpressions),
            ("n", response.getQuantityExpressions),
        )
        for flag, fetch in quantity_sources:
            if flag not in tt:
                continue
            for expression in fetch() or []:
                insertInList(
                    topics["quantity"], str(response.getTopicForm(expression))
                )
    else:
        # %-formatting keeps the messages intact even if the status code is
        # not a string.
        status_code = response.getStatusCode()
        status_msg = response.getStatusMsg()
        if isBlockingErrorType(status_code):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: (%s) %s"
                % (status_code, status_msg)
            )
        print(
            "Oops! The request to Topics Extraction for text #%s"
            " was not succesful: (%s) %s"
            % (str(index_count), status_code, status_msg)
        )
        # The error is reported in the first column; the rest stay blank.
        topics = {
            "person": "ERROR (%s): %s" % (status_code, status_msg),
            "organization": "",
            "location": "",
            "product": "",
            "id": "",
            "event": "",
            "other": "",
            "quantity": "",
        }

    index_count += 1
    return pd.Series(topics)