def analyzeText(text, model, num_cats):
    """Classify ``text`` with the Deep Categorization API and return its labels.

    Args:
        text: Text sent as the ``txt`` value of the request.
        model: Forwarded as the ``model`` value of the request.
        num_cats: Number of category labels to keep; shorter results are
            padded with empty strings up to this length.

    Returns:
        A ``pd.Series`` holding the labels of the first ``num_cats``
        categories, padded with ``""``. If the request fails with a status
        that ``isBlockingErrorType`` does not flag, the first entry is an
        ``"ERROR (<code>): <message>"`` string instead.

    Raises:
        ValueError: If ``isBlockingErrorType`` flags the response status code.
    """
    global index_count

    print("Classifying text #%s" % str(index_count))

    try:
        # Build the Deep Categorization request and send it.
        request = meaningcloud.DeepCategorizationRequest(
            license_key, model=model, txt=text, server=server
        )
        setRequestSource(request)
        response = meaningcloud.DeepCategorizationResponse(request.sendReq())

        if not response.isSuccessful():
            status = response.getStatusCode()
            if isBlockingErrorType(status):
                # Flagged statuses abort the run instead of producing a row.
                raise ValueError(
                    "Something went wrong in the MeaningCloud request!: ("
                    + status
                    + ") "
                    + response.getStatusMsg()
                )
            message = response.getStatusMsg()
            print(
                "Oops! The request to Deep Categorization for text #"
                + str(index_count)
                + " was not succesful: ("
                + status
                + ") "
                + message
            )
            # The error description takes the place of the first label.
            labels = ["ERROR (" + status + "): " + message]
        else:
            labels = []
            for category in response.getCategories()[:num_cats]:
                labels.append(response.getCategoryLabel(category))
    except ValueError as err:
        raise ValueError(str(err))

    index_count += 1

    # Pad with empty strings so the row reaches num_cats entries.
    labels.extend([""] * (num_cats - len(labels)))
    return pd.Series(labels)
def analyzeText(text):
    """Identify the language of ``text`` with the Language Identification API.

    Args:
        text: Text sent as the ``txt`` value of the request.

    Returns:
        A two-element ``pd.Series``: language code and language name of the
        first language reported by the response. Both are ``""`` when no
        language is reported. If the request fails with a status that
        ``isBlockingErrorType`` does not flag, the first element is an
        ``"ERROR (<code>): <message>"`` string and the second stays ``""``.

    Raises:
        ValueError: If ``isBlockingErrorType`` flags the response status code.
    """
    global index_count

    print("Identifying language for text #%s" % str(index_count))

    # Defaults used when the response reports no language.
    code, name = "", ""

    try:
        # Build the Language Identification request and send it.
        request = meaningcloud.LanguageRequest(license_key, txt=text, server=server)
        setRequestSource(request)
        response = meaningcloud.LanguageResponse(request.sendReq())

        if not response.isSuccessful():
            status = response.getStatusCode()
            if isBlockingErrorType(status):
                # Flagged statuses abort the run instead of producing a row.
                raise ValueError(
                    "Something went wrong in the MeaningCloud request!: ("
                    + status
                    + ") "
                    + response.getStatusMsg()
                )
            message = response.getStatusMsg()
            print(
                "Oops! The request to Language Identification for text #"
                + str(index_count)
                + " was not succesful: ("
                + status
                + ") "
                + message
            )
            # The error description is reported in the language-code slot.
            code = "ERROR (" + status + "): " + message
        else:
            detected = response.getFirstLanguage()
            if detected:
                code = response.getLanguageCode(detected)
                name = response.getLanguageName(detected)
    except ValueError as err:
        raise ValueError(str(err))

    index_count += 1
    return pd.Series([code, name])
def analyzeText(text, lang):
    """Run the Sentiment Analysis API on ``text`` and return its global values.

    Args:
        text: Text sent as the ``txt`` value of the request.
        lang: Forwarded as the ``lang`` value of the request.

    Returns:
        A five-element ``pd.Series`` in the order polarity, subjectivity,
        irony, agreement, confidence. If the request fails with a status
        that ``isBlockingErrorType`` does not flag, the first element is an
        ``"ERROR (<code>): <message>"`` string and the rest are ``""``.

    Raises:
        ValueError: If ``isBlockingErrorType`` flags the response status code.
    """
    global index_count

    print("Analyzing sentiment for text #%s" % str(index_count))

    # Slots, in order: polarity, subjectivity, irony, agreement, confidence.
    values = [""] * 5

    try:
        # Build the Sentiment Analysis request and send it.
        request = meaningcloud.SentimentRequest(
            license_key, lang=lang, txt=text, server=server
        )
        setRequestSource(request)
        response = meaningcloud.SentimentResponse(request.sendReq())

        if not response.isSuccessful():
            status = response.getStatusCode()
            if isBlockingErrorType(status):
                # Flagged statuses abort the run instead of producing a row.
                raise ValueError(
                    "Something went wrong in the MeaningCloud request!: ("
                    + status
                    + ") "
                    + response.getStatusMsg()
                )
            message = response.getStatusMsg()
            print(
                "Oops! The request to Sentiment Analysis for text #"
                + str(index_count)
                + " was not succesful: ("
                + status
                + ") "
                + message
            )
            # The error description is reported in the polarity slot.
            values[0] = "ERROR (" + status + "): " + message
        else:
            values = [
                response.scoreTagToString(response.getGlobalScoreTag()),
                response.getSubjectivity(),
                response.getIrony(),
                response.getGlobalAgreement(),
                response.getGlobalConfidence(),
            ]
    except ValueError as err:
        raise ValueError(str(err))

    index_count += 1
    return pd.Series(values)
def analyzeText(text):
    """Summarize ``text`` with the Summarization API.

    The number of sentences requested comes from the module-level
    ``sentences`` variable.

    Args:
        text: Text sent as the ``txt`` value of the request.

    Returns:
        A one-element ``pd.Series`` holding the summary. If the request
        fails with a status that ``isBlockingErrorType`` does not flag, the
        element is an ``"ERROR (<code>): <message>"`` string instead.

    Raises:
        ValueError: If ``isBlockingErrorType`` flags the response status code.
    """
    global index_count

    print("Extracting summary for text #%s" % str(index_count))

    try:
        # Build the Summarization request and send it.
        request = meaningcloud.SummarizationRequest(
            license_key, sentences=sentences, txt=text, server=server
        )
        setRequestSource(request)
        response = meaningcloud.SummarizationResponse(request.sendReq())

        if not response.isSuccessful():
            status = response.getStatusCode()
            if isBlockingErrorType(status):
                # Flagged statuses abort the run instead of producing a row.
                raise ValueError(
                    "Something went wrong in the MeaningCloud request!: ("
                    + status
                    + ") "
                    + response.getStatusMsg()
                )
            message = response.getStatusMsg()
            print(
                "Oops! The request to Summarization for text #"
                + str(index_count)
                + " was not succesful: ("
                + status
                + ") "
                + message
            )
            # The error description is reported in place of the summary.
            result = "ERROR (" + status + "): " + message
        else:
            result = response.getSummary()
    except ValueError as err:
        raise ValueError(str(err))

    index_count += 1
    return pd.Series([result])
def analyzeText(text, language, threshold, tt, ud):
    """Extract topics from ``text`` with the Topics Extraction API.

    Args:
        text: Text sent as the ``txt`` value of the request.
        language: Forwarded as the ``lang`` value of the request.
        threshold: Minimum relevance (compared against ``int`` of the topic
            relevance) an entity or concept needs in order to be kept.
        tt: Forwarded as ``topicType``; also checked for the flags ``"e"``
            (entities), ``"c"`` (concepts), ``"m"`` (money expressions) and
            ``"n"`` (quantity expressions) to decide what to read back.
        ud: Forwarded to the request as ``otherparams={"ud": ud}``.

    Returns:
        A ``pd.Series`` built from a dict keyed by ``person``,
        ``organization``, ``location``, ``product``, ``id``, ``event``,
        ``other`` and ``quantity``. On success each value is a list of topic
        forms. If the request fails with a status that
        ``isBlockingErrorType`` does not flag, ``person`` holds an
        ``"ERROR (<code>): <message>"`` string and every other value is ``""``.

    Raises:
        ValueError: If ``isBlockingErrorType`` flags the response status code.
    """
    global index_count

    print("Extracting topics for text #%s" % str(index_count))

    # One bucket per output column; forms are added through insertInList.
    topics = {
        "person": [],
        "organization": [],
        "location": [],
        "product": [],
        "id": [],
        "event": [],
        "other": [],
        "quantity": [],
    }

    try:
        # Build the Topics Extraction request and send it.
        request = meaningcloud.TopicsRequest(
            license_key,
            txt=text,
            lang=language,
            topicType=tt,
            server=server,
            otherparams={"ud": ud},
        )
        setRequestSource(request)
        response = meaningcloud.TopicsResponse(request.sendReq())

        if not response.isSuccessful():
            status = response.getStatusCode()
            if isBlockingErrorType(status):
                # Flagged statuses abort the run instead of producing a row.
                raise ValueError(
                    "Something went wrong in the MeaningCloud request!: ("
                    + status
                    + ") "
                    + response.getStatusMsg()
                )
            message = response.getStatusMsg()
            print(
                "Oops! The request to Topics Extraction for text #"
                + str(index_count)
                + " was not succesful: ("
                + status
                + ") "
                + message
            )
            # Same keys in the same order, but with plain strings: the error
            # description goes under "person", every other column is blank.
            topics = {key: "" for key in topics}
            topics["person"] = "ERROR (" + status + "): " + message
        else:
            # Entities first, then concepts: both are filtered by relevance
            # and routed to the bucket named after their first ontology node.
            typed_sources = (
                ("e", response.getEntities),
                ("c", response.getConcepts),
            )
            for flag, fetch in typed_sources:
                if flag not in tt:
                    continue
                for topic in fetch() or ():
                    if int(response.getTopicRelevance(topic)) < threshold:
                        continue
                    first_node = response.getTypeFirstNode(
                        response.getOntoType(topic)
                    ).lower()
                    form = str(response.getTopicForm(topic))
                    bucket = topics.get(first_node)
                    if bucket is None:
                        # Types without a dedicated column fall back to "other".
                        bucket = topics.get("other")
                    insertInList(bucket, form)

            # Money expressions first, then quantity expressions: both are
            # collected, unfiltered, under "quantity".
            quantity_sources = (
                ("m", response.getMoneyExpressions),
                ("n", response.getQuantityExpressions),
            )
            for flag, fetch in quantity_sources:
                if flag not in tt:
                    continue
                for expression in fetch() or ():
                    insertInList(
                        topics.get("quantity"), str(response.getTopicForm(expression))
                    )
    except ValueError as err:
        raise ValueError(str(err))

    index_count += 1
    return pd.Series(topics)