def Adv_Disadv(self, input_data):
    """Normalize, formal-convert and sentence-split a stringified list of texts.

    Parameters
    ----------
    input_data : str
        A string holding a Python list literal (parsed with
        ``ast.literal_eval``); each element is one advantage/disadvantage
        text fragment. Empty fragments and bare ``"\\r"`` entries are skipped.

    Returns
    -------
    list
        Flat list of sentences returned by the SentenceSplitter API for
        every processed fragment. If the top-level parse/loop fails, a
        single ``''`` is appended and ``self.ErrorListIndex`` records
        ``self.counter``.

    Notes
    -----
    Per-fragment API failures are logged and recorded in
    ``self.ErrorListIndex``; normalizer/splitter failures skip the
    fragment, a FormalConverter failure falls through with the
    normalized text.
    """
    i = self.counter
    temp = []
    final_temp = []
    # Hoisted out of the try block: all three API calls depend on it.
    baseUrl = "http://api.text-mining.ir/api/"
    try:
        internal_list_temp = ast.literal_eval(input_data)
        for fragment in internal_list_temp:
            # Skip empty fragments and bare carriage returns.
            if fragment == '' or fragment == "\r":
                continue
            ##################### Text Normalizer #########################
            try:
                url = baseUrl + "PreProcessing/NormalizePersianWord"
                payload = u'{{\"text\":\"{}\", \"refineSeparatedAffix\":true}}'.format(fragment)
                temp = callApi(url, payload, tokenKey)
            except Exception as e:
                print("The Error is : ", e)
                print("Error on iteration: {0} function: {1} ".format(i, "Text Normalizer"))
                self.ErrorListIndex.append(i)
                continue
            # ################ Slang to Formal Converter ##################
            try:
                url = baseUrl + "TextRefinement/FormalConverter"
                payload = u'\"{0}\"'.format(temp)
                temp = callApi(url, payload, tokenKey)
            except Exception as e:
                print("The Error is : ", e)
                print("Error on iteration: {0} function: {1} ".format(i, "Slang to Formal Converter"))
                # BUG FIX: original read `ErrorListIndex.append(i)` without
                # `self.`, raising NameError whenever this handler ran.
                self.ErrorListIndex.append(i)
            # ################ Sentence Tokenizer ##################
            try:
                url = baseUrl + "PreProcessing/SentenceSplitter"
                payload = u'''{\"text\":\"%s\", \"checkSlang\": true, \"normalize\": true, \"normalizerParams\": { \"text\": \"don't care\", \"RefineQuotationPunc \": false }, \"complexSentence\": true }''' % temp
                temp = callApi(url, payload, tokenKey)
                # API returns a stringified list of sentences.
                temp = ast.literal_eval(temp)
            except Exception as e:
                print("The Error is : ", e)
                print("Error on iteration: {0} function: {1} ".format(i, "Sentence Tokenizer"))
                self.ErrorListIndex.append(i)
                continue
            final_temp.extend(temp)
    except Exception as e:
        print("The Error is : ", e)
        print("Error on iteration: {0} function: {1} ".format(i, "Adv_Disadv "))
        self.ErrorListIndex.append(i)
        final_temp.append('')
    return final_temp
def Comment_Cleaner(self, str_data):
    """Clean one comment string: normalize, formal-convert, sentence-split.

    Parameters
    ----------
    str_data : str
        Raw comment text to be passed through the text-mining API chain.

    Returns
    -------
    The SentenceSplitter API response for the cleaned text (the raw
    ``callApi`` result); on intermediate failures the last successful
    stage's value is carried forward, and ``self.ErrorListIndex``
    records ``self.counter``.
    """
    i = self.counter
    temp = []
    # Hoisted out of the try block: all three API calls depend on it.
    baseUrl = "http://api.text-mining.ir/api/"
    ##################### Text Normalizer #########################
    try:
        url = baseUrl + "PreProcessing/NormalizePersianWord"
        payload = u'{{\"text\":\"{}\", \"refineSeparatedAffix\":true}}'.format(str_data)
        temp = callApi(url, payload, tokenKey)
    except Exception as e:
        print("The Error is : ", e)
        print("Error on iteration: {0} function: {1} ".format(i, "Text Normalizer"))
        self.ErrorListIndex.append(i)
    # ################ Slang to Formal Converter ##################
    try:
        url = baseUrl + "TextRefinement/FormalConverter"
        payload = u'\"{0}\"'.format(temp)
        temp = callApi(url, payload, tokenKey)
    except Exception as e:
        print("The Error is : ", e)
        print("Error on iteration: {0} function: {1} ".format(i, "Slang to Formal Converter"))
        # BUG FIX: original read `ErrorListIndex.append(i)` without
        # `self.`, raising NameError whenever this handler ran.
        self.ErrorListIndex.append(i)
    # ################ Sentence Splitter ##################
    try:
        url = baseUrl + "PreProcessing/SentenceSplitter"
        # `% temp` is equivalent to the original `% format(temp)`:
        # the %s conversion already applies str().
        payload = u'''{\"text\":\"%s\", \"checkSlang\": true, \"normalize\": true, \"normalizerParams\": { \"text\": \"don't care\", \"RefineQuotationPunc \": false }, \"complexSentence\": true }''' % temp
        temp = callApi(url, payload, tokenKey)
    except Exception as e:
        print("The Error is : ", e)
        print("Error on iteration: {0} function: {1} ".format(i, "Slang to Formal Converter"))
        self.ErrorListIndex.append(i)
    return temp
def PoS_Extractor(self, input_data):
    """Tag each sentence of *input_data* with part-of-speech labels.

    Parameters
    ----------
    input_data : iterable of str
        Sentences to tag; materialized with ``list()`` before use.

    Returns
    -------
    list of list of str
        One inner list per sentence, each element formatted as
        ``"(word,POS)"`` from the PosTagger API response. On failure the
        partial result built so far is returned and
        ``self.ErrorListIndex`` records ``self.counter``.
    """
    temp_list = []
    try:
        # BUG FIX: baseUrl was never defined in this method (the sibling
        # methods define it locally), causing a NameError unless a module
        # global happened to exist.
        baseUrl = "http://api.text-mining.ir/api/"
        url = baseUrl + "PosTagger/GetPos"
        for sentence in list(input_data):
            sentence_POS = []
            payload = u'\"{0}\"'.format(sentence)
            result = json.loads(callApi(url, payload, tokenKey))
            for phrase in result:
                sentence_POS.append("(" + phrase['word'] + "," + phrase['tags']['POS']['item1'] + ")")
            temp_list.append(sentence_POS)
    except Exception as e:
        print("The Error is : ", e)
        print("Error on iteration: {0} function: {1} ".format(self.counter, "PoS extractor"))
        # BUG FIX: original appended the loop index `i`, which is
        # undefined (NameError) when the failure occurs before the first
        # iteration; record the row counter as the sibling methods do.
        self.ErrorListIndex.append(self.counter)
    return temp_list