Example #1
0
 def Adv_Disadv(self, input_data):
     """Normalize, formalize and sentence-split a list of raw text entries.

     ``input_data`` is a string holding a Python-literal list (parsed with
     ``ast.literal_eval``).  Each non-empty entry is pushed through three
     text-mining.ir API calls in sequence: Persian text normalizer,
     slang-to-formal converter, and sentence splitter.

     Returns a flat list of all sentences produced across the entries.
     Iterations that fail are recorded in ``self.ErrorListIndex`` (keyed by
     ``self.counter``); on a top-level failure an empty string is appended
     and the partial result is returned.
     """
     i = self.counter
     temp = []
     final_temp = []
     try:
         internal_list_temp = ast.literal_eval(input_data)
         for entry in internal_list_temp:
             # Skip blank entries and bare carriage returns.
             if entry == '' or entry == "\r":
                 continue
             #####################      Text Normalizer       #########################
             try:
                 baseUrl = "http://api.text-mining.ir/api/"
                 url = baseUrl + "PreProcessing/NormalizePersianWord"
                 payload = u'{{\"text\":\"{}\", \"refineSeparatedAffix\":true}}'.format(entry)
                 temp = callApi(url, payload, tokenKey)
             except Exception as e:
                 print("The Error is : ", e)
                 print("Error on iteration: {0} function: {1} ".format(i, "Text Normalizer"))
                 self.ErrorListIndex.append(i)
                 continue
             ################ Slang to Formal Converter ##################
             # NOTE: no `continue` here — on failure we fall through and let
             # the splitter run on the last successful `temp` (best-effort),
             # matching the original control flow.
             try:
                 url = baseUrl + "TextRefinement/FormalConverter"
                 payload = u'\"{0}\"'.format(temp)
                 temp = callApi(url, payload, tokenKey)
             except Exception as e:
                 print("The Error is : ", e)
                 print("Error on iteration: {0} function: {1} ".format(i, "Slang to Formal Converter"))
                 # BUG FIX: was a bare `ErrorListIndex` (NameError at runtime).
                 self.ErrorListIndex.append(i)
             ################ Sentence Tokenizer ##################
             try:
                 url = baseUrl + "PreProcessing/SentenceSplitter"
                 payload = u'''{\"text\":\"%s\",
                         \"checkSlang\": true,
                         \"normalize\": true,
                         \"normalizerParams\": {
                         \"text\": \"don't care\",
                         \"RefineQuotationPunc \": false
                         },
                         \"complexSentence\": true
                         }''' % temp
                 temp = callApi(url, payload, tokenKey)
                 # The API returns a stringified list of sentences.
                 temp = ast.literal_eval(temp)
             except Exception as e:
                 print("The Error is : ", e)
                 print("Error on iteration: {0} function: {1} ".format(i, "Sentence Tokenizer"))
                 self.ErrorListIndex.append(i)
                 continue
             final_temp.extend(temp)
     except Exception as e:
         print("The Error is : ", e)
         print("Error on iteration: {0} function: {1} ".format(i, "Adv_Disadv "))
         self.ErrorListIndex.append(i)
         final_temp.append('')
     return final_temp
Example #2
0
    def Comment_Cleaner(self, str_data):
        """Clean a single raw comment string via the text-mining.ir API.

        ``str_data`` is sent through three sequential API calls: Persian
        text normalizer, slang-to-formal converter, and sentence splitter.
        Each stage reuses the previous stage's output as its input.

        Returns the raw string response of the last successful stage.
        Failures are logged and recorded in ``self.ErrorListIndex`` (keyed
        by ``self.counter``); processing continues best-effort with the
        last successful value.
        """
        i = self.counter
        temp = []
        #####################      Text Normalizer       #########################
        try:
            baseUrl = "http://api.text-mining.ir/api/"
            url = baseUrl + "PreProcessing/NormalizePersianWord"
            payload = u'{{\"text\":\"{}\", \"refineSeparatedAffix\":true}}'.format(str_data)
            temp = callApi(url, payload, tokenKey)
        except Exception as e:
            print("The Error is : ", e)
            print("Error on iteration: {0} function: {1} ".format(i, "Text Normalizer"))
            self.ErrorListIndex.append(i)
        ################ Slang to Formal Converter ##################
        try:
            url = baseUrl + "TextRefinement/FormalConverter"
            payload = u'\"{0}\"'.format(temp)
            temp = callApi(url, payload, tokenKey)
        except Exception as e:
            print("The Error is : ", e)
            print("Error on iteration: {0} function: {1} ".format(i, "Slang to Formal Converter"))
            # BUG FIX: was a bare `ErrorListIndex` (NameError at runtime).
            self.ErrorListIndex.append(i)
        ################ Sentence Splitter ##################
        try:
            url = baseUrl + "PreProcessing/SentenceSplitter"
            payload = u'''{\"text\":\"%s\",
                    \"checkSlang\": true,
                    \"normalize\": true,
                    \"normalizerParams\": {
                    \"text\": \"don't care\",
                    \"RefineQuotationPunc \": false
                    },
                    \"complexSentence\": true
                    }''' % temp
            temp = callApi(url, payload, tokenKey)
        except Exception as e:
            print("The Error is : ", e)
            # BUG FIX: error label previously said "Slang to Formal Converter"
            # (copy-paste) — this handler guards the sentence splitter.
            print("Error on iteration: {0} function: {1} ".format(i, "Sentence Splitter"))
            self.ErrorListIndex.append(i)
        return temp
Example #3
0
 def PoS_Extractor(self, input_data):
     """Part-of-speech-tag each sentence in *input_data*.

     Each element of ``input_data`` (an iterable of sentence strings) is
     sent to the PosTagger endpoint; for every sentence a list of
     ``"(word,POS)"`` strings is built from the JSON response.

     Returns a list of those per-sentence lists.  On failure the error is
     logged, ``self.counter`` is recorded in ``self.ErrorListIndex``, and
     the sentences tagged so far are returned.
     """
     # BUG FIX: initialize before the try — previously `temp_list` was
     # created inside the try, so an early failure (e.g. before the loop)
     # made `return temp_list` raise UnboundLocalError.
     temp_list = []
     try:
         url = baseUrl + "PosTagger/GetPos"
         for sentence in list(input_data):
             sentence_POS = []
             payload = u'\"{0}\"'.format(sentence)
             result = json.loads(callApi(url, payload, tokenKey))
             for phrase in result:
                 sentence_POS.append("(" + phrase['word'] + "," + phrase['tags']['POS']['item1'] + ")")
             temp_list.append(sentence_POS)
     except Exception as e:
         print("The Error is : ", e)
         print("Error on iteration: {0} function: {1} ".format(self.counter, "PoS extractor"))
         # BUG FIX: previously appended the loop variable `i`, which is
         # unbound if the failure happens before the loop starts; record the
         # iteration counter, consistent with the other handlers.
         self.ErrorListIndex.append(self.counter)
     return temp_list