Beispiel #1
0
 def fit(self, service_list, transformer=None):
     """Feed the word list of every service document to ``self._fit``.

     Args:
         service_list: Iterable of service documents.
         transformer: Optional object whose ``transform(document)`` returns
             a word bag. When it is falsy, the bag is loaded from the
             document's file under ``self._corpus_path`` instead.
     """
     for service in service_list:
         if not transformer:
             # No transformer supplied: read the stored bag for this document.
             bag = WordBag().load_from_file(
                 join(self._corpus_path,
                      self._get_document_filename(service)))
         else:
             bag = transformer.transform(service)
         self._fit(bag.get_words_list())
Beispiel #2
0
 def transform(self, data):
     """Parse ``data`` and extract its words into a ``WordBag``.

     Every character that is not an ASCII letter or a space is turned into
     a space, camelCase boundaries (a lowercase letter directly followed by
     an uppercase one) are separated by a space, runs of spaces are
     collapsed, and the text is finally split on single spaces.

     Args:
         data: Text to tokenize.

     Returns:
         A ``WordBag`` built from the resulting list of tokens.
     """
     letters_only = re.sub('[^a-zA-Z ]', ' ', data)
     single_spaced = re.sub(' +', ' ', letters_only)
     # Raw string: "\g" is not a valid escape sequence in a normal string
     # literal and triggers an invalid-escape warning on current Pythons.
     camel_split = re.sub('([a-z])([A-Z])', r'\g<1> \g<2>', single_spaced)
     # NOTE(review): text that starts or ends with a separator produces an
     # empty-string token from split(' '); confirm WordBag tolerates that.
     return WordBag(re.sub(' +', ' ', camel_split).split(' '))
Beispiel #3
0
 def transform(self, wordbag):
     """Return a new ``WordBag`` with known words replaced by synonyms.

     The lower-cased text of ``wordbag`` is processed token by token. A
     token is replaced only when it equals the lower-cased form of a word
     in the bag that has an entry in ``self._synonyms``.

     Args:
         wordbag: Bag providing ``get_words_list()`` and ``get_words_str()``.

     Returns:
         A ``WordBag`` built from the space-separated tokens that remain
         after the replacement.
     """
     replacements = {}
     for word in wordbag.get_words_list():
         key = word.lower()
         if key in self._synonyms:
             replacements[key] = self._synonyms[key]
     tokens = wordbag.get_words_str().lower().split(' ')
     # Whole-token lookup: str.replace() on the joined text would also
     # rewrite the word wherever it appears inside a longer word, and could
     # replace the output of an earlier substitution a second time.
     replaced = ' '.join(replacements.get(token, token) for token in tokens)
     # Split again so a multi-word synonym contributes one token per word.
     return WordBag(replaced.split(' '))
Beispiel #4
0
 def publish_services(self, service_list):
     """Build the word bag of every service and hand the results over.

     Each document's bag is either loaded from its corpus file (when
     ``self._load_corpus_from_file`` is set) or produced by the transformer
     returned from ``self._create_document_transformer``. The bag is then
     optionally expanded through ``self._document_transformer``, optionally
     saved to the corpus directory, preprocessed and collected. The
     collected documents are finally passed to ``self._after_publish``.

     Args:
         service_list: Sized collection of service documents to publish.
     """
     transformer = self._create_document_transformer(service_list)
     if self._dimensionality_reduction:
         # NOTE(review): the transformer is forwarded to fit() only when the
         # corpus is loaded from file. If fit() falls back to reading corpus
         # files when no transformer is given, this condition may be
         # inverted -- confirm against the fit() implementation.
         fit_args = ((service_list, transformer)
                     if self._load_corpus_from_file else (service_list,))
         self._document_transformer.get_transformer1().fit(*fit_args)
     documents = []
     for position, document in enumerate(service_list, start=1):
         progress = ('Loading document ' + str(position) + ' of ' +
                     str(len(service_list)))
         print(progress)
         # Step 1: obtain the raw bag, from disk or from the source document.
         if self._load_corpus_from_file:
             bag_of_words = WordBag().load_from_file(
                 join(self._corpus_path,
                      self._get_document_filename(document)))
         else:
             bag_of_words = transformer.transform(document)
         # Step 2: optional expansion, identical for both sources.
         if self._document_expansion:
             bag_of_words = self._document_transformer.transform(bag_of_words)
         # Step 3: optionally persist the bag for later runs.
         if self._save_corpus:
             target = join(self._corpus_path,
                           self._get_document_filename(document))
             bag_of_words.save_to_file(target)
         documents.append(self._preprocess(bag_of_words))
         self._service_array.append(document)
     self._after_publish(documents)
Beispiel #5
0
 def transform(self, url):
     """Load the WSDL file at ``url`` and return its content as a ``WordBag``.

     The reader is pointed at ``url`` first; the result of
     ``self._process_service()`` is then wrapped in a ``WordBag``.
     """
     self._reader.load_from_url(url)
     processed = self._process_service()
     return WordBag(processed)
Beispiel #6
0
 def transform(self, url):
     """Load the WADL file at ``url`` and return its content as a ``WordBag``.

     The reader is pointed at ``url`` first; the result of
     ``self._process_application()`` is then wrapped in a ``WordBag``.
     """
     self._reader.load_from_url(url)
     processed = self._process_application()
     return WordBag(processed)