def test_too_long_string(self):
    """An over-length string is cut off and suffixed with '...'."""
    test_string = 'Lorem ipsum dolor sit amet, consectetur adipiscing' \
                  ' elit. Suspendisse non risus risus amet.'
    truncated_test_string = 'Lorem ipsum dolor sit amet, consectetur' \
                            ' adipiscing elit. Suspendisse non risu...'
    # assertEquals is a deprecated alias removed in Python 3.12 -- use assertEqual.
    self.assertEqual(truncate_text(test_string), truncated_test_string)
def parse(self, data_string):
    """
    Parses input data and returns a dict of result data

    :param data_string: the string we want to parse
    :type data_string: str
    :return: dict with the query, timing data and the sorted results
    :rtype: dict
    """
    start_time = time.time()

    results = []
    threads = []

    # Creating/starting a thread for each parser module
    for module in self.config.enabled_modules:
        thread = ParserThread(self.config, module, data_string)
        thread.start()
        threads.append(thread)

    # Synchronizing/finishing parser threads
    for thr in threads:
        thr.join()

    # The threads are done, let's get the results out of them
    for thr in threads:
        results.extend(thr.results)

    # Unique list of all major types (set comprehension avoids the
    # intermediate list of list(set([...])))
    types = list({result.type for result in results})

    if results:
        # Getting a unique list of result types.
        all_types = []
        for res in results:
            all_types.extend([res.type, res.subtype])

        # Hierarchical Confidence Normalization
        normalizer_chain = HierarchicalNormalizerChain(
            self.config, types, list(set(all_types))
        )
        results = normalizer_chain.normalize(results)

        # Sorting our results by confidence value
        results = sorted(
            results, key=lambda result: result.confidence, reverse=True
        )

    return {
        'query': truncate_text(data_string),
        'date': datetime.datetime.now(),
        'execution_seconds': time.time() - start_time,
        # Truthiness check instead of len(...) > 0
        'top': results[0] if results else None,
        'results': {
            'count': len(results),
            'types': types,
            'matches': results
        }
    }
def parse(self, data_string):
    """
    Parses input data and returns a dict of result data

    :param data_string: the string we want to parse
    :type data_string: str
    :return: yields parse result data if there is any
    :rtype: dict
    """
    start_time = time.time()

    results = []
    threads = []

    # Creating/starting a thread for each parser module
    for module in self.config.enabled_modules:
        thread = ParserThread(self.config, module, data_string)
        thread.start()
        threads.append(thread)

    # Synchronizing/finishing parser threads
    for thr in threads:
        thr.join()

    # The threads are done, let's get the results out of them
    for thr in threads:
        results.extend(thr.results)

    # Unique list of all major types -- a set comprehension skips the
    # throwaway list that set([...]) would build
    types = list({result.type for result in results})

    if results:
        # Getting a unique list of result types.
        all_types = []
        for res in results:
            all_types.extend([res.type, res.subtype])

        # Hierarchical Confidence Normalization
        normalizer_chain = HierarchicalNormalizerChain(
            self.config, types, list(set(all_types))
        )
        results = normalizer_chain.normalize(results)

        # Sorting our results by confidence value
        results = sorted(
            results, key=lambda result: result.confidence, reverse=True
        )

    return {
        'query': truncate_text(data_string),
        'date': datetime.datetime.now(),
        'execution_seconds': time.time() - start_time,
        # Empty-list truthiness instead of len(...) > 0
        'top': results[0] if results else None,
        'results': {
            'count': len(results),
            'types': types,
            'matches': results
        }
    }
def test_short_limit(self):
    """An explicit character limit truncates the string at that limit."""
    # assertEquals is a deprecated alias removed in Python 3.12 -- use assertEqual.
    self.assertEqual(truncate_text(self.testString, 10), 'The qui...')
def test_too_short_string(self):
    """A string shorter than the limit is returned unchanged."""
    # assertEquals is a deprecated alias removed in Python 3.12 -- use assertEqual.
    self.assertEqual(truncate_text(self.testString), self.testString)