def _get_all_possible_hits(text):
    '''
        Collect the chunks of `text` that look like the start of a legal-proceedings section.

        Each entry of LPRC.get_document_parsing_regexes() is a (regex, anti_regex) pair:
        every match of `regex` in `text` is a candidate, and it is dropped if `anti_regex`
        (which may be None) is found inside it. Surviving candidates must also pass
        _check_whether_chunk_is_new_section().

        Finally the heading of the candidate (its first ten tokens) is checked against the
        header patterns: ALL of the valid patterns must be found in it and NONE of the
        invalid patterns may be found in it.

        Returns a set of the matched strings that pass every check.
    '''
    results = set()

    # The header patterns are fetched with no arguments, so they cannot depend on the
    # candidate; fetch them once instead of once per regex hit.
    valid_regexes, invalid_regexes = LPRC.good_patterns_and_bad_patterns_in_litigation_proceeding_headers()

    for regex, anti_regex in LPRC.get_document_parsing_regexes():

        for hit in re.finditer(regex, text):

            candidate = hit.group(0)

            if anti_regex is not None and re.search(anti_regex, candidate):
                continue

            if not _check_whether_chunk_is_new_section(candidate):
                continue

            # legal proceeding is always mentioned very, very close to the start of the real section
            # NOTE: the tokens are concatenated with no separator, so the header patterns
            # are searched against a space-free string (presumably they are written for
            # that form -- confirm before changing the join).
            heading = ''.join(nltk.word_tokenize(candidate)[:10])

            # Every valid pattern has to appear in the heading. Previously a failure here
            # was recorded but never consulted, so such candidates were still accepted.
            if not all(re.search(valid_regex, heading) for valid_regex in valid_regexes):
                continue

            # A single invalid pattern is enough to reject the candidate.
            if any(re.search(invalid_regex, heading) for invalid_regex in invalid_regexes):
                continue

            results.add(candidate)

    return results
def _check_whether_chunk_is_new_section(hit):
    '''
        Sanity filter for a chunk picked up by the document-parsing regexes.

        The chunk is tokenized and accepted (True) as soon as one token matches, from its
        first character, the pattern returned by
        LPRC.common_words_in_legitimate_legal_proceeding_hits(). A chunk with no such token
        is rejected (False); the intent is to weed out detritus such as table-of-contents
        entries that are not real litigation mentions.
    '''
    # one recognised word anywhere in the chunk is enough to accept it
    return any(
        re.match(LPRC.common_words_in_legitimate_legal_proceeding_hits(), word)
        for word in nltk.word_tokenize(hit)
    )