Example #1
0
def jaccard_distance(item1, item2):
    """
    Jaccard distance between two items (curse of dimensionality applies).

    distance(A, B) = 1 - n(A intersection B) / n(A union B)

    Features are the first 100 regex word tokens of each item's
    lowercased ``title`` + ``body`` after stopword removal.

    Returns a float in [0, 1]; 1.0 (max distance) when both feature
    sets are empty (Jaccard is undefined in that case).
    """
    # NOTE: r'\w+' — raw string so the \w escape is not interpreted by the
    # Python string parser (non-raw '\w' is deprecated escape syntax).
    feature1 = set(re.findall(r'\w+', strip_stopwords(
        "%s %s" % (item1.title.lower(), item1.body.lower())))[:100])
    feature2 = set(re.findall(r'\w+', strip_stopwords(
        "%s %s" % (item2.title.lower(), item2.body.lower())))[:100])

    if len(feature1) == 0 and len(feature2) == 0:
        return 1  # max distance
    similarity = 1.0 * len(feature1.intersection(feature2)) / len(feature1.union(feature2))
    return 1 - similarity
Example #2
0
def normalize(text):
  """Normalize *text*: treat None/empty as '', strip stopwords, then
  remove everything matching NOT_WORD_NUM_RE."""
  cleaned = strip_stopwords(text or '')
  return NOT_WORD_NUM_RE.sub('', cleaned)
Example #3
0
def _tokenize(search_str):
    """Strip stopwords and tokenize *search_str* if not already a list.

    A string input is sanitized (punctuation removed), stopword-stripped,
    and whitespace-split; any other input is coerced to a list of its
    elements and returned unchanged.
    """
    if isinstance(search_str, basestring):
        # TODO determine appropriate characters that should be retained
        # Raw string so \w and \s are passed through to the regex engine
        # rather than parsed as (deprecated) string escapes.
        sanitized_str = re.sub(r'[^\w\s]+', '', search_str)
        cleaned_str = stopwords.strip_stopwords(sanitized_str)
        toks = cleaned_str.split()
    else:
        toks = list(search_str)
    return toks
Example #4
0
def jaccard_distance(item1, item2):
    """
    Jaccard distance between two items (curse of dimensionality applies).

    distance(A, B) = 1 - n(A intersection B) / n(A union B)

    Features are the first 100 regex word tokens of each item's
    lowercased ``title`` + ``body`` after stopword removal.

    Returns a float in [0, 1]; 1 (max distance) when both feature sets
    are empty, since Jaccard similarity is undefined there.
    """

    def _features(item):
        """First 100 word tokens of the item's lowercased title+body,
        stopwords removed, as a set."""
        # r'\w+' — raw string so \w is not treated as a (deprecated)
        # string escape.
        text = strip_stopwords(
            "%s %s" % (item.title.lower(), item.body.lower()))
        return set(re.findall(r'\w+', text)[:100])

    feature1 = _features(item1)
    feature2 = _features(item2)

    if len(feature1) == 0 and len(feature2) == 0:
        return 1  # max distance
    similarity = 1.0 * len(feature1.intersection(feature2)) / len(
        feature1.union(feature2))
    return 1 - similarity
def build_slug(slug):
    """ Builds a slug and strips all stopwords from a slug and builds it again.

    Stopwords are removed, the result is slugified, and any hyphen-separated
    token that parses as a number is dropped before re-joining.
    """

    def _is_number(token):
        # NOTE(review): float() also accepts tokens like 'nan' and 'inf',
        # so those words are dropped too — confirm this is intended.
        try:
            float(token)
        except ValueError:
            return False
        return True

    slug = slugify(strip_stopwords(slug))
    kept = [word for word in slug.split('-') if not _is_number(word)]
    return u'-'.join(kept)