def sanitize_string_for_search(text_string, find_synonyms=True):
    """Normalize a raw text string so it can be indexed/searched in Solr.

    Pipeline: lowercase + UTF-8 decode, strip control characters and HTML
    entities, drop internet acronyms, short words and English stopwords,
    tokenize, then either expand synonyms (via find_synonyms_and_stem) or
    apply plain Porter stemming.

    :param text_string: raw byte string (or already-unicode text) to clean.
    :param find_synonyms: when True, enrich tokens with synonyms and stem
        them through find_synonyms_and_stem(); otherwise Porter-stem only.
    :returns: the sanitized unicode string.
    """
    # To avoid encoding problems when adding to Solr. Only decode when we
    # actually hold a byte string -- calling .decode() on an object that is
    # already unicode would implicitly encode first and can raise.
    text_string = text_string.strip().lower()
    if isinstance(text_string, str):
        text_string = text_string.decode('utf-8')

    # To avoid errors when adding to Solr
    text_string = sw_utils.remove_control_characters(text_string)

    # Strip HTML entities
    text_string = sw_utils.remove_ascii_codes_from_string(text_string)

    # Remove internet acronyms
    text_string = sw_utils.remove_internet_acronyms_from_string(text_string)

    # Remove short words
    text_string = sw_utils.remove_small_words_from_string(text_string)

    # Remove English stopwords (whole-word matches, plus trailing whitespace)
    pattern = re.compile(r'\b(' + r'|'.join(stopwords.words('english')) + r')\b\s*')
    text_string = pattern.sub('', text_string)

    string_tokens = nltk.word_tokenize(text_string)
    if find_synonyms:
        text_string = find_synonyms_and_stem(string_tokens)
    else:
        # Plain Porter stemming; the stemmer is only needed on this branch,
        # so build it here instead of unconditionally on every call.
        porter = stem.porter.PorterStemmer()
        text_string = " ".join([porter.stem(kw) for kw in string_tokens])

    return text_string
def join_tweet_texts(tweets):
    print strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " Juntando texto de los tweets..."
    text = ''
    for tweet in tweets:
        text += sw_utils.remove_internet_acronyms_from_string(tweet[u'content_stemmed']) + ' . '
    return text