def add_text_features(self):
        """
        function: add_text_features

        arguments: self
        returns: None 

        This function processes the "description" feature of the data. It stores the 
        output in spares matrix format in the variables self.train_desc_counts and self.test_desc_counts.
        If self.tfidf is True, then we perform tfidf anlysis. If self.feature_hash_n > -1, then 
        we hash the text down.

        """
        # stack the train and test sets so the vectorizer sees a shared vocabulary
        total_data = pd.concat([self.train_data, self.test_data])

        # compile a regex to look for html tags to remove
        htmlregex = re.compile(r'<.*?>')

        # get a list of the descriptions of the apartments
        # in which all HTML tags have been removed
        descriptions = [
            re.sub(htmlregex, ' ', text)
            for text in list(total_data['description'])
        ]
        del total_data

        # vectorize the descriptions into a sparse bag-of-words count matrix
        cv = CountVectorizer()
        desc_counts = cv.fit_transform(descriptions)
        print("Num unique words: " + str(len(cv.vocabulary_)))

        if self.tfidf and self.feature_hash_n != -1:
            raise ValueError(
                'cannot perform both tf-idf and feature hashing')

        # perform tf-idf analysis
        if self.tfidf:
            desc_counts = TfidfTransformer().fit_transform(desc_counts)

        # perform feature hashing: convert each row of the count matrix into a
        # {term_index: count} dict, then hash it down to feature_hash_n columns
        if self.feature_hash_n != -1:
            ds = []
            for i in range(desc_counts.shape[0]):
                row = desc_counts.getrow(i)
                # FeatureHasher expects string keys, so stringify the indices
                ds.append(dict(zip((str(j) for j in row.indices), row.data)))
            desc_counts = FeatureHasher(
                n_features=self.feature_hash_n).transform(ds)

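        # split the stacked matrix back into its train and test halves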
        self.train_desc_counts = desc_counts[:self.train_n, :]
        self.test_desc_counts = desc_counts[self.train_n:, :]
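

# ---------------------------------------------------------------------------
# Standalone sketch (not part of the class above): a minimal, self-contained
# walk-through of the same pipeline on a couple of made-up descriptions. The
# toy strings and the n_features value are hypothetical, for illustration only.
def _demo_text_features():
    import re

    from sklearn.feature_extraction import FeatureHasher
    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

    descriptions = [
        '<p>Sunny two-bedroom with <b>hardwood</b> floors</p>',
        '<div>Cozy studio near the park</div>',
    ]

    # strip HTML tags, mirroring the regex used in add_text_features
    descriptions = [re.sub(r'<.*?>', ' ', d) for d in descriptions]

    # bag-of-words counts
    cv = CountVectorizer()
    counts = cv.fit_transform(descriptions)

    # option 1: reweight the counts with tf-idf
    tfidf_counts = TfidfTransformer().fit_transform(counts)

    # option 2: hash each row's {term_index: count} dict down to 8 columns
    ds = []
    for i in range(counts.shape[0]):
        row = counts.getrow(i)
        ds.append(dict(zip((str(j) for j in row.indices), row.data)))
    hashed = FeatureHasher(n_features=8).transform(ds)

    print("tf-idf shape: %s, hashed shape: %s" %
          (tfidf_counts.shape, hashed.shape))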