コード例 #1
0
def write_to_file(disaster_name, single_disaster_data, dest_file, process,
                  stopword_handler):
    """Dump every document of one disaster to ``dest_file`` as CSV rows.

    One row is written per document, in the form
    ``[<instance>_<file id>, "<disaster> <instance>", <text>]``.

    Args:
        disaster_name: Label shared by all rows; first word of the labels
            column.
        single_disaster_data: Mapping of instance name to a mapping of
            file id to document text.
        dest_file: Path of the CSV file to create (truncated if present).
        process: When truthy, the text is passed through
            ``stopword_handler.remove_stopwords``, lowercased, split into
            word tokens, and each token is stemmed with ``stem``.
        stopword_handler: Object exposing ``remove_stopwords(text)``; only
            used when ``process`` is truthy.
    """
    import sys

    # The csv module needs a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3; 'wb' on Python 3 makes writerow()
    # raise TypeError.
    if sys.version_info[0] >= 3:
        handle = open(dest_file, 'w', newline='', encoding='utf-8')
    else:
        handle = open(dest_file, 'wb')

    with handle as f:
        spamwriter = csv.writer(f)
        for instance_name, instance_files in single_disaster_data.items():
            labels = " ".join([disaster_name, instance_name])
            for file_id, file_text in instance_files.items():
                if process:
                    file_text = stopword_handler.remove_stopwords(file_text)
                    # Each stemmed term keeps a leading space, so non-empty
                    # processed text starts with a space (as before).
                    file_text = "".join(
                        " %s" % stem(term)
                        for term in re.findall(r"\w+", file_text.lower()))
                row_id = "%s_%s" % (instance_name, file_id)
                spamwriter.writerow([row_id, labels, file_text])
コード例 #2
0
ファイル: model.py プロジェクト: czt616/myUtility
    def __update_stemmed_model(self,
                               text_string=None,
                               text_list=None,
                               text_dict=None,
                               input_stemmed=False):
        """Add term occurrences from the given input(s) to ``self._model``.

        The three inputs are not mutually exclusive: every non-empty one is
        applied, in the order string, list, dict.  Unless ``input_stemmed``
        is true, each term is lowercased and passed through ``stem`` before
        it is counted.

        Args:
            text_string: Raw text.  It is always lowercased and split into
                word tokens; each token counts once.
            text_list: Iterable of terms; each element counts once.
            text_dict: Mapping of term to the amount added for that term.
            input_stemmed: True when the terms are already stemmed, in which
                case list and dict terms are used exactly as given.
        """
        # NOTE(review): self._model is presumably a collections.Counter; it
        # must support update(iterable) and += on unseen keys. Confirm.
        if text_string:
            tokens = re.findall(r"\w+", text_string.lower())
            if not input_stemmed:
                tokens = [stem(token) for token in tokens]
            self._model.update(tokens)

        if text_list:
            if not input_stemmed:
                # Lowercase before stemming, as the string and dict branches
                # do; otherwise "Flood" and "flood" can end up under
                # different keys depending on which input form was used.
                text_list = [stem(term.lower()) for term in text_list]
            self._model.update(text_list)

        if text_dict:
            for term, amount in text_dict.items():
                key = term if input_stemmed else stem(term.lower())
                self._model[key] += amount

        # Reset unconditionally, even when no input was supplied.
        # NOTE(review): presumably marks a cached normalization as stale.
        self._normalized = False
コード例 #3
0
def do_stem(matchobj):
    """Return the stem of the full text matched by ``matchobj``.

    ``matchobj`` only needs a ``group(0)`` method, as a regex match object
    has.  Presumably meant as a replacement callback for ``re.sub``; confirm
    against callers.
    """
    matched_text = matchobj.group(0)
    return stem(matched_text)
コード例 #4
0
ファイル: other.py プロジェクト: czt616/myUtility
def do_stem(matchobj):
    """Stem the whole match (group 0) of ``matchobj`` and return the result.

    NOTE(review): looks like a substitution callback for ``re.sub``; verify
    at the call site.
    """
    whole_match = matchobj.group(0)
    stemmed = stem(whole_match)
    return stemmed