def load(chunk_len=200, debug=False):
    """Split the source text into entries and feed them to a ``Bulk``.

    Relies on ``source_slug``, ``name_slug`` and ``slug``, none of which are
    defined in this function (presumably module-level globals -- confirm).

    Steps:
      1. Print ``source_slug`` and ``name_slug``.
      2. Build ``Bulk(source_slug, name_slug, chunk_len)``.
      3. For every part returned by ``get_parts(slug, '\\n ')``, normalise it
         with ``prettify(join_lines(part))`` and look for a leading headword.
         The headword must start with a hyphen, an uppercase Cyrillic letter
         or a digit, may continue with those characters, whitespace or ``?``,
         and must be followed by a non-word character.
      4. If no headword is found, pass the text to ``bulk.append_desc``
         (presumably a continuation of the previous entry -- confirm in
         ``Bulk``). Otherwise prettify the headword with ``encoding=False``
         and, if ``check_word`` accepts it, call ``bulk.add(word, text)``.
      5. Call ``bulk.process()`` once all parts were visited.

    Args:
        chunk_len: Forwarded unchanged as the third argument of ``Bulk``.
        debug: Forwarded unchanged as the second argument of ``check_word``.
    """
    print(source_slug)
    print(name_slug)

    # Compiled once up front; the pattern is anchored with ``^`` and is not
    # used with MULTILINE, so it can only match at the start of the text.
    headword_re = re.compile(u'^([-А-ЯЁ\d][-А-ЯЁ\d\s?]*)\W', re.UNICODE)
    collector = Bulk(source_slug, name_slug, chunk_len)

    # To limit a debugging run, slice the result of get_parts() here.
    for raw_part in get_parts(slug, '\n '):
        text = prettify(join_lines(raw_part))
        found = headword_re.findall(text)
        if found:
            headword = prettify(found[0], encoding=False)
            # Entries whose headword is rejected are skipped entirely.
            if check_word(headword, debug):
                collector.add(headword, text)
        else:
            # No headword at the start of this part.
            collector.append_desc(text)

    collector.process()