def load(chunk_len=200, debug=False):
    """Load ``word#description`` lines into a ``Bulk`` and process it.

    ``source_slug``, ``name_slug`` and ``slug`` are not assigned in this
    function; they are read from the enclosing (module) scope.

    Args:
        chunk_len: Passed straight through to ``Bulk`` as its third argument
            (presumably the batch size -- confirm against ``Bulk``).
        debug: Forwarded to ``check_word`` for every candidate word.

    Raises:
        ValueError: If a line contains no ``'#'`` separator at all.
    """
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print() function.
    print(source_slug)
    print(name_slug)

    bulk = Bulk(source_slug, name_slug, chunk_len)
    lines = get_lines(slug)
    for line in lines:
        # Split on the first '#' only: an unbounded split raised ValueError
        # (too many values to unpack) as soon as a description contained
        # another '#', aborting the whole load.
        word, desc = line.split('#', 1)
        word = prettify(word)
        desc = prettify(desc)
        # Original note: "-", "." and " "
        # (presumably the extra characters check_word accepts -- confirm).
        if not check_word(word, debug):
            continue
        bulk.add(word, desc)
    bulk.process()
def load(chunk_len=200, debug=False):
    """Load single-value lines for every ``(source_slug, name_slug)`` pair.

    For each pair taken from ``slugs`` (read from the enclosing scope) both
    slugs are printed, a fresh ``Bulk`` is created, and the lines returned by
    ``get_lines("<source_slug>_<name_slug>")`` are cleaned with ``prettify``
    (``remove_dot=False``), filtered through ``check_word`` and added to the
    bulk, which is then processed.

    Args:
        chunk_len: Passed straight through to ``Bulk`` as its third argument
            (presumably the batch size -- confirm against ``Bulk``).
        debug: Forwarded to ``check_word`` for every candidate value.
    """
    for source_slug, name_slug in slugs:
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print() function.
        print(source_slug)
        print(name_slug)

        batch = Bulk(source_slug, name_slug, chunk_len)
        combined_slug = "%s_%s" % (source_slug, name_slug)
        for raw_value in get_lines(combined_slug):
            cleaned = prettify(raw_value, remove_dot=False)
            # Original note: "-" and "."
            # (presumably the extra characters check_word accepts -- confirm).
            if check_word(cleaned, debug):
                batch.add(cleaned)
        # Each slug pair owns its own Bulk, so it is processed before moving
        # on to the next pair.
        batch.process()
def load(chunk_len=200, debug=False):
    """Load ``word#description`` lines, upper-casing the word, into a ``Bulk``.

    ``source_slug``, ``name_slug`` and ``slug`` are not assigned in this
    function; they are read from the enclosing (module) scope.

    Each line is split at its first ``'#'``. The left part is cleaned with
    ``prettify`` and upper-cased; the right part goes through
    ``remove_begin`` (with ``','`` and ``'||'``), ``strip`` and ``prettify``.
    Entries whose word fails ``check_word`` are skipped.

    Args:
        chunk_len: Passed straight through to ``Bulk`` as its third argument
            (presumably the batch size -- confirm against ``Bulk``).
        debug: Forwarded to ``check_word`` for every candidate word.

    Raises:
        ValueError: If a line contains no ``'#'`` separator at all.
    """
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print() function.
    print(source_slug)
    print(name_slug)

    batch = Bulk(source_slug, name_slug, chunk_len)
    for raw_line in get_lines(slug):
        head, tail = raw_line.split('#', 1)
        # TODO: two kinds of entries are not covered (sample source lines):
        #   - Господин# (ж. р. госпожа)#, владелец, ...
        #   - Приходить# (прийти# ), прибыть, ...
        # TODO: entries containing commas could be handled separately
        # (sample headwords):
        #   - аутентичный (автентичный, отентичный)
        #   - барон, баронет
        #   - бросать деньги (за окно, на ветер)
        headword = prettify(head).upper()
        trimmed = remove_begin(tail, [',', '||']).strip()
        description = prettify(trimmed)
        # Original note: "-", "," and " "
        # (presumably the extra characters check_word accepts -- confirm).
        if check_word(headword, debug):
            batch.add(headword, description)
    batch.process()