def main():
    """Build the character list file and optionally refresh Elasticsearch.

    Reads every JSON file named in ``args.input_json``, collects a mapping of
    character name -> profanity-cleaned summary text, writes the sorted
    character names (one per line) to ``args.character_file`` and, when
    ``args.update_elasticsearch`` is set, re-populates the ``char_summary``
    type of the ``private.ES_INDEX`` index.
    """
    args = parse_args()

    char_map = {}
    for input_json in args.input_json:
        # "UTF-8-sig" transparently drops a leading byte-order mark if present.
        with io.open(input_json, "r", encoding="UTF-8-sig") as json_in:
            data = json.load(json_in)

        # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
        for char_name, char_obj in data.items():
            # Entries whose name contains a parenthesis are skipped.
            if "(" in char_name:
                continue
            # Names may carry a URL-encoded apostrophe.
            char_name = char_name.replace("%27", "'")
            # A name seen in a later file overwrites the earlier summary.
            char_map[char_name] = profanity.clean(" ".join(char_obj["summary"]))

    with io.open(args.character_file, "w", encoding="UTF-8") as chars_out:
        for c in sorted(char_map):
            chars_out.write(c)
            chars_out.write("\n")

    if args.update_elasticsearch:
        auth = networking.get_aws_auth()
        es = networking.get_elasticsearch()
        # clear out the type first
        # NOTE(review): ignore=400 presumably tolerates an "index already
        # exists" response from create -- confirm against the client docs.
        es.indices.create(index=private.ES_INDEX, ignore=400)
        requests.delete(
            "/".join([private.ES_URL, private.ES_INDEX, "char_summary"]),
            auth=auth,
        )
        elasticsearch.helpers.bulk(
            es,
            get_insert_actions(char_map, private.ES_INDEX, "char_summary"),
        )
def test_bastard_adjective(self):
    """The adjective "bastard" is rewritten as "baseborn"."""
    uncleaned = "Barra is the bastard daughter of Robert I"
    cleaned = "Barra is the baseborn daughter of Robert I"
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(profanity.clean(uncleaned), cleaned)
def test_bastard_adjective_caps(self):
    """A capitalised "Bastard" adjective becomes lowercase "baseborn"."""
    uncleaned = "Ser Walder Rivers, also known as Bastard Walder,"
    cleaned = "Ser Walder Rivers, also known as baseborn Walder,"
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(profanity.clean(uncleaned), cleaned)
def test_bastard_noun(self):
    """The plural noun "bastards" is rewritten as "baseborn children"."""
    uncleaned = "Lord Walder Frey's bastards."
    cleaned = "Lord Walder Frey's baseborn children."
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(profanity.clean(uncleaned), cleaned)
def test_bastard_adjective_2(self):
    """The adjective "bastard" before "son" is rewritten as "baseborn"."""
    uncleaned = "Garrett Flowers is a bastard son of Garth the Gross."
    cleaned = "Garrett Flowers is a baseborn son of Garth the Gross."
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(profanity.clean(uncleaned), cleaned)