def generate_normalized_aliases(name, aliases, type):
    """Return the de-duplicated, normalized aliases of an entity.

    ``name`` and every entry of ``aliases`` are passed through
    ``normalize_name``.  When ``type`` is ``"Person"``, the last
    whitespace-separated token of each normalized alias is added as an extra
    alias.

    Args:
        name: Primary name of the entity.
        aliases: Iterable of alternative names.
        type: Entity type; only the value ``"Person"`` changes the result.

    Returns:
        A list of unique aliases.  The order is unspecified because the
        duplicates are removed through a ``set``.
    """
    normalized = [normalize_name(name)]
    normalized.extend(normalize_name(alias) for alias in aliases)

    if type == "Person":
        # An alias that normalizes to an empty / whitespace-only string has no
        # tokens; indexing [-1] on it used to raise IndexError, so skip it.
        token_lists = [alias.split() for alias in normalized]
        normalized.extend(tokens[-1] for tokens in token_lists if tokens)

    return list(set(normalized))
# get_indexes(input_str, tree_dict) -> list
#
# Splits input_str on commas; for each piece it joins the output of
# M.lemmatize() and strips the result.  Each resulting category is passed
# through normalize_name(), split on whitespace, and the words are used one by
# one as keys to descend into tree_dict (used here as a nested mapping keyed by
# word).  Whenever the walk cannot continue, and again after the last word, the
# truthy value stored under the "$VAL$" key of the current node is appended to
# the returned list; the walk then restarts from the root of tree_dict.
#
# NOTE(review): previous_step is assigned several times but never read in the
#   visible code - it looks like a leftover and can probably be removed.
# NOTE(review): the trailing commented-out categ_dict lookup appears to be an
#   earlier flat-dictionary implementation - confirm and delete.
# NOTE(review): this definition is collapsed onto one physical line, so the
#   exact nesting of the if/else chain inside the inner loop cannot be
#   determined here; check the upstream file before restructuring.
def get_indexes(input_str, tree_dict): categories = [''.join(M.lemmatize(i)).strip() for i in input_str.split(',')] indxs = [] for category in categories: category_split = normalize_name(category).split() previous_step = None cur_step = tree_dict for category_part in category_split: if category_part in cur_step: previous_step = cur_step cur_step = cur_step[category_part] else: if cur_step.get("$VAL$"): indxs.append(cur_step.get("$VAL$")) if category_part in tree_dict: previous_step = None cur_step = tree_dict if category_part in cur_step: previous_step = cur_step cur_step = cur_step[category_part] if cur_step.get("$VAL$"): indxs.append(cur_step.get("$VAL$")) previous_step = None cur_step = tree_dict # ind = categ_dict.get(normalize_name(category)) # if ind: # indxs.append(ind) return indxs
def is_response_correct(track_artist_aliases, track_name_aliases, query_text):
    """Check a query against the artist aliases and the track-name aliases.

    ``query_text`` is passed through ``normalize_name`` and split on
    whitespace; the resulting word list is handed to ``match`` together with
    each alias.

    Returns:
        A ``(is_artist_good, is_track_good)`` tuple of booleans: whether any
        artist alias matched and whether any track-name alias matched.
    """
    query_words = normalize_name(query_text).split()

    # Every artist alias is matched before the results are combined.
    artist_results = [match(query_words, alias) for alias in track_artist_aliases]
    artist_matched = any(artist_results)

    # Track-name aliases are tried in order and the search stops at the first
    # one that matches.
    track_matched = False
    for alias in track_name_aliases:
        if match(query_words, alias):
            track_matched = True
            break

    return artist_matched, track_matched
#!/usr/bin/python
# -*- coding: utf-8 -*
import json
import common

# Normalizes the legislator records in ../data/ly_info.json, rewrites the file
# in compact and pretty-printed form, and logs the records without a
# "term_start" key.

# Read the records through a context manager so the file handle is closed
# before the same path is overwritten below.
with open('../data/ly_info.json') as source:
    objs = json.load(source)

# The return value of normalize_name is ignored, so any effect it has on a
# record must happen in place.
for ly_legislator in objs:
    common.normalize_name(ly_legislator)

# Compact dump back over the input file.
dump_data = json.dumps(objs, sort_keys=True, ensure_ascii=False)
common.write_file(dump_data, '../data/ly_info.json')

# Human-readable copy.
dump_data = json.dumps(objs, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../data/pretty_format/ly_info.json')

# (ad, name, ly link) of every record that has no "term_start" key.
# `in` replaces dict.has_key(), which no longer exists in Python 3.
empty_term_start = [
    (legislator["ad"], legislator["name"], legislator["links"]["ly"])
    for legislator in objs
    if "term_start" not in legislator
]
dump_data = json.dumps(empty_term_start, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../log/term_start_empty_on_lygovtw.json')
def file_to_dict(file):
    """Build name-to-id and id-to-name lookup tables from a CSV file.

    Each CSV row contributes its second column (passed through
    ``normalize_name``) as the name and its first column as the id.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A ``(type_to_id, id_to_type)`` tuple.  ``type_to_id`` maps every
        normalized name, plus the stripped concatenation of
        ``M.lemmatize(name)`` for each of them, to the id.  ``id_to_type``
        maps each id back to a normalized name and is built before the
        lemmatized keys are added, so it never contains them.
    """
    # Context manager: the original left the file handle open.
    with open(file) as handle:
        type_to_id = {normalize_name(row[1]): row[0] for row in csv.reader(handle)}

    id_to_type = {type_id: type_name for type_name, type_id in type_to_id.items()}

    # Also index every name by its lemmatized spelling.
    lemmatized = {
        ''.join(M.lemmatize(type_name)).strip(): type_id
        for type_name, type_id in type_to_id.items()
    }
    type_to_id.update(lemmatized)
    return type_to_id, id_to_type
def make_track_name_aliases(param):
    """Return a one-element list holding ``normalize_name(param)``."""
    normalized_title = normalize_name(param)
    return [normalized_title]
# NOTE(review): the leading `return dict_out` is the tail of a definition that
#   starts outside this view; it is left untouched.
#
# merge_dicts(dict_list_id_sorted) -> list of dict
#   Groups consecutive records that share the same "uid" into one dict with
#   the keys "uid", "name", "former_names" (copied from a record of that run)
#   and "each_term" (a list holding dict_per_ad(record) for every record of the
#   run).  Only adjacent records are compared, so the input must already be
#   sorted so that equal uids are neighbours.
#   NOTE(review): dict_list_id_sorted[0] is read unconditionally, so an empty
#     list raises IndexError - add a guard if empty input is possible.
#   NOTE(review): the line is collapsed, so it cannot be told from here whether
#     `pre_dict_item = dict_item` runs on every iteration or only in the else
#     branch (i.e. whether "name"/"former_names" come from the last or the
#     first record of a run) - confirm against the upstream file.
#
# Script part: loads ../data/npl_ly.json, calls common.normalize_name on each
# record, copies 'party' into 'elected_party' when the latter is missing or
# falsy, rewrites the compact and pretty-printed files, then writes the records
# merged per uid (sorted by uid, then ad) in pretty and compact form.
#   NOTE(review): the json.load(open(...)) handle is never closed.
#   NOTE(review): the last dump omits ensure_ascii=False, unlike the other
#     three - confirm whether that is intentional.
return dict_out def merge_dicts(dict_list_id_sorted): output = [] pre_dict_item = dict_list_id_sorted[0] same_id_term = [dict_per_ad(pre_dict_item)] for dict_item in dict_list_id_sorted[1:]: if dict_item["uid"] == pre_dict_item["uid"]: same_id_term.append(dict_per_ad(dict_item)) else: output.append({"uid": pre_dict_item["uid"], "name": pre_dict_item["name"], "former_names": pre_dict_item["former_names"], "each_term": same_id_term}) same_id_term = [dict_per_ad(dict_item)] pre_dict_item = dict_item output.append({"uid": pre_dict_item["uid"], "name": pre_dict_item["name"], "former_names": pre_dict_item["former_names"], "each_term": same_id_term}) return output objs = json.load(open('../data/npl_ly.json')) for npl_legislator in objs: common.normalize_name(npl_legislator) if not npl_legislator.get('elected_party'): npl_legislator['elected_party'] = npl_legislator['party'] dump_data = json.dumps(objs, sort_keys=True, ensure_ascii=False) common.write_file(dump_data, '../data/npl_ly.json') dump_data = json.dumps(objs, sort_keys=True, indent=4, ensure_ascii=False) common.write_file(dump_data, '../data/pretty_format/npl_ly.json') merged_npl = merge_dicts(sorted(objs, key=lambda d: [d["uid"], d['ad']])) dump_data = json.dumps(merged_npl, sort_keys=True, indent=4, ensure_ascii=False) common.write_file(dump_data, '../data/pretty_format/npl_ly(same_id_in_one_dict).json') dump_data = json.dumps(merged_npl, sort_keys=True) common.write_file(dump_data, '../data/npl_ly(same_id_in_one_dict).json')
#!/usr/bin/python
# -*- coding: utf-8 -*
import json
import common

# Normalizes the legislator records in ../data/ly_info.json, rewrites the file
# in compact and pretty-printed form, and logs the records without a
# "term_start" key.

# Read the records through a context manager so the file handle is closed
# before the same path is overwritten below.
with open('../data/ly_info.json') as source:
    objs = json.load(source)

# The return value of normalize_name is ignored, so any effect it has on a
# record must happen in place.
for ly_legislator in objs:
    common.normalize_name(ly_legislator)

# Compact dump back over the input file, using json.dumps defaults
# (non-ASCII characters escaped, keys not sorted).
dump_data = json.dumps(objs)
common.write_file(dump_data, '../data/ly_info.json')

# Human-readable copy.
dump_data = json.dumps(objs, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../data/pretty_format/ly_info.json')

# (ad, name, ly link) of every record that has no "term_start" key.
# `in` replaces dict.has_key(), which no longer exists in Python 3.
empty_term_start = [
    (legislator["ad"], legislator["name"], legislator["links"]["ly"])
    for legislator in objs
    if "term_start" not in legislator
]
dump_data = json.dumps(empty_term_start, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../log/term_start_empty_on_lygovtw.json')
import sys
from common import normalize_name
from parse_request import try_get_song_from_db
from parse_response import match

__author__ = 'jambo'

# Reads track ids from stdin (one per line) and prints tab-separated hint
# records for each track: an optional IMG line with the first artist's photo,
# an optional TEXT line listing that artist's best tracks, and an ANS line
# with the answer.
for query in sys.stdin:
    track_id = int(query.strip())
    track = try_get_song_from_db(track_id)
    if not track:
        # "I know nothing about this track"
        print("TEXT\tЯ ничего не знаю про этот трек")
        # sys.exit() rather than exit(): the latter is a helper injected by
        # the `site` module and is not guaranteed to exist.
        sys.exit()

    # Only the first listed artist is used for hints and for the answer.
    artist = track.artists[0]

    if artist.photo:
        print("IMG\t" + artist.photo)

    if artist.best_tracks:
        best_tracks = []
        for best_track in artist.best_tracks:
            title_words = normalize_name(best_track).split()
            # Hide a best track that matches one of the sought track's own
            # aliases so the hint does not give the answer away.
            is_answer = any(
                match(title_words, alias) for alias in track.track_name_aliases
            )
            # "загаданный трек" = "the track being guessed"
            best_tracks.append("загаданный трек" if is_answer else best_track)
        # "This artist's best-known tracks: ..."
        print("TEXT\t{}".format("Самые известные треки данного исполнителя: {}".format(", ".join(best_tracks))))

    if not artist.photo and not artist.best_tracks:
        # "I have no hints about this artist"
        print("TEXT\tУ меня нет подсказок про данного артиста")

    # "Answer: <artist> – «<track>»"
    print("ANS\tОтвет: {} – «{}»".format(artist.name, track.track_name))