Ejemplo n.º 1
0
def generate_normalized_aliases(name, aliases, type):
    """Return the de-duplicated, normalized aliases of an entity.

    ``name`` and every entry of ``aliases`` are passed through
    ``normalize_name``.  When ``type`` equals ``"Person"``, the last
    whitespace-separated token of each normalized alias is added as an
    extra alias.

    Args:
        name: Primary name of the entity.
        aliases: Iterable of alternative names.
        type: Entity kind; only the exact value ``"Person"`` triggers the
            last-token expansion.

    Returns:
        A list of unique aliases.  The order is arbitrary because the
        result is built from a ``set``.
    """
    normalized = [normalize_name(name)]
    normalized.extend(normalize_name(alias) for alias in aliases)
    if type == "Person":
        # An alias that normalizes to an empty / whitespace-only string has
        # no tokens; skip it instead of raising IndexError on ``[-1]``.
        token_lists = (alias.split() for alias in list(normalized))
        normalized.extend(tokens[-1] for tokens in token_lists if tokens)
    return list(set(normalized))
Ejemplo n.º 2
0
def get_indexes(input_str, tree_dict):
    """Collect the ``"$VAL$"`` entries reached by walking ``tree_dict``.

    ``input_str`` is split on commas; each piece is lemmatized with
    ``M.lemmatize``, stripped, normalized with ``normalize_name`` and split
    into whitespace-separated parts.  The parts are then used as successive
    keys into the nested mapping ``tree_dict``.

    Args:
        input_str: Comma-separated category text.
        tree_dict: Nested mapping keyed by category parts; a node may carry
            a value under the key ``"$VAL$"``.
            NOTE(review): assumes every non-``"$VAL$"`` entry is itself a
            mapping -- confirm against the code that builds the tree.

    Returns:
        List of the ``"$VAL$"`` values found, in encounter order.  Falsy
        values (e.g. ``0`` or ``None``) are never collected.
    """
    categories = [''.join(M.lemmatize(piece)).strip() for piece in input_str.split(',')]
    indxs = []
    for category in categories:
        cur_step = tree_dict
        for category_part in normalize_name(category).split():
            if category_part in cur_step:
                cur_step = cur_step[category_part]
                continue
            # The walk cannot continue with this part.  If the current node
            # carries a value, emit it; if the part is also a top-level key,
            # restart the walk from the root with that part.  Otherwise the
            # part is ignored and the current node is kept.
            value = cur_step.get("$VAL$")
            if value:
                indxs.append(value)
                if category_part in tree_dict:
                    cur_step = tree_dict[category_part]

        # Emit the value of whatever node the category ended on.
        value = cur_step.get("$VAL$")
        if value:
            indxs.append(value)
    return indxs
Ejemplo n.º 3
0
def is_response_correct(track_artist_aliases, track_name_aliases, query_text):
    """Check a query against the artist aliases and the track-name aliases.

    The query is normalized with ``normalize_name`` and split on whitespace;
    the resulting token list is passed to ``match`` together with each alias.

    Returns:
        A ``(artist_matched, track_matched)`` tuple of booleans.
    """
    tokens = normalize_name(query_text).split()
    # Every artist alias is evaluated up front; the track-name check below
    # stops at the first alias that matches.
    artist_hits = [match(tokens, alias) for alias in track_artist_aliases]
    artist_matched = any(artist_hits)
    track_matched = any(match(tokens, alias) for alias in track_name_aliases)
    return artist_matched, track_matched
Ejemplo n.º 4
0
#!/usr/bin/python
# -*- coding: utf-8 -*
import json
import common

# Load the legislator records; the context manager closes the file before
# the same path is rewritten below.
with open('../data/ly_info.json') as source:
    objs = json.load(source)

# The return value is discarded, so common.normalize_name presumably updates
# each record in place -- verify against common.py.
for ly_legislator in objs:
    common.normalize_name(ly_legislator)

# Compact dump back to the source path, then an indented copy.
dump_data = json.dumps(objs, sort_keys=True, ensure_ascii=False)
common.write_file(dump_data, '../data/ly_info.json')
dump_data = json.dumps(objs, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../data/pretty_format/ly_info.json')

# Log (ad, name, ly link) for every record that has no "term_start" key.
# ``in`` replaces dict.has_key(), which exists only on Python 2.
empty_term_start = [
    (legislator["ad"], legislator["name"], legislator["links"]["ly"])
    for legislator in objs
    if "term_start" not in legislator
]
dump_data = json.dumps(empty_term_start,
                       sort_keys=True,
                       indent=4,
                       ensure_ascii=False)
common.write_file(dump_data, '../log/term_start_empty_on_lygovtw.json')
Ejemplo n.º 5
0
def file_to_dict(file):
    """Build name->id and id->name lookups from a CSV file.

    Each CSV row is read as ``row[0]`` = id and ``row[1]`` = name; the name
    is normalized with ``normalize_name`` before being used as a key.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A ``(type_to_id, id_to_type)`` tuple.  ``type_to_id`` maps both the
        normalized names and their lemmatized forms (``M.lemmatize``) to
        ids.  ``id_to_type`` maps each id back to a normalized name only; it
        is built before the lemmatized keys are merged in.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file) as handle:
        type_to_id = {normalize_name(row[1]): row[0] for row in csv.reader(handle)}
    id_to_type = {type_id: type_name for type_name, type_id in type_to_id.items()}
    lemmatized = {
        ''.join(M.lemmatize(type_name)).strip(): type_id
        for type_name, type_id in type_to_id.items()
    }
    type_to_id.update(lemmatized)
    return type_to_id, id_to_type
Ejemplo n.º 6
0
def make_track_name_aliases(param):
    """Return a one-element list holding ``normalize_name(param)``."""
    normalized_title = normalize_name(param)
    return [normalized_title]
Ejemplo n.º 7
0
    return dict_out

def _merged_entry(last_item, terms):
    """Build one merged record from the last item of a uid run and its terms."""
    return {
        "uid": last_item["uid"],
        "name": last_item["name"],
        "former_names": last_item["former_names"],
        "each_term": terms,
    }


def merge_dicts(dict_list_id_sorted):
    """Merge consecutive records that share a ``uid`` into one record each.

    Args:
        dict_list_id_sorted: Sequence of dicts with at least the keys
            ``"uid"``, ``"name"`` and ``"former_names"``.  Only adjacent
            items are compared, so records with the same uid must already
            be next to each other (e.g. sorted by uid).

    Returns:
        A list with one dict per run of equal uids.  ``"uid"``, ``"name"``
        and ``"former_names"`` come from the last item of the run;
        ``"each_term"`` holds ``dict_per_ad(item)`` for every item of the
        run, in input order.  An empty input yields an empty list.
    """
    # Guard: indexing [0] below would raise IndexError on empty input.
    if not dict_list_id_sorted:
        return []

    output = []
    pre_dict_item = dict_list_id_sorted[0]
    same_id_term = [dict_per_ad(pre_dict_item)]
    for dict_item in dict_list_id_sorted[1:]:
        if dict_item["uid"] == pre_dict_item["uid"]:
            same_id_term.append(dict_per_ad(dict_item))
        else:
            # uid changed: close the previous run and start a new one.
            output.append(_merged_entry(pre_dict_item, same_id_term))
            same_id_term = [dict_per_ad(dict_item)]
        pre_dict_item = dict_item
    # Flush the final run, which the loop never closes.
    output.append(_merged_entry(pre_dict_item, same_id_term))
    return output

# Load the records; the context manager closes the file before the same
# path is rewritten below.
with open('../data/npl_ly.json') as source:
    objs = json.load(source)

for npl_legislator in objs:
    # The return value is discarded, so common.normalize_name presumably
    # updates the record in place -- verify against common.py.
    common.normalize_name(npl_legislator)
    # Fall back to the 'party' value when 'elected_party' is missing or falsy.
    if not npl_legislator.get('elected_party'):
        npl_legislator['elected_party'] = npl_legislator['party']

# Compact dump back to the source path, then an indented copy.
dump_data = json.dumps(objs, sort_keys=True, ensure_ascii=False)
common.write_file(dump_data, '../data/npl_ly.json')
dump_data = json.dumps(objs, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../data/pretty_format/npl_ly.json')

# Sort by (uid, ad) so records with the same uid are adjacent, which
# merge_dicts relies on, then write the merged view in both formats.
merged_npl = merge_dicts(sorted(objs, key=lambda d: [d["uid"], d['ad']]))
dump_data = json.dumps(merged_npl, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../data/pretty_format/npl_ly(same_id_in_one_dict).json')
# NOTE(review): this dump omits ensure_ascii=False, unlike the others, so
# non-ASCII text is escaped here -- confirm that is intended.
dump_data = json.dumps(merged_npl, sort_keys=True)
common.write_file(dump_data, '../data/npl_ly(same_id_in_one_dict).json')
Ejemplo n.º 8
0
#!/usr/bin/python
# -*- coding: utf-8 -*
import json
import common


# Load the legislator records; the context manager closes the file before
# the same path is rewritten below.
with open('../data/ly_info.json') as source:
    objs = json.load(source)

# The return value is discarded, so common.normalize_name presumably updates
# each record in place -- verify against common.py.
for ly_legislator in objs:
    common.normalize_name(ly_legislator)

# Default-format dump back to the source path, then an indented copy.
dump_data = json.dumps(objs)
common.write_file(dump_data, '../data/ly_info.json')
dump_data = json.dumps(objs, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../data/pretty_format/ly_info.json')

# Log (ad, name, ly link) for every record that has no "term_start" key.
# ``in`` replaces dict.has_key(), which exists only on Python 2.
empty_term_start = [
    (legislator["ad"], legislator["name"], legislator["links"]["ly"])
    for legislator in objs
    if "term_start" not in legislator
]
dump_data = json.dumps(empty_term_start, sort_keys=True, indent=4, ensure_ascii=False)
common.write_file(dump_data, '../log/term_start_empty_on_lygovtw.json')
Ejemplo n.º 9
0
import sys
from common import normalize_name
from parse_request import try_get_song_from_db
from parse_response import match

__author__ = 'jambo'


# Each stdin line carries one integer track id.  For every id, print
# tab-separated protocol lines (IMG / TEXT / ANS) with hints and the answer.
for query in sys.stdin:
    track_id = int(query.strip())
    track = try_get_song_from_db(track_id)
    if not track:
        print("TEXT\tЯ ничего не знаю про этот трек")
        # sys.exit() instead of the site-provided exit(), which is intended
        # for interactive sessions and is absent when run with ``python -S``.
        sys.exit()

    # All hints are taken from the first listed artist.
    artist = track.artists[0]
    if artist.photo:
        print("IMG\t" + artist.photo)
    if artist.best_tracks:
        best_tracks = []
        for best_track in artist.best_tracks:
            tokens = normalize_name(best_track).split()
            # A best-track title that matches one of this track's name
            # aliases is replaced by a placeholder (the literal reads
            # "the track being guessed"), presumably so the hint does not
            # reveal the answer.
            is_answer = any(match(tokens, alias) for alias in track.track_name_aliases)
            best_tracks.append("загаданный трек" if is_answer else best_track)
        print("TEXT\t{}".format("Самые известные треки данного исполнителя: {}".format(", ".join(best_tracks))))
    if not artist.photo and not artist.best_tracks:
        print("TEXT\tУ меня нет подсказок про данного артиста")
    print("ANS\tОтвет: {} – «{}»".format(artist.name, track.track_name))