def create_lastname_list_from_personid():
    '''
    Group the personids returned by get_all_names_from_personid() by their
    last name cluster string.

    Note that, despite the function name, the result is not a dictionary:
    it is a single-pass generator of triples.

    @return: generator yielding (last_name, [personid, ...], nbibs_sum),
        one triple per distinct last name cluster string, in ascending
        order of that string. nbibs_sum is the sum of the third field of
        every row that fell into the group.
    '''
    by_last_name = itemgetter(1)

    # Source rows: ((personid, [full Name1], Nbibs) ... )
    # Only the first element of the name collection is used to derive the
    # cluster string. next(iter(...)) is the portable spelling of the
    # Python-2-only iter(...).next().
    # ((personid, last_name, Nbibs) ... ), sorted by last_name. The sort is
    # required because groupby only merges adjacent rows with equal keys.
    rows = sorted(
        ((row[0],
          generate_last_name_cluster_str(next(iter(row[1]))),
          row[2])
         for row in get_all_names_from_personid()),
        key=by_last_name)

    # Each group is materialized into a list because it is traversed twice
    # below (once for the personids, once for the sum), while the group
    # iterators handed out by groupby can only be consumed once.
    grouped = ((last_name, list(members))
               for last_name, members in groupby(rows, key=by_last_name))

    # ((last_name, [personid ... ], Nbibs) ... )
    # A list comprehension (rather than map) guarantees a real list of
    # personids regardless of the interpreter version.
    return ((last_name,
             [member[0] for member in members],
             sum(member[2] for member in members))
            for last_name, members in grouped)
def create_lastname_list_from_personid():
    '''
    Build a dictionary mapping a normalized last name to the list of
    personids which have this last name.

    The normalized last name is the first element returned by
    split_name_parts() for the utf-8 decoded name, with every character
    other than ASCII letters and digits removed, and then lowercased.

    @return: { last_name : [personid ... ] ... }; the personids of each
        list appear in the order the rows were returned by
        get_all_names_from_personid().
    '''
    # Drops everything that is not an ASCII letter or digit.
    artifact_removal = re.compile("[^a-zA-Z0-9]")

    ret = {}
    # Rows: ((personid, full Name1) ... )
    # NOTE(review): row[1] is assumed to be a utf-8 encoded byte string,
    # since it is decoded before being split -- confirm against the
    # data source.
    for row in get_all_names_from_personid():
        last_name = split_name_parts(row[1].decode('utf-8'))[0]
        last_name = artifact_removal.sub("", last_name).lower()
        # Append in place; rebuilding the list with "+" on every row would
        # copy it each time and make large groups quadratic.
        ret.setdefault(last_name, []).append(row[0])

    return ret
def create_lastname_list_from_personid():
    '''
    Build a dictionary mapping a normalized last name to the list of
    personids which have this last name.

    The normalized last name is the first element returned by
    split_name_parts() for the utf-8 decoded name, with every character
    other than ASCII letters and digits removed, and then lowercased.

    @return: { last_name : [personid ... ] ... }; the personids of each
        list appear in the order the rows were returned by
        get_all_names_from_personid().
    '''
    # Drops everything that is not an ASCII letter or digit.
    artifact_removal = re.compile("[^a-zA-Z0-9]")

    ret = {}
    # Rows: ((personid, full Name1) ... )
    # NOTE(review): row[1] is assumed to be a utf-8 encoded byte string,
    # since it is decoded before being split -- confirm against the
    # data source.
    for row in get_all_names_from_personid():
        last_name = split_name_parts(row[1].decode('utf-8'))[0]
        last_name = artifact_removal.sub("", last_name).lower()
        # Append in place; rebuilding the list with "+" on every row would
        # copy it each time and make large groups quadratic.
        ret.setdefault(last_name, []).append(row[0])

    return ret