def get_authors_by_venue(cached_list, cached_set, cdblp_venue, dblp_venue):
    """Match the authors of a C-DBLP venue against the authors of a DBLP venue.

    The C-DBLP publications are read from the local cache file
    ``cdblp-pub-cache.data``; the DBLP publications are fetched from the
    DBLP search API (at most 750 hits, as requested by the ``h`` query
    parameter).  Every C-DBLP author name is converted to an English name
    with ``CDBLPAuthor.get_english_name`` and looked up among the DBLP
    author names.

    Args:
        cached_list: Not used by this function; kept for interface
            compatibility.
        cached_set: Not used by this function; kept for interface
            compatibility.
        cdblp_venue: Mapping with a ``'title'`` entry naming the C-DBLP
            venue to look up in the cache.
        dblp_venue: Mapping with a ``'title'`` entry naming the DBLP venue;
            the title is lower-cased and inserted into the query URL as is.

    Returns:
        A dict keyed by the matched English name.  Each value is a dict
        with ``'zh'`` (the most recently seen C-DBLP name that produced
        this English name) and ``'count'`` (how many distinct C-DBLP names
        produced it).  Returns ``None`` (after printing a message) when the
        C-DBLP venue title is not present in the cache.
    """
    d = DBLPQuery.get_cache('cdblp-pub-cache.data')
    cdblp_title = cdblp_venue.get('title')
    if cdblp_title not in d:
        print('This C-DBLP venue is not on file.')
        return

    url = 'http://www.dblp.org/search/api/?q=ce:venue:{}:*&h=750&format=json'.format(
        dblp_venue.get('title').lower())
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as res:
        raw_json = res.read().decode('utf-8')

    # fix titles as { "Title ..." }: drop the braces wrapped around a quoted
    # string so only the quoted string itself remains.
    fixed_json = re.sub(r'({\s*)(".+")(\s*})', lambda m: m.group(2), raw_json)

    # get publications
    cdblp_pubs = d.get(cdblp_title)
    dblp_pubs = json.loads(fixed_json)

    # The cached publications are nested two mapping levels deep; every
    # publication carries its author names under 'authors'.
    cdblp_authors = set()
    for group in cdblp_pubs.values():
        for pubs in group.values():
            for pub in pubs:
                cdblp_authors.update(pub.get('authors'))

    dblp_authors = set()
    for pub in dblp_pubs.get('result').get('hits').get('hit'):
        try:
            names = pub.get('info').get('authors').get('author')
        except AttributeError:
            # A hit without 'info' or 'authors' is reported and skipped.
            print('PublicationException: %s' % pub.get('@id'))
            continue
        # NOTE(review): a single-author hit presumably arrives as a bare
        # string rather than a list -- confirm against the API.  Wrapping it
        # avoids adding the name one character at a time.
        if isinstance(names, str):
            names = [names]
        dblp_authors.update(names)

    pinyin = PinYin()
    pinyin.load_word()

    authors = {}
    for author in cdblp_authors:
        name_comp = CDBLPAuthor.get_english_name(author, pinyin)
        if name_comp['full_name'] in dblp_authors:
            key = name_comp['full_name']
        elif len(author) == 3 and name_comp['full_name_dash'] in dblp_authors:
            # Fallback for three-character names: try the dashed variant.
            # It must be looked up among the DBLP names (not in the result
            # dict), otherwise a dashed name could never be added.
            key = name_comp['full_name_dash']
        else:
            continue

        entry = authors.get(key)
        if entry is None:
            authors[key] = {'zh': name_comp['zh'], 'count': 1}
        else:
            # Several C-DBLP names mapped to the same English name.
            entry['zh'] = name_comp['zh']
            entry['count'] += 1
    return authors