def run(self):
    """Match ``self.dblp_urlpt`` against ``self.cdblp_author`` and publish the outcome.

    ``DBLPQuery.author_match`` is called once; its ``'ratio'``, ``'object'``
    and ``'overlap'`` entries are stored in ``Data.result``, ``Data.objects``
    and ``Data.overlaps`` respectively, each keyed by ``self.dblp_urlpt``.
    A progress line is printed before and after the match.
    """
    print(self.dblp_urlpt, 'is created.')
    # Call the matcher a single time and reuse the result: this avoids
    # repeating the same work three times and guarantees the three stored
    # fields all come from the same match.
    match = DBLPQuery.author_match(self.dblp_urlpt, self.cdblp_author)
    Data.result[self.dblp_urlpt] = match['ratio']
    Data.objects[self.dblp_urlpt] = match['object']
    Data.overlaps[self.dblp_urlpt] = match['overlap']
    print(self.dblp_urlpt, 'is done.')
def query(self, query_type, author1, author2, cdblp_venue, dblp_venue, submit):
    """Render ``query.html`` for the requested kind of lookup.

    ``query_type`` selects the branch: ``'pub'``, ``'coauthor'``, ``'venue'``,
    ``'coauthor-pub'`` or ``'join-venue'``. Any other value renders the
    template with no arguments. ``submit`` is accepted but not used here.

    Returns whatever ``tmpl.render(...)`` returns.
    """
    tmpl = lookup.get_template('query.html')
    authors = DBLPQuery.get_cached_authors()
    # Every known name: the 'full_name' values plus the 'zh' values.
    name_set = {a['full_name'] for a in authors} | {a['zh'] for a in authors}

    if query_type == 'pub':
        # NOTE(review): called as a bare name, unlike the DBLPQuery.* calls
        # in the other branches -- confirm it is in scope in this module.
        pubs = get_publications_by_u(authors, name_set, author1)
        return tmpl.render(type=query_type, data=pubs, author=author1)

    if query_type == 'coauthor':
        coauthors = DBLPQuery.get_coauthors_by_author(authors, name_set, author1)
        return tmpl.render(type=query_type, data=coauthors, author=author1)

    if query_type == 'venue':
        venues = DBLPQuery.get_venues_by_author(authors, name_set, author1)
        rows = [
            {'venue': venue, 'type': info.get('type'), 'count': info.get('count')}
            for venue, info in venues.items()
        ]
        # Highest count first.
        rows = sorted(rows, key=lambda row: row['count'], reverse=True)
        return tmpl.render(type=query_type, data=rows, author=author1)

    if query_type == 'coauthor-pub':
        shared = DBLPQuery.get_coauthored_publications_by_authors(
            authors, name_set, author1, author2)
        return tmpl.render(type=query_type, data=shared,
                           author1=author1, author2=author2)

    if query_type == 'join-venue':
        by_author = DBLPQuery.get_authors_by_venue(
            authors, name_set, {'title': cdblp_venue}, {'title': dblp_venue})
        # Rows are deliberately left in the mapping's iteration order; no
        # sort by count is applied in this branch.
        rows = [
            {'name': info['zh'], 'name_en': name_en, 'count': info['count']}
            for name_en, info in by_author.items()
        ]
        return tmpl.render(type=query_type, data=rows,
                           cdblp_venue=cdblp_venue, dblp_venue=dblp_venue)

    return tmpl.render()
def get_match(author_name):
    """Rank candidate DBLP entries for the CDBLP author ``author_name``.

    Steps, as performed below:

    1. Call ``Data.clear()`` and build a ``CDBLPAuthor`` for the name.
    2. Build a urlpt of the form ``<initial>/<last_name>:<first_name>`` from
       ``CDBLPAuthor.getEnglishName`` and fetch the URL returned by
       ``DBLPQuery.get_dblp_url``.
    3. Collect one candidate urlpt per ``<li class="homonym">`` element on
       that page; when there are none, the urlpt itself is the only candidate
       and gets the placeholder affiliation ``'Default University'``.
    4. Run one ``ThreadMatch`` per candidate and wait for all of them.
    5. Keep the entries of ``Data.result`` whose value exceeds 0.1.

    Returns:
        dict: ``{'author': <CDBLPAuthor>, 'result': [...]}`` where each result
        item has keys ``'urlpt'``, ``'aff'`` and ``'rank'``, sorted by
        ``'rank'`` in descending order.
    """
    Data.clear()
    author_cdblp = CDBLPAuthor(author_name)
    name_parts = CDBLPAuthor.getEnglishName(author_name)
    urlpt = '{}/{}:{}'.format(name_parts['last_name'][0].lower(),
                              name_parts['last_name'],
                              name_parts['first_name'])

    # Close the HTTP response once it has been parsed instead of leaking it.
    res = urlopen(DBLPQuery.get_dblp_url(urlpt))
    try:
        dom = BeautifulSoup(res)
    finally:
        res.close()

    candidate_urlpts = set()
    author_affiliation = dict()
    for cu_tag in dom.find_all('li', 'homonym'):
        link = cu_tag.find('a')
        # Drops the first 3 and last 5 characters of the href -- presumably a
        # leading '../' and a trailing '.html'; confirm against the page markup.
        cu = link['href'][3:-5]
        candidate_urlpts.add(cu)
        author_affiliation[cu] = link.next_sibling.string

    if not candidate_urlpts:
        # No homonym entries on the page: fall back to the queried urlpt.
        candidate_urlpts.add(urlpt)
        author_affiliation[urlpt] = 'Default University'

    # Start every matcher before joining any of them so they run concurrently.
    threads = []
    for cu in candidate_urlpts:
        t = ThreadMatch(cu, author_cdblp)
        t.start()
        threads.append(t)
    for t in threads:
        t.join()

    result = [
        {'urlpt': k, 'aff': author_affiliation[k], 'rank': v}
        for k, v in Data.result.items()
        if v > 0.1
    ]
    result.sort(key=lambda i: i['rank'], reverse=True)
    return {'author': author_cdblp, 'result': result}
def get_match(author_name):
    """Find and rank DBLP candidates matching the CDBLP author ``author_name``.

    The function calls ``Data.clear()``, derives a urlpt
    (``<initial>/<last_name>:<first_name>``) from
    ``CDBLPAuthor.getEnglishName``, downloads the page at
    ``DBLPQuery.get_dblp_url(urlpt)`` and reads one candidate urlpt plus an
    affiliation string from each ``<li class="homonym">`` element. If the
    page has no such element, the derived urlpt is used as the single
    candidate with the placeholder affiliation ``'Default University'``.
    A ``ThreadMatch`` is started for every candidate and all are joined
    before ``Data.result`` is read.

    Returns:
        dict: ``'author'`` maps to the ``CDBLPAuthor`` instance and
        ``'result'`` to a list of ``{'urlpt', 'aff', 'rank'}`` dicts for the
        candidates whose ``Data.result`` value is greater than 0.1, ordered
        by ``'rank'`` from highest to lowest.
    """
    Data.clear()
    author_cdblp = CDBLPAuthor(author_name)
    author_name_comp = CDBLPAuthor.getEnglishName(author_name)
    last_name = author_name_comp['last_name']
    urlpt = '{}/{}:{}'.format(last_name[0].lower(), last_name,
                              author_name_comp['first_name'])

    res = urlopen(DBLPQuery.get_dblp_url(urlpt))
    try:
        dom = BeautifulSoup(res)
    finally:
        # The original never closed the response; release it once parsing is
        # finished, even if parsing raises.
        res.close()

    candidate_urlpts = set()
    author_affiliation = dict()
    for cu_tag in dom.find_all('li', 'homonym'):
        anchor = cu_tag.find('a')
        # href with its first 3 and last 5 characters removed (presumably
        # '../' and '.html' -- TODO confirm against the page markup).
        cu = anchor['href'][3:-5]
        candidate_urlpts.add(cu)
        author_affiliation[cu] = anchor.next_sibling.string

    if not candidate_urlpts:
        # Page lists no homonyms: the queried urlpt is the only candidate.
        candidate_urlpts.add(urlpt)
        author_affiliation[urlpt] = 'Default University'

    # All workers are started first and joined afterwards so they overlap.
    workers = []
    for cu in candidate_urlpts:
        worker = ThreadMatch(cu, author_cdblp)
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()

    result = [
        {'urlpt': k, 'aff': author_affiliation[k], 'rank': v}
        for k, v in Data.result.items()
        if v > 0.1
    ]
    result.sort(key=lambda i: i['rank'], reverse=True)
    return {'author': author_cdblp, 'result': result}
def run(self):
    """Run the author match for this object's urlpt and record the results.

    Prints a start message, calls ``DBLPQuery.author_match(self.dblp_urlpt,
    self.cdblp_author)`` once, then writes the returned ``'ratio'``,
    ``'object'`` and ``'overlap'`` values into ``Data.result``,
    ``Data.objects`` and ``Data.overlaps`` under the key
    ``self.dblp_urlpt``, and finally prints a completion message.
    """
    urlpt = self.dblp_urlpt
    print(urlpt, 'is created.')
    # The match used to be recomputed for each of the three fields; compute
    # it once so the work is not tripled and the stored values are
    # guaranteed to belong to the same match.
    outcome = DBLPQuery.author_match(urlpt, self.cdblp_author)
    Data.result[urlpt] = outcome['ratio']
    Data.objects[urlpt] = outcome['object']
    Data.overlaps[urlpt] = outcome['overlap']
    print(urlpt, 'is done.')