def build_socials(raw_socials: List[Dict[str, Any]]) -> List[SocialEvent]:
    """Convert raw social-event records into ``SocialEvent`` objects.

    Every record must carry ``UID``, ``name``, ``description``,
    ``organizers`` (with a ``members`` entry) and ``sessions``.  All other
    fields are optional: ``image``, ``location`` and ``zoom_link`` fall back
    to ``None``, while ``rocketchat_channel``, ``website`` and the
    organizers' ``website`` fall back to an empty string.

    The returned list follows the order of ``raw_socials``.
    """
    socials: List[SocialEvent] = []
    for item in raw_socials:
        # Mandatory top-level fields are read first, in a fixed order.
        uid = item["UID"]
        name = item["name"]
        description = item["description"]
        image = item.get("image")
        location = item.get("location")

        organizers = SocialEventOrganizers(
            members=item["organizers"]["members"],
            website=item["organizers"].get("website", ""),
        )

        # One SessionInfo per raw session; all session fields are optional.
        sessions = []
        for session in item["sessions"]:
            sessions.append(
                SessionInfo(
                    session_name=session.get("name"),
                    start_time=session.get("start_time"),
                    end_time=session.get("end_time"),
                    link=session.get("link"),
                )
            )

        socials.append(
            SocialEvent(
                id=uid,
                name=name,
                description=description,
                image=image,
                location=location,
                organizers=organizers,
                sessions=sessions,
                rocketchat_channel=item.get("rocketchat_channel", ""),
                website=item.get("website", ""),
                zoom_link=item.get("zoom_link"),
            )
        )
    return socials
def build_tutorial_blocks(t: Dict[str, Any]) -> List[SessionInfo]:
    """Collapse a tutorial's sessions into numbered live-session blocks.

    The tutorial's ``sessions`` are grouped by ``compute_schedule_blocks``.
    Each resulting block becomes one ``SessionInfo`` named
    ``"T-Live Session <n>"`` (1-based) that spans from the earliest
    ``start_time`` to the latest ``end_time`` in the block and carries the
    ``zoom_link`` of the block's first session.

    All sessions of a block are asserted to share the same ``zoom_link``.
    """
    live_sessions: List[SessionInfo] = []
    for i, block in enumerate(compute_schedule_blocks(t["sessions"])):
        block_start = min([entry["start_time"] for entry in block])
        block_end = max([entry["end_time"] for entry in block])

        # A block is shown with a single link, so its sessions must agree.
        assert all(s["zoom_link"] == block[0]["zoom_link"] for s in block)

        live_sessions.append(
            SessionInfo(
                session_name=f"T-Live Session {i+1}",
                start_time=block_start,
                end_time=block_end,
                link=block[0]["zoom_link"],
            )
        )
    return live_sessions
def build_plenary_sessions(
    raw_plenary_sessions: List[Dict[str, Any]],
    raw_plenary_videos: Dict[str, List[Dict[str, Any]]],
) -> DefaultDict[str, List[PlenarySession]]:
    """Build the plenary sessions, grouped by their ``date`` field.

    ``raw_plenary_videos`` maps a plenary UID to its raw video records; each
    record is turned into a ``PlenaryVideo``.  Videos are only attached to
    plenaries whose UID is ``"business_meeting"`` or ``"review_meeting"``;
    every other plenary gets ``videos=None``.

    Returns a ``defaultdict`` from date to the list of ``PlenarySession``
    objects on that date, in input order.
    """
    # UIDs of the plenaries that expose their recorded videos.
    uids_with_videos = ["business_meeting", "review_meeting"]

    videos_by_plenary: DefaultDict[str, List[PlenaryVideo]] = defaultdict(list)
    for plenary_id, raw_videos in raw_plenary_videos.items():
        for raw_video in raw_videos:
            video = PlenaryVideo(
                id=raw_video["UID"],
                title=raw_video["title"],
                speakers=raw_video["speakers"],
                presentation_id=raw_video["presentation_id"],
            )
            videos_by_plenary[plenary_id].append(video)

    sessions_by_date: DefaultDict[str, List[PlenarySession]] = defaultdict(list)
    for item in raw_plenary_sessions:
        same_day = sessions_by_date[item["date"]]
        uid = item["UID"]

        if uid in uids_with_videos:
            videos = videos_by_plenary[uid]
        else:
            videos = None

        same_day.append(
            PlenarySession(
                id=uid,
                title=item["title"],
                image=item["image"],
                date=item["date"],
                day=item["day"],
                sessions=[
                    SessionInfo(
                        session_name=session.get("name"),
                        start_time=parse_session_time(session.get("start_time")),
                        end_time=parse_session_time(session.get("end_time")),
                        zoom_link=session.get("zoom_link"),
                    )
                    for session in item.get("sessions")
                ],
                presenter=item.get("presenter"),
                institution=item.get("institution"),
                abstract=item.get("abstract"),
                bio=item.get("bio"),
                presentation_id=item.get("presentation_id"),
                rocketchat_channel=item.get("rocketchat_channel"),
                videos=videos,
            )
        )
    return sessions_by_date
def build_workshop_blocks(t: Dict[str, Any]) -> List[SessionInfo]:
    """Collapse a workshop's sessions into numbered live-session blocks.

    The workshop's ``sessions`` are grouped by ``compute_schedule_blocks``
    with a leeway of one hour.  Each resulting block becomes one
    ``SessionInfo`` named ``"W-Live Session <n>"`` (1-based) that spans from
    the earliest ``start_time`` to the latest ``end_time`` in the block.
    The link of every block is left empty.
    """
    blocks = compute_schedule_blocks(t["sessions"], leeway=timedelta(hours=1))
    if not blocks:
        return []

    return [
        SessionInfo(
            session_name=f"W-Live Session {i+1}",
            start_time=min([entry["start_time"] for entry in block]),
            end_time=max([entry["end_time"] for entry in block]),
            link="",
        )
        for i, block in enumerate(blocks)
    ]
def build_workshops(
    raw_workshops: List[Dict[str, Any]],
    raw_workshop_papers: List[Dict[str, Any]],
) -> List[Workshop]:
    """Build the ``Workshop`` entries together with their papers.

    Args:
        raw_workshops: One record per workshop.  ``UID``, ``alias``,
            ``title``, ``organizers``, ``abstract``, ``website``,
            ``rocketchat_channel`` and ``sessions`` are read as mandatory
            keys; ``livestream``, ``schedule``, ``prerecorded_talks`` and
            ``zoom_links`` are optional.
        raw_workshop_papers: One record per workshop paper.  ``workshop``
            (the UID of the owning workshop), ``UID``, ``title`` and
            ``authors`` are mandatory; ``abstract``, ``presentation_id``
            and ``pdf_url`` are optional.

    Returns:
        One ``Workshop`` per entry of ``raw_workshops``, in input order.

    Raises:
        KeyError: If a mandatory key is missing, or if a paper refers to a
            workshop UID that does not occur in ``raw_workshops``.
    """

    def build_workshop_blocks(t: Dict[str, Any]) -> List[SessionInfo]:
        """Collapse the sessions of workshop ``t`` into live-session blocks."""
        blocks = compute_schedule_blocks(t["sessions"],
                                         leeway=timedelta(hours=1))
        return [
            SessionInfo(
                session_name=f"W-Live Session {i+1}",
                start_time=min(s["start_time"] for s in block),
                end_time=max(s["end_time"] for s in block),
                link="",
            ) for i, block in enumerate(blocks)
        ]

    ws_id_to_alias: Dict[str, str] = {
        w["UID"]: w["alias"] for w in raw_workshops
    }

    # Title lookup table replacing a per-paper scan of raw_workshops.
    # setdefault keeps the first title seen for a UID, which is what a
    # front-to-back scan would have returned.
    ws_id_to_title: Dict[str, str] = {}
    for wsh in raw_workshops:
        ws_id_to_title.setdefault(wsh["UID"], wsh["title"])

    workshop_papers: DefaultDict[str, List[WorkshopPaper]] = defaultdict(list)
    for item in raw_workshop_papers:
        workshop_id = item["workshop"]

        # The abstract is optional: read it once with .get() so that the
        # tldr derivation below cannot raise KeyError when it is absent.
        abstract = item.get("abstract")
        tldr = abstract[:250] + "..." if abstract else None

        # The channel name uses the part of the paper UID after the last dot.
        paper_suffix = item["UID"].split(".")[-1]

        workshop_papers[workshop_id].append(
            WorkshopPaper(
                id=item["UID"],
                title=item["title"],
                speakers=item["authors"],
                presentation_id=item.get("presentation_id", None),
                rocketchat_channel=
                f"paper-{ws_id_to_alias[workshop_id]}-{paper_suffix}",
                content=PaperContent(
                    title=item["title"],
                    authors=extract_list_field(item, "authors"),
                    track=ws_id_to_title.get(workshop_id, ""),
                    paper_type="Workshop",
                    abstract=abstract,
                    tldr=tldr,
                    keywords=[],
                    pdf_url=item.get("pdf_url"),
                    demo_url=None,
                    sessions=[],
                    similar_paper_uids=[],
                    program="workshop",
                ),
            ))

    workshops: List[Workshop] = [
        Workshop(
            id=item["UID"],
            title=item["title"],
            organizers=item["organizers"],
            abstract=item["abstract"],
            website=item["website"],
            livestream=item.get("livestream"),
            papers=workshop_papers[item["UID"]],
            schedule=item.get("schedule"),
            prerecorded_talks=item.get("prerecorded_talks"),
            rocketchat_channel=item["rocketchat_channel"],
            zoom_links=item.get("zoom_links", []),
            sessions=[
                SessionInfo(
                    session_name=session.get("name", ""),
                    start_time=session.get("start_time"),
                    end_time=session.get("end_time"),
                    link=session.get("zoom_link", ""),
                    hosts=session.get("hosts"),
                ) for session in item.get("sessions")
            ],
            blocks=build_workshop_blocks(item),
        ) for item in raw_workshops
    ]
    return workshops
def build_papers(
    raw_papers: List[Dict[str, str]],
    paper_sessions: Dict[str, Any],
    paper_recs: Dict[str, List[str]],
    paper_images_path: str,
) -> List[Paper]:
    """Builds the site_data["papers"].

    Each entry in the papers has the following fields:
    - UID: str
    - title: str
    - authors: str (separated by '|')
    - keywords: str (separated by '|')
    - track: str
    - paper_type: str (i.e., "Long", "Short", "SRW", "Demo")
    - pdf_url: str
    - demo_url: str

    ``paper_sessions`` maps a session id to a record with ``start_time``,
    ``end_time`` and ``papers``.  Papers in sessions whose id starts with
    "z" are linked to that session's ``zoom_link``; papers in sessions whose
    id starts with "g" are linked to a fixed virtualchair.net URL.

    Warnings about missing information are printed to stdout; they do not
    stop the build.
    """
    gather_link = "https://www.virtualchair.net/events/emnlp2020"

    # First pass: decide the link of every paper (paper id -> link).
    paper_id_to_link: Dict[str, str] = {}
    for session_id, session in paper_sessions.items():
        for paper_id in session["papers"]:
            assert paper_id not in paper_id_to_link, paper_id
            if session_id.startswith("z"):
                link_for_paper = session.get("zoom_link")
            elif session_id.startswith("g"):
                link_for_paper = gather_link
            else:
                # Sessions with any other prefix contribute no link.
                continue
            paper_id_to_link[paper_id] = link_for_paper

    # Second pass: collect the slots of every paper (paper id -> sessions).
    sessions_for_paper: DefaultDict[str, List[SessionInfo]] = defaultdict(list)
    for session_name, session_info in paper_sessions.items():
        slot_start = session_info["start_time"]
        slot_end = session_info["end_time"]
        for paper_id in session_info["papers"]:
            slot = SessionInfo(
                session_name=session_name,
                start_time=slot_start,
                end_time=slot_end,
                link=paper_id_to_link[paper_id],
            )
            sessions_for_paper[paper_id].append(slot)

    papers: List[Paper] = []
    for item in raw_papers:
        uid = item["UID"]
        card_image_path = get_card_image_path_for_paper(uid, paper_images_path)
        presentation_id = item.get("presentation_id", None)
        content = PaperContent(
            title=item["title"],
            authors=extract_list_field(item, "authors"),
            keywords=extract_list_field(item, "keywords"),
            abstract=item["abstract"],
            tldr=item["abstract"][:250] + "...",
            pdf_url=item.get("pdf_url", ""),
            demo_url=item.get("demo_url", ""),
            material=item.get("material"),
            track=normalize_track_name(item.get("track", "")),
            paper_type=item.get("paper_type", ""),
            sessions=sessions_for_paper[uid],
            # A paper without recommendations is listed as similar to itself.
            similar_paper_uids=paper_recs.get(uid, [uid]),
            program=item["program"],
        )
        papers.append(
            Paper(
                id=uid,
                forum=uid,
                card_image_path=card_image_path,
                presentation_id=presentation_id,
                content=content,
            )
        )

    # throw warnings for missing information
    programs_without_video = ["demo", "findings"]
    for paper in papers:
        has_presentation = bool(paper.presentation_id)
        if not has_presentation:
            if paper.content.program not in programs_without_video:
                print(f"WARNING: presentation_id not set for {paper.id}")
        if not paper.content.track:
            print(f"WARNING: track not set for {paper.id}")
        if has_presentation and len(paper.content.sessions) != 1:
            print(
                f"WARNING: found {len(paper.content.sessions)} sessions for {paper.id}"
            )
        if not paper.content.similar_paper_uids:
            print(f"WARNING: empty similar_paper_uids for {paper.id}")
    return papers
def build_papers(
    raw_papers: List[Dict[str, str]],
    all_paper_sessions: List[Dict[str, Dict[str, Any]]],
    qa_session_length_hr: int,
    all_paper_zoom_links: List[Dict[str, str]],
    all_paper_slideslive_ids: List[Dict[str, str]],
    paper_recs: Dict[str, List[str]],
    paper_images_path: str,
) -> List[Paper]:
    """Builds the site_data["papers"].

    Each entry in the papers has the following fields:
    - UID: str
    - title: str
    - authors: str (separated by '|')
    - keywords: str (separated by '|')
    - track: str
    - paper_type: str (i.e., "Long", "Short", "SRW", "Demo")
    - pdf_url: str
    - demo_url: str

    The paper_schedule file contains the live QA session slots for each paper.
    An example paper_sessions.yml file is shown below.

    ```yaml
    1A:
      date: 2020-07-06_05:00:00
      papers:
      - main.1
      - main.2
    2A:
      date: 2020-07-06_08:00:00
      papers:
      - main.17
      - main.19
    ```

    Every slot lasts ``qa_session_length_hr`` hours from its ``date``.  The
    Zoom record of each (paper, slot) pair is taken from
    ``all_paper_zoom_links`` and its ``starttime`` is asserted to match the
    slot's start.  Warnings about missing information are printed to stdout.
    """

    def slot_key(paper_id: str, session_name: str) -> str:
        """Return the key identifying one paper within one session slot."""
        return f"{paper_id}-{session_name}"

    # (paper, slot) -> zoom record
    zoom_info_for_paper_session: Dict[str, Dict[str, str]] = {}
    for zoom_record in all_paper_zoom_links:
        key = slot_key(zoom_record["UID"], zoom_record["session_name"])
        assert key not in zoom_info_for_paper_session
        zoom_info_for_paper_session[key] = zoom_record

    # paper -> slideslive presentation ID
    presentation_id_for_paper: Dict[str, str] = {}
    for slideslive_record in all_paper_slideslive_ids:
        uid = slideslive_record["UID"]
        presentation_id = slideslive_record["presentation_id"]
        assert uid not in presentation_id_for_paper
        presentation_id_for_paper[uid] = presentation_id

    # paper -> live QA slots
    sessions_for_paper: DefaultDict[str, List[SessionInfo]] = defaultdict(list)
    for paper_sessions in all_paper_sessions:
        for session_name, session_info in paper_sessions.items():
            start_time = datetime.strptime(session_info["date"],
                                           "%Y-%m-%d_%H:%M:%S")
            end_time = start_time + timedelta(hours=qa_session_length_hr)
            for paper_id in session_info["papers"]:
                zoom_info = zoom_info_for_paper_session[slot_key(
                    paper_id, session_name)]
                # The Zoom meeting must be scheduled for the slot's start.
                zoom_start = datetime.strptime(zoom_info["starttime"],
                                               "%Y-%m-%dT%H:%M:%SZ")
                assert zoom_start == start_time, paper_id
                sessions_for_paper[paper_id].append(
                    SessionInfo(
                        session_name=session_name,
                        start_time=start_time,
                        end_time=end_time,
                        zoom_link=zoom_info["zoom_join_link"],
                    )
                )

    papers: List[Paper] = []
    for item in raw_papers:
        uid = item["UID"]
        card_image_path = get_card_image_path_for_paper(uid, paper_images_path)
        presentation_id = presentation_id_for_paper.get(uid)
        content = PaperContent(
            title=item["title"],
            authors=extract_list_field(item, "authors"),
            keywords=extract_list_field(item, "keywords"),
            abstract=item["abstract"],
            tldr=item["abstract"][:250] + "...",
            pdf_url=item.get("pdf_url", ""),
            demo_url=item.get("demo_url", ""),
            track=normalize_track_name(item.get("track", "")),
            paper_type=item.get("paper_type", ""),
            sessions=sessions_for_paper[uid],
            # A paper without recommendations is listed as similar to itself.
            similar_paper_uids=paper_recs.get(uid, [uid]),
        )
        papers.append(
            Paper(
                id=uid,
                forum=uid,
                card_image_path=card_image_path,
                presentation_id=presentation_id,
                content=content,
            )
        )

    # throw warnings for missing information
    for paper in papers:
        content = paper.content
        if not paper.presentation_id:
            print(f"WARNING: presentation_id not set for {paper.id}")
        if not content.track:
            print(f"WARNING: track not set for {paper.id}")
        if len(content.sessions) != 2:
            print(
                f"WARNING: found {len(paper.content.sessions)} sessions for {paper.id}"
            )
        if not content.similar_paper_uids:
            print(f"WARNING: empty similar_paper_uids for {paper.id}")
    return papers
def build_papers(
    raw_papers: List[Dict[str, str]],
    paper_schedule: Dict[str, Dict[str, Any]],
    qa_session_length_hr: int,
    calendar_stub: str,
    paper_recs: Dict[str, List[str]],
    slideslive_id_mapping: Dict[str, str],
) -> List[Paper]:
    """Builds the site_data["papers"].

    Each entry in the papers has the following fields:
    - UID: str
    - title: str
    - authors: str (separated by '|')
    - keywords: str (separated by '|')
    - track: str
    - paper_type: str (i.e., "Long", "Short", "SRW", "Demo")
    - pdf_url: str
    - demo_url: str

    The paper_schedule file contains the live QA session slots and
    corresponding Zoom links for each paper. An example paper_schedule.yml
    file is shown below.

    ```yaml
    1A:
      date: 2020-07-06_05:00:00
      papers:
      - id: main.1
        join_link: https://www.google.com/
      - id: main.2
        join_link: https://www.google.com/
    2A:
      date: 2020-07-06_08:00:00
      papers:
      - id: main.17
        join_link: https://www.google.com/
      - id: main.19
        join_link: https://www.google.com/
    ```

    Args:
        raw_papers: The paper records described above.
        paper_schedule: Session name -> slot record (``date`` and ``papers``).
        qa_session_length_hr: Length of every QA slot, in hours.
        calendar_stub: Prefix of the generated ``.ics`` calendar links.
        paper_recs: Paper UID -> UIDs of similar papers.
        slideslive_id_mapping: Paper UID -> presentation id.  It is indexed
            by UID, so it has to be a mapping rather than a list of records.

    Returns:
        One ``Paper`` per entry of ``raw_papers``, in input order.

    Raises:
        KeyError: If a paper UID is missing from ``paper_recs`` or
            ``slideslive_id_mapping``, or a record lacks a mandatory key.
        ValueError: If a slot ``date`` does not match
            ``%Y-%m-%d_%H:%M:%S``.
    """
    # build the lookup from paper to slots
    sessions_for_paper: DefaultDict[str, List[SessionInfo]] = defaultdict(list)
    for session_name, session_info in paper_schedule.items():
        # All papers of a slot share its start and end, so parse them once
        # per slot instead of once per paper.
        start_time = datetime.strptime(session_info["date"],
                                       "%Y-%m-%d_%H:%M:%S")
        end_time = start_time + timedelta(hours=qa_session_length_hr)
        for item in session_info["papers"]:
            paper_id = item["id"]
            # Calendar files are numbered per paper, in the order its slots
            # are encountered (0 for the first slot, 1 for the second, ...).
            session_offset = len(sessions_for_paper[paper_id])
            sessions_for_paper[paper_id].append(
                SessionInfo(
                    session_name=session_name,
                    start_time=start_time,
                    end_time=end_time,
                    zoom_link=item["join_link"],
                    # TODO: the prefix should be configurable?
                    ical_link=f"{calendar_stub}/paper_{paper_id}.{session_offset}.ics",
                )
            )

    return [
        Paper(
            id=item["UID"],
            forum=item["UID"],
            presentation_id=slideslive_id_mapping[item["UID"]],
            content=PaperContent(
                title=item["title"],
                authors=extract_list_field(item, "authors"),
                keywords=extract_list_field(item, "keywords"),
                abstract=item["abstract"],
                pdf_url=item.get("pdf_url", ""),
                demo_url=item.get("demo_url", ""),
                track=item.get("track", ""),
                sessions=sessions_for_paper[item["UID"]],
                similar_paper_uids=paper_recs[item["UID"]],
            ),
        )
        for item in raw_papers
    ]