def _read_output_json(global_config, basename):
    """Read ``<home>/output/<basename>`` and parse it with ``lib_data.text2json``."""
    path = os.path.join(global_config["home"], "output", basename)
    with open(path, 'r') as f:
        return lib_data.text2json(f.read())


def load_paper_json():
    """Load the paper, session and event inputs and index them as dicts.

    Reads ``paper-excel.json``, ``paper-industry.json`` and ``paper-pdf.json``
    from ``<home>/output`` and ``session.csv`` / ``event.csv`` from
    ``<home>/data``, where ``<home>`` is ``load_gloabl_config()["home"]``.
    Progress counters and every event row are printed to stdout.

    Returns:
        A 6-tuple of dicts:
        ``map_paper_excel``     str(paper_id) -> paper record (excel papers
                                plus industry papers),
        ``map_paper_excel_no``  str(paper_no) -> excel paper record,
        ``map_paper_pdf``       str(paper_id) -> pdf record,
        ``map_paper_session``   "Paper no." -> session.csv paper row,
        ``map_name_session``    session name -> session entry,
        ``map_event_session``   event id (1-based int) -> event dict.

    Raises:
        AssertionError: an event.csv cell names a session that is not known
            from ``paper-industry.json`` or ``session.csv``.
    """
    global_config = load_gloabl_config()

    # --- paper-excel.json: index by paper id and by paper number -----------
    list_paper_excel = _read_output_json(global_config, "paper-excel.json")
    print(len(list_paper_excel))
    map_paper_excel = {}
    map_paper_excel_no = {}
    for paper in list_paper_excel:
        map_paper_excel[str(paper["paper_id"])] = paper
        map_paper_excel_no[str(paper["paper_no"])] = paper

    # --- paper-industry.json: papers carry their own session information ---
    map_name_session = {}
    list_paper_industry = _read_output_json(global_config, "paper-industry.json")
    set_session_name = set()
    for paper in list_paper_industry:
        paper_id = str(paper["paper_id"])
        # Industry papers share the id-keyed map with the excel papers.
        map_paper_excel[paper_id] = paper
        session_name = paper["session_name"]
        set_session_name.add(session_name)
        # Synthetic ids start at 101 and grow with the number of distinct
        # session names seen so far.
        default_session_id = 100 + len(set_session_name)
        entry = map_name_session.setdefault(session_name, {
            "session_time": paper["session_time"],
            "session_name": session_name,
            "session_id": default_session_id,
            "session_index": default_session_id,
        })
        paper_list = entry.setdefault("paper_list", [])
        lib_data.list_append_unique(paper_list, paper_id)
        entry["paper_count"] = len(paper_list)

    # --- paper-pdf.json -----------------------------------------------------
    list_paper_pdf = _read_output_json(global_config, "paper-pdf.json")
    print(len(list_paper_pdf))
    map_paper_pdf = {}
    for paper in list_paper_pdf:
        map_paper_pdf[str(paper["paper_id"])] = paper

    # --- session.csv: session header rows followed by their paper rows -----
    filename = os.path.join(global_config["home"], "data", "session.csv")
    map_paper_session = {}
    with open(filename, 'r') as f:
        csvreader = UnicodeReader(f)
        headers = next(csvreader)
        session_no = None
        session_name = None
        session_index = 1
        for row in csvreader:
            entry = dict(zip(headers, row))
            if entry.get("Paper no."):
                # Paper row: attach the most recent session header to it.
                entry["session_no"] = session_no
                # The numeric id is the last space-separated token of
                # the "Session no" cell.
                entry["session_id"] = int(session_no.split(" ")[-1])
                entry["session_name"] = session_name
                entry["session_index"] = session_index
                session_index += 1
                map_paper_session[entry["Paper no."]] = entry
                # Registers the session name; later paper rows of the same
                # session overwrite this value.
                map_name_session[session_name] = entry
            else:
                # Row without a paper number starts a new session.
                session_no = entry["Session no"]
                session_name = entry["Title"].strip()
                session_index = 1
    print(len(map_paper_session))

    # --- event.csv: one row per time slot, one column per location ---------
    filename = os.path.join(global_config["home"], "data", "event.csv")
    map_event_session = {}
    with open(filename, 'r') as f:
        csvreader = UnicodeReader(f)
        headers = next(csvreader)
        for row in csvreader:
            # Skip blank rows (row[0] would fail) and '#' comment rows.
            if not row or row[0].startswith("#"):
                continue
            entry = dict(zip(headers, row))
            print(entry)
            event_start, event_end = entry["Time"].split("-")
            event_day = entry["day"]
            for k, v in entry.items():
                if k in ["Time", "day"]:
                    continue
                if not v:
                    continue
                event_id = len(map_event_session) + 1
                event = {
                    "day": event_day,
                    "start": event_start.strip(),
                    "end": event_end.strip(),
                    "name": v.strip(),
                    "location": k,
                    "id": event_id,
                }
                if "Session" in v or "Industry Track:" in v:
                    # Reduce the cell text to the bare session name: drop
                    # the prefixes and any parenthesised suffix.
                    session_name = v.replace("Session:", "")
                    session_name = session_name.replace("Industry Track:", "")
                    session_name = re.sub(r"\([^\)]+\)", "", session_name)
                    session_name = session_name.strip()
                    if session_name not in map_name_session:
                        print(session_name)
                        # Raised explicitly so the check survives ``python -O``.
                        raise AssertionError(
                            "unknown session in event.csv: %r" % (session_name,))
                    event["session_name"] = session_name
                map_event_session[event_id] = event
    # The original printed len(map_paper_session) a second time here, which
    # looks like a copy-paste slip; report the number of events loaded.
    print(len(map_event_session))

    return (map_paper_excel, map_paper_excel_no, map_paper_pdf,
            map_paper_session, map_name_session, map_event_session)
def _build_session_list(map_paper_session, map_paper_excel_no, map_name_session):
    """Return all sessions (session.csv ones plus those carrying a paper_list), sorted by id."""
    map_session = {}
    papers_in_order = sorted(map_paper_session.values(),
                             key=lambda paper: paper["session_index"])
    for paper in papers_in_order:
        session_id = paper["session_id"]
        if session_id not in map_session:
            # json_update's return value is used as the session record, as
            # in the original code; it is only built the first time a
            # session id is seen.
            map_session[session_id] = lib_data.json_update(
                {}, paper, ["session_no", "session_name", "session_id"])
        session_info = map_session[session_id]
        paper_no = paper["Paper no."]
        paper_id = map_paper_excel_no[paper_no]['paper_id']
        paper_list = session_info.setdefault("paper_list", [])
        lib_data.list_append_unique(paper_list, paper_id)
        session_info["paper_count"] = len(paper_list)

    # list(...) so that appending also works where dict.values() is a view.
    list_session = list(map_session.values())
    for session in map_name_session.values():
        # Only entries that already carry a paper_list are added here;
        # session.csv rows stored in map_name_session have none.
        if "paper_list" in session:
            list_session.append(session)
    return sorted(list_session, key=lambda session: session["session_id"])


def _build_track_list(map_paper_excel):
    """Return the fixed track table, each track annotated with its sorted paper ids."""
    TRACK_MAP = [
        {"track_id": "In Use", "track_name": "In Use Track",
         "category": "Semantic Web In Use Track Paper"},
        {"track_id": "RDBS", "track_name": "Replication, Benchmark, Data and Software Track",
         "category": "Replication, Benchmark, Data and Software Track Paper"},
        {"track_id": "Research", "track_name": "Research Track",
         "category": "Research Track Paper"},
        {"track_id": "DC", "track_name": "Doctoral Consortium",
         "category": "Doctoral Consortium Paper"},
        {"track_id": "Industry", "track_name": "Industry Track",
         "category": "Industry Track Paper"},
    ]

    # Group paper ids by the paper's "category" field.
    map_track = {}
    for paper in map_paper_excel.values():
        category = paper["category"]
        track = map_track.setdefault(category, {"category": category})
        paper_list = track.setdefault("paper_list", [])
        lib_data.list_append_unique(paper_list, paper['paper_id'])
    # Sort once per track instead of after every appended paper.
    for track in map_track.values():
        track["paper_list"] = sorted(track["paper_list"])
    print(lib_data.json2text(list(map_track.keys())))

    # Tracks whose category has no paper keep neither paper_list nor
    # paper_count.
    for track in TRACK_MAP:
        if track["category"] in map_track:
            track["paper_list"] = map_track[track["category"]]["paper_list"]
            track["paper_count"] = len(track["paper_list"])
    return TRACK_MAP


def _merge_pdf_metadata(map_paper_excel, map_paper_pdf):
    """Copy keywords/abstract/page count from the pdf records into the paper records."""
    for paper_id, paper in map_paper_excel.items():
        if paper_id.startswith("industry"):
            continue
        # NOTE(review): paper_pdf is None when no pdf record exists for this
        # id; whether json_update tolerates that is not visible here, and the
        # page computation below would fail on it -- confirm the inputs
        # always line up.
        paper_pdf = map_paper_pdf.get(paper_id)
        lib_data.json_update(paper, paper_pdf,
                             ["keywords", "abstract", "number_of_pages"])
        if "pages" in paper:
            end_page = int(paper["start_page"]) + paper_pdf["number_of_pages"] - 1
            paper["pages"] = "{}-{}".format(paper["start_page"], end_page)


def _talk_minutes(session_name, paper_info):
    """Return the talk length used for a paper in the given session."""
    if "Regular Talks" in session_name:
        return 15
    if "Pechakucha" in session_name:
        return 10
    # A paper number ending in '*' gets the shorter slot.
    if paper_info["paper_no"].endswith("*"):
        return 10
    return 20


def _build_talk_list(list_session, map_paper_excel, map_session_event):
    """Lay out the papers of each session back to back from the event start."""
    list_talk = []
    for session_info in list_session:
        session_name = session_info["session_name"]
        start_diff = 0
        for paper_id in session_info["paper_list"]:
            # map_paper_excel is keyed by str(paper_id), so look up with the
            # same normalisation.
            paper_info = map_paper_excel[str(paper_id)]
            event = map_session_event[session_name]
            diff_len = _talk_minutes(session_name, paper_info)
            talk = {
                "day": event["day"],
                "start": time_add(event["start"], start_diff),
                "end": time_add(event["start"], start_diff + diff_len),
                "event": event["id"],
                "paper": paper_id,
                "paper_title": paper_info["title"],
                "paper_author": paper_info["author"],
                "id": paper_id,
            }
            print(talk)
            list_talk.append(talk)
            start_diff += diff_len
    return list_talk


def create_json():
    """Assemble the programme data from the maps returned by load_paper_json().

    Returns:
        dict with the keys
        ``"events"``   events sorted by ``id``,
        ``"sessions"`` session records sorted by ``session_id``,
        ``"tracks"``   the fixed track table with paper lists and counts,
        ``"papers"``   paper records sorted by ``paper_id`` (excel papers are
                       enriched from the pdf records),
        ``"talks"``    one entry per paper of every session, with start/end
                       computed via ``time_add`` from the session's event.

    The track categories, every talk and the final structure (via
    ``lib_data.json2text``) are printed to stdout as a side effect.
    """
    (map_paper_excel, map_paper_excel_no, map_paper_pdf,
     map_paper_session, map_name_session, map_event_session) = load_paper_json()

    ret = {}

    # Events, plus a session name -> event lookup used for the talks.
    ret["events"] = sorted(map_event_session.values(),
                           key=lambda event: event["id"])
    map_session_event = {}
    for event in map_event_session.values():
        if "session_name" in event:
            map_session_event[event["session_name"]] = event

    ret["sessions"] = _build_session_list(
        map_paper_session, map_paper_excel_no, map_name_session)
    ret["tracks"] = _build_track_list(map_paper_excel)

    _merge_pdf_metadata(map_paper_excel, map_paper_pdf)
    ret["papers"] = sorted(map_paper_excel.values(),
                           key=lambda paper: paper["paper_id"])

    ret["talks"] = _build_talk_list(
        ret["sessions"], map_paper_excel, map_session_event)

    print(lib_data.json2text(ret))
    return ret