# NOTE(review): the line breaks and indentation of this script are missing, so
# the statements below run together on one physical line and the nesting of
# each loop/if body cannot be read from the text. The notes follow statement
# order; confirm the nesting against an intact copy of the file.
#
# 1. `for j in clazz` stores `clazz_index` in `clazz_dict` under the first key
#    of each `j`. `clazz`, `clazz_index` and `clazz_dict` are bound before this
#    point (presumably by an enclosing loop -- TODO confirm).
# 2. `for i in conf_rat_list` copies the first key/value pair of each `i` into
#    `conf_rat_dict`; `conf_rank()` is then called.
# 3. The file named by sys.argv[1] is read in full and split on "\n"; the first
#    element is dropped (presumably a header row -- confirm). sys.argv[2] is
#    opened in append mode. Neither handle is closed in the visible code.
# 4. `get_features("", head=True)` is written to the output first, then the
#    progress message "正在生成全部特征 ..." ("generating all features ...") is
#    printed.
# 5. The progress-bar loop runs `all_count - 1` times (presumably to skip the
#    empty string left by a trailing newline -- confirm). Each pass reads
#    `dataset[value]`, takes the text before the first comma as the author
#    name, and writes `get_features(author_name)` when the name is non-empty.
#    `value` is a manual index; it is presumably incremented once per
#    iteration rather than inside the `if` -- confirm.
# 6. `check_dimension()` opens sys.argv[2] for reading, prints "正在检查维数"
#    ("checking dimensionality"), and reads lines until `readline()` returns ""
#    (end of file). Anything after that end-of-file check is outside this
#    excerpt.
for j in clazz: clazz_dict[list(j.keys())[0]] = clazz_index for i in conf_rat_list: conf_rat_dict[list(i.keys())[0]] = list(i.values())[0] conf_rank() dataset = open(sys.argv[1]).read().split("\n")[1:] output = open(sys.argv[2], "a") all_count = len(dataset) bar = DownloadProgressBar(max=all_count - 1) value = 0 output.write(get_features("", head=True)) print("正在生成全部特征 ...") for i in bar.iter(range(all_count - 1)): line = dataset[value] author_name = line.split(",")[0] if author_name != "": feature = get_features(author_name) output.write(feature) value += 1 def check_dimension(): s = open(sys.argv[2], "r") print("正在检查维数") while True: line = s.readline() if line == "": break
# NOTE(review): the line breaks and indentation of this script are missing, so
# the statements below run together on one physical line and the nesting of
# each loop/if body cannot be read from the text. The notes follow statement
# order; confirm the nesting against an intact copy of the file.
#
# 1. The line opens inside an if/elif chain that dispatches on the first two
#    characters of `info` (presumably the tail of `handlePaperInfo`, which is
#    called further down -- its `def` is outside this excerpt):
#      - first visible branch (condition not visible): rs['year'] = int(info[2:])
#      - '#c': rs['conf'] = info[2:]
#      - '#%': int(info[2:]) is appended to rs['ref']
#    Afterwards `s` is printed when rs['year'] equals None, False is returned
#    when rs['year'] > 2013, and otherwise the tuple (index, rs) is returned.
#    NOTE(review): `== None` would normally be `is None`; also, if the year can
#    really be None here, the following `> 2013` comparison raises TypeError on
#    Python 3 -- confirm whether that path is reachable.
# 2. `papers` is parsed in batches of 1000, one progress-bar step per batch:
#    bar_count = ceil(len(papers) / 1000), and the inner loop walks indices
#    [_i * 1000, (_i + 1) * 1000), stopping once j reaches all_count. Entries
#    for which handlePaperInfo returns a value equal to False are skipped; the
#    rest are stored as paper_dict[paper[0]] = paper[1]. `_i` is a manual batch
#    counter (presumably incremented once per outer iteration -- confirm).
#    NOTE(review): `paper == False` would normally be `paper is False`.
# 3. `papers` is rebound to the resulting dict, the message "正在构建被引用关系"
#    ("building cited-by relations") is printed, `cnt` is reset to 0, and a
#    loop over the dict keys begins testing each paper for a "ref" key. The
#    body of that test is cut off at the end of this excerpt.
rs['year'] = int(info[2:]) elif info[:2] == '#c': rs['conf'] = info[2:] elif info[:2] == '#%': rs['ref'].append(int(info[2:])) if rs['year'] == None: print(s) if rs['year'] > 2013: return False return index, rs paper_dict = dict() all_count = len(papers) bar_count = int(ceil(len(papers) / 1000)) bar = DownloadProgressBar(max=bar_count) _i = 0 for i in bar.iter(range(bar_count)): for j in range(_i * 1000, (_i + 1) * 1000): if j == all_count: break paper = handlePaperInfo(papers[j]) if paper == False: continue paper_dict[paper[0]] = paper[1] _i += 1 papers = paper_dict print("正在构建被引用关系") cnt = 0 for p_index in papers.keys(): paper = papers[p_index] if "ref" in paper: