Example #1
0
        for j in clazz:
            clazz_dict[list(j.keys())[0]] = clazz_index

    for i in conf_rat_list:
        conf_rat_dict[list(i.keys())[0]] = list(i.values())[0]


# Run conf_rank() first, exactly as the original script does, before any
# features are computed.
conf_rank()

# Read the whole input file and drop its first (header) line.  The handle is
# released as soon as the text has been read.
with open(sys.argv[1]) as dataset_file:
    dataset = dataset_file.read().split("\n")[1:]

# Rows are appended to the output file.  The handle is deliberately left open
# because code outside this section may still write to it.
output = open(sys.argv[2], "a")
all_count = len(dataset)
# Only the first ``all_count - 1`` rows are processed; presumably the final
# element is the empty string left by a trailing newline -- TODO confirm.
bar = DownloadProgressBar(max=all_count - 1)
output.write(get_features("", head=True))
print("正在生成全部特征 ...")
# zip() pairs each progress tick with its row and stops when the progress
# iterator is exhausted, so the last element of ``dataset`` is never visited.
for _, line in zip(bar.iter(range(all_count - 1)), dataset):
    author_name = line.split(",")[0]
    if not author_name:
        # No author in this row: skip it rather than re-writing the previous
        # author's features (or failing with NameError on the first row).
        continue
    output.write(get_features(author_name))
# Push buffered rows to disk so that anything re-opening sys.argv[2] for
# reading sees every row written above.
output.flush()


def check_dimension():
    """Read the file named by ``sys.argv[2]`` line by line until end of file.

    Only the read loop is visible here; no per-line check appears in this
    part of the source, so what is validated for each line cannot be stated.
    """
    # NOTE(review): the handle is never closed in the visible code -- confirm
    # whether it is closed further down.
    s = open(sys.argv[2], "r")
    # Progress message; the text reads "checking dimensions".
    print("正在检查维数")
    while True:
        line = s.readline()
        # readline() returns "" only at end of file (a blank line is "\n"),
        # so this is the loop's exit condition.
        if line == "":
            break
Example #2
0
            rs['year'] = int(info[2:])
        elif info[:2] == '#c':
            rs['conf'] = info[2:]
        elif info[:2] == '#%':
            rs['ref'].append(int(info[2:]))
    if rs['year'] == None: print(s)
    if rs['year'] > 2013: return False
    return index, rs


# Run handlePaperInfo over every entry of ``papers`` in chunks of 1000 (one
# progress-bar step per chunk), keep the accepted results in ``paper_dict``,
# then rebind ``papers`` to that dict.
paper_dict = {}
all_count = len(papers)
bar_count = int(ceil(all_count / 1000))
bar = DownloadProgressBar(max=bar_count)
chunk_start = 0
for _ in bar.iter(range(bar_count)):
    # Clamp the chunk's upper bound so the final, partial chunk stops at the
    # end of the input.
    chunk_stop = min(chunk_start + 1000, all_count)
    for pos in range(chunk_start, chunk_stop):
        parsed = handlePaperInfo(papers[pos])
        # Results equal to False are skipped; anything else is treated as a
        # (key, record) pair.
        if parsed == False:
            continue
        paper_dict[parsed[0]] = parsed[1]
    chunk_start += 1000
papers = paper_dict

print("正在构建被引用关系")
cnt = 0
for p_index in papers.keys():
    paper = papers[p_index]
    if "ref" in paper: