Exemple #1
0
def get_affiliation_info():
    """Look up affiliation info for the authors found in both author inputs.

    Records loaded from ``aff_input1`` and ``aff_input2`` are chained into a
    single stream and handed to ``get_aff``; the ids it returns are
    de-duplicated, resolved with ``get_aff_infos`` and written with
    ``write_json_lines`` to ``aff_pattern`` formatted with the current local
    time (``%y%m%d_%H%M``).
    """
    # Take the timestamp before any loading so the file name reflects start time.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = aff_pattern.format(date=stamp)
    authors = chain(load_json_lines(aff_input1), load_json_lines(aff_input2))
    # Round-trip through a set to drop duplicate ids (order is not preserved).
    unique_ids = list(set(get_aff(authors)))
    write_json_lines(get_aff_infos(unique_ids), out_path)
def test():
    """Print a short summary line for every paper loaded from ``input_filename``.

    For each record this prints the title, the ``expr`` value stored under
    ``ms_academic`` (``None`` when the key is absent) and the number of
    ``entities`` -- or the string ``'None'`` when ``ms_academic`` has no
    ``expr`` key.
    """
    for paper in load_json_lines(input_filename):
        ms_info = paper['ms_academic']
        print(
            paper['title'],
            # .get(): the original indexed 'expr' unconditionally and raised
            # KeyError for exactly the records the 'None' branch is meant for.
            ms_info.get('expr'),
            len(ms_info['entities']) if 'expr' in ms_info else 'None')
Exemple #3
0
def get_x_from_y(input_file, output_file, get_func, extract_func):
    """Run a load -> extract -> fetch -> write pipeline between two files.

    Args:
        input_file: Passed to ``load_json_lines`` to obtain the input stream.
        output_file: Destination handed to ``write_json_lines``.
        get_func: Forwarded to ``get_infos`` as its ``get_func`` keyword.
        extract_func: Called with the (logged) input stream; its result is
            what ``get_infos`` receives.

    Both the input and the output stream are wrapped in ``log_stream`` with
    the names ``'Input'`` and ``'Output'`` respectively.
    """
    source = log_stream(load_json_lines(input_file), name='Input')
    fetched = get_infos(extract_func(source), get_func=get_func)
    write_json_lines(log_stream(fetched, name='Output'), output_file)
Exemple #4
0
def get_citing_papers_info():
    """Fetch info for the citing papers of the records in ``input_filename2``.

    The loaded records go through ``get_citing_papers`` to obtain paper ids,
    which ``get_paper_infos2`` resolves; the result is written with
    ``write_json_lines`` to ``output_filename_pattern2`` formatted with the
    current local time (``%y%m%d_%H%M``).
    """
    # Timestamp first, so the output name is fixed before any work starts.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = output_filename_pattern2.format(date=stamp)
    citing_ids = get_citing_papers(load_json_lines(input_filename2))
    write_json_lines(get_paper_infos2(citing_ids), out_path)
Exemple #5
0
def get_author_info():
    """Fetch info for the distinct authors of the records in ``author_input2``.

    Author ids returned by ``get_authors`` are de-duplicated, resolved with
    ``get_author_infos`` and written with ``write_json_lines`` to
    ``author_filename_pattern2`` formatted with the current local time
    (``%y%m%d_%H%M``).
    """
    # Timestamp first, so the output name is fixed before any work starts.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = author_filename_pattern2.format(date=stamp)
    # Round-trip through a set to drop duplicate ids (order is not preserved).
    unique_ids = list(set(get_authors(load_json_lines(author_input2))))
    write_json_lines(get_author_infos(unique_ids), out_path)
Exemple #6
0
def get_ms_ac_info1():
    """Fetch paper info for records that have at least one ``ms_academic`` entity.

    Records loaded from ``input_filename1`` are kept only when
    ``p['ms_academic']['entities']`` has non-zero length; the survivors are
    passed to ``get_paper_infos`` and the result is written with
    ``write_json_lines`` to ``output_filename_pattern1`` formatted with the
    current local time (``%y%m%d_%H%M``).
    """
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = output_filename_pattern1.format(date=stamp)
    records = load_json_lines(input_filename1)
    # Lazy filter: nothing is evaluated until the consumer iterates.
    with_entities = (
        rec for rec in records if len(rec['ms_academic']['entities']))
    write_json_lines(get_paper_infos(with_entities), out_path)
def add_ms_ac_info(input_file, output_file):
    """Attach an ``ms_academic`` lookup result to every paper record.

    Args:
        input_file: Passed to ``load_json_lines`` to obtain the paper records.
        output_file: Destination handed to ``write_json_lines``.

    Each record is looked up with an expression built from its normalized
    title (``and(Ti='<title>',Y>=2014)``). The record's own keys are merged
    in after the lookup result, so an ``ms_academic`` key already present on
    the record takes precedence over the fresh lookup.
    """
    def annotate(paper):
        # NOTE(review): the helper is spelled get_mc_ac_paper here while other
        # code in this file calls get_ms_ac_paper -- confirm which is intended.
        query = "and(Ti='" + normalize_title(paper['title']) + "',Y>=2014)"
        record = {'ms_academic': get_mc_ac_paper(expr=query)}
        record.update(paper)
        return record

    source = log_stream(load_json_lines(input_file), name='Input')
    # delay(..., 2) wraps the stream before lookup; presumably it throttles
    # the requests -- confirm against its definition.
    annotated = map(annotate, delay(source, 2))
    write_json_lines(log_stream(annotated, name='Output'), output_file)
def add_ms_ac_info():
    """Attach an ``ms_academic`` lookup result to every paper in ``input_filename``.

    Each title is lower-cased, every ``: ' , ? ! . -`` character (plus one
    optional following space) is replaced by a single space, and one trailing
    space is stripped. The cleaned title is embedded in the expression
    ``and(Ti='<title>',Y>=2014)`` and passed to ``get_ms_ac_paper``. The
    record's own keys are merged in after the lookup result, so an existing
    ``ms_academic`` key on the record takes precedence.

    Output goes through ``write_json_lines`` to ``output_filename_pattern``
    formatted with the current local time (``%y%m%d_%H%M``).
    """
    filename = output_filename_pattern.format(
        date=datetime.datetime.now().strftime("%y%m%d_%H%M"))

    # Raw-string patterns: the previous non-raw literal relied on invalid
    # escape sequences (\: \, \? \! \. \-), which emit warnings on current
    # Python and are slated to become errors. The character class is
    # unchanged. Compiled once here instead of per paper.
    punctuation = re.compile(r"[:',?!.\-] ?")
    trailing_space = re.compile(r" $")

    def title_query(title):
        cleaned = trailing_space.sub("", punctuation.sub(" ", title.lower()))
        return "and(Ti='" + cleaned + "',Y>=2014)"

    papers = load_json_lines(input_filename)
    papers = ({
        'ms_academic': get_ms_ac_paper(expr=title_query(p['title'])),
        **p
    } for p in papers)
    write_json_lines(papers, filename)
Exemple #9
0
def write_author_to_neo():
    """Load every file listed in ``author_fn`` and pass its records to
    ``add_author_to_graph``.

    Files are processed in the order ``author_fn`` yields them.
    """
    # The position in author_fn is not needed here, so iterate the file names
    # directly rather than through enumerate().
    for filename in author_fn:
        add_author_to_graph(load_json_lines(filename))
Exemple #10
0
def write_aff_to_neo():
    """Load the records in ``affs_fn`` and pass them to ``add_aff_to_graph``."""
    add_aff_to_graph(load_json_lines(affs_fn))
Exemple #11
0
def write_to_neo():
    """Load every file listed in ``paper_fn`` and pass its records to
    ``add_papers_to_graph``.

    The zero-based position of each file within ``paper_fn`` is forwarded as
    the second argument.
    """
    for position, path in enumerate(paper_fn):
        add_papers_to_graph(load_json_lines(path), position)
def gen_csv(input_file, output_file, extract_func):
    """Convert the records of ``input_file`` to CSV via ``to_df`` / ``to_csv``.

    Args:
        input_file: Passed to ``load_json_lines`` to obtain the records.
        output_file: Destination handed to ``to_csv``.
        extract_func: Forwarded to ``to_df`` together with the record stream.
    """
    frame = to_df(load_json_lines(input_file), extract_func)
    to_csv(frame, output_file)