def get_affiliation_info():
    """Collect affiliation details for the records in both affiliation inputs.

    Records from ``aff_input1`` and ``aff_input2`` are chained together and
    handed to ``get_aff``; the ids it yields are de-duplicated (order is not
    preserved because a ``set`` is used), resolved through ``get_aff_infos``
    and written as JSON lines to a file named from ``aff_pattern`` with a
    ``%y%m%d_%H%M`` timestamp.
    """
    # The timestamp is taken before any input is read.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = aff_pattern.format(date=stamp)

    # Both inputs are opened up front, in order, then consumed as one stream.
    streams = [load_json_lines(src) for src in (aff_input1, aff_input2)]
    records = chain(*streams)

    unique_ids = list(set(get_aff(records)))
    details = get_aff_infos(unique_ids)
    write_json_lines(details, out_path)
def test():
    """Print a short summary line for every record in ``input_filename``.

    Each line shows the paper title, the ``expr`` value stored under
    ``ms_academic`` and the number of ``entities``.  When ``ms_academic`` has
    no ``expr`` key, ``None`` is printed for the expression and the string
    ``'None'`` for the count.
    """
    for paper in load_json_lines(input_filename):
        ms_info = paper['ms_academic']
        print(
            paper['title'],
            # Use .get so a record without 'expr' reaches the fallback below
            # instead of raising KeyError on this argument.
            ms_info.get('expr'),
            len(ms_info['entities']) if 'expr' in ms_info else 'None')
def get_x_from_y(input_file, output_file, get_func, extract_func):
    """Run a generic extract -> fetch -> write pass over a JSON-lines file.

    Args:
        input_file: Path passed to ``load_json_lines``.
        output_file: Path passed to ``write_json_lines``.
        get_func: Forwarded to ``get_infos`` as its ``get_func`` keyword.
        extract_func: Called with the (logged) input stream; its result is
            what ``get_infos`` receives.
    """
    # Input side: loaded records are wrapped by log_stream under 'Input'.
    source = log_stream(load_json_lines(input_file), name='Input')
    extracted = extract_func(source)

    # Output side: fetched infos are wrapped by log_stream under 'Output'.
    fetched = get_infos(extracted, get_func=get_func)
    results = log_stream(fetched, name='Output')
    write_json_lines(results, output_file)
def get_citing_papers_info():
    """Fetch info for the papers returned by ``get_citing_papers``.

    Reads ``input_filename2``, passes the records to ``get_citing_papers``,
    resolves the resulting ids with ``get_paper_infos2`` and writes the result
    as JSON lines to a file named from ``output_filename_pattern2`` with a
    ``%y%m%d_%H%M`` timestamp.
    """
    # The timestamp is taken before any input is read.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = output_filename_pattern2.format(date=stamp)

    source_papers = load_json_lines(input_filename2)
    citing_ids = get_citing_papers(source_papers)
    citing_infos = get_paper_infos2(citing_ids)
    write_json_lines(citing_infos, out_path)
def get_author_info():
    """Fetch info for every distinct author id found in ``author_input2``.

    The ids returned by ``get_authors`` are de-duplicated through a ``set``
    (so their order is not preserved), resolved with ``get_author_infos`` and
    written as JSON lines to a file named from ``author_filename_pattern2``
    with a ``%y%m%d_%H%M`` timestamp.
    """
    # The timestamp is taken before any input is read.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = author_filename_pattern2.format(date=stamp)

    records = load_json_lines(author_input2)
    unique_ids = list(set(get_authors(records)))
    details = get_author_infos(unique_ids)
    write_json_lines(details, out_path)
def get_ms_ac_info1():
    """Fetch paper infos for records that have at least one entity.

    Records from ``input_filename1`` whose ``ms_academic.entities`` has a
    non-zero length are passed to ``get_paper_infos``; the result is written
    as JSON lines to a file named from ``output_filename_pattern1`` with a
    ``%y%m%d_%H%M`` timestamp.
    """
    # The timestamp is taken before any input is read.
    stamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    out_path = output_filename_pattern1.format(date=stamp)

    records = load_json_lines(input_filename1)
    # Lazy filter: keep only records with a non-empty entity list.
    with_entities = (
        rec for rec in records if len(rec['ms_academic']['entities'])
    )
    infos = get_paper_infos(with_entities)
    write_json_lines(infos, out_path)
def add_ms_ac_info(input_file, output_file):
    """Attach an ``ms_academic`` lookup result to each record of a file.

    Args:
        input_file: Path passed to ``load_json_lines``.
        output_file: Path passed to ``write_json_lines``.

    For every record a query ``and(Ti='<normalized title>',Y>=2014)`` is built
    and sent through ``get_mc_ac_paper``.  The record's own keys are unpacked
    last, so an ``ms_academic`` key already present in the record overrides
    the fresh lookup result.

    NOTE(review): another ``add_ms_ac_info`` taking no arguments exists in
    this file; whichever is defined later shadows the other -- confirm.
    NOTE(review): the sibling calls ``get_ms_ac_paper`` while this one calls
    ``get_mc_ac_paper``; confirm the latter name is defined.
    """
    def _with_lookup(record):
        # Build the title query, run the lookup, then merge the record on top.
        query = "and(Ti='" + normalize_title(record['title']) + "',Y>=2014)"
        return {'ms_academic': get_mc_ac_paper(expr=query), **record}

    source = log_stream(load_json_lines(input_file), name='Input')
    # Records pass through delay(..., 2) before each lookup; presumably this
    # throttles requests -- confirm against the helper.
    enriched = (_with_lookup(rec) for rec in delay(source, 2))
    write_json_lines(log_stream(enriched, name='Output'), output_file)
def add_ms_ac_info():
    """Attach an ``ms_academic`` lookup result to each record of ``input_filename``.

    For every record the title is lower-cased, each of the characters
    ``: ' , ? ! . -`` (together with one optional following space) is replaced
    by a single space, and one trailing space is removed.  The cleaned title
    is embedded in the query ``and(Ti='<title>',Y>=2014)`` and sent through
    ``get_ms_ac_paper``.  The record's own keys are unpacked last, so an
    existing ``ms_academic`` key in the record overrides the lookup result.
    Output goes, as JSON lines, to a file named from
    ``output_filename_pattern`` with a ``%y%m%d_%H%M`` timestamp.

    NOTE(review): another ``add_ms_ac_info(input_file, output_file)`` exists
    in this file; whichever is defined later shadows the other -- confirm.
    """
    filename = output_filename_pattern.format(
        date=datetime.datetime.now().strftime("%y%m%d_%H%M"))
    papers = load_json_lines(input_filename)

    def _title_query(title):
        # Raw strings: the previous non-raw pattern relied on invalid escape
        # sequences (e.g. "\:"), which newer Python versions warn about.
        # The character class matches exactly the same characters.
        cleaned = re.sub(r"[:',?!.\-] ?", " ", title.lower())
        cleaned = re.sub(r" $", "", cleaned)
        return "and(Ti='" + cleaned + "',Y>=2014)"

    # Lazy: lookups happen as write_json_lines consumes the generator.
    papers = ({
        'ms_academic': get_ms_ac_paper(expr=_title_query(p['title'])),
        **p
    } for p in papers)
    write_json_lines(papers, filename)
def write_author_to_neo():
    """Load each file listed in ``author_fn`` and add its records to the graph.

    Every file is read with ``load_json_lines`` and handed to
    ``add_author_to_graph`` in the order given by ``author_fn``.
    """
    # The enumerate() index was never used here, so iterate the names directly.
    # NOTE(review): write_to_neo passes a per-file level to its sink; confirm
    # that authors intentionally carry no level.
    for filename in author_fn:
        recs = load_json_lines(filename)
        add_author_to_graph(recs)
def write_aff_to_neo():
    """Load the records in ``affs_fn`` and pass them to ``add_aff_to_graph``."""
    affiliation_records = load_json_lines(affs_fn)
    add_aff_to_graph(affiliation_records)
def write_to_neo():
    """Load each file listed in ``paper_fn`` and add its papers to the graph.

    The zero-based position of a file within ``paper_fn`` is passed to
    ``add_papers_to_graph`` as the second argument alongside its records.
    """
    for position, path in enumerate(paper_fn):
        paper_records = load_json_lines(path)
        add_papers_to_graph(paper_records, position)
def gen_csv(input_file, output_file, extract_func):
    """Convert a JSON-lines file to CSV through ``to_df`` and ``to_csv``.

    Args:
        input_file: Path passed to ``load_json_lines``.
        output_file: Destination passed to ``to_csv``.
        extract_func: Forwarded to ``to_df`` together with the loaded records.
    """
    records = load_json_lines(input_file)
    frame = to_df(records, extract_func)
    to_csv(frame, output_file)