def run(
        target_folder, journals_path=None, authors_path=None,
        keywords_path=None, mesh_terms_path=None, from_date=None,
        to_date=None, date_interval_in_years=None):
    """Tabulate PubMed-style search counts and write CSV + log outputs.

    Loads journals/authors/keywords/MeSH terms from the given text files
    (one entry per line), splits [from_date, to_date] into intervals of
    ``date_interval_in_years``, counts matches per query per interval via
    ``tabulate``, and writes ``results.csv`` plus ``log_results.txt`` into
    ``target_folder``.  Prints the output paths as required by the
    crosscompute tool convention.
    """
    target_path = join(target_folder, 'results.csv')
    # Input retrieval; each loader tolerates a None path (presumably
    # returning an empty list -- TODO confirm against load_unique_lines)
    journals = load_unique_lines(journals_path)
    text_words = load_unique_lines(keywords_path)
    mesh_terms = load_unique_lines(mesh_terms_path)
    authors = load_unique_lines(authors_path)
    # Get list containing tuples of date ranges based on interval
    try:
        date_ranges = get_date_ranges(
            from_date, to_date, date_interval_in_years)
    except ToolError as e:
        exit('to_date.error = %s' % e)
    # Journals take precedence over authors as the query dimension
    query_list = journals or authors
    search_journals = bool(journals)
    # Tabulate keywords
    results = tabulate(
        query_list, date_ranges, text_words, mesh_terms, search_journals)
    search_counts = results['search_counts']
    queries = results['queries']
    query_totals = results['query_totals']
    # Output setup
    log_path = join(target_folder, 'log_results.txt')
    with open(log_path, 'w') as f:
        f.write(queries)
        f.write(query_totals)
    # Row labels like "YYYY-MM-DD to YYYY-MM-DD" ([:10] trims any time part)
    dates_index = [
        str(x)[:10] + ' to ' + str(y)[:10] for x, y in date_ranges]
    results_table = DataFrame(search_counts, index=dates_index)
    results_table.to_csv(target_path)
    # Required print statement for crosscompute tool
    print('results_table_path = ' + target_path)
    print('log_text_path = ' + log_path)
def run(
        target_folder, journals_path=None, authors_path=None,
        keywords_path=None, mesh_terms_path=None, from_date=None,
        to_date=None, date_interval_in_years=None):
    """Count search hits per query per date interval and save the results.

    Writes ``results.csv`` and ``log_results.txt`` under *target_folder*
    and prints both paths in the crosscompute tool format.

    NOTE(review): this is a duplicate definition of ``run`` -- at import
    time it shadows the earlier one.  Confirm which version is intended
    and remove the other.
    """
    results_csv_path = join(target_folder, 'results.csv')
    # Load every input list (one unique entry per line).
    journals = load_unique_lines(journals_path)
    text_words = load_unique_lines(keywords_path)
    mesh_terms = load_unique_lines(mesh_terms_path)
    authors = load_unique_lines(authors_path)
    # Split the overall period into (start, end) tuples per interval.
    try:
        date_ranges = get_date_ranges(
            from_date, to_date, date_interval_in_years)
    except ToolError as e:
        exit('to_date.error = %s' % e)
    if journals:
        query_list, search_journals = journals, True
    else:
        query_list, search_journals = authors, False
    # Run the tabulation and unpack its result dictionary.
    tabulated = tabulate(
        query_list, date_ranges, text_words, mesh_terms, search_journals)
    search_counts = tabulated['search_counts']
    queries = tabulated['queries']
    query_totals = tabulated['query_totals']
    # Write the human-readable log first, then the CSV table.
    log_path = join(target_folder, 'log_results.txt')
    with open(log_path, 'w') as log_file:
        log_file.write(queries)
        log_file.write(query_totals)
    dates_index = [
        '%s to %s' % (str(start)[:10], str(end)[:10])
        for start, end in date_ranges]
    DataFrame(search_counts, index=dates_index).to_csv(results_csv_path)
    # Required print statement for crosscompute tool
    print('results_table_path = ' + results_csv_path)
    print('log_text_path = ' + log_path)
def test_load_unique_lines():
    """load_unique_lines returns the deduplicated lines of the fixture file."""
    fixture_path = join(TEST_FOLDER, 'lines.txt')
    result = load_unique_lines(fixture_path)
    assert result == ['one', 'two']