def main():
    """Harvest metadata from every active OAI source in harvest_list.csv.

    For each row whose "active" cell is "TRUE", the harvester is invoked
    and its output file ("out.csv") is moved into the source's target
    directory under a date-stamped name. Inactive sources are skipped
    with a warning.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--selective_harvesting", action="store_true",
                        help=ARG_HELP_STRINGS["selective_harvesting"])
    args = parser.parse_args()
    with open("harvest_list.csv", "r") as harvest_list:
        reader = oat.UnicodeDictReader(harvest_list, encoding="utf-8")
        for line in reader:
            basic_url = line["basic_url"]
            if line["active"] == "TRUE":
                oat.print_g("Starting harvest from source " + basic_url)
                # An empty CSV cell means "parameter not set". Normalize
                # every optional field to None, not just "processing" —
                # previously empty strings were passed through for
                # metadata_prefix and oai_set.
                oai_set = line["oai_set"] if len(line["oai_set"]) > 0 else None
                prefix = line["metadata_prefix"] if len(line["metadata_prefix"]) > 0 else None
                processing = line["processing"] if len(line["processing"]) > 0 else None
                oat.oai_harvest(basic_url, prefix, oai_set, processing,
                                args.selective_harvesting)
                # Move the harvester's fixed output file to a
                # date-stamped file in the source directory.
                now = datetime.datetime.now()
                date_string = now.strftime("%Y_%m_%d")
                file_name = "oai_harvest_" + date_string + ".csv"
                target = os.path.join("..", line["directory"], file_name)
                os.rename("out.csv", target)
            else:
                oat.print_y("Skipping inactive source " + basic_url)
def main():
    """Run the OAI harvester over every active entry in harvest_list.csv.

    The harvester's fixed output file ("out.csv") is renamed into the
    source's directory with a date stamp; inactive sources only emit a
    warning.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--selective_harvesting", action="store_true",
                        help=ARG_HELP_STRINGS["selective_harvesting"])
    args = parser.parse_args()

    def _opt(cell):
        # An empty CSV cell stands for "parameter not given".
        return cell if len(cell) > 0 else None

    with open("harvest_list.csv", "r") as harvest_list:
        reader = oat.UnicodeDictReader(harvest_list, encoding="utf-8")
        for row in reader:
            basic_url = row["basic_url"]
            if row["active"] != "TRUE":
                oat.print_y("Skipping inactive source " + basic_url)
                continue
            oat.print_g("Starting harvest from source " + basic_url)
            oat.oai_harvest(basic_url,
                            _opt(row["metadata_prefix"]),
                            _opt(row["oai_set"]),
                            _opt(row["processing"]),
                            args.selective_harvesting)
            date_string = datetime.datetime.now().strftime("%Y_%m_%d")
            file_name = "oai_harvest_" + date_string + ".csv"
            target = os.path.join("..", row["directory"], file_name)
            os.rename("out.csv", target)
def main():
    """Harvest all active sources and merge the results into the
    per-source master files.

    Newly found articles (those not yet present in
    "all_harvested_articles.csv") are written to a date-stamped
    "new_articles_*.csv" in the source directory.
    """
    with open("harvest_list.csv", "r") as harvest_list:
        reader = DictReader(harvest_list)
        for line in reader:
            basic_url = line["basic_url"]
            if line["active"] == "TRUE":
                oat.print_g("Starting harvest from source " + basic_url)
                # Empty CSV cells mean "parameter not set".
                oai_set = line["oai_set"] if len(line["oai_set"]) > 0 else None
                prefix = line["metadata_prefix"] if len(line["metadata_prefix"]) > 0 else None
                processing = line["processing"] if len(line["processing"]) > 0 else None
                directory = os.path.join("..", line["directory"])
                articles = oat.oai_harvest(basic_url, prefix, oai_set, processing)
                harvest_file_path = os.path.join(directory, "all_harvested_articles.csv")
                enriched_file_path = os.path.join(directory, "all_harvested_articles_enriched.csv")
                new_article_dicts, header = integrate_changes(articles, harvest_file_path, False)
                integrate_changes(articles, enriched_file_path, True)
                if header is None:
                    # if no header was returned, an "all_harvested" file doesn't exist yet.
                    # Materialize the dict view into a list so the header can be
                    # used as an ordinary CSV row (a bare dict_values object is
                    # not a list of column names).
                    header = list(oat.OAI_COLLECTION_CONTENT.values())
                new_articles = [header]
                for article_dict in new_article_dicts:
                    new_articles.append([article_dict[key] for key in header])
                now = datetime.datetime.now()
                date_string = now.strftime("%Y_%m_%d")
                file_name = "new_articles_" + date_string + ".csv"
                target = os.path.join(directory, file_name)
                with open(target, "w") as t:
                    writer = oat.OpenAPCUnicodeWriter(t, openapc_quote_rules=True,
                                                      has_header=True)
                    writer.write_rows(new_articles)
            else:
                oat.print_y("Skipping inactive source " + basic_url)
def main():
    """Harvest every active source and integrate the results.

    Changes are merged into the harvested / enriched master files (and,
    when present, the DEAL-Wiley variant); articles not seen before are
    written to a date-stamped "new_articles_*.csv" in the source
    directory.
    """
    def _opt(cell):
        # An empty CSV cell stands for "parameter not given".
        return cell if len(cell) > 0 else None

    with open("harvest_list.csv", "r") as harvest_list:
        for source in DictReader(harvest_list):
            basic_url = source["basic_url"]
            if source["active"] != "TRUE":
                oat.print_y("Skipping inactive source " + basic_url)
                continue
            oat.print_g("Starting harvest from source " + basic_url)
            oai_set = _opt(source["oai_set"])
            prefix = _opt(source["metadata_prefix"])
            processing = _opt(source["processing"])
            directory = os.path.join("..", source["directory"])
            articles = oat.oai_harvest(basic_url, prefix, oai_set, processing)
            harvest_file_path = os.path.join(directory, "all_harvested_articles.csv")
            enriched_file_path = os.path.join(directory,
                                              "all_harvested_articles_enriched.csv")
            new_article_dicts, header = integrate_changes(articles, harvest_file_path,
                                                          False)
            integrate_changes(articles, enriched_file_path, True)
            # The DEAL-Wiley master file is optional — only merge if it exists.
            deal_wiley_path = os.path.join(directory,
                                           "all_harvested_articles_enriched_deal_wiley.csv")
            if os.path.isfile(deal_wiley_path):
                integrate_changes(articles, deal_wiley_path, True)
            if header is None:
                # No header means an "all_harvested" file doesn't exist yet.
                header = list(oat.OAI_COLLECTION_CONTENT.keys())
            rows = [header]
            rows.extend([article[key] for key in header]
                        for article in new_article_dicts)
            date_string = datetime.datetime.now().strftime("%Y_%m_%d")
            target = os.path.join(directory, "new_articles_" + date_string + ".csv")
            with open(target, "w") as out_file:
                writer = oat.OpenAPCUnicodeWriter(out_file, openapc_quote_rules=True,
                                                  has_header=True)
                writer.write_rows(rows)
def main():
    """Harvest every active source listed in harvest_list.csv.

    Command line flags:
      -i/--integrate  apply changes to the master files (otherwise the
                      integration step is run without writing back)
      -o/--output     keep a per-source harvest output file (suffix is
                      the source directory's basename)

    Articles not seen before are written to a date-stamped
    "new_articles_*.csv" in the source directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--integrate", help=ARG_HELP_STRINGS["integrate"],
                        action="store_true")
    parser.add_argument("-o", "--output", help=ARG_HELP_STRINGS["output"],
                        action="store_true")
    args = parser.parse_args()

    def _opt(cell):
        # An empty CSV cell stands for "parameter not given".
        return cell if len(cell) > 0 else None

    with open("harvest_list.csv", "r") as harvest_list:
        for source in DictReader(harvest_list):
            basic_url = source["basic_url"]
            if source["active"] != "TRUE":
                oat.print_y("Skipping inactive source " + basic_url)
                continue
            oat.print_g("Starting harvest from source " + basic_url)
            oai_set = _opt(source["oai_set"])
            prefix = _opt(source["metadata_prefix"])
            processing = _opt(source["processing"])
            directory = os.path.join("..", source["directory"])
            out_file_suffix = (os.path.basename(source["directory"])
                               if args.output else None)
            articles = oat.oai_harvest(basic_url, prefix, oai_set, processing,
                                       out_file_suffix)
            harvest_file_path = os.path.join(directory, "all_harvested_articles.csv")
            enriched_file_path = os.path.join(directory,
                                              "all_harvested_articles_enriched.csv")
            # NOTE(review): the fourth argument appears to toggle a dry run
            # (no write-back unless --integrate was given) — confirm against
            # integrate_changes.
            no_write = not args.integrate
            new_article_dicts, header = integrate_changes(articles, harvest_file_path,
                                                          False, no_write)
            integrate_changes(articles, enriched_file_path, True, no_write)
            if header is None:
                # No header means an "all_harvested" file doesn't exist yet.
                header = list(oat.OAI_COLLECTION_CONTENT.keys())
            rows = [header]
            for article in new_article_dicts:
                rows.append([article[key] for key in header])
            date_string = datetime.datetime.now().strftime("%Y_%m_%d")
            target = os.path.join(directory, "new_articles_" + date_string + ".csv")
            with open(target, "w") as out_file:
                writer = oat.OpenAPCUnicodeWriter(out_file, openapc_quote_rules=True,
                                                  has_header=True)
                writer.write_rows(rows)