def main(args):
    """Run the Champlitte batch according to the flags carried by ``args``.

    Records are loaded from ``2006_7.csv``.  The flags read here are:

    * ``args.prepare_alignment`` -- call ``write_dict_as_wiki`` for every
      item returned by ``count_metadata_values()``;
    * ``args.post_process`` -- apply the JOCONDE field processors;
    * ``args.upload`` / ``args.dry_run`` -- call ``doSingle()`` or
      ``dry_run()`` on the ``DataIngestionBot`` built over the records.

    The collection is always dumped as XML to standard output before the
    bot is created.
    """
    champlitte = ChamplitteMetadataCollection()
    # Alternative input file: 'DV5_M0354_2006_9.csv'
    champlitte.retrieve_metadata_from_csv("2006_7.csv", delimiter=",")
    alignment_template = "User:Jean-Frédéric/AlignmentRow".encode("utf-8")

    if args.prepare_alignment:
        value_counts = champlitte.count_metadata_values()
        for field_name, counted in value_counts.items():
            champlitte.write_dict_as_wiki(
                counted, field_name, "wiki", alignment_template
            )

    if args.post_process:
        # No field is aligned for this batch: the lookup is still performed,
        # but none of the processors below consumes its result.
        commonprocessors.retrieve_metadata_alignments([], alignment_template)
        field_processors = {
            # Disabled:
            # 'JOCONDE_TECH': commonprocessors.map_and_apply_technique(separator=";"),
            "JOCONDE_DIMS": (commonprocessors.process_DIMS, {}),
            "JOCONDE_DOMN": commonprocessors.split_and_keep_as_list(
                separator=";"
            ),
            "JOCONDE_DESC": commonprocessors.wrap_with_template(template="fr"),
            "JOCONDE_REF": commonprocessors.wrap_within_pattern(
                pattern="{{online databases|{{Joconde|%s}}}}"
            ),
            "JOCONDE_DACQ": commonprocessors.wrap_within_pattern(
                pattern="{{ProvenanceEvent|time=%s|type=acquisition|newowner=Musées de la Haute-Saône}}"
            ),
            "JOCONDE_PERI": (commonprocessors.look_for_date, {}),
        }
        # The statistics report is disabled, so both results go unused; the
        # two-name unpacking is kept as in the original.
        _counter, _count_per_file = champlitte.post_process_collection(
            field_processors
        )
        # metadata.categorisation_statistics(_counter, _count_per_file)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    record_iterator = iter(champlitte.records)

    # Dump the whole collection as XML on standard output.
    xml_buffer = StringIO()
    champlitte.write_metadata_to_xml(xml_buffer)
    print(xml_buffer.getvalue())

    # Title layout; an alternative variable part was
    # "%(JOCONDE_TITR)s (%(JOCONDE_DENO)s)".
    upload_bot = DataIngestionBot(
        reader=record_iterator,
        front_titlefmt="",
        rear_titlefmt=" - Musées de la Haute-Saône - %(JOCONDE_REF)s",
        variable_titlefmt="%(JOCONDE_DENO)s",
        pagefmt="User:Jean-Frédéric/Champlitte/Ingestion".encode("utf-8"),
        subst=False,
        verifyDescription=True,
    )

    if args.upload:
        upload_bot.doSingle()
    elif args.dry_run:
        upload_bot.dry_run()
def main(args):
    """Run the Archives Nationales batch according to the flags in ``args``.

    Records are loaded from ``Metadata_ArchivesNationales5.csv`` (``;``
    delimited).  ``args.prepare_alignment`` calls ``write_dict_as_wiki``
    for every item returned by ``count_metadata_values()``;
    ``args.post_process`` applies the field processors and passes the
    results to ``metadata.categorisation_statistics``.  A
    ``DataIngestionBot`` is then built over ``records[2:]`` and
    ``args.upload`` / ``args.dry_run`` select ``run()`` or ``dry_run()``.
    """
    archives = ArchivesMetadataCollection()
    archives.retrieve_metadata_from_csv(
        'Metadata_ArchivesNationales5.csv', delimiter=';')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        value_counts = archives.count_metadata_values()
        for field_name, counted in value_counts.items():
            archives.write_dict_as_wiki(
                counted, field_name, 'wiki', alignment_template)

    if args.post_process:
        # No field is aligned here: the lookup is still performed, but its
        # result is not consumed by any processor below.
        commonprocessors.retrieve_metadata_alignments([], alignment_template)
        field_processors = {
            'Support': commonprocessors.map_and_apply_technique(),
            'Dimensions': (commonprocessors.parse_format, {}),
            'Date': (commonprocessors.look_for_date, {}),
            'Analyse': (commonprocessors.remove_linebreaks, {}),
            'Cote du document': (commonprocessors.remove_linebreaks, {}),
            'Titre': (commonprocessors.remove_linebreaks, {}),
        }
        counter, count_per_file = archives.post_process_collection(
            field_processors)
        metadata.categorisation_statistics(counter, count_per_file)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    # The first two records are skipped.
    remaining_records = iter(archives.records[2:])
    upload_bot = DataIngestionBot(
        reader=remaining_records,
        front_titlefmt="",
        rear_titlefmt=" - Archives Nationales - %(Cote du document)s",
        variable_titlefmt="%(Titre)s",
        pagefmt='Commons:Archives_Nationales/Ingestion'.encode('utf-8'),
        subst=True,
        verifyDescription=False,
    )

    if args.upload:
        upload_bot.run()
    elif args.dry_run:
        upload_bot.dry_run()
def main(args):
    """Process and optionally upload the Archives Nationales records.

    Reads ``Metadata_ArchivesNationales5.csv`` with ``;`` as delimiter and
    then acts on these attributes of ``args``:

    * ``prepare_alignment`` -- call ``write_dict_as_wiki`` once per item of
      ``count_metadata_values()``;
    * ``post_process`` -- apply the field processors and hand the two
      results to ``metadata.categorisation_statistics``;
    * ``upload`` / ``dry_run`` -- call ``run()`` or ``dry_run()`` on a
      ``DataIngestionBot`` fed with ``records[2:]``.
    """
    source_csv = 'Metadata_ArchivesNationales5.csv'
    wiki_row_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    records = ArchivesMetadataCollection()
    records.retrieve_metadata_from_csv(source_csv, delimiter=';')

    if args.prepare_alignment:
        for name, values in records.count_metadata_values().items():
            records.write_dict_as_wiki(values, name, 'wiki', wiki_row_template)

    if args.post_process:
        # The alignment lookup runs with an empty field list and its result
        # is not used by the processors configured below.
        commonprocessors.retrieve_metadata_alignments([], wiki_row_template)
        processors_by_field = {
            'Support': commonprocessors.map_and_apply_technique(),
            'Dimensions': (commonprocessors.parse_format, {}),
            'Date': (commonprocessors.look_for_date, {}),
            'Analyse': (commonprocessors.remove_linebreaks, {}),
            'Cote du document': (commonprocessors.remove_linebreaks, {}),
            'Titre': (commonprocessors.remove_linebreaks, {}),
        }
        stats_counter, stats_per_file = records.post_process_collection(
            processors_by_field)
        metadata.categorisation_statistics(stats_counter, stats_per_file)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    bot_settings = {
        'reader': iter(records.records[2:]),  # skips the first two records
        'front_titlefmt': "",
        'rear_titlefmt': " - Archives Nationales - %(Cote du document)s",
        'variable_titlefmt': "%(Titre)s",
        'pagefmt': 'Commons:Archives_Nationales/Ingestion'.encode('utf-8'),
        'subst': True,
        'verifyDescription': False,
    }
    ingestion_bot = DataIngestionBot(**bot_settings)

    if args.upload:
        ingestion_bot.run()
    elif args.dry_run:
        ingestion_bot.dry_run()
def main(args):
    """Run the Fonds Berthelé batch according to the flags in ``args``.

    Records are loaded from ``Test-Export_Berthele.xml``.
    ``args.prepare_alignment`` calls ``write_dict_as_wiki`` for every item
    returned by ``count_metadata_values()``; ``args.post_process`` applies
    the field processors and prints the categorisation statistics.  A
    ``DataIngestionBot`` is then built over the records and ``args.upload``
    / ``args.dry_run`` select ``run()`` or ``dry_run()``.
    """
    berthele = BertheleMetadataCollection()
    berthele.retrieve_metadata_from_xml('Test-Export_Berthele.xml', 'DocsFigures')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        value_counts = berthele.count_metadata_values()
        for field_name, counted in value_counts.items():
            berthele.write_dict_as_wiki(
                counted, field_name, 'wiki', alignment_template)

    if args.post_process:
        aligned_fields = ['geoname', 'persname', 'subject', 'corpname']
        mapper = commonprocessors.retrieve_metadata_alignments(
            aligned_fields, alignment_template)
        field_processors = {
            'Format': (processors.parse_format, {}),
            'Analyse': (processors.look_for_date, {}),
        }
        # Every aligned field goes through the same alignment processor,
        # each with its own keyword-argument dict.
        for aligned in aligned_fields:
            field_processors[aligned] = (
                commonprocessors.process_with_alignment, {'mapper': mapper})
        # Disabled: 'Technique': commonprocessors.map_and_apply_technique
        counter, count_per_file = berthele.post_process_collection(
            field_processors)
        print(metadata.categorisation_statistics(counter, count_per_file))

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    record_iterator = iter(berthele.records)
    template_name = 'Commons:Batch_uploading/Fonds_Berthelé/Ingestion'.decode('utf-8').encode('utf-8')
    # NOTE(review): front_titlefmt, rear_titlefmt and variable_titlefmt are
    # not assigned in this function -- presumably module-level names; confirm.
    upload_bot = DataIngestionBot(
        reader=record_iterator,
        front_titlefmt=front_titlefmt,
        rear_titlefmt=rear_titlefmt,
        variable_titlefmt=variable_titlefmt,
        pagefmt=template_name,
        verifyDescription=False,
    )

    if args.upload:
        upload_bot.run()
    elif args.dry_run:
        # The values built below are only consumed by disabled debug prints.
        string = StringIO()
        param_entries = []
        for field in berthele.count_metadata_values().keys():
            param_entries.append('"%s": {"label": "%s"},' % (field, field))
        template_data = """<templatedata> { "description": "Ingestion template", "params": { %s } </templatedata> """ % '\n'.join(param_entries)
        # print template_data
        mapping = []
        for field in berthele.count_metadata_values().keys():
            mapping.append('"%s": ["%s"]' % (field, field))
        # print '{%s}' % ','.join(mapping)
        # string = StringIO()
        # collection.write_metadata_to_xml(string)
        # print string.getvalue()
        upload_bot.dry_run()
def main(args):
    """Run the MH IDF batch according to the flags carried by ``args``.

    Records are loaded from ``error.csv`` (``;`` delimited).
    ``args.prepare_alignment`` calls ``write_dict_as_wiki`` for every item
    returned by ``count_metadata_values()``; ``args.post_process`` applies
    the field processors and prints the categorisation statistics.  A
    ``DataIngestionBot`` is then built over the records: ``args.upload``
    calls ``doSingle()``, while ``args.dry_run`` prints the collection as
    XML instead of using the bot.
    """
    mh_idf = MHIDFMetadataCollection()
    # Alternative input file:
    # 'photographies-serie-monuments-historiques-1851-a-1914.csv'
    mh_idf.retrieve_metadata_from_csv('error.csv', delimiter=';')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        value_counts = mh_idf.count_metadata_values()
        for field_name, counted in value_counts.items():
            mh_idf.write_dict_as_wiki(
                counted, field_name, 'wiki', alignment_template)

    if args.post_process:
        aligned_fields = ['autp', 'datpv', 'edif', 'lieucor']
        mapper = commonprocessors.retrieve_metadata_alignments(
            aligned_fields, alignment_template)
        field_processors = {
            'wgs84': commonprocessors.split_and_keep_as_list(separator=','),
            'lbase': look_for_MH_titles(separator=';'),
        }
        # Every aligned field goes through the same alignment processor,
        # each with its own keyword-argument dict.
        for aligned in aligned_fields:
            field_processors[aligned] = (
                commonprocessors.process_with_alignment, {'mapper': mapper})
        field_processors['insee'] = insee_to_commonscat()
        counter, count_per_file = mh_idf.post_process_collection(
            field_processors)
        print(metadata.categorisation_statistics(counter, count_per_file))

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    record_iterator = iter(mh_idf.records)
    # NOTE(review): front_titlefmt, rear_titlefmt and variable_titlefmt are
    # not assigned in this function -- presumably module-level names; confirm.
    upload_bot = DataIngestionBot(
        reader=record_iterator,
        front_titlefmt=front_titlefmt,
        rear_titlefmt=rear_titlefmt,
        variable_titlefmt=variable_titlefmt,
        pagefmt='User:Jean-Frédéric/MH_IDF/Ingestion'.encode('utf-8'),
    )

    if args.upload:
        upload_bot.doSingle()
    elif args.dry_run:
        # The dry run only dumps the collection as XML on standard output.
        xml_buffer = StringIO()
        mh_idf.write_metadata_to_xml(xml_buffer)
        print(xml_buffer.getvalue())
def main(args):
    """Run the MHNT batch according to the flags carried by ``args``.

    Metadata is read from the files under ``./images/``.
    ``args.post_process`` applies the field processors and passes the
    results to ``metadata.categorisation_statistics``.  A
    ``DataIngestionBot`` is then built over the records and ``args.upload``
    / ``args.dry_run`` select ``run()`` or ``dry_run()``.
    """
    mhnt = MHNTMetadataCollection()
    images_dir = os.path.abspath('./images/')
    mhnt.retrieve_metadata_from_files(images_dir)
    # Disabled: mhnt.write_metadata_to_csv(open("toto.csv", 'w'))
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    # The alignment-page generation step is commented out for this batch:
    # if args.make_alignment:
    #     for key, value in collection.count_metadata_values().items():
    #         collection.write_dict_as_wiki(value, key, 'wiki',
    #                                       alignment_template)

    if args.post_process:
        aligned_fields = ['by-line', 'keywords']
        mapper = commonprocessors.retrieve_metadata_alignments(
            aligned_fields, alignment_template)
        field_processors = {
            'by-line': (
                commonprocessors.process_with_alignment,
                {'mapper': mapper},
            ),
            'keywords': (
                commonprocessors.process_with_alignment_on_list,
                {'mapper': mapper},
            ),
            'caption/abstract': (process_caption, {}),
        }
        counter, count_per_file = mhnt.post_process_collection(
            field_processors)
        metadata.categorisation_statistics(counter, count_per_file)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    record_iterator = iter(mhnt.records)
    upload_bot = DataIngestionBot(
        reader=record_iterator,
        front_titlefmt="",
        rear_titlefmt=" - Fonds Trutat - %(object name)s",
        variable_titlefmt="%(title)s",
        pagefmt='User:Jean-Frédéric/MHNT/Ingestion'.encode('utf-8'),
        subst=True,
        verifyDescription=False,
    )

    if args.upload:
        upload_bot.run()
    elif args.dry_run:
        # Disabled debugging aids:
        # for record in collection.records:
        #     record.to_disk('%(Cote)s', 'toto')
        # s = open('filename.xml', 'w')
        # collection.write_metadata_to_xml(s)
        upload_bot.dry_run()
def process_caption(field, old_field_value):
    """Return the caption plus whatever could be extracted from it.

    The result always maps ``field`` to ``old_field_value``.  The keys
    ``'date'``, ``'year'``, ``'parsed_format'`` and ``'parsed_technique'``
    are added only when the corresponding helper returned a truthy value.
    """
    date, year = commonprocessors.look_for_date_unwrapped(old_field_value)
    # Helpers are called in the same order as before: date, format, technique.
    extracted = [
        ('date', date),
        ('year', year),
        ('parsed_format', _parse_format(old_field_value)),
        ('parsed_technique', _parse_technique(old_field_value)),
    ]

    result = {field: old_field_value}
    for key, value in extracted:
        if value:
            result[key] = value
    return result
def _parse_format(text): """Parse stuff like format 6,5x9 cm""" format_pattern = re.compile(r""" (format)\s (?P<a>[\d,\.]+?) # Digits, comma or dot, captured as group x # x (?P<b>[\d,\.]+?) # Same \s?cm # Whitespace, cm """, re.X) match = re.search(format_pattern, text) if match: new_value = commonprocessors._pattern_to_size(match).strip() return new_value else: return None
def main(args):
    """Run the Fonds Trutat (bis) batch according to the flags in ``args``.

    Records are loaded from ``metadata.csv`` (``;`` delimited).
    ``args.post_process`` applies the field processors and passes the
    results to ``metadata.categorisation_statistics``.  A
    ``DataIngestionBot`` is built over a slice of the reversed records.
    ``args.upload`` currently does nothing (the ``run()`` call is commented
    out); ``args.dry_run`` writes the collection as XML to
    ``filename.xml``.
    """
    collection = TrutatBisMetadataCollection()
    csv_file = 'metadata.csv'
    collection.retrieve_metadata_from_csv(csv_file, delimiter=';')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.post_process:
        mapping_fields = ['Support', 'Technique', 'Auteur', 'Places']
        mapper = commonprocessors.retrieve_metadata_alignments(
            mapping_fields, alignment_template)
        mapping_methods = {
            'Format': (processors.parse_format, {}),
            'Analyse': (processors.look_for_date, {}),
            'Auteur': (commonprocessors.process_with_alignment,
                       {'mapper': mapper}),
            'Support': (commonprocessors.process_with_alignment,
                        {'mapper': mapper}),
            'Technique': (commonprocessors.process_with_alignment,
                          {'mapper': mapper}),
            'Cote': (processors.match_identifier_to_categories,
                     {'mapper': mapper}),
        }
        categories_counter, categories_count_per_file = (
            collection.post_process_collection(mapping_methods))
        metadata.categorisation_statistics(categories_counter,
                                           categories_count_per_file)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm that the bot setup belongs outside the post-process branch.
    template_name = 'Commons:Batch_uploading/Fonds_Eugène_Trutat_bis/Ingestion'.decode('utf-8').encode('utf-8')
    front_titlefmt = ""
    variable_titlefmt = "%(Titre)s"
    rear_titlefmt = " - Fonds Trutat - %(Cote)s"

    # Walk the records backwards and keep positions 72 to 279 of that order.
    reader = itertools.islice(reversed(collection.records), 72, 280)
    uploadBot = DataIngestionBot(reader=iter(reader),
                                 front_titlefmt=front_titlefmt,
                                 rear_titlefmt=rear_titlefmt,
                                 variable_titlefmt=variable_titlefmt,
                                 pagefmt=template_name,
                                 subst=True,
                                 verifyDescription=False)

    if args.upload:
        # Upload is switched off: the call below is commented out.
        pass
        # uploadBot.run()
    elif args.dry_run:
        # for record in collection.records:
        #     record.to_disk('%(Cote)s', 'toto')
        # The context manager closes (and therefore flushes) the output
        # file even if the XML export raises; it was previously left open.
        with open('filename.xml', 'w') as xml_file:
            collection.write_metadata_to_xml(xml_file)