Ejemplo n.º 1
0
def main(args):
    """Load the Champlitte CSV, optionally align/post-process it, then ingest.

    The collection's metadata XML is always printed to standard output before
    the ingestion bot is built. ``args`` is read for the flags
    ``prepare_alignment``, ``post_process``, ``upload`` and ``dry_run``;
    ``upload`` is checked before ``dry_run``.
    """
    # Alternative input file used previously: 'DV5_M0354_2006_9.csv'
    source_csv = "2006_7.csv"
    collection = ChamplitteMetadataCollection()
    collection.retrieve_metadata_from_csv(source_csv, delimiter=",")

    alignment_template = "User:Jean-Frédéric/AlignmentRow".encode("utf-8")

    if args.prepare_alignment:
        value_counts = collection.count_metadata_values()
        for field_name, counts in value_counts.items():
            collection.write_dict_as_wiki(counts, field_name, "wiki", alignment_template)

    if args.post_process:
        # No field is aligned for this collection; the call is kept, its
        # result is not used by any processor below.
        commonprocessors.retrieve_metadata_alignments([], alignment_template)

        # Disabled processor:
        #   'JOCONDE_TECH': commonprocessors.map_and_apply_technique(separator=";")
        mapping_methods = {
            "JOCONDE_DIMS": (commonprocessors.process_DIMS, {}),
            "JOCONDE_DOMN": commonprocessors.split_and_keep_as_list(separator=";"),
            "JOCONDE_DESC": commonprocessors.wrap_with_template(template="fr"),
            "JOCONDE_REF": commonprocessors.wrap_within_pattern(
                pattern="{{online databases|{{Joconde|%s}}}}"
            ),
            "JOCONDE_DACQ": commonprocessors.wrap_within_pattern(
                pattern="{{ProvenanceEvent|time=%s|type=acquisition|newowner=Musées de la Haute-Saône}}"
            ),
            "JOCONDE_PERI": (commonprocessors.look_for_date, {}),
        }
        # The statistics are unpacked but not reported
        # (metadata.categorisation_statistics is currently disabled here).
        counter, count_per_file = collection.post_process_collection(mapping_methods)

    ingestion_template = "User:Jean-Frédéric/Champlitte/Ingestion".encode("utf-8")
    records_iter = iter(collection.records)

    # Dump the whole collection as XML on standard output.
    xml_buffer = StringIO()
    collection.write_metadata_to_xml(xml_buffer)
    print(xml_buffer.getvalue())

    # Alternative variable title: "%(JOCONDE_TITR)s (%(JOCONDE_DENO)s)"
    bot = DataIngestionBot(
        reader=records_iter,
        front_titlefmt="",
        rear_titlefmt=" - Musées de la Haute-Saône - %(JOCONDE_REF)s",
        variable_titlefmt="%(JOCONDE_DENO)s",
        pagefmt=ingestion_template,
        subst=False,
        verifyDescription=True,
    )

    if args.upload:
        bot.doSingle()
    elif args.dry_run:
        bot.dry_run()
Ejemplo n.º 2
0
def main(args):
    """Load the Archives Nationales CSV, optionally process it, then ingest.

    ``args`` is read for the flags ``prepare_alignment``, ``post_process``,
    ``upload`` and ``dry_run``; ``upload`` is checked before ``dry_run``.
    The ingestion bot is fed the collection's records from index 2 onwards.
    """
    collection = ArchivesMetadataCollection()
    collection.retrieve_metadata_from_csv('Metadata_ArchivesNationales5.csv',
                                          delimiter=';')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        value_counts = collection.count_metadata_values()
        for field_name, counts in value_counts.items():
            collection.write_dict_as_wiki(counts, field_name, 'wiki',
                                          alignment_template)

    if args.post_process:
        # No field is aligned here; the call is kept, its result is unused.
        commonprocessors.retrieve_metadata_alignments([], alignment_template)

        # Each value is either a ready-made processor or a
        # (callable, keyword-arguments) pair.
        mapping_methods = {
            'Support': commonprocessors.map_and_apply_technique(),
            'Dimensions': (commonprocessors.parse_format, {}),
            'Date': (commonprocessors.look_for_date, {}),
            'Analyse': (commonprocessors.remove_linebreaks, {}),
            'Cote du document': (commonprocessors.remove_linebreaks, {}),
            'Titre': (commonprocessors.remove_linebreaks, {}),
        }
        counter, count_per_file = collection.post_process_collection(
            mapping_methods)
        metadata.categorisation_statistics(counter, count_per_file)

    ingestion_template = 'Commons:Archives_Nationales/Ingestion'.encode('utf-8')
    bot = DataIngestionBot(
        reader=iter(collection.records[2:]),
        front_titlefmt="",
        rear_titlefmt=" - Archives Nationales - %(Cote du document)s",
        variable_titlefmt="%(Titre)s",
        pagefmt=ingestion_template,
        subst=True,
        verifyDescription=False,
    )

    if args.upload:
        bot.run()
    elif args.dry_run:
        bot.dry_run()
def main(args):
    """Entry point for the Archives Nationales batch.

    Reads 'Metadata_ArchivesNationales5.csv' (``;``-delimited) into an
    ``ArchivesMetadataCollection``; depending on the flags found on ``args``
    it writes the alignment wiki tables (``prepare_alignment``), post-processes
    the records (``post_process``) and finally runs the ingestion bot for real
    (``upload``) or as a dry run (``dry_run``).
    """
    csv_path = 'Metadata_ArchivesNationales5.csv'
    collection = ArchivesMetadataCollection()
    collection.retrieve_metadata_from_csv(csv_path, delimiter=';')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        for name, counts in collection.count_metadata_values().items():
            collection.write_dict_as_wiki(counts, name, 'wiki', alignment_template)

    if args.post_process:
        aligned_fields = []
        # The returned mapper is not consumed by any processor below.
        commonprocessors.retrieve_metadata_alignments(aligned_fields,
                                                      alignment_template)
        mapping_methods = {
            'Support': commonprocessors.map_and_apply_technique(),
            'Dimensions': (commonprocessors.parse_format, {}),
            'Date': (commonprocessors.look_for_date, {}),
            'Analyse': (commonprocessors.remove_linebreaks, {}),
            'Cote du document': (commonprocessors.remove_linebreaks, {}),
            'Titre': (commonprocessors.remove_linebreaks, {}),
        }
        stats = collection.post_process_collection(mapping_methods)
        categories_counter, categories_count_per_file = stats
        metadata.categorisation_statistics(categories_counter,
                                           categories_count_per_file)

    page_template = 'Commons:Archives_Nationales/Ingestion'.encode('utf-8')
    title_front = ""
    title_variable = "%(Titre)s"
    title_rear = " - Archives Nationales - %(Cote du document)s"
    # Records at index 0 and 1 are not handed to the bot.
    remaining_records = iter(collection.records[2:])
    ingestion_bot = DataIngestionBot(reader=remaining_records,
                                     front_titlefmt=title_front,
                                     rear_titlefmt=title_rear,
                                     variable_titlefmt=title_variable,
                                     pagefmt=page_template,
                                     subst=True,
                                     verifyDescription=False)

    if args.upload:
        ingestion_bot.run()
    elif args.dry_run:
        ingestion_bot.dry_run()
Ejemplo n.º 4
0
def main(args):
    """Load the Berthelé XML export, optionally process it, then ingest.

    ``args`` is read for the flags ``prepare_alignment``, ``post_process``,
    ``upload`` and ``dry_run``; ``upload`` is checked before ``dry_run``.

    The ingestion bot is built whenever an upload or a dry run is requested.
    It used to be created only inside the ``post_process`` branch, so asking
    for ``upload`` or ``dry_run`` without ``post_process`` failed with an
    ``UnboundLocalError`` on ``uploadBot``.
    """
    collection = BertheleMetadataCollection()
    xml_file = 'Test-Export_Berthele.xml'
    collection.retrieve_metadata_from_xml(xml_file, 'DocsFigures')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        for key, value in collection.count_metadata_values().items():
            collection.write_dict_as_wiki(value, key, 'wiki',
                                          alignment_template)

    if args.post_process:
        mapping_fields = ['geoname', 'persname', 'subject', 'corpname']
        mapper = commonprocessors.retrieve_metadata_alignments(mapping_fields,
                                                               alignment_template)
        mapping_methods = {
            'Format': (processors.parse_format, {}),
            'Analyse': (processors.look_for_date, {}),
            'geoname': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'persname': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'subject': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'corpname': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            # Disabled: 'Technique': commonprocessors.map_and_apply_technique,
            }
        categories_counter, categories_count_per_file = collection.post_process_collection(mapping_methods)
        print(metadata.categorisation_statistics(categories_counter, categories_count_per_file))

    if not (args.upload or args.dry_run):
        return

    # Built after the optional post-processing so the bot reads the final
    # state of the records.
    # NOTE(review): front_titlefmt, rear_titlefmt and variable_titlefmt are not
    # defined in this function; they are expected to exist at module level --
    # confirm.
    reader = iter(collection.records)
    template_name = 'Commons:Batch_uploading/Fonds_Berthelé/Ingestion'.decode('utf-8').encode('utf-8')
    uploadBot = DataIngestionBot(reader=reader,
                                 front_titlefmt=front_titlefmt,
                                 rear_titlefmt=rear_titlefmt,
                                 variable_titlefmt=variable_titlefmt,
                                 pagefmt=template_name,
                                 verifyDescription=False)

    if args.upload:
        uploadBot.run()
    else:
        # Scratch helpers: a TemplateData skeleton and a field mapping built
        # from the metadata field names. Neither is printed nor used at the
        # moment; the code that printed them is disabled.
        tdt = ['"%s": {"label": "%s"},' % (a, a) for a in collection.count_metadata_values().keys()]
        template_data = """<templatedata>   
{
    "description": "Ingestion template",
    "params": {
    %s
}
</templatedata>
""" % '\n'.join(tdt)

        mapping = ['"%s": ["%s"]' % (a, a) for a in collection.count_metadata_values().keys()]

        uploadBot.dry_run()
Ejemplo n.º 5
0
def main(args):
    """Load the MH IDF CSV, optionally process it, then upload or dump XML.

    ``args`` is read for the flags ``prepare_alignment``, ``post_process``,
    ``upload`` and ``dry_run``; ``upload`` is checked before ``dry_run``.
    A dry run prints the collection's metadata XML to standard output.

    The ingestion bot is built in the upload branch, the only place it is
    used. It used to be created only inside the ``post_process`` branch, so
    ``upload`` without ``post_process`` failed with an ``UnboundLocalError``
    on ``uploadBot``.
    """
    collection = MHIDFMetadataCollection()
    # Alternative input file used previously:
    #   'photographies-serie-monuments-historiques-1851-a-1914.csv'
    csv_file = 'error.csv'
    collection.retrieve_metadata_from_csv(csv_file, delimiter=';')
    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.prepare_alignment:
        for key, value in collection.count_metadata_values().items():
            collection.write_dict_as_wiki(value, key, 'wiki',
                                          alignment_template)

    if args.post_process:
        mapping_fields = ['autp', 'datpv', 'edif', 'lieucor']
        mapper = commonprocessors.retrieve_metadata_alignments(mapping_fields,
                                                               alignment_template)
        mapping_methods = {
            'wgs84': commonprocessors.split_and_keep_as_list(separator=','),
            'lbase': look_for_MH_titles(separator=';'),
            'autp': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'datpv': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'edif': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'lieucor': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'insee': insee_to_commonscat(),
            }
        categories_counter, categories_count_per_file = collection.post_process_collection(mapping_methods)
        print(metadata.categorisation_statistics(categories_counter, categories_count_per_file))

    if args.upload:
        # NOTE(review): front_titlefmt, rear_titlefmt and variable_titlefmt are
        # not defined in this function; they are expected to exist at module
        # level -- confirm.
        reader = iter(collection.records)
        template_name = 'User:Jean-Frédéric/MH_IDF/Ingestion'.encode('utf-8')
        uploadBot = DataIngestionBot(reader=reader,
                                     front_titlefmt=front_titlefmt,
                                     rear_titlefmt=rear_titlefmt,
                                     variable_titlefmt=variable_titlefmt,
                                     pagefmt=template_name)
        uploadBot.doSingle()
    elif args.dry_run:
        string = StringIO()
        collection.write_metadata_to_xml(string)
        print(string.getvalue())
Ejemplo n.º 6
0
def main(args):
    """Read metadata from the image files, optionally process it, then ingest.

    ``args`` is read for the flags ``post_process``, ``upload`` and
    ``dry_run``; ``upload`` is checked before ``dry_run``.

    The title formats and the ingestion bot are set up unconditionally. They
    used to be created only inside the ``post_process`` branch, so ``upload``
    or ``dry_run`` without ``post_process`` failed with an
    ``UnboundLocalError`` on ``uploadBot``.
    """
    collection = MHNTMetadataCollection()
    files_path = os.path.abspath('./images/')
    collection.retrieve_metadata_from_files(files_path)

    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')
    # The alignment-preparation step (writing the value counts as wiki tables
    # with collection.write_dict_as_wiki) is currently disabled in this script.

    if args.post_process:
        mapping_fields = ['by-line', 'keywords']
        mapper = commonprocessors.retrieve_metadata_alignments(mapping_fields,
                                                               alignment_template)
        mapping_methods = {
            'by-line': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'keywords': (commonprocessors.process_with_alignment_on_list, {'mapper': mapper}),
            'caption/abstract': (process_caption, {}),
        }
        categories_counter, categories_count_per_file = collection.post_process_collection(mapping_methods)
        metadata.categorisation_statistics(categories_counter, categories_count_per_file)

    # Built after the optional post-processing so the bot reads the final
    # state of the records.
    template_name = 'User:Jean-Frédéric/MHNT/Ingestion'.encode('utf-8')
    front_titlefmt = ""
    variable_titlefmt = "%(title)s"
    rear_titlefmt = " - Fonds Trutat - %(object name)s"

    reader = iter(collection.records)
    uploadBot = DataIngestionBot(reader=reader,
                                 front_titlefmt=front_titlefmt,
                                 rear_titlefmt=rear_titlefmt,
                                 variable_titlefmt=variable_titlefmt,
                                 pagefmt=template_name,
                                 subst=True,
                                 verifyDescription=False
                                 )

    if args.upload:
        uploadBot.run()
    elif args.dry_run:
        uploadBot.dry_run()
Ejemplo n.º 7
0
def process_caption(field, old_field_value):
    """Derive extra metadata entries from a caption value.

    Returns a dict that always maps ``field`` to ``old_field_value`` and, for
    each helper that yields a truthy result, additionally holds:

    - ``'date'`` and ``'year'`` from
      ``commonprocessors.look_for_date_unwrapped``,
    - ``'parsed_format'`` from ``_parse_format``,
    - ``'parsed_technique'`` from ``_parse_technique``.
    """
    found_date, found_year = commonprocessors.look_for_date_unwrapped(old_field_value)
    derived = (
        ('date', found_date),
        ('year', found_year),
        ('parsed_format', _parse_format(old_field_value)),
        ('parsed_technique', _parse_technique(old_field_value)),
    )
    result = {field: old_field_value}
    # Only truthy findings are recorded; they are applied in the listed order
    # and may overwrite the original entry when ``field`` has the same name.
    result.update((name, found) for name, found in derived if found)
    return result
Ejemplo n.º 8
0
def _parse_format(text):
    """Parse stuff like format 6,5x9 cm"""
    format_pattern = re.compile(r"""
        (format)\s
        (?P<a>[\d,\.]+?)   # Digits, comma or dot, captured as group
        x                  # x
        (?P<b>[\d,\.]+?)   # Same
        \s?cm              # Whitespace, cm
        """, re.X)
    match = re.search(format_pattern, text)
    if match:
        new_value = commonprocessors._pattern_to_size(match).strip()
        return new_value
    else:
        return None
Ejemplo n.º 9
0
def main(args):
    """Load the Trutat (bis) CSV, optionally process it, then dump it as XML.

    ``args`` is read for the flags ``post_process``, ``upload`` and
    ``dry_run``; ``upload`` is checked before ``dry_run``. The upload itself
    is currently disabled, so ``upload`` does nothing. A dry run writes the
    collection's metadata XML to 'filename.xml'.

    The output file is now opened in a ``with`` block so it is closed (and its
    buffered content written out) as soon as the XML has been produced; it
    used to be left open.
    """
    collection = TrutatBisMetadataCollection()
    csv_file = 'metadata.csv'
    collection.retrieve_metadata_from_csv(csv_file, delimiter=';')

    alignment_template = 'User:Jean-Frédéric/AlignmentRow'.encode('utf-8')

    if args.post_process:
        mapping_fields = ['Support', 'Technique', 'Auteur', 'Places']
        mapper = commonprocessors.retrieve_metadata_alignments(mapping_fields,
                                                               alignment_template)
        mapping_methods = {
            'Format': (processors.parse_format, {}),
            'Analyse': (processors.look_for_date, {}),
            'Auteur': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'Support': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'Technique': (commonprocessors.process_with_alignment, {'mapper': mapper}),
            'Cote': (processors.match_identifier_to_categories, {'mapper': mapper}),
        }
        categories_counter, categories_count_per_file = collection.post_process_collection(mapping_methods)
        metadata.categorisation_statistics(categories_counter, categories_count_per_file)

    template_name = 'Commons:Batch_uploading/Fonds_Eugène_Trutat_bis/Ingestion'.decode('utf-8').encode('utf-8')
    front_titlefmt = ""
    variable_titlefmt = "%(Titre)s"
    rear_titlefmt = " - Fonds Trutat - %(Cote)s"
    # Walk the records in reverse order and keep only positions 72..279 of
    # that reversed sequence.
    reader = iter(reversed(collection.records))
    reader = itertools.islice(reader, 72, 280)
    uploadBot = DataIngestionBot(reader=iter(reader),
                                 front_titlefmt=front_titlefmt,
                                 rear_titlefmt=rear_titlefmt,
                                 variable_titlefmt=variable_titlefmt,
                                 pagefmt=template_name,
                                 subst=True,
                                 verifyDescription=False
                                 )
    if args.upload:
        # Upload deliberately disabled; re-enable with uploadBot.run().
        pass
    elif args.dry_run:
        with open('filename.xml', 'w') as s:
            collection.write_metadata_to_xml(s)