def iso2tables_old(master, entry_filename, rec_format, id_traitement):
    """Read an ISO 2709 file with pymarc and hand each record to record2listemetas.

    Args:
        master: parent widget forwarded to ``main.popup_errors`` on failure.
        entry_filename: path of the file, opened in binary mode.
        rec_format: forwarded unchanged to ``record2listemetas``.
        id_traitement: not used by this function.

    A ``RecordLengthInvalid`` or ``UnicodeDecodeError`` raised while iterating
    stops the reading: the error is printed to stdout and a popup pointing to
    the online help is displayed.
    """
    help_title = "Aide en ligne : conversion iso2709 > XML"
    help_url = "https://github.com/Transition-bibliographique/bibliostratus/wiki/1-%5BBleu%5D-Pr%C3%A9parer-ses-donn%C3%A9es-pour-l'alignement-%C3%A0-partir-d'un-export-catalogue#un-probl%C3%A8me-dencodage--passez-en-xml-avec-marcedit"
    with open(entry_filename, 'rb') as marc_stream:
        reader = mc.MARCReader(marc_stream)
        reader.force_utf8 = True
        try:
            for marc_record in reader:
                record2listemetas(marc_record, rec_format)
        except (mc.exceptions.RecordLengthInvalid, UnicodeDecodeError) as err:
            # Both failures are reported the same way: console trace + popup.
            print("\n\n/*---------------------------------------------*\n\n")
            print(main.errors["pb_input_utf8"])
            print(err)
            print("\n\n*------------------------------------------------*/")
            main.popup_errors(
                master,
                main.errors["pb_input_utf8_marcEdit"],
                help_title,
                help_url
            )
def align_from_bib(form, entry_filename, liste_reports, parametres):
    """Align authority data with BnF authorities from a bibliographic extraction.

    The input is a tab-separated file extracted from the bibliographic
    database (bibliographic metadata plus author name, first name and dates).

    Args:
        form: parent widget forwarded to ``main.popup_errors`` and to
            ``align_from_bib_item``.
        entry_filename: path of the UTF-8, tab-separated input file.
        liste_reports: report handle(s) forwarded to ``row2file`` /
            ``row2files`` and to ``align_from_bib_item``.
        parametres: options dict; this function reads the keys ``meta_bnf``,
            ``file_nb`` and ``headers``.
    """
    header_columns = [
        "NumNot", "nbARK", "ark AUT trouvé", "ark BIB initial",
        "frbnf BIB initial", "Titre", "ISNI", "Nom", "Complément nom",
        "dates Auteur"
    ]
    if parametres['meta_bnf'] == 1:
        header_columns.extend(
            ["[BnF] Nom", "[BnF] Complément Nom", "[BnF] Dates"])
    if parametres['file_nb'] == 1:
        row2file(header_columns, liste_reports)
    # The key used to be 'file_nb ' (trailing space), which raised KeyError
    # as soon as file_nb was not 1.
    elif parametres['file_nb'] == 2:
        row2files(header_columns, liste_reports)
    with open(entry_filename, newline='\n', encoding="utf-8") as csvfile:
        entry_file = csv.reader(csvfile, delimiter='\t')
        if parametres['headers']:
            # Skip the header row; reading it is also the first point where
            # a wrongly encoded file can fail.
            try:
                next(entry_file)
            except UnicodeDecodeError:
                main.popup_errors(
                    form,
                    main.errors["pb_input_utf8"],
                    "Comment modifier l'encodage du fichier",
                    "https://github.com/Transition-bibliographique/bibliostratus/wiki/2-%5BBlanc%5D-:-alignement-des-donn%C3%A9es-bibliographiques-avec-la-BnF#erreur-dencodage-dans-le-fichier-en-entr%C3%A9e"
                )
        # n is the zero-based index of the data row being processed.
        for n, row in enumerate(entry_file):
            align_from_bib_item(row, n, form, parametres, liste_reports)
def iso2tables(master, entry_filename, rec_format, id_traitement):
    """Process an ISO 2709 file one record at a time through a temporary file.

    The input is read as UTF-8 text and split on the U+001D separator; each
    chunk is written, with the separator restored, to ``temp_record.txt`` and
    re-read with pymarc so that a faulty record does not stop the others.

    Args:
        master: parent widget forwarded to ``main.popup_errors``.
        entry_filename: path of the input file.
        rec_format: forwarded unchanged to ``record2listemetas``.
        id_traitement: not used by this function.

    Records that raise ``RecordLengthInvalid`` or ``UnicodeDecodeError`` get
    an identifier appended to the module-level ``liste_notices_pb_encodage``.
    """
    outputfilename = "temp_record.txt"
    # Context managers guarantee the handles are closed even on error.
    with open(entry_filename, 'r', encoding="utf-8") as source:
        # The text after the last separator is dropped by the [0:-1] slice.
        raw_records = source.read().split(u'\u001D')[0:-1]
    for raw_record in raw_records:
        with open(outputfilename, "w", encoding="utf-8") as outputfile:
            outputfile.write(raw_record + u'\u001D')
        with open(outputfilename, 'rb') as fh:
            collection = mc.MARCReader(fh)
            collection.force_utf8 = True
            try:
                for record in collection:
                    print(record2meta(record, ["001"]))
                    record2listemetas(record, rec_format)
            except (mc.exceptions.RecordLengthInvalid, UnicodeDecodeError):
                # NOTE(review): if the reader itself raised, `record` is the
                # record from a previous iteration (or is unbound on the very
                # first one), so the identifier stored here may not be the
                # faulty record's -- confirm the intended behaviour.
                NumNot = record2meta(record, ["001"])
                liste_notices_pb_encodage.append(NumNot)
    try:
        os.remove(outputfilename)
    except FileNotFoundError:
        # No temporary file means no record was extracted from the input.
        main.popup_errors(master, main.errors["format_fichier_en_entree"])
def launch(entry_filename, file_format, rec_format, output_ID, master=None, form=None):
    """Run the MARC-to-tables conversion once the conversion form is validated.

    Args:
        entry_filename: list whose first item is the input file path.
        file_format: 1 or 2 selects ``iso2tables``; any other value selects
            ``xml2tables``. Converted with ``int()``.
        rec_format: 1 builds the bibliographic type labels, any other value
            the authority ones. Converted with ``int()``.
        output_ID: identifier forwarded to the conversion and to
            ``end_of_treatments``. Converted with ``str()``.
        master: parent widget forwarded to the conversion functions.
        form: parent widget forwarded to the popup / check / end helpers.

    Raises:
        RuntimeError: if ``entry_filename`` is an empty list (after the
            error popup has been shown).
    """
    if entry_filename == []:
        message = "Merci d'indiquer un nom de fichier en entrée"
        main.popup_errors(form, message)
        # A bare `raise` here used to produce "RuntimeError: No active
        # exception to reraise"; same exception type, explicit message.
        raise RuntimeError(message)
    entry_filename = entry_filename[0]
    try:
        [entry_filename, file_format, rec_format, output_ID] = [
            str(entry_filename), int(file_format),
            int(rec_format), str(output_ID)
        ]
    except ValueError as err:
        # Conversion failures are only reported; processing goes on with the
        # values as received.
        print("\n\nDonnées en entrée erronées\n")
        print(err)
    main.check_file_name(form, entry_filename)
    if rec_format == 1:
        # BIB records: one label per (document type, record type) pair.
        for doct in doctype:
            for recordt in recordtype:
                # Both codes are keys of the mappings being iterated.
                doc_record_type[doct + recordt] = "-".join(
                    [doctype[doct], recordtype[recordt]])
    else:
        # AUT records: document type is always "c", one label per authority
        # record type.
        doct = "c"
        doct_libelle = doctypeAUT[doct] if doct in doctypeAUT else doct
        for recordt in recordtypeAUT:
            doc_record_type[doct + recordt] = "-".join(
                [doct_libelle, recordtypeAUT[recordt]])
    print("Fichier en entrée : ", entry_filename)
    if file_format in (1, 2):
        iso2tables(master, entry_filename, file_format, rec_format, output_ID)
    else:
        xml2tables(master, entry_filename, rec_format, output_ID)
    end_of_treatments(form, output_ID)
def check_nb_colonnes(row, parametres, frame_master):
    """Check that the row has the number of columns declared in the form.

    Args:
        row: sequence of cells read from the input file.
        parametres: options dict; ``correct_record_option`` holds the
            declared number of columns.
        frame_master: parent widget forwarded to ``main.popup_errors``.

    Shows an error popup when the two numbers differ; returns ``None`` in
    every case.
    """
    found = len(row)
    expected = parametres["correct_record_option"]
    if expected == found:
        return
    alert = (
        f"Erreur dans les paramètres en entrée : "
        f"Nombre de colonnes dans le fichier : {found} "
        f"Nombre de colonnes indiqué : {expected}"
    )
    main.popup_errors(frame_master, alert)
def iso2tables(master, entry_filename, id_traitement):
    """Read an ISO 2709 file with pymarc and hand each record to record2listemetas.

    Args:
        master: parent widget forwarded to ``main.popup_errors`` on failure.
        entry_filename: path of the file, opened in binary mode.
        id_traitement: not used by this function.

    A ``RecordLengthInvalid`` raised while iterating stops the reading; the
    ``pb_input_utf8`` message is printed and shown in a popup.
    """
    with open(entry_filename, 'rb') as marc_stream:
        reader = mc.MARCReader(marc_stream)
        reader.force_utf8 = True
        try:
            for marc_record in reader:
                record2listemetas(marc_record)
        except mc.exceptions.RecordLengthInvalid:
            print("\n\n/*---------------------------------------------*\n\n")
            encoding_message = main.errors["pb_input_utf8"]
            print(encoding_message)
            print("\n\n*------------------------------------------------*/")
            main.popup_errors(master, encoding_message)
def test_encoding_file(master, entry_filename, encoding): test = True input_file = "" try: input_file = open(entry_filename, 'r', encoding=encoding).read().split(u'\u001D')[0:-1] except ValueError as err: if ("base 10" in str(err)): print( "Encodage UTF-8 BOM -> convertir le fichier en UTF8 sans BOM") except UnicodeDecodeError: main.popup_errors(master, main.errors["format_fichier_en_entree"]) return (test, input_file)
def test_encoding_file(master, entry_filename, encoding, file_format): test = True input_file = "" if (file_format == 1): file = open(entry_filename, "rb").read() if (len(file[0:3].decode(encoding)) == 1): file = file[3:] entry_filename = "temp_file_sans_bom.txt" temp_file = open("temp_file_sans_bom.txt", "wb") temp_file.write(file) temp_file.close() try: input_file = open(entry_filename, 'r', encoding=encoding).read().split(u'\u001D')[0:-1] except UnicodeDecodeError: main.popup_errors(master, main.errors["format_fichier_en_entree"]) try: os.remove("temp_file_sans_bom.txt") except FileNotFoundError: print("Fichier temporaire UTF8-sans BOM inutile") return (test, input_file)
def launch(form, entry_filename, headers, input_data_type, isni_option,
           file_nb, id_traitement, meta_bnf):
    """Start the authority alignment selected in the form.

    Args:
        form: parent widget forwarded to the alignment functions, the error
            popup and ``bib2ark.fin_traitements``.
        entry_filename: input file forwarded to the alignment function.
        headers, isni_option, meta_bnf: options stored in the ``parametres``
            dict handed to the alignment function.
        input_data_type: 1 runs ``align_from_aut``, 2 runs
            ``align_from_bib``; any other value shows an error popup.
        file_nb: forwarded to ``create_reports`` and stored in
            ``parametres``.
        id_traitement: forwarded to ``create_reports`` and stored in
            ``parametres``.
    """
    parametres = {
        "headers": headers,
        "input_data_type": input_data_type,
        "isni_option": isni_option,
        "file_nb": file_nb,
        "meta_bnf": meta_bnf,
        "id_traitement": id_traitement
    }
    liste_reports = create_reports(id_traitement, file_nb)
    if input_data_type == 1:
        align_from_aut(form, entry_filename, liste_reports, parametres)
    elif input_data_type == 2:
        align_from_bib(form, entry_filename, liste_reports, parametres)
    else:
        # The parent widget was missing here: the message was being passed
        # as the first positional argument.
        main.popup_errors(form, "Format en entrée non défini")
    # nb_notices_nb_ARK is a module-level name, not defined in this function.
    bib2ark.fin_traitements(form, liste_reports, nb_notices_nb_ARK)
def launch(filename, type_records_form, correct_record_option, headers,
           AUTlieesAUT, AUTlieesSUB, AUTlieesWORK, outputID,
           format_records=1, format_file=1, xml_encoding_option="utf-8",
           select_fields="", master=None, form=None):
    """Run the record extraction described by the form values.

    Args:
        filename: list whose first item is the input file path.
        type_records_form: 2 selects authority records ("aut"); any other
            value selects bibliographic records ("bib").
        correct_record_option, headers, format_file, xml_encoding_option,
        select_fields: options stored in ``parametres``.
        AUTlieesAUT, AUTlieesSUB, AUTlieesWORK: integer flags whose sum
            decides whether a linked-authority file is produced.
        outputID: processing identifier, turned into a path by
            ``funcs.id_traitement2path``.
        format_records: key looked up in ``dict_format_records``.
        master: parent widget forwarded to ``main.generic_input_controls``
            and ``file2extract``.
        form: parent widget forwarded to the popup, ``file2extract`` and
            ``fin_traitements``.

    Raises:
        RuntimeError: if ``filename`` is an empty list (after the error
            popup has been shown).
    """
    if filename == []:
        message = "Merci d'indiquer un nom de fichier en entrée"
        main.popup_errors(form, message)
        # A bare `raise` here used to produce "RuntimeError: No active
        # exception to reraise"; same exception type, explicit message.
        raise RuntimeError(message)
    filename = filename[0]
    try:
        [
            filename, type_records_form, correct_record_option, headers,
            AUTlieesAUT, AUTlieesSUB, AUTlieesWORK, outputID, format_records,
            format_file, xml_encoding_option, select_fields
        ] = [
            str(filename), int(type_records_form), int(correct_record_option),
            int(headers), int(AUTlieesAUT), int(AUTlieesSUB),
            int(AUTlieesWORK), str(outputID), int(format_records),
            int(format_file), str(xml_encoding_option), str(select_fields)
        ]
    except ValueError as err:
        # Conversion failures are only reported; processing goes on with the
        # values as received.
        print("\n\nDonnées en entrée erronées\n")
        print(err)
    AUTliees = AUTlieesAUT + AUTlieesSUB + AUTlieesWORK
    format_BIB = dict_format_records[format_records]
    outputID = funcs.id_traitement2path(outputID)
    type_records = "aut" if type_records_form == 2 else "bib"
    parametres = {
        "headers": headers,
        "type_records": type_records,
        "correct_record_option": correct_record_option,
        "type_records_form": type_records_form,
        "AUTliees": AUTliees,
        "AUTlieesAUT": AUTlieesAUT,
        "AUTlieesSUB": AUTlieesSUB,
        "AUTlieesWORK": AUTlieesWORK,
        "outputID": outputID,
        "format_records": format_records,
        "format_file": format_file,
        "xml_encoding_option": xml_encoding_option,
        "format_BIB": format_BIB,
        "select_fields": select_fields,
        "listeARK_BIB": [],
        "listeNNA_AUT": []
    }
    files = {}
    main.generic_input_controls(master, filename)
    files["bib_file"] = file_create(type_records, parametres)
    if parametres["AUTliees"] > 0:
        files["aut_file"] = file_create("aut", parametres)
    file2extract(filename, parametres, files, master, form)
    file_fin(files["bib_file"], format_file)
    # Same condition as the creation above: the test used to be `== 1`, so
    # the file was left unfinalised when two or three options were ticked.
    if AUTliees > 0:
        file_fin(files['aut_file'], format_file)
    fin_traitements(form, outputID)
def launch(filename, type_records_form, correct_record_option, headers,
           AUTlieesAUT, AUTlieesSUB, AUTlieesWORK, outputID,
           format_records=1, format_file=1, xml_encoding_option="utf-8",
           select_fields="", master=None, form=None):
    """Run the record extraction for every row of a tab-separated input file.

    Args:
        filename: list whose first item is the input file path.
        type_records_form: 2 selects authority records ("aut"); any other
            value selects bibliographic records ("bib").
        correct_record_option, format_file, xml_encoding_option,
        select_fields: options stored in ``parametres``.
        headers: when truthy the first row of the file is skipped; also
            forwarded to ``extract1record``.
        AUTlieesAUT, AUTlieesSUB, AUTlieesWORK: integer flags whose sum
            decides whether a linked-authority file is produced.
        outputID: processing identifier, turned into a path by
            ``funcs.id_traitement2path``.
        format_records: key looked up in ``dict_format_records``.
        master: parent widget forwarded to ``main.generic_input_controls``
            and ``check_nb_colonnes``.
        form: parent widget forwarded to the popup, ``extract1record`` and
            ``fin_traitements``.

    Raises:
        RuntimeError: if ``filename`` is an empty list (after the error
            popup has been shown).
    """
    if filename == []:
        message = "Merci d'indiquer un nom de fichier en entrée"
        main.popup_errors(form, message)
        # A bare `raise` here used to produce "RuntimeError: No active
        # exception to reraise"; same exception type, explicit message.
        raise RuntimeError(message)
    filename = filename[0]
    try:
        [
            filename, type_records_form, correct_record_option, headers,
            AUTlieesAUT, AUTlieesSUB, AUTlieesWORK, outputID, format_records,
            format_file, xml_encoding_option, select_fields
        ] = [
            str(filename), int(type_records_form), int(correct_record_option),
            int(headers), int(AUTlieesAUT), int(AUTlieesSUB),
            int(AUTlieesWORK), str(outputID), int(format_records),
            int(format_file), str(xml_encoding_option), str(select_fields)
        ]
    except ValueError as err:
        # Conversion failures are only reported; processing goes on with the
        # values as received.
        print("\n\nDonnées en entrée erronées\n")
        print(err)
    AUTliees = AUTlieesAUT + AUTlieesSUB + AUTlieesWORK
    format_BIB = dict_format_records[format_records]
    outputID = funcs.id_traitement2path(outputID)
    type_records = "aut" if type_records_form == 2 else "bib"
    parametres = {
        "type_records": type_records,
        "correct_record_option": correct_record_option,
        "type_records_form": type_records_form,
        "AUTliees": AUTliees,
        "AUTlieesAUT": AUTlieesAUT,
        "AUTlieesSUB": AUTlieesSUB,
        "AUTlieesWORK": AUTlieesWORK,
        "outputID": outputID,
        "format_records": format_records,
        "format_file": format_file,
        "xml_encoding_option": xml_encoding_option,
        "format_BIB": format_BIB,
        "select_fields": select_fields,
        "listeARK_BIB": [],
        "listeNNA_AUT": []
    }
    main.generic_input_controls(master, filename)
    bib_file = file_create(type_records, parametres)
    parametres["bib_file"] = bib_file
    if parametres["AUTliees"] > 0:
        aut_file = file_create("aut", parametres)
        parametres["aut_file"] = aut_file
    with open(filename, newline='\n', encoding="utf-8") as csvfile:
        entry_file = csv.reader(csvfile, delimiter='\t')
        if headers:
            next(entry_file, None)
        for j, row in enumerate(entry_file):
            # The column count is only checked on the first data row.
            if j == 0:
                check_nb_colonnes(row, parametres, master)
            extract1record(row, j, form, headers, parametres)
    file_fin(bib_file, format_file)
    # Same condition as the creation above: the test used to be `== 1`, so
    # the file was left unfinalised when two or three options were ticked.
    if AUTliees > 0:
        file_fin(aut_file, format_file)
    fin_traitements(form, outputID)