def validate_identifier(identifier):
    """Validate a single identifier and return a list of error messages.

    Args:
        identifier: Mapping with an "id_type" entry and a "value" entry.
            A falsy value (``None``, ``{}``) is accepted and yields no errors.

    Returns:
        A list that is empty when the identifier is valid, empty, or of a
        type that is not checked here; otherwise it holds one message of the
        form ``"Invalid ISSN: <value>"`` (label is the upper-cased type).
    """
    errors = []
    if not identifier:
        return errors

    id_type = identifier["id_type"]
    # Only these identifier types are checked; every other type is accepted.
    validators = {
        "issn": issn,
        "isbn": isbn,
        "isan": isan,
        "ismn": ismn,
        "ean": ean,
    }
    validator = validators.get(id_type)
    if validator is None:
        return errors

    value = identifier["value"]
    try:
        validator.validate(value)
    except Exception:
        # Any validation failure is reported as a message. ``Exception``
        # (rather than a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        errors.append("Invalid {}: {}".format(id_type.upper(), value))
    return errors
def _deserialize(self, value, attr, data, **kwargs):
    """Validate ``value`` as an ISSN and return ``issn.format(value)``.

    Args:
        value: The raw value to validate and format.
        attr: Unused here; part of the method signature.
        data: Unused here; part of the method signature.
        **kwargs: Unused here; part of the method signature.

    Returns:
        The result of ``issn.format(value)``.

    Raises:
        ValidationError: With the validator's own message when it reports a
            checksum, length, format or component problem, or with
            ``"Bad format <value>"`` for any other failure.
    """
    try:
        issn.validate(value)
        return issn.format(value)
    except (InvalidChecksum, InvalidLength, InvalidFormat, InvalidComponent) as e:
        raise ValidationError(str(e)) from e
    except Exception as e:
        # Anything else (for example a value of the wrong type) is still a
        # validation problem for the caller. ``Exception`` instead of a bare
        # ``except`` keeps KeyboardInterrupt/SystemExit from being converted.
        raise ValidationError(f"Bad format {value}") from e
def validate_issn(issn):
    """Check ``issn`` and raise ``ValidationError`` when it is not valid.

    A falsy value (``None``, empty string) is accepted without any check.
    The error message states whether the checksum, the length or the format
    was rejected; the offending value is passed as the ``issn`` param.
    """
    if not issn:
        return

    try:
        issn_checker.validate(issn)
    except (InvalidChecksum, InvalidLength, InvalidFormat) as failure:
        # Pick the message in the same order the failure kinds are tested.
        if isinstance(failure, InvalidChecksum):
            message = 'ISSN invalid checksum: %(issn)s'
        elif isinstance(failure, InvalidLength):
            message = 'ISSN invalid length: %(issn)s'
        else:
            message = 'ISSN invalid format: %(issn)s'
        raise ValidationError(message, code='invalid', params={'issn': issn})
def _perios_fieldnames(resource_slots=25):
    """Return the report columns: fixed columns, then a URL/status pair per resource slot."""
    fieldnames = [
        "rec_id", "rec_type", "title_non_sort", "title", "title_sub",
        "issn", "issn_status", "rel_eissn", "rel_response",
    ]
    for position in range(1, resource_slots + 1):
        fieldnames.append("856_{}_u".format(position))
        fieldnames.append("856_{}_status".format(position))
    return fieldnames


def _perios_previous_rows(csv_file_path):
    """Return the rows of an existing report, or an empty list when there is none."""
    if not os.path.isfile(csv_file_path):
        return []
    # On Python 3 the csv module needs a text-mode file opened with newline="".
    with open(csv_file_path, newline="", encoding="utf-8") as csv_file:
        return list(csv.DictReader(csv_file, delimiter=","))


def _perios_issn_columns(identifiers, seen_issns):
    """Return the "issn"/"issn_status" columns for the first ISSN identifier.

    ``seen_issns`` is updated in place with every ISSN reported as "OK" so
    that later occurrences are reported as "DUPLICATED".
    """
    for identifier in identifiers:
        if identifier["id_type"] != "issn":
            continue
        value = identifier["value"]
        try:
            issn.validate(value)
        except Exception:
            status = "INVALID"
        else:
            if value in seen_issns:
                status = "DUPLICATED"
            else:
                seen_issns.add(value)
                status = "OK"
        # Only the first ISSN of a document is reported.
        return {"issn": value, "issn_status": status}
    return {"issn_status": "EMPTY"}


def _perios_resource_columns(resources):
    """Return the "856_<n>_u"/"856_<n>_status" columns, fetching every URL found."""
    columns = {}
    for position, resource in enumerate(resources, start=1):
        url_key = "856_{}_u".format(position)
        status_key = "856_{}_status".format(position)
        if "url" in resource:
            columns[url_key] = resource["url"]
            # Only the first item returned by fetch_url is inspected.
            outcome = resource_service.fetch_url(resource["url"])[0]
            columns[status_key] = "ERROR" if outcome["error"] else "OK"
        else:
            columns[url_key] = "EMPTY"
            columns[status_key] = "EMPTY"
    return columns


def _perios_holdings_parts(resources):
    """Return the text fragments describing each resource, one line per resource."""
    parts = []
    for resource in resources:
        if parts:
            parts.append("\n")
        if "institution_name" in resource:
            parts.append(resource["institution_name"])
        if "service_name" in resource:
            parts.append(" - ")
            parts.append(resource["service_name"])
        if "period_begin" in resource or "period_end" in resource:
            # A missing bound of the period is rendered as "....".
            parts.append(" (")
            if "period_begin" in resource:
                parts.append(resource["period_begin"])
            else:
                parts.append("....")
            if "period_end" in resource:
                parts.append(" - ")
                parts.append(resource["period_end"])
            else:
                parts.append(" - ....")
            parts.append(")")
    return parts


def _perios_openurl_columns(issn_value, source, rec_id_prefix):
    """Return the "rel_eissn"/"rel_response" columns from the OpenURL lookup of an ISSN."""
    columns = {}
    response = openurl_client.request_periodical_by_issn(issn_value)
    if response is None:
        return columns
    matches = openurl_crosswalk.openurl_xmletree_to_metajson_list(
        response, source, rec_id_prefix, True)
    if not matches:
        return columns
    # Only the first converted document is used.
    match = matches[0]
    if "identifiers" in match:
        for identifier in match["identifiers"]:
            if identifier["id_type"] == "eissn":
                columns["rel_eissn"] = identifier["value"]
                break
    if "resources" in match:
        parts = _perios_holdings_parts(match["resources"])
        if parts:
            columns["rel_response"] = "".join(parts)
    return columns


def validate_perios(documents, csv_file_path):
    """Verify periodical documents and write one report row per document.

    For every document not already present in the report, the first ISSN is
    validated, every resource URL is fetched, and, when the ISSN status is
    "OK", an OpenURL lookup adds the related e-ISSN and a holdings summary.

    Rows already present in ``csv_file_path`` are kept: they are written
    back before the new rows and their documents are not verified again.
    ISSNs from those rows also count when detecting duplicates.

    Args:
        documents: Sequence of document mappings; each needs "rec_id" and
            "rec_type". Nothing is done (and no file is touched) when empty.
        csv_file_path: Path of the CSV report to resume from and rewrite.

    Returns:
        None.
    """
    if not documents:
        return

    source = "Serials Solutions"
    rec_id_prefix = ""
    fieldnames = _perios_fieldnames()

    # Restore the previous state before the report file is rewritten.
    previous_rows = _perios_previous_rows(csv_file_path)
    verified_ids = {row["rec_id"] for row in previous_rows}
    seen_issns = {
        row["issn"] for row in previous_rows
        if row.get("issn") and row.get("issn_status") in ("OK", "DUPLICATED")
    }

    with open(csv_file_path, "w", newline="", encoding="utf-8") as csv_file:
        csvwriter = csv.DictWriter(csv_file, delimiter=",", fieldnames=fieldnames)
        csvwriter.writeheader()
        # Opening in "w" mode truncated the file, so earlier results must be
        # written back or they would be lost on every resumed run.
        for row in previous_rows:
            csvwriter.writerow({name: row.get(name, "") for name in fieldnames})

        for index, document in enumerate(documents):
            rec_id = document["rec_id"]
            if rec_id in verified_ids:
                logging.info("# Document with index: {} and rec_id: {} - Previously verified".format(index, rec_id))
                continue

            logging.info("# Document index: {} and rec_id: {} - Starting verification".format(index, rec_id))
            csvdict = {"rec_id": rec_id, "rec_type": document["rec_type"]}
            for key in ("title_non_sort", "title", "title_sub"):
                if key in document:
                    csvdict[key] = document[key]

            identifiers = document["identifiers"] if "identifiers" in document else ()
            csvdict.update(_perios_issn_columns(identifiers, seen_issns))

            # 856: URL and status of every resource.
            # NOTE(review): only 25 resource slots exist in the header; a
            # document with more resources makes DictWriter.writerow raise
            # ValueError (unknown field). Confirm the wanted policy.
            if "resources" in document:
                csvdict.update(_perios_resource_columns(document["resources"]))

            # Online journals / OpenURL lookup, only for a valid, first-seen ISSN.
            if csvdict["issn_status"] == "OK":
                csvdict.update(_perios_openurl_columns(csvdict["issn"], source, rec_id_prefix))

            csvwriter.writerow(csvdict)
def transform(self, data):
    """Return the result of ``issn.validate(data)``, or ``None`` when it fails.

    Validation is best-effort: a failure is not reported to the caller, it
    only results in ``None``.
    """
    try:
        return issn.validate(data)
    except Exception:
        # Deliberately tolerant, but ``Exception`` (not a bare ``except``)
        # lets KeyboardInterrupt and SystemExit propagate.
        return None
def _perios_fieldnames(resource_slots=25):
    """Return the report columns: fixed columns, then a URL/status pair per resource slot."""
    fieldnames = [
        "rec_id", "rec_type", "title_non_sort", "title", "title_sub",
        "issn", "issn_status", "rel_eissn", "rel_response",
    ]
    for position in range(1, resource_slots + 1):
        fieldnames.append("856_{}_u".format(position))
        fieldnames.append("856_{}_status".format(position))
    return fieldnames


def _perios_previous_rows(csv_file_path):
    """Return the rows of an existing report, or an empty list when there is none."""
    if not os.path.isfile(csv_file_path):
        return []
    # On Python 3 the csv module needs a text-mode file opened with newline="".
    with open(csv_file_path, newline="", encoding="utf-8") as csv_file:
        return list(csv.DictReader(csv_file, delimiter=","))


def _perios_issn_columns(identifiers, seen_issns):
    """Return the "issn"/"issn_status" columns for the first ISSN identifier.

    ``seen_issns`` is updated in place with every ISSN reported as "OK" so
    that later occurrences are reported as "DUPLICATED".
    """
    for identifier in identifiers:
        if identifier["id_type"] != "issn":
            continue
        value = identifier["value"]
        try:
            issn.validate(value)
        except Exception:
            status = "INVALID"
        else:
            if value in seen_issns:
                status = "DUPLICATED"
            else:
                seen_issns.add(value)
                status = "OK"
        # Only the first ISSN of a document is reported.
        return {"issn": value, "issn_status": status}
    return {"issn_status": "EMPTY"}


def _perios_resource_columns(resources):
    """Return the "856_<n>_u"/"856_<n>_status" columns, fetching every URL found."""
    columns = {}
    for position, resource in enumerate(resources, start=1):
        url_key = "856_{}_u".format(position)
        status_key = "856_{}_status".format(position)
        if "url" in resource:
            columns[url_key] = resource["url"]
            # Only the first item returned by fetch_url is inspected.
            outcome = resource_service.fetch_url(resource["url"])[0]
            columns[status_key] = "ERROR" if outcome["error"] else "OK"
        else:
            columns[url_key] = "EMPTY"
            columns[status_key] = "EMPTY"
    return columns


def _perios_holdings_parts(resources):
    """Return the text fragments describing each resource, one line per resource."""
    parts = []
    for resource in resources:
        if parts:
            parts.append("\n")
        if "institution_name" in resource:
            parts.append(resource["institution_name"])
        if "service_name" in resource:
            parts.append(" - ")
            parts.append(resource["service_name"])
        if "period_begin" in resource or "period_end" in resource:
            # A missing bound of the period is rendered as "....".
            parts.append(" (")
            if "period_begin" in resource:
                parts.append(resource["period_begin"])
            else:
                parts.append("....")
            if "period_end" in resource:
                parts.append(" - ")
                parts.append(resource["period_end"])
            else:
                parts.append(" - ....")
            parts.append(")")
    return parts


def _perios_openurl_columns(issn_value, source, rec_id_prefix):
    """Return the "rel_eissn"/"rel_response" columns from the OpenURL lookup of an ISSN."""
    columns = {}
    response = openurl_client.request_periodical_by_issn(issn_value)
    if response is None:
        return columns
    matches = openurl_crosswalk.openurl_xmletree_to_metajson_list(
        response, source, rec_id_prefix, True)
    if not matches:
        return columns
    # Only the first converted document is used.
    match = matches[0]
    if "identifiers" in match:
        for identifier in match["identifiers"]:
            if identifier["id_type"] == "eissn":
                columns["rel_eissn"] = identifier["value"]
                break
    if "resources" in match:
        parts = _perios_holdings_parts(match["resources"])
        if parts:
            columns["rel_response"] = "".join(parts)
    return columns


def validate_perios(documents, csv_file_path):
    """Verify periodical documents and write one report row per document.

    For every document not already present in the report, the first ISSN is
    validated, every resource URL is fetched, and, when the ISSN status is
    "OK", an OpenURL lookup adds the related e-ISSN and a holdings summary.

    Rows already present in ``csv_file_path`` are kept: they are written
    back before the new rows and their documents are not verified again.
    ISSNs from those rows also count when detecting duplicates.

    Args:
        documents: Sequence of document mappings; each needs "rec_id" and
            "rec_type". Nothing is done (and no file is touched) when empty.
        csv_file_path: Path of the CSV report to resume from and rewrite.

    Returns:
        None.
    """
    if not documents:
        return

    source = "Serials Solutions"
    rec_id_prefix = ""
    fieldnames = _perios_fieldnames()

    # Restore the previous state before the report file is rewritten.
    previous_rows = _perios_previous_rows(csv_file_path)
    verified_ids = {row["rec_id"] for row in previous_rows}
    seen_issns = {
        row["issn"] for row in previous_rows
        if row.get("issn") and row.get("issn_status") in ("OK", "DUPLICATED")
    }

    with open(csv_file_path, "w", newline="", encoding="utf-8") as csv_file:
        csvwriter = csv.DictWriter(csv_file, delimiter=",", fieldnames=fieldnames)
        csvwriter.writeheader()
        # Opening in "w" mode truncated the file, so earlier results must be
        # written back or they would be lost on every resumed run.
        for row in previous_rows:
            csvwriter.writerow({name: row.get(name, "") for name in fieldnames})

        for index, document in enumerate(documents):
            rec_id = document["rec_id"]
            if rec_id in verified_ids:
                logging.info("# Document with index: {} and rec_id: {} - Previously verified".format(index, rec_id))
                continue

            logging.info("# Document index: {} and rec_id: {} - Starting verification".format(index, rec_id))
            csvdict = {"rec_id": rec_id, "rec_type": document["rec_type"]}
            for key in ("title_non_sort", "title", "title_sub"):
                if key in document:
                    csvdict[key] = document[key]

            identifiers = document["identifiers"] if "identifiers" in document else ()
            csvdict.update(_perios_issn_columns(identifiers, seen_issns))

            # 856: URL and status of every resource.
            # NOTE(review): only 25 resource slots exist in the header; a
            # document with more resources makes DictWriter.writerow raise
            # ValueError (unknown field). Confirm the wanted policy.
            if "resources" in document:
                csvdict.update(_perios_resource_columns(document["resources"]))

            # Online journals / OpenURL lookup, only for a valid, first-seen ISSN.
            if csvdict["issn_status"] == "OK":
                csvdict.update(_perios_openurl_columns(csvdict["issn"], source, rec_id_prefix))

            csvwriter.writerow(csvdict)