def validate_identifier(identifier):
    """Validate a single identifier and return the resulting error messages.

    Args:
        identifier: mapping read through its "id_type" and "value" keys.
            A falsy value (None, empty dict) is accepted and yields no error.

    Returns:
        A list holding one "Invalid <TYPE>: <value>" message when the
        validator selected by "id_type" raises, otherwise an empty list.
        Only the types issn, isbn, isan, ismn and ean are checked; any other
        "id_type" is returned as error-free without being validated.
    """
    errors = []
    if not identifier:
        return errors

    id_type = identifier["id_type"]
    if id_type not in ("issn", "isbn", "isan", "ismn", "ean"):
        # No validator is wired up for this type, so nothing is reported.
        return errors

    value = identifier["value"]
    try:
        if id_type == "issn":
            issn.validate(value)
        elif id_type == "isbn":
            isbn.validate(value)
        elif id_type == "isan":
            isan.validate(value)
        elif id_type == "ismn":
            ismn.validate(value)
        else:
            ean.validate(value)
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # invalid identifier.
        errors.append("Invalid {}: {}".format(id_type.upper(), value))
    return errors
Ejemplo n.º 2
0
def validate_identifier(identifier):
    """Check one identifier against the validator matching its type.

    Args:
        identifier: mapping read through its "id_type" and "value" keys;
            a falsy value is treated as "nothing to validate".

    Returns:
        A list of error messages: empty when the identifier is falsy, of an
        unhandled type, or accepted by its validator; otherwise a single
        "Invalid <TYPE>: <value>" entry.
    """
    errors = []
    if not identifier:
        return errors

    id_type = identifier["id_type"]
    # Each handler catches Exception rather than using a bare except, so that
    # KeyboardInterrupt / SystemExit are never reported as an invalid value.
    if id_type == "issn":
        try:
            issn.validate(identifier["value"])
        except Exception:
            errors.append("Invalid ISSN: {}".format(identifier["value"]))
    elif id_type == "isbn":
        try:
            isbn.validate(identifier["value"])
        except Exception:
            errors.append("Invalid ISBN: {}".format(identifier["value"]))
    elif id_type == "isan":
        try:
            isan.validate(identifier["value"])
        except Exception:
            errors.append("Invalid ISAN: {}".format(identifier["value"]))
    elif id_type == "ismn":
        try:
            ismn.validate(identifier["value"])
        except Exception:
            errors.append("Invalid ISMN: {}".format(identifier["value"]))
    elif id_type == "ean":
        try:
            ean.validate(identifier["value"])
        except Exception:
            errors.append("Invalid EAN: {}".format(identifier["value"]))
    return errors
Ejemplo n.º 3
0
 def _deserialize(self, value, attr, data, **kwargs):
     """Validate *value* as an ISSN and return its formatted form.

     The value is passed to ``issn.validate`` and, when that succeeds, the
     result of ``issn.format(value)`` is returned. ``attr``, ``data`` and
     ``kwargs`` are accepted but not used here.

     Raises:
         ValidationError: with the validator's own message when it raises
             InvalidChecksum, InvalidLength, InvalidFormat or
             InvalidComponent, or with a generic "Bad format" message for
             any other ordinary exception.
     """
     try:
         issn.validate(value)
         return issn.format(value)
     except (InvalidChecksum, InvalidLength, InvalidFormat, InvalidComponent) as e:
         raise ValidationError(str(e)) from e
     except Exception as e:
         # Exception instead of a bare except: KeyboardInterrupt and
         # SystemExit must not be converted into a validation error.
         raise ValidationError(f"Bad format {value}") from e
Ejemplo n.º 4
0
def validate_issn(issn):
    """Raise a ValidationError when *issn* is not a valid ISSN.

    Falsy values (None, empty string) are accepted without being checked.
    The checker's InvalidChecksum, InvalidLength and InvalidFormat errors
    are each translated into a ValidationError with a dedicated message;
    any other exception raised by the checker propagates unchanged.
    """
    if not issn:
        return

    # Checked in this order, mirroring a sequence of except clauses.
    failure_messages = (
        (InvalidChecksum, 'ISSN invalid checksum: %(issn)s'),
        (InvalidLength, 'ISSN invalid length: %(issn)s'),
        (InvalidFormat, 'ISSN invalid format: %(issn)s'),
    )
    try:
        issn_checker.validate(issn)
    except (InvalidChecksum, InvalidLength, InvalidFormat) as error:
        for error_type, message in failure_messages:
            if isinstance(error, error_type):
                raise ValidationError(message,
                                      code='invalid',
                                      params={'issn': issn})
Ejemplo n.º 5
0
def _perios_csv_fieldnames():
    """Return the CSV header: nine fixed columns plus 25 URL/status pairs."""
    fieldnames = ["rec_id", "rec_type", "title_non_sort", "title", "title_sub",
                  "issn", "issn_status", "rel_eissn", "rel_response"]
    for position in range(1, 26):
        fieldnames.append("856_{}_u".format(position))
        fieldnames.append("856_{}_status".format(position))
    return fieldnames


def _perios_issn_columns(document, seen_issns):
    """Return the "issn" / "issn_status" columns for one document."""
    columns = {}
    if "identifiers" in document:
        for identifier in document["identifiers"]:
            if identifier["id_type"] != "issn":
                continue
            value = identifier["value"]
            columns["issn"] = value
            try:
                issn.validate(value)
                if value in seen_issns:
                    status = "DUPLICATED"
                else:
                    seen_issns.add(value)
                    status = "OK"
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C during
                # a long run stops it instead of flagging the ISSN as invalid.
                status = "INVALID"
            columns["issn_status"] = status
            # Only the first ISSN identifier of a document is considered.
            break
    if "issn" not in columns:
        columns["issn_status"] = "EMPTY"
    return columns


def _perios_resource_columns(document):
    """Return the "856_<n>_u" / "856_<n>_status" columns for one document."""
    columns = {}
    if "resources" in document:
        for position, resource in enumerate(document["resources"], 1):
            url_key = "856_{}_u".format(position)
            status_key = "856_{}_status".format(position)
            if "url" in resource:
                columns[url_key] = resource["url"]
                # Test the URL; only the first element of the result is read.
                res_dict = resource_service.fetch_url(resource["url"])[0]
                columns[status_key] = "ERROR" if res_dict["error"] else "OK"
            else:
                columns[url_key] = "EMPTY"
                columns[status_key] = "EMPTY"
    return columns


def _perios_openurl_columns(issn_value, source, rec_id_prefix):
    """Return the "rel_eissn" / "rel_response" columns from an OpenURL lookup."""
    columns = {}
    openurl_response = openurl_client.request_periodical_by_issn(issn_value)
    if openurl_response is None:
        return columns
    openurl_documents = openurl_crosswalk.openurl_xmletree_to_metajson_list(
        openurl_response, source, rec_id_prefix, True)
    if not openurl_documents:
        return columns

    # Only the first document of the response is used.
    openurl_document = openurl_documents[0]
    if "identifiers" in openurl_document:
        for identifier in openurl_document["identifiers"]:
            if identifier["id_type"] == "eissn":
                columns["rel_eissn"] = identifier["value"]
                break
    if "resources" in openurl_document:
        # One line per resource: "<institution> - <service> (<begin> - <end>)",
        # with "...." standing in for a missing period bound.
        rel_response = []
        for resource in openurl_document["resources"]:
            if rel_response:
                rel_response.append("\n")
            if "institution_name" in resource:
                rel_response.append(resource["institution_name"])
            if "service_name" in resource:
                rel_response.append(" - ")
                rel_response.append(resource["service_name"])
            if "period_begin" in resource or "period_end" in resource:
                rel_response.append(" (")
                if "period_begin" in resource:
                    rel_response.append(resource["period_begin"])
                else:
                    rel_response.append("....")
                if "period_end" in resource:
                    rel_response.append(" - ")
                    rel_response.append(resource["period_end"])
                else:
                    rel_response.append(" - ....")
                rel_response.append(")")
        if rel_response:
            columns["rel_response"] = "".join(rel_response)
    return columns


def validate_perios(documents, csv_file_path):
    """Verify periodical documents and write one CSV report row per document.

    For each document the report holds its record id/type and titles, the
    status of its first ISSN identifier (OK, DUPLICATED, INVALID or EMPTY),
    the status of each resource URL (OK, ERROR or EMPTY) and, when the ISSN
    status is OK, the e-ISSN and holdings returned by an OpenURL lookup.

    Args:
        documents: iterable of document mappings; each must provide "rec_id"
            and "rec_type". Nothing is done when it is falsy.
        csv_file_path: path of the CSV report. When the file already exists,
            the rec_id values it contains are read first and documents with
            those ids are skipped.

    Returns:
        None.

    NOTE(review): the report is then reopened in "wb" mode, which truncates
    it, and skipped documents are not written again, so rows from the
    previous run are not carried over - confirm this is intended.
    NOTE(review): the binary open modes follow the Python 2 csv convention;
    the Python 3 csv module expects text-mode files - confirm the target
    interpreter.
    NOTE(review): the header has 25 URL column pairs; with the default
    DictWriter settings a document with more resources makes writerow raise
    ValueError.
    """
    if not documents:
        return

    source = "Serials Solutions"
    rec_id_prefix = ""
    seen_issns = set()

    # Restore of the previous state: rec_ids already present in the report.
    verified_rec_ids = set()
    if os.path.isfile(csv_file_path):
        with open(csv_file_path, "rb") as csv_file:
            for previous_row in csv.DictReader(csv_file, delimiter=','):
                verified_rec_ids.add(previous_row["rec_id"])

    with open(csv_file_path, "wb") as csv_file:
        csvwriter = csv.DictWriter(csv_file, delimiter=',',
                                   fieldnames=_perios_csv_fieldnames())
        csvwriter.writeheader()
        for index, document in enumerate(documents):
            rec_id = document["rec_id"]
            if rec_id in verified_rec_ids:
                logging.info("# Document with index: {} and rec_id: {} - Previously verified".format(index, rec_id))
                continue
            logging.info("# Document index: {} and rec_id: {} - Starting verification".format(index, rec_id))

            csvdict = {"rec_id": rec_id, "rec_type": document["rec_type"]}
            for key in ("title_non_sort", "title", "title_sub"):
                if key in document:
                    csvdict[key] = document[key]

            csvdict.update(_perios_issn_columns(document, seen_issns))
            # 856: resource URLs and their status
            csvdict.update(_perios_resource_columns(document))
            # Online journals / OpenURL: only queried for a valid, new ISSN.
            if csvdict["issn_status"] == "OK":
                csvdict.update(_perios_openurl_columns(
                    csvdict["issn"], source, rec_id_prefix))
            csvwriter.writerow(csvdict)
Ejemplo n.º 6
0
 def transform(self, data):
     """Return the result of ``issn.validate(data)``, or None when it fails.

     Validation is best-effort: any ordinary exception raised by the
     validator is deliberately swallowed and reported as None.
     """
     try:
         return issn.validate(data)
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit still propagate; the None result is now explicit.
         return None
Ejemplo n.º 7
0
def validate_perios(documents, csv_file_path):
    """Check periodical documents and report the findings in a CSV file.

    One row is written per verified document with: record id and type,
    titles, the status of the first ISSN identifier (OK, DUPLICATED, INVALID
    or EMPTY), the status of every resource URL (OK, ERROR or EMPTY) and,
    for an OK ISSN, the e-ISSN and holdings found through an OpenURL lookup.

    Args:
        documents: iterable of document mappings, each providing "rec_id"
            and "rec_type"; the function returns immediately when falsy.
        csv_file_path: path of the CSV report. If the file exists, its
            rec_id values are loaded and the matching documents are skipped.

    Returns:
        None.

    NOTE(review): the report is reopened in "wb" mode after the rec_ids are
    loaded, which truncates it; skipped documents are not rewritten, so the
    previous rows are not carried over - confirm this is intended.
    NOTE(review): binary open modes are the Python 2 csv convention; the
    Python 3 csv module expects text-mode files - confirm the target
    interpreter.
    NOTE(review): only 25 URL column pairs exist; with the default
    DictWriter settings a document with more resources makes writerow raise
    ValueError.
    """
    if not documents:
        return

    source = "Serials Solutions"
    rec_id_prefix = ""
    issn_seen = set()

    # Restore of the previous state: rec_ids already listed in the report.
    verified_rec_ids = set()
    if os.path.isfile(csv_file_path):
        with open(csv_file_path, "rb") as csv_file:
            for previous_row in csv.DictReader(csv_file, delimiter=','):
                verified_rec_ids.add(previous_row["rec_id"])

    # Nine fixed columns followed by 856_1 .. 856_25 URL/status pairs.
    fieldnames = [
        "rec_id", "rec_type", "title_non_sort", "title", "title_sub",
        "issn", "issn_status", "rel_eissn", "rel_response",
    ]
    for position in range(1, 26):
        fieldnames.append("856_" + str(position) + "_u")
        fieldnames.append("856_" + str(position) + "_status")

    with open(csv_file_path, "wb") as csv_file:
        csvwriter = csv.DictWriter(csv_file,
                                   delimiter=',',
                                   fieldnames=fieldnames)
        csvwriter.writeheader()
        for index, document in enumerate(documents):
            rec_id = document["rec_id"]
            if rec_id in verified_rec_ids:
                logging.info(
                    "# Document with index: {} and rec_id: {} - Previously verified"
                    .format(index, rec_id))
                continue
            logging.info(
                "# Document index: {} and rec_id: {} - Starting verification"
                .format(index, rec_id))

            csvdict = {"rec_id": rec_id, "rec_type": document["rec_type"]}
            for title_key in ("title_non_sort", "title", "title_sub"):
                if title_key in document:
                    csvdict[title_key] = document[title_key]

            # ISSN: only the first identifier of type "issn" is considered.
            if "identifiers" in document:
                for identifier in document["identifiers"]:
                    if identifier["id_type"] != "issn":
                        continue
                    issn_value = identifier["value"]
                    csvdict["issn"] = issn_value
                    try:
                        issn.validate(issn_value)
                        if issn_value in issn_seen:
                            csvdict["issn_status"] = "DUPLICATED"
                        else:
                            issn_seen.add(issn_value)
                            csvdict["issn_status"] = "OK"
                    except Exception:
                        # Not a bare except: an interrupt must stop the run
                        # instead of being recorded as an invalid ISSN.
                        csvdict["issn_status"] = "INVALID"
                    break
            if "issn" not in csvdict:
                csvdict["issn_status"] = "EMPTY"

            # 856: resource URLs and their status
            if "resources" in document:
                for i, resource in enumerate(document["resources"]):
                    url_key = "856_" + str(i + 1) + "_u"
                    status_key = "856_" + str(i + 1) + "_status"
                    if "url" not in resource:
                        csvdict[url_key] = "EMPTY"
                        csvdict[status_key] = "EMPTY"
                        continue
                    csvdict[url_key] = resource["url"]
                    # Test the URL; only the first result element is read.
                    res_dict = resource_service.fetch_url(resource["url"])[0]
                    if res_dict["error"]:
                        csvdict[status_key] = "ERROR"
                    else:
                        csvdict[status_key] = "OK"

            # Online journals / OpenURL: only for a valid, not yet seen ISSN.
            openurl_document = None
            if csvdict["issn_status"] == "OK":
                openurl_response = openurl_client.request_periodical_by_issn(
                    csvdict["issn"])
                if openurl_response is not None:
                    openurl_documents = openurl_crosswalk.openurl_xmletree_to_metajson_list(
                        openurl_response, source, rec_id_prefix, True)
                    if openurl_documents:
                        # Only the first document of the response is used.
                        openurl_document = openurl_documents[0]

            if openurl_document is not None:
                if "identifiers" in openurl_document:
                    for identifier in openurl_document["identifiers"]:
                        if identifier["id_type"] == "eissn":
                            csvdict["rel_eissn"] = identifier["value"]
                            break
                if "resources" in openurl_document:
                    # One line per resource:
                    # "<institution> - <service> (<begin> - <end>)", with
                    # "...." standing in for a missing period bound.
                    rel_response = []
                    for resource in openurl_document["resources"]:
                        if rel_response:
                            rel_response.append("\n")
                        if "institution_name" in resource:
                            rel_response.append(resource["institution_name"])
                        if "service_name" in resource:
                            rel_response.append(" - ")
                            rel_response.append(resource["service_name"])
                        if "period_begin" in resource or "period_end" in resource:
                            rel_response.append(" (")
                            if "period_begin" in resource:
                                rel_response.append(resource["period_begin"])
                            else:
                                rel_response.append("....")
                            if "period_end" in resource:
                                rel_response.append(" - ")
                                rel_response.append(resource["period_end"])
                            else:
                                rel_response.append(" - ....")
                            rel_response.append(")")
                    if rel_response:
                        csvdict["rel_response"] = "".join(rel_response)

            csvwriter.writerow(csvdict)