def check_existing_doi(key, flattened_data, errors, context):
    """
    Based on the identifier in the flattened data, checks DataCite for an
    existing DOI.

    :param key: flattened key of the identifier field
    :param flattened_data: flattened package dict
    :param errors: validation errors dict
    :param context: validation context
    :raises Invalid: if the DOI does not exist in DataCite, or if it already
        belongs to another package
    """
    datacite_api = DOIDataCiteAPI()
    identifier = flattened_data[key]
    package_id = flattened_data[('id',)]
    existing_doi = Session.query(DOI).filter(DOI.identifier == identifier).first()
    if not existing_doi:
        # Not in the local DOI table, so check against the DataCite service
        try:
            datacite_doi = datacite_api.get(identifier)
            if not datacite_doi.text:
                raise Invalid('DOI %s does not exist in DataCite' % identifier)
        except HTTPError:
            raise Invalid('DOI %s does not exist in DataCite' % identifier)
    elif existing_doi.package_id != package_id:
        log.error('This DOI already exists and belongs to %s' % existing_doi.package_id)
        raise Invalid('This DOI already exists and belongs to %s' % existing_doi.package_id)
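# Hedged note on the (key, flattened_data, errors, context) signature used
# above: CKAN flattens the package dict before validation, so sibling fields
# are addressed with tuple keys. Illustrative (assumed) values:
#     flattened_data = {('id',): 'pkg-uuid',
#                       ('identifier',): '10.1234/example',
#                       ('identifier_type',): 'doi'}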
def filesize_bytes(value):
    """Converts a human-readable filesize string into bytes.

    Accepts terabyte, gigabyte, megabyte, kilobyte and byte suffixes
    (e.g. 123, 1.2KB, 2.5MB).
    :rtype: int
    """
    if re.search(r'^\d*\.?\d+[A-Za-z]*$', value) is None:
        raise Invalid('Must be a valid filesize format')
    size_type = re.search(r'[A-Za-z]+', value)
    size_number = float(re.search(r'\d*\.?\d*', value).group())
    # No suffix means the value is already in bytes
    size_type = 'B' if size_type is None else size_type.group().upper()
    multipliers = {
        ('TB', 'T', 'TERABYTES', 'TBS', 'TIB'): 1099511627776,
        ('GB', 'G', 'GIGABYTES', 'GIG', 'GBS', 'GIB'): 1073741824,
        ('MB', 'M', 'MEGABYTES', 'MBS', 'MIB'): 1048576,
        ('KB', 'K', 'KILOBYTES', 'KBS', 'KIB'): 1024,
        ('B', 'BYTES', 'BS'): 1,
    }
    for suffixes, multiplier in multipliers.items():
        if size_type in suffixes:
            return int(size_number * multiplier)
    raise Invalid('Must be a valid filesize format (e.g. 123, 1.2KB, 2.5MB)')
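# Hedged usage sketch for filesize_bytes (assumes Invalid is CKAN's
# ckan.lib.navl.dictization_functions.Invalid):
#     filesize_bytes('123')    -> 123
#     filesize_bytes('2.5MB')  -> 2621440   (2.5 * 1048576)
#     filesize_bytes('1GiB')   -> 1073741824
#     filesize_bytes('2,5MB')  -> raises Invalid (comma is not matched)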
def get_package_process_state_by_name(pkg_name):
    if not pkg_name:
        raise Invalid(_('No dataset name provided'))
    pkg = toolkit.get_action("package_show")(data_dict={"id": pkg_name})
    if not pkg:
        raise Invalid(_("No dataset \"{0}\" can be found").format(pkg_name))
    return pkg.get('process_state')
def update_package_inventory_entry(value, context):
    session = context['session']
    if not value:
        raise Invalid(_('No inventory entry provided'))
    result = session.query(InventoryEntry).filter_by(id=value).first()
    if not result:
        raise Invalid(_('Not found') + ': %s' % value)
    return value
def date_planned_validator(key, data, errors, context):
    '''
    date_planned is required when the status is 'Gepland' or 'In onderzoek'.
    '''
    from pylons import config
    import ckan.lib.navl.dictization_functions as df
    Invalid = df.Invalid

    gepland_uri = config.get('donl.business_rules.dataset_status.gepland')
    in_onderzoek_uri = config.get(
        'donl.business_rules.dataset_status.in_onderzoek')

    dataset_status = None
    date_planned = None
    for data_key, data_value in data.items():
        if data_key[0] == 'dataset_status':
            dataset_status = data_value
        if data_key[0] == 'date_planned':
            date_planned = data_value

    if (dataset_status == gepland_uri or dataset_status == in_onderzoek_uri) \
            and (date_planned is None or date_planned == ''):
        raise Invalid(
            'This field is required when the status is Gepland or '
            'In onderzoek.')
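# Hedged example for date_planned_validator (assumes the donl.* config keys
# above resolve to the status URI used here):
#     data = {('dataset_status',): 'http://example.com/status/gepland',
#             ('date_planned',): ''}
#     date_planned_validator(('date_planned',), data, {}, {})
#     -> raises Invalid, because a planned dataset needs a planned date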
def dcatapit_id_unique(value, context):
    model = context['model']
    session = context['session']
    package = context.get('package', None)

    # Search among live datasets for another package with the same identifier
    query = session.query(model.PackageExtra)\
        .join(model.Package,
              and_(model.PackageExtra.package_id == model.Package.id,
                   model.Package.type == 'dataset',
                   model.Package.state == 'active'))\
        .filter(model.PackageExtra.key == 'identifier',
                model.PackageExtra.value == value)

    if package:
        # Existing dataset: exclude the current one from the search
        query = query.filter(model.PackageExtra.package_id != package.id)

    if query.first() is not None:
        raise Invalid(_('Another package exists with the same identifier'))

    return value
def valid_theme(key, data, errors, context):
    value = data.get(key)
    if value in themes() or value == 'None' or value == '':
        return
    raise Invalid(
        _('Primary theme {theme} is not valid').format(theme=value))
def required_for_nolicense(key, flattened_data, errors, context):
    license_id = flattened_data.get(('license_id',), None)
    value = flattened_data.get(key)
    if license_id == meta_data_request_license_id() and not value:
        raise Invalid(
            u'This field is required when No License (Sample) is selected')
    return value
def deposited_dataset_owner_org_dest(value, context):
    user = context.get('user')
    userobj = model.User.get(user)

    include_ids = []
    package = context.get('package')
    dataset = None
    if package:
        # The 'Package' object has no 'owner_org_dest' attribute,
        # so fetch the full dataset dict instead
        dataset = toolkit.get_action('package_show')(context, {
            'id': package.id
        })
        include_ids = [dataset['owner_org_dest']]

    # Pass validation if data container exists and NOT for depositing
    deposit = helpers.get_data_deposit()
    orgs = helpers.get_all_data_containers(
        exclude_ids=[deposit['id']],
        include_unknown=True,
        userobj=userobj,
        include_ids=include_ids,
        dataset=dataset,
    )
    for org in orgs:
        if value == org['id']:
            return value
    raise Invalid('Invalid data container')
def file_type_validator(key, data, errors, context):
    index = key[1]
    value = data.get(key)
    attach = _is_attachment(index, data)
    # Attachment resources must not be 'microdata'; all other resources must be
    if (attach and value == 'microdata') or (not attach and value != 'microdata'):
        raise Invalid('Invalid value for the "file_type" field')
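# Hedged behaviour sketch for file_type_validator (_is_attachment is assumed
# to flag attachment resources by their index in the flattened data):
#     attachment     + 'microdata' -> raises Invalid
#     attachment     + 'report'    -> ok
#     non-attachment + 'microdata' -> ok
#     non-attachment + 'report'    -> raises Invalid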
def date_only_validator(value):
    try:
        if value:
            _try_parse(value)
    except ValueError:
        raise Invalid(
            _('Wrong date format. Please use either "YYYY.mm.dd", '
              '"YYYY mm dd" or "YYYY-mm-dd".'))
    return value
def process_dashboardid(dashboardid, context):
    dashboardid = dashboardid.strip()
    if not DASHBOARD_RE.match(dashboardid):
        raise Invalid('This field must contain a valid dashboard id.')
    return dashboardid
def validator(value):
    if value is missing or not value:
        return value
    choices = sh.scheming_field_choices(field)
    for choice in choices:
        if value == choice['value']:
            return value
    raise Invalid(_('unexpected choice "%s"') % value)
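# Hedged sketch of the closed-over scheming field this validator reads
# (field and missing come from the enclosing scope; the shape is assumed
# from ckanext-scheming conventions):
#     field = {'field_name': 'status',
#              'choices': [{'value': 'open', 'label': 'Open'},
#                          {'value': 'closed', 'label': 'Closed'}]}
#     validator('open')   -> 'open'
#     validator('other')  -> raises Invalid('unexpected choice "other"')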
def correct_email_suffix(key, data, errors, context):
    value = data.get(key)
    # Default to '' so a missing config option fails validation instead of
    # raising AttributeError on .split()
    permitted_regexes = config.get('ckan.valid_email_regexes', '').split()
    for regex in permitted_regexes:
        if re.search(regex, value):
            return
    raise Invalid(_('Email {email} does not end with a valid suffix').format(email=value))
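# Hedged configuration example (the key name is taken from the code above;
# the regex values are illustrative):
#     ckan.valid_email_regexes = @example\.org$ @gov\.uk$
# With that setting, 'user@example.org' passes and 'user@gmail.com'
# raises Invalid.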
def geographies(value, context):
    values = helpers.normalize_list(value)
    for id_ in values:
        try:
            toolkit.get_action('geography_show')(context, {'id': id_})
        except toolkit.ObjectNotFound:
            raise Invalid('Invalid Geography: {}'.format(id_))
    return values
def deposited_dataset_owner_org(value, context):
    # Pass validation if data container exists and is for depositing
    deposit = helpers.get_data_deposit()
    if value == deposit['id']:
        return value
    raise Invalid('Invalid data deposit')
def visibility_validator_resource(key, data, errors, context):
    '''
    Validates that visibility has a correct value.

    Visibility has only two values in the schema: 'restricted' and 'public'.
    '''
    value = data.get(key)
    if value not in ('restricted', 'public'):
        raise Invalid('Invalid value for the visibility field')
def deposited_dataset_curator_id(value, context):
    # Get curation role and raise if not a curator
    if value:
        curation_role = helpers.get_deposited_dataset_user_curation_role(value)
        if curation_role not in ['admin', 'curator']:
            raise Invalid('Invalid curator id')
    return value
def decimal_validator(value):
    '''
    Checks that the provided value (if it is present) is a valid decimal.
    '''
    if value:
        try:
            Decimal(value)
        except DecimalException:
            raise Invalid('Invalid decimal: %s' % value)
    return value
def couple_validator(value, context):
    if not is_blank(value):
        couples = value.split(',')
        for c in couples:
            if not c:
                raise Invalid(_('Invalid couple, one value is missing'))
    return value
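# Hedged examples for couple_validator (is_blank is assumed to test for an
# empty or whitespace-only string):
#     'theme1,theme2'  -> ok
#     'theme1,,theme2' -> raises Invalid (empty element between commas)
#     'theme1,'        -> raises Invalid (trailing comma leaves an empty value)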
def deposited_dataset_owner_org_dest(value, context):
    # Pass validation if data container exists and NOT for depositing
    deposit = helpers.get_data_deposit()
    orgs = helpers.get_all_data_containers(exclude_ids=[deposit['id']],
                                           include_unknown=True)
    for org in orgs:
        if value == org['id']:
            return value
    raise Invalid('Invalid data container')
def _check_date_element(cls, typ, elemstr):
    '''Checks one element of a DateRange string.

    <typ> = 'second', 'minute', 'hour', 'day', 'month', or 'year'.
    '''
    reg = cls.regex_elem[typ]
    matchres = re.match(cls._solregex(reg), elemstr)
    if matchres is None:
        raise Invalid('{}: not a valid {}'.format(elemstr, typ))
    return matchres.groupdict()[typ]
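# Hedged call sketch (regex_elem and _solregex belong to the enclosing
# DateRange class and are assumed to map each element name to a named-group
# pattern such as r'(?P<year>\d{4})'):
#     cls._check_date_element('year', '2021') -> '2021'
#     cls._check_date_element('month', 'xx')  -> raises Invalid('xx: not a valid month')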
def date_modified_validator(key, data, errors, context):
    '''
    date_modified must be in dd-mm-yyyy format
    '''
    import ckan.lib.navl.dictization_functions as df
    Invalid = df.Invalid

    date_modified = None
    for data_key, data_value in data.items():
        if data_key[0] == key[0]:
            date_modified = data_value

    try:
        # Round-trip through strptime/strftime to reject values like
        # '1-1-2021' that parse but are not zero-padded
        if date_modified != datetime.strptime(
                date_modified, '%d-%m-%Y').strftime('%d-%m-%Y'):
            raise Invalid('Not in dd-mm-yyyy format')
    except (ValueError, TypeError):
        raise Invalid('Not in dd-mm-yyyy format')
def parse_date(val):
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(val, fmt).date()
        except (ValueError, TypeError):
            pass
    raise Invalid(_("Invalid date input: {}").format(val))
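# Hedged usage sketch (DATE_FORMATS is defined elsewhere in the module; a
# plausible value is assumed here):
#     DATE_FORMATS = ['%Y-%m-%d', '%d-%m-%Y']
#     parse_date('2021-03-01') -> datetime.date(2021, 3, 1)
#     parse_date('01-03-2021') -> datetime.date(2021, 3, 1)
#     parse_date('March 2021') -> raises Invalid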
def fao_datatype(value, context):
    DEFAULT_DATATYPE = config.get(CONFIG_FAO_DATATYPE)
    if not value and DEFAULT_DATATYPE:
        return DEFAULT_DATATYPE
    try:
        v = Vocabulary.get(Vocabulary.VOCABULARY_DATATYPE)
        if not v.valid_term(value):
            raise ValueError(_("Term not valid"))
        return value
    except Exception as err:
        raise Invalid(_("Invalid datatype value: {}: {}").format(value, err))
def parse_date(val, default=None):
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(val, fmt).date()
        except (ValueError, TypeError):
            pass
    if default is not None:
        return default
    raise Invalid(_(u'Invalid date input: {}').format(val))
def fluent_required(value):
    '''Checks that the input value is a JSON object with at least "en"
    among its keys.'''
    if DEBUG:
        log.info('fluent_required: %s', value)
    error_msg = ('This multilingual field is mandatory. '
                 'Please specify a value, at least in English.')
    try:
        value_json = json.loads(value)
    except (ValueError, TypeError):
        raise Invalid(error_msg)
    if "en" not in value_json or not value_json["en"]:
        raise Invalid(error_msg)
    return value
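# Hedged examples for fluent_required (expects the serialized JSON of a
# ckanext-fluent multilingual field):
#     fluent_required('{"en": "Title", "fr": "Titre"}') -> passes
#     fluent_required('{"fr": "Titre"}')                -> raises Invalid
#     fluent_required('not json')                       -> raises Invalid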
def linked_datasets(value, context):
    if context.get('job'):
        return value

    # Check that the user has access to the selected linked datasets
    selected = utils.normalize_list(value)
    allowed = _get_allowed_linked_datasets()
    for dataset_id in selected:
        if dataset_id not in allowed:
            raise Invalid('Invalid linked datasets')
    return value
def single_link_validator(value, context):
    '''
    Checks that the provided value (if it is present) is a valid URL.
    '''
    if value:
        pieces = urlparse(value)
        if all([pieces.scheme, pieces.netloc]) and \
                set(pieces.netloc) <= set(string.ascii_letters + string.digits + '-.') and \
                pieces.scheme in ['http', 'https']:
            return value
        else:
            raise Invalid('Please provide a valid link: %s' % value)
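# Hedged examples for single_link_validator (note the netloc whitelist of
# letters, digits, '-' and '.' rejects ports and userinfo):
#     'https://example.org/data'      -> ok
#     'ftp://example.org/data'        -> raises Invalid (scheme not http/https)
#     'https://example.org:8080/data' -> raises Invalid (':' not allowed in netloc)
#     ''                              -> returns None (empty values pass through)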
def deposited_dataset_curation_state(value, context):
    ALLOWED = ['draft', 'submitted', 'review']

    # Set default value
    if not value:
        value = 'draft'

    # Raise if not allowed
    if value not in ALLOWED:
        raise Invalid('Invalid curation state')

    return value