def format_create_name(key, data, errors, context):
    # Build the dataset ``name`` (plus format_id and title) for a format
    # record from its parent_id and two-digit format_code.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    # if a name has already been set
    # we don need to do it again
    name = data.get(key, u"")
    if name.endswith(u"-clone"):
        # cloned records get a reset format_code and a regenerated name
        format_code = u"0"
        data[("format_code",)] = u"0"
        name = u""
    else:
        format_code = _data_lookup(("format_code",), data)
        if not format_code:
            errors[("format_code",)].append(_("Missing value"))
            errors[key].append(_("Name could not be generated"))
            return
    # only generate when no usable name exists
    # NOTE(review): if ``name`` were the missing sentinel, the
    # .endswith() call above would already have raised -- confirm
    # data.get(key) can actually return ``missing`` here
    if name is missing or not name:
        parent_id = _data_lookup(("parent_id",), data)
        if not parent_id:
            errors[("parent_id",)].append(_("Missing value"))
            errors[key].append(_("Name could not be generated"))
            return
        format_id = u"{parent_id}_{format_code}".format(
            parent_id=parent_id.lower(),
            format_code=format_code.zfill(2).lower()
        )
        _data_update(format_id, ("format_id",), data)
        data[key] = u"format-{format_id}".format(format_id=format_id)
        _data_update(data[key], ("title",), data)
def product_create_name(key, data, errors, context):
    # Build the dataset ``name`` as "<type>-<product_id_new>", creating a
    # product ID first when one is absent or the record is a clone.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    existing_name = _data_lookup(("name",), data)
    product_id_new = _data_lookup(("product_id_new",), data)
    if (
        not product_id_new or product_id_new is missing or
        not existing_name or existing_name is missing or
        existing_name.endswith(u"-clone")
    ):
        create_product_id(("product_id_new",), data, errors, context)
        if errors[("product_id_new",)]:
            errors[key].append(_("Name could not be generated"))
            return
        # re-read the freshly generated ID
        product_id_new = _data_lookup(("product_id_new",), data)
    data_set_type = _data_lookup(("type",), data)
    if product_id_new:
        data[key] = u"{data_set_type}-{product_id_new}".format(
            data_set_type=data_set_type,
            product_id_new=product_id_new.lower()
        )
    else:
        errors[("product_id_new",)].append(_("Missing value"))
        errors[key].append(_("Name could not be generated"))
def get_parent_content_types(product_id):
    """
    Return content_type_codes for parent publication of product_id

    :param product_id:
    :type product_id: str

    :return: list
    :raises ValidationError
    """
    # the first 8 digits of a product ID identify the parent publication
    if len(product_id) < 8:
        raise ValidationError((_('Invalid product ID: too short'),))
    lc = ckanapi.LocalCKAN()
    results = lc.action.package_search(
        q='product_id_new:{parent_id}'.format(parent_id=product_id[:8])
    )
    if not results['count']:
        raise ValidationError((_('{parent_id}: Not found'.format(
            parent_id=product_id[:8]
        )),))
    if results['count'] > 1:
        # a parent ID should be unique; more than one hit is a data problem
        raise ValidationError((_('{parent_id}: Found more than one parent'
                                 .format(
                                     parent_id=product_id[:8]
                                 )
                                 ),))
    if results['results'][0]['content_type_codes']:
        return results['results'][0]['content_type_codes']
    else:
        raise ValidationError((_('{parent_id}: no content_type_codes set'
                                 .format(
                                     parent_id=product_id[:8]
                                 )
                                 ),))
def format_create_name(key, data, errors, context):
    # Build the dataset name (plus format_id and title) for a format record.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    # if a name has already been set
    # we don need to do it again
    name = data.get(key, u'')
    if name.endswith(u'-clone'):
        # cloned records get a reset format_code and a regenerated name
        format_code = u'0'
        data[('format_code',)] = u'0'
        name = u''
    else:
        format_code = _data_lookup(('format_code',), data)
        if not format_code:
            errors[('format_code',)].append(_('Missing value'))
            errors[key].append(_('Name could not be generated'))
            return
    # only generate when no usable name exists
    # NOTE(review): a ``missing`` name would already have raised at
    # .endswith() above -- confirm data.get(key) can return ``missing``
    if name is missing or not name:
        parent_id = _data_lookup(('parent_id',), data)
        if not parent_id:
            errors[('parent_id',)].append(_('Missing value'))
            errors[key].append(_('Name could not be generated'))
            return
        format_id = u'{parent_id}_{format_code}'.format(
            parent_id=parent_id.lower(),
            format_code=format_code.zfill(2).lower()
        )
        _data_update(format_id, ('format_id',), data)
        data[key] = u'format-{format_id}'.format(
            format_id=format_id
        )
        _data_update(data[key], ('title',), data)
def product_create_name(key, data, errors, context):
    # Build the dataset name as "<type>-<product_id_new>", generating a
    # product ID first when absent or when the record is a clone.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    existing_name = _data_lookup(('name',), data)
    product_id_new = _data_lookup(('product_id_new',), data)
    if not product_id_new or product_id_new is missing or\
            not existing_name or existing_name is missing or\
            existing_name.endswith(u'-clone'):
        create_product_id(('product_id_new',), data, errors, context)
        if errors[('product_id_new',)]:
            errors[key].append(_('Name could not be generated'))
            return
        # re-read the freshly generated ID
        product_id_new = _data_lookup(('product_id_new',), data)
    data_set_type = _data_lookup(('type',), data)
    if product_id_new:
        data[key] = u'{data_set_type}-{product_id_new}'.format(
            data_set_type=data_set_type,
            product_id_new=product_id_new.lower()
        )
    else:
        errors[('product_id_new',)].append(_('Missing value'))
        errors[key].append(_('Name could not be generated'))
def format_create_name(key, data, errors, context):
    # Build the dataset name (plus format_id and title) for a format record.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    # if a name has already been set
    # we don need to do it again
    name = data.get(key, u'')
    if name.endswith(u'-clone'):
        # cloned records get a reset format_code and a regenerated name
        format_code = u'0'
        data[('format_code', )] = u'0'
        name = u''
    else:
        format_code = _data_lookup(('format_code', ), data)
        if not format_code:
            errors[('format_code', )].append(_('Missing value'))
            errors[key].append(_('Name could not be generated'))
            return
    # only generate when no usable name exists
    # NOTE(review): a ``missing`` name would already have raised at
    # .endswith() above -- confirm data.get(key) can return ``missing``
    if name is missing or not name:
        parent_id = _data_lookup(('parent_id', ), data)
        if not parent_id:
            errors[('parent_id', )].append(_('Missing value'))
            errors[key].append(_('Name could not be generated'))
            return
        format_id = u'{parent_id}_{format_code}'.format(
            parent_id=parent_id.lower(),
            format_code=format_code.zfill(2).lower())
        _data_update(format_id, ('format_id', ), data)
        data[key] = u'format-{format_id}'.format(format_id=format_id)
        _data_update(data[key], ('title', ), data)
def validator(key, data, errors, context):
    # Validate a multi-select codeset field: accept a string or a list of
    # strings, check each against the codeset's choices, and store the
    # de-duplicated selection as a JSON list.
    # NOTE(review): ``field`` is not a parameter -- this def appears to be
    # a closure inside a scheming validator factory; confirm the enclosing
    # scope supplies it.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    codeset_type = field['codeset_type']
    codeset_choices = h.codeset_choices(codeset_type)
    value = _data_lookup(key, data)
    if value is missing:
        value = []
    elif isinstance(value, basestring):
        # a single bare string is treated as a one-element selection
        value = [value]
    elif isinstance(value, list):
        for element in value:
            if not isinstance(element, basestring):
                errors[key].append(_('expecting list of strings'))
                return
    else:
        errors[key].append(_('expecting list of strings'))
        return
    selected = set()  # store in a set to eliminate duplicates
    for element in value:
        if element in codeset_choices:
            selected.add(element)
        else:
            errors[key].append(_('unexpected choice "%s"') % element)
    if not errors[key]:
        result = json.dumps(list(selected))
        data[key] = result
def next_article_id(top_parent_id, issue_number):
    """
    Get next available product ID

    :param top_parent_id:
    :type top_parent_id: 8 digit str
    :param issue_number
    :type issue_number: 7 digit str

    :return: 19 or 20 digit str
    """
    if not isinstance(top_parent_id, basestring) or len(top_parent_id) != 8:
        raise ValidationError(
            (_('Invalid top parent ID. Expected 8 digit string'),)
        )
    if not isinstance(issue_number, basestring) or len(issue_number) != 7:
        raise ValidationError(
            (_('Invalid issue number. Expected 7 digit string'),)
        )
    # page through all existing article IDs for this issue, tracking the
    # highest sequence number seen
    i = 0
    n = 1
    article_sequence_number = 1
    while i < n:
        lc = ckanapi.LocalCKAN()
        results = lc.action.package_search(
            q='type:article AND '
              'product_id_new:{top_parent_id}{issue_number}*'.format(
                  top_parent_id=top_parent_id,
                  issue_number=issue_number
              ),
            sort='product_id_new ASC',
            rows=1000,
            start=i*1000
        )
        if results['count'] == 0:
            # no articles yet: return the first sequence number
            return u'{top_parent_id}{issue_number}{sequence_number}'.format(
                top_parent_id=top_parent_id,
                issue_number=issue_number,
                sequence_number=unicode(article_sequence_number).zfill(4)
            )
        n = results['count'] / 1000.0
        i += 1
        for result in results['results']:
            # characters 15+ of the product ID hold the article sequence
            old_id = int(result['product_id_new'][15:])
            article_sequence_number = max(
                article_sequence_number,
                old_id
            )
    return (
        u'{top_parent_id}'
        '{issue_number}'
        '{sequence_number}'
    ).format(
        top_parent_id=top_parent_id,
        issue_number=issue_number,
        sequence_number=unicode(article_sequence_number + 1).zfill(4)
    )
def next_article_id(top_parent_id, issue_number):
    """
    Get next available product ID

    :param top_parent_id:
    :type top_parent_id: 8 digit str
    :param issue_number
    :type issue_number: 7 digit str

    :return: 19 or 20 digit str
    """
    if not isinstance(top_parent_id, basestring) or len(top_parent_id) != 8:
        raise ValidationError(
            (_('Invalid top parent ID. Expected 8 digit string'), ))
    if not isinstance(issue_number, basestring) or len(issue_number) != 7:
        raise ValidationError(
            (_('Invalid issue number. Expected 7 digit string'), ))
    lc = ckanapi.LocalCKAN()
    # check whether issue dataset exists
    result = lc.action.package_search(q='{pid}{issue_number}'.format(
        pid=top_parent_id,
        issue_number=issue_number))['results']
    if not result:
        raise ValidationError({
            '{pid}{issue_number}'.format(pid=top_parent_id,
                                         issue_number=issue_number):
            'Cannot create article, parent issue missing'
        })
    # page through all existing article IDs for this issue, tracking the
    # highest sequence number seen
    i = 0
    n = 1
    article_sequence_number = 1
    while i < n:
        results = lc.action.package_search(
            q=('type:article AND '
               'product_id_new:{top_parent_id}{issue_number}?*').format(
                top_parent_id=top_parent_id,
                issue_number=issue_number),
            sort='product_id_new ASC',
            rows=1000,
            start=i * 1000)
        if results['count'] == 0:
            # no articles yet: return the first sequence number
            return u'{top_parent_id}{issue_number}{sequence_number}'.format(
                top_parent_id=top_parent_id,
                issue_number=issue_number,
                sequence_number=unicode(article_sequence_number).zfill(5))
        n = results['count'] / 1000.0
        i += 1
        for result in results['results']:
            # characters 15+ of the product ID hold the article sequence
            old_id = int(result['product_id_new'][15:])
            article_sequence_number = max(article_sequence_number, old_id)
    return (u'{top_parent_id}'
            '{issue_number}'
            '{sequence_number}').format(
        top_parent_id=top_parent_id,
        issue_number=issue_number,
        sequence_number=unicode(article_sequence_number + 1).zfill(5))
def shortcode_validate(key, data, errors, context):
    """
    Accept shortcodes in the following forms and convert to a json
    list for storage:

    1. a list of strings, eg. ["code-one", "code-two"]
    2. a single string value with semicolon-separated values
       "code-one;code-two"

    :param key: flattened data key, e.g. ('subject_codes',)
    :param data: flattened data dict (mutated in place on success)
    :param errors: validation errors dict (appended to on failure)
    :param context: validation context (unused)
    """
    # just in case there was an error before our validator,
    # bail out here because our errors won't be useful
    if errors[key]:
        return

    value = data[key]
    if value is missing or value == u'':
        return

    if isinstance(value, basestring):
        try:
            if isinstance(json.loads(value), list):
                # already stored as a JSON list
                return
        except ValueError:
            pass  # value wasn't in json format, keep processing
        except TypeError:
            # FIXME should we return missing instead? or
            data[key] = json.dumps([])
            return
        value = value.split(';')

    if not isinstance(value, list):
        errors[key].append(_('expecting list of strings'))
        return

    out = []
    for element in value:
        if not isinstance(element, basestring):
            errors[key].append(_('invalid type for shortcode: %r') % element)
            continue
        if isinstance(element, str):
            try:
                element = element.decode('utf-8')
            except UnicodeDecodeError:
                # BUG FIX: this branch referenced an undefined name
                # ``lang`` and raised NameError instead of reporting the
                # offending value; report the element itself.
                errors[key].append(
                    _('invalid encoding for "%s" value') % element)
                continue
        out.append(element.strip())

    # TODO: future: check values against valid choices for this field
    # using @scheming_validator decorator to get the form field name

    if not errors[key]:
        data[key] = json.dumps(out)
def apply_archive_rules(key, data, errors, context):
    # Derive ``archive_date`` from the release date according to the
    # product type / content type archive policy.
    if errors[key]:
        return
    release_date = _data_lookup(key, data)
    if release_date:
        archive_date = _data_lookup((u'archive_date',), data)
        content_type_codes = _data_lookup((u'content_type_codes',), data)
        product_type_code = _data_lookup((u'product_type_code',), data)
        product_id_new = _data_lookup((u'product_id_new',), data)
        if product_type_code == u'24':
            # Daily: archive two years after release
            if not archive_date:
                _data_update(
                    release_date+datetime.timedelta(days=2*365),
                    (u'archive_date',),
                    data
                )
        elif product_type_code == u'20':
            if not content_type_codes:
                # NOTE(review): get_parent_content_types raises
                # ValidationError on failure (not caught here) -- confirm
                # whether the falsy check below can ever trigger
                content_type_codes = h.get_parent_content_types(
                    product_id_new
                )
                if not content_type_codes:
                    errors[(u'content_type_codes',)].append(
                        _('Missing value'))
                    errors[(u'archive_date',)].append(
                        _('Unable to determine'))
                    return
            # Analysis/Stats in brief
            if u'2016' in content_type_codes:
                if not archive_date:
                    _data_update(
                        release_date+datetime.timedelta(days=5*365),
                        (u'archive_date',),
                        data
                    )
            # Analysis/Articles and Reports
            elif u'2021' in content_type_codes:
                if not archive_date:
                    _data_update(
                        release_date+datetime.timedelta(days=5*365),
                        (u'archive_date',),
                        data
                    )
            # # Reference
            # elif content_type_code in [u'2002', u'2003', u'2023']:
            #     set_archive_date()
            #
            # Reference/Classification
            elif u'2025' in content_type_codes and len(product_id_new) >= 15:
                try:
                    h.set_previous_issue_archive_date(
                        product_id_new,
                        release_date+datetime.timedelta(days=5*365)
                    )
                except ValidationError as e:
                    errors[(u'product_id_new',)].append(
                        _(e.error_summary[u'Message']))
                    errors[(u'archive_date',)].append(
                        _('Unable to determine'))
def next_non_data_product_id(subject_code, product_type_code):
    """
    Get next available product ID

    :param subject_code:
    :type subject_code: 2 digit str
    :param product_type_code:
    :type product_type_code: 2 digit str

    :return: next free product ID as unicode
    :raises ValidationError: on an invalid subject or product type code
    """
    valid_product_codes = ['20', '21', '22', '23', '25', '26']
    # BUG FIX: raw string for the regex -- '\d' in a plain literal is a
    # deprecated/invalid escape sequence in newer Pythons
    if not isinstance(subject_code, basestring) or \
            not re.match(r'^\d\d$', subject_code):
        raise ValidationError((_('Invalid subject code.'), ))
    if isinstance(product_type_code, basestring):
        if product_type_code not in valid_product_codes:
            error_message = 'Invalid product type code. ' \
                            'Expected one of {codes!r}'.format(
                                codes=valid_product_codes,
                            )
            raise ValidationError((_(error_message), ))
    # scan existing IDs in order, claiming the first unused sequence
    # number (gaps left by deleted products are reused)
    i = 0
    n = 1
    product_sequence_number = 1
    while i < n:
        lc = ckanapi.LocalCKAN()
        results = lc.action.package_search(
            q='product_id_new:{subject_code}{product_type_code}????'.format(
                subject_code=subject_code,
                product_type_code=product_type_code),
            sort='product_id_new ASC',
            rows=1000,
            start=i * 1000)
        n = results['count'] / 1000.0
        i += 1
        for result in results['results']:
            # NOTE(review): slice [5:8] reads 3 digits while the generated
            # sequence is zfill(4) -- confirm the intended ID layout
            if product_sequence_number < int(result['product_id_new'][5:8]):
                # found a gap: claim it
                return (
                    u'{subject_code}{product_type_code}{sequence_number}'
                ).format(
                    subject_code=subject_code,
                    product_type_code=product_type_code,
                    sequence_number=unicode(product_sequence_number).zfill(4))
            else:
                product_sequence_number += 1
    return u'{subject_code}{product_type_code}{sequence_number}'.format(
        subject_code=subject_code,
        product_type_code=product_type_code,
        sequence_number=unicode(product_sequence_number).zfill(4))
def daily_create_name(key, data, errors, context):
    # Build the dataset name as "daily-<product_id_new>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    product_id_new = _data_lookup(("product_id_new",), data)
    if product_id_new:
        current_name = _data_lookup(("name",), data)
        new_name = u"daily-{0}".format(product_id_new.lower())
        # rename clones (unless already renamed) and unnamed records
        if (current_name.endswith(u"-clone")
                and not current_name.startswith(new_name)) \
                or not current_name:
            data[key] = new_name
    else:
        errors[("product_id_new",)].append(_("Missing value"))
        errors[key].append(_("Name could not be generated"))
def geodescriptor_create_name(key, data, errors, context):
    # Build the dataset name as "geodescriptor-<geodescriptor_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    geodescriptor_code = _data_lookup(("geodescriptor_code",), data)
    if geodescriptor_code:
        current_name = _data_lookup(("name",), data)
        new_name = safe_name(
            u"geodescriptor-{0}".format(geodescriptor_code.lower()))
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u"-clone") or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[("geodescriptor_code",)].append(_("Missing value"))
        errors[key].append(_("Name could not be generated"))
def repeating_text_delimited(key, data, errors, context):
    """
    Normalize a repeating-text field to a JSON-encoded list.

    Accepts a semicolon-delimited string or a list of strings; anything
    else is reported as an error.

    :param key: flattened data key
    :param data: flattened data dict (mutated in place on success)
    :param errors: validation errors dict
    :param context: validation context (unused)
    """
    if errors[key]:
        return
    value = data[key]
    if not value:
        data[key] = json.dumps([])
        return
    if isinstance(value, list):
        # BUG FIX: list input was previously left unserialized while every
        # other accepted form was stored as JSON; serialize it the same way
        # (consistent with the other repeating_text_delimited variant in
        # this file).
        data[key] = json.dumps(value)
        return
    elif isinstance(value, basestring):
        # split on ';' and drop blank entries
        values = value.split(';')
        out = []
        for val in values:
            val = val.strip()
            if val:
                out.append(val)
        data[key] = json.dumps(out)
    else:
        errors[key].append(_('expected list of basestring, got: {value}'.format(
            value=value
        )))
def correction_create_name(key, data, errors, context):
    # Build the dataset name as
    # "correction-<product_id_new>_<correction_id>" (lower-cased).
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    product_id_new = _data_lookup(("product_id_new",), data)
    correction_id = _data_lookup(("correction_id",), data)
    if not product_id_new:
        errors[key].append(_("could not find product_id_new"))
    elif not correction_id:
        errors[key].append(_("could not find correction_id"))
    else:
        data[key] = (
            u"correction-{product_id}_{correction_id}".format(
                product_id=product_id_new,
                correction_id=correction_id)
        ).lower()
def repeating_text_delimited(key, data, errors, context):
    """
    Store a repeating-text field as a JSON-encoded list.

    Empty values become an empty JSON list; lists are serialized
    directly; strings are split on ';' with blank entries dropped;
    anything else is reported as an error.
    """
    if errors[key]:
        return

    raw = data[key]
    if not raw:
        data[key] = json.dumps([])
    elif isinstance(raw, list):
        data[key] = json.dumps(raw)
    elif isinstance(raw, basestring):
        pieces = [piece.strip() for piece in raw.split(';')]
        data[key] = json.dumps([piece for piece in pieces if piece])
    else:
        errors[key].append(
            _('expected list of basestring, got: {value}'.format(value=raw)))
def daily_create_name(key, data, errors, context):
    # Build the dataset name as "daily-<product_id_new>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    product_id_new = _data_lookup(('product_id_new', ), data)
    if product_id_new:
        current_name = _data_lookup(('name', ), data)
        new_name = u'daily-{0}'.format(product_id_new.lower())
        # rename clones (unless already renamed) and unnamed records
        if (current_name.endswith(u'-clone')
                and not current_name.startswith(new_name)) \
                or not current_name:
            data[key] = new_name
    else:
        errors[('product_id_new', )].append(_('Missing value'))
        errors[key].append(_('Name could not be generated'))
def set_previous_issue_archive_date(product_id, archive_date):
    """
    Set the archive date of the previous issue of product_id

    :param product_id:
    :type product_id: str
    :param archive_date:
    :type archive_date: datetime.datetime

    :return:
    """
    # product_id must include both the 8-digit parent ID and issue number
    if len(product_id) < 15:
        raise ValidationError((_('{product_id}: expected product ID and issue '
                                 'number'.format(product_id=product_id)),))
    lc = ckanapi.LocalCKAN()
    # all issues of this product, newest product ID first
    results = lc.action.package_search(
        q='product_id_new:{parent_id}???????'.format(
            parent_id=product_id[:8]
        ),
        sort='product_id_new desc'
    )
    for result in results['results']:
        # first ID below product_id (descending order) is the previous issue
        if result['product_id_new'] < product_id:
            if not result.get('archive_date'):
                # only set it when no archive date exists yet
                result['archive_date'] = archive_date
                lc.action.package_update(**result)
            return
def geodescriptor_create_name(key, data, errors, context):
    # Build the dataset name as "geodescriptor-<geodescriptor_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    geodescriptor_code = _data_lookup(('geodescriptor_code', ), data)
    if geodescriptor_code:
        current_name = _data_lookup(('name', ), data)
        new_name = safe_name(u'geodescriptor-{0}'.format(
            geodescriptor_code.lower()))
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[('geodescriptor_code', )].append(_('Missing value'))
        errors[key].append(_('Name could not be generated'))
def ndm_tag_name_validator(value, context):
    """
    Validate a tag name against the NDM allowed character set.

    :param value: tag name to validate
    :param context: validation context (unused)
    :return: ``value`` unchanged when valid
    :raises df.Invalid: when the tag contains a disallowed character
    """
    # BUG FIX: use a raw string -- the original plain literal relied on
    # '\w' and '\-' surviving as literal backslashes, which is a
    # deprecated/invalid escape sequence in newer Pythons. The pattern
    # itself is unchanged.
    tag_name_match = re.compile(r"[\w \-.,:'/()]*$", re.UNICODE)
    if not tag_name_match.match(value):
        raise df.Invalid(_(
            'Tag "%s" must be alphanumeric characters or'
            ' symbols: - _ . , : \' / ( )'
        ) % value)
    return value
def product_create_name(key, data, errors, context):
    # Build the dataset name as "<type>-<product_id_new>", generating a
    # product ID first when none exists yet.
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    product_id_new = _data_lookup(('product_id_new', ), data)
    if product_id_new is missing or not product_id_new:
        create_product_id(('product_id_new', ), data, errors, context)
        # re-read the freshly generated ID
        product_id_new = _data_lookup(('product_id_new', ), data)
    data_set_type = _data_lookup(('type', ), data)
    if product_id_new:
        data[key] = u'{data_set_type}-{product_id_new}'.format(
            data_set_type=data_set_type,
            product_id_new=product_id_new.lower())
    else:
        errors[('product_id_new', )].append(_('Missing value'))
        errors[key].append(_('Name could not be generated'))
def format_create_id(key, data, errors, context):
    # Build the format ID as "<parent_id>_<2-digit format_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    # if a name has already been set
    # we don need to do it again
    if data.get(key) is not missing and len(data.get(key, "")):
        return
    parent_id = _data_lookup(("parent_id",), data)
    if not parent_id:
        errors[key].append(_("could not find parent_id of parent"))
    format_code = _data_lookup(("format_code",), data)
    if not format_code:
        errors[key].append(_("could not find format_code"))
    # bail if either lookup failed above
    if errors[key]:
        return
    data[key] = u"{0}_{1}".format(parent_id.lower(),
                                  format_code.zfill(2).lower())
def format_create_id(key, data, errors, context):
    # Build the format ID as "<parent_id>_<2-digit format_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    # if a name has already been set
    # we don need to do it again
    if data.get(key) is not missing and len(data.get(key, '')):
        return
    parent_id = _data_lookup(('parent_id', ), data)
    if not parent_id:
        errors[key].append(_('could not find parent_id of parent'))
    format_code = _data_lookup(('format_code', ), data)
    if not format_code:
        errors[key].append(_('could not find format_code'))
    # bail if either lookup failed above
    if errors[key]:
        return
    data[key] = u'{0}_{1}'.format(parent_id.lower(),
                                  format_code.zfill(2).lower())
def apply_archive_rules(key, data, errors, context):
    """
    Derive ``archive_date`` from the release date according to the
    product type / content type archive policy.
    """
    if errors[key]:
        return
    release_date = _data_lookup(key, data)
    if release_date:
        archive_date = _data_lookup((u"archive_date",), data)
        content_type_codes = _data_lookup((u"content_type_codes",), data)
        product_type_code = _data_lookup((u"product_type_code",), data)
        if product_type_code == u"24":
            # Daily: archive two years after release
            if not archive_date:
                _data_update(release_date + datetime.timedelta(days=2 * 365),
                             (u"archive_date",), data)
        elif product_type_code == u"20":
            if not content_type_codes:
                # fall back to the parent publication's content types
                try:
                    content_type_codes = h.get_parent_content_types(
                        _data_lookup((u"product_id_new",), data))
                except ValidationError:
                    errors[(u"content_type_codes",)].append(
                        _("Missing value"))
                    errors[(u"archive_date",)].append(
                        _("Unable to determine"))
                    return
            # Analysis/Stats in brief
            if u"2016" in content_type_codes:
                if not archive_date:
                    _data_update(
                        release_date + datetime.timedelta(days=5 * 365),
                        (u"archive_date",), data)
            # Analysis/Articles and Reports
            elif u"2021" in content_type_codes:
                if not archive_date:
                    _data_update(
                        release_date + datetime.timedelta(days=5 * 365),
                        (u"archive_date",), data)
            # # Reference
            # elif content_type_code in [u'2002', u'2003', u'2023']:
            #     set_archive_date()
            #
            # Reference/Classification
            elif u"2025" in content_type_codes:
                try:
                    h.set_previous_issue_archive_date(
                        _data_lookup(("product_id_new",), data),
                        release_date + datetime.timedelta(days=5 * 365)
                    )
                except ValidationError as e:
                    # BUG FIX: ``_(e)`` passed the exception object to
                    # gettext, which expects a string; report the summary
                    # message instead, matching the sibling implementation
                    # of this validator in this file.
                    errors[(u"product_id_new",)].append(
                        _(e.error_summary[u"Message"]))
                    errors[(u"archive_date",)].append(
                        _("Unable to determine"))
def keyword_create_name(key, data, errors, context):
    # Build the dataset name as "keyword-<controlled_keyword_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    controlled_keyword_code = _data_lookup(('controlled_keyword_code',), data)
    if controlled_keyword_code:
        current_name = _data_lookup(('name',), data)
        new_name = u'keyword-{0}'.format(unicode(controlled_keyword_code))
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find controlled_keyword_code'))
def survey_create_name(key, data, errors, context):
    # Build the dataset name as "survey-<product_id_new>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    product_id_new = _data_lookup(('product_id_new',), data)
    if product_id_new:
        current_name = _data_lookup(('name',), data)
        new_name = u'survey-{0}'.format(product_id_new.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find product_id_new'))
def province_create_name(key, data, errors, context):
    # Build the dataset name as "province-<sgc_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    sgc_code = _data_lookup(('sgc_code',), data)
    if sgc_code:
        current_name = _data_lookup(('name',), data)
        new_name = u'province-{0}'.format(sgc_code.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find sgc_code'))
def subject_create_name(key, data, errors, context):
    # Build the dataset name as "subject-<subject_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    subject_code = _data_lookup(("subject_code",), data)
    if subject_code:
        current_name = _data_lookup(("name",), data)
        new_name = u"subject-{0}".format(subject_code.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u"-clone") or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_("could not find subject_code"))
def survey_create_name(key, data, errors, context):
    # Build the dataset name as "survey-<product_id_new>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    product_id_new = _data_lookup(('product_id_new', ), data)
    if product_id_new:
        current_name = _data_lookup(('name', ), data)
        new_name = u'survey-{0}'.format(product_id_new.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find product_id_new'))
def province_create_name(key, data, errors, context):
    # Build the dataset name as "province-<sgc_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    sgc_code = _data_lookup(('sgc_code', ), data)
    if sgc_code:
        current_name = _data_lookup(('name', ), data)
        new_name = u'province-{0}'.format(sgc_code.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find sgc_code'))
def keyword_create_name(key, data, errors, context):
    # Build the dataset name as "keyword-<controlled_keyword_code>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    controlled_keyword_code = _data_lookup(('controlled_keyword_code', ),
                                           data)
    if controlled_keyword_code:
        current_name = _data_lookup(('name', ), data)
        new_name = u'keyword-{0}'.format(unicode(controlled_keyword_code))
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find controlled_keyword_code'))
def codeset_create_name(key, data, errors, context):
    # Build the dataset name as "<codeset_type>-<codeset_value>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    codeset_type = _data_lookup(('codeset_type',), data)
    # NOTE(review): .lower() here assumes codeset_value is always a
    # string; a missing/None value would raise -- confirm upstream
    codeset_value = _data_lookup(('codeset_value',), data).lower()
    if codeset_type and codeset_value:
        current_name = _data_lookup(('name',), data)
        # codeset_value is already lower-cased above; the second .lower()
        # is redundant but harmless
        new_name = u'{0}-{1}'.format(codeset_type, codeset_value.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find codeset_type or codeset_value'))
def codeset_create_name(key, data, errors, context):
    # Build the dataset name as "<codeset_type>-<codeset_value>".
    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key]:
        return
    codeset_type = _data_lookup(('codeset_type', ), data)
    # NOTE(review): .lower() here assumes codeset_value is always a
    # string; a missing/None value would raise -- confirm upstream
    codeset_value = _data_lookup(('codeset_value', ), data).lower()
    if codeset_type and codeset_value:
        current_name = _data_lookup(('name', ), data)
        # codeset_value is already lower-cased above; the second .lower()
        # is redundant but harmless
        new_name = u'{0}-{1}'.format(codeset_type, codeset_value.lower())
        # only (re)generate for clones or unnamed records
        if current_name.endswith(u'-clone') or not current_name:
            if not current_name.startswith(new_name):
                data[key] = new_name
    else:
        errors[key].append(_('could not find codeset_type or codeset_value'))
def get_by_code(self, code):
    """
    :param code: code for specific geo code
    :type code: str

    :return: tuple of english and french descriptions
    """
    q = (
        'zckownerorg_bi_strs:tmsgccode AND '
        'tmsgcspecificcode_bi_tmtxtm:{code}'
    ).format(code=code)
    # NOTE(review): rows='1' limits results to one, yet the count>1
    # branch below iterates response['results'] -- confirm the search
    # actually returns more than one row in that case
    data_dict = {'q': q, 'rows': '1'}
    response = get_action('package_search')(self.context, data_dict)
    if response['count'] == 0:
        raise ValidationError((_('Specific Geo code not found.'),))
    # This is very messy but the tmsgccode entries might have multiple
    # codes as in the "all provinces" entry. If this is the case, iterate
    # through them and eliminate entries that start with "all."
    if response['count'] > 1:
        result = None
        for res in response['results']:
            for extra in res['extras']:
                if extra['key'] == 'tmsgccode_bi_tmtxtm':
                    if not extra['value'].lower().startswith('all'):
                        result = res
                        break
            if result:
                break
    else:
        result = response['results'][0]
    if result:
        # flatten the extras list into a plain key/value dict
        result_dict = {}
        for extra in result['extras']:
            result_dict[extra['key']] = extra['value']
        en_text = result_dict.get('tmsgcname_en_tmtxtm', '')
        fr_text = result_dict.get('tmsgcname_fr_tmtxtm', '')
    else:
        en_text = fr_text = None
    return en_text, fr_text
def get_next_cube_id(context, data_dict):
    """
    Returns the next available cube_id (without registering it).

    :param subjectCode: two-digit subjectCode (i.e. '24')
    :type subjectCode: str

    :return: next available cube_id
    :rtype: str

    :raises: ValidationError
    """
    subject_code = _get_or_bust(data_dict, 'subjectCode')
    # BUG FIX: raw string for the regex -- '\d' in a plain literal is a
    # deprecated/invalid escape sequence in newer Pythons
    if not re.match(r'^\d\d$', subject_code):
        raise ValidationError(_('Invalid subject_code'),)
    lc = ckanapi.LocalCKAN(context=context)
    # cube IDs look like <subject_code>10NNNN; fetch the highest one
    response = lc.action.package_search(
        q=(
            'product_id_new:{subject_code}10* AND '
            'dataset_type:cube'
        ).format(subject_code=subject_code),
        sort='product_id_new desc',
        rows=1
    )
    if response['results']:
        result = response['results'][0]
        oldest_product_id = result['product_id_new']
        if oldest_product_id.endswith('9999'):
            # FIXME: This system is going to encounter numerous
            # problems down the road.
            # BUG FIX: the adjacent string literals were missing a
            # separating space ("...registered.Reusing...")
            raise ValidationError(
                'All Cube IDs for this subject have been registered. '
                'Reusing IDs is in development.'
            )
        return str(int(oldest_product_id) + 1)
    # no cubes registered yet for this subject: start the sequence
    return '{subject_code}100001'.format(subject_code=subject_code)
def set_previous_issue_archive_date(product_id, archive_date):
    """
    Set the archive date of the previous issue of product_id

    :param product_id:
    :type product_id: str
    :param archive_date:
    :type archive_date: datetime.datetime

    :return:
    """
    # product_id must include both the 8-digit parent ID and issue number
    if len(product_id) < 15:
        raise ValidationError((_('{product_id}: expected product ID and issue '
                                 'number'.format(product_id=product_id)), ))
    lc = ckanapi.LocalCKAN()
    # all issues of this product, newest product ID first
    results = lc.action.package_search(
        q='product_id_new:{parent_id}???????'.format(parent_id=product_id[:8]),
        sort='product_id_new desc')
    for result in results['results']:
        # first ID below product_id (descending order) is the previous issue
        if result['product_id_new'] < product_id:
            if not result.get('archive_date'):
                # only set it when no archive date exists yet
                result['archive_date'] = archive_date
                lc.action.package_update(**result)
            return
def get_next_cube_id(context, data_dict):
    """
    Returns the next available cube_id (without registering it).

    :param subjectCode: two-digit subjectCode (i.e. '24')
    :type subjectCode: str

    :return: next available cube_id
    :rtype: str

    :raises: ValidationError
    """
    subject_code = _get_or_bust(data_dict, 'subjectCode')
    # BUG FIX: raw string for the regex -- '\d' in a plain literal is a
    # deprecated/invalid escape sequence in newer Pythons
    if not re.match(r'^\d\d$', subject_code):
        raise ValidationError(_('Invalid subject_code'), )
    lc = ckanapi.LocalCKAN(context=context)
    # cube IDs look like <subject_code>10NNNN; fetch the highest one
    response = lc.action.package_search(
        q=('product_id_new:{subject_code}10* AND '
           'dataset_type:cube').format(subject_code=subject_code),
        sort='product_id_new desc',
        rows=1)
    if response['results']:
        result = response['results'][0]
        oldest_product_id = result['product_id_new']
        if oldest_product_id.endswith('9999'):
            # FIXME: This system is going to encounter numerous
            # problems down the road.
            # BUG FIX: the adjacent string literals were missing a
            # separating space ("...registered.Reusing...")
            raise ValidationError(
                'All Cube IDs for this subject have been registered. '
                'Reusing IDs is in development.')
        return str(int(oldest_product_id) + 1)
    # no cubes registered yet for this subject: start the sequence
    return '{subject_code}100001'.format(subject_code=subject_code)
def create_product_id(key, data, errors, context):
    """
    Validator: generate and store a product_id_new for the dataset.

    Dispatches on the dataset type: non-data products get the next free
    non-data product ID, issues/articles derive their ID from the parent,
    cubes get the next cube ID, and data products (view/indicator/chart)
    get the next child product ID of their parent.

    :param key: flattened key being validated (e.g. ('product_id_new',))
    :param data: flattened data dict
    :param errors: flattened errors dict (appended to on failure)
    :param context: CKAN validation context
    """
    general_non_data_types = (u"publication", u"video", u"conference",
                              u"service", u"pumf", u"generic")
    general_data_types = (u"view", u"indicator", u"chart")

    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key] or errors[("subject_codes",)] or errors[("top_parent_id",)]:
        return

    # Legacy IDs are kept as-is; never regenerate them.
    product_id_new = _data_lookup(("product_id_new",), data)
    if product_id_new and is_legacy_id(product_id_new):
        return

    data_set_type = _data_lookup(("type",), data)

    # make sure subject_codes processed
    shortcode_validate(("subject_codes",), data, errors, context)
    subject_codes = shortcode_output(_data_lookup(("subject_codes",), data))
    top_parent_id = _data_lookup(("top_parent_id",), data)

    if data_set_type in general_non_data_types:
        try:
            product_id_new = h.next_non_data_product_id(
                subject_code=subject_codes[0][:2],
                product_type_code=_data_lookup(("product_type_code",), data)
            )
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[("subject_codes",)].append(_(ve.error_summary[u"Message"]))
            errors[key].append(_("PID could not be generated"))
            return
        except IndexError:
            # subject_codes was empty — subject_codes[0] raised.
            errors[("subject_codes",)].append(_("Missing value"))
            errors[key].append(_("PID could not be generated"))
            return
    elif data_set_type == u"issue":
        if not top_parent_id:
            errors[("top_parent_id",)].append(_("Missing value"))
            errors[key].append(_("PID could not be generated"))
            return
        # FIX: _data_lookup takes a flattened tuple key — a bare string
        # key never matched, so an existing issue_number was ignored.
        issue_number = _data_lookup(("issue_number",), data)
        if not issue_number:
            issue_number = h.next_issue_number(top_parent_id)
            _data_update(issue_number, ("issue_number",), data)
        product_id_new = u"{pid}{issue_number}".format(pid=top_parent_id,
                                                       issue_number=issue_number)
        data[key] = product_id_new
        return product_id_new
    elif data_set_type == u"article":
        if not top_parent_id:
            errors[("top_parent_id",)].append(_("Missing value"))
            errors[key].append(_("PID could not be generated"))
            return
        # FIX: tuple key, as above.
        issue_number = _data_lookup(("issue_number",), data)
        if not issue_number:
            issue_number = h.next_issue_number(top_parent_id)
            _data_update(issue_number, ("issue_number",), data)
        try:
            if is_legacy_id(top_parent_id):
                product_id_new = get_next_legacy_article_id(
                    context=context,
                    data_dict={
                        "parentProduct": u"{top_parent_id}{issue_number}".format(
                            top_parent_id=top_parent_id,
                            issue_number=issue_number
                        )
                    },
                )
            else:
                product_id_new = h.next_article_id(top_parent_id=top_parent_id,
                                                   issue_number=issue_number)
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            # NOTE(review): _() is handed the exception object itself, not
            # a message string — confirm this renders sensibly.
            errors[key].append(_(ve))
            return
    elif data_set_type == u"cube":
        try:
            product_id_new = get_next_cube_id(context=context,
                                              data_dict={"subjectCode": subject_codes[0][:2]})
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[("subject_codes",)].append(_(ve.error_dict["message"]))
            errors[key].append(_("PID could not be generated"))
            return
        except IndexError:
            errors[("subject_codes",)].append(_("Missing value"))
            errors[key].append(_("PID could not be generated"))
            return
    elif data_set_type in general_data_types:
        if not top_parent_id or top_parent_id is missing:
            errors[("top_parent_id",)].append(_("Missing value"))
            errors[key].append(_("PID could not be generated"))
            return
        try:
            product_id_new = get_next_product_id(
                context,
                {"parentProductId": top_parent_id,
                 "productType": data.get((u"product_type_code",))}
            )
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[("top_parent_id",)].append(ve.error_dict["message"])
            errors[key].append(_("PID could not be generated"))
            return
        except NotFound as e:
            errors[("top_parent_id",)].append(e[0])
            errors[key].append(_("PID could not be generated"))
            return
    else:
        errors[key].append(
            _("create_product_id not yet implemented for {data_set_type}".format(data_set_type=data_set_type))
        )
        return
def ndm_tag_name_validator(value, context): if re.match(ur'[^\w \-_.,:\'/()]+', value, re.UNICODE): raise df.Invalid( _('Tag "%s" must be alphanumeric characters or' ' symbols: - _ . , : \' / ( )') % value)
def next_non_data_product_id(subject_code, product_type_code):
    """
    Get next available non-data product ID.

    Product IDs are 8 digits: 2-digit subject code, 2-digit product type
    code, then a 4-digit sequence number. Scans existing IDs in ascending
    order and returns the first gap in the sequence, or one past the end.

    :param subject_code:
    :type subject_code: 2 digit str
    :param product_type_code:
    :type product_type_code: 2 digit str

    :return: next available product ID
    :rtype: unicode

    :raises: ValidationError on an invalid subject or product type code
    """
    valid_product_codes = [
        '20', '21', '22', '23', '25', '26'
    ]

    if not isinstance(subject_code, basestring) or \
            not re.match(r'^\d\d$', subject_code):
        raise ValidationError((_('Invalid subject code.'),))

    # NOTE(review): a non-string product_type_code skips validation
    # entirely and is formatted into the query below — confirm callers
    # always pass a string.
    if isinstance(product_type_code, basestring):
        if product_type_code not in valid_product_codes:
            error_message = 'Invalid product type code. ' \
                            'Expected one of {codes!r}'.format(
                                codes=valid_product_codes,
                            )
            raise ValidationError((_(error_message),))

    i = 0
    n = 1
    product_sequence_number = 1

    # Page through all matching products, 1000 at a time, in ascending
    # ID order, looking for the first unused sequence number.
    while i < n:
        lc = ckanapi.LocalCKAN()
        results = lc.action.package_search(
            q='product_id_new:{subject_code}{product_type_code}????'.format(
                subject_code=subject_code,
                product_type_code=product_type_code
            ),
            # FIX: Solr sort order keywords must be lowercase ('asc').
            sort='product_id_new asc',
            rows=1000,
            start=i*1000
        )
        n = results['count'] / 1000.0
        i += 1
        for result in results['results']:
            # FIX: the sequence number is the last FOUR digits
            # (positions 4-8) — [5:8] dropped the leading digit and broke
            # gap detection for sequences >= 1000.
            if product_sequence_number < int(result['product_id_new'][4:8]):
                # Found a gap: this sequence number is free.
                return (
                    u'{subject_code}'
                    '{product_type_code}'
                    '{sequence_number}'
                ).format(
                    subject_code=subject_code,
                    product_type_code=product_type_code,
                    sequence_number=unicode(product_sequence_number).zfill(4)
                )
            else:
                product_sequence_number += 1

    # No gaps: next number past the highest in use.
    return u'{subject_code}{product_type_code}{sequence_number}'.format(
        subject_code=subject_code,
        product_type_code=product_type_code,
        sequence_number=unicode(product_sequence_number).zfill(4)
    )
def create_product_id(key, data, errors, context):
    """
    Validator: generate and store a product_id_new for the dataset.

    Dispatches on the dataset type: non-data products get the next free
    non-data product ID, articles derive their ID from the parent issue,
    cubes get the next cube ID, and data products (view/indicator/chart)
    get the next child product ID of their parent.

    :param key: flattened key being validated
    :param data: flattened data dict
    :param errors: flattened errors dict (appended to on failure)
    :param context: CKAN validation context
    """
    general_non_data_types = (
        u'publication', u'video', u'conference',
        u'service', u'pumf', u'generic'
    )
    general_data_types = (
        u'view', u'indicator', u'chart'
    )

    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key] or errors[('subject_codes',)] or errors[('top_parent_id',)]:
        return

    data_set_type = _data_lookup(('type',), data)

    # make sure subject_codes processed
    shortcode_validate(('subject_codes',), data, errors, context)
    subject_codes = shortcode_output(_data_lookup(('subject_codes',), data))
    top_parent_id = _data_lookup(('top_parent_id',), data)

    if data_set_type in general_non_data_types:
        try:
            product_id_new = h.next_non_data_product_id(
                subject_code=subject_codes[0][:2],
                product_type_code=_data_lookup(('product_type_code',), data)
            )
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[('subject_codes',)].append(_(ve.error_summary[u'Message']))
            errors[key].append(_('PID could not be generated'))
            return
        except IndexError:
            # subject_codes was empty — subject_codes[0] raised.
            errors[('subject_codes',)].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
    elif data_set_type == u'article':
        if not top_parent_id:
            errors[('top_parent_id',)].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
        # FIX: _data_lookup takes a flattened tuple key — a bare string
        # key never matched, so a present issue_number looked missing.
        issue_number = _data_lookup(('issue_number',), data)
        if not issue_number:
            errors[('issue_number',)].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
        try:
            if is_legacy_id(top_parent_id):
                product_id_new = get_next_legacy_article_id(
                    context=context,
                    data_dict={
                        'parentProduct': u'{top_parent_id}{issue_number}'
                        .format(
                            top_parent_id=top_parent_id,
                            issue_number=issue_number
                        )
                    }
                )
            else:
                product_id_new = h.next_article_id(
                    top_parent_id=top_parent_id,
                    issue_number=issue_number
                )
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            # NOTE(review): _() is handed the exception object itself, not
            # a message string — confirm this renders sensibly.
            errors[key].append(_(ve))
            return
    elif data_set_type == u'cube':
        try:
            product_id_new = get_next_cube_id(
                context=context,
                data_dict={'subjectCode': subject_codes[0][:2]}
            )
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[('subject_codes',)].append(_(ve.error_dict['message']))
            errors[key].append(_('PID could not be generated'))
            return
        except IndexError:
            errors[('subject_codes',)].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
    elif data_set_type in general_data_types:
        if not top_parent_id or top_parent_id is missing:
            errors[('top_parent_id',)].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
        try:
            product_id_new = get_next_product_id(
                context, {
                    'parentProductId': top_parent_id,
                    'productType': data.get((u'product_type_code',))
                }
            )
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[('top_parent_id',)].append(ve.error_dict['message'])
            errors[key].append(_('PID could not be generated'))
            return
        except NotFound as e:
            errors[('top_parent_id',)].append(e[0])
            errors[key].append(_('PID could not be generated'))
            return
    else:
        errors[key].append(_(
            'create_product_id not yet implemented for {data_set_type}'.format(
                data_set_type=data_set_type
            )
        ))
        return
def create_product_id(key, data, errors, context):
    """
    Validator: generate and store a product_id_new for the dataset.

    Dispatches on the dataset type: non-data products get the next free
    non-data product ID, issues/articles derive their ID from the parent,
    cubes get the next cube ID, and data products (view/indicator/chart)
    get the next child product ID of their parent.

    :param key: flattened key being validated
    :param data: flattened data dict
    :param errors: flattened errors dict (appended to on failure)
    :param context: CKAN validation context
    """
    general_non_data_types = (u'publication', u'video', u'conference',
                              u'service', u'pumf', u'generic')
    general_data_types = (u'view', u'indicator', u'chart')

    # if there was an error before calling our validator
    # don't bother with our validation
    if errors[key] or errors[('subject_codes', )] or errors[(
            'top_parent_id', )]:
        return

    # Legacy IDs are kept as-is; never regenerate them.
    product_id_new = _data_lookup(('product_id_new', ), data)
    if product_id_new and is_legacy_id(product_id_new):
        return

    data_set_type = _data_lookup(('type', ), data)

    # make sure subject_codes processed
    shortcode_validate(('subject_codes', ), data, errors, context)
    subject_codes = shortcode_output(_data_lookup(('subject_codes', ), data))
    top_parent_id = _data_lookup(('top_parent_id', ), data)

    if data_set_type in general_non_data_types:
        try:
            product_id_new = h.next_non_data_product_id(
                subject_code=subject_codes[0][:2],
                product_type_code=_data_lookup(('product_type_code', ), data))
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[('subject_codes', )].append(_(ve.error_summary[u'Message']))
            errors[key].append(_('PID could not be generated'))
            return
        except IndexError:
            # subject_codes was empty — subject_codes[0] raised.
            errors[('subject_codes', )].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
    elif data_set_type == u'issue':
        if not top_parent_id:
            errors[('top_parent_id', )].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
        # FIX: _data_lookup takes a flattened tuple key — a bare string
        # key never matched, so an existing issue_number was ignored.
        issue_number = _data_lookup(('issue_number', ), data)
        if not issue_number:
            issue_number = h.next_issue_number(top_parent_id)
            _data_update(issue_number, ('issue_number', ), data)
        product_id_new = u'{pid}{issue_number}'.format(
            pid=top_parent_id, issue_number=issue_number)
        data[key] = product_id_new
        return product_id_new
    elif data_set_type == u'article':
        if not top_parent_id:
            errors[('top_parent_id', )].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
        # FIX: tuple key, as above.
        issue_number = _data_lookup(('issue_number', ), data)
        if not issue_number:
            issue_number = h.next_issue_number(top_parent_id)
            _data_update(issue_number, ('issue_number', ), data)
        try:
            if is_legacy_id(top_parent_id):
                product_id_new = get_next_legacy_article_id(
                    context=context,
                    data_dict={
                        'parentProduct':
                            u'{top_parent_id}{issue_number}'.format(
                                top_parent_id=top_parent_id,
                                issue_number=issue_number)
                    })
            else:
                product_id_new = h.next_article_id(top_parent_id=top_parent_id,
                                                   issue_number=issue_number)
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            # NOTE(review): _() is handed the exception object itself, not
            # a message string — confirm this renders sensibly.
            errors[key].append(_(ve))
            return
    elif data_set_type == u'cube':
        try:
            product_id_new = get_next_cube_id(
                context=context,
                data_dict={'subjectCode': subject_codes[0][:2]})
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[('subject_codes', )].append(_(ve.error_dict['message']))
            errors[key].append(_('PID could not be generated'))
            return
        except IndexError:
            errors[('subject_codes', )].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
    elif data_set_type in general_data_types:
        if not top_parent_id or top_parent_id is missing:
            errors[('top_parent_id', )].append(_('Missing value'))
            errors[key].append(_('PID could not be generated'))
            return
        try:
            product_id_new = get_next_product_id(
                context, {
                    'parentProductId': top_parent_id,
                    'productType': data.get((u'product_type_code', ))
                })
            data[key] = product_id_new
            return product_id_new
        except ValidationError as ve:
            errors[('top_parent_id', )].append(ve.error_dict['message'])
            errors[key].append(_('PID could not be generated'))
            return
        except NotFound as e:
            errors[('top_parent_id', )].append(e[0])
            errors[key].append(_('PID could not be generated'))
            return
    else:
        errors[key].append(
            _('create_product_id not yet implemented for {data_set_type}'.
              format(data_set_type=data_set_type)))
        return
def ndm_tag_name_validator(value, context): if re.match(ur'[^\w \-_.,:\'/()]+', value, re.UNICODE): raise df.Invalid(_( 'Tag "%s" must be alphanumeric characters or' ' symbols: - _ . , : \' / ( )' ) % value)