def import_item(self, item, crawler='wiki', truncate_strings=True, verbosity=0):
    """Import a single record from a Scrapy Item dict.

    >>> WikiItem().import_item({'url': 'http://test.com',
    ...     'modified': '13 January 2014 00:15',
    ...     'crawler': 'more than thirty characters in this silly name'})  # doctest: +ELLIPSIS
    <WikiItem: WikiItem('more than thirty characters in', u'http://test.com', '', datetime.datetime(2014, 1, 13, 0, 15), '')>

    Arguments:
      item: Scrapy Item (or plain dict) mapping scraped field name -> raw value
      crawler (str): name of the crawler that produced `item`
      truncate_strings (bool): if True, silently truncate string values longer
        than the destination field's max_length; if False, raise RuntimeError
      verbosity (int): when > 2, print each mapping entry as it is processed

    Returns:
      self, so the call can be chained (e.g. ``Model().import_item(d).save()``)

    Raises:
      RuntimeError: a string value exceeds the destination field's
        ``max_length`` and ``truncate_strings`` is False.
    """
    item = dict(item)
    self.crawler = str(crawler)
    # _item_mapping: scraped key -> {'name': model field, 'type': coercion
    # callable, 'default': fallback value}  (declared elsewhere on the class)
    for k, v in self._item_mapping.iteritems():
        if verbosity > 2:
            print('%r: %r' % (k, v))
        value = item.get(k, v['default'])
        if value is None:
            # Leave the model field at its own default.
            continue
        # Best-effort coercion to the configured type; keep the raw value if
        # the cast fails (date strings are handled separately below).
        # Was a bare ``except:`` — that also trapped SystemExit and
        # KeyboardInterrupt; ``Exception`` preserves the best-effort intent
        # without swallowing process-control exceptions.
        try:
            value = v['type'](value)
        except Exception:
            pass
        field = self.__class__._meta.get_field_by_name(v['name'])[0]
        if isinstance(value, basestring):
            max_length = getattr(field, 'max_length', None)
            if max_length and len(value) > max_length:
                if truncate_strings:
                    value = value[:max_length]
                else:
                    raise RuntimeError('String loaded from json is length %s and destination field max_length is %s.' % (len(value), max_length))
        # Date/datetime columns get the wiki-specific parser regardless of
        # what the generic coercion above produced.
        if isinstance(field, (models.DateTimeField, models.DateField)):
            value = util.clean_wiki_datetime(value)
        setattr(self, v['name'], value)
    return self
def datetime_parser(s, default=None):
    """Parse a wiki-style datetime string.

    Delegates to ``util.clean_wiki_datetime`` for any truthy input;
    a falsy ``s`` (None, empty string, ...) yields ``default`` instead.
    """
    return util.clean_wiki_datetime(s) if s else default