예제 #1
0
파일: models.py 프로젝트: MiddleFork/pug
    def import_item(self, item, crawler='wiki', truncate_strings=True, verbosity=0):
        """Populate this model instance from a single Scrapy Item (or dict).

        Every entry of ``self._item_mapping`` maps an item key to a spec dict
        that is read for three keys: ``'name'`` (the attribute/field on this
        model to set), ``'type'`` (a callable used to coerce the raw value) and
        ``'default'`` (used when the key is missing from ``item``).

        The instance is only modified in memory; nothing is saved here.

        Args:
            item: Scrapy Item or anything else accepted by ``dict()``.
            crawler: Name of the crawler; stored as ``str(crawler)`` on
                ``self.crawler`` before the mapping is applied.
            truncate_strings: When true, strings longer than the destination
                field's ``max_length`` are silently cut to fit. When false,
                such a string raises ``RuntimeError`` instead.
            verbosity: Values greater than 2 print each mapping entry as it is
                processed.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            RuntimeError: A string value exceeds the field's ``max_length``
                and ``truncate_strings`` is false.

        Example (illustrative only, not executed as a doctest):

        >> WikiItem().import_item({'url': 'http://test.com', 'modified': '13 January 2014 00:15', 'crawler': 'more than thirty characters in this silly name'})  # doctest: +ELLIPSIS
        <WikiItem: WikiItem('more than thirty characters in', u'http://test.com', '', datetime.datetime(2014, 1, 13, 0, 15), '')>
        """

        item = dict(item)
        self.crawler = str(crawler)
        for k, v in self._item_mapping.iteritems():
            if verbosity > 2:
                print('%r: %r' % (k, v))
            value = item.get(k, v['default'])
            if value is None:
                # Nothing to import for this key; leave the attribute untouched.
                continue
            try:
                value = v['type'](value)
            except Exception:
                # Best-effort coercion: keep the raw value when it cannot be
                # converted. Catching Exception (rather than a bare except)
                # lets KeyboardInterrupt and SystemExit propagate.
                pass
            field = self.__class__._meta.get_field_by_name(v['name'])[0]
            if isinstance(value, basestring):
                # Fields without a max_length (or with a falsy one) are not
                # length-checked.
                max_length = getattr(field, 'max_length', None)
                if max_length and len(value) > max_length:
                    if truncate_strings:
                        value = value[:max_length]
                    else:
                        raise RuntimeError('String loaded from json is length %s and destination field max_length is %s.' % (len(value), max_length))
            if isinstance(field, (models.DateTimeField, models.DateField)):
                # Date-like fields get their value normalized by the project
                # helper before assignment.
                value = util.clean_wiki_datetime(value)
            setattr(self, v['name'], value)
        return self
예제 #2
0
    def import_item(self, item, crawler='wiki', truncate_strings=True, verbosity=0):
        """Populate this model instance from a single Scrapy Item (or dict).

        Every entry of ``self._item_mapping`` maps an item key to a spec dict
        that is read for three keys: ``'name'`` (the attribute/field on this
        model to set), ``'type'`` (a callable used to coerce the raw value) and
        ``'default'`` (used when the key is missing from ``item``).

        The instance is only modified in memory; nothing is saved here.

        Args:
            item: Scrapy Item or anything else accepted by ``dict()``.
            crawler: Name of the crawler; stored as ``str(crawler)`` on
                ``self.crawler`` before the mapping is applied.
            truncate_strings: When true, strings longer than the destination
                field's ``max_length`` are silently cut to fit. When false,
                such a string raises ``RuntimeError`` instead.
            verbosity: Values greater than 2 print each mapping entry as it is
                processed.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            RuntimeError: A string value exceeds the field's ``max_length``
                and ``truncate_strings`` is false.

        Example (illustrative only, not executed as a doctest):

        >> WikiItem().import_item({'url': 'http://test.com', 'modified': '13 January 2014 00:15', 'crawler': 'more than thirty characters in this silly name'})  # doctest: +ELLIPSIS
        <WikiItem: WikiItem('more than thirty characters in', u'http://test.com', '', datetime.datetime(2014, 1, 13, 0, 15), '')>
        """

        item = dict(item)
        self.crawler = str(crawler)
        for k, v in self._item_mapping.iteritems():
            if verbosity > 2:
                print('%r: %r' % (k, v))
            value = item.get(k, v['default'])
            if value is None:
                # Nothing to import for this key; leave the attribute untouched.
                continue
            try:
                value = v['type'](value)
            except Exception:
                # Best-effort coercion: keep the raw value when it cannot be
                # converted. Catching Exception (rather than a bare except)
                # lets KeyboardInterrupt and SystemExit propagate.
                pass
            field = self.__class__._meta.get_field_by_name(v['name'])[0]
            if isinstance(value, basestring):
                # Fields without a max_length (or with a falsy one) are not
                # length-checked.
                max_length = getattr(field, 'max_length', None)
                if max_length and len(value) > max_length:
                    if truncate_strings:
                        value = value[:max_length]
                    else:
                        raise RuntimeError('String loaded from json is length %s and destination field max_length is %s.' % (len(value), max_length))
            if isinstance(field, (models.DateTimeField, models.DateField)):
                # Date-like fields get their value normalized by the project
                # helper before assignment.
                value = util.clean_wiki_datetime(value)
            setattr(self, v['name'], value)
        return self
예제 #3
0
파일: models.py 프로젝트: MiddleFork/pug
def datetime_parser(s, default=None):
    """Return ``util.clean_wiki_datetime(s)`` for a truthy ``s``, else ``default``.

    Falsy inputs (``None``, ``''``, ``0``, empty containers) are never handed
    to the cleaner; ``default`` is returned for them unchanged.
    """
    if not s:
        return default
    return util.clean_wiki_datetime(s)
예제 #4
0
def datetime_parser(s, default=None):
    """Clean a wiki datetime value, falling back to ``default`` when ``s`` is falsy.

    A truthy ``s`` is passed to ``util.clean_wiki_datetime`` and that result is
    returned; any falsy ``s`` yields ``default`` without calling the cleaner.
    """
    return util.clean_wiki_datetime(s) if s else default