예제 #1
0
    def setUp(self):
        """Reset the model's table, insert five sample rows and cache them.

        Afterwards ``self.object_list`` holds every row returned by a query
        on ``self.model_class`` and ``self.object`` is the first of them.
        """
        # Remove whatever rows are currently stored for this model.
        db.query(self.model_class).delete()
        db.commit()

        # Each row gets the loop index appended to its identifying fields.
        for suffix in range(5):
            db.add(self.model_class(
                username=u'rodrigocesar.savian%s' % suffix,
                facebook_id='100003194166055%s' % suffix,
                name=u'Rodrigo Cesar Savian%s' % suffix,
                gender=u'male',
            ))
        db.commit()

        rows = db.query(self.model_class).all()
        self.object_list = rows
        self.object = rows[0]
예제 #2
0
    def get_app(self):
        """Seed the model's table with five sample rows and return the test app.

        Existing rows for ``self.model_class`` are deleted first. After
        seeding, ``self.object_list`` holds every row returned by a query on
        the model and ``self.object`` is the first of them.

        Returns:
            Whatever ``app.make_app_test()`` returns.
        """
        # Clear out everything stored for this model before seeding.
        db.query(self.model_class).delete()
        db.commit()

        # Each row gets the loop index appended to its identifying fields.
        for suffix in range(5):
            db.add(self.model_class(
                username=u'rodrigocesar.savian%s' % suffix,
                facebook_id='100003194166055%s' % suffix,
                name=u'Rodrigo Cesar Savian%s' % suffix,
                gender=u'male',
            ))
        db.commit()

        rows = db.query(self.model_class).all()
        self.object_list = rows
        self.object = rows[0]
        return app.make_app_test()
예제 #3
0
    def process_item(self, item, spider):
        """Store a scraped song item with its singer, album and tags.

        The singer is looked up by ``singer_face`` and the album by
        ``album_cover``; either is created when no row matches. The song is
        looked up by name and singer and created when missing. For an
        existing song, ``lrc`` is overwritten only when both the incoming
        and the stored value are not ``None``. Every tag in ``item['tags']``
        is looked up by name (created when missing) and linked to the song
        through a merged ``SongTag``.

        Args:
            item: Scraped item; read with both ``item.get(...)`` and
                ``item[...]``.
            spider: Not used by this method.

        Returns:
            The same ``item`` that was passed in.

        Raises:
            DropItem: When the item has no ``song_name``.
        """
        song_name = item.get('song_name')
        if song_name is None:
            # Pagination is finished.
            raise DropItem('ajax page over.')

        face = item['singer_face']
        singer = db.query(Singer.pk).filter_by(face=face).first()
        if singer is None:
            singer = Singer(name=item['singer'], face=face)
            db.add(singer)

        album_name = item.get('album_name')
        if album_name is None:
            # No album on the item: use a placeholder that is still read for
            # ``.pk`` when a new song is created below.
            album = Empty()
        else:
            cover = item.get('album_cover')
            album = db.query(Album.pk).filter_by(cover=cover).first()
            if album is None:
                album = Album(
                    name=album_name,
                    intro=item.get('album_intro'),
                    rdt=item['release_date'],
                    cover=cover,
                )
                db.add(album)

        # Commit before the song lookup, which reads singer.pk / album.pk.
        db.commit()

        lrc = item.get('lrc')
        song_query = db.query(Song).filter_by(name=song_name, singer=singer.pk)
        song = song_query.first()
        if song is None:
            song = Song(
                name=song_name,
                singer=singer.pk,
                album=album.pk,
                lrc=lrc,
            )
            db.add(song)
            db.commit()
        elif None not in (lrc, song.lrc):
            song.lrc = lrc

        tag_objs = []
        for tag_name in item['tags']:
            tag_row = db.query(Tag.pk).filter_by(name=tag_name).first()
            if tag_row is None:
                tag_row = Tag(name=tag_name)
                db.add(tag_row)
            tag_objs.append(tag_row)
        db.commit()

        # Link the song to each tag; merge() is used rather than add().
        for tag_row in tag_objs:
            db.merge(SongTag(sid=song.pk, tid=tag_row.pk))
        db.commit()

        return item
예제 #4
0
파일: cache.py 프로젝트: sergei4e/Sitest
def create_cache(col1, col2):
    """Build, persist and return a ``Cache`` comparing two snapshots.

    Every ``Collection`` row whose ``date`` equals ``col1`` is paired with
    the row selected for ``col2`` and the same URL. For each pair:

    * every public attribute that differs bumps a counter of the same name
      on the cache object;
    * a ``CachePage`` is appended to ``cache.pages`` carrying a ``True``
      flag for each differing entry of ``parameters`` plus at most one
      status flag (``404``, ``rb_txt``, ``rb_meta`` or ``redirects``).

    Aggregate counters (``errors_N``, ``disallowed_N``, ``noindex_N``,
    ``redirects_N``) are then filled in for both snapshots, and the cache
    is added to the session and committed.

    Args:
        col1: First snapshot; stored as ``start_date`` and compared with
            ``Collection.date`` / ``Collection.items``.
        col2: Second snapshot; stored as ``end_date`` and compared the
            same way.

    Returns:
        The committed ``Cache`` instance.
    """
    cache = Cache(start_date=col1, end_date=col2)
    cache.pages = []

    pages = db.query(Collection).filter(Collection.date == col1).all()

    parameters = (
        'status_code', 'robots_txt', 'redirects', 'b_home_footer',
        'description', 'b_footer_search_also', 'h2', 'h3', 'title',
        'canonical', 'robots', 'b_descr_blocks_item',
        'p_gsarticle_promo_aside', 'b_left', 'headers', 'b_descr_text',
        'keywords', 'error', 'h1', 'load_time', 'b_similar', 'size',
    )

    for page1 in pages:
        page2 = db.query(Collection).filter(
            Collection.date == col2,
            CollectionItem.url == page1.url).one_or_none()

        keys = []
        if page2 is not None:
            # Main data: count, per attribute name, how many pages differ.
            # Iterate over a snapshot of the names so attribute access
            # cannot disturb the iteration.
            public_names = [k for k in vars(page1) if not k.startswith('_')]
            for key in public_names:
                if getattr(page1, key, None) != getattr(page2, key, None):
                    # Assign through setattr so any attribute
                    # instrumentation on Cache observes the update
                    # (writing into __dict__ directly would bypass it).
                    current = getattr(cache, key, None) or 0
                    setattr(cache, key, current + 1)

            # URLs data: which tracked parameters changed for this page.
            keys = [
                key for key in parameters
                if getattr(page1, key, None) != getattr(page2, key, None)
            ]

        # At most one status flag is recorded per page, in this priority.
        # NOTE(review): the last branch flags every page whose redirects
        # value is not exactly '301', while the aggregate counters below
        # count values starting with '301' - confirm which is intended.
        if page1.status_code == 404:
            keys.append('404')
        elif page1.robots_txt == 'Disallowed':
            keys.append('rb_txt')
        elif page1.robots == 'noindex, nofollow':
            keys.append('rb_meta')
        elif page1.redirects != '301':
            keys.append('redirects')

        cache_page = CachePage(url=page1.url, status_code=page1.status_code)
        for k in keys:
            setattr(cache_page, k, True)

        cache.pages.append(cache_page)

    def count_items(col, condition):
        # Number of rows for snapshot ``col`` that satisfy ``condition``.
        return db.query(Collection).filter(
            Collection.items == col, condition).count()

    # A compiled regex object cannot be bound as a SQL parameter, so the
    # former '^301' pattern is expressed as a prefix match instead.
    def redirects_301():
        return CollectionItem.redirects.startswith('301')

    cache.errors_1 = count_items(col1, CollectionItem.status_code == 404)
    cache.disallowed_1 = count_items(
        col1, CollectionItem.robots_txt == 'Disallowed')
    cache.noindex_1 = count_items(
        col1, CollectionItem.robots == 'noindex, nofollow')
    cache.redirects_1 = count_items(col1, redirects_301())

    cache.errors_2 = count_items(col2, CollectionItem.status_code == 404)
    cache.disallowed_2 = count_items(
        col2, CollectionItem.robots_txt == 'Disallowed')
    cache.noindex_2 = count_items(
        col2, CollectionItem.robots == 'noindex, nofollow')
    cache.redirects_2 = count_items(col2, redirects_301())

    db.add(cache)
    db.commit()

    return cache
예제 #5
0
파일: cache.py 프로젝트: sergei4e/Sitest
def create_cache(col1, col2):
    """Build, persist and return a ``Cache`` comparing two snapshots.

    Every ``Collection`` row whose ``date`` equals ``col1`` is paired with
    the row selected for ``col2`` and the same URL. For each pair:

    * every public attribute that differs bumps a counter of the same name
      on the cache object;
    * a ``CachePage`` is appended to ``cache.pages`` carrying a ``True``
      flag for each differing entry of ``parameters`` plus at most one
      status flag (``404``, ``rb_txt``, ``rb_meta`` or ``redirects``).

    Aggregate counters (``errors_N``, ``disallowed_N``, ``noindex_N``,
    ``redirects_N``) are then filled in for both snapshots, and the cache
    is added to the session and committed.

    Args:
        col1: First snapshot; stored as ``start_date`` and compared with
            ``Collection.date`` / ``Collection.items``.
        col2: Second snapshot; stored as ``end_date`` and compared the
            same way.

    Returns:
        The committed ``Cache`` instance.
    """
    cache = Cache(start_date=col1, end_date=col2)
    cache.pages = []

    pages = db.query(Collection).filter(Collection.date == col1).all()

    parameters = (
        'status_code', 'robots_txt', 'redirects', 'b_home_footer',
        'description', 'b_footer_search_also', 'h2', 'h3', 'title',
        'canonical', 'robots', 'b_descr_blocks_item',
        'p_gsarticle_promo_aside', 'b_left', 'headers', 'b_descr_text',
        'keywords', 'error', 'h1', 'load_time', 'b_similar', 'size',
    )

    for page1 in pages:
        page2 = db.query(Collection).filter(
            Collection.date == col2,
            CollectionItem.url == page1.url).one_or_none()

        keys = []
        if page2 is not None:
            # Main data: count, per attribute name, how many pages differ.
            # Iterate over a snapshot of the names so attribute access
            # cannot disturb the iteration.
            public_names = [k for k in vars(page1) if not k.startswith('_')]
            for key in public_names:
                if getattr(page1, key, None) != getattr(page2, key, None):
                    # Assign through setattr so any attribute
                    # instrumentation on Cache observes the update
                    # (writing into __dict__ directly would bypass it).
                    current = getattr(cache, key, None) or 0
                    setattr(cache, key, current + 1)

            # URLs data: which tracked parameters changed for this page.
            keys = [
                key for key in parameters
                if getattr(page1, key, None) != getattr(page2, key, None)
            ]

        # At most one status flag is recorded per page, in this priority.
        # NOTE(review): the last branch flags every page whose redirects
        # value is not exactly '301', while the aggregate counters below
        # count values starting with '301' - confirm which is intended.
        if page1.status_code == 404:
            keys.append('404')
        elif page1.robots_txt == 'Disallowed':
            keys.append('rb_txt')
        elif page1.robots == 'noindex, nofollow':
            keys.append('rb_meta')
        elif page1.redirects != '301':
            keys.append('redirects')

        cache_page = CachePage(url=page1.url, status_code=page1.status_code)
        for k in keys:
            setattr(cache_page, k, True)

        cache.pages.append(cache_page)

    def count_items(col, condition):
        # Number of rows for snapshot ``col`` that satisfy ``condition``.
        return db.query(Collection).filter(
            Collection.items == col, condition).count()

    # A compiled regex object cannot be bound as a SQL parameter, so the
    # former '^301' pattern is expressed as a prefix match instead.
    def redirects_301():
        return CollectionItem.redirects.startswith('301')

    cache.errors_1 = count_items(col1, CollectionItem.status_code == 404)
    cache.disallowed_1 = count_items(
        col1, CollectionItem.robots_txt == 'Disallowed')
    cache.noindex_1 = count_items(
        col1, CollectionItem.robots == 'noindex, nofollow')
    cache.redirects_1 = count_items(col1, redirects_301())

    cache.errors_2 = count_items(col2, CollectionItem.status_code == 404)
    cache.disallowed_2 = count_items(
        col2, CollectionItem.robots_txt == 'Disallowed')
    cache.noindex_2 = count_items(
        col2, CollectionItem.robots == 'noindex, nofollow')
    cache.redirects_2 = count_items(col2, redirects_301())

    db.add(cache)
    db.commit()

    return cache