def setUp(self):
    """Reset the model table and seed it with five deterministic fixture rows.

    Leaves ``self.object_list`` holding all persisted rows and
    ``self.object`` holding the first one, for use by the test methods.
    """
    # Wipe whatever a previous test left behind.
    db.query(self.model_class).delete()
    db.commit()
    # Build the five fixtures up front, then persist them in one commit.
    fixtures = [
        self.model_class(
            username=u'rodrigocesar.savian%s' % idx,
            facebook_id='100003194166055%s' % idx,
            name=u'Rodrigo Cesar Savian%s' % idx,
            gender=u'male',
        )
        for idx in range(5)
    ]
    for fixture in fixtures:
        db.add(fixture)
    db.commit()
    self.object_list = db.query(self.model_class).all()
    self.object = self.object_list[0]
def get_app(self):
    """Re-seed the model table with five fixture rows and return the test app.

    Mirrors the fixture data used elsewhere in this suite, then builds the
    application under test via ``app.make_app_test()``.
    """
    # First clear all existing rows so the seed data is the only content.
    db.query(self.model_class).delete()
    db.commit()
    for idx in range(5):
        record = self.model_class(
            username=u'rodrigocesar.savian%s' % idx,
            facebook_id='100003194166055%s' % idx,
            name=u'Rodrigo Cesar Savian%s' % idx,
            gender=u'male',
        )
        db.add(record)
    db.commit()
    self.object_list = db.query(self.model_class).all()
    self.object = self.object_list[0]
    return app.make_app_test()
def process_item(self, item, spider):
    """Persist a scraped song item: upsert singer, album, song and tags.

    Raises DropItem when the item carries no song name (end of the AJAX
    pagination), otherwise returns the item unchanged after writing the
    related rows through the module-level ``db`` session.
    """
    if item.get('song_name') is None:
        # Pagination finished — no more song rows in this AJAX page.
        raise DropItem('ajax page over.')
    # Look the singer up by face (avatar URL); create it when missing.
    # NOTE(review): the query selects only Singer.pk, yet ``singer.pk`` is
    # read below in both branches — presumably the row proxy exposes ``pk``.
    singer = db.query(
        Singer.pk).filter_by(face=item['singer_face']).first()
    if singer is None:
        singer = Singer(name=item['singer'], face=item['singer_face'])
        db.add(singer)
    album_name = item.get('album_name')
    if album_name is not None:
        # Albums are deduplicated by cover URL, not by name.
        cover = item.get('album_cover')
        album = db.query(Album.pk).filter_by(cover=cover).first()
        if album is None:
            album = Album(
                name=album_name, intro=item.get('album_intro'),
                rdt=item['release_date'], cover=cover)
            db.add(album)
    else:
        # No album on the item: Empty() stands in so ``album.pk`` below
        # still resolves — presumably to None; verify Empty's definition.
        album = Empty()
    # Commit so newly added singer/album receive their primary keys.
    db.commit()
    lrc = item.get('lrc')
    song = db.query(Song).filter_by(
        name=item['song_name'], singer=singer.pk).first()
    if song is None:
        song = Song(
            name=item['song_name'], singer=singer.pk, album=album.pk,
            lrc=lrc)
        db.add(song)
        db.commit()
    elif None not in (lrc, song.lrc):
        # NOTE(review): this overwrites an existing lyric only when BOTH the
        # scraped lrc and the stored lrc are non-None; it never fills in a
        # missing song.lrc from a scraped one. Confirm that is intentional —
        # the inverse (backfill when song.lrc is None) seems more likely.
        song.lrc = lrc
    # Upsert each tag by name, collecting the rows for the link table.
    tag_objs = []
    for tag in item['tags']:
        t = db.query(Tag.pk).filter_by(name=tag).first()
        if t is None:
            t = Tag(name=tag)
            db.add(t)
        tag_objs.append(t)
    # Commit so new tags (and a new song) have primary keys before linking.
    db.commit()
    for tag in tag_objs:
        # merge() makes the song↔tag association idempotent across re-crawls.
        db.merge(SongTag(sid=song.pk, tid=tag.pk))
    db.commit()
    return item
def create_cache(col1, col2):
    """Build, persist and return a Cache row diffing two collection dates.

    ``col1``/``col2`` appear to be collection dates (filtered against
    ``Collection.date``); the cache accumulates per-field change counters
    plus a CachePage per URL flagged with which fields changed.
    """
    cache = Cache(start_date=col1, end_date=col2)
    cache.pages = []
    pages = db.query(Collection).filter(Collection.date == col1).all()
    # Page fields whose changes are surfaced per-URL on the CachePage rows.
    parameters = [
        'status_code', 'robots_txt', 'redirects', 'b_home_footer',
        'description', 'b_footer_search_also', 'h2', 'h3', 'title',
        'canonical', 'robots', 'b_descr_blocks_item',
        'p_gsarticle_promo_aside', 'b_left', 'headers', 'b_descr_text',
        'keywords', 'error', 'h1', 'load_time', 'b_similar', 'size'
    ]
    for page1 in pages:
        # NOTE(review): this filter mixes Collection and CollectionItem
        # columns with no explicit join — SQLAlchemy will emit an implicit
        # cross join; confirm the intended relationship/join condition.
        page2 = db.query(Collection).filter(
            Collection.date == col2,
            CollectionItem.url == page1.url).one_or_none()
        # Main data: count, per field name, how many URLs changed between
        # the two dates.
        if page1 and page2:
            for key in page1.__dict__:
                if key.startswith('_'):
                    # Skip SQLAlchemy internals (e.g. _sa_instance_state).
                    continue
                if getattr(page1, key, None) != getattr(page2, key, None):
                    # NOTE(review): increments via __dict__ but initialises
                    # via setattr — fine for plain attributes, but verify it
                    # behaves on mapped Cache columns.
                    if getattr(cache, key, None):
                        cache.__dict__[key] += 1
                    else:
                        setattr(cache, key, 1)
        # URLs data: collect the names of changed fields for this URL.
        keys = []
        for key in parameters:
            if page1 and page2:
                if getattr(page1, key, None) != getattr(page2, key, None):
                    keys.append(key)
        # NOTE(review): elif chain records at most ONE status flag per page
        # (404 wins over robots.txt, meta robots, redirects) — confirm that
        # the flags are meant to be mutually exclusive.
        if page1.status_code == 404:
            keys.append('404')
        elif page1.robots_txt == 'Disallowed':
            keys.append('rb_txt')
        elif page1.robots == 'noindex, nofollow':
            keys.append('rb_meta')
        elif page1.redirects != '301':
            keys.append('redirects')
        cache_page = CachePage(url=page1.url, status_code=page1.status_code)
        for k in keys:
            setattr(cache_page, k, True)
        cache.pages.append(cache_page)
    re_redirects = re.compile(r'^301', re.IGNORECASE)
    # Aggregate per-date totals.
    # NOTE(review): ``Collection.items == col1`` compares what looks like a
    # relationship/collection attribute to a date value, and
    # ``CollectionItem.redirects == re_redirects`` compares a column to a
    # compiled regex object — SQLAlchemy's ``==`` does NOT perform regex
    # matching (that would need ``regexp_match``/``like``). Both look broken;
    # confirm against the models.
    cache.errors_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.status_code == 404).count()
    cache.disallowed_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.robots_txt == 'Disallowed').count()
    cache.noindex_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.robots == 'noindex, nofollow').count()
    cache.redirects_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.redirects == re_redirects).count()
    cache.errors_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.status_code == 404).count()
    cache.disallowed_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.robots_txt == 'Disallowed').count()
    cache.noindex_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.robots == 'noindex, nofollow').count()
    cache.redirects_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.redirects == re_redirects).count()
    db.add(cache)
    db.commit()
    return cache
def create_cache(col1, col2):
    """Build, persist and return a Cache row diffing two collection dates.

    NOTE(review): this is a byte-identical duplicate of an earlier
    ``create_cache`` definition in this file; at import time this second
    definition silently shadows the first. One of the two should be removed.
    """
    cache = Cache(start_date=col1, end_date=col2)
    cache.pages = []
    pages = db.query(Collection).filter(Collection.date == col1).all()
    # Page fields whose changes are surfaced per-URL on the CachePage rows.
    parameters = ['status_code', 'robots_txt', 'redirects', 'b_home_footer',
                  'description', 'b_footer_search_also', 'h2', 'h3', 'title',
                  'canonical', 'robots', 'b_descr_blocks_item',
                  'p_gsarticle_promo_aside', 'b_left', 'headers',
                  'b_descr_text', 'keywords', 'error', 'h1', 'load_time',
                  'b_similar', 'size']
    for page1 in pages:
        # NOTE(review): mixes Collection and CollectionItem columns with no
        # explicit join — implicit cross join; confirm the intended join.
        page2 = db.query(Collection).filter(Collection.date == col2,
                                            CollectionItem.url == page1.url).one_or_none()
        # Main data: count per-field changes between the two dates.
        if page1 and page2:
            for key in page1.__dict__:
                if key.startswith('_'):
                    # Skip SQLAlchemy internals (e.g. _sa_instance_state).
                    continue
                if getattr(page1, key, None) != getattr(page2, key, None):
                    if getattr(cache, key, None):
                        cache.__dict__[key] += 1
                    else:
                        setattr(cache, key, 1)
        # URLs data: names of fields that changed for this URL.
        keys = []
        for key in parameters:
            if page1 and page2:
                if getattr(page1, key, None) != getattr(page2, key, None):
                    keys.append(key)
        # NOTE(review): elif chain records at most one status flag per page.
        if page1.status_code == 404:
            keys.append('404')
        elif page1.robots_txt == 'Disallowed':
            keys.append('rb_txt')
        elif page1.robots == 'noindex, nofollow':
            keys.append('rb_meta')
        elif page1.redirects != '301':
            keys.append('redirects')
        cache_page = CachePage(url=page1.url, status_code=page1.status_code)
        for k in keys:
            setattr(cache_page, k, True)
        cache.pages.append(cache_page)
    re_redirects = re.compile(r'^301', re.IGNORECASE)
    # NOTE(review): ``Collection.items == col1`` compares a collection-like
    # attribute to a date, and ``== re_redirects`` compares a column to a
    # compiled regex — SQLAlchemy ``==`` does not regex-match. Both look
    # broken; confirm against the models.
    cache.errors_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.status_code == 404).count()
    cache.disallowed_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.robots_txt == 'Disallowed').count()
    cache.noindex_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.robots == 'noindex, nofollow').count()
    cache.redirects_1 = \
        db.query(Collection).filter(Collection.items == col1,
                                    CollectionItem.redirects == re_redirects).count()
    cache.errors_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.status_code == 404).count()
    cache.disallowed_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.robots_txt == 'Disallowed').count()
    cache.noindex_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.robots == 'noindex, nofollow').count()
    cache.redirects_2 = \
        db.query(Collection).filter(Collection.items == col2,
                                    CollectionItem.redirects == re_redirects).count()
    db.add(cache)
    db.commit()
    return cache