def _get_other_site(site):
    """Return a randomly chosen site that is not the given one."""
    all_site = Site.get_all()
    # Rejection sampling: retry up to 100 random draws.
    for x in range(100):
        _site = random.choice(all_site)
        if _site.id != site.id:
            return _site
    raise ValueError("could not pick a site other than {}".format(site.id))
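# _get_other_site above uses rejection sampling: up to 100 random draws until
# a different site comes up. An equivalent draw-free alternative, sketched
# under the same assumptions about Site.get_all() (the name
# _get_other_site_alt is hypothetical, not in the codebase):
def _get_other_site_alt(site):
    others = [s for s in Site.get_all() if s.id != site.id]
    if not others:
        raise ValueError("no site other than {} exists".format(site.id))
    return random.choice(others)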
def image():
    """Image top page for each site."""
    site_all = Site.get_all()
    return render_template('image/index.html', site_all=site_all)
def _tests_production():
    """Smoke test against the production environment."""
    host = "www.niku.tokyo"
    url_base = "http://{}/{}/"
    site = random.choice(Site.get_all())
    test_url = url_base.format(host, site.title)
    parse_and_request(test_url)
def _tests_develop():
    """Smoke test against the development environment."""
    host = "127.0.0.1:5000"
    url_base = "http://{}/{}/"
    site = random.choice(Site.get_all())
    test_url = url_base.format(host, site.title)
    parse_and_request(test_url)
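# _tests_production and _tests_develop differ only in the host they target,
# so a shared helper would remove the duplication; a sketch under that
# assumption (the helper name _tests_against is hypothetical):
def _tests_against(host):
    site = random.choice(Site.get_all())
    parse_and_request("http://{}/{}/".format(host, site.title))

# e.g. _tests_against("www.niku.tokyo") or _tests_against("127.0.0.1:5000")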
def index(): """ 全てのサイトのトップページ """ sites = Site.get_all() name = 'ぼすにく速報' return render_template('site_top/index.html', sites=sites, name=name)
def run(self, force=None):
    print('start')
    # Fetch all sites and de-duplicate by URL
    sites = {}
    for site in Site.get_all():
        sites[site.url] = site
    # Build the thread list, excluding ignored entries and keyword matches
    sure = []
    for key in sites:
        site = sites[key]
        response = requests.get(site.subjects_url)
        assert response.status_code == 200, response.text
        # parse
        data = list(response.text.split('\n'))
        for line in data:
            try:
                _ = Subject(site, line)
                sure.append(_)
            except Exception:
                pass
    print(sure)
    # Count token frequencies across thread titles
    t = Tokenizer()
    r = defaultdict(int)    # surface form -> occurrence count
    r2 = defaultdict(list)  # surface form -> threads containing it
    r3 = defaultdict(int)
    for _sure in sure:
        try:
            for token in t.tokenize(_sure.title):
                if not token_filter(token):
                    r[token.surface] += 1
                    r2[token.surface] += [_sure]
                    r3[token] += 0  # touch the key so it is registered (value stays 0)
        except Exception:
            pass
    # sort, then score each thread by the frequency of its tokens
    sure = sorted(sure, key=lambda x: x.title)
    for _sure in sure:
        try:
            point = 0
            for token in t.tokenize(_sure.title):
                if not token_filter(token):
                    point += r[token.surface]
            if not filter_title(point, _sure):
                print(_sure.title, _sure.count_res)
        except Exception:
            pass
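# token_filter and filter_title are used above but not defined in this
# listing. A minimal sketch of plausible implementations (hypothetical; the
# real filters may differ): token_filter returns True for tokens to drop,
# based on Janome's part-of-speech output, and filter_title returns True for
# threads whose frequency score or response count falls below a threshold.
def token_filter(token):
    # Janome tokens expose part_of_speech as a comma-separated string.
    pos = token.part_of_speech.split(',')[0]
    return pos in ('助詞', '助動詞', '記号') or len(token.surface) <= 1

def filter_title(point, subject):
    # Hypothetical thresholds: skip low-scoring or low-response threads.
    return point < 10 or subject.count_res < 50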
def _run(self, force=None):
    for site in Site.get_all():
        print(site)
        try:
            SearchManager().search_and_scraping(site, force=force)
        except AttributeError as err:
            traceback.print_tb(err.__traceback__)
            print("unconfigured error: {}: no detection function set for {}".format(site.id, site.title))
        except Exception as err:
            traceback.print_tb(err.__traceback__)
def decorated_function(*args, **kwargs):
    site_title = kwargs.pop('site_title')
    if site_title == "example":
        return "HelloWorld"
    if site_title in IGNORE_NAMES:
        # Request was for favicon, robots.txt, or similar
        app_log(logging.ERROR, "File does not exist :{}".format(site_title))
        return "File does not exist"
    try:
        site = Site.get_title(site_title)
    except IndexError:
        # Unknown site title: redirect to the site top page
        app_log(logging.ERROR, "Site title does not exist :{}".format(site_title))
        return redirect(url_for('site_top.index'))
    return f(site, **kwargs)
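# decorated_function above is the inner wrapper of a Flask view decorator
# whose outer factory is not part of this listing. A minimal sketch of the
# enclosing shape, assuming the usual functools.wraps pattern (the name
# site_required is hypothetical):
from functools import wraps

def site_required(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        ...  # body exactly as above: resolve 'site_title' to a Site, then call f(site, **kwargs)
    return decorated_function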
def sitemap():
    """sitemap.xml for the Google crawler."""
    all_sites = Site.get_all()
    new_pages = Page.gets_new(10000)
    new_keywords = PageKeywordRelation.gets_new(10000)
    # Evaluate is_enable as of one hour ago, as a grace period for fresh pages
    now = datetime.datetime.now(pytz.utc) - datetime.timedelta(seconds=3600)
    new_keyword_pages = [keyword for keyword in new_keywords
                         if keyword.page and keyword.page.is_enable(now)]
    return render_template(
        'sitemap/sitemap.html',
        url_base='http://www.niku.tokyo/',
        new_site_date=max([site.created_at for site in all_sites]),
        all_sites=all_sites,
        new_pages=new_pages,
        new_keyword_pages=new_keyword_pages,
        one_days_ago=datetime.datetime.now() - datetime.timedelta(days=1),
        three_days_ago=datetime.datetime.now() - datetime.timedelta(days=3),
    )
def tests_site_search():
    site = Site.get(1)
    SearchManager().search(site)
def site(self):
    return Site.get(self.site_id)
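# The site accessor above takes self, so it presumably lives on a model that
# stores a site_id (which model is not shown in this listing). A minimal
# sketch of how it would typically be exposed as a read-only property
# (_Model is a hypothetical host class):
class _Model:
    def __init__(self, site_id):
        self.site_id = site_id

    @property
    def site(self):
        return Site.get(self.site_id)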
def _run(self):
    for site in Site.get_all():
        print("-----------------")
        print(site.id, site.title)
        print("-----------------")
        SearchManager().search(site)
def tests_scraping_storage():
    site = Site.get(1)
    s = SearchStorage(site)
    dat = 'test123'
    s.set_dat(dat)
    assert s.get_dat(dat) is not None
def tests_site():
    # get test
    assert Site.get(1)