Beispiel #1
0
def _get_other_site(site):
    """
    Return a randomly chosen site other than ``site``.

    Candidates are all entries of ``Site.get_all()`` whose ``id`` differs
    from ``site.id``; one of them is picked uniformly at random.

    :param site: the site to exclude; only its ``id`` attribute is read.
    :return: an entry of ``Site.get_all()`` with a different ``id``.
    :raises ValueError: if there is no site with a different ``id``.
    """
    # Filter first instead of retrying random picks: a bounded retry loop can
    # fail by bad luck even though a valid candidate exists.
    candidates = [other for other in Site.get_all() if other.id != site.id]
    if not candidates:
        raise ValueError(
            "no site other than id={!r} is available".format(site.id))
    return random.choice(candidates)
Beispiel #2
0
def image():
    """
    Top page for each site.

    Renders ``image/index.html`` with every site passed as ``site_all``.
    """
    return render_template('image/index.html', site_all=Site.get_all())
Beispiel #3
0
def _get_other_site(site):
    """
    Return a randomly chosen site other than ``site``.

    Candidates are all entries of ``Site.get_all()`` whose ``id`` differs
    from ``site.id``; one of them is picked uniformly at random.

    :param site: the site to exclude; only its ``id`` attribute is read.
    :return: an entry of ``Site.get_all()`` with a different ``id``.
    :raises ValueError: if there is no site with a different ``id``.
    """
    # Filter first instead of retrying random picks: a bounded retry loop can
    # fail by bad luck even though a valid candidate exists.
    candidates = [other for other in Site.get_all() if other.id != site.id]
    if not candidates:
        raise ValueError(
            "no site other than id={!r} is available".format(site.id))
    return random.choice(candidates)
Beispiel #4
0
def _tests_production():
    """
    Test against the production environment.

    Builds the URL of one randomly chosen site on the production host and
    passes it to ``parse_and_request``.
    """
    target = random.choice(Site.get_all())
    test_url = "http://{}/{}/".format("www.niku.tokyo", target.title)
    parse_and_request(test_url)
Beispiel #5
0
def _tests_develop():
    """
    Test against the development environment.

    Builds the URL of one randomly chosen site on the local development
    host and passes it to ``parse_and_request``.
    """
    target = random.choice(Site.get_all())
    test_url = "http://{}/{}/".format("127.0.0.1:5000", target.title)
    parse_and_request(test_url)
Beispiel #6
0
def index():
    """
    Top page covering all sites.

    Renders ``site_top/index.html`` with every site and the fixed page name.
    """
    context = {
        'sites': Site.get_all(),
        'name': 'ぼすにく速報',
    }
    return render_template('site_top/index.html', **context)
Beispiel #7
0
    def run(self, force=None):
        """
        Fetch every site's subject list and print the titles that pass the filter.

        Steps:
          1. Collect the sites from ``Site.get_all()``, keeping one per ``url``.
          2. Request each site's ``subjects_url`` and build a ``Subject`` from
             every response line for which construction succeeds.
          3. Count how often each token surface that passes ``token_filter``
             occurs across all subject titles.
          4. For every subject (sorted by title), sum the counts of its tokens
             and print ``title`` and ``count_res`` unless ``filter_title``
             returns a truthy value.

        :param force: accepted but not used by this method.
        :raises AssertionError: if a subject list request does not answer
            with status 200; the response body is used as the message.
        """
        print('start')

        # Fetch all sites and de-duplicate by url (the last one seen wins).
        sites = {}
        for site in Site.get_all():
            sites[site.url] = site

        # Build the subject list; presumably Subject() raises for ignored or
        # keyword-matched lines, which is how they get excluded here.
        subjects = []
        for site in sites.values():
            response = requests.get(site.subjects_url)
            # Raised explicitly instead of via ``assert`` so the check still
            # runs under ``python -O``; the exception type is unchanged.
            if response.status_code != 200:
                raise AssertionError(response.text)

            # parse
            for line in response.text.split('\n'):
                try:
                    subjects.append(Subject(site, line))
                except Exception:
                    # Best effort: lines that cannot become a Subject are
                    # skipped. KeyboardInterrupt/SystemExit still propagate.
                    pass

        print(subjects)

        # Count token surfaces over all titles.
        tokenizer = Tokenizer()
        surface_counts = defaultdict(int)
        for subject in subjects:
            try:
                for token in tokenizer.tokenize(subject.title):
                    if not token_filter(token):
                        surface_counts[token.surface] += 1
            except Exception:
                # Best effort: a title that fails to tokenize keeps whatever
                # was counted before the failure.
                pass

        # Output in title order.
        for subject in sorted(subjects, key=lambda item: item.title):
            try:
                point = 0
                for token in tokenizer.tokenize(subject.title):
                    if not token_filter(token):
                        point += surface_counts[token.surface]
                if not filter_title(point, subject):
                    print(subject.title, subject.count_res)
            except Exception:
                # Best effort: a failing subject is skipped, not fatal.
                pass
Beispiel #8
0
 def _run(self, force=None):
     """
     Run ``SearchManager().search_and_scraping`` for every site.

     Each site is printed before it is processed. An ``AttributeError``
     prints the traceback and a "not configured" message 100 times, then
     stops the whole loop. Any other ``Exception`` prints the traceback and
     processing continues with the next site.

     :param force: forwarded unchanged to ``search_and_scraping``.
     """
     for site in Site.get_all():
         print(site)
         try:
             SearchManager().search_and_scraping(site, force=force)
         except AttributeError:
             # print_exc also reports the exception type and message, which
             # print_tb leaves out.
             traceback.print_exc()
             message = "未設定エラー:{}:{}の検出関数が未設定".format(site.id, site.title)
             # Repeated 100 times, presumably so it cannot be overlooked.
             for _ in range(100):
                 print(message)
             break
         except Exception:
             traceback.print_exc()
Beispiel #9
0
 def _run(self, force=None):
     """
     Run ``SearchManager().search_and_scraping`` for every site.

     Each site is printed before it is processed. An ``AttributeError``
     prints the traceback and a "not configured" message 100 times, then
     stops the whole loop. Any other ``Exception`` prints the traceback and
     processing continues with the next site.

     :param force: forwarded unchanged to ``search_and_scraping``.
     """
     for site in Site.get_all():
         print(site)
         try:
             SearchManager().search_and_scraping(site, force=force)
         except AttributeError:
             # print_exc also reports the exception type and message, which
             # print_tb leaves out.
             traceback.print_exc()
             message = '未設定エラー:{}:{}の検出関数が未設定'.format(site.id, site.title)
             # Repeated 100 times, presumably so it cannot be overlooked.
             for _ in range(100):
                 print(message)
             break
         except Exception:
             traceback.print_exc()
Beispiel #10
0
    def decorated_function(*args, **kwargs):
        """
        Resolve the ``site_title`` keyword argument and call ``f`` with the site.

        ``site_title`` is removed from ``kwargs``. The title "example" and
        titles listed in ``IGNORE_NAMES`` are answered with a fixed string.
        If ``Site.get_title`` raises ``IndexError`` the request is redirected
        to ``site_top.index``. Otherwise ``f`` is called with the site and
        the remaining keyword arguments; positional arguments are accepted
        but not forwarded.
        """
        title = kwargs.pop('site_title')

        if title == "example":
            return "HelloWorld"

        if title in IGNORE_NAMES:
            # Reached when favicon, robots.txt and similar files are requested.
            message = "File does not exist :{}".format(title)
            app_log(logging.ERROR, message)
            return "File does not exist"

        try:
            site = Site.get_title(title)
        except IndexError:
            # Unknown title: redirect to the site top page.
            message = "Site title does not exist :{}".format(title)
            app_log(logging.ERROR, message)
            return redirect(url_for('site_top.index'))
        else:
            return f(site, **kwargs)
Beispiel #11
0
def sitemap():
    """
    sitemap.xml for the Google crawler.

    Renders ``sitemap/sitemap.html`` with all sites, the newest pages, the
    newest keyword relations whose page is enabled, and two reference
    timestamps (one day and three days before now).
    """
    sites = Site.get_all()
    recent_pages = Page.gets_new(10000)
    recent_keywords = PageKeywordRelation.gets_new(10000)

    # Timezone-aware UTC time one hour in the past, handed to is_enable().
    cutoff = datetime.datetime.now(pytz.utc) - datetime.timedelta(seconds=3600)
    keyword_pages = []
    for keyword in recent_keywords:
        # Keep only relations that have a page and whose page is enabled.
        if keyword.page and keyword.page.is_enable(cutoff):
            keyword_pages.append(keyword)

    newest_site_date = max(site.created_at for site in sites)
    # These two timestamps are naive local times, unlike ``cutoff`` above.
    one_day_back = datetime.datetime.now() - datetime.timedelta(days=1)
    three_days_back = datetime.datetime.now() - datetime.timedelta(days=3)

    return render_template(
        'sitemap/sitemap.html',
        url_base='http://www.niku.tokyo/',
        new_site_date=newest_site_date,
        all_sites=sites,
        new_pages=recent_pages,
        new_keyword_pages=keyword_pages,
        one_days_ago=one_day_back,
        three_days_ago=three_days_back,
    )
Beispiel #12
0
def tests_site_search():
    """Run ``SearchManager().search`` on the site returned by ``Site.get(1)``."""
    target = Site.get(1)
    manager = SearchManager()
    manager.search(target)
Beispiel #13
0
 def site(self):
     """Return the result of ``Site.get`` for this object's ``site_id``."""
     site_id = self.site_id
     return Site.get(site_id)
Beispiel #14
0
 def _run(self):
     """
     Run ``SearchManager().search`` for every site.

     Before each search the site's id and title are printed between two
     separator lines.
     """
     separator = "-----------------"
     for target in Site.get_all():
         print(separator)
         print(target.id, target.title)
         print(separator)
         SearchManager().search(target)
Beispiel #15
0
def tests_scraping_storage():
    """
    Check that a value passed to ``set_dat`` can be looked up with ``get_dat``.

    Uses a ``SearchStorage`` built for the site returned by ``Site.get(1)``.
    """
    storage = SearchStorage(Site.get(1))
    key = 'test123'
    storage.set_dat(key)
    stored = storage.get_dat(key)
    assert stored is not None
Beispiel #16
0
 def _run(self):
     """
     Run ``SearchManager().search`` for every site.

     Before each search the site's id and title are printed between two
     separator lines.
     """
     separator = "-----------------"
     for target in Site.get_all():
         print(separator)
         print(target.id, target.title)
         print(separator)
         SearchManager().search(target)
Beispiel #17
0
def tests_site():
    """Check that ``Site.get(1)`` returns a truthy value."""
    # get_test
    found = Site.get(1)
    assert found
Beispiel #18
0
 def site(self):
     """Return the result of ``Site.get`` for this object's ``site_id``."""
     site_id = self.site_id
     return Site.get(site_id)