Ejemplo n.º 1
0
def main():
    """Analyze unprocessed threads from the unluckyhouse archive.

    Selects up to ROW_LIMIT ids with ``state=0`` inside
    [BASE_LIMIT, LATEST_LIMIT], newest first, fetches the archive page of
    each one and hands it to ``analyze``.  When ``smart_http.get`` returns
    False the thread is reported as deleted and its row is set to
    ``state=-1``.

    Processing stops at the first exception raised for a thread; the
    traceback is printed and the work done so far is still committed.
    If there is nothing to process the function returns without touching
    the database.  The connection is always closed.
    """
    ROW_LIMIT = 100
    LATEST_LIMIT = 11399
    BASE_LIMIT = 11300
    sql = 'SELECT id FROM unluckyhouse WHERE state=0 AND id>=? AND id<=? ORDER BY id DESC LIMIT ?'
    # Archive page of a single thread: .../archive/index.php/t-<id>.html
    url = 'http://unluckyhouse.com/archive/index.php/t-{}.html'

    con = smart_dbapi.connect('unluckyhouse.sqlite')
    try:
        # Fetch the list of unprocessed ids (unluckyhouse.state=0).
        cur = con.execute(sql, (BASE_LIMIT, LATEST_LIMIT, ROW_LIMIT))
        todolist = [row['id'] for row in cur]
        cur.close()

        # Guard: indexing todolist[0] below would raise IndexError
        # when the query matches no rows.
        if not todolist:
            print('沒有待分析的文章')
            return

        print('分析範圍: {} ~ {}'.format(todolist[0], todolist[-1]))
        print('*' * 50)

        for t in todolist:
            # When syncing the newest threads, enforce the lower id bound.
            if t < BASE_LIMIT:
                break
            try:
                soup = smart_http.get(url.format(t))
                # Explicit comparison kept on purpose: only a literal
                # False result is treated as "thread deleted".
                # NOTE(review): confirm the return contract of smart_http.get.
                if soup != False:
                    analyze(con, t, soup)
                else:
                    con.execute('UPDATE unluckyhouse SET state=-1 WHERE id=?',
                                (t, ))
                    print('主題 %d 已刪除' % t)
            except Exception:
                print('分析文章 #{} 過程發生錯誤'.format(t))
                print('前往 URL 確認吧: {}'.format(url.format(t)))
                print('-' * 50)
                traceback.print_exc()
                print('-' * 50)
                break

        # Persist whatever was processed, even after an aborted run.
        con.commit()
    finally:
        con.close()
def main():
	"""Analyze unprocessed threads from the unluckyhouse archive.

	Selects up to ROW_LIMIT ids with ``state=0`` inside
	[BASE_LIMIT, LATEST_LIMIT], newest first, fetches the archive page of
	each one and hands it to ``analyze``.  When ``smart_http.get`` returns
	False the thread is reported as deleted and its row is set to
	``state=-1``.

	Processing stops at the first exception raised for a thread; the
	traceback is printed and the work done so far is still committed.
	If there is nothing to process the function returns without touching
	the database.  The connection is always closed.
	"""
	ROW_LIMIT    = 100
	LATEST_LIMIT = 11399
	BASE_LIMIT   = 11300
	sql = 'SELECT id FROM unluckyhouse WHERE state=0 AND id>=? AND id<=? ORDER BY id DESC LIMIT ?'
	# Archive page of a single thread: .../archive/index.php/t-<id>.html
	url = 'http://unluckyhouse.com/archive/index.php/t-{}.html'

	con = smart_dbapi.connect('unluckyhouse.sqlite')
	try:
		# Fetch the list of unprocessed ids (unluckyhouse.state=0).
		cur = con.execute(sql, (BASE_LIMIT, LATEST_LIMIT, ROW_LIMIT))
		todolist = [row['id'] for row in cur]
		cur.close()

		# Guard: indexing todolist[0] below would raise IndexError
		# when the query matches no rows.
		if not todolist:
			print('沒有待分析的文章')
			return

		print('分析範圍: {} ~ {}'.format(todolist[0], todolist[-1]))
		print('*' * 50)

		for t in todolist:
			# When syncing the newest threads, enforce the lower id bound.
			if t < BASE_LIMIT:
				break
			try:
				soup = smart_http.get(url.format(t))
				# Explicit comparison kept on purpose: only a literal
				# False result is treated as "thread deleted".
				# NOTE(review): confirm the return contract of smart_http.get.
				if soup != False:
					analyze(con, t, soup)
				else:
					con.execute('UPDATE unluckyhouse SET state=-1 WHERE id=?', (t,))
					print('主題 %d 已刪除' % t)
			except Exception:
				print('分析文章 #{} 過程發生錯誤'.format(t))
				print('前往 URL 確認吧: {}'.format(url.format(t)))
				print('-' * 50)
				traceback.print_exc()
				print('-' * 50)
				break

		# Persist whatever was processed, even after an aborted run.
		con.commit()
	finally:
		con.close()
Ejemplo n.º 3
0
def main():
    """Add placeholder rows for threads that appeared since the last sync.

    Reads the newest thread id from the forum RSS feed
    (``external.php``: rss > channel > item > link, query parameter
    ``t=<id>``) and inserts one row with default column values into
    ``unluckyhouse`` for every id between the current ``max(id)`` + 1 and
    that newest id.

    Any exception is caught at this top-level boundary and printed; the
    database connection is closed in every case.
    """
    try:
        # Fetch the id of the newest thread on the forum.
        resp = smart_http.get('https://unluckyhouse.com/external.php')
        if resp != False:
            link = resp.find('channel/item/link')
            if link is None or not link.text:
                raise ValueError('RSS feed has no channel/item/link element')
            latest_url = link.text
            # Anchor on the query parameter itself.  The previous pattern
            # ".+t=(\d+).+" required a character after the digits, so an
            # id at the very end of the URL lost its last digit.
            m = re.search(r'[?&]t=(\d+)', latest_url)
            if m is None:
                raise ValueError('no thread id in link: %r' % latest_url)
            latest_id = int(m.group(1))
        else:
            print('latest_id is -1')
            latest_id = -1

        # Sync the ids into SQLite; the other columns keep their defaults.
        # Value comparison: "is not -1" tests object identity, which is
        # not guaranteed for ints and warns on modern Python.
        if latest_id != -1:
            con = smart_dbapi.connect('unluckyhouse.sqlite')
            try:
                cur = con.execute('SELECT max(id) sync_id FROM unluckyhouse')
                row = cur.fetchone()

                # max(id) is NULL on an empty table.
                sync_id = row['sync_id'] if row['sync_id'] is not None else 0

                if sync_id < latest_id:
                    print('Add entries %d ~ %d' % (sync_id + 1, latest_id))
                    sql = 'INSERT INTO unluckyhouse(id) VALUES (?)'
                    for i in range(sync_id + 1, latest_id + 1):
                        con.execute(sql, (i, ))
                    con.commit()
                else:
                    print('Already synchronized (%d)' % latest_id)
            finally:
                con.close()
    except Exception as ex:
        print(ex)
Ejemplo n.º 4
0
# Make the shared "commons" directory importable.  The relative path is
# resolved against the current working directory.
commons_path = os.path.realpath('../../commons')
sys.path.insert(1, commons_path)

import corp_utils
import smart_dbapi
from print_progress import print_progress

# NOTE(review): presumably stops the run at the first error when True;
# its use is not visible in this part of the script - confirm.
ABORT_IF_ERROR = False
# Inclusive id range of the unluckylabor rows to process.
BEGIN = 4151
END = 4441

script_begin = datetime.now()

rank = 0
# Rows inside the id range whose lat column is still 0.
sql = 'SELECT id,corp,boss,gov FROM unluckylabor WHERE lat=0 AND id>=? AND id<=?'
conn = smart_dbapi.connect('unluckylabor.sqlite')
rows = conn.execute(sql, (BEGIN, END)).fetchall()

# Collect the items to modify
error_cnt = 0
modified = 0
visited = 0
total = len(rows)

for row in rows:
    # get_corp_info returns False when no info could be obtained
    # (NOTE(review): inferred from the check below - confirm in corp_utils).
    info = corp_utils.get_corp_info(row['corp'], row['boss'], row['gov'])
    if info != False:
        # Detection of consecutive geolocation failures:
        # an address of at least 8 characters that still has lat == 0.
        if len(info['addr']) >= 8 and info['lat'] == 0:
            # TODO: when geolocation fails, record it to a log file
            print('\n定位失敗: #%d %s %s' % (row['id'], row['corp'], info['addr']))
Ejemplo n.º 5
0
def get_conn():
	"""Return the shared connection to the corp cache database.

	The connection to ``<CODEPATH>/corp_cache.sqlite`` is opened on the
	first call and stored in the module-level ``_conn``; later calls
	return that same object.
	"""
	global _conn
	# Fast path: the connection was already opened by an earlier call.
	if _conn is not None:
		return _conn
	_conn = smart_dbapi.connect('%s/corp_cache.sqlite' % CODEPATH)
	return _conn
import re
import sys
import geojson

commons_path = os.path.realpath('../../commons')
sys.path.insert(1, commons_path)

import smart_dbapi

# When True, the query below is capped at 10 rows.
DEBUG = False

# All unluckylabor rows with lat > 20, ordered by id.
# NOTE(review): presumably lat > 20 selects rows that were geocoded
# successfully - confirm.
sql = 'SELECT * FROM unluckylabor WHERE lat>20 ORDER BY id'
if DEBUG:
	sql = sql + ' LIMIT 10'

con = smart_dbapi.connect('unluckylabor.sqlite')
cur = con.execute(sql)

features = []
for row in cur:
	# 違反法律條文格式化
	law_list = row['law'].split(';')
	law_desc = ''
	for e in law_list:
		if law_desc != '':
			law_desc = law_desc + '\n'

		m = re.match('(\d+)\-(\d+)', e)
		if m is not None:
			law_desc = law_desc + '勞動基準法第%s條第%s項' % (m.group(1), m.group(2))
		else:
Ejemplo n.º 7
0
#!../../../bin/python
# coding: utf-8

import os
import sys
import geojson

commons_path = os.path.realpath('../../commons')
sys.path.insert(1, commons_path)

import smart_dbapi

# All unluckyhouse rows with state > 1, newest id first.
sql = 'SELECT * FROM unluckyhouse WHERE state>1 ORDER BY id DESC'
con = smart_dbapi.connect('unluckyhouse.sqlite')
cur = con.execute(sql)

# Maps the cause-of-death code to its display text
# (A: accident, S: suicide, M: homicide).
INITATIVE_TAGS = {"A": u"意外", "S": u"自殺", "M": u"他殺"}

features = []
for row in cur:
    point = geojson.Point((row['lng'], row['lat']))
    properties = {
        'id': row['id'],
        'news': row['news'],
        'datetime': row['datetime'],
        'address': row['area'] + row['address'],
        'approach':
        '%s %s' % (INITATIVE_TAGS[row['initative']], row['approach']),
        'marker-color': '#b00000',
        'marker-symbol': 'danger'
#!../../../bin/python
# coding: utf-8

import os
import sys
import geojson

commons_path = os.path.realpath('../../commons')
sys.path.insert(1, commons_path)

import smart_dbapi

# All unluckyhouse rows with state > 1, newest id first.
sql = 'SELECT * FROM unluckyhouse WHERE state>1 ORDER BY id DESC'
con = smart_dbapi.connect('unluckyhouse.sqlite')
cur = con.execute(sql)

# Maps the cause-of-death code to its display text
# (A: accident, S: suicide, M: homicide).
INITATIVE_TAGS = {"A": u"意外", "S": u"自殺", "M": u"他殺"}

features = []
for row in cur:
	# The point is built from (lng, lat), in that order.
	point = geojson.Point((row['lng'], row['lat']))
	# Per-row properties; 'address' is the area followed by the address.
	properties = {
		'id': row['id'],
		'news': row['news'],
		'datetime': row['datetime'],
		'address': row['area'] + row['address'],
		# Display text of the code, a space, then the row's approach text.
		# A code missing from INITATIVE_TAGS raises KeyError here.
		'approach': '%s %s' % (INITATIVE_TAGS[row['initative']], row['approach']),
		'marker-color': '#b00000',
		'marker-symbol': 'danger'
	}