def task_2():
    """Create pending entry tasks from the thread saved by task-1.

    Fetches a task via ``datastore.get_next_task()``, downloads its dat
    file, parses the responses with ``i2ch.get_user_list_from_dat`` and
    queues every response whose number is greater than the one already
    recorded for the thread. Afterwards the thread record is updated with
    the URL, the number of the last parsed response and the thread title.

    Returns:
        ``request.url`` in every case (no task, download failure, success).

    Raises:
        IndexError: if the dat is empty or its first line has fewer than
            five ``<>``-separated fields, so no title can be extracted.
    """
    task = datastore.get_next_task()
    if task is None:
        logging.info('task not found.')
        return request.url

    dat = i2ch.download_html(task.dat_url)
    if dat is None:
        logging.error('download failed. URL:%s', task.dat_url)
        return request.url

    # Look up the stored thread record to learn the last checked response.
    thread_data = datastore.get_thread(task.id)
    checked_response = 0
    if thread_data is not None:
        checked_response = thread_data.response_num

    unicodedat = unicode(dat, 'shift-jis', 'ignore')
    users = i2ch.get_user_list_from_dat(unicodedat)

    entries = []
    # Start from the stored progress so that an empty parse result does not
    # reset the thread's checked response number back to 0.
    response_num = checked_response
    for user in users:
        response = user['response']
        response_num = int(response['number'])
        # Skip everything up to and including the last checked response;
        # using '<' here would queue that response a second time.
        if response_num <= checked_response:
            continue
        entries.append({
            'number': response_num,
            'author': response['name'],
            'mail': response['mail'],
            'body': response['body'],
            'authorid': response['id'],
            'thread': task.id,
            'at': response['datetime'].strftime('%Y/%m/%d %H:%M:%S.%f'),
            'ids': user['ids'],
        })

    # Crude: the title is taken as the fifth '<>'-separated field of the
    # first dat line, so this breaks if the data format changes.
    thread_title = unicodedat.splitlines()[0].split('<>')[4]
    # Unescape the HTML entity for '&' (the previous replace was a no-op).
    thread_title = thread_title.replace('&amp;', '&')

    datastore.add_entry_task(entries)
    datastore.update_thread(task.id, {
        'url': task.url,
        'response_num': response_num,
        'title': thread_title,
    })

    return request.url
def import_1():
    """Import one locally stored thread dump and queue its responses.

    Pops a file name via ``datastore.pop_imported_list()``, reads
    ``import/<name>.txt`` (the dat, decoded as Shift-JIS) and
    ``import/<name>.json`` (thread metadata; the ``id``, ``url`` and
    ``title`` keys are read), queues one entry per parsed response and
    updates the thread record.

    Returns:
        ``None`` when there is nothing to import, otherwise ``request.url``.
    """
    file_name = datastore.pop_imported_list()
    if file_name is None:
        return None

    base_path = 'import/' + file_name

    # Read the file in one go: joining readlines() with '\n' would double
    # every line break, because readlines() keeps the trailing newlines.
    with open(base_path + '.txt') as dat_file:
        dat = dat_file.read()
    with open(base_path + '.json') as meta_file:
        thread = json.load(meta_file)

    unicodedat = unicode(dat, 'shift-jis', 'ignore')
    users = i2ch.get_user_list_from_dat(unicodedat)

    # NOTE(review): entries are keyed by thread['url'] here, while the
    # thread record below is keyed by thread['id'] -- confirm this is
    # intended.
    entries = []
    for user in users:
        response = user['response']
        entries.append({
            'number': int(response['number']),
            'author': response['name'],
            'mail': response['mail'],
            'body': response['body'],
            'authorid': response['id'],
            'thread': thread['url'],
            'at': response['datetime'].strftime('%Y/%m/%d %H:%M:%S.%f'),
            'ids': user['ids'],
        })

    datastore.add_entry_task(entries)
    datastore.update_thread(thread['id'], {
        'url': thread['url'],
        # The number of line breaks in the dat is stored as the response
        # count (presumably one response per line -- TODO confirm).
        'response_num': unicodedat.count('\n'),
        'title': thread['title'],
    })

    return request.url