def run():
    """Recompress queued photo media in place and refresh their stored size.

    Reserves up to 5000 ready rows of ``queue_compress_medias`` for this
    thread, re-saves every ``photo`` file as RGB with quality 95 over the
    original path, writes the new size to ``tweet_medias.file_size`` and
    removes the row from the queue.  Rows of any other type are only
    removed from the queue.  A row that raises is marked with status 9 and
    the error text instead of being removed.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error escapes the per-row handling.
    """

    def key_of(row):
        # Identifying columns shared by every per-row statement below.
        return {
            'service_user_id': row['service_user_id'],
            'user_id': row['user_id'],
            'tweet_id': row['tweet_id'],
            'url': row['url']
        }

    try:
        # Log under a placeholder id until a real thread id has been issued.
        log = logger.ThreadLogging('-')
        thread_id = thread.ThreadId().CreateThread('compress_media.py', 1)
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)

        # Reserve work: stamp our thread id on rows that are ready
        # (status 0) and not yet claimed by any thread.
        db.execute(
            " UPDATE queue_compress_medias A"
            " SET A.thread_id = %(thread_id)s"
            " WHERE A.`status` = 0"
            " AND A.thread_id IS NULL "
            " LIMIT 5000", {'thread_id': thread_id})
        db.commit()

        # Load the rows reserved above together with their media file info.
        queued_medias = db.execute(
            " SELECT A.service_user_id,A.user_id,A.tweet_id, A.url, B.`type`, B.file_name, B.directory_path"
            " FROM queue_compress_medias A"
            " INNER JOIN tweet_medias B"
            " ON A.service_user_id = B.service_user_id"
            " AND A.tweet_id = B.tweet_id"
            " AND A.url = B.url"
            " WHERE A.thread_id = %(thread_id)s"
            " AND A.`status` = 0", {'thread_id': thread_id})

        for media in queued_medias:
            try:
                # Only photos are recompressed; videos are left untouched.
                if media['type'] == 'photo':
                    target = media['directory_path'] + media['file_name']
                    Image.open(target).convert('RGB').save(target,
                                                           quality=95)

                    size_params = {'file_size': os.path.getsize(target)}
                    size_params.update(key_of(media))
                    db.execute(
                        " UPDATE tweet_medias"
                        " SET file_size = %(file_size)s"
                        " WHERE service_user_id = %(service_user_id)s"
                        " AND user_id = %(user_id)s"
                        " AND tweet_id = %(tweet_id)s"
                        " AND url = %(url)s", size_params)

                # Done with this row: drop it from the queue.
                db.execute(
                    " DELETE FROM queue_compress_medias"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s"
                    " AND url = %(url)s", key_of(media))
                db.commit()

            except Exception as e:
                # Flag the failing row (status 9) and keep the error text.
                log.error(e)
                failure_params = {'error_text': str(e)}
                failure_params.update(key_of(media))
                db.execute(
                    " UPDATE queue_compress_medias A"
                    " SET A.`status` = 9"
                    " ,A.error_text = %(error_text)s"
                    " WHERE A.service_user_id = %(service_user_id)s"
                    " AND A.user_id = %(user_id)s"
                    " AND A.tweet_id = %(tweet_id)s"
                    " AND A.url = %(url)s", failure_params)
                db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('compress_media.py', thread_id)
def run():
    """Delete tweets flagged ``kept = 0 AND shown = 1`` together with their media.

    Up to 5000 tweets are selected per run.  For each one the media file
    and thumbnail file of every ``tweet_medias`` row are removed from disk
    (when present), then the ``tweet_medias`` rows and the ``tweets`` row
    are deleted and the transaction is committed.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    # Placeholder id so that logging works before a thread id is issued.
    thread_id = '-'
    # Tracks whether CreateThread succeeded.  Because ``thread_id`` is
    # pre-initialised, testing ``'thread_id' in locals()`` would always be
    # true and ExitThread would be called with the placeholder id.
    thread_created = False
    log = logger.ThreadLogging(thread_id)
    try:
        # Issue a thread id.
        thread_id = thread.ThreadId().CreateThread(
            'delete_checked_tweets.py', 1)
        thread_created = True
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)

        # Fetch the ids of the tweets to delete.
        log.info('削除対象のツイートIDを取得しています。')
        delete_tweets = db.execute(
            " SELECT service_user_id, user_id, tweet_id"
            " FROM tweets"
            " WHERE kept = 0"
            " AND shown = 1"
            " LIMIT 5000", {})

        for delete_tweet in delete_tweets:
            tweet_key = {
                'service_user_id': delete_tweet['service_user_id'],
                'user_id': delete_tweet['user_id'],
                'tweet_id': delete_tweet['tweet_id'],
            }

            # str() keeps the message working if the driver returns the id
            # as a number instead of a string.
            log.info('ツイートを削除します。 tweet_id:' +
                     str(delete_tweet['tweet_id']))
            tweet_medias = db.execute(
                " SELECT file_name, directory_path, thumb_file_name, thumb_directory_path"
                " FROM tweet_medias tm"
                " WHERE tm.service_user_id = %(service_user_id)s"
                " AND tm.user_id = %(user_id)s"
                " AND tm.tweet_id = %(tweet_id)s", tweet_key)

            # Remove the media and thumbnail files from disk.
            for tweet_media in tweet_medias:
                if tweet_media['file_name']:
                    media_file_path = tweet_media[
                        'directory_path'] + tweet_media['file_name']
                    log.info('メディアのファイルを削除します。:' + media_file_path)
                    if os.path.isfile(media_file_path):
                        os.remove(media_file_path)
                        log.info('削除しました。:' + media_file_path)

                if tweet_media['thumb_file_name']:
                    thumb_file_path = tweet_media[
                        'thumb_directory_path'] + tweet_media[
                            'thumb_file_name']
                    log.info('サムネイルファイルを削除します。:' + thumb_file_path)
                    if os.path.isfile(thumb_file_path):
                        os.remove(thumb_file_path)
                        log.info('削除しました。:' + thumb_file_path)

            # Delete the tweet_medias rows.
            db.execute(
                " DELETE FROM tweet_medias"
                " WHERE service_user_id = %(service_user_id)s"
                " AND user_id = %(user_id)s"
                " AND tweet_id = %(tweet_id)s", tweet_key)

            # Delete the tweets row.
            db.execute(
                " DELETE FROM tweets"
                " WHERE service_user_id = %(service_user_id)s"
                " AND user_id = %(user_id)s"
                " AND tweet_id = %(tweet_id)s", tweet_key)

            # NOTE(review): commit placement reconstructed from collapsed
            # source; committing per tweet keeps the database in step with
            # the files already removed from disk.
            db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if thread_created:
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('delete_checked_tweets.py',
                                         thread_id)
def run():
    """Fill in ``tweet_medias.thumb_file_size`` for rows where it is still 0.

    Selects up to 50000 media rows whose tweet has ``media_ready = 1`` and
    whose ``thumb_file_size`` is 0, measures the thumbnail file on disk and
    stores the size, or -1 when the file does not exist.  A row that raises
    is logged and skipped.  All updates are committed once after the loop.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    try:
        # Bind a logger before anything can fail, so the generic handler
        # below never hits an unbound ``log``.
        log = logger.ThreadLogging('-')

        # Issue a thread id.
        thread_id = thread.ThreadId().CreateThread('set_thumbfile_size.py', 1)
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)
        filesize_empty_meadias = db.fetch(
            " SELECT tm.service_user_id, tm.user_id, tm.tweet_id, tm.url, tm.thumb_directory_path, tm.thumb_file_name "
            " FROM tweet_medias tm "
            " INNER JOIN tweets t "
            " ON tm.service_user_id = t.service_user_id "
            " AND tm.user_id = t.user_id "
            " AND tm.tweet_id = t.tweet_id "
            " WHERE t.media_ready = 1 "
            " AND tm.thumb_file_size = 0 "
            " LIMIT 50000 ", {})

        for filesize_empty_meadia in filesize_empty_meadias:
            try:
                file_path = filesize_empty_meadia[
                    'thumb_directory_path'] + filesize_empty_meadia[
                        'thumb_file_name']

                # -1 marks "thumbnail file missing" so the row is not
                # selected again by the ``thumb_file_size = 0`` filter.
                file_size = -1
                if os.path.exists(file_path):
                    file_size = os.path.getsize(file_path)

                db.execute(
                    " UPDATE tweet_medias "
                    " SET thumb_file_size = %(file_size)s "
                    " WHERE service_user_id = %(service_user_id)s "
                    " AND user_id = %(user_id)s "
                    " AND tweet_id = %(tweet_id)s "
                    " AND url = %(url)s ", {
                        'file_size': file_size,
                        'service_user_id':
                        filesize_empty_meadia['service_user_id'],
                        'user_id': filesize_empty_meadia['user_id'],
                        'tweet_id': filesize_empty_meadia['tweet_id'],
                        'url': filesize_empty_meadia['url']
                    })
            except Exception as e:
                # Best effort: log and move on to the next row.
                log.error(e)

        if len(filesize_empty_meadias) > 0:
            db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('set_thumbfile_size.py', thread_id)
def run():
    """Check the thumbnails listed in ``existing_media_thumbs`` and re-queue lost ones.

    Reads up to 100000 rows of ``existing_media_thumbs``.  When the
    thumbnail path of a row is not an existing file, the media is
    (re)registered in ``queue_create_thumbs`` with status 0.  Every row
    read is then deleted from ``existing_media_thumbs`` and the change is
    committed.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    try:
        # Bind a logger before anything can fail, so the generic handler
        # below never hits an unbound ``log``.
        log = logger.ThreadLogging('-')

        # Issue a thread id.
        thread_id = thread.ThreadId().CreateThread(
            'existing_media_thumbs.py', 1)
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)
        existing_media_thumbs = db.fetch(
            " SELECT CONCAT(tm.thumb_directory_path,tm.thumb_file_name) AS file_path "
            " ,tm.service_user_id"
            " ,tm.user_id"
            " ,tm.tweet_id"
            " ,tm.url"
            " ,tm.type"
            " ,tm.sizes"
            " ,tm.bitrate"
            " ,tm.file_name"
            " ,tm.directory_path"
            " ,tm.thumb_file_name"
            " ,tm.thumb_directory_path"
            " ,tm.download_error"
            " ,tm.create_datetime"
            " ,tm.update_datetime"
            " ,tm.deleted"
            " FROM existing_media_thumbs tm"
            " WHERE thumb_directory_path IS NOT NULL"
            " LIMIT 100000", {})

        for existing_media_thumb in existing_media_thumbs:
            media_key = {
                'service_user_id': existing_media_thumb['service_user_id'],
                'user_id': existing_media_thumb['user_id'],
                'tweet_id': existing_media_thumb['tweet_id'],
                'url': existing_media_thumb['url'],
            }

            # A NULL path (CONCAT yields NULL when the file name is NULL)
            # is not treated as lost.
            file_path = existing_media_thumb['file_path']
            log.info(file_path)
            is_lost = file_path is not None and not os.path.isfile(file_path)

            if is_lost:
                log.info("・・・存在しませんでした。")
                db.execute(
                    " INSERT INTO queue_create_thumbs ("
                    " service_user_id"
                    " ,user_id"
                    " ,tweet_id"
                    " ,url"
                    " ) VALUES ("
                    " %(service_user_id)s"
                    " ,%(user_id)s"
                    " ,%(tweet_id)s"
                    " ,%(url)s"
                    " )"
                    " ON DUPLICATE KEY UPDATE"
                    " status = 0"
                    " ,thread_id = NULL"
                    " ,error_text = NULL", media_key)

            # NOTE(review): placement reconstructed from collapsed source.
            # The checked row is removed whether or not it was lost, so the
            # LIMIT-ed SELECT makes progress on the next run.
            db.execute(
                " DELETE FROM existing_media_thumbs"
                " WHERE service_user_id = %(service_user_id)s"
                " AND user_id = %(user_id)s"
                " AND tweet_id = %(tweet_id)s"
                " AND url = %(url)s"
                " ;", media_key)
            db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('existing_media_thumbs.py',
                                         thread_id)
def run():
    """Re-fetch profile data for ``relational_users`` rows whose name is blank.

    Selects up to 500 users with ``name = ' '`` and ``icecream = 0``, asks
    the Twitter ``users/show`` endpoint for each of them and, when the
    response carries an ``id``, replaces the row (DELETE followed by
    INSERT) with the returned screen name and name.  Non-200 responses and
    responses without ``id`` are logged as warnings and skipped.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    try:
        # Log under a placeholder id until a real thread id has been issued.
        log = logger.ThreadLogging('-')
        thread_id = thread.ThreadId().CreateThread('repair_users.py', 1)
        log = logger.ThreadLogging(thread_id)

        twitter = OAuth1Session(config.CONSUMER_KEY, config.CONSUMER_SECRET,
                                config.ACCESS_TOKEN,
                                config.ACCESS_TOKEN_SECRET)

        db = databeses.DbConnection(log)
        broken_users = db.execute(
            " SELECT user_id"
            " FROM relational_users"
            " WHERE name = ' '"
            " AND icecream = 0"
            " LIMIT 500", {})

        for broken_user in broken_users:
            log.info(f"ユーザ情報を復旧します [user_id={broken_user['user_id']}]")

            res = twitter.get(
                "https://api.twitter.com/1.1/users/show.json",
                params={"user_id": broken_user['user_id']})

            if res.status_code != 200:
                log.warn(
                    f"APIのリクエストが異常値を返しました [res.status_code={res.status_code}]"
                )
                continue

            profile = json.loads(res.text)
            if 'id' not in profile.keys():
                log.warn("Twitterに存在しないか削除されたユーザです")
                continue

            # Replace the row: drop the stale record, then insert fresh data.
            db.execute(
                " DELETE FROM relational_users"
                " WHERE user_id = %(user_id)s", {
                    'user_id': profile['id_str'],
                })

            fresh_values = {
                'user_id': profile['id_str'],
                'disp_name': profile['screen_name'],
                'name': profile['name'],
            }
            db.execute(
                " INSERT INTO relational_users ("
                " user_id"
                " ,disp_name"
                " ,name"
                " ,description"
                " ,theme_color"
                " ,follow_count"
                " ,follower_count"
                " ,create_datetime"
                " ,update_datetime"
                " ,deleted"
                " ) VALUES ("
                " %(user_id)s"
                " ,%(disp_name)s"
                " ,%(name)s"
                " ,NULL"
                " ,NULL"
                " ,0"
                " ,0"
                " ,NOW()"
                " ,'1990-01-01'"
                " ,0"
                " )", fresh_values)

            # NOTE(review): commit placement reconstructed from collapsed
            # source; assumed to be once per repaired user.
            db.commit()

    except exceptions.UncreatedThreadException:
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('repair_users.py', thread_id)
def run():
    """Create 360x260 JPEG thumbnails for queued media and register them.

    Reserves up to 5000 ready rows of ``queue_create_thumbs`` for this
    thread.  For each row the source image is loaded (photos and animated
    GIFs directly, videos via frame 30 written out with OpenCV), shrunk,
    cropped to 360x260 and saved as ``<md5 of url>.jpg`` in the per-user
    storage directory.  ``tweet_medias`` is updated with the thumbnail
    name, directory and size, ``tweets.media_ready`` is set to 1 and the
    queue row is deleted.  A row that raises is marked with status 9 and
    the error text.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error escapes the per-row handling.
    """
    try:
        # Log under a placeholder id until a real thread id has been issued.
        log = logger.ThreadLogging('-')
        thread_id = thread.ThreadId().CreateThread('create_thumbnail.py', 1)

        log = logger.ThreadLogging(thread_id)
        log.info("サムネイル作成情報を作成しています...")

        # Reserve work: stamp our thread id on rows that are ready
        # (status 0) and not yet claimed by any thread.
        db = databeses.DbConnection(log)
        db.execute(
            " UPDATE queue_create_thumbs A"
            " SET A.thread_id = %(thread_id)s"
            " WHERE A.`status` = 0"
            " AND A.thread_id IS NULL "
            " LIMIT 5000", {
                'thread_id': thread_id,
            })
        db.commit()

        # Load the rows reserved above together with media and user info.
        results = db.execute(
            " SELECT A.service_user_id"
            " ,A.user_id"
            " ,A.tweet_id"
            " ,A.url"
            " ,B.`type`"
            " ,B.file_name"
            " ,B.directory_path"
            " ,D.disp_name"
            " FROM queue_create_thumbs A"
            " INNER JOIN tweet_medias B"
            " ON A.service_user_id = B.service_user_id"
            " AND A.tweet_id = B.tweet_id"
            " AND A.url = B.url"
            " INNER JOIN tweets C"
            " ON B.service_user_id = C.service_user_id"
            " AND B.tweet_id = C.tweet_id"
            " INNER JOIN relational_users D"
            " ON C.user_id = D.user_id"
            " WHERE A.thread_id = %(thread_id)s"
            " AND A.`status` = 0", {
                'thread_id': thread_id,
            })

        for result in results:
            try:
                # Derive the thumbnail location: per-user directory plus
                # the MD5 of the media URL as file name.
                log.info(" -> サムネイル名を発行しています...")
                origin_text = result['url']
                storage_path = config.STRAGE_MEDIAS_PATH + result[
                    'service_user_id'] + '_' + result['disp_name'] + '/'
                thumb_name = hashlib.md5(
                    origin_text.encode()).hexdigest() + ".jpg"
                thumb_path = storage_path + thumb_name

                if result['type'] in ('photo', 'animated_gif'):
                    # Still image (or GIF): load it directly.
                    log.info("画像サムネイルを作成しています[" + result['directory_path'] +
                             result['file_name'] + "]...")
                    log.info(" -> 画像を読み込みます...")
                    original = Image.open(result['directory_path'] +
                                          result['file_name']).convert('RGB')

                # Exact comparison: ``in 'video'`` was a substring test
                # that also matched '' or 'vid'.
                elif result['type'] == 'video':
                    log.info("動画サムネイルを作成しています[" + result['directory_path'] +
                             result['file_name'] + "]...")
                    log.info(" -> 動画を読み込みます...")
                    video = cv2.VideoCapture(result['directory_path'] +
                                             result['file_name'])
                    try:
                        if not video.isOpened():
                            # NOTE(review): the queue row keeps status 0 and
                            # this thread id, so it is never retried or
                            # flagged - confirm this is intended.
                            continue

                        # Grab frame 30 of the video.
                        log.info(" -> フレームを切り出して保存しています...")
                        video.set(cv2.CAP_PROP_POS_FRAMES, 30)
                        ret, frame = video.read()
                    finally:
                        # Always free the capture handle.
                        video.release()

                    if not ret:
                        raise ValueError(
                            'could not read frame 30 of the video')

                    # The directory must exist before the frame is written;
                    # previously it was only created after this point.
                    os.makedirs(storage_path, exist_ok=True)
                    cv2.imwrite(thumb_path, frame)

                    # Reload the saved frame as a PIL image.
                    log.info(" -> 保存したフレームを読み込みます...")
                    original = Image.open(thumb_path).convert('RGB')

                else:
                    # Without this branch ``original`` would be unbound, or
                    # worse, still hold the previous row's image.
                    raise ValueError('unsupported media type: ' +
                                     str(result['type']))

                # Scale factor: landscape images are scaled to height 260,
                # portrait and square images to width 360.
                log.info(" -> 画像を縮小しています...")
                width, height = original.size
                if width > height:
                    scale = 260.0 / height
                else:
                    scale = 360.0 / width

                # Image.LANCZOS is the same filter as the former
                # Image.ANTIALIAS alias, which Pillow 10 removed.
                original.thumbnail(
                    (int(width * scale), int(height * scale)),
                    Image.LANCZOS)

                # Crop the top-left 360x260 region.
                log.info(" -> 画像をトリミングしています...")
                thumb = original.crop((0, 0, 360, 260))

                # Make sure the storage directory exists.
                os.makedirs(storage_path, exist_ok=True)

                log.info(" -> サムネイルを保存しています...")
                thumb.save(thumb_path, quality=80)
                log.info(" -> サムネイルを保存しました。[" + storage_path + thumb_name +
                         "]")

                # Register the thumbnail and drop the row from the queue.
                log.info(" -> データベースにサムネイル情報を登録しています...")
                file_size = os.path.getsize(thumb_path)
                db.execute(
                    " UPDATE tweet_medias"
                    " SET thumb_file_name = %(thumb_name)s"
                    " ,thumb_directory_path = %(storage_path)s"
                    " ,thumb_file_size = %(file_size)s"
                    " ,update_datetime = NOW()"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s"
                    " AND url = %(url)s", {
                        'thumb_name': thumb_name,
                        'storage_path': storage_path,
                        'file_size': file_size,
                        'service_user_id': result['service_user_id'],
                        'user_id': result['user_id'],
                        'tweet_id': result['tweet_id'],
                        'url': result['url'],
                    })
                db.execute(
                    " UPDATE tweets"
                    " SET media_ready = 1"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s", {
                        'service_user_id': result['service_user_id'],
                        'user_id': result['user_id'],
                        'tweet_id': result['tweet_id'],
                    })
                db.execute(
                    " DELETE FROM queue_create_thumbs"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s"
                    " AND url = %(url)s", {
                        'service_user_id': result['service_user_id'],
                        'user_id': result['user_id'],
                        'tweet_id': result['tweet_id'],
                        'url': result['url'],
                    })
                db.commit()
                log.info(" -> 登録しました。")

            except Exception as e:
                # Flag the failing row (status 9) and keep the error text.
                log.error(e)
                db.execute(
                    " UPDATE queue_create_thumbs A"
                    " SET A.`status` = 9"
                    " ,A.error_text = %(error_text)s"
                    " WHERE A.service_user_id = %(service_user_id)s"
                    " AND A.user_id = %(user_id)s"
                    " AND A.tweet_id = %(tweet_id)s"
                    " AND A.url = %(url)s", {
                        'error_text': str(e),
                        'service_user_id': result['service_user_id'],
                        'user_id': result['user_id'],
                        'tweet_id': result['tweet_id'],
                        'url': result['url']
                    })
                db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('create_thumbnail.py', thread_id)
def run():
    """Delete files in the media storage that no database row refers to.

    Works through the directories listed in ``matching_directories``; when
    that table is empty it is first filled with every sub-directory of
    ``config.STRAGE_MEDIAS_PATH``.  For each directory the file names known
    to ``profile_icons`` and ``tweet_medias`` (media and thumbnail) are
    collected, and every file on disk whose name is not among them is
    removed.  The directory is then deleted from ``matching_directories``
    and the change is committed.

    WARNING: this permanently removes files from disk.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    try:
        # Bind a logger before anything can fail, so the generic handler
        # below never hits an unbound ``log``.
        log = logger.ThreadLogging('-')

        # Issue a thread id.
        thread_id = thread.ThreadId().CreateThread(
            'delete_missing_medias.py', 1)
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)

        # Read the directories still to be checked.  If the table is empty,
        # start a new pass by registering every storage sub-directory.
        matching_directories = db.fetch(
            " SELECT directory "
            " FROM matching_directories", {})

        if len(matching_directories) == 0:
            file_directories = os.listdir(path=config.STRAGE_MEDIAS_PATH)
            directories = [
                f for f in file_directories if os.path.isdir(
                    os.path.join(config.STRAGE_MEDIAS_PATH, f))
            ]
            for directory in directories:
                db.execute(
                    " INSERT INTO matching_directories ("
                    " directory"
                    " ) VALUES ("
                    " %(directory)s"
                    " )", {
                        'directory': directory,
                    })
            # NOTE(review): commit placement reconstructed from collapsed
            # source; assumed to be once after all inserts.
            db.commit()

            matching_directories = db.fetch(
                " SELECT directory "
                " FROM matching_directories", {})

        for directory_count, matching_directory in enumerate(
                matching_directories, start=1):
            log.info(
                f"SEARCH DIRECTORY ({ directory_count }/{ len(matching_directories) }) : { matching_directory['directory'] }"
            )

            # Files physically present in this storage directory.
            medias_directory = (config.STRAGE_MEDIAS_PATH +
                                matching_directory['directory'])
            file_directories = os.listdir(path=medias_directory)
            files = [
                f for f in file_directories
                if os.path.isfile(os.path.join(medias_directory, f))
            ]

            # File names the database knows for this directory.
            matching_files = db.fetch(
                " SELECT file_name"
                " FROM profile_icons"
                " WHERE directory_path = %(directory_path)s", {
                    'directory_path':
                    config.STRAGE_ICON_PATH +
                    matching_directory['directory'] + '/',
                })
            matching_files = matching_files + db.fetch(
                " SELECT file_name"
                " FROM tweet_medias"
                " WHERE directory_path = %(directory_path)s", {
                    'directory_path': medias_directory + '/',
                })
            matching_files = matching_files + db.fetch(
                " SELECT thumb_file_name as file_name"
                " FROM tweet_medias"
                " WHERE thumb_directory_path = %(directory_path)s", {
                    'directory_path': medias_directory + '/',
                })

            # A set keeps the membership test O(1) per file.
            matching_file_names = {f['file_name'] for f in matching_files}

            # Keep files that match a database row; delete the others.
            for file_name in files:
                if file_name in matching_file_names:
                    log.info(f'MATCHING OK! : {file_name}')
                else:
                    log.info(f'MATCHING MISS! DELETED!!: {file_name}')
                    os.remove(medias_directory + '/' + file_name)

            # This directory is done: remove it from the work table.
            db.execute(
                " DELETE FROM matching_directories"
                " WHERE directory = %(directory)s", {
                    'directory': matching_directory['directory'],
                })
            db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('delete_missing_medias.py',
                                         thread_id)
def run(process_name, max_thread):
    """Queue tweets older than one month for deletion, excluding kept ones.

    Inserts into ``queue_delete_tweets`` every tweet whose
    ``create_datetime`` is more than one month in the past and that has no
    matching ``keep_tweets`` row, then removes from the queue any entry
    that does have a matching ``keep_tweets`` row, and commits.

    Args:
        process_name: Name passed to ``CreateThread`` / ``ExitThread``.
        max_thread: Second argument passed to ``CreateThread``.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    # Pre-bound so the ``finally`` clause can tell whether a connection was
    # ever opened; an unconditional ``db.close()`` raised NameError when
    # thread creation or the connection itself failed.
    db = None
    try:
        # Log under a placeholder id until a real thread id has been issued.
        log = logger.ThreadLogging('-')
        thread_id = thread.ThreadId().CreateThread(process_name, max_thread)
        log = logger.ThreadLogging(thread_id)

        # Register the tweets to delete in the queue.
        db = databeses.DbConnection(log)
        db.execute(
            " INSERT IGNORE INTO queue_delete_tweets "
            " ( "
            " service_user_id, "
            " user_id, "
            " tweet_id, "
            " tweeted_datetime "
            " ) "
            " SELECT B.service_user_id, "
            " B.user_id, "
            " B.tweet_id, "
            " B.tweeted_datetime"
            " FROM tweets B "
            " LEFT JOIN keep_tweets C "
            " ON B.service_user_id = C.service_user_id "
            " AND B.tweet_id = C.tweet_id "
            " WHERE B.create_datetime < ( NOW() - INTERVAL 1 MONTH ) "
            " AND C.tweet_id IS NULL ", {})

        # Remove queue entries that have been kept in the meantime.  The
        # outer table is referenced by its name: the previous statement
        # used an alias ``A`` that was never declared.
        db.execute(
            " DELETE FROM queue_delete_tweets"
            " WHERE EXISTS ( "
            " SELECT 1 "
            " FROM keep_tweets B "
            " WHERE B.service_user_id = queue_delete_tweets.service_user_id "
            " AND B.tweet_id = queue_delete_tweets.tweet_id "
            " ) ", {})

        db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread(process_name, thread_id)
        if db is not None:
            db.close()
def run():
    """Record kept tweet media whose files are missing in ``losted_tweet_medias``.

    Selects every ``tweet_medias`` row of a tweet with ``kept = 1`` that
    has no ``losted_tweet_medias`` entry yet.  When either the thumbnail
    file or the media file is not an existing file, the row is copied into
    ``losted_tweet_medias`` (or, on a duplicate key, its lost timestamp
    and download flag are reset) and committed.  A row that raises is
    logged and skipped.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    # Columns copied one-to-one from the selected row into the INSERT.
    copied_columns = (
        'service_user_id',
        'user_id',
        'tweet_id',
        'url',
        'type',
        'sizes',
        'bitrate',
        'file_name',
        'directory_path',
        'thumb_file_name',
        'thumb_directory_path',
        'download_error',
        'create_datetime',
        'update_datetime',
        'deleted',
    )

    try:
        # Log under a placeholder id until a real thread id has been issued.
        log = logger.ThreadLogging('-')
        thread_id = thread.ThreadId().CreateThread('exists_media_file.py', 1)
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)
        candidates = db.execute(
            " SELECT tm.service_user_id"
            " ,tm.user_id"
            " ,tm.tweet_id"
            " ,tm.url"
            " ,tm.type"
            " ,tm.sizes"
            " ,tm.bitrate"
            " ,tm.file_name"
            " ,tm.directory_path"
            " ,tm.thumb_file_name"
            " ,tm.thumb_directory_path"
            " ,tm.download_error"
            " ,tm.create_datetime"
            " ,tm.update_datetime"
            " ,tm.deleted"
            " FROM tweets t"
            " INNER JOIN tweet_medias tm"
            " ON t.service_user_id = tm.service_user_id"
            " AND t.user_id = tm.user_id"
            " AND t.tweet_id = tm.tweet_id"
            " LEFT JOIN losted_tweet_medias lt"
            " ON tm.service_user_id = lt.service_user_id"
            " AND tm.user_id = lt.user_id"
            " AND tm.tweet_id = lt.tweet_id"
            " AND tm.url = lt.url"
            " WHERE lt.service_user_id IS NULL"
            " AND kept = 1", {})

        for media in candidates:
            try:
                thumb_path = (media['thumb_directory_path'] +
                              media['thumb_file_name'])
                media_path = media['directory_path'] + media['file_name']
                log.info(media_path)

                # Lost when the thumbnail or the media file is missing.
                both_present = (os.path.isfile(thumb_path)
                                and os.path.isfile(media_path))
                if both_present:
                    continue

                log.info("・・・存在しませんでした。")
                db.execute(
                    " INSERT INTO losted_tweet_medias ("
                    " service_user_id"
                    " ,user_id"
                    " ,tweet_id"
                    " ,url"
                    " ,`type`"
                    " ,sizes"
                    " ,bitrate"
                    " ,file_name"
                    " ,directory_path"
                    " ,thumb_file_name"
                    " ,thumb_directory_path"
                    " ,download_error"
                    " ,create_datetime"
                    " ,update_datetime"
                    " ,deleted"
                    " ) VALUES ("
                    " %(service_user_id)s"
                    " ,%(user_id)s"
                    " ,%(tweet_id)s"
                    " ,%(url)s"
                    " ,%(type)s"
                    " ,%(sizes)s"
                    " ,%(bitrate)s"
                    " ,%(file_name)s"
                    " ,%(directory_path)s"
                    " ,%(thumb_file_name)s"
                    " ,%(thumb_directory_path)s"
                    " ,%(download_error)s"
                    " ,%(create_datetime)s"
                    " ,%(update_datetime)s"
                    " ,%(deleted)s"
                    " )"
                    " ON DUPLICATE KEY UPDATE"
                    " losted_datetime = NOW()"
                    " ,download_entried = 0"
                    " ;", {column: media[column]
                           for column in copied_columns})
                # NOTE(review): commit placement reconstructed from
                # collapsed source; assumed to follow each insert.
                db.commit()

            except Exception as e:
                # Best effort: log and move on to the next row.
                log.error(e)

    except exceptions.UncreatedThreadException:
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('exists_media_file.py', thread_id)
def run(process_name, max_thread, max_rows):
    """Delete the tweets queued in ``queue_delete_tweets`` together with their media.

    For every service user present in the queue, up to ``max_rows`` ready
    rows (oldest ``tweeted_datetime`` first) are stamped with this
    thread's id.  Each reserved tweet then has its media and thumbnail
    files removed from disk, followed by its ``tweet_medias`` rows, its
    ``tweets`` row and its queue row, committed per tweet.

    Args:
        process_name: Name passed to ``CreateThread`` / ``ExitThread``.
        max_thread: Second argument passed to ``CreateThread``.
        max_rows: Maximum number of queue rows reserved per service user.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    try:
        # Bind a logger before anything can fail, so the generic handler
        # below never hits an unbound ``log``.
        log = logger.ThreadLogging('-')

        # Issue a thread id.
        thread_id = thread.ThreadId().CreateThread(process_name, max_thread)
        log = logger.ThreadLogging(thread_id)

        # Collect the service users that have queued tweets.
        log.info("削除対象のツイートを抽出しています...")
        db = databeses.DbConnection(log)
        service_user_id_list = db.execute(
            " SELECT DISTINCT A.service_user_id "
            " FROM queue_delete_tweets A ", {})

        # Reserve the oldest ready rows of each service user.
        # NOTE(review): unlike the other queue workers there is no
        # ``thread_id IS NULL`` condition, so rows reserved by another
        # running thread can be taken over - confirm this is intended.
        for service_user_id in service_user_id_list:
            db.execute(
                " UPDATE queue_delete_tweets"
                " SET thread_id = %(thread_id)s"
                " WHERE service_user_id = %(service_user_id)s "
                " AND `status` = '0'"
                " ORDER BY tweeted_datetime "
                " LIMIT %(max_rows)s ", {
                    'thread_id': thread_id,
                    'service_user_id': service_user_id['service_user_id'],
                    'max_rows': max_rows
                })
        # NOTE(review): commit placement reconstructed from collapsed
        # source; assumed to be once after all reservations.
        db.commit()

        # Load the rows reserved above.
        tweet_id_list = db.execute(
            " SELECT A.service_user_id,A.user_id,A.tweet_id"
            " FROM queue_delete_tweets A"
            " WHERE A.thread_id = %(thread_id)s"
            " AND A.`status` = 0", {'thread_id': thread_id})

        for tweet_id in tweet_id_list:
            tweet_key = {
                'service_user_id': tweet_id['service_user_id'],
                'user_id': tweet_id['user_id'],
                'tweet_id': tweet_id['tweet_id']
            }
            # Media is scoped to the owning service user as well as the
            # tweet: filtering on tweet_id alone also removed the files
            # and rows that other service users hold for the same tweet.
            media_scope = {
                'service_user_id': tweet_id['service_user_id'],
                'tweet_id': tweet_id['tweet_id']
            }

            media_path_list = db.execute(
                " SELECT B.directory_path,"
                " B.file_name,"
                " B.thumb_directory_path,"
                " B.thumb_file_name"
                " FROM tweet_medias B"
                " WHERE B.service_user_id = %(service_user_id)s"
                " AND B.tweet_id = %(tweet_id)s"
                " AND B.file_name IS NOT NULL"
                " AND B.thumb_file_name IS NOT NULL", media_scope)

            # Remove the media and thumbnail files from disk.
            log.info("メディアファイルを削除しています...")
            for media_path in media_path_list:
                media_file_path = media_path['directory_path'] + media_path[
                    'file_name']
                thumb_file_path = media_path[
                    'thumb_directory_path'] + media_path['thumb_file_name']
                if os.path.isfile(media_file_path):
                    log.info(media_file_path)
                    os.remove(media_file_path)
                if os.path.isfile(thumb_file_path):
                    log.info(thumb_file_path)
                    os.remove(thumb_file_path)

            # Delete the tweet_medias rows.
            log.info("メディアレコードを削除しています...")
            db.execute(
                " DELETE FROM tweet_medias"
                " WHERE service_user_id = %(service_user_id)s"
                " AND tweet_id = %(tweet_id)s", media_scope)

            # Delete the tweets row.
            log.info("ツイートレコードを削除しています...")
            db.execute(
                " DELETE FROM tweets"
                " WHERE service_user_id = %(service_user_id)s"
                " AND user_id = %(user_id)s"
                " AND tweet_id = %(tweet_id)s", tweet_key)

            # Delete the queue row.
            log.info("キューレコードを削除しています...")
            db.execute(
                " DELETE FROM queue_delete_tweets"
                " WHERE service_user_id = %(service_user_id)s"
                " AND user_id = %(user_id)s"
                " AND tweet_id = %(tweet_id)s", tweet_key)

            db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread(process_name, thread_id)
def run():
    """Check the media files listed in ``existing_tweet_medias`` and record lost ones.

    Reads up to 100000 rows of ``existing_tweet_medias``.  When the media
    path of a row is not an existing file, the row is copied into
    ``losted_tweet_medias`` (or, on a duplicate key, its lost timestamp
    and download flag are reset).  Every row read is then deleted from
    ``existing_tweet_medias`` and the change is committed.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error occurs.
    """
    # Columns copied one-to-one from the selected row into the INSERT.
    copied_columns = (
        'service_user_id',
        'user_id',
        'tweet_id',
        'url',
        'type',
        'sizes',
        'bitrate',
        'file_name',
        'directory_path',
        'thumb_file_name',
        'thumb_directory_path',
        'download_error',
        'create_datetime',
        'update_datetime',
        'deleted',
    )

    try:
        # Bind a logger before anything can fail, so the generic handler
        # below never hits an unbound ``log``.
        log = logger.ThreadLogging('-')

        # Issue a thread id.
        thread_id = thread.ThreadId().CreateThread('existing_tweet_medias.py',
                                                   1)
        log = logger.ThreadLogging(thread_id)

        db = databeses.DbConnection(log)
        existing_tweet_medias = db.fetch(
            " SELECT CONCAT(tm.directory_path,tm.file_name) AS file_path "
            " ,tm.service_user_id"
            " ,tm.user_id"
            " ,tm.tweet_id"
            " ,tm.url"
            " ,tm.type"
            " ,tm.sizes"
            " ,tm.bitrate"
            " ,tm.file_name"
            " ,tm.directory_path"
            " ,tm.thumb_file_name"
            " ,tm.thumb_directory_path"
            " ,tm.download_error"
            " ,tm.create_datetime"
            " ,tm.update_datetime"
            " ,tm.deleted"
            " FROM existing_tweet_medias tm"
            " LIMIT 100000", {})

        for existing_tweet_media in existing_tweet_medias:
            # A NULL path (CONCAT yields NULL when either part is NULL) is
            # not treated as lost.
            file_path = existing_tweet_media['file_path']
            log.info(file_path)
            is_lost = file_path is not None and not os.path.isfile(file_path)

            if is_lost:
                log.info("・・・存在しませんでした。")
                db.execute(
                    " INSERT INTO losted_tweet_medias ("
                    " service_user_id"
                    " ,user_id"
                    " ,tweet_id"
                    " ,url"
                    " ,`type`"
                    " ,sizes"
                    " ,bitrate"
                    " ,file_name"
                    " ,directory_path"
                    " ,thumb_file_name"
                    " ,thumb_directory_path"
                    " ,download_error"
                    " ,create_datetime"
                    " ,update_datetime"
                    " ,deleted"
                    " ) VALUES ("
                    " %(service_user_id)s"
                    " ,%(user_id)s"
                    " ,%(tweet_id)s"
                    " ,%(url)s"
                    " ,%(type)s"
                    " ,%(sizes)s"
                    " ,%(bitrate)s"
                    " ,%(file_name)s"
                    " ,%(directory_path)s"
                    " ,%(thumb_file_name)s"
                    " ,%(thumb_directory_path)s"
                    " ,%(download_error)s"
                    " ,%(create_datetime)s"
                    " ,%(update_datetime)s"
                    " ,%(deleted)s"
                    " )"
                    " ON DUPLICATE KEY UPDATE"
                    " losted_datetime = NOW()"
                    " ,download_entried = 0"
                    " ;", {
                        column: existing_tweet_media[column]
                        for column in copied_columns
                    })

            # NOTE(review): placement reconstructed from collapsed source.
            # The checked row is removed whether or not it was lost, so the
            # LIMIT-ed SELECT makes progress on the next run.
            db.execute(
                " DELETE FROM existing_tweet_medias"
                " WHERE service_user_id = %(service_user_id)s"
                " AND user_id = %(user_id)s"
                " AND tweet_id = %(tweet_id)s"
                " AND url = %(url)s"
                " ;", {
                    'service_user_id':
                    existing_tweet_media['service_user_id'],
                    'user_id': existing_tweet_media['user_id'],
                    'tweet_id': existing_tweet_media['tweet_id'],
                    'url': existing_tweet_media['url'],
                })
            db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('existing_tweet_medias.py',
                                         thread_id)
def run():
    """Download the media queued in ``queue_download_medias``.

    Reserves up to 5000 ready queue rows, plus every ready row of users
    flagged ``high_priority = 1`` in ``tweet_take_users``, for this
    thread.  Each media URL is fetched (with a ``:large`` / ``:medium`` /
    ``:small`` / ``:thumb`` suffix chosen from the ``sizes`` column) into
    the per-user storage directory.  On success ``tweet_medias`` receives
    the file name, directory and size, the media is queued for thumbnail
    creation and compression, and the download queue row is deleted.  A
    row that raises is marked with status 9 and the error text.

    The process exits via ``sys.exit()`` when no thread id can be issued or
    when an unexpected error escapes the per-row handling.
    """
    try:
        # Log under a placeholder id until a real thread id has been issued.
        log = logger.ThreadLogging('-')
        thread_id = thread.ThreadId().CreateThread('download_medias.py', 1)

        log = logger.ThreadLogging(thread_id)
        log.info("メディア情報を確認しています...")

        # Reserve work: first a capped batch of ready rows ...
        db = databeses.DbConnection(log)
        db.execute(
            " UPDATE queue_download_medias"
            " SET thread_id = %(thread_id)s"
            " WHERE `status` = 0"
            " AND thread_id IS NULL "
            " LIMIT 5000", {'thread_id': thread_id})
        # ... then every remaining ready row of high-priority users.
        db.execute(
            " UPDATE queue_download_medias"
            " SET thread_id = %(thread_id)s"
            " WHERE status = 0"
            " AND thread_id IS NULL"
            " AND (service_user_id, user_id) IN ("
            " SELECT service_user_id, user_id"
            " FROM tweet_take_users"
            " WHERE `high_priority` = 1"
            " )", {'thread_id': thread_id})
        db.commit()

        download_medias = db.execute(
            " SELECT qdm.service_user_id,qdm.user_id,qdm.tweet_id,qdm.url,ru.disp_name,tm.sizes,tm.`type` "
            " FROM queue_download_medias qdm"
            " INNER JOIN tweet_medias tm"
            " ON qdm.service_user_id = tm.service_user_id"
            " AND qdm.tweet_id = tm.tweet_id"
            " AND qdm.url = tm.url "
            " INNER JOIN tweets tw"
            " ON tm.service_user_id = tw.service_user_id"
            " AND tm.tweet_id = tw.tweet_id"
            " INNER JOIN relational_users ru"
            " ON tw.user_id = ru.user_id"
            " WHERE qdm.thread_id = %(thread_id)s", {'thread_id': thread_id})

        log.info("ダウンロードを開始します。")
        for download_media in download_medias:
            media_key = {
                'service_user_id': download_media['service_user_id'],
                'user_id': download_media['user_id'],
                'tweet_id': download_media['tweet_id'],
                'url': download_media['url']
            }

            # File name: last path segment of the URL without its query.
            file_name = download_media['url'].split('/')[-1].split('?')[0]

            # Per-user storage directory (created when missing).
            directory_path = config.STRAGE_MEDIAS_PATH + download_media[
                'service_user_id'] + '_' + download_media['disp_name'] + '/'
            os.makedirs(directory_path, exist_ok=True)

            # Pick the largest size variant listed for this media.  A NULL
            # ``sizes`` is treated as "no variant" instead of raising
            # outside the per-row handler and aborting the whole batch.
            sizes = download_media['sizes'] or ""
            size = ""
            for variant in ("large", "medium", "small", "thumb"):
                if variant in sizes:
                    size = ":" + variant
                    break

            try:
                print("ダウンロード中... " + download_media['url'])
                # The context manager closes the HTTP response even when
                # reading fails.
                with urllib.request.urlopen(download_media['url'] + size,
                                            timeout=20) as response:
                    data = response.read()
                with open(directory_path + file_name, mode="wb") as f:
                    f.write(data)

                file_size = os.path.getsize(directory_path + file_name)
                update_params = {
                    'file_name': file_name,
                    'directory_path': directory_path,
                    'file_size': file_size,
                }
                update_params.update(media_key)
                db.execute(
                    " UPDATE tweet_medias"
                    " SET file_name = %(file_name)s"
                    " ,directory_path = %(directory_path)s"
                    " ,file_size = %(file_size)s"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s"
                    " AND url = %(url)s", update_params)

                # Hand the media over to the thumbnail worker ...
                db.execute(
                    " INSERT INTO queue_create_thumbs ("
                    " service_user_id"
                    " ,user_id"
                    " ,tweet_id"
                    " ,url"
                    " ) VALUES ("
                    " %(service_user_id)s"
                    " ,%(user_id)s"
                    " ,%(tweet_id)s"
                    " ,%(url)s"
                    " )"
                    " ON DUPLICATE KEY UPDATE"
                    " status = 0"
                    " ,thread_id = NULL"
                    " ,error_text = NULL", media_key)

                # ... and to the compression worker.
                db.execute(
                    " INSERT INTO queue_compress_medias (service_user_id, user_id, tweet_id, url)"
                    " VALUES ( %(service_user_id)s, %(user_id)s, %(tweet_id)s, %(url)s)",
                    media_key)

                # Downloaded: drop the row from the download queue.
                db.execute(
                    " DELETE FROM queue_download_medias"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s"
                    " AND url = %(url)s", media_key)

                db.commit()

            except Exception as e:
                # Flag the failing row (status 9) and keep the error text.
                log.error(e)
                error_params = {'error_text': str(e)}
                error_params.update(media_key)
                db.execute(
                    " UPDATE queue_download_medias"
                    " SET `status` = 9"
                    " ,error_text = %(error_text)s"
                    " WHERE service_user_id = %(service_user_id)s"
                    " AND user_id = %(user_id)s"
                    " AND tweet_id = %(tweet_id)s"
                    " AND url = %(url)s", error_params)
                db.commit()

    except exceptions.UncreatedThreadException:
        # No thread slot could be obtained: nothing to do.
        sys.exit()

    except Exception as e:
        log.error(e)
        sys.exit()

    finally:
        # Release the thread slot only if one was actually issued.
        if 'thread_id' in locals():
            log.info('プロセスを終了します。')
            thread.ThreadId().ExitThread('download_medias.py', thread_id)