def main():
    """Batch-process the videos in the dataset.

    For every playlist flagged ``used = 1`` whose folder exists under
    ``video_dir``, walks its used videos in ``list_order``. A video is
    processed only when its ``<title>_<hash>.mp4`` file exists and it has no
    output folder under ``images_dir`` yet; in that case the output folder is
    created and handed to ``extract_frames`` and then ``diff_frames``.
    Videos that already have an output folder are skipped, so the batch can
    be re-run.

    The database handle is closed when the batch ends, including when a
    processing step raises.
    """
    from dbimpl import DBImpl

    playlist_sql = 'select id, title from playlists where used = 1'
    video_sql = ('select hash, title from videos '
                 'where playlist = ? and used = 1 order by list_order')

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    try:
        for list_id, _title in db.querymany(playlist_sql):
            list_folder = os.path.join(video_dir, list_id)
            if not os.path.exists(list_folder):
                continue
            print(list_id)

            for video_hash, video_title in db.querymany(video_sql, list_id):
                # The same "<title>_<hash>" stem names both the .mp4 file and
                # the output folder.
                video = video_title + "_" + video_hash
                video_path = os.path.join(list_folder, video + ".mp4")
                if not os.path.exists(video_path):
                    continue
                print(video_path)

                out_folder = os.path.join(images_dir, video)
                if os.path.exists(out_folder):
                    # An existing output folder is taken to mean "already done".
                    continue
                os.mkdir(out_folder)

                extract_frames(video_path, out_folder=out_folder)
                diff_frames(out_folder)
    finally:
        # Release the database handle even if a step above failed.
        db.close()
def download(): db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')}) sql = 'select id, title from playlists where used = 1' sql2 = 'select * from videos where playlist = ?' res = db.querymany(sql) video_folder = "/Volumes/Seagate/VideoAnalytics/Videos" for list_id, title in res: res = db.querymany(sql2, list_id) if len(res) > 0: print 'list has been downloaded', list_id continue print list_id, title playlist_url = "https://www.youtube.com/playlist?list=%s" % list_id output_folder = os.path.join(video_folder, list_id) if not os.path.exists(output_folder): os.mkdir(output_folder) videos = download_youtube_list(playlist_url, output_folder) for idx, (video_hash, title) in enumerate(videos): insert_video(db, video_hash, title, list_id, idx + 1) db.close()
def main(): from dbimpl import DBImpl model = keras.models.load_model('weights.h5') print 'finish loading model' print video_dir, images_dir db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')}) sql = 'select id, title from playlists where used = 1' sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order' res = db.querymany(sql) for list_id, title in res: list_folder = os.path.join(video_dir, list_id) # if list_id in ['PLS1QulWo1RIbfTjQvTdj8Y6yyq4R7g-Al', 'PLFE2CE09D83EE3E28', 'PLE7E8B7F4856C9B19', 'PL27BCE863B6A864E3']: # continue print list_id videos = db.querymany(sql2, list_id) for video_hash, video_title in videos: # video_path = os.path.join(list_folder, video_title + "_" + video_hash + ".mp4") video = video_title + "_" + video_hash print video predict_video(video, model)
def batch(): from dbimpl import DBImpl import preprocess from video_tagging.predict import predict_video, load_model db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')}) default_config = { 'eps1': 3, 'eps2': 2, 'min_samples': 2, 'line_ratio': 0.7 } sql = 'select id, title from playlists where used = 1' sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order' res = db.querymany(sql) for list_id, title in res: list_folder = os.path.join(video_dir, list_id) if not os.path.exists(list_folder): continue print list_id videos = db.querymany(sql2, list_id) for video_hash, video_title in videos: video_title = video_title.strip() video_folder = video_title + '_' + video_hash video_path = os.path.join(video_dir, list_id, video_folder + ".mp4") if not os.path.exists(os.path.join(images_dir, video_folder)): continue if not os.path.exists( os.path.join(images_dir, video_folder, 'predict.txt')): predict_video(video_folder, valid_model) if os.path.exists(os.path.join(crop_dir, video_folder)): continue cvideo = CVideo(video_folder, config=default_config) if len(cvideo.images) <= 0: continue if not os.path.exists(os.path.join(lines_dir, video_folder)): os.mkdir(os.path.join(lines_dir, video_folder)) cvideo.cluster_lines() cvideo.adjust_lines() cvideo.detect_rects() print video_title, video_hash cvideo.crop_rects()
def main(): from dbimpl import DBImpl db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')}) sql = 'select id, title from playlists where used = 1' sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order' res = db.querymany(sql) for list_id, title in res: list_folder = os.path.join(video_dir, list_id) print list_id videos = db.querymany(sql2, list_id) for video_hash, video_title in videos: print video_title, video_hash google_ocr(video_title, video_hash)
def main():
    """Call ``OCR_noise`` for every used video of every used playlist.

    Playlists flagged ``used = 1`` are visited in query order and their used
    videos in ``list_order``. Each video is identified by its stripped title
    joined to its hash with an underscore.

    The database handle is closed when the run ends, including when
    ``OCR_noise`` raises.
    """
    playlist_sql = 'select id, title from playlists where used = 1'
    video_sql = ('select hash, title from videos '
                 'where playlist = ? and used = 1 order by list_order')

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    try:
        for list_id, _title in db.querymany(playlist_sql):
            print(list_id)
            for video_hash, video_title in db.querymany(video_sql, list_id):
                video_folder = video_title.strip() + '_' + video_hash
                OCR_noise(video_folder)
    finally:
        db.close()
def batch_crop():
    """Call ``crop_noisy_frame`` for the used videos of selected playlists.

    Playlists flagged ``used = 1`` are visited in query order; a playlist is
    skipped when its folder exists under ``video_dir``. For the remaining
    playlists every used video (in ``list_order``) is identified by its
    stripped title joined to its hash with an underscore, printed, and passed
    to ``crop_noisy_frame``.

    The database handle is closed when the run ends, including when a crop
    raises.
    """
    playlist_sql = 'select id, title from playlists where used = 1'
    video_sql = ('select hash, title from videos '
                 'where playlist = ? and used = 1 order by list_order')

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    try:
        for list_id, _title in db.querymany(playlist_sql):
            list_folder = os.path.join(video_dir, list_id)
            # NOTE(review): this skips playlists whose folder EXISTS. If the
            # intent was to skip playlists that are missing on disk, the test
            # needs a `not` -- behaviour left unchanged pending confirmation.
            if os.path.exists(list_folder):
                continue
            print(list_id)

            for video_hash, video_title in db.querymany(video_sql, list_id):
                video_folder = video_title.strip() + '_' + video_hash
                print(video_folder)
                crop_noisy_frame(video_folder)
    finally:
        db.close()
def main():
    """Correct the OCR words of every verified video.

    Reads one video hash per line from ``verified_videos.txt``, then walks
    all used videos that belong to used playlists. For each video whose hash
    is in that file, the OCR folder ``<ocr_dir>/<title>_<hash>`` (title
    stripped) is printed and a ``GoogleOCRParser`` built for it has
    ``correct_words`` called.

    The database handle is closed when the run ends, including when a parser
    raises.
    """
    # A set keeps the per-row membership test constant-time.
    with open("verified_videos.txt") as fin:
        process_hashes = set(line.strip() for line in fin)

    from dbimpl import DBImpl

    sql = ('select a.hash, a.title from videos a, playlists b '
           'where a.playlist = b.id and a.used = 1 and b.used = 1')

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    try:
        for video_hash, video_name in db.querymany(sql):
            if video_hash not in process_hashes:
                continue

            video_name = video_name.strip()
            ocr_folder = os.path.join(ocr_dir, video_name + "_" + video_hash)
            print(ocr_folder)

            parser = GoogleOCRParser(video_name, ocr_folder)
            parser.correct_words()
    finally:
        db.close()
class APIDBImpl:
    """Query and cache helper backed by the ``link_api`` MySQL database."""

    def __init__(self):
        """Open the underlying ``DBImpl`` connection to the local server."""
        connection_settings = {
            "type": "mysql",
            "url": "127.0.0.1",
            "username": "******",
            "password": "******",
            "database": "link_api"
        }
        self.dbimpl = DBImpl(connection_settings)

    def query_records(self, entity):
        """Return the ``link_api_record`` rows whose name matches ``entity``.

        When ``entity`` contains ``(`` anywhere after its first character,
        only the stripped text before that parenthesis is used as the name.
        """
        paren_at = entity.find('(')
        # paren_at == 0 (leading parenthesis) and -1 (none) both keep the
        # entity untouched.
        name = entity[:paren_at].strip() if paren_at > 0 else entity
        return self.dbimpl.querymany(
            'select * from link_api_record where name = %s', name)

    def query_web_cache(self, link):
        """Return the ``web_cache`` row stored for the URL ``link``."""
        return self.dbimpl.queryone(
            'select * from web_cache where url = %s', link)

    def insert_or_update_cache(self, result):
        """Insert or refresh the cached content described by ``result``.

        ``result`` is indexed positionally: ``result[1]`` is the content,
        ``result[2]`` the URL, and a truthy ``result[3]`` selects an insert
        while a falsy one selects an update that also sets ``access_time``
        to the current time. Any exception is printed and swallowed.
        """
        try:
            content, url = result[1], result[2]
            if result[3]:
                self.dbimpl.updateone(
                    'insert web_cache(url, content) values(%s, %s)',
                    url, content)
            else:
                self.dbimpl.updateone(
                    'update web_cache set content=%s, access_time=%s where url=%s',
                    content, datetime.now(), url)
        except Exception as e:
            # Best effort: a cache failure is reported but never propagated.
            print(e)

    def close(self):
        """Close the underlying database connection."""
        self.dbimpl.close()
import os import json from dbimpl import DBImpl from sklearn.feature_extraction.text import TfidfVectorizer import pickle sys.path.append('..') from setting import * from util import correct_non_ascii from OCR.adjust_ocr import GoogleOCRParser, diff_lines from OCR.JavaLine import JavaLine from OCR.lm import JAVA_WORDS, JAVA_LINE_STRUCTURE db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')}) sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1' res = db.querymany(sql) video_folders = [] video_hash_map = {} for video_hash, video_title in res: video_folders.append((video_title.strip(), video_hash)) video_hash_map[video_hash] = video_title # baseline def construct_index_with_noise(): all_docs = [] video_track = {} num = 0 all_frame_docs = [] frame_track = {}