Example #1
0
def main():  # batch processing for the videos in the dataset
    from dbimpl import DBImpl

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        if not os.path.exists(list_folder):
            continue

        print list_id
        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_path = os.path.join(list_folder,
                                      video_title + "_" + video_hash + ".mp4")

            if not os.path.exists(video_path):
                continue
            print video_path

            video = video_title + "_" + video_hash
            out_folder = os.path.join(images_dir, video)
            if os.path.exists(out_folder):
                continue
            else:
                os.mkdir(out_folder)

            extract_frames(video_path, out_folder=out_folder)
            diff_frames(out_folder)
Example #2
0
def download():
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select * from videos where playlist = ?'
    res = db.querymany(sql)

    video_folder = "/Volumes/Seagate/VideoAnalytics/Videos"
    for list_id, title in res:
        res = db.querymany(sql2, list_id)
        if len(res) > 0:
            print 'list has been downloaded', list_id
            continue

        print list_id, title
        playlist_url = "https://www.youtube.com/playlist?list=%s" % list_id

        output_folder = os.path.join(video_folder, list_id)
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

        videos = download_youtube_list(playlist_url, output_folder)

        for idx, (video_hash, title) in enumerate(videos):
            insert_video(db, video_hash, title, list_id, idx + 1)

    db.close()
Example #3
0
def main():
    from dbimpl import DBImpl

    model = keras.models.load_model('weights.h5')
    print 'finish loading model'

    print video_dir, images_dir

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    
    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)

        # if list_id in ['PLS1QulWo1RIbfTjQvTdj8Y6yyq4R7g-Al', 'PLFE2CE09D83EE3E28', 'PLE7E8B7F4856C9B19', 'PL27BCE863B6A864E3']:
            # continue
        print list_id
        
        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            # video_path = os.path.join(list_folder, video_title + "_" + video_hash + ".mp4")

            video = video_title + "_" + video_hash

            print video 
            predict_video(video, model)
Example #4
0
def batch():
    """Line-clustering / rect-cropping pipeline over every used video.

    For each used playlist with a local video folder, and each used video
    in it that already has extracted frames: ensure a frame-level
    predict.txt exists, then cluster and adjust text lines, detect
    rectangles and crop them via CVideo.
    """
    from dbimpl import DBImpl
    import preprocess
    from video_tagging.predict import predict_video, load_model

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    # Clustering parameters passed to CVideo; their semantics are defined
    # by the CVideo class (not visible here).
    default_config = {
        'eps1': 3,
        'eps2': 2,
        'min_samples': 2,
        'line_ratio': 0.7
    }

    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        if not os.path.exists(list_folder):
            # playlist was never downloaded locally
            continue

        print list_id
        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_title = video_title.strip()
            # folder naming convention: "<title>_<hash>"
            video_folder = video_title + '_' + video_hash

            # NOTE(review): video_path is computed but never used below.
            video_path = os.path.join(video_dir, list_id,
                                      video_folder + ".mp4")

            # skip videos whose frames were never extracted
            if not os.path.exists(os.path.join(images_dir, video_folder)):
                continue
            # NOTE(review): `valid_model` is not defined in this function and
            # `load_model` is imported but never called -- presumably
            # `valid_model` is a module-level model instance; confirm.
            if not os.path.exists(
                    os.path.join(images_dir, video_folder, 'predict.txt')):
                predict_video(video_folder, valid_model)

            # already cropped -> nothing left to do for this video
            if os.path.exists(os.path.join(crop_dir, video_folder)):
                continue

            cvideo = CVideo(video_folder, config=default_config)
            if len(cvideo.images) <= 0:
                continue

            if not os.path.exists(os.path.join(lines_dir, video_folder)):
                os.mkdir(os.path.join(lines_dir, video_folder))

            cvideo.cluster_lines()
            cvideo.adjust_lines()
            cvideo.detect_rects()

            print video_title, video_hash
            cvideo.crop_rects()
Example #5
0
def main():
    from dbimpl import DBImpl

    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)

        print list_id
        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            print video_title, video_hash
            google_ocr(video_title, video_hash)
Example #6
0
def main():
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})

    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)

        print list_id
        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_title = video_title.strip()
            video_folder = video_title + '_' + video_hash

            OCR_noise(video_folder)
Example #7
0
def batch_crop():
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})

    sql = 'select id, title from playlists where used = 1'
    sql2 = 'select hash, title from videos where playlist = ? and used = 1 order by list_order'
    res = db.querymany(sql)
    for list_id, title in res:
        list_folder = os.path.join(video_dir, list_id)
        if os.path.exists(list_folder):
            continue

        print list_id
        videos = db.querymany(sql2, list_id)
        for video_hash, video_title in videos:
            video_title = video_title.strip()
            video_folder = video_title + '_' + video_hash
            print video_folder

            crop_noisy_frame(video_folder)
Example #8
0
def main():
    with open("verified_videos.txt") as fin:
        process_hashes = [line.strip() for line in fin.readlines()]

    from dbimpl import DBImpl
    db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
    sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1'
    num = 1
    for r in db.querymany(sql):
        video_hash, video_name = r
        video_name = video_name.strip()
        ocr_folder = os.path.join(ocr_dir, video_name + "_" + video_hash)

        if video_hash in process_hashes:
            print ocr_folder
            parser = GoogleOCRParser(video_name, ocr_folder)
            parser.correct_words()
Example #9
0
class APIDBImpl:
    def __init__(self):
        self.dbimpl = DBImpl({
            "type": "mysql",
            "url": "127.0.0.1",
            "username": "******",
            "password": "******",
            "database": "link_api"
        })

    def query_records(self, entity):
        idx = entity.find('(')
        if idx > 0:
            entity = entity[0:idx].strip()

        sql = 'select * from link_api_record where name = %s'
        return self.dbimpl.querymany(sql, entity)

    def query_web_cache(self, link):
        sql = 'select * from web_cache where url = %s'
        return self.dbimpl.queryone(sql, link)

    def insert_or_update_cache(self, result):
        try:
            if not result[3]:
                sql = 'update web_cache set content=%s, access_time=%s where url=%s'
                self.dbimpl.updateone(sql, result[1], datetime.now(),
                                      result[2])
            else:
                sql = 'insert web_cache(url, content) values(%s, %s)'
                self.dbimpl.updateone(sql, result[2], result[1])
        except Exception as e:
            print e

    def close(self):
        self.dbimpl.close()
Example #10
0
import os
import sys
import json
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer

# Fix: `sys` was used below (sys.path.append) without being imported.
sys.path.append('..')
from dbimpl import DBImpl
from setting import *
from util import correct_non_ascii
from OCR.adjust_ocr import GoogleOCRParser, diff_lines
from OCR.JavaLine import JavaLine
from OCR.lm import JAVA_WORDS, JAVA_LINE_STRUCTURE

# All used videos across all used playlists.
db = DBImpl({'url': os.path.join(playlists_dir, 'videos.db')})
sql = 'select a.hash, a.title from videos a, playlists b where a.playlist = b.id and a.used = 1 and b.used = 1'
res = db.querymany(sql)

video_folders = []   # list of (stripped title, hash) pairs
video_hash_map = {}  # hash -> raw (unstripped) title
for video_hash, video_title in res:
    video_folders.append((video_title.strip(), video_hash))
    video_hash_map[video_hash] = video_title


# baseline
def construct_index_with_noise():
    all_docs = []
    video_track = {}
    num = 0

    all_frame_docs = []
    frame_track = {}