Example #1
    def __init__(self, company_id):
        self.company_id = str(company_id)
        db_connector = DBConnector()
        # Database is presumably a module-level constant naming the target Mongo database
        sli_rev_db = db_connector.get_mongo_client(db='mongo_dest')[Database]

        # one collection per company, indexed on revisiondpid
        self.collection = sli_rev_db[self.company_id]
        self.collection.create_index([("revisiondpid", pymongo.ASCENDING)])
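A hypothetical usage sketch for the snippet above; the wrapper class name MongoOperations is an assumption, and DBConnector and Database come from the surrounding project:

# MongoOperations is an assumed name for the class this __init__ belongs to.
ops = MongoOperations(company_id=42)
ops.collection.insert_one({"revisiondpid": 9529232226, "expression": "a + b"})
print(ops.collection.find_one({"revisiondpid": 9529232226}))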
    def __init__(self, company_id):
        self.company_id = str(company_id)
        collection_name = "va_{company_id}".format(company_id=self.company_id)

        db_connector = DBConnector()
        self.conn = db_connector.get_remote_mongo_client(
            section_name='remote_mongo_config')

        database = self.conn[DATABASE]
        self.collection = database[collection_name]

        self.collection.find()  # note: find() returns a lazy cursor that is never consumed here
class VirtualBroker(Broker):
    def __init__(self, commission, algo_id):
        super().__init__()
        self.dbc = DBConnector()
        self.current_date = datetime.now().date()
        self.commission = commission
        self.algo_id = algo_id
        self.portfolio = self.dbc.fetch_portfolio(algo_id)
        self.orders = dict()

    def get_portfolio(self, context):
        self.portfolio = self.dbc.update_portfolio(self.algo_id)
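A minimal usage sketch for VirtualBroker, assuming Broker and the DBConnector portfolio helpers behave as shown above; the algo_id value is an assumption, and get_portfolio ignores its context argument:

broker = VirtualBroker(commission=0.001, algo_id=7)
broker.get_portfolio(context=None)  # refreshes broker.portfolio from the DB
print(broker.portfolio, broker.orders)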
class PostgreSQL:
    def __init__(self, company_id):
        self.company_id = str(company_id)
        self.db_connector = DBConnector()
        self.conn = self.db_connector.get_postgre_conn("postgres_sql")

    def get_mysql_data(self):
        return SQlAlchemyOperations.get_computeinfojson(
            Database, self.company_id)

    def insert(self, rows, db):
        cursor = None
        try:
            print("Insertion started")

            start = time.time()
            self.conn.autocommit = False
            cursor = self.conn.cursor()
            query = 'INSERT INTO "{table}" (revisiondpid, expression, computeinfojson) VALUES %s'
            query = query.format(table=self.company_id)

            # stream the source rows in chunks of 1000 to bound memory use
            while True:
                data_chunk = rows.fetchmany(1000)
                if not data_chunk:
                    break

                execute_values(cursor, query, tuple(data_chunk))

            self.conn.commit()

            print("All rows inserted in {time}".format(time=time.time() - start))

        except Exception as ex:
            print("An error occurred: {}".format(ex))
            self.conn.rollback()

        finally:
            self.conn.autocommit = True
            if cursor is not None:
                cursor.close()

    def get_postgre_data(self, db):
        print("Started fetching data from PostgreSQL, company id: {company_id}".format(
            company_id=self.company_id))

        start = time.time()
        cursor = self.conn.cursor()

        query = 'SELECT revisiondpid, expression, computeinfojson FROM "{company_id}" ' \
                'WHERE revisiondpid IN (9529232226, 8117414530, 8117415374)'.format(company_id=self.company_id)

        cursor.execute(query)
        data = cursor.fetchall()

        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))

        return list(data)
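A hypothetical end-to-end run of the class above, assuming the Database constant and config section names from the other snippets; it copies one company's table from MySQL into PostgreSQL and reads a few rows back:

pg = PostgreSQL(company_id=42)
rows = pg.get_mysql_data()            # SQLAlchemy result cursor from MySQL
pg.insert(rows, db=Database)          # chunked INSERT ... VALUES via execute_values
print(pg.get_postgre_data(db=Database))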
    def __init__(self, company_id):
        self.company_id = str(company_id)

        db_connector = DBConnector()
        self.session = db_connector.get_mysql_ds_session(db='mysql_ds')

        self.db = self.session.get_schema('mysql_ds')
        self.collection = self.db.get_collection(self.company_id)

        if not self.collection.exists_in_database():
            self.collection = self.db.create_collection(self.company_id)

            # index documents on the JSON field $.revisiondpid, typed as BIGINT
            self.collection.create_index('revisiondpid', {
                'fields': [{
                    'field': '$.revisiondpid',
                    "type": "BIGINT"
                }]
            }).execute()
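A hypothetical usage sketch for the Document Store snippet above (MySQL X DevAPI); the class name MySQLDocStore is an assumption:

store = MySQLDocStore(company_id=42)
store.collection.add({"revisiondpid": 9529232226, "expression": "a + b"}).execute()
result = store.collection.find("revisiondpid = :rid").bind("rid", 9529232226).execute()
for doc in result.fetch_all():
    print(doc)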
Example #7
class SQlAlchemyOperations:
    db_connector_obj = DBConnector()

    def __init__(self):
        pass

    @classmethod
    def get_data_from_raw_query(cls, db, company_id):
        print("Started fetching data from MySQL, company id: {company_id}".format(
            company_id=company_id))

        start = time.time()

        query = "SELECT revisiondpid, expression FROM {db}.`{company_id}`".format(
            db=db, company_id=company_id)
        conn = cls.db_connector_obj.get_mysql_engine(db=db)

        data = conn.execute(text(query))

        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))

        return data

    @classmethod
    def get_computeinfojson(cls, db, company_id):
        print("Started fetching data from MySQL, company id: {company_id}".format(
            company_id=company_id))

        start = time.time()
        query = "SELECT revisiondpid, expression, computeinfojson FROM {db}.`{company_id}`".format(
            db=db, company_id=company_id)

        conn = cls.db_connector_obj.get_mysql_engine(db=db)

        data = conn.execute(text(query))

        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))
        return data

    @classmethod
    def get_model(cls, company_id, db):
        conn = cls.db_connector_obj.get_mysql_engine(db=db)
        session = cls.db_connector_obj.get_sql_alchemy_session(conn)
        return get_sli_revision_model(company_id, session)

    @classmethod
    def get_local_mysql_data(cls, db, company_id):
        print("Started fetching data from MySQL, company id: {company_id}".format(
            company_id=company_id))

        start = time.time()
        query = "SELECT revisiondpid, expression, computeinfojson FROM {db}.`{company_id}` " \
                "WHERE revisiondpid IN (9529232226, 8117414530, 8117415374)".format(db=db, company_id=company_id)

        conn = cls.db_connector_obj.get_mysql_engine(db=db)
        data = list(conn.execute(text(query)))

        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))

        return data

    @classmethod
    def insert_bulk_data_result(cls, db, model, rows, chunk_size=1000):
        """
        Insert one table's output into another table.
        :param db: DB name from the config file
        :param model: Model of the table into which data will be inserted
        :param rows: Data which needs to be inserted
        :param chunk_size: Chunk size by which data will be inserted
        """

        start = time.time()
        engine = cls.db_connector_obj.get_mysql_engine(db=db)

        with engine.connect() as conn:
            with conn.begin() as trans:
                try:
                    while True:
                        data_chunk = rows.fetchmany(chunk_size)

                        if not data_chunk:
                            break

                        # insert the current chunk, not the whole source cursor
                        conn.execute(model.__table__.insert(), list(data_chunk))

                    trans.commit()
                except Exception as ex:
                    trans.rollback()
                    print("An error occurred: {}".format(ex))
                    raise

        print("Completed data insertion in {time}".format(time=time.time() - start))
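A hypothetical sketch tying the helpers above together; the config section name "sli_revision" and the company id are assumptions:

rows = SQlAlchemyOperations.get_computeinfojson(db="sli_revision", company_id=42)
model = SQlAlchemyOperations.get_model(company_id=42, db="sli_revision")
SQlAlchemyOperations.insert_bulk_data_result(db="sli_revision", model=model, rows=rows)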
from flask import Flask, request, jsonify
from flask_cors import CORS

import sys
sys.path.append("/home/sergey/Documents/homeworks/twitch_speech/")

from utils.main_server import TwitchSpeechServer
from utils.db_connector import DBConnector
import config as cf

pipeline_server = TwitchSpeechServer()
db_connector = DBConnector(cf.DATABASE_NAME)

app = Flask(__name__)
CORS(app)


# {
# 'status': 'Searching for the video',
# 'progress': int,
# 'download_speed': str,
# }
def check_id(video_id):
    status_info = db_connector.get_status(video_id)

    if not status_info:
        pipeline_server.process_videos(ids=[video_id])
        return {
            'status': 'Процесс запущен',  # "Process started"
            'progress': 0,
            'download_speed': "0MB",
        }

    return status_info
class TwitchSpeechServer:
    def __init__(self):
        self.db_connector = DBConnector(cf.DATABASE_NAME)

    def process_videos(self, ids, workers=20):
        for video_id in ids:
            self.db_connector.delete_video(video_id)
            self.db_connector.update_status(video_id,
                                            "preparing_for_download",
                                            info="-")

        partials = (partial(self._process_video, i, workers=workers)
                    for i in ids)
        threads = [Thread(target=fn) for fn in partials]
        for th in threads:
            th.start()

    def _process_video(self, video_id, workers):
        #  download
        try:
            self._download_video(video_id, workers=workers, filename=video_id)
        except Exception:
            self.db_connector.update_status(video_id, 'fail_on_downloading')
            return

        #  mkv2wav
        self.db_connector.update_status(video_id, 'converting_to_wav')
        try:
            video2wav(f"{cf.VODS_DIR_PATH}{video_id}.mkv",
                      f"{cf.SOUNDS_DIR_PATH}{video_id}.wav")
            # delete video
        except Exception:
            # delete wav if it exists
            self.db_connector.update_status(video_id,
                                            'fail_on_converting_to_wav')
            return

        # recognition
        self.db_connector.update_status(video_id, 'recognition')
        try:
            recognize(video_id, cf.START_RECOGNITION_PATH, cf.SOUNDS_DIR_PATH,
                      cf.RECOGNITION_RESULTS_DIR_PATH)
        except Exception:
            self.db_connector.update_status(video_id, 'fail_on_recognition')
            return

        self.db_connector.update_status(video_id, 'finished')
        self.db_connector.insert_parts(parse_ass(video_id, cf.SUBS_DIR_PATH))

    def _download_video(self,
                        video_id,
                        quality="worst",
                        workers=20,
                        video_format="mkv",
                        path=None,
                        filename=None):
        # Matching video_id
        match = None
        for pattern in VIDEO_PATTERNS:
            match = re.match(pattern, video_id)
            if match:
                break
        if not match:
            raise ValueError(f"Invalid video: {video_id}")

        video_id = match.group('id')

        # Looking up video
        video = twitch.get_video(video_id)

        # Fetching access token
        access_token = twitch.get_access_token(video_id)

        # Fetching playlists
        playlists_m3u8 = twitch.get_playlists(video_id, access_token)
        playlists = list(_parse_playlists(playlists_m3u8))
        playlist_uri = _get_playlist_by_name(playlists, quality)

        # Fetching playlist
        response = requests.get(playlist_uri)
        response.raise_for_status()
        playlist = m3u8.loads(response.text)

        base_uri = re.sub("/[^/]+$", "/", playlist_uri)
        target_dir = _crete_temp_dir(base_uri)
        vod_paths = _get_vod_paths(playlist)

        # Save playlists for debugging purposes
        with open(os.path.join(target_dir, "playlists.m3u8"), "w") as f:
            f.write(playlists_m3u8)
        with open(os.path.join(target_dir, "playlist.m3u8"), "w") as f:
            f.write(response.text)

        # Downloading VODs to target_dir
        path_map = self._download_files(video_id, base_uri, target_dir,
                                        vod_paths, workers)
        self.db_connector.update_status(video_id, 'joining_segments')

        # Make a modified playlist which references downloaded VODs
        # Keep only the downloaded segments and skip the rest
        org_segments = playlist.segments.copy()
        playlist.segments.clear()
        for segment in org_segments:
            if segment.uri in path_map:
                segment.uri = path_map[segment.uri]
                playlist.segments.append(segment)

        playlist_path = os.path.join(target_dir, "playlist_downloaded.m3u8")
        playlist.dump(playlist_path)

        # Joining files
        target = _video_target_filename(video,
                                        video_format,
                                        path=path,
                                        filename=filename)
        _join_vods(playlist_path, target)

        # Deleting temporary files
        shutil.rmtree(target_dir)

    def _download_files(self, video_id, base_url, target_dir, vod_paths,
                        max_workers):
        """
        Downloads a list of VODs defined by a common `base_url` and a list of
        `vod_paths`, returning a dict which maps the paths to the downloaded files.
        """
        self.db_connector.update_status(video_id, 'downloading')

        urls = [base_url + path for path in vod_paths]
        targets = [
            os.path.join(target_dir, "{:05d}.ts".format(k))
            for k, _ in enumerate(vod_paths)
        ]
        partials = (partial(download_file, url, path)
                    for url, path in zip(urls, targets))

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            start_time = datetime.now()

            # run download
            futures = [executor.submit(fn) for fn in partials]

            downloaded_count = 0
            downloaded_size = 0
            total_count = len(futures)
            for future in as_completed(futures):
                size = future.result()
                downloaded_count += 1
                downloaded_size += size

                # extrapolate total size from the average segment size so far,
                # and remaining time from the average per-segment pace
                est_total_size = int(total_count * downloaded_size /
                                     downloaded_count)
                duration = (datetime.now() - start_time).seconds
                speed = downloaded_size // duration if duration else 0
                remaining = (total_count -
                             downloaded_count) * duration / downloaded_count

                info = {
                    'total_count': total_count,
                    'downloaded_count': downloaded_count,
                    'downloaded_size': format_size(downloaded_size),
                    'est_total_size': format_size(est_total_size),
                    'speed': format_size(speed) if speed > 0 else "-",
                    'remaining':
                    format_duration(remaining) if speed > 0 else "-",
                }
                self.db_connector.update_status(
                    video_id,
                    self.db_connector.get_status(video_id)['status'], info)

        return OrderedDict(zip(vod_paths, targets))


if __name__ == '__main__':
    # Example
    server = TwitchSpeechServer()
    db_con = DBConnector(cf.DATABASE_NAME)

    videos = ['760718196', '574423677', '658442340']
    server.process_videos(videos)

    for _ in range(30):
        for video_id in videos:
            tmp = db_con.get_status(video_id)
            status = tmp['status']
            info = tmp['info']
            print(f'Video {video_id}:\nStatus: {status}\nInfo: {info}\n\n')

        time.sleep(5)
Example #13
def init_test(db_name='test'):
    init_empty_mongo_db(db_name)
    conn = DBConnector(db_name=db_name)

    ids = [1, 13, 24]
    status_types = [
        "finished",
        "preparing_for_download",
        "fail_on_downloading",
        "converting_to_wav",
        "fail_on_converting_to_wav",
        "recognition",
        "fail_on_recognition",
        "joining_segments",
        "downloading",
    ]
    seed = 42
    random.seed(seed)

    gen_parts = []
    for video_id in ids:
        for i in range(10):
            gen_parts.append({
                'video_id': video_id,
                "start": 2 * i,
                "end": 2 * i + 1,
                "text": f"{13 * i + 17} русский {video_id}"
            })

    gen_status = []
    for video_id in ids:
        gen_status.append({
            "video_id": video_id,
            "status": random.choice(status_types),
            "info": {
                "1": 1,
                "2": 2
            }
        })

    for status in gen_status:
        conn.update_status(status["video_id"], status["status"],
                           status["info"])

    res = conn.get_status_table()
    print("\nStatus table:")
    for el in res:
        print(el)

    conn.insert_parts(gen_parts)

    res = conn.get_parts_table()
    print("\nParts table:")
    for el in res:
        print(el)

    conn.update_status(ids[0], "NEW SUPER STATUS", info={"speed": 424242})

    print(f"\nStatus {ids[0]} video:")
    res = conn.get_status(ids[0])
    print(res)

    print(f"\nParts {ids[0]} video:")
    res = conn.get_parts(ids[0])
    for el in res:
        print(el)

    request_text = "русский"
    print(f"\nFind {request_text} in {ids[0]} video:")
    res = conn.find_text(ids[0], request_text, limit=5)
    for el in res:
        print(el)

    print(f"\nStatus not exists {145421232} video:")
    res = conn.get_status(145421232)
    print(res)

    client = MongoClient()
    client.drop_database(db_name)
Example #14


if __name__ == '__main__':
    db_con = DBConnector("twitch_speech")

    video_id = '760718196'
    print(f"\nStatus {video_id}:")
    print(db_con.get_status(video_id))

    print(f"\nParts {video_id}")
    parts = db_con.get_parts(video_id)
    for part in parts:
        print(part)

    #res = db_con.get_parts_table()
    #for el in res:
    #    print(el)

    res = db_con.find_text(video_id, "ребята")
    for el in res:
        print(el)