def __init__(self, company_id):
    self.company_id = str(company_id)
    db_connector = DBConnector()
    sli_rev_db = db_connector.get_mongo_client(db='mongo_dest')[Database]
    self.collection = sli_rev_db[self.company_id]
    self.collection.create_index([("revisiondpid", pymongo.ASCENDING)])
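# DBConnector and Database are defined elsewhere in the project; the sketch
# below is a hypothetical stand-in showing the get_mongo_client contract the
# constructor above relies on. The URI is a placeholder, not real config.
import pymongo

class DBConnector:
    def get_mongo_client(self, db):
        # 'db' names a section of the connection config, e.g. 'mongo_dest'
        return pymongo.MongoClient("mongodb://localhost:27017/")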
def __init__(self, company_id):
    self.company_id = str(company_id)
    collection_name = "va_{company_id}".format(company_id=self.company_id)
    db_connector = DBConnector()
    self.conn = db_connector.get_remote_mongo_client(
        section_name='remote_mongo_config')
    database = self.conn[DATABASE]
    self.collection = database[collection_name]
    self.collection.find()
class VirtualBroker(Broker):

    def __init__(self, commission, algo_id):
        super().__init__()
        self.dbc = DBConnector()
        self.current_date = datetime.now().date()
        self.commission = commission
        self.algo_id = algo_id
        self.portfolio = self.dbc.fetch_portfolio(algo_id)
        self.orders = dict()

    def get_portfolio(self, context):
        self.portfolio = self.dbc.update_portfolio(self.algo_id)
class PostgreSQL:

    def __init__(self, company_id):
        self.company_id = str(company_id)
        self.db_connector = DBConnector()
        self.conn = self.db_connector.get_postgre_conn("postgres_sql")

    def get_mysql_data(self):
        return SQlAlchemyOperations.get_computeinfojson(
            Database, self.company_id)

    def insert(self, rows, db):
        cursor = None
        try:
            print("Insertion started")
            start = time.time()
            self.conn.autocommit = False
            cursor = self.conn.cursor()
            query = 'INSERT INTO "{table}" (revisiondpid, expression, computeinfojson) VALUES %s'
            query = query.format(table=self.company_id)
            # Stream the source rows in chunks of 1000 to bound memory use
            while True:
                data_chunk = rows.fetchmany(1000)
                if not data_chunk:
                    break
                execute_values(cursor, query, tuple(data_chunk))
            self.conn.commit()
            print("All rows inserted in {time}".format(time=time.time() - start))
        except Exception as ex:
            print("An error occurred: {ex}".format(ex=ex))
            self.conn.rollback()
        finally:
            self.conn.autocommit = True
            if cursor is not None:
                cursor.close()

    def get_postgre_data(self, db):
        print("Started fetching data from PostgreSQL, company id: {company_id}".format(
            company_id=self.company_id))
        start = time.time()
        cursor = self.conn.cursor()
        query = 'SELECT revisiondpid, expression, computeinfojson FROM "{company_id}" ' \
                'WHERE revisiondpid IN (9529232226, 8117414530, 8117415374)'.format(company_id=self.company_id)
        cursor.execute(query)
        data = cursor.fetchall()
        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))
        return list(data)
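# Hypothetical usage of the class above: copy one company's rows from MySQL
# into PostgreSQL. The company id and the 'mysql_src' config name are
# placeholders, not values taken from the project.
migrator = PostgreSQL(company_id=12345)
rows = migrator.get_mysql_data()       # streaming result set from MySQL
migrator.insert(rows, db='mysql_src')  # chunked INSERT via execute_values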
def __init__(self, company_id):
    self.company_id = str(company_id)
    db_connector = DBConnector()
    self.session = db_connector.get_mysql_ds_session(db='mysql_ds')
    self.db = self.session.get_schema('mysql_ds')
    self.collection = self.db.get_collection(self.company_id)
    if not self.collection.exists_in_database():
        self.collection = self.db.create_collection(self.company_id)
        self.collection.create_index('revisiondpid', {
            'fields': [{
                'field': '$.revisiondpid',
                'type': 'BIGINT'
            }]
        }).execute()
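# Hypothetical usage, assuming `store` is an instance of the class above.
# X DevAPI collections hold JSON documents; the index created in __init__
# targets the $.revisiondpid member of each document. Field values here
# are placeholders.
doc = {'revisiondpid': 9529232226, 'expression': 'a + b'}
store.collection.add(doc).execute()  # insert one JSON document
result = store.collection.find('revisiondpid = 9529232226').execute()
print(result.fetch_all())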
class SQlAlchemyOperations:
    db_connector_obj = DBConnector()

    def __init__(self):
        pass

    @classmethod
    def get_data_from_raw_query(cls, db, company_id):
        print("Started fetching data from MySQL, company id: {company_id}".format(
            company_id=company_id))
        start = time.time()
        query = "SELECT revisiondpid, expression FROM {db}.`{company_id}`".format(
            db=db, company_id=company_id)
        conn = cls.db_connector_obj.get_mysql_engine(db=db)
        data = conn.execute(text(query))
        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))
        return data

    @classmethod
    def get_computeinfojson(cls, db, company_id):
        print("Started fetching data from MySQL, company id: {company_id}".format(
            company_id=company_id))
        start = time.time()
        query = "SELECT revisiondpid, expression, computeinfojson FROM {db}.`{company_id}`".format(
            db=db, company_id=company_id)
        conn = cls.db_connector_obj.get_mysql_engine(db=db)
        data = conn.execute(text(query))
        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))
        return data

    @classmethod
    def get_model(cls, company_id, db):
        conn = cls.db_connector_obj.get_mysql_engine(db=db)
        session = cls.db_connector_obj.get_sql_alchemy_session(conn)
        return get_sli_revision_model(company_id, session)

    @classmethod
    def get_local_mysql_data(cls, db, company_id):
        print("Started fetching data from MySQL, company id: {company_id}".format(
            company_id=company_id))
        start = time.time()
        query = "SELECT revisiondpid, expression, computeinfojson FROM {db}.`{company_id}` " \
                "WHERE revisiondpid IN (9529232226, 8117414530, 8117415374)".format(db=db, company_id=company_id)
        conn = cls.db_connector_obj.get_mysql_engine(db=db)
        data = list(conn.execute(text(query)))
        print("Completed fetching data for {db} in {time}".format(
            db=db, time=time.time() - start))
        return data

    @classmethod
    def insert_bulk_data_result(cls, db, model, rows, chunk_size=1000):
        """
        Insert one table's output into another table.

        :param db: DB name from the config file
        :param model: Model of the table into which data will be inserted
        :param rows: Data to be inserted
        :param chunk_size: Chunk size by which data will be inserted
        """
        start = time.time()
        engine = cls.db_connector_obj.get_mysql_engine(db=db)
        with engine.connect() as conn:
            with conn.begin() as trans:
                try:
                    while True:
                        data_chunk = rows.fetchmany(chunk_size)
                        if not data_chunk:
                            break
                        # Insert the current chunk, not the whole source cursor
                        conn.execute(model.__table__.insert(), *data_chunk)
                    trans.commit()
                except Exception:
                    trans.rollback()
                    print("An error occurred")
                    raise
        print("Completed data insertion in {time}".format(time=time.time() - start))
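# Hypothetical end-to-end usage of the helpers above; 'mysql_src' and
# 'mysql_dest' are assumed config-section names and the company id is a
# placeholder.
company_id = 12345
rows = SQlAlchemyOperations.get_data_from_raw_query('mysql_src', company_id)
model = SQlAlchemyOperations.get_model(company_id, db='mysql_dest')
SQlAlchemyOperations.insert_bulk_data_result('mysql_dest', model, rows)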
from flask import Flask, request, jsonify
from flask_cors import CORS
import sys

sys.path.append("/home/sergey/Documents/homeworks/twitch_speech/")
from utils.main_server import TwitchSpeechServer
from utils.db_connector import DBConnector
import config as cf

pipeline_server = TwitchSpeechServer()
db_connector = DBConnector(cf.DATABASE_NAME)

app = Flask(__name__)
CORS(app)


# Response shape:
# {
#     'status': 'Looking for the video',
#     'progress': int,
#     'download_speed': str,
# }
def check_id(video_id):
    status_info = db_connector.get_status(video_id)
    if not status_info:
        pipeline_server.process_videos(ids=[video_id])
        return {
            'status': 'Process started',
            'progress': 0,
            'download_speed': "0MB",
        }
    return status_info
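# check_id is not wired to a URL above; a minimal, hypothetical way to expose
# it. The route path, host, and port are assumptions, not from the source.
@app.route('/status/<video_id>', methods=['GET'])
def status_endpoint(video_id):
    return jsonify(check_id(video_id))

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)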
class TwitchSpeechServer:

    def __init__(self):
        self.db_connector = DBConnector(cf.DATABASE_NAME)

    def process_videos(self, ids, workers=20):
        for video_id in ids:
            self.db_connector.delete_video(video_id)
            self.db_connector.update_status(video_id, "preparing_for_download", info="-")
        partials = (partial(self._process_video, i, workers=workers) for i in ids)
        threads = [Thread(target=fn) for fn in partials]
        for th in threads:
            th.start()

    def _process_video(self, video_id, workers):
        # Download
        try:
            self._download_video(video_id, workers=workers, filename=video_id)
        except Exception:
            self.db_connector.update_status(video_id, 'fail_on_downloading')
            return

        # mkv -> wav
        self.db_connector.update_status(video_id, 'converting_to_wav')
        try:
            video2wav(f"{cf.VODS_DIR_PATH}{video_id}.mkv",
                      f"{cf.SOUNDS_DIR_PATH}{video_id}.wav")
            # delete video
        except Exception:
            # delete wav if it exists
            self.db_connector.update_status(video_id, 'fail_on_converting_to_wav')
            return

        # Recognition
        self.db_connector.update_status(video_id, 'recognition')
        try:
            recognize(video_id, cf.START_RECOGNITION_PATH, cf.SOUNDS_DIR_PATH,
                      cf.RECOGNITION_RESULTS_DIR_PATH)
        except Exception:
            self.db_connector.update_status(video_id, 'fail_on_recognition')
            return

        self.db_connector.update_status(video_id, 'finished')
        self.db_connector.insert_parts(parse_ass(video_id, cf.SUBS_DIR_PATH))

    def _download_video(self, video_id, quality="worst", workers=20,
                        video_format="mkv", path=None, filename=None):
        # Match video_id against the known URL/ID patterns
        match = None
        for pattern in VIDEO_PATTERNS:
            match = re.match(pattern, video_id)
            if match:
                break
        if not match:
            raise ValueError(f"Invalid video: {video_id}")
        video_id = match.group('id')

        # Look up the video
        video = twitch.get_video(video_id)

        # Fetch an access token
        access_token = twitch.get_access_token(video_id)

        # Fetch the playlists
        playlists_m3u8 = twitch.get_playlists(video_id, access_token)
        playlists = list(_parse_playlists(playlists_m3u8))
        playlist_uri = _get_playlist_by_name(playlists, quality)

        # Fetch the playlist
        response = requests.get(playlist_uri)
        response.raise_for_status()
        playlist = m3u8.loads(response.text)

        base_uri = re.sub("/[^/]+$", "/", playlist_uri)
        target_dir = _crete_temp_dir(base_uri)
        vod_paths = _get_vod_paths(playlist)

        # Save playlists for debugging purposes
        with open(os.path.join(target_dir, "playlists.m3u8"), "w") as f:
            f.write(playlists_m3u8)
        with open(os.path.join(target_dir, "playlist.m3u8"), "w") as f:
            f.write(response.text)

        # Download VODs to target_dir
        path_map = self._download_files(video_id, base_uri, target_dir, vod_paths, workers)
        self.db_connector.update_status(video_id, 'joining_segments')

        # Make a modified playlist which references the downloaded VODs.
        # Keep only the downloaded segments and skip the rest.
        org_segments = playlist.segments.copy()
        playlist.segments.clear()
        for segment in org_segments:
            if segment.uri in path_map:
                segment.uri = path_map[segment.uri]
                playlist.segments.append(segment)

        playlist_path = os.path.join(target_dir, "playlist_downloaded.m3u8")
        playlist.dump(playlist_path)

        # Join the files
        target = _video_target_filename(video, video_format, path=path, filename=filename)
        _join_vods(playlist_path, target)

        # Delete temporary files
        shutil.rmtree(target_dir)

    def _download_files(self, video_id, base_url, target_dir, vod_paths, max_workers):
        """
        Downloads a list of VODs defined by a common `base_url` and a list of
        `vod_paths`, returning a dict which maps the paths to the downloaded files.
        """
        self.db_connector.update_status(video_id, 'downloading')
        urls = [base_url + path for path in vod_paths]
        targets = [
            os.path.join(target_dir, "{:05d}.ts".format(k))
            for k, _ in enumerate(vod_paths)
        ]
        partials = (partial(download_file, url, path)
                    for url, path in zip(urls, targets))

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            start_time = datetime.now()
            # Run the downloads
            futures = [executor.submit(fn) for fn in partials]
            downloaded_count = 0
            downloaded_size = 0
            total_count = len(futures)
            for future in as_completed(futures):
                size = future.result()
                downloaded_count += 1
                downloaded_size += size
                est_total_size = int(total_count * downloaded_size / downloaded_count)
                duration = (datetime.now() - start_time).seconds
                speed = downloaded_size // duration if duration else 0
                remaining = (total_count - downloaded_count) * duration / downloaded_count
                info = {
                    'total_count': total_count,
                    'downloaded_count': downloaded_count,
                    'downloaded_size': format_size(downloaded_size),
                    'est_total_size': format_size(est_total_size),
                    'speed': format_size(speed) if speed > 0 else "-",
                    'remaining': format_duration(remaining) if speed > 0 else "-",
                }
                self.db_connector.update_status(
                    video_id,
                    self.db_connector.get_status(video_id)['status'],
                    info)

        return OrderedDict(zip(vod_paths, targets))
if __name__ == '__main__':
    # Example
    server = TwitchSpeechServer()
    db_con = DBConnector(cf.DATABASE_NAME)

    videos = ['760718196', '574423677', '658442340']
    server.process_videos(videos)

    for _ in range(30):
        for video_id in videos:
            tmp = db_con.get_status(video_id)
            status = tmp['status']
            info = tmp['info']
            print(f'Video {video_id}:\nStatus: {status}\nInfo: {info}\n\n')
        time.sleep(5)
def init_test(db_name='test'):
    init_empty_mongo_db(db_name)
    conn = DBConnector(db_name=db_name)
    ids = [1, 13, 24]
    status_types = [
        "finished",
        "preparing_for_download",
        "fail_on_downloading",
        "converting_to_wav",
        "fail_on_converting_to_wav",
        "recognition",
        "fail_on_recognition",
        "joining_segments",
        "downloading",
    ]

    seed = 42
    random.seed(seed)

    gen_parts = []
    for video_id in ids:
        for i in range(10):
            gen_parts.append({
                'video_id': video_id,
                "start": 2 * i,
                "end": 2 * i + 1,
                "text": f"{13 * i + 17} русский {video_id}"
            })

    gen_status = []
    for video_id in ids:
        gen_status.append({
            "video_id": video_id,
            "status": random.choice(status_types),
            "info": {"1": 1, "2": 2}
        })

    for status in gen_status:
        conn.update_status(status["video_id"], status["status"], status["info"])

    res = conn.get_status_table()
    print("\nStatus table:")
    for el in res:
        print(el)

    conn.insert_parts(gen_parts)
    res = conn.get_parts_table()
    print("\nParts table:")
    for el in res:
        print(el)

    conn.update_status(ids[0], "NEW SUPER STATUS", info={"speed": 424242})
    print(f"\nStatus of video {ids[0]}:")
    res = conn.get_status(ids[0])
    print(res)

    print(f"\nParts of video {ids[0]}:")
    res = conn.get_parts(ids[0])
    for el in res:
        print(el)

    request_text = "русский"  # "Russian"; seeded into the generated parts above
    print(f"\nFind {request_text} in video {ids[0]}:")
    res = conn.find_text(ids[0], request_text, limit=5)
    for el in res:
        print(el)

    print(f"\nStatus of non-existent video {145421232}:")
    res = conn.get_status(145421232)
    print(res)

    client = MongoClient()
    client.drop_database(db_name)
request_text = "русский" print(f"\nFind {request_text} in {ids[0]} video:") res = conn.find_text(ids[0], request_text, limit=5) for el in res: print(el) print(f"\nStatus not exists {145421232} video:") res = conn.get_status(145421232) print(res) client = MongoClient() client.drop_database(db_name) if __name__ == '__main__': db_con = DBConnector("twitch_speech") video_id = '760718196' print(f"\nStatus {video_id}:") print(db_con.get_status(video_id)) print(f"\nParts {video_id}") parts = db_con.get_parts(video_id) for part in parts: print(part) #res = db_con.get_parts_table() #for el in res: # print(el) res = db_con.find_text(video_id, "ребята")