import os


def setup():
    print("Running Setup")
    # Check for required packages; install any that are missing.
    packages = ["flask", "sqlite3", "plotly", "numpy", "pandas"]
    for p in packages:
        import_or_install(p)
    # Check if the database exists; create a new one if needed.
    if not os.path.isfile('./db/database.db'):
        setup_db()
    print("Setup done")

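# `import_or_install` is not defined in this snippet. Below is a minimal sketch of
# what it plausibly does (hypothetical helper, not from the original source;
# assumes pip is available for the running interpreter):
import importlib
import subprocess
import sys


def import_or_install(package):
    """Import a package; if it is missing, install it with pip, then import it."""
    try:
        importlib.import_module(package)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        importlib.import_module(package)
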
def main():
    """Used to clean feeds."""
    dbhelper = collect_feeds.DBHelper()
    old_df = dbhelper.get_old_df()
    # Keep the data, minus duplicate rows.
    old_df = old_df.drop_duplicates()
    delete_db.delete_feed_stories(dbhelper.get_connection())
    dbhelper.open_connection()
    setup_db.setup_db(dbhelper.get_connection())
    dbhelper.open_connection()
    dbhelper.update_sql(old_df.to_dict('list'))
    dbhelper.close_connection()

from os import environ


def start_workflow(shared_state, start_date, review_number=0):
    db_connection = setup_db().connect()
    logger = Logger(db_connection)
    shared_state.job_id = None
    shared_state.completed = False

    # Worker limits come from the environment; MAX_DOWNLOADS is optional.
    max_downloads = environ.get('MAX_DOWNLOADS')
    if max_downloads is not None:
        max_downloads = int(max_downloads)
    max_upload_workers = int(environ.get('MAX_UPLOADERS', 20))

    try:
        workflow = Workflow(
            db_connection,
            logger,
            start_date,
            max_downloads,
            max_upload_workers,
            environ.get('ALLOW_REPEAT', 'FALSE') == 'TRUE',
        )
        workflow.start(shared_state)
    except Exception:
        logger.exception()
        # Mark the job as failed if one was created before the error.
        if shared_state.job_id is not None:
            job_serializer = Serializer(db_connection, job)
            job_serializer.put(shared_state.job_id, {
                'status': JobStatus.FAILED,
            })

import os
import sys

from pony.orm import db_session  # inferred from the bind()/db_session API


def db2data(db_file: str, data_dir: str, db_provider: str = 'sqlite') -> None:
    """
    Writes a db to a Kaldi-style file directory.

    :param db_file: Full path to the db file
    :param data_dir: Full path to the Kaldi-style directory
    :param db_provider: db type
    :return: None
    """
    db = setup_db()
    db.bind(provider=db_provider, filename=db_file, create_db=False)
    db.generate_mapping(create_tables=False)

    if os.path.exists(data_dir):
        print(f"{data_dir} already exists. Remove or rename it before proceeding.")
        sys.exit(1)
    os.mkdir(data_dir)

    with db_session:
        _write_from_table(db=db, data_dir=data_dir, table='Recording')
        _write_from_table(db=db, data_dir=data_dir, table='Speaker')
        _write_from_table(db=db, data_dir=data_dir, table='Utterance')
    remove_empty(data_dir)

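# A hypothetical invocation of db2data; the paths are illustrative, not from the
# original source. Note that data_dir must not exist yet, or the function exits.
if __name__ == '__main__':
    db2data(db_file='/corpora/train.db', data_dir='/corpora/train_kaldi')
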
def ensure_database_setup(ob_ctx, defaults):
    db_path = ob_ctx.db_path
    default_db_path = os.path.join(defaults["db_dir"], defaults["db_file"])
    default_dev_db_path = os.path.join(defaults["db_dir"], defaults["dev_db_file"])

    if ob_ctx.dev_mode and db_path == default_db_path:
        # Override the default db_path with the developer database path.
        db_path = default_dev_db_path

    # Make sure the folder exists, wherever it is.
    db_dirname = os.path.dirname(db_path)
    if not os.path.exists(db_dirname):
        os.makedirs(db_dirname, 0o755)

    if not os.path.exists(db_path):
        # Set up the database if the file is not there.
        print("[openbazaar] bootstrapping database", os.path.basename(db_path))
        setup_db.setup_db(db_path, ob_ctx.disable_sqlite_crypt)
        print("[openbazaar] database setup completed\n")

def main():
    db_connection = setup_db().connect()
    logger = Logger(db_connection)
    logger.info('Initializing Downloader')
    try:
        run_downloader(db_connection, logger)
    except Exception:
        logger.exception()
    logger.info('Finishing Downloader')

import os
import sys

from pony.orm import commit, db_session  # inferred from the bind()/db_session API


def main(data_dir: str, db_file: str = None, db_provider: str = 'sqlite',
         corpus: str = None) -> None:
    db_file = db_file if db_file is not None else os.path.basename(data_dir)

    # Set up the database.
    if os.path.exists(db_file):
        print(f"{db_file} already exists. Remove or rename it before proceeding.")
        sys.exit(1)
    db = setup_db()
    db.bind(provider=db_provider, filename=db_file, create_db=True)
    db.generate_mapping(create_tables=True)

    assert all(os.path.exists(os.path.join(data_dir, f)) for f in required_files), \
        f"These files are required to exist in {data_dir}:\n\t{required_files}"
    corpus = corpus if corpus else os.path.basename(data_dir)
    dummy_segment = _check_segment(data_dir)

    with db_session:
        # Build from required files.
        recordings = _build_recordings(
            wav_file=os.path.join(data_dir, 'wav.scp'), corpus=corpus, db=db)
        sentences = _build_sentences(text=os.path.join(data_dir, 'text'), db=db)
        speakers = _build_speakers(utt2spk=os.path.join(data_dir, 'utt2spk'), db=db)
        _build_utterances(recordings=recordings, speakers=speakers,
                          sentences=sentences,
                          segments=os.path.join(data_dir, 'segments'), db=db)

        # Build from optional files.
        for file, t in optional_files_map.items():
            entity, field = t
            file = os.path.join(data_dir, file)
            if os.path.exists(file):
                _update_db_from_file(db=db, file=file, entity=entity, field=field)
        commit()

    # If _check_segment created a dummy segments file, clean it up.
    if dummy_segment:
        os.remove(os.path.join(data_dir, 'segments'))

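# A hypothetical invocation of the builder above (illustrative paths only):
# build train.db from a Kaldi-style directory; db2data covers the reverse
# direction.
if __name__ == '__main__':
    main(data_dir='data/train', db_file='train.db', corpus='train')
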
from datetime import datetime


def main():
    start = datetime.utcnow()
    print("\n\n\n============ Out-of-Field ETL ============")
    setup_data()
    engine = setup_db()
    if engine:
        oof_maker = DataMaker(engine, OOFFile.data.value)
        oof_maker.make_tables()
        oof_maker.make_migrations()
    else:
        pretty_print("ERROR: ENGINE NOT SET")
    print("\n\n=====================================\n")
    print(f"Program Completed in {datetime.utcnow() - start}\n\n\n")

import hashlib
from datetime import datetime
from os import environ, remove


def upload_worker(queue, job_id, worker_id):
    """Upload downloaded granules to the S3 bucket."""
    db_connection = setup_db().connect()
    logger = Logger(db_connection, job_id)
    bucket_name = environ.get('UPLOAD_BUCKET')
    try:
        logger.info(f'Creating S3 uploader #{worker_id}')
        uploader = S3Uploader(bucket=bucket_name)
        granule_serializer = Serializer(db_connection, granule)
        while True:
            message = queue.get()
            if message == 'DONE':
                # Put the sentinel back for the other workers.
                queue.put('DONE')
                break
            product_id, filename = message
            try:
                with open(filename, 'rb') as f:
                    checksum = hashlib.md5(f.read()).hexdigest().upper()
                # Record the successful download before uploading.
                granule_serializer.put(product_id, {
                    'download_status': DownloadStatus.SUCCESS,
                    'downloaded_at': datetime.now(),
                    'validated': False,
                    'checksum': checksum,
                    's3_location': f'{bucket_name}/{filename}',
                })
                logger.info(f'Uploading {product_id} at #{worker_id}',
                            f'Filename: {filename}')
                uploader.upload_file(filename)
                remove(filename)
                logger.info(f'Uploaded {product_id} at #{worker_id}',
                            f'Filename: {filename}')
            except Exception:
                logger.exception()
    except Exception:
        logger.exception()

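# A minimal sketch of how upload_worker could be driven; the real orchestration
# lives elsewhere in the project, so run_uploaders and its arguments are
# hypothetical. A multiprocessing queue feeds (product_id, filename) pairs to the
# workers, and a single 'DONE' sentinel stops all of them because each worker
# re-enqueues it before exiting.
from multiprocessing import Process, Queue


def run_uploaders(pairs, job_id, num_workers=4):
    queue = Queue()
    workers = [Process(target=upload_worker, args=(queue, job_id, i))
               for i in range(num_workers)]
    for w in workers:
        w.start()
    for pair in pairs:
        queue.put(pair)  # (product_id, filename)
    queue.put('DONE')    # one sentinel is enough; see upload_worker above
    for w in workers:
        w.join()
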
import json
from datetime import datetime


def main():
    start = datetime.utcnow()
    print("\n\n\n============ BETTER CRDC ============")
    reset_containers()
    start_postgres_container()

    with open('./crdc_api/db_config.json') as f:
        db_config = json.load(f)
    setup_data()
    engine = setup_db(db_config)

    pretty_print("CREATE LEA ARTIFACTS")
    lea_files = {
        'layout_file': CRDCFile.LeaLayout.value,
        'data_file': CRDCFile.LeaData.value,
    }
    with open('./crdc_api/lea_config.json') as f:
        lea_config = json.load(f)
    lea_maker = DataMaker(engine, lea_files, lea_config, db_config)
    lea_maker.make_tables_and_files()
    lea_maker.make_views()
    lea_maker.make_migrations()

    pretty_print("CREATE SCHOOL ARTIFACTS")
    school_files = {
        'layout_file': CRDCFile.SchoolLayout.value,
        'data_file': CRDCFile.SchoolData.value,
    }
    with open('./crdc_api/school_config.json') as f:
        school_config = json.load(f)
    school_maker = DataMaker(engine, school_files, school_config, db_config)
    school_maker.make_tables_and_files()
    school_maker.make_views()
    school_maker.make_migrations()

    start_graphql_engine_container()
    print("\n\n=====================================\n")
    print(f"Program Completed in {datetime.utcnow() - start}\n\n\n")

# -*- coding: utf-8 -*-
"""
This file is covered by the LICENSING file in the root of this project.
"""
from hackathon.hmongo import drop_db
from setup_db import setup_db

drop_db()
setup_db()

# -----------------------------------------------------------------------------------
# Copyright (c) Microsoft Open Technologies (Shanghai) Co. Ltd. All rights reserved.
# Licensed under the MIT License (MIT).
# -----------------------------------------------------------------------------------
from hackathon.database import Base, engine
from setup_db import setup_db

Base.metadata.drop_all(bind=engine)
setup_db()

import os


def main():
    answers = prompt(QUESTIONS)
    input_path = answers['input_path']
    filenames = [f for f in os.listdir(input_path) if f.endswith('.csv')]
    files = map(lambda f: os.path.join(input_path, f), filenames)

    # Clean each CSV using its matching preset.
    files = map(
        lambda f: clean_csv.clean_from_preset(
            f, None, functions_data.choose_preset(f)),
        files)

    # Filter private/company records.
    # todo: ask how many lines the user wants to classify
    # todo: save and load user classifications
    answer = prompt([{
        'type': 'confirm',
        'message': 'Do you want to filter records by private/company?',
        'name': 'classify',
        'default': True,
    }])
    if answer['classify']:
        files = map(classify_account, files)
    files = list(files)

    # Set up the db: ask for a database filename.
    answer = prompt([{
        'type': 'input',
        'name': 'db_file',
        'message': 'Filename to write database to:',
        'default': 'db.db',
    }])
    db_file = answer['db_file']
    account_names = [f.replace('.csv', '') for f in filenames]
    setup_db.setup_db(db_file, account_names)

    # Import each cleaned file into its account table.
    for account_name, f in zip(account_names, files):
        csv_to_db.import_records(f, db_file, account_name)

    # Query primary account names. Note: `with conn:` wraps a transaction on
    # sqlite3 connections; it does not close them, hence the explicit close().
    conn = functions_db.create_connection(db_file)
    with conn:
        primary_accounts = functions_db.get_primary_accounts(conn)
    conn.close()

    if len(primary_accounts) > 0:
        # Query secondary account names.
        conn = functions_db.create_connection(db_file)
        with conn:
            secondary_accounts = functions_db.get_secondary_accounts(conn)
        conn.close()

        # Match records from each secondary account against the main account.
        for a in secondary_accounts:
            account_name = a[0]
            print('Matching records from account {} with main account'.format(
                account_name))
            match_records_db.match_records(db_file, account_name, False)
    else:
        print('No primary account defined, not matching records.')

from auth_keys import auth_keys
from flask import Flask, jsonify, request
from setup_db import setup_db

app = Flask(__name__)
conn, cursor = setup_db()


@app.route('/set_status/', methods=['POST'])
def status():
    response = {'message': ''}
    # Validate before converting: int(None) would raise a TypeError.
    new_status = request.args.get('new_status', None)
    id = request.args.get('id', None)
    auth_key = request.args.get('auth_key', None)

    if id is None:
        response['message'] += "[ERROR] No id was passed."
        response['code'] = False
        return jsonify(response)
    if new_status is None:
        response['message'] += "[ERROR] No new status was passed."
        response['code'] = False
        return jsonify(response)
    new_status = int(new_status)

    if auth_key == auth_keys.get(id, None):
        response['message'] += "[AUTH] auth valid."
        try:
            cursor.execute("UPDATE light_meta SET status = %s WHERE id = %s",
                           (new_status, id))
            conn.commit()
        # The original snippet is truncated above; a minimal completion follows.
        except Exception:
            conn.rollback()
            response['message'] += " [ERROR] Database update failed."
            response['code'] = False
            return jsonify(response)
        response['code'] = True
    else:
        response['message'] += "[AUTH] auth invalid."
        response['code'] = False
    return jsonify(response)

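# A hypothetical smoke test for the endpoint above, using Flask's test client.
# The id and auth key are made up; auth_keys is assumed to map light ids to keys.
if __name__ == '__main__':
    with app.test_client() as client:
        resp = client.post('/set_status/', query_string={
            'id': 'lamp1', 'new_status': 1, 'auth_key': 'secret'})
        print(resp.get_json())
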