Example #1
0
def setup():
    """Prepare the runtime environment.

    Installs each required package (via ``import_or_install``) and creates
    the SQLite database file if it does not exist yet.
    """
    print("Running Setup")  # was a Python 2 print statement
    # Check for required packages, install if needed.
    packages = ["flask", "sqlite3", "plotly", "numpy", "pandas"]
    for p in packages:
        import_or_install(p)

    # Check if database exists, create new if needed.
    if not os.path.isfile('./db/database.db'):
        setup_db()

    print("Setup done")
def main():
    """
    Used to clean feeds
    """
    helper = collect_feeds.DBHelper()
    # Snapshot the current rows so nothing is lost across the rebuild.
    preserved = helper.get_old_df()
    preserved = preserved.drop_duplicates()
    # Drop the feed tables, recreate the schema, then restore the snapshot.
    delete_db.delete_feed_stories(helper.get_connection())
    helper.open_connection()
    setup_db.setup_db(helper.get_connection())
    helper.open_connection()
    helper.update_sql(preserved.to_dict('list'))
    helper.close_connection()
Example #3
0
def start_workflow(shared_state, start_date, review_number=0):
    """Run a Workflow; on failure, mark the associated job as FAILED."""
    connection = setup_db().connect()
    log = Logger(connection)

    shared_state.job_id = None
    shared_state.completed = False

    # Optional cap on concurrent downloads; unset means no limit.
    max_downloads = environ.get('MAX_DOWNLOADS')
    max_downloads = int(max_downloads) if max_downloads is not None else None
    max_upload_workers = int(environ.get('MAX_UPLOADERS', 20))

    try:
        Workflow(
            connection, log,
            start_date,
            max_downloads, max_upload_workers,
            environ.get('ALLOW_REPEAT', 'FALSE') == 'TRUE'
        ).start(shared_state)
    except Exception:
        log.exception()
        # Only record the failure if the workflow got far enough to create a job.
        if shared_state.job_id is not None:
            Serializer(connection, job).put(shared_state.job_id, {
                'status': JobStatus.FAILED,
            })
Example #4
0
def db2data(db_file: str,
            data_dir: str,
            db_provider: str = 'sqlite', ) -> None:
    """
    Writes a db to Kaldi-style file directory
    :param db_file: Full path to db_file
    :param data_dir: Full path to Kaldi-style directory
    :param db_provider: db type.
    :return: None
    """
    # Attach to the existing database; never create tables here.
    db = setup_db()
    db.bind(provider=db_provider, filename=db_file, create_db=False)
    db.generate_mapping(create_tables=False)

    # Refuse to overwrite an existing output directory.
    if os.path.exists(data_dir):
        print(f"{data_dir} already exists. Remove or rename it before proceed.")
        sys.exit(1)
    os.mkdir(data_dir)

    # Dump each entity table into its Kaldi-style file, in a fixed order.
    with db_session:
        for table_name in ('Recording', 'Speaker', 'Utterance'):
            _write_from_table(db=db, data_dir=data_dir, table=table_name)

    remove_empty(data_dir)
Example #5
0
def ensure_database_setup(ob_ctx, defaults):
    """Make sure the database file and its directory exist.

    In dev mode the default db path is redirected to the developer database.
    If the file is missing, the database is bootstrapped via setup_db.

    :param ob_ctx: context object with db_path, dev_mode and
        disable_sqlite_crypt attributes
    :param defaults: dict with "db_dir", "db_file" and "dev_db_file" keys
    """
    db_path = ob_ctx.db_path
    default_db_path = os.path.join(defaults["db_dir"], defaults["db_file"])
    default_dev_db_path = os.path.join(defaults["db_dir"], defaults["dev_db_file"])

    if ob_ctx.dev_mode and db_path == default_db_path:
        # override default db_path to developer database path.
        db_path = default_dev_db_path

    # make sure the folder exists wherever it is
    db_dirname = os.path.dirname(db_path)
    if not os.path.exists(db_dirname):
        os.makedirs(db_dirname, 0o755)  # 0o755: Python 3 octal (was legacy 0755)

    if not os.path.exists(db_path):
        # setup the database if file not there.
        print("[openbazaar] bootstrapping database ", os.path.basename(db_path))
        setup_db.setup_db(db_path, ob_ctx.disable_sqlite_crypt)
        print("[openbazaar] database setup completed\n")
Example #6
0
def ensure_database_setup(ob_ctx, defaults):
    """Make sure the database file and its directory exist.

    In dev mode the default db path is redirected to the developer database.
    If the file is missing, the database is bootstrapped via setup_db.

    :param ob_ctx: context object with db_path, dev_mode and
        disable_sqlite_crypt attributes
    :param defaults: dict with 'db_dir', 'db_file' and 'dev_db_file' keys
    """
    db_path = ob_ctx.db_path
    default_db_path = os.path.join(defaults['db_dir'], defaults['db_file'])
    default_dev_db_path = os.path.join(defaults['db_dir'],
                                       defaults['dev_db_file'])

    if ob_ctx.dev_mode and db_path == default_db_path:
        # override default db_path to developer database path.
        db_path = default_dev_db_path

    # make sure the folder exists wherever it is
    db_dirname = os.path.dirname(db_path)
    if not os.path.exists(db_dirname):
        os.makedirs(db_dirname, 0o755)  # 0o755: Python 3 octal (was legacy 0755)

    if not os.path.exists(db_path):
        # setup the database if file not there.
        print("[openbazaar] bootstrapping database ", os.path.basename(db_path))
        setup_db.setup_db(db_path, ob_ctx.disable_sqlite_crypt)
        print("[openbazaar] database setup completed\n")
Example #7
0
def main():
    """Entry point: run the downloader, logging start, finish and errors."""
    connection = setup_db().connect()
    log = Logger(connection)

    log.info('Initializing Downloader')

    try:
        run_downloader(connection, log)
    except Exception:
        # Top-level boundary: record the failure and still log the finish line.
        log.exception()

    log.info('Finishing Downloader')
def main(data_dir: str,
         db_file: str = None,
         db_provider: str = 'sqlite',
         corpus: str = None) -> None:
    """Build a database from a Kaldi-style data directory.

    :param data_dir: path to the Kaldi-style directory (wav.scp, text, ...)
    :param db_file: output database filename; defaults to basename(data_dir)
    :param db_provider: db type passed to db.bind (e.g. 'sqlite')
    :param corpus: corpus label; defaults to basename(data_dir)
    :return: None
    """
    db_file = db_file if db_file is not None else os.path.basename(data_dir)
    # Set up database
    # Refuse to overwrite an existing database file.
    if os.path.exists(db_file):
        print(f"{db_file} already exist. Remove or rename it before proceed.")
        sys.exit(1)

    db = setup_db()
    db.bind(provider=db_provider, filename=db_file, create_db=True)
    db.generate_mapping(create_tables=True)

    # All files in the module-level `required_files` list must be present.
    assert all(os.path.exists(os.path.join(data_dir, f)) for f in required_files), \
        f"Required these file to exist in {data_dir}:\n\t{required_files}"

    corpus = corpus if corpus else os.path.basename(data_dir)
    # _check_segment reports whether a placeholder 'segments' file was created;
    # if so it is removed again after the build (see end of function).
    dummy_segment = _check_segment(data_dir)

    # Build from required files
    with db_session:
        recordings = _build_recordings(wav_file=os.path.join(
            data_dir, 'wav.scp'),
                                       corpus=corpus,
                                       db=db)
        sentences = _build_sentences(text=os.path.join(data_dir, 'text'),
                                     db=db)
        speakers = _build_speakers(utt2spk=os.path.join(data_dir, 'utt2spk'),
                                   db=db)
        # Utterances link recordings, speakers and sentences together.
        _build_utterances(recordings=recordings,
                          speakers=speakers,
                          sentences=sentences,
                          segments=os.path.join(data_dir, 'segments'),
                          db=db)

        # Build from optional files
        # optional_files_map maps a filename to the (entity, field) it updates.
        for file, t in optional_files_map.items():
            entity, field = t
            file = os.path.join(data_dir, file)
            if os.path.exists(file):
                _update_db_from_file(db=db,
                                     file=file,
                                     entity=entity,
                                     field=field)

        commit()
        # Clean up the placeholder segments file created by _check_segment.
        if dummy_segment:
            os.remove(os.path.join(data_dir, 'segments'))
Example #9
0
def main():
    """Run the Out-of-Field ETL and report total elapsed time."""
    begin = datetime.utcnow()
    print("\n\n\n============ Out-of-Field ETL ============")
    setup_data()
    engine = setup_db()

    if not engine:
        pretty_print("ERROR: ENGINE NOT SET")
    else:
        maker = DataMaker(engine, OOFFile.data.value)
        maker.make_tables()
        maker.make_migrations()

    print("\n\n=====================================\n")
    print(f"Program Completed in {datetime.utcnow() - begin}\n\n\n")
Example #10
0
def upload_worker(queue, job_id, worker_id):
    """Uploader to the S3 bucket.

    Consumes (product_id, filename) messages from *queue*, records download
    metadata, uploads each file and deletes it locally. Stops on the 'DONE'
    sentinel, which is re-queued so sibling workers also terminate.

    :param queue: work queue of (product_id, filename) tuples or 'DONE'
    :param job_id: job identifier passed to the Logger
    :param worker_id: numeric id used only in log messages
    """
    db_connection = setup_db().connect()
    logger = Logger(db_connection, job_id)
    bucket_name = environ.get('UPLOAD_BUCKET')

    try:
        logger.info(f'Creating S3 uploader #{worker_id}')
        uploader = S3Uploader(bucket=bucket_name)
        granule_serializer = Serializer(db_connection, granule)

        while True:
            message = queue.get()
            if message == 'DONE':
                # Put it back for other workers.
                queue.put('DONE')
                break

            product_id, filename = message

            try:
                # Close the file promptly instead of leaking the handle.
                with open(filename, 'rb') as fh:
                    checksum = hashlib.md5(fh.read()).hexdigest().upper()

                # Download status = SUCCESS
                granule_serializer.put(
                    product_id, {
                        'download_status': DownloadStatus.SUCCESS,
                        'downloaded_at': datetime.now(),
                        'validated': False,
                        'checksum': checksum,
                        # Interpolate the real filename; the original stored
                        # the literal text 'filename' in s3_location.
                        's3_location': f'{bucket_name}/{filename}'
                    })

                logger.info(f'Uploading {product_id} at #{worker_id}',
                            f'Filename: {filename}')
                uploader.upload_file(filename)
                remove(filename)

                logger.info(f'Uploaded {product_id} at #{worker_id}',
                            f'Filename: {filename}')
            except Exception:
                # Per-message failures are logged; the worker keeps running.
                logger.exception()
    except Exception:
        logger.exception()
Example #11
0
def _load_json(path):
    """Load a JSON config file, closing the handle (json.load(open(...)) leaked it)."""
    with open(path) as fh:
        return json.load(fh)


def main():
    """Run the CRDC ETL: reset containers, build LEA and school artifacts,
    then start the GraphQL engine. Prints total elapsed time."""
    start = datetime.utcnow()

    print("\n\n\n============ BETTER CRDC ============")
    reset_containers()
    start_postgres_container()

    db_config = _load_json('./crdc_api/db_config.json')
    setup_data()
    engine = setup_db(db_config)

    pretty_print("CREATE LEA ARTIFACTS")
    lea_files = {
        'layout_file': CRDCFile.LeaLayout.value,
        'data_file': CRDCFile.LeaData.value,
    }
    lea_config = _load_json('./crdc_api/lea_config.json')
    lea_maker = DataMaker(engine, lea_files, lea_config, db_config)
    lea_maker.make_tables_and_files()
    lea_maker.make_views()
    lea_maker.make_migrations()

    pretty_print("CREATE SCHOOL ARTIFACTS")
    school_files = {
        'layout_file': CRDCFile.SchoolLayout.value,
        'data_file': CRDCFile.SchoolData.value,
    }
    school_config = _load_json('./crdc_api/school_config.json')
    school_maker = DataMaker(engine, school_files, school_config, db_config)
    school_maker.make_tables_and_files()
    school_maker.make_views()
    school_maker.make_migrations()

    start_graphql_engine_container()

    print("\n\n=====================================\n")
    print(f"Program Completed in {datetime.utcnow() - start}\n\n\n")
Example #12
0
# -*- coding: utf-8 -*-
"""
This file is covered by the LICENSING file in the root of this project.
"""

from hackathon.hmongo import drop_db
from setup_db import setup_db

# Reset the database, then rebuild it from scratch via setup_db.
drop_db()
setup_db()
Example #13
0
#
# -----------------------------------------------------------------------------------
# Copyright (c) Microsoft Open Technologies (Shanghai) Co. Ltd.  All rights reserved.
#  
# The MIT License (MIT)
#  
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#  
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#  
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# -----------------------------------------------------------------------------------

from hackathon.database import Base, engine
from setup_db import setup_db

# Drop every mapped table on this engine, then rebuild the schema from scratch.
Base.metadata.drop_all(bind=engine)
setup_db()
def main():
    """Interactive CSV-to-database pipeline.

    Prompts for an input directory, cleans each CSV with a preset, optionally
    classifies records as private/company, writes everything into a SQLite
    database, and finally matches secondary-account records against the
    primary account.
    """
    answers = prompt(QUESTIONS)
    input_path = answers['input_path']
    filenames = os.listdir(input_path)
    filenames = list(filter(lambda f: f.endswith('.csv'), filenames))
    files = map(lambda f: input_path + os.sep + f, filenames)

    # clean
    files = map(
        lambda f: clean_csv.clean_from_preset(f, None,
                                              functions_data.choose_preset(f)),
        files)

    # filter private/company
    # todo: ask how many lines the user wants to classify
    # todo: save and load user classifications
    answer = prompt([{
        'type': 'confirm',
        'message': 'Do you want to filter records by private/company?',
        'name': 'classify',
        'default': True,
    }])

    if answer['classify']:
        files = map(lambda f: classify_account(f), files)

    files = list(files)

    # setup db
    # ask for database filename
    answer = prompt([{
        'type': 'input',
        'name': 'db_file',
        'message': 'Filename to write database to:',
        'default': 'db.db',
    }])
    db_file = answer['db_file']
    account_names = [f.replace('.csv', '') for f in filenames]
    setup_db.setup_db(db_file, account_names)

    # csv_to_db: account_names parallels files (both derive from filenames),
    # so pair them with zip instead of a manual index counter.
    for cleaned, account in zip(files, account_names):
        csv_to_db.import_records(cleaned, db_file, account)

    # query primary account names
    conn = functions_db.create_connection(db_file)
    with conn:
        primary_accounts = functions_db.get_primary_accounts(conn)
    conn.close()

    if len(primary_accounts) == 0:
        print('No primary account defined, not matching records.')
        return

    # query secondary account names
    conn = functions_db.create_connection(db_file)
    with conn:
        secondary_accounts = functions_db.get_secondary_accounts(conn)
    conn.close()

    # match_records_db
    for a in secondary_accounts:
        account_name = a[0]
        print('Matching records from account {} with main account'.format(
            account_name))
        match_records_db.match_records(db_file, account_name, False)
Example #15
0
from auth_keys import auth_keys
from flask import Flask, request, jsonify
from setup_db import setup_db

# Flask application exposing the status endpoint(s) defined below.
app = Flask(__name__)

# Module-level DB connection and cursor shared by all request handlers.
# NOTE(review): a single shared cursor is not thread-safe under a threaded
# WSGI server — confirm the deployment model.
conn, cursor = setup_db()


@app.route('/set_status/', methods=['POST'])
def status():
    response = {'message': ''}
    new_status = int(request.args.get("new_status", None))
    id = request.args.get('id', None)
    auth_key = request.args.get('auth_key', None)

    if id is None:
        response['message'] += "[ERROR] No id was passed."
        response["code"] = False
        return jsonify(response)
    if new_status is None:
        response['message'] += "[ERROR] No new status was passed."
        response["code"] = False
        return jsonify(response)

    if (auth_key == auth_keys.get(id, None)):
        response['message'] += "[AUTH] auth valid."
        try:
            cursor.execute("UPDATE light_meta SET status = %s WHERE id = %s",
                           (new_status, id))
            conn.commit()