コード例 #1
0
ファイル: main.py プロジェクト: vgkienzler/sparkify-redshift
def main():
    """Run the interactive workflow that sets up the data tables in Redshift.

    The steps are, in order:

    1. Call ``create_rc.main()`` to get the client and the cluster name
       (according to the original notes, this creates the IAM role and the
       Redshift cluster when they do not exist yet).
    2. Look up the cluster status. While the status is ``'creating'`` the
       user chooses between waiting (the status is polled every 20 seconds)
       and quitting.
    3. When the status is ``'available'``, pass the cluster endpoint address
       to ``create_rc.update_section_key`` for ``dwh.cfg`` and ask whether
       the tables should be created and the ETL launched.
    4. For any other status, print a diagnostic message and stop.

    The process is terminated through ``sys.exit``: code 0 when the user
    quits or declines the ETL, code 1 when the cluster is not usable.
    """
    client, cluster_name = create_rc.main()
    cluster_status = check_rc.check_cluster_status(client, cluster_name)

    if cluster_status == 'creating':
        creation_notice = (
            f"Cluster '{cluster_name}' is being created.\n"
            "This can take several minutes. Do you want to Wait (W) or Exit (Q)?\n"
            "Exiting won't interrupt cluster creation. You can resume later by re-launching main.py"
        )
        print(creation_notice)
        answer = advanced_input(['q', 'Q', 'w', 'W']).lower()

        if answer == 'q':
            sys.exit(0)
        if answer == 'w':
            print("Waiting...")
            # Keep polling until the cluster leaves the 'creating' state.
            while cluster_status == 'creating':
                time.sleep(20)
                cluster_status = check_rc.check_cluster_status(
                    client, cluster_name)
                print(f"Waiting... cluster status: {cluster_status}.")

    # Guard clause: nothing can be done unless the cluster is available.
    if cluster_status != 'available':
        print(
            f"Cluster '{cluster_name}' current status: '{cluster_status}'.\n"
            "Please activate or repair the cluster and relaunch the program.\n"
            "Exiting.")
        sys.exit(1)

    description = client.describe_clusters(ClusterIdentifier=cluster_name)
    endpoint_address = description['Clusters'][0]['Endpoint']['Address']
    # TODO: remove local reference to 'dwh.cfg'.
    create_rc.update_section_key(
        'dwh.cfg', 'CLUSTER', 'cl_endpoint', endpoint_address)

    # The cluster is ready: let the user decide whether to run the ETL now.
    etl_prompt = (
        f"Cluster '{cluster_name}' available.\n"
        "Do you want to create tables and launch the ETL process? Yes (Y), No (n)?\n"
        "This will drop existing tables, re-create them and load data."
    )
    print(etl_prompt)
    launch_etl = advanced_input(['y', 'Y', 'n', 'N'])
    if launch_etl.lower() != 'y':
        sys.exit(0)

    create_tables.main()
    etl.main()

    print("The End.")
コード例 #2
0
ファイル: test_etl.py プロジェクト: mozilla-tw/taipei-bi-etl
def test_rps__global_package__fs():
    """Call ``etl.main()`` with ``sys.argv`` replaced by a fixed command line.

    The command line selects debug mode, the ``test`` config, the ``rps``
    task, step ``e``, the ``global_package`` source and the ``fs``
    destination.
    """
    cli_options = (
        "--debug",
        "--config=test",
        "--task=rps",
        "--step=e",
        "--source=global_package",
        "--dest=fs",
    )
    # argv[0] is the script name, followed by the options above.
    sys.argv = ["./etl.py", *cli_options]
    etl.main()
コード例 #3
0
def execute():
    """
    Run the complete ETL process from start to finish.

    First ``create_tables.main()`` is called (creates the database tables),
    then ``etl.main()`` (loads the json files into those tables). Progress
    messages and separator lines are printed between the steps.
    """
    separator = '-' * 70

    create_tables.main()
    print('All tables are created!')
    print(separator)

    etl.main()
    print(separator)
    print('OMG is done!')
コード例 #4
0
# Put the App Engine SDK directory and the libraries bundled with it on the
# import path. Each entry is inserted at index 0, so an entry listed later
# ends up earlier in sys.path.
for _sdk_path in (
        '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine',
        '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/fancy_urllib',
        '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/jinja2-2.6',
        '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2',
        '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webob-1.2.3',
        '/Applications/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/yaml/lib',
):
    sys.path.insert(0, _sdk_path)

import argparse
import etl
if __name__ == "__main__":
    # Build a parser that inherits every argument declared on etl.PARSER,
    # parse the command line with it and hand the result to etl.main().
    # NOTE(review): add_help=False presumably avoids a duplicate -h/--help
    # clash with the parent parser -- confirm against etl.PARSER.
    PARSER = argparse.ArgumentParser(parents=[etl.PARSER], add_help=False)
    parsed_args = PARSER.parse_args()
    etl.main(parsed_args)
コード例 #5
0
import sys
# Put the project directory, the App Engine SDK directory and the libraries
# bundled with the SDK on the import path. Each entry is inserted at index 0,
# so an entry listed later ends up earlier in sys.path.
for _extra_path in (
        '/home/smaini/BigDataMOOC',
        '/home/smaini/google_appengine',
        '/home/smaini/google_appengine/lib/fancy_urllib',
        '/home/smaini/google_appengine/lib/jinja2-2.6',
        '/home/smaini/google_appengine/lib/webapp2-2.5.2',
        '/home/smaini/google_appengine/lib/webob-1.2.3',
        '/home/smaini/google_appengine/lib/yaml/lib',
):
    sys.path.insert(0, _extra_path)

import argparse
import etl
if __name__ == "__main__":
    # Build a parser that inherits every argument declared on etl.PARSER,
    # parse the command line with it and hand the result to etl.main().
    # NOTE(review): add_help=False presumably avoids a duplicate -h/--help
    # clash with the parent parser -- confirm against etl.PARSER.
    PARSER = argparse.ArgumentParser(parents=[etl.PARSER], add_help=False)
    parsed_args = PARSER.parse_args()
    etl.main(parsed_args)
コード例 #6
0
ファイル: main.py プロジェクト: Mohammed966/spark_datalake
import etl
# Invoke the etl module's entry point. There is no ``__main__`` guard, so
# this call runs whenever the module is executed or imported.
etl.main()