def main():
    """Rebuild the ``b2g_tests`` table and load it from the source file.

    Steps, in order:

    1. Read settings from ``file2db_settings.json`` and start logging with
       ``settings.debug``.
    2. Inside a ``DB(settings.db)`` context, drop ``b2g_tests`` if it exists
       and create it again with the columns listed below.
    3. Call ``file2db`` with that connection, the table name and
       ``settings.source_file``.

    Any exception raised along the way is passed to ``Log.error`` together
    with the message "can not seem to startup".
    """
    try:
        settings = startup.read_settings(filename="file2db_settings.json")
        Log.start(settings.debug)

        with DB(settings.db) as db:
            # Always start from an empty table: drop, then re-create.
            db.execute(""" DROP TABLE IF EXISTS b2g_tests """)
            db.execute("""
                CREATE TABLE b2g_tests (
                    id INTEGER PRIMARY KEY NOT NULL,
                    branch VARCHAR(100),
                    name VARCHAR(100),
                    version VARCHAR(100),
                    suite varchar(200),
                    revision varchar(100),
                    `date` LONG
                )
            """)

            file2db(db, "b2g_tests", settings.source_file)
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 (the comma form is
        # Python 2 only).
        Log.error("can not seem to startup", e)
def main():
    """Entry point for the Datazilla extraction into Elasticsearch.

    Command-line switches declared here (``dest`` in parentheses):

    * ``--no_restart`` and aliases (``no_restart``): store_true. Declared,
      but not read anywhere in this function.
    * ``--restart`` / ``--reset`` / ``--redo`` (``restart``): store_true.
      When set, a new index is created, aliased, and
      ``delete_all_but_self()`` is called on it before extraction.
    * ``--file`` and aliases (``scan_file``): store_true.
    * ``--nofile`` and aliases (``scan_file``): store_false.

    The work runs inside ``startup.SingleInstance`` keyed on
    ``settings.args.filename``. Any exception is passed to ``Log.error``
    with the message "Problem with etl".
    """
    try:
        settings = startup.read_settings(defs=[
            {
                "name": ["--no_restart", "--no_reset", "--no_redo", "--norestart", "--noreset", "--noredo"],
                "help": "do not allow creation of new index (for debugging rouge resets)",
                "action": "store_true",
                "dest": "no_restart"
            },
            {
                "name": ["--restart", "--reset", "--redo"],
                "help": "force a reprocessing of all data",
                "action": "store_true",
                "dest": "restart"
            },
            {
                "name": ["--file", "--scan_file", "--scanfile", "--use_file", "--usefile"],
                "help": "scan file for missing ids",
                "action": "store_true",
                "dest": "scan_file"
            },
            {
                "name": ["--nofile", "--no_file", "--no-file"],
                "help": "do not scan file for missing ids",
                "action": "store_false",
                "dest": "scan_file"
            }
        ])
        Log.start(settings.debug)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            # Fill in fallbacks for optional settings
            # (nvl presumably returns its first non-null argument -- confirm).
            settings.production.threads = nvl(settings.production.threads, 1)
            settings.param.output_file = nvl(settings.param.output_file, "./results/raw_json_blobs.tab")

            transformer = DZ_to_ES(settings.pushlog)

            # RESET ONLY IF NEW Transform IS USED
            if settings.args.restart:
                es = Cluster(settings.elasticsearch).create_index(settings.elasticsearch)
                es.add_alias()
                es.delete_all_but_self()
            else:
                es = Cluster(settings.elasticsearch).get_or_create_index(settings.elasticsearch)

            # Same extraction call for both paths; only the index setup differs.
            extract_from_datazilla_using_id(es, settings, transformer)
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3 (the comma form is
        # Python 2 only).
        Log.error("Problem with etl", e)