def post(self):
    """
    Executes a prep job to create an image corpus for training.

    Use this method to start a prep job.
    """
    job_def = request.json
    # Hardcoded: always process the JSON file from the project folder.
    job_def['process_json'] = True
    job = Job(job_def['name'], job_def)
    job.type = 'preprocess'
    dt = newdt.now()
    job.start_time = int(dt.timestamp() * 1000)
    job.request = {
        'full_path': request.full_path,
        'remote_addr': request.remote_addr,
        'method': request.method
    }
    # Enqueue the preprocess task; the task never times out and its
    # result is kept for 24 hours.
    jb = aug_queue.enqueue(
        preprocess, job, job_timeout=-1, result_ttl=86400, ttl=-1)
    jb.meta['job_def'] = job_def
    dt = newdt.now()
    jb.meta['job_init_time'] = str(int(dt.timestamp() * 1000))
    jb.status = 'Running'
    jb.save_meta()
    # Record the job definition under both the 'running' and 'all' prefixes.
    json_str = job.to_json_string()
    st = {
        'BUCKET': job.bucket,
        'USE_GCS': job.use_gcs,
        'ACCESS_KEY': access_key,
        'SECRET_KEY': secret_key,
        'S3_URL': s3_url
    }
    storage = Storage(st)
    storage.upload_data(
        json_str,
        'jobs/running/{}_0_preprocess_r_{}.json'.format(str(job.start_time), jb.id),
        contentType='application/json')
    storage.upload_data(
        json_str,
        'jobs/all/{}_0_preprocess_r_{}.json'.format(str(job.start_time), jb.id),
        contentType='application/json')
    return {"status": jb.status, 'job_id': jb.id, 'meta': jb.meta}, 201
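# Example (illustrative only): invoking the prep endpoint from a client.
# The route path ('/prep'), host, and the extra job_def fields below are
# assumptions for this sketch, not confirmed by this module.
import requests

prep_def = {
    'name': 'my-prep-job',           # required: used as the Job name
    'bucket': 'my-training-bucket',  # assumed: read by Job/Storage for uploads
    'use_gcs': True,                 # assumed: selects GCS over S3 storage
}
resp = requests.post('http://localhost:5000/prep', json=prep_def)
assert resp.status_code == 201
print(resp.json())  # {'status': 'Running', 'job_id': '...', 'meta': {...}}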
def test_train_mlengine(self):
    train_job.train_job['name'] = str(uuid.uuid4())
    job = Job(train_job.train_job['name'], train_job.train_job)
    job.type = 'preprocess'
    job.init_temp(str(uuid.uuid4()))
    try:
        logging.info("step1")
        job.init_storage()
        logging.info("step2")
        if not hasattr(job, 'label_file') or job.label_file is None:
            job.label_file = 'corpus/' + job.prep_name + "/object-detection.pbtxt"
        job.init_labels()
        self.assertGreater(len(job.categories), 0)
        logging.info("step3")
        source = json.loads(
            job.download_to_string('corpus/' + job.prep_name + "/job_def.json"))
        job.project_name = source['project_name']
        logging.info("step4")
        updateFileML(job)
        logging.info("step5")
        upload_model(job)
        logging.info("step6")
        upload_packages(job)
        logging.info("step7")
        start_ml_engine(job)
        logging.info("step8")
        history = json.loads(
            job.download_to_string(
                'corpus/' + job.prep_name + "/job_history.json"))
        upload_metadata(job, "training_jobs/" + job.name, history)
    finally:
        job.cleanup()
def test_train_mlengine_copy(self):
    train_job.train_job['name'] = str(uuid.uuid4())
    job = Job(train_job.train_job['name'], train_job.train_job)
    job.type = 'preprocess'
    job.init_temp(str(uuid.uuid4()))
    try:
        logging.info("step1")
        job.init_storage()
        logging.info("step2")
        if hasattr(job, 'source_training') and job.source_training != '':
            sjd = json.loads(
                job.download_to_string(
                    'training_jobs/' + job.source_training + "/job_def.json"))
            job.num_train_steps += sjd['num_train_steps']
            job.model = sjd['model']
            st = 'training_jobs/{}/'.format(job.source_training)
            dt = 'training_jobs/{}/'.format(job.name)
            job.copy_folder(st, dt)
            job.delete_cloud_file('{}{}'.format(dt, "job_def.json"))
            job.delete_cloud_file('{}{}'.format(dt, "job_history.json"))
        logging.info("step3")
        if not hasattr(job, 'label_file') or job.label_file is None:
            job.label_file = 'corpus/' + job.prep_name + "/object-detection.pbtxt"
        job.init_labels()
        self.assertGreater(len(job.categories), 0)
        logging.info("step4")
        source = json.loads(
            job.download_to_string('corpus/' + job.prep_name + "/job_def.json"))
        job.project_name = source['project_name']
        logging.info("step5")
        updateFileML(job)
        logging.info("step6")
        upload_model(job)
        logging.info("step7")
        upload_packages(job)
        logging.info("step8")
        start_ml_engine(job)
        logging.info("step9")
        history = json.loads(
            job.download_to_string(
                'corpus/' + job.prep_name + "/job_history.json"))
        upload_metadata(job, "training_jobs/" + job.name, history)
    finally:
        job.cleanup()
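# Example (illustrative only): a job_def that resumes from a previous training.
# When 'source_training' is a non-empty name, the test above accumulates that
# job's num_train_steps, reuses its model, and copies its training folder
# before removing the stale job_def.json/job_history.json. Field names other
# than 'source_training', 'num_train_steps', and 'model' are assumptions.
resumed_job_def = {
    'name': 'train-v2',
    'source_training': 'train-v1',  # previous training_jobs/<name> to copy from
    'num_train_steps': 10000,       # added to the source job's step count
    'prep_name': 'my-corpus',       # assumed: corpus produced by a prep job
}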
def post(self):
    """
    Executes a training.

    Use this method to start a training.
    """
    job_def = request.json
    job = Job(job_def['name'], job_def)
    job.type = 'train'
    dt = newdt.now()
    job.start_time = int(dt.timestamp() * 1000)
    job.request = {
        'full_path': request.full_path,
        'remote_addr': request.remote_addr,
        'method': request.method
    }
    # Route to the ML Engine worker when requested; otherwise train locally.
    if hasattr(job, 'ml_engine') and job.ml_engine:
        jb = train_queue.enqueue(
            train_mlengine, job, job_timeout=-1, result_ttl=-1)
    else:
        jb = train_queue.enqueue(train_job_method, job, job_timeout=-1)
    jb.meta['job_init_time'] = str(int(dt.timestamp() * 1000))
    jb.meta['job_def'] = job_def
    jb.save_meta()
    json_str = job.to_json_string()
    st = {
        'BUCKET': job.bucket,
        'USE_GCS': job.use_gcs,
        'ACCESS_KEY': access_key,
        'SECRET_KEY': secret_key,
        'S3_URL': s3_url
    }
    storage = Storage(st)
    storage.upload_data(
        json_str,
        'jobs/running/{}_0_train_r_{}.json'.format(str(job.start_time), jb.id),
        contentType='application/json')
    storage.upload_data(
        json_str,
        'jobs/all/{}_0_train_r_{}.json'.format(str(job.start_time), jb.id),
        contentType='application/json')
    return {"status": jb.get_status(), 'job_id': jb.id, 'meta': jb.meta}, 201
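# Example (illustrative only): starting a training via this endpoint. The
# '/train' path and host are assumptions; 'ml_engine' decides whether the job
# is enqueued for train_mlengine or for the local train_job_method worker.
import requests

resp = requests.post('http://localhost:5000/train', json={
    'name': 'train-v1',
    'ml_engine': True,        # False/absent -> local training worker
    'prep_name': 'my-corpus'  # assumed: corpus the trainer reads from
})
print(resp.status_code, resp.json()['job_id'])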
def test_process_all(self):
    for jb in prep_job.jobs:
        job = Job(jb['name'], jb)
        job.type = 'preprocess'
        job.init_temp(str(uuid.uuid4()))
        try:
            job.init_labels()
            job.init_storage()
            job.testcoco = {
                "info": {
                    "description": "COCO 2017 Dataset",
                    "url": "http://cocodataset.org",
                    "version": "1.0",
                    "year": 2018,
                    "contributor": "COCO Consortium",
                    "date_created": "2017/09/01"
                },
                "licenses": [],
                "images": [],
                "categories": [],
                "annotations": [],
            }
            job.traincoco = {
                "info": {
                    "description": "COCO 2017 Dataset",
                    "url": "http://cocodataset.org",
                    "version": "1.0",
                    "year": 2018,
                    "contributor": "COCO Consortium",
                    "date_created": "2017/09/01"
                },
                "licenses": [],
                "images": [],
                "categories": [],
                "annotations": [],
            }
            process_json(job)
            create_label_pbtxt(job)
            create_tf_example(job)
            create_tf_example(job, False)
            delete_staged(job)
            upload_metadata(job)
        finally:
            job.cleanup()
def test_init_storage(self):
    for jb in prep_job.jobs:
        job = Job(jb['name'], jb)
        job.type = 'preprocess'
        job.init_temp(str(uuid.uuid4()))
        try:
            job.init_labels()
            job.init_storage()
            job.testcoco = {
                "info": {
                    "description": "COCO 2017 Dataset",
                    "url": "http://cocodataset.org",
                    "version": "1.0",
                    "year": 2018,
                    "contributor": "COCO Consortium",
                    "date_created": "2017/09/01"
                },
                "licenses": [],
                "images": [],
                "categories": [],
                "annotations": [],
            }
            job.traincoco = {
                "info": {
                    "description": "COCO 2017 Dataset",
                    "url": "http://cocodataset.org",
                    "version": "1.0",
                    "year": 2018,
                    "contributor": "COCO Consortium",
                    "date_created": "2017/09/01"
                },
                "licenses": [],
                "images": [],
                "categories": [],
                "annotations": [],
            }
        finally:
            job.cleanup()
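# The two tests above seed job.testcoco and job.traincoco with identical COCO
# skeletons. A small helper like this (a suggested refactor, not part of the
# current code) would remove the duplication:
def empty_coco_dataset():
    """Return a fresh, empty COCO-style dataset skeleton."""
    return {
        "info": {
            "description": "COCO 2017 Dataset",
            "url": "http://cocodataset.org",
            "version": "1.0",
            "year": 2018,
            "contributor": "COCO Consortium",
            "date_created": "2017/09/01",
        },
        "licenses": [],
        "images": [],
        "categories": [],
        "annotations": [],
    }

# Usage in the tests: job.testcoco = empty_coco_dataset()
#                     job.traincoco = empty_coco_dataset()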