def get_image_ids_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()  

        project = get_current_project(session)
        version = get_current_version(session)

        data = request.get_json(force=True)
        search_term = data.get("search_term", None)
        print(search_term, file=sys.stderr)
        if search_term is None:
            Images = session.query(Image).filter_by(version_id=version.id).order_by(Image.original_filename.desc()).limit(128)
        else: 
            search_term = "%" + search_term + "%"
            Images = session.query(Image).filter_by(version_id=version.id).filter(Image.original_filename.like(search_term)).order_by(Image.original_filename.desc()).limit(128)

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

        Pre_condition_checked_images = []
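        # Skip soft-deleted images and re-sign any GCS URLs that have expired,
        # so the client can load images directly from the bucket.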
        for i in Images:
            if i.soft_delete != True:
                if i.url_signed_expiry is None or i.url_signed_expiry <= time.time():
                    rebuild_secure_urls(session, project, version, i)                    

                Pre_condition_checked_images.append(i)
        
        out = jsonify(images=[i.serialize() for i in Pre_condition_checked_images])

        return out, 200, {'ContentType':'application/json'}
def runNewModel(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    JOB_NAME = "a_" + str(project.id)

    # Creating model
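    # `ml` and `projectID` are assumed to be module-level here: the Cloud ML
    # discovery client and the 'projects/{name}' parent path (see how they are
    # built inside runInferenceSingle below).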
    requestDict = {'name': JOB_NAME,
                   'description': 'Built by runNewModel()'}
    request = ml.projects().models().create(parent=projectID, body=requestDict)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        operationID = response['name']
        out = 'success'
        return out, 200, {'ContentType':'application/json'}
    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 200, {'ContentType':'application/json'}

    return "Success", 200
    def test_out_scope(session):    
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        Images = session.query(Image).filter_by(version_id=version.id, is_test_image=True).order_by(Image.id.desc())

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

        Public_urls = []
        expiration_time = int(time.time() + 300)

        file = str(project.id)+"/"+str(version.id)+"/"+ "test_inference_out/"+ "0_.jpg"
        blob = bucket.blob(file)
        public_url = blob.generate_signed_url(expiration=expiration_time)
        Public_urls.append(public_url)

        out = {}
        out['image_ids'] = [i.id for i in Images]
        out['width'] = [i.width for i in Images]
        out['height'] = [i.height for i in Images]
        out['public_url'] = [i for i in Public_urls]

        return json.dumps(out), 200, {'ContentType':'application/json'}
    def remove_duplicate_filenames_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        # May want to remove specific duplicates only
        data = request.get_json(force=True)   
        images = data['images']

        version = get_current_version(session)
        Existing_images = session.query(Image).filter_by(version_id=version.id)
    
        seen_file_once = []
        looked_at = 0
        soft_delete_marked = 0
        for i in Existing_images:
            looked_at += 1
            for j in images:
                if i.id == j['id']:
                    if i.original_filename not in seen_file_once:
                        seen_file_once.append(i.original_filename)
                    else:
                        i.soft_delete = True
                        session.add(i)
                        soft_delete_marked +=1

            if looked_at % 100 == 0:
                print("Looked at", looked_at, "Removed", soft_delete_marked)
    
        print("Removed", soft_delete_marked, "duplicates")             
        out = 'success'

        return json.dumps(out), 200, {'ContentType':'application/json'}
    def toggle_test_image_all_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)   # force=True in case Content-Type is not set to application/json
        images = data['images']
        print("len(images)", len(images), file=sys.stderr)

        version = get_current_version(session)

        for i in images:
            image_db = session.query(Image).filter_by(version_id=version.id, id=i['id']).first()

            if image_db.is_test_image == True:
                version.train_length += 1
                version.test_length -= 1
            else:
                version.train_length -= 1
                version.test_length += 1
              
            image_db.is_test_image = not image_db.is_test_image
            session.add(image_db)
        
        session.add(version)
        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
    def image_delete_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()


        data = request.get_json(force=True)   # force=True in case Content-Type is not set to application/json
        image = data['image']

        version = get_current_version(session)

        Existing_images = session.query(Image).filter_by(version_id=version.id)

        # Could delete multiple if we get a list of images to delete....
        # Same issue as above otherwise; could do in query by checking id, ie
        # session.query(Employer).filter_by(id=employer_id).one()

        for i in Existing_images:
            if i.id == image['id']:
                i.soft_delete = True

                if i.is_test_image is True:
                    version.test_length -= 1
                else:
                    version.train_length -= 1
                # TODO Handle updating test / train length cache without db hit
               
                session.add(i)

        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
    def get_boxes_scope(session):

        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)

        image_id = data.get('image_id')
        if image_id is not None:
            print("current_image.id", image_id, file=sys.stderr)
            boxes = session.query(Box).filter_by(image_id=image_id).order_by(Box.id.desc())
        else:
            # This could be more sophisticated ie store last image we were working with
            version = get_current_version(session)
            image = session.query(Image).filter_by(version_id=version.id).order_by(Image.id.desc()).first()
            boxes = session.query(Box).filter_by(image_id=image.id).order_by(Box.id.desc())

        labels = []
        for b in boxes:
            label = session.query(Label).filter_by(id=b.label_id)
            labels.append(label[0].serialize())

        out = jsonify(boxes=[i.serialize() for i in boxes], labels=labels)
        

        return out, 200, {'ContentType':'application/json'}
    def new_box_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)   # force=True in case Content-Type is not set to application/json
        boxes = data['boxes']

        existing_boxes = session.query(Box).filter_by(image_id=data['image_id']).all()

        # TODO better way to do this
        # Replace the image's existing boxes with the submitted set
        # (also covers the case where all boxes were deleted client side)
        for box_old in existing_boxes:
            session.delete(box_old)

        for box in boxes:
            if box['width'] > 5 and box['height'] > 5:
                new_box = Box(x_min=box['x_min'], y_min=box['y_min'], x_max=box['x_max'], y_max=box['y_max'],
                                width=box['width'], height=box['height'], 
                                image_id = box['image_id'], 
                                label_id = box['label']['id'])
    
                session.add(new_box)
                session.commit()
                print(box['id'], file=sys.stderr)

        return json.dumps({'success':True}), 200, {'ContentType':'application/json'} 
    def remove_duplicate_filenames_all_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        version = get_current_version(session)
        Existing_images = session.query(Image).filter_by(version_id=version.id)

        seen_file_once = []
        looked_at = 0
        soft_delete_marked = 0
        # Basic greedy approach:
        # if we have already seen the filename, mark later objects with the same filename as soft delete; else add it to the list
        for i in Existing_images:   
            if i.original_filename in seen_file_once:
                i.soft_delete = True
                session.add(i)
                soft_delete_marked +=1
            else:
                seen_file_once.append(i.original_filename)
                looked_at += 1

            if looked_at % 100 == 0:
                print("Looked at", looked_at, "Removed", soft_delete_marked)
    
        print("Removed", soft_delete_marked, "duplicates")
        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
def runTraining(session):
    if LoggedIn() != True:
        return defaultRedirect()

    have_error, params = training_pre_conditions(session)
    if have_error:
        print("have error", params, file=sys.stderr)
        return json.dumps(params), 200, {'ContentType':'application/json'}

    # TODO Thinking on reasonable way to "copy" a version and track changes

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)

    JOB_NAME = "train_" + machine_learning_settings.JOB_NAME
    print(JOB_NAME, file=sys.stderr)

    REGION="us-central1"
    RUNTIME_VERSION="1.2"

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id) + "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/"
    JOB_DIR = root_dir + "train"
    pipeline_config_path = root_dir + "faster_rcnn_resnet.config"

    MAIN_TRAINER_MODULE='object_detection.train'

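    # Cloud ML Engine job spec: a CUSTOM scale tier with a GPU master, two GPU
    # workers and one GPU parameter server, running object_detection.train from
    # the packaged object_detection / slim libraries against the generated
    # pipeline config.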
    training_inputs = {'scaleTier': 'CUSTOM',
                       'masterType': 'standard_gpu',
                       'workerType': 'standard_gpu',
                       'parameterServerType': 'standard_gpu',
                       'workerCount': 2,
                       'parameterServerCount': 1,
                       'packageUris': ['gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_OBJECT_DETECTION_PYTHON,
                                       'gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_SLIM_PYTHON],
                       'pythonModule': MAIN_TRAINER_MODULE,
                       'args': ['--train_dir', JOB_DIR,
                                '--pipeline_config_path', pipeline_config_path],
                       'region': REGION,
                       'jobDir': JOB_DIR,
                       'runtimeVersion': RUNTIME_VERSION}

    job_spec = {'jobId': JOB_NAME, 'trainingInput': training_inputs}

    request = ml.projects().jobs().create(body=job_spec, parent=projectID)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        out = 'success'
        return out, 200, {'ContentType':'application/json'}

    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 500, {'ContentType':'application/json'}

    return "success", 200
    def version_view_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        version = get_current_version(session)
        out = jsonify(version=version.serialize())

        return out, 200, {'ContentType':'application/json'}
def user_view():
    if LoggedIn() != True:
        return defaultRedirect()

    with sessionMaker.session_scope() as session:
        user_id = getUserID()
        user = session.query(User).filter_by(id=getUserID()).first()
        out = jsonify(user=user.serialize())
        return out, 200, {'ContentType': 'application/json'}
def trainingFrozenRun(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)

    #now=strftime("%Y_%m_%d_%H_%M_%S", gmtime())
    JOB_NAME = "frozen_user_" + machine_learning_settings.JOB_NAME
    print(JOB_NAME, file=sys.stderr)

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id) + "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/"
    JOB_DIR = root_dir + str(machine_learning_settings.re_train_id) + "/frozen"
    REGION ="us-central1"
    RUNTIME_VERSION ="1.2"
 
    # Should be updated during training and stored in the db?
    trained_checkpoint_prefix = configNew.check_actual_model_path_name(session=session)

    pipeline_config_path = root_dir + "faster_rcnn_resnet.config"
    MAIN_TRAINER_MODULE ="object_detection.export_inference_graph"

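    # Export job spec: a single large_model master running
    # object_detection.export_inference_graph to freeze the trained checkpoint
    # into a saved_model under JOB_DIR.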
    training_inputs = {'scaleTier': 'CUSTOM',
                       'masterType': 'large_model',
                       'workerCount': 0,
                       'packageUris': ['gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_OBJECT_DETECTION_PYTHON,
                                       'gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_SLIM_PYTHON],
                       'pythonModule': MAIN_TRAINER_MODULE,
                       'args': ['--trained_checkpoint_prefix', trained_checkpoint_prefix,
                                '--pipeline_config_path', pipeline_config_path,
                                '--input_type', 'encoded_image_string_tensor',
                                '--output_directory', JOB_DIR],
                       'region': REGION,
                       'jobDir': JOB_DIR,
                       'runtimeVersion': RUNTIME_VERSION}

    job_spec = {'jobId': JOB_NAME, 'trainingInput': training_inputs}

    request = ml.projects().jobs().create(body=job_spec, parent=projectID)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        out = 'success'
        return out, 200, {'ContentType':'application/json'}
    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 200, {'ContentType':'application/json'}

    return "Success", 200
def machine_learning_settings_edit(session, next_id=False):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session, version)

    if next_id is True: 
        machine_learning_settings.ml_compute_engine_id += 1

    session.add(machine_learning_settings)
    session.commit()

    return "Success", 200
    def toggle_done_labeling_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)   # force=True in case Content-Type is not set to application/json
        image = data['image']

        version = get_current_version(session)
        Existing_images = session.query(Image).filter_by(version_id=version.id)

        for i in Existing_images:
            if i.id == image['id']:
                i.done_labeling = not i.done_labeling
                session.add(i)

        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
def runNewVersion(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)
    project_root = "a_" + str(project.id)

    # Creating version
    modelID= '{}/models/{}'.format(projectID, project_root)
    versionName = "a_" + str(version.id) + "_" + str(machine_learning_settings.ml_compute_engine_id)
    versionName += "_" + str(machine_learning_settings.re_train_id)

    # Maybe could include more info like date time?
    versionDescription = 'created by runNewVersion()'
    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id)
    root_dir += "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/" + str(machine_learning_settings.re_train_id) + "/"
    JOB_DIR = root_dir + "frozen/saved_model"

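    # Version create request: deploymentUri points at the frozen saved_model
    # exported by trainingFrozenRun.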
    requestDict = {'name': versionName,
                    'description': versionDescription,
                    'deploymentUri': JOB_DIR,
                    'runtimeVersion': '1.2'}

    request = ml.projects().models().versions().create(
        parent=modelID, body=requestDict)

    try:
        response = request.execute()
        print(response, file=sys.stderr)

        operationID = response['name']

        out = 'success'
        return out, 200, {'ContentType':'application/json'}
    except errors.HttpError as err:
        # Something went wrong, print out some information.
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 200, {'ContentType':'application/json'}
    
    return out, 200, {'ContentType':'application/json'}
    def label_refresh_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        Labels_db = session.query(Label).filter_by(
            project_id=project.id).order_by(Label.id.desc())
        # TODO can do soft_delete != "True" check in here???

        Labels = []
        for i in Labels_db:
            if i.soft_delete != True:
                Labels.append(i)

        out = {}
        out['ids'] = [i.id for i in Labels]
        out['names'] = [i.name for i in Labels]
        #Colour?

        return json.dumps(out), 200, {'ContentType': 'application/json'}
    def label_delete_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(
            force=True)  # force=True in case Content-Type is not set to application/json
        label = data['label']

        project = get_current_project(session)
        existing_Labels = session.query(Label).filter_by(
            project_id=project.id).order_by(Label.id.desc())

        for i in existing_Labels:
            if i.id == label['id']:
                i.soft_delete = True
                session.add(i)

        out = 'success'
        session.commit()

        return json.dumps(out), 200, {'ContentType': 'application/json'}
def machine_learning_settings_new(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)

    iterations = 2000 # Could get from user
    previous_goal_iterations = iterations
    ml_compute_engine_id = 0

    new_ml_settings = Machine_learning_settings(iterations=iterations,
                                                previous_goal_iterations=previous_goal_iterations,
                                                ml_compute_engine_id=ml_compute_engine_id)
    session.add(new_ml_settings)
    session.commit()

    version.machine_learning_settings_id = new_ml_settings.id
    session.add(version)
    session.commit()

    return "Success", 200
    def label_new_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(
            force=True)  # force=True in case Content-Type is not set to application/json
        label = data['label']
        print(label, data.keys(), file=sys.stderr)

        have_error = False
        params = {}
        #existing_label = session.query(Label).filter_by(id=label['id']).first()
        existing_label = None  # Maybe do more with this later

        project = get_current_project(session)

        if label is None:
            params['error'] = "No Label"
            have_error = True

        if existing_label is not None:
            params['error'] = "Existing label"
            have_error = True

        if have_error:
            return json.dumps(params), 200, {'ContentType': 'application/json'}
        else:
            label['colour'] = "blue"  # since JS is being strange
            new_label = Label(name=label['name'],
                              colour=label['colour'],
                              project_id=project.id)
            session.add(new_label)

            return json.dumps({'success': True}), 200, {
                'ContentType': 'application/json'
            }
def runTrainingPipeline(re_train=0):

    """
    Run the full training pipeline on a background thread.

    re_train: 1 == retrain an existing model, 0 == train from scratch.
    (Could pass a bool here; that would be preferable, but using an int for now.)
    """

    if LoggedIn() != True:
        return defaultRedirect()
        
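    # Pipeline outline (runs on a background thread):
    #   YAML annotations -> label map -> pipeline config -> tf records
    #   -> training job (polled, state published to Pub/Sub)
    #   -> new model (fresh runs only) -> frozen export job (polled)
    #   -> new model version -> single test inference.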
    @copy_current_request_context
    def task_manager():
        def task_manager_scope(session):
            print("[Training task manager] Started. Retrain_flag:", re_train,  file=sys.stderr)
            session = sessionMaker.scoppedSession() # Threadsafe

            # Maybe better to have this somewhere else
            version = get_current_version(session=session)
            if version.machine_learning_settings_id is None:
                ml_settings.machine_learning_settings_new(session=session)

            # Advance one for training if not retraining
            if re_train == 0:
                ml_settings.machine_learning_settings_edit(session=session, next_id=True)

            project = get_current_project(session=session)

            machine_learning_settings = get_ml_settings(session=session, version=version)

            JOB_NAME = "__projectID_" + str(project.id) + "__versionID_" + str(version.id) + "__ml_compute_id_" + str(machine_learning_settings.ml_compute_engine_id)

            if re_train == 1:
                machine_learning_settings.re_train_id += 1
                JOB_NAME += "__retrainID_" + str(machine_learning_settings.re_train_id)
        
            machine_learning_settings.JOB_NAME = JOB_NAME
            session.add(machine_learning_settings)
            session.commit()

            # Do YAML for retraining
            # TODO way to detect if this is needed or not...
            yamlNew(hold_thread=True)

            labelMapNew()
            fasterRcnnResnetNew(re_train=re_train)  # Config file

            tfrecordsNew(hold_thread=True)

            ### TRAINING
            runTraining(session)
            
            config = {}
            config['PUBSUB_TOPIC'] = settings.PUB_SUB_TOPIC
            config['PROJECT'] = settings.GOOGLE_PROJECT_NAME
            publisher = pubsub_v1.PublisherClient()
            topic_path = publisher.topic_path(config['PROJECT'], config['PUBSUB_TOPIC'])
            JOB_NAME = "train_" + machine_learning_settings.JOB_NAME
            JOB_NAME_FORMATTED = projectID + "/jobs/" + JOB_NAME

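            # Poll the training job every 30 seconds, forwarding each state
            # response to Pub/Sub, until it succeeds, fails or is cancelled.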
            training_flag = True
            while training_flag is True:
                
                request = ml.projects().jobs().get(name=JOB_NAME_FORMATTED)
                # TODO error handling
                response = request.execute()
                
                data = json.dumps(response)
                print(data, file=sys.stderr)
                data = data.encode()
                publisher.publish(topic_path, data=data)

                state = response['state']
                if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
                    training_flag = False
                else:
                    time.sleep(30)
            
            #### END TRAINING

            # Now need to run new model on re training
            if re_train == 0:
                runNewModel(session)

            ##### FROZEN
            trainingFrozenRun(session)

            JOB_NAME = "frozen_user_" + machine_learning_settings.JOB_NAME
            JOB_NAME_FORMATTED = projectID + "/jobs/" + JOB_NAME

            frozen_flag = True
            while frozen_flag is True:
                
                request = ml.projects().jobs().get(name=JOB_NAME_FORMATTED)

                # TODO error handling
                response = request.execute()

                data = json.dumps(response)
                print(data, file=sys.stderr)
                data = data.encode()

                publisher.publish(topic_path, data=data)

                state = response['state']
                if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
                    frozen_flag = False
                else:
                    time.sleep(30)

            
            #####
            runNewVersion(session)
            time.sleep(60*8)  # Sleep while long running operation
            runInferenceSingle()

            print("[Training task manager] SUCCESS", file=sys.stderr)
            t.cancel()

        with sessionMaker.session_scope() as session:
            task_manager_scope(session)


    t = threading.Timer(0, task_manager)
    t.daemon = True
    t.start()

    out = 'success'
    return out, 200, {'ContentType':'application/json'}
         
def test(): 
    if LoggedIn() != True:
        return defaultRedirect()
    
    return render_template('/annotation/test.html')
def uploadPOST():
    if LoggedIn() != True:
        return defaultRedirect()
 
    @copy_current_request_context
    def task_manager(name, extension):  # Defined here so as to use the request context decorator with a scoped session.
        def task_manager_scope(session):
            counter = 0
            project = get_current_project(session)
            version = get_current_version(session)
            project_id = project.id
            version_id = version.id
            out = ""
            with open(name, "rb") as file:              
                if extension == ".zip":
                    try:                        
                        zip_ref = zipfile.ZipFile(BytesIO(file.read()), 'r')
                        temp_dir = tempfile.mkdtemp()
                        zip_ref.extractall(temp_dir)
                        zip_ref.close()

                        filenames = sorted(os.listdir(temp_dir))
                        len_filenames = len(filenames)  # Variable used in loop below so storing here
                        print("[ZIP processor] Found", len_filenames, file=sys.stderr)

                        Thread_session = sessionMaker.scoppedSession()  # Threadsafe

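                        # One worker thread per extracted file; throttled below
                        # when the number of live threads exceeds the configured limits.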
                        for filename in filenames:

                            t_2 = threading.Timer(0, multi_thread_task_manager, 
                                                args=(temp_dir, filename, Thread_session,
                                                      project_id, version_id))
                            t_2.start()

                            # Slow down new threads if too many open
                            len_threads = len(threading.enumerate())
                            if len_threads > settings.MAX_UPLOAD_THREADS:
                                time.sleep(settings.MAX_UPLOAD_THREADS * 25)          
                            if len_threads > settings.TARGET_UPLOAD_THREADS:
                                time.sleep(settings.TARGET_UPLOAD_THREADS * 5)

                            counter += 1
                            if counter % 10 == 0:
                                print("[ZIP processor]", (counter / len(filenames) ) * 100, "% done." , file=sys.stderr)

                        Thread_session.remove()

                    except zipfile.BadZipFile:
                        out = {"files": [{"name": "Error bad zip file"}]}
        
                else:
                    content_type = "image/" + str(extension)
                    file_name = os.path.split(file.name)[1]

                    out = process_one_image_file(file=file, name=file_name, 
                                        content_type=content_type, extension=extension, 
                                        session=session, project_id=project_id, version_id=version_id)

            out = {"files": [{"name": "Processed files"}]}
            print(out, file=sys.stderr)
            t.cancel()

        with sessionMaker.session_scope() as session:
            task_manager_scope(session)

        # Update train counts
        with sessionMaker.session_scope() as session:
            time.sleep(1)
            version = get_current_version(session)
            image_count = session.query(Image).filter_by(version_id=version.id).filter(Image.is_test_image == False, Image.soft_delete == False).count()
            version.train_length = image_count
            session.add(version)



    file = request.files.get('files[]')
    if not file:
        return "No file", 400
        
    extension = os.path.splitext(file.filename)[1].lower()
    if extension in allowed_file_names:

        file.filename = secure_filename(file.filename) # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/          
        temp_dir = tempfile.mkdtemp()
        name = temp_dir + "/" + file.filename
        file.save(name)

        t = threading.Timer(0, task_manager, args=(name, extension))  # https://stackoverflow.com/questions/29330982/python-timer-nonetype-object-is-not-callable-error
        t.daemon = True
        t.start()

        out = {"files": [{"name": "Processing files. Please wait a few seconds per file uploaded"}]}
    else:
        out = {"files": [{"name": "Invalid file extension"}]}


    return jsonify(out)
def yamlNew(hold_thread=False):

    if LoggedIn() != True:
        return defaultRedirect()

    @copy_current_request_context
    def task_manager():
        def task_manager_scope(session):
            project = get_current_project(session)
            version = get_current_version(session)
            machine_learning_settings = get_ml_settings(session=session,
                                                        version=version)

            Images = session.query(Image).filter_by(
                version_id=version.id).order_by(Image.id.desc())

            annotations_list = []
            len_images = Images.count()
            counter = 0
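            # Build one annotation entry per image that is labeled done,
            # not soft deleted and not held out as a test image.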
            for image in Images:

                # TODO maybe better to do in database?
                if image.soft_delete != True and image.is_test_image != True and image.done_labeling == True:

                    boxes = session.query(Box).filter_by(
                        image_id=image.id).order_by(Box.id.desc()).limit(100)

                    box_dict_list = []
                    for box in boxes:

                        label = session.query(Label).filter_by(
                            id=box.label_id).one()
                        if label is None:
                            print("Label is none", file=sys.stderr)

                        box_dict_list.append({
                            'label_id': label.id,
                            'label_name': label.name,
                            'x_min': box.x_min,
                            'x_max': box.x_max,
                            'y_min': box.y_min,
                            'y_max': box.y_max
                        })

                    image_dict = {
                        'image': {
                            'image_id': image.id,
                            'image_width': image.width,
                            'image_height': image.height,
                            'original_filename': image.original_filename
                        }
                    }

                    boxes_dict = {'boxes': box_dict_list}
                    annotations_list.append(
                        {'annotations': [image_dict, boxes_dict]})

                if counter % 10 == 0:
                    print("Percent done", (counter / len_images) * 100,
                          file=sys.stderr)
                counter += 1

            print("annotations_list len",
                  len(annotations_list),
                  file=sys.stderr)
            yaml_data = yaml.dump(annotations_list, default_flow_style=False)

            gcs = storage.Client()
            gcs = get_gcs_service_account(gcs)
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

            project_str = str(project.id) + "/" + str(
                version.id) + "/ml/" + str(
                    machine_learning_settings.ml_compute_engine_id
                ) + "/annotations.yaml"

            blob = bucket.blob(project_str)
            blob.upload_from_string(yaml_data, content_type='text/yaml')

            print("Built YAML, link below", file=sys.stderr)

            link = get_secure_link(blob)
            print(link, file=sys.stderr)

            t.cancel()

        with sessionMaker.session_scope() as session:
            task_manager_scope(session)

    t = threading.Timer(0, task_manager)
    t.daemon = True
    t.start()

    print("[YAML processor] Started", file=sys.stderr)

    # Defaults to False for HTTP calls
    # Use hold_thread=True (as the training pipeline does) to block until the YAML has been built
    if hold_thread is True:
        t.join()

    out = "Started"
    return out, 200
def tfrecordsNew(hold_thread=False):

    if LoggedIn() != True:
        return defaultRedirect()

    @copy_current_request_context
    def task_manager():
        def task_manager_scope(session):
            project = get_current_project(session)
            version = get_current_version(session)
            ml_settings = get_ml_settings(session=session, version=version)

            project_str = str(project.id) + "/" + str(version.id) + "/"

            gcs = storage.Client()
            gcs = get_gcs_service_account(gcs)
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
            blob = bucket.blob(project_str + "ml/" +
                               str(ml_settings.ml_compute_engine_id) +
                               "/tfrecords_0.record")
            INPUT_YAML = project_str + "ml/" + str(
                ml_settings.ml_compute_engine_id) + "/annotations.yaml"
            yaml_blob = bucket.blob(INPUT_YAML)

            yaml_bytes = yaml_blob.download_as_string()
            examples = yaml.safe_load(yaml_bytes)

            len_examples = len(examples)
            print("Loaded ", len(examples), "examples", file=sys.stderr)

            images_dir = project_str + "images/"
            for i in range(len(examples)):
                examples[i]['annotations'][0]['image'][
                    'image_id'] = images_dir + str(
                        examples[i]['annotations'][0]['image']['image_id'])

            counter = 0
            all_examples = []

            # Reassign db ids to be 1, 2, 3, etc. for tensorflow
            # TODO this is terrible, surely a better way to do this
            Labels = []
            labels = session.query(Label).filter_by(project_id=project.id)
            for i in labels:
                if i.soft_delete != True:
                    Labels.append(i)
            Labels_unique = set(Labels)
            Labels.sort(key=lambda x: x.id)
            label_dict = {}
            start_at_1_label = 1
            lowest_label = 0
            for label in Labels:
                if label.id > lowest_label:
                    label_dict[label.id] = start_at_1_label
                    start_at_1_label += 1
                    lowest_label = label.id

            print("label_dict length", len(label_dict), file=sys.stderr)

            temp = tempfile.NamedTemporaryFile()
            writer = tf.python_io.TFRecordWriter(str(temp.name))

            for example in examples:

                tf_example = create_tf_example(example['annotations'],
                                               label_dict)
                writer.write(tf_example.SerializeToString())

                if counter % 2 == 0:
                    print("Percent done", (counter / len_examples) * 100)
                counter += 1

            writer.close()

            blob.upload_from_file(temp, content_type='text/record')
            temp.close()

            link = get_secure_link(blob)
            print(blob.name, file=sys.stderr)
            print("Built TF records", file=sys.stderr)
            t.cancel()

        with sessionMaker.session_scope() as session:
            task_manager_scope(session)

    t = threading.Timer(0, task_manager)
    t.daemon = True
    t.start()

    print("[TF records processor] Started", file=sys.stderr)

    if hold_thread is True:
        t.join()

    return "Started tf_records", 200
def project_get():
    if LoggedIn():
        return render_template('/workspace/newWorkspace.html')
    else:
        return defaultRedirect()
    def faster_rcnn_resnet_new_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        machine_learning_settings = get_ml_settings(session=session,
                                                    version=version)

        project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
            machine_learning_settings.ml_compute_engine_id)
        project_str += "/faster_rcnn_resnet.config"
        # Faster R-CNN with Resnet-101 (v1)

        root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(
            project.id) + "/" + str(version.id) + "/ml/" + str(
                machine_learning_settings.ml_compute_engine_id) + "/"
        num_classes_var = version.labels_number  # TODO get this automatically
        print("version.labels_number", version.labels_number, file=sys.stderr)
        min_dimension_var = 720
        max_dimension_var = 1280  # TODO get this automatically within limit
        first_stage_max_proposals_var = 100

        label_map_path_var = root_dir + "label_map.pbtxt"

        # Testing for multiple records?
        input_path_var = root_dir + "tfrecords_*.record"
        num_steps_var = machine_learning_settings.iterations

        # This is the shared generic starting point
        fine_tune_checkpoint_var = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + settings.RESNET_PRE_TRAINED_MODEL
        if re_train == 1:

            machine_learning_settings.previous_goal_iterations = machine_learning_settings.iterations
            num_steps_var = machine_learning_settings.iterations + 1500
            fine_tune_checkpoint_var = check_actual_model_path_name(
                session=session)

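        # The pipeline config is assembled below from string fragments of a
        # Faster R-CNN Resnet-101 config and uploaded to GCS as plain text.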
        model = "model {"
        faster_rcnn = "\nfaster_rcnn {"
        num_classes = "\nnum_classes: " + str(num_classes_var)
        image_resizer = "\nimage_resizer { \nkeep_aspect_ratio_resizer {"
        min_dimension = "\nmin_dimension: " + str(min_dimension_var)
        max_dimension = "\nmax_dimension: " + str(
            max_dimension_var) + "\n} \n}"

        feature_extractor = "\nfeature_extractor { \n type: 'faster_rcnn_resnet101' "
        first_stage_features_stride = "\nfirst_stage_features_stride: 16 \n } "
        first_stage_anchor_generator = """first_stage_anchor_generator \n{ \ngrid_anchor_generator 
        { \nscales: [0.25, 0.5, 1.0, 2.0] \naspect_ratios: [0.5, 1.0, 2.0] \nheight_stride: 16 \n
        width_stride: 16 \n } \n } \n"""

        first_stage_box_predictor_conv_hyperparams = """
        first_stage_box_predictor_conv_hyperparams {
        op: CONV
        regularizer {
        l2_regularizer {
        weight: 0.0
        }
        }
        initializer {
        truncated_normal_initializer {
        stddev: 0.01
        }
        }
        }
        first_stage_nms_score_threshold: 0.0
        first_stage_nms_iou_threshold: 0.7
        first_stage_localization_loss_weight: 2.0
        first_stage_objectness_loss_weight: 1.0
        initial_crop_size: 14
        maxpool_kernel_size: 2
        maxpool_stride: 2
        """

        first_stage_max_proposals = "\nfirst_stage_max_proposals:" + str(
            first_stage_max_proposals_var)

        second_stage_box_predictor = """
        second_stage_box_predictor {
        mask_rcnn_box_predictor {
        use_dropout: false
        dropout_keep_probability: 1.0
        fc_hyperparams {
        op: FC
        regularizer {
        l2_regularizer {
        weight: 0.0
        }
        }
        initializer {
        variance_scaling_initializer {
        factor: 1.0
        uniform: true
        mode: FAN_AVG
        }
        }
        }
        }
        }
        """
        second_stage_post_processing = """
        second_stage_post_processing {
        batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        """
        max_total_detections = "max_total_detections:" + str(
            first_stage_max_proposals_var) + "\n}"

        score_converter = """
        score_converter: SOFTMAX
        }
        second_stage_localization_loss_weight: 2.0
        second_stage_classification_loss_weight: 1.0
        """
        second_stage_batch_size = "\nsecond_stage_batch_size: " + str(
            first_stage_max_proposals_var) + "\n }\n }\n"

        train_config = """
        train_config: {
        batch_size: 1
        optimizer {
        momentum_optimizer: {
        learning_rate: {
        manual_step_learning_rate {
        initial_learning_rate: 0.0003
        schedule {
        step: 0
        learning_rate: .0003
        }
        schedule {
        step: 900000
        learning_rate: .00003
        }
        schedule {
        step: 1200000
        learning_rate: .000003
        }
        }
        }
        momentum_optimizer_value: 0.9
        }
        use_moving_average: false
        }
        gradient_clipping_by_norm: 10.0
        """
        fine_tune_checkpoint = "\nfine_tune_checkpoint: '" + str(
            fine_tune_checkpoint_var) + "'"

        from_detection_checkpoint = "\nfrom_detection_checkpoint: true"

        num_steps = "\nnum_steps: " + str(num_steps_var)

        data_augmentation_options = """
        data_augmentation_options {
        random_horizontal_flip {
        }
        }
        }
        """

        train_input_reader = """
        train_input_reader: {
        tf_record_input_reader {
        """

        input_path = "\ninput_path: '" + str(input_path_var) + "' \n}"
        label_map_path = "\nlabel_map_path: '" + str(
            label_map_path_var) + "'\n}"

        config_file_a = model + faster_rcnn + num_classes + image_resizer + min_dimension + max_dimension + feature_extractor + first_stage_features_stride + first_stage_anchor_generator
        config_file_b = first_stage_box_predictor_conv_hyperparams + first_stage_max_proposals + second_stage_box_predictor + second_stage_post_processing + max_total_detections
        config_file_c = score_converter + second_stage_batch_size + train_config + fine_tune_checkpoint + from_detection_checkpoint + num_steps + data_augmentation_options + train_input_reader + input_path + label_map_path

        config_file = config_file_a + config_file_b + config_file_c

        gcs = storage.Client()
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(project_str)
        blob.upload_from_string(config_file, content_type='text/config')

        print("Built Config", file=sys.stderr)
        out = 'success'

        return out, 200, {'ContentType': 'application/json'}
    def label_map_new_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        ml_settings = get_ml_settings(session=session, version=version)
        Images = session.query(Image).filter_by(version_id=version.id)

        Labels = []

        # TODO Refactor, ie maintain a cache of all label ids used in a version
        # Would need to store that cache per version
        # And update / delete it as labels are changed OR collect at YAML stage

        labels = session.query(Label).filter_by(project_id=project.id)
        for i in labels:
            if i.soft_delete != True:
                Labels.append(i)

        # Map db ids to ids starting with 1, 2, 3
        Labels.sort(key=lambda x: x.id)
        label_dict = {}
        start_at_1_label = 1
        lowest_label = 0
        for label in Labels:
            if label.id > lowest_label:
                label_dict[label.id] = start_at_1_label
                start_at_1_label += 1
                lowest_label = label.id

        print("label_dict length", len(label_dict), file=sys.stderr)

        project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
            ml_settings.ml_compute_engine_id)
        project_str += "/label_map.pbtext"

        file = ""

        Labels_unique = set(Labels)

        len_labels = len(Labels_unique)

        version.labels_number = len_labels
        session.add(version)
        session.commit()

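        # Emit one pbtxt item { id, name } entry per unique label,
        # using the remapped 1-based ids.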
        for c in Labels_unique:
            new = "\nitem {"
            label_id = "\nid: " + str(label_dict[c.id])
            name = "\nname: " + str(c.name) + "\n }\n"

            file += new + label_id + name

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(project_str)
        blob.upload_from_string(file, content_type='text/pbtext')

        print("Built label_map", file=sys.stderr)
        out = get_secure_link(blob)

        return out, 200, {'ContentType': 'application/json'}
def upload():
    if LoggedIn() != True:
        return defaultRedirect()
    
    return render_template('/storage/uploadView.html')
def runInferenceSingle():

    if LoggedIn() != True:
        return defaultRedirect()

    @copy_current_request_context
    def task_manager():
        def task_manager_scope(session):
            credentials = GoogleCredentials.get_application_default()
            ml = discovery.build('ml', 'v1', credentials=credentials)
            projectID = 'projects/{}'.format(settings.GOOGLE_PROJECT_NAME)

            project = get_current_project(session=session)
            version = get_current_version(session=session)
            machine_learning_settings = get_ml_settings(session=session,
                                                        version=version)

            Images_db = session.query(Image).filter_by(version_id=version.id,
                                                       is_test_image=True)

            REGION = "us-central1"
            RUNTIME_VERSION = "1.2"

            modelName = "a_" + str(project.id)
            versionName = "a_" + str(version.id) + "_" + str(
                machine_learning_settings.ml_compute_engine_id)
            versionName += "_" + str(machine_learning_settings.re_train_id)
            modelVersionName = '{}/models/{}/versions/{}'.format(
                projectID, modelName, versionName)

            gcs = storage.Client()
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
            filenames = []

            root_dir = str(project.id) + "/" + str(version.id) + "/"
            for image in Images_db:
                #print(image.is_test_image, file=sys.stderr)
                if image.soft_delete != True:
                    filenames.append(root_dir + "images/" + str(image.id))
                    break

            Rows = []
            Images = []
            print("len(filenames):", len(filenames), file=sys.stderr)

            for file in filenames:
                blob = bucket.blob(file)
                image = blob.download_as_string()

                # Resize
                image = scipy.misc.imread(BytesIO(image))
                if image is None:
                    raise IOError("Could not open")

                # TODO BETTER WAY
                #image = scipy.misc.imresize(image, (640, 960))
                temp = tempfile.mkdtemp()
                new_temp_filename = temp + "/resized.jpg"
                scipy.misc.imsave(new_temp_filename, image)

                # Otherwise have strange byte issues
                blob = bucket.blob(file + "_test_resized")
                blob.upload_from_filename(new_temp_filename,
                                          content_type="image/jpg")
                image = blob.download_as_string()

                encoded_contents = base64.b64encode(image).decode('UTF-8')
                row = {'b64': encoded_contents}
                Rows.append(row)
                Images.append(image)

            output = {'instances': Rows}

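            # Online prediction request: each instance carries one image as a
            # base64 string under the 'b64' key.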
            ml_request = ml.projects().predict(name=modelVersionName,
                                               body=output)

            PATH_TO_LABELS = root_dir + "ml/" + str(
                machine_learning_settings.ml_compute_engine_id
            ) + "/label_map.pbtext"

            label_map_blob = bucket.blob(PATH_TO_LABELS)
            label_map_data = label_map_blob.download_as_string()

            category_index = categoryMap(session=session)

            try:
                time0 = time.time()
                response = ml_request.execute()
                time1 = time.time()
                print("Time in seconds", (time1 - time0), file=sys.stderr)

                print(response, file=sys.stderr)

                for i in range(len(Images)):
                    prediction = response['predictions'][i]

                    boxes = prediction['detection_boxes']
                    scores = prediction['detection_scores']
                    classes = prediction['detection_classes']

                    boxes = np.array(boxes)
                    scores = np.array(scores)
                    classes = np.array(classes, dtype=int)
                    print(classes, file=sys.stderr)

                    image_np = scipy.misc.imread(BytesIO(Images[i]))

                    # Handle gray scale
                    if len(image_np.shape) == 2:
                        image_np = np.stack((image_np, ) * 3, axis=2)

                    print(image_np.shape)

                    visualization_utils.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        boxes,
                        classes,
                        scores,
                        category_index,
                        use_normalized_coordinates=True,
                        min_score_thresh=.3,
                        line_thickness=2)

                    blob = bucket.blob(root_dir + "test_inference_out/" +
                                       str(i) + "_.jpg")

                    temp = tempfile.mkdtemp()
                    new_temp_filename = temp + "/inference_" + str(i) + "_.jpg"
                    scipy.misc.imsave(new_temp_filename, image_np)
                    blob.upload_from_filename(new_temp_filename,
                                              content_type="image/jpg")

                min_score_thresh = .05
                for i in range(len(boxes)):
                    if scores[i] > min_score_thresh:

                        class_name = category_index[classes[i]]['name']
                        print(class_name, scores[i], file=sys.stderr)

                # TODO add pub sub messaging
                out = 'success'

            except errors.HttpError as err:
                print('There was an error. Check the details:',
                      file=sys.stderr)
                print(err._get_reason(), file=sys.stderr)
                out = 'failed'

            t.cancel()

        with sessionMaker.session_scope() as session:
            task_manager_scope(session)

    t = threading.Timer(0, task_manager)
    t.daemon = True
    t.start()

    out = 'success'
    return out, 200, {'ContentType':'application/json'}