def get_image_ids_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()  

        project = get_current_project(session)
        version = get_current_version(session)

        data = request.get_json(force=True)
        search_term = data.get("search_term", None)
        print(search_term, file=sys.stderr)
        if search_term is None:
            Images = session.query(Image).filter_by(version_id=version.id).order_by(Image.original_filename.desc()).limit(128)
        else: 
            search_term = "%" + search_term + "%"
            Images = session.query(Image).filter_by(version_id=version.id).filter(Image.original_filename.like(search_term)).order_by(Image.original_filename.desc()).limit(128)

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

        Pre_condition_checked_images = []
        for i in Images:
            if i.soft_delete != True:
                if i.url_signed_expiry is None or i.url_signed_expiry <= time.time():
                    rebuild_secure_urls(session, project, version, i)                    

                Pre_condition_checked_images.append(i)
        
        out = jsonify(images=[i.serialize() for i in Pre_condition_checked_images])

        return out, 200, {'ContentType':'application/json'}
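    # rebuild_secure_urls() is referenced above but not shown in this listing.
    # A minimal sketch of what such a helper might do, assuming blobs are keyed
    # as "<project_id>/<version_id>/images/<image_id>" (as in the inference
    # example later in this listing) and that Image rows carry url_signed /
    # url_signed_expiry columns:
    def rebuild_secure_urls_sketch(session, project, version, image):
        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

        blob = bucket.blob(str(project.id) + "/" + str(version.id) + "/images/" + str(image.id))
        expiry = int(time.time() + 3600)  # one hour; arbitrary choice
        image.url_signed = blob.generate_signed_url(expiration=expiry)
        image.url_signed_expiry = expiry
        session.add(image)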
    def remove_duplicate_filenames_all_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        version = get_current_version(session)
        Existing_images = session.query(Image).filter_by(version_id=version.id)

        seen_file_once = []
        looked_at = 0
        soft_delete_marked = 0
        # Basic greedy approach: if we have already seen the filename, mark later
        # objects with the same filename as soft deleted; otherwise add it to the list.
        # (A SQL-side alternative is sketched after this function.)
        for i in Existing_images:   
            if i.original_filename in seen_file_once:
                i.soft_delete = True
                session.add(i)
                soft_delete_marked +=1
            else:
                seen_file_once.append(i.original_filename)
                looked_at += 1

            if looked_at % 100 == 0:
                print("Looked at", looked_at, "Removed", soft_delete_marked)
    
        print("Removed", soft_delete_marked, "duplicates")
        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
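    # A possible SQL-side alternative to the greedy pass above (sketch only,
    # using the same Image model shown in this listing): group by filename and
    # return only names that occur more than once.
    def find_duplicate_filenames_sketch(session, version):
        from sqlalchemy import func
        rows = (session.query(Image.original_filename)
                .filter(Image.version_id == version.id)
                .group_by(Image.original_filename)
                .having(func.count(Image.id) > 1)
                .all())
        return [r[0] for r in rows]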
    def get_boxes_scope(session):

        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)

        if data['image_id'] is not None:
            print("current_image.id", data['image_id'], file=sys.stderr)
            image_id = data['image_id']
            boxes = session.query(Box).filter_by(image_id=image_id).order_by(Box.id.desc())
        else:
            # This could be more sophisticated ie store last image we were working with
            version = get_current_version(session)
            image = session.query(Image).filter_by(version_id=version.id).order_by(Image.id.desc()).first()
            boxes = session.query(Box).filter_by(image_id=image.id).order_by(Box.id.desc())

        labels = []
        for b in boxes:
            label = session.query(Label).filter_by(id=b.label_id)
            labels.append(label[0].serialize())

        out = jsonify(boxes=[i.serialize() for i in boxes], labels=labels)
        

        return out, 200, {'ContentType':'application/json'}
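    # The per-box Label lookup above is an N+1 query pattern. A single joined
    # query could fetch boxes and labels together (sketch, assuming Box.label_id
    # is a foreign key to Label.id):
    def get_boxes_with_labels_sketch(session, image_id):
        rows = (session.query(Box, Label)
                .join(Label, Label.id == Box.label_id)
                .filter(Box.image_id == image_id)
                .order_by(Box.id.desc())
                .all())
        boxes = [b for b, l in rows]
        labels = [l.serialize() for b, l in rows]
        return boxes, labels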
    def toggle_test_image_all_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)   # force=True in case the Content-Type is not set to application/json
        images = data['images']
        print("len(images)", len(images), file=sys.stderr)

        version = get_current_version(session)

        for i in images:
            image_db = session.query(Image).filter_by(version_id=version.id, id=i['id']).first()

            if image_db.is_test_image == True:
                version.train_length += 1
                version.test_length -= 1
            else:
                version.train_length -= 1
                version.test_length += 1
              
            image_db.is_test_image = not image_db.is_test_image
            session.add(image_db)
        
        session.add(version)
        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
    def remove_duplicate_filenames_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        # May want to remove specific duplicates only
        data = request.get_json(force=True)   
        images = data['images']

        version = get_current_version(session)
        Existing_images = session.query(Image).filter_by(version_id=version.id)
    
        seen_file_once = []
        looked_at = 0
        soft_delete_marked = 0
        for i in Existing_images:
            looked_at += 1
            for j in images:
                if i.id == j['id']:
                    if i.original_filename not in seen_file_once:
                        seen_file_once.append(i.original_filename)
                    else:
                        i.soft_delete = True
                        session.add(i)
                        soft_delete_marked +=1

            if looked_at % 100 == 0:
                print("Looked at", looked_at, "Removed", soft_delete_marked)
    
        print("Removed", soft_delete_marked, "duplicates")             
        out = 'success'

        return json.dumps(out), 200, {'ContentType':'application/json'}
    def image_delete_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()


        data = request.get_json(force=True)   # force=True in case the Content-Type is not set to application/json
        image = data['image']

        version = get_current_version(session)

        Existing_images = session.query(Image).filter_by(version_id=version.id)

        # Could delete multiple images if given a list of images to delete.
        # Same issue as above otherwise; could do it in the query by filtering on id, e.g.
        # session.query(Employer).filter_by(id=employer_id).one()
        # (see the sketch after this function)

        for i in Existing_images:
            if i.id == image['id']:
                i.soft_delete = True

                if i.is_test_image is True:
                    version.test_length -= 1
                else:
                    version.train_length -= 1
                # TODO Handle updating test / train length cache without db hit
               
                session.add(i)

        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
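    # Sketch of the direct-lookup approach suggested in the comment above:
    # fetch the one image by primary key within the current version instead of
    # scanning every row (same Image model; assumes id is unique per version).
    def image_delete_by_id_sketch(session, version, image_id):
        i = session.query(Image).filter_by(version_id=version.id, id=image_id).first()
        if i is None:
            return None
        i.soft_delete = True
        if i.is_test_image is True:
            version.test_length -= 1
        else:
            version.train_length -= 1
        session.add(i)
        session.add(version)
        return i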
def check_actual_model_path_name(session):
    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session,
                                                version=version)

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(
        project.id) + "/" + str(version.id) + "/ml/" + str(
            machine_learning_settings.ml_compute_engine_id) + "/"
    gcs = storage.Client()
    gcs = get_gcs_service_account(gcs)
    bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

    previous_goal_iterations = machine_learning_settings.previous_goal_iterations
    model_name_ranges = list(range(previous_goal_iterations - 1,
                                   previous_goal_iterations + 5))
    for i in model_name_ranges:
        MODEL_NAME = 'model.ckpt-' + str(i) + '.index'
        trained_checkpoint_prefix = str(project.id) + "/" + str(
            version.id) + "/ml/" + str(
                machine_learning_settings.ml_compute_engine_id
            ) + "/train/" + MODEL_NAME
        blob = bucket.blob(trained_checkpoint_prefix)
        if blob.exists() == True:
            MODEL_NAME = 'model.ckpt-' + str(i)
            trained_checkpoint_prefix = root_dir + "train/" + MODEL_NAME

            # Store in DB for other functions that need it
            machine_learning_settings.trained_checkpoint_prefix = trained_checkpoint_prefix
            session.add(machine_learning_settings)
            session.commit()

            print(trained_checkpoint_prefix, file=sys.stderr)
            return trained_checkpoint_prefix
    def test_out_scope(session):    
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        Images = session.query(Image).filter_by(version_id=version.id, is_test_image=True).order_by(Image.id.desc())

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

        Public_urls = []
        expiration_time = int(time.time() + 300)

        file = str(project.id)+"/"+str(version.id)+"/"+ "test_inference_out/"+ "0_.jpg"
        blob = bucket.blob(file)
        public_url = blob.generate_signed_url(expiration=expiration_time)
        Public_urls.append(public_url)

        out = {}
        out['image_ids'] = [i.id for i in Images]
        out['width'] = [i.width for i in Images]
        out['height'] = [i.height for i in Images]
        out['public_url'] = [i for i in Public_urls]

        return json.dumps(out), 200, {'ContentType':'application/json'}
def runTraining(session):
    if LoggedIn() != True:
        return defaultRedirect()

    have_error, params = training_pre_conditions(session)
    if have_error:
        print("have error", params, file=sys.stderr)
        return json.dumps(params), 200, {'ContentType':'application/json'}

    # TODO Thinking on reasonable way to "copy" a version and track changes

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)

    JOB_NAME = "train_" + machine_learning_settings.JOB_NAME
    print(JOB_NAME, file=sys.stderr)

    REGION="us-central1"
    RUNTIME_VERSION="1.2"

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id) + "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/"
    JOB_DIR = root_dir + "train"
    pipeline_config_path = root_dir + "faster_rcnn_resnet.config"

    MAIN_TRAINER_MODULE='object_detection.train'

    training_inputs = {'scaleTier': 'CUSTOM',
	    'masterType': 'standard_gpu',
	    'workerType': 'standard_gpu',
	    'parameterServerType': 'standard_gpu',
	    'workerCount': 2,
	    'parameterServerCount': 1,
	    'packageUris': ['gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_OBJECT_DETECTION_PYTHON,
					    'gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_SLIM_PYTHON ],
	    'pythonModule': MAIN_TRAINER_MODULE,
	    'args': ['--train_dir', JOB_DIR, 
				    '--pipeline_config_path', pipeline_config_path],
	    'region': REGION,
	    'jobDir': JOB_DIR,
	    'runtimeVersion': RUNTIME_VERSION }

    job_spec = {'jobId': JOB_NAME, 'trainingInput': training_inputs}

    request = ml.projects().jobs().create(body=job_spec, parent=projectID)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        out = 'success'
        return out, 200, {'ContentType':'application/json'}

    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 500, {'ContentType':'application/json'}

    return "success", 200
    def version_view_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        version = get_current_version(session)
        out = jsonify(version=version.serialize())

        return out, 200, {'ContentType':'application/json'}
        def task_manager_scope(session):
            counter = 0
            project = get_current_project(session)
            version = get_current_version(session)
            project_id = project.id
            version_id = version.id
            out = ""
            with open(name, "rb") as file:              
                if extension == ".zip":
                    try:                        
                        zip_ref = zipfile.ZipFile(BytesIO(file.read()), 'r')
                        temp_dir = tempfile.mkdtemp()
                        zip_ref.extractall(temp_dir)
                        zip_ref.close()

                        filenames = sorted(os.listdir(temp_dir))
                        len_filenames = len(filenames)  # Variable used in loop below so storing here
                        print("[ZIP processor] Found", len_filenames, file=sys.stderr)

                        Thread_session = sessionMaker.scoppedSession()  # Threadsafe

                        for filename in filenames:

                            t_2 = threading.Timer(0, multi_thread_task_manager, 
                                                args=(temp_dir, filename, Thread_session,
                                                      project_id, version_id))
                            t_2.start()

                            # Slow down new threads if too many open
                            len_threads = len(threading.enumerate())
                            if len_threads > settings.MAX_UPLOAD_THREADS:
                                time.sleep(settings.MAX_UPLOAD_THREADS * 25)          
                            if len_threads > settings.TARGET_UPLOAD_THREADS:
                                time.sleep(settings.TARGET_UPLOAD_THREADS * 5)

                            counter += 1
                            if counter % 10 == 0:
                                print("[ZIP processor]", (counter / len(filenames) ) * 100, "% done." , file=sys.stderr)

                        Thread_session.remove()

                    except zipfile.BadZipFile:
                        out = {"files": [{"name": "Error bad zip file"}]}
        
                else:
                    content_type = "image/" + str(extension)
                    file_name = os.path.split(file.name)[1]

                    out = process_one_image_file(file=file, name=file_name, 
                                        content_type=content_type, extension=extension, 
                                        session=session, project_id=project_id, version_id=version_id)

            out = {"files": [{"name": "Processed files"}]}
            print(out, file=sys.stderr)
            t.cancel()
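        # The sleep-based throttling above could instead use a bounded pool. A
        # sketch with the standard library (assumes the same
        # multi_thread_task_manager signature as the call above):
        def process_zip_with_pool_sketch(temp_dir, filenames, Thread_session,
                                         project_id, version_id):
            from concurrent.futures import ThreadPoolExecutor
            with ThreadPoolExecutor(max_workers=settings.TARGET_UPLOAD_THREADS) as pool:
                for filename in filenames:
                    pool.submit(multi_thread_task_manager, temp_dir, filename,
                                Thread_session, project_id, version_id)
            # leaving the with-block waits for all submitted uploads to finish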
def trainingFrozenRun(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)

    #now=strftime("%Y_%m_%d_%H_%M_%S", gmtime())
    JOB_NAME = "frozen_user_" + machine_learning_settings.JOB_NAME
    print(JOB_NAME, file=sys.stderr)

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id) + "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/"
    JOB_DIR = root_dir + str(machine_learning_settings.re_train_id) + "/frozen"
    REGION ="us-central1"
    RUNTIME_VERSION ="1.2"
 
    # Should be updated during training and stored in the db?
    trained_checkpoint_prefix = configNew.check_actual_model_path_name(session=session)

    pipeline_config_path = root_dir + "faster_rcnn_resnet.config"
    MAIN_TRAINER_MODULE ="object_detection.export_inference_graph"

    training_inputs = {'scaleTier': 'CUSTOM',
	    'masterType': 'large_model',
	    'workerCount': 0,
	    'packageUris': ['gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_OBJECT_DETECTION_PYTHON,
					    'gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_SLIM_PYTHON ],
	    'pythonModule': MAIN_TRAINER_MODULE,
	    'args': ['--trained_checkpoint_prefix', trained_checkpoint_prefix, 
				    '--pipeline_config_path', pipeline_config_path,
				    '--input_type', 'encoded_image_string_tensor',
				    '--output_directory', JOB_DIR],
	    'region': REGION,
	    'jobDir': JOB_DIR,
	    'runtimeVersion': RUNTIME_VERSION }

    job_spec = {'jobId': JOB_NAME, 'trainingInput': training_inputs}

    request = ml.projects().jobs().create(body=job_spec, parent=projectID)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        out = 'success'
        return out, 200, {'ContentType':'application/json'}
    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 200, {'ContentType':'application/json'}

    return "Success", 200
def machine_learning_settings_edit(session, next_id=False):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session, version)

    if next_id is True: 
        machine_learning_settings.ml_compute_engine_id += 1

    session.add(machine_learning_settings)
    session.commit()

    return "Success", 200
def training_pre_conditions(session):
    
    project = get_current_project(session)
    version = get_current_version(session)
    
    params = {}
    have_error = False
    if project.train_credits <= 0:
        params['train_credits'] = "Out of train credits"
        have_error = True
        session.close()
    else:
        project.train_credits -= 1
        session.add(project)
        session.commit()
        
    return have_error, params
    def toggle_done_labeling_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        data = request.get_json(force=True)   # force=True in case the Content-Type is not set to application/json
        image = data['image']

        version = get_current_version(session)
        Existing_images = session.query(Image).filter_by(version_id=version.id)

        for i in Existing_images:
            if i.id == image['id']:
                i.done_labeling = not i.done_labeling
                session.add(i)

        out = 'success'
        return json.dumps(out), 200, {'ContentType':'application/json'}
def runNewVersion(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)
    project_root = "a_" + str(project.id)

    # Creating version
    modelID= '{}/models/{}'.format(projectID, project_root)
    versionName = "a_" + str(version.id) + "_" + str(machine_learning_settings.ml_compute_engine_id)
    versionName += "_" + str(machine_learning_settings.re_train_id)

    # Maybe could include more info like date time?
    versionDescription = 'created by runNewVersion()'
    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id)
    root_dir += "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/" + str(machine_learning_settings.re_train_id) + "/"
    JOB_DIR = root_dir + "frozen/saved_model"

    requestDict = {'name': versionName,
                    'description': versionDescription,
                    'deploymentUri': JOB_DIR,
                    'runtimeVersion': '1.2'}

    request = ml.projects().models().versions().create(
        parent=modelID, body=requestDict)

    try:
        response = request.execute()
        print(response, file=sys.stderr)

        operationID = response['name']

        out = 'success'
        return out, 200, {'ContentType':'application/json'}
    except errors.HttpError as err:
        # Something went wrong, print out some information.
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 200, {'ContentType':'application/json'}
    
    return out, 200, {'ContentType':'application/json'}
def categoryMap(session):

    project = get_current_project(session=session)
    version = get_current_version(session=session)
    ml_settings = get_ml_settings(session=session, version=version)
    Labels_db = session.query(Label).filter_by(project_id=project.id).order_by(
        Label.id.desc())

    Images = session.query(Image).filter_by(version_id=version.id)

    Labels = []

    for i in Labels_db:
        if i.soft_delete != True:
            Labels.append(i)

    Labels_unique = set(Labels)

    Labels.sort(key=lambda x: x.id)
    label_dict = {}
    start_at_1_label = 1
    lowest_label = 0
    for label in Labels:
        if label.id > lowest_label:
            label_dict[label.id] = start_at_1_label
            start_at_1_label += 1
            lowest_label = label.id

    project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
        ml_settings.ml_compute_engine_id)
    project_str += "/label_map.pbtext"

    categoryMap = {}
    for i, c in enumerate(Labels_unique):
        name = str(c.name)
        mapped_id = int(label_dict[int(c.id)])

        categoryMap[mapped_id] = {'id': int(i + 1), 'name': name}

    return categoryMap
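# The returned dict follows the TF Object Detection API "category_index"
# convention consumed by visualize_boxes_and_labels_on_image_array(), keyed by
# the remapped class id, e.g. (label names illustrative only):
#     {1: {'id': 1, 'name': 'car'}, 2: {'id': 2, 'name': 'person'}}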
def machine_learning_settings_new(session):

    if LoggedIn() != True:
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)

    iterations = 2000 # Could get from user
    previous_goal_iterations = iterations
    ml_compute_engine_id = 0

    new_ml_settings = Machine_learning_settings(iterations=iterations,
                                                previous_goal_iterations=previous_goal_iterations,
                                                ml_compute_engine_id=ml_compute_engine_id)
    session.add(new_ml_settings)
    session.commit()

    version.machine_learning_settings_id = new_ml_settings.id
    session.add(version)
    session.commit()

    return "Success", 200
        def task_manager_scope(session):
            credentials = GoogleCredentials.get_application_default()
            ml = discovery.build('ml', 'v1', credentials=credentials)
            projectID = 'projects/{}'.format(settings.GOOGLE_PROJECT_NAME)

            project = get_current_project(session=session)
            version = get_current_version(session=session)
            machine_learning_settings = get_ml_settings(session=session,
                                                        version=version)

            Images_db = session.query(Image).filter_by(version_id=version.id,
                                                       is_test_image=True)

            REGION = "us-central1"
            RUNTIME_VERSION = "1.2"

            modelName = "a_" + str(project.id)
            versionName = "a_" + str(version.id) + "_" + str(
                machine_learning_settings.ml_compute_engine_id)
            versionName += "_" + str(machine_learning_settings.re_train_id)
            modelVersionName = '{}/models/{}/versions/{}'.format(
                projectID, modelName, versionName)

            gcs = storage.Client()
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
            filenames = []

            root_dir = str(project.id) + "/" + str(version.id) + "/"
            for image in Images_db:
                #print(image.is_test_image, file=sys.stderr)
                if image.soft_delete != True:
                    filenames.append(root_dir + "images/" + str(image.id))
                    break

            Rows = []
            Images = []
            print("len(filenames):", len(filenames), file=sys.stderr)

            for file in filenames:
                blob = bucket.blob(file)
                image = blob.download_as_string()

                # Resize
                image = scipy.misc.imread(BytesIO(image))
                if image is None:
                    raise IOError("Could not open")

                # TODO BETTER WAY
                #image = scipy.misc.imresize(image, (640, 960))
                temp = tempfile.mkdtemp()
                new_temp_filename = temp + "/resized.jpg"
                scipy.misc.imsave(new_temp_filename, image)

                # Otherwise have strange byte issues
                blob = bucket.blob(file + "_test_resized")
                blob.upload_from_filename(new_temp_filename,
                                          content_type="image/jpg")
                image = blob.download_as_string()

                encoded_contents = base64.b64encode(image).decode('UTF-8')
                row = {'b64': encoded_contents}
                Rows.append(row)
                Images.append(image)

            output = {'instances': Rows}

            ml_request = ml.projects().predict(name=modelVersionName,
                                               body=output)

            PATH_TO_LABELS = root_dir + "ml/" + str(
                machine_learning_settings.ml_compute_engine_id
            ) + "/label_map.pbtext"

            label_map_blob = bucket.blob(PATH_TO_LABELS)
            label_map_data = label_map_blob.download_as_string()

            category_index = categoryMap(session=session)

            try:
                time0 = time.time()
                response = ml_request.execute()
                time1 = time.time()
                print("Time in seconds", (time1 - time0), file=sys.stderr)

                print(response, file=sys.stderr)

                for i in range(len(Images)):
                    prediction = response['predictions'][i]

                    boxes = prediction['detection_boxes']
                    scores = prediction['detection_scores']
                    classes = prediction['detection_classes']

                    boxes = np.array(boxes)
                    scores = np.array(scores)
                    classes = np.array(classes, dtype=int)
                    print(classes, file=sys.stderr)

                    image_np = scipy.misc.imread(BytesIO(Images[i]))

                    # Handle gray scale
                    if len(image_np.shape) == 2:
                        image_np = np.stack((image_np, ) * 3, axis=2)

                    print(image_np.shape)

                    visualization_utils.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        boxes,
                        classes,
                        scores,
                        category_index,
                        use_normalized_coordinates=True,
                        min_score_thresh=.3,
                        line_thickness=2)

                    blob = bucket.blob(root_dir + "test_inference_out/" +
                                       str(i) + "_.jpg")

                    temp = tempfile.mkdtemp()
                    new_temp_filename = temp + "/inference_" + str(i) + "_.jpg"
                    scipy.misc.imsave(new_temp_filename, image_np)
                    blob.upload_from_filename(new_temp_filename,
                                              content_type="image/jpg")

                min_score_thresh = .05
                for i in range(len(boxes)):
                    if scores[i] > min_score_thresh:

                        class_name = category_index[classes[i]]['name']
                        print(class_name, scores[i], file=sys.stderr)

                # TODO add pub sub messaging
                out = 'success'

            except errors.HttpError as err:
                print('There was an error. Check the details:',
                      file=sys.stderr)
                print(err._get_reason(), file=sys.stderr)
                out = 'failed'

            t.cancel()
    def faster_rcnn_resnet_new_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        machine_learning_settings = get_ml_settings(session=session,
                                                    version=version)

        project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
            machine_learning_settings.ml_compute_engine_id)
        project_str += "/faster_rcnn_resnet.config"
        # Faster R-CNN with Resnet-101 (v1)

        root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(
            project.id) + "/" + str(version.id) + "/ml/" + str(
                machine_learning_settings.ml_compute_engine_id) + "/"
        num_classes_var = version.labels_number  # TODO get this automatically
        print("version.labels_number", version.labels_number, file=sys.stderr)
        min_dimension_var = 720
        max_dimension_var = 1280  # TODO get this automatically within limit
        first_stage_max_proposals_var = 100

        label_map_path_var = root_dir + "label_map.pbtxt"

        # Testing for multiple records?
        input_path_var = root_dir + "tfrecords_*.record"
        num_steps_var = machine_learning_settings.iterations

        # This is the shared generic starting point
        fine_tune_checkpoint_var = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + settings.RESNET_PRE_TRAINED_MODEL
        if re_train == 1:

            machine_learning_settings.previous_goal_iterations = machine_learning_settings.iterations
            num_steps_var = machine_learning_settings.iterations + 1500
            fine_tune_checkpoint_var = check_actual_model_path_name(
                session=session)

        model = "model {"
        faster_rcnn = "\nfaster_rcnn {"
        num_classes = "\nnum_classes: " + str(num_classes_var)
        image_resizer = "\nimage_resizer { \nkeep_aspect_ratio_resizer {"
        min_dimension = "\nmin_dimension: " + str(min_dimension_var)
        max_dimension = "\nmax_dimension: " + str(
            max_dimension_var) + "\n} \n}"

        feature_extractor = "\nfeature_extractor { \n type: 'faster_rcnn_resnet101' "
        first_stage_features_stride = "\nfirst_stage_features_stride: 16 \n } "
        first_stage_anchor_generator = """first_stage_anchor_generator \n{ \ngrid_anchor_generator 
        { \nscales: [0.25, 0.5, 1.0, 2.0] \naspect_ratios: [0.5, 1.0, 2.0] \nheight_stride: 16 \n
        width_stride: 16 \n } \n } \n"""

        first_stage_box_predictor_conv_hyperparams = """
        first_stage_box_predictor_conv_hyperparams {
        op: CONV
        regularizer {
        l2_regularizer {
        weight: 0.0
        }
        }
        initializer {
        truncated_normal_initializer {
        stddev: 0.01
        }
        }
        }
        first_stage_nms_score_threshold: 0.0
        first_stage_nms_iou_threshold: 0.7
        first_stage_localization_loss_weight: 2.0
        first_stage_objectness_loss_weight: 1.0
        initial_crop_size: 14
        maxpool_kernel_size: 2
        maxpool_stride: 2
        """

        first_stage_max_proposals = "\nfirst_stage_max_proposals:" + str(
            first_stage_max_proposals_var)

        second_stage_box_predictor = """
        second_stage_box_predictor {
        mask_rcnn_box_predictor {
        use_dropout: false
        dropout_keep_probability: 1.0
        fc_hyperparams {
        op: FC
        regularizer {
        l2_regularizer {
        weight: 0.0
        }
        }
        initializer {
        variance_scaling_initializer {
        factor: 1.0
        uniform: true
        mode: FAN_AVG
        }
        }
        }
        }
        }
        """
        second_stage_post_processing = """
        second_stage_post_processing {
        batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        """
        max_total_detections = "max_total_detections:" + str(
            first_stage_max_proposals_var) + "\n}"

        score_converter = """
        score_converter: SOFTMAX
        }
        second_stage_localization_loss_weight: 2.0
        second_stage_classification_loss_weight: 1.0
        """
        second_stage_batch_size = "\nsecond_stage_batch_size: " + str(
            first_stage_max_proposals_var) + "\n }\n }\n"

        train_config = """
        train_config: {
        batch_size: 1
        optimizer {
        momentum_optimizer: {
        learning_rate: {
        manual_step_learning_rate {
        initial_learning_rate: 0.0003
        schedule {
        step: 0
        learning_rate: .0003
        }
        schedule {
        step: 900000
        learning_rate: .00003
        }
        schedule {
        step: 1200000
        learning_rate: .000003
        }
        }
        }
        momentum_optimizer_value: 0.9
        }
        use_moving_average: false
        }
        gradient_clipping_by_norm: 10.0
        """
        fine_tune_checkpoint = "\nfine_tune_checkpoint: '" + str(
            fine_tune_checkpoint_var) + "'"

        from_detection_checkpoint = "\nfrom_detection_checkpoint: true"

        num_steps = "\nnum_steps: " + str(num_steps_var)

        data_augmentation_options = """
        data_augmentation_options {
        random_horizontal_flip {
        }
        }
        }
        """

        train_input_reader = """
        train_input_reader: {
        tf_record_input_reader {
        """

        input_path = "\ninput_path: '" + str(input_path_var) + "' \n}"
        label_map_path = "\nlabel_map_path: '" + str(
            label_map_path_var) + "'\n}"

        config_file_a = model + faster_rcnn + num_classes + image_resizer + min_dimension + max_dimension + feature_extractor + first_stage_features_stride + first_stage_anchor_generator
        config_file_b = first_stage_box_predictor_conv_hyperparams + first_stage_max_proposals + second_stage_box_predictor + second_stage_post_processing + max_total_detections
        config_file_c = score_converter + second_stage_batch_size + train_config + fine_tune_checkpoint + from_detection_checkpoint + num_steps + data_augmentation_options + train_input_reader + input_path + label_map_path

        config_file = config_file_a + config_file_b + config_file_c

        gcs = storage.Client()
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(project_str)
        blob.upload_from_string(config_file, content_type='text/config')

        print("Built Config", file=sys.stderr)
        out = 'success'

        return out, 200, {'ContentType': 'application/json'}
    def label_map_new_scope(session):
        if LoggedIn() != True:
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        ml_settings = get_ml_settings(session=session, version=version)
        Images = session.query(Image).filter_by(version_id=version.id)

        Labels = []

        # TODO Refactor, i.e. maintain a cache of all label ids used in a version.
        # Would need to store that cache per version
        # and update / delete it as labels are changed, OR collect it at the YAML stage.

        labels = session.query(Label).filter_by(project_id=project.id)
        for i in labels:
            if i.soft_delete != True:
                Labels.append(i)

        # Map db ids to ids starting with 1, 2, 3
        Labels.sort(key=lambda x: x.id)
        label_dict = {}
        start_at_1_label = 1
        lowest_label = 0
        for label in Labels:
            if label.id > lowest_label:
                label_dict[label.id] = start_at_1_label
                start_at_1_label += 1
                lowest_label = label.id

        print("label_dict length", len(label_dict), file=sys.stderr)

        project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
            ml_settings.ml_compute_engine_id)
        project_str += "/label_map.pbtext"

        file = ""

        Labels_unique = set(Labels)

        len_labels = len(Labels_unique)

        version.labels_number = len_labels
        session.add(version)
        session.commit()

        for i, c in enumerate(Labels_unique):
            new = "\nitem {"
            id = "\nid: " + str(label_dict[c.id])
            name = "\nname: " + str(c.name) + "\n }\n"

            file += new + id + name

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(project_str)
        blob.upload_from_string(file, content_type='text/pbtext')

        print("Built label_map", file=sys.stderr)
        out = get_secure_link(blob)

        return out, 200, {'ContentType': 'application/json'}
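    # For reference, each generated item renders roughly as (label name
    # illustrative):
    #
    # item {
    # id: 1
    # name: car
    #  }
    #
    # The standard TF Object Detection label map quotes the name field,
    # e.g. name: 'car'.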
        def task_manager_scope(session):
            project = get_current_project(session)
            version = get_current_version(session)
            machine_learning_settings = get_ml_settings(session=session,
                                                        version=version)

            Images = session.query(Image).filter_by(
                version_id=version.id).order_by(Image.id.desc())

            annotations_list = []
            len_images = Images.count()
            counter = 0
            for image in Images:

                # TODO maybe better to do in database?
                if image.soft_delete != True and image.is_test_image != True and image.done_labeling == True:

                    boxes = session.query(Box).filter_by(
                        image_id=image.id).order_by(Box.id.desc()).limit(100)

                    box_dict_list = []
                    for box in boxes:

                        label = session.query(Label).filter_by(
                            id=box.label_id).one()
                        if label is None:
                            print("Label is none", file=sys.stderr)

                        box_dict_list.append({
                            'label_id': label.id,
                            'label_name': label.name,
                            'x_min': box.x_min,
                            'x_max': box.x_max,
                            'y_min': box.y_min,
                            'y_max': box.y_max
                        })

                    image_dict = {
                        'image': {
                            'image_id': image.id,
                            'image_width': image.width,
                            'image_height': image.height,
                            'original_filename': image.original_filename
                        }
                    }

                    boxes_dict = {'boxes': box_dict_list}
                    annotations_list.append(
                        {'annotations': [image_dict, boxes_dict]})

                if counter % 10 == 0:
                    print("Percent done", (counter / len_images) * 100,
                          file=sys.stderr)
                counter += 1

            print("annotations_list len",
                  len(annotations_list),
                  file=sys.stderr)
            yaml_data = yaml.dump(annotations_list, default_flow_style=False)

            gcs = storage.Client()
            gcs = get_gcs_service_account(gcs)
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

            project_str = str(project.id) + "/" + str(
                version.id) + "/ml/" + str(
                    machine_learning_settings.ml_compute_engine_id
                ) + "/annotations.yaml"

            blob = bucket.blob(project_str)
            blob.upload_from_string(yaml_data, content_type='text/yaml')

            print("Built YAML, link below", file=sys.stderr)

            link = get_secure_link(blob)
            print(link, file=sys.stderr)

            t.cancel()
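        # Each entry in the uploaded annotations.yaml serialises to roughly the
        # following shape (values illustrative only):
        #
        # - annotations:
        #   - image:
        #       image_height: 720
        #       image_id: 42
        #       image_width: 1280
        #       original_filename: frame_0001.jpg
        #   - boxes:
        #     - label_id: 3
        #       label_name: car
        #       x_max: 200
        #       x_min: 10
        #       y_max: 180
        #       y_min: 20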
        def task_manager_scope(session):
            project = get_current_project(session)
            version = get_current_version(session)
            ml_settings = get_ml_settings(session=session, version=version)

            project_str = str(project.id) + "/" + str(version.id) + "/"

            gcs = storage.Client()
            gcs = get_gcs_service_account(gcs)
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
            blob = bucket.blob(project_str + "ml/" +
                               str(ml_settings.ml_compute_engine_id) +
                               "/tfrecords_0.record")
            INPUT_YAML = project_str + "ml/" + str(
                ml_settings.ml_compute_engine_id) + "/annotations.yaml"
            yaml_blob = bucket.blob(INPUT_YAML)

            yaml_bytes = yaml_blob.download_as_string()
            examples = yaml.safe_load(yaml_bytes)

            len_examples = len(examples)
            print("Loaded ", len(examples), "examples", file=sys.stderr)

            images_dir = project_str + "images/"
            for i in range(len(examples)):
                examples[i]['annotations'][0]['image'][
                    'image_id'] = images_dir + str(
                        examples[i]['annotations'][0]['image']['image_id'])

            counter = 0
            all_examples = []

            # Reassign db ids to be 1, 2, 3, etc. for TensorFlow
            # TODO this is clunky; there is surely a better way to do this
            Labels = []
            labels = session.query(Label).filter_by(project_id=project.id)
            for i in labels:
                if i.soft_delete != True:
                    Labels.append(i)
            Labels_unique = set(Labels)
            Labels.sort(key=lambda x: x.id)
            label_dict = {}
            start_at_1_label = 1
            lowest_label = 0
            for label in Labels:
                if label.id > lowest_label:
                    label_dict[label.id] = start_at_1_label
                    start_at_1_label += 1
                    lowest_label = label.id

            print("label_dict length", len(label_dict), file=sys.stderr)

            temp = tempfile.NamedTemporaryFile()
            writer = tf.python_io.TFRecordWriter(str(temp.name))

            for example in examples:

                tf_example = create_tf_example(example['annotations'],
                                               label_dict)
                writer.write(tf_example.SerializeToString())

                if counter % 2 == 0:
                    print("Percent done", (counter / len_examples) * 100)
                counter += 1

            writer.close()

            blob.upload_from_file(temp, content_type='text/record')
            temp.close()

            link = get_secure_link(blob)
            print(blob.name, file=sys.stderr)
            print("Built TF records", file=sys.stderr)
            t.cancel()
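        # create_tf_example() is used above but not shown. A sketch of what it
        # might look like for the annotation structure built in the YAML step,
        # assuming box coordinates are stored in pixels; a real implementation
        # would also read and encode the image bytes referenced by image_id:
        def create_tf_example_sketch(annotations, label_dict):
            image_info = annotations[0]['image']
            boxes = annotations[1]['boxes']
            width = image_info['image_width']
            height = image_info['image_height']

            xmins = [b['x_min'] / width for b in boxes]
            xmaxs = [b['x_max'] / width for b in boxes]
            ymins = [b['y_min'] / height for b in boxes]
            ymaxs = [b['y_max'] / height for b in boxes]
            classes = [label_dict[b['label_id']] for b in boxes]
            classes_text = [b['label_name'].encode('utf8') for b in boxes]

            feature = {
                'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
                'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
                'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmins)),
                'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmaxs)),
                'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymins)),
                'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymaxs)),
                'image/object/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=classes)),
                'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=classes_text)),
            }
            return tf.train.Example(features=tf.train.Features(feature=feature))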
        def task_manager_scope(session):
            print("[Training task manager] Started. Retrain_flag:", re_train,  file=sys.stderr)
            session = sessionMaker.scoppedSession() # Threadsafe

            # Maybe better to have this somewhere else
            version = get_current_version(session=session)
            if version.machine_learning_settings_id is None:
                ml_settings.machine_learning_settings_new(session=session)

            # Advance one for training if not retraining
            if re_train == 0:
                ml_settings.machine_learning_settings_edit(session=session, next_id=True)

            project = get_current_project(session=session)

            machine_learning_settings = get_ml_settings(session=session, version=version)

            JOB_NAME = "__projectID_" + str(project.id) + "__versionID_" + str(version.id) + "__ml_compute_id_" + str(machine_learning_settings.ml_compute_engine_id)

            if re_train == 1:
                machine_learning_settings.re_train_id += 1
                JOB_NAME += "__retrainID_" + str(machine_learning_settings.re_train_id)
        
            machine_learning_settings.JOB_NAME = JOB_NAME
            session.add(machine_learning_settings)
            session.commit()

            # Do YAML for retraining
            # TODO way to detect if this is needed or not...
            yamlNew(hold_thread=True)

            labelMapNew()
            fasterRcnnResnetNew(re_train=re_train)  # Config file

            tfrecordsNew(hold_thread=True)

            ### TRAINING
            runTraining(session)
            
            config = {}
            config['PUBSUB_TOPIC'] = settings.PUB_SUB_TOPIC
            config['PROJECT'] = settings.GOOGLE_PROJECT_NAME
            publisher = pubsub_v1.PublisherClient()
            topic_path = publisher.topic_path(config['PROJECT'], config['PUBSUB_TOPIC'])
            JOB_NAME = "train_" + machine_learning_settings.JOB_NAME
            JOB_NAME_FORMATTED = projectID + "/jobs/" + JOB_NAME

            training_flag = True
            while training_flag is True:
                
                request = ml.projects().jobs().get(name=JOB_NAME_FORMATTED)
                # TODO error handling
                response = request.execute()
                
                data = json.dumps(response)
                print(data, file=sys.stderr)
                data = data.encode()
                publisher.publish(topic_path, data=data)

                state = response['state']
                if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
                    training_flag = False
                else:
                    time.sleep(30)
            
            #### END TRAINING

            # Now need to run new model on re training
            if re_train == 0:
                runNewModel(session)

            ##### FROZEN
            trainingFrozenRun(session)

            JOB_NAME = "frozen_user_" + machine_learning_settings.JOB_NAME
            JOB_NAME_FORMATTED = projectID + "/jobs/" + JOB_NAME

            frozen_flag = True
            while frozen_flag is True:
                
                request = ml.projects().jobs().get(name=JOB_NAME_FORMATTED)

                # TODO error handling
                response = request.execute()

                data = json.dumps(response)
                print(data, file=sys.stderr)
                data = data.encode()

                publisher.publish(topic_path, data=data)

                state = response['state']
                if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
                    frozen_flag = False
                else:
                    time.sleep(30)

            
            #####
            runNewVersion(session)
            time.sleep(60*8)  # Sleep while long running operation
            runInferenceSingle()

            print("[Training task manager] SUCCESS", file=sys.stderr)
            t.cancel()