def check_actual_model_path_name(session):
    # Find the newest training checkpoint in Cloud Storage and persist its
    # gs:// prefix on the ML settings row so later steps (e.g. export) can use it.
    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session,
                                                version=version)

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(
        project.id) + "/" + str(version.id) + "/ml/" + str(
            machine_learning_settings.ml_compute_engine_id) + "/"
    previous_goal_iterations = machine_learning_settings.previous_goal_iterations
    # The final global step can overshoot the requested iteration count, so
    # check a small window of checkpoint indexes around the previous goal.
    # Note: bucket is not defined in this snippet, so create a client here
    # (a shared module-level client would work equally well).
    gcs = storage.Client()
    bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

    for i in range(previous_goal_iterations - 1, previous_goal_iterations + 5):
        MODEL_NAME = 'model.ckpt-' + str(i) + '.index'
        trained_checkpoint_prefix = str(project.id) + "/" + str(
            version.id) + "/ml/" + str(
                machine_learning_settings.ml_compute_engine_id
            ) + "/train/" + MODEL_NAME
        blob = bucket.blob(trained_checkpoint_prefix)
        if blob.exists():
            MODEL_NAME = 'model.ckpt-' + str(i)
            trained_checkpoint_prefix = root_dir + "train/" + MODEL_NAME

            # Store in DB for other functions that need it
            machine_learning_settings.trained_checkpoint_prefix = trained_checkpoint_prefix
            session.add(machine_learning_settings)
            session.commit()

            print(trained_checkpoint_prefix, file=sys.stderr)
            return trained_checkpoint_prefix

    # No checkpoint was found in the expected range
    return None
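
# The snippets in this file reference several names that are defined outside
# the functions shown here: settings (the application's own configuration
# module, not shown), ml, projectID and, in places, a GCS bucket. A minimal,
# hedged sketch of that module-level setup follows; the import list and
# attribute names are inferred from how they are used below, not taken from
# the original module.
import base64
import json
import sys
import tempfile
import time
from io import BytesIO

import numpy as np
import scipy.misc
import tensorflow as tf
import yaml

from google.cloud import pubsub_v1, storage
from googleapiclient import discovery, errors
from oauth2client.client import GoogleCredentials
from object_detection.utils import visualization_utils

# Cloud ML Engine client plus the fully qualified project resource name that
# the job / model / version calls below are parented under.
credentials = GoogleCredentials.get_application_default()
ml = discovery.build('ml', 'v1', credentials=credentials)
projectID = 'projects/{}'.format(settings.GOOGLE_PROJECT_NAME)
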
def runTraining(session):
    if not LoggedIn():
        return defaultRedirect()

    have_error, params = training_pre_conditions(session)
    if have_error:
        print("have error", params, file=sys.stderr)
        return json.dumps(params), 200, {'ContentType':'application/json'}

    # TODO Thinking on reasonable way to "copy" a version and track changes

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)

    JOB_NAME = "train_" + machine_learning_settings.JOB_NAME
    print(JOB_NAME, file=sys.stderr)

    REGION = "us-central1"
    RUNTIME_VERSION = "1.2"

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id) + "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/"
    JOB_DIR = root_dir + "train"
    pipeline_config_path = root_dir + "faster_rcnn_resnet.config"

    MAIN_TRAINER_MODULE = 'object_detection.train'

    training_inputs = {
        'scaleTier': 'CUSTOM',
        'masterType': 'standard_gpu',
        'workerType': 'standard_gpu',
        'parameterServerType': 'standard_gpu',
        'workerCount': 2,
        'parameterServerCount': 1,
        'packageUris': ['gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_OBJECT_DETECTION_PYTHON,
                        'gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_SLIM_PYTHON],
        'pythonModule': MAIN_TRAINER_MODULE,
        'args': ['--train_dir', JOB_DIR,
                 '--pipeline_config_path', pipeline_config_path],
        'region': REGION,
        'jobDir': JOB_DIR,
        'runtimeVersion': RUNTIME_VERSION}

    job_spec = {'jobId': JOB_NAME, 'trainingInput': training_inputs}

    request = ml.projects().jobs().create(body=job_spec, parent=projectID)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        out = 'success'
        return out, 200, {'ContentType':'application/json'}

    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 500, {'ContentType': 'application/json'}
def trainingFrozenRun(session):

    if not LoggedIn():
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)

    #now=strftime("%Y_%m_%d_%H_%M_%S", gmtime())
    JOB_NAME = "frozen_user_" + machine_learning_settings.JOB_NAME
    print(JOB_NAME, file=sys.stderr)

    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id) + "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/"
    JOB_DIR = root_dir + str(machine_learning_settings.re_train_id) + "/frozen"
    REGION = "us-central1"
    RUNTIME_VERSION = "1.2"
 
    # Should be updated during training and stored in the DB?
    trained_checkpoint_prefix = configNew.check_actual_model_path_name(session=session)

    pipeline_config_path = root_dir + "faster_rcnn_resnet.config"
    MAIN_TRAINER_MODULE = "object_detection.export_inference_graph"

    training_inputs = {
        'scaleTier': 'CUSTOM',
        'masterType': 'large_model',
        'workerCount': 0,
        'packageUris': ['gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_OBJECT_DETECTION_PYTHON,
                        'gs://' + settings.CLOUD_STORAGE_BUCKET + '/' + settings.LIB_SLIM_PYTHON],
        'pythonModule': MAIN_TRAINER_MODULE,
        'args': ['--trained_checkpoint_prefix', trained_checkpoint_prefix,
                 '--pipeline_config_path', pipeline_config_path,
                 '--input_type', 'encoded_image_string_tensor',
                 '--output_directory', JOB_DIR],
        'region': REGION,
        'jobDir': JOB_DIR,
        'runtimeVersion': RUNTIME_VERSION}

    job_spec = {'jobId': JOB_NAME, 'trainingInput': training_inputs}

    request = ml.projects().jobs().create(body=job_spec, parent=projectID)

    try:
        response = request.execute()
        print(response, file=sys.stderr)
        out = 'success'
        return out, 200, {'ContentType':'application/json'}
    except errors.HttpError as err:
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 500, {'ContentType': 'application/json'}
def machine_learning_settings_edit(session, next_id=False):

    if not LoggedIn():
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session, version)

    if next_id is True: 
        machine_learning_settings.ml_compute_engine_id += 1

    session.add(machine_learning_settings)
    session.commit()

    return "Success", 200
def runNewVersion(session):

    if not LoggedIn():
        return defaultRedirect()

    project = get_current_project(session)
    version = get_current_version(session)
    machine_learning_settings = get_ml_settings(session=session, version=version)
    project_root = "a_" + str(project.id)

    # Creating version
    modelID = '{}/models/{}'.format(projectID, project_root)
    versionName = "a_" + str(version.id) + "_" + str(machine_learning_settings.ml_compute_engine_id)
    versionName += "_" + str(machine_learning_settings.re_train_id)

    # Maybe could include more info like date time?
    versionDescription = 'created by runNewVersion()'
    root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(project.id) + "/" + str(version.id)
    root_dir += "/ml/" + str(machine_learning_settings.ml_compute_engine_id) + "/" + str(machine_learning_settings.re_train_id) + "/"
    JOB_DIR = root_dir + "frozen/saved_model"

    requestDict = {'name': versionName,
                    'description': versionDescription,
                    'deploymentUri': JOB_DIR,
                    'runtimeVersion': '1.2'}

    request = ml.projects().models().versions().create(
        parent=modelID, body=requestDict)

    try:
        response = request.execute()
        print(response, file=sys.stderr)

        # versions.create returns a long-running operation; its name can be
        # polled (see wait_for_operation below) to know when the version is ready.
        operationID = response['name']

        out = 'success'
        return out, 200, {'ContentType': 'application/json'}
    except errors.HttpError as err:
        # Something went wrong, print out some information.
        print('There was an error. Check the details:', file=sys.stderr)
        print(err._get_reason(), file=sys.stderr)
        out = 'failed'
        return out, 500, {'ContentType': 'application/json'}
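
# runNewVersion() returns as soon as the version-create request is accepted,
# and task_manager_scope() further below simply sleeps for 8 minutes while the
# new version deploys. A hedged alternative sketch: poll the long-running
# operation returned by the create call (response['name'] above). This assumes
# the Cloud ML Engine v1 API's projects.operations.get method and a
# google.longrunning.Operation-style response with a 'done' field.
def wait_for_operation(ml, operationID, poll_seconds=30):
    # Poll until the long-running operation reports completion.
    while True:
        op = ml.projects().operations().get(name=operationID).execute()
        if op.get('done'):
            return op
        time.sleep(poll_seconds)
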
Example #6
def categoryMap(session):

    project = get_current_project(session=session)
    version = get_current_version(session=session)
    ml_settings = get_ml_settings(session=session, version=version)
    Labels_db = session.query(Label).filter_by(project_id=project.id).order_by(
        Label.id.desc())

    Labels = []

    for i in Labels_db:
        if not i.soft_delete:
            Labels.append(i)

    Labels_unique = set(Labels)

    Labels.sort(key=lambda x: x.id)
    label_dict = {}
    start_at_1_label = 1
    lowest_label = 0
    for label in Labels:
        if label.id > lowest_label:
            label_dict[label.id] = start_at_1_label
            start_at_1_label += 1
            lowest_label = label.id

    project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
        ml_settings.ml_compute_engine_id)
    project_str += "/label_map.pbtext"

    categoryMap = {}
    for c in Labels_unique:
        # The dict key and the inner 'id' must agree; both use the remapped
        # (1-based) label id, since iteration order over a set is arbitrary.
        mapped_id = int(label_dict[int(c.id)])
        categoryMap[mapped_id] = {'id': mapped_id, 'name': str(c.name)}

    return categoryMap
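
# For reference: categoryMap() returns the category_index structure that the
# TensorFlow Object Detection API's visualization utilities expect -- a dict
# keyed by the (1-based) class id whose value repeats that id plus a display
# name. The label names below are illustrative only.
example_category_index = {
    1: {'id': 1, 'name': 'car'},
    2: {'id': 2, 'name': 'pedestrian'},
}
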
    def faster_rcnn_resnet_new_scope(session):
        if not LoggedIn():
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        machine_learning_settings = get_ml_settings(session=session,
                                                    version=version)

        project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
            machine_learning_settings.ml_compute_engine_id)
        project_str += "/faster_rcnn_resnet.config"
        # Faster R-CNN with Resnet-101 (v1)

        root_dir = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + str(
            project.id) + "/" + str(version.id) + "/ml/" + str(
                machine_learning_settings.ml_compute_engine_id) + "/"
        num_classes_var = version.labels_number  # TODO get this automatically
        print("version.labels_number", version.labels_number, file=sys.stderr)
        min_dimension_var = 720
        max_dimension_var = 1280  # TODO get this automatically within limit
        first_stage_max_proposals_var = 100

        # Must match the file name uploaded by label_map_new_scope ("label_map.pbtext")
        label_map_path_var = root_dir + "label_map.pbtext"

        # Testing for multiple records?
        input_path_var = root_dir + "tfrecords_*.record"
        num_steps_var = machine_learning_settings.iterations

        # This is the shared generic starting point
        fine_tune_checkpoint_var = "gs://" + settings.CLOUD_STORAGE_BUCKET + "/" + settings.RESNET_PRE_TRAINED_MODEL
        # re_train is supplied by the enclosing wrapper (it is called further
        # below as fasterRcnnResnetNew(re_train=re_train))
        if re_train == 1:

            machine_learning_settings.previous_goal_iterations = machine_learning_settings.iterations
            num_steps_var = machine_learning_settings.iterations + 1500
            fine_tune_checkpoint_var = check_actual_model_path_name(
                session=session)

        model = "model {"
        faster_rcnn = "\nfaster_rcnn {"
        num_classes = "\nnum_classes: " + str(num_classes_var)
        image_resizer = "\nimage_resizer { \nkeep_aspect_ratio_resizer {"
        min_dimension = "\nmin_dimension: " + str(min_dimension_var)
        max_dimension = "\nmax_dimension: " + str(
            max_dimension_var) + "\n} \n}"

        feature_extractor = "\nfeature_extractor { \n type: 'faster_rcnn_resnet101' "
        first_stage_features_stride = "\nfirst_stage_features_stride: 16 \n } "
        first_stage_anchor_generator = """first_stage_anchor_generator \n{ \ngrid_anchor_generator 
        { \nscales: [0.25, 0.5, 1.0, 2.0] \naspect_ratios: [0.5, 1.0, 2.0] \nheight_stride: 16 \n
        width_stride: 16 \n } \n } \n"""

        first_stage_box_predictor_conv_hyperparams = """
        first_stage_box_predictor_conv_hyperparams {
        op: CONV
        regularizer {
        l2_regularizer {
        weight: 0.0
        }
        }
        initializer {
        truncated_normal_initializer {
        stddev: 0.01
        }
        }
        }
        first_stage_nms_score_threshold: 0.0
        first_stage_nms_iou_threshold: 0.7
        first_stage_localization_loss_weight: 2.0
        first_stage_objectness_loss_weight: 1.0
        initial_crop_size: 14
        maxpool_kernel_size: 2
        maxpool_stride: 2
        """

        first_stage_max_proposals = "\nfirst_stage_max_proposals:" + str(
            first_stage_max_proposals_var)

        second_stage_box_predictor = """
        second_stage_box_predictor {
        mask_rcnn_box_predictor {
        use_dropout: false
        dropout_keep_probability: 1.0
        fc_hyperparams {
        op: FC
        regularizer {
        l2_regularizer {
        weight: 0.0
        }
        }
        initializer {
        variance_scaling_initializer {
        factor: 1.0
        uniform: true
        mode: FAN_AVG
        }
        }
        }
        }
        }
        """
        second_stage_post_processing = """
        second_stage_post_processing {
        batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        """
        max_total_detections = "max_total_detections:" + str(
            first_stage_max_proposals_var) + "\n}"

        score_converter = """
        score_converter: SOFTMAX
        }
        second_stage_localization_loss_weight: 2.0
        second_stage_classification_loss_weight: 1.0
        """
        second_stage_batch_size = "\nsecond_stage_batch_size: " + str(
            first_stage_max_proposals_var) + "\n }\n }\n"

        train_config = """
        train_config: {
        batch_size: 1
        optimizer {
        momentum_optimizer: {
        learning_rate: {
        manual_step_learning_rate {
        initial_learning_rate: 0.0003
        schedule {
        step: 0
        learning_rate: .0003
        }
        schedule {
        step: 900000
        learning_rate: .00003
        }
        schedule {
        step: 1200000
        learning_rate: .000003
        }
        }
        }
        momentum_optimizer_value: 0.9
        }
        use_moving_average: false
        }
        gradient_clipping_by_norm: 10.0
        """
        fine_tune_checkpoint = "\nfine_tune_checkpoint: '" + str(
            fine_tune_checkpoint_var) + "'"

        from_detection_checkpoint = "\nfrom_detection_checkpoint: true"

        num_steps = "\nnum_steps: " + str(num_steps_var)

        data_augmentation_options = """
        data_augmentation_options {
        random_horizontal_flip {
        }
        }
        }
        """

        train_input_reader = """
        train_input_reader: {
        tf_record_input_reader {
        """

        input_path = "\ninput_path: '" + str(input_path_var) + "' \n}"
        label_map_path = "\nlabel_map_path: '" + str(
            label_map_path_var) + "'\n}"

        config_file_a = model + faster_rcnn + num_classes + image_resizer + min_dimension + max_dimension + feature_extractor + first_stage_features_stride + first_stage_anchor_generator
        config_file_b = first_stage_box_predictor_conv_hyperparams + first_stage_max_proposals + second_stage_box_predictor + second_stage_post_processing + max_total_detections
        config_file_c = score_converter + second_stage_batch_size + train_config + fine_tune_checkpoint + from_detection_checkpoint + num_steps + data_augmentation_options + train_input_reader + input_path + label_map_path

        config_file = config_file_a + config_file_b + config_file_c
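
        # The config above is assembled by string concatenation, which is easy
        # to break silently. A hedged sketch for sanity-checking it locally
        # before uploading, assuming the TF Object Detection API protos are
        # importable in this environment (left commented out so behaviour is
        # unchanged):
        #
        # from google.protobuf import text_format
        # from object_detection.protos import pipeline_pb2
        #
        # pipeline = pipeline_pb2.TrainEvalPipelineConfig()
        # text_format.Merge(config_file, pipeline)  # raises ParseError if malformed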

        gcs = storage.Client()
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(project_str)
        blob.upload_from_string(config_file, content_type='text/config')

        print("Built Config", file=sys.stderr)
        out = 'success'

        return out, 200, {'ContentType': 'application/json'}
        def task_manager_scope(session):
            print("[Training task manager] Started. Retrain_flag:", re_train,  file=sys.stderr)
            session = sessionMaker.scoppedSession() # Threadsafe

            # Maybe better to have this somewhere else
            version = get_current_version(session=session)
            if version.machine_learning_settings_id is None:
                ml_settings.machine_learning_settings_new(session=session)

            # Advance one for training if not retraining
            if re_train == 0:
                ml_settings.machine_learning_settings_edit(session=session, next_id=True)

            project = get_current_project(session=session)

            machine_learning_settings = get_ml_settings(session=session, version=version)

            JOB_NAME = "__projectID_" + str(project.id) + "__versionID_" + str(version.id) + "__ml_compute_id_" + str(machine_learning_settings.ml_compute_engine_id)

            if re_train == 1:
                machine_learning_settings.re_train_id += 1
                JOB_NAME += "__retrainID_" + str(machine_learning_settings.re_train_id)
        
            machine_learning_settings.JOB_NAME = JOB_NAME
            session.add(machine_learning_settings)
            session.commit()

            # Do YAML for retraining
            # TODO way to detect if this is needed or not...
            yamlNew(hold_thread=True)

            labelMapNew()
            fasterRcnnResnetNew(re_train=re_train)  # Config file

            tfrecordsNew(hold_thread=True)

            ### TRAINING
            runTraining(session)
            
            config = {}
            config['PUBSUB_TOPIC'] = settings.PUB_SUB_TOPIC
            config['PROJECT'] = settings.GOOGLE_PROJECT_NAME
            publisher = pubsub_v1.PublisherClient()
            topic_path = publisher.topic_path(config['PROJECT'], config['PUBSUB_TOPIC'])
            JOB_NAME = "train_" + machine_learning_settings.JOB_NAME
            JOB_NAME_FORMATTED = projectID + "/jobs/" + JOB_NAME

            training_flag = True
            while training_flag is True:
                
                request = ml.projects().jobs().get(name=JOB_NAME_FORMATTED)
                # TODO error handling
                response = request.execute()
                
                data = json.dumps(response)
                print(data, file=sys.stderr)
                data = data.encode()
                publisher.publish(topic_path, data=data)

                state = response['state']
                if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
                    training_flag = False
                else:
                    time.sleep(30)
            
            #### END TRAINING

            # Now need to run new model on re training
            if re_train == 0:
                runNewModel(session)

            ##### FROZEN
            trainingFrozenRun(session)

            JOB_NAME = "frozen_user_" + machine_learning_settings.JOB_NAME
            JOB_NAME_FORMATTED = projectID + "/jobs/" + JOB_NAME

            frozen_flag = True
            while frozen_flag is True:
                
                request = ml.projects().jobs().get(name=JOB_NAME_FORMATTED)

                # TODO error handling
                response = request.execute()

                data = json.dumps(response)
                print(data, file=sys.stderr)
                data = data.encode()

                publisher.publish(topic_path, data=data)

                state = response['state']
                if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
                    frozen_flag = False
                else:
                    time.sleep(30)

            
            #####
            runNewVersion(session)
            time.sleep(60*8)  # Sleep while long running operation
            runInferenceSingle()

            print("[Training task manager] SUCCESS", file=sys.stderr)
            t.cancel()
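
# The nested *_scope(session) functions in these examples rely on names that
# only exist in an enclosing wrapper which is not part of this dump: re_train,
# a timer-like object t (whose cancel() is called when the scope finishes),
# and the thread that runs the scope so the HTTP request can return right
# away. A minimal, hedged sketch of such a wrapper follows; the wrapper name,
# the keep-alive callback, and the exact threading primitives are assumptions,
# not the original implementation.
import threading


def trainingTaskManagerNew(re_train=0):  # hypothetical wrapper name

    def keep_alive():
        # Placeholder callback; the scope cancels the timer before it fires.
        print("[Training task manager] still running", file=sys.stderr)

    def task_manager_scope(session):
        # ... body as in the example above; it uses re_train and t from this
        # enclosing scope and creates its own scoped session ...
        t.cancel()

    # Cancellable one-shot timer; t.cancel() in the scope stops it.
    t = threading.Timer(60 * 60, keep_alive)
    t.start()

    # Run the long training pipeline off the request thread.
    threading.Thread(target=task_manager_scope, args=(None,)).start()

    return "Started", 200
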
Example #9
        def task_manager_scope(session):
            project = get_current_project(session)
            version = get_current_version(session)
            ml_settings = get_ml_settings(session=session, version=version)

            project_str = str(project.id) + "/" + str(version.id) + "/"

            gcs = storage.Client()
            gcs = get_gcs_service_account(gcs)
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
            blob = bucket.blob(project_str + "ml/" +
                               str(ml_settings.ml_compute_engine_id) +
                               "/tfrecords_0.record")
            INPUT_YAML = project_str + "ml/" + str(
                ml_settings.ml_compute_engine_id) + "/annotations.yaml"
            yaml_blob = bucket.blob(INPUT_YAML)

            yaml_bytes = yaml_blob.download_as_string()
            # safe_load is sufficient here (the YAML was dumped from plain
            # dicts and lists) and avoids the unsafe default loader
            examples = yaml.safe_load(yaml_bytes)

            len_examples = len(examples)
            print("Loaded ", len(examples), "examples", file=sys.stderr)

            images_dir = project_str + "images/"
            for i in range(len(examples)):
                examples[i]['annotations'][0]['image'][
                    'image_id'] = images_dir + str(
                        examples[i]['annotations'][0]['image']['image_id'])

            counter = 0
            all_examples = []

            # Reassign db ids to be 1 2 3  etc for tensorflow
            # TODO this is terrible surely better way to do this
            Labels = []
            labels = session.query(Label).filter_by(project_id=project.id)
            for i in labels:
                if not i.soft_delete:
                    Labels.append(i)
            Labels_unique = set(Labels)
            Labels.sort(key=lambda x: x.id)
            label_dict = {}
            start_at_1_label = 1
            lowest_label = 0
            for label in Labels:
                if label.id > lowest_label:
                    label_dict[label.id] = start_at_1_label
                    start_at_1_label += 1
                    lowest_label = label.id

            print("label_dict length", len(label_dict), file=sys.stderr)

            temp = tempfile.NamedTemporaryFile()
            writer = tf.python_io.TFRecordWriter(str(temp.name))

            for example in examples:

                tf_example = create_tf_example(example['annotations'],
                                               label_dict)
                writer.write(tf_example.SerializeToString())

                if counter % 2 == 0:
                    print("Percent done", (counter / len_examples) * 100)
                counter += 1

            writer.close()

            blob.upload_from_file(temp, content_type='text/record')
            temp.close()

            link = get_secure_link(blob)
            print(blob.name, file=sys.stderr)
            print("Built TF records", file=sys.stderr)
            t.cancel()
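
# create_tf_example() is called above but is not included in this dump. A
# hedged sketch of what it might look like for the annotation structure built
# by the YAML example further below (one {'image': {...}} dict followed by one
# {'boxes': [...]} dict), using the standard TensorFlow Object Detection API
# feature keys. Helper names and details here are assumptions, not the
# original implementation.
def create_tf_example(annotations, label_dict):
    image_info = annotations[0]['image']
    boxes = annotations[1]['boxes']

    width = image_info['image_width']
    height = image_info['image_height']

    # image_id was prefixed with "<project>/<version>/images/" by the caller,
    # so it can be used directly as the GCS object path. (The original
    # presumably reuses an existing client/bucket rather than creating one
    # per example.)
    gcs = storage.Client()
    bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
    encoded_image = bucket.blob(str(image_info['image_id'])).download_as_string()

    xmins, xmaxs, ymins, ymaxs, classes_text, classes = [], [], [], [], [], []
    for box in boxes:
        # Normalize pixel coordinates to [0, 1] as the OD API expects
        xmins.append(box['x_min'] / width)
        xmaxs.append(box['x_max'] / width)
        ymins.append(box['y_min'] / height)
        ymaxs.append(box['y_max'] / height)
        classes_text.append(str(box['label_name']).encode('utf8'))
        classes.append(label_dict[box['label_id']])

    def _bytes(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

    def _floats(value):
        return tf.train.Feature(float_list=tf.train.FloatList(value=value))

    def _ints(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

    return tf.train.Example(features=tf.train.Features(feature={
        'image/height': _ints([height]),
        'image/width': _ints([width]),
        'image/filename': _bytes([str(image_info['original_filename']).encode('utf8')]),
        'image/source_id': _bytes([str(image_info['image_id']).encode('utf8')]),
        'image/encoded': _bytes([encoded_image]),
        'image/format': _bytes([b'jpg']),
        'image/object/bbox/xmin': _floats(xmins),
        'image/object/bbox/xmax': _floats(xmaxs),
        'image/object/bbox/ymin': _floats(ymins),
        'image/object/bbox/ymax': _floats(ymaxs),
        'image/object/class/text': _bytes(classes_text),
        'image/object/class/label': _ints(classes),
    }))
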
Example #10
        def task_manager_scope(session):
            project = get_current_project(session)
            version = get_current_version(session)
            machine_learning_settings = get_ml_settings(session=session,
                                                        version=version)

            Images = session.query(Image).filter_by(
                version_id=version.id).order_by(Image.id.desc())

            annotations_list = []
            len_images = Images.count()
            counter = 0
            for image in Images:

                # TODO maybe better to do in database?
                if not image.soft_delete and not image.is_test_image and image.done_labeling:

                    boxes = session.query(Box).filter_by(
                        image_id=image.id).order_by(Box.id.desc()).limit(100)

                    box_dict_list = []
                    for box in boxes:

                        label = session.query(Label).filter_by(
                            id=box.label_id).first()
                        if label is None:
                            # .first() returns None (instead of .one() raising)
                            # so boxes with a missing label can be skipped
                            print("Label is none", file=sys.stderr)
                            continue

                        box_dict_list.append({
                            'label_id': label.id,
                            'label_name': label.name,
                            'x_min': box.x_min,
                            'x_max': box.x_max,
                            'y_min': box.y_min,
                            'y_max': box.y_max
                        })

                    image_dict = {
                        'image': {
                            'image_id': image.id,
                            'image_width': image.width,
                            'image_height': image.height,
                            'original_filename': image.original_filename
                        }
                    }

                    boxes_dict = {'boxes': box_dict_list}
                    annotations_list.append(
                        {'annotations': [image_dict, boxes_dict]})

                if counter % 10 == 0:
                    print("Percent done", (counter / len_images) * 100,
                          file=sys.stderr)
                counter += 1

            print("annotations_list len",
                  len(annotations_list),
                  file=sys.stderr)
            yaml_data = yaml.dump(annotations_list, default_flow_style=False)

            gcs = storage.Client()
            gcs = get_gcs_service_account(gcs)
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)

            project_str = str(project.id) + "/" + str(
                version.id) + "/ml/" + str(
                    machine_learning_settings.ml_compute_engine_id
                ) + "/annotations.yaml"

            blob = bucket.blob(project_str)
            blob.upload_from_string(yaml_data, content_type='text/yaml')

            print("Built YAML, link below", file=sys.stderr)

            link = get_secure_link(blob)
            print(link, file=sys.stderr)

            t.cancel()
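
# For reference, each entry of the annotations.yaml file produced above has
# roughly this shape (values illustrative); the tfrecords example earlier
# reads it back with yaml.safe_load and then prefixes image_id with
# "<project>/<version>/images/":
#
# - annotations:
#   - image:
#       image_id: 42
#       image_width: 1280
#       image_height: 720
#       original_filename: frame_000042.jpg
#   - boxes:
#     - label_id: 7
#       label_name: car
#       x_min: 100
#       x_max: 300
#       y_min: 50
#       y_max: 200
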
Example #11
    def label_map_new_scope(session):
        if not LoggedIn():
            return defaultRedirect()

        project = get_current_project(session)
        version = get_current_version(session)
        ml_settings = get_ml_settings(session=session, version=version)

        Labels = []

        # TODO Refactor: maintain a cache of all label ids used in a version.
        # Would need to store that cache per version and update / delete it as
        # labels are changed, OR collect at the YAML stage.

        labels = session.query(Label).filter_by(project_id=project.id)
        for i in labels:
            if not i.soft_delete:
                Labels.append(i)

        # Map db ids to ids starting with 1, 2, 3, ...
        Labels.sort(key=lambda x: x.id)
        label_dict = {}
        start_at_1_label = 1
        lowest_label = 0
        for label in Labels:
            if label.id > lowest_label:
                label_dict[label.id] = start_at_1_label
                start_at_1_label += 1
                lowest_label = label.id

        print("label_dict length", len(label_dict), file=sys.stderr)

        project_str = str(project.id) + "/" + str(version.id) + "/ml/" + str(
            ml_settings.ml_compute_engine_id)
        project_str += "/label_map.pbtext"

        file = ""

        Labels_unique = set(Labels)

        len_labels = len(Labels_unique)

        version.labels_number = len_labels
        session.add(version)
        session.commit()

        for c in Labels_unique:
            new = "\nitem {"
            id = "\nid: " + str(label_dict[c.id])
            # protobuf text format requires the name value to be quoted
            name = "\nname: '" + str(c.name) + "'\n }\n"

            file += new + id + name

        gcs = storage.Client()
        gcs = get_gcs_service_account(gcs)
        bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(project_str)
        blob.upload_from_string(file, content_type='text/pbtext')

        print("Built label_map", file=sys.stderr)
        out = get_secure_link(blob)

        return out, 200, {'ContentType': 'application/json'}
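
# For reference, the label map uploaded above ends up looking like the
# following (ids and names illustrative), which is the format the TensorFlow
# Object Detection API expects for label_map_path:
#
# item {
# id: 1
# name: 'car'
#  }
#
# item {
# id: 2
# name: 'pedestrian'
#  }
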
Example #12
        def task_manager_scope(session):
            credentials = GoogleCredentials.get_application_default()
            ml = discovery.build('ml', 'v1', credentials=credentials)
            projectID = 'projects/{}'.format(settings.GOOGLE_PROJECT_NAME)

            project = get_current_project(session=session)
            version = get_current_version(session=session)
            machine_learning_settings = get_ml_settings(session=session,
                                                        version=version)

            Images_db = session.query(Image).filter_by(version_id=version.id,
                                                       is_test_image=True)

            REGION = "us-central1"
            RUNTIME_VERSION = "1.2"

            modelName = "a_" + str(project.id)
            versionName = "a_" + str(version.id) + "_" + str(
                machine_learning_settings.ml_compute_engine_id)
            versionName += "_" + str(machine_learning_settings.re_train_id)
            modelVersionName = '{}/models/{}/versions/{}'.format(
                projectID, modelName, versionName)

            gcs = storage.Client()
            bucket = gcs.get_bucket(settings.CLOUD_STORAGE_BUCKET)
            filenames = []

            root_dir = str(project.id) + "/" + str(version.id) + "/"
            for image in Images_db:
                #print(image.is_test_image, file=sys.stderr)
                if not image.soft_delete:
                    filenames.append(root_dir + "images/" + str(image.id))
                    # Single-image inference: only the first test image is used
                    break

            Rows = []
            Images = []
            print("len(filenames):", len(filenames), file=sys.stderr)

            for file in filenames:
                blob = bucket.blob(file)
                image = blob.download_as_string()

                # Resize
                image = scipy.misc.imread(BytesIO(image))
                if image is None:
                    raise IOError("Could not open")

                # TODO BETTER WAY
                #image = scipy.misc.imresize(image, (640, 960))
                temp = tempfile.mkdtemp()
                new_temp_filename = temp + "/resized.jpg"
                scipy.misc.imsave(new_temp_filename, image)

                # Otherwise have strange byte issues
                blob = bucket.blob(file + "_test_resized")
                blob.upload_from_filename(new_temp_filename,
                                          content_type="image/jpg")
                image = blob.download_as_string()

                encoded_contents = base64.b64encode(image).decode('UTF-8')
                row = {'b64': encoded_contents}
                Rows.append(row)
                Images.append(image)

            output = {'instances': Rows}

            ml_request = ml.projects().predict(name=modelVersionName,
                                               body=output)

            PATH_TO_LABELS = root_dir + "ml/" + str(
                machine_learning_settings.ml_compute_engine_id
            ) + "/label_map.pbtext"

            label_map_blob = bucket.blob(PATH_TO_LABELS)
            label_map_data = label_map_blob.download_as_string()

            category_index = categoryMap(session=session)

            try:
                time0 = time.time()
                response = ml_request.execute()
                time1 = time.time()
                print("Time in seconds", (time1 - time0), file=sys.stderr)

                print(response, file=sys.stderr)

                for i in range(len(Images)):
                    # Keep the full response intact; pull out this image's prediction
                    prediction = response['predictions'][i]

                    boxes = prediction['detection_boxes']
                    scores = prediction['detection_scores']
                    classes = prediction['detection_classes']

                    boxes = np.array(boxes)
                    scores = np.array(scores)
                    classes = np.array(classes, dtype=int)
                    print(classes, file=sys.stderr)

                    image_np = scipy.misc.imread(BytesIO(Images[i]))

                    # Handle gray scale
                    if len(image_np.shape) == 2:
                        image_np = np.stack((image_np, ) * 3, axis=2)

                    print(image_np.shape)

                    visualization_utils.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        boxes,
                        classes,
                        scores,
                        category_index,
                        use_normalized_coordinates=True,
                        min_score_thresh=.3,
                        line_thickness=2)

                    blob = bucket.blob(root_dir + "test_inference_out/" +
                                       str(i) + "_.jpg")

                    temp = tempfile.mkdtemp()
                    new_temp_filename = temp + "/inference_" + str(i) + "_.jpg"
                    scipy.misc.imsave(new_temp_filename, image_np)
                    blob.upload_from_filename(new_temp_filename,
                                              content_type="image/jpg")

                # Note: boxes / scores / classes here come from the last image
                # processed in the loop above
                min_score_thresh = .05
                for i in range(len(boxes)):
                    if scores[i] > min_score_thresh:

                        class_name = category_index[classes[i]]['name']
                        print(class_name, scores[i], file=sys.stderr)

                # TODO add pub sub messaging
                out = 'success'

            except errors.HttpError as err:
                print('There was an error. Check the details:',
                      file=sys.stderr)
                print(err._get_reason(), file=sys.stderr)
                out = 'failed'

            t.cancel()