예제 #1
0
def main():
    """Download the images listed in the captions file and index the results.

    Reads ``GOOGLE_CAPTIONS_FILE`` line by line, takes the last tab-separated
    field of each line (presumably the image URL -- confirm against the
    captions format) and submits it to ``download_image`` on a pool of 15
    worker threads. Submission stops once ``NUM_SAMPLES`` lines have been
    read. For every completed download whose returned ``url`` is not
    ``None``, a ``"<url> <file_name>"`` line is appended to
    ``index_file.txt`` inside ``WORKSPACE``.

    A download that raises is reported on stdout and skipped, so one bad
    entry does not abort the rest of the batch.
    """
    with open(GOOGLE_CAPTIONS_FILE, 'r') as captions_file:
        # Ensure workspace directory exists
        make_directories(WORKSPACE)

        index_path = os.path.join(WORKSPACE, "index_file.txt")
        # Both files are context-managed so they are closed (and the index
        # flushed to disk) even if submitting or collecting results fails.
        with open(index_path, 'a') as index_file, \
                concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor:
            future_list = []
            for count, line in enumerate(captions_file, start=1):
                future_list.append(
                    executor.submit(download_image,
                                    line.strip().split("\t")[-1]))
                # The limit is checked after submitting, so the line that
                # reaches NUM_SAMPLES is still downloaded.
                if count >= NUM_SAMPLES:
                    break

            # Results are handled in completion order, not submission order.
            for future in concurrent.futures.as_completed(future_list):
                try:
                    url, file_name = future.result()
                except Exception as exc:
                    # Best-effort batch: report the failure and carry on.
                    print("Executor Service exception..!", exc)
                else:
                    if url is not None:
                        index_file.write(url + " " + file_name + "\n")
        nargs="?",
        type=str,
        default="../configs/resnet50_lstm_with_threshold.yaml",
        help="Configuration file to use",
    )

    args = parser.parse_args()

    with open(args.config) as fp:
        cfg = yaml.load(fp)

    dataset_cfg = get_dataset_metadata_cfg()
    model_workspace_dir = os.path.join(cfg["workspace"]["directory"],
                                       cfg["dataset"]["name"],
                                       cfg["model"]["arch"])
    utils.make_directories(model_workspace_dir)

    img_model = ResNet50(weights='imagenet')

    dataset_preprocessor = PreProcessing(cfg, "resnet50", False, False)
    dataset_preprocessor.run_one_time_encoding(img_model)

    # Load train, validation sets from the pre-processor
    training_generator, validation_generator, test_generator = dataset_preprocessor.get_keras_generators(
        "resnet50")

    MAX_LEN = 40
    EMBEDDING_DIM = 300
    IMAGE_ENC_DIM = 300
    vocab_size = get_line_count(
        os.path.join(cfg["workspace"]["directory"], cfg["dataset"]["name"],