コード例 #1
0
def news_baseline_test():
    """Fetch and print the first 10 elements of the tweet training dataset.

    Loads the AutoDL dataset stored under ``./tweet.data/train/``, pulls ten
    elements from a one-shot iterator in a TensorFlow session, and prints
    each fetched element to stdout.

    Returns:
        None. The fetched elements are only printed.
    """
    dataset = AutoDLDataset('./tweet.data/train/')
    dataset.init()
    iterator = dataset.get_dataset().make_one_shot_iterator()
    next_element = iterator.get_next()

    data = []
    # Use the session as a context manager so it is closed (and its
    # resources released) even if fetching an element raises.
    with tf.Session() as sess:
        for idx in range(10):
            print("Example " + str(idx))
            data.append(sess.run(next_element))

    for each_data in data:
        # Function-call form of print: valid on Python 3 and prints the same
        # text as the former `print each_data` statement on Python 2.
        print(each_data)
コード例 #2
0
      if not use_done_training_api:
        logger.warning("Your model object doesn't have an attribute " +
                       "`done_training`. But this is necessary for ingestion " +
                       "program to know whether the model has done training " +
                       "and to decide whether to proceed more training. " +
                       "Please add this attribute to your model.")

      # Keeping track of how many predictions are made
      prediction_order_number = 0

      # Start the CORE PART: train/predict process
      while(not (use_done_training_api and M.done_training)):
        remaining_time_budget = start + time_budget - time.time()
        # Train the model
        logger.info("Begin training the model...")
        M.train(D_train.get_dataset(),
                remaining_time_budget=remaining_time_budget)
        logger.info("Finished training the model.")
        remaining_time_budget = start + time_budget - time.time()
        # Make predictions using the trained model
        logger.info("Begin testing the model by making predictions " +
                     "on test set...")
        Y_pred = M.test(D_test.get_dataset(),
                        remaining_time_budget=remaining_time_budget)
        logger.info("Finished making predictions.")
        if Y_pred is None: # Stop train/predict process if Y_pred is None
          logger.info("The method model.test returned `None`. " +
                      "Stop train/predict process.")
          break
        else: # Check if the prediction has good shape
          prediction_shape = tuple(Y_pred.shape)
コード例 #3
0
    # Write solution files in output_dir
    copy_solution(dataset_dir, output_dir)

    ##### Begin creating training set and test set #####
    logger.info("Reading training set and test set...")
    D_train = AutoDLDataset(os.path.join(dataset_dir, basename, "train"))
    D_test = AutoDLDataset(os.path.join(dataset_dir, basename, "test"))
    D_predict_data = None

    if os.path.exists(os.path.join(dataset_dir, basename, "unlabelled")):
        logger.info("Found some data to predict...")
        D_predict = AutoDLDataset(
            os.path.join(dataset_dir, basename, "unlabelled"))
        num_examples_predict = D_predict.get_metadata().size()
        D_predict_data = D_predict.get_dataset()

    ##### End creating training set and test set #####

    ## Get correct prediction shape
    num_examples_test = D_test.get_metadata().size()
    output_dim = D_test.get_metadata().get_output_size()
    correct_prediction_shape = (num_examples_test, output_dim)

    # 20 min for participants to initialize and install other packages
    try:
        init_time_budget = 20 * 60  # time budget for initilization.
        timer = Timer()
        timer.set(init_time_budget)
        with timer.time_limit("Initialization"):
            ##### Begin creating model #####
コード例 #4
0
def ingestion_fn(dataset_dir,
                 code_dir,
                 time_budget,
                 time_budget_approx,
                 output_dir,
                 score_dir,
                 model_config_name=None,
                 model_config=None):
    """Run the ingestion program: create the model, then train/predict in a loop.

    The function locates the single ``*.data`` dataset under ``dataset_dir``,
    builds the participant's ``Model`` from its training metadata, and then
    alternates ``M.train`` / ``M.test`` until one of the following happens:
    the model sets ``done_training``, ``M.test`` returns ``None``, the
    approximate time budget is exhausted, or an exception is raised.
    Each accepted prediction is written to ``output_dir``; at the end an
    ``end.txt`` summary is written and the content of ``output_dir`` is
    copied to ``score_dir``.

    Args:
        dataset_dir: Directory that must contain exactly one ``*.data``
            dataset (with ``train`` and ``test`` sub-directories).
        code_dir: Directory of the submission; it is appended to ``path``
            so that ``model`` can be imported from it.
        time_budget: Time budget in seconds, used to compute the
            ``remaining_time_budget`` passed to ``M.train`` / ``M.test``
            and recorded via ``write_start_file``.
        time_budget_approx: Time budget in seconds used to decide, after
            each prediction, whether the loop must stop.
        output_dir: Directory receiving prediction files, timestamps and
            ``end.txt``.
        score_dir: Directory to which everything in ``output_dir`` is
            copied at the end.
        model_config_name: Forwarded unchanged to ``Model``.
        model_config: Forwarded unchanged to ``Model``.

    Raises:
        ValueError: If ``dataset_dir`` does not contain exactly one
            ``*.data`` dataset.

    Exceptions raised inside the train/predict loop are caught, logged, and
    reported as ``ingestion_success: 0`` in ``end.txt``. Exceptions raised
    while importing or constructing the model are NOT caught here.
    """
    #### Check whether everything went well
    ingestion_success = True

    # Parse directories
    root_dir = _HERE(os.pardir)
    ingestion_program_dir = join(root_dir, "ingestion_program")

    if dataset_dir.endswith("run/input") and code_dir.endswith("run/program"):
        logger.debug(
            "Since dataset_dir ends with 'run/input' and code_dir "
            "ends with 'run/program', suppose running on "
            "CodaLab platform. Modify dataset_dir to 'run/input_data' "
            "and code_dir to 'run/submission'. "
            "Directory parsing should be more flexible in the code of "
            "compute worker: we need explicit directories for "
            "dataset_dir and code_dir.")
        dataset_dir = dataset_dir.replace("run/input", "run/input_data")
        code_dir = code_dir.replace("run/program", "run/submission")

    # Show directories for debugging
    logger.debug("sys.argv = " + str(sys.argv))
    logger.debug("Using dataset_dir: " + dataset_dir)
    logger.debug("Using output_dir: " + output_dir)
    logger.debug("Using ingestion_program_dir: " + ingestion_program_dir)
    logger.debug("Using code_dir: " + code_dir)

    # Our libraries: make the ingestion program and the submission importable.
    path.append(ingestion_program_dir)
    path.append(code_dir)
    # IG: to allow submitting the starting kit as sample submission
    path.append(code_dir + "/sample_code_submission")
    # These imports must come after the path manipulation above.
    import data_io
    from dataset import AutoDLDataset  # THE class of AutoDL datasets

    data_io.mkdir(output_dir)

    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(dataset_dir)
    #### Keep only dataset directories (drop zip files and metadata file)
    datanames = [x for x in datanames if x.endswith(".data")]

    if len(datanames) != 1:
        raise ValueError("{} datasets found in dataset_dir={}!\n".format(
            len(datanames), dataset_dir) +
                         "Please put only ONE dataset under dataset_dir.")

    basename = datanames[0]

    logger.info("************************************************")
    # basename[:-5] strips the trailing ".data" suffix.
    logger.info("******** Processing dataset " + basename[:-5].capitalize() +
                " ********")
    logger.info("************************************************")
    logger.debug("Version: {}. Description: {}".format(VERSION, DESCRIPTION))

    ##### Begin creating training set and test set #####
    logger.info("Reading training set and test set...")
    D_train = AutoDLDataset(os.path.join(dataset_dir, basename, "train"))
    D_test = AutoDLDataset(os.path.join(dataset_dir, basename, "test"))
    ##### End creating training set and test set #####

    ## Get correct prediction shape
    num_examples_test = D_test.get_metadata().size()
    output_dim = D_test.get_metadata().get_output_size()
    correct_prediction_shape = (num_examples_test, output_dim)

    # NOTE: the 20-minute time limit on initialization (Timer /
    # TimeoutException) is currently disabled, so model creation below is
    # neither time-limited nor protected by a try/except.

    ##### Begin creating model #####
    logger.info("Creating model...this process should not exceed 20min.")
    from model import Model  # in participants' model.py

    # The metadata of D_train and D_test only differ in sample_count
    M = Model(D_train.get_metadata(),
              model_config_name=model_config_name,
              model_config=model_config)
    ###### End creating model ######

    # Mark starting time of ingestion
    start = time.time()
    logger.info("=" * 5 + " Start core part of ingestion program. " +
                "Version: {} ".format(VERSION) + "=" * 5)

    write_start_file(output_dir,
                     start_time=start,
                     time_budget=time_budget,
                     task_name=basename.split(".")[0])

    try:
        # Check if the model has methods `train` and `test`.
        for attr in ["train", "test"]:
            if not hasattr(M, attr):
                # Fill the placeholder with the missing method's name;
                # previously the message was raised with a literal `{}`.
                raise ModelApiError(
                    ("Your model object doesn't have the method "
                     "`{}`. Please implement it in model.py.").format(attr))

        # Check if model.py uses new done_training API instead of marking
        # stopping by returning None
        use_done_training_api = hasattr(M, "done_training")
        if not use_done_training_api:
            logger.warning(
                "Your model object doesn't have an attribute " +
                "`done_training`. But this is necessary for ingestion " +
                "program to know whether the model has done training " +
                "and to decide whether to proceed more training. " +
                "Please add this attribute to your model.")

        # Keeping track of how many predictions are made
        prediction_order_number = 0

        # Start the CORE PART: train/predict process
        while not (use_done_training_api and M.done_training):
            remaining_time_budget = start + time_budget - time.time()
            # Train the model
            logger.info("Begin training the model...")
            M.train(D_train.get_dataset(),
                    remaining_time_budget=remaining_time_budget)
            logger.info("Finished training the model.")
            # Make predictions using the trained model
            logger.info("Begin testing the model by making predictions " +
                        "on test set...")
            # Recompute: training consumed part of the budget.
            remaining_time_budget = start + time_budget - time.time()
            Y_pred = M.test(D_test.get_dataset(),
                            remaining_time_budget=remaining_time_budget)
            logger.info("Finished making predictions.")
            if Y_pred is None:  # Stop train/predict process if Y_pred is None
                logger.info("The method model.test returned `None`. " +
                            "Stop train/predict process.")
                break
            else:  # Check if the prediction has good shape
                prediction_shape = tuple(Y_pred.shape)
                if prediction_shape != correct_prediction_shape:
                    raise BadPredictionShapeError(
                        "Bad prediction shape! Expected {} but got {}.".format(
                            correct_prediction_shape, prediction_shape))
            # A prediction finished after the approximate budget is discarded:
            # leave the loop before writing it.
            remaining_time_budget = start + time_budget_approx - time.time()
            if remaining_time_budget < 0:
                break
            # Record the timestamp of this prediction (presumably appended
            # to 'start.txt' by write_timestamp -- defined elsewhere).
            write_timestamp(output_dir,
                            predict_idx=prediction_order_number,
                            timestamp=time.time())
            # Prediction files, e.g. for 'adult.data':
            # adult.predict_0, adult.predict_1, ...
            filename_test = basename[:-5] + ".predict_" + str(
                prediction_order_number)
            # Write predictions to output_dir
            data_io.write(os.path.join(output_dir, filename_test), Y_pred)
            prediction_order_number += 1
            logger.info(
                "[+] {0:d} predictions made, time spent so far {1:.2f} sec".
                format(prediction_order_number,
                       time.time() - start))
            remaining_time_budget = start + time_budget_approx - time.time()
            logger.info(
                "[+] Time left {0:.2f} sec".format(remaining_time_budget))

    except Exception as e:
        # Top-level boundary: any failure in the train/predict process is
        # logged and reported through end.txt instead of crashing.
        ingestion_success = False
        logger.info("Failed to run ingestion.")
        logger.error("Encountered exception:\n" + str(e), exc_info=True)

    # Finishing ingestion program
    end_time = time.time()
    overall_time_spent = end_time - start

    # Write overall_time_spent to a end.txt file
    end_filename = "end.txt"
    with open(os.path.join(output_dir, end_filename), "w") as f:
        f.write("ingestion_duration: " + str(overall_time_spent) + "\n")
        f.write("ingestion_success: " + str(int(ingestion_success)) + "\n")
        f.write("end_time: " + str(end_time) + "\n")
        logger.info("Wrote the file {} marking the end of ingestion.".format(
            end_filename))
        if ingestion_success:
            logger.info("[+] Done. Ingestion program successfully terminated.")
            logger.info("[+] Overall time spent %5.2f sec " %
                        overall_time_spent)
        else:
            logger.info(
                "[-] Done, but encountered some errors during ingestion.")
            logger.info("[-] Overall time spent %5.2f sec " %
                        overall_time_spent)

    # Copy all files in output_dir to score_dir
    # NOTE(review): this builds a shell command from unquoted paths; paths
    # containing spaces or shell metacharacters would break it.
    os.system("cp -R {} {}".format(os.path.join(output_dir, "*"), score_dir))
    logger.debug("Copied all ingestion output to scoring output directory.")

    logger.info("[Ingestion terminated]")