Python input_fn Examples, read_dataset.input_fn Python Examples

Example #1

0

Show file

def train_and_evaluate(train_dir, eval_dir, config, model_dir=None):
    """Builds an Estimator and runs its combined train/evaluate loop.

    Args:
        train_dir (string): Path of the training directory.
        eval_dir (string): Path of the evaluation directory.
        config (configparser): Config file containing the different configurations
                               and hyperparameters. The 'RUN_CONFIG' section must
                               provide 'save_checkpoints_secs', 'keep_checkpoint_max',
                               'train_steps' and 'throttle_secs'.
        model_dir (string): Directory where all outputs (checkpoints, event files, etc.) are written.
                            If model_dir is not set, a temporary directory is used.
    """

    run_settings = config['RUN_CONFIG']

    def training_input():
        # Training data is repeated and shuffled.
        return input_fn(train_dir, repeat=True, shuffle=True)

    def evaluation_input():
        # Evaluation data is read once, in order.
        return input_fn(eval_dir, repeat=False, shuffle=False)

    # Checkpointing policy: how often to save and how many checkpoints to keep.
    checkpoint_policy = tf.estimator.RunConfig(
        save_checkpoints_secs=int(run_settings['save_checkpoints_secs']),
        keep_checkpoint_max=int(run_settings['keep_checkpoint_max']),
    )

    # The whole config object is forwarded to model_fn through `params`.
    model = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_dir,
        config=checkpoint_policy,
        params=config,
    )

    training_plan = tf.estimator.TrainSpec(
        input_fn=training_input,
        max_steps=int(run_settings['train_steps']),
    )

    evaluation_plan = tf.estimator.EvalSpec(
        input_fn=evaluation_input,
        throttle_secs=int(run_settings['throttle_secs']),
    )

    tf.estimator.train_and_evaluate(model, training_plan, evaluation_plan)

Example #2

0

Show file

def predict(test_dir, model_dir, config):
    """
    Runs a trained model over a test dataset and collects its predictions.

    Args:
        test_dir (string): Path of the test directory.
        model_dir (string): Directory with the trained model.
        config (configparser): Config file containing the different configurations
                               and hyperparameters.

    Returns:
        list: A list with the predicted values (the 'predictions' entry of
              every element yielded by the estimator).
    """

    def test_input():
        # Test data is read once, in order, so predictions line up with samples.
        return input_fn(test_dir, repeat=False, shuffle=False)

    model = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_dir,
        params=config,
    )

    predicted_values = []
    for output in model.predict(input_fn=test_input):
        predicted_values.append(output['predictions'])

    return predicted_values

Example #3

0

Show file

def _spill_to_parquet(frames, directory, step):
    """Concatenates `frames`, writes the result to a parquet file in `directory` and returns its path."""
    chunk_path = os.path.join(directory, "tmp_df_" + str(step) + ".parquet")
    pd.concat(frames).to_parquet(chunk_path)
    return chunk_path


def predict_and_save(test_dir, model_dir, save_dir, filename, config):
    """Generates and saves a Pandas Dataframe in CSV format with the real and the predicted delay.

    For every sample, the dataframe also holds the absolute error and the absolute
    percentage error between the real and the predicted delay. No aggregate (mean)
    is computed here; callers can take the mean of the 'Absolute_Percentage_Error'
    column to obtain the MAPE.

    Args:
        test_dir (string): Path of the test directory.
        model_dir (string): Directory with the trained model.
        save_dir (string): Directory where the generated dataframe will be saved (in csv).
                           It is created if it does not exist.
        filename (string): The filename of the dataframe.
        config (configparser): Config file containing the different configurations
                               and hyperparameters.

    Returns:
        pandas.DataFrame: The dataframe written to disk, with the columns 'Delay',
        'Predicted_Delay', 'Absolute_Error' and 'Absolute_Percentage_Error'.
    """

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(save_dir, exist_ok=True)

    ds = input_fn(test_dir, repeat=False, shuffle=False)

    # The real delays are gathered in chunks that are spilled to temporary parquet
    # files. The context manager removes the temporary directory (and the chunks in
    # it) once they have been loaded back, instead of leaving them behind on disk.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dataframes_to_concat = []
        df_files = []
        delays = np.array([])
        it = 0

        for _, target in ds:
            it += 1
            delays = np.append(delays, target)

            # Every 1000 iterations the accumulated delays are wrapped in a dataframe,
            # which keeps the array that np.append has to copy small.
            if it % 1000 == 0:
                dataframes_to_concat.append(pd.DataFrame({"Delay": delays}))
                delays = np.array([])

                # Every 3000 iterations the pending dataframes are written to disk.
                if it % 3000 == 0:
                    df_files.append(
                        _spill_to_parquet(dataframes_to_concat, tmp_dir, it))
                    dataframes_to_concat = []

        # Flush whatever is left after the last full chunk.
        if it % 1000 != 0:
            dataframes_to_concat.append(pd.DataFrame({"Delay": delays}))
        if dataframes_to_concat:
            df_files.append(_spill_to_parquet(dataframes_to_concat, tmp_dir, it))

        df_list = [pd.read_parquet(chunk_path) for chunk_path in df_files]

    # pd.concat raises on an empty list, so an empty dataset yields an empty frame.
    if df_list:
        df = pd.concat(df_list)
    else:
        df = pd.DataFrame({"Delay": delays})

    predictions = predict(test_dir, model_dir, config)

    df["Predicted_Delay"] = predictions
    df['Absolute_Error'] = np.abs(df["Delay"] - df["Predicted_Delay"])
    # NOTE: a real delay of exactly 0 makes this division produce inf (or NaN).
    df['Absolute_Percentage_Error'] = (df['Absolute_Error'] /
                                       np.abs(df["Delay"])) * 100

    df.to_csv(os.path.join(save_dir, filename))

    return df

Example #4

0

Show file

File: generate_submission.py Project: ZiyadMoraished/ITU-ML5G-PS-014_WAZD

def generate_upload_csv(test_dir, model_dir, filename, config):
    """Generates, compresses (in ZIP) and saves a Pandas Dataframe in CSV format with the predicted delays.

    The output is written to '<filename>.zip' and the CSV inside the archive is
    named '<filename>.csv'. Values are separated by ';' and neither a header nor
    an index column is written.

    Args:
        test_dir (string): Path of the test dataset root directory.
        model_dir (string): Directory of the trained model.
        filename (string): The filename of the compressed CSV file. It must be a
                           simple name, without path and without extension.
        config (configparser): Config file containing the different configurations
                               and hyperparameters.

    Raises:
        ValueError: If the number of predictions is not a multiple of 342
                    (raised by the reshape).
    """

    # The network of the test dataset has in total 342 src-dst paths
    # (19 sources x 18 destinations = 342 src-dst pairs).
    paths_per_sample = 342
    # Shape the submission file is expected to have: 50,000 lines of 342 values.
    expected_shape = (50000, paths_per_sample)

    # IMPORTANT NOTE! In order to compress the data, pandas needs for the output file a simple filename, without including the route or path and the extension.
    # (i.e., "submission_file", not "./home/dataset/submission_file.zip")
    if '/' in filename:
        print("---WARNING---")
        print(
            "---Filename must be a simple filename, it should not include a path--- Use \"submission_file\" instead of \"./home/dataset/submission_file.zip\""
        )

    print("GENERATING DELAY LABELS WITH THE TRAINED MODEL...")
    ########################
    # Generate predictions #
    ########################

    # Create the estimator loading the model
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=model_dir,
                                       params=config)

    # Generate the dataset and make the predictions
    pred_results = estimator.predict(
        input_fn=lambda: input_fn(test_dir, repeat=False, shuffle=False))

    # Collect the predictions
    pred = np.array([item['predictions'] for item in pred_results])

    ###################
    # Denormalization #
    ###################
    # If you have applied any normalization, please denormalize the predicted values here

    ####################
    # Prepare the data #
    ####################
    print("RESHAPING THE DATA...")
    # Prepare the data as it should be in the CSV file (each line contains the 342 src-dst delays of a sample).
    # Integer division keeps the row count an exact int instead of going through a float.
    pred = pred.reshape(pred.shape[0] // paths_per_sample, paths_per_sample)

    print("CHECKING CSV format...")
    if pred.shape != expected_shape:
        print("--- WARNING ---")
        print(
            "--- The format of the CSV file is not correct. It must have 50,000 lines with 342 values each one---"
        )
        print("It has currently the following lines and and elements: " +
              str(pred.shape))

    print("SAVING CSV FILE COMPRESSED IN ZIP...")

    df = pd.DataFrame(pred)
    # The CSV file will be directly compressed in ZIP. Passing archive_name makes
    # the member inside the archive '<filename>.csv'; with a plain 'zip' string
    # pandas derives the member name from the '.zip' output path instead.
    # NOTE(review): dict-valued `compression` requires pandas >= 1.0.
    compression_options = dict(method='zip', archive_name=f'{filename}.csv')
    # The CSV file uses ";" as separator between values
    df.to_csv(f'{filename}.zip',
              header=False,
              index=False,
              sep=";",
              compression=compression_options)