Example #1
0
def prepare_submit(video_ids, predictions_path, output_path, num_predictions=None):
    """Write a submission CSV mapping each video id to its top predicted labels.

    Prediction chunks under ``predictions_path`` are loaded in natural sort
    order and their rows are matched positionally to ``video_ids`` — the two
    must therefore be in the same order.

    Args:
        video_ids: sequence of video ids aligned with the concatenated
            prediction rows.
        predictions_path: directory of numpy files, each holding an array of
            per-video confidence vectors.
        output_path: destination path of the CSV file to write.
        num_predictions: number of top labels to emit per video. Defaults to
            the module-level ``NUM_PREDICTIONS`` (generalized from the
            previously hard-coded constant; resolved lazily so module import
            order is unaffected).
    """
    if num_predictions is None:
        num_predictions = NUM_PREDICTIONS

    predictions = []
    current_video_id_index = 0
    for chunk_name in sort_files_natural(os.listdir(predictions_path)):
        chunk = np.load(os.path.join(predictions_path, chunk_name))
        for video_predictions in chunk:
            video_id = video_ids[current_video_id_index]
            # Label indices sorted by descending confidence.
            labels = np.argsort(video_predictions)[::-1]

            label_confidence_pairs = [
                (label, video_predictions[label])
                for label in labels[:num_predictions]
            ]

            predictions.append((video_id, label_confidence_pairs))
            current_video_id_index += 1

    # Rows are sorted by video id — presumably required by the submission
    # format; confirm against the competition spec.
    predictions = sorted(predictions)
    with open(output_path, "w") as fout:
        fout.write("VideoId,LabelConfidencePairs\n")
        for video_id, pairs in predictions:
            label_confidence_list = []
            for label, confidence in pairs:
                label_confidence_list.append(str(label))
                label_confidence_list.append(str(confidence))

            fout.write("{},{}\n".format(video_id, " ".join(label_confidence_list)))
def main():
    """CLI entry point: sort prediction rows by video id and re-save in chunks.

    Reads id chunks (to recover per-row video ids) and prediction chunks,
    reorders all prediction rows by ascending video id, then writes them back
    out as numpy files of at most ``--chunk-size`` rows each.
    """
    parser = argparse.ArgumentParser(
        # BUG FIX: the original passed this string as the first positional
        # argument, which argparse interprets as ``prog`` (the program name
        # shown in usage/help), not as the description.
        description="This script loads predictions, sorts them by video_id and saves in several chunks"
    )
    parser.add_argument("predictions_path")
    parser.add_argument("id_chunks_path")
    parser.add_argument("output_path")
    parser.add_argument("--chunk-size", type=int, default=10000)

    args = parser.parse_args()

    os.makedirs(args.output_path, exist_ok=True)

    # Concatenate per-chunk video id arrays in natural file order so they
    # line up positionally with the prediction rows loaded below.
    video_ids = []
    for chunk_name_path in sort_files_natural(os.listdir(args.id_chunks_path)):
        video_ids_path = os.path.join(args.id_chunks_path, chunk_name_path,
                                      "video_ids")
        video_ids += np.load(video_ids_path).tolist()

    video_ids = np.array(video_ids)

    # Load every prediction chunk (same natural order) into one big array.
    predictions = []
    for chunk_name in sort_files_natural(os.listdir(args.predictions_path)):
        predictions.append(np.load(os.path.join(args.predictions_path, chunk_name)))

    predictions = np.concatenate(predictions)

    # Reorder prediction rows to follow ascending video id.
    sort_order = np.argsort(video_ids)
    predictions = predictions[sort_order]

    # Re-save as chunk_0, chunk_1, ... with at most --chunk-size rows each.
    for chunk_id, chunk_start in enumerate(
            range(0, len(predictions), args.chunk_size)):
        chunk_predictions = predictions[chunk_start:chunk_start +
                                        args.chunk_size]

        chunk_output_path = os.path.join(args.output_path,
                                         "chunk_{}".format(chunk_id))
        with open(chunk_output_path, "wb") as fout:
            np.save(fout, chunk_predictions)
 def __init__(self, chunk_dirs, modalities, in_modalities_n, batch_size=64, shuffle=True, drop_small_batch=False,
              data_to_torch=any_to_tensor):
     """Store loader configuration on the instance.

     Chunk directories are normalized into natural sort order up front;
     every other argument is kept verbatim for later use.
     """
     # Conversion callable applied to loaded data (presumably numpy ->
     # torch tensors, judging by the default's name — confirm).
     self._data_to_torch = data_to_torch
     # Natural ordering makes chunk traversal deterministic across runs.
     self._chunk_dirs = sort_files_natural(chunk_dirs)
     # Which modalities are present, and how many of them are inputs.
     self._modalities = modalities
     self._in_modalities_n = in_modalities_n
     # Batching behaviour.
     self._batch_size = batch_size
     self._shuffle = shuffle
     self._drop_small_batch = drop_small_batch
def extract_features(prediction_path, output_path, is_training):
    """Turn per-model prediction chunks into per-candidate feature rows.

    For each example, the union of every model's top ``TOP_LABELS_PER_MODEL``
    labels forms the candidate set; each candidate yields one feature row of
    ``[candidate_label, conf_model_0, conf_model_1, ...]``. Results are saved
    as numpy files ``features``, ``candidates``, ``video_ids`` (and, when
    ``is_training``, binary ``labels``) under ``output_path``.
    """
    os.makedirs(output_path, exist_ok=True)

    feature_rows = []
    targets = []
    candidate_labels = []
    row_video_ids = []

    for chunk_name in tqdm.tqdm(sort_files_natural(os.listdir(prediction_path))):
        chunk_dir = os.path.join(prediction_path, chunk_name)

        chunk_features = np.load(os.path.join(chunk_dir, "features"))
        chunk_video_ids = np.load(os.path.join(chunk_dir, "video_ids"))
        chunk_labels = np.load(os.path.join(chunk_dir, "labels")) if is_training else None

        for example_index, example_features in enumerate(chunk_features):
            # Union of each model's top-scoring label indices for this example.
            top_per_model = np.argpartition(
                example_features, -TOP_LABELS_PER_MODEL)[:, -TOP_LABELS_PER_MODEL:]
            labels_candidates = np.unique(top_per_model.ravel())

            # (n_candidates, n_models) confidence matrix for the candidates.
            candidates_features = (
                example_features[:, labels_candidates].transpose((1, 0)).astype("float32"))

            for cand_index, cand_label in enumerate(labels_candidates):
                feature_rows.append(
                    np.concatenate([[cand_label], candidates_features[cand_index]]))
                candidate_labels.append(cand_label)
                row_video_ids.append(chunk_video_ids[example_index])

                if is_training:
                    # Binary target: is this candidate a true label?
                    targets.append(int(cand_label in chunk_labels[example_index]))

    for file_name, values in (("features", feature_rows),
                              ("candidates", candidate_labels),
                              ("video_ids", row_video_ids)):
        with open(os.path.join(output_path, file_name), "wb") as fout:
            np.save(fout, np.array(values))

    if is_training:
        with open(os.path.join(output_path, "labels"), "wb") as fout:
            np.save(fout, np.array(targets))
Example #5
0
def get_video_id_to_fold_id_dict(fold_split_path):
    """Build a dict mapping each video id to the fold it belongs to.

    Fold directories under ``fold_split_path`` are expected to be named
    ``..._<fold_id>``; each one contains chunk directories holding a
    ``video_ids`` numpy file.
    """
    mapping = {}
    for fold_name in sort_files_natural(os.listdir(fold_split_path)):
        # The fold id is encoded as the suffix after the last underscore.
        fold_id = int(fold_name.split("_")[-1])
        fold_dir = os.path.join(fold_split_path, fold_name)

        for chunk_name in os.listdir(fold_dir):
            ids_file = os.path.join(fold_dir, chunk_name, "video_ids")
            for video_id in np.load(ids_file):
                mapping[video_id] = fold_id

    return mapping
Example #6
0
def main():
    """CLI entry point: collect test video ids and write the submission file.

    Gathers video ids from every test chunk in natural file order (so they
    align positionally with the prediction rows), then delegates to
    ``prepare_submit``.
    """
    parser = argparse.ArgumentParser(
        # BUG FIX: the original passed this string as the first positional
        # argument, which argparse interprets as ``prog`` (the program name
        # shown in usage/help), not as the description.
        description="This script loads predictions from numpy and prepares submit file")
    parser.add_argument("predictions_path")
    parser.add_argument("test_chunks_path")
    parser.add_argument("output_path")

    args = parser.parse_args()

    video_ids = []
    for chunk_name_path in sort_files_natural(os.listdir(args.test_chunks_path)):
        video_ids_path = os.path.join(args.test_chunks_path, chunk_name_path,
                                      "video_ids")
        video_ids += np.load(video_ids_path).tolist()

    prepare_submit(video_ids, args.predictions_path, args.output_path)