# Example #1
# 0
def load_data(f):
    """Load MOSI COVAREP training features and binarized sentiment labels.

    Parameters
    ----------
    f : object
        Passed through as the first argument of ``norm`` when normalizing
        each segment's COVAREP features (semantics defined by ``norm``).

    Returns
    -------
    tuple
        ``(label_names, train_set_audio, x_data, y_train_bin)`` where
        ``label_names`` are the first 36 lines of ``labelsfile``,
        ``train_set_audio`` is an ``np.ndarray`` of normalized COVAREP
        features with NaNs zeroed, ``x_data`` is the same data as a
        ``pandas.DataFrame``, and ``y_train_bin`` is a boolean array that
        is True where the segment's sentiment is > 0.
    """
    # NOTE(review): ``labelsfile`` is a free name (presumably a module-level
    # path) — the ``f`` parameter is NOT the labels path; confirm elsewhere.
    # Bug fix: the original opened the file in binary mode ("rb") and then
    # split on the str "\n" (TypeError on Python 3), shadowed the ``input``
    # builtin, and never closed the handle. Open in text mode with ``with``.
    with open(labelsfile, "r") as labels_fh:
        label_names = labels_fh.read().split("\n")[:36]

    mosi = Dataloader('http://sorena.multicomp.cs.cmu.edu/downloads/MOSI')
    covarep = mosi.covarep()
    # sentiment labels are real-valued; binarized below (> 0)
    sentiments = mosi.sentiments()
    train_ids = mosi.train()  # set of video ids in the training set

    # collect every (video ID, segment ID) pair in the training set
    train_set_ids = [(vid, sid)
                     for vid in train_ids
                     for sid in covarep['covarep'][vid]]

    # normalize each non-empty segment's COVAREP features
    # NOTE(review): segments with falsy feature data are skipped here but NOT
    # skipped when building y_train_bin below, so the two arrays can end up
    # with different lengths — confirm whether empty segments ever occur.
    train_set_audio = np.array([
        norm(f, covarep['covarep'][vid][sid]) for (vid, sid) in train_set_ids
        if covarep['covarep'][vid][sid]
    ])
    # zero out NaNs left by normalization (NaN != NaN is the NaN test)
    train_set_audio[train_set_audio != train_set_audio] = 0

    # binarize sentiment: positive (> 0) vs non-positive
    y_train_bin = np.array(
        [sentiments[vid][sid] for (vid, sid) in train_set_ids]) > 0

    x_data = pandas.DataFrame(data=train_set_audio, columns=label_names)
    return label_names, train_set_audio, x_data, y_train_bin
# Example #2
# 0
    # recall that data at each time step is a tuple (start_time, end_time, feature_vector), we only take the vector
    data = np.array([feature[2] for feature in data])
    n_rows = data.shape[0]
    dim = data.shape[1]
    # Force the sequence to exactly max_len time steps (rows).
    if max_len >= n_rows:
        # too short: prepend zero rows so the real data sits at the END,
        # i.e. the most recent time steps stay aligned with the sequence tail
        diff = max_len - n_rows
        padding = np.zeros((diff, dim))
        padded = np.concatenate((padding, data))
        return padded
    else:
        # too long: keep only the last max_len time steps
        return data[-max_len:]


if __name__ == "__main__":
    # Download the data if not present
    mosi = Dataloader('http://sorena.multicomp.cs.cmu.edu/downloads/MOSI')
    # load the three modalities: word embeddings, visual (FACET) and
    # acoustic (COVAREP) features — presumably keyed by video/segment id;
    # TODO confirm against the Dataloader API
    embeddings = mosi.embeddings()
    facet = mosi.facet()
    covarep = mosi.covarep()
    sentiments = mosi.sentiments(
    )  # sentiment labels, real-valued. for this tutorial we'll binarize them
    train_ids = mosi.train()  # set of video ids in the training set
    valid_ids = mosi.valid()  # set of video ids in the valid set
    test_ids = mosi.test()  # set of video ids in the test set

    # Merge different features and do word level feature alignment (align according to timestamps of embeddings)
    # merge is pairwise, so the trimodal dataset is built in two steps
    bimodal = Dataset.merge(embeddings, facet)
    trimodal = Dataset.merge(bimodal, covarep)
    dataset = trimodal.align('embeddings')

    # sort through all the video ID, segment ID pairs