import numpy as np
import pandas

# Dataloader and Dataset are assumed here to come from the CMU Multimodal
# Data SDK's `mmdata` package, which serves the MOSI downloads used below.
from mmdata import Dataloader, Dataset


def load_data(f, labelsfile):
    with open(labelsfile) as labels_fp:
        label_names = labels_fp.read().split("\n")[:36]
    # print("\n".join(label_names))
    mosi = Dataloader('http://sorena.multicomp.cs.cmu.edu/downloads/MOSI')
    covarep = mosi.covarep()
    sentiments = mosi.sentiments()  # sentiment labels, real-valued; for this tutorial we'll binarize them
    train_ids = mosi.train()  # set of video ids in the training set

    # collect all the (video ID, segment ID) pairs, keeping only segments
    # that actually have covarep features so the audio features and the
    # labels built below stay aligned
    train_set_ids = []
    for vid in train_ids:
        for sid in covarep['covarep'][vid].keys():
            if covarep['covarep'][vid][sid]:
                train_set_ids.append((vid, sid))

    train_set_audio = np.array([norm(f, covarep['covarep'][vid][sid])
                                for (vid, sid) in train_set_ids])
    # NaN != NaN, so this zeroes out any NaN entries left in the features
    train_set_audio[train_set_audio != train_set_audio] = 0

    # y_data = np.array([sentiments[vid][sid] for (vid, sid) in train_set_ids])
    # y_train_mc = multiclass(np.array([sentiments[vid][sid] for (vid, sid) in train_set_ids]))
    # binarize the real-valued sentiment scores: positive vs. non-positive
    y_train_bin = np.array([sentiments[vid][sid] for (vid, sid) in train_set_ids]) > 0

    # normalize covarep and facet features, remove possible NaN values
    # audio_max = np.max(np.max(np.abs(train_set_audio), axis=0), axis=0)
    # train_set_audio = train_set_audio / audio_max
    x_data = pandas.DataFrame(data=train_set_audio, columns=label_names)
    return label_names, train_set_audio, x_data, y_train_bin
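
# `norm` is called by load_data above but is not defined in this section.
# A minimal sketch of one plausible implementation, assuming `f` is a NumPy
# reduction such as np.mean or np.max: it strips the (start_time, end_time)
# stamps from each covarep time step and reduces the remaining feature
# vectors over the time axis to one fixed-length summary vector per
# segment. The tutorial's actual helper may differ.
def norm(f, data):
    features = np.array([feature[2] for feature in data])
    return f(features, axis=0)
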
def pad(data, max_len):
    # recall that data at each time step is a tuple
    # (start_time, end_time, feature_vector); we only take the vector
    data = np.array([feature[2] for feature in data])
    n_rows = data.shape[0]
    dim = data.shape[1]
    if max_len >= n_rows:
        # pad short sequences with leading rows of zeros
        diff = max_len - n_rows
        padding = np.zeros((diff, dim))
        padded = np.concatenate((padding, data))
        return padded
    else:
        # truncate long sequences, keeping the last max_len time steps
        return data[-max_len:]


if __name__ == "__main__":
    # Download the data if not present
    mosi = Dataloader('http://sorena.multicomp.cs.cmu.edu/downloads/MOSI')
    embeddings = mosi.embeddings()
    facet = mosi.facet()
    covarep = mosi.covarep()
    sentiments = mosi.sentiments()  # sentiment labels, real-valued; for this tutorial we'll binarize them
    train_ids = mosi.train()  # set of video ids in the training set
    valid_ids = mosi.valid()  # set of video ids in the validation set
    test_ids = mosi.test()    # set of video ids in the test set

    # Merge the different features and do word-level feature alignment
    # (align according to the timestamps of the embeddings)
    bimodal = Dataset.merge(embeddings, facet)
    trimodal = Dataset.merge(bimodal, covarep)
    dataset = trimodal.align('embeddings')

    # sort through all the video ID, segment ID pairs