def process_data(folders=None):
    """Align CMU-MOSEI high-level features to the labels and print tensor shapes.

    Args:
        folders: Names of the folders to load with ``mmdatasdk.mmdataset``.
            Each loaded dataset is stored under the second "_"-separated part
            of its folder name; the rest of the function reads the keys
            "highlevel" and "labels". Defaults to
            ``["cmumosei_highlevel", "cmumosei_labels"]``.
    """
    # A None sentinel replaces the former mutable list default.
    if folders is None:
        folders = ["cmumosei_highlevel", "cmumosei_labels"]

    log.status(
        "You can also download all the outputs of this code from here: http://immortal.multicomp.cs.cmu.edu/ACL20Challenge/"
    )

    cmumosei_dataset = {}
    for folder in folders:
        cmumosei_dataset[folder.split("_")[1]] = mmdatasdk.mmdataset(folder)

    # Word alignment (labels are not part of it) and imputation of missing
    # modality information are currently disabled:
    # cmumosei_dataset["highlevel"].align("glove_vectors")
    # cmumosei_dataset["highlevel"].impute('glove_vectors')

    # NOTE(review): with align/impute disabled this still writes the freshly
    # loaded "highlevel" data to "word_aligned_highlevel" before it is read
    # back on the next statement - confirm this is intended.
    deploy(cmumosei_dataset["highlevel"], "word_aligned_highlevel")

    # Load the word-aligned sequences from disk.
    cmumosei_dataset["highlevel"] = mmdatasdk.mmdataset(
        "word_aligned_highlevel")
    highlevel = cmumosei_dataset["highlevel"]

    # Add the labels to the dataset, then align to them (no collapse
    # function this time).
    highlevel.computational_sequences["All Labels"] = (
        cmumosei_dataset["labels"]["All Labels"])
    highlevel.align("All Labels")

    # Remove sentences which have missing modality information.
    highlevel.hard_unify()

    # Write the final aligned data to disk.
    deploy(highlevel, "final_aligned")

    # To read it back instead once the above has been done:
    #cmumosei_dataset["highlevel"]=mmdatasdk.mmdataset("final_aligned")

    # Final tensors for machine learning, split by the train/valid/test folds.
    folds = mmdatasdk.cmu_mosei.standard_folds
    tensors = highlevel.get_tensors(
        seq_len=50,
        non_sequences=["All Labels"],
        direction=False,
        folds=[
            folds.standard_train_fold,
            folds.standard_valid_fold,
            folds.standard_test_fold,
        ])

    # Output the shape of the tensors.
    fold_names = ["train", "valid", "test"]
    for i, fold_name in enumerate(fold_names):
        for csd in list(highlevel.keys()):
            print("Shape of the %s computational sequence for %s fold is %s" %
                  (csd, fold_name, tensors[i][csd].shape))
def main():
    """Command-line entry point: download, align, split and save a dataset.

    The text, visual and speech fields are loaded from ``DATA_PATH`` and
    aligned to the text field with ``avg`` as collapse function. The label
    field is then added and aligned to, and the result is handed to
    ``prepare_save`` and ``save``.
    """
    parser = argparse.ArgumentParser()
    optional_fields = (
        ("--textField", 'CMU_MOSI_ModifiedTimestampedWords'),
        ("--visualField", 'CMU_MOSI_Visual_Facet_41'),
        ("--speechField", 'CMU_MOSI_COVAREP'),
        ("--labelField", 'CMU_MOSI_Opinion_Labels'),
    )
    for flag, default in optional_fields:
        parser.add_argument(flag, type=str, default=default)
    parser.add_argument("--datasetName", type=str, required=True)
    args = parser.parse_args()

    train_split, val_split, test_split = download_dataset(args.datasetName)

    # Feature sequences first; the label field joins the list only after
    # the word-level alignment.
    features = [args.textField, args.visualField, args.speechField]
    recipe = {}
    for feat in features:
        recipe[feat] = os.path.join(DATA_PATH, feat) + '.csd'
    dataset = md.mmdataset(recipe)
    dataset.align(args.textField, collapse_functions=[avg])

    label_path = os.path.join(DATA_PATH, args.labelField + '.csd')
    dataset.add_computational_sequences({args.labelField: label_path},
                                        destination=None)
    dataset.align(args.labelField)
    features.append(args.labelField)

    train, val, test = prepare_save(features, dataset, train_split,
                                    val_split, test_split)
    save(train, val, test, args.datasetName)
def download(params):
    """Fetch high-level, label and raw sequences for ``params.dataset_name``.

    Reads ``params.dataset_name`` (one of "cmumosei", "cmumosi", "pom"; any
    other value raises ``KeyError``) and ``params.datasets_dir``. Everything
    is stored under ``<datasets_dir>/<dataset_name>/``. Returns nothing.
    """
    print('download {} dataset begins!'.format(params.dataset_name))

    # (highlevel, labels, raw) recipes for every supported dataset name.
    recipes_by_name = {
        "cmumosei": (mmdatasdk.cmu_mosei.highlevel,
                     mmdatasdk.cmu_mosei.labels,
                     mmdatasdk.cmu_mosei.raw),
        "cmumosi": (mmdatasdk.cmu_mosi.highlevel,
                    mmdatasdk.cmu_mosi.labels,
                    mmdatasdk.cmu_mosi.raw),
        "pom": (mmdatasdk.pom.highlevel,
                mmdatasdk.pom.labels,
                mmdatasdk.pom.raw),
    }

    target = os.path.join(params.datasets_dir, params.dataset_name) + '/'
    highlevel, labels, raw = recipes_by_name[params.dataset_name]

    dataset = mmdatasdk.mmdataset(highlevel, target)
    for extra in (labels, raw):
        dataset.add_computational_sequences(extra, target)
def load_mosi():
    """Return CMU-MOSI high-level features aligned to words, then to labels.

    The dataset is built from the ``cmu_mosi.highlevel`` recipe with
    'cmumosi/' as destination, aligned to 'glove_vectors' using ``myavg`` as
    collapse function, extended with the ``cmu_mosi.labels`` recipe and
    finally aligned to 'Opinion Segment Labels'.
    """
    data_dir = 'cmumosi/'
    mosi = mmdatasdk.mmdataset(mmdatasdk.cmu_mosi.highlevel, data_dir)

    # Word-level alignment first, collapsing with myavg.
    mosi.align('glove_vectors', collapse_functions=[myavg])

    # Then bring in the labels and align to them (no collapse function).
    mosi.add_computational_sequences(mmdatasdk.cmu_mosi.labels, data_dir)
    mosi.align('Opinion Segment Labels')
    return mosi
def load_modality(base_path, feature_cfg, modality):
    """Load the .csd file configured for one modality.

    Args:
        base_path: Directory containing the .csd files.
        feature_cfg: Mapping from modality name to .csd file stem.
        modality: Key into ``feature_cfg``.

    Returns:
        The ``md.mmdataset`` built from ``<base_path>/<stem>.csd``.
    """
    csd_name = "{}.csd".format(feature_cfg[modality])
    csd_path = os.path.join(base_path, csd_name)
    logger.info("Using {} for {} modality".format(csd_path, modality))
    return md.mmdataset(csd_path)
def load_dataset(
    base_path,
    dataset="mosi",
    feature_cfg=MOSI_COVAREP_FACET_GLOVE,
    modalities={"audio", "text", "visual"},
    already_segmented=False,
):
    """Download a dataset, load the selected modalities and add the labels.

    Args:
        base_path: Directory holding (or receiving) the .csd files.
        dataset: Dataset name, resolved through ``select_dataset``.
        feature_cfg: Mapping from modality kind to .csd file stem; the
            "raw" and "labels" entries are always read.
        modalities: Modality kinds to load in addition to "raw".
        already_segmented: When false, the data is aligned to the labels
            and hard-unified before being returned.

    Returns:
        Tuple ``(data, word2idx)``.
    """
    def csd_file(name):
        # Location of the .csd file for a given sequence name.
        return os.path.join(base_path, "{}.csd".format(name))

    dataset = select_dataset(dataset)
    download_mmdata(base_path, dataset)

    selected = list(modalities) + ["raw"]
    recipe = {}
    for kind, name in feature_cfg.items():
        if kind in selected:
            recipe[name] = csd_file(name)
    data = md.mmdataset(recipe)
    patch_missing_metadata(data)

    # Vocabulary comes from the raw sequence.
    word2idx = create_word2idx(get_vocabulary(data[feature_cfg["raw"]]))

    labels_name = feature_cfg["labels"]
    data.add_computational_sequences({labels_name: csd_file(labels_name)},
                                     destination=None)
    patch_missing_metadata(data)

    if not already_segmented:
        data.align(feature_cfg["labels"])
        data.hard_unify()

    return data, word2idx
def download_data(keys):
    """Build one mmdataset per requested ACL20 challenge key.

    Args:
        keys: Iterable of keys into ``ChallengeHML20.challenge20_data``.

    Returns:
        Dict mapping each key to the ``mmdatasdk.mmdataset`` created from
        that recipe with ``'cmumosei_<key>/'`` as destination. An empty
        ``keys`` yields an empty dict.
    """
    print("You only need to download the data once!")
    return {
        key: mmdatasdk.mmdataset(ChallengeHML20.challenge20_data[key],
                                 'cmumosei_%s/' % key)
        for key in keys
    }
def process_data(folders=None):
    """Word-align CMU-MOSEI, align to the emotion labels and print shapes.

    Args:
        folders: Names of the folders to load with ``mmdatasdk.mmdataset``.
            Each loaded dataset is stored under the second "_"-separated part
            of its folder name; the rest of the function reads the keys
            "highlevel" and "labels". Defaults to
            ``["cmumosei_highlevel", "cmumosei_labels"]``.
    """
    # A None sentinel replaces the former mutable list default.
    if folders is None:
        folders = ["cmumosei_highlevel", "cmumosei_labels"]

    log.status(
        "You only need to run this script once. CMU-MOSEI processing requires a combination of 300GB in RAM and swap combined. As optimized as the process is, it may take up to a day to finish."
    )
    log.status(
        "Alternatively, you can send us your computational sequences to align for you if you don't have enough computational power for alignment."
    )
    log.status(
        "The standard aligned features are availabel on the challenge github.")

    cmumosei_challenge_acl20 = {}
    for folder in folders:
        cmumosei_challenge_acl20[folder.split("_")[1]] = mmdatasdk.mmdataset(
            folder)
    highlevel = cmumosei_challenge_acl20["highlevel"]

    # Word alignment. Labels are not part of the word alignment process.
    highlevel.align("glove_vectors")
    # Replace missing modality information for words - some words may
    # experience failed COVAREP, etc.
    highlevel.impute('glove_vectors')
    # Write the word-aligned computational sequences to disk.
    deploy(highlevel, "word_aligned_highlevel")

    # To load the word-aligned data from disk instead, comment out the
    # align and impute lines above and uncomment the line below.
    #cmumosei_challenge_acl20["highlevel"]=mmdatasdk.mmdataset("word_aligned_highlevel")

    # Add the labels to the dataset, then align to them (no collapse
    # function this time).
    highlevel.computational_sequences["Emotion Labels"] = (
        cmumosei_challenge_acl20["labels"]["Emotion Labels"])
    highlevel.align("Emotion Labels")

    # Remove sentences which have missing modality information.
    highlevel.hard_unify()

    # Write the final aligned data to disk.
    deploy(highlevel, "final_aligned")

    # To read it back instead once the above has been done:
    #cmumosei_challenge_acl20["highlevel"]=mmdatasdk.mmdataset("final_aligned")

    # Final tensors for machine learning, split by the train/valid/test folds.
    folds = mmdatasdk.cmu_mosei.standard_folds
    tensors = highlevel.get_tensors(
        seq_len=50,
        non_sequences=["Emotion Labels"],
        direction=False,
        folds=[
            folds.standard_train_fold,
            folds.standard_valid_fold,
            folds.standard_test_fold,
        ])

    # Output the shape of the tensors.
    fold_names = ["train", "valid", "test"]
    for i, fold_name in enumerate(fold_names):
        for csd in list(highlevel.keys()):
            print("Shape of the %s computational sequence for %s fold is %s" %
                  (csd, fold_name, tensors[i][csd].shape))
def load(visual_field, acoustic_field, text_field):
    """Build an mmdataset from the three feature .csd files in DATA_PATH.

    The recipe maps each field name to ``<DATA_PATH>/<field>.csd`` and is
    filled in the order text, visual, acoustic.
    """
    recipe = {}
    for field in (text_field, visual_field, acoustic_field):
        recipe[field] = os.path.join(DATA_PATH, field) + '.csd'
    return md.mmdataset(recipe)
def _load_speech():
    """Load the TIMIT 'train' and 'val' .csd files as one mmdataset."""
    csd_dir = os.path.join(DATA_ROOT, 'timit', 'csd_format')
    files = {
        split: os.path.join(csd_dir, split + '.csd')
        for split in ('train', 'val')
    }
    return mmdatasdk.mmdataset(files)
def download_data():
    """Create the raw, highlevel and labels CMU-MOSEI datasets.

    Returns:
        Dict with keys "raw", "highlevel" and "labels"; each value is the
        ``mmdatasdk.mmdataset`` built from the matching ``cmu_mosei`` recipe
        with ``'cmumosei_<key>/'`` as destination.
    """
    recipes = (
        ("raw", mmdatasdk.cmu_mosei.raw),
        ("highlevel", mmdatasdk.cmu_mosei.highlevel),
        ("labels", mmdatasdk.cmu_mosei.labels),
    )
    return {
        name: mmdatasdk.mmdataset(recipe, 'cmumosei_%s/' % name)
        for name, recipe in recipes
    }
def _load_raw():
    """Load every file in the 16 kHz cut AudioSet directory as one mmdataset.

    Each directory entry becomes a recipe key (the file name itself) mapped
    to its full path.
    """
    csd_dir = os.path.join(DATA_ROOT, 'audioset_verified', 'csd_format',
                           'cut', '16000')
    files = {
        entry: os.path.join(csd_dir, entry)
        for entry in os.listdir(csd_dir)
    }
    return mmdatasdk.mmdataset(files)
def download():
    """Fetch the CMU-MOSI high-level, raw and label data into DATA_PATH.

    A ``RuntimeError`` from ``md.mmdataset`` is reported as "downloaded
    previously" and does not abort the remaining downloads.

    Returns:
        The ``md.cmu_mosi`` dataset description.
    """
    # Create the data folder with the standard library instead of spawning
    # a shell for ``mkdir -p``; this also copes with spaces in DATA_PATH.
    os.makedirs(DATA_PATH, exist_ok=True)

    # CMU_MOSI is used as the example dataset here.
    DATASET = md.cmu_mosi
    downloads = (
        ("highlevel", "High-level features have been downloaded previously."),
        ("raw", "Raw data have been downloaded previously."),
        ("labels", "Labels have been downloaded previously."),
    )
    for attr, already_present in downloads:
        try:
            md.mmdataset(getattr(DATASET, attr), DATA_PATH)
        except RuntimeError:
            print(already_present)

    return DATASET
def download_data(DATASET):
    """Fetch high-level, raw and label data of DATASET and list DATA_PATH.

    Args:
        DATASET: Object exposing ``highlevel``, ``raw`` and ``labels``
            recipes accepted by ``md.mmdataset``.

    A ``RuntimeError`` from ``md.mmdataset`` is reported as "downloaded
    previously" and does not abort the remaining downloads.
    """
    # Create the data folder with the standard library instead of spawning
    # a shell for ``mkdir -p``; this also copes with spaces in DATA_PATH.
    os.makedirs(DATA_PATH, exist_ok=True)

    downloads = (
        ("highlevel", "High-level features have been downloaded previously."),
        ("raw", "Raw data have been downloaded previously."),
        ("labels", "Labels have been downloaded previously."),
    )
    for attr, already_present in downloads:
        try:
            md.mmdataset(getattr(DATASET, attr), DATA_PATH)
        except RuntimeError:
            print(already_present)

    # Inspect the download: list the directory contents to see which
    # feature files are there.
    data_files = os.listdir(DATA_PATH)
    print('\n'.join(data_files))
def load_and_align(
    base_path,
    dataset="mosi",
    feature_cfg=MOSI_COVAREP_FACET_GLOVE,
    modalities={"audio", "visual", "text"},
    collapse=None,
):
    """Download, word-align, label-align and deploy a dataset.

    Args:
        base_path: Directory holding (or receiving) the .csd files. The
            aligned outputs go to ``base_path + "_word_aligned"`` and
            ``base_path + "_final_aligned"``.
        dataset: Dataset name, resolved through ``select_dataset``.
        feature_cfg: Mapping from modality kind to .csd file stem; the
            "raw" and "labels" entries are always read.
        modalities: Modality kinds to load in addition to "raw".
        collapse: List of collapse functions for the word alignment;
            ``[avg_collapse]`` when omitted.

    Returns:
        Tuple ``(data, word2idx)``.
    """
    def csd_file(name):
        # Location of the .csd file for a given sequence name.
        return os.path.join(base_path, "{}.csd".format(name))

    dataset = select_dataset(dataset)
    download_mmdata(base_path, dataset)

    selected = list(modalities) + ["raw"]
    recipe = {}
    for kind, name in feature_cfg.items():
        if kind in selected:
            recipe[name] = csd_file(name)
    data = md.mmdataset(recipe)
    patch_missing_metadata(data)

    if collapse is None:
        collapse = [avg_collapse]

    # Step 1: align to the words (collapsing with the given functions),
    # impute, and write the word-aligned data to disk.
    word_dir = base_path + "_word_aligned"
    safe_mkdirs(word_dir)
    data.align(feature_cfg["raw"], collapse_functions=collapse)
    data.impute(feature_cfg["raw"])
    deploy(data, word_dir)

    word2idx = create_word2idx(get_vocabulary(data[feature_cfg["raw"]]))

    # Step 2: add the labels, align to them and drop incomplete entries.
    labels_name = feature_cfg["labels"]
    data.add_computational_sequences({labels_name: csd_file(labels_name)},
                                     destination=None)
    patch_missing_metadata(data)
    data.align(feature_cfg["labels"])
    data.hard_unify()

    final_dir = base_path + "_final_aligned"
    safe_mkdirs(final_dir)
    deploy(data, final_dir)

    return data, word2idx
def align():
    """Align the locally stored Social-IQ data to the BERT transcript chunks.

    Loads the "socialiq" folder, drops three sequences that are not needed
    for alignment, aligns to 'SOCIAL_IQ_TRANSCRIPT_RAW_CHUNKS_BERT' with
    ``myavg``, imputes, reverts and deploys every sequence to "./deployed".
    """
    # First-time download variant:
    #socialiq_no_align=mmdatasdk.mmdataset(mmdatasdk.socialiq.highlevel,"socialiq")
    # Later runs read the already downloaded folder.
    socialiq = mmdatasdk.mmdataset("socialiq")

    # These sequences are not needed for aligning.
    not_needed = (
        "SOCIAL-IQ_QA_BERT_LASTLAYER_BINARY_CHOICE",
        "SOCIAL-IQ_QA_BERT_MULTIPLE_CHOICE",
        "SOCIAL_IQ_VGG_1FPS",
    )
    for name in not_needed:
        del socialiq.computational_sequences[name]

    socialiq.align('SOCIAL_IQ_TRANSCRIPT_RAW_CHUNKS_BERT',
                   collapse_functions=[myavg])

    # From here on the dataset is aligned.
    socialiq.impute("SOCIAL_IQ_TRANSCRIPT_RAW_CHUNKS_BERT")
    socialiq.revert()

    # Deploy each sequence under its own name.
    socialiq.deploy("./deployed", {name: name for name in socialiq.keys()})
def align():
    """Download Social-IQ and align it to the BERT transcript chunks.

    Builds the dataset from ``mmdatasdk.socialiq.highlevel`` with "socialiq"
    as destination, drops three sequences that are not needed, aligns to
    'Transcript_Raw_Chunks_BERT' with ``myavg``, imputes, reverts and
    deploys every sequence to "./deployed".
    """
    socialiq = mmdatasdk.mmdataset(mmdatasdk.socialiq.highlevel, "socialiq")

    # These sequences are not needed.
    not_needed = (
        "QA_BERT_lastlayer_binarychoice",
        "SOCIAL-IQ_QA_BERT_MULTIPLE_CHOICE",
        "SOCIAL_IQ_VGG_1FPS",
    )
    for name in not_needed:
        del socialiq[name]

    socialiq.align('Transcript_Raw_Chunks_BERT', collapse_functions=[myavg])

    # From here on the dataset is aligned.
    socialiq.impute("Transcript_Raw_Chunks_BERT")
    socialiq.revert()

    # Deploy each sequence under its own name.
    socialiq.deploy("./deployed", {name: name for name in socialiq.keys()})
def download_mmdata(base_path, dataset):
    """Fetch the highlevel, raw and labels recipes of ``dataset``.

    Args:
        base_path: Destination directory; created via ``safe_mkdirs``.
        dataset: Object exposing ``highlevel``, ``raw`` and ``labels``.

    A ``RuntimeError`` from ``md.mmdataset`` is logged as "downloaded
    previously" and the remaining recipes are still processed.
    """
    safe_mkdirs(base_path)

    steps = (
        ("highlevel", "High-level features have been downloaded previously."),
        ("raw", "Raw data have been downloaded previously."),
        ("labels", "Labels have been downloaded previously."),
    )
    for attr, already_present in steps:
        try:
            md.mmdataset(getattr(dataset, attr), base_path)
        except RuntimeError:
            logger.info(already_present)
def _load_noise():
    """Load a fixed selection of AudioSet noise classes as one mmdataset.

    Each selected file name is used as recipe key and mapped to its path in
    the 16 kHz cut AudioSet directory.
    """
    csd_dir = os.path.join(DATA_ROOT, 'audioset_verified', 'csd_format',
                           'cut', '16000')
    noise_files = (
        "shaver.csd",
        "vacuum_cleaner.csd",
        "chainsaw.csd",
        "baby_laughter.csd",
        "duck.csd",
        "bark.csd",
        "engine.csd",
        "water.csd",
        "wind.csd",
    )
    recipe = {name: os.path.join(csd_dir, name) for name in noise_files}
    return mmdatasdk.mmdataset(recipe)
def load_dataset():
    """Load the CMU-MOSI text, visual and acoustic features plus labels.

    All four .csd files are read from a fixed directory; the labels are
    added with ``destination=None``. No alignment is performed here.
    """
    csd_root = '/misc/kfdata01/kf_grp/hrwang/socialKG/CMU-MultimodalSDK/cmumosi'
    label_field = 'CMU_MOSI_Opinion_Labels'
    # Order: text, visual, acoustic.
    feature_fields = (
        'CMU_MOSI_TimestampedWordVectors',
        'CMU_MOSI_Visual_Facet_41',
        'CMU_MOSI_COVAREP',
    )

    recipe = {}
    for field in feature_fields:
        recipe[field] = os.path.join(csd_root, field) + '.csd'
    label_recipe = {
        label_field: os.path.join(csd_root, label_field + '.csd')
    }

    dataset = md.mmdataset(recipe)
    dataset.add_computational_sequences(label_recipe, destination=None)
    return dataset
def load_dataset(visual_field,acoustic_field,text_field):
    """Load three feature sequences from DATA_PATH and print a short tour.

    Prints the sequence names, the first ten ids of the visual and text
    sequences, and the interval/feature shapes of the 16th visual id, so
    the visual sequence needs at least 16 entries.

    Returns:
        The loaded ``md.mmdataset``.
    """
    recipe = {}
    for field in (text_field, visual_field, acoustic_field):
        recipe[field] = os.path.join(DATA_PATH, field) + '.csd'
    dataset = md.mmdataset(recipe)

    separator = "=" * 80

    # Just look at its data.
    print(list(dataset.keys()))
    print(separator)

    for field in (visual_field, text_field):
        print(list(dataset[field].keys())[:10])
    print(separator)

    some_id = list(dataset[visual_field].keys())[15]
    print(list(dataset[visual_field][some_id].keys()))
    print(separator)

    print('Interval dimention is 2 since each step has the start and end timestamp')
    labelled_fields = (
        ('Visual:', visual_field),
        ('Text:', text_field),
        ('Accoustic:', acoustic_field),
    )
    for label, field in labelled_fields:
        print(label, list(dataset[field][some_id]['intervals'].shape))
    print(separator)

    print('ID:', some_id)
    for _, field in labelled_fields:
        print(list(dataset[field][some_id]['features'].shape))
    print("Different modalities have different number of time steps!")

    return dataset
def download_dataset(datasetName):
    """Fetch the requested CMU dataset and return its standard folds.

    Args:
        datasetName: Either "cmu_mosi" or "cmu_mosei".

    Returns:
        Tuple ``(TRAINSPLIT, VALSPLIT, TESTSPLIT)`` taken from the
        dataset's ``standard_folds``.

    Raises:
        ValueError: If ``datasetName`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
        SystemExit: If ``SDK_PATH`` is not configured.
    """
    if SDK_PATH is None:
        print("SDK path is not specified! Please specify first in constants/paths.py")
        # A missing SDK path is a failure, so report a non-zero status.
        sys.exit(1)
    sys.path.append(SDK_PATH)

    # Create the data folder with the standard library instead of spawning
    # a shell for ``mkdir -p``.
    os.makedirs(DATA_PATH, exist_ok=True)

    if datasetName == "cmu_mosi":
        DATASET = md.cmu_mosi
    elif datasetName == "cmu_mosei":
        DATASET = md.cmu_mosei
    else:
        raise ValueError(
            "Unknown datasetName {!r}; expected 'cmu_mosi' or 'cmu_mosei'"
            .format(datasetName))
    print(DATASET.highlevel)

    # A RuntimeError is reported as "already downloaded" and the remaining
    # downloads still run.
    downloads = (
        ("highlevel", "High-level features have been donwloaded previously."),
        ("raw", "Raw data have been downloaded previously."),
        ("labels", "Labels have been downloaded previously."),
    )
    for attr, already_present in downloads:
        try:
            md.mmdataset(getattr(DATASET, attr), DATA_PATH)
        except RuntimeError:
            print(already_present)

    folds = DATASET.standard_folds
    TRAINSPLIT = folds.standard_train_fold
    VALSPLIT = folds.standard_valid_fold
    TESTSPLIT = folds.standard_test_fold
    return TRAINSPLIT, VALSPLIT, TESTSPLIT
import h5py
from mmsdk import mmdatasdk
import csv
import os
from mosi_fold import *

# This script explores the CSD files and converts them into train, valid
# and test folders.
num_files=[]

# Recipe with a single 'Label' sequence. The CMU-MOSEI emotion labels can be
# used instead by switching to the commented line.
#mydict={'Label':'./cmumosei/CMU_MOSEI_LabelsEmotions.csd'}
mydict={'Label':'./cmumosi/CMU_MOSI_Opinion_Labels.csd'}
mydataset=mmdatasdk.mmdataset(mydict)

# Collect the names of all .mp4 files in the raw video folder, keeping only
# the part before the first '.'.
raw_vid_path='./Video/Full/'
raw_vid_files=[]
for filename in os.listdir(raw_vid_path):
    if filename.endswith(".mp4"):
        raw_vid_files.append(filename.split('.')[0])
print('number of raw full video files',len(raw_vid_files))

# Data of the label sequence.
mydic=mydataset.computational_sequences['Label'].data
word_field = 'CMU_MOSEI_TimestampedWords'

# Only the visual field is loaded; the text and acoustic fields are
# commented out.
features = [
    #text_field,
    visual_field,
    #acoustic_field
]
#raw_features = [word_field]

"""## Use the SDK to load the computational sequences"""

# Map every selected feature name to its .csd file in the aligned
# high-level data folder.
recipe = {feat: os.path.join(ALIGNED_DATA_PATH_HIGH_LEVEL, feat) + '.csd' for feat in features}
print(recipe)
dataset = md.mmdataset(recipe)

"""## Inspect the data"""

# Names of the loaded sequences.
print(list(dataset.keys()))
print("=" * 80)

# First ten ids of the visual sequence.
print(list(dataset[visual_field].keys())[:10])
print("=" * 80)

# Keys stored for the 16th id of the visual sequence.
some_id = list(dataset[visual_field].keys())[15]
print(list(dataset[visual_field][some_id].keys()))
print("=" * 80)

# NOTE(review): word_id is read from the visual sequence as well and is the
# same id as some_id, so this prints the same keys again - confirm whether
# word_field was meant here.
word_id = list(dataset[visual_field].keys())[15]
print(list(dataset[visual_field][word_id].keys()))
data = { 'computation_sequence': { 'data': { 'id': { 'features': N * feature_length, 'intervals': N * feature_length } }, 'metadata': { 'Alignment compatible': True, ...... } } } ''' data = mmdatasdk.mmdataset(mmdatasdk.cmu_mosi.highlevel) data.add_computational_sequences(mmdatasdk.cmu_mosi.labels, None) print(data.computational_sequences.keys()) data.align('Opinion Segment Labels') def train_valid_test_judge(video_name): """ 判断该视频文件属于训练集、验证集or测试集 """ if video_name in standard_train_list: fold_choose = "train" elif video_name in standard_valid_list: fold_choose = "valid" else: fold_choose = "test"
#first aligns a dataset to the words vectors and collapses other modalities (by taking average of them for the duration of the word). After this operation every modality will have the same frequency (same as word vectors). Then the code aligns based on opinion labels (note that collapse does not happen for this step. import mmsdk from mmsdk import mmdatasdk import numpy #uncomment all the ==> lines together #A simple averaging technique. More advanced methods can be built based on intervals. def myavg(intervals, features): return numpy.average(features, axis=0) #Downloading the dataset cmumosi_highlevel = mmdatasdk.mmdataset(mmdatasdk.cmu_mosi.highlevel, 'cmumosi/') #some random video from cmumosi_highlevel #==>some_video=list(cmumosi_highlevel["glove_vectors"].data.keys())[0] #Aligning to the words to get word-level alignments cmumosi_highlevel.align('glove_vectors', collapse_functions=[myavg]) #get the intervals and features accompanying the 100th word in the some_video #==>some_video_100th_word=some_video+'[100]' #==>for compseq_name in list(cmumosi_highlevel.computational_sequences.keys()): #==> compseq=cmumosi_highlevel[compseq_name] #==> print (compseq_name) #==> print (numpy.array(compseq.data[some_video_100th_word]["intervals"]).shape,numpy.array(compseq.data[some_video_100th_word]["features"]).shape) #==> print ("-------")
import time #Loading the data of Social-IQ #Yellow warnings fro SDK are ok! if os.path.isdir("./deployed/") is False: print ("Need to run the modality alignment first") from alignment import align,myavg align() paths={} paths["QA_BERT_lastlayer_binarychoice"]="./socialiq/SOCIAL-IQ_QA_BERT_LASTLAYER_BINARY_CHOICE.csd" paths["DENSENET161_1FPS"]="./deployed/DENSENET161_1FPS.csd" paths["Transcript_Raw_Chunks_BERT"]="./deployed/Transcript_Raw_Chunks_BERT.csd" paths["Acoustic"]="./deployed/Acoustic.csd" social_iq=mmdatasdk.mmdataset(paths) social_iq.unify() def qai_to_tensor(in_put,keys,total_i=1): data=dict(in_put.data) features=[] for i in range (len(keys)): features.append(numpy.array(data[keys[i]]["features"])) input_tensor=numpy.array(features,dtype="float32")[:,0,...] in_shape=list(input_tensor.shape) q_tensor=input_tensor[:,:,:,0:1,:,:] ai_tensor=input_tensor[:,:,:,1:,:,:]
from mmsdk import mmdatasdk

# Build an mmdataset from the CMU-MOSI high-level recipe; no destination
# folder is passed here.
cmumosi_highlevel = mmdatasdk.mmdataset(mmdatasdk.cmu_mosi.highlevel)
print(error) try: os.mkdir(VAL_PATH) except OSError as error: print(error) try: os.mkdir(TEST_PATH) except OSError as error: print(error) # Downloading the dataset try: md.mmdataset(DATASET.highlevel, CSD_PATH) except RuntimeError: print("High-level features have been downloaded previously.") try: md.mmdataset(DATASET.raw, CSD_PATH) except RuntimeError: print("Raw data have been downloaded previously.") try: md.mmdataset(DATASET.labels, CSD_PATH) except RuntimeError: print("Labels have been downloaded previously.") # Loading the dataset # All fields are listed here:
vid_keys = [ "video1", "video2", "video3", "video4", "video5", "Hello", "World", "UG3sfZKtCQI" ] #let's assume compseq_1 is some modality with a random feature dimension compseq_1_data = {} compseq_1_feature_dim = numpy.random.randint(low=20, high=100, size=1) random_init(compseq_1_data, compseq_1_feature_dim) compseq_1 = mmdatasdk.computational_sequence("my_compseq_1") compseq_1.setData(compseq_1_data, "my_compseq_1") #let's assume compseq_1 is some other modality with a random feature dimension compseq_2_data = {} compseq_2_feature_dim = numpy.random.randint(low=20, high=100, size=1) random_init(compseq_2_data, compseq_2_feature_dim) compseq_2 = mmdatasdk.computational_sequence("my_compseq_2") compseq_2.setData(compseq_2_data, "my_compseq_2") #NOTE: if you don't want to manually input the metdata, set it by creating a metdata key-value dictionary based on mmsdk/mmdatasdk/configurations/metadataconfigs.py compseq_1.deploy("compseq_1.csd") compseq_2.deploy("compseq_2.csd") #now creating a toy dataset from the toy compseqs mydataset_recipe = { "compseq_1": "compseq_1.csd", "compseq_2": "compseq_2.csd" } mydataset = mmdatasdk.mmdataset(mydataset_recipe) #let's also see if we can align to compseq_1 mydataset.align("compseq_1")