Example #1
0
    def __init__(self, recipe, destination=None):
        """Build the dataset from a folder of ``.csd`` files or from a recipe.

        Args:
            recipe: Either a folder path (``str``) -- every regular file
                directly inside it whose name ends in ``.csd`` is loaded and
                stored under its ``metadata["root name"]`` -- or a mapping of
                sequence name to the address handed to
                ``computational_sequence``.
            destination: Passed through to ``computational_sequence`` for
                each entry of a mapping recipe; not used for a folder recipe.

        Failures (missing folder, nothing loaded) are reported through
        ``log.error(..., error=True)``.
        """
        self.computational_sequences = {}

        # isinstance (rather than an exact type match) lets str/dict
        # subclasses such as OrderedDict be used as recipes too.
        if isinstance(recipe, str):
            if not os.path.isdir(recipe):
                log.error("Dataset folder does not exist ...", error=True)

            csd_filenames = [
                fname for fname in os.listdir(recipe)
                if os.path.isfile(os.path.join(recipe, fname))
                and fname.endswith('.csd')
            ]
            for fname in csd_filenames:
                this_sequence = computational_sequence(
                    os.path.join(recipe, fname))
                self.computational_sequences[
                    this_sequence.metadata["root name"]] = this_sequence

        elif isinstance(recipe, dict):
            for entry, address in recipe.items():
                self.computational_sequences[entry] = computational_sequence(
                    address, destination)

        # Also reached when recipe is neither a str nor a dict.
        if not self.computational_sequences:
            log.error("Dataset failed to initialize ...", error=True)

        log.success("Dataset initialized successfully ... ")
Example #2
0
    def __set_computational_sequences(self,
                                      new_computational_sequences_data,
                                      metadata_copy=True):
        """Replace all computational sequences with ones built from new data.

        Args:
            new_computational_sequences_data: Mapping of sequence name to the
                data passed to ``setData`` for that sequence.
            metadata_copy: When true, the metadata of the previous sequence
                with the same name is carried over through ``setMetadata``.
                If no such previous sequence exists, a non-fatal message is
                logged and the new sequence is left without copied metadata.
        """
        # Snapshot the metadata before the current sequences are discarded.
        # For a brand-new dataset this is simply an empty dict.
        old_metadata = {
            name: sequence.metadata
            for name, sequence in self.computational_sequences.items()
        }
        self.computational_sequences = {}
        for sequence_name in list(new_computational_sequences_data.keys()):
            sequence = computational_sequence(sequence_name)
            self.computational_sequences[sequence_name] = sequence
            sequence.setData(
                new_computational_sequences_data[sequence_name], sequence_name)
            if metadata_copy:
                if sequence_name in old_metadata:
                    sequence.setMetadata(
                        old_metadata[sequence_name], sequence_name)
                else:
                    # Nothing to copy: warn only. Looking the name up in
                    # old_metadata here would raise KeyError right after a
                    # message that is explicitly flagged as non-fatal.
                    log.error(
                        "Metadata not available to copy ..., please provide metadata before writing to disk later",
                        error=False)
            sequence.rootName = sequence_name
Example #3
0
 def add_computational_sequences(self, recipe, destination):
     """Load every recipe entry and register it under its recipe key.

     Args:
         recipe: Mapping of sequence name to the address handed to
             ``computational_sequence``.
         destination: Passed through to ``computational_sequence`` for
             every entry.

     A name that is already registered is reported through ``log.error``
     before the entry is (re)assigned.
     """
     for name, source in recipe.items():
         already_present = name in self.computational_sequences
         if already_present:
             message = (
                 "Dataset already contains <%s> computational sequence ..."
                 % name)
             log.error(message)
         loaded = computational_sequence(source, destination)
         self.computational_sequences[name] = loaded
Example #4
0
	def __init__(self,recipe,destination=None):
		"""Build the dataset from a recipe dictionary.

		Args:
			recipe: Mapping of sequence name to the address handed to
				``computational_sequence``.
			destination: Passed through to ``computational_sequence`` for
				every entry.

		A recipe that is not a dict is reported through ``log.error``.
		"""
		self.computational_sequences={}

		# isinstance also admits dict subclasses (e.g. OrderedDict).
		if not isinstance(recipe,dict):
			log.error("Dataset recipe must be a dictionary type object ...")

		# .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
		for entry, address in recipe.items():
			self.computational_sequences[entry]=computational_sequence(address,destination)
Example #5
0
 def __set_computational_sequences(self, new_computational_sequences_data):
     """Discard the current sequences and rebuild them from the given data.

     Args:
         new_computational_sequences_data: Mapping of sequence name to the
             data passed to ``setData`` for that sequence.

     Each new sequence is constructed from its name, filled via ``setData``
     and has its ``rootName`` set to the same name.
     """
     self.computational_sequences = rebuilt = {}
     names = list(new_computational_sequences_data.keys())
     for name in names:
         sequence = computational_sequence(name)
         rebuilt[name] = sequence
         payload = new_computational_sequences_data[name]
         sequence.setData(payload, name)
         sequence.rootName = name
Example #6
0
    def readFromCSD(self):
        """Load labels, words, FACET and COVAREP features grouped per segment.

        Reads four ``.csd`` files located at ``self.root`` + file name and,
        for every labelled segment of every video, appends one entry to each
        of ``vidList``, ``sidList``, ``labelList``, ``wordList`` /
        ``wordInterval``, ``facetList`` / ``facetInterval`` and
        ``covarepList`` / ``covarepInterval``.

        A feature row is assigned to a segment when ``mid(row_interval)`` is
        at or after the segment start and before the segment end; rows are
        consumed front to back, so this assumes they are ordered in time.
        ``mid``, ``word2id`` and ``gc`` are provided by the enclosing module
        (``mid`` presumably returns the midpoint of an interval -- confirm).

        Words are first mapped to ids through ``word2id`` and finally to
        rows of a ``len(word2id) x 300`` embedding matrix. The matrix is
        initialised randomly and, unless ``gc.debug`` is set, overwritten
        with the vectors found in ``gc.embed_path``.
        """

        def _advance(intervals, count, start, segment):
            # Return (start, end) bounding the rows whose midpoint falls
            # inside `segment`, scanning forward from `start`.
            while start < count and mid(intervals[start]) < segment[0]:
                start += 1
            end = start
            while end < count and mid(intervals[end]) < segment[1]:
                end += 1
            return start, end

        labelCompSeq = computational_sequence(
            self.root + 'CMU_MOSI_Opinion_Labels.csd').data
        facetCompSeq = computational_sequence(
            self.root + 'CMU_MOSI_VisualFacet_4.1.csd').data
        wordCompSeq = computational_sequence(
            self.root + 'CMU_MOSI_ModifiedTimestampedWords.csd').data
        covarepCompSeq = computational_sequence(self.root +
                                                'CMU_MOSI_COVAREP.csd').data

        self.vidList = []
        self.sidList = []
        self.labelList = []
        self.facetList = []
        self.facetInterval = []
        self.covarepList = []
        self.covarepInterval = []
        self.wordList = []
        self.wordInterval = []

        wordids_List = []
        for i, vid in enumerate(labelCompSeq):
            # Debug mode only processes the first video.
            if gc.debug and i > 0:
                break
            # These two positions are skipped; the reason is not visible here.
            if i in (66, 88):
                continue
            print("processing video %d, uid %s" % (i, vid))
            labels = labelCompSeq[vid]['features']
            sen_intervals = labelCompSeq[vid]['intervals']
            facet = facetCompSeq[vid]['features']
            facet_intervals = facetCompSeq[vid]['intervals']
            covarep = covarepCompSeq[vid]['features']
            covarep_intervals = covarepCompSeq[vid]['intervals']
            words = wordCompSeq[vid]['features']
            word_intervals = wordCompSeq[vid]['intervals']
            num_segments = len(labels)

            # add basic information
            for sen_num in range(num_segments):
                self.labelList.append(labels[sen_num])
                self.vidList.append(vid)
                self.sidList.append(sen_num)

            # add word ids
            start = 0
            for sen_num in range(num_segments):
                start, end = _advance(word_intervals, len(words), start,
                                      sen_intervals[sen_num])
                toAppend = [
                    word2id[word_feat[0].decode('utf-8')]
                    for word_feat in words[start:end]
                ]
                wordids_List.append(toAppend)
                self.wordInterval.append(word_intervals[start:end])
                if len(toAppend) > 50:
                    print(len(toAppend))
                start = end

            # add facets
            start = 0
            for sen_num in range(num_segments):
                start, end = _advance(facet_intervals, len(facet), start,
                                      sen_intervals[sen_num])
                self.facetList.append(facet[start:end])
                self.facetInterval.append(facet_intervals[start:end])
                start = end

            # add covarep
            start = 0
            for sen_num in range(num_segments):
                start, end = _advance(covarep_intervals, len(covarep), start,
                                      sen_intervals[sen_num])
                self.covarepList.append(covarep[start:end])
                self.covarepInterval.append(covarep_intervals[start:end])
                start = end

        # Words missing from the embedding file keep their random vector.
        emb_mat = np.random.randn(len(word2id), 300)
        if not gc.debug:
            found = 0
            # The context manager guarantees the embedding file is closed.
            with open(gc.embed_path, 'r') as f:
                for line in f:
                    content = line.strip().split()
                    # The last 300 tokens are the vector; whatever precedes
                    # them (possibly several tokens) is the word itself.
                    word = ' '.join(content[:-300])
                    if word in word2id:
                        emb_mat[word2id[word], :] = np.asarray(
                            [float(x) for x in content[-300:]])
                        found += 1
            print(f"Found {found} words in the embedding file.")
        for word_ids in wordids_List:
            self.wordList.append([emb_mat[word_id] for word_id in word_ids])
        compseq[vid_key]["intervals"] = numpy.arange(
            start=0, stop=60 + 0.000001,
            step=60. / ((2 * num_entries) - 1)).reshape([num_entries, 2])


if __name__ == "__main__":
    vid_keys = [
        "video1", "video2", "video3", "video4", "video5", "Hello", "World",
        "UG3sfZKtCQI"
    ]

    #let's assume compseq_1 is some modality with a random feature dimension
    compseq_1_data = {}
    compseq_1_feature_dim = numpy.random.randint(low=20, high=100, size=1)
    random_init(compseq_1_data, compseq_1_feature_dim)
    compseq_1 = mmdatasdk.computational_sequence("my_compseq_1")
    compseq_1.setData(compseq_1_data, "my_compseq_1")
    #let's assume compseq_2 is some other modality with a random feature dimension
    compseq_2_data = {}
    compseq_2_feature_dim = numpy.random.randint(low=20, high=100, size=1)
    random_init(compseq_2_data, compseq_2_feature_dim)
    compseq_2 = mmdatasdk.computational_sequence("my_compseq_2")
    compseq_2.setData(compseq_2_data, "my_compseq_2")

    #NOTE: if you don't want to manually input the metadata, set it by creating a metadata key-value dictionary based on mmsdk/mmdatasdk/configurations/metadataconfigs.py
    compseq_1.deploy("compseq_1.csd")
    compseq_2.deploy("compseq_2.csd")

    #now creating a toy dataset from the toy compseqs
    mydataset_recipe = {
        "compseq_1": "compseq_1.csd",
Example #8
0
    def readFromCSD(self):
        """Load labels, word vectors, FACET and COVAREP grouped per segment.

        Reads four ``.csd`` files located at ``self.root`` + file name and,
        for every labelled segment of every video, appends one entry to each
        of ``vidList``, ``sidList``, ``labelList``, ``wordList`` /
        ``wordInterval``, ``facetList`` / ``facetInterval`` and
        ``covarepList`` / ``covarepInterval``.

        A feature row is assigned to a segment when ``mid(row_interval)`` is
        at or after the segment start and before the segment end; rows are
        consumed front to back, so this assumes they are ordered in time.
        ``mid`` and ``gc`` are provided by the enclosing module (``mid``
        presumably returns the midpoint of an interval -- confirm).
        """

        def _advance(intervals, count, start, segment):
            # Return (start, end) bounding the rows whose midpoint falls
            # inside `segment`, scanning forward from `start`.
            while start < count and mid(intervals[start]) < segment[0]:
                start += 1
            end = start
            while end < count and mid(intervals[end]) < segment[1]:
                end += 1
            return start, end

        labelCompSeq = computational_sequence(
            self.root + 'CMU_MOSI_Opinion_Labels.csd').data
        facetCompSeq = computational_sequence(
            self.root + 'CMU_MOSI_VisualFacet_4.1.csd').data
        wordCompSeq = computational_sequence(
            self.root + 'CMU_MOSI_TimestampedWordVectors.csd').data
        covarepCompSeq = computational_sequence(self.root +
                                                'CMU_MOSI_COVAREP.csd').data

        self.vidList = []
        self.sidList = []
        self.labelList = []
        self.facetList = []
        self.facetInterval = []
        self.covarepList = []
        self.covarepInterval = []
        self.wordList = []
        self.wordInterval = []
        for i, vid in enumerate(labelCompSeq):
            # Debug mode only processes the first six videos.
            if gc.debug and i > 5:
                break
            # These two positions are skipped; the reason is not visible here.
            if i in (66, 88):
                continue
            # print(...) with a single argument behaves the same on Python 2
            # and 3, whereas the print statement is a SyntaxError on Python 3.
            print("processing video %d, uid %s" % (i, vid))
            labels = labelCompSeq[vid]['features']
            sen_intervals = labelCompSeq[vid]['intervals']
            facet = facetCompSeq[vid]['features']
            facet_intervals = facetCompSeq[vid]['intervals']
            covarep = covarepCompSeq[vid]['features']
            covarep_intervals = covarepCompSeq[vid]['intervals']
            words = wordCompSeq[vid]['features']
            word_intervals = wordCompSeq[vid]['intervals']
            num_segments = len(labels)

            #add basic information
            for sen_num in range(num_segments):
                self.labelList.append(labels[sen_num])
                self.vidList.append(vid)
                self.sidList.append(sen_num)

            #add word vectors
            start = 0
            for sen_num in range(num_segments):
                start, end = _advance(word_intervals, len(words), start,
                                      sen_intervals[sen_num])
                # Collected row by row so each entry stays a plain list of
                # rows, as before, rather than a slice of the source.
                toAppend = [words[k] for k in range(start, end)]
                toAppendInterval = [
                    word_intervals[k] for k in range(start, end)
                ]
                self.wordList.append(toAppend)
                self.wordInterval.append(toAppendInterval)
                if len(toAppend) > 50:
                    print(len(toAppend))
                start = end

            #add facets
            start = 0
            for sen_num in range(num_segments):
                start, end = _advance(facet_intervals, len(facet), start,
                                      sen_intervals[sen_num])
                self.facetList.append(facet[start:end])
                self.facetInterval.append(facet_intervals[start:end])
                start = end

            #add covarep
            start = 0
            for sen_num in range(num_segments):
                start, end = _advance(covarep_intervals, len(covarep), start,
                                      sen_intervals[sen_num])
                self.covarepList.append(covarep[start:end])
                self.covarepInterval.append(covarep_intervals[start:end])
                start = end
Example #9
0
    def readFromCSD(self):
        phoCompSeq = computational_sequence(self.root+'CMU_MOSI_TimestampedPhones.csd').data
        labelCompSeq = computational_sequence(self.root+'CMU_MOSI_Opinion_Labels.csd').data
        facetCompSeq = computational_sequence(self.root+'CMU_MOSI_VisualFacet_4.1.csd').data
        openfaceCompSeq = computational_sequence(self.root+'CMU_MOSI_OpenFace_V1.csd').data
        smileCompSeq = computational_sequence(self.root+'CMU_MOSI_OpenSmile.csd').data
        wordCompSeq = computational_sequence(self.root+'CMU_MOSI_TimestampedWordVectors.csd').data
        rawCompSeq = computational_sequence(self.root+'CMU_MOSI_TimestampedWords.csd').data
        self.vidList = []
        self.sidList = []
        self.labelList = []
        self.phoList = []
        self.phoInterval = []
        self.facetList = []
        self.facetInterval = []
        self.openfaceList = []
        self.openfaceInterval = []
        self.smileList = []
        self.wordList = []
        self.wordInterval = []
        self.rawList = []
        fout = open("raw_data.txt", 'w')
        fout2 = open("raw_data_no_sp.txt", 'w')
        for i, vid in enumerate(labelCompSeq):
            if gc.debug:
                if i > 2:
                    break
            if vid == "c5xsKMxpXnc":
                continue
            print("processing video %d, uid %s" % (i, vid))
            labels = labelCompSeq[vid]['features']
            sen_intervals = labelCompSeq[vid]['intervals']
            phos = phoCompSeq[vid]['features']
            pho_intervals = phoCompSeq[vid]['intervals']
            facet = facetCompSeq[vid]['features']
            facet_intervals = facetCompSeq[vid]['intervals']
            openface = openfaceCompSeq[vid]['features']
            openface_intervals = openfaceCompSeq[vid]['intervals']
            smile = smileCompSeq[vid]['features']
            words = wordCompSeq[vid]['features']
            word_intervals = wordCompSeq[vid]['intervals']
            raws = rawCompSeq[vid]['features']

            timescale = sen_intervals[-1][1]

            #add phomemes and basic infomation
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(phos) and mid(pho_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(phos) and mid(pho_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                self.phoList.append([e[0] for e in phos[start:end]])
                self.phoInterval.append(pho_intervals[start:end])
                self.labelList.append(labels[sen_num])
                #add smiles here, for the intervals of opensmile is the same as the labels
                self.smileList.append(smile[sen_num])
                self.vidList.append(vid)
                self.sidList.append(sen_num)
                start = end
                sen_num += 1

            #add word vectors
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(words) and mid(word_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(words) and mid(word_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                toAppend = []
                toAppendInterval = []
                for k in range(start, end):
                    if raws[k] != 'sp' or not gc.no_sp:
                        toAppend.append(words[k])
                        toAppendInterval.append(word_intervals[k])
                self.wordList.append(toAppend)
                self.wordInterval.append(toAppendInterval)
                if len(toAppend) > 50:
                    print(len(toAppend))

                fout.write("%s: %s %f\n" % (vid, ''.join([x[0] + ' ' for x in raws[start:end]]), labels[sen_num]))
                fout2.write("%s: %s %f\n" % (vid, ''.join([x[0] + ' ' if x[0] != "sp" else '' for x in raws[start:end]]), labels[sen_num]))
                start = end
                sen_num += 1

            #add facets
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(facet) and mid(facet_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(facet) and mid(facet_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                self.facetList.append(facet[start:end])
                self.facetInterval.append(facet_intervals[start:end])
                start = end
                sen_num += 1

            #add openface
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(openface) and mid(openface_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(openface) and mid(openface_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                self.openfaceList.append(openface[start:end])
                self.openfaceInterval.append(openface_intervals[start:end])
                start = end
                sen_num += 1