예제 #1
0
    def getSyllebleFromPhones(self, phones, fileNm):
        """Split the phones of one word into syllable intervals.

        Every entry of ``self.utterences`` is walked as a sequence of phone
        symbols in which ``'-'`` marks a syllable boundary. The entries are
        tried in order; the first one whose symbols all equal the stripped
        ``mark`` of the corresponding phone is used.

        Args:
            phones: phone intervals of the word, in order. Each item must
                provide ``minTime``, ``maxTime`` and ``mark``.
            fileNm: name of the file being processed; only used in the
                diagnostic messages.

        Returns:
            A list of ``Interval`` objects, one per syllable, each marked with
            the concatenated phone symbols of that syllable. If no entry
            matches (or there are no entries at all), a single interval that
            spans the whole word and is marked ``self.word + '_unMatched'`` is
            returned and the word is recorded in the module-level
            ``utterence_not_found`` dict. Empty ``phones`` yields ``[]``.

        NOTE(review): phones left over after an entry has been fully matched
        are not treated as a mismatch; the last syllable is simply stretched
        to the end of the final phone. Confirm this leniency is intended.
        """
        syllabelList = []
        global utterence_not_found
        if len(phones) == 0:
            # Single-argument print keeps the output identical on Python 2 and 3.
            print("Some issues with file, blank phone  %s" % (fileNm,))
            return syllabelList

        # Start from "nothing matched" so that an empty self.utterences falls
        # through to the unmatched fallback instead of hitting unbound locals.
        matchFail = True
        utterence = None

        for utterence in self.utterences:
            syllabelList = []
            dict_phone_index = 0
            txGrd_phone_index = 0
            start_time = phones[0].minTime
            end_time = 0.0
            syllableName = ''
            matchFail = False

            while dict_phone_index < len(utterence):
                dictPhone = utterence[dict_phone_index]

                # Syllable marker: close the current syllable and move on to
                # the symbol that follows the marker.
                if dictPhone == '-':
                    if dict_phone_index + 1 >= len(utterence):
                        # Trailing marker with nothing after it: the closing
                        # code below emits the final syllable.
                        break
                    syllabelList.append(
                        Interval(start_time, end_time, syllableName))
                    syllableName = ''
                    dict_phone_index += 1
                    dictPhone = utterence[dict_phone_index]
                    # txGrdPhone is the phone matched in the previous
                    # iteration; the next syllable starts where it ended.
                    start_time = txGrdPhone.maxTime

                if txGrd_phone_index >= len(phones):
                    # The entry has more symbols than the word has phones, so
                    # it cannot match; do not compare against a stale phone.
                    print("Some issues with file  %s  around the place %s"
                          % (fileNm, phones))
                    matchFail = True
                    break
                txGrdPhone = phones[txGrd_phone_index]

                if txGrdPhone.mark.strip() != dictPhone.strip():
                    matchFail = True
                    break

                end_time = txGrdPhone.maxTime
                syllableName += dictPhone
                dict_phone_index += 1
                txGrd_phone_index += 1

            if not matchFail:
                # The last syllable always runs to the end of the word.
                end_time = phones[-1].maxTime
                syllabelList.append(
                    Interval(start_time, end_time, syllableName))
                break  # matched utterence

        if matchFail:
            print("Could not match word %s  with phone  %s  with Utterence %s %s"
                  % (self.word, phones, utterence, fileNm))
            utterence_not_found[self.word] = phones
            syllabelList = [Interval(phones[0].minTime, phones[-1].maxTime,
                                     self.word + '_unMatched')]

        return syllabelList
예제 #2
0
def convert_textgrid(tg_old, sil):
    """Build a new TextGrid from the 'WORDS' and 'PHONEMES' tiers of tg_old.

    Every interval keeps its start and end; its mark is passed through
    ``convert_mark(mark, sil)``. The result holds the tier named 'words'
    first and the tier named 'phones' second.

    :param tg_old: source grid, indexable by the tier names 'PHONEMES' and
        'WORDS' and providing ``start`` / ``end``
    :param sil: forwarded unchanged to ``convert_mark``
    :return: the newly created TextGrid
    """

    def rebuilt(new_name, old_name):
        # Copy one tier under a new name, converting each mark on the way.
        old_tier = tg_old[old_name]
        return Tier(new_name, old_tier.start, old_tier.end, 'Interval',
                    [Interval(item.start, item.end,
                              convert_mark(item.mark, sil))
                     for item in old_tier])

    result = TextGrid(tg_old.start, tg_old.end)
    phones = rebuilt('phones', 'PHONEMES')
    words = rebuilt('words', 'WORDS')
    # Words are appended before phones, although they are built afterwards.
    result.append_tier(words)
    result.append_tier(phones)
    return result
예제 #3
0
def generatePhoneSyllableRelationship(gridobj, dictMap, fileNm):
    """Collect syllable intervals for every non-empty word of the grid.

    The first tier named 'words' is scanned. For each word whose mark is not
    the empty string, its phones are fetched with ``getPhonesForWord`` and the
    upper-cased mark is looked up in ``dictMap``:

    * found: the entry's ``getSyllebleFromPhones(phones, fileNm)`` result is
      added to the output;
    * not found: the phone marks are stored in the module-level
      ``word_not_found`` dict and one interval covering the word, marked
      ``<WORD>_unknown``, is added instead.

    Returns the flat list of all intervals in word order.
    """
    global word_not_found
    collected = []

    for word in gridobj.getFirst('words').intervals:
        if word.mark == '':
            continue  # unlabeled stretch, nothing to syllabify

        phoneList = getPhonesForWord(word, gridobj)
        key = word.mark.upper()

        if key not in dictMap:
            # The word is not in the syllable dictionary.
            word_not_found[key] = [p.mark for p in phoneList]
            collected.append(
                Interval(word.minTime, word.maxTime, key + '_unknown'))
            continue

        # Match the grid's timing against the dictionary's syllable layout.
        collected.extend(dictMap[key].getSyllebleFromPhones(phoneList, fileNm))

    return collected
예제 #4
0
def get_intervals(symbols: List[str], total_duration_s: float,
                  n_digits: int) -> Generator[Interval, None, None]:
    """Yield one Interval per symbol, splitting the duration into equal parts.

    The i-th symbol (0-based) covers ``i / len(symbols) * total_duration_s``
    up to ``(i + 1) / len(symbols) * total_duration_s``; both bounds are
    rounded to ``n_digits`` decimals. Nothing is yielded for an empty list.
    """
    n_symbols = len(symbols)
    for position, mark in enumerate(symbols):
        lower = position / n_symbols * total_duration_s
        upper = (position + 1) / n_symbols * total_duration_s
        yield Interval(round(lower, n_digits), round(upper, n_digits), mark)
예제 #5
0
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Read the tiers contained in the Praat-formatted string tg_str into a
    TextGrid object.
    Times are rounded to the specified precision.

    Adapted from TextGrid.read()

    :param tg_str: complete content of a TextGrid file as one string
    :param round_digits: precision forwarded to every ``parse_line`` call
    :return: a TextGrid holding one IntervalTier or PointTier per tier read
    :raises ValueError: if the header does not announce a "TextGrid"

    The reader is strictly line-driven: every ``readline()`` below consumes
    exactly one line of the input, so the order of the calls is significant.
    ``short`` is the flag returned by ``parse_header``; when it is false,
    extra label lines are present and are skipped.
    """
    source = StringIO(tg_str)
    tg = TextGrid()

    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")
    tg.minTime = parse_line(source.readline(), short, round_digits)
    tg.maxTime = parse_line(source.readline(), short, round_digits)
    source.readline()  # More header junk
    if short:
        m = int(source.readline().strip())  # Will be tg.n
    else:
        m = int(source.readline().strip().split()[2])  # Will be tg.n
        source.readline()  # extra line present only when not short
    for _ in range(m):  # Loop over grids
        if not short:
            source.readline()  # per-tier header line
        tier_class = parse_line(source.readline(), short, round_digits)
        if tier_class == "IntervalTier":
            inam = parse_line(source.readline(), short, round_digits)
            imin = parse_line(source.readline(), short, round_digits)
            imax = parse_line(source.readline(), short, round_digits)
            itie = IntervalTier(inam, imin, imax)
            itie.strict = tg.strict
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                if not short:
                    source.readline()  # Header junk
                jmin = parse_line(source.readline(), short, round_digits)
                jmax = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                if jmin < jmax:  # Non-null
                    itie.addInterval(Interval(jmin, jmax, jmrk))
            tg.append(itie)
        else:  # PointTier
            inam = parse_line(source.readline(), short, round_digits)
            # The tier bounds are not passed to PointTier, but their lines
            # still have to be consumed.
            parse_line(source.readline(), short, round_digits)
            parse_line(source.readline(), short, round_digits)
            itie = PointTier(inam)
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                # Skip the per-point header only when it exists, exactly as
                # the IntervalTier branch does; skipping unconditionally
                # would swallow the time line of a short-format file.
                if not short:
                    source.readline()  # Header junk
                jtim = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                itie.addPoint(Point(jtim, jmrk))
            tg.append(itie)
    return tg
예제 #6
0
def merge_and_mark_tiers(tg_file="", output_file="", tiers=()):
    """
    Write a copy of a TextGrid with two added IntervalTiers.

    Every interval with a non-empty mark from the named tiers is copied into
    two new tiers:

    * "Marked" (inserted at index 1): the interval is labeled with the name
      of the tier it came from;
    * "Merged" (inserted at index 2): the interval keeps its original mark.

    Both new tiers span from the smallest ``minTime`` to the largest
    ``maxTime`` of the named tiers, so no copied interval can fall outside
    their bounds.

    :param tg_file: path of the TextGrid file to read
    :param output_file: path the extended TextGrid is written to
    :param tiers: names of the tiers to merge; each pair is first checked
        with ``validate_overlapping_tiers``
    :raises ValueError: if ``tiers`` is empty
    """
    tg = textgrid.TextGrid()
    tg.read(f=tg_file)

    if not tiers:
        raise ValueError("at least one tier name is required")

    # Look every tier up once; the order of `tiers` is preserved.
    source_tiers = [tg.getFirst(tier_name) for tier_name in tiers]

    for first, second in combinations(source_tiers, 2):
        validate_overlapping_tiers(first, second)

    span_start = min(tier.minTime for tier in source_tiers)
    # max, not min: the new tiers must reach the end of the longest source.
    span_end = max(tier.maxTime for tier in source_tiers)

    merged_tier = IntervalTier(name="Merged",
                               minTime=span_start,
                               maxTime=span_end)
    marked_tier = IntervalTier(name="Marked",
                               minTime=span_start,
                               maxTime=span_end)

    for tier in source_tiers:
        for interval in tier:
            if not interval.mark:
                continue  # empty intervals carry no information to merge
            marked_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=tier.name))
            merged_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=interval.mark))

    tg.tiers.insert(1, marked_tier)
    tg.tiers.insert(2, merged_tier)

    with open(output_file, "w") as f:
        tg.write(f)
예제 #7
0
def get_intervals(sig, fs, winlen=0.05, threshold=0.005, minlen=0.01):
    """Label the signal as alternating '#' and 'X' intervals (in sec).

    The spans reported by ``detect_transients`` become 'X' intervals; the
    stretches before, between and after them become '#' intervals. Without
    any span, one '#' interval covering the whole signal is returned.

    Arguments:
    :param sig: the signal; its length divided by fs is the total duration
    :param fs: divisor that converts sample positions to seconds
    :param winlen: windowlength for the rms in seconds
    :param threshold: energy cutoff
    :param minlen: minimum length of an interval in seconds
    """
    transients = detect_transients(sig, fs, winlen, threshold, minlen)
    if transients.shape[0] == 0:
        return [Interval(0.0, len(sig) / fs, '#')]

    labeled = []
    cursor = 0.0  # end of the most recent 'X' interval, in seconds
    for onset, offset in transients:
        onset_s = onset / fs
        offset_s = offset / fs
        labeled.append(Interval(cursor, onset_s, '#'))
        labeled.append(Interval(onset_s, offset_s, 'X'))
        cursor = offset_s
    labeled.append(Interval(cursor, len(sig) / fs, '#'))
    return labeled
except OSError:
    pass

# For every TextGrid under `indir`: keep the labeled intervals of the 'CALLS'
# tier (everything not marked '#' or 'X'), fill the gaps between them with
# empty-marked intervals and write the result to `outdir` in long format.
for tgfile in sorted(data.rglob(indir, '*.TextGrid')):
    basename = path.splitext(path.basename(tgfile))[0]
    # Single-argument print behaves the same on Python 2 and Python 3.
    print(basename)
    tg = TextGrid.read(tgfile)
    try:
        tier = tg['CALLS']
    except KeyError:
        print('KeyError in %s' % (basename,))
        raise  # bare raise keeps the original traceback
    intervals = [i for i in tier if i.mark not in ('#', 'X')]
    start, end = tg.start, tg.end

    prev_end = start
    new_intervals = []
    for interval in intervals:
        # Only insert a filler when there really is a gap; a zero-length
        # interval would not describe any stretch of time.
        if interval.start > prev_end:
            new_intervals.append(Interval(prev_end, interval.start, ''))
        new_intervals.append(interval)
        prev_end = interval.end
    if end > prev_end:
        new_intervals.append(Interval(prev_end, end, ''))

    # Use the gap-filled list here; the filtered `intervals` alone would
    # leave holes in the tier.
    new_tier = Tier('', tier.start, tier.end,
                    'Interval', new_intervals)
    new_tg = TextGrid(start, end, [new_tier])

    outfile = path.join(outdir, basename + '.TextGrid')
    with open(outfile, 'w') as fid:
        new_tg.write(fid, fmt='long')
def clean_interval(old_interval):
    """Return a new Interval with old_interval's times and a cleaned mark.

    The time bounds are copied unchanged; the mark is passed through
    ``clean_mark``. ``old_interval`` itself is not modified here.
    """
    lower = old_interval.minTime
    upper = old_interval.maxTime
    cleaned = clean_mark(old_interval.mark)
    return Interval(minTime=lower, maxTime=upper, mark=cleaned)