import viterbi  # same-project Viterbi tagger module (assumed importable)

def run(sentence, tagset, hmm_prob):
    max_iterations = 200
    step_size = 15

    n = len(sentence)

    u1 = {}  # dual decomposition parameters, one per tagger
    u2 = {}
    for i in xrange(n):
        u1[i] = {}
        u2[i] = {}
        for t in tagset:
            u1[i][t] = 0
            u2[i][t] = 0
    
    k = 0 # number of iterations
    while k < max_iterations:

        tags1 = viterbi.run(sentence, tagset, hmm_prob, u1)
        tags2 = viterbi.run(sentence, tagset, hmm_prob, u2)

        if k == 0:
            print "initial tags:"
            print tags1, ":tagger1"
            print tags2, ":tagger2"

        # converged: both taggers agree on the full sequence
        if not disagree(tags1, tags2):
            return k, tags1, tags2  # converges in the kth iteration

        update(tags1, tags2, u1, u2, step_size)

        k += 1
    return -1, tags1, tags2  # does not converge
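# disagree and update are not shown in this snippet; minimal sketches
# consistent with their call sites (the names and the exact update rule are
# assumptions, not the original author's code):
def disagree(tags1, tags2):
    return tags1 != tags2

def update(tags1, tags2, u1, u2, step_size):
    # where the two taggers disagree, nudge each tagger's dual scores
    # toward the other tagger's choice
    for i in xrange(len(tags1)):
        if tags1[i] != tags2[i]:
            u1[i][tags1[i]] -= step_size
            u1[i][tags2[i]] += step_size
            u2[i][tags2[i]] -= step_size
            u2[i][tags1[i]] += step_size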
Example #2
import viterbi  # same-project module (assumed importable)

def iterate_sentences_viterbi(outfile):
    """Iterate over all saved sentences; run the CKY parser on each short
    sentence and then build its Viterbi parse tree.

    Relies on module globals: sentences, start_node, cky_parsing_most.
    """
    i = 1
    for s in sentences:
        print "Sentence in line ", i
        if len(s) <= 15:
            chart = cky_parsing_most(s)
            viterbi.run(chart, 0, len(s), start_node, s, 0)
        viterbi.build_tree(s, outfile)
        i += 1
import math
import viterbi
import fst_search  # same-project modules (assumed importable)

def run(sentence, tagset, hmm, k_best_list):
    max_iter = len(k_best_list)*200
    
    n = len(sentence)
    k = len(k_best_list)

    u = {} # dual decomposition parameter
    init_dd_param(u, n, tagset) 

    iteration = 1
    while iteration <= max_iter:
        #print iteration
        step_size = 1.0 / math.sqrt(iteration)
        #print "step size", step_size 
        
        seq1, score1, score2 = viterbi.run(sentence, tagset, hmm, u)
        y = compute_indicators(seq1, tagset)
        #print 0, ' '.join(seq1)

        seq2, fst_score = fst_search.run(k_best_list, u, tagset)
        z = compute_indicators(seq2, tagset)
        #print 1, ' '.join(seq2)
       
        # check for agreement
        if seq1 != seq2:
            update(y, z, u, step_size)
        else:
            return seq1, iteration

        iteration += 1
    return seq1, -1 
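# init_dd_param, compute_indicators, and the indicator-based update are not
# defined in these snippets; plausible sketches based on how they are called
# (assumptions, not the original code):
def init_dd_param(u, n, tagset):
    # zero-initialize one dual value per (position, tag) pair
    for i in range(n):
        u[i] = dict((t, 0.0) for t in tagset)

def compute_indicators(seq, tagset):
    # y[i][t] = 1 if position i is tagged t, else 0
    return dict((i, dict((t, 1 if t == tag else 0) for t in tagset))
                for i, tag in enumerate(seq))

def update(y, z, u, step_size):
    # subgradient step on the dual variables: u <- u - step_size * (y - z)
    for i in u:
        for t in u[i]:
            u[i][t] -= step_size * (y[i][t] - z[i][t])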
import viterbi
import fast_cky
import utils  # same-project modules (assumed importable)

def run(sentence, tagset, preterms, start, hmm_prob, pcfg, pcfg_u):
    max_iterations = 20
    step_size = 1.0

    n = len(sentence)

    u = {}  # dual decomposition parameter
    init_dd_param(u, n, tagset)

    k = 0 # number of iterations
    while k < max_iterations:
          
        tags = viterbi.run(sentence, tagset, hmm_prob, u)

        parse = fast_cky.run(sentence, preterms, start, pcfg, pcfg_u, u)
        parse_list = utils.make_parse_list(parse)
        terms, parse_tags = utils.get_terminals_tags(parse_list)

        print k
        print tags, "tagger"
        print parse_tags, "parser"

        if agree(parse_tags, tags):
            return k, tags, parse  # converges in the kth iteration
        
        y = compute_indicators(tags, tagset)
        z = compute_indicators(parse_tags, tagset)
        k += 1
        step_size = 1.0/k
        update(y, z, u, step_size)

    return -1, tags, parse # does not converge
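# agree is not shown in these snippets; from its call sites it is presumably
# elementwise equality of the two tag sequences (an assumption):
def agree(tags1, tags2):
    return list(tags1) == list(tags2)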
import math
import viterbi
import fst_search  # same-project modules (assumed importable)

def run(sentence, tagset, hmm_prob):
    max_iterations = 200
    #step_size = 100

    n = len(sentence)

    u = {}  # dual decomposition parameter
    init_dd_param(u, n, tagset)
 
    k = 1 # number of iterations
    while k <= max_iterations:
        step_size = 1.0 / math.sqrt(k)
        #print "\niteration:", k
        #print "-------------------------------"
        #print "step size = ", "{0:.2f}".format(step_size)
        tags1, aug_hmm_score, hmm_score = viterbi.run(sentence, tagset, hmm_prob, u)
        #print "vit output:", ' '.join(tags1)
        if k == 1:
            best_tags = tags1
        tags2, fst_score = fst_search.run(best_tags, u, tagset)

        if agree(tags1, tags2):
            #sys.stderr.write("hmm only = " + str(hmm_score) + "\n")
            #sys.stderr.write("fst only = " + str(fst_score) + "\n")
            #sys.stderr.write("big hmm  = " + str(aug_hmm_score) + "\n")
            #sys.stderr.write("hmm fst  = " + str(aug_hmm_score + fst_score) + "\n")
            return best_tags, k, tags1, tags2  # converges in the kth iteration
        y = compute_indicators(tags1, tagset)
        z = compute_indicators(tags2, tagset)
        update(y, z, u, step_size)

        k += 1
    return best_tags, -1, tags1, tags2 # does not converge
import math
import viterbi
import bigram_fst_search  # same-project modules (assumed importable)

def run(sentence, tagset, hmm, k_best_list):
    max_iter = len(k_best_list)*200
    
    n = len(sentence)
    k = len(k_best_list)

    u = [] # dd parameter list
    for j in range(k+1):
        u_j = {}
        init_dd_param(u_j, n, tagset)
        u.append(u_j)
    w = {}
    init_dd_param(w, n, tagset)
    ku = {}
    init_dd_param(ku, n, tagset)
 
    iteration = 1
    while iteration <= max_iter:
        #print iteration
        step_size = 21.0 / math.sqrt(iteration)
        #print "step size", step_size 
        
        seqs = []
        indicators = []
        for i in u[0].iterkeys():
            for t in u[0][i].iterkeys():
                ku[i][t] = -1 * u[0][i][t]
        seq1, score1, score2 = viterbi.run(sentence, tagset, hmm, ku)
        seqs.append(seq1)
        indicators.append(compute_indicators(seq1, tagset))
        #print 0, ' '.join(seq1)


        for j in range(k):
            seq, fst_score = bigram_fst_search.run(k_best_list[j], u[j+1], tagset)
            #print j+1, ' '.join(seq)
            seqs.append(seq)
            indicators.append(compute_indicators(seq, tagset))
       
        # check for agreement
        agree = True
        for seq in seqs[1:]:
            if seq != seq1:
                agree = False
                break
       
        if not agree:
            update(indicators, u, w, step_size)
        else:
            return seq1, iteration

        iteration += 1
    return seq1, -1 
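# Note on the decomposition above (an interpretation, not documentation from
# the original author): u holds k+1 dual parameter sets, one for the HMM
# tagger (u[0], passed to viterbi negated as ku) and one per k-best candidate
# FST (u[1..k]); w looks like an aggregate of the duals kept for the update.
# update(indicators, u, w, step_size) presumably takes a subgradient step on
# each u[j] toward agreement of all k+1 output sequences, analogous to the
# pairwise update sketched earlier.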
import utils
import grammar
import sequnece_labeler  # module name spelled as in the original project
import cky
import viterbi

def execute(treebank, dev):
    print "reading treebank..."
    parses = utils.read_parses_no_indent(treebank)
    parse_lists = []
    for parse in parses:
        parse_lists.append(utils.make_parse_list(parse))
      
    print "learning pcfg..."  
    nonterms, terms, start, prob = grammar.learn(parse_lists)
    
    print "learning hmm..."
    emission, transition = sequnece_labeler.learn(parse_lists)

    print "reading dev data..."
    dev_sentences = utils.get_sentences(dev)
    print dev_sentences[100] 
    for sentence in dev_sentences:
        parse = cky.run(sentence, nonterms, start, prob)
        sequence = viterbi.run(sentence, emission, transition)
import numpy as np
import viterbi  # same-project module (assumed importable)

def run(obs, states_list, start_p, trans_p, emit_p):
    # define global variables
    global states, start_probs, transition, emission, test_sequence, sequence_syms, end_probs
    states, start_probs, transition, emission = states_list, start_p, trans_p, emit_p

    test_sequence = obs  # list of strings of observations (angular velocities)

    # probabilities of going to end state <-- What is this?
    end_probs = [0.1, 0.1]

    states_dic = {}
    for i in range(len(states)):
        states_dic[states[i]] = i

    # generating initial probabilities
    sequence = np.unique(
        test_sequence
    )  # should these be a range of all possible theoretical observations or a list of all observations in the data?
    sequence_syms = {}
    for i in range(len(sequence)):
        sequence_syms[sequence[i]] = i

    #performing iterations until convergence

    for iteration in range(2000):

        print('\nIteration No: ', iteration + 1)
        # print('\nTransition:\n ', transition)
        # print('\nEmission: \n', emission)

        #Calling probability functions to calculate all probabilities
        fwd_probs, fwd_val = forward_probs()
        bwd_probs, bwd_val = backward_probs()
        si_probabilities = si_probs(fwd_probs, bwd_probs, fwd_val)
        gamma_probabilities = gamma_probs(fwd_probs, bwd_probs, fwd_val)

        # print('Forward Probs:')
        # print(np.matrix(fwd_probs))
        #
        # print('Backward Probs:')
        # print(np.matrix(bwd_probs))
        #
        # print('Si Probs:')
        # print(si_probabilities)
        #
        # print('Gamma Probs:')
        # print(np.matrix(gamma_probabilities))

        #calculating 'a' and 'b' matrices
        a = np.zeros((len(states), len(states)))
        b = np.zeros((len(states), len(sequence_syms)))

        #'a' matrix
        for j in range(len(states)):
            for i in range(len(states)):
                for t in range(len(test_sequence) - 1):
                    a[j, i] = a[j, i] + si_probabilities[j, t, i]

                denominator_a = sum(
                    si_probabilities[j, t_x, i_x]
                    for t_x in range(len(test_sequence) - 1)
                    for i_x in range(len(states))
                )

                if denominator_a == 0:
                    a[j, i] = 0
                else:
                    a[j, i] = a[j, i] / denominator_a

        #'b' matrix
        for j in range(len(states)):  #states
            for i in range(len(sequence)):  #seq
                indices = [
                    idx for idx, val in enumerate(test_sequence)
                    if val == sequence[i]
                ]
                numerator_b = sum(gamma_probabilities[j, indices])
                denominator_b = sum(gamma_probabilities[j, :])

                if denominator_b == 0:
                    b[j, i] = 0
                else:
                    b[j, i] = numerator_b / denominator_b

        print('\nMatrix a:\n')
        print(np.array(a.round(decimals=4)))
        print('\nMatrix b:\n')
        print(np.array(b.round(decimals=4)))

        transition = a
        emission = b

        new_fwd_temp, new_fwd_temp_val = forward_probs()
        print('New forward probability: ', new_fwd_temp_val)
        diff = np.abs(fwd_val - new_fwd_temp_val)
        print('Difference in forward probability: ', diff)

        if diff < 1e-7:
            break
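
    # The re-estimation above implements the standard Baum-Welch updates,
    # with si_probabilities playing the role of xi and gamma_probabilities
    # the role of gamma:
    #   a[j][i] = sum_t xi_t(j, i) / (sum_t sum_i' xi_t(j, i'))
    #   b[j][o] = (sum over t with O_t == o of gamma_t(j)) / (sum_t gamma_t(j))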

    # update params with a, b to run viterbi
    start_p = {"Sac": 0.5, "Fix": 0.5}  # convert start probabilities from list to dict
    # convert the transition and emission matrices from arrays to nested dicts
    t = {}
    for i in range(len(states)):
        d = {}
        for j in range(len(states)):
            d[states[j]] = a[i, j]
        t[states[i]] = d

    e = {}
    for i in range(len(states)):
        d = {}
        # b has one column per unique observation symbol, so index it via
        # sequence_syms rather than over the raw observation list
        for sym, j in sequence_syms.items():
            d[sym] = b[i, j]
        e[states[i]] = d

    viterbi.run(obs, states, start_p, t, e)

Example #9
if args.melody == 'little_happiness':
    melody = converter.parse(A_LITTLE_HAPPINESS)
elif args.melody == 'jj_lin':
    melody = converter.parse(JJ_LIN_MELODY)
else:
    print('Unrecognized melody: should be jj_lin or little_happiness')
    sys.exit(1)

if args.series not in ('major', 'minor'):
    print('Unrecognized series: should be major or minor')
    sys.exit(1)

melody.insert(0, MetronomeMark(number=95))

# Pick algorithm
if args.algorithm == 'basic':
    chord_search.run(chords, melody, args.series)
elif args.algorithm == 'hmm':
    viterbi.run(chords, melody, args.series)
else:
    print('Unrecognized algorithm: should be basic or hmm')
    sys.exit(1)

# Combine two parts
song = Stream()
song.insert(0, melody)
song.insert(0, chords)

# song.show('midi')
song.show()
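# Assumed invocation for this snippet (the argument parser is not shown and
# the script name is hypothetical; flag names are inferred from the args
# attributes used above):
#   python song.py --melody jj_lin --series major --algorithm hmm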
Example #10
elif args.series == 'a_jazz':
    chords = A_JAZZ
elif args.series == 'f_sharp_jazz':
    chords = F_SHARP_JAZZ
else:
    print('Unrecognized series')
    sys.exit(1)

# Init chord stream
chords_output = converter.parse("""tinynotation: 4/4""")

# Pick algorithm
if args.algorithm == 'basic':
    chord_search.run(chords, melody, args.series, chords_output)
elif args.algorithm == 'markov':
    viterbi.run(chords, melody, args.series, chords_output)
else:
    print('Unrecognized algorithm: should be basic or markov')
    sys.exit(1)

# Combine two parts
song = stream.Score(id='mainScore')
part0 = stream.Part(id='melody')
part0.append(melody)
part1 = stream.Part(id='chords')
part1.append(chords_output)
song.insert(0, part0)
song.insert(0, part1)
song.makeNotation(inPlace=True)

song.show()
Example #11
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import viterbi
import baum_welch  # same-project modules (assumed importable)

def main():

    # files
    df = pd.read_csv(
        '/Users/ischoning/PycharmProjects/GitHub/data/participant08_preprocessed172.csv'
    )

    # shorten dataset for better visuals and quicker results
    #df = df[int(len(df)/200):int(len(df)/100)]
    df = df[100:int(len(df) / 500)]
    df.reset_index(drop=True, inplace=True)

    # assign relevant data
    lx = df['left_forward_x']
    ly = df['left_forward_y']
    lz = df['left_forward_z']
    rx = df['right_forward_x']
    ry = df['right_forward_y']
    rz = df['right_forward_z']
    t = df['timestamp_milis']

    # compute angular values
    df['Ax_left'] = np.rad2deg(np.arctan2(lx, lz))
    df['Ay_left'] = np.rad2deg(np.arctan2(ly, lz))
    df['Ax_right'] = np.rad2deg(np.arctan2(rx, rz))
    df['Ay_right'] = np.rad2deg(np.arctan2(ry, rz))
    df['Avg_angular_x'] = df[['Ax_left', 'Ax_right']].mean(axis=1)
    df['Avg_angular_y'] = df[['Ay_left', 'Ay_right']].mean(axis=1)

    # show vision path in averaged angular degrees
    #    show_path(df['Avg_angular_x'], df['Avg_angular_y'])
    # print('Length of capture time:', len(t))
    # print('Length of capture time differences:',
    #       len(np.diff(t/1000000)))

    # # show vision path, separately for each eye
    #    plot_eye_path(df)

    # show angular displacement over time, averaged over both eyes
    #    plot_vs_time(t, df['Avg_angular_x'], df['Avg_angular_y'], 'Angular Displacement Over Time', 'degrees')

    # plot angular velocity for x and y
    # remove the last row so lengths of each column are consistent
    dt = np.diff(t)  # aka isi
    dx = np.diff(df['Avg_angular_x'])
    dy = np.diff(df['Avg_angular_y'])

    df.drop(df.tail(1).index, inplace=True)
    t = df['timestamp_milis']

    #    plot_vs_time(t,dx/dt,dy/dt, 'Angular Velocity Over Time', 'degrees per millisecond')

    # plot combined angular velocity
    df['ang_vel'] = np.sqrt(np.square(dx) + np.square(dy))
    ang_vel = df['ang_vel']
    #    plot_vs_time(t, ang_vel, y = [], title = 'Combined Angular Velocity Over Time', y_axis = 'degrees per millisecond')

    plt.scatter(range(len(ang_vel)), ang_vel)
    #    plt.show()
    plt.scatter(ang_vel, ang_vel)
    #    plt.show()

    # plot angular acceleration for x and y
    # remove the last row so lengths of each column are consistent
    dt = np.diff(t)  # aka isi
    dv = np.diff(df['ang_vel'])

    df_ = df.copy()
    df_.drop(df_.tail(1).index, inplace=True)
    df_['ang_acc'] = dv / dt

    # plot combined angular accleration
    #    plot_vs_time(df_['timestamp_milis'],df_['ang_vel'], y = df_['ang_acc'], title = 'Combined Angular Acceleration Over Time', y_axis = 'degrees per millisecond')

    # show histogram of angular velocity
    make_hist(ang_vel, 'Histogram of Angular Velocity', 'angular velocity',
              'number of occurrences')

    # make pmf
    pmf(ang_vel, 'PMF of Angular Velocity', 'angular velocity', 'probability')

    # initial labeling: classify points with angular velocity above a fixed
    # threshold (0.02 deg/ms) as saccades, the rest as fixations
    # NOTE that the white space in the plot is due to jumps in ms between events
    states = ['Saccade', 'Fixation']
    df['fix1 sac0'] = np.where(ang_vel <= 0.02, 1, 0)
    event = df['fix1 sac0']
    #    plot_vs_time(t, ang_vel, y=[], title='Combined Angular Velocity Over Time', y_axis='degrees per millisecond', event = event)
    print_events(t, event=event, states=states)

    print('=============== STEP 1: Filter Saccades ===============')

    # estimate priors (sample means)
    mean_fix = np.mean(df[event == 1]['ang_vel'])
    mean_sac = np.mean(df[event == 0]['ang_vel'])
    std_fix = np.std(df[event == 1]['ang_vel'])
    std_sac = np.std(df[event == 0]['ang_vel'])
    print("Fixation: mean =", mean_fix, "standard deviation =", std_fix)
    print("Saccade: mean =", mean_sac, "standard deviation =", std_sac)

    print('\n============== BEGIN VITERBI ==============')
    # run Viterbi with the sample-estimated params first; Baum-Welch (EM)
    # then refines them below, followed by an updated Viterbi pass
    obs = ang_vel.astype(str)
    obs = obs.tolist()
    states = ['Sac', 'Fix']
    start_p = [0.5, 0.5]
    trans_p = np.array([[0.5, 0.5], [0.5, 0.5]])
    # one emission entry per unique observation symbol, in the same sorted
    # order that baum_welch derives via np.unique
    unique_obs = np.unique(obs)
    Sac = [gaussian(mean_sac, std_sac, float(o)) for o in unique_obs]
    Fix = [gaussian(mean_fix, std_fix, float(o)) for o in unique_obs]

    emit_p = np.array([Sac, Fix])
    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print(df['hidden_state'].value_counts())
    print('=============== END VITERBI ===============')

    print('\n============== BEGIN BAUM-WELCH ==============')
    trans_p, emit_p = baum_welch.run(obs, states, start_p, trans_p, emit_p)
    print('============== END BAUM-WELCH ==============')

    print('\n============== BEGIN UPDATED VITERBI ==============')
    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print('=============== END UPDATED VITERBI ===============')

    df = clean_sequence(df)

    print(
        '\n=============== STEP 2: Classify Fixations and Smooth Pursuits ==============='
    )

    # filter out Saccades
    df = df[df.hidden_state != 'Sac']
    df.reset_index(drop=True, inplace=True)

    ang_vel = df['ang_vel']
    states = ['Smooth Pursuit', 'Fixation']
    plt.plot(df.timestamp_milis, ang_vel)
    plt.show()

    df['fix1 smp0'] = np.where(ang_vel <= 0.02, 1, 0)
    event = df['fix1 smp0']
    print_events(t, event=event, states=states)

    # estimate priors (sample means)
    mean_fix = np.mean(df[event == 1]['ang_vel'])
    mean_smp = np.mean(df[event == 0]['ang_vel'])
    std_fix = np.std(df[event == 1]['ang_vel'])
    std_smp = np.std(df[event == 0]['ang_vel'])
    print("Fixation: mean =", mean_fix, "standard deviation =", std_fix)
    print("Smooth Pursuit: mean =", mean_smp, "standard deviation =", std_smp)

    print('\n============== BEGIN VITERBI ==============')

    # as in step 1: run Viterbi with the sample-estimated params, then refine
    # them with Baum-Welch (EM) and run an updated Viterbi pass below
    obs = ang_vel.astype(str)
    obs = obs.tolist()
    states = ['SmP', 'Fix']
    # p = math.log(0.5)
    start_p = [0.5, 0.5]
    trans_p = np.array([[0.5, 0.5], [0.5, 0.5]])
    # Note: not possible to have two contiguous saccades without a fixation (or smooth pursuit)
    # in between
    # one emission entry per unique observation symbol, ordered to match
    # baum_welch's np.unique indexing; use the smooth-pursuit statistics
    # estimated in this step
    unique_obs = np.unique(obs)
    SmP = [gaussian(mean_smp, std_smp, float(o)) for o in unique_obs]
    Fix = [gaussian(mean_fix, std_fix, float(o)) for o in unique_obs]

    emit_p = np.array([SmP, Fix])
    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    print(df['hidden_state'].value_counts())

    print('=============== END VITERBI ===============')

    print('\n============== BEGIN BAUM-WELCH ==============')

    trans_p, emit_p = baum_welch.run(obs, states, start_p, trans_p, emit_p)

    print('============== END BAUM-WELCH ==============')

    print('\n============== BEGIN UPDATED VITERBI ==============')

    df['hidden_state'] = viterbi.run(obs, states, start_p, trans_p, emit_p)
    # print(len(df['hidden_state']))
    print(df['hidden_state'].value_counts())

    print('=============== END UPDATED VITERBI ===============')

    df = clean_sequence(df)
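
import math

# gaussian is not defined in this snippet; presumably it evaluates the normal
# probability density at x (a minimal sketch under that assumption):
def gaussian(mu, sigma, x):
    return math.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * math.sqrt(2.0 * math.pi))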