def build_domain(data):
    """
    Build the label and attribute alphabets supported by ``data``.

    Every label found in an example's ground-truth sequence is registered in
    the label alphabet, and every attribute found on any token of the
    example's sequence is registered in the attribute alphabet.  Per the
    original description, the alphabets assign each label and feature an
    integer.

    Args:
        data: iterable of examples.  Each example must expose ``truth`` (an
            iterable of labels) and ``sequence`` (an iterable of tokens, each
            with an iterable ``attributes``).

    Returns:
        A ``(labels, attributes)`` tuple of ``Alphabet`` objects, returned
        after ``freeze()`` has been called on the label alphabet and
        ``stop_growth()`` on the attribute alphabet.

    NOTE(review): ``build_domain`` is defined a second time later in this
    file; if both definitions are module-level, the later one replaces this
    one at import time -- confirm which is intended.
    """
    label_alphabet = Alphabet()
    attribute_alphabet = Alphabet()

    for example in data:
        label_alphabet.add_many(example.truth)
        attribute_alphabet.add_many(
            attribute
            for token in example.sequence
            for attribute in token.attributes
        )

    # Both domains are complete at this point; presumably these calls lock
    # the alphabets against further additions -- confirm against Alphabet.
    label_alphabet.freeze()
    attribute_alphabet.stop_growth()
    return (label_alphabet, attribute_alphabet)
def build_domain(data):
    """
    Build the label and feature alphabets supported by ``data``.

    Only *supported* features are collected, i.e. those produced by each
    example's feature function along its ground-truth label path.  Per the
    original description, the alphabets assign each label and feature an
    integer.

    Args:
        data: iterable of examples.  Each example ``x`` must expose
            ``x.truth`` (the indexable ground-truth label sequence),
            ``x.N`` (used as the exclusive upper bound on positions;
            presumably the sequence length -- confirm), and
            ``x.F(t, previous_label, label)``, which returns an iterable of
            the features active at position ``t``.

    Returns:
        A ``(L, A)`` tuple of ``Alphabet`` objects (labels, features),
        returned after ``L.freeze()`` and ``A.stop_growth()`` have been
        called.

    NOTE(review): ``build_domain`` is also defined earlier in this file; if
    both definitions are module-level, this one replaces the earlier one at
    import time -- confirm which is intended.
    """
    L = Alphabet()
    A = Alphabet()

    for x in data:
        # Add this example's labels to the label domain.
        L.add_many(x.truth)

        # Extract the features of the target (ground-truth) path.
        F = x.F
        path = x.truth

        # The first position has no predecessor, hence the None previous
        # label.  This indexes path[0], so every example is assumed to have
        # at least one position.
        A.add_many(F(0, None, path[0]))

        # Remaining positions pair each label with the one before it.
        # ``range`` replaces the Python-2-only ``xrange`` so this also runs
        # on Python 3; the positions visited are identical.
        A.add_many(
            k
            for t in range(1, x.N)
            for k in F(t, path[t - 1], path[t])
        )

    # Domains are now ready; presumably these calls lock the alphabets
    # against further additions -- confirm against Alphabet.
    L.freeze()
    A.stop_growth()
    return (L, A)