def reconstruct(f,tree,tau,minfrac,maxfrac):
    """Fit per-leaf averages of shifted samples and return the fit on [0, 1].

    Steps, as performed by the code:

    1. ``randomize_folder_size(tree, minfrac, maxfrac)`` is called first
       (presumably it re-draws the tree's interval sizes -- confirm against
       that helper).
    2. Sample positions ``f[0, :]`` are shifted by ``-tau`` modulo 1 and
       binned against the leaves' ``lbound`` values with ``np.digitize``.
    3. For every bin a running mean of the sample values ``f[1, i]`` is kept.
    4. ``lb.l1_bregman`` is run on the rows of ``tree.char_library()`` and of
       the bin means selected by the per-sample bin indices (so a bin appears
       once per sample that fell into it).
    5. The fitted values are shifted back by ``+tau`` modulo 1 and sorted by
       position.

    Args:
        f: 2-D array; row 0 is read as sample positions, row 1 as sample
            values.
        tree: object exposing ``leaves()``, ``size`` and ``char_library()``.
        tau: shift applied to the positions (modulo 1.0).
        minfrac, maxfrac: forwarded unchanged to ``randomize_folder_size``.

    Returns:
        ``(x, y, coeffs)`` where ``x`` is ``[0.0, sorted positions..., 1.0]``,
        ``y`` is the fitted values in the same order with the last value
        prepended twice, and ``coeffs`` is the first value returned by
        ``lb.l1_bregman``.
    """
    randomize_folder_size(tree, minfrac, maxfrac)

    # Bin the tau-shifted sample positions against the leaves' lower bounds.
    shifted_positions = (f[0, :] - tau) % 1.0
    leaf_starts = np.array([leaf.lbound for leaf in tree.leaves()])
    bins = np.digitize(shifted_positions, leaf_starts) - 1

    # Incremental (running) mean of the sample values that land in each bin.
    bin_means = np.zeros([tree.size, 1])
    bin_counts = np.zeros(np.shape(bin_means))
    for sample, bin_index in enumerate(bins):
        bin_counts[bin_index, 0] += 1.0
        k = bin_counts[bin_index, 0]
        bin_means[bin_index, 0] = (
            ((k - 1) / k) * bin_means[bin_index, 0] + (1 / k) * f[1, sample]
        )

    library = tree.char_library()
    # The second return value of the solver is not used here.
    coeffs, _ = lb.l1_bregman(
        library[bins, :],
        bin_means[bins, :],
        1,
        threshold=1e-6,
        verbose=False,
    )

    # Evaluate the fit on every leaf, undo the shift, and order by position.
    fitted = library.dot(coeffs).ravel()
    positions = ((leaf_starts + tau) % 1.0)[0:tree.size]
    order = positions.argsort()

    ordered_fit = fitted[order]
    y = np.hstack([ordered_fit[-1], ordered_fit[-1], ordered_fit])
    x = np.hstack([[0.0], np.sort(positions), [1.0]])
    return x, y, coeffs
def reconstruct_l2(f,tree,minfrac,maxfrac,alpha=1.0,suppress_warnings=True):
    """Compute a lasso (LARS) path for per-leaf averages of the samples.

    ``randomize_folder_size(tree, minfrac, maxfrac)`` is called first
    (presumably it re-draws the tree's interval sizes -- confirm against that
    helper). The sample positions ``f[0, :]`` are then binned against the
    leaves' ``lbound`` values, a running mean of the sample values ``f[1, i]``
    is kept per bin, and ``sklm.lars_path`` is run with ``method='lasso'`` on
    the rows of ``tree.char_library(alpha)`` that belong to non-empty bins.

    Args:
        f: 2-D array; row 0 is read as sample positions, row 1 as sample
            values.
        tree: object exposing ``leaves()``, ``size`` and ``char_library()``.
        minfrac, maxfrac: forwarded unchanged to ``randomize_folder_size``.
        alpha: forwarded unchanged to ``tree.char_library``.
        suppress_warnings: when true, ``UserWarning``s raised while the path
            is computed are ignored.

    Returns:
        ``(tree_intervals, fy, alphas, active_vars, coef_path,
        active_indices)``: the leaf lower bounds, the 1-D per-bin means, the
        three values returned by ``sklm.lars_path``, and the indices of the
        bins that received at least one sample.
    """
    randomize_folder_size(tree, minfrac, maxfrac)
    tree_intervals = np.array([node.lbound for node in tree.leaves()])
    indices = np.digitize(f[0, :], tree_intervals) - 1

    # Incremental (running) mean of the sample values that land in each bin.
    fy = np.zeros([tree.size])
    fcts = np.zeros(np.shape(fy))
    for i, idx in enumerate(indices):
        fcts[idx] += 1.0
        n = fcts[idx]
        fy[idx] = ((n - 1) / n) * fy[idx] + (1 / n) * f[1, i]

    # Only bins that actually received samples take part in the fit.
    active_indices = np.where(fcts > 0)[0]
    cl = tree.char_library(alpha)
    design = cl[active_indices, :]
    # fy is 1-D, so it must be indexed along its single axis; the previous
    # ``fy[active_indices, :]`` raised IndexError (too many indices).
    targets = fy[active_indices]

    if suppress_warnings:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            alphas, active_vars, coef_path = sklm.lars_path(
                design, targets, method='lasso', max_iter=2000)
    else:
        alphas, active_vars, coef_path = sklm.lars_path(
            design, targets, method='lasso', max_iter=2000)

    return tree_intervals, fy, alphas, active_vars, coef_path, active_indices
def count_word_freq(self, case_sensitive=False) -> dict:
    """Tally how many times each leaf label occurs across ``self.trees``.

    Args:
        case_sensitive: when false (the default) labels are lower-cased
            before counting, so "The" and "the" share one entry.

    Returns:
        A ``collections.defaultdict(int)`` mapping label -> occurrence count.
    """
    frequencies = collections.defaultdict(int)
    for current_tree in self.trees:
        for leaf in current_tree.leaves():
            word = leaf.label if case_sensitive else leaf.label.lower()
            frequencies[word] += 1
    return frequencies
import ccgbank
import tree

# Iterate over everything ccgbank.load() produces so that `t` ends up bound
# to the last item, then print that item's leaves.
# The file is opened in a `with` block so the handle is always closed; the
# print stays inside the block so the file is still open while the leaves
# are collected, exactly as before.
with open('/u1/repos/candc/data/CCGbank1.2/data/AUTO/00/wsj_0001.auto') as auto_file:
    for t in ccgbank.load(auto_file):
        pass
    # NOTE: if load() produced nothing, the loop never assigns `t`.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(list(tree.leaves(t)))
import ccgbank
import tree

# Iterate over everything ccgbank.load() produces so that `t` ends up bound
# to the last item, then print that item's leaves.
# The file is opened in a `with` block so the handle is always closed; the
# print stays inside the block so the file is still open while the leaves
# are collected, exactly as before.
with open('/u1/repos/candc/data/CCGbank1.2/data/AUTO/00/wsj_0001.auto') as auto_file:
    for t in ccgbank.load(auto_file):
        pass
    # NOTE: if load() produced nothing, the loop never assigns `t`.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(list(tree.leaves(t)))