def length_cost(sx, sy, mean_xy, variance_xy):
    """
    Score how well the combined lengths of two sentence groups match.

    A lower cost corresponds to a higher probability of the pairing,
    following the length-based model of Gale & Church (1993).

    With ``lx = sum(sx)`` and ``ly = sum(sy)`` the statistic computed is::

        m     = (lx + ly * mean_xy) / 2
        delta = (lx - ly * mean_xy) / sqrt(m * variance_xy)

    i.e. the spread is estimated from the average of the observed length
    and the length predicted from the other side, instead of from a
    single side only.

    :param sx: iterable of lengths on the first side (summed).
    :param sy: iterable of lengths on the second side (summed).
    :param mean_xy: factor applied to ``sum(sy)`` before it is compared
        with ``sum(sx)``.
    :param variance_xy: variance term multiplied with ``m`` under the
        square root.
    :return: ``-100 * (LOG2 + norm_logsf(abs(delta)))``, or
        ``float('-inf')`` when computing ``delta`` divides by zero
        (for example when both sides sum to zero or the variance is 0).
    """
    total_x = sum(sx)
    total_y = sum(sy)
    predicted_x = total_y * mean_xy
    pooled_mean = (total_x + predicted_x) / 2
    try:
        delta = (total_x - predicted_x) / math.sqrt(pooled_mean * variance_xy)
    except ZeroDivisionError:
        # NOTE(review): -inf is the smallest possible cost; confirm that
        # callers really want degenerate input to score this way.
        return float('-inf')
    # LOG2 and norm_logsf are provided elsewhere in this module.
    return -100 * (LOG2 + norm_logsf(abs(delta)))
def length_cost(sx, sy, mean_xy, variance_xy):
    """
    Score how well the combined lengths of two sentence groups match.

    A lower cost corresponds to a higher probability of the pairing,
    following the length-based model of Gale & Church (1993).

    With ``lx = sum(sx)`` and ``ly = sum(sy)`` the statistic computed is::

        m     = old_div(lx + ly * mean_xy, 2)
        delta = old_div(lx - ly * mean_xy, sqrt(m * variance_xy))

    ``old_div`` is presumably the python-future helper that reproduces
    Python 2 ``/`` semantics -- TODO confirm against the file's imports.

    :param sx: iterable of lengths on the first side (summed).
    :param sy: iterable of lengths on the second side (summed).
    :param mean_xy: factor applied to ``sum(sy)`` before it is compared
        with ``sum(sx)``.
    :param variance_xy: variance term multiplied with ``m`` under the
        square root.
    :return: ``-100 * (LOG2 + norm_logsf(abs(delta)))``, or
        ``float('-inf')`` when computing ``delta`` raises
        ``ZeroDivisionError``.
    """
    total_x = sum(sx)
    total_y = sum(sy)
    pooled_mean = old_div(total_x + total_y * mean_xy, 2)
    try:
        difference = total_x - total_y * mean_xy
        spread = math.sqrt(pooled_mean * variance_xy)
        delta = old_div(difference, spread)
    except ZeroDivisionError:
        # NOTE(review): -inf is the smallest possible cost; confirm that
        # callers really want degenerate input to score this way.
        return float('-inf')
    # LOG2 and norm_logsf are provided elsewhere in this module.
    return -100 * (LOG2 + norm_logsf(abs(delta)))