def compute_log_likelihoods(target, source, env):
    """Compute a column-wise log-likelihood matrix and write it to the target.

    The input is loaded from ``source[0]`` with ``unpack_numpy(..., dense=False)``
    and converted to a dense array. Each column of the output is then
    ``ct_log_likelihood(column, totals)``, where ``totals`` is the sum of the
    matrix along axis 1. The result is written with ``pack_numpy`` together
    with the labels and features that were read from the input.

    Args:
        target: Sequence of output nodes; ``target[0].rstr()`` is passed to
            ``pack_numpy`` as the destination.
        source: Sequence of input nodes; ``source[0].rstr()`` is passed to
            ``unpack_numpy``. ``source[-1].read()`` is also invoked, but its
            value is not used here.
        env: Not used by this function.

    Returns:
        None.
    """
    # The returned value is unused; the call itself is retained in case it
    # has effects that are not visible from this function.
    source[-1].read()

    # Suppress divide/invalid floating-point warnings only for the duration
    # of this computation. The context manager restores whatever settings
    # the caller had, even when an exception is raised, instead of forcing
    # them to "warn" afterwards.
    with numpy.errstate(divide="ignore", invalid="ignore"):
        mat, labels, features = unpack_numpy(source[0].rstr(), dense=False)
        mat = numpy.asarray(mat.todense())
        totals = mat.sum(1)
        logging.info("created totals matrix of shape %s", totals.shape)
        newmat = numpy.empty(shape=mat.shape)
        for i in range(mat.shape[1]):
            newmat[:, i] = ct_log_likelihood(mat[:, i], totals)
        pack_numpy(target[0].rstr(), data=newmat, labels=labels, features=features)
    return None
def log_likelihood(target, source, env):
    """Score every (label, feature) cell of the input matrix.

    The matrix, labels and features are loaded from ``source[0]`` with
    ``unpack_numpy``. For each cell, ``ct_log_likelihood`` is called with a
    list of two pairs:

    * ``(cell value, sum of the cell's row)``
    * ``(column sum - cell value, grand total - sum of the cell's row)``

    Args:
        target: Not used by the active code.
        source: Sequence of input nodes; ``source[0].rstr()`` is passed to
            ``unpack_numpy``. ``source[-1].read()`` is also invoked, but its
            value is not used here.
        env: Not used by this function.

    Returns:
        None.

    NOTE(review): the score matrix is computed but never written anywhere.
    The original code had a disabled
    ``pickle.dump((newmat, labels, features), meta_open(target[0].rstr(), "w"))``
    at this point; confirm whether this builder is still meant to produce
    output.
    """
    # Return value intentionally ignored; call retained as in the original.
    source[-1].read()

    counts, labels, features = unpack_numpy(source[0].rstr())
    scores = numpy.empty(counts.shape)

    row_sums = counts.sum(1)
    col_sums = counts.sum(0)
    grand_total = col_sums.sum()

    # Visit cells feature by feature, then label by label.
    for col, _feature in enumerate(features):
        for row, _label in enumerate(labels):
            observed = counts[row][col]
            row_total = row_sums[row]
            table = [
                (observed, row_total),
                (col_sums[col] - observed, grand_total - row_total),
            ]
            scores[row][col] = ct_log_likelihood(table)

    return None
def pairwise_log_likelihood(target, source, env):
    """Score every feature for every unordered pair of distinct labels.

    The matrix, labels and features are loaded from ``source[0]`` with
    ``unpack_numpy``. One output row is produced per unordered pair of
    unequal labels. For each feature, the score is ``0.0`` when both labels
    have a zero entry for that feature; otherwise it is
    ``ct_log_likelihood([(count_a, total_a), (count_b, total_b)])``, where
    the totals are the sums of the respective matrix rows.

    Args:
        target: Not used by the active code.
        source: Sequence of input nodes; ``source[0].rstr()`` is passed to
            ``unpack_numpy``. ``source[-1].read()`` is also invoked, but its
            value is not used here.
        env: Not used by this function.

    Returns:
        None.

    NOTE(review): the score matrix is computed but never written anywhere.
    The original code had a disabled
    ``pickle.dump((newmat, combos, [x for x in features]), meta_open(target[0].rstr(), "w"))``
    at this point; confirm whether this builder is still meant to produce
    output.
    """
    # Return value intentionally ignored; call retained as in the original.
    source[-1].read()

    mat, labels, features = unpack_numpy(source[0].rstr())

    # Unordered pairs of distinct labels; the frozenset collapses (x, y) and
    # (y, x) into a single entry.
    combos = list({frozenset([x, y]) for x in labels for y in labels if x != y})

    newmat = numpy.empty((len(combos), len(features)))
    obs_totals = mat.sum(1)

    for row, combo in enumerate(combos):
        # Resolve the two labels to matrix rows once per pair. These lookups
        # do not depend on the feature, so repeating them for every column
        # (as the code previously did) only added linear scans of `labels`.
        label_a, label_b = combo
        row_a = labels.index(label_a)
        row_b = labels.index(label_b)
        total_a = obs_totals[row_a]
        total_b = obs_totals[row_b]

        for col, _feature in enumerate(features):
            count_a = mat[row_a][col]
            count_b = mat[row_b][col]
            if count_a == 0 and count_b == 0:
                # Feature absent from both labels: score fixed at zero
                # without calling ct_log_likelihood.
                ll = 0.0
            else:
                ll = ct_log_likelihood([
                    (count_a, total_a),
                    (count_b, total_b),
                ])
            newmat[row][col] = ll

    return None