Example #1
1
def select_best_k_splits(sample, cov_name, a_logrank, cov_at_level, min_sub_size, mode):
    """Return the best (at most ``cov_at_level``) binary splits of ``sample`` on ``cov_name``.

    Each candidate split of the rows into a left and a right half is scored as follows:
    inside each half the two treatment arms are compared with ``stat.logrank`` and the two
    resulting test statistics are combined by ``split_criterion``. Candidates are kept in
    ascending order of that combined value; once more than ``cov_at_level`` are held, the
    one with the largest value is dropped.

    Parameters:
        sample: table accessed as ``sample[column][row_index]``; it must provide the
            columns named by ``desc.covariates[0]``, ``[1]``, ``[2]`` and ``cov_name``.
        cov_name: covariate to split on. Supported names: 'Sex', 'Immun', 'CNS',
            'Mediastinum', 'Age', 'Leuc', 'Leber', 'Milz'.
        a_logrank: value forwarded as ``alpha`` to ``stat.logrank`` and
            ``stat.logrank_power``.
        cov_at_level: maximum number of splits to return.
        min_sub_size: candidates with fewer rows than this in either half are skipped.
        mode: one of ``desc.modes``; selects how 'Immun', 'CNS' and 'Mediastinum' are split.

    Returns:
        A list of tuples ``(group1, desc1, group2, desc2, cov_name, p_value_split)`` in
        ascending order of ``p_value_split``. The list is empty when the covariate has
        fewer than two distinct values.

    Raises:
        SystemExit: raised through ``exit('make_split() error')`` when ``cov_name`` or
            ``mode`` is not supported.
    """

    def get_cov_values(samp, cov):
        # Distinct values of the covariate column.
        # NOTE(review): the order is whatever set iteration yields. The 'Immun' branch for
        # desc.modes[0] cuts this tuple in half by position, so it relies on that order.
        return tuple(set(samp[cov]))

    # Rows are always addressed as column[j] for j in range(len(column)); the container
    # type of `sample` is not visible here, so no other iteration protocol is assumed.

    def rows_equal(column, value):
        # Row indices whose entry equals `value`.
        return [j for j in range(len(column)) if column[j] == value]

    def split_below(column, threshold):
        # Partition row indices into (entry < threshold, all remaining rows).
        below, rest = [], []
        for j in range(len(column)):
            if column[j] < threshold:
                below.append(j)
            else:
                rest.append(j)
        return below, rest

    def subset_splits(column, codes, label):
        # One split per non-empty subset of `codes`; rows whose code is in the subset go right.
        width = len(codes)
        for k in range(1, 2 ** width):
            bits = bin(k)[2:].zfill(width)  # bits[i] == '1' <=> codes[i] belongs to the subset
            level = tuple([codes[i] for i in range(width) if bits[i] == '1'])
            not_level = tuple([codes[i] for i in range(width) if bits[i] == '0'])
            l, r = [], []
            for j in range(len(column)):
                if column[j] in level:
                    r.append(j)
                else:
                    l.append(j)
            # Descriptions are listed in the same order as the groups: the right group
            # holds the rows in `level`, so `level` labels the right-hand side.
            yield l, r, '%s in %s' % (label, str(not_level)), '%s in %s' % (label, str(level))

    def value_list_splits(column, cov, vals):
        # One split per distinct value: rows below the value go left, the others right.
        # The descriptions enumerate the distinct values that fall on each side.
        for lvl in vals:
            l, r = split_below(column, lvl)
            level = [x for x in vals if x < lvl]
            not_level = [x for x in vals if x >= lvl]
            yield l, r, '%s = %s' % (cov, '{' + str(level)[1:-1] + '}'),\
                        '%s = %s' % (cov, '{' + str(not_level)[1:-1] + '}')

    def make_split(samp, cov, vals, mod):
        # Generate (left_rows, right_rows, left_description, right_description) candidates.
        column = samp[cov]
        if cov == 'Sex':
            yield rows_equal(column, 1), rows_equal(column, 2), 'Male', 'Female'
        elif cov == 'Immun':
            if mod == desc.modes[0]:
                # First half of the distinct values is treated as Immun-B, the rest as Immun-T.
                half = len(vals) // 2
                for candidate in subset_splits(column, vals[:half], 'Immun-B'):
                    yield candidate
                for candidate in subset_splits(column, vals[half:], 'Immun-T'):
                    yield candidate
            elif mod == desc.modes[1] or mod == desc.modes[2]:
                set_vals = set(vals)
                # Fixed code groups; the first four are labelled Immun-B, the others Immun-T.
                levels = [{1, 2, 3, 4}, {10}, {12}, {13}, {5, 6, 7, 8}, {11}, {14, 15, 16, 17, 18, 19, 20}]
                for k, level in enumerate(levels):
                    not_level = set_vals.difference(level)
                    l = [j for j in range(len(column)) if column[j] in not_level]
                    r = [j for j in range(len(column)) if column[j] in level]
                    # Join the codes explicitly instead of slicing the set's repr, whose
                    # layout differs between Python versions.
                    shown = '{' + ', '.join(str(code) for code in sorted(level)) + '}'
                    prefix = 'Immun-B' if k < 4 else 'Immun-T'
                    yield l, r, 'Immun', '%s in %s' % (prefix, shown)
            else:
                exit('make_split() error')
        elif cov == 'CNS':
            if mod == desc.modes[0]:
                yield rows_equal(column, 1), rows_equal(column, 2), 'CNS = 1', 'CNS = 2'
            elif mod == desc.modes[1] or mod == desc.modes[2]:
                for candidate in value_list_splits(column, cov, vals):
                    yield candidate
            else:
                exit('make_split() error')
        elif cov == 'Mediastinum':
            if mod == desc.modes[0] or mod == desc.modes[1]:
                yield rows_equal(column, 1), rows_equal(column, 2), 'Mediastinum = 1', 'Mediastinum = 2'
            elif mod == desc.modes[2]:
                for candidate in value_list_splits(column, cov, vals):
                    yield candidate
            else:
                exit('make_split() error')
        elif cov == 'Age' or cov == 'Leuc' or cov == 'Leber' or cov == 'Milz':
            for level in vals:
                l, r = split_below(column, level)
                # Whole-number thresholds are printed without decimals.
                if level != int(level):
                    left_desc = '%s < %.2f' % (cov, level)
                    right_desc = '%s >= %.2f' % (cov, level)
                else:
                    left_desc = '%s < %d' % (cov, level)
                    right_desc = '%s >= %d' % (cov, level)
                yield l, r, left_desc, right_desc
        else:
            exit('make_split() error')

    def add_split(splts, other_splt):
        # Insert before the first kept split with a strictly larger last element, so the
        # list stays in ascending order and ties keep their arrival order.
        for pos, kept in enumerate(splts):
            if kept[-1] > other_splt[-1]:
                splts.insert(pos, other_splt)
                break
        else:
            splts.append(other_splt)
        return splts

    def evaluate_half(rows):
        # Compare the two treatment arms inside `rows`; return (Subgroup, logrank result).
        treat = [treatment[i] for i in rows]
        converted = convert_data([outcome[i] for i in rows])
        times = [lifetime[i] for i in rows]
        out_a, t_a, out_b, t_b = sep_treats(converted, times, treat, treatment_type)
        kmf_a = stat.kaplan_meier(out_a, t_a, treatment_type[0])
        kmf_b = stat.kaplan_meier(out_b, t_b, treatment_type[1])
        surv_a = stat.get_kmf_survival(kmf_a)
        surv_b = stat.get_kmf_survival(kmf_b)
        res = stat.logrank(out_a, t_a, out_b, t_b, alpha=a_logrank)
        power = stat.logrank_power(min(len(out_a), len(out_b)), surv_a, surv_b, alpha=a_logrank)
        group = sc.Subgroup()
        group.set_subgroup(rows, kmf_a, treatment_type[0], kmf_b, treatment_type[1], logrank=res, pwr=power)
        return group, res

    treatment = sample[desc.covariates[0]]  # Rand - protocol
    outcome = sample[desc.covariates[1]]  # Tod - alive, lost, dead
    lifetime = sample[desc.covariates[2]]  # Time - lifetime
    treatment_type = sorted(set(treatment))
    best_k_splits = []
    cov_values = get_cov_values(sample, cov_name)
    if len(cov_values) < 2:
        # Nothing to split on: the covariate is constant (or the column is empty).
        return best_k_splits
    for left, right, desc_left, desc_right in make_split(sample, cov_name, cov_values, mode):
        if len(left) < min_sub_size or len(right) < min_sub_size:
            continue
        group1, res1 = evaluate_half(left)
        group2, res2 = evaluate_half(right)
        p_split = split_criterion(res1.test_statistic, res2.test_statistic)
        add_split(best_k_splits, (group1, desc_left, group2, desc_right, cov_name, p_split))
        if len(best_k_splits) > cov_at_level:
            # The list is sorted ascending, so the last entry is the worst candidate.
            best_k_splits.pop()
    return best_k_splits
Example #2
0
def make_subgroup_from_all(sample, a_logrank=desc.a_logrank):
    """Build a single ``sc.Subgroup`` that covers every row of ``sample``.

    The rows are separated by treatment arm with ``sep_treats``; a Kaplan-Meier fit is
    computed per arm, the arms are compared with ``stat.logrank`` and the power of that
    comparison is obtained from ``stat.logrank_power``.

    Parameters:
        sample: table providing the columns named by ``desc.covariates[0]``, ``[1]`` and
            ``[2]``; ``len(sample)`` is used as the number of rows.
        a_logrank: value forwarded as ``alpha`` to ``stat.logrank`` and
            ``stat.logrank_power``.

    Returns:
        The populated ``sc.Subgroup`` instance.
    """
    whole = sc.Subgroup()
    protocol = sample[desc.covariates[0]]  # Rand - protocol
    status = sample[desc.covariates[1]]  # Tod - alive, lost, dead
    lifetime = sample[desc.covariates[2]]  # Time - lifetime
    arms = sorted(set(protocol))
    events_a, times_a, events_b, times_b = sep_treats(convert_data(status), lifetime, protocol, arms)
    events = (events_a, events_b)
    times = (times_a, times_b)
    # Fit both arms first, then derive both survival curves, in that order.
    curves = [stat.kaplan_meier(events[i], times[i], arms[i]) for i in (0, 1)]
    survival = [stat.get_kmf_survival(curve) for curve in curves]
    test_result = stat.logrank(events_a, times_a, events_b, times_b, alpha=a_logrank)
    smaller_arm = min(len(events_a), len(events_b))
    power = stat.logrank_power(smaller_arm, survival[0], survival[1], alpha=a_logrank)
    whole.set_subgroup(range(len(sample)), curves[0], arms[0], curves[1], arms[1],
                       logrank=test_result, pwr=power)
    return whole