Ejemplo n.º 1
0
def run_mccoil(barcode_file_lines, maf_file_lines=None, verbose=False):
    """Run the categorical McCOIL method on a set of barcodes.

    Builds a barcode set from ``barcode_file_lines``, obtains minor allele
    frequencies (read from ``maf_file_lines`` when given, otherwise computed
    from the barcodes themselves), converts the resulting zygosity matrix
    into an R data frame and passes it to the ``McCOIL_categorical``
    function defined in the R source file located at ``mccoil_R``.

    Args:
        barcode_file_lines: Lines of a barcode file, handed unchanged to
            ``SetOfBarcodes.readBarcodeFileLines``.
        maf_file_lines: Optional lines of a MAF file, handed unchanged to
            ``MAF.readMAFFileLines``. When falsy (``None`` or empty), the
            frequencies are computed from the barcode set instead.
        verbose: When true, print the zygosity matrix and the R data frame
            built from it.

    Returns:
        A dict with two keys:
            'mafs': list of ``(site, maf_prediction)`` pairs.
            'cois': list of ``(sample, coi_prediction)`` pairs.
        An empty dict is returned when either the barcode set or the minor
        allele frequencies fail validation; the validation message is
        printed in that case.
    """
    ## init barcode set
    barcode_set = Barcode.SetOfBarcodes()
    barcode_set.readBarcodeFileLines(barcode_file_lines)
    # validate
    is_valid_barcode_set, err_msg = barcode_set.validate()
    if not is_valid_barcode_set:
        print(err_msg)
        # Empty dict (same type as the success value) so that both
        # validation failures look identical to the caller.
        return {}

    ## init mafs
    if not maf_file_lines:
        # NOTE(review): the meaning of the literal 1 is defined by
        # computeMAFFromBarcodes, which is not visible here -- confirm.
        mafs = barcode_set.computeMAFFromBarcodes(1)
    else:
        mafs = MAF.MAF()
        mafs.readMAFFileLines(maf_file_lines)
    # validate before the frequencies are used for anything
    is_valid_mafs, err_msg = mafs.validate()
    if not is_valid_mafs:
        print(err_msg)
        return {}
    mafs_R_vector = robjects.Vector(mafs.minor_allele_freqs())

    ## compute zygosity matrix, then convert to R DataFrame
    zygosity_matrix = barcode_set.to_zygosity_matrix(mafs,
                                                     header=True,
                                                     index=True)
    if verbose:
        print(zygosity_matrix)
    data = to_R_zygosity_df(zygosity_matrix)
    if verbose:
        print(data)

    ## import MCCOIL
    # Context manager guarantees the R source file handle is closed.
    with open(mccoil_R, 'r') as mccoil_file:
        mccoil_R_code = mccoil_file.read()
    mccoil = SignatureTranslatedAnonymousPackage(mccoil_R_code, 'mccoil')

    ## compute result
    result = mccoil.McCOIL_categorical(data, P=mafs_R_vector)

    ## get sites/samples
    # The result is addressed positionally: the last two entries are read as
    # sites and samples, entries 6 and 5 as the MAF and COI predictions.
    # NOTE(review): these positions depend on the R function's return value,
    # which is not visible here -- confirm against the McCOIL R source.
    sites, samples = list(result[-2]), list(result[-1])
    ## get maf/coi predictions, which map 1-to-1 sites/samples
    # (i.e. maf_predictions[i] is the prediction for sites[i])
    maf_predictions, coi_predictions = list(result[6]), list(result[5])

    # list(...) keeps the values as real lists on Python 3 as well, where
    # zip() returns a one-shot iterator.
    return {
        'mafs': list(zip(sites, maf_predictions)),
        'cois': list(zip(samples, coi_predictions)),
    }