Ejemplo n.º 1
0
def load_setworks(Arguments):
    '''
    Load the pickled setworks results and return the ranked barcodes together with the result tables.

    The results file is Arguments.Filename inside the directory returned by get_path("MOCA.results").
    Barcodes are ordered (and, presumably, truncated to Arguments.TopInteractions) by rank() using
    Arguments.RankMethod. Barcodes whose performance does not satisfy minimum_performance() are
    then dropped, so the returned list can be shorter than what rank() produced.

    Returns the tuple (Barcodes, PValues, QValues, Performances, Interactions, UnionFeatures,
    IntersectionFeatures, DifferenceFeatures, FeatureVectors, SampleCounts, CaseCounts,
    EffectSizes, Report, Labels).
    '''

    #NOTE: unpickling can execute arbitrary code, so only load results files that you trust.
    #The with-block makes sure the file handle is closed as soon as the pickle has been read.
    ResultsPath = get_path("MOCA.results") + "/" + Arguments.Filename
    with open(ResultsPath, "rb") as ResultsFile:
        Results = cPickle.load(ResultsFile)

    PValues = Results["PValues"]
    QValues = Results["QValues"]
    Performances = Results["Performances"]
    Interactions = Results["Interactions"]
    FeatureVectors = Results["FeatureVectors"]
    UnionFeatures = Results["UnionFeatures"]
    IntersectionFeatures = Results["IntersectionFeatures"]
    DifferenceFeatures = Results["DifferenceFeatures"]
    SampleCounts = Results["SampleCounts"]
    CaseCounts = Results["CaseCounts"]
    EffectSizes = Results["EffectSizes"]
    Report = Results["Report"]
    Labels = Results["Labels"]

    #Sort the setworks with the requested rank method first...
    RankedBarcodes = rank(Performances, Arguments.RankMethod, Arguments.TopInteractions)

    #...and then get rid of barcodes for setworks that didn't pass a performance threshold (if provided).
    #Previously the filtered list was overwritten by the rank() output, so the threshold was ignored.
    Barcodes = [Barcode for Barcode in RankedBarcodes if minimum_performance(Performances[Barcode], Arguments)]

    return Barcodes, PValues, QValues, Performances, Interactions, \
        UnionFeatures, IntersectionFeatures, DifferenceFeatures, \
        FeatureVectors, SampleCounts, CaseCounts, EffectSizes, Report, Labels
Ejemplo n.º 2
0
def supervised(Results, Arguments):
    '''
    Write a csv file of your supervised pairwise interactions and a few related stats. Suggested for viewing in 
    MS excel (cross OS), number (os x), or the freely available OpenOffice Calc program (cross OS).

    The output file is Arguments.Filename + ".csv". One row is written for every feature returned by
    rank() that also passes minimum_performance(). Returns None.
    '''

    PValues, FDRs, Interactions, Performances, SampleCounts, CaseCounts, EffectSizes = load_pairwise_results(Results)

    Report = Results["Report"] #Need to get the phenotype name because this is 'supervised'
    Phenotype = Report["Supervised"][1].replace("(","").replace(")", "")

    def format_interval(Metric, SampleCount):
        #Join the bounds returned by confidence_interval as "low-high", two decimals each
        return "-".join(["%0.2f" %CI for CI in confidence_interval(Metric, SampleCount)])

    #The with-block closes the file even if formatting or writing a row raises.
    #Binary mode is kept from the original (Python 2 csv convention).
    with open(Arguments.Filename + ".csv", "wb") as CSVfile:
        CSVwriter = csv.writer(CSVfile, dialect='excel')

        #Write the excel header
        CSVwriter.writerow(["Feature", "Interaction", "Phenotype", "P-value", "FDR", "Odds Ratio", "Effect Size",
                            "Sensitivity", "95% CI", "Specificity", "95% CI", "PPV", "95% CI", "NPV", "95% CI",
                            "Accuracy", "MCC", "Sample Count", "Case Count"])

        #Row order is whatever rank() returns for the requested Arguments.RankMethod
        for Feature in rank(Performances, Arguments.RankMethod, Arguments.TopInteractions):
            p = Performances[Feature]
            if not minimum_performance(p, Arguments):
                continue #Didn't pass the performance threshold, so no row for this feature

            SampleCount = SampleCounts[Feature]
            EffectSize = EffectSizes[Feature]
            #Note that FDRs is looked up by the p-value itself, not by the feature
            CSVwriter.writerow([Feature, Interactions[Feature], Phenotype, "%0.2e" %PValues[Feature], "%0.2e" %FDRs[PValues[Feature]],
                                "%0.2f" %EffectSize.odds_ratio, "%0.2f" %EffectSize.difference_of_proportions,
                                "%0.2f" %p.sensitivity, format_interval(p.sensitivity, SampleCount),
                                "%0.2f" %p.specificity, format_interval(p.specificity, SampleCount),
                                "%0.2f" %p.PPV, format_interval(p.PPV, SampleCount),
                                "%0.2f" %p.NPV, format_interval(p.NPV, SampleCount),
                                "%0.2f" %p.accuracy, "%0.2f" %p.MCC, SampleCount, CaseCounts[Feature]])

    return 
Ejemplo n.º 3
0
def supervised(Results, Arguments):
    '''
    Write a csv file of your supervised pairwise interactions and a few related stats. Suggested for viewing in 
    MS excel (cross OS), number (os x), or the freely available OpenOffice Calc program (cross OS).

    The output file is Arguments.Filename + ".csv". One row is written for every feature returned by
    rank() that also passes minimum_performance(). Returns None.
    '''

    PValues, FDRs, Interactions, Performances, SampleCounts, CaseCounts, EffectSizes = load_pairwise_results(
        Results)

    Report = Results[
        "Report"]  #Need to get the phenotype name because this is 'supervised'
    Phenotype = Report["Supervised"][1].replace("(", "").replace(")", "")

    def format_interval(Metric, SampleCount):
        #Join the bounds returned by confidence_interval as "low-high", two decimals each
        return "-".join([
            "%0.2f" % CI for CI in confidence_interval(Metric, SampleCount)
        ])

    #The with-block closes the file even if formatting or writing a row raises.
    #Binary mode is kept from the original (Python 2 csv convention).
    with open(Arguments.Filename + ".csv", "wb") as CSVfile:
        CSVwriter = csv.writer(CSVfile, dialect='excel')

        #Write the excel header
        CSVwriter.writerow([
            "Feature", "Interaction", "Phenotype", "P-value", "FDR",
            "Odds Ratio", "Effect Size", "Sensitivity", "95% CI",
            "Specificity", "95% CI", "PPV", "95% CI", "NPV", "95% CI",
            "Accuracy", "MCC", "Sample Count", "Case Count"
        ])

        #Row order is whatever rank() returns for the requested Arguments.RankMethod
        for Feature in rank(Performances, Arguments.RankMethod,
                            Arguments.TopInteractions):
            p = Performances[Feature]
            if not minimum_performance(p, Arguments):
                #Didn't pass the performance threshold, so no row for this feature
                continue

            SampleCount = SampleCounts[Feature]
            EffectSize = EffectSizes[Feature]
            #Note that FDRs is looked up by the p-value itself, not by the feature
            CSVwriter.writerow([
                Feature, Interactions[Feature], Phenotype,
                "%0.2e" % PValues[Feature],
                "%0.2e" % FDRs[PValues[Feature]],
                "%0.2f" % EffectSize.odds_ratio,
                "%0.2f" % EffectSize.difference_of_proportions,
                "%0.2f" % p.sensitivity,
                format_interval(p.sensitivity, SampleCount),
                "%0.2f" % p.specificity,
                format_interval(p.specificity, SampleCount),
                "%0.2f" % p.PPV,
                format_interval(p.PPV, SampleCount),
                "%0.2f" % p.NPV,
                format_interval(p.NPV, SampleCount),
                "%0.2f" % p.accuracy,
                "%0.2f" % p.MCC, SampleCount, CaseCounts[Feature]
            ])

    return
Ejemplo n.º 4
0
def load_setworks(Arguments):
    '''
    Load the pickled setworks results and return the ranked barcodes together with the result tables.

    The results file is Arguments.Filename inside the directory returned by get_path("MOCA.results").
    Barcodes are ordered (and, presumably, truncated to Arguments.TopInteractions) by rank() using
    Arguments.RankMethod. Barcodes whose performance does not satisfy minimum_performance() are
    then dropped, so the returned list can be shorter than what rank() produced.

    Returns the tuple (Barcodes, PValues, QValues, Performances, Interactions, UnionFeatures,
    IntersectionFeatures, DifferenceFeatures, FeatureVectors, SampleCounts, CaseCounts,
    EffectSizes, Report, Labels).
    '''

    #NOTE: unpickling can execute arbitrary code, so only load results files that you trust.
    #The with-block makes sure the file handle is closed as soon as the pickle has been read.
    ResultsPath = get_path("MOCA.results") + "/" + Arguments.Filename
    with open(ResultsPath, "rb") as ResultsFile:
        Results = cPickle.load(ResultsFile)

    PValues = Results["PValues"]
    QValues = Results["QValues"]
    Performances = Results["Performances"]
    Interactions = Results["Interactions"]
    FeatureVectors = Results["FeatureVectors"]
    UnionFeatures = Results["UnionFeatures"]
    IntersectionFeatures = Results["IntersectionFeatures"]
    DifferenceFeatures = Results["DifferenceFeatures"]
    SampleCounts = Results["SampleCounts"]
    CaseCounts = Results["CaseCounts"]
    EffectSizes = Results["EffectSizes"]
    Report = Results["Report"]
    Labels = Results["Labels"]

    #Sort the setworks with the requested rank method first...
    RankedBarcodes = rank(Performances, Arguments.RankMethod,
                          Arguments.TopInteractions)

    #...and then get rid of barcodes for setworks that didn't pass a performance threshold (if provided).
    #Previously the filtered list was overwritten by the rank() output, so the threshold was ignored.
    Barcodes = [
        Barcode for Barcode in RankedBarcodes
        if minimum_performance(Performances[Barcode], Arguments)
    ]

    return Barcodes, PValues, QValues, Performances, Interactions, \
        UnionFeatures, IntersectionFeatures, DifferenceFeatures, \
        FeatureVectors, SampleCounts, CaseCounts, EffectSizes, Report, Labels