def load_setworks(Arguments):
    '''
    Load the setworks pickled dictionary of dictionaries, and return its
    components together with a ranked list of barcodes.

    The results file is read from the "MOCA.results" path, using
    Arguments.Filename. The barcode list is produced by rank() using
    Arguments.RankMethod and Arguments.TopInteractions (the number of results
    to keep; 100 by default according to the original notes). Barcodes that do
    not pass minimum_performance() (the MinimumPerformance argument, if
    provided) are then removed, so fewer than TopInteractions barcodes may be
    returned.

    Returns the tuple: Barcodes, PValues, QValues, Performances, Interactions,
    UnionFeatures, IntersectionFeatures, DifferenceFeatures, FeatureVectors,
    SampleCounts, CaseCounts, EffectSizes, Report, Labels
    '''

    #NOTE: unpickling can execute arbitrary code, so only load results files you trust
    #Use a context manager so the file handle is closed even if loading fails
    with open(get_path("MOCA.results") + "/" + Arguments.Filename, "rb") as ResultsFile:
        Results = cPickle.load(ResultsFile)

    PValues = Results["PValues"]
    QValues = Results["QValues"]
    Performances = Results["Performances"]
    Interactions = Results["Interactions"]
    FeatureVectors = Results["FeatureVectors"]
    UnionFeatures = Results["UnionFeatures"]
    IntersectionFeatures = Results["IntersectionFeatures"]
    DifferenceFeatures = Results["DifferenceFeatures"]
    SampleCounts = Results["SampleCounts"]
    CaseCounts = Results["CaseCounts"]
    EffectSizes = Results["EffectSizes"]
    Report = Results["Report"]
    Labels = Results["Labels"]

    #Barcodes for setworks that passed the performance threshold (if provided)
    Passing = set(Barcode for Barcode in Results["Barcodes"]
                  if minimum_performance(Performances[Barcode], Arguments))

    #Sort by the requested rank method, then drop the barcodes that failed the
    #threshold. Previously the filtered list was overwritten by the ranked list,
    #so the threshold had no effect on what was returned.
    Ranked = rank(Performances, Arguments.RankMethod, Arguments.TopInteractions)
    Barcodes = [Barcode for Barcode in Ranked if Barcode in Passing]

    return Barcodes, PValues, QValues, Performances, Interactions, \
        UnionFeatures, IntersectionFeatures, DifferenceFeatures, \
        FeatureVectors, SampleCounts, CaseCounts, EffectSizes, Report, Labels
def supervised(Results, Arguments):
    '''
    Write a csv file of your supervised pairwise interactions and a few
    related stats. Suggested for viewing in MS excel (cross OS), numbers (os x),
    or the freely available OpenOffice Calc program (cross OS).

    Results is the results dictionary; it is passed to load_pairwise_results()
    and its "Report" entry supplies the phenotype name. The file is written to
    Arguments.Filename + ".csv". Features are taken in the order returned by
    rank() (Arguments.RankMethod, Arguments.TopInteractions), and features that
    do not pass minimum_performance() are skipped. Returns None.
    '''

    PValues, FDRs, Interactions, Performances, SampleCounts, CaseCounts, EffectSizes = load_pairwise_results(Results)
    Report = Results["Report"]

    #Need to get the phenotype name because this is 'supervised'
    Phenotype = Report["Supervised"][1].replace("(", "").replace(")", "")

    def interval(Metric, Feature):
        #Format whatever confidence_interval returns for this metric as "value-value"
        return "-".join(["%0.2f" %CI for CI in confidence_interval(Metric, SampleCounts[Feature])])

    #The context manager closes the file even if writing a row raises.
    #Binary mode is what the python 2 csv module expects.
    with open(Arguments.Filename + ".csv", "wb") as CSVfile:
        CSVwriter = csv.writer(CSVfile, dialect='excel')

        #Write the excel header
        CSVwriter.writerow(["Feature", "Interaction", "Phenotype", "P-value", "FDR", "Odds Ratio", "Effect Size",
                            "Sensitivity", "95% CI", "Specificity", "95% CI", "PPV", "95% CI", "NPV", "95% CI",
                            "Accuracy", "MCC", "Sample Count", "Case Count"])

        #Rows are ordered by the requested rank method
        for Feature in rank(Performances, Arguments.RankMethod, Arguments.TopInteractions):
            p = Performances[Feature]
            if not minimum_performance(p, Arguments):
                continue

            Sens, Spec, PPV, NPV, Accuracy, MCC = p.sensitivity, p.specificity, p.PPV, p.NPV, p.accuracy, p.MCC

            #FDRs is looked up by the p-value itself, not by the feature
            CSVwriter.writerow([Feature, Interactions[Feature], Phenotype,
                                "%0.2e" %PValues[Feature], "%0.2e" %FDRs[PValues[Feature]],
                                "%0.2f" %EffectSizes[Feature].odds_ratio,
                                "%0.2f" %EffectSizes[Feature].difference_of_proportions,
                                "%0.2f" %Sens, interval(Sens, Feature),
                                "%0.2f" %Spec, interval(Spec, Feature),
                                "%0.2f" %PPV, interval(PPV, Feature),
                                "%0.2f" %NPV, interval(NPV, Feature),
                                "%0.2f" %Accuracy, "%0.2f" %MCC,
                                SampleCounts[Feature], CaseCounts[Feature]])
def supervised(Results, Arguments):
    '''
    Write a csv file of your supervised pairwise interactions and a few
    related stats. Suggested for viewing in MS excel (cross OS), numbers (os x),
    or the freely available OpenOffice Calc program (cross OS).

    Results is the results dictionary; it is passed to load_pairwise_results()
    and its "Report" entry supplies the phenotype name. The file is written to
    Arguments.Filename + ".csv". Features are taken in the order returned by
    rank() (Arguments.RankMethod, Arguments.TopInteractions), and features that
    do not pass minimum_performance() are skipped. Returns None.
    '''

    PValues, FDRs, Interactions, Performances, SampleCounts, CaseCounts, EffectSizes = load_pairwise_results(
        Results)
    Report = Results["Report"]

    #Need to get the phenotype name because this is 'supervised'
    Phenotype = Report["Supervised"][1].replace("(", "").replace(")", "")

    def interval(Metric, Feature):
        #Format whatever confidence_interval returns for this metric as "value-value"
        return "-".join([
            "%0.2f" % CI
            for CI in confidence_interval(Metric, SampleCounts[Feature])
        ])

    #The context manager closes the file even if writing a row raises.
    #Binary mode is what the python 2 csv module expects.
    with open(Arguments.Filename + ".csv", "wb") as CSVfile:
        CSVwriter = csv.writer(CSVfile, dialect='excel')

        #Write the excel header
        CSVwriter.writerow([
            "Feature", "Interaction", "Phenotype", "P-value", "FDR",
            "Odds Ratio", "Effect Size", "Sensitivity", "95% CI",
            "Specificity", "95% CI", "PPV", "95% CI", "NPV", "95% CI",
            "Accuracy", "MCC", "Sample Count", "Case Count"
        ])

        #Rows are ordered by the requested rank method
        for Feature in rank(Performances, Arguments.RankMethod,
                            Arguments.TopInteractions):
            p = Performances[Feature]
            if not minimum_performance(p, Arguments):
                continue

            Sens, Spec, PPV, NPV, Accuracy, MCC = p.sensitivity, p.specificity, p.PPV, p.NPV, p.accuracy, p.MCC

            #FDRs is looked up by the p-value itself, not by the feature
            CSVwriter.writerow([
                Feature, Interactions[Feature], Phenotype,
                "%0.2e" % PValues[Feature],
                "%0.2e" % FDRs[PValues[Feature]],
                "%0.2f" % EffectSizes[Feature].odds_ratio,
                "%0.2f" % EffectSizes[Feature].difference_of_proportions,
                "%0.2f" % Sens, interval(Sens, Feature),
                "%0.2f" % Spec, interval(Spec, Feature),
                "%0.2f" % PPV, interval(PPV, Feature),
                "%0.2f" % NPV, interval(NPV, Feature),
                "%0.2f" % Accuracy, "%0.2f" % MCC,
                SampleCounts[Feature], CaseCounts[Feature]
            ])
def load_setworks(Arguments):
    '''
    Load the setworks pickled dictionary of dictionaries, and return its
    components together with a ranked list of barcodes.

    The results file is read from the "MOCA.results" path, using
    Arguments.Filename. The barcode list is produced by rank() using
    Arguments.RankMethod and Arguments.TopInteractions (the number of results
    to keep; 100 by default according to the original notes). Barcodes that do
    not pass minimum_performance() (the MinimumPerformance argument, if
    provided) are then removed, so fewer than TopInteractions barcodes may be
    returned.

    Returns the tuple: Barcodes, PValues, QValues, Performances, Interactions,
    UnionFeatures, IntersectionFeatures, DifferenceFeatures, FeatureVectors,
    SampleCounts, CaseCounts, EffectSizes, Report, Labels
    '''

    #NOTE: unpickling can execute arbitrary code, so only load results files you trust
    #Use a context manager so the file handle is closed even if loading fails
    ResultsPath = get_path("MOCA.results") + "/" + Arguments.Filename
    with open(ResultsPath, "rb") as ResultsFile:
        Results = cPickle.load(ResultsFile)

    PValues = Results["PValues"]
    QValues = Results["QValues"]
    Performances = Results["Performances"]
    Interactions = Results["Interactions"]
    FeatureVectors = Results["FeatureVectors"]
    UnionFeatures = Results["UnionFeatures"]
    IntersectionFeatures = Results["IntersectionFeatures"]
    DifferenceFeatures = Results["DifferenceFeatures"]
    SampleCounts = Results["SampleCounts"]
    CaseCounts = Results["CaseCounts"]
    EffectSizes = Results["EffectSizes"]
    Report = Results["Report"]
    Labels = Results["Labels"]

    #Barcodes for setworks that passed the performance threshold (if provided)
    Passing = set(
        Barcode for Barcode in Results["Barcodes"]
        if minimum_performance(Performances[Barcode], Arguments))

    #Sort by the requested rank method, then drop the barcodes that failed the
    #threshold. Previously the filtered list was overwritten by the ranked list,
    #so the threshold had no effect on what was returned.
    Ranked = rank(Performances, Arguments.RankMethod,
                  Arguments.TopInteractions)
    Barcodes = [Barcode for Barcode in Ranked if Barcode in Passing]

    return Barcodes, PValues, QValues, Performances, Interactions, \
        UnionFeatures, IntersectionFeatures, DifferenceFeatures, \
        FeatureVectors, SampleCounts, CaseCounts, EffectSizes, Report, Labels