def reduce(self, **keywords):
    """
    2012.10.15
        Run after all files have been walked through.

    Summarise the accumulated data as a single output row, echo the key
    numbers to stderr and then hand over to the parent class' reduce().

    The output row holds, in order: basename of self.outputFname,
    self.noOfOutliers, self.noOfNonMissing, the outlier fraction,
    the chi-squared statistic, its minus-log p-value, the median of
    self.x_value_ls, the median of self.y_value_ls and the correlation
    coefficient between the two value lists.
    """
    # mean/std of self.dataLs, estimated with excludeTopFraction=0.2
    meanAndStd = statistics.estimateMeanStdFromData(
        dataVector=self.dataLs,
        excludeTopFraction=0.2,
    )
    chiSqResult = statistics.calculateChiSqStatOfDeltaVector(
        dataVector=self.dataLs,
        mean=meanAndStd.mean,
        std=meanAndStd.std,
    )

    medianOfX = numpy.median(self.x_value_ls)
    medianOfY = numpy.median(self.y_value_ls)
    # off-diagonal entry of the 2x2 correlation matrix is the x-y correlation
    correlationMatrix = numpy.corrcoef(self.x_value_ls, self.y_value_ls)
    xyCorrelation = correlationMatrix[0, 1]

    outputID = os.path.basename(self.outputFname)
    # float() forces true division regardless of the counters' type
    fractionOfOutliers = float(self.noOfOutliers) / self.noOfNonMissing

    summaryRow = [
        outputID,
        self.noOfOutliers,
        self.noOfNonMissing,
        fractionOfOutliers,
        chiSqResult.chiSqStat,
        chiSqResult.chiSqMinusLogPvalue,
        medianOfX,
        medianOfY,
        xyCorrelation,
    ]
    self.writer.writerow(summaryRow)

    reportValues = (
        self.noOfOutliers,
        self.noOfNonMissing,
        fractionOfOutliers,
        chiSqResult.chiSqStat,
        chiSqResult.chiSqMinusLogPvalue,
    )
    sys.stderr.write(
        "%s/%s (%.3f) outliers, chiSqStat=%.3f, chiSqMinusLogPvalue=%.3f.\n"
        % reportValues
    )

    # parent reduce() presumably closes the self.invariantPData.writer
    # -- TODO confirm against AbstractMatrixFileWalker
    AbstractMatrixFileWalker.reduce(self, **keywords)
def setup(self, **keywords):
    """
    Run the parent class' setup(), then build
    self.invariantPData.individualCode2readGroup from self.readGroupFname.

    The read-group column is looked up by self.readGroupHeader when that
    is set; otherwise the first column (index 0) is used. Each read group
    is parsed through self.db_vervet.parseAlignmentReadGroup(); rows whose
    parsed result carries no individualAlignment are skipped. If several
    rows map to the same individual code, the last one wins.

    Always returns 1.
    """
    AbstractMatrixFileWalker.setup(self, **keywords)

    # construct an individualCode2readGroup from readGroupFname
    code2readGroup = self.invariantPData.individualCode2readGroup = {}

    readGroupFile = MatrixFile(inputFname=self.readGroupFname)
    readGroupFile.constructColName2IndexFromHeader()
    readGroupIndex = (
        readGroupFile.getColIndexGivenColHeader(self.readGroupHeader)
        if self.readGroupHeader
        else 0
    )

    for fields in readGroupFile:
        readGroup = fields[readGroupIndex]
        parsedReadGroup = self.db_vervet.parseAlignmentReadGroup(readGroup)
        alignment = parsedReadGroup.individualAlignment
        if not alignment:
            # read group could not be tied to an alignment; nothing to record
            continue
        code = alignment.individual_sequence.individual.code
        code2readGroup[code] = readGroup

    del readGroupFile
    return 1
def __init__(self, inputFnameLs=None, **keywords):
    """
    Construct the walker by delegating to AbstractMatrixFileWalker.

    inputFnameLs and every extra keyword argument are forwarded to the
    parent constructor unchanged; nothing else is initialised here.
    """
    AbstractMatrixFileWalker.__init__(
        self,
        inputFnameLs=inputFnameLs,
        **keywords
    )