def NumAssignments(self):
    """Write the number of assignments per sequence, followed by two histograms.

    Rows are obtained from self.mTableDomains.GetNumAssignments() and each
    row is printed tab-separated.  Column 1 of every row is then binned
    with Histogram.Calculate and printed under the heading "number of
    domains per sequence"; column 2 is treated the same way under the
    heading "number of different domains per sequence".

    Everything is written to standard output; nothing is returned.
    """
    # NOTE(review): the original layout of this method was lost.  It is
    # assumed here that the whole header (through the flush) belongs to the
    # log-level guard -- confirm against the intended behaviour.
    if self.mLogLevel >= 1:
        print "# instance of <" + str( self.__class__) + "> on " + time.asctime( time.localtime(time.time()))
        print "# source: %s" % (self.mTableNameDomains)
        print "# hubs between domains"
        print string.join(("nid", "nassignments", "nclasses"), "\t")
        sys.stdout.flush()

    result = self.mTableDomains.GetNumAssignments()

    # One tab-separated output line per result row.
    for r in result:
        print string.join(map(str, r), "\t")

    # Second field of each row (labelled "nassignments" in the header above).
    data = map(lambda x: x[1], result)
    h = Histogram.Calculate(data)
    print "# histogram of number of domains per sequence"
    Histogram.Print(h)

    # Third field of each row (labelled "nclasses" in the header above).
    data = map(lambda x: x[2], result)
    h = Histogram.Calculate(data)
    print "# histogram of number of different domains per sequence"
    Histogram.Print(h)
def CountDistribution(self):
    """Print the distribution of units and of sequences per family.

    Two grouped counts are taken from the domains table, both grouped by
    the field named by self.mTableDomains.GetFieldNameClass(): the number
    of rows per group and the number of distinct ``nid`` values per group.
    Each list of counts is binned with Histogram.Calculate, the two
    histograms are merged with Histogram.Combine and the result is printed
    to standard output.
    """
    self.PrintStatus()

    # NOTE(review): the legend is one string; its "# "-separated segments
    # read as if they were meant to be separate comment lines -- confirm.
    print(
        "# # NUM: number of units/sequences per family"
        " # NUNITS: number of families with x units"
        " # NSEQ: number of families with x sequences #"
    )
    print("NUM\tNUNITS\tNSEQ")
    sys.stdout.flush()

    family_field = self.mTableDomains.GetFieldNameClass()

    # Same GROUP BY for both queries; only the aggregate differs.
    templates = (
        "SELECT COUNT(*) FROM %s GROUP BY %s",
        "SELECT COUNT(DISTINCT nid) FROM %s GROUP BY %s",
    )

    per_family = []
    for template in templates:
        query = template % (self.mTableNameDomains, family_field)
        rows = self.dbhandle.Execute(query).fetchall()
        counts = [row[0] for row in rows]
        per_family.append(Histogram.Calculate(counts))

    Histogram.Print(Histogram.Combine(per_family))
def xCountDistribution(self):
    """Print the distribution of units and of sequences per family.

    Reads the counts straight from the families table: rows are grouped
    once by ``nunits`` and once by ``nsequences``.  The raw
    (value, count) result sets are handed to Histogram.Combine as they
    come back from the database, and the combined table is printed to
    standard output.
    """
    self.PrintStatus()

    # NOTE(review): the legend is one string; its "# "-separated segments
    # read as if they were meant to be separate comment lines -- confirm.
    print(
        "# # NUM: number of units/sequences per family"
        " # NUNITS: number of families with x units"
        " # NSEQ: number of families with x sequences #"
    )
    print("num\tnunits\tnseq")
    sys.stdout.flush()

    templates = (
        "SELECT nunits, COUNT(*) FROM %s GROUP BY nunits",
        "SELECT nsequences, COUNT(*) FROM %s GROUP BY nsequences",
    )

    # The grouped rows are used directly as histograms; no re-binning.
    grouped = [
        self.dbhandle.Execute(template % self.mTableNameFamilies).fetchall()
        for template in templates
    ]

    Histogram.Print(Histogram.Combine(grouped))
def LengthDistribution(self):
    """Print the distribution of unit lengths.

    The length of a row in the domains table is computed in SQL as
    ``end-start+1``; rows are grouped by that length and the resulting
    (length, count) pairs are passed unchanged to Histogram.Print.
    Output goes to standard output.
    """
    self.PrintStatus()

    # NOTE(review): the legend is one string; its "# "-separated segments
    # read as if they were meant to be separate comment lines, and the
    # description given for LENGTH looks copied from another report --
    # confirm before relying on it.
    print(
        "# # LENGTH: number of units/sequences per family"
        " # NUNITS: number of domains of that length #"
    )
    print("LENGTH\tNUNITS")
    sys.stdout.flush()

    query = (
        "SELECT end-start+1 AS length, COUNT(*) FROM %s GROUP BY length"
        % self.mTableNameDomains
    )
    cursor = self.dbhandle.Execute(query)
    Histogram.Print(cursor.fetchall())
def DomainDistribution( self ):
    """Print histograms of the number of domains per sequence.

    Three per-``nid`` row counts are collected, each binned with
    Histogram.Calculate:

    1. all rows of the domains table;
    2. rows of the domains table whose family has ``nunits > 1`` in the
       families table (labelled NDOMAINS/SIN in the legend);
    3. rows of the domains table whose family also occurs in
       self.mTableNameSubset (labelled NMOBILES in the legend).

    The three histograms are merged with Histogram.Combine and printed
    to standard output.
    """
    self.PrintStatus()

    # NOTE(review): the original layout of this method was lost.  It is
    # assumed here that only the flush belongs to the log-level guard and
    # that the legend is printed unconditionally -- confirm.
    if self.mLogLevel >= 1:
        sys.stdout.flush()

    # NOTE(review): the legend is one string; its "# "-separated segments
    # read as if they were meant to be separate comment lines -- confirm.
    print """# # COUNTS: number # NDOMAINS: number of domains per sequence # NDOMAINS/SIN: number of domains without singletons per sequence # NMOBILES: number of mobile modules per sequence #"""
    print "length\tndomains\tndomains/sin\tnmobiles"
    sys.stdout.flush()

    histograms = []

    # (1) Number of domain rows for every nid.
    statement = "SELECT COUNT(*) FROM %s GROUP BY nid" % self.mTableNameDomains
    d1 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
    histograms.append( Histogram.Calculate( d1 ) )

    # (2) Same count, restricted to families with more than one unit.
    statement = "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family AND f.nunits > 1 GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameFamilies)
    d2 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
    histograms.append( Histogram.Calculate( d2 ) )

    # (3) Same count, restricted to families present in the subset table.
    statement = "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameSubset)
    d3 = map(lambda x: x[0], self.dbhandle.Execute( statement ).fetchall())
    histograms.append( Histogram.Calculate( d3 ) )

    ch = Histogram.Combine( histograms )
    Histogram.Print(ch)
def LengthDistribution(self):
    """Print the distribution of unit lengths, binned in steps of ten.

    The SQL expression ``CEILING((end-start+1)/10) * 10`` assigns every
    row of the domains table to a length bin.  Three grouped counts are
    produced:

    * all rows of the domains table;
    * rows whose family has ``nunits > 1`` in the families table;
    * rows whose family has ``nunits = 1`` in the families table.

    The three (bin, count) result sets are merged with Histogram.Combine
    and printed to standard output.
    """
    self.PrintStatus()

    # NOTE(review): the legend is one string; its "# "-separated segments
    # read as if they were meant to be separate comment lines -- confirm.
    print(
        "# # LENGTH: number of units/sequences per family"
        " # NUNITS: number of domains of that length"
        " # NUNITS/SIN: number of domains without singletons"
        " # NSIN: number of singletons with that length #"
    )
    print("length\tnunits\tnunits/sin\tnsin")
    sys.stdout.flush()

    # All units.
    query = (
        "SELECT CEILING((end-start+1)/10) * 10 AS olength, COUNT(*) FROM %s GROUP BY olength"
        % self.mTableNameDomains
    )
    all_units = self.dbhandle.Execute(query).fetchall()

    # Units of families with more than one unit.
    select_part = (
        "SELECT CEILING((end-start+1)/10) * 10 AS dlength, COUNT(*) FROM %s AS a, %s AS d "
        % (self.mTableNameDomains, self.mTableNameFamilies)
    )
    query = select_part + " WHERE d.family = a.family AND d.nunits > 1 GROUP BY dlength"
    without_singletons = self.dbhandle.Execute(query).fetchall()

    # Units of families with exactly one unit.
    select_part = (
        "SELECT CEILING((end-start+1)/10) * 10 AS alength, COUNT(*) FROM %s AS a, %s AS d "
        % (self.mTableNameDomains, self.mTableNameFamilies)
    )
    query = select_part + " WHERE d.family = a.family AND d.nunits = 1 GROUP BY alength"
    singletons = self.dbhandle.Execute(query).fetchall()

    combined = Histogram.Combine([all_units, without_singletons, singletons])
    Histogram.Print(combined)
vals = []

# Retrieve the values to be binned: one number per non-comment line of
# standard input, taken from the tab-separated column `param_column`.
# The input is streamed so the whole of stdin is never held in memory.
for line in sys.stdin:
    if line.startswith("#"):
        continue

    # Remove only the line terminator.  Slicing off the last character
    # unconditionally would eat a digit from a final line that has no
    # trailing newline.
    line = line.rstrip("\n")

    data = line.split("\t")
    try:
        val = float(data[param_column])
    except IndexError:
        # The line has too few columns: report it and carry on.
        print("# IndexError in line: %s" % line)
        continue

    # Clamp to the configured limits; a limit of None disables that bound.
    if param_upper_limit is not None and val > param_upper_limit:
        val = param_upper_limit

    if param_lower_limit is not None and val < param_lower_limit:
        val = param_lower_limit

    vals.append(val)

h = Histogram.Calculate(vals,
                        no_empty_bins=param_empty_bins,
                        increment=param_bin_size)
print("# num_values=%i" % len(vals))
Histogram.Print(h, nonull=param_nonull)
continue header.append(filename) infile = open(filename, "r") h = [] while 1: line = infile.readline() if not line: break if line[0] == "#": continue if not re.match("(\d+)", line): continue data = map(string.atof, re.split("\s+", line[:-1])) h.append((data[0], tuple(data[1:]))) infile.close() histograms.append(h) print "# bin\t" + string.join(header, "\t\t") ch = Histogram.Combine(histograms) Histogram.Print(ch) ch = Histogram.Normalize(ch) print "# bin\t" + string.join(header, "\t\t") Histogram.Print(ch)
if options.normalize: h = Histogram.Normalize(h) if options.cumulative: h = Histogram.Cumulate(h) if options.reverse_cumulative: h = Histogram.Cumulate(h, direction=0) hists.append(h) for m in options.append: if m == "normalize": hists.append(Histogram.Normalize(h)) if options.headers: titles.append(options.headers[x]) elif options.titles: titles.append(options.titles[x]) else: titles.append("col%i" % options.columns[x]) if titles: options.stdout.write("bin\t" + "\t".join(titles) + "\n") if len(hists) == 1: Histogram.Print(hists[0], nonull=options.nonull) else: combined_histogram = Histogram.Combine( hists, missing_value=options.missing_value) Histogram.Print(combined_histogram, nonull=options.nonull) Experiment.Stop()