def NumAssignments(self):
    """Print per-sequence assignment counts and two summary histograms.

    When ``self.mLogLevel`` is at least 1, a commented banner (class,
    timestamp, source table) and the column header are written first.
    Every row returned by ``self.mTableDomains.GetNumAssignments()`` is
    then printed tab-separated, followed by a histogram built from
    element 1 of each row and a histogram built from element 2.
    """
    # NOTE(review): the extent of this guard was reconstructed from
    # unindented source -- confirm the column header belongs inside it.
    if self.mLogLevel >= 1:
        timestamp = time.asctime(time.localtime(time.time()))
        print("# instance of <" + str(self.__class__) + "> on " + timestamp)
        print("# source: %s" % (self.mTableNameDomains))
        print("# hubs between domains")
        print("\t".join(("nid", "nassignments", "nclasses")))
        sys.stdout.flush()

    rows = self.mTableDomains.GetNumAssignments()

    for row in rows:
        print("\t".join(map(str, row)))

    # Element 1 of each row feeds the "domains per sequence" histogram.
    per_sequence = Histogram.Calculate([row[1] for row in rows])
    print("# histogram of number of domains per sequence")
    Histogram.Print(per_sequence)

    # Element 2 of each row feeds the "different domains" histogram.
    distinct_per_sequence = Histogram.Calculate([row[2] for row in rows])
    print("# histogram of number of different domains per sequence")
    Histogram.Print(distinct_per_sequence)
def CountDistribution(self):
    """Print the distribution of units and of sequences per family.

    Two grouped counts are queried from ``self.mTableNameDomains``, both
    grouped by the class field reported by
    ``self.mTableDomains.GetFieldNameClass()``: the number of rows per
    group and the number of distinct ``nid`` values per group. Each
    result is turned into a histogram; the two histograms are combined
    and printed below a commented legend and a column header.
    """
    self.PrintStatus()

    # NOTE(review): this legend appears on a single line in the source;
    # it is reproduced verbatim.
    print("""# # NUM: number of units/sequences per family # NUNITS: number of families with x units # NSEQ: number of families with x sequences #""")
    print("NUM\tNUNITS\tNSEQ")
    sys.stdout.flush()

    class_name = self.mTableDomains.GetFieldNameClass()

    # First template counts units, second counts distinct sequences.
    templates = (
        "SELECT COUNT(*) FROM %s GROUP BY %s",
        "SELECT COUNT(DISTINCT nid) FROM %s GROUP BY %s",
    )

    histograms = []
    for template in templates:
        statement = template % (self.mTableNameDomains, class_name)
        rows = self.dbhandle.Execute(statement).fetchall()
        counts = [row[0] for row in rows]
        histograms.append(Histogram.Calculate(counts))

    combined = Histogram.Combine(histograms)
    Histogram.Print(combined)
def printHistogram(values, section, options, min_value=0, increment=1.0):
    """Write a histogram of *values* to the output file for *section*.

    The file name is ``options.output_filename_pattern % section``. The
    file starts with a header line ``bin<TAB>section``, followed by one
    ``bin<TAB>count`` line per histogram entry.

    Arguments:
        values: data passed to ``Histogram.Calculate``.
        section: label substituted into the file name pattern and used
            as the title of the count column.
        options: object providing ``output_filename_pattern``.
        min_value: forwarded to ``Histogram.Calculate`` as ``min_value``.
        increment: forwarded to ``Histogram.Calculate`` as ``increment``.

    The defaults match the values that were previously hard-coded, so
    callers that do not pass ``min_value``/``increment`` are unaffected.
    """
    # The context manager closes the file even if histogram calculation
    # or a write raises.
    with open(options.output_filename_pattern % section, "w") as outfile:
        # Forward the caller's binning parameters instead of ignoring them.
        h = Histogram.Calculate(values,
                                no_empty_bins=True,
                                min_value=min_value,
                                increment=increment)

        outfile.write("bin\t%s\n" % section)
        for bin_start, count in h:
            outfile.write("%5.2f\t%i\n" % (bin_start, count))
def DomainDistribution(self):
    """Print combined histograms of per-sequence domain counts.

    Three grouped counts are queried and each is turned into a
    histogram:

    * rows of ``self.mTableNameDomains`` per ``nid``;
    * the same, restricted to families in ``self.mTableNameFamilies``
      with ``nunits > 1``;
    * the same, restricted to families present in
      ``self.mTableNameSubset``.

    The three histograms are combined and printed below a commented
    legend and a column header.
    """
    self.PrintStatus()

    # NOTE(review): the extent of this guard was reconstructed from
    # unindented source; it is taken to cover only the flush -- confirm.
    if self.mLogLevel >= 1:
        sys.stdout.flush()

    # NOTE(review): this legend appears on a single line in the source;
    # it is reproduced verbatim.
    print("""# # COUNTS: number # NDOMAINS: number of domains per sequence # NDOMAINS/SIN: number of domains without singletons per sequence # NMOBILES: number of mobile modules per sequence #""")
    print("length\tndomains\tndomains/sin\tnmobiles")
    sys.stdout.flush()

    def histogram_of(statement):
        # Run the query and histogram the first column of every row.
        rows = self.dbhandle.Execute(statement).fetchall()
        return Histogram.Calculate([row[0] for row in rows])

    # Each statement is built right before it is executed, in the same
    # order as the output columns.
    all_domains = histogram_of(
        "SELECT COUNT(*) FROM %s GROUP BY nid" % self.mTableNameDomains)

    without_singletons = histogram_of(
        "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family AND f.nunits > 1 GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameFamilies))

    in_subset = histogram_of(
        "SELECT COUNT(*) FROM %s AS d, %s AS f WHERE f.family = d.family GROUP BY d.nid" % (self.mTableNameDomains, self.mTableNameSubset))

    combined = Histogram.Combine([all_domains, without_singletons, in_subset])
    Histogram.Print(combined)
vals = []

# retrieve histogram
# Read all of stdin and drop comment lines (those starting with "#").
lines = [line for line in sys.stdin.readlines() if not line.startswith("#")]

for l in lines:
    # Strip only the line terminator. Slicing off the last character
    # would truncate a final line that has no trailing newline.
    record = l.rstrip("\n")
    data = record.split("\t")

    try:
        val = float(data[param_column])
    except IndexError:
        # The line has too few columns: report it and move on.
        print("# IndexError in line: %s" % record)
        continue

    # Clamp the value into [param_lower_limit, param_upper_limit];
    # a limit of None disables that side of the clamp.
    if param_upper_limit is not None and val > param_upper_limit:
        val = param_upper_limit
    if param_lower_limit is not None and val < param_lower_limit:
        val = param_lower_limit

    vals.append(val)

# Drop the reference to the buffered input lines.
lines = None

h = Histogram.Calculate(vals,
                        no_empty_bins=param_empty_bins,
                        increment=param_bin_size)

print("# num_values=%i" % len(vals))
Histogram.Print(h, nonull=param_nonull)
options.stdlog.write("# no data\n") Experiment.Stop() sys.exit(0) for x in range(len(options.columns)): if options.loglevel >= 1: options.stdlog.write("# column=%i, num_values=%i\n" % (options.columns[x], len(vals[x]))) if len(vals[x]) < options.min_data: continue h = Histogram.Calculate( vals[x], no_empty_bins=options.no_empty_bins, increment=options.bin_size, min_value=options.min_value, max_value=options.max_value, dynamic_bins=options.dynamic_bins, ignore_out_of_range=options.ignore_out_of_range) if options.normalize: h = Histogram.Normalize(h) if options.cumulative: h = Histogram.Cumulate(h) if options.reverse_cumulative: h = Histogram.Cumulate(h, direction=0) hists.append(h) for m in options.append: if m == "normalize": hists.append(Histogram.Normalize(h))