def test_nan():
    """Print a histogram whose bin contents include a NaN entry."""
    bin_edges = np.arange(8)
    contents = np.arange(7, dtype=float)
    # Poison a single bin so the printer has to cope with a non-finite value.
    contents[5] = np.nan
    hp.print_hist((contents, bin_edges))
def print1d(h1d, title=""):
    """Print the ``msd`` projection of a histogram as a text histogram.

    Args:
        h1d: Histogram object exposing ``project``; the projection must
            provide ``values()`` and ``axis("msd").edges()``.
        title: Title forwarded to ``print_hist``.
    """
    # Project once and reuse the result for both the contents and the edges.
    projected = h1d.project('msd')
    vals = projected.values()[()]
    edges = projected.axis("msd").edges()
    print_hist((vals, edges), title=title, columns=100)
def test_boost():
    """Print a boost-histogram object; do nothing when the package is absent."""
    try:
        import boost_histogram as bh
    except ModuleNotFoundError:
        # Optional dependency: nothing to exercise without it.
        return

    axis = bh.axis.Regular(20, -3, 3)
    filled = bh.Histogram(axis)
    filled.fill(np.random.randn(1000))
    hp.print_hist(filled, title="Boost Histogram")
def main() -> None:
    """Command-line entry point of the classic ``hist`` interface.

    Reads one number per line from ``--input`` (stdin by default), bins the
    values into ``--buckets`` bins and either saves a matplotlib plot to
    ``--output-image`` or prints the histogram to the console.
    """
    # Local import keeps this block self-contained. Unlike
    # os.get_terminal_size(), the shutil variant falls back to a default size
    # instead of raising OSError when stdout is not attached to a terminal.
    import shutil

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", type=str, help="input file to read from (stdin by default)"
    )
    parser.add_argument("-b", "--buckets", type=int, help="number of bins", default=20)
    parser.add_argument(
        "-s",
        "--screen-width",
        type=int,
        help="maximum screen width",
        default=shutil.get_terminal_size().columns,
    )
    parser.add_argument("-t", "--label", type=str, help="label for plot")
    parser.add_argument("-o", "--output-image", type=str, help="save image to file")
    args = parser.parse_args()

    print(
        "Classic hist interface - please use histoprint instead; this supports multiple file formats and much more!"
    )

    with open(args.input) if args.input else sys.stdin as f:
        # Skip blank lines (e.g. a trailing newline) instead of failing in float().
        values = [float(v) for v in f if v.strip()]

    h = bh.numpy.histogram(values, bins=args.buckets, histogram=hist.Hist)

    if args.output_image:
        import matplotlib.pyplot as plt

        h.plot()
        plt.savefig(args.output_image)
    else:
        # NOTE(review): the plot label is passed as `title` and the requested
        # width as `columns`, matching the keywords used by the other
        # print_hist callers; confirm against the installed histoprint version.
        print_hist(
            h,
            title=args.label,
            summary=True,
            columns=args.screen_width,
        )
def show(self, **kwargs: Any) -> Any:
    """
    Print the member histograms to the console as a single stacked plot.

    Unless the caller supplies ``labels``, the members' names are used as
    labels, provided every member has a name.
    """
    use_member_names = "labels" not in kwargs and all(
        h.name is not None for h in self
    )
    if use_member_names:
        kwargs["labels"] = [h.name for h in self]
    return histoprint.print_hist(list(self), stack=True, **kwargs)
def test_uproot():
    """Print an uproot histogram; do nothing when uproot/awkward are absent."""
    try:
        # awkward is imported purely as an availability probe
        import awkward  # noqa: F401
        import uproot
    except ModuleNotFoundError:
        return

    with uproot.open("tests/data/histograms.root") as root_file:
        th1 = root_file["one"]

    try:
        # Works with uproot 3
        th1.show()
    except Exception:
        pass

    hp.print_hist(th1, title="uproot TH1")
def _histoprint_csv(infile, **kwargs):
    """Interpret file as a CSV file and print its columns as histograms.

    Args:
        infile: Path or file object handed to ``pandas.read_csv``.
        **kwargs: Printing options. ``cut`` (a row selection expression
            evaluated with ``DataFrame.eval``) and ``fields`` (column names
            to keep) are consumed here; the binning options are read by
            ``_bin_edges`` and whatever remains is forwarded to
            ``hp.print_hist``.

    Exits with status 1 when the cut cannot be evaluated or a requested
    column does not exist.
    """
    # sys.exit is used instead of the `exit` builtin, which is only injected
    # by the `site` module and is meant for interactive sessions.
    import sys

    import pandas as pd

    # Read the data
    data = pd.read_csv(infile)

    cut = kwargs.pop("cut", "")
    if cut:
        try:
            data = data[data.eval(cut)]
        except Exception as e:
            click.echo("Error interpreting the cut string:", err=True)
            click.echo(e, err=True)
            sys.exit(1)

    # Interpret field numbers/names
    fields = list(kwargs.pop("fields", []))
    if fields:
        try:
            data = data[fields]
        except KeyError:
            click.echo("Unknown column name.", err=True)
            sys.exit(1)

    # Fall back to the column names when no labels were requested
    if kwargs.get("labels", ("", )) == ("", ):
        kwargs["labels"] = data.columns

    # Convert to array with one row per column of the table
    data = data.to_numpy().T

    # Interpret bins
    bins = _bin_edges(kwargs, data)

    # Create the histogram(s), all sharing the same bin edges
    hist = ([np.histogram(d, bins=bins)[0] for d in data], bins)  # type: Tuple[Any, Any]

    # Print the histogram
    hp.print_hist(hist, **kwargs)
def _histoprint_txt(infile, **kwargs):
    """Interpret file as a simple whitespace separated table.

    Args:
        infile: Path or file object handed to ``numpy.loadtxt``.
        **kwargs: Printing options. ``cut`` (a Python expression selecting
            rows of the table) and ``fields`` (integer column indices) are
            consumed here; the binning options are read by ``_bin_edges`` and
            whatever remains is forwarded to ``hp.print_hist``.

    Exits with status 1 when the cut cannot be evaluated, a field is not an
    integer, or a field index is out of bounds.
    """
    # sys.exit is used instead of the `exit` builtin, which is only injected
    # by the `site` module and is meant for interactive sessions.
    import sys

    # Read the data; after transposing, each row of `data` is one column of the file
    data = np.loadtxt(infile, ndmin=2)
    data = data.T

    cut = kwargs.pop("cut", "")
    if cut:
        try:
            # SECURITY: `cut` is evaluated as arbitrary Python with access to
            # this function's local names (e.g. `data`). Only use this with
            # trusted input.
            data = data[:, eval(cut)]
        except Exception as e:
            click.echo("Error interpreting the cut string:", err=True)
            click.echo(e, err=True)
            sys.exit(1)

    # Interpret field numbers
    fields = kwargs.pop("fields", [])
    if len(fields) > 0:
        try:
            fields = [int(f) for f in fields]
        except ValueError:
            click.echo("Fields for a TXT file must be integers.", err=True)
            sys.exit(1)
        try:
            data = data[fields]
        except (IndexError, KeyError):
            # NumPy reports an out-of-range index with IndexError; KeyError is
            # kept so that anything the old handler caught is still caught.
            click.echo("Field out of bounds.", err=True)
            sys.exit(1)

    # Interpret bins
    bins = _bin_edges(kwargs, data)

    # Create the histogram(s), all sharing the same bin edges
    hist = ([np.histogram(d, bins=bins)[0] for d in data], bins)  # type: Tuple[Any, Any]

    # Print the histogram
    hp.print_hist(hist, **kwargs)
def show_hists(h, title='', scaleH=False):
    """Print the ``msd`` distribution of every sample in ``h`` as a stacked plot.

    Args:
        h: Histogram whose first sparse axis enumerates the samples and which
            has an ``msd`` axis.
        title: Title forwarded to ``print_hist``.
        scaleH: Scaling applied to samples whose name starts with ``"GluGlu"``.
            A false value disables scaling, ``True`` scales by 100 and a real
            number scales by that number.
    """
    hall = []
    names = []
    bins = None
    for name in h.axis(h.sparse_axes()[0]).identifiers():
        sample = h[name]
        _h = sample.project('msd').values()[()]
        if scaleH and name.name.startswith("GluGlu"):
            # A plain flag means "use the default factor". It has to be tested
            # explicitly because np.isreal(True) is true, which would
            # otherwise scale by 1.
            is_flag = isinstance(scaleH, (bool, np.bool_))
            factor = 100 if is_flag or not np.isreal(scaleH) else scaleH
            # Multiply out of place: this neither modifies the array returned
            # by the histogram nor fails on integer counts.
            _h = _h * factor
        bins = sample.axis('msd').edges()
        hall.append(_h)
        names.append(name.name)

    if not hall:
        # No samples means no bin edges either, so there is nothing to print.
        return

    print_hist(
        (hall, bins),
        title=title,
        labels=names,
        stack=True,
        symbols=" ",
        bg_colors="rgbcmy",
    )
def test_hist():
    """Poor man's unit tests: exercise the printing functions on random data."""
    # Four normal samples: shifted left, centred, shifted right, and widened.
    samples = [
        np.random.randn(1000) - 2,
        np.random.randn(1000),
        np.random.randn(1000) + 2,
        np.random.randn(500) * 2,
    ]
    centred = samples[1]

    hp.text_hist(centred)
    hp.text_hist(
        centred,
        bins=[-5, -3, -2, -1, -0.5, 0, 0.5, 1, 2, 3, 5],
        title="Variable bin widths",
    )

    # Identical binning for all samples, so they can share one set of edges.
    binned = [np.histogram(sample, bins=15, range=(-5, 5)) for sample in samples]
    counts = [content for content, _ in binned]
    edges = binned[0][1]
    combined = (counts, edges)

    hp.print_hist(combined, title="Overlays", labels="ABCDE")

    hp.print_hist(
        combined,
        title="Stacks",
        stack=True,
        symbols=" ",
        bg_colors="rgbcmy",
        labels="ABCDE",
    )

    hp.print_hist(
        combined,
        title="Summaries",
        symbols=r"=|\/",
        fg_colors="0",
        bg_colors="0",
        labels=["AAAAAAAAAAAAAAAA", "B", "CCCCCCCCCCCCC", "D"],
        summary=True,
    )

    hp.print_hist(
        (counts[:3], edges),
        title="No composition",
        labels=["A", "B", "C"],
    )
def show(self, **kwargs: Any) -> Any:
    """
    Render this histogram on the console.

    All keyword arguments are handed to ``histoprint.print_hist`` unchanged,
    and its return value is passed back to the caller.
    """
    printed = histoprint.print_hist(self, **kwargs)
    return printed
def _histoprint_root(infile, **kwargs):
    """Interpret file as a ROOT file and print the selected fields as histograms.

    Args:
        infile: Location of the ROOT file, handed to ``uproot.open``.
        **kwargs: Printing options. ``fields`` (slash separated paths inside
            the file, optionally followed by a ``[...]`` slice), ``labels``
            and ``cut`` are consumed here; the binning options are read by
            ``_bin_edges`` and whatever remains is forwarded to
            ``hp.print_hist``.

    If exactly one field is given and the object it names has a ``to_numpy``
    method, that object is printed directly. Otherwise every field is looked
    up inside an object that has an ``arrays`` attribute, read, optionally
    cut, flattened and histogrammed.

    Raises:
        ImportError: If ``uproot`` or ``awkward`` is not installed.

    Exits with status 1 when no field is given, a key cannot be found or the
    cut cannot be evaluated.
    """
    # sys.exit is used instead of the `exit` builtin, which is only injected
    # by the `site` module and is meant for interactive sessions.
    import sys

    # Import uproot
    try:
        import uproot as up
    except ImportError:
        click.echo("Cannot try ROOT file format. Uproot module not found.", err=True)
        raise
    # Import awkward
    try:
        import awkward as ak
    except ImportError:
        click.echo("Cannot try ROOT file format. Awkward module not found.", err=True)
        raise

    data = []

    # Open root file; the context manager closes it on every exit path
    with up.open(infile) as F:
        # Interpret field names
        fields = list(kwargs.pop("fields", []))
        if not fields:
            click.echo("Must specify at least one field for ROOT files.", err=True)
            click.echo(F.keys(), err=True)
            sys.exit(1)

        # Get default columns labels
        if kwargs.get("labels", ("", )) == ("", ):
            kwargs["labels"] = [field.split("/")[-1] for field in fields]
        labels = kwargs.pop("labels")

        # Get possible cut expression
        cut = kwargs.pop("cut", "")

        # Possibly a single histogram
        if len(fields) == 1:
            try:
                candidate = F[fields[0]]
            except KeyError:
                pass  # Deal with key error further down
            else:
                try:
                    hist = candidate.to_numpy()
                except AttributeError:
                    pass
                else:
                    kwargs.pop("bins", None)  # Get rid of useless parameter
                    hp.print_hist(hist, **kwargs)
                    return

        # Find TTrees
        trees = []  # type: List[up.models.TTree.Model_TTree_v19]
        tree_fields = []  # type: List[List[Dict[str, Any]]]
        for field, label in zip(fields, labels):
            branch = F
            splitfield = field.split("/")
            for i, key in enumerate(splitfield):
                try:
                    branch = branch[key]
                except KeyError:
                    click.echo(
                        "Could not find key '%s'. Possible values: %s"
                        % (key, branch.keys()),
                        err=True,
                    )
                    sys.exit(1)
                # The first object along the path that has `arrays` is taken
                # as the tree; the rest of the path is looked up inside it.
                if hasattr(branch, "arrays"):
                    entry = {"label": label, "path": "/".join(splitfield[i + 1:])}
                    if branch in trees:
                        tree_fields[trees.index(branch)].append(entry)
                    else:
                        trees.append(branch)
                        tree_fields.append([entry])
                    break

        # Reassign labels in correct order (fields are regrouped by tree)
        labels = []

        # Get and flatten the data
        for tree, entries in zip(trees, tree_fields):
            d = []
            for entry in entries:
                labels.append(entry["label"])
                # Separate the branch path from an optional trailing slice
                path, bracket, rest = entry["path"].partition("[")
                slic = bracket + rest
                # Get the branches
                try:
                    # SECURITY: the slice text comes from the user and is
                    # evaluated as Python with access to the local names
                    # `tree` and `path`. Only use this with trusted input.
                    d.append(eval("tree[path].array()" + slic))
                except up.KeyInFileError as e:
                    click.echo(e, err=True)
                    click.echo("Possible keys: %s" % tree.keys(), err=True)
                    sys.exit(1)

            # Cut on values; an empty cut string means "no cut", as in the
            # CSV and TXT readers
            if cut:
                try:
                    index = tree.arrays("cut", aliases={"cut": cut}).cut
                except up.KeyInFileError as e:
                    click.echo(e, err=True)
                    click.echo("Possible keys: %s" % tree.keys(), err=True)
                    sys.exit(1)
                except Exception as e:
                    click.echo("Error interpreting the cut string:", err=True)
                    click.echo(e, err=True)
                    sys.exit(1)
                d = [values[index] for values in d]

            for i, values in enumerate(d):
                # Flatten if necessary
                try:
                    values = ak.flatten(values)
                except ValueError:
                    pass
                # Turn into flat numpy array
                d[i] = ak.to_numpy(values)

            data.extend(d)

    # Assign new label order
    kwargs["labels"] = labels

    # Interpret bins
    bins = _bin_edges(kwargs, data)

    # Create the histogram(s), all sharing the same bin edges
    hist = ([np.histogram(d, bins=bins)[0] for d in data], bins)

    # Print the histogram
    hp.print_hist(hist, **kwargs)
def main(
    in_vcf: str,
    out_vcf: str,
    overwrite: bool,
    verbose: bool,
    min_qual: float,
    min_depth: int,
    min_fed: float,
    max_depth: int,
    min_strand_bias: int,
    min_bqb: float,
    min_mqb: float,
    min_rpb: float,
    min_rpbz: Optional[float],
    max_rpbz: Optional[float],
    max_scbz: Optional[float],
    max_sgb: float,
    min_vdb: float,
    hist: bool,
    min_frs: float,
    min_mq: int,
):
    """Apply the following filters to a VCF:\n
    - Minimum proportion of the expected (median) depth\n
    - Maximum proportion of the expected (median) depth\n
    - Minimum QUAL threshold\n
    - Minimum Strand bias percentage
    """
    # NOTE(review): the docstring above is left untouched because it looks
    # like user-facing command help text; confirm before rewording it.
    #
    # The input is read twice: a first pass collects depth and QUAL values to
    # derive the expected (median) depth, a second pass computes each
    # variant's filter status and writes it to `out_vcf`. The remaining
    # parameters are thresholds handed unchanged to `Filter`; `overwrite`
    # replaces an existing FILTER value instead of appending to it and `hist`
    # prints depth/QUAL histograms to stderr.
    log_level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s]: %(message)s", level=log_level
    )

    depths = []
    quals = []
    vcf_reader = VCF(in_vcf)
    try:
        if not vcf_reader.contains(Tags.Depth.value):
            raise DepthTagError(f"Depth tag {Tags.Depth} not found in header")

        if (not vcf_reader.contains(str(Tags.StrandDepth))) and min_strand_bias:
            logging.warning(
                f"Strand depth tag {Tags.StrandDepth} not found in header. "
                f"Turning off strand bias filter..."
            )
            min_strand_bias = 0

        logging.info("Calculating expected (median) depth...")
        for v in vcf_reader:
            depths.append(get_depth(v))
            quals.append(v.QUAL or 0)
    finally:
        # Release the first-pass reader; the file is reopened for filtering.
        vcf_reader.close()

    # With no records the median would be NaN, which int() below rejects.
    median_depth = np.median(depths) if depths else 0.0
    logging.info(f"Expected depth: {median_depth}")

    if hist:
        import histoprint

        tick_format = "% .1f"
        logging.info("Depth histogram:")
        histoprint.print_hist(
            np.histogram(depths, bins=HIST_BINS),
            title="Depth histogram",
            summary=True,
            tick_format=tick_format,
            file=click.get_text_stream("stderr"),
        )
        logging.info("QUAL histogram")
        histoprint.print_hist(
            np.histogram(quals, bins=HIST_BINS),
            title="QUAL histogram",
            summary=True,
            tick_format=tick_format,
            file=click.get_text_stream("stderr"),
        )

    assessor = Filter(
        expected_depth=int(median_depth),
        min_qual=min_qual,
        min_depth=min_depth,
        min_fed=min_fed,
        max_depth=max_depth,
        min_strand_bias=min_strand_bias,
        min_bqb=min_bqb,
        min_mqb=min_mqb,
        min_rpb=min_rpb,
        max_sgb=max_sgb,
        min_vdb=min_vdb,
        min_frs=min_frs,
        min_mq=min_mq,
        min_rpbz=min_rpbz,
        max_rpbz=max_rpbz,
        max_scbz=max_scbz,
    )

    stats = Counter()
    vcf_reader = VCF(in_vcf)
    try:
        assessor.add_filters_to_header(vcf_reader)

        # exist_ok makes a separate existence check unnecessary
        Path(out_vcf).parent.mkdir(exist_ok=True, parents=True)
        vcf_writer = Writer(out_vcf, tmpl=vcf_reader)
        try:
            logging.info("Filtering variants...")
            for variant in vcf_reader:
                filter_status = assessor.filter_status(variant)

                if (
                    (not overwrite)
                    and variant.FILTER is not None
                    and filter_status != str(Tags.Pass)
                ):
                    # Keep the existing FILTER value and append the new status
                    current_filter = variant.FILTER.rstrip(";")
                    variant.FILTER = f"{current_filter};{filter_status}"
                else:
                    variant.FILTER = filter_status

                vcf_writer.write_record(variant)
                stats.update(filter_status.split(";"))
        finally:
            vcf_writer.close()
    finally:
        vcf_reader.close()

    logging.info("FILTER STATISTICS")
    logging.info("=================")
    for filt, count in stats.items():
        logging.info(f"Filter: {filt}\tCount: {count}")

    logging.info("Done!")