def _say(*parts):
    """Print parts separated by single spaces, like a Python 2 print statement.

    Uses a one-argument print so the line parses under Python 2 and 3 alike.
    """
    print(" ".join(str(part) for part in parts))


def _print_sample(heading, filenames, sample_size=10):
    """Print heading with the list length, then up to sample_size random names."""
    _say(heading, len(filenames), " total)")
    # random.sample raises ValueError when asked for more items than the list
    # holds, so cap the request for short (or empty) lists.
    for fname in random.sample(filenames, min(sample_size, len(filenames))):
        _say(fname)


def main():
    """Survey the row counts of every GIF under ./data/ and print a report.

    Files that nrm.LoadGIF cannot load (it returns None) or whose array is not
    2-D are counted as errors and skipped.  For the rest, the report lists how
    many images have each row count, followed by random samples (at most 10
    each) of files with fewer than 170 rows, more than 180 rows, and between
    250 and 300 rows exclusive.  The RNG is seeded so the samples are
    repeatable.
    """
    random.seed(12345)
    row_counts = {}
    err_count = 0
    small_rows = []
    weirdo_rows = []
    large_rows = []
    for fileid, giffile in enumerate(nrm.GetFilenames("./data/")):
        # Progress marker every 1000 files.
        if fileid % 1000 == 0:
            _say("...", fileid, "files")
        img = nrm.LoadGIF(giffile)
        if img is None or len(img.shape) != 2:
            err_count += 1
            continue
        rc, cc = img.shape
        row_counts[rc] = row_counts.get(rc, 0) + 1
        if cc != 600:
            # Expect this exactly once:
            _say("Weird column count!", cc, ":", giffile)
        if rc < 170:
            small_rows.append(giffile)
        if rc > 180:
            large_rows.append(giffile)
        # Note: every "weirdo" file is also in large_rows.
        if 250 < rc < 300:
            weirdo_rows.append(giffile)

    _say(err_count, "Errors")
    _say("ROW COUNTS:")
    # min()/max() raise on an empty dict, e.g. when no file loaded cleanly.
    if row_counts:
        for k in range(min(row_counts), max(row_counts) + 1):
            _say(k, ",", row_counts.get(k, 0))

    _print_sample("TOO FEW ROWS: (", small_rows)
    _print_sample("\nTOO MANY ROWS: (", large_rows)
    _print_sample("\nWEIRDO MEDIUM ROWS: (", weirdo_rows)
def _say(*parts):
    """Print parts separated by single spaces, like a Python 2 print statement.

    Uses a one-argument print so the line parses under Python 2 and 3 alike.
    """
    print(" ".join(str(part) for part in parts))


def _export_padded(label, filename, gif, output_file):
    """Announce the find, then save the padded version of gif to output_file."""
    _say(label, filename, gif.shape)
    nrm.SaveGIF(nrm.PadGIF2D(gif), output_file)


def main():
    """Export one padded example strip for each row-count category.

    Walks the files under ./data/ in a seeded random order and, for the first
    strip found in each category, saves its padded form under
    results/export_gray_padded_strips/:

      * small:     fewer than 170 rows
      * medium:    170 <= rows < 180
      * large:     exactly 180 rows
      * too large: more than 180 rows (nothing is saved; the strip is only
                   reported)

    Stops as soon as all four categories have been seen.

    Raises:
        ValueError: if nrm.PadGIF2D returns something other than None for a
            strip with more than 180 rows.
    """
    random.seed(12345)
    found_small = found_medium = found_large = found_too_large = False
    filenames = nrm.GetFilenames("./data/")
    random.shuffle(filenames)
    for filename in filenames:
        # Grayscale, but non-standard size:
        orig_gif = nrm.LoadGIF(filename, gray=True)
        if orig_gif is None:
            _say("Bogus file:", filename)
            continue
        # Only the row count is used; the unpacking still insists on a 2-D shape.
        rc, _ = orig_gif.shape
        output_file = ("results/export_gray_padded_strips/out" +
                       filename.split("/")[-1])
        if not found_small and rc < 170:
            found_small = True
            _export_padded("Found small:", filename, orig_gif, output_file)
        # 170 itself belongs to the medium bucket; with a strict lower bound a
        # 170-row strip would match no category at all.
        if not found_medium and 170 <= rc < 180:
            found_medium = True
            _export_padded("Found medium:", filename, orig_gif, output_file)
        if not found_large and rc == 180:
            found_large = True
            _export_padded("Found large:", filename, orig_gif, output_file)
        if not found_too_large and rc > 180:
            # Sanity check: padding is expected to fail (None) for these.
            if nrm.PadGIF2D(orig_gif) is not None:
                raise ValueError("Wait, this was supposed to be too large")
            found_too_large = True
            _say("Found too large:", filename, orig_gif.shape)
        if found_small and found_medium and found_large and found_too_large:
            break
def main(args):
    """Print the pooled pixel-value distribution of 30 randomly sampled GIFs.

    Every sampled image that loads is fed to a single nrm.Histogram.  For each
    pixel value holding at least 1% of the total count, one tab-separated line
    is printed: the pixel value, its share of the total, and the cumulative
    share up to and including that value (the cumulative share also includes
    the values that were too rare to be printed).

    Args:
        args: command-line argument list.  It is not consulted; an earlier
            revision read args[1] as an output filename but never wrote to
            it.  All results go to stdout.
    """
    random.seed(12345)
    sample_files = random.sample(nrm.GetFilenames("./data/"), 30)
    hist = nrm.Histogram()
    for giffile in sample_files:
        arr = nrm.LoadGIF(giffile)
        # LoadGIF signals an unreadable file with None; leave those out of
        # the histogram.  Skipped silently to keep the stdout table clean.
        if arr is None:
            continue
        hist.AddPanel(arr)

    counts = hist.Counts()
    total = sum(counts)
    if total == 0:
        # Nothing was counted; the shares below would divide by zero.
        return

    running = 0
    for pixel_val, count in enumerate(counts):
        running += count
        if count < 0.01 * total:
            continue
        print("%d\t%0.3f\t%0.3f" % (pixel_val,
                                    float(count) / total,
                                    float(running) / total))
def _say(*parts):
    """Print parts separated by single spaces, like a Python 2 print statement.

    Uses a one-argument print so the line parses under Python 2 and 3 alike.
    """
    print(" ".join(str(part) for part in parts))


def main():
    """Export the panels of ten randomly sampled strips as individual GIFs.

    Each sampled file is loaded in grayscale, padded with nrm.PadGIF2D and
    handed to nrm.GetPanels.  Every panel's shape is printed and the panel is
    saved under results/export_panels/ using the name nrm.GetPanelFilename
    produces.  Files that fail to load or pad are reported and skipped.

    Raises:
        ValueError: if nrm.GetPanels returns None for a padded strip.
    """
    random.seed(12345)
    for strip_path in random.sample(nrm.GetFilenames("./data/"), 10):
        strip = nrm.LoadGIF(strip_path, gray=True)
        if strip is None:
            _say("Bogus file:", strip_path)
            continue

        padded = nrm.PadGIF2D(strip)
        if padded is None:
            _say("Trouble with", strip_path, "; orig shape", strip.shape)
            continue

        panels = nrm.GetPanels(padded)
        if panels is None:
            raise ValueError("Error fetching panels: %s; shape %s" %
                             (strip_path, str(padded.shape)))

        for index, panel in enumerate(panels):
            _say(panel.shape)
            panel_name = nrm.GetPanelFilename(strip_path, index)
            nrm.SaveGIF(panel, "results/export_panels/out_" + panel_name)
def main(args):
    """Write per-intensity histogram statistics for a sample of strips to CSV.

    Samples 1500 files from ./data/, builds a 256-bin pixel-value histogram
    for each usable image, and for every intensity 0..255 writes one CSV row
    with the 2.5/25/50/75/97.5 percentiles and the mean of that bin's count
    across the images.

    Args:
        args: command-line argument list; args[1] is the path of the CSV file
            to write.

    Images that nrm.LoadGIF cannot read (None) and images taller than 190
    rows are left out.  If no image survives, only the header line is written.
    Pixel values are assumed to be non-negative integers below 256 (e.g.
    uint8) -- TODO confirm against nrm.LoadGIF.
    """
    outfilename = args[1]
    random.seed(12345)
    sample_files = random.sample(nrm.GetFilenames("./data/"), 1500)

    all_hists = []
    for giffile in sample_files:
        arr = nrm.LoadGIF(giffile)
        if arr is None:
            continue
        # Remove too-tall Sunday strips.  (The previous test kept rows > 190,
        # i.e. exactly the strips this step is meant to drop.)
        if arr.shape[0] > 190:
            continue
        # One C-level pass instead of a Python loop over every pixel.
        all_hists.append(numpy.bincount(arr.ravel(), minlength=256))

    with open(outfilename, "w") as outfile:
        # Plain "%" here: the string is not passed through %-formatting, so
        # "%%" would be written to the file literally.
        outfile.write("Intensity,2.5%,25%,50%,75%,97.5%,Mean\n")
        if not all_hists:
            # Percentiles of an empty sample are undefined.
            return
        for intensity in range(256):
            hist_vals = [hist[intensity] for hist in all_hists]
            pctiles = numpy.percentile(hist_vals, [2.5, 25, 50, 75, 97.5])
            fields = [str(intensity)]
            fields.extend(str(p) for p in pctiles)
            fields.append(str(numpy.mean(hist_vals)))
            outfile.write(",".join(fields) + "\n")
def main():
    """Run nrm.SaveAllPanelsToDB over ./data, targeting the file nermal.db.

    Earlier experiments that used to follow the database call (dumping
    pickled panels, comparing nrm.ToUnsignedByte output, printing dtypes) sat
    after an unconditional return and could never run; one of them also read
    `arrs` before assigning it.  They have been removed.
    """
    random.seed(12345)
    # The sample is no longer used, but the call is kept so the RNG state at
    # the time of the database call -- and the ValueError raised when ./data/
    # holds fewer than 4 files -- stay exactly as before.
    random.sample(nrm.GetFilenames("./data/"), 4)
    nrm.SaveAllPanelsToDB("./data", "nermal.db")