예제 #1
0
def main():
    random.seed(12345)
    row_counts = {}
    err_count = 0
    small_rows = []
    weirdo_rows = []
    large_rows = []
    for fileid, giffile in enumerate(nrm.GetFilenames("./data/")):
        if fileid % 1000 == 0:
            print "...", fileid, "files"
        img = nrm.LoadGIF(giffile)
        if img is None:
            err_count += 1
            continue
        shp = img.shape
        if len(shp) != 2:
            err_count += 1
            continue
        rc, cc = shp
        row_counts[rc] = row_counts.get(rc, 0) + 1
        if cc != 600:
            # Expect this exactly once:
            print "Weird column count!", cc, ":", giffile
        if rc < 170:
            small_rows.append(giffile)
        if rc > 180:
            large_rows.append(giffile)
        if rc > 250 and rc < 300:
            weirdo_rows.append(giffile)
    print err_count, "Errors"
    print "ROW COUNTS:"
    for k in range(min(row_counts.keys()), max(row_counts.keys()) + 1):
        print k, ",", row_counts.get(k, 0)

    print "TOO FEW ROWS: (", len(small_rows), " total)"
    for fname in random.sample(small_rows, 10):
        print fname
    print "\nTOO MANY ROWS: (", len(large_rows), " total)"
    for fname in random.sample(large_rows, 10):
        print fname
    print "\nWEIRDO MEDIUM ROWS: (", len(weirdo_rows), " total)"
    for fname in random.sample(weirdo_rows, 10):
        print fname
def main():
    random.seed(12345)
    found_small = False
    found_medium = False
    found_large = False
    found_too_large = False
    filenames = nrm.GetFilenames("./data/")
    random.shuffle(filenames)
    for filename in filenames:
        # Grayscale, but non-standard size:
        orig_gif = nrm.LoadGIF(filename, gray=True)
        if orig_gif is None:
            print "Bogus file:", filename
            continue
        # Row and column counts:
        rc, cc = orig_gif.shape
        output_file = ("results/export_gray_padded_strips/out" +
                       filename.split("/")[-1])
        if not found_small and rc < 170:
            found_small = True
            print "Found small:", filename, orig_gif.shape
            nrm.SaveGIF(nrm.PadGIF2D(orig_gif), output_file)

        if not found_medium and 170 < rc < 180:
            found_medium = True
            print "Found medium:", filename, orig_gif.shape
            nrm.SaveGIF(nrm.PadGIF2D(orig_gif), output_file)

        if not found_large and rc == 180:
            found_large = True
            print "Found large:", filename, orig_gif.shape
            nrm.SaveGIF(nrm.PadGIF2D(orig_gif), output_file)

        if not found_too_large and rc > 180:
            if nrm.PadGIF2D(orig_gif) is not None:
                raise ValueError("Wait, this was supposed to be too large")
            found_too_large = True
            print "Found too large:", filename, orig_gif.shape

        if found_small and found_medium and found_large and found_too_large:
            break
예제 #3
0
def main(args):
    outfilename = args[1]
    random.seed(12345)
    sample_files = random.sample(nrm.GetFilenames("./data/"), 30)
    arrs = [nrm.LoadGIF(giffile) for giffile in sample_files]

    hist = nrm.Histogram()

    all_hists = []
    for arr in arrs:
        hist.AddPanel(arr)

    counts = hist.Counts()
    s = 0
    total = sum(counts)
    for pixel_val, count in enumerate(counts):
        s += count
        if count < 0.01 * total:
            continue
        print "%d\t%0.3f\t%0.3f" % (pixel_val, float(count) / total,
                                    float(s) / total)
예제 #4
0
def main():
    random.seed(12345)
    filenames = random.sample(nrm.GetFilenames("./data/"), 10)
    for filename in filenames:
        orig_gif = nrm.LoadGIF(filename, gray=True)
        if orig_gif is None:
            print "Bogus file:", filename
            continue
        pad_gif = nrm.PadGIF2D(orig_gif)
        if pad_gif is None:
            print "Trouble with", filename, "; orig shape", orig_gif.shape
            continue
        panels = nrm.GetPanels(pad_gif)
        if panels is None:
            raise ValueError("Error fetching panels: %s; shape %s" %
                             (filename, str(pad_gif.shape)))
        for panel_id, panel in enumerate(panels):
            print panel.shape
            panel_filename = ("results/export_panels/out_" +
                              nrm.GetPanelFilename(filename, panel_id))
            nrm.SaveGIF(panel, panel_filename)
예제 #5
0
def main(args):
    """Write per-intensity histogram statistics of a strip sample to a CSV.

    1500 files are sampled (seeded, hence repeatable) from ./data/ and
    loaded with nrm.LoadGIF. Unreadable files and strips taller than 190
    rows are dropped. For each remaining strip the number of pixels at each
    intensity 0..255 is counted. The output file then gets one row per
    intensity holding the 2.5th, 25th, 50th, 75th and 97.5th percentiles
    and the mean of that intensity's pixel count across the strips.

    Args:
        args: Command-line argument list; args[1] is the output CSV path.
    """
    outfilename = args[1]
    random.seed(12345)
    sample_files = random.sample(nrm.GetFilenames("./data/"), 1500)
    all_arrs = [nrm.LoadGIF(giffile) for giffile in sample_files]
    # Remove unreadable files (LoadGIF yields None for those) and too-tall
    # Sunday strips.
    arrs = [arr for arr in all_arrs
            if arr is not None and arr.shape[0] <= 190]

    # One 256-bin intensity histogram per strip.
    # NOTE(review): assumes pixels are non-negative integers below 256, as
    # the original indexing into a 256-slot list required -- confirm.
    all_hists = [numpy.bincount(arr.ravel(), minlength=256) for arr in arrs]

    with open(outfilename, "w") as outfile:
        # Plain write, no %-formatting, so percent signs are not doubled.
        outfile.write("Intensity,2.5%,25%,50%,75%,97.5%,Mean\n")
        for i in range(256):
            hist_vals = [hist[i] for hist in all_hists]
            pctiles = numpy.percentile(hist_vals, [2.5, 25, 50, 75, 97.5])
            row = ",".join([str(p) for p in pctiles])
            mean = numpy.mean(hist_vals)
            outfile.write(str(i) + "," + row + "," + str(mean) + "\n")
예제 #6
0
def main():
    """Run nrm.SaveAllPanelsToDB on ./data with nermal.db as the target.

    Presumably this stores the panels of every strip in that database; the
    helper itself is not visible here, so confirm against its definition.

    The original body continued after an unconditional return with
    experimental code that could never run (and that read a variable before
    assigning it). That unreachable code has been removed, together with a
    four-file sample that only it consumed.
    """
    # Seed retained from the original. Whether SaveAllPanelsToDB draws
    # random numbers is not visible here.
    random.seed(12345)
    nrm.SaveAllPanelsToDB("./data", "nermal.db")