def mergecn(args):
    """
    %prog mergecn FACE.csv

    Compile matrix of GC-corrected copy numbers. Place a bunch of folders in
    csv file. Each folder will be scanned, one chromosome after another.

    For every chromosome the per-bin median (divided by ploidy) is written to
    `beta/<seqid>.beta`, the per-bin coefficient of variation to
    `beta/<seqid>.std`, and the stacked sample matrix to `<seqid>.bin`.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(mergecn.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (csvfile,) = args
    # One sample folder per line; a "-cn" suffix and surrounding "/" are
    # stripped so that the folder name can be rebuilt uniformly below.
    with open(csvfile) as fp:
        samples = [x.replace("-cn", "").strip().strip("/") for x in fp]
    # Blank lines would otherwise become a bogus "-cn/..." path.
    samples = [s for s in samples if s]

    betadir = "beta"
    mkdir(betadir)
    for seqid in allsomes:
        names = [
            op.join(s + "-cn", "{}.{}.cn".format(op.basename(s), seqid))
            for s in samples
        ]
        # `np.float` was only an alias of the builtin float (i.e. float64) and
        # has been removed from NumPy; spell the dtype explicitly.
        arrays = [np.fromfile(name, dtype=np.float64) for name in names]

        # Keep only the samples whose bin count equals the median bin count,
        # so that all retained rows can be stacked into one matrix.
        shapes = [x.shape[0] for x in arrays]
        med_shape = np.median(shapes)
        arrays = [x for x in arrays if x.shape[0] == med_shape]

        ploidy = 2 if seqid not in ("chrY", "chrM") else 1
        if seqid in sexsomes:
            # Median of the strictly positive bins for each sample.
            chr_med = np.array([np.median(a[a > 0]) for a in arrays])
            # Presumably get_kmeans returns one 0/1 cluster label per sample as
            # a NumPy array (it is compared element-wise below) - TODO confirm.
            idx = get_kmeans(chr_med, k=2)
            zero_med = np.median(chr_med[idx == 0])
            one_med = np.median(chr_med[idx == 1])
            logging.debug(
                "K-means with {} c0:{} c1:{}".format(seqid, zero_med, one_med)
            )
            higher_idx = 1 if one_med > zero_med else 0
            # Use the higher mean coverage component
            arrays = np.array(arrays)[idx == higher_idx]

        # Wrap each 1-D sample in a list so concatenate stacks them as rows.
        arrays = [[x] for x in arrays]
        ar = np.concatenate(arrays)
        print(seqid, ar.shape)
        _, columns = ar.shape

        bins = [ar[:, j] for j in range(columns)]
        beta = np.array([np.median(a) for a in bins]) / ploidy
        std = np.array([np.std(a) / np.mean(a) for a in bins])

        betafile = op.join(betadir, "{}.beta".format(seqid))
        beta.tofile(betafile)
        stdfile = op.join(betadir, "{}.std".format(seqid))
        std.tofile(stdfile)
        logging.debug("Written to `{}`".format(betafile))
        ar.tofile("{}.bin".format(seqid))
def mergecn(args):
    """
    %prog mergecn FACE.csv

    Compile matrix of GC-corrected copy numbers. Place a bunch of folders in
    csv file. Each folder will be scanned, one chromosome after another.

    Outputs per chromosome: `beta/<seqid>.beta` (per-bin median divided by
    ploidy), `beta/<seqid>.std` (per-bin std / mean) and `<seqid>.bin` (the
    stacked sample-by-bin matrix).
    """
    # NOTE: the docstring above is also used as the command-line usage text.
    p = OptionParser(mergecn.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (csvfile,) = args
    # Read one sample folder per line, closing the file deterministically.
    with open(csvfile) as fp:
        samples = [x.replace("-cn", "").strip().strip("/") for x in fp]
    # Ignore empty lines; they would map to a non-existent "-cn" folder.
    samples = [s for s in samples if s]

    betadir = "beta"
    mkdir(betadir)
    for seqid in allsomes:
        names = [
            op.join(s + "-cn", "{}.{}.cn".format(op.basename(s), seqid))
            for s in samples
        ]
        # np.float64 is what the removed `np.float` alias used to resolve to.
        arrays = [np.fromfile(name, dtype=np.float64) for name in names]

        # Samples whose length differs from the median length are dropped so
        # the remaining ones can be stacked row-wise.
        shapes = [x.shape[0] for x in arrays]
        med_shape = np.median(shapes)
        arrays = [x for x in arrays if x.shape[0] == med_shape]

        ploidy = 2 if seqid not in ("chrY", "chrM") else 1
        if seqid in sexsomes:
            # Per-sample median over strictly positive bins only.
            chr_med = np.array([np.median(a[a > 0]) for a in arrays])
            # Assumes get_kmeans yields a NumPy array of 0/1 labels, one per
            # sample - verify against its definition.
            idx = get_kmeans(chr_med, k=2)
            zero_med = np.median(chr_med[idx == 0])
            one_med = np.median(chr_med[idx == 1])
            logging.debug(
                "K-means with {} c0:{} c1:{}".format(seqid, zero_med, one_med)
            )
            higher_idx = 1 if one_med > zero_med else 0
            # Use the higher mean coverage component
            arrays = np.array(arrays)[idx == higher_idx]

        # Each sample becomes a 1 x N row; concatenation yields samples x bins.
        arrays = [[x] for x in arrays]
        ar = np.concatenate(arrays)
        # print() as a function, matching the rest of the file.
        print(seqid, ar.shape)
        _, columns = ar.shape

        beta = []
        std = []
        for j in range(columns):
            a = ar[:, j]
            beta.append(np.median(a))
            std.append(np.std(a) / np.mean(a))

        beta = np.array(beta) / ploidy
        betafile = op.join(betadir, "{}.beta".format(seqid))
        beta.tofile(betafile)
        stdfile = op.join(betadir, "{}.std".format(seqid))
        std = np.array(std)
        std.tofile(stdfile)
        logging.debug("Written to `{}`".format(betafile))
        ar.tofile("{}.bin".format(seqid))
def calibrate(args):
    """
    %prog calibrate calibrate.JPG boxsize

    Calibrate pixel-cm ratio and color adjustment.
    - `calibrate.JPG` is the photo containing a colorchecker
    - `boxsize` is the measured size for the boxes on printed colorchecker, in
      squared centimeter (cm2) units

    Writes `calibrate.json` next to the image and returns its path.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    # Everything after the two positional arguments is forwarded to seeds().
    xargs = args[2:]
    p = OptionParser(calibrate.__doc__)
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    imagefile, boxsize = args
    boxsize = float(boxsize)

    # Read in color checker: one whitespace-separated row of boxes per line
    colorcheckerfile = op.join(datadir, "colorchecker.txt")
    with open(colorcheckerfile) as fp:
        colorchecker = [row.split() for row in fp]
    expected = sum(len(row) for row in colorchecker)

    folder = op.split(imagefile)[0]
    objects = seeds([imagefile, "--outdir={0}".format(folder)] + xargs)
    nseeds = len(objects)
    logging.debug("Found {0} boxes (expected={1})".format(nseeds, expected))
    assert (
        expected - 4 <= nseeds <= expected + 4
    ), "Number of boxes drastically different from {0}".format(expected)

    # Calculate pixel-cm ratio
    boxes = [t.area for t in objects]
    reject = reject_outliers(boxes)
    retained_boxes = [b for r, b in zip(reject, boxes) if not r]
    mbox = np.median(retained_boxes)  # in pixels
    # Areas scale quadratically, hence the square root for a length ratio.
    pixel_cm_ratio = (mbox / boxsize) ** 0.5
    logging.debug(
        "Median box size: {0} pixels. Measured box size: {1} cm2".format(
            mbox, boxsize
        )
    )
    logging.debug("Pixel-cm ratio: {0}".format(pixel_cm_ratio))

    # Cluster box centers into 6 x-groups and 4 y-groups to recover each
    # box's position in the checker grid.
    xs = [t.x for t in objects]
    ys = [t.y for t in objects]
    idx_xs = get_kmeans(xs, 6)
    idx_ys = get_kmeans(ys, 4)
    for xi, yi, s in zip(idx_xs, idx_ys, objects):
        s.rank = (yi, xi)

    objects.sort(key=lambda x: x.rank)

    colormap = []
    for s in objects:
        # rank is (y-cluster, x-cluster); used as (row, column) in the table.
        row, col = s.rank
        observed = s.rgb
        reference = rgb_to_triplet(colorchecker[row][col])
        colormap.append((np.array(observed), np.array(reference)))

    # Color transfer: start from the identity transform
    tr0 = np.eye(3).flatten()
    print("Initial distance:", total_error(tr0, colormap), file=sys.stderr)
    tr = fmin(total_error, tr0, args=(colormap,))
    tr.resize((3, 3))
    print("RGB linear transform:\n", tr, file=sys.stderr)
    calib = {"PixelCMratio": pixel_cm_ratio, "RGBtransform": tr.tolist()}

    jsonfile = op.join(folder, "calibrate.json")
    fw = must_open(jsonfile, "w")
    try:
        print(json.dumps(calib, indent=4), file=fw)
    finally:
        # Close the handle even if serialization or the write fails.
        fw.close()
    logging.debug("Calibration specs written to `{0}`.".format(jsonfile))

    return jsonfile
def calibrate(args):
    """
    %prog calibrate calibrate.JPG boxsize

    Calibrate pixel-cm ratio and color adjustment.
    - `calibrate.JPG` is the photo containing a colorchecker
    - `boxsize` is the measured size for the boxes on printed colorchecker, in
      squared centimeter (cm2) units

    The result is saved as `calibrate.json` in the image's folder; the path of
    that file is returned.
    """
    # NOTE: the docstring above is also used as the command-line usage text.
    # Extra arguments beyond the two positionals are passed through to seeds().
    xargs = args[2:]
    p = OptionParser(calibrate.__doc__)
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    imagefile, boxsize = args
    boxsize = float(boxsize)

    # Read in color checker (rows of whitespace-separated box entries)
    colorcheckerfile = op.join(datadir, "colorchecker.txt")
    colorchecker = []
    expected = 0
    with open(colorcheckerfile) as fp:
        for row in fp:
            entries = row.split()
            colorchecker.append(entries)
            expected += len(entries)

    folder = op.split(imagefile)[0]
    objects = seeds([imagefile, "--outdir={0}".format(folder)] + xargs)
    nseeds = len(objects)
    logging.debug("Found {0} boxes (expected={1})".format(nseeds, expected))
    assert (
        expected - 4 <= nseeds <= expected + 4
    ), "Number of boxes drastically different from {0}".format(expected)

    # Calculate pixel-cm ratio
    boxes = [t.area for t in objects]
    reject = reject_outliers(boxes)
    retained_boxes = [b for r, b in zip(reject, boxes) if not r]
    mbox = np.median(retained_boxes)  # in pixels
    # Square root converts the area ratio into a length ratio.
    pixel_cm_ratio = (mbox / boxsize) ** 0.5
    logging.debug(
        "Median box size: {0} pixels. Measured box size: {1} cm2".format(
            mbox, boxsize
        )
    )
    logging.debug("Pixel-cm ratio: {0}".format(pixel_cm_ratio))

    # Coordinates are coerced to float before clustering into the 6 x-groups
    # and 4 y-groups that locate each box in the checker grid.
    xs = [float(t.x) for t in objects]
    ys = [float(t.y) for t in objects]
    idx_xs = get_kmeans(xs, 6)
    idx_ys = get_kmeans(ys, 4)
    for xi, yi, s in zip(idx_xs, idx_ys, objects):
        s.rank = (yi, xi)

    objects.sort(key=lambda x: x.rank)

    # Pair every observed box color with its reference color; rank is
    # (y-cluster, x-cluster), used as (row, column) into the checker table.
    colormap = []
    for s in objects:
        row, col = s.rank
        reference = rgb_to_triplet(colorchecker[row][col])
        colormap.append((np.array(s.rgb), np.array(reference)))

    # Color transfer, optimized starting from the identity matrix
    tr0 = np.eye(3).flatten()
    print("Initial distance:", total_error(tr0, colormap), file=sys.stderr)
    tr = fmin(total_error, tr0, args=(colormap,))
    tr.resize((3, 3))
    print("RGB linear transform:\n", tr, file=sys.stderr)
    calib = {"PixelCMratio": pixel_cm_ratio, "RGBtransform": tr.tolist()}

    jsonfile = op.join(folder, "calibrate.json")
    fw = must_open(jsonfile, "w")
    try:
        print(json.dumps(calib, indent=4), file=fw)
    finally:
        # Make sure the output handle is released even when the write fails.
        fw.close()
    logging.debug("Calibration specs written to `{0}`.".format(jsonfile))

    return jsonfile