Python get_kmeans Examples, jcvi.algorithms.formula.get_kmeans Python Examples

Example #1

0

Show file

File: cnv.py Project: guo-cheng/jcvi

def mergecn(args):
    """
    %prog mergecn FACE.csv

    Compile matrix of GC-corrected copy numbers. Place a bunch of folders in
    csv file. Each folder will be scanned, one chromosomes after another.
    """
    p = OptionParser(mergecn.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (csvfile, ) = args
    samples = [x.replace("-cn", "").strip().strip("/") for x in open(csvfile)]
    betadir = "beta"
    mkdir(betadir)
    for seqid in allsomes:
        names = [
            op.join(s + "-cn", "{}.{}.cn".format(op.basename(s), seqid))
            for s in samples
        ]
        arrays = [np.fromfile(name, dtype=np.float) for name in names]
        shapes = [x.shape[0] for x in arrays]
        med_shape = np.median(shapes)
        arrays = [x for x in arrays if x.shape[0] == med_shape]
        ploidy = 2 if seqid not in ("chrY", "chrM") else 1
        if seqid in sexsomes:
            chr_med = [np.median([x for x in a if x > 0]) for a in arrays]
            chr_med = np.array(chr_med)
            idx = get_kmeans(chr_med, k=2)
            zero_med = np.median(chr_med[idx == 0])
            one_med = np.median(chr_med[idx == 1])
            logging.debug("K-means with {} c0:{} c1:{}".format(
                seqid, zero_med, one_med))
            higher_idx = 1 if one_med > zero_med else 0
            # Use the higher mean coverage componen
            arrays = np.array(arrays)[idx == higher_idx]
        arrays = [[x] for x in arrays]
        ar = np.concatenate(arrays)
        print(seqid, ar.shape)
        rows, columns = ar.shape
        beta = []
        std = []
        for j in range(columns):
            a = ar[:, j]
            beta.append(np.median(a))
            std.append(np.std(a) / np.mean(a))
        beta = np.array(beta) / ploidy
        betafile = op.join(betadir, "{}.beta".format(seqid))
        beta.tofile(betafile)
        stdfile = op.join(betadir, "{}.std".format(seqid))
        std = np.array(std)
        std.tofile(stdfile)
        logging.debug("Written to `{}`".format(betafile))
        ar.tofile("{}.bin".format(seqid))

Example #2

0

Show file

File: cnv.py Project: xuanblo/jcvi

def mergecn(args):
    """
    %prog mergecn FACE.csv

    Compile matrix of GC-corrected copy numbers. Place a bunch of folders in
    csv file. Each folder will be scanned, one chromosomes after another.
    """
    p = OptionParser(mergecn.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    csvfile, = args
    samples = [x.replace("-cn", "").strip().strip("/") for x in open(csvfile)]
    betadir = "beta"
    mkdir(betadir)
    for seqid in allsomes:
        names = [op.join(s + "-cn", "{}.{}.cn".
                 format(op.basename(s), seqid)) for s in samples]
        arrays = [np.fromfile(name, dtype=np.float) for name in names]
        shapes = [x.shape[0] for x in arrays]
        med_shape = np.median(shapes)
        arrays = [x for x in arrays if x.shape[0] == med_shape]
        ploidy = 2 if seqid not in ("chrY", "chrM") else 1
        if seqid in sexsomes:
            chr_med = [np.median([x for x in a if x > 0]) for a in arrays]
            chr_med = np.array(chr_med)
            idx = get_kmeans(chr_med, k=2)
            zero_med = np.median(chr_med[idx == 0])
            one_med = np.median(chr_med[idx == 1])
            logging.debug("K-means with {} c0:{} c1:{}"
                          .format(seqid, zero_med, one_med))
            higher_idx = 1 if one_med > zero_med else 0
            # Use the higher mean coverage componen
            arrays = np.array(arrays)[idx == higher_idx]
        arrays = [[x] for x in arrays]
        ar = np.concatenate(arrays)
        print seqid, ar.shape
        rows, columns = ar.shape
        beta = []
        std = []
        for j in xrange(columns):
            a = ar[:, j]
            beta.append(np.median(a))
            std.append(np.std(a) / np.mean(a))
        beta = np.array(beta) / ploidy
        betafile = op.join(betadir, "{}.beta".format(seqid))
        beta.tofile(betafile)
        stdfile = op.join(betadir, "{}.std".format(seqid))
        std = np.array(std)
        std.tofile(stdfile)
        logging.debug("Written to `{}`".format(betafile))
        ar.tofile("{}.bin".format(seqid))

Example #3

0

Show file

File: grabseeds.py Project: tanghaibao/jcvi

def calibrate(args):
    """
    %prog calibrate calibrate.JPG boxsize

    Calibrate pixel-inch ratio and color adjustment.
    - `calibrate.JPG` is the photo containig a colorchecker
    - `boxsize` is the measured size for the boxes on printed colorchecker, in
      squared centimeter (cm2) units
    """
    xargs = args[2:]
    p = OptionParser(calibrate.__doc__)
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    imagefile, boxsize = args
    boxsize = float(boxsize)

    # Read in color checker
    colorcheckerfile = op.join(datadir, "colorchecker.txt")
    colorchecker = []
    expected = 0
    for row in open(colorcheckerfile):
        boxes = row.split()
        colorchecker.append(boxes)
        expected += len(boxes)

    folder = op.split(imagefile)[0]
    objects = seeds([imagefile, "--outdir={0}".format(folder)] + xargs)
    nseeds = len(objects)
    logging.debug("Found {0} boxes (expected={1})".format(nseeds, expected))
    assert expected - 4 <= nseeds <= expected + 4, \
                "Number of boxes drastically different from {0}".format(expected)

    # Calculate pixel-cm ratio
    boxes = [t.area for t in objects]
    reject = reject_outliers(boxes)
    retained_boxes = [b for r, b in zip(reject, boxes) if not r]
    mbox = np.median(retained_boxes)  # in pixels
    pixel_cm_ratio = (mbox / boxsize) ** .5
    logging.debug("Median box size: {0} pixels. Measured box size: {1} cm2".\
                format(mbox, boxsize))
    logging.debug("Pixel-cm ratio: {0}".format(pixel_cm_ratio))

    xs = [t.x for t in objects]
    ys = [t.y for t in objects]
    idx_xs = get_kmeans(xs, 6)
    idx_ys = get_kmeans(ys, 4)
    for xi, yi, s in zip(idx_xs, idx_ys, objects):
        s.rank = (yi, xi)

    objects.sort(key=lambda x: x.rank)

    colormap = []
    for s in objects:
        x, y = s.rank
        observed, expected = s.rgb, rgb_to_triplet(colorchecker[x][y])
        colormap.append((np.array(observed), np.array(expected)))

    # Color transfer
    tr0 = np.eye(3).flatten()
    print("Initial distance:", total_error(tr0, colormap), file=sys.stderr)
    tr = fmin(total_error, tr0, args=(colormap,))
    tr.resize((3, 3))
    print("RGB linear transform:\n", tr, file=sys.stderr)
    calib = {"PixelCMratio": pixel_cm_ratio,
             "RGBtransform": tr.tolist()}

    jsonfile = op.join(folder, "calibrate.json")
    fw = must_open(jsonfile, "w")
    print(json.dumps(calib, indent=4), file=fw)
    fw.close()
    logging.debug("Calibration specs written to `{0}`.".format(jsonfile))

    return jsonfile

Example #4

0

Show file

File: grabseeds.py Project: fossabot/jcvi

def calibrate(args):
    """
    %prog calibrate calibrate.JPG boxsize

    Calibrate pixel-inch ratio and color adjustment.
    - `calibrate.JPG` is the photo containig a colorchecker
    - `boxsize` is the measured size for the boxes on printed colorchecker, in
      squared centimeter (cm2) units
    """
    xargs = args[2:]
    p = OptionParser(calibrate.__doc__)
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    imagefile, boxsize = args
    boxsize = float(boxsize)

    # Read in color checker
    colorcheckerfile = op.join(datadir, "colorchecker.txt")
    colorchecker = []
    expected = 0
    for row in open(colorcheckerfile):
        boxes = row.split()
        colorchecker.append(boxes)
        expected += len(boxes)

    folder = op.split(imagefile)[0]
    objects = seeds([imagefile, "--outdir={0}".format(folder)] + xargs)
    nseeds = len(objects)
    logging.debug("Found {0} boxes (expected={1})".format(nseeds, expected))
    assert (
        expected - 4 <= nseeds <= expected +
        4), "Number of boxes drastically different from {0}".format(expected)

    # Calculate pixel-cm ratio
    boxes = [t.area for t in objects]
    reject = reject_outliers(boxes)
    retained_boxes = [b for r, b in zip(reject, boxes) if not r]
    mbox = np.median(retained_boxes)  # in pixels
    pixel_cm_ratio = (mbox / boxsize)**0.5
    logging.debug(
        "Median box size: {0} pixels. Measured box size: {1} cm2".format(
            mbox, boxsize))
    logging.debug("Pixel-cm ratio: {0}".format(pixel_cm_ratio))

    xs = [t.x for t in objects]
    ys = [t.y for t in objects]
    xs = [float(itemx) for itemx in xs]
    ys = [float(itemy) for itemy in ys]
    idx_xs = get_kmeans(xs, 6)
    idx_ys = get_kmeans(ys, 4)
    for xi, yi, s in zip(idx_xs, idx_ys, objects):
        s.rank = (yi, xi)

    objects.sort(key=lambda x: x.rank)

    colormap = []
    for s in objects:
        x, y = s.rank
        observed, expected = s.rgb, rgb_to_triplet(colorchecker[x][y])
        colormap.append((np.array(observed), np.array(expected)))

    # Color transfer
    tr0 = np.eye(3).flatten()
    print("Initial distance:", total_error(tr0, colormap), file=sys.stderr)
    tr = fmin(total_error, tr0, args=(colormap, ))
    tr.resize((3, 3))
    print("RGB linear transform:\n", tr, file=sys.stderr)
    calib = {"PixelCMratio": pixel_cm_ratio, "RGBtransform": tr.tolist()}

    jsonfile = op.join(folder, "calibrate.json")
    fw = must_open(jsonfile, "w")
    print(json.dumps(calib, indent=4), file=fw)
    fw.close()
    logging.debug("Calibration specs written to `{0}`.".format(jsonfile))

    return jsonfile