Esempio n. 1
0
                        dest="TSNE")
    # placeholder value for missing entries; the code that consumes
    # NA_VALUE is outside this excerpt
    parser.add_argument("--tsne-na",
                        type=float,
                        default=-1e3,
                        help="numerical value to use for missing values",
                        dest="NA_VALUE")
    # no default is given, so N_PCA is None unless the flag is passed
    parser.add_argument(
        "--tsne-npca",
        type=int,
        help="number of principal components to take before t-SNE",
        dest="N_PCA")

    args = parser.parse_args()

    # load input expression matrix
    # (assumes dataframe_helper.load returns a pandas DataFrame, since
    # .shape and .columns are used below -- TODO confirm)
    emx = dataframe_helper.load(args.INPUT)

    print("Loaded %s %s" % (args.INPUT, str(emx.shape)))

    # load label file or generate empty labels
    # (None is a singleton, so it is tested by identity rather than with
    # `!=`, per PEP 8)
    if args.LABELS is not None:
        print("Loading label file...")

        labels = np.loadtxt(args.LABELS, dtype=str)
    else:
        # one empty-string label per column of the expression matrix
        labels = np.zeros(len(emx.columns), dtype=str)

    # plot sample distributions
    if args.DENSITY is not None:
        print("Plotting sample distributions...")
Esempio n. 2
0
import dataframe_helper
import numpy as np
import pandas as pd
import sys

if __name__ == "__main__":
    # the script takes exactly one argument: the path of the input file
    if len(sys.argv) != 2:
        print("usage: python stats.py [infile]")
        sys.exit(-1)

    INFILE = sys.argv[1]

    # read the input and keep only its underlying value array
    emx = dataframe_helper.load(INFILE).values

    # summary over every entry of the matrix, ignoring NaNs
    print("shape: %s" % (emx.shape,))

    print("global:")
    print("  min: %12.6f" % np.nanmin(emx))
    print("  avg: %12.6f" % np.nanmean(emx))
    print("  max: %12.6f" % np.nanmax(emx))

    # summary of the per-column means (reduction along axis 0)
    colmeans = np.nanmean(emx, axis=0)

    print("column-wise mean:")
    print("  min: %12.6f" % np.nanmin(colmeans))
    print("  avg: %12.6f" % np.nanmean(colmeans))
Esempio n. 3
0
import argparse
import dataframe_helper
import numpy as np
import pandas as pd

if __name__ == "__main__":
	# Compare a "true" expression matrix against a "test" one and print a
	# warning for each structural mismatch found.

	# parse command-line arguments (both paths are mandatory)
	parser = argparse.ArgumentParser()
	parser.add_argument("--true", required=True, help="true expression matrix", dest="EMX_TRUE")
	parser.add_argument("--test", required=True, help="test expression matrix", dest="EMX_TEST")

	args = parser.parse_args()

	# load input dataframes
	# (assumes dataframe_helper.load returns a pandas DataFrame, since
	# .shape, .values and .index are used below -- TODO confirm)
	emx_true = dataframe_helper.load(args.EMX_TRUE)
	emx_test = dataframe_helper.load(args.EMX_TEST)

	print("Loaded %s %s" % (args.EMX_TRUE, str(emx_true.shape)))
	print("Loaded %s %s" % (args.EMX_TEST, str(emx_test.shape)))

	# extract data matrix from each dataframe
	X_true = emx_true.values
	X_test = emx_test.values

	# print warnings for various mismatches
	if emx_true.shape != emx_test.shape:
		print("warning: shape does not match")

	# Index.equals() simply returns False when the two indexes differ in
	# length, whereas an element-wise `!=` raises ValueError in that case
	# and would abort the script right after the shape warning above.
	if not emx_true.index.equals(emx_test.index):
		print("warning: row names do not match")