def test_invalid_data(self):
        # Malformed covariance fixtures must be rejected with
        # InvalidInputFormat, and the error text (lower-cased) must mention
        # the kind of problem found.
        cases = (
            ("tests/_td/cov/cov.duplicate.txt.gz", "duplicate"),
            ("tests/_td/cov/cov.uncontiguous.txt.gz", "contiguous"),
        )
        for path, keyword in cases:
            with self.assertRaises(Exceptions.InvalidInputFormat) as raised:
                MatrixManager.load_matrix_manager(path)

            message = raised.exception.msg.lower()
            self.assertTrue(keyword in message)
    def test_invalid_data(self):
        # Loading a malformed covariance file has to fail with
        # InvalidInputFormat; the lower-cased error text names the problem.
        def failure_text(path):
            # Load `path`, require the expected exception, return its text.
            with self.assertRaises(Exceptions.InvalidInputFormat) as raised:
                MatrixManager.load_matrix_manager(path)
            return raised.exception.msg.lower()

        duplicate_text = failure_text("tests/_td/cov/cov.duplicate.txt.gz")
        self.assertTrue("duplicate" in duplicate_text)

        uncontiguous_text = failure_text(
            "tests/_td/cov/cov.uncontiguous.txt.gz")
        self.assertTrue("contiguous" in uncontiguous_text)
    def test_from_load(self):
        # Checks a MatrixManager loaded from the gzipped covariance fixture:
        # plain retrieval, per-gene SNP counts, and whitelisted retrieval.
        m = MatrixManager.load_matrix_manager("tests/_td/cov/cov.txt.gz")

        snps, cov = m.get("ENSG00000239789.1")
        self.assertEqual(snps, cov_data.SNPS_ENSG00000239789_1)
        numpy.testing.assert_array_almost_equal(
            cov, cov_data.COV_ENSG00000239789_1)

        n = m.n_snps("ENSG00000239789.1")
        self.assertEqual(n, len(cov_data.SNPS_ENSG00000239789_1))

        # This whitelist must be rejected (presumably because it names a SNP
        # the gene does not have -- confirm against MatrixManager.get).
        with self.assertRaises(Exceptions.InvalidArguments) as ctx:
            snps, cov = m.get("ENSG00000183742.8", ["rs7806506", "rs12718973"])

        # Use str() instead of `.message`: that attribute is not part of the
        # standard exception interface on Python 3, while str() is always
        # available. assertIn also reports both operands on failure.
        self.assertIn("whitelist", str(ctx.exception))

        # A valid whitelist is compared against the `_w` expected fixtures.
        whitelist = ["rs3094989", "rs7806506", "rs12536095", "rs10226814"]
        snps, cov = m.get("ENSG00000183742.8", whitelist)
        self.assertEqual(snps, cov_data.SNPS_ENSG00000183742_8_w)
        numpy.testing.assert_array_almost_equal(
            cov, cov_data.COV_ENSG00000183742_8_w)

        snps, cov = m.get("ENSG00000004766.11")
        self.assertEqual(snps, cov_data.SNPS_ENSG00000004766_11)
        numpy.testing.assert_array_almost_equal(
            cov, cov_data.COV_ENSG00000004766_11)

        # The count is compared with the number of rows of the expected
        # covariance matrix.
        n = m.n_snps("ENSG00000004766.11")
        self.assertEqual(n, len(cov_data.COV_ENSG00000004766_11))
    def test_from_load(self):
        # Checks a MatrixManager loaded from the gzipped covariance fixture:
        # plain retrieval, per-gene id counts, and whitelisted retrieval.
        m = MatrixManager.load_matrix_manager("tests/_td/cov/cov.txt.gz")

        snps, cov = m.get("ENSG00000239789.1")
        self.assertEqual(snps, cov_data.SNPS_ENSG00000239789_1)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_ENSG00000239789_1)

        n = m.n_ids("ENSG00000239789.1")
        self.assertEqual(n, len(cov_data.SNPS_ENSG00000239789_1))

        # This whitelist must be rejected (presumably because it names an id
        # the gene does not have -- confirm against MatrixManager.get).
        with self.assertRaises(Exceptions.InvalidArguments) as ctx:
            snps, cov = m.get("ENSG00000183742.8", ["rs7806506", "rs12718973"])

        # Use str() instead of `.message`: that attribute is not part of the
        # standard exception interface on Python 3, while str() is always
        # available. assertIn also reports both operands on failure.
        self.assertIn("whitelist", str(ctx.exception))

        # A valid whitelist is compared against the `_w` expected fixtures.
        whitelist = ["rs3094989", "rs7806506", "rs12536095", "rs10226814"]
        snps, cov = m.get("ENSG00000183742.8", whitelist)
        self.assertEqual(snps, cov_data.SNPS_ENSG00000183742_8_w)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_ENSG00000183742_8_w)

        snps, cov = m.get("ENSG00000004766.11")
        self.assertEqual(snps, cov_data.SNPS_ENSG00000004766_11)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_ENSG00000004766_11)

        # The count is compared with the number of rows of the expected
        # covariance matrix.
        n = m.n_ids("ENSG00000004766.11")
        self.assertEqual(n, len(cov_data.COV_ENSG00000004766_11))
Beispiel #5
0
def _context():
    """Build the context used by the tests.

    Combines the GWAS and prediction-model helpers with a MatrixManager
    created from the first sample covariance, and hands all three to
    ``Utilities._build_context``.
    """
    gwas_data = _gwas()
    prediction_model = _prediction_model()
    sample_covariance = SampleData.sample_covariance_s_1()
    covariance_frame = SampleData.dataframe_from_covariance(sample_covariance)
    # NOTE(review): `D` is defined outside this block -- confirm its meaning
    # against the MatrixManager constructor.
    covariance_manager = MatrixManager.MatrixManager(covariance_frame, D)
    return Utilities._build_context(
        prediction_model, covariance_manager, gwas_data)
    def test_flatten(self):
        # Flattening a labelled matrix: the expected rows list every label
        # pair once (diagonal included) as (name, label_i, label_j, value).
        entry = (
            "test",
            cov_data.SNPS_ENSG00000183742_8_w,
            cov_data.COV_ENSG00000183742_8_w,
        )
        flattened = MatrixManager._flatten_matrix_data([entry])
        expected_rows = [
            ('test', 'rs7806506', 'rs7806506', 0.28428631),
            ('test', 'rs7806506', 'rs12536095', -0.01636001),
            ('test', 'rs7806506', 'rs10226814', -0.00157224),
            ('test', 'rs12536095', 'rs12536095', 0.35760734),
            ('test', 'rs12536095', 'rs10226814', 0.00815426),
            ('test', 'rs10226814', 'rs10226814', 0.44923289),
        ]
        numpy.testing.assert_array_equal(flattened, expected_rows)

        # Single-variable case: numpy.cov over one series, with a bare string
        # as the label.
        series = [0, 1, 3]
        single_cov = numpy.cov([series])
        flattened = MatrixManager._flatten_matrix_data(
            [("a", "b", single_cov)])
        numpy.testing.assert_array_equal(
            flattened, [('a', 'b', 'b', 2.33333333333333)])
    def test_flatten(self):
        # _flatten_matrix_data takes (name, labels, matrix) entries; the
        # expected output has one (name, label_i, label_j, value) row per
        # label pair, diagonal included.
        snp_labels = cov_data.SNPS_ENSG00000183742_8_w
        covariance = cov_data.COV_ENSG00000183742_8_w
        rows = MatrixManager._flatten_matrix_data(
            [("test", snp_labels, covariance)])
        numpy.testing.assert_array_equal(rows, [
            ('test', 'rs7806506', 'rs7806506', 0.28428631),
            ('test', 'rs7806506', 'rs12536095', -0.01636001),
            ('test', 'rs7806506', 'rs10226814', -0.00157224),
            ('test', 'rs12536095', 'rs12536095', 0.35760734),
            ('test', 'rs12536095', 'rs10226814', 0.00815426),
            ('test', 'rs10226814', 'rs10226814', 0.44923289),
        ])

        # Degenerate input: the covariance of a single series together with
        # a plain string label yields exactly one row.
        scalar_cov = numpy.cov([[0, 1, 3]])
        rows = MatrixManager._flatten_matrix_data([("a", "b", scalar_cov)])
        expected_single = [('a', 'b', 'b', 2.33333333333333)]
        numpy.testing.assert_array_equal(rows, expected_single)
Beispiel #8
0
def run(args):
    """Compute per-gene SNP covariances and save them chromosome by chromosome.

    Does nothing (beyond logging) when ``args.snp_covariance_output`` already
    exists. Otherwise loads the prediction models, iterates the genotype one
    chromosome at a time, flattens each gene's covariance into rows, and
    saves each chromosome's rows to the output file.

    Args:
        args: parsed command-line arguments; this function reads
            ``snp_covariance_output``, ``models_folder`` and
            ``models_pattern`` and forwards ``args`` to the genotype loader.
    """
    if os.path.exists(args.snp_covariance_output):
        logging.info("%s already exists, you have to move it or delete it if you want it done again", args.snp_covariance_output)
        return

    start = timer()

    logging.info("Loading models...")
    model_manager = PredictionModel.load_model_manager(args.models_folder, name_pattern=args.models_pattern)
    all_snps = model_manager.get_rsids()

    logging.info("processing genotype")
    # Track the first write explicitly instead of testing `chromosome == 1`:
    # the header must go out exactly once, and earlier chromosomes must not
    # be overwritten, even if chromosome 1 is missing, arrives later, or is
    # not an int.
    first_write = True
    for chromosome, metadata, dosage in GenotypeUtilities.genotype_by_chromosome_from_args(args, all_snps):
        logging.log(9, "Processing chromosome %s", str(chromosome))

        context = GenotypeAnalysis.GenotypeAnalysisContext(metadata, dosage, model_manager)
        genes = context.get_genes()
        progress_message = "%d %% of genes processed so far in chromosome " + str(chromosome)
        reporter = Utilities.PercentReporter(9, len(genes))
        reporter.update(0, progress_message)

        # Collect the per-gene frames and concatenate once per chromosome;
        # concatenating inside the loop copies all earlier rows every time.
        gene_frames = []
        for i, gene in enumerate(genes):
            logging.log(6, "%d/%d:%s", i+1, len(genes), gene)
            gene_cov = GenotypeAnalysis.get_prediction_covariance(context, gene)
            gene_cov = MatrixManager._flatten_matrix_data([gene_cov])
            gene_cov = Utilities.to_dataframe(gene_cov, GenotypeAnalysis.COVARIANCE_COLUMNS, to_numeric="ignore", fill_na="NA")
            gene_frames.append(gene_cov)

            reporter.update(i, progress_message)

        reporter.update(len(genes), progress_message)

        # pandas.concat rejects an empty list, so a chromosome without genes
        # still yields an empty frame, as before.
        covariance_results = pandas.concat(gene_frames) if gene_frames else pandas.DataFrame()

        logging.log(9, "writing chromosome results")
        Utilities.save_dataframe(covariance_results, args.snp_covariance_output,
                                 mode="w" if first_write else "a",
                                 header=first_write)
        first_write = False

    end = timer()
    logging.info("Ran covariance builder in %s seconds" % (str(end - start)))
    def test_from_data(self):
        # Checks a MatrixManager built directly from the sample covariance
        # data frame: retrieval per gene, with and without a whitelist.
        s = SampleData.dataframe_from_covariance(
            SampleData.sample_covariance_s_1())
        m = MatrixManager.MatrixManager(s)

        snps, cov = m.get("A")
        self.assertEqual(snps, cov_data.SNPS_A)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_A)

        snps, cov = m.get("B")
        self.assertEqual(snps, cov_data.SNPS_B)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_B)

        snps, cov = m.get("C")
        self.assertEqual(snps, cov_data.SNPS_C)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_C)

        # With this whitelist the result must match the unfiltered one.
        snps, cov = m.get("C", ['rs100', 'rs101', 'rs102'])
        self.assertEqual(snps, cov_data.SNPS_C)
        numpy.testing.assert_array_almost_equal(cov, cov_data.COV_C)

        # This whitelist must be rejected (presumably because it names a SNP
        # that gene "C" does not have -- confirm against MatrixManager.get).
        with self.assertRaises(Exceptions.InvalidArguments) as ctx:
            snps, cov = m.get("C", ["rs100", "rs12718973"])

        # Use str() instead of `.message`: that attribute is not part of the
        # standard exception interface on Python 3, while str() is always
        # available. assertIn also reports both operands on failure.
        self.assertIn("whitelist", str(ctx.exception))
def run(args):
    """Compute per-gene SNP covariances and stream them to a gzipped file.

    Does nothing (beyond logging) when ``args.snp_covariance_output`` already
    exists. Otherwise loads the prediction models, writes a header line, and
    for every chromosome and gene appends one tab-separated
    ``GENE RSID1 RSID2 VALUE`` line per flattened covariance entry.

    Args:
        args: parsed command-line arguments; this function reads
            ``snp_covariance_output``, ``models_folder``, ``models_pattern``
            and ``models_filter`` and forwards ``args`` to the genotype
            loader.
    """
    if os.path.exists(args.snp_covariance_output):
        logging.info("%s already exists, you have to move it or delete it if you want it done again", args.snp_covariance_output)
        return

    start = timer()

    logging.info("Loading models...")
    model_manager = PredictionModel.load_model_manager(args.models_folder, name_pattern=args.models_pattern, name_filter=args.models_filter)
    all_snps = model_manager.get_rsids()
    Utilities.ensure_requisite_folders(args.snp_covariance_output)
    # gzip.open in "w"/"wb" mode is a binary stream, so every line is encoded
    # before writing; writing str directly raises TypeError on Python 3.
    with gzip.open(args.snp_covariance_output, "wb") as o:
        o.write("GENE\tRSID1\tRSID2\tVALUE\n".encode("utf-8"))
        logging.info("processing genotype")

        for chromosome, metadata, dosage in GenotypeUtilities.genotype_by_chromosome_from_args(args, all_snps):
            logging.log(9, "Processing chromosome %s", str(chromosome))

            context = GenotypeAnalysis.GenotypeAnalysisContext(metadata, dosage, model_manager)
            genes = context.get_genes()
            progress_message = "%d %% of genes processed so far in chromosome " + str(chromosome)
            reporter = Utilities.PercentReporter(9, len(genes))
            reporter.update(0, progress_message)
            for i, gene in enumerate(genes):
                logging.log(6, "%d/%d:%s", i+1, len(genes), gene)
                gene_cov = GenotypeAnalysis.get_prediction_covariance(context, gene)
                gene_cov = MatrixManager._flatten_matrix_data([gene_cov])
                for e in gene_cov:
                    line = "{}\t{}\t{}\t{}\n".format(e[0], e[1], e[2], e[3])
                    o.write(line.encode("utf-8"))

                reporter.update(i, progress_message)

            reporter.update(len(genes), progress_message)

    end = timer()
    logging.info("Ran covariance builder in %s seconds" % (str(end - start)))