Ejemplo n.º 1
0
    def test_flatten(self):
        """Check ``MatrixManager._flatten_matrix_data`` on a 3-label matrix and a 1x1 matrix.

        Each expected row has the form ``(name, label_1, label_2, value)``.

        The name/label fields are compared exactly, while the numeric field is
        compared with ``numpy.testing.assert_allclose``. Comparing the mixed
        str/float rows in one ``assert_array_equal`` call makes NumPy coerce
        them to strings, so the check then depends on how floats are printed
        (e.g. the literal 2.33333333333333 versus the computed 7/3).
        """
        def assert_rows_match(actual, wanted):
            # Same number of rows, same identifiers, values equal up to float tolerance.
            numpy.testing.assert_equal(len(actual), len(wanted))
            numpy.testing.assert_array_equal(
                [tuple(row[:3]) for row in actual],
                [tuple(row[:3]) for row in wanted])
            numpy.testing.assert_allclose(
                [float(row[3]) for row in actual],
                [row[3] for row in wanted])

        labels = cov_data.SNPS_ENSG00000183742_8_w
        matrix = cov_data.COV_ENSG00000183742_8_w
        name = "test"

        flat = MatrixManager._flatten_matrix_data([(name, labels, matrix)])
        expected = [
            ('test', 'rs7806506', 'rs7806506', 0.28428631),
            ('test', 'rs7806506', 'rs12536095', -0.01636001),
            ('test', 'rs7806506', 'rs10226814', -0.00157224),
            ('test', 'rs12536095', 'rs12536095', 0.35760734),
            ('test', 'rs12536095', 'rs10226814', 0.00815426),
            ('test', 'rs10226814', 'rs10226814', 0.44923289),
        ]
        assert_rows_match(flat, expected)

        # Single-variable case: the sample variance of [0, 1, 3] is 7/3.
        X = [0, 1, 3]
        cov = numpy.cov([X])
        flat = MatrixManager._flatten_matrix_data([("a", "b", cov)])

        expected = [('a', 'b', 'b', 2.33333333333333)]
        assert_rows_match(flat, expected)
Ejemplo n.º 2
0
    def test_flatten(self):
        """Verify the rows produced by ``MatrixManager._flatten_matrix_data``.

        Covers the three-label fixture from ``cov_data`` and a 1x1 covariance
        computed with ``numpy.cov``. Expected rows are
        ``(name, label_1, label_2, value)`` tuples.

        Identifiers must match exactly; the covariance value only has to agree
        within ``numpy.testing.assert_allclose``'s default tolerance. The
        expected literals are rounded (2.33333333333333 stands for 7/3), so an
        exact or string-based comparison of the whole row is fragile.
        """
        def check_rows(got_rows, want_rows):
            # Row-by-row comparison: exact on identifiers, tolerant on the float.
            numpy.testing.assert_equal(len(got_rows), len(want_rows))
            for got, want in zip(got_rows, want_rows):
                numpy.testing.assert_equal(tuple(got[:3]), tuple(want[:3]))
                numpy.testing.assert_allclose(float(got[3]), want[3])

        labels = cov_data.SNPS_ENSG00000183742_8_w
        matrix = cov_data.COV_ENSG00000183742_8_w
        name = "test"

        flat = MatrixManager._flatten_matrix_data([(name, labels, matrix)])
        expected = [
            ('test', 'rs7806506', 'rs7806506', 0.28428631),
            ('test', 'rs7806506', 'rs12536095', -0.01636001),
            ('test', 'rs7806506', 'rs10226814', -0.00157224),
            ('test', 'rs12536095', 'rs12536095', 0.35760734),
            ('test', 'rs12536095', 'rs10226814', 0.00815426),
            ('test', 'rs10226814', 'rs10226814', 0.44923289),
        ]
        check_rows(flat, expected)

        # One variable only: numpy.cov yields the sample variance of [0, 1, 3], i.e. 7/3.
        X = [0, 1, 3]
        cov = numpy.cov([X])
        flat = MatrixManager._flatten_matrix_data([("a", "b", cov)])

        expected = [('a', 'b', 'b', 2.33333333333333)]
        check_rows(flat, expected)
Ejemplo n.º 3
0
def run(args):
    """Build per-gene SNP covariance data and save it one chromosome at a time.

    If ``args.snp_covariance_output`` already exists, the function logs a
    message and returns without doing any work.

    Args:
        args: parsed options. This function reads ``snp_covariance_output``,
            ``models_folder`` and ``models_pattern``; the object is also
            handed to ``GenotypeUtilities.genotype_by_chromosome_from_args``,
            which presumably reads further genotype-related options.

    Returns:
        None.
    """
    if os.path.exists(args.snp_covariance_output):
        logging.info("%s already exists, you have to move it or delete it if you want it done again", args.snp_covariance_output)
        return

    start = timer()

    logging.info("Loading models...")
    model_manager = PredictionModel.load_model_manager(args.models_folder, name_pattern=args.models_pattern)
    all_snps = model_manager.get_rsids()

    logging.info("processing genotype")
    # The first chromosome actually written creates the file and carries the
    # header; every later one is appended. Keying this on `chromosome == 1`
    # loses the header when chromosome 1 is absent (or not the int 1) and
    # truncates earlier output when chromosome 1 is not the first one yielded.
    first_write = True
    for chromosome, metadata, dosage in GenotypeUtilities.genotype_by_chromosome_from_args(args, all_snps):
        # 9 and 6 are custom log levels below logging.DEBUG (10).
        logging.log(9, "Processing chromosome %s", str(chromosome))

        context = GenotypeAnalysis.GenotypeAnalysisContext(metadata, dosage, model_manager)
        genes = context.get_genes()
        progress_message = "%d %% of genes processed so far in chromosome " + str(chromosome)
        reporter = Utilities.PercentReporter(9, len(genes))
        reporter.update(0, progress_message)

        # Collect the per-gene frames and concatenate once: calling
        # pandas.concat inside the loop re-copies all accumulated rows
        # on every iteration.
        frames = []
        for i, gene in enumerate(genes):
            logging.log(6, "%d/%d:%s", i+1, len(genes), gene)
            cov_data = GenotypeAnalysis.get_prediction_covariance(context, gene)
            cov_data = MatrixManager._flatten_matrix_data([cov_data])
            cov_data = Utilities.to_dataframe(cov_data, GenotypeAnalysis.COVARIANCE_COLUMNS, to_numeric="ignore", fill_na="NA")
            frames.append(cov_data)

            reporter.update(i, progress_message)

        reporter.update(len(genes), progress_message)

        # pandas.concat rejects an empty list, so fall back to an empty frame
        # when the chromosome has no genes.
        covariance_results = pandas.concat(frames) if frames else pandas.DataFrame()

        logging.log(9, "writing chromosome results")
        Utilities.save_dataframe(covariance_results, args.snp_covariance_output,
                                 mode="w" if first_write else "a",
                                 header=first_write)
        first_write = False

    end = timer()
    logging.info("Ran covariance builder in %s seconds", str(end - start))
Ejemplo n.º 4
0
def run(args):
    """Build per-gene SNP covariance data and stream it to a gzip-compressed TSV.

    If ``args.snp_covariance_output`` already exists, the function logs a
    message and returns without doing any work. Otherwise it writes a
    ``GENE / RSID1 / RSID2 / VALUE`` header followed by one tab-separated line
    per entry returned by ``MatrixManager._flatten_matrix_data``.

    Args:
        args: parsed options. This function reads ``snp_covariance_output``,
            ``models_folder``, ``models_pattern`` and ``models_filter``; the
            object is also handed to
            ``GenotypeUtilities.genotype_by_chromosome_from_args``, which
            presumably reads further genotype-related options.

    Returns:
        None.
    """
    if os.path.exists(args.snp_covariance_output):
        logging.info("%s already exists, you have to move it or delete it if you want it done again", args.snp_covariance_output)
        return

    start = timer()

    logging.info("Loading models...")
    model_manager = PredictionModel.load_model_manager(args.models_folder, name_pattern=args.models_pattern, name_filter=args.models_filter)
    all_snps = model_manager.get_rsids()
    Utilities.ensure_requisite_folders(args.snp_covariance_output)
    # gzip.open in "w"/"wb" mode returns a binary stream, so every piece of
    # text is encoded before writing; passing str raises TypeError on Python 3.
    with gzip.open(args.snp_covariance_output, "wb") as o:
        o.write("GENE\tRSID1\tRSID2\tVALUE\n".encode())
        logging.info("processing genotype")

        for chromosome, metadata, dosage in GenotypeUtilities.genotype_by_chromosome_from_args(args, all_snps):
            # 9 and 6 are custom log levels below logging.DEBUG (10).
            logging.log(9, "Processing chromosome %s", str(chromosome))

            context = GenotypeAnalysis.GenotypeAnalysisContext(metadata, dosage, model_manager)
            genes = context.get_genes()
            progress_message = "%d %% of genes processed so far in chromosome " + str(chromosome)
            reporter = Utilities.PercentReporter(9, len(genes))
            reporter.update(0, progress_message)
            for i, gene in enumerate(genes):
                logging.log(6, "%d/%d:%s", i+1, len(genes), gene)
                cov_data = GenotypeAnalysis.get_prediction_covariance(context, gene)
                entries = MatrixManager._flatten_matrix_data([cov_data])
                # One write per gene rather than one per line.
                text = "".join(
                    "{}\t{}\t{}\t{}\n".format(e[0], e[1], e[2], e[3])
                    for e in entries
                )
                if text:
                    o.write(text.encode())

                reporter.update(i, progress_message)

            reporter.update(len(genes), progress_message)

    end = timer()
    logging.info("Ran covariance builder in %s seconds", str(end - start))