def run_permutations(self, temp_uuid):
        """Run permutation tests and set the suggestive/significant LOD cutoffs.

        For each of ``self.num_perm`` permutations the phenotype values are
        shuffled, p-values are computed for the shuffled phenotype (through
        ``self.gen_human_results`` for human datasets, otherwise through the
        external pylmm command, exchanging data via Redis), and the best
        score of that run, ``-log10(lowest p-value)``, is recorded.

        Args:
            temp_uuid: Identifier appended to "pylmm:input:" (Redis key that
                holds the job parameters) and "pylmm:results:" (Redis list
                the result is popped from).

        Side effects:
            Sets ``self.suggestive`` and ``self.significant`` to the 67th and
            95th percentiles of the recorded scores.

        Raises:
            RuntimeError: If no pylmm result is available when the blocking
                pop returns.
        """

        # Parse the phenotype once; only the shuffle differs per permutation.
        # "x" and "" both mark a missing value.
        base_pheno = np.array([
            np.nan if val in ("x", "") else float(val) for val in self.vals
        ])

        key = "pylmm:input:" + temp_uuid
        result_key = "pylmm:results:" + temp_uuid
        is_human = self.dataset.group.species == "human"

        genotype_rows = None
        if not is_human:
            # The genotype matrix is independent of the shuffled phenotype,
            # so it is built a single time instead of once per permutation.
            # NOTE(review): assumes identify_empty_samples/trim_genotypes
            # have no side effects -- confirm.
            genotype_data = [marker['genotypes']
                             for marker in self.dataset.group.markers.markers]
            no_val_samples = self.identify_empty_samples()
            trimmed_genotype_data = self.trim_genotypes(genotype_data,
                                                        no_val_samples)
            genotype_rows = np.array(trimmed_genotype_data).T.tolist()

        top_lod_scores = []

        for _ in range(self.num_perm):
            pheno_vector = base_pheno.copy()
            np.random.shuffle(pheno_vector)

            if is_human:
                p_values, _t_stats = self.gen_human_results(
                    pheno_vector, key, temp_uuid)
            else:
                params = dict(pheno_vector=pheno_vector.tolist(),
                              genotype_matrix=genotype_rows,
                              restricted_max_likelihood=True,
                              refit=False,
                              temp_uuid=temp_uuid,

                              # meta data
                              timestamp=datetime.datetime.now().isoformat(),
                              )

                Redis.set(key, json.dumps(params))
                Redis.expire(key, 60*60)

                command = PYLMM_COMMAND + ' --key {} --species {}'.format(
                    key, "other")
                shell(command)

                # Presumably a redis-py client: blpop returns None when the
                # timeout elapses -- fail with a clear message in that case.
                json_results = Redis.blpop(result_key, 45*60)
                if json_results is None:
                    raise RuntimeError(
                        "Timed out waiting for pylmm results on {}".format(
                            result_key))
                results = json.loads(json_results[1])
                p_values = [float(result) for result in results['p_values']]

            # Record the best score for BOTH branches. The lowest p-value is
            # capped at 1; NaN entries never compare below 1 and are ignored.
            below_one = [p_value for p_value in p_values if p_value < 1]
            lowest_p_value = min(below_one) if below_one else 1
            top_lod_scores.append(-math.log10(lowest_p_value))

        self.suggestive = np.percentile(top_lod_scores, 67)
        self.significant = np.percentile(top_lod_scores, 95)
    def run_permutations(self, temp_uuid):
        """Run permutation tests and set the suggestive/significant LOD cutoffs.

        For each of ``self.num_perm`` permutations the phenotype values are
        shuffled, p-values are computed for the shuffled phenotype (through
        ``self.gen_human_results`` for human datasets, otherwise through the
        external pylmm command, exchanging data via Redis), and the best
        score of that run, ``-log10(lowest p-value)``, is recorded.

        Args:
            temp_uuid: Identifier appended to "pylmm:input:" (Redis key that
                holds the job parameters) and "pylmm:results:" (Redis list
                the result is popped from).

        Side effects:
            Sets ``self.suggestive`` and ``self.significant`` to the 67th and
            95th percentiles of the recorded scores.

        Raises:
            RuntimeError: If no pylmm result is available when the blocking
                pop returns.
        """

        # Parse the phenotype once; only the shuffle differs per permutation.
        # "x" and "" both mark a missing value.
        base_pheno = np.array([
            np.nan if val in ("x", "") else float(val) for val in self.vals
        ])

        key = "pylmm:input:" + temp_uuid
        result_key = "pylmm:results:" + temp_uuid
        is_human = self.dataset.group.species == "human"

        genotype_rows = None
        if not is_human:
            # The genotype matrix is independent of the shuffled phenotype,
            # so it is built a single time instead of once per permutation.
            # NOTE(review): assumes identify_empty_samples/trim_genotypes
            # have no side effects -- confirm.
            genotype_data = [marker['genotypes']
                             for marker in self.dataset.group.markers.markers]
            no_val_samples = self.identify_empty_samples()
            trimmed_genotype_data = self.trim_genotypes(genotype_data,
                                                        no_val_samples)
            genotype_rows = np.array(trimmed_genotype_data).T.tolist()

        top_lod_scores = []

        for _ in range(self.num_perm):
            pheno_vector = base_pheno.copy()
            np.random.shuffle(pheno_vector)

            if is_human:
                p_values, _t_stats = self.gen_human_results(
                    pheno_vector, key, temp_uuid)
            else:
                params = dict(pheno_vector=pheno_vector.tolist(),
                              genotype_matrix=genotype_rows,
                              restricted_max_likelihood=True,
                              refit=False,
                              temp_uuid=temp_uuid,

                              # meta data
                              timestamp=datetime.datetime.now().isoformat(),
                              )

                Redis.set(key, json.dumps(params))
                Redis.expire(key, 60*60)

                command = PYLMM_COMMAND + ' --key {} --species {}'.format(
                    key, "other")
                shell(command)

                # Presumably a redis-py client: blpop returns None when the
                # timeout elapses -- fail with a clear message in that case.
                json_results = Redis.blpop(result_key, 45*60)
                if json_results is None:
                    raise RuntimeError(
                        "Timed out waiting for pylmm results on {}".format(
                            result_key))
                results = json.loads(json_results[1])
                p_values = [float(result) for result in results['p_values']]

            # Record the best score for BOTH branches. The lowest p-value is
            # capped at 1; NaN entries never compare below 1 and are ignored.
            below_one = [p_value for p_value in p_values if p_value < 1]
            lowest_p_value = min(below_one) if below_one else 1
            top_lod_scores.append(-math.log10(lowest_p_value))

        self.suggestive = np.percentile(top_lod_scores, 67)
        self.significant = np.percentile(top_lod_scores, 95)
    def gen_data(self, temp_uuid):
        """Generate a p-value for each marker and attach them to the markers.

        Human datasets are delegated to ``self.gen_human_results``. For any
        other species the phenotype vector and genotype matrix are stored as
        JSON under a Redis key, the external pylmm command is run against
        that key, and the p-values are read back from a Redis result list.

        Args:
            temp_uuid: Identifier appended to "pylmm:input:" (Redis key that
                holds the job parameters) and "pylmm:results:" (Redis list
                the result is popped from).

        Returns:
            ``self.dataset.group.markers.markers`` after ``add_pvalues`` has
            been called with the computed p-values.

        Raises:
            RuntimeError: If no pylmm result is available when the blocking
                pop returns.
        """

        logger.debug("self.vals is:", self.vals)
        # "x" and "" both mark a missing phenotype value.
        pheno_vector = np.array([
            np.nan if val in ("x", "") else float(val) for val in self.vals
        ])

        key = "pylmm:input:" + temp_uuid
        logger.debug("key is:", pf(key))

        if self.dataset.group.species == "human":
            p_values, _t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
        else:
            logger.debug("NOW CWD IS:", os.getcwd())
            genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]

            # NOTE(review): the trimmed genotypes are computed but the matrix
            # below is built from the untrimmed data. The calls are kept so
            # behaviour is unchanged -- confirm which input pylmm should get.
            no_val_samples = self.identify_empty_samples()
            self.trim_genotypes(genotype_data, no_val_samples)

            genotype_matrix = np.array(genotype_data).T

            params = dict(pheno_vector = pheno_vector.tolist(),
                        genotype_matrix = genotype_matrix.tolist(),
                        restricted_max_likelihood = True,
                        refit = False,
                        temp_uuid = temp_uuid,

                        # meta data
                        timestamp = datetime.datetime.now().isoformat(),
                        )

            Redis.set(key, json.dumps(params))
            Redis.expire(key, 60*60)
            logger.debug("before printing command")

            command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other")
            logger.debug("command is:", command)
            logger.debug("after printing command")

            shell(command)

            result_key = "pylmm:results:" + temp_uuid
            # Presumably a redis-py client: blpop returns None when the
            # timeout elapses -- fail with a clear message in that case.
            json_results = Redis.blpop(result_key, 45*60)
            if json_results is None:
                raise RuntimeError(
                    "Timed out waiting for pylmm results on {}".format(result_key))
            results = json.loads(json_results[1])
            p_values = [float(result) for result in results['p_values']]

        self.dataset.group.markers.add_pvalues(p_values)

        return self.dataset.group.markers.markers
Exemple #4
0
    def gen_data(self, temp_uuid):
        """Generate a p-value for each marker and attach them to the markers.

        Human datasets are delegated to ``self.gen_human_results``. For any
        other species the phenotype vector and genotype matrix are stored as
        JSON under a Redis key, the external pylmm command is run against
        that key, and the p-values are read back from a Redis result list.

        Args:
            temp_uuid: Identifier appended to "pylmm:input:" (Redis key that
                holds the job parameters) and "pylmm:results:" (Redis list
                the result is popped from).

        Returns:
            ``self.dataset.group.markers.markers`` after ``add_pvalues`` has
            been called with the computed p-values.

        Raises:
            RuntimeError: If no pylmm result is available when the blocking
                pop returns.
        """

        logger.debug("self.vals is:", self.vals)
        # "x" and "" both mark a missing phenotype value.
        pheno_vector = np.array([
            np.nan if val in ("x", "") else float(val)
            for val in self.vals
        ])

        key = "pylmm:input:" + temp_uuid
        logger.debug("key is:", pf(key))

        if self.dataset.group.species == "human":
            p_values, _t_stats = self.gen_human_results(pheno_vector, key,
                                                        temp_uuid)
        else:
            logger.debug("NOW CWD IS:", os.getcwd())
            genotype_data = [
                marker['genotypes']
                for marker in self.dataset.group.markers.markers
            ]

            # NOTE(review): the trimmed genotypes are computed but the matrix
            # below is built from the untrimmed data. The calls are kept so
            # behaviour is unchanged -- confirm which input pylmm should get.
            no_val_samples = self.identify_empty_samples()
            self.trim_genotypes(genotype_data, no_val_samples)

            genotype_matrix = np.array(genotype_data).T

            params = dict(
                pheno_vector=pheno_vector.tolist(),
                genotype_matrix=genotype_matrix.tolist(),
                restricted_max_likelihood=True,
                refit=False,
                temp_uuid=temp_uuid,

                # meta data
                timestamp=datetime.datetime.now().isoformat(),
            )

            Redis.set(key, json.dumps(params))
            Redis.expire(key, 60 * 60)
            logger.debug("before printing command")

            command = PYLMM_COMMAND + ' --key {} --species {}'.format(
                key, "other")
            logger.debug("command is:", command)
            logger.debug("after printing command")

            shell(command)

            result_key = "pylmm:results:" + temp_uuid
            # Presumably a redis-py client: blpop returns None when the
            # timeout elapses -- fail with a clear message in that case.
            json_results = Redis.blpop(result_key, 45 * 60)
            if json_results is None:
                raise RuntimeError(
                    "Timed out waiting for pylmm results on {}".format(
                        result_key))
            results = json.loads(json_results[1])
            p_values = [float(result) for result in results['p_values']]

        self.dataset.group.markers.add_pvalues(p_values)

        return self.dataset.group.markers.markers