Esempio n. 1
0
    def run(self):
        """Build gene expression from GTEx inputs and save it to ``self.args.output``.

        Steps, in order:

        1. Return immediately if ``self.args.output`` already exists.
        2. Load the weight database named by ``self.args.weight_db``.
        3. Build a SNP dictionary from ``self.args.gtex_snp``.
        4. Build the gene expression and the list of individuals from
           ``self.args.gtex_geno``.
        5. If ``self.args.gencode_file`` is set, re-key the gene expression:
           keys found in the "ensemble to name" mapping are kept, keys found
           in the "name to ensemble" mapping are replaced by the mapped id,
           and every other key is dropped.
        6. Save the result with ``save_expression``.

        Returns:
            None.
        """
        output_path = self.args.output
        if os.path.exists(output_path):
            logging.info("%s already exists. Delete it if you want it done again", output_path)
            return

        logging.info("Loading %s", self.args.weight_db)
        weight_db_logic = WeightDBUtilities.WeightDBEntryLogic(self.args.weight_db)
        logging.info("Loaded %s genes", len(weight_db_logic.gene_data_for_gene))

        logging.info("Building snp dict from %s", self.args.gtex_snp)
        snp_dict = GTExSNPFile.build_snp_dict(self.args.gtex_snp, weight_db_logic)
        logging.info("Got %d snps in dictionary", len(snp_dict))

        logging.info("Building gene expression")
        gene_expression, individuals = GTExGenoFile.build_gene_expression(self.args.gtex_geno, weight_db_logic, snp_dict)
        logging.info("Loaded %d gene expression", len(gene_expression))

        if self.args.gencode_file:
            logging.info("Translating gene names to ensemble id")
            ensemble_to_name, name_to_ensemble = GencodeFile.ensemble_to_name_relationships(self.args.gencode_file)
            logging.info("Loaded %d (%d) names", len(ensemble_to_name), len(name_to_ensemble))
            # Iterate over a snapshot of the keys: entries are deleted and
            # re-keyed inside the loop, and on Python 3 mutating a dict while
            # iterating its live keys() view raises RuntimeError.
            for key in list(gene_expression.keys()):
                if key in ensemble_to_name:
                    # Already keyed by a known id; keep the entry as it is.
                    continue
                # Every remaining key is removed; only keys with a known
                # translation are re-inserted under the mapped id.
                expression = gene_expression.pop(key)
                if key in name_to_ensemble:
                    gene_expression[name_to_ensemble[key]] = expression

        logging.info("Saving gene expression for %d genes", len(gene_expression))
        save_expression(output_path, gene_expression, individuals)