def __get_reference_gene_map(self):
    '''
    Look up reference data for every distinct Ensembl gene ID in the output.

    Gene IDs are taken from index 2 of each entry in self.output_rows,
    de-duplicated, and handed as a list to
    ExecuteEnsemblPerl.get_ensembl_gene_id_ref_map().
    :return: dict
    '''
    # A set removes repeated gene IDs shared between output rows.
    unique_gene_ids = set()
    for row in self.output_rows:
        unique_gene_ids.update(row[2])
    perl_runner = ExecuteEnsemblPerl()
    return perl_runner.get_ensembl_gene_id_ref_map(list(unique_gene_ids))
class CollectVariantData:
    '''
    Collects Ensembl data for the rs IDs listed in an input file, using
    ExecuteEnsemblPerl for the Perl-based lookups and RsIdVepPost for the
    Ensembl REST VEP POST calls.
    '''

    def __init__(self, rs_id_file):
        '''
        :param rs_id_file: path of a text file with one rs ID per line
        :return: None
        '''
        self.rs_id_file = rs_id_file
        self.ensembl_perl = ExecuteEnsemblPerl(rs_id_file)

    def get_variants_in_ensembl_map(self):
        '''
        Uses the Perl Ensembl REST API to determine if input variants are
        in Ensembl.
        :return: dict
        '''
        return self.ensembl_perl.get_variant_in_ensembl_map()

    def get_nearest_gene_map(self):
        '''
        Uses the PERL Ensembl API to get the nearest gene at the 5' end for
        each variant.
        :return: dict
        '''
        return self.ensembl_perl.get_nearest_gene_map()

    def __chunk_list(self, input_list, chunk_size=200):
        '''
        Breaks the input list into chunks. Used to limit the number of
        variants sent to the Ensembl REST VEP POST API call.
        :param input_list: iterable of elements to split up
        :param chunk_size: int, maximum number of elements per chunk
        :return: generator of lists; the final list may be shorter
        '''
        sublist = []
        for element in input_list:
            sublist.append(element)
            if len(sublist) == chunk_size:
                yield sublist
                sublist = []
        # Emit whatever is left over after the last full chunk.
        if sublist:
            yield sublist

    def __read_rs_ids(self):
        '''
        Reads the rs IDs from self.rs_id_file, one per line.

        Surrounding whitespace is stripped and blank lines (including the
        one produced by a trailing newline) are skipped so that no empty
        ID is ever sent to the VEP API.
        :return: list of str
        '''
        # The context manager guarantees the file handle is closed.
        with open(self.rs_id_file, 'rt') as rs_id_handle:
            stripped_lines = [line.strip() for line in rs_id_handle]
        return [rs_id for rs_id in stripped_lines if rs_id]

    def get_rest_api_vep_list(self):
        '''
        Uses the Ensembl REST VEP POST API to return a list of VEP JSONs.
        The rs IDs are submitted in chunks (see __chunk_list) and the
        entries returned for every chunk are concatenated in order.
        :return: list
        '''
        vep_outputs = []
        for sublist in self.__chunk_list(self.__read_rs_ids()):
            vep_post = RsIdVepPost(sublist)
            vep_outputs.extend(vep_post.get_vep_post_output())
        return vep_outputs
def __init__(self, rs_id_file):
    '''
    Store the rs ID file reference and create the ExecuteEnsemblPerl helper
    for it.
    :param rs_id_file: value kept on the instance and passed unchanged to
        ExecuteEnsemblPerl (presumably the path of a file listing rs IDs;
        confirm against callers)
    :return: None
    '''
    self.rs_id_file = rs_id_file
    self.ensembl_perl = ExecuteEnsemblPerl(rs_id_file)
:param rank: int :return: dictionary ''' # Sort a list of dictionaries numerically using the key "distance". genes = sorted(self._nearest_gene_list, key=lambda x: abs(int(x['distance']))) if len(genes) -1 < rank: return {} return genes[rank] def get_nearest_gene_map(self, rank=0): ''' Return a dictionary with required gene ID and distance information. Default rank=0 chooses the nearest gene but can provide rank=1 to get, for example, the next nearest gene. :param rank: int :return: dictionary ''' nearest_gene_map = self.get_gene_map_for_rank(rank) return nearest_gene_map if __name__ == '__main__': from execute_ensembl_perl import ExecuteEnsemblPerl test_file = './test_data/rs_id_list.txt' exec_ensembl_perl = ExecuteEnsemblPerl(test_file) nearest_gene_map = exec_ensembl_perl.get_nearest_gene_map() rs_id = 'rs1000113' nearest_gene5p = NearestGeneFivePrime(nearest_gene_map[rs_id]) print rs_id + ': ', print nearest_gene5p.get_nearest_gene_map()