Exemple #1
0
 def list_traits(self):
     """Return the distinct traits reported by the SQL client for ``self.file``.

     Duplicates are removed by passing the client's result through a ``set``,
     so the order of the returned list is not defined.
     """
     client = sql_client.sqlClient(self.file)
     unique_traits = set(client.get_traits())
     return list(unique_traits)
Exemple #2
0
 def chrom_from_trait(self, trait):
     """Return the distinct chromosomes the SQL client reports for ``trait``.

     The lookup is delegated to ``get_chrom_from_trait`` on a client opened
     on ``self.file``; duplicates are dropped via a ``set``, so the order of
     the returned list is not defined.
     """
     client = sql_client.sqlClient(self.file)
     unique_chroms = set(client.get_chrom_from_trait(trait))
     return list(unique_chroms)
Exemple #3
0
 def chrom_from_gene(self, gene):
     """Return the distinct chromosomes the SQL client reports for ``gene``.

     The lookup is delegated to ``get_chrom_from_gene`` on a client opened
     on ``self.file``; duplicates are dropped via a ``set``, so the order of
     the returned list is not defined.
     """
     client = sql_client.sqlClient(self.file)
     unique_chroms = set(client.get_chrom_from_gene(gene))
     return list(unique_chroms)
 def get_trait_of_study(self, study_to_find):
     """Return the sorted, de-duplicated traits recorded for a study.

     :param study_to_find: identifier of the study to look up
     :return: sorted list of distinct traits
     :raises NotFoundError: if the client returns no traits for the study
     """
     client = sql_client.sqlClient(self.sqlite_db)
     found = client.get_traits_for_study(study_to_find)
     if not found:
         # An empty result is treated as "study not found".
         raise NotFoundError("Study " + study_to_find)
     return sorted(set(found))
 def map_snp_to_location(self):
     """Map ``self.snp`` to a ``(chromosome, bp_interval)`` pair.

     The numeric part of the identifier (the digits following a run of
     letters, e.g. ``"123"`` in ``"rs123"``) is looked up through the SQL
     client opened on ``self.snpdb``. The first mapping returned is used and
     the interval is formatted as ``"<position>:<position>"``.

     :return: ``(chromosome, "pos:pos")`` when the SNP is found, otherwise
         ``(None, None)``. This covers both an identifier that does not
         match the expected pattern and one that has no mapping.
     """
     try:
         # re.search() returns None on a miss, so .group() raises
         # AttributeError, which is the "malformed identifier" signal.
         snp_no_prefix = re.search(r"[a-zA-Z]+([0-9]+)", self.snp).group(1)
         sql = sq.sqlClient(self.snpdb)
         mapping = sql.get_chr_pos(snp_no_prefix)
     except AttributeError:
         return (None, None)

     if not mapping:
         # Previously this path produced (None, "None:None"); report a
         # missing SNP the same way as a malformed one.
         return (None, None)

     chromosome, position = mapping[0]
     bp_interval = ':'.join([str(position), str(position)])
     return (chromosome, bp_interval)
def main():
    """Command-line entry point for loading SNP positions into the database.

    Options:
      -vcf    optional path of a tab-separated VCF; its first three columns
              (chromosome, position, rsid) are inserted into the ``snp``
              table with the ``rs`` text stripped from the rsid.
      -db     required path/name of the database to load into.
      -index  flag; when given, the rsid index is dropped and re-created.

    When ``-index`` is not given, "nothing left to do" is printed after any
    requested VCF load has finished.
    """
    parser = argparse.ArgumentParser()
    argument_specs = (
        ('-vcf', {'help': 'The name of the vcf to be processed',
                  'required': False}),
        ('-db', {'help': 'The name of the database to load to',
                 'required': True}),
        ('-index', {'help': 'create index on the rsid',
                    'required': False,
                    'action': 'store_true'}),
    )
    for flag, spec in argument_specs:
        parser.add_argument(flag, **spec)
    options = parser.parse_args()
    db_name = options.db

    if options.vcf:
        # Only the first three VCF columns are needed; '#' header lines are
        # skipped and everything is read as text.
        snp_frame = pd.read_csv(options.vcf,
                                sep='\t',
                                comment='#',
                                header=None,
                                dtype=str,
                                usecols=[0, 1, 2],
                                names=['CHROM', 'POS', 'RSID'])
        snp_frame.RSID = snp_frame.RSID.str.replace("rs", "")

        loader = sq.sqlClient(db_name)
        loader.drop_rsid_index()
        rows = list(snp_frame.itertuples(index=False, name=None))
        # Bulk insert inside one explicit transaction.
        loader.cur.execute('BEGIN TRANSACTION')
        loader.cur.executemany(
            "insert or ignore into snp(chr, position, rsid) values (?, ?, ?)",
            rows)
        loader.cur.execute('COMMIT')

    if not options.index:
        print("nothing left to do")
        return

    indexer = sq.sqlClient(db_name)
    indexer.drop_rsid_index()
    indexer.create_rsid_index()
 def load(self):
     """Insert every SNP row from ``self.datasets`` into ``self.database``.

     The rsid index is dropped first (an ``sqlite3.OperationalError`` from
     that step is printed and otherwise ignored), each
     ``(snp, chromosome, base-pair)`` row is printed and inserted, and the
     index is re-created before the final commit.
     """
     client = sqlClient(database=self.database)
     print('starting load...')
     print('database: {}'.format(self.database))
     try:
         client.drop_rsid_index()
     except sqlite3.OperationalError as err:
         print(err)
     for position, rsid in enumerate(self.datasets[SNP_DSET]):
         # The three datasets are read in parallel by position.
         chromosome = self.datasets[CHR_DSET][position]
         base_pair = self.datasets[BP_DSET][position]
         row = (rsid, chromosome, base_pair)
         print(row)
         client.insert_snp_row(row)
     client.create_rsid_index()
     client.commit()
 def _get_study_metadata(self, key):
     """Return what ``get_study_context_meta`` yields for ``key``.

     The lookup runs on a SQL client opened on ``self.database`` and the
     result is passed back unchanged.
     """
     return sq.sqlClient(self.database).get_study_context_meta(key)
 def get_qtl_list(self):
     """Return the sorted, de-duplicated QTL list from ``self.sqlite_db``."""
     client = sql_client.sqlClient(self.sqlite_db)
     distinct_qtls = set(client.get_qtl_list())
     return sorted(distinct_qtls)
 def get_tissue_ont_dict(self):
     """Return the client's ``get_tissue_ont_dict()`` result unchanged.

     The client is opened on ``self.sqlite_db``.
     """
     return sql_client.sqlClient(self.sqlite_db).get_tissue_ont_dict()
 def get_list_of_tissues(self):
     """Return the sorted, de-duplicated result of ``get_tissue_ontos()``.

     The client is opened on ``self.sqlite_db``.
     """
     client = sql_client.sqlClient(self.sqlite_db)
     distinct_tissues = set(client.get_tissue_ontos())
     return sorted(distinct_tissues)
 def get_list_of_studies(self):
     """Return the sorted, de-duplicated studies from ``self.sqlite_db``."""
     client = sql_client.sqlClient(self.sqlite_db)
     distinct_studies = set(client.get_studies())
     return sorted(distinct_studies)
 def check_study(self, study):
     """Confirm that ``study`` is known to the database.

     :param study: identifier of the study to check
     :return: ``True`` when the client's ``check_study`` result is truthy
     :raises NotFoundError: when that result is falsy
     """
     client = sql_client.sqlClient(self.sqlite_db)
     if not client.check_study(study):
         raise NotFoundError("Study " + study)
     return True
    def _narrow_hdf_pool(self):
        """Narrow down which HDF5 files should be searched for this request.

        The method works in three stages:

        1. If ``self.snp`` is set, ``self._chr_bp_from_snp()`` is called first
           (presumably to fill in the chromosome/position for the SNP --
           confirm in that helper).
        2. For each combination of ``tissue`` / ``qtl_group`` / ``study`` that
           is set, the matching file ids are fetched from the database
           (filtered by ``self.quant_method``) and handed to
           ``self._narrow_by_chromosome``.
        3. Depending on ``self.quant_method``, ``self.hdfs`` is (re)assigned
           from a glob pattern for the snp / trait / gene / chromosome-only /
           "everything" cases.

        :return: ``"chr"`` when a per-chromosome file is selected for the
            ``"ge"`` / ``"microarray"`` quantification methods, ``"study"``
            in every other case.
        :raises RequestedNotFound: when a database lookup returns no file ids
            for the requested combination, or when
            ``self._narrow_by_chromosome`` returns a falsy value.
        """

        if self.snp:
            logger.debug("snp")
            self._chr_bp_from_snp()

        # narrow by tissue

        # Each of the five lookup blocks below follows the same shape:
        # query the file ids, fail if there are none, then fail if narrowing
        # those ids by chromosome yields nothing.
        if self.tissue and self.study:
            logger.debug("tissue and study")
            sql = sq.sqlClient(self.database)
            file_ids = []
            resp = sql.get_file_ids_for_study_tissue(self.study, self.tissue,
                                                     self.quant_method)
            if resp:
                file_ids.extend(resp)
                if not self._narrow_by_chromosome(file_ids):
                    raise RequestedNotFound(
                        "Study :{} with tissue: {} and chr {}".format(
                            self.study, self.tissue, self.chromosome))
            else:
                raise RequestedNotFound(
                    "Study :{} with tissue: {} and quantification method: {}".
                    format(self.study, self.tissue, self.quant_method))

        if self.tissue and not self.study:
            logger.debug("tissue")
            sql = sq.sqlClient(self.database)
            file_ids = []
            resp = sql.get_file_ids_for_tissue(self.tissue, self.quant_method)
            if resp:
                file_ids.extend(resp)
                if not self._narrow_by_chromosome(file_ids):
                    raise RequestedNotFound("Tissue: {} with chr {}".format(
                        self.tissue, self.chromosome))
            else:
                raise RequestedNotFound(
                    "Tissue: {} with quantification method: {}".format(
                        self.tissue, self.quant_method))

        # narrow by qtl group

        # NOTE(review): these checks are independent of the tissue checks
        # above, so a request with both tissue and qtl_group set runs both
        # lookups -- confirm that is intended.
        if self.qtl_group and self.study:
            logger.debug("qtl_group and study")
            sql = sq.sqlClient(self.database)
            file_ids = []
            resp = sql.get_file_ids_for_study_qtl_group(
                self.study, self.qtl_group, self.quant_method)
            if resp:
                file_ids.extend(resp)
                if not self._narrow_by_chromosome(file_ids):
                    raise RequestedNotFound(
                        "Study :{} with qtl_group: {} and chr {}".format(
                            self.study, self.qtl_group, self.chromosome))
            else:
                raise RequestedNotFound(
                    "Study :{} with qtl_group: {} and quantification method: {}"
                    .format(self.study, self.qtl_group, self.quant_method))

        if self.qtl_group and not self.study:
            logger.debug("qtl_group")
            sql = sq.sqlClient(self.database)
            file_ids = []
            resp = sql.get_file_ids_for_qtl_group(self.qtl_group,
                                                  self.quant_method)
            if resp:
                file_ids.extend(resp)
                if not self._narrow_by_chromosome(file_ids):
                    raise RequestedNotFound("QTL group: {} with chr {}".format(
                        self.qtl_group, self.chromosome))
            else:
                raise RequestedNotFound(
                    "QTL group: {} with quantification method: {}".format(
                        self.qtl_group, self.quant_method))

        # narrow by anything else

        # Study on its own (no tissue and no qtl_group).
        if self.study and not (self.qtl_group or self.tissue):
            logger.debug("study")
            sql = sq.sqlClient(self.database)
            file_ids = []
            resp = sql.get_file_id_for_study(self.study, self.quant_method)
            if resp:
                file_ids.extend(resp)
                if not self._narrow_by_chromosome(file_ids):
                    raise RequestedNotFound("Study :{} with chr {}".format(
                        self.study, self.chromosome))
            else:
                raise RequestedNotFound(
                    "Study :{} with quantification method: {}".format(
                        self.study, self.quant_method))

        # For "ge" and "microarray" a single per-chromosome file under
        # chr_dir is selected; each matching branch returns "chr" immediately.
        # NOTE(review): '"/" + "/file_"' produces a doubled slash in every
        # per-chromosome pattern below -- looks unintentional, confirm.
        if self.quant_method in ["ge", "microarray"]:
            # NOTE(review): this branch has no study/tissue/qtl_group guard,
            # so a SNP query reassigns self.hdfs even after the lookups above
            # ran -- confirm intended.
            if self.snp:
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.chr_dir) + "/" +
                    "/file_" + str(self.chromosome) + "." +
                    str(self.quant_method) + ".h5")
                return "chr"
            if self.trait and not (self.study or self.tissue
                                   or self.qtl_group):
                logger.debug("phen")
                # presumably sets self.chromosome from the trait -- confirm
                self.chrom_for_trait()
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.chr_dir) + "/" +
                    "/file_" + str(self.chromosome) + "." +
                    str(self.quant_method) + ".h5")
                return "chr"
            if self.gene and not (self.study or self.tissue or self.qtl_group):
                logger.debug("gene")
                # presumably sets self.chromosome from the gene -- confirm
                self.chrom_for_gene()
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.chr_dir) + "/" +
                    "/file_" + str(self.chromosome) + "." +
                    str(self.quant_method) + ".h5")
                return "chr"
            if self.chromosome and all(
                    v is None
                    for v in [self.study, self.qtl_group, self.tissue]):
                logger.debug("bp/chr")
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.chr_dir) + "/" +
                    "/file_" + str(self.chromosome) + "." +
                    str(self.quant_method) + ".h5")
                return "chr"
            # No filter at all: take every study file for this quant method.
            # The "+" in the pattern is a literal character, not a wildcard.
            if all(v is None for v in [
                    self.chromosome, self.study, self.gene, self.trait,
                    self.tissue, self.qtl_group
            ]):
                # NOTE(review): this print duplicates the debug log line below.
                print("all")
                logger.debug("all")
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.study_dir) +
                    "/*/file_*+" + str(self.quant_method) + ".h5")
                return "study"
        else:
            # block for tx/exon/txrev
            # These branches look under study_dir/<chromosome>/ and do not
            # return early; all of them fall through to the final return.
            # NOTE(review): unlike the branch above, the first three checks
            # here do not look at self.qtl_group -- confirm intended.
            if self.trait and not (self.study or self.tissue):
                logger.debug("phen")
                self.chrom_for_trait()
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.study_dir) + "/" +
                    str(self.chromosome) + "/file_*+" +
                    str(self.quant_method) + ".h5")
            if self.gene and not (self.study or self.tissue):
                logger.debug("gene")
                self.chrom_for_gene()
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.study_dir) + "/" +
                    str(self.chromosome) + "/file_*+" +
                    str(self.quant_method) + ".h5")
            if self.chromosome and all(
                    v is None
                    for v in [self.study, self.trait, self.gene, self.tissue]):
                logger.debug("bp/chr")
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.study_dir) + "/" +
                    str(self.chromosome) + "/file_*+" +
                    str(self.quant_method) + ".h5")
            if all(v is None for v in [
                    self.chromosome, self.study, self.gene, self.trait,
                    self.tissue, self.qtl_group
            ]):
                logger.debug("all")
                self.hdfs = glob.glob(
                    os.path.join(self.search_path, self.study_dir) +
                    "/*/file_*+" + str(self.quant_method) + ".h5")

        # Reached when no early "chr" return fired, including requests that
        # were narrowed through the database lookups above.
        return "study"
Exemple #15
0
 def has_gene(self, gene):
     """Return ``True`` when the client's ``get_gene(gene)`` result is truthy.

     The client is opened on ``self.file``; a falsy result gives ``False``.
     """
     client = sql_client.sqlClient(self.file)
     return bool(client.get_gene(gene))
Exemple #16
0
 def has_trait(self, trait):
     """Return ``True`` when the client's ``get_trait(trait)`` result is truthy.

     The client is opened on ``self.file``; a falsy result gives ``False``.
     """
     client = sql_client.sqlClient(self.file)
     return bool(client.get_trait(trait))
Exemple #17
0
 def list_genes(self):
     """Return the distinct genes reported by the SQL client for ``self.file``.

     Duplicates are removed by passing the client's result through a ``set``,
     so the order of the returned list is not defined.
     """
     client = sql_client.sqlClient(self.file)
     unique_genes = set(client.get_genes())
     return list(unique_genes)