Example #1
0
 def _alter_tables(self):
     """Register this module and its output columns in the result database.

     Upserts one row describing the module (name, title, version) into the
     ``<level>_annotator`` table, adds every configured output column that
     the ``<level>`` data table does not have yet, and upserts each column's
     JSON definition into the ``<level>_header`` table.  Everything is
     committed on ``self.dbconn`` at the end.
     """
     # annotator table
     # Values are bound as parameters so that quote characters in the title
     # or version cannot break the statement.  str() keeps the stored text
     # identical to what string formatting used to produce.
     q = "insert or replace into {} values (?, ?, ?)".format(
         self.level + "_annotator"
     )
     self.cursor_w.execute(
         q,
         [
             str(self.module_name),
             str(self.conf["title"]),
             str(self.conf["version"]),
         ],
     )
     # data table and header table
     header_table_name = self.level + "_header"
     for col_d in self.conf["output_columns"]:
         col_def = ColumnDefinition(col_d)
         colname = col_def.name
         coltype = col_def.type
         # data table: probe for the column; a failing probe means it has to
         # be added.  Only ordinary errors are treated that way, so that
         # KeyboardInterrupt / SystemExit still propagate.
         try:
             self.cursor.execute(f"select {colname} from {self.level} limit 1")
         except Exception:
             q = (
                 "alter table "
                 + self.level
                 + " add column "
                 + colname
                 + " "
                 + self.cr_type_to_sql[coltype]
             )
             self.cursor_w.execute(q)
         # header table
         # use prepared statement to allow " characters in colcats and coldesc
         q = "insert or replace into {} values (?, ?)".format(header_table_name)
         self.cursor_w.execute(q, [colname, col_def.get_json()])
     self.dbconn.commit()
 def _alter_tables(self):
     """Record this module and its output columns in the result database.

     One row (module name, title, version) is upserted into the
     ``<level>_annotator`` table.  For every entry of
     ``conf['output_columns']`` the column is added to the ``<level>`` data
     table when it is missing, and its JSON definition is upserted into the
     ``<level>_header`` table.  The changes are committed before returning.
     """
     # annotator table
     # Bind the values instead of pasting them between double quotes: a
     # title containing " would otherwise produce invalid SQL.  str() keeps
     # the stored text the same as the former string formatting.
     annotator_table = self.level + '_annotator'
     self.cursor_w.execute(
         f'insert or replace into {annotator_table} values (?, ?, ?)',
         [str(self.module_name), str(self.conf['title']),
          str(self.conf['version'])])
     # data table and header table
     header_table_name = self.level + '_header'
     # use prepared statement to allow " characters in colcats and coldesc
     header_q = 'insert or replace into {} values (?, ?)'.format(
         header_table_name)
     for col_d in self.conf['output_columns']:
         col_def = ColumnDefinition(col_d)
         colname = col_def.name
         # data table: a failing probe select means the column is missing.
         # Catch Exception rather than everything so that interrupts and
         # interpreter exit requests are not mistaken for a missing column.
         try:
             self.cursor.execute(
                 f'select {colname} from {self.level} limit 1')
         except Exception:
             sql_type = self.cr_type_to_sql[col_def.type]
             self.cursor_w.execute(
                 f'alter table {self.level} add column {colname} {sql_type}')
         # header table
         self.cursor_w.execute(header_q, [colname, col_def.get_json()])
     self.dbconn.commit()
Example #3
0
 def fill_categories(self):
     """Store the observed category values of categorical output columns.

     For each configured output column whose category is "single" or
     "multi", the distinct values found in the ``<level>`` table are split
     on ";" (NULL counts as the empty string), de-duplicated, sorted, and
     written back into the column's definition in ``<level>_header``.
     This method does not commit.
     """
     update_sql = "update {}_header set col_def=? where col_name=?".format(
         self.level
     )
     for column_conf in self.conf["output_columns"]:
         definition = ColumnDefinition(column_conf)
         if definition.category not in ["single", "multi"]:
             continue
         self.cursor.execute(
             "select distinct {} from {}".format(definition.name, self.level)
         )
         observed = set()
         for row in self.cursor:
             cell = "" if row[0] is None else row[0]
             observed.update(cell.split(";"))
         definition.categories = sorted(observed)
         self.cursor.execute(
             update_sql, [definition.get_json(), definition.name]
         )
Example #4
0
 def fill_categories(self):
     """Fill in and store the category lists of categorical columns.

     Column definitions are loaded from the ``<level>_header`` table: as
     JSON from its ``col_def`` column when the current package version is
     at least 1.5.0, otherwise row-wise using the columns of
     ``ColumnDefinition.db_order`` that exist in the table.

     For each definition whose category is "single" or "multi":
     an empty (but not ``None``) category list is replaced by the distinct
     ";"-separated values found in the ``<level>`` table (NULL values are
     skipped); the list is then passed through
     ``self.do_reportsub_col_cats``, sorted, stored on the definition, and
     saved with ``self.update_col_def``.  The connection is committed at
     the end.
     """
     header_table = self.level + '_header'
     coldefs = []
     if LooseVersion(
             au.get_current_package_version()) >= LooseVersion('1.5.0'):
         self.cursor.execute(f'select col_def from {header_table}')
         for row in self.cursor:
             coldef = ColumnDefinition({})
             coldef.from_json(row[0])
             coldefs.append(coldef)
     else:
         # Older layout: one table column per definition field.
         self.cursor.execute(f'pragma table_info("{header_table}")')
         header_cols = [row[1] for row in self.cursor.fetchall()]
         select_order = [
             cname for cname in ColumnDefinition.db_order
             if cname in header_cols
         ]
         sql = 'select {} from {}'.format(', '.join(select_order),
                                          header_table)
         self.cursor.execute(sql)
         for column_header in self.cursor.fetchall():
             coldef = ColumnDefinition({})
             coldef.from_row(column_header, order=select_order)
             coldefs.append(coldef)
     for coldef in coldefs:
         if coldef.category not in ['single', 'multi']:
             continue
         col_cats = coldef.categories
         if col_cats is not None and len(col_cats) == 0:
             # No categories declared: derive them from the stored data.
             q = f'select distinct {coldef.name} from {self.level}'
             self.cursor.execute(q)
             col_set = set()
             for r in self.cursor:
                 if r[0] is None:
                     continue
                 col_set.update(r[0].split(';'))
             col_cats = list(col_set)
         # Both the derived and the declared list go through the report
         # substitution before being sorted and saved.
         col_cats = self.do_reportsub_col_cats(coldef.name, col_cats)
         col_cats.sort()
         coldef.categories = col_cats
         self.update_col_def(coldef)
     self.dbconn.commit()
 def fill_categories(self):
     """Record the category values seen for each categorical output column.

     Only configured output columns with category 'single' or 'multi' are
     handled.  Their distinct values in the ``<level>`` table are split on
     ';' (a NULL value contributes the empty string), de-duplicated and
     sorted; the result is saved in the column definition stored in
     ``<level>_header``.  The connection is committed once at the end.
     """
     for conf_entry in self.conf['output_columns']:
         coldef = ColumnDefinition(conf_entry)
         if coldef.category in ['single', 'multi']:
             self.cursor.execute(
                 f'select distinct {coldef.name} from {self.level}')
             # dict keys serve as the de-duplicated collection of tokens
             tokens = {}
             for row in self.cursor:
                 value = row[0]
                 if value is None:
                     value = ''
                 tokens.update(dict.fromkeys(value.split(';')))
             coldef.categories = sorted(tokens)
             self.cursor.execute(
                 f'update {self.level}_header set col_def=? where col_name=?',
                 [coldef.get_json(), coldef.name])
     self.dbconn.commit()
Example #6
0
 async def make_col_info(self, level, conn=None, cursor=None):
     """Build the column-group and column metadata for one report level.

     Steps, in order:
       1. read the column groups from ``<level>_annotator`` and the column
          definitions from ``<level>_header``;
       2. for the "variant" level, optionally append the gene-level columns;
       3. for the "gene" level, instantiate the summarizing modules and
          append their gene summary columns;
       4. order the groups (priority names first, the rest by display
          name) and re-order the columns to match, renaming mapper and
          tagsampler columns into the "base" group;
       5. load report substitutions from ``<level>_reportsub`` if present;
       6. work out which column numbers are displayed.

     Results are stored per level on ``self``: ``columngroups``, ``colnos``,
     ``colname_conversion``, ``newcolnos``, ``colinfo``,
     ``colnames_to_display``, ``column_subs``, ``display_select_columns``
     and ``colnos_to_display``; ``var_added_cols`` and
     ``summarizing_modules`` are also updated.

     Args:
         level: table name prefix of the level to describe; "variant" and
             "gene" get special handling below.
         conn: not referenced in this method body.
         cursor: awaitable DB cursor used for every query here.  It
             defaults to None but is used unconditionally, so callers must
             supply it.
     """
     self.colnames_to_display[level] = []
     await self.exec_db(self.store_mapper)
     cravat_conf = self.conf.get_cravat_conf()
     if "report_module_order" in cravat_conf:
         priority_colgroupnames = cravat_conf["report_module_order"]
     else:
         priority_colgroupnames = [
             "base", "hg38", "hg19", "hg18", "tagsampler"
         ]
     # level-specific column groups
     self.columngroups[level] = []
     sql = "select name, displayname from " + level + "_annotator"
     await cursor.execute(sql)
     rows = await cursor.fetchall()
     for row in rows:
         (name, displayname) = row
         self.columngroups[level].append({
             "name": name,
             "displayname": displayname,
             "count": 0
         })
     # level-specific column names
     header_table = level + "_header"
     coldefs = []
     sql = "select col_def from " + header_table
     await cursor.execute(sql)
     for row in await cursor.fetchall():
         coljson = row[0]
         coldef = ColumnDefinition({})
         coldef.from_json(coljson)
         coldefs.append(coldef)
     columns = []
     self.colnos[level] = {}
     colcount = 0
     # level-specific column details
     for coldef in coldefs:
         self.colnos[level][coldef.name] = colcount
         colcount += 1
         # categorical columns without stored categories take the distinct
         # raw values of the data table (values are not split here)
         if coldef.category in ["single", "multi"] and len(
                 coldef.categories) == 0:
             sql = "select distinct {} from {}".format(coldef.name, level)
             await cursor.execute(sql)
             rs = await cursor.fetchall()
             for r in rs:
                 coldef.categories.append(r[0])
         # column names must have exactly the form "<group>__<column>";
         # any other shape makes this unpacking raise
         [colgrpname, _] = coldef.name.split("__")
         column = coldef.get_colinfo()
         columns.append(column)
         self.add_conditional_to_colnames_to_display(
             level, column, colgrpname)
         for columngroup in self.columngroups[level]:
             if columngroup["name"] == colgrpname:
                 columngroup["count"] += 1
     # adds gene level columns to variant level.
     if (self.nogenelevelonvariantlevel == False and level == "variant"
             and await self.exec_db(self.table_exists, "gene")):
         modules_to_add = []
         q = "select name from gene_annotator"
         await cursor.execute(q)
         gene_annotators = [v[0] for v in await cursor.fetchall()]
         modules_to_add = [m for m in gene_annotators if m != "base"]
         for module in modules_to_add:
             cols = []
             q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                 module)
             await cursor.execute(q)
             rs = await cursor.fetchall()
             for r in rs:
                 cd = ColumnDefinition({})
                 cd.from_json(r[0])
                 cols.append(cd)
             q = 'select displayname from gene_annotator where name="{}"'.format(
                 module)
             await cursor.execute(q)
             r = await cursor.fetchone()
             displayname = r[0]
             self.columngroups[level].append({
                 "name": module,
                 "displayname": displayname,
                 "count": len(cols)
             })
             for coldef in cols:
                 self.colnos[level][coldef.name] = colcount
                 colcount += 1
                 # NOTE(review): the category names tested here
                 # ("category"/"multicategory") differ from the
                 # "single"/"multi" used for level columns above, and the
                 # query reads the `level` table for a gene_header column
                 # - confirm both are intended.
                 if (coldef.category in ["category", "multicategory"]
                         and len(coldef.categories) == 0):
                     sql = "select distinct {} from {}".format(
                         coldef.name, level)
                     await cursor.execute(sql)
                     rs = await cursor.fetchall()
                     for r in rs:
                         coldef.categories.append(r[0])
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.add_conditional_to_colnames_to_display(
                     level, column, module)
                 self.var_added_cols.append(coldef.name)
     # Gene level summary columns
     if level == "gene":
         q = "select name from variant_annotator"
         await cursor.execute(q)
         done_var_annotators = [v[0] for v in await cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type("annotator")
         local_modules.update(
             au.get_local_module_infos_of_type("postaggregator"))
         summarizer_module_names = []
         for module_name in done_var_annotators:
             if module_name in [
                     "base",
                     "hg19",
                     "hg18",
                     "extra_vcf_info",
                     "extra_variant_info",
             ]:
                 continue
             if module_name not in local_modules:
                 if self.args.silent == False and module_name != 'original_input':
                     print(
                         "            [{}] module does not exist in the system. Gene level summary for this module is skipped."
                         .format(module_name))
                 continue
             module = local_modules[module_name]
             if "can_summarize_by_gene" in module.conf:
                 summarizer_module_names.append(module_name)
         # the mapper always summarizes, and is placed first
         local_modules[self.mapper_name] = au.get_local_module_info(
             self.mapper_name)
         summarizer_module_names = [self.mapper_name
                                    ] + summarizer_module_names
         for module_name in summarizer_module_names:
             mi = local_modules[module_name]
             # NOTE(review): this extends sys.path on every call without
             # checking whether the directory is already listed.
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             if module_name in done_var_annotators:
                 annot_cls = util.load_class(mi.script_path,
                                             "CravatAnnotator")
             elif module_name == self.mapper_name:
                 annot_cls = util.load_class(mi.script_path, "Mapper")
             cmd = {
                 "script_path": mi.script_path,
                 "input_file": "__dummy__",
                 "output_dir": self.output_dir,
             }
             annot = annot_cls(cmd)
             cols = mi.conf["gene_summary_output_columns"]
             columngroup = {
                 "name": mi.name,
                 "displayname": mi.title,
                 "count": len(cols),
             }
             self.columngroups[level].append(columngroup)
             for col in cols:
                 coldef = ColumnDefinition(col)
                 coldef.name = columngroup["name"] + "__" + coldef.name
                 coldef.genesummary = True
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.add_conditional_to_colnames_to_display(
                     level, column, mi.name)
             self.summarizing_modules.append([mi, annot, cols])
             # summary columns are numbered after everything already known
             for col in cols:
                 fullname = module_name + "__" + col["name"]
                 self.colnos[level][fullname] = len(self.colnos[level])
     # re-orders columns groups.
     colgrps = self.columngroups[level]
     newcolgrps = []
     for priority_colgrpname in priority_colgroupnames:
         for colgrp in colgrps:
             if colgrp["name"] == priority_colgrpname:
                 # mapper and tagsampler columns are counted under the
                 # first group already placed instead of getting their own
                 # group.  NOTE(review): assumes newcolgrps is non-empty
                 # at this point.
                 if colgrp["name"] in [self.mapper_name, "tagsampler"]:
                     newcolgrps[0]["count"] += colgrp["count"]
                 else:
                     newcolgrps.append(colgrp)
                 break
     # "lastcol" is the running end position of each group
     colpos = 0
     for colgrp in newcolgrps:
         colgrp["lastcol"] = colpos + colgrp["count"]
         colpos = colgrp["lastcol"]
     # remaining groups follow, ordered by display name
     colgrpnames = [
         v["displayname"] for v in colgrps
         if v["name"] not in priority_colgroupnames
     ]
     colgrpnames.sort()
     for colgrpname in colgrpnames:
         for colgrp in colgrps:
             if colgrp["displayname"] == colgrpname:
                 colgrp["lastcol"] = colpos + colgrp["count"]
                 newcolgrps.append(colgrp)
                 colpos += colgrp["count"]
                 break
     # re-orders columns.
     self.colname_conversion[level] = {}
     new_columns = []
     self.newcolnos[level] = {}
     newcolno = 0
     new_colnames_to_display = []
     for colgrp in newcolgrps:
         colgrpname = colgrp["name"]
         for col in columns:
             colname = col["col_name"]
             [grpname, _] = colname.split("__")
             # mapper/tagsampler columns are renamed to "base__<column>";
             # colname_conversion maps the new name back to the old one
             if colgrpname == "base" and grpname in [
                     self.mapper_name, "tagsampler"
             ]:
                 newcolname = "base__" + colname.split("__")[1]
                 self.colname_conversion[level][newcolname] = colname
                 col["col_name"] = newcolname
                 new_columns.append(col)
                 self.newcolnos[level][newcolname] = newcolno
                 if newcolname in self.colnames_to_display[level]:
                     new_colnames_to_display.append(newcolname)
             elif grpname == colgrpname:
                 new_columns.append(col)
                 self.newcolnos[level][colname] = newcolno
                 if colname in self.colnames_to_display[level]:
                     new_colnames_to_display.append(colname)
             else:
                 continue
             newcolno += 1
     self.colinfo[level] = {"colgroups": newcolgrps, "columns": new_columns}
     self.colnames_to_display[level] = new_colnames_to_display
     # report substitution
     if level in ["variant", "gene"]:
         reportsubtable = level + "_reportsub"
         if await self.exec_db(self.table_exists, reportsubtable):
             q = "select * from {}".format(reportsubtable)
             await cursor.execute(q)
             # first column is the module name, second a JSON document
             reportsub = {
                 r[0]: json.loads(r[1])
                 for r in await cursor.fetchall()
             }
             self.column_subs[level] = []
             for i, column in enumerate(new_columns):
                 module, col = column["col_name"].split("__")
                 # mapper substitutions are looked up under "base"
                 if module == self.mapper_name:
                     module = "base"
                 if module in reportsub and col in reportsub[module]:
                     self.column_subs[level].append(
                         SimpleNamespace(
                             module=module,
                             col=col,
                             index=i,
                             subs=reportsub[module][col],
                         ))
                     new_columns[i]["reportsub"] = reportsub[module][col]
     # display_select_columns
     if (level in self.extract_columns_multilevel
             and len(self.extract_columns_multilevel[level]) > 0
         ) or self.concise_report:
         self.display_select_columns[level] = True
     else:
         self.display_select_columns[level] = False
     # column numbers to display
     colno = 0
     self.colnos_to_display[level] = []
     for colgroup in self.colinfo[level]["colgroups"]:
         count = colgroup["count"]
         if count == 0:
             continue
         # colno advances once per column of every non-empty group
         for col in self.colinfo[level]["columns"][colno:colno + count]:
             module_col_name = col["col_name"]
             if module_col_name in self.colnames_to_display[level]:
                 include_col = True
             else:
                 include_col = False
             if include_col:
                 self.colnos_to_display[level].append(colno)
             colno += 1
Example #7
0
 async def make_col_info(self, level):
     """Build the column-group and column metadata for one report level.

     Steps, in order:
       1. read the column groups from ``<level>_annotator`` and the column
          definitions from ``<level>_header`` (via ``self.cursor``);
       2. for the 'variant' level, optionally append the gene-level columns;
       3. for the 'gene' level, instantiate the summarizing modules and
          append their gene summary columns;
       4. order the groups (priority names first, the rest by display
          name) and re-order the columns to match, renaming mapper and
          tagsampler columns into the 'base' group;
       5. load report substitutions from ``<level>_reportsub`` if present.

     Results are stored on ``self``: per level in ``columngroups``,
     ``colnos``, ``colname_conversion``, ``newcolnos``, ``colinfo``,
     ``column_subs`` and ``column_sub_allow_partial_match``; plus
     ``var_added_cols``, ``summarizing_modules`` and
     ``report_substitution``.

     Args:
         level: table name prefix of the level to describe; 'variant' and
             'gene' get special handling below.
     """
     await self.store_mapper()
     cravat_conf = self.conf.get_cravat_conf()
     if 'report_module_order' in cravat_conf:
         priority_colgroupnames = cravat_conf['report_module_order']
     else:
         priority_colgroupnames = [
             'base', 'hg38', 'hg19', 'hg18', 'tagsampler'
         ]
     # level-specific column groups
     self.columngroups[level] = []
     sql = 'select name, displayname from ' + level + '_annotator'
     await self.cursor.execute(sql)
     rows = await self.cursor.fetchall()
     for row in rows:
         (name, displayname) = row
         self.columngroups[level].append({
             'name': name,
             'displayname': displayname,
             'count': 0
         })
     # level-specific column names
     header_table = level + '_header'
     coldefs = []
     sql = 'select col_def from ' + header_table
     await self.cursor.execute(sql)
     for row in await self.cursor.fetchall():
         coljson = row[0]
         coldef = ColumnDefinition({})
         coldef.from_json(coljson)
         coldefs.append(coldef)
     columns = []
     self.colnos[level] = {}
     colcount = 0
     # level-specific column details
     for coldef in coldefs:
         self.colnos[level][coldef.name] = colcount
         colcount += 1
         # categorical columns without stored categories take the distinct
         # raw values of the data table (values are not split here)
         if coldef.category in ['single', 'multi'] and len(
                 coldef.categories) == 0:
             sql = 'select distinct {} from {}'.format(coldef.name, level)
             await self.cursor.execute(sql)
             rs = await self.cursor.fetchall()
             for r in rs:
                 coldef.categories.append(r[0])
         # column names must have exactly the form '<group>__<column>';
         # any other shape makes this unpacking raise
         [colgrpname, colonlyname] = coldef.name.split('__')
         column = coldef.get_colinfo()
         columns.append(column)
         for columngroup in self.columngroups[level]:
             if columngroup['name'] == colgrpname:
                 columngroup['count'] += 1
     # adds gene level columns to variant level.
     if self.nogenelevelonvariantlevel == False and level == 'variant' and await self.table_exists(
             'gene'):
         modules_to_add = []
         q = 'select name from gene_annotator'
         await self.cursor.execute(q)
         gene_annotators = [v[0] for v in await self.cursor.fetchall()]
         modules_to_add = [m for m in gene_annotators if m != 'base']
         for module in modules_to_add:
             # modules_to_add is derived from gene_annotators, so this
             # guard never skips anything
             if not module in gene_annotators:
                 continue
             cols = []
             q = 'select col_def from gene_header where col_name like "{}__%"'.format(
                 module)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             for r in rs:
                 cd = ColumnDefinition({})
                 cd.from_json(r[0])
                 cols.append(cd)
             q = 'select displayname from gene_annotator where name="{}"'.format(
                 module)
             await self.cursor.execute(q)
             r = await self.cursor.fetchone()
             displayname = r[0]
             self.columngroups[level].append({
                 'name': module,
                 'displayname': displayname,
                 'count': len(cols)
             })
             for coldef in cols:
                 self.colnos[level][coldef.name] = colcount
                 colcount += 1
                 # NOTE(review): the category names tested here
                 # ('category'/'multicategory') differ from the
                 # 'single'/'multi' used for level columns above, and the
                 # query reads the `level` table for a gene_header column
                 # - confirm both are intended.
                 if coldef.category in ['category', 'multicategory'
                                        ] and len(coldef.categories) == 0:
                     sql = 'select distinct {} from {}'.format(
                         coldef.name, level)
                     await self.cursor.execute(sql)
                     rs = await self.cursor.fetchall()
                     for r in rs:
                         coldef.categories.append(r[0])
                 column = coldef.get_colinfo()
                 columns.append(column)
                 self.var_added_cols.append(coldef.name)
     # Gene level summary columns
     if level == 'gene':
         q = 'select name from variant_annotator'
         await self.cursor.execute(q)
         done_var_annotators = [v[0] for v in await self.cursor.fetchall()]
         self.summarizing_modules = []
         local_modules = au.get_local_module_infos_of_type('annotator')
         local_modules.update(
             au.get_local_module_infos_of_type('postaggregator'))
         summarizer_module_names = []
         for module_name in done_var_annotators:
             if module_name in [
                     'base', 'hg19', 'hg18', 'extra_vcf_info',
                     'extra_variant_info'
             ]:
                 continue
             if module_name not in local_modules:
                 print(
                     '            [{}] module does not exist in the system. Gene level summary for this module is skipped.'
                     .format(module_name))
                 continue
             module = local_modules[module_name]
             if 'can_summarize_by_gene' in module.conf:
                 summarizer_module_names.append(module_name)
         # the mapper always summarizes, and is placed first
         local_modules[self.mapper_name] = au.get_local_module_info(
             self.mapper_name)
         summarizer_module_names = [self.mapper_name
                                    ] + summarizer_module_names
         for module_name in summarizer_module_names:
             mi = local_modules[module_name]
             # NOTE(review): this extends sys.path on every call without
             # checking whether the directory is already listed.
             sys.path = sys.path + [os.path.dirname(mi.script_path)]
             if module_name in done_var_annotators:
                 annot_cls = util.load_class(mi.script_path,
                                             'CravatAnnotator')
             elif module_name == self.mapper_name:
                 annot_cls = util.load_class(mi.script_path, 'Mapper')
             annot = annot_cls(
                 [mi.script_path, '__dummy__', '-d', self.output_dir], {})
             # The string literal below is disabled older code kept as a
             # no-op expression statement.
             '''
             cols = conf['gene_summary_output_columns']
             columngroup = {}
             columngroup['name'] = os.path.basename(mi.script_path).split('.')[0]
             columngroup['displayname'] = conf['title']
             columngroup['count'] = len(cols)
             '''
             cols = mi.conf['gene_summary_output_columns']
             columngroup = {
                 'name': mi.name,
                 'displayname': mi.title,
                 'count': len(cols),
             }
             self.columngroups[level].append(columngroup)
             for col in cols:
                 coldef = ColumnDefinition(col)
                 coldef.name = columngroup['name'] + '__' + coldef.name
                 coldef.genesummary = True
                 column = coldef.get_colinfo()
                 columns.append(column)
             self.summarizing_modules.append([mi, annot, cols])
             # summary columns are numbered after everything already known
             for col in cols:
                 fullname = module_name + '__' + col['name']
                 self.colnos[level][fullname] = len(self.colnos[level])
     # re-orders columns groups.
     colgrps = self.columngroups[level]
     newcolgrps = []
     for priority_colgrpname in priority_colgroupnames:
         for colgrp in colgrps:
             if colgrp['name'] == priority_colgrpname:
                 # mapper and tagsampler columns are counted under the
                 # first group already placed instead of getting their own
                 # group.  NOTE(review): assumes newcolgrps is non-empty
                 # at this point.
                 if colgrp['name'] in [self.mapper_name, 'tagsampler']:
                     newcolgrps[0]['count'] += colgrp['count']
                 else:
                     newcolgrps.append(colgrp)
                 break
     # 'lastcol' is the running end position of each group
     colpos = 0
     for colgrp in newcolgrps:
         colgrp['lastcol'] = colpos + colgrp['count']
         colpos = colgrp['lastcol']
     # remaining groups follow, ordered by display name
     colgrpnames = [
         v['displayname'] for v in colgrps
         if v['name'] not in priority_colgroupnames
     ]
     colgrpnames.sort()
     for colgrpname in colgrpnames:
         for colgrp in colgrps:
             if colgrp['displayname'] == colgrpname:
                 colgrp['lastcol'] = colpos + colgrp['count']
                 newcolgrps.append(colgrp)
                 colpos += colgrp['count']
                 break
     # re-orders columns.
     self.colname_conversion[level] = {}
     new_columns = []
     self.newcolnos[level] = {}
     newcolno = 0
     for colgrp in newcolgrps:
         colgrpname = colgrp['name']
         for col in columns:
             colname = col['col_name']
             [grpname, oricolname] = colname.split('__')
             # mapper/tagsampler columns are renamed to 'base__<column>';
             # colname_conversion maps the new name back to the old one
             if colgrpname == 'base' and grpname in [
                     self.mapper_name, 'tagsampler'
             ]:
                 newcolname = 'base__' + colname.split('__')[1]
                 self.colname_conversion[level][newcolname] = colname
                 col['col_name'] = newcolname
                 new_columns.append(col)
                 self.newcolnos[level][newcolname] = newcolno
                 #self.colnos[level][newcolname] = colno
                 #del self.colnos[level][oldcolname]
             elif grpname == colgrpname:
                 new_columns.append(col)
                 self.newcolnos[level][colname] = newcolno
             else:
                 continue
             newcolno += 1
     self.colinfo[level] = {'colgroups': newcolgrps, 'columns': new_columns}
     # report substitution
     if level in ['variant', 'gene']:
         reportsubtable = level + '_reportsub'
         if await self.table_exists(reportsubtable):
             q = 'select * from {}'.format(reportsubtable)
             await self.cursor.execute(q)
             rs = await self.cursor.fetchall()
             # first column is the module name, second a JSON document
             self.report_substitution = {}
             for r in rs:
                 module = r[0]
                 sub = json.loads(r[1])
                 self.report_substitution[module] = sub
             # both dicts below are keyed by the column's index in
             # new_columns
             self.column_subs[level] = {}
             self.column_sub_allow_partial_match[level] = {}
             for i in range(len(new_columns)):
                 column = new_columns[i]
                 [module, col] = column['col_name'].split('__')
                 # mapper substitutions are looked up under 'base'
                 if module in [self.mapper_name]:
                     module = 'base'
                 if module in self.report_substitution:
                     sub = self.report_substitution[module]
                     if col in sub:
                         # all_mappings / all_so of the base group get
                         # whole-word regex keys; every other column keeps
                         # the plain substitution dict
                         if module in [
                                 'base', self.mapper_name
                         ] and col in ['all_mappings', 'all_so']:
                             allow_partial_match = True
                             self.column_subs[level][i] = {
                                 re.compile(fr'\b{key}\b'): val
                                 for key, val in sub[col].items()
                             }
                         else:
                             allow_partial_match = False
                             self.column_subs[level][i] = sub[col]
                         self.column_sub_allow_partial_match[level][
                             i] = allow_partial_match
                         new_columns[i]['reportsub'] = sub[col]
Example #8
0
 def _setup_table(self):
     """Create, or in append mode extend, the result tables of this level.

     Handles, in this order: the ``<level>_annotator`` table, the data
     table (``self.table_name``) with its indexes, the header table
     (``self.header_table_name``), the report substitution table
     (``self.reportsub_table_name``, only for the 'variant' and 'gene'
     levels) and the ``viewersetup`` table.  ``self.make_reportsub()`` is
     called after the report substitution table is handled and the
     connection is committed at the end.

     When ``self.append`` is false every table is dropped and recreated.
     When it is true the existing tables are kept: missing data columns are
     added, existing non-base columns are reset to NULL, and the header
     definitions are merged with the ones already stored.

     Exits the process through ``sys.exit`` when, outside append mode, two
     readers produce the same column name.
     """
     columns = []
     unique_names = set()
     # annotator table
     annotator_table = self.level + '_annotator'
     if not self.append:
         self.cursor.execute(f'drop table if exists {annotator_table}')
         self.cursor.execute(
             f'create table {annotator_table} '
             '(name text primary key, displayname text, version text)')
         # values are bound rather than written as double-quoted SQL
         # strings, which SQLite only accepts as a compatibility quirk
         self.cursor.execute(
             f'insert into {annotator_table} values (?, ?, ?)',
             ['base', 'Variant Annotation', ''])
     # base columns are stored as '<base_prefix>__<name>'
     for col_def in self.base_reader.get_all_col_defs().values():
         col_def.name = self.base_prefix + '__' + col_def.name
         columns.append(col_def)
         unique_names.add(col_def.name)
     insert_annotator = \
         f'insert or replace into {annotator_table} values (?, ?, ?)'
     for annot_name in self.annotators:
         reader = self.readers[annot_name]
         # empty names fall back to the module name / its upper-case form
         annotator_name = reader.get_annotator_name()
         if annotator_name == '':
             annotator_name = annot_name
         annotator_displayname = reader.get_annotator_displayname()
         if annotator_displayname == '':
             annotator_displayname = annotator_name.upper()
         annotator_version = reader.get_annotator_version()
         self.cursor.execute(
             insert_annotator,
             [annotator_name, annotator_displayname, annotator_version])
         for col_index in sorted(reader.get_all_col_defs().keys()):
             col_def = reader.get_col_def(col_index)
             reader_col_name = col_def.name
             # the key column is already present among the base columns
             if reader_col_name == self.key_name:
                 continue
             col_def.name = '%s__%s' % (annot_name, reader_col_name)
             if col_def.name in unique_names and not self.append:
                 err_msg = 'Duplicate column name %s found in %s. ' \
                     % (col_def.name, reader.path)
                 sys.exit(err_msg)
             columns.append(col_def)
             unique_names.add(col_def.name)
     # data table
     col_def_strings = [
         col_def.name + ' ' + self.cr_type_to_sql[col_def.type]
         for col_def in columns
     ]
     if not self.append:
         self.cursor.execute(f'drop table if exists {self.table_name}')
         q = 'create table {} ({});'.format(
             self.table_name,
             ', '.join(col_def_strings),
         )
         self.cursor.execute(q)
         # index tables
         # index_columns is a list of columns to include in this index
         for index_n, index_columns in enumerate(
                 self.base_reader.get_index_columns()):
             cols = ['base__{0}'.format(x) for x in index_columns]
             q = 'create index {}_idx_{} on {} ({});'.format(
                 self.table_name,
                 index_n,
                 self.table_name,
                 ', '.join(cols),
             )
             self.cursor.execute(q)
     else:
         self.cursor.execute(f'pragma table_info({self.table_name})')
         cur_cols = {row[1] for row in self.cursor}
         for cds in col_def_strings:
             col_name = cds.split(' ')[0]
             if col_name in cur_cols:
                 # existing base columns are left alone; other existing
                 # columns are cleared before being filled again
                 if col_name.startswith('base'):
                     continue
                 q = f'update {self.table_name} set {col_name} = null'
             else:
                 q = f'alter table {self.table_name} add column {cds}'
             self.cursor.execute(q)
     # header table
     if not self.append:
         self.cursor.execute(
             f'drop table if exists {self.header_table_name}')
         self.cursor.execute(
             f'create table {self.header_table_name} '
             '(col_name text primary key, col_def text);')
     # Merge stored definitions with the current ones: stored rows keep
     # their position, current definitions replace same-named entries.
     self.cursor.execute(
         f'select col_name, col_def from {self.header_table_name}')
     cdefs = OrderedDict()
     for cname, cjson in self.cursor:
         cdefs[cname] = ColumnDefinition(json.loads(cjson))
     if cdefs:
         self.cursor.execute(f'delete from {self.header_table_name}')
     for cdef in columns:
         cdefs[cdef.name] = cdef
     insert_template = f'insert into {self.header_table_name} values (?, ?)'
     for cdef in cdefs.values():
         self.cursor.execute(insert_template, [cdef.name, cdef.get_json()])
     # report substitution table
     if self.level in ['variant', 'gene']:
         if not self.append:
             self.cursor.execute(
                 f'drop table if exists {self.reportsub_table_name}')
             self.cursor.execute(
                 f'create table {self.reportsub_table_name} '
                 '(module text primary key, subdict text)')
             sub = getattr(self.base_reader, 'report_substitution', None)
             if sub:
                 self.cursor.execute(
                     f'insert into {self.reportsub_table_name} '
                     'values (?, ?)',
                     ['base', json.dumps(sub)])
         for module in self.readers:
             # Look the attribute up on the reader that is actually read;
             # checking only the base reader could raise AttributeError
             # here or skip a reader that does define substitutions.
             sub = getattr(self.readers[module], 'report_substitution',
                           None)
             if sub:
                 self.cursor.execute(
                     f'insert or replace into {self.reportsub_table_name} '
                     'values (?, ?)',
                     [module, json.dumps(sub)])
     self.make_reportsub()
     # filter and layout save table
     if not self.append:
         self.cursor.execute('drop table if exists viewersetup')
         self.cursor.execute(
             'create table viewersetup (datatype text, name text, '
             'viewersetup text, unique (datatype, name))')
     self.dbconn.commit()