def generate_bins(binRoot, locStart, locEnd, level, seqLength):
    ''' recursive function for generating bin index '''
    global binCount
    if level >= numLevels:
        return

    lowerBound = locStart
    upperBound = locStart + increments[level]
    currentBin = 0

    if locEnd > seqLength:
        locEnd = seqLength

    while lowerBound < locEnd:
        binCount = binCount + 1
        currentBin = currentBin + 1
        binLabel = binRoot + ".B" + xstr(currentBin) if level != 0 else xstr(binRoot)

        if upperBound > seqLength:
            upperBound = seqLength
        if upperBound > locEnd:
            upperBound = locEnd

        insert_bin(level, binCount, binLabel, lowerBound, upperBound)

        nextLevel = level + 1
        if nextLevel <= numLevels:
            if lowerBound == 0:
                warning("New Level:", level)
            generate_bins(binLabel + ".L" + xstr(nextLevel),
                          lowerBound, upperBound, nextLevel, seqLength)

        lowerBound = upperBound
        upperBound = upperBound + increments[level]
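# A minimal usage sketch for seeding the recursion (hypothetical values; the
# real script defines the numLevels/increments/binCount globals and
# insert_bin elsewhere). Each level subdivides its parent bin into windows of
# increments[level] bases, so the increments should shrink with the level:
#
#   numLevels = 3
#   increments = [10000000, 1000000, 100000]   # 10Mb, 1Mb, 100kb windows
#   binCount = 0
#   generate_bins('chr1', 0, 248956422, 0, 248956422)
#
# producing nested labels along the lines of 'chr1.L1.B3.L2.B7'.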
def match(self, chrm, position, indels=True):
    ''' match a single position in the CADD file; defaults indels to True
    b/c this will be used more often with the indel file '''
    tbxFh = self._indel if indels else self._snv
    caddChr = 'MT' if chrm == 'M' else xstr(chrm)
    try:
        # tabix coordinates are 0-based half-open, so query [position - 1, position)
        return tbxFh.fetch(caddChr, int(position) - 1, int(position),
                           parser=pysam.asTuple())
    except ValueError as e:  # happens sometimes on chrm M/MT
        warning("WARNING", e)
        return []
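# pysam's fetch() returns an iterator over the matching rows rather than a
# list, so callers have to consume it; a sketch of unpacking a hit (the
# column layout is an assumption based on CADD's tab-delimited output:
# chrom, pos, ref, alt, raw score, PHRED score):
def _example_consume_match(cadd, chrm, position):
    for row in cadd.match(chrm, position, indels=False):  # pysam tuple proxy
        ref, alt, phred = row[2], row[3], row[5]
        warning("CADD score", chrm, position, ref, alt, phred)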
def buffer_update_sql(self, metaseqId, evidence):
    chrm, pos, ref, alt = metaseqId.split(':')
    if self._chrm is not None:
        chrm = self._chrm
    else:
        chrm = 'chr' + xstr(chrm)

    # compare the first 50 chars of the metaseq_id before comparing the full id
    sql = "UPDATE Variant_" + chrm \
        + " v SET cadd_scores = '" + json.dumps(evidence) + "'" \
        + " WHERE left(v.metaseq_id, 50) = left('" + metaseqId + "',50)" \
        + " AND chromosome = '" + chrm + "'" \
        + " AND v.metaseq_id = '" + metaseqId + "'"

    self._sql_buffer.append(sql)
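# A sketch of the single statement this buffers for one variant (values are
# illustrative; buffered statements are later joined and executed in one batch):
#
#   UPDATE Variant_chr1 v SET cadd_scores = '{"CADD_raw_score": 0.15, ...}'
#   WHERE left(v.metaseq_id, 50) = left('1:12345:A:G', 50)
#     AND chromosome = 'chr1'
#     AND v.metaseq_id = '1:12345:A:G'
#
# The seemingly redundant left(..., 50) comparison presumably lets the query
# planner hit a prefix index before comparing the full metaseq_id, which can
# be very long for indels.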
def _update_current_bin_index(self, chrm, start, end):
    ''' query against the database to get the minimum enclosing bin;
    set it as the new current bin '''
    if self._verbose:
        warning("Updating current bin")

    result = None
    self._cursor.execute(BIN_INDEX_SQL, (chrm, start, end))
    try:
        self._currentBin = self._cursor.fetchone()
    except ProgrammingError:
        raise ProgrammingError('Could not map ' + chrm + ':' + xstr(start)
                               + '-' + xstr(end) + ' to a bin.')

    if self._verbose:
        warning(self._currentBin)

    return result
def find_bin_index(self, chrm, start, end=None):
    ''' finds the bin index for the position; if end is None, assume SNV
    and set end = start '''
    if end is None:
        end = start

    if 'chr' not in chrm:
        chrm = 'chr' + xstr(chrm)

    if bool(self._currentBin):
        # if a current bin exists and the variant falls in it, return it;
        # only trust sufficiently deep bins (level >= 27) -- a shallower bin
        # may be a broad bin b/c of an indel, so a fresh lookup is needed
        if self._currentBin['bin_level'] >= 27:
            brange = self._currentBin['location']
            if self._currentBin['chromosome'] == chrm \
               and start in brange and end in brange:
                return self._currentBin['global_bin_path']

    # not in the current bin, so find & return the new bin
    self._update_current_bin_index(chrm, start, end)
    return self._currentBin['global_bin_path']
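# A minimal usage sketch (assumes an initialized BinIndex over a database in
# which the bins have already been generated):
def _example_bin_lookup(indexer):
    snvBin = indexer.find_bin_index('1', 1000123)  # SNV: end defaults to start
    indelBin = indexer.find_bin_index('chr1', 1000123, 1000150)  # explicit end
    return snvBin, indelBin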
def load_annotation(chromosome):
    ''' parse over a JSON file; extract position, frequencies, ids, and
    ADSP-ranked most severe consequence; bulk load using COPY '''

    lfn = xstr(chromosome) + '.log'
    lfh = open(lfn, 'w')

    algInvocation = AlgorithmInvocation('parallel_load_dbsnp_vep_result.py',
                                        json.dumps({'chromosome': chromosome}),
                                        args.commit)
    algInvocId = xstr(algInvocation.getAlgorithmInvocationId())
    algInvocation.close()
    warning("Algorithm Invocation ID", algInvocId, file=lfh, flush=True)

    vepParser = VepJsonParser(args.rankingFile, verbose=True)
    indexer = BinIndex(args.gusConfigFile, verbose=False)

    database = Database(args.gusConfigFile)
    database.connect()

    fname = 'chr' + xstr(chromosome) + '.json.gz'
    fname = path.join(args.dir, fname)
    lineCount = 0
    variantCount = 0
    skipCount = 0

    resume = args.resumeAfter is None
    if not resume:
        warning("--resumeAfter flag specified; skipping until:",
                args.resumeAfter, file=lfh, flush=True)

    previousSnp = None
    with database.cursor() as cursor:
        copyObj = io.StringIO()
        with open(fname, 'r') as fhandle:
            mappedFile = mmap.mmap(fhandle.fileno(), 0,
                                   prot=mmap.PROT_READ)  # memory-map the file
            with gzip.GzipFile(mode='r', fileobj=mappedFile) as gfh:
                for line in gfh:
                    lineCount = lineCount + 1
                    vepResult = json.loads(line.rstrip())

                    vepParser.set_annotation(copy.deepcopy(vepResult))
                    vepInputStr = vepParser.get('input')
                    entry = VcfEntryParser(vepInputStr)

                    # there are json formatting issues w/the input str,
                    # so replace it w/the parsed entry, which is now a dict
                    vepResult['input'] = entry.get_entry()

                    refSnpId = entry.get_refsnp()

                    if not resume:
                        if previousSnp == args.resumeAfter:
                            warning(previousSnp, refSnpId, file=lfh, flush=True)
                            warning("Resuming after:", args.resumeAfter,
                                    "- SKIPPED", skipCount, "lines.",
                                    file=lfh, flush=True)
                            resume = True
                        else:
                            previousSnp = refSnpId
                            skipCount = skipCount + 1
                            continue

                    if lineCount == 1 or variantCount % args.commitAfter == 0:
                        warning('Processing new copy object', file=lfh, flush=True)
                        tstart = datetime.now()

                    vepParser.set('ref_snp_id', refSnpId)

                    refAllele = entry.get('ref')
                    altAllele = entry.get('alt').split(',')

                    chrom = xstr(entry.get('chrom'))
                    if chrom == 'MT':
                        chrom = 'M'
                    position = int(entry.get('pos'))

                    isMultiAllelic = len(altAllele) > 1

                    try:
                        vepParser.set('is_multi_allelic', isMultiAllelic)
                        vepParser.set('ref_allele', refAllele)
                        vepParser.set('alt_allele', altAllele)

                        frequencies = get_frequencies(vepParser, lfh)
                        vepParser.adsp_rank_and_sort_consequences()

                        # for each allele
                        for alt in altAllele:
                            variantCount = variantCount + 1
                            metaseqId = ':'.join((chrom, xstr(position),
                                                  refAllele, alt))
                            positionEnd = entry.infer_variant_end_location(alt)
                            binIndex = indexer.find_bin_index(chrom, position,
                                                              positionEnd)

                            # NOTE: VEP uses normalized alleles to indicate
                            # the variant_allele
                            nRef, nAlt = entry.normalize_alleles(
                                refAllele, alt, snvDivMinus=True)
                            alleleFreq = None if frequencies is None \
                                else get_allele_frequencies(nAlt, frequencies)

                            msConseq = get_most_severe_consequence(nAlt, vepParser)

                            valueStr = '#'.join((
                                'chr' + xstr(chrom),
                                xstr(position),
                                xstr(vepParser.get('is_multi_allelic')),
                                binIndex,
                                refSnpId,
                                metaseqId,
                                json2str(alleleFreq),
                                json2str(msConseq),
                                json2str(get_adsp_ranked_allele_consequences(
                                    nAlt, vepParser)),
                                json.dumps(vepResult),
                                algInvocId))

                            copyObj.write(valueStr + '\n')

                        if variantCount % args.logAfter == 0 \
                           and variantCount % args.commitAfter != 0:
                            warning("PARSED", variantCount, file=lfh, flush=True)

                        if variantCount % args.commitAfter == 0:
                            tendw = datetime.now()
                            warning('Copy object prepared in ' + str(tendw - tstart)
                                    + '; ' + str(copyObj.tell())
                                    + ' bytes; transferring to database',
                                    file=lfh, flush=True)
                            copyObj.seek(0)
                            cursor.copy_from(copyObj, 'variant', sep='#',
                                             null="NULL", columns=VARIANT_COLUMNS)

                            message = '{:,}'.format(variantCount)
                            if args.commit:
                                database.commit()
                                message = "COMMITTED " + message
                            else:
                                database.rollback()
                                message = "PARSED " + message + " -- rolling back"

                            if variantCount % args.logAfter == 0:
                                warning(message, "; up to = ", refSnpId,
                                        file=lfh, flush=True)

                            tend = datetime.now()
                            warning('Database copy time: ' + str(tend - tendw),
                                    file=lfh, flush=True)
                            warning('        Total time: ' + str(tend - tstart),
                                    file=lfh, flush=True)

                            if args.test:
                                die("Test complete")

                            copyObj = io.StringIO()  # reset io string

                    except Exception as e:
                        warning("ERROR parsing variant", refSnpId,
                                file=lfh, flush=True)
                        warning(lineCount, ":", line, file=lfh, flush=True)
                        warning(str(e), file=lfh, flush=True)
                        raise

            # final commit / leftovers
            copyObj.seek(0)
            cursor.copy_from(copyObj, 'variant', sep='#', null="NULL",
                             columns=VARIANT_COLUMNS)
            message = '{:,}'.format(variantCount)

            if args.commit:
                database.commit()
                message = "DONE - COMMITTED " + message
            else:
                database.rollback()
                message = "DONE - PARSED " + message + " -- rolling back"

            warning(message, file=lfh, flush=True)

            mappedFile.close()

    database.close()
    indexer.close()
    lfh.close()
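# The COPY payload written above is one '#'-delimited line per allele, in the
# same order as VARIANT_COLUMNS; a sketch of a single row (illustrative
# values, truncated):
#
#   chr1#1000123#True#chr1.L1.B3.L2.B7#rs123#1:1000123:A:G#{...}#{...}#{...}#{...}#17
#
# Because cursor.copy_from() streams the whole buffer as one statement, a
# single malformed row aborts the batch, which is why the parser logs the
# offending line number before re-raising.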
                    required=True,
                    help="chromosome to load, e.g., 1, 2, M, X")
parser.add_argument('--commitAfter', type=int, default=500,
                    help="commit after the specified number of inserts")
parser.add_argument('--maxWorkers', default=10, type=int)
parser.add_argument('--logAfter', type=int,
                    help="number of inserts to log after completion;"
                         " works best as a factor/multiple of commitAfter")
parser.add_argument('--verbose', action='store_true')

args = parser.parse_args()

if not args.logAfter:
    args.logAfter = args.commitAfter

VARIANT_COLUMNS = qw('chromosome location is_multi_allelic bin_index ref_snp_id'
                     ' metaseq_id allele_frequencies adsp_most_severe_consequence'
                     ' adsp_ranked_consequences vep_output row_algorithm_id',
                     returnTuple=True)

chrList = args.chr.split(',') if args.chr != 'all' \
    else [c.value for c in Human]

with ProcessPoolExecutor(args.maxWorkers) as executor:
    for c in chrList:
        warning("Create and start worker for chromosome:", xstr(c))
        executor.submit(load_annotation, c)
def load_annotation():
    ''' extract basic SNP information from a VCF line; check against
    AnnotatedDB; load if missing '''
    indexer = BinIndex(args.gusConfigFile, verbose=False)

    database = Database(args.gusConfigFile)
    database.connect()

    lineCount = 0
    variantCount = 0

    with database.cursor() as cursor:
        with open(args.vcfFile, 'r') as fh:
            with open(args.logFileName, 'w') as lfh:
                warning("Parsing", args.vcfFile, file=lfh, flush=True)
                for line in fh:
                    lineCount = lineCount + 1
                    vepResult = {}  # will just be the input string, so it matches the annotated variants

                    if line.startswith('#'):
                        continue

                    entry = VcfEntryParser(line.rstrip())

                    # there are json formatting issues w/the input str,
                    # so replace it w/the parsed entry, which is now a dict
                    vepResult['input'] = entry.get_entry()

                    refAllele = entry.get('ref')
                    altAllele = entry.get('alt')  # assuming not multiallelic

                    if refAllele == '0':  # happens sometimes
                        refAllele = '?'
                    if altAllele == '0':
                        altAllele = '?'

                    # truncatedRef = truncate(refAllele, 20)
                    # truncatedAlt = truncate(altAllele, 20)

                    chrom = xstr(entry.get('chrom'))
                    if chrom == 'MT':
                        chrom = 'M'
                    position = int(entry.get('pos'))

                    # metaseqId = ':'.join((chrom, xstr(position), truncatedRef, truncatedAlt))
                    isMultiAllelic = False  # assuming, b/c of how the VCF files were generated

                    metaseqId = ':'.join((chrom, xstr(position),
                                          refAllele, altAllele))

                    try:
                        if variant_is_missing(database, 'chr' + chrom, metaseqId):
                            warning(metaseqId, "- MISSING -- LOADING",
                                    file=lfh, flush=True)
                            variantCount = variantCount + 1

                            positionEnd = entry.infer_variant_end_location(altAllele)
                            binIndex = indexer.find_bin_index(chrom, position,
                                                              positionEnd)

                            cursor.execute(INSERT_SQL,
                                           ('chr' + chrom, position,
                                            isMultiAllelic, binIndex, metaseqId,
                                            json.dumps(vepResult), algInvocId))

                            if args.commit:
                                database.commit()
                            else:
                                database.rollback()

                        if lineCount % 50 == 0:
                            warning("Parsed", lineCount, "lines.",
                                    file=lfh, flush=True)
                            warning("Loaded", variantCount, "missing variants",
                                    file=lfh, flush=True)

                    except Exception:
                        warning("ERROR parsing variant", metaseqId,
                                file=lfh, flush=True)
                        warning(lineCount, ":", line, file=lfh, flush=True)
                        print("FAIL", file=sys.stdout)
                        raise

                warning("DONE - Loaded", variantCount, "missing variants",
                        file=lfh, flush=True)

    database.close()
    indexer.close()
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='load variants missing from VEP output into AnnotatedDB')
    parser.add_argument('-v', '--vcfFile', help="input file (vcf)", required=True)
    parser.add_argument('--commit', action='store_true',
                        help="run in (auto)commit mode", required=False)
    parser.add_argument('--logFileName', help="log file name", required=True)
    parser.add_argument('--gusConfigFile',
                        help="full path to gus config file; otherwise assumes $GUS_HOME/config/gus.config")
    parser.add_argument('--verbose', action='store_true')

    args = parser.parse_args()

    INSERT_SQL = """INSERT INTO Variant
    (chromosome, location, is_multi_allelic, bin_index, metaseq_id, vep_output, row_algorithm_id)
    VALUES (%s, %s, %s, %s, %s, %s, %s)"""

    algInvocation = AlgorithmInvocation('load_non_vep_annotated_variants_from_vcf.py',
                                        json.dumps(vars(args)), args.commit,
                                        args.gusConfigFile)
    algInvocId = xstr(algInvocation.getAlgorithmInvocationId())
    algInvocation.close()
    warning("Algorithm Invocation ID", algInvocId)

    load_annotation()

    print(algInvocId, file=sys.stdout)
def _slice(self, chrm, start, end, indels=False):
    ''' slice the CADD file; assume this is used more often with the snv
    file; returns tuples '''
    tbxFh = self._indel if indels else self._snv
    caddChr = 'MT' if chrm == 'M' else xstr(chrm)
    return tbxFh.fetch(caddChr, start, end, parser=pysam.asTuple())
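# match() probes a single 1-based position (hence the position - 1 there),
# while _slice() passes a 0-based half-open [start, end) range straight
# through to tabix; a sketch of scanning a window of SNV scores (column
# indices are the same assumption as in the match() example):
def _example_scan_window(cadd, chrm, start, end):
    return [(row[1], row[5]) for row in cadd._slice(chrm, start, end)]  # (pos, PHRED)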
def load_annotation(chromosome):
    ''' extract basic SNP information from a VCF line; check against
    AnnotatedDB; load if missing '''

    indexer = BinIndex(args.gusConfigFile, verbose=False)

    database = Database(args.gusConfigFile)
    database.connect()

    fname = '00-All.MT.vcf.gz' if chromosome == 'M' \
        else '00-All.' + xstr(chromosome) + '.vcf.gz'
    logFname = path.join(args.logDir, fname + '.log')
    fname = path.join(args.dir, fname)

    lineCount = 0
    variantCount = 0

    warning("Parsing", fname)
    warning("Logging:", logFname)

    with database.cursor() as cursor:
        with open(fname, 'r') as fhandle, open(logFname, 'w') as lfh:
            warning("Parsing", fname, file=lfh, flush=True)
            mappedFile = mmap.mmap(fhandle.fileno(), 0,
                                   prot=mmap.PROT_READ)  # memory-map the file
            with gzip.GzipFile(mode='r', fileobj=mappedFile) as gfh:
                for line in gfh:
                    lineCount = lineCount + 1
                    vepResult = {}  # will just be the input string, so it matches the annotated variants

                    if line.startswith('#'):
                        continue

                    entry = VcfEntryParser(line.rstrip())

                    # there are json formatting issues w/the input str,
                    # so replace it w/the parsed entry, which is now a dict
                    vepResult['input'] = entry.get_entry()

                    refSnpId = entry.get_refsnp()
                    refAllele = entry.get('ref')
                    altAllele = entry.get('alt').split(',')

                    if '.' not in altAllele:
                        # sanity check: all missing variants lack alt alleles
                        if lineCount % 25000 == 0:
                            warning("Parsed", lineCount, "lines.",
                                    file=lfh, flush=True)
                        continue

                    chrom = xstr(entry.get('chrom'))
                    position = int(entry.get('pos'))

                    isMultiAllelic = len(altAllele) > 1

                    try:
                        if variant_is_missing(database, 'chr' + chrom, refSnpId):
                            warning(refSnpId, "- MISSING -- LOADING",
                                    file=lfh, flush=True)
                            for alt in altAllele:
                                if alt == '.':  # checking again in case some are multiallelic
                                    alt = '?'
                                variantCount = variantCount + 1
                                metaseqId = ':'.join((chrom, xstr(position),
                                                      refAllele, alt))

                                positionEnd = entry.infer_variant_end_location(alt)
                                binIndex = indexer.find_bin_index(chrom, position,
                                                                  positionEnd)

                                cursor.execute(INSERT_SQL,
                                               ('chr' + chrom, xstr(position),
                                                isMultiAllelic, binIndex,
                                                refSnpId, metaseqId,
                                                json.dumps(vepResult),
                                                algInvocId))

                                if args.commit:
                                    database.commit()
                                else:
                                    database.rollback()

                        if lineCount % 25000 == 0:
                            warning("Parsed", lineCount, "lines.",
                                    file=lfh, flush=True)
                            warning("Loaded", variantCount, "missing variants",
                                    file=lfh, flush=True)

                    except Exception:
                        warning("ERROR parsing variant", refSnpId,
                                file=lfh, flush=True)
                        warning(lineCount, ":", line, file=lfh, flush=True)
                        raise

                if not args.commit:
                    database.rollback()
                    warning("DONE -- rolling back")

            mappedFile.close()
            database.close()
            indexer.close()

            warning("DONE - Loaded", variantCount, "missing variants",
                    file=lfh, flush=True)
                    action='store_true',
                    help="load 'commitAfter' rows as a test")
parser.add_argument('--maxWorkers', default=5, type=int)
parser.add_argument('-c', '--chr', required=True,
                    help="comma separated list of chromosomes to load, or 'all'")
parser.add_argument('--verbose', action='store_true')

args = parser.parse_args()

INSERT_SQL = """INSERT INTO Variant
(chromosome, location, is_multi_allelic, bin_index, ref_snp_id, metaseq_id, vep_output, row_algorithm_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""

algInvocation = AlgorithmInvocation('load_missing_dbsnp_from_vcf.py',
                                    json.dumps(vars(args)), args.commit)
algInvocId = xstr(algInvocation.getAlgorithmInvocationId())
algInvocation.close()
warning("Algorithm Invocation ID", algInvocId)

chrList = args.chr.split(',') if args.chr != 'all' \
    else [c.value for c in Human]

with ProcessPoolExecutor(args.maxWorkers) as executor:
    for c in chrList:
        warning("Create and start worker for chromosome:", xstr(c))
        executor.submit(load_annotation, c)
def update_variant_records_from_vcf():
    ''' look up and update variant records from a VCF file;
    assuming the load-by-file was called by a plugin, the variant has already
    been verified to be new to the resource, so there is no need to check
    alternative metaseq IDs '''
    cupdater = CADDUpdater(args.logFile, args.databaseDir)

    database = Database(args.gusConfigFile)
    database.connect()

    lineCount = 0
    with database.cursor() as updateCursor, \
         open(args.vcfFile, 'r') as fh:
        try:
            for line in fh:
                if line.startswith("#"):
                    continue

                lineCount = lineCount + 1
                entry = VcfEntryParser(line.rstrip())

                refAllele = entry.get('ref')
                altAllele = entry.get('alt')
                chrom = xstr(entry.get('chrom'))
                if chrom == 'MT':
                    chrom = 'M'
                position = int(entry.get('pos'))

                metaseqId = ':'.join((chrom, xstr(position),
                                      refAllele, altAllele))
                record = {"metaseq_id": metaseqId}  # mimic a database "record"

                if len(refAllele) > 1 or len(altAllele) > 1:  # indel
                    update_variant_record(record, cupdater, INDEL)
                else:  # SNV
                    update_variant_record(record, cupdater, SNV)

                if lineCount % args.commitAfter == 0:
                    warning("Processed:", lineCount,
                            "- SNVs:", cupdater.get_update_count(SNV),
                            "- INDELS:", cupdater.get_update_count(INDEL),
                            " - Not Matched:", cupdater.get_not_matched_count(),
                            file=cupdater.lfh(), flush=True)
                    updateCursor.execute(cupdater.sql_buffer_str())
                    if args.commit:
                        database.commit()
                    else:
                        database.rollback()
                    cupdater.clear_update_sql()

            if cupdater.buffered_variant_count() > 0:  # trailing updates
                updateCursor.execute(cupdater.sql_buffer_str())
                if args.commit:
                    database.commit()
                else:
                    database.rollback()

            warning("DONE - Updated SNVs:", cupdater.get_update_count(SNV),
                    "- Updated INDELS:", cupdater.get_update_count(INDEL),
                    "- Not Matched:", cupdater.get_not_matched_count(),
                    file=cupdater.lfh(), flush=True)
            cupdater.close_lfh()

        except Exception as e:
            warning(e, entry, file=cupdater.lfh(), flush=True)
            database.rollback()
            database.close()
            print("FAIL", file=sys.stdout)
            raise
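# The SNV/indel dispatch above keys off allele length alone; a sketch of how
# two VCF data lines map to metaseq IDs and updater modes (illustrative
# values; fields are CHROM POS ID REF ALT ...):
#
#   1  1000123  rs123  A  AG  ->  '1:1000123:A:AG'  (len(alt) > 1: INDEL)
#   1  1000124  rs124  C  T   ->  '1:1000124:C:T'   (both length 1: SNV)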
def update_variant_records(chromosome):
    chrLabel = 'chr' + xstr(chromosome)

    logFileName = path.join(args.logFilePath, chrLabel + '.log')
    cupdater = CADDUpdater(logFileName, args.databaseDir)
    cupdater.setChrm(chrLabel)

    selectSQL = "SELECT metaseq_id, cadd_scores FROM Variant_" + chrLabel

    database = Database(args.gusConfigFile)
    database.connect()

    lineCount = 0
    updateCount = 0
    updateIndelCount = 0
    skipCount = 0

    with database.cursor("RealDictCursor") as selectCursor, \
         database.cursor() as updateCursor:
        try:
            warning("Fetching", chrLabel, "variants",
                    file=cupdater.lfh(), flush=True)
            selectCursor.execute(selectSQL)
            warning("DONE - Fetching", file=cupdater.lfh(), flush=True)
            for record in selectCursor:
                if args.debug and args.veryVerbose:
                    warning(record, file=cupdater.lfh(), flush=True)

                if record['cadd_scores'] is not None:  # already scored
                    if args.debug and args.veryVerbose:
                        warning("Skipping", record['metaseq_id'],
                                file=cupdater.lfh(), flush=True)
                    skipCount = skipCount + 1
                    continue

                lineCount = lineCount + 1

                metaseqId = record['metaseq_id']
                chrm, position, refAllele, altAllele = metaseqId.split(':')

                if len(refAllele) > 1 or len(altAllele) > 1:  # indel
                    update_variant_record(record, cupdater, INDEL)
                else:  # SNV
                    update_variant_record(record, cupdater, SNV)

                if cupdater.get_total_update_count() % args.commitAfter == 0 \
                   and cupdater.buffered_variant_count() > 0:
                    if args.commit:
                        if args.debug:
                            warning("Starting Update",
                                    file=cupdater.lfh(), flush=True)
                        updateCursor.execute(cupdater.sql_buffer_str())
                        if args.debug:
                            warning("Done", file=cupdater.lfh(), flush=True)
                        cupdater.clear_update_sql()
                        database.commit()
                    else:
                        database.rollback()

                    warning(metaseqId, "- Processed:", lineCount,
                            "- SNVs:", cupdater.get_update_count(SNV),
                            "- INDELS:", cupdater.get_update_count(INDEL),
                            "- Skipped:", skipCount,
                            " - Not Matched:", cupdater.get_not_matched_count(),
                            file=cupdater.lfh(), flush=True)

            if cupdater.buffered_variant_count() > 0:  # trailing updates
                updateCursor.execute(cupdater.sql_buffer_str())
                if args.commit:
                    database.commit()
                else:
                    database.rollback()

            warning("DONE - Updated SNVs:", cupdater.get_update_count(SNV),
                    "- Updated INDELS:", cupdater.get_update_count(INDEL),
                    "- Skipped", skipCount,
                    "- Not Matched:", cupdater.get_not_matched_count(),
                    file=cupdater.lfh(), flush=True)
            cupdater.close_lfh()

        except Exception as e:
            warning(e, file=cupdater.lfh(), flush=True)
            if args.commit:
                database.commit()
            else:
                database.rollback()
            database.close()
            raise

    database.close()
def load_annotation():
    ''' parse over a JSON file; extract position, frequencies, ids, and
    ADSP-ranked most severe consequence; bulk load using COPY '''
    fname = args.inputFile
    lineCount = 0
    variantCount = 0
    skipCount = 0

    warning("Parsing variants from:", fname)  # should print to plugin log
    with database.cursor() as cursor, database.cursor("RealDictCursor") as dcursor:
        copyObj = io.StringIO()
        with open(fname, 'r') as fh:
            with open(args.logFile, 'w') as lfh:
                warning("Parsing variants from:", fname, file=lfh, flush=True)
                for line in fh:
                    lineCount = lineCount + 1
                    vepResult = json.loads(line.rstrip())

                    vepParser.set_annotation(copy.deepcopy(vepResult))
                    vepInputStr = vepParser.get('input')
                    entry = VcfEntryParser(vepInputStr)

                    # there may be json formatting issues w/the input str,
                    # so replace it w/the parsed entry, which is now a dict
                    vepResult['input'] = entry.get_entry()

                    if lineCount == 1 or variantCount % 5000 == 0:
                        warning('Processing new copy object', file=lfh, flush=True)
                        tstart = datetime.now()

                    refAllele = entry.get('ref')
                    altAllele = entry.get('alt')  # assumes no multiallelic variants

                    if refAllele == '0':  # happens sometimes
                        refAllele = '?'
                    if altAllele == '0':
                        altAllele = '?'

                    # truncatedRef = truncate(refAllele, 20)
                    # truncatedAlt = truncate(altAllele, 20)

                    chrom = xstr(entry.get('chrom'))
                    if chrom == 'MT':
                        chrom = 'M'
                    position = int(entry.get('pos'))

                    try:
                        # metaseqId = ':'.join((chrom, xstr(position), truncatedRef, truncatedAlt))
                        metaseqId = ':'.join((chrom, xstr(position),
                                              refAllele, altAllele))
                        if duplicate(metaseqId, dcursor):
                            warning("SKIPPING:", metaseqId, "- already loaded.",
                                    file=lfh, flush=True)
                            continue

                        vepParser.set('ref_allele', refAllele)
                        vepParser.set('alt_allele', altAllele)

                        frequencies = get_frequencies()
                        vepParser.adsp_rank_and_sort_consequences()

                        variantCount = variantCount + 1

                        positionEnd = entry.infer_variant_end_location(altAllele)
                        binIndex = indexer.find_bin_index(chrom, position,
                                                          positionEnd)

                        # NOTE: VEP uses normalized alleles to indicate
                        # the variant_allele
                        nRef, nAlt = entry.normalize_alleles(refAllele, altAllele,
                                                             snvDivMinus=True)
                        alleleFreq = None if frequencies is None \
                            else get_allele_frequencies(nAlt, frequencies)

                        msConseq = get_most_severe_consequence(nAlt)

                        valueStr = '#'.join((
                            'chr' + xstr(chrom),
                            xstr(position),
                            binIndex,
                            metaseqId,
                            json2str(alleleFreq),
                            json2str(msConseq),
                            json2str(get_adsp_ranked_allele_consequences(nAlt)),
                            json.dumps(vepResult),
                            algInvocId))

                        copyObj.write(valueStr + '\n')

                        if variantCount % 5000 == 0:
                            warning("FOUND", variantCount, "new variants",
                                    file=lfh, flush=True)
                            tendw = datetime.now()
                            warning('Copy object prepared in ' + str(tendw - tstart)
                                    + '; ' + str(copyObj.tell())
                                    + ' bytes; transferring to database',
                                    file=lfh, flush=True)
                            copyObj.seek(0)
                            cursor.copy_from(copyObj, 'variant', sep='#',
                                             null="NULL", columns=VARIANT_COLUMNS)

                            message = '{:,}'.format(variantCount)
                            if args.commit:
                                database.commit()
                                message = "COMMITTED " + message
                            else:
                                database.rollback()
                                message = "PARSED " + message + " -- rolling back"

                            warning(message, "; up to = ", metaseqId,
                                    file=lfh, flush=True)

                            tend = datetime.now()
                            warning('Database copy time: ' + str(tend - tendw),
                                    file=lfh, flush=True)
                            warning('        Total time: ' + str(tend - tstart),
                                    file=lfh, flush=True)

                            copyObj = io.StringIO()  # reset io string

                    except Exception:
                        warning("ERROR parsing variant on line", line, ' - ',
                                metaseqId, file=lfh, flush=True)
                        print("FAIL", file=sys.stdout)
                        raise

                # final commit / leftovers
                copyObj.seek(0)
                cursor.copy_from(copyObj, 'variant', sep='#', null="NULL",
                                 columns=VARIANT_COLUMNS)
                message = '{:,}'.format(variantCount)

                if args.commit:
                    database.commit()
                    message = "DONE - COMMITTED " + message
                else:
                    database.rollback()
                    message = "DONE - PARSED " + message + " -- rolling back"

                warning(message, file=lfh, flush=True)
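# Frequencies and consequences are keyed on the normalized alternative allele
# (nAlt) rather than the raw VCF alt, since that is what VEP reports as the
# variant_allele. A standalone sketch of the trimming that
# normalize_alleles(..., snvDivMinus=True) presumably performs (hypothetical
# helper; the real method lives on VcfEntryParser):
def _example_normalize(ref, alt):
    # strip bases shared at the start of both alleles; an allele emptied by
    # trimming becomes '-', e.g. 'AT','A' -> 'T','-' and 'A','AG' -> '-','G';
    # SNVs like 'A','G' pass through unchanged
    while ref and alt and ref[0] == alt[0]:
        ref, alt = ref[1:], alt[1:]
    return ref or '-', alt or '-'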