def update_samples_in_release_table(syn, file_mapping, release,
                                    samples_in_release_synid):
    """
    Updates the 'samples in release' table, which tracks the samples in
    each release: 1 means the sample exists in the release, 0 means it
    does not.

    Args:
        syn: synapse object
        file_mapping: file mapping generated from file mapping function
        release: GENIE release number (ie. 5.3-consortium)
        samples_in_release_synid: Synapse Id of 'samples in release' Table
    """
    clinical_ent = syn.get(file_mapping["clinical"], followLink=True)
    clinicaldf = pd.read_csv(clinical_ent.path, sep="\t", comment="#")
    cols = [
        i["name"]
        for i in list(syn.getTableColumns(samples_in_release_synid))
    ]
    # Add a column for this release if the table doesn't have one yet
    if release not in cols:
        schema = syn.get(samples_in_release_synid)
        syn_col = synapseclient.Column(
            name=release, columnType="INTEGER", defaultValue=0)
        new_column = syn.store(syn_col)
        schema.addColumn(new_column)
        schema = syn.store(schema)
    # Samples currently tracked in the 'samples in release' table
    samples_per_release = syn.tableQuery(
        'SELECT SAMPLE_ID, "{}" FROM {}'.format(
            release, samples_in_release_synid))
    samples_per_releasedf = samples_per_release.asDataFrame()
    new_samples = clinicaldf[["SAMPLE_ID"]][
        ~clinicaldf.SAMPLE_ID.isin(samples_per_releasedf.SAMPLE_ID)]
    new_samples[release] = 1
    old_samples = clinicaldf[["SAMPLE_ID"]][
        clinicaldf.SAMPLE_ID.isin(samples_per_releasedf.SAMPLE_ID)]
    old_samples[release] = 1
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead
    samples_in_releasedf = pd.concat([new_samples, old_samples])
    process_functions.updateDatabase(
        syn,
        samples_per_releasedf,
        samples_in_releasedf,
        samples_in_release_synid,
        ["SAMPLE_ID"],
    )
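
# A minimal usage sketch for update_samples_in_release_table. The Synapse
# ids and the release label below are placeholders, and `file_mapping` is
# assumed to be the dict produced by the file-mapping step with a
# "clinical" key; none of these values come from this module.
#
#     import synapseclient
#     syn = synapseclient.login()
#     file_mapping = {"clinical": "syn00000001"}  # hypothetical id
#     update_samples_in_release_table(
#         syn, file_mapping, "5.3-consortium", "syn00000002")
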
def update_data_completeness_table(syn, database_mappingdf):
    """
    Updates the data completeness of the database

    Args:
        syn: synapse object
        database_mappingdf: mapping between synapse ids and database
    """
    data_completion_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "dataCompletion"].values[0]
    sample = syn.tableQuery("select * from syn7517674")
    sampledf = sample.asDataFrame()
    patient = syn.tableQuery("select * from syn7517669")
    patientdf = patient.asDataFrame()

    data_completenessdf = pd.DataFrame()
    center_infos = sampledf.CENTER.drop_duplicates().apply(
        lambda center: get_center_data_completion(center, sampledf))
    for center_info in center_infos:
        data_completenessdf = pd.concat([data_completenessdf, center_info])

    center_infos = patientdf.CENTER.drop_duplicates().apply(
        lambda center: get_center_data_completion(center, patientdf))
    for center_info in center_infos:
        data_completenessdf = pd.concat([data_completenessdf, center_info])

    data_completeness_db = syn.tableQuery(
        "select * from %s" % data_completion_synid)
    data_completeness_dbdf = data_completeness_db.asDataFrame()
    data_completenessdf.columns = data_completeness_dbdf.columns
    process_functions.updateDatabase(
        syn,
        data_completeness_dbdf,
        data_completenessdf,
        data_completion_synid,
        ["FIELD", "CENTER"],
        to_delete=True,
    )
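
# `get_center_data_completion` is defined elsewhere in this module; the
# sketch below is only an illustration of the shape of data the loops above
# consume, assuming the helper reports the fraction of filled-in values per
# clinical field for one center. The null markers chosen here are an
# assumption, not the module's actual list.
def _get_center_data_completion_sketch(center, df):
    """Hedged sketch: per-field completeness for one center."""
    centerdf = df[df["CENTER"] == center]
    rows = []
    for field in centerdf.columns:
        values = centerdf[field].astype(str)
        # Fraction of values that are not an obvious null marker
        filled = (~values.isin(["", "nan", "Unknown", "NA"])).mean()
        rows.append([field, round(filled, 4), center])
    # Three columns; the caller above renames them to match the database
    return pd.DataFrame(rows)
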
def validation(syn, center, process, center_mapping_df,
               databaseToSynIdMappingDf, thread, testing, oncotreeLink):
    """
    Validates a center's input files, updates the validation status and
    error tracker tables, and returns the files that passed validation.

    Args:
        syn: synapse object
        center: GENIE center name
        process: Type of process to run (ie. vcf)
        center_mapping_df: mapping between centers and their Synapse ids
        databaseToSynIdMappingDf: mapping between database and Synapse ids
        thread: Parameter passed through to validateFile
        testing: Testing flag passed through to validateFile
        oncotreeLink: Link to oncotree

    Returns:
        pd.DataFrame: id, path and fileType of validated files
    """
    centerInputSynId = center_mapping_df['inputSynId'][
        center_mapping_df['center'] == center].values[0]
    logger.info("Center: " + center)
    allFiles = getCenterInputFiles(syn, centerInputSynId, center, process)
    allFiles = pd.DataFrame(allFiles, columns=['synId', 'filePaths'])
    # If a center has no files, return an empty list
    if allFiles.empty:
        logger.info("%s has not uploaded any files" % center)
        return []
    # Make sure the vcf validation statuses don't get wiped away
    if process != "vcf":
        addToQuery = "and name not like '%.vcf'"
    else:
        addToQuery = ''
    validationStatus = syn.tableQuery(
        "SELECT * FROM %s where center = '%s' %s" % (
            process_functions.getDatabaseSynId(
                syn, "validationStatus",
                databaseToSynIdMappingDf=databaseToSynIdMappingDf),
            center, addToQuery))
    errorTracker = syn.tableQuery(
        "SELECT * FROM %s where center = '%s' %s" % (
            process_functions.getDatabaseSynId(
                syn, "errorTracker",
                databaseToSynIdMappingDf=databaseToSynIdMappingDf),
            center, addToQuery))

    # VALIDATE FILES
    validationStatusDf = validationStatus.asDataFrame()
    errorTrackerDf = errorTracker.asDataFrame()
    validated = allFiles.apply(
        lambda x: validateFile(syn, validationStatusDf, errorTrackerDf,
                               center, thread, x, testing, oncotreeLink),
        axis=1)
    inputValidStatus = []
    invalidErrors = []
    for inputStat, invalErrors in validated:
        inputValidStatus.extend(inputStat)
        if invalErrors is not None:
            invalidErrors.extend(invalErrors)
    inputValidStatus = pd.DataFrame(
        inputValidStatus,
        columns=['id', 'path', 'md5', 'status', 'name',
                 'modifiedOn', 'fileType'])

    logger.info("CHECK FOR DUPLICATED FILES")
    ##### DUPLICATED FILES ######
    # Check for duplicated filenames. There should be no duplication:
    # files should be uploaded as new versions, and the entire dataset
    # should be uploaded every time.
    # cbs and seg files must not be duplicated; there can only be one.
    duplicatedFiles = inputValidStatus[
        inputValidStatus['name'].duplicated(keep=False)]
    cbsSegBool = [
        os.path.basename(i).endswith(('.cbs', '.seg'))
        for i in inputValidStatus['name']
    ]
    cbsSegFiles = inputValidStatus[cbsSegBool]
    if len(cbsSegFiles) > 1:
        # DataFrame.append was removed in pandas 2.0; use pd.concat
        duplicatedFiles = pd.concat([duplicatedFiles, cbsSegFiles])
    # nodups = ["data_mutations_extended"]
    # allDuplicatedFiles = []
    # for nodup in nodups:
    #     checkDups = [name for name in inputValidStatus['name']
    #                  if name.startswith(nodup)]
    #     if len(checkDups) > 1:
    #         allDuplicatedFiles.extend(checkDups)
    # duplicatedFiles = pd.concat([
    #     duplicatedFiles,
    #     inputValidStatus[inputValidStatus['name'].isin(allDuplicatedFiles)]])
    duplicatedFiles = duplicatedFiles.drop_duplicates("id")
    duplicatedError = (
        "DUPLICATED FILENAME! FILES SHOULD BE UPLOADED AS NEW VERSIONS "
        "AND THE ENTIRE DATASET SHOULD BE UPLOADED EVERYTIME")
    # Use .loc so the status update isn't lost to chained assignment
    inputValidStatus.loc[
        inputValidStatus['id'].isin(duplicatedFiles['id']),
        'status'] = "INVALID"
    duplicatedFiles['errors'] = duplicatedError
    # Send an email if there are any duplicated files
    if not duplicatedFiles.empty:
        incorrectFiles = ", ".join(duplicatedFiles['name'])
        incorrectEnt = syn.get(duplicatedFiles['id'].iloc[0])
        sendEmail = set([incorrectEnt.modifiedBy, incorrectEnt.createdBy])
        userNames = ", ".join(
            [syn.getUserProfile(user).userName for user in sendEmail])
        syn.sendMessage(
            list(sendEmail), "GENIE Validation Error",
            "Dear %s,\n\nYour files (%s) are duplicated! FILES SHOULD BE "
            "UPLOADED AS NEW VERSIONS AND THE ENTIRE DATASET SHOULD BE "
            "UPLOADED EVERYTIME" % (userNames, incorrectFiles))
    logger.info("THERE ARE %d DUPLICATED FILES" % len(duplicatedFiles))
    ##### DUPLICATED FILES ######

    # Create invalid error synapse table
    logger.info("UPDATE INVALID FILE REASON DATABASE")
    invalidErrors = pd.DataFrame(invalidErrors,
                                 columns=['id', 'errors', 'name'])
    # Remove fixed duplicated files
    dupIds = invalidErrors['id'][invalidErrors['errors'] == duplicatedError]
    removeIds = dupIds[~dupIds.isin(duplicatedFiles['id'])]
    invalidErrors = invalidErrors[~invalidErrors['id'].isin(removeIds)]
    # Append duplicated file errors
    invalidErrors = pd.concat(
        [invalidErrors, duplicatedFiles[['id', 'errors', 'name']]])
    invalidErrors['center'] = center
    invalidIds = inputValidStatus['id'][
        inputValidStatus['status'] == "INVALID"]
    invalidErrors = invalidErrors[invalidErrors['id'].isin(invalidIds)]
    process_functions.updateDatabase(
        syn, errorTracker.asDataFrame(), invalidErrors,
        process_functions.getDatabaseSynId(
            syn, "errorTracker",
            databaseToSynIdMappingDf=databaseToSynIdMappingDf),
        ["id"], toDelete=True)

    paths = inputValidStatus['path']
    filenames = [os.path.basename(name) for name in paths]
    del inputValidStatus['path']
    logger.info("UPDATE VALIDATION STATUS DATABASE")
    inputValidStatus['center'] = center
    # Remove fixed duplicated files
    inputValidStatus = inputValidStatus[
        ~inputValidStatus['id'].isin(removeIds)]
    process_functions.updateDatabase(
        syn, validationStatus.asDataFrame(),
        inputValidStatus[['id', 'md5', 'status', 'name',
                          'center', 'modifiedOn']],
        process_functions.getDatabaseSynId(
            syn, "validationStatus",
            databaseToSynIdMappingDf=databaseToSynIdMappingDf),
        ["id"], toDelete=True)
    inputValidStatus['path'] = paths
    validFiles = inputValidStatus[['id', 'path', 'fileType']][
        inputValidStatus['status'] == "VALIDATED"]
    return validFiles
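
# Hedged usage sketch for `validation`; the center, process value, mapping
# frame, and oncotree link are placeholders, and `center_mapping_df` is
# assumed to carry the 'center' and 'inputSynId' columns indexed above.
#
#     center_mapping_df = pd.DataFrame(
#         {'center': ['SAGE'], 'inputSynId': ['syn00000003']})
#     valid_files = validation(
#         syn, 'SAGE', 'main', center_mapping_df, databaseToSynIdMappingDf,
#         thread=1, testing=False,
#         oncotreeLink='http://oncotree.mskcc.org/api/tumorTypes/tree')
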
def print_clinical_values_difference_table(syn, database_mappingdf):
    """
    Checks for a decrease in values in the clinical file from the last
    consortium release to the most recent consortium release

    Args:
        syn: synapse object
        database_mappingdf: mapping between synapse ids and database
    """
    release_folder_fileview_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "releaseFolder"].values[0]
    clinical_key_decrease_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "clinicalKeyDecrease"].values[0]
    release_folder = syn.tableQuery(
        f"select id,name from {release_folder_fileview_synid} "
        "where name not like 'Release%' and name <> 'case_lists' "
        "and name not like '%.0.%' and name not like '%-public' "
        "and name <> 'potential_artifacts'")
    release_folderdf = release_folder.asDataFrame()
    # Set the release number as a numerical value, since the string
    # "10" < "9". The created-on date can't be used either, because
    # there are sometimes patch releases.
    release_folderdf["num_release"] = [
        float(name.replace(".0", "").replace("-consortium", ""))
        for name in release_folderdf["name"]
    ]
    release_folderdf.sort_values("num_release", ascending=False,
                                 inplace=True)
    # Use positional indexing: after sorting, the original index labels
    # are shuffled, so label-based ["id"][0] would grab the wrong row
    current_release = release_folderdf["id"].iloc[0]
    older_release = release_folderdf["id"].iloc[1]
    current_release_files = syn.getChildren(current_release)
    current_clinical_synids = {
        file["name"]: file["id"]
        for file in current_release_files
        if file["name"] in ["data_clinical_sample.txt",
                            "data_clinical_patient.txt"]
    }
    older_release_files = syn.getChildren(older_release)
    older_clinical_synids = {
        file["name"]: file["id"]
        for file in older_release_files
        if file["name"] in ["data_clinical_sample.txt",
                            "data_clinical_patient.txt"]
    }

    current_sample_ent = syn.get(
        current_clinical_synids["data_clinical_sample.txt"], followLink=True)
    older_sample_ent = syn.get(
        older_clinical_synids["data_clinical_sample.txt"], followLink=True)
    current_sampledf = pd.read_csv(current_sample_ent.path,
                                   sep="\t", comment="#")
    current_sampledf["CENTER"] = [
        patient.split("-")[1] for patient in current_sampledf["PATIENT_ID"]
    ]
    older_sampledf = pd.read_csv(older_sample_ent.path, sep="\t", comment="#")
    older_sampledf["CENTER"] = [
        patient.split("-")[1] for patient in older_sampledf["PATIENT_ID"]
    ]
    # Rather than matching on CENTER alone, compare only the samples
    # that exist in both releases (match on SAMPLE_ID)
    current_sampledf = current_sampledf[current_sampledf["SAMPLE_ID"].isin(
        older_sampledf["SAMPLE_ID"].unique())]

    logger.info("SAMPLE CLINICAL VALUE DECREASES")
    center_decrease_mapping = dict()
    for center in older_sampledf["CENTER"].unique():
        current_center_sampledf = current_sampledf[
            current_sampledf["CENTER"] == center]
        older_center_sampledf = older_sampledf[
            older_sampledf["CENTER"] == center]
        logger.info(center)
        decrease_map = check_column_decreases(current_center_sampledf,
                                              older_center_sampledf)
        center_decrease_mapping[center] = decrease_map

    current_patient_ent = syn.get(
        current_clinical_synids["data_clinical_patient.txt"],
        followLink=True)
    older_patient_ent = syn.get(
        older_clinical_synids["data_clinical_patient.txt"], followLink=True)
    current_patientdf = pd.read_csv(current_patient_ent.path,
                                    sep="\t", comment="#")
    older_patientdf = pd.read_csv(older_patient_ent.path,
                                  sep="\t", comment="#")
    # Rather than matching on CENTER alone, compare only the patients
    # that exist in both releases (match on PATIENT_ID)
    current_patientdf = current_patientdf[
        current_patientdf["PATIENT_ID"].isin(
            older_patientdf["PATIENT_ID"].unique())]

    logger.info("PATIENT CLINICAL VALUE DECREASES")
    for center in older_patientdf["CENTER"].unique():
        current_center_patientdf = current_patientdf[
            current_patientdf["CENTER"] == center]
        older_center_patientdf = older_patientdf[
            older_patientdf["CENTER"] == center]
        logger.info(center)
        patient_decrease_map = check_column_decreases(
            current_center_patientdf, older_center_patientdf)
        center_decrease_mapping[center].update(patient_decrease_map)

    center_decrease_mapping = pd.DataFrame(center_decrease_mapping)
    center_decrease_mapping = center_decrease_mapping.transpose()
    center_decrease_mapping["CENTER"] = center_decrease_mapping.index
    clinical_key_decrease = syn.tableQuery(
        "select * from {0}".format(clinical_key_decrease_synid))
    clinical_key_decreasedbdf = clinical_key_decrease.asDataFrame()
    process_functions.updateDatabase(
        syn,
        clinical_key_decreasedbdf,
        center_decrease_mapping,
        clinical_key_decrease_synid,
        ["CENTER"],
        to_delete=True,
    )
def update_sample_difference_table(syn, database_mappingdf):
    """
    Updates the sample difference table between consortium releases

    Args:
        syn: synapse object
        database_mappingdf: mapping between synapse ids and database
    """
    cumulative_sample_count_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "cumulativeSampleCount"].values[0]
    sample_diff_count_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "sampleDiffCount"].values[0]
    # UPDATE DIFF TABLE
    sample_count_per_round = syn.tableQuery(
        "SELECT * FROM %s where Center <> 'Total' and Release <> 'Database'"
        % cumulative_sample_count_synid)
    sample_count_per_rounddf = sample_count_per_round.asDataFrame()
    releases = list(sample_count_per_rounddf["Release"].unique())
    # Sort the releases and remove public releases
    releases.sort()
    consortium_releases = [
        release for release in releases
        if "public" not in release and ".0." not in release
    ]
    diff_between_releasesdf = sample_count_per_rounddf[
        sample_count_per_rounddf["Release"] == consortium_releases[0]]
    for index, release_name in enumerate(consortium_releases[1:]):
        # enumerate over consortium_releases[1:] starts at 0, so
        # consortium_releases[index] is the release just before
        # release_name
        prior_release = sample_count_per_rounddf[
            sample_count_per_rounddf["Release"] == consortium_releases[index]]
        current_release = sample_count_per_rounddf[
            sample_count_per_rounddf["Release"] == release_name]
        prior_release.index = prior_release["Center"]
        current_release.index = current_release["Center"]
        del prior_release["Center"]
        del prior_release["Release"]
        del current_release["Center"]
        del current_release["Release"]
        # Append rows for centers newly added in this release so that
        # the subtraction below lines up on the index
        new_centers = current_release.index[
            ~current_release.index.isin(prior_release.index)]
        if not new_centers.empty:
            prior_release = pd.concat(
                [prior_release, pd.DataFrame(index=new_centers)])
        prior_release = prior_release.fillna(0)
        difference = current_release - prior_release
        difference["Center"] = difference.index
        difference["Release"] = release_name
        diff_between_releasesdf = pd.concat(
            [diff_between_releasesdf, difference])

    difftable_db = syn.tableQuery(
        "SELECT * FROM %s" % sample_diff_count_synid)
    difftable_dbdf = difftable_db.asDataFrame()
    difftable_dbdf = difftable_dbdf.fillna(0)
    new_values = diff_between_releasesdf[
        ["Clinical", "Mutation", "CNV", "SEG", "Fusions"]
    ].fillna(0).applymap(int)
    diff_between_releasesdf[
        ["Clinical", "Mutation", "CNV", "SEG", "Fusions"]] = new_values
    process_functions.updateDatabase(
        syn,
        difftable_dbdf,
        diff_between_releasesdf,
        sample_diff_count_synid,
        ["Center", "Release"],
        to_delete=True,
    )
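
# To see how the per-release subtraction above aligns centers, here is a
# toy example (center names and counts are made up):
#
#     prior = pd.DataFrame({"Clinical": [10]}, index=["SAGE"])
#     current = pd.DataFrame({"Clinical": [12, 5]}, index=["SAGE", "NEW"])
#     prior = pd.concat([prior, pd.DataFrame(index=["NEW"])]).fillna(0)
#     current - prior
#     #       Clinical
#     # SAGE       2.0
#     # NEW        5.0
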
def update_oncotree_code_tables(syn, database_mappingdf):
    """
    Updates database statistics of oncotree codes
    and primary oncotree codes

    Args:
        syn: synapse object
        database_mappingdf: mapping between synapse ids and database
    """
    oncotree_distribution_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "oncotree"].values[0]
    clinical = syn.tableQuery("select * from syn7517674")
    clinicaldf = clinical.asDataFrame()

    # DISTRIBUTION OF ONCOTREE CODE TABLE UPDATE
    oncotree_code_distributiondf = pd.DataFrame(
        columns=set(clinicaldf["CENTER"]),
        index=set(clinicaldf["ONCOTREE_CODE"]))
    for center in oncotree_code_distributiondf.columns:
        onc_counts = clinicaldf["ONCOTREE_CODE"][
            clinicaldf["CENTER"] == center].value_counts()
        oncotree_code_distributiondf[center] = onc_counts
    oncotree_code_distributiondf = oncotree_code_distributiondf.fillna(0)
    oncotree_code_distributiondf = oncotree_code_distributiondf.applymap(int)
    oncotree_code_distributiondf["Total"] = oncotree_code_distributiondf.apply(
        sum, axis=1)
    oncotree_code_distributiondf["Oncotree_Code"] = (
        oncotree_code_distributiondf.index)
    oncotree_distribution_db = syn.tableQuery("SELECT %s FROM %s" % (
        "Oncotree_Code," + ",".join(clinicaldf["CENTER"].unique()) + ",Total",
        oncotree_distribution_synid,
    ))
    oncotree_distribution_dbdf = oncotree_distribution_db.asDataFrame()
    process_functions.updateDatabase(
        syn,
        oncotree_distribution_dbdf,
        oncotree_code_distributiondf,
        oncotree_distribution_synid,
        ["Oncotree_Code"],
        to_delete=True,
    )

    # DISTRIBUTION OF PRIMARY ONCOTREE CODE TABLE UPDATE
    oncotree_link_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "oncotreeLink"].values[0]
    primary_code_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "primaryCode"].values[0]
    # The most up-to-date oncotree link can be used here,
    # because these tables are updated from the database
    oncotree_link_ent = syn.get(oncotree_link_synid)
    oncotree_link = oncotree_link_ent.externalURL
    oncotree_mapping = process_functions.get_oncotree_code_mappings(
        oncotree_link)
    clinicaldf["PRIMARY_CODES"] = [
        oncotree_mapping[i.upper()]["ONCOTREE_PRIMARY_NODE"]
        if i.upper() in oncotree_mapping.keys() else "DEPRECATED_CODE"
        for i in clinicaldf.ONCOTREE_CODE
    ]
    primary_code_distributiondf = pd.DataFrame(
        columns=set(clinicaldf["CENTER"]),
        index=set(clinicaldf["PRIMARY_CODES"]))
    for center in primary_code_distributiondf.columns:
        onc_counts = clinicaldf["PRIMARY_CODES"][
            clinicaldf["CENTER"] == center].value_counts()
        primary_code_distributiondf[center] = onc_counts
    primary_code_distributiondf = primary_code_distributiondf.fillna(0)
    primary_code_distributiondf = primary_code_distributiondf.applymap(int)
    primary_code_distributiondf["Total"] = primary_code_distributiondf.apply(
        sum, axis=1)
    primary_code_distributiondf["Oncotree_Code"] = (
        primary_code_distributiondf.index)
    primary_code_dist_db = syn.tableQuery("SELECT %s FROM %s" % (
        "Oncotree_Code," + ",".join(clinicaldf["CENTER"].unique()) + ",Total",
        primary_code_synid,
    ))
    primary_code_dist_dbdf = primary_code_dist_db.asDataFrame()
    process_functions.updateDatabase(
        syn,
        primary_code_dist_dbdf,
        primary_code_distributiondf,
        primary_code_synid,
        ["Oncotree_Code"],
        to_delete=True,
    )
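
# The PRIMARY_CODES assignment above falls back to "DEPRECATED_CODE" for
# codes missing from the oncotree mapping. A toy illustration (the mapping
# content is made up):
#
#     oncotree_mapping = {"LUAD": {"ONCOTREE_PRIMARY_NODE": "LUNG"}}
#     [oncotree_mapping[c.upper()]["ONCOTREE_PRIMARY_NODE"]
#      if c.upper() in oncotree_mapping else "DEPRECATED_CODE"
#      for c in ["luad", "FAKE"]]
#     # ['LUNG', 'DEPRECATED_CODE']
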
def update_database_numbers(syn, database_mappingdf):
    """
    Updates the database cumulative numbers (only called when not staging)

    Args:
        syn: synapse object
        database_mappingdf: mapping between synapse ids and database
    """
    cumulative_sample_count_synid = database_mappingdf["Id"][
        database_mappingdf["Database"] == "cumulativeSampleCount"].values[0]
    # Database counts
    database_count = syn.tableQuery(
        "SELECT * FROM %s where Release = 'Database'"
        % cumulative_sample_count_synid)
    database_countdf = database_count.asDataFrame()
    clinical = syn.tableQuery("select CENTER from syn7517674")
    clinicaldf = clinical.asDataFrame()
    clinical_counts = clinicaldf["CENTER"].value_counts()
    clinical_counts["Total"] = sum(clinical_counts)
    clinical_counts.name = "Clinical"

    fusion = syn.tableQuery("select * from syn7893268")
    fusiondf = fusion.asDataFrame()
    fusion_counts = fusiondf["CENTER"][
        ~fusiondf["TUMOR_SAMPLE_BARCODE"].duplicated()].value_counts()
    fusion_counts["Total"] = sum(fusion_counts)

    center_flat_files = syn.getChildren("syn12278118")
    cna_file_paths = [
        syn.get(file["id"]).path
        for file in center_flat_files
        if file["name"].startswith("data_CNA")
    ]
    cna_numbers = {}
    for cna_file in cna_file_paths:
        center = os.path.basename(cna_file).replace(".txt", "").split("_")[2]
        with open(cna_file, "r") as cna:
            header = cna.readline()
            samples = header.split("\t")
            # Minus one because of the Hugo_Symbol column
            cna_numbers[center] = len(samples) - 1
    cna_counts = pd.Series(cna_numbers)
    cna_counts["Total"] = sum(cna_counts)

    seg = syn.tableQuery("select * from syn7893341")
    segdf = seg.asDataFrame()
    seg_counts = segdf["CENTER"][~segdf["ID"].duplicated()].value_counts()
    seg_counts["Total"] = sum(seg_counts)

    db_counts = pd.DataFrame(clinical_counts)
    db_counts["Fusions"] = fusion_counts
    db_counts["CNV"] = cna_counts
    # Mutation counts are taken from the clinical counts, since every
    # sample in the database has mutation data
    db_counts["Mutation"] = clinical_counts
    db_counts["SEG"] = seg_counts
    db_counts = db_counts.fillna(0)
    db_counts = db_counts.applymap(int)
    db_counts["Center"] = db_counts.index
    db_counts["Release"] = "Database"
    process_functions.updateDatabase(
        syn,
        database_countdf,
        db_counts,
        cumulative_sample_count_synid,
        ["Center", "Release"],
    )
    today = datetime.date.today()
    if today.month in [1, 4, 8, 12]:
        # rename returns a copy, which avoids writing into a slice of
        # db_counts below
        db_count_tracker = db_counts[["Clinical", "Center", "Release"]].rename(
            columns={
                "Clinical": "sample_count",
                "Center": "center",
                "Release": "date",
            })
        db_count_tracker["date"] = today.strftime("%b-%Y")
        # Hard coded Synapse id
        syn.store(synapseclient.Table("syn18404852", db_count_tracker))
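
# The CNA sample count above is read off the header line of each
# data_CNA_<CENTER>.txt file: one column per sample plus Hugo_Symbol.
# A toy illustration with a made-up header:
#
#     header = "Hugo_Symbol\tGENIE-SAGE-0001-1\tGENIE-SAGE-0002-1\n"
#     len(header.split("\t")) - 1
#     # 2
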
def update_cumulative_sample_table(syn, file_mapping, release,
                                   cumulative_sample_count_synid):
    """
    Consortium release sample count table update function.
    This gets the cumulative sample count of each file type
    in each release.

    Args:
        syn: synapse object
        file_mapping: file mapping generated from file mapping function
        release: GENIE release number (ie. 5.3-consortium)
        cumulative_sample_count_synid: Synapse Id of
            'Cumulative sample count' Table
    """
    sample_count_per_round = syn.tableQuery(
        "SELECT * FROM {} where Release = '{}'".format(
            cumulative_sample_count_synid, release))
    sample_count_per_rounddf = sample_count_per_round.asDataFrame()

    clinical_ent = syn.get(file_mapping["clinical"], followLink=True)
    clinicaldf = pd.read_csv(clinical_ent.path, sep="\t", comment="#")
    clinicaldf.columns = [i.upper() for i in clinicaldf.columns]
    if clinicaldf.get("CENTER") is None:
        clinicaldf["CENTER"] = [
            sample.split("-")[1] for sample in clinicaldf.SAMPLE_ID
        ]
    clinical_counts = clinicaldf["CENTER"].value_counts()
    clinical_counts["Total"] = sum(clinical_counts)
    clinical_counts.name = "Clinical"

    fusion_ent = syn.get(file_mapping["fusion"], followLink=True)
    fusiondf = pd.read_csv(fusion_ent.path, sep="\t", comment="#")
    fusiondf.columns = [i.upper() for i in fusiondf.columns]
    fusion_counts = fusiondf["CENTER"][
        ~fusiondf["TUMOR_SAMPLE_BARCODE"].duplicated()].value_counts()
    fusion_counts["Total"] = sum(fusion_counts)

    cna_ent = syn.get(file_mapping["cna"], followLink=True)
    cnadf = pd.read_csv(cna_ent.path, sep="\t", comment="#")
    cna_counts = pd.Series(
        [i.split("-")[1] for i in cnadf.columns[1:]]).value_counts()
    cna_counts["Total"] = sum(cna_counts)

    seg_ent = syn.get(file_mapping["seg"], followLink=True)
    segdf = pd.read_csv(seg_ent.path, sep="\t", comment="#")
    segdf.columns = [i.upper() for i in segdf.columns]
    segdf["CENTER"] = [i.split("-")[1] for i in segdf["ID"]]
    seg_counts = segdf["CENTER"][~segdf["ID"].duplicated()].value_counts()
    seg_counts["Total"] = sum(seg_counts)

    total_counts = pd.DataFrame(clinical_counts)
    total_counts["Fusions"] = fusion_counts
    total_counts["CNV"] = cna_counts
    total_counts["Mutation"] = clinical_counts
    total_counts["SEG"] = seg_counts
    total_counts = total_counts.fillna(0)
    total_counts = total_counts.applymap(int)
    total_counts["Center"] = total_counts.index
    total_counts["Release"] = release
    process_functions.updateDatabase(
        syn,
        sample_count_per_rounddf,
        total_counts,
        cumulative_sample_count_synid,
        ["Center", "Release"],
        to_delete=True,
    )
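
# Center names above are parsed out of GENIE identifiers, which take the
# form GENIE-<CENTER>-<patient>-<sample>; for example:
#
#     "GENIE-SAGE-0001-0001".split("-")[1]
#     # 'SAGE'
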