def filldatabase(fingerprintdf, desc_set_id=1448):
    """Bulk-insert one CompoundDescriptorSets row per compound in *fingerprintdf*.

    NOTE(review): another function named ``filldatabase`` is defined later in
    this file and will shadow this one at import time -- confirm which
    definition callers actually get.

    :param fingerprintdf: DataFrame; column 0 holds a DSSTox compound-id string
        (everything after the first '0' is used as efk_dsstox_compound_id),
        column 1 holds an iterable of descriptor values.
    :param desc_set_id: descriptor-set foreign key; default 1448
        (the "Special Toxprints" set, per the comment in ``filluc``).
    """
    mysession = SQLSession(Schemas.qsar_schema).get_session()
    username = '******'
    compound_descriptor_set_objects = []
    for index, row in fingerprintdf.iterrows():
        # Strip the alphanumeric prefix: keep everything after the first '0'.
        # NOTE(review): this also drops any non-zero digits that precede the
        # first zero -- verify the id format always matches this rule.
        efk_dsstox_compound_id = row[0].split('0', 1)[1]
        # Descriptor values are stored as one tab-separated string.
        descriptor_string_tsv = '\t'.join(str(x) for x in list(row[1]))
        compound_descriptor_set_objects.append(CompoundDescriptorSets(
            efk_dsstox_compound_id=efk_dsstox_compound_id,
            descriptor_string_tsv=descriptor_string_tsv,
            created_by=username,
            fk_descriptor_set_id=desc_set_id,
            updated_by=username))
    mysession.bulk_save_objects(compound_descriptor_set_objects)
    mysession.commit()
def filldatabase(yourlist, desc_set_id, prefix):
    """Bulk-insert a Descriptors row for every label in ``yourlist``.

    NOTE(review): this redefines ``filldatabase`` from earlier in the file and
    shadows it at import time -- confirm which definition callers rely on.

    :param yourlist: iterable of descriptor labels; position i gets
        index_number i + 1.
    :param desc_set_id: fk_descriptor_set_id assigned to every row.
    :param prefix: format string; ``prefix.format(i + 1)`` becomes the
        descriptors_name for position i.
    """
    username = '******'
    session = SQLSession(Schemas.qsar_schema).get_session()
    descriptor_rows = []
    for position, label in enumerate(yourlist, start=1):
        descriptor_rows.append(Descriptors(
            index_number=position,
            descriptors_name=str(prefix.format(position)),
            long_description='',
            short_description='',
            label=label,
            created_by=username,
            fk_descriptor_set_id=desc_set_id,
            updated_by=username))
    session.bulk_save_objects(descriptor_rows)
    session.commit()
def updatedatabase(fingerprintdf, desc_set_id=1448):
    """Overwrite descriptor_string_tsv on existing CompoundDescriptorSets rows.

    Joins the compounds/compound_descriptor_sets tables to *fingerprintdf* on
    ``dsstox_compound_id``, then updates each matched row's TSV string.

    :param fingerprintdf: DataFrame with a 'dsstox_compound_id' column; the
        column at position 2 of the merged table supplies the new
        descriptor_string_tsv value.
    :param desc_set_id: descriptor set to update; default 1448 keeps the
        original hard-coded behavior.
    """
    username = '******'
    mysession = SQLSession(Schemas.qsar_schema).get_session()
    query3 = mysession.query(Compounds.dsstox_compound_id, CompoundDescriptorSets.id) \
        .join(CompoundDescriptorSets,
              Compounds.id == CompoundDescriptorSets.efk_dsstox_compound_id) \
        .filter(CompoundDescriptorSets.fk_descriptor_set_id == desc_set_id)
    df3 = pd.DataFrame(list(query3))
    # Join the DB rows with the incoming fingerprints on the shared id column.
    mytable = pd.merge(df3, fingerprintdf, on='dsstox_compound_id')
    for index, row in mytable.iterrows():
        # row[1] is CompoundDescriptorSets.id from the query above.
        # (renamed from `id`, which shadowed the builtin)
        row_id = row[1]
        descriptor_string_tsv = row[2]
        mysession.query(CompoundDescriptorSets.id,
                        CompoundDescriptorSets.descriptor_string_tsv,
                        CompoundDescriptorSets.updated_by) \
            .filter(CompoundDescriptorSets.id == row_id) \
            .update({
                "descriptor_string_tsv": descriptor_string_tsv,
                "updated_by": username,
            })
    # Single commit after all per-row updates.
    mysession.commit()
def update_toxprint_database(fingerprintdf, desc_set_id=1445):
    """Overwrite toxprint descriptor_string_tsv values for existing rows.

    Matches *fingerprintdf* rows to compound_descriptor_sets rows via the
    numeric tail of the 'M_NAME' column, then rewrites each matched row's
    tab-separated descriptor string.

    :param fingerprintdf: DataFrame indexed by 'M_NAME' (an id string whose
        characters after position 8 are the numeric compound id); remaining
        columns are the descriptor values.
    :param desc_set_id: descriptor set to update; default 1445 (toxprints)
        keeps the original hard-coded behavior.
    """
    username = '******'
    mysession = SQLSession(Schemas.qsar_schema).get_session()
    query3 = mysession.query(CompoundDescriptorSets.id,
                             CompoundDescriptorSets.efk_dsstox_compound_id) \
        .filter(CompoundDescriptorSets.fk_descriptor_set_id == desc_set_id)
    df3 = pd.DataFrame(list(query3))
    # Promote the index to a column and align its name with the DB column so
    # the merge below can join on it.
    fingerprintdf = fingerprintdf.reset_index()
    fingerprintdf = fingerprintdf.rename(
        columns={'M_NAME': 'efk_dsstox_compound_id'})
    # 'M_NAME' values carry an 8-character prefix before the numeric id;
    # keep only the numeric tail for the integer join key.
    fingerprintdf['efk_dsstox_compound_id'] = [
        int(x[8:]) for x in fingerprintdf['efk_dsstox_compound_id']
    ]
    mytable = pd.merge(df3, fingerprintdf, on='efk_dsstox_compound_id')
    for index, row in mytable.iterrows():
        # row[0] is CompoundDescriptorSets.id; original code compared it as a
        # string -- NOTE(review): relies on the DB coercing str to the id
        # column type; confirm the backend does this. (renamed from `id`,
        # which shadowed the builtin)
        row_id = str(row[0])
        descriptor_string_tsv = '\t'.join(str(x) for x in list(row[2:]))
        mysession.query(CompoundDescriptorSets.id,
                        CompoundDescriptorSets.descriptor_string_tsv,
                        CompoundDescriptorSets.updated_by) \
            .filter(CompoundDescriptorSets.id == row_id) \
            .update({
                "descriptor_string_tsv": descriptor_string_tsv,
                "updated_by": username,
            })
    mysession.commit()
def filluc(invitrodbdf, mydatasetid):
    """Compute enrichment statistics for a dataset and persist them.

    For each descriptor set, builds an enrichment table from the hit calls in
    *invitrodbdf*, then writes one UnivariateCalculations row per descriptor
    plus its UcStatistics values. Exits the process with status 1 if any
    pipeline stage fails (original behavior, preserved).

    :param invitrodbdf: DataFrame with 'dsstox_compound_id' and 'hitc' columns.
    :param mydatasetid: datasets.id the calculations belong to.
    """
    starttime = time.time()
    username = '******'
    # 1445 = toxprints, 1446 = MACCS, 1447 = pubchem, 1448 = Special Toxprints
    descriptor_set_ids = [1445, 1447, 1446, 1448]
    # Statistic ids, positionally matched to the columns emitted by
    # Enrichment_Table_Generator -- keep both in sync if statistics change.
    # (hoisted out of the inner loop; it was rebuilt every iteration)
    stat_ids = [9, 10, 11, 12, 13, 4, 8, 7, 14, 15]
    for set_id in descriptor_set_ids:  # renamed from `id` (shadowed builtin)
        # Bare `except:` clauses narrowed to `except Exception` so that
        # SystemExit/KeyboardInterrupt are not swallowed.
        try:
            filled_table = handle_duplicates(
                invitrodbdf.loc[:, ['dsstox_compound_id', 'hitc']])
        except Exception:
            print("DUPLICATE HANDLER FAILED: {}".format(mydatasetid))
            sys.exit(1)
        try:
            filled_table = fillfp(filled_table, set_id)
        except Exception:
            print("FILLFP FAILED: {}".format(mydatasetid))
            sys.exit(1)
        try:
            my_enrichment_table = enrich(filled_table)
        except Exception:
            print("ENRICH FAILED: {}".format(mydatasetid))
            print(filled_table.head())
            sys.exit(1)
        mysession2 = SQLSession(Schemas.qsar_schema).get_session()
        descriptor_ids = [
            int(i[0]) for i in list(
                mysession2.query(Descriptors.id)
                .filter(Descriptors.fk_descriptor_set_id == set_id))
        ]
        # Assumes the query returns descriptor ids in the same order as the
        # enrichment table rows -- TODO(review): confirm ordering is matching.
        my_enrichment_table.insert(0, 'fk_descriptor_id', descriptor_ids)
        for index, row in my_enrichment_table.iterrows():
            univariate_calc = UnivariateCalculations(
                fk_dataset_id=int(mydatasetid),
                fk_descriptor_id=int(row['fk_descriptor_id']),
                updated_by=username,
                created_by=username)
            mysession2.add(univariate_calc)
            mysession2.flush()  # populate univariate_calc.id for the FK below
            fk_univ_calc_id = int(univariate_calc.id)
            for count, stat in enumerate(row[1:]):
                if math.isnan(stat):
                    value = None
                elif math.isinf(stat):
                    value = 99999999.9  # sentinel: column cannot store inf
                else:
                    value = float(stat)
                mysession2.add(UcStatistics(
                    value=value,
                    fk_univ_calc_id=fk_univ_calc_id,
                    fk_statistic_id=int(stat_ids[count]),
                    created_by=username,
                    updated_by=username))
        mysession2.commit()
    endtime = time.time()
    print('run time:{}'.format(endtime - starttime))
# NOTE(review): this top-level fragment depends on `aeid_data` (a DataFrame
# whose column 0 holds AEIDs and column 1 holds endpoint names) and on
# `mysession`, both defined earlier in the file, outside this view.
# Normalize the AEID column to ints, then sort and reindex by it.
aeid_data[0] = [int(x) for x in aeid_data[0]]
aeid_data = aeid_data.sort_values(0, axis=0)
aeid_data = aeid_data.reset_index(drop=True)
# print(aeid_data)
# sys.exit(1)
# add ASSAY_COMPONENT_ENDPOINT_NAME as descriptors.label
# use aeid as descriptors_name
for i, row in aeid_data.iterrows():
    username = '******'
    # create a new datasets name entry
    description = "Hit Calls from MC5 for the toxcast AEID: {} taken on the date:{}" \
        .format(row[0], datetime.datetime.today().strftime("%Y%m%d"))
    # One Descriptors row per assay endpoint; set 1449 groups the MC5 hit-call
    # descriptors. index_number and fk_descriptor_set_id are passed as strings
    # here (unlike the int usage elsewhere in this file) -- TODO confirm the
    # ORM coerces them.
    descriptors = Descriptors(fk_descriptor_set_id='1449',
                              index_number=str(i + 1),
                              descriptors_name='AEID-{}'.format(row[0]),
                              label=row[1],
                              updated_by=username,
                              created_by=username,
                              long_description=description,
                              short_description=description)
    mysession.add(descriptors)
    # mysession.flush()
    # fk_dataset_id = int(datasets.id)
# Single commit after all rows are staged.
mysession.commit()
# sys.exit(1)