def filluc(invitrodbdf, mydatasetid): #set starttime starttime = time.time() username = '******' descriptor_set_id = [1445, 1447, 1446, 1448] # descriptor_set_id = [1448] # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 1445 = toxprints # 1446 = MACCS # 1447 = pubchem # 1448 = Special Toxprints for id in descriptor_set_id: # add enrichment table to the mysql database try: filled_table = handle_duplicates(invitrodbdf.loc[:, ['dsstox_compound_id', 'hitc']]) except: print("DUPLICATE HANDLER FAILED: {}".format(mydatasetid)) sys.exit(1) try: filled_table = fillfp(filled_table, id) except: print("FILLFP FAILED: {}".format(mydatasetid)) sys.exit(1) # filled_table = pd.DataFrame(filled_table) try: my_enrichment_table = enrich(filled_table) except: print("ENRICH FAILED: {}".format(mydatasetid)) print(filled_table.head()) sys.exit(1) # add fk_descriptor_id ### CHECK THAT THESE ARE MATCHING! ### mysession2 = SQLSession(Schemas.qsar_schema).get_session() query3 = mysession2.query(Descriptors.id).filter(Descriptors.fk_descriptor_set_id == id) query3 = list(query3) query3 = [int(i[0]) for i in query3] my_enrichment_table.insert(0, 'fk_descriptor_id', query3) for index, row in my_enrichment_table.iterrows(): fk_dataset_id = int(mydatasetid) fk_descriptor_id = int(row['fk_descriptor_id']) univariate_calc = UnivariateCalculations(fk_dataset_id=fk_dataset_id, fk_descriptor_id=fk_descriptor_id, updated_by=username, created_by=username) mysession2.add(univariate_calc) mysession2.flush() fk_univ_calc_id = int(univariate_calc.id) ### NEED TO CHANGE for loop & stat_list IF THE STATISTICS ARE CHANGED IN Enrichment_Table_Generator ### count = 0 for i in row[1:]: if math.isnan(i): value = None elif math.isinf(i): value = 99999999.9 else: value = float(i) stat_list = [9, 10, 11, 12, 13, 4, 8, 7, 14, 15] fk_statistic_id = int(stat_list[count]) uc_statistics = UcStatistics(value=value, fk_univ_calc_id=int(fk_univ_calc_id), fk_statistic_id=int(fk_statistic_id), created_by=username, updated_by=username) mysession2.add(uc_statistics) count += 1 mysession2.commit() # mysession2.close() endtime = time.time() print('run time:{}'.format(endtime-starttime))
aeid_data[0] = [int(x) for x in aeid_data[0]] aeid_data = aeid_data.sort_values(0, axis=0) aeid_data = aeid_data.reset_index(drop=True) # print(aeid_data) # sys.exit(1) # add ASSAY_COMPONENT_ENDPOINT_NAME as descriptors.label # use aeid as descriptors_name for i, row in aeid_data.iterrows(): username = '******' # create a new datasets name entry description = "Hit Calls from MC5 for the toxcast AEID: {} taken on the date:{}" \ .format(row[0], datetime.datetime.today().strftime("%Y%m%d")) descriptors = Descriptors(fk_descriptor_set_id='1449', index_number=str(i + 1), descriptors_name='AEID-{}'.format(row[0]), label=row[1], updated_by=username, created_by=username, long_description=description, short_description=description) mysession.add(descriptors) # mysession.flush() # fk_dataset_id = int(datasets.id) mysession.commit() # sys.exit(1)