def filluc(invitrodbdf, mydatasetid):
    """Compute enrichment statistics for one dataset and store them in the QSAR schema.

    For every hard-coded descriptor set, the hit-call columns of
    ``invitrodbdf`` are run through ``handle_duplicates``, ``fillfp`` and
    ``enrich``. Each row of the resulting enrichment table becomes one
    ``UnivariateCalculations`` record plus one ``UcStatistics`` record per
    statistic column. Everything written for a descriptor set is committed
    together, and the session is closed afterwards even if an error occurs.

    Args:
        invitrodbdf: DataFrame with at least the ``dsstox_compound_id`` and
            ``hitc`` columns.
        mydatasetid: Dataset id; must be convertible with ``int()``. It is
            stored as ``fk_dataset_id`` and echoed in failure messages.

    Returns:
        None. The elapsed run time is printed once all sets are processed.

    Raises:
        SystemExit: With status 1 when ``handle_duplicates``, ``fillfp`` or
            ``enrich`` fails (a diagnostic is printed first).
        IndexError: When a row carries more statistic columns than
            ``stat_list`` has ids.
    """
    starttime = time.time()

    username = '******'
    # 1445 = toxprints, 1447 = pubchem, 1446 = MACCS, 1448 = Special Toxprints
    descriptor_set_ids = [1445, 1447, 1446, 1448]

    # Statistic ids, matched by position to the columns that follow
    # 'fk_descriptor_id' in the enrichment table.
    ### NEED TO CHANGE stat_list IF THE STATISTICS ARE CHANGED IN Enrichment_Table_Generator ###
    stat_list = (9, 10, 11, 12, 13, 4, 8, 7, 14, 15)

    # Finite stand-in stored in place of an infinite statistic.
    # NOTE(review): -inf is stored as this same positive value (unchanged
    # from the previous behaviour) - confirm that is intended.
    infinity_placeholder = 99999999.9

    # Converted once up front so an invalid id fails before any heavy work.
    fk_dataset_id = int(mydatasetid)

    for descriptor_set_id in descriptor_set_ids:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit are not
        # masked, and print the underlying error before exiting.
        try:
            filled_table = handle_duplicates(invitrodbdf.loc[:, ['dsstox_compound_id', 'hitc']])
        except Exception as exc:
            print("DUPLICATE HANDLER FAILED: {}".format(mydatasetid))
            print(repr(exc))
            sys.exit(1)

        try:
            filled_table = fillfp(filled_table, descriptor_set_id)
        except Exception as exc:
            print("FILLFP FAILED: {}".format(mydatasetid))
            print(repr(exc))
            sys.exit(1)

        try:
            my_enrichment_table = enrich(filled_table)
        except Exception as exc:
            print("ENRICH FAILED: {}".format(mydatasetid))
            print(repr(exc))
            print(filled_table.head())
            sys.exit(1)

        mysession2 = SQLSession(Schemas.qsar_schema).get_session()
        try:
            # add fk_descriptor_id
            ### CHECK THAT THESE ARE MATCHING! ###
            # NOTE(review): the query has no ORDER BY, so the pairing of
            # descriptor ids with enrichment rows relies on the order the
            # database happens to return - verify.
            descriptor_ids = [
                int(record[0])
                for record in mysession2.query(Descriptors.id).filter(
                    Descriptors.fk_descriptor_set_id == descriptor_set_id)
            ]
            my_enrichment_table.insert(0, 'fk_descriptor_id', descriptor_ids)

            for _, row in my_enrichment_table.iterrows():
                univariate_calc = UnivariateCalculations(fk_dataset_id=fk_dataset_id,
                                                         fk_descriptor_id=int(row['fk_descriptor_id']),
                                                         updated_by=username,
                                                         created_by=username)
                mysession2.add(univariate_calc)
                # Flush so univariate_calc.id is populated for the statistics below.
                mysession2.flush()

                fk_univ_calc_id = int(univariate_calc.id)

                # row[0] is fk_descriptor_id; the remaining entries are the statistics.
                for position, raw in enumerate(row[1:]):
                    if math.isnan(raw):
                        value = None
                    elif math.isinf(raw):
                        value = infinity_placeholder
                    else:
                        value = float(raw)

                    mysession2.add(UcStatistics(value=value,
                                                fk_univ_calc_id=fk_univ_calc_id,
                                                fk_statistic_id=int(stat_list[position]),
                                                created_by=username,
                                                updated_by=username))
            mysession2.commit()
        finally:
            # Always release the session; closing also discards any
            # uncommitted work left behind by a failure.
            mysession2.close()

    endtime = time.time()
    print('run time:{}'.format(endtime-starttime))
# Example #2
# 0
# Normalise the AEID column (label 0) to integers, then order the rows by
# AEID and renumber the index from zero.
aeid_data[0] = list(map(int, aeid_data[0]))
aeid_data = aeid_data.sort_values(0, axis=0).reset_index(drop=True)

# Insert one Descriptors row per AEID:
#   descriptors_name <- 'AEID-<aeid>'   (column 0)
#   label            <- column 1
# NOTE(review): an earlier comment says column 1 holds
# ASSAY_COMPONENT_ENDPOINT_NAME - confirm against where aeid_data is loaded.
for i, row in aeid_data.iterrows():
    username = '******'

    # The same text is used for both the long and the short description.
    description = "Hit Calls from MC5 for the toxcast AEID: {} taken on the date:{}".format(
        row[0], datetime.datetime.today().strftime("%Y%m%d"))

    descriptors = Descriptors(
        fk_descriptor_set_id='1449',
        index_number=str(i + 1),  # 1-based position within the sorted table
        descriptors_name='AEID-{}'.format(row[0]),
        label=row[1],
        updated_by=username,
        created_by=username,
        long_description=description,
        short_description=description,
    )
    mysession.add(descriptors)

    # Each descriptor is committed individually.
    mysession.commit()