def filldatabase(fingerprintdf):
    mysession = SQLSession(Schemas.qsar_schema).get_session()

    username = '******'
    descriptors_index = 1448

    compound_descriptor_set_objects = []

    # print(fingerprintdf)

    for index, row in fingerprintdf.iterrows():

        efk_dsstox_compound_id = row[0].split('0', 1)[1]
        fk_descriptor_set_id = descriptors_index
        xx = [str(x) for x in list(row[1])]
        descriptor_string_tsv = '\t'.join(xx)
        created_by = username
        updated_by = username
        compound_descriptor_sets = CompoundDescriptorSets(
            efk_dsstox_compound_id=efk_dsstox_compound_id,
            descriptor_string_tsv=descriptor_string_tsv,
            created_by=created_by,
            fk_descriptor_set_id=fk_descriptor_set_id,
            updated_by=updated_by)
        compound_descriptor_set_objects.append(compound_descriptor_sets)
    mysession.bulk_save_objects(compound_descriptor_set_objects)
    mysession.commit()
Ejemplo n.º 2
0
def filldatabase(yourlist, desc_set_id, prefix):
    username = '******'
    descriptors_index = desc_set_id
    mysession = SQLSession(Schemas.qsar_schema).get_session()

    descriptor_objects = []
    for index, val in enumerate(yourlist):

        fk_descriptor_set_id = descriptors_index
        index_number = index + 1
        # xx = [str(x) for x in list(row)]
        descriptors_name = str(prefix.format(index + 1))
        created_by = username
        updated_by = username
        long_description = ''
        short_description = ''
        label = val
        descriptor = Descriptors(index_number=index_number,
                                 descriptors_name=descriptors_name,
                                 long_description=long_description,
                                 short_description=short_description,
                                 label=label,
                                 created_by=created_by,
                                 fk_descriptor_set_id=fk_descriptor_set_id,
                                 updated_by=updated_by)
        descriptor_objects.append(descriptor)
    mysession.bulk_save_objects(descriptor_objects)
    mysession.commit()
Ejemplo n.º 3
0
def updatedatabase(fingerprintdf):
    username = '******'
    descriptors_index = 1448

    # Query for compound_descriptor_sets.id
    mysession = SQLSession(Schemas.qsar_schema).get_session()

    query3 = mysession.query(Compounds.dsstox_compound_id, CompoundDescriptorSets.id) \
        .join(CompoundDescriptorSets, Compounds.id == CompoundDescriptorSets.efk_dsstox_compound_id).filter(CompoundDescriptorSets.fk_descriptor_set_id == descriptors_index)

    df3 = pd.DataFrame(list(query3))

    #join query with fingerprintdf

    mytable = pd.merge(df3, fingerprintdf, on='dsstox_compound_id')

    # # # CODE FOR UPDATING # # #
    for index, row in mytable.iterrows():
        id = row[1]
        descriptor_string_tsv = row[2]
        updated_by = username
        mysession.query(CompoundDescriptorSets.id,
                        CompoundDescriptorSets.descriptor_string_tsv,
                        CompoundDescriptorSets.updated_by).filter(
                            CompoundDescriptorSets.id == id).update({
                                "descriptor_string_tsv":
                                descriptor_string_tsv,
                                "updated_by":
                                updated_by
                            })
        mysession.commit()
Ejemplo n.º 4
0
def update_toxprint_database(fingerprintdf):
    username = '******'
    descriptors_index = 1445

    # Query for compound_descriptor_sets.id
    mysession = SQLSession(Schemas.qsar_schema).get_session()

    query3 = mysession.query(CompoundDescriptorSets.id, CompoundDescriptorSets.efk_dsstox_compound_id)\
        .filter(CompoundDescriptorSets.fk_descriptor_set_id == descriptors_index)

    df3 = pd.DataFrame(list(query3))

    #join query with fingerprintdf
    fingerprintdf = fingerprintdf.reset_index()
    fingerprintdf = fingerprintdf.rename(
        columns={'M_NAME':
                 'efk_dsstox_compound_id'})  # not sure this will work
    fingerprintdf['efk_dsstox_compound_id'] = [
        int(x[8:]) for x in fingerprintdf['efk_dsstox_compound_id']
    ]
    mytable = pd.merge(df3, fingerprintdf, on='efk_dsstox_compound_id')

    # # # CODE FOR UPDATING # # #
    for index, row in mytable.iterrows():
        id = str(row[0])
        xx = [str(x) for x in list(row[2:])]
        descriptor_string_tsv = '\t'.join(xx)
        updated_by = username
        mysession.query(CompoundDescriptorSets.id,
                        CompoundDescriptorSets.descriptor_string_tsv,
                        CompoundDescriptorSets.updated_by).filter(
                            CompoundDescriptorSets.id == id).update({
                                "descriptor_string_tsv":
                                descriptor_string_tsv,
                                "updated_by":
                                updated_by
                            })
        mysession.commit()
Ejemplo n.º 5
0
def filluc(invitrodbdf, mydatasetid):
    #set starttime
    starttime = time.time()

    username = '******'
    descriptor_set_id = [1445, 1447, 1446, 1448]
    # descriptor_set_id = [1448] # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    # 1445 = toxprints
    # 1446 = MACCS
    # 1447 = pubchem
    # 1448 = Special Toxprints

    for id in descriptor_set_id:
        # add enrichment table to the mysql database
        try:
            filled_table = handle_duplicates(invitrodbdf.loc[:, ['dsstox_compound_id', 'hitc']])

        except:
            print("DUPLICATE HANDLER FAILED: {}".format(mydatasetid))
            sys.exit(1)

        try:
            filled_table = fillfp(filled_table, id)
        except:
            print("FILLFP FAILED: {}".format(mydatasetid))
            sys.exit(1)

        # filled_table = pd.DataFrame(filled_table)

        try:
            my_enrichment_table = enrich(filled_table)

        except:
            print("ENRICH FAILED: {}".format(mydatasetid))
            print(filled_table.head())
            sys.exit(1)


        # add fk_descriptor_id
        ### CHECK THAT THESE ARE MATCHING! ###
        mysession2 = SQLSession(Schemas.qsar_schema).get_session()
        query3 = mysession2.query(Descriptors.id).filter(Descriptors.fk_descriptor_set_id == id)
        query3 = list(query3)
        query3 = [int(i[0]) for i in query3]
        my_enrichment_table.insert(0, 'fk_descriptor_id', query3)

        for index, row in my_enrichment_table.iterrows():
            fk_dataset_id = int(mydatasetid)
            fk_descriptor_id = int(row['fk_descriptor_id'])

            univariate_calc = UnivariateCalculations(fk_dataset_id=fk_dataset_id,
                                                     fk_descriptor_id=fk_descriptor_id,
                                                     updated_by=username,
                                                     created_by=username)
            mysession2.add(univariate_calc)
            mysession2.flush()

            fk_univ_calc_id = int(univariate_calc.id)

            ### NEED TO CHANGE for loop & stat_list IF THE STATISTICS ARE CHANGED IN Enrichment_Table_Generator ###
            count = 0
            for i in row[1:]:

                if math.isnan(i):
                    value = None
                elif math.isinf(i):
                    value = 99999999.9
                else:
                    value = float(i)

                stat_list = [9, 10, 11, 12, 13, 4, 8, 7, 14, 15]
                fk_statistic_id = int(stat_list[count])

                uc_statistics = UcStatistics(value=value,
                                             fk_univ_calc_id=int(fk_univ_calc_id),
                                             fk_statistic_id=int(fk_statistic_id),
                                             created_by=username,
                                             updated_by=username)

                mysession2.add(uc_statistics)
                count += 1
        mysession2.commit()
        # mysession2.close()
    endtime = time.time()
    print('run time:{}'.format(endtime-starttime))
Ejemplo n.º 6
0
aeid_data[0] = [int(x) for x in aeid_data[0]]
aeid_data = aeid_data.sort_values(0, axis=0)

aeid_data = aeid_data.reset_index(drop=True)
# print(aeid_data)
# sys.exit(1)

# add ASSAY_COMPONENT_ENDPOINT_NAME as descriptors.label
# use aeid as descriptors_name

for i, row in aeid_data.iterrows():
    username = '******'
    # create a new datasets name entry

    description = "Hit Calls from MC5 for the toxcast AEID: {} taken on the date:{}" \
        .format(row[0], datetime.datetime.today().strftime("%Y%m%d"))

    descriptors = Descriptors(fk_descriptor_set_id='1449',
                              index_number=str(i + 1),
                              descriptors_name='AEID-{}'.format(row[0]),
                              label=row[1],
                              updated_by=username,
                              created_by=username,
                              long_description=description,
                              short_description=description)
    mysession.add(descriptors)
    # mysession.flush()
    # fk_dataset_id = int(datasets.id)

    mysession.commit()
    # sys.exit(1)