def __init__(self, response, message, parameters):
    """Store the request context and load the predictor from local files.

    The model, graph and map files are all resolved relative to the
    ``predictor`` folder located next to this source file.

    Args:
        response: Stored unchanged on ``self.response``.
        message: Stored unchanged on ``self.message``.
        parameters: Stored unchanged on ``self.parameters``.
    """
    self.response, self.message, self.parameters = response, message, parameters
    self.global_iter = 0

    # Directory containing this module; every data file lives beneath it.
    here = os.path.dirname(os.path.abspath(__file__))
    model_path = here + '/predictor/LogModel.pkl'
    graph_path = here + '/predictor/rel_max.emb.gz'
    map_path = here + '/predictor/map.csv'

    self.pred = predictor(model_file=model_path)
    self.pred.import_file(None, graph_file=graph_path, map_file=map_path)
Exemple #2
0
    def __init__(self, response, message, parameters):
        """Store the request context, fetch missing model files and load them.

        The data directory is derived from the location of this file: the
        path is cut after its ``RTX`` component and
        ``code/ARAX/ARAXQuery/Overlay/predictor/retrain_data`` is appended.
        ``LogModel.pkl`` and ``GRAPH.sqlite`` are copied there with ``scp``
        when they are not already present.

        Args:
            response: Stored unchanged on ``self.response``.
            message: Stored unchanged on ``self.message``.
            parameters: Stored unchanged on ``self.parameters``.

        Raises:
            ValueError: If no component of this file's real path is ``RTX``.
        """
        self.response = response
        self.message = message
        self.parameters = parameters
        self.global_iter = 0

        # Everything up to and including the "RTX" component is the repo root.
        path_parts = os.path.realpath(__file__).split(os.path.sep)
        rtx_pos = path_parts.index("RTX")
        filepath = os.path.sep.join(
            path_parts[:rtx_pos + 1]
            + ['code', 'ARAX', 'ARAXQuery', 'Overlay', 'predictor', 'retrain_data']
        )

        # NOTE(review): the scp sources below read "[email protected]", which
        # looks like a redacted user@host -- confirm the intended remote.

        # Trained model: download only when it is missing locally.
        pkl_file = f"{filepath}/LogModel.pkl"
        if not os.path.exists(pkl_file):
            os.system("scp [email protected]:/home/ubuntu/drug_repurposing_model_retrain/LogModel.pkl " + pkl_file)

        # Graph database: download only when it is missing locally.
        db_file = f"{filepath}/GRAPH.sqlite"
        if not os.path.exists(db_file):
            os.system("scp [email protected]:/home/ubuntu/drug_repurposing_model_retrain/GRAPH.sqlite " + db_file)

        # map.txt is neither fetched nor parsed here any more; the
        # NodeSynonymizer created below is used in its place.
        self.pred = predictor(model_file=pkl_file)
        self.pred.import_file(None, graph_database=db_file)

        self.synonymizer = NodeSynonymizer()
# Keep only the disease curies that are also known curies
# (this filters out the isolated nodes).
disease_curie_list = list(known_curies.intersection(disease_curie_list))

# Restrict the graph to rows whose label is both a drug/disease id and a
# known curie.
all_set = set(drugs['id']) | set(diseases['id'])
intersect = all_set.intersection(known_curies)
# .loc does not accept a set as an indexer in recent pandas releases
# (deprecated in 1.5, TypeError in 2.x), so hand it a list instead.
graph = graph.loc[list(intersect), :]

# delete some variables to release memory
del drugs, diseases, res, known_curies, intersect

## build up the prediction model
pred = predictor(model_file=pkl_file)

# connect to database and (re)create the SQL table from scratch
conn = sqlite3.connect('DISEASE_DRUG_PROBABILITY.sqlite')
print("INFO: Creating database DISEASE_DRUG_PROBABILITY", flush=True)
conn.execute("DROP TABLE IF EXISTs PROBABILITY")

# NOTE(review): the probability column is declared INT; SQLite's type
# affinity should still store non-integral values as REAL -- confirm this
# declaration is intended.
insert_command1 = "CREATE TABLE PROBABILITY(disease VARCHAR(255), drug VARCHAR(255), probability INT)"
conn.execute(insert_command1)
conn.commit()

## pre-create an array of all drugs which will be used in create_array function.
drug_array = graph.loc[drug_curie_list, :].to_numpy()


def create_array(curie):
    def __init__(self, response, message, parameters):
        """Store the request context, fetch missing data files and build the predictor.

        The data directory is derived from the location of this file: the
        path is cut after its ``RTX`` component and
        ``code/ARAX/ARAXQuery/Overlay/predictor/retrain_data`` is appended.
        ``LogModel.pkl``, ``GRAPH.sqlite`` and
        ``DTD_probability_database_v1.0.db`` are copied there with ``scp``
        when they are not already present.

        Failures while constructing the predictor or importing the graph
        database are not raised; they are reported through
        ``self.response.error``.

        Args:
            response: Stored on ``self.response``; its ``error`` method is
                used to report set-up failures.
            message: Stored unchanged on ``self.message``.
            parameters: Stored unchanged on ``self.parameters``.

        Raises:
            ValueError: If no component of this file's real path is ``RTX``.
        """
        self.response = response
        self.message = message
        self.parameters = parameters
        self.global_iter = 0

        ## locate the retrain_data directory relative to the "RTX" path component
        pathlist = os.path.realpath(__file__).split(os.path.sep)
        RTXindex = pathlist.index("RTX")
        filepath = os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'ARAXQuery', 'Overlay', 'predictor', 'retrain_data'])

        # NOTE(review): the scp sources below read "[email protected]", which
        # looks like a redacted user@host -- confirm the intended remote.

        ## fetch LogModel.pkl if it is missing locally
        pkl_file = f"{filepath}/LogModel.pkl"
        if not os.path.exists(pkl_file):
            os.system("scp [email protected]:/data/orangeboard/databases/KG2.3.4/LogModel.pkl " + pkl_file)

        ## fetch GRAPH.sqlite if it is missing locally
        db_file = f"{filepath}/GRAPH.sqlite"
        if not os.path.exists(db_file):
            os.system("scp [email protected]:/data/orangeboard/databases/KG2.3.4/GRAPH.sqlite " + db_file)

        ## fetch the DTD probability database if it is missing locally
        DTD_prob_db_file = f"{filepath}/DTD_probability_database_v1.0.db"
        if not os.path.exists(DTD_prob_db_file):
            os.system("scp [email protected]:/data/orangeboard/databases/KG2.3.4/DTD_probability_database_v1.0.db " + DTD_prob_db_file)

        # map.txt is neither fetched nor parsed here any more; the
        # NodeSynonymizer created at the end is used in its place.

        # Hard-wired switch: True selects the probability database, False
        # selects the pickled model plus the graph database.
        self.use_prob_db = True
        if self.use_prob_db:
            try:
                self.pred = predictor(DTD_prob_file=DTD_prob_db_file, use_prob_db=True)
            # Exception (not a bare except) so SystemExit/KeyboardInterrupt
            # still propagate instead of being logged as an internal error.
            except Exception as err:
                self.response.error(traceback.format_exc(), error_code=type(err).__name__)
                self.response.error("Internal Error encountered connecting to the local DTD prediction database.")
        else:
            try:
                self.pred = predictor(model_file=pkl_file, use_prob_db=False)
            except Exception as err:
                self.response.error(traceback.format_exc(), error_code=type(err).__name__)
                self.response.error("Internal Error encountered connecting to the local LogModel.pkl file.")
            try:
                # If the predictor above failed, self.pred is unset and the
                # resulting AttributeError is reported by this handler too.
                self.pred.import_file(None, graph_database=db_file)
            except Exception as err:
                self.response.error(traceback.format_exc(), error_code=type(err).__name__)
                self.response.error("Internal Error encountered connecting to the local graph database file.")

        self.synonymizer = NodeSynonymizer()
    def __init__(self, response, message, parameters):
        """Store the request context, fetch missing data files and build the predictor.

        The data directory is derived from the location of this file: the
        path is cut after its ``RTX`` component and
        ``code/ARAX/KnowledgeSources/Prediction`` is appended. The log model,
        graph database and DTD probability database named in ``RTXConfig``
        are copied there with ``scp`` when they are not already present;
        each local file name is the last ``/``-separated component of the
        corresponding ``RTXConfig`` path.

        Failures while constructing the predictor or importing the graph
        database are not raised; they are reported through
        ``self.response.error``.

        Args:
            response: Stored on ``self.response``; its ``error`` method is
                used to report set-up failures.
            message: Stored unchanged on ``self.message``.
            parameters: Stored unchanged on ``self.parameters``.

        Raises:
            ValueError: If no component of this file's real path is ``RTX``.
        """
        self.response = response
        self.message = message
        self.parameters = parameters
        self.global_iter = 0

        ## locate the Prediction directory relative to the "RTX" path component
        pathlist = os.path.realpath(__file__).split(os.path.sep)
        RTXindex = pathlist.index("RTX")
        filepath = os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'ARAX', 'KnowledgeSources', 'Prediction'])
        self.drug_label_list = ['chemicalsubstance', 'drug']
        self.disease_label_list = ['disease', 'phenotypicfeature', 'diseaseorphenotypicfeature']

        ## fetch the log model if it is missing locally
        log_model_name = RTXConfig.log_model_path.split("/")[-1]
        pkl_file = f"{filepath}{os.path.sep}{log_model_name}"
        if not os.path.exists(pkl_file):
            os.system(f"scp {RTXConfig.log_model_username}@{RTXConfig.log_model_host}:{RTXConfig.log_model_path} {pkl_file}")

        ## fetch the graph database if it is missing locally
        graph_database_name = RTXConfig.graph_database_path.split("/")[-1]
        db_file = f"{filepath}{os.path.sep}{graph_database_name}"
        if not os.path.exists(db_file):
            os.system(f"scp {RTXConfig.graph_database_username}@{RTXConfig.graph_database_host}:{RTXConfig.graph_database_path} {db_file}")

        ## fetch the DTD probability database if it is missing locally
        DTD_prob_db_file = f"{filepath}{os.path.sep}{RTXConfig.dtd_prob_path.split('/')[-1]}"
        if not os.path.exists(DTD_prob_db_file):
            os.system(f"scp {RTXConfig.dtd_prob_username}@{RTXConfig.dtd_prob_host}:{RTXConfig.dtd_prob_path} {DTD_prob_db_file}")

        # map.txt is neither fetched nor parsed here any more; the
        # NodeSynonymizer created at the end is used in its place.

        # Hard-wired switch: True selects the probability database, False
        # selects the pickled model plus the graph database.
        self.use_prob_db = True
        if self.use_prob_db:
            try:
                self.pred = predictor(DTD_prob_file=DTD_prob_db_file, use_prob_db=True)
            # Exception (not a bare except) so SystemExit/KeyboardInterrupt
            # still propagate instead of being logged as an internal error.
            except Exception as err:
                self.response.error(traceback.format_exc(), error_code=type(err).__name__)
                self.response.error("Internal Error encountered connecting to the local DTD prediction database.")
        else:
            try:
                self.pred = predictor(model_file=pkl_file, use_prob_db=False)
            except Exception as err:
                self.response.error(traceback.format_exc(), error_code=type(err).__name__)
                self.response.error("Internal Error encountered connecting to the local LogModel.pkl file.")
            try:
                # If the predictor above failed, self.pred is unset and the
                # resulting AttributeError is reported by this handler too.
                self.pred.import_file(None, graph_database=db_file)
            except Exception as err:
                self.response.error(traceback.format_exc(), error_code=type(err).__name__)
                self.response.error("Internal Error encountered connecting to the local graph database file.")

        self.synonymizer = NodeSynonymizer()