Example #1
0
#filePath = argData['dir'] + fileName #IMPORTANT: update this if folder name changes
# Directory part of fileName: every '/'-separated component except the last one.
tmpPath = fileName.split('/')
tmpPath.pop()  # split() always yields at least one item, so pop() cannot fail
filePath = '/'.join(tmpPath)

# Fetch the saved feature importances from fileName.
# NOTE(review): presumably a collections.Counter, since most_common() is called
# on it further down -- confirm what load_obj returns.
importance = load_obj(fileName)
#importance = Counter({'R-HSA-927802': 0.31735258141642814, 'hsa04740': 0.2208299216149202, 'hsa05100': 0.1847905733996812, 'hsa04930': 0.10625980494746863, 'hsa04514': 0.047493659101048136, 'hsa04114': 0.03542724660274679, 'hsa04810': 0.03365848585388666, 'hsa04144': 0.030556051003490892})


# Choose the database adapter used to describe the important features:
# TCRD when argData['db'] is "tcrd", OlegDB for every other value.
if argData['db'] == "tcrd":
	dbAdapter = TCRD()
else:
	dbAdapter = OlegDB()
#labelMap = convertLabels(importance.keys(),dbAdapter,selectAsDF,type='plot')

# Load the saved knowledge graph named by argData['kgfile'].
currentGraph = ProteinDiseaseAssociationGraph.load(argData['kgfile'])

# Visualize receives the original importance entries, one call per entry
# returned by most_common(numOfFeatures); each entry is echoed first.
for imp in importance.most_common(numOfFeatures):
	print(imp)
	Visualize(imp, currentGraph.graph, diseaseName, filePath, dbAdapter=dbAdapter)

#newSet = {}
#for key in importance.keys():
#	newSet[labelMap[key]] = importance[key]

#print('STARTING FEAT VIS')
#AUC = 0.9
#print(newSet,labelMap)
#featureVisualize(Counter(newSet),AUC,"AAA")
Example #2
0
# Create a Protein Disease graph from the DB adapter 'OlegDB'

from ProteinGraphML.DataAdapter import OlegDB
from ProteinGraphML.GraphTools import ProteinDiseaseAssociationGraph

## we construct a base map of protein to disease just by creating the ProteinDiseaseAssociationGraph

dbAdapter = OlegDB()
proteinGraph = ProteinDiseaseAssociationGraph(dbAdapter)

## Besides its helper methods, the 'ProteinDiseaseAssociationGraph' object
## exposes the networkx graph it was created with directly, as `.graph`.

nodeCount = len(proteinGraph.graph.nodes)
print('Total nodes: %d' % nodeCount)

## Filter by the proteins we are interested in. This list comes from the
## DB adapter, but any set will do.
proteins = dbAdapter.loadTotalProteinList().protein_id
filterByProteins = set(proteins)

# .attach() adds the edges that the adapter loads from the DB. Building the
# graph this way yields a graph of data that can itself be saved, which avoids
# rebuilding it while working on different diseases or performing analysis.
# Every loader is filtered by the proteins we care about (our original list).
edgeLoaders = (
    dbAdapter.loadPPI,
    dbAdapter.loadKegg,
    dbAdapter.loadReactome,
    dbAdapter.loadInterpro,
    dbAdapter.loadGo,
)
for loadEdges in edgeLoaders:
    proteinGraph.attach(loadEdges(filterByProteins))

# networkx provides an API we can get nodes from;
# here I exploit the unique features of each node to count them
    print("running on this disease", disease)

print("")
DEFAULT_GRAPH = "newCURRENT_GRAPH"

# CANT FIND THIS DISEASE (original author's note)
#disease = sys.argv[1]
# Only the first entry of argData['procedure'] is used.
Procedure = argData['procedure'][0]
print('Procedure', Procedure)

# The graph name is currently always DEFAULT_GRAPH.
# CANT FIND THIS GRAPH (original author's note)
graphString = DEFAULT_GRAPH
currentGraph = ProteinDiseaseAssociationGraph.load(graphString)
# SOME DISEASES CAUSE "DIVIDE BY 0 error" (original author's note)
print("GRAPH {0} LOADED".format(graphString))

# Node classes used for the metapath feature sets; no static feature sets.
nodes = [
    ProteinInteractionNode,
    KeggNode,
    ReactomeNode,
    GoNode,
    InterproNode,
]
staticFeatures = []

print("--- USING {0} METAPATH FEATURE SETS".format(len(nodes)))
print("--- USING {0} STATIC FEATURE SETS".format(len(staticFeatures)))

if fileData is not None:
    #print("FOUND {0} POSITIVE LABELS".format(len(fileData[True])))
    #print("FOUND {0} NEGATIVE LABELS".format(len(fileData[False])))
    trainData = metapathFeatures(disease,
                                 currentGraph,
                                 nodes,
print('HERE ARE LABELS')

#importance = {'hsa01100': 0.31735258141642814, 'hsa04740': 0.2208299216149202, 'hsa05100': 0.1847905733996812, 'hsa04930': 0.10625980494746863, 'hsa04514': 0.047493659101048136, 'hsa04114': 0.03542724660274679, 'hsa04810': 0.03365848585388666, 'hsa04144': 0.030556051003490892}#{"MP_0000180":34,343:1.0,30001:0.3}
#labelMap = convertLabels(importance.keys(),dbAdapter,selectAsDF,type='plot')
#for value[key] in importance.values():

# Re-key every importance value by the label that labelMap gives its key.
newSet = {labelMap[key]: importance[key] for key in importance.keys()}

# Hard-coded AUC value handed to featureVisualize further down.
AUC = 0.9
#print(newSet,labelMap)

# Disabled: the `if False` guard means this graph visualization never runs.
if False:
    currentGraph = ProteinDiseaseAssociationGraph.load("newCURRENT_GRAPH")

    # for the graph, we need the original importance
    for key in importance.keys():
        Visualize((key, importance[key]),
                  currentGraph.graph,
                  "MP_0000180",
                  dbAdapter=dbAdapter)  #g,currentGraph.graph,Disease)
        # Stop after the first key: only one feature would be visualized.
        break

print('STARTING FEAT VIS')

# Pass the re-keyed importances (wrapped in a Counter) to featureVisualize,
# together with the hard-coded AUC value and the literal "AAA".
# NOTE(review): the meaning of the third argument is not visible here -- confirm.
featureVisualize(Counter(newSet), AUC, "AAA")
#Visualize

#convertLabels([343,30001],dbAdapter,selectAsDF,type="han")
Example #5
0
    # folder where train and predict data with features will be stored
    # outputDir = argData['outputdir']
    if not os.path.isdir(args.outputdir):
        logging.info('Create the output directory')
        os.makedirs(args.outputdir)
    logging.info('Output directory for ML data(Training/predict): {0}'.format(
        args.outputdir))

    # check whether file or disease was given
    if args.trainingfile is None and args.disease is None:
        parser.error("--disease or -- training file must be specified.")

    # fetch KG data
    # graphString = args.kgfile
    currentGraph = ProteinDiseaseAssociationGraph.load(args.kgfile)
    logging.info("GRAPH {0} LOADED".format(args.kgfile))

    # Access the db adaptor. Make TCRD as the default DB
    dbAdapter = OlegDB() if args.db == "olegdb" else TCRD()

    if args.trainingfile is not None and args.disease is None:
        # trainingPklFile = trainingfile
        logging.info('Input training file: {0}'.format(args.trainingfile))
        # SECURITY NOTE: pickle.load can execute arbitrary code while loading;
        # only pass training files that come from a trusted source.
        try:
            with open(args.trainingfile, 'rb') as f:
                fileData = pickle.load(f)
        except Exception:
            # Opening or unpickling can fail with many exception types
            # (OSError, pickle.UnpicklingError, EOFError, AttributeError, ...),
            # so Exception is caught here. A bare `except:` would also swallow
            # KeyboardInterrupt and SystemExit.
            logging.error('Invalid pickled training set file')
            # Terminate with a non-zero status so callers can detect the
            # failure. The builtin exit() reported status 0 and only exists
            # when the `site` module is loaded.
            raise SystemExit(1)
Example #6
0
dbAdapter = OlegDB()

# CANT FIND THIS DISEASE(?)
# disease = "MP_0000180"
disease = "MP_0000184"

# Fetch the name stored in mp_onto for this term id and log it.
# NOTE(review): the query is assembled by string concatenation; that is only
# acceptable while `disease` stays a hard-coded constant.
nameQuery = "SELECT name FROM mp_onto WHERE mp_term_id = '" + disease + "'"
with db_session:
    dname = dbAdapter.db.get(nameQuery)
    logging.info("disease: {0}: \"{1}\"".format(disease, dname))

# No training-set file is used in this script.
fileData = None

# CANT FIND THIS GRAPH(?)
pickleFile = "ProteinDisease_GRAPH.pickle"
currentGraph = ProteinDiseaseAssociationGraph.load(pickleFile)

# SOME DISEASES CAUSE "DIVIDE BY 0 error"
logging.info("GRAPH LOADED: {0}".format(pickleFile))

# Node classes used for the metapath feature sets.
nodes = [
    ProteinInteractionNode,
    KeggNode,
    ReactomeNode,
    GoNode,
    InterproNode,
]
staticFeatures = []  # ALL OPTIONS HERE... ["gtex","lincs","hpa","ccle"]

logging.info("USING {0} METAPATH FEATURE SETS".format(len(nodes)))
logging.info("USING {0} STATIC FEATURE SETS".format(len(staticFeatures)))

if fileData is not None:
    logging.info("FOUND {0} POSITIVE LABELS".format(len(fileData[True])))
    logging.info("FOUND {0} NEGATIVE LABELS".format(len(fileData[False])))
    trainData = metapathFeatures(disease,
                                 currentGraph,
Example #7
0
    args = parser.parse_args()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s:%(message)s',
        level=(logging.DEBUG if args.verbose > 1 else logging.INFO))

    t0 = time.time()

    ## Construct base protein-disease map from ProteinDiseaseAssociationGraph.
    ## Db is PonyORM db (https://docs.ponyorm.org/api_reference.html).

    # Make TCRD as the default DB
    dbad = OlegDB() if args.db == "olegdb" else TCRD()

    pdg = ProteinDiseaseAssociationGraph(dbad)

    ## ProteinDiseaseAssociationGraph object has helper methods, but
    ## NetworkX methods also available.
    ## https://networkx.github.io/documentation/stable/reference/

    logging.info('Total nodes: %d; edges: %d' %
                 (pdg.graph.order(), pdg.graph.size()))

    ## Filter by proteins of interest; this list comes from a DB adapter, but any set will do.
    proteins = dbad.loadTotalProteinList().protein_id
    proteinSet = set(proteins)
    logging.info('Protein set: %d' % (len(proteinSet)))

    # Using attach() add edges from DB.
    # With this method create graph, which can be saved, avoiding