# Load the protein/disease association graph identified by graphString.
# NOTE: some diseases are known to trigger a "divide by 0" error further on.
currentGraph = ProteinDiseaseAssociationGraph.load(graphString)
print("GRAPH {0} LOADED".format(graphString))

# Node classes handed to metapathFeatures; no static feature sets are used.
nodes = [
    ProteinInteractionNode,
    KeggNode,
    ReactomeNode,
    GoNode,
    InterproNode,
]
staticFeatures = []

print("--- USING {0} METAPATH FEATURE SETS".format(len(nodes)))
print("--- USING {0} STATIC FEATURE SETS".format(len(staticFeatures)))

# Only pass loadedLists when label data was actually supplied; otherwise
# metapathFeatures is called without it and falls back to its own default.
# NOTE(review): fileData presumably maps True/False to the positive/negative
# label lists -- confirm against the caller.
loadedListArgs = {} if fileData is None else {"loadedLists": fileData}

# Build the feature table and replace missing values with 0.
trainData = metapathFeatures(
    disease,
    currentGraph,
    nodes,
    staticFeatures,
    **loadedListArgs
).fillna(0)

# Wrap the features in a BinaryLabel container for the learning step.
d = BinaryLabel()
d.loadData(trainData)

# Procedure holds the *name* of the routine to run (e.g. a cross-validation
# function); it is resolved in the current namespace and invoked on d.
locals()[Procedure](d)
# Example no. 2
0
    logging.info("--- METAPATH FEATURE SETS: {0}".format(len(nodes)))
    logging.info("--- STATIC FEATURE SETS: {0}".format(len(staticFeatures)))
    logging.info("--- STATIC FEATURE DIR: {0}".format(args.static_dir))

    # fetch the description of proteins
    idDescription = dbAdapter.fetchPathwayIdDescription(
    )  # fetch the description

    # generate features
    if fileData is not None:
        # logging.info("FOUND {0} POSITIVE LABELS".format(len(fileData[True])))
        # logging.info("FOUND {0} NEGATIVE LABELS".format(len(fileData[False])))
        allData = metapathFeatures(args.disease,
                                   currentGraph,
                                   nodes,
                                   idDescription,
                                   staticFeatures,
                                   args.static_dir,
                                   loadedLists=fileData).fillna(0)
    else:
        logging.error('fileData should not be None')
        exit()

    # Divide allData into training/predict set and save them
    saveTrainPredictSet(allData, args.outputdir, args.disease,
                        args.trainingfile, args.predictfile)

    logging.info('{0}: elapsed time: {1}'.format(
        os.path.basename(sys.argv[0]),
        time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time() - t0))))