def obtainLabelList(graphList):
    """
    Collect every node label of the given graphs and sort them into
    three groups.

    graphList: list of graphs; each graph exposes `nodes`, and every node
        carries a "label" attribute
    return: (compound label list, value list, other list)
        - compound label list: labels starting with "C_", stored without
          that prefix
        - value list: labels for which is_num() is true
        - other list: every remaining label; its first entry is always
          "_unknown_"

    Labels are converted with str() before classification and duplicates
    are kept.
    """
    compounds = []
    values = []
    # "_unknown_" is always registered as the first "other" label
    others = ["_unknown_"]

    for graph in graphList:
        for nodeID in graph.nodes:
            label = str(graph.nodes[nodeID]["label"])
            if label.startswith("C_"):
                # compound: drop the "C_" prefix
                compounds.append(label[2:])
            elif is_num(label):
                # value
                values.append(label)
            else:
                # others
                others.append(label)

    return compounds, values, others
def convNodeToProblems(graphList,
                       vecGraphList,
                       masterEncoder,
                       genre=["C", "V"],
                       targetParams=CF.targetParams):
    """
    Turn nodes of the vectorized graphs into "problems" by calling
    convGraphToProblem() on every node that qualifies.

    graphList: list of graphs (before vectorization)
    vecGraphList: list of vectorized graphs (of graphList); graphList and
        vecGraphList are indexed in parallel
    masterEncoder: masterEncoder, passed through to convGraphToProblem()
    genre: genre to make problems:
        C: compound, V: numeric value, O: others
        *** this mode may not work..?
        (the list is only read here, never modified)
    targetParams: target parameters to make problems

    A node qualifies when its category is in `genre` and
      - CF.targetParamMode is truthy and one of its neighbors has a label
        contained in `targetParams`, or
      - CF.targetParamMode == False (every node of the genre is used).

    return: list of problem-type vectorized graphs, list of answers,
        list of parameter names of the answer
        - answer: [target[-1]] when genre == ["V"], otherwise
          target[CF.CATEGORY_DIM:]
        - parameter name: label of the matching neighbor in target-param
          mode; otherwise the label of the last neighbor visited, or None
          when the node has no neighbor
    """
    probVecGraphList = []
    targetList = []
    neighborNodeNameList = []

    print("converting nodes to problems")

    for graphID, vecGraph in tqdm(enumerate(vecGraphList)):
        graph = graphList[graphID]

        for node in vecGraph.nodes:
            nodeLabel = graph.nodes[node]["label"]
            if str(nodeLabel).startswith("C_"):
                category = "C"
            elif is_num(nodeLabel):
                category = "V"
            else:
                category = "O"

            #TODO: following codes are too complex and not clear....
            flg = False
            # Reset for every node: otherwise a node without neighbors would
            # either raise UnboundLocalError (very first node) or record the
            # label left over from a previously processed node.
            neighborNodeName = None

            #find nodes of target parameters
            for neighborNode in graph.neighbors(node):
                neighborNodeName = graph.nodes[neighborNode]["label"]
                if CF.targetParamMode and neighborNodeName in targetParams:
                    flg = True
                    break

            #TODO: this func may not work with False..
            # Explicit comparison kept on purpose so that only False (or 0)
            # switches the filter off.
            if CF.targetParamMode == False:
                flg = True

            #TODO: genre mode may not work
            if category in genre and flg:
                g, target = convGraphToProblem(vecGraph, node, masterEncoder)

                probVecGraphList.append(g)
                neighborNodeNameList.append(neighborNodeName)

                if genre == ["V"]:
                    # value-only mode: keep just the last element
                    targetList.append([target[-1]])
                else:
                    targetList.append(target[CF.CATEGORY_DIM:])

    return probVecGraphList, targetList, neighborNodeNameList
def lowerGraph(g):
    """
    Lower-case, in place, every node label that is neither a compound
    label (prefix "C_") nor numeric according to is_num().

    g: graph; its node "label" attributes are modified in place and
        nothing is returned

    A label that has no lower() method (i.e. a non-string label) is
    reported with a print and left unchanged.
    """
    for node in g.nodes:
        labelName = g.nodes[node]["label"]
        if str(labelName)[:2] != "C_" and is_num(labelName) == False:
            try:
                g.nodes[node]["label"] = labelName.lower()
            except AttributeError:
                # Only the "label cannot be lowered" case is best-effort;
                # a bare except would also hide KeyboardInterrupt/SystemExit.
                print("lowering error with label:", labelName)
def calcNodeValue(val, doLog):
    """
    Convert a raw node value into the value that is stored on the graph.

    val: node value
    doLog: if true, convert to log scale (slightly modified:
        log10(val + 1/10**4) is used instead of log10(val))

    return: processed value
        - "_unknown_" is returned untouched
        - with doLog: log10 of float(val) plus the small offset
        - without doLog: float(val) when is_num(val) is true, otherwise
          val itself
    """
    # do nothing in the case of "unknown"
    if val == "_unknown_":
        return val

    if doLog:
        # offset presumably keeps log10 finite for val == 0 -- TODO confirm
        return np.log10(float(val) + 1 / 10**4)

    return float(val) if is_num(val) else val
def checkGraphList(gList, valThreshold=1500, showGraph=False):
    """
    Scan the numeric node labels of every graph and report suspiciously
    large values.

    gList: list of graph
    valThreshold: absolute threshold of checking; a numeric label whose
        absolute value exceeds it is printed together with the graph index
    showGraph: if true, the graph holding such a value is also drawn with
        drawGraph(g, printNodes=True)

    Nothing is returned; the function only prints (and optionally draws).
    """
    print("checking graph values...")

    for graphNo, graph in enumerate(gList):
        for nodeID in graph.nodes:
            label = graph.nodes[nodeID]["label"]

            # only numeric labels are checked
            if not is_num(label):
                continue

            magnitude = np.abs(float(label))
            if magnitude > valThreshold:
                print("caution: too large val: ", label, "graph No.: ",
                      graphNo)
                if showGraph:
                    drawGraph(graph, printNodes=True)

    print("check done")
def searchNodeIDforTargetUnit(g, targetLabel):
    """
    Find the value nodes attached to every node labelled `targetLabel`.

    g: graph
    targetLabel: target label to be searched (e.g., Dipole Moment)

    return: list of node IDs of the neighbors whose label is numeric
        (is_num() is true); neighbors with other labels are skipped

    e.g., if the following graph was searched, node id of "23" will be
    returned
    [C_111]-[Dipole moment]-[23]-[-]
    """
    valueNodeIDs = []

    for nodeID in g.nodes:
        if targetLabel == g.nodes[nodeID]["label"]:
            # some of the neighbor nodes should be values
            valueNodeIDs.extend(
                neighbor for neighbor in g.neighbors(nodeID)
                if is_num(g.nodes[neighbor]["label"]))

    return valueNodeIDs
def addParams(g, params, targetNodeID="comp"):
    """
    Attach one parameter (name node, value node and optional unit node)
    to a graph.

    g: graph object
    params: array of (node name, label name, value to be written,
        log scale or not, unit name)
    targetNodeID: target node id the parameter node is connected to

    Nodes are only added when the value is numeric (is_num() is true) or
    is none of: NaN, None, "unknown", "nan". The resulting chain is
        [targetNodeID]-[nodeName]-[nodeName_val]-[nodeName_unit]
    where the value label is str(calcNodeValue(val, doLog)) and the unit
    node is omitted when the unit is "[No unit]".

    return graph (the same object, modified in place)
    """
    nodeName = params[0]
    labelName = params[1]
    val = params[2]
    doLog = params[3]
    unit = params[4]

    # `val == val` is false only for NaN-like values
    isWritable = is_num(val) or (val is not np.nan and val == val
                                 and val is not None and val != "unknown"
                                 and val != "nan")
    if not isWritable:
        return g

    nodeValue = calcNodeValue(val, doLog)

    # parameter name node, hanging off the target node
    g.add_node(nodeName, label=labelName)
    g.add_edge(targetNodeID, nodeName)

    # value node, hanging off the parameter name node
    valNodeID = nodeName + "_val"
    g.add_node(valNodeID, label=str(nodeValue))
    g.add_edge(valNodeID, nodeName)

    # unit node, hanging off the value node
    if unit != "[No unit]":
        unitNodeID = nodeName + "_unit"
        g.add_node(unitNodeID, label=unit)
        g.add_edge(unitNodeID, valNodeID)

    return g