def testZeroMeanNormalize(self):
    """normalizeFeatures('zero_mean') must leave every graph's
    (numNodes, 13) node-feature matrix shape untouched."""
    # Graphs of 1..11 nodes, each with 13 random integer features.
    batch = [
        S2VGraph('binaryId', g=nx.Graph(), label='label', node_tags=[],
                 node_features=np.random.randint(low=0, high=100,
                                                 size=(n + 1, 13)))
        for n in range(11)
    ]
    normalizeFeatures(batch, operation='zero_mean')
    for (n, graph) in enumerate(batch):
        log.debug(f'Feature shape after norm: {graph.node_features.shape}')
        self.assertEqual(graph.node_features.shape[0], n + 1, 'First dim size')
        self.assertEqual(graph.node_features.shape[1], 13, 'Second dim size')
def testMinMaxNormalize(self):
    """normalizeFeatures('min_max') must keep each graph's feature-matrix
    shape intact and map every value into the closed interval [0, 1]."""
    # Graphs of 1..11 nodes, each with 13 random integer features.
    batch = [
        S2VGraph('binaryId', g=nx.Graph(), label='label', node_tags=[],
                 node_features=np.random.randint(low=0, high=100,
                                                 size=(n + 1, 13)))
        for n in range(11)
    ]
    normalizeFeatures(batch, operation='min_max')
    for (n, graph) in enumerate(batch):
        log.debug(f'Feature shape after norm: {graph.node_features.shape}')
        self.assertEqual(graph.node_features.shape[0], n + 1, 'First dim size')
        self.assertEqual(graph.node_features.shape[1], 13, 'Second dim size')
        # Every element must land in [0, 1] after min-max scaling.
        for value in graph.node_features.flat:
            self.assertLessEqual(value, 1.0, 'Not <= 1')
            self.assertGreaterEqual(value, 0.0, 'Not >= 0')
# NOTE(review): this chunk starts mid-file; `runId`, `df` and `gHP` appear to
# be bound by an enclosing training routine not visible here — confirm against
# the full file.
# Persist per-epoch history as CSV, with the hyperparameter set as a comment
# header line.
with open('%sGpu%sRun%s.csv' % (cmd_args.data, cmd_args.gpu_id, runId),
          'w') as histFile:  # `with` closes the file even if to_csv raises
    histFile.write("# %s\n" % str(gHP))
    df.to_csv(histFile, index_label='Epoch', float_format='%.6f')


if __name__ == '__main__':
    log.setLevel("INFO")
    # Seed every RNG source (python, numpy, torch) for reproducible runs.
    random.seed(cmd_args.seed)
    np.random.seed(cmd_args.seed)
    torch.manual_seed(cmd_args.seed)

    startTime = time.process_time()
    graphs = loadGraphsMayCache(cmd_args.train_dir)
    normalizeFeatures(graphs, isTestSet=False, operation=cmd_args.norm_op)
    # BUG FIX: the original read the never-assigned name `trainGraphs`
    # (`trainGraphs = filterOutNoEdgeGraphs(trainGraphs)`), which raises
    # NameError, and the filtered result was never used afterwards. Filter
    # `graphs` instead so the pooling/CV steps below see the cleaned dataset.
    graphs = filterOutNoEdgeGraphs(graphs)
    dataReadyTime = time.process_time() - startTime
    log.info('Dataset ready takes %.2fs' % dataReadyTime)

    for (id, hp) in enumerate(HyperParameterIterator(cmd_args.hp_path)):
        for (key, val) in hp.items():
            gHP[key] = val

        # poolingK = graph size at the poolingRatio-quantile of all sizes.
        numNodesList = sorted([g.num_nodes for g in graphs])
        idx = int(math.ceil(hp['poolingRatio'] * len(graphs))) - 1
        gHP['poolingK'] = numNodesList[idx]

        kFoldGraphs = kFoldSplit(gHP['cvFold'], graphs)
        crossValidate(kFoldGraphs, id)
# Tail of the test-driving routine: move the trained classifier to the GPU,
# run inference over the test set, and export the predicted probabilities.
classifier = classifier.cuda()
classifier.eval()
tic = time.process_time()
predProb = predictDataset(testGraphs, classifier)
log.info(f'Net test time = {time.process_time() - tic} seconds')
exportPredictions(testGraphs, predProb)


if __name__ == '__main__':
    log.setLevel("INFO")
    # Seed every RNG source (python, numpy, torch) for reproducible runs.
    random.seed(cmd_args.seed)
    np.random.seed(cmd_args.seed)
    torch.manual_seed(cmd_args.seed)
    if cmd_args.data != 'YANACFG':
        tic = time.process_time()
        testGraphs = loadGraphsMayCache(cmd_args.test_dir, isTestSet=True)
        normalizeFeatures(testGraphs, isTestSet=True,
                          operation=cmd_args.norm_op)
        log.info('Testset ready takes %.2fs' % (time.process_time() - tic))
        # Hyperparameters are decided from the (edge-filtered) training set,
        # then the model is evaluated on the test graphs.
        trainGraphs = loadGraphsMayCache(cmd_args.train_dir, isTestSet=False)
        trainGraphs = filterOutNoEdgeGraphs(trainGraphs)
        decideHyperparameters(trainGraphs)
        testWithModel(testGraphs)
    else:
        log.warning(f'No testset for YANACFG data')