def main(_):
    flags_obj = tf.flags.FLAGS
    euler_graph = tf_euler.dataset.get_dataset(flags_obj.dataset)
    euler_graph.load_graph()

    dims = [flags_obj.hidden_dim] * (flags_obj.layers + 1)
    if flags_obj.run_mode == 'train':
        metapath = [euler_graph.train_edge_type] * flags_obj.layers
    else:
        metapath = [euler_graph.all_edge_type] * flags_obj.layers
    num_steps = int((euler_graph.total_size + 1) // flags_obj.batch_size *
                    flags_obj.num_epochs)

    model = GIN(dims, metapath,
                euler_graph.num_classes,
                euler_graph.sparse_fea_idx,
                euler_graph.sparse_fea_max_id,
                eps=flags_obj.eps,
                train_eps=flags_obj.train_eps)

    params = {'num_classes': euler_graph.num_classes,
              'optimizer': flags_obj.optimizer,
              'learning_rate': flags_obj.learning_rate,
              'log_steps': flags_obj.log_steps,
              'train_rate': euler_graph.train_rate,
              'id_file': euler_graph.id_file,
              'label': ['label'],
              'model_dir': flags_obj.model_dir,
              'total_size': euler_graph.total_size,
              'infer_dir': flags_obj.model_dir,
              'batch_size': flags_obj.batch_size,
              'total_step': num_steps}
    config = tf.estimator.RunConfig(log_step_count_steps=None)
    model_estimator = GraphEstimator(model, params, config)

    if flags_obj.run_mode == 'train':
        model_estimator.train()
    elif flags_obj.run_mode == 'evaluate':
        model_estimator.evaluate()
    elif flags_obj.run_mode == 'infer':
        model_estimator.infer()
    else:
        raise ValueError('Run mode does not exist!')
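# A minimal, hypothetical entry point for the Euler/TF1 script above. The flag
# names mirror the attributes read from flags_obj in main(); the defaults shown
# here are assumptions, not the original project's values.
import tensorflow as tf

tf.flags.DEFINE_string('dataset', 'cora', 'Dataset name registered in tf_euler.')
tf.flags.DEFINE_string('run_mode', 'train', 'train / evaluate / infer.')
tf.flags.DEFINE_integer('hidden_dim', 32, 'Hidden dimension of each GIN layer.')
tf.flags.DEFINE_integer('layers', 2, 'Number of GIN layers.')
tf.flags.DEFINE_integer('batch_size', 32, 'Mini-batch size.')
tf.flags.DEFINE_integer('num_epochs', 10, 'Number of training epochs.')
tf.flags.DEFINE_float('learning_rate', 0.01, 'Optimizer learning rate.')
tf.flags.DEFINE_float('eps', 0.0, 'Initial epsilon of the GIN aggregator.')
tf.flags.DEFINE_boolean('train_eps', False, 'Whether epsilon is trainable.')
tf.flags.DEFINE_string('optimizer', 'adam', 'Optimizer name.')
tf.flags.DEFINE_integer('log_steps', 20, 'Steps between log outputs.')
tf.flags.DEFINE_string('model_dir', 'ckpt', 'Checkpoint / inference directory.')

if __name__ == '__main__':
    tf.app.run(main)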
def run(rank, world_size, args):
    print('Running DDP on rank', rank)
    setup(rank, world_size, args)
    ragdoll.init()
    dev_id = ragdoll.device_id()

    if len(args.input_graph) > 0 or len(args.cached_dir) > 0:
        data = SynDataset(rank == 0, args)
    else:
        data = Dataset(rank == 0, args)
    feat_size = args.feat_size
    n_classes = args.n_classes

    torch.cuda.set_device(dev_id)
    features = torch.FloatTensor(data.features).cuda()
    labels = torch.LongTensor(data.labels).cuda()
    # This benchmark only measures runtime, so the real labels are replaced
    # with a single dummy label for the whole graph.
    labels = torch.LongTensor([0]).cuda()
    train_mask = torch.BoolTensor(data.train_mask).cuda()
    val_mask = torch.BoolTensor(data.val_mask).cuda()
    test_mask = torch.BoolTensor(data.test_mask).cuda()
    n_nodes = data.n_nodes
    local_n_nodes = data.local_n_nodes

    model = GIN(args.num_layers, args.num_mlp_layers, feat_size,
                args.hidden_dim, n_classes, args.final_dropout,
                args.learn_eps, args.graph_pooling_type,
                args.neighbor_pooling_type, n_nodes, local_n_nodes)
    model.cuda()
    model = DDP(model, device_ids=[dev_id])

    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    optimizer.zero_grad()

    print("Start training")
    dur = []
    for epoch in range(args.epochs):
        model.train()
        torch.distributed.barrier()
        if epoch >= 3:  # skip the first epochs when timing (warm-up)
            t0 = time.time()
        # with profiler.profile(record_shapes=True, use_cuda=True) as prof:
        logits = model(data.graph, features)
        loss = loss_fcn(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        torch.cuda.current_stream().synchronize()
        # print(prof.key_averages().table(sort_by="cuda_time_total"))
        if epoch >= 3:
            dur.append(time.time() - t0)
        print('Peak memory is {} GB'.format(
            torch.cuda.max_memory_allocated(dev_id) / 1e9))
        print('acc is {}, loss is {}, this epoch using time {} s, avg time {} s.'.format(
            0, loss.item(), dur[-1] if epoch >= 3 else 0, np.mean(dur)))

    cleanup()
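# run() above relies on setup()/cleanup() helpers defined elsewhere in the
# project. A minimal sketch of what such helpers typically look like for
# NCCL-based DDP is shown below; the environment variables, port, and backend
# choice are assumptions, not the project's actual code.
import os
import torch.distributed as dist

def setup(rank, world_size, args):
    # Rendezvous address shared by all processes; single-node defaults assumed.
    os.environ.setdefault('MASTER_ADDR', 'localhost')
    os.environ.setdefault('MASTER_PORT', '12355')
    dist.init_process_group('nccl', rank=rank, world_size=world_size)

def cleanup():
    dist.destroy_process_group()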
def main(args):
    # set up seeds; args.seed is supported
    torch.manual_seed(seed=args.seed)
    np.random.seed(seed=args.seed)

    is_cuda = not args.disable_cuda and torch.cuda.is_available()
    if is_cuda:
        args.device = torch.device("cuda:" + str(args.device))
        torch.cuda.manual_seed_all(seed=args.seed)
    else:
        args.device = torch.device("cpu")

    dataset = GINDataset(args.dataset, not args.learn_eps, args.degree_as_nlabel)

    trainloader, validloader = GINDataLoader(
        dataset, batch_size=args.batch_size, device=args.device,
        seed=args.seed, shuffle=True,
        split_name='fold10', fold_idx=args.fold_idx).train_valid_loader()
    # or split_name='rand', split_ratio=0.7

    model = GIN(
        args.num_layers, args.num_mlp_layers,
        dataset.dim_nfeats, args.hidden_dim, dataset.gclasses,
        args.final_dropout, args.learn_eps,
        args.graph_pooling_type, args.neighbor_pooling_type).to(args.device)

    criterion = nn.CrossEntropyLoss()  # default reduction is 'mean'
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    # it's not cost-effective to handle the cursor and init 0
    # https://stackoverflow.com/a/23121189
    tbar = tqdm(range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout)
    vbar = tqdm(range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout)
    lrbar = tqdm(range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout)

    for epoch, _, _ in zip(tbar, vbar, lrbar):
        train(args, model, trainloader, optimizer, criterion, epoch)
        scheduler.step()

        train_loss, train_acc = eval_net(args, model, trainloader, criterion)
        tbar.set_description(
            'train set - average loss: {:.4f}, accuracy: {:.0f}%'.format(
                train_loss, 100. * train_acc))

        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
        vbar.set_description(
            'valid set - average loss: {:.4f}, accuracy: {:.0f}%'.format(
                valid_loss, 100. * valid_acc))

        if not args.filename == "":
            with open(args.filename, 'a') as f:
                f.write('%s %s %s %s %s' % (
                    args.dataset, args.learn_eps, args.neighbor_pooling_type,
                    args.graph_pooling_type, epoch))
                f.write("\n")
                f.write("%f %f %f %f" % (train_loss, train_acc, valid_loss, valid_acc))
                f.write("\n")

        lrbar.set_description(
            "Learning eps with learn_eps={}: {}".format(
                args.learn_eps,
                [layer.eps.data.item() for layer in model.ginlayers]))

    tbar.close()
    vbar.close()
    lrbar.close()
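# main(args) above expects a fully populated argument namespace. A hypothetical
# argparse setup covering every attribute it reads is sketched here; the option
# defaults are assumptions and may differ from the original script.
import argparse

def build_parser():
    p = argparse.ArgumentParser(description='GIN for graph classification')
    p.add_argument('--dataset', type=str, default='MUTAG')
    p.add_argument('--batch_size', type=int, default=32)
    p.add_argument('--fold_idx', type=int, default=0)
    p.add_argument('--num_layers', type=int, default=5)
    p.add_argument('--num_mlp_layers', type=int, default=2)
    p.add_argument('--hidden_dim', type=int, default=64)
    p.add_argument('--final_dropout', type=float, default=0.5)
    p.add_argument('--graph_pooling_type', type=str, default='sum')
    p.add_argument('--neighbor_pooling_type', type=str, default='sum')
    p.add_argument('--learn_eps', action='store_true')
    p.add_argument('--degree_as_nlabel', action='store_true')
    p.add_argument('--epochs', type=int, default=350)
    p.add_argument('--lr', type=float, default=0.01)
    p.add_argument('--seed', type=int, default=0)
    p.add_argument('--device', type=int, default=0)
    p.add_argument('--disable_cuda', action='store_true')
    p.add_argument('--filename', type=str, default='')
    return p

if __name__ == '__main__':
    main(build_parser().parse_args())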
def main(args):
    # set up seeds (fixed to 0 here; args.seed is only used by the data loader)
    mx.random.seed(0)
    np.random.seed(seed=0)

    if args.device >= 0:
        args.device = mx.gpu(args.device)
    else:
        args.device = mx.cpu()

    dataset = GINDataset(args.dataset, not args.learn_eps)

    trainloader, validloader = GraphDataLoader(
        dataset, batch_size=args.batch_size, collate_fn=collate,
        seed=args.seed, shuffle=True,
        split_name='fold10', fold_idx=args.fold_idx).train_valid_loader()
    # or split_name='rand', split_ratio=0.7

    model = GIN(
        args.num_layers, args.num_mlp_layers,
        dataset.dim_nfeats, args.hidden_dim, dataset.gclasses,
        args.final_dropout, args.learn_eps,
        args.graph_pooling_type, args.neighbor_pooling_type)
    model.initialize(ctx=args.device)

    criterion = gluon.loss.SoftmaxCELoss()

    print(model.collect_params())

    lr_scheduler = mx.lr_scheduler.FactorScheduler(50, 0.5)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'lr_scheduler': lr_scheduler})

    # it's not cost-effective to handle the cursor and init 0
    # https://stackoverflow.com/a/23121189
    tbar = tqdm(range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout)
    vbar = tqdm(range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout)
    lrbar = tqdm(range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout)

    for epoch, _, _ in zip(tbar, vbar, lrbar):
        train(args, model, trainloader, trainer, criterion, epoch)

        train_loss, train_acc = eval_net(args, model, trainloader, criterion)
        tbar.set_description(
            'train set - average loss: {:.4f}, accuracy: {:.0f}%'.format(
                train_loss, 100. * train_acc))

        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
        vbar.set_description(
            'valid set - average loss: {:.4f}, accuracy: {:.0f}%'.format(
                valid_loss, 100. * valid_acc))

        if not args.filename == "":
            with open(args.filename, 'a') as f:
                f.write('%s %s %s %s' % (
                    args.dataset, args.learn_eps,
                    args.neighbor_pooling_type, args.graph_pooling_type))
                f.write("\n")
                f.write("%f %f %f %f" % (train_loss, train_acc, valid_loss, valid_acc))
                f.write("\n")

        lrbar.set_description(
            "Learning eps with learn_eps={}: {}".format(
                args.learn_eps,
                [layer.eps.data(args.device).asscalar()
                 for layer in model.ginlayers]))

    tbar.close()
    vbar.close()
    lrbar.close()
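# GraphDataLoader above is given a collate_fn named `collate`, which is not
# shown here. A plausible minimal version that batches DGL graphs and stacks
# labels for MXNet is sketched below; treat it as an assumption about that
# helper, not its actual definition.
import dgl
import mxnet as mx

def collate(samples):
    # `samples` is a list of (graph, label) pairs produced by the dataset.
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, mx.nd.array(labels)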
def main(args):
    path = os.path.join(args.dataDir, args.dataset + ".npz")
    data = custom_dataset(path, args.dim, args.classes, load_from_txt=False)
    g = data.g

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)

    features = data.x
    labels = data.y
    in_feats = features.size(1)
    n_classes = data.num_classes

    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    if args.model == 'gcn':
        model = GCN(g, in_feats=in_feats, n_hidden=args.hidden,
                    n_classes=n_classes, n_layers=2)
    else:
        model = GIN(g, input_dim=in_feats, hidden_dim=64,
                    output_dim=n_classes, num_layers=5)
    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in tqdm(range(args.n_epochs)):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.cuda.synchronize()
    dur = time.perf_counter() - start

    if args.model == 'gcn':
        print("DGL GCN (L2-H16) Time: (ms) {:.3f}".format(dur * 1e3 / args.n_epochs))
    else:
        print("DGL GIN (L5-H64) Time: (ms) {:.3f}".format(dur * 1e3 / args.n_epochs))
    print()
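# A hypothetical invocation of the benchmark above; the dataset name, feature
# dimension, and class count are placeholders and must match the .npz file used.
import argparse

if __name__ == '__main__':
    main(argparse.Namespace(
        dataDir='./datasets',   # directory containing <dataset>.npz (assumed)
        dataset='cora',
        dim=1433,               # input feature dimension of the dataset
        classes=7,              # number of node classes
        gpu=0,                  # use -1 for CPU
        model='gin',            # 'gcn' selects the GCN branch, anything else GIN
        hidden=16,              # hidden size used by the GCN branch
        n_epochs=200))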
def main():
    args = parse_args()
    if args.augment.lower() == 'none':
        args.augment = None

    device = to_device(args.gpu)
    args.seed = args.seed + args.fold
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data = load_data(args.data)
    num_features = data.num_features
    num_classes = data.num_classes

    trn_graphs, test_graphs = load_data_fold(args.data, args.fold)
    trn_loader = DataLoader(trn_graphs, batch_size=256)
    test_loader = DataLoader(test_graphs, batch_size=256)

    if args.iters == 'auto':
        args.iters = math.ceil(len(trn_graphs) / args.batch_size)
    else:
        args.iters = int(args.iters)

    model = GIN(num_features, num_classes, args.units, args.layers, args.dropout)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    loss_func = SoftCELoss()
    augment = Augment(trn_graphs, args.augment, aug_size=args.aug_size)

    if args.verbose > 0:
        print(' epochs\t loss\ttrn_acc\tval_acc')

    out_list = dict(trn_loss=[], trn_acc=[], test_loss=[], test_acc=[])
    for epoch in range(args.epochs):
        model.train()
        loss_sum = 0
        for _ in range(args.iters):
            idx = torch.randperm(len(trn_graphs))[:args.batch_size]
            data = augment(idx).to(device)
            output = model(data.x, data.edge_index, data.batch)
            loss = loss_func(output, data.y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
        if args.schedule:
            scheduler.step(epoch)

        trn_loss = loss_sum / args.iters
        trn_acc = eval_acc(model, trn_loader, device)
        test_loss = eval_loss(model, loss_func, test_loader, device)
        test_acc = eval_acc(model, test_loader, device)
        out_list['trn_loss'].append(trn_loss)
        out_list['trn_acc'].append(trn_acc)
        out_list['test_loss'].append(test_loss)
        out_list['test_acc'].append(test_acc)

        if args.verbose > 0 and (epoch + 1) % args.verbose == 0:
            print(f'{epoch + 1:7d}\t{trn_loss:7.4f}\t{trn_acc:7.4f}\t{test_acc:7.4f}')

    if args.print_all:
        out = {arg: getattr(args, arg) for arg in vars(args)}
        out['all'] = out_list
        print(json.dumps(out))
    else:
        print(f'Training accuracy: {out_list["trn_acc"][-1]}')
        print(f'Test accuracy: {out_list["test_acc"][-1]}')
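# SoftCELoss above is defined elsewhere in the project. A plausible minimal
# version is sketched below under the assumption that augmented targets can be
# either hard class indices or soft label distributions; this is illustrative,
# not the project's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SoftCELoss(nn.Module):
    def forward(self, logits, target):
        if target.dim() == 1:
            # Hard labels: standard cross-entropy over class indices.
            return F.cross_entropy(logits, target)
        # Soft labels: cross-entropy between the target distribution and the
        # predicted log-probabilities, averaged over the batch.
        log_probs = F.log_softmax(logits, dim=-1)
        return -(target * log_probs).sum(dim=-1).mean()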
'''
PFN internship 2019 coding task
machine learning task-4
Issei NAKASONE
'''

import datasets as D
import mlp
from gin import GIN, TrainGIN
from iterator import Iterator

dirpath = '../datasets/train/'
predict = '../datasets/test/'
batch_size = 256

train = D.get_dataset(dirpath)
train_iter = Iterator(train, batch_size)
model = GIN()
optimizer = mlp.Adam()
optimizer.setup(model)
trainer = TrainGIN(optimizer, train_iter)
trainer.start(epoch=100)

pred = D.GraphDataset(predict)
trainer.predict(pred)
def main(args, run_config, house_name, csv_raw=None, shuffle=False):
    # set up seeds; args.seed is supported
    torch.manual_seed(seed=args.seed)
    np.random.seed(seed=args.seed)

    is_cuda = not args.disable_cuda and torch.cuda.is_available()
    is_cuda = False  # force CPU
    if is_cuda:
        args.device = torch.device("cuda:" + str(args.device))
        torch.cuda.manual_seed_all(seed=args.seed)
    else:
        args.device = torch.device("cpu")

    uniqueIndex = getUniqueStartIndex(csv_raw)
    total_num_iteration_for_LOOCV = 0
    total_acc_for_LOOCV = []
    total_f1_for_LOOCV = []
    total_per_class_accuracy = []
    total_confusion_matrix = []
    total_num_iteration_for_LOOCV = 1
    loo = LeaveOneOut()
    total_embeddings = pd.DataFrame()

    for train_index, test_index in loo.split(uniqueIndex):
        args.save_embeddings = False
        print('\n\n split: ', total_num_iteration_for_LOOCV)
        total_num_iteration_for_LOOCV += 1
        path = 'checkpoint_' + house_name + '.pth'
        # initialize the early_stopping object
        early_stopping = EarlyStopping(patience=15, verbose=True, path=path)
        model = GIN(args.num_layers, args.num_mlp_layers, args.input_features,
                    args.hidden_dim, args.nb_classes, args.final_dropout,
                    args.learn_eps, args.graph_pooling_type,
                    args.neighbor_pooling_type,
                    args.save_embeddings).to(args.device)
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

        file_names = ['ordonezB', 'houseB', 'houseC', 'houseA', 'ordonezA']
        if run_config == 'raw':
            graph_path = os.path.join('../../../data/', house_name, house_name + '.bin')
        graphs = []
        labels = []
        if not os.path.exists(graph_path):
            for file_name in file_names:
                print('\n\n\n\n')
                print('*******************************************************************')
                print('\t\t\t\t\t' + file_name + '\t\t\t\t\t\t\t')
                print('*******************************************************************')
                print('\n\n\n\n')
                if run_config == 'ob':
                    house = pd.read_csv('../../../data/' + file_name + '/ob_' + file_name + '.csv')
                    lastChangeTimeInMinutes = pd.read_csv(
                        '../../../data/' + file_name + '/' + 'ob-house' + '-sensorChangeTime.csv')
                elif run_config == 'raw':
                    house = pd.read_csv('../../../data/' + file_name + '/' + file_name + '.csv')
                    lastChangeTimeInMinutes = pd.read_csv(
                        '../../../data/' + file_name + '/' + 'house' + '-sensorChangeTime.csv')
                nodes = pd.read_csv('../../../data/' + file_name + '/nodes.csv')
                edges = pd.read_csv('../../../data/' + file_name + '/bidrectional_edges.csv')
                u = edges['Src']
                v = edges['Dst']

                # Create one graph per row of the house CSV.
                # Combine features like this for each node:
                # Value, Place_in_House, Type, Last_change_Time_in_Second
                for i in range(len(house)):
                    # for i in range(5000):
                    feature = []
                    flag = 0
                    prev_node_value = 0
                    prev_node_change_time = 0
                    # Define Graph
                    g = dgl.graph((u, v))
                    node_num = 0
                    total_nodes = len(nodes)
                    # Add Features
                    for j in range(total_nodes - 1):
                        if nodes.loc[j, 'Type'] == 1:
                            node_value = -1
                            node_place_in_house = nodes.loc[j, 'place_in_house']
                            node_type = nodes.loc[j, 'Type']
                            feature.append([node_value, node_place_in_house, node_type, -1])
                            node_num += 1
                            continue
                        if flag == 0:
                            node_value = house.iloc[i, 4 + j - node_num]
                            last_change_time_in_minutes = lastChangeTimeInMinutes.iloc[i, 4 + j - node_num]
                            node_place_in_house = nodes.loc[j, 'place_in_house']
                            node_type = nodes.loc[j, 'Type']
                            feature.append([node_value, node_place_in_house, node_type,
                                            last_change_time_in_minutes])
                            if nodes.loc[j, 'Object'] == nodes.loc[j + 1, 'Object']:
                                prev_node_value = node_value
                                prev_node_change_time = last_change_time_in_minutes
                                flag = 1
                        else:
                            node_num += 1
                            node_place_in_house = nodes.loc[j, 'place_in_house']
                            node_type = nodes.loc[j, 'Type']
                            feature.append([prev_node_value, node_place_in_house, node_type,
                                            prev_node_change_time])
                            if nodes.loc[j, 'Object'] != nodes.loc[j + 1, 'Object']:
                                flag = 0
                    feature.append([house.loc[i, 'time_of_the_day'], -1, -1, -1])
                    g.ndata['attr'] = torch.tensor(feature)
                    # Give Label
                    try:
                        mappedActivity = config['merging_activties'][house.iloc[i, 2]]
                        labels.append(getIDFromClassName(mappedActivity, config))
                    except:
                        activity = house.iloc[i, 2]
                        labels.append(getIDFromClassName(activity, config))
                    graphs.append(g)
            graph_labels = {"glabel": torch.tensor(labels)}
            save_graphs(graph_path, graphs, graph_labels)
        else:
            graphs, labels = load_graphs(graph_path)
            labels = list(labels['glabel'].numpy())
        # print(np.unique(labels))
        print(len(graphs))

        if run_config == 'ob':
            config["house_start_end_dict"] = [{'ordonezB': (0, 2487)},
                                              {'houseB': (2487, 4636)},
                                              {'houseC': (4636, 6954)},
                                              {'houseA': (6954, 7989)},
                                              {'ordonezA': (7989, 8557)}]
        elif run_config == 'raw':
            config["house_start_end_dict"] = [{'ordonezB': (0, 30470)},
                                              {'houseB': (30470, 51052)},
                                              {'houseC': (51052, 77539)},
                                              {'houseA': (77539, 114626)},
                                              {'ordonezA': (114626, 134501)}]

        start, end = getStartAndEndIndex(csv_raw, uniqueIndex[test_index])
        test_graphs = graphs[start:end]
        test_labels = labels[start:end]
        train_idx = list(set(np.arange(len(graphs))) - set(np.arange(start, end)))
        valid_idx = train_idx[:int(0.2 * len(train_idx))]
        train_idx = train_idx[int(0.2 * len(train_idx)):]
        train_graphs = [graphs[i] for i in train_idx]
        train_labels = [labels[i] for i in train_idx]
        val_graphs = [graphs[i] for i in valid_idx]
        val_labels = [labels[i] for i in valid_idx]

        trainDataset = GraphHouseDataset(train_graphs, train_labels)
        valDataset = GraphHouseDataset(val_graphs, val_labels)
        testDataset = GraphHouseDataset(test_graphs, test_labels)
        trainloader = GraphDataLoader(
            trainDataset, batch_size=args.batch_size, device=args.device,
            collate_fn=collate, seed=args.seed, shuffle=shuffle,
            split_name='fold10', fold_idx=args.fold_idx,
            save_embeddings=args.save_embeddings).train_valid_loader()
        validloader = GraphDataLoader(
            valDataset, batch_size=args.batch_size, device=args.device,
            collate_fn=collate, seed=args.seed, shuffle=shuffle,
            split_name='fold10', fold_idx=args.fold_idx,
            save_embeddings=args.save_embeddings).train_valid_loader()
        testloader = GraphDataLoader(
            testDataset, batch_size=args.batch_size, device=args.device,
            collate_fn=collate, seed=args.seed, shuffle=shuffle,
            split_name='fold10', fold_idx=args.fold_idx,
            save_embeddings=args.save_embeddings).train_valid_loader()
        # or split_name='rand', split_ratio=0.7

        criterion = nn.CrossEntropyLoss()  # default reduction is 'mean'

        for epoch in range(args.epochs):
            train(args, model, trainloader, optimizer, criterion, epoch)
            scheduler.step()
            # early_stopping needs the F1 score to check if it has increased,
            # and if it has, it will make a checkpoint of the current model
            if epoch % 10 == 0:
                print('epoch: ', epoch)
                train_loss, train_acc, train_f1_score, train_per_class_accuracy, _, _ = eval_net(
                    args, model, trainloader, criterion, run_config, house_name)
                print('train set - average loss: {:.4f}, accuracy: {:.0f}% train_f1_score: {:.4f} '
                      .format(train_loss, 100. * train_acc, train_f1_score))
                # print('train per_class accuracy', test_per_class_accuracy)
                valid_loss, valid_acc, val_f1_score, val_per_class_accuracy, _, _ = eval_net(
                    args, model, validloader, criterion, run_config, house_name, text='val')
                print('valid set - average loss: {:.4f}, accuracy: {:.0f}% val_f1_score {:.4f}: '
                      .format(valid_loss, 100. * valid_acc, val_f1_score))
                test_loss, test_acc, test_f1_score, test_per_class_accuracy, _, _ = eval_net(
                    args, model, testloader, criterion, run_config, house_name)
                print('test set - average loss: {:.4f}, accuracy: {:.0f}% test_f1_score: {:.4f} '
                      .format(test_loss, 100. * test_acc, test_f1_score))
                # print('val per_class accuracy', val_per_class_accuracy)
                # early_stopping needs the validation F1 score to check if it has decreased,
                # and if it has, it will make a checkpoint of the current model
                early_stopping(val_f1_score, model)
                if early_stopping.early_stop:
                    print("Early stopping")
                    break

        args.save_embeddings = True
        model = GIN(args.num_layers, args.num_mlp_layers, args.input_features,
                    args.hidden_dim, args.nb_classes, args.final_dropout,
                    args.learn_eps, args.graph_pooling_type,
                    args.neighbor_pooling_type,
                    args.save_embeddings).to(args.device)
        model.eval()
        # Rebuild the loader here because the weighted sampler is off for testing
        # and on for the other parts. We want embeddings in order, so the sampler
        # is off for testing.
        testDataset = GraphHouseDataset(test_graphs, test_labels)
        testloader = GraphDataLoader(
            testDataset, batch_size=args.batch_size, device=args.device,
            collate_fn=collate, seed=args.seed, shuffle=shuffle,
            split_name='fold10', fold_idx=args.fold_idx,
            save_embeddings=args.save_embeddings).train_valid_loader()

        if args.save_embeddings:
            if os.path.exists(path):
                print('loading saved checkpoint')
                state = torch.load(path)
                model.load_state_dict(state)
                # model.load_state_dict(state['state_dict'])
                # optimizer.load_state_dict(state['optimizer'])

        test_loss, test_acc, test_f1_score, test_per_class_accuracy, confusion_matrix, embedding = eval_net(
            args, model, testloader, criterion, run_config, house_name)
        total_embeddings = total_embeddings.append(embedding)
        print('embedding is', total_embeddings)
        total_acc_for_LOOCV.append(test_acc)
        total_f1_for_LOOCV.append(test_f1_score)
        total_per_class_accuracy.append(test_per_class_accuracy)
        total_confusion_matrix.append(confusion_matrix)

    house_results_dictionary = {}
    print(house_name + '\n \n', 'test_acc:\t', np.mean(total_acc_for_LOOCV),
          '\t test f1 score', np.mean(total_f1_for_LOOCV),
          '\t test_per_class_accuracy: \n',
          dict(pd.DataFrame(total_per_class_accuracy).mean()))
    house_results_dictionary['accuracy'] = np.mean(total_acc_for_LOOCV)
    house_results_dictionary['f1_score'] = np.mean(total_f1_for_LOOCV)
    house_results_dictionary['total_test_per_class_accuracy'] = total_per_class_accuracy
    house_results_dictionary['test_per_class_accuracy'] = dict(
        pd.DataFrame(total_per_class_accuracy).mean())
    house_results_dictionary['confusion_matrix'] = total_confusion_matrix
    # print('test set - average loss: {:.4f}, accuracy: {:.0f}% test_f1_score: {:.4f} '
    #       .format(test_loss, 100. * test_acc, test_f1_score))
    return house_results_dictionary, total_embeddings
def main(args, shuffle=True, decompressed_csv_path=None, ob_csv_file_path=None):
    file_names = ['ordonezB', 'houseB', 'houseC', 'houseA', 'ordonezA']
    file_names = ['ordonezB']
    # run_time_configs = ['ob_data_compressed', 'raw_data', 'ob_data_Decompressed']
    run_time_configs = ['raw_data']

    for run_configuration in run_time_configs:
        results_list = []
        print('\n\n\n\n Running configuration', run_configuration, '\n\n\n\n')
        for file_name in file_names:
            print('house is: ', file_name)
            if run_configuration == 'raw_data':
                config['ob_data_compressed'] = False
                config['ob_data_Decompressed'] = False
                config['raw_data'] = True
            elif run_configuration == 'ob_data_compressed':
                config['ob_data_compressed'] = True
                config['ob_data_Decompressed'] = False
                config['raw_data'] = False
            elif run_configuration == 'ob_data_Decompressed':
                config['ob_data_compressed'] = False
                config['ob_data_Decompressed'] = True
                config['raw_data'] = False

            if config['ob_data_compressed']:
                ob_csv_file_path = os.path.join(os.getcwd(), '../../../', 'data',
                                                file_name, 'ob_' + file_name + '.csv')
                decompressed_csv_path = os.path.join(os.getcwd(), '../../../', 'data',
                                                     file_name, 'ob_' + file_name + '.csv')
            elif config['raw_data']:
                ob_csv_file_path = os.path.join(os.getcwd(), '../../../', 'data',
                                                file_name, file_name + '.csv')
                decompressed_csv_path = os.path.join(os.getcwd(), '../../../', 'data',
                                                     file_name, file_name + '.csv')
            elif config['ob_data_Decompressed']:
                ob_csv_file_path = os.path.join(os.getcwd(), '../../../', 'data',
                                                file_name, 'ob_' + file_name + '.csv')
                decompressed_csv_path = os.path.join(os.getcwd(), '../../../', 'data',
                                                     file_name, 'ob_decompressed_' + file_name + '.csv')

            # # set up seeds, args.seed supported
            # torch.manual_seed(seed=args.seed)
            # np.random.seed(seed=args.seed)
            is_cuda = not args.disable_cuda and torch.cuda.is_available()
            is_cuda = False  # force CPU
            if is_cuda:
                args.device = torch.device("cuda:" + str(args.device))
                torch.cuda.manual_seed_all(seed=args.seed)
            else:
                args.device = torch.device("cpu")

            if config['raw_data']:
                graph_path = os.path.join('../../../data', file_name, file_name + '.bin')
                # graph_path = os.path.join('../../../data/all_houses/all_houses_raw.bin')
            elif config['ob_data_compressed']:
                graph_path = os.path.join('../../../data', file_name, 'ob_' + file_name + '.bin')
            elif config['ob_data_Decompressed']:
                decompressedGraphPath = os.path.join('../../../data', file_name, file_name + '.bin')
                graph_path = os.path.join('../../../data', file_name, 'ob_' + file_name + '.bin')

            graphs = []
            labels = []
            if not os.path.exists(graph_path):
                print('\n\n\n\n')
                print('*******************************************************************')
                print('\t\t\t\t\t' + file_name + '\t\t\t\t\t\t\t')
                print('*******************************************************************')
                print('\n\n\n\n')
                nodes = pd.read_csv('../../../data/' + file_name + '/nodes.csv')
                edges = pd.read_csv('../../../data/' + file_name + '/bidrectional_edges.csv')
                if config['ob_data_compressed']:
                    house = pd.read_csv('../../../data/' + file_name + '/ob_' + file_name + '.csv')
                    lastChangeTimeInMinutes = pd.read_csv(
                        '../../../data/' + file_name + '/' + 'ob-house' + '-sensorChangeTime.csv')
                elif config['raw_data']:
                    house = pd.read_csv('../../../data/' + file_name + '/' + file_name + '.csv')
                    lastChangeTimeInMinutes = pd.read_csv(
                        '../../../data/' + file_name + '/' + 'house' + '-sensorChangeTime.csv')
                u = edges['Src']
                v = edges['Dst']

                # Create one graph per row of the house CSV.
                # Combine features like this for each node:
                # Value, Place_in_House, Type, Last_change_Time_in_Second
                for i in range(len(house)):
                    # for i in range(5000):
                    feature = []
                    flag = 0
                    prev_node_value = 0
                    prev_node_change_time = 0
                    # Define Graph
                    g = dgl.graph((u, v))
                    node_num = 0
                    total_nodes = len(nodes)
                    # Add Features
                    for j in range(total_nodes - 1):
                        if nodes.loc[j, 'Type'] == 1:
                            node_value = -1
                            node_place_in_house = nodes.loc[j, 'place_in_house']
                            node_type = nodes.loc[j, 'Type']
                            feature.append([node_value, node_place_in_house, node_type, -1])
                            node_num += 1
                            continue
                        if flag == 0:
                            node_value = house.iloc[i, 4 + j - node_num]
                            last_change_time_in_minutes = lastChangeTimeInMinutes.iloc[i, 4 + j - node_num]
                            node_place_in_house = nodes.loc[j, 'place_in_house']
                            node_type = nodes.loc[j, 'Type']
                            feature.append([node_value, node_place_in_house, node_type,
                                            last_change_time_in_minutes])
                            if nodes.loc[j, 'Object'] == nodes.loc[j + 1, 'Object']:
                                prev_node_value = node_value
                                prev_node_change_time = last_change_time_in_minutes
                                flag = 1
                        else:
                            node_num += 1
                            node_place_in_house = nodes.loc[j, 'place_in_house']
                            node_type = nodes.loc[j, 'Type']
                            feature.append([prev_node_value, node_place_in_house, node_type,
                                            prev_node_change_time])
                            if nodes.loc[j, 'Object'] != nodes.loc[j + 1, 'Object']:
                                flag = 0
                    feature.append([house.loc[i, 'time_of_the_day'], -1, -1, -1])
                    g.ndata['attr'] = torch.tensor(feature)
                    # Give Label
                    try:
                        mappedActivity = config['merging_activties'][house.iloc[i, 2]]
                        labels.append(getIDFromClassName(mappedActivity, config))
                    except:
                        activity = house.iloc[i, 2]
                        labels.append(getIDFromClassName(activity, config))
                    graphs.append(g)
                graph_labels = {"glabel": torch.tensor(labels)}
                save_graphs(graph_path, graphs, graph_labels)
            else:
                graphs, labels = load_graphs(graph_path)
                labels = list(labels['glabel'].numpy())
                if config['ob_data_Decompressed']:
                    DecompressedGraphs, DecompressedLabels = load_graphs(decompressedGraphPath)
                    DecompressedLabels = list(DecompressedLabels['glabel'].numpy())
            print(len(graphs))

            total_num_iteration_for_LOOCV = 0
            total_acc_for_LOOCV = []
            total_f1_for_LOOCV = []
            total_per_class_accuracy = []
            total_confusion_matrix = []
            score = 0
            accuracy = 0
            df = None

            # read csv files
            house_name, all_test_loss, all_test_acc, all_test_f1_score, \
                all_test_per_class_accuracy, all_test_confusion_matrix = [], [], [], [], [], []
            house_name_list = ['ordonezB', 'houseB', 'houseC', 'houseA', 'ordonezA']
            decompressed_csv = pd.read_csv(decompressed_csv_path)
            compressed_csv = pd.read_csv(ob_csv_file_path)
            uniqueIndex = getUniqueStartIndex(compressed_csv)
            # Required in the ob Decompressed case, when you want the test index from
            # the decompressed CSV rather than from the OB CSV
            uniqueIndex_decompressed = getUniqueStartIndex(decompressed_csv)
            # Activities are mapped as per the config, generalizing the activities
            # that are not present in all CSVs
            loo = LeaveOneOut()

            for train_index, test_index in loo.split(uniqueIndex):
                model = GIN(args.num_layers, args.num_mlp_layers, args.input_features,
                            args.hidden_dim, args.nb_classes, args.final_dropout,
                            args.learn_eps, args.graph_pooling_type,
                            args.neighbor_pooling_type,
                            args.save_embeddings).to(args.device)
                optimizer = optim.Adam(model.parameters(), lr=args.lr)
                scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=10, verbose=True)
                print('----------------------------------------------------------------------------------------------')
                print('\n\n split: ', total_num_iteration_for_LOOCV)
                total_num_iteration_for_LOOCV += 1

                # Get start and end of the test dataset
                start, end = getStartAndEndIndex(compressed_csv, uniqueIndex[test_index])
                # Build the train set by skipping everything between test start and
                # test end; the rest is train.
                train_graphs = graphs[:start] + graphs[end:]
                train_labels = labels[:start] + labels[end:]
                # Divide into train and val
                val_graphs = train_graphs[:int(len(train_graphs) * args.split_ratio)]
                val_labels = train_labels[:int(len(train_labels) * args.split_ratio)]
                train_graphs = train_graphs[int(len(train_graphs) * args.split_ratio):]
                train_labels = train_labels[int(len(train_labels) * args.split_ratio):]
                # Only the test index is picked from the decompressed data because
                # decompression is only applied during evaluation.
                if config['ob_data_Decompressed']:
                    start, end = getStartAndEndIndex(decompressed_csv,
                                                     uniqueIndex_decompressed[test_index])
                    test_graphs = DecompressedGraphs[start:end]
                    test_labels = DecompressedLabels[start:end]
                else:
                    test_graphs = graphs[start:end]
                    test_labels = labels[start:end]
                # This is the last split and the test set has one element in it; skip it
                # and continue, because it would otherwise break the code. Kind of an easy fix.
                if start == end:
                    continue

                trainDataset = GraphHouseDataset(train_graphs, train_labels)
                valDataset = GraphHouseDataset(val_graphs, val_labels)
                testDataset = GraphHouseDataset(test_graphs, test_labels)
                trainloader = GraphDataLoader(
                    trainDataset, batch_size=args.batch_size, device=args.device,
                    collate_fn=collate, seed=args.seed, shuffle=shuffle,
                    split_name='fold10', fold_idx=args.fold_idx,
                    save_embeddings=args.save_embeddings).train_valid_loader()
                validloader = GraphDataLoader(
                    valDataset, batch_size=args.batch_size, device=args.device,
                    collate_fn=collate, seed=args.seed, shuffle=shuffle,
                    split_name='fold10', fold_idx=args.fold_idx,
                    save_embeddings=args.save_embeddings).train_valid_loader()
                testloader = GraphDataLoader(
                    testDataset, batch_size=args.batch_size, device=args.device,
                    collate_fn=collate, seed=args.seed, shuffle=shuffle,
                    split_name='fold10', fold_idx=args.fold_idx,
                    save_embeddings=args.save_embeddings).train_valid_loader()
                # or split_name='rand', split_ratio=0.7

                criterion = nn.CrossEntropyLoss()  # default reduction is 'mean'

                # Training
                training(model, trainloader, validloader, optimizer, criterion,
                         scheduler, early_stopping)

                # Load the best model saved by early stopping
                path = './checkpoint.pth'
                if os.path.isfile(path):
                    print("=> loading checkpoint '{}'".format(path))
                    checkpoint = torch.load(path, map_location=torch.device('cpu'))
                    model.load_state_dict(checkpoint)
                    # optimizer.load_state_dict(checkpoint['optimizer'])
                    print("=> loaded checkpoint '{}'".format(path))
                else:
                    print("=> no checkpoint found at '{}'".format(path))

                if len(testloader) != 0:
                    test_loss, test_acc, test_f1_score, test_per_class_accuracy, test_confusion_matrix = eval_net(
                        args, model, testloader, criterion, text='test')
                    total_acc_for_LOOCV.append(test_acc)
                    total_f1_for_LOOCV.append(test_f1_score)
                    total_per_class_accuracy.append(test_per_class_accuracy)
                    total_confusion_matrix.append(test_confusion_matrix)
                    print('test set - average loss: {:.4f}, accuracy: {:.0f}% test_f1_score: {:.4f} '
                          .format(test_loss, 100. * test_acc, test_f1_score))

            house_results_dictionary = {}
            print(file_name + '\n \n', 'test_acc:\t', np.mean(total_acc_for_LOOCV),
                  '\t test f1 score', np.mean(total_f1_for_LOOCV),
                  '\t test_per_class_accuracy: \n',
                  dict(pd.DataFrame(total_per_class_accuracy).mean()))
            house_results_dictionary['accuracy'] = np.mean(total_acc_for_LOOCV)
            house_results_dictionary['f1_score'] = np.mean(total_f1_for_LOOCV)
            house_results_dictionary['test_per_class_accuracy'] = dict(
                pd.DataFrame(total_per_class_accuracy).mean())
            house_results_dictionary['confusion_matrix'] = total_confusion_matrix
            house_results_dictionary['house_name'] = file_name
            results_list.append(house_results_dictionary)

            if not os.path.exists(os.path.join('../../../logs', 'singleHouseGraphClassification')):
                os.mkdir(os.path.join('../../../logs', 'singleHouseGraphClassification'))
            print('\n\n\n\n\n\n Finished house', file_name, '\n\n\n\n')
            if config['ob_data_compressed']:
                print('saved')
                np.save(os.path.join('../../../logs/singleHouseGraphClassification',
                                     'ob_compressed.npy'), results_list)
            elif config['ob_data_Decompressed']:
                print('saved')
                np.save(os.path.join('../../../logs/singleHouseGraphClassification',
                                     'ob_decompressed.npy'), results_list)
            elif config['raw_data']:
                print('saved')
                np.save(os.path.join('../../../logs/singleHouseGraphClassification',
                                     'raw.npy'), results_list)