##############################################################################
# The figure displays one sample of the SST dataset, which is a
# constituency parse tree with its nodes labeled with sentiment. To speed
# things up, let's build a tiny set with 5 sentences and take a look
# at the first one:

import dgl
from dgl.data.tree import SST
from dgl.data import SSTBatch

# Each sample in the dataset is a constituency tree. The leaf nodes
# represent words. Each word is an int value stored in the "x" field.
# The non-leaf nodes carry a special word PAD_WORD. The sentiment
# label is stored in the "y" feature field.
trainset = SST(mode='tiny')  # the "tiny" set has only 5 trees
tiny_sst = trainset.trees
num_vocabs = trainset.num_vocabs
num_classes = trainset.num_classes

vocab = trainset.vocab  # vocabulary dict: word -> id
inv_vocab = {v: k for k, v in vocab.items()}  # inverted vocabulary dict: id -> word

a_tree = tiny_sst[0]
a_tree.draw()
for token in a_tree.ndata['x'].tolist():
    if token != trainset.PAD_WORD:
        print(inv_vocab[token], end=" ")
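##############################################################################
# A quick sanity check (a minimal sketch, not part of the original tutorial):
# the sentiment label lives in the "y" field, and the root is the node with
# out-degree 0 because edges point from children to parents, as the training
# code below also assumes. This snippet assumes a single root per tree.
root_id = [i for i in range(a_tree.number_of_nodes())
           if a_tree.out_degree(i) == 0][0]
print('\nroot sentiment label:', a_tree.ndata['y'][root_id].item())
print('number of nodes:', a_tree.number_of_nodes())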
def main(args):
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)
    best_epoch = -1
    best_dev_acc = 0

    cuda = args.gpu >= 0
    device = th.device('cuda:{}'.format(args.gpu)) if cuda else th.device('cpu')
    if cuda:
        th.cuda.set_device(args.gpu)

    trainset = SST()
    train_loader = DataLoader(dataset=trainset,
                              batch_size=args.batch_size,
                              collate_fn=batcher(device),
                              shuffle=True,
                              num_workers=0)
    devset = SST(mode='dev')
    dev_loader = DataLoader(dataset=devset,
                            batch_size=100,
                            collate_fn=batcher(device),
                            shuffle=False,
                            num_workers=0)
    testset = SST(mode='test')
    test_loader = DataLoader(dataset=testset,
                             batch_size=100,
                             collate_fn=batcher(device),
                             shuffle=False,
                             num_workers=0)

    model = TreeLSTM(trainset.num_vocabs,
                     args.x_size,
                     args.h_size,
                     trainset.num_classes,
                     args.dropout,
                     cell_type='childsum' if args.child_sum else 'nary',
                     pretrained_emb=trainset.pretrained_emb).to(device)
    print(model)

    params_ex_emb = [x for x in list(model.parameters())
                     if x.requires_grad and x.size(0) != trainset.num_vocabs]
    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)

    optimizer = optim.Adagrad([
        {'params': params_ex_emb, 'lr': args.lr, 'weight_decay': args.weight_decay},
        {'params': params_emb, 'lr': 0.1 * args.lr}])

    dur = []
    for epoch in range(args.epochs):
        t_epoch = time.time()
        model.train()
        for step, batch in enumerate(train_loader):
            g = batch.graph
            n = g.number_of_nodes()
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            if step >= 3:
                t0 = time.time()  # tik

            logits = model(batch, h, c)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction='sum')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step >= 3:
                dur.append(time.time() - t0)  # tok

            if step > 0 and step % args.log_every == 0:
                pred = th.argmax(logits, 1)
                acc = th.sum(th.eq(batch.label, pred))
                root_ids = [i for i in range(batch.graph.number_of_nodes())
                            if batch.graph.out_degree(i) == 0]
                root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] ==
                                  pred.cpu().data.numpy()[root_ids])

                print("Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | "
                      "Root Acc {:.4f} | Time(s) {:.4f}".format(
                          epoch, step, loss.item(),
                          1.0 * acc.item() / len(batch.label),
                          1.0 * root_acc / len(root_ids),
                          np.mean(dur)))
        print('Epoch {:05d} training time {:.4f}s'.format(epoch, time.time() - t_epoch))

        # eval on dev set
        accs = []
        root_accs = []
        model.eval()
        for step, batch in enumerate(dev_loader):
            g = batch.graph
            n = g.number_of_nodes()
            with th.no_grad():
                h = th.zeros((n, args.h_size)).to(device)
                c = th.zeros((n, args.h_size)).to(device)
                logits = model(batch, h, c)

            pred = th.argmax(logits, 1)
            acc = th.sum(th.eq(batch.label, pred)).item()
            accs.append([acc, len(batch.label)])
            root_ids = [i for i in range(batch.graph.number_of_nodes())
                        if batch.graph.out_degree(i) == 0]
            root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] ==
                              pred.cpu().data.numpy()[root_ids])
            root_accs.append([root_acc, len(root_ids)])

        dev_acc = 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs])
        dev_root_acc = 1.0 * np.sum([x[0] for x in root_accs]) / np.sum([x[1] for x in root_accs])
        print("Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format(
            epoch, dev_acc, dev_root_acc))

        if dev_root_acc > best_dev_acc:
            best_dev_acc = dev_root_acc
            best_epoch = epoch
            th.save(model.state_dict(), 'best_{}.pkl'.format(args.seed))
        else:
            if best_epoch <= epoch - 10:
                break

        # lr decay
        for param_group in optimizer.param_groups:
            param_group['lr'] = max(1e-5, param_group['lr'] * 0.99)  # 10
            print(param_group['lr'])

    # test
    model.load_state_dict(th.load('best_{}.pkl'.format(args.seed)))
    accs = []
    root_accs = []
    model.eval()
    for step, batch in enumerate(test_loader):
        g = batch.graph
        n = g.number_of_nodes()
        with th.no_grad():
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            logits = model(batch, h, c)

        pred = th.argmax(logits, 1)
        acc = th.sum(th.eq(batch.label, pred)).item()
        accs.append([acc, len(batch.label)])
        root_ids = [i for i in range(batch.graph.number_of_nodes())
                    if batch.graph.out_degree(i) == 0]
        root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] ==
                          pred.cpu().data.numpy()[root_ids])
        root_accs.append([root_acc, len(root_ids)])

    test_acc = 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs])
    test_root_acc = 1.0 * np.sum([x[0] for x in root_accs]) / np.sum([x[1] for x in root_accs])
    print('------------------------------------------------------------------------------------')
    print("Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format(
        best_epoch, test_acc, test_root_acc))
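##############################################################################
# For reference, a minimal argparse sketch covering the flags that main(args)
# reads above (--gpu, --seed, --batch-size, --x-size, --h-size, --dropout,
# --child-sum, --lr, --weight-decay, --epochs, --log-every). This is an
# assumption about how the script is driven; the defaults are placeholders,
# not the author's settings.
import argparse

def build_arg_parser():
    parser = argparse.ArgumentParser(description='TreeLSTM on SST (sketch)')
    parser.add_argument('--gpu', type=int, default=-1)      # -1 means CPU
    parser.add_argument('--seed', type=int, default=41)
    parser.add_argument('--batch-size', type=int, default=25)
    parser.add_argument('--x-size', type=int, default=300)
    parser.add_argument('--h-size', type=int, default=150)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--child-sum', action='store_true')
    parser.add_argument('--lr', type=float, default=0.05)
    parser.add_argument('--weight-decay', type=float, default=1e-4)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--log-every', type=int, default=5)
    return parser

# Usage (sketch): main(build_arg_parser().parse_args())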
##############################################################################
# The figure displays one sample of the SST dataset, which is a
# constituency parse tree with its nodes labeled with sentiment. Let's load
# the dataset and take a look at the first tree:

import sys

import dgl
import pysnooper
from dgl.data.tree import SST
from dgl.data import SSTBatch

# Each sample in the dataset is a constituency tree. The leaf nodes
# represent words. Each word is an int value stored in the "x" field.
# The non-leaf nodes carry a special word PAD_WORD. The sentiment
# label is stored in the "y" feature field.
trainset = SST()  # default mode='train'; use SST(mode='tiny') for the 5-tree toy set
tiny_sst = trainset.trees
num_vocabs = trainset.num_vocabs
num_classes = trainset.num_classes

vocab = trainset.vocab  # vocabulary dict: word -> id
inv_vocab = {v: k for k, v in vocab.items()}  # inverted vocabulary dict: id -> word

a_tree = tiny_sst[0]
for token in a_tree.ndata['x'].tolist():
    if token != trainset.PAD_WORD:
        print(inv_vocab[token], end=" ")

##############################################################################
# Step 1: batching
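##############################################################################
# A minimal batching sketch (a sketch, assuming the trees carry the 'x', 'y'
# and 'mask' node features and that SSTBatch exposes graph/mask/wordid/label
# fields, which is what the training code in this file relies on): dgl.batch
# merges a list of trees into one graph whose node features can be gathered
# from ndata and handed to the model as a single batch.
import torch as th
from torch.utils.data import DataLoader

def batcher(dev):
    def batcher_dev(batch):
        batch_trees = dgl.batch(batch)
        return SSTBatch(graph=batch_trees,
                        mask=batch_trees.ndata['mask'].to(dev),
                        wordid=batch_trees.ndata['x'].to(dev),
                        label=batch_trees.ndata['y'].to(dev))
    return batcher_dev

# Usage (sketch): iterate over the trees in batches of 5 on CPU.
example_loader = DataLoader(dataset=tiny_sst,
                            batch_size=5,
                            collate_fn=batcher(th.device('cpu')),
                            shuffle=False,
                            num_workers=0)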
def main():
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    bestAll_epoch = -1
    bestRoot_epoch = -1
    bestRoot_acc = 0
    bestAll_acc = 0

    trainset = SST()  # default mode='train'
    vocab = trainset.vocab  # shared vocabulary, including train, dev and test
    word_to_index = {word: id for word, id in vocab.items()}  # vocabulary dict: word -> id
    train_loader = DataLoader(dataset=trainset,
                              batch_size=batch_size,
                              collate_fn=batcher(),
                              shuffle=True,
                              num_workers=0)
    testset = SST(mode='test')
    test_loader = DataLoader(dataset=testset,
                             batch_size=100,
                             collate_fn=batcher(),
                             shuffle=False,
                             num_workers=0)

    model = RvNN(word_to_index,
                 trainset.num_vocabs,
                 emb_dim,
                 trainset.num_classes,
                 dropout).cuda()
    print(model)

    # Keep the embedding parameters separate from the other parameters so
    # they can use different learning rates.
    params_ex_emb = [x for x in list(model.parameters())
                     if x.requires_grad and x.size(0) != trainset.num_vocabs]
    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)  # Xavier (uniform) initialization

    optimizer = optim.Adagrad([
        {'params': params_ex_emb, 'lr': LR, 'weight_decay': L2_reg},
        {'params': params_emb, 'lr': emb_LR}])

    pt = table(["epoch", "Test Acc", "Root Acc", "Epoch Time"])
    t_epoch = time.time()  # start time
    start = t_epoch
    for epoch in range(Epoch):
        model.train()
        for step, batch in enumerate(train_loader):
            g = batch.graph
            # g.set_n_initializer(dgl.init.zero_initializer)
            n = g.number_of_nodes()

            logits = model(batch)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction='sum')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        end = time.time()

        # test
        accs = []
        root_accs = []
        model.eval()
        for step, batch in enumerate(test_loader):
            g = batch.graph
            g.set_n_initializer(dgl.init.zero_initializer)
            n = g.number_of_nodes()
            with torch.no_grad():  # disable gradient computation
                logits = model(batch)              # (n, 5)
            pred = torch.argmax(logits, 1)         # (n,)
            acc = torch.sum(torch.eq(batch.label, pred)).item()
            accs.append([acc, len(batch.label)])
            root_ids = [i for i in range(n) if batch.graph.out_degree(i) == 0]
            # root_acc = torch.sum(batch.label.cpu().data.numpy()[root_ids] == pred.cpu().data.numpy()[root_ids])
            root_acc = torch.sum(batch.label.data[root_ids] == pred.data[root_ids]).item()
            root_accs.append([root_acc, len(root_ids)])

        acc = 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs])
        root_acc = 1.0 * np.sum([x[0] for x in root_accs]) / np.sum([x[1] for x in root_accs])
        if acc > bestAll_acc:
            bestAll_acc = acc
            bestAll_epoch = epoch
        if root_acc > bestRoot_acc:
            bestRoot_acc = root_acc
            bestRoot_epoch = epoch
        pt.row([epoch, acc, root_acc, end - start])
        start = end

    # summary, including the total training time
    print("BestAll_epoch_test: {} BestAll_acc_test: {:.4f}".format(bestAll_epoch, bestAll_acc))
    print("BestRoot_epoch_test: {} BestRoot_acc_test: {:.4f}".format(bestRoot_epoch, bestRoot_acc))
    print("Total time:", time.time() - t_epoch)
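##############################################################################
# main() above reads several module-level names (seed, batch_size, emb_dim,
# dropout, LR, emb_LR, L2_reg, Epoch) that are not shown in this snippet.
# Below is a minimal configuration sketch; the concrete values are
# assumptions, not the author's settings.
seed = 41
batch_size = 25
emb_dim = 300        # word embedding size fed to RvNN (assumed)
dropout = 0.5
LR = 0.05            # learning rate for non-embedding parameters (assumed)
emb_LR = 0.1 * LR    # smaller learning rate for the embedding table (assumed)
L2_reg = 1e-4        # weight decay (assumed)
Epoch = 10           # number of training epochs (assumed)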
                    wordid=batch_trees.ndata['x'].to(dev),
                    label=batch_trees.ndata['y'].to(dev))
    return batcher_dev


if __name__ == '__main__':
    device = th.device('cpu')
    x_size = 256
    h_size = 256
    dropout = 0.5
    lr = 0.05
    weight_decay = 1e-4
    epochs = 10

    # 'x' holds the word as an int id, 'y' holds the sentiment labels
    trainset = SST(mode='tiny')           # 5 trees
    tiny_sst = trainset.trees             # list of 5 DGLGraphs
    num_vocabs = trainset.num_vocabs      # 19536
    num_classes = trainset.num_classes    # 5 classes
    vocab = trainset.vocab                # OrderedDict(word -> int), 19536 entries
    print(len(vocab))
    inv_vocab = {v: k for k, v in vocab.items()}

    a_tree = tiny_sst[0]
    # print(a_tree.ndata['x'])
    # print(a_tree.ndata['x'].tolist())
    res = []
    for token in a_tree.ndata['x'].tolist():
        if token != trainset.PAD_WORD:
            # print(inv_vocab[token], end=' ')
            res.append(inv_vocab[token])  # collect the words of the sentence
def main(args):
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)
    best_epoch = -1

    cuda = args.gpu >= 0
    device = th.device('cuda:{}'.format(args.gpu)) if cuda else th.device('cpu')
    if cuda:
        th.cuda.set_device(args.gpu)

    trainset = SST()
    graphset, train_graphset, node_attrs, G, A, G0, g_wwl, rootid = Datagenerator()

    model = TreeLSTM(trainset.num_vocabs,
                     args.x_size,
                     args.h_size,
                     trainset.num_classes,
                     args.dropout,
                     # cell_type='childsum' if args.child_sum else 'nary',
                     cell_type='childsum',
                     pretrained_emb=trainset.pretrained_emb).to(device)

    params_ex_emb = [x for x in list(model.parameters())
                     if x.requires_grad and x.size(0) != trainset.num_vocabs]
    params_emb = list(model.embedding.parameters())
    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)

    optimizer = optim.Adam([
        {'params': params_ex_emb, 'lr': args.lr, 'weight_decay': args.weight_decay},
        {'params': params_emb, 'lr': 0.1 * args.lr}])
    # optimizer = optim.Adam(model.parameters(), lr=0.01)

    dur = []

    # Reorganize the node attributes read from the dataframe into lists.
    label_duration = []
    feature_name = []
    feature_name_word = []
    Roleinstance_name = []
    ActivityStart = []
    NodeID = []
    RootActivityId = []
    ParentActivityId = []
    ActivityId = []
    labelclass = []
    Tid = []
    for k, v in node_attrs.items():
        count = 0
        vec = []
        for k1, v1 in v.items():
            # print("")
            if len(v) == 2:
                if count == 0:
                    label_duration.append(v1)
                if count == 1:
                    doc = nlp(v1)
                    vec = doc.vector
                    feature_name.append(vec.tolist())
                    feature_name_word.append(v1)
                    vec = vec[0:25].tolist()
                if count == 2:
                    ActivityStart.append(v1)
                if count == 3:
                    NodeID.append(v1)
                if count == 4:
                    RootActivityId.append(v1)
                if count == 5:
                    ParentActivityId.append(v1)
                if count == 6:
                    ActivityId.append(v1)
                if count == 7:
                    Tid.append(v1)
                count = count + 1
            else:
                if count == 1:
                    label_duration.append(v1)
                if count == 2:
                    # print("2 v1", v1)
                    doc = nlp(v1)
                    vec1 = doc.vector
                    vec = vec1[0:20].tolist()
                    feature_name_word.append(v1)
                if count == 3:
                    # print("3 v1", v1)
                    doc = nlp(v1)
                    vec1 = doc.vector
                    vec.extend(vec1[0:5].tolist())
                    # ActivityStart.append(v1)
                if count == 4:
                    labelclass.append(int(v1))
                if count == 6:  # cluster
                    doc = nlp(v1)
                    vec1 = doc.vector
                    Roleinstance_name.append(v1)
                    vec.extend(vec1[0:5].tolist())
                if count == 7:  # cluster
                    doc = nlp(v1)
                    ActivityId.append(v1)
                if count == 8:
                    labelclass.append(int(v1))
                count = count + 1
        feature_name.append(vec)

    feature_name_np = np.array(feature_name)
    kernel_matrix, node_representations = wwl(g_wwl,
                                              node_features=feature_name_np,
                                              num_iterations=1)
    feature_name_np2 = np.column_stack((
        node_representations[0][0:feature_name_np.shape[0]],
        feature_name_np,
    ))
    feature_name_np_tensor = th.tensor(feature_name_np2, dtype=th.float32)

    g = graphset[0]
    n = g.number_of_nodes()
    feature_name_np_tensor1 = feature_name_np_tensor
    label_duration_tensor = th.tensor(label_duration, dtype=th.float32)
    labelclass = th.tensor(labelclass, dtype=th.float32)

    """ train part """
    label_duration_tensor1 = label_duration_tensor.type(th.FloatTensor)
    label_duration_tensor1 = label_duration_tensor1.reshape(label_duration_tensor1.shape[0], 1)

    feature_name_np_tensor_aggragte = np.zeros([feature_name_np.shape[0], 32])
    feature_name_np_tensor_aggragte_2np = np.zeros([feature_name_np.shape[0], 50])
    for i in range(feature_name_np.shape[1] - 2):
        path_all = networkx.shortest_path(G0, source=(i + 1))
        pathlist = list(path_all.values())[-1]
        for k in range(len(pathlist)):
            feature_name_np_tensor_aggragte[i] = (feature_name_np_tensor1[pathlist[k]] +
                                                  feature_name_np_tensor_aggragte[i])
        feature_name_np_tensor_aggragte_2np[i][0:32] = feature_name_np_tensor1[i]
        feature_name_np_tensor_aggragte_2np[i][32:50] = feature_name_np_tensor_aggragte[i][0:18]

    feature_name_np_tensor_aggragte_2 = torch.from_numpy(
        feature_name_np_tensor_aggragte_2np).type(torch.FloatTensor)

    import pickle
    picklefile1 = open("feature_name_np_tensor_aggragte_2np.pkl", "wb")
    pickle.dump(feature_name_np_tensor_aggragte_2np, picklefile1)
    picklefile1.close()

    ####################################################################
    labelclass_session = labelclass[rootid]

    # for epoch in range(1000):
    #     t_epoch = time.time()
    #     model.train()
    #
    #     t0 = time.time()  # tik
    #
    #     h = th.zeros((feature_name_np_tensor1.shape[0], feature_name_np_tensor1.shape[1]))
    #     c = th.zeros((feature_name_np_tensor1.shape[0], feature_name_np_tensor1.shape[1]))
    #
    #     # logits, classlogits = model(g, G, h, c, feature_name_np_tensor1)
    #     logits, classlogits = model(g, G, h, c, feature_name_np_tensor_aggragte_2, rootid, epoch)
    #     logp = logits.type(th.FloatTensor)
    #
    #     labelclass = labelclass_session.type(th.LongTensor)
    #
    #     logp = logp.reshape(k, 1)
    #     labelclass = labelclass.reshape(len(rootid))
    #
    #     loss = F.mse_loss(logp, labelclass, size_average=False)
    #
    #     logp_class = F.log_softmax(classlogits, dim=1)
    #     logp_class = logp_class.type(th.FloatTensor)
    #     logp_class = logp_class.reshape([len(rootid), 2])
    #
    #     loss1 = F.nll_loss(logp_class, labelclass)
    #
    #     labelclass = np.array(labelclass)
    #     labelclass = torch.from_numpy(labelclass).type(torch.LongTensor)
    #
    #     optimizer.zero_grad()
    #     loss1.backward()
    #     optimizer.step()
    #     dur.append(time.time() - t0)  # tok
    #     pred = logp_class.data.max(1, keepdim=True)[1]
    #     acc = pred.eq(labelclass.data.view_as(pred)).cpu().sum().item() / float(labelclass.size()[0])
    #
    #     print("Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}",
    #           epoch, loss1.item(), acc)
    #     file_handle1 = open('1029_loss_sumVMOnCreate611_nodenumtrain_bin1.txt', mode='a')
    #     print(str(epoch), file=file_handle1)
    #     print(str(loss.item()), file=file_handle1)
    #     file_handle1.close()
    #
    # th.save(model.state_dict(), 'train.pkl'.format(args.seed))

    ###############################################################################################
    """ test part """
    model.load_state_dict(th.load('train.pkl'.format(args.seed)))
    accs = []
    model.eval()
    # label_duration_tensor_test = label_duration_tensor.type(th.FloatTensor)
    label_duration_tensor_test = labelclass.type(th.FloatTensor)
    feature_name_np_tensor_test = feature_name_np_tensor
    feature_name_word_test = feature_name_word
    for step in range(500):
        g = graphset[0]
        n = g.number_of_nodes()
        with th.no_grad():
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            # NOTE: 'epoch' is only defined by the commented-out training loop above.
            logits, classlogits = model(g, G, h, c, feature_name_np_tensor_aggragte_2, rootid, epoch)
            # logp_class = classlogits
            logp_class = F.log_softmax(classlogits, dim=1)
            file_handle3 = open('logp_class.txt', mode='a')
            logp_class.numpy()
            import pickle
            picklefile = open("logp_class_abnormal_normal.pkl", "wb")
            pickle.dump(logp_class, picklefile)
            picklefile.close()
            print("logp_class", logp_class.numpy().tolist(), file=file_handle3)
            file_handle3.close()
            logp_class = logp_class.type(th.FloatTensor)
            logp = logits.type(th.FloatTensor)
            # pred = logp_class.data.max(1, keepdim=True)[1]

    import pandas as pd
    logpnp = np.array(logp)
    test_acc = 91
    label_duration_tensor_test = th.tensor(label_duration_tensor_test, dtype=th.int)
    label_duration_tensor_test = label_duration_tensor_test.reshape(len(rootid), 1)

    """ calculate mape """
    loss_test = mape(logp, label_duration_tensor_test)

    logp = logp.reshape([1, len(rootid)])
    label_duration_tensor_test = label_duration_tensor_test.reshape([1, len(rootid)])
    # label_duration_tensor_test = label_duration_tensor_test.reshape([1, 200])
    print("label_duration_tensor_test", label_duration_tensor_test.shape)
    print("logp", logp.shape)
    # logp1.dtype = 'float32'
    # print("logp", logp1.dtype)
    label_duration_tensor_test1 = np.array(label_duration_tensor_test, dtype=np.int32)
    # label_duration_tensor_test.dtype = 'float32'
    print("label_duration_tensor_test", label_duration_tensor_test.dtype)
    label_duration_tensor_test1 = label_duration_tensor_test1.tolist()[0]
    print("label_duration_tensor_test1", len(label_duration_tensor_test1))
    print("label_duration_tensor_test1", label_duration_tensor_test1)

    distribution = torch.argmax(logp_class, dim=1)
    print("distribution", distribution)
    # logp1 = distribution.reshape([4, 261])
    logp1 = np.array(distribution, dtype=np.int32)

    selector = SelectKBest(chi2, k=2)
    input = []
    for i in range(len(feature_name_np_tensor_aggragte_2.numpy().tolist())):
        input.append(list(map(abs, feature_name_np_tensor_aggragte_2.numpy().tolist()[i])))
    X = feature_name_np_tensor_aggragte_2np
    # print("X_new.scores_", selector.transform(X))

    logp1 = logp1.tolist()
    listlog = distribution.numpy().tolist()
    label_duration_tensor_test1_np = np.array(label_duration_tensor_test1)
    Abnormlist_np = np.where((distribution == 2) | (distribution == 1))

    # K = cos(logp_class, logp_class.t())
    K = getdistances(logp_class)
    for i in range(Abnormlist_np[0].shape[0]):
        causeroot = []
        similarity = []
        if i != 0:
            path = networkx.shortest_path(G0, source=Abnormlist_np[0][i])
            print("path", path)
            list_path = list(path.values())
            print("list_path", list_path)
            rootcausedep = []
            for iii in range(len(list_path)):
                for jjj in range(len(list_path[iii])):
                    if list_path[iii][jjj] not in rootcausedep and (
                            list_path[iii][jjj] != Abnormlist_np[0][i]):
                        rootcausedep.append(list_path[iii][jjj])
                        # similarity.append(K[Abnormlist_np[0][i]][list_path[iii][jjj]])
            print("rootcausedep", rootcausedep)
            # similarity
            for j in range(len(rootcausedep)):
                KJ = 0
                for jk in range(len(rootcausedep)):
                    if jk != j:
                        KJ = K[rootcausedep[j]][rootcausedep[jk]] + KJ
                KJ = KJ + K[rootcausedep[j]][Abnormlist_np[0][i]]
                if KJ != 0:
                    similarity.append(KJ)
            print("similarity", similarity)
            if len(similarity) > 0:
                max_index = similarity.index(max(similarity, key=abs))
                print("rootcausedep", rootcausedep, rootcausedep[max_index])

    print("test 0", sum(distribution == 0))
    print("test 1", sum(distribution == 1))
    print("test 2", sum(distribution == 2))
    print("test 3", sum(distribution == 3))
    print("label 0", label_duration_tensor_test1.count(0))
    print("label 1", label_duration_tensor_test1.count(1))
    print("label 2", label_duration_tensor_test1.count(2))
    print("label 3", label_duration_tensor_test1.count(3))
    # logp1
    print("logp1", len(logp1))
    print("label_duration_tensor_test1", len(label_duration_tensor_test1))
    f1score = sk.metrics.f1_score(logp1, label_duration_tensor_test1, average='micro')
    print("f1score", f1score)
    print("Epoch {:05d} | Test Acc {:.4f} | MAPE Loss {:.4f} | F1 {:.4f}".format(
        best_epoch, test_acc, loss_test, f1score))

    # loss_test = mape(logp, label_duration_tensor_test[0:522])
    abs_duration = abs(label_duration_tensor_test - logp)
    # abs_duration = abs(label_duration_tensor_test[0:522] - logp)
    abs_duration = abs_duration
    id = th.where(abs_duration > 0.05)
    id1 = th.where(abs_duration > 0.1)
    id11 = th.where(abs_duration >= 1)
    id4 = th.where(abs_duration > 0.4)
    id44 = np.array(id[0])
    id44list = id44.tolist()

    feature_name_wordkk = []
    ActivityStartkk = []
    ActivityIdkk = []
    label_durationkk = []
    logpkk = []

    abs_duration = (abs_duration).numpy()
    idk = heapq.nlargest(3000, range(len(abs_duration)), abs_duration.__getitem__)
    idklist = idk
    id44list = idklist
    logpk = []
    print("len(idklist)", len(idklist))
    print("len(feature_name_word_test)", len(feature_name_word_test))
    for i in range(len(id44list)):
        print("i", i)
        feature_name_wordkk.append(feature_name_word_test[id44list[i]])
        label_durationkk.append(label_duration[id44list[i]])
        logpkk.append(abs_duration[id44list[i]])
        logpk.append(logp[id44list[i]])

    print("id0.05", id)
    print("id0.05", len(id[0]))
    print("id0.1", id1)
    print("id0.1", len(id1[0]))
    print("id0.01", id11)
    print("id0.01", len(id11[0]))
    print("id0.01", len(id11[0]) / 100)
    print("AnomalyID>0.01", len(id44list))

    """ save result txt """
    file_handle2 = open('1029sum_fristVMOnCreate611_nodenum_bin1.txt', mode='a')
    from collections import Counter
    import operator
    # count how often each activity name appears
    a = dict(Counter(feature_name_wordkk))
    # sort the resulting words by frequency
    b = sorted(a.items(), key=operator.itemgetter(1), reverse=True)
    for i in range(len(id44list)):
        print("index", str(i), file=file_handle2)
        print("indexcsv", str(id44list[i]), file=file_handle2)
        print("activity name", str(feature_name_wordkk[i]), file=file_handle2)
        # print("ActivityId", str(ActivityIdkk[i]), file=file_handle2)
        print("label duration", str(label_durationkk[i]), file=file_handle2)
        print("abs_duration", logpkk[i], file=file_handle2)
        print("predict duration", logpk[i], file=file_handle2)
    file_handle2.close()

    file_handle3 = open('0127sumaccVMOnCreate_nodenum_bin1.txt', mode='a')
    print("ActivityId", str(b), file=file_handle3)
    file_handle3.close()

    print('------------------------------------------------------------------------------------')
    print("Epoch {:05d} | Test Acc {:.4f} | MAPE Loss {:.4f} | F1 {:.4f}".format(
        best_epoch, test_acc, loss_test, f1score))

    file_handle4 = open('0127mean_mapeVMOnCreate611_nodenum_bin1.txt', mode='a')
    print("mape", file=file_handle4)
    print(str(loss_test), file=file_handle4)
    file_handle4.close()

    file_handle1 = open('0127_loss_sumVMOnCreate611_nodenumtest.txt', mode='a')
    # print(str(epoch), file=file_handle1)
    print(str(test_acc), file=file_handle1)
    # print(str(loss.item()), file=file_handle1)
    file_handle1.close()
    # print(str(), file=file_handle1)

    print("node_representations", node_representations)
    print("rootid", rootid)
    label_session = []
    for i in range(len(rootid)):
        label_session.append(label_duration_tensor_test1[i])
    print("sessionlabel", label_session)
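##############################################################################
# The test part above calls a mape() helper that is not shown in this file.
# Below is a minimal sketch of a mean absolute percentage error in PyTorch;
# it is an assumption about that helper's behaviour (including the epsilon
# guard against division by zero), not the author's implementation.
import torch as th

def mape(pred, target, eps=1e-8):
    # flatten both tensors and average |target - pred| / |target|
    pred = pred.float().reshape(-1)
    target = target.float().reshape(-1)
    return th.mean(th.abs((target - pred) / (target.abs() + eps))).item()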