def test():
    config = CONFIG()
    print('Loading word2id ===========================')
    word2id = load_word2id(config.word2id_path)
    config.vocab_size = len(word2id)
    print('Loading test corpus =======================')
    x, y = load_corpus(config.test_path, word2id, max_sen_len=config.max_sen_len)
    # x, y = x[:10], y[:10]
    model = TextCNN(config)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(config.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        yhat = model.predict(sess, x)
        cat, cat2id = cat_to_id()
        y_cls = np.argmax(y, 1)
        # Evaluation
        print("Precision, Recall and F1-Score...")
        print(metrics.classification_report(y_cls, yhat, target_names=cat))
        # Confusion matrix
        print("Confusion Matrix...")
        cm = metrics.confusion_matrix(y_cls, yhat)
        print(cm)
def train():
    config = CONFIG()
    print('Loading word2id ===========================')
    word2id = load_word2id(config.word2id_file)
    config.vocab_size = len(word2id)
    print('Loading word2vec ==========================')
    word2vec = load_corpus_word2vec(config.corpus_w2v_file)
    print('Loading train corpus ======================')
    train_data = load_corpus(config.train_file, word2id, max_sen_len=config.max_sen_len)
    x_tr = train_data[:-1]
    y_tr = train_data[-1]
    print('Loading test corpus =======================')
    test_data = load_corpus(config.test_file, word2id, max_sen_len=config.max_sen_len)
    x_te = test_data[:-1]
    y_te = test_data[-1]
    print('Training model ============================')
    lstm = LSTM(config, embeddings=word2vec)  # pass the configured instance, not the CONFIG class
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        lstm.fit(sess, x_tr, y_tr, x_te, y_te, config.save_dir, config.print_per_batch)
def __init__(self):
    super(Attacker, self).__init__()  # call the parent-class initializer
    self.attack = False
    self.conv1 = GCNConv(CONFIG.FeatureLen(), 180)
    self.conv2 = GCNConv(180, 120)
    self.lin1 = torch.nn.Linear(120, CONFIG.ClassNum())
    # This time the parameters to be tuned are the inputs themselves
    self.X = torch.tensor([1], dtype=torch.float)
    self.edges = torch.tensor([[], []], dtype=torch.long)
def predict(x, label=False, prob=False):
    """
    :param x: list of sentences
    :param label: if True, output class labels (pos/neg) instead of the default 0/1
    :param prob: if True, output probabilities
    :return: sentiment predictions
    """
    if label and prob:
        raise Exception("label and prob cannot both be True!")
    x = sent_to_id(x)
    config = CONFIG()
    model = TextCNN(config)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(config.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        y = model.predict(sess, x, prob=prob)
        if label:
            cat, _ = cat_to_id()
            y = [cat[w] for w in y.tolist()]
        return y
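# A minimal usage sketch for predict(), assuming a trained checkpoint exists
# under config.save_dir; the sample sentences below are hypothetical inputs.
samples = ['这部电影真好看', '太无聊了']   # hypothetical positive / negative examples
print(predict(samples))               # default: 0/1 class ids
print(predict(samples, label=True))   # as labels, e.g. ['pos', 'neg']
print(predict(samples, prob=True))    # as class probabilities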
def Cluster():
    # Get the features of the samples to be clustered
    features = OpData.GetFeatures()
    features = features[CONFIG.TargetBegin():CONFIG.TargetEnd()]
    # Build a clustering classifier
    cluster = KMeans(n_clusters=CONFIG.ClassNum())
    result = cluster.fit_predict(features)
    # Save the clustering result
    np.save("cluster_result.npy", result)
    np.savetxt("cluster_result.csv", result, delimiter=',', fmt="%d")
    # Count how many samples fall into each cluster
    class_num = np.zeros((CONFIG.ClassNum()))
    for r in result:
        class_num[r] += 1
    np.savetxt("class_num.csv", class_num, delimiter=',', fmt="%d")
def init_add_edges() -> torch.Tensor:
    """
    The most basic initialization: injected node 1 connects to test nodes
    1~100, node 2 to nodes 101~200, and so on, up to 50000.
    :return: the edge-index tensor used to initialize the added adjacency
    """
    # The three lists record edge sources x, destinations y, and weights data
    x = []
    y = []
    data = []
    for i in range(0, 500):
        for j in range(0, 100):
            src = CONFIG.TargetEnd() + i
            dst = CONFIG.TargetBegin() + i * 100 + j
            assert (src >= CONFIG.TargetEnd()) and (src < CONFIG.TargetEnd() + 500)
            assert (dst >= CONFIG.TargetBegin()) and (dst < CONFIG.TargetEnd())
            # store both directions so the graph stays undirected
            x.append(src)
            y.append(dst)
            x.append(dst)
            y.append(src)
            data.append(1)
            data.append(1)
    assert len(x) == len(y) == len(data)
    # Convert to a 2 x E edge-index tensor
    edges = [x, y]
    return torch.tensor(edges, dtype=torch.long).to(device)
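# A short sanity-check sketch for init_add_edges(), assuming
# CONFIG.TargetEnd() - CONFIG.TargetBegin() == 50000: 500 injected nodes times
# 100 targets gives 50000 undirected edges, stored in both directions.
edges = init_add_edges()
assert edges.shape == (2, 500 * 100 * 2)
assert edges.dtype == torch.long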
def train():
    config = CONFIG()
    print('Loading word2id ===========================')
    word2id = load_word2id(config.word2id_path)
    print('Loading word2vec ==========================')
    word2vec = load_corpus_word2vec(config.corpus_word2vec_path)
    print('Loading train corpus ======================')
    x_tr, y_tr = load_corpus(config.train_path, word2id, max_sen_len=config.max_sen_len)
    print('Loading dev corpus ========================')
    x_val, y_val = load_corpus(config.dev_path, word2id, max_sen_len=config.max_sen_len)
    print('Training model ============================')
    tc = TextCNN(config, embeddings=word2vec)  # pass the configured instance, not the CONFIG class
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        tc.fit(sess, x_tr, y_tr, x_val, y_val, config.save_dir, config.print_per_batch)
def sent_to_id(inputs):
    """
    Tokenize each sentence, then map every token to its id in word_to_id.
    """
    sentences = []
    cut_sents = [jb.cut(w) for w in inputs]
    config = CONFIG()
    word2id = load_word2id(config.word2id_path)
    for cut_sent in cut_sents:
        sentence = [word2id.get(w, 0) for w in cut_sent]
        # truncate to max_sen_len, then pad short sentences with _PAD_
        sentence = sentence[:config.max_sen_len]
        if len(sentence) < config.max_sen_len:
            sentence += [word2id['_PAD_']] * (config.max_sen_len - len(sentence))
        sentences.append(sentence)
    return np.asarray(sentences)
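# A minimal sketch of sent_to_id(): each sentence is segmented with jieba,
# mapped to ids (unknown words become 0) and padded to max_sen_len; the
# sample sentences are hypothetical.
ids = sent_to_id(['服务很周到', '下次还会再来'])
print(ids.shape)  # (2, config.max_sen_len)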
def cut_cluster_by_100() -> list:
    """
    :return: a 500x100 list marking how the added edges are constructed
    """
    cut = [[]]
    # class_divide buckets the sample indices by cluster id
    class_divide = []
    for i in range(0, CONFIG.ClassNum()):  # originally hard-coded to 18
        class_divide.append([])
    cluster = np.load("cluster_result.npy")
    cluster = cluster.tolist()
    for c in range(0, len(cluster)):
        class_divide[cluster[c]].append(c)
    remain_data = []  # leftover samples, assigned in a second pass
    cut_i = 0
    for i in range(0, CONFIG.ClassNum()):  # iterate over the clusters
        for j in range(0, len(class_divide[i])):
            if j < int(len(class_divide[i]) / 100) * 100:
                cut[cut_i].append(class_divide[i][j])
                if len(cut[cut_i]) == 100:
                    cut_i += 1
                    cut.append([])
            else:
                remain_data.append(class_divide[i][j])
    for r in remain_data:
        cut[cut_i].append(r)
        if (len(cut[cut_i]) == 100) and (len(cut) != 500):
            cut_i += 1
            cut.append([])
    return cut
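# A quick sanity-check sketch for cut_cluster_by_100(), assuming the clustering
# in cluster_result.npy covers exactly 50000 targets: the result should be 500
# groups of 100 indices, with every target appearing exactly once.
cut = cut_cluster_by_100()
assert len(cut) == 500 and all(len(group) == 100 for group in cut)
assert len({idx for group in cut for idx in group}) == 500 * 100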
def add_edges_by_cluster():
    cut = ClusteringTarget.cut_cluster_by_100()
    x = []
    y = []
    coo_x = []
    coo_y = []
    data = []
    for i in range(0, 500):
        for j in range(0, 100):
            src = CONFIG.TargetEnd() + i
            dst = CONFIG.TargetBegin() + cut[i][j]
            assert (src >= CONFIG.TargetEnd()) and (src < CONFIG.TargetEnd() + 500)
            assert (dst >= CONFIG.TargetBegin()) and (dst < CONFIG.TargetEnd())
            # both directions for the edge-index tensor
            x.append(src)
            y.append(dst)
            x.append(dst)
            y.append(src)
            # one direction is enough for the sparse adjacency matrix
            coo_x.append(src - CONFIG.TargetEnd())
            coo_y.append(dst)
            data.append(1)
    # Save the adjacency matrix
    adj = scipy.sparse.coo_matrix((data, (coo_x, coo_y)), shape=(500, 593986))
    adj = adj.tocsr()
    with open("adj.pkl", 'wb') as f:  # write the matrix to a pkl file
        pickle.dump(adj, f)
    # Convert to a 2 x E edge-index tensor
    edges = [x, y]
    return torch.tensor(edges, dtype=torch.long).to(device)
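# A minimal sketch of loading the saved adjacency back for inspection; assumes
# adj.pkl was written by add_edges_by_cluster() above and that pickle is
# already imported in this module.
with open("adj.pkl", 'rb') as f:
    adj = pickle.load(f)
print(adj.shape)  # (500, 593986): injected nodes vs. original graph nodes
print(adj.nnz)    # 50000 stored entries, one per injected edge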
def GetFeatures() -> np.ndarray:
    # Read the raw feature data and reshape it to (n_samples, feature_len)
    features_ndarray = OpFile.ReadFeatures()
    features_matrix = features_ndarray.reshape(
        (len(features_ndarray), CONFIG.FeatureLen()))
    return features_matrix
def __init__(self):
    super(GCNClassifier, self).__init__()  # call the parent-class initializer
    self.conv1 = GCNConv(CONFIG.FeatureLen(), 180)
    self.conv2 = GCNConv(180, 120)
    # self.conv3 = GCNConv(100, CONFIG.ClassNum())
    self.lin1 = torch.nn.Linear(120, CONFIG.ClassNum())
def __init__(self, drop=0.5, conv1_hide=100, leaky=0.01):
    super(RPGGCN, self).__init__()  # call the parent-class initializer
    self.drop = drop    # probability of an element being zeroed in dropout
    self.leaky = leaky
    self.conv1 = GCNConv(CONFIG.FeatureLen(), conv1_hide)
    self.conv2 = GCNConv(conv1_hide, CONFIG.ClassNum())
attack_model = Attacker().to(device)
attack_model.load_state_dict(gcn_model.state_dict())
# attack_dict = attack_model.state_dict()
# state_dict = {k: v for k, v in gcn_model.items() if k in attack_model.keys()}
# print(state_dict.keys())
# attack_dict.update(state_dict)
# attack_model.load_state_dict(attack_dict)
print(">>>>> Load Model Finish")
# Freeze the parameters of the attack model
attack_model.freeze()
print(">>>>> Freeze all parameters in Attacker")
# Get the predicted labels of all targets
_, pred = attack_model(data).max(dim=1)
target_labels = pred[CONFIG.TargetBegin():CONFIG.TargetEnd()]
print(target_labels)
# Load the features to be injected into the model
add_features = torch.zeros((500, 100), requires_grad=False).to(device)
attack_model.loadX(add_features)
# Load the injected adjacency (edges) into the model
add_edges = init_add_edges()
# add_edges = add_edges_by_cluster()
# add_edges = add_edges_by_class(target_labels)
attack_model.loadEdges(add_edges)
# The attack loop
while True:
    attack_model.zero_grad()
    attack_model.loadX(add_features)
    # ... (the rest of the attack iteration is truncated in the source)
class Image():
    # Requires glob, cv2 (OpenCV) and numpy as np, plus the project-local CONFIG helper.
    DIR = CONFIG.fetch(r"dir")[0]
    formats = CONFIG.fetch(r'formats')
    chunks = []
    lowerBorder = int(CONFIG.fetch('Lborder')[0])
    OutDir = CONFIG.fetch(r"outdir")[0]
    prefix = CONFIG.fetch(r"prefix")[0]
    try:
        old = CONFIG.fetch('old_data')[0]
    except Exception:
        old = False

    def __init__(self):
        # Collect every image of the configured formats and start on the first one
        self.images = []
        for format in self.formats:
            for fl in glob.glob(f'{self.DIR}\\*.{format}'):
                self.images.append(fl)
        self.images.sort()
        self.workonid = self.images[0]
        self.objDict = {}
        for fl in self.images:
            self.objDict[fl] = []
        # Optionally restore previously saved annotations
        if self.old:
            data = CONFIG.readOld(self.old)
            for el in data:
                bitz = CONFIG.splitbyfour(el[0][2:])
                for bit in bitz:
                    II = []
                    for I in bit:
                        II.append(int(I))
                    print(II)
                    self.objDict[el[0][0]].append(II)
        cv2.imshow('tool_1', self.grabImg())
        cv2.setMouseCallback('tool_1', self.onmouse)

    def grabImg(self):
        # Draw every box on the current image; boxes below the size border are red
        canvas = cv2.imread(self.workonid)
        for obj in self.objDict[self.workonid]:
            x, y, w, h = obj
            if w < self.lowerBorder or h < self.lowerBorder:
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 0, 255), 4)
            else:
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 255, 0), 4)
        return canvas.copy()

    def nextImage(self):
        cur = self.images.index(self.workonid)
        if cur + 1 > len(self.images) - 1:
            self.workonid = self.images[0]
        else:
            self.workonid = self.images[cur + 1]

    def prevImage(self):
        print('called')
        cur = self.images.index(self.workonid)
        if cur - 1 < 0:
            self.workonid = self.images[len(self.images) - 1]
        else:
            self.workonid = self.images[cur - 1]
        print(self.workonid)

    def grabRoi(self):
        roi = cv2.selectROI('tool_1', self.grabImg(), True)
        cv2.setMouseCallback('tool_1', self.onmouse)
        x, y, w, h = roi
        if h * w != 0:
            self.objDict[self.workonid].append([x, y, w, h])

    def onmouse(self, k, x, y, s, p):
        if k == cv2.EVENT_LBUTTONDBLCLK:
            print(x, y)
            self.rm((x, y))

    def rm(self, tup):
        x, y = tup
        # Iterate over a copy so removing an entry does not skip the next one
        for obj in self.objDict[self.workonid][:]:
            print(CONFIG.isInside(x, y, obj))
            if CONFIG.isInside(x, y, obj):
                print(self.objDict[self.workonid].index(obj))
                self.objDict[self.workonid].pop(
                    self.objDict[self.workonid].index(obj))

    def writeUp(self):
        with open(
                f'{self.OutDir}\\{self.prefix}{CONFIG.getfreename(self.OutDir, self.prefix)}.txt',
                'a') as fl:
            for k, v in self.objDict.items():
                print(len(v))
                if len(v) > 0:
                    line = np.concatenate(v.copy())
                    print(f'{k} {int(len(line) / 4)} {CONFIG.prettyfy(line)}', file=fl)

    def run(self):
        # Main loop. Keys: Esc = save and quit, q = next image, e = previous image,
        # w = draw a new box, s = save without quitting.
        while True:
            try:
                cv2.imshow('tool_1', self.grabImg())
                key = cv2.waitKey(1)
                if key == 27:
                    self.writeUp()
                    break
                if key == ord('q'):
                    self.nextImage()
                if key == ord('e'):
                    self.prevImage()
                if key == ord('w'):
                    self.grabRoi()
                if key == ord('s'):
                    self.writeUp()
            except Exception as e:
                with open(f'{self.OutDir}\\logz.txt', 'a') as logz:
                    # Write the error to the log file (the original printed to stdout)
                    print(f'exception boiz, its-a {e} \n', file=logz)
                quit()
def add_edges_by_class(labels: torch.Tensor) -> torch.Tensor:
    labels_cpu = labels.cpu()
    labels_list = labels_cpu.numpy().tolist()
    cut = [[]]
    # class_divide buckets the target indices by predicted class
    class_divide = []
    for i in range(0, CONFIG.ClassNum()):
        class_divide.append([])
    for i in range(0, len(labels_list)):
        class_divide[labels_list[i]].append(i)
    remain_data = []
    cut_i = 0
    for i in range(0, CONFIG.ClassNum()):  # iterate over the classes
        for j in range(0, len(class_divide[i])):
            if j < int(len(class_divide[i]) / 100) * 100:
                cut[cut_i].append(class_divide[i][j])
                if len(cut[cut_i]) == 100:
                    cut_i += 1
                    cut.append([])
            else:
                remain_data.append(class_divide[i][j])
    for r in remain_data:
        cut[cut_i].append(r)
        if (len(cut[cut_i]) == 100) and (len(cut) != 500):
            cut_i += 1
            cut.append([])
    # cut now holds the finished node-to-target assignment
    x = []
    y = []
    coo_x = []
    coo_y = []
    data = []
    for i in range(0, 500):
        for j in range(0, 100):
            src = CONFIG.TargetEnd() + i
            dst = CONFIG.TargetBegin() + cut[i][j]
            assert (src >= CONFIG.TargetEnd()) and (src < CONFIG.TargetEnd() + 500)
            assert (dst >= CONFIG.TargetBegin()) and (dst < CONFIG.TargetEnd())
            x.append(src)
            y.append(dst)
            x.append(dst)
            y.append(src)
            coo_x.append(src - CONFIG.TargetEnd())
            coo_y.append(dst)
            data.append(1)
    # Optional coverage check: every target index should appear in coo_y
    # for i in range(CONFIG.TargetBegin(), CONFIG.TargetEnd()):
    #     if i not in coo_y:
    #         print("ERROR!!!", i)
    # print("OK!")
    # Save the adjacency matrix
    adj = scipy.sparse.coo_matrix((data, (coo_x, coo_y)), shape=(500, 593986))
    adj = adj.tocsr()
    with open("adj.pkl", 'wb') as f:  # write the matrix to a pkl file
        pickle.dump(adj, f)
    # Convert to a 2 x E edge-index tensor
    edges = [x, y]
    return torch.tensor(edges, dtype=torch.long).to(device)