class DataProc(object):
    """Turn source files into feature text files and collect path vectors.

    Each source file is run through an ``Extractor``; the extractor output is
    written as a text file into ``data_path``. An ``Interpreter`` constructed
    with that same directory is then asked to process the directory, and its
    ``ret_vec`` is exposed through :attr:`path_vec`.
    """

    def __init__(self, data_path):
        """Create the extractor/interpreter pair.

        Args:
            data_path: Directory used as the temporary holding area for the
                txt files generated from source code.
        """
        # Temporary storage area for the txt files generated from source code.
        self._data_path = data_path
        self._extractor = Extractor()
        self._interpreter = Interpreter(self._data_path)
        self._path_vec = []

    @staticmethod
    def _feature_file_name(file_name):
        """Return the name of the txt file that corresponds to ``file_name``."""
        root, ext = os.path.splitext(file_name)
        if ext == '.java':
            # Swap only the extension; a blanket str.replace would also
            # rewrite "java" inside the stem (e.g. "myjava.java").
            return root + '.txt'
        # Names without a trailing ".java" keep the historical substitution
        # so existing callers see the same output names as before.
        return file_name.replace('java', 'txt')

    # Generate the txt file.
    def get_feature_path(self, file_path, file_name):
        """Extract features from one source file and store them as text.

        Args:
            file_path: Directory that contains the source file.
            file_name: Name of the source file inside ``file_path``.

        The first element returned by the extractor is decoded and written to
        ``<data_path>/<name>.txt``; the second element is ignored.
        """
        out, _ = self._extractor.extract(os.path.join(file_path, file_name))
        # Decode before opening so a decoding failure does not leave an
        # empty output file behind.
        text = out.decode()
        out_path = os.path.join(self._data_path,
                                self._feature_file_name(file_name))
        # The context manager closes the handle even if the write fails.
        with open(out_path, 'w') as f:
            f.write(text)

    # Parse the paths out of the txt files.
    def get_interpr_path(self):
        """Run the interpreter and cache its ``ret_vec`` as the path vector."""
        self._interpreter.file_iterator()
        self._path_vec = self._interpreter.ret_vec

    @property
    def path_vec(self):
        """Path vector produced by the last :meth:`get_interpr_path` call."""
        return self._path_vec
# NOTE(review): the three assignments below are the tail of an initializer
# whose ``def`` line lies outside the visible source.
self._ret_vec = []
# Used to store the hash codes.
self._hash_code = []
# Dict used to store the index of the tree.
self._hash_tree = {}


def __call__(self, out, err):
    """Interpret one extractor result and return it as a single string.

    NOTE(review): this is a method of a class whose header is not visible
    here (presumably the ``SingleInterpreter`` used in the script section
    below; confirm).

    Args:
        out: Extractor output, as ``str`` or ``bytes``.
        err: Extractor error text, as ``str`` or ``bytes``.

    Returns:
        ``self._method_name`` with newlines replaced by spaces, followed by
        every entry of ``self._ret_vec``, each terminated by one space.

    Raises:
        RuntimeError: If ``out`` is empty; the message carries ``err``.
    """
    # Accept raw bytes as well as text.
    out = out.decode() if isinstance(out, bytes) else out
    err = err.decode() if isinstance(err, bytes) else err
    assert isinstance(out, str)
    assert isinstance(err, str)

    # Re-run the initializer so state from a previous call is discarded.
    self.__init__()

    if not out:
        raise RuntimeError("Source file error:" + err)

    self.data_handler(out)
    pieces = [self._method_name.replace("\n", " ")]
    pieces.extend(entry + ' ' for entry in self._ret_vec)
    return "".join(pieces)


if __name__ == "__main__":
    # Manual smoke run against a single local file.
    out, err = Extractor.extract("/Users/LeonWong/Desktop/Test.java")
    interpreter = SingleInterpreter()
    print(interpreter(out.decode(), err.decode()))
def extractFeatures(data):
    """Run an ``Extractor`` built from ``data``.

    Constructs ``Extractor(data)`` and calls its ``extractFeatures`` method.
    The visible code does not return the result, so this function evaluates
    to ``None``.

    NOTE(review): the snippet may be truncated; confirm whether the
    extracted value was meant to be returned to the caller.
    """
    Extractor(data).extractFeatures()
def __init__(self, data_path):
    """Store the working directory and build the helper objects.

    Args:
        data_path: Directory used as the temporary holding area for the
            txt files generated from source code; it is also handed to the
            ``Interpreter``.
    """
    # Temporary storage area for the txt files generated from source code.
    self._data_path = data_path
    self._extractor = Extractor()
    self._interpreter = Interpreter(self._data_path)
    # Starts out empty.
    self._path_vec = list()
import cv2 import numpy as np import Config from tqdm import tqdm from PIL import Image from sklearn.svm import LinearSVC from sklearn.neural_network import MLPClassifier from skimage.transform import pyramid_gaussian from sklearn.externals import joblib from FeatureExtractor import Extractor ## Initialize the Extractor conf = Config.Config() extractor = Extractor(conf) class Classifier(object): def __init__(self, config): self.config = config def load_data(self): ''' This function is used to load the training data, positive data and negtive data.The features of images are stored in self.fds, the labels are stored in self.labels. :return: None ''' self.fds = [] self.labels = []