def process_test_url(url, output_dest):
    """Extract features for one URL and write them out (used by gui.py).

    NOTE(review): source was whitespace-collapsed; the trailing semicolon in
    the original joins resultwriter to the guarded suite, so no output file is
    written for a blank URL — verify against gui.py's expectations.
    """
    records = []
    cleaned = url.strip()
    if cleaned:
        print('working on: ' + cleaned)  # showoff
        extracted = urlfeature.feature_extract(cleaned)
        records.append([cleaned, extracted])
        resultwriter(records, output_dest)
def process_test_url(url, output_dest):
    """Extract features for one URL and write them via resultwriter.

    :param url: URL string to analyse; surrounding whitespace is stripped.
    :param output_dest: destination handed straight to resultwriter.
    """
    feature = []
    url = url.strip()
    if url != '':
        # FIX: was a Python-2 print statement (syntax error under Python 3,
        # and inconsistent with the print() calls used elsewhere in this file).
        print('working on: ' + url)  # showoff / progress trace
        ret_dict = urlfeature.feature_extract(url)
        feature.append([url, ret_dict])
        # NOTE(review): placed inside the guard to match the sibling py3
        # variant of this function — confirm a blank URL should write nothing.
        resultwriter(feature, output_dest)
def process_test_list(file_dest, output_dest):
    """Extract features for every URL listed (one per line) in file_dest.

    Blank lines are skipped. All collected [url, feature-dict] pairs are
    written once at the end via resultwriter.

    :param file_dest: path to a text file with one URL per line.
    :param output_dest: destination handed straight to resultwriter.
    """
    feature = []
    with open(file_dest) as file:
        for line in file:
            url = line.strip()
            if url != '':
                # FIX: was a Python-2 print statement — syntax error under
                # Python 3 and inconsistent with the rest of the file.
                print('working on: ' + url)  # showoff / progress trace
                ret_dict = urlfeature.feature_extract(url)
                feature.append([url, ret_dict])
    # NOTE(review): single final write assumed (collapsed source is ambiguous);
    # confirm resultwriter is not expected per-URL.
    resultwriter(feature, output_dest)
def _extract_window_features(windows, n_channels, feature_set, sensor):
    """Apply the chosen feature-set to every channel of every window.

    :param windows: iterable of windows; each window is a sequence of samples,
        each sample indexable per channel
    :param n_channels: number of channels to read from each sample
        (8 for EMG, 9 for IMU in this project)
    :param feature_set: one of the Constant.* feature-set identifiers
    :param sensor: Constant.EMG or Constant.IMU — only forwarded to the
        georgi extractor, matching the original behaviour
    :return: list with one concatenated feature vector per window
    """
    per_window = []
    for window in windows:
        feature = []
        for n in range(n_channels):
            channel = [sample[n] for sample in window]
            if feature_set == Constant.georgi:
                feature.extend(Feature_extraction.georgi(channel, sensor=sensor))
            elif feature_set == Constant.rehman:
                feature.extend(Feature_extraction.rehman(channel))
            elif feature_set == Constant.robinson:
                feature.extend(Feature_extraction.robinson(channel))
            elif feature_set == Constant.mantena:
                feature.extend(Feature_extraction.mantena(channel))
            else:
                # Preserved from the original: warns once per channel.
                print("Could not match given feature set")
        per_window.append(feature)
    return per_window


def feature_extraction_live(w_emg, w_imu, feature_set=Constant.rehman):
    """
    Extracts the features of a given feature-set from the given raw data.

    :param w_emg: matrix
        The matrix of the windowed raw data for the EMG signals
    :param w_imu: matrix
        The matrix of the windowed raw data for the IMU signals
    :param feature_set: string
        The feature-set from which the features are extracted
    :return: matrix
        Matrix of extracted features. The features for EMG and IMU data are
        combined (column-interleaved) per window.
    """
    # REFACTOR: the EMG and IMU branches were verbatim duplicates differing
    # only in channel count and sensor tag; both now share one helper.
    feature_emg = _extract_window_features(w_emg, 8, feature_set, Constant.EMG)
    feature_imu = _extract_window_features(w_imu, 9, feature_set, Constant.IMU)

    # Interleave EMG and IMU features per window, exactly as before.
    # NOTE(review): pairing is driven by len(feature_imu); surplus EMG windows
    # are silently dropped — preserved from the original, confirm intended.
    features = []
    for i in range(len(feature_imu)):
        f = []
        for x in np.asarray([feature_emg[i], feature_imu[i]]).flatten('F'):
            f.extend(x)
        features.append(f)
    return features
def process_test_list(file_dest, output_dest):
    """Extract features for each URL in an unlabelled list file.

    Reads file_dest line by line (e.g. query.txt — URLs without a malicious
    label) and hands the accumulated results to resultwriter.

    NOTE(review): source was whitespace-collapsed; the trailing semicolon in
    the original joins resultwriter to the guarded suite, i.e. the output is
    rewritten incrementally after every URL — verify this is intended.
    """
    feature = []
    with open(file_dest) as file:
        for raw_line in file:
            candidate = raw_line.strip()
            if not candidate:
                continue
            print('working on: ' + candidate)  # showoff
            feature.append([candidate, urlfeature.feature_extract(candidate)])
            resultwriter(feature, output_dest)
def process_URL_list(file_dest, output_dest):
    """Extract features for a labelled URL list (e.g. url.txt).

    Each line is "url,label"; the label is stored under the 'malicious' key
    of the per-URL feature dict.

    NOTE(review): source was whitespace-collapsed; the trailing semicolon in
    the original joins resultwriter to the guarded suite, i.e. results are
    rewritten after every URL — verify this is intended.
    """
    feature = []
    with open(file_dest) as file:
        for raw_line in file:
            fields = raw_line.split(',')
            url = fields[0].strip()
            malicious_bool = fields[1].strip()
            if not url:
                continue
            print('working on: ' + url)  # showoff
            extracted = urlfeature.feature_extract(url)
            extracted['malicious'] = malicious_bool
            feature.append([url, extracted])
            resultwriter(feature, output_dest)
def process_URL_list(file_dest, output_dest):
    """Extract features for every labelled URL in file_dest.

    Lines have the form "url,label"; the label lands in the feature dict
    under 'malicious'. All results are written once at the end.
    """
    feature = []
    with open(file_dest) as file:
        for raw_line in file:
            pieces = raw_line.split(',')
            url = pieces[0].strip()
            malicious_bool = pieces[1].strip()
            if url:
                print('working on: ', url)  # showoff
                extracted = urlfeature.feature_extract(url)
                extracted['malicious'] = malicious_bool
                feature.append([url, extracted])
    # NOTE(review): collapsed source is ambiguous about placement; a single
    # final write is assumed here — confirm against the caller.
    resultwriter(feature, output_dest)
def submit():
    # Flask view handler: expects a JSON POST body {"msg": <text>}.
    # Scans the message for the LAST whitespace-separated token that looks
    # like a URL, extracts its features, classifies it with the preloaded
    # rf_model, and returns the features plus a human-readable verdict.
    # NOTE(review): the original formatting was lost (whitespace-collapsed);
    # the nesting below — prediction and verdict inside the `url != ''`
    # guard, `return {'features': ...}` for every POST — is a reconstruction
    # and must be verified against the deployed app.
    if (request.method == 'POST'):
        features = []
        data = request.get_json()
        print(data)  # debug trace of the incoming payload
        sms = data['msg']
        #Check for URL in msg
        url = ''
        for w in sms.split(' '):
            # If several tokens match, the last one wins.
            if (w.startswith('https:') or w.startswith('http:')
                    or w.startswith('www')):
                url = w
        if url != '':
            url = str(url)
            features.append(urlfeature.feature_extract(url))
            # train_cols (module-level) selects the model's input columns.
            df = pd.DataFrame(features)
            ans_np = list(rf_model.predict(df[train_cols]))
            # print 'URL is: ',url
            # print '\n ANS is: ',ans[0]
            ans = int(ans_np[0])
            # Model output: presumably 0 = safe, 1 = suspicious, anything
            # else = malicious — TODO confirm against the training labels.
            if (ans == 1):
                features.append({
                    'ans': ans,
                    'flash': 'Site entered is SUSPICIOUS',
                    'url': url
                })
            elif (ans == 0):
                features.append({
                    'ans': ans,
                    'flash': 'Site entered is SAFE',
                    'url': url
                })
            else:
                features.append({
                    'ans': ans,
                    'flash': 'Site entered is MALICIOUS',
                    'url': url
                })
        return {'features': features}
    else:
        # Non-POST requests get a generic error payload.
        error = 'Please enter a valid URL'
        return {'error': error}
def process_URL_list(file_dest, output_dest):
    """Extract features for a labelled URL list.

    Each line of file_dest is "url,label". The label is stored under the
    'malicious' key of the per-URL feature dict, and all collected results
    are written via resultwriter.

    :param file_dest: path to a CSV-like file of "url,label" lines.
    :param output_dest: destination handed straight to resultwriter.
    """
    feature = []
    # Renamed local from `file` (shadowed the builtin) to `f`.
    with open(file_dest) as f:
        for line in f:
            # Split each line into the URL and its label.
            content_list = line.split(",")
            url = content_list[0].strip()
            malicious_bool = content_list[1].strip()
            if url:
                # FIX: was a Python-2 print statement — syntax error under
                # Python 3 and inconsistent with the rest of the file.
                print('working on: ' + url)
                # Extract the URL's features.
                ret_dict = urlfeature.feature_extract(url)
                # Attach the malicious label to the feature dict.
                ret_dict['malicious'] = malicious_bool
                feature.append([url, ret_dict])
    # FIX: dropped the explicit file.close() — the with-statement already
    # closes the file on exit.
    resultwriter(feature, output_dest)
### Feature and embedding extraction
# Script-level block: for every wav path listed in `lines`, extract acoustic
# features and evaluate the embedding network, collecting results per file.
wavlist = []
for line in lines:
    # First whitespace-separated token on each line is the wav path.
    wavlist.append(line.rstrip().split()[0])

embeddings = {}
for filename in wavlist:
    start_time = time.time()
    base, _ = os.path.splitext(os.path.basename(filename))
    print(base)
    # nn_info = nobj.netinfo(base)
    feat, utt_label, utt_shape, tffilename = fe.feat_extract(
        [filename], FEAT_TYPE, N_FFT, HOP, VAD, CMVN, EXCLUDE_SHORT)
    # print('features:', type(feat), len(feat[0]))
    embeddings[base] = emnet_validation.ac2.eval({x: feat, s: utt_shape})
    print((embeddings[base]).shape)
    # if args.outputlayer:
    #     outputlayer.append(emnet_validation.o1.eval({x:feat, s:utt_shape}))
    # FIX: this assignment was commented out in the original, so the print
    # below raised NameError on `elapsed_time`; restore the timing.
    elapsed_time = time.time() - start_time
    print(
        format(elapsed_time) + ' seconds elapsed for ' +
        filename.split('/')[-1])
# embeddings = np.array(embeddings)
# np.save(args.wavlist.split('/')[-1].split('.')[0]+'_embeddings',embeddings)
def process_raw_data(user, overlap, window, data_set, sensor, feature, pre,
                     save_path_for_featureset="./",
                     load_path=Constant.collections_default_path):
    """
    Load raw data for user, window the data, pre process the data,
    extract features from data, save extracted features to file.

    NOTE(review): the original formatting was lost (whitespace-collapsed);
    the nesting reconstructed below must be verified against the repository.

    :param user: string
            The user from which the features should be extracted
    :param overlap: float
            The size of overlap
    :param window: int
            The window size
    :param data_set: string
            The data set from user study: separate, continues, separatecontinues
    :param sensor: string
            The sensor data: EMG, IMU, EMGIMU
    :param feature: string
            The set of features to extract
    :param pre: string
            The pre processing setting: filter, z-normalization, no pre processing
    :param save_path_for_featureset: string, default "./"
            Describes the save path for features.
    :param load_path: string
            Path to the Collection folder which contains the raw data
    """
    features = []
    save_path_for_featureset += user + "/features"
    try:
        load_path += user
        # Collect one directory listing (plus its path suffix) per requested
        # data-set variant; both may apply for a combined data_set string.
        directories, path_add = [], []
        if Constant.SEPARATE in data_set:
            directories.append(os.listdir(load_path + Constant.SEPARATE_PATH))
            path_add.append(Constant.SEPARATE_PATH)
        if Constant.CONTINUES in data_set:
            directories.append(os.listdir(load_path + Constant.CONTINUES_PATH))
            path_add.append(Constant.CONTINUES_PATH)

        for i in range(len(directories)):  # go through all directories
            tmp_features = []
            for steps in directories[i]:  # go through all Steps
                features = []
                raw_emg, raw_imu = Save_Load.load_raw_data_for_both_sensors(
                    emg_path=load_path + path_add[i] + "/" + steps + "/emg.csv",
                    imu_path=load_path + path_add[i] + "/" + steps + "/imu.csv")

                w_emg, w_imu = window_data_for_both_sensor(
                    raw_emg, raw_imu,
                    window=window,
                    degree_of_overlap=overlap,
                    skip_timestamp=1)

                # Preprocess each window
                # NOTE(review): the filter branch applies to EMG only and
                # passes `feature` as the filter type — confirm intended.
                if pre == Constant.filter_ and Constant.EMG in sensor:
                    w_emg = filter_emg_data(emg=w_emg, filter_type=feature)
                elif pre == Constant.z_norm:
                    w_emg = z_norm(w_emg)
                    w_imu = z_norm(w_imu)

                # Combined EMG+IMU: extract both, then merge the per-window
                # feature vectors when their labels agree.
                if Constant.EMG + Constant.IMU in sensor:
                    features.append(
                        fe.feature_extraction(w_emg, mode=feature,
                                              sensor=Constant.EMG))
                    features.append(
                        fe.feature_extraction(w_imu, mode=feature,
                                              sensor=Constant.IMU))
                    tmp = []
                    for j in range(len(features[0])):
                        merged_feature = features[0][j]['fs'] + features[1][j][
                            'fs']
                        if features[0][j]['label'] == features[1][j]['label']:
                            tmp.append({
                                "fs": merged_feature,
                                "label": features[1][j]['label']
                            })
                        else:
                            # Labels of paired EMG/IMU windows must match.
                            print("ERROR! Should not happen!")
                    tmp_features.append(tmp)
                    continue
                if Constant.EMG in sensor:
                    tmp_features.append(
                        fe.feature_extraction(w_emg, mode=feature,
                                              sensor=Constant.EMG))
                if Constant.IMU in sensor:
                    tmp_features.append(
                        fe.feature_extraction(w_imu, mode=feature,
                                              sensor=Constant.IMU))
            # NOTE(review): overwritten per directory — only the last
            # directory's features survive to the save below; confirm.
            features = tmp_features
        if pre:
            # Any preprocessing setting redirects output to a "_filter" dir.
            save_path_for_featureset = save_path_for_featureset + "_filter"
        if not os.path.isdir(save_path_for_featureset):
            os.mkdir(save_path_for_featureset)
        filename = user + "-" + pre + "-" + data_set + "-" + sensor + "-" + str(
            window) + "-" + str(overlap) + "-" + feature
        Save_Load.save_features(
            features, save_path_for_featureset + "/" + filename + ".csv")
        print(filename + " done")
        return True
    except:
        # NOTE(review): bare except; prints the ValueError class itself (not
        # a caught instance) before re-raising — looks unintentional.
        print("ERROR!", user, data_set, sensor, ValueError)
        raise
X_train = np.append(X_train, X_neutral, axis=0) Y_train = np.append(Y_train, Y_neutral) X_test = X_cont Y_test = Y_cont return X_train, X_test, Y_train, Y_test if __name__ == '__main__': n_classes = 4 p = 1 Colors = 'rgbym' plt.figure() #X_neutral is Neutral Data print "\nGetting Neutral Data....\n" X_neutral, Y_neutral = fe.get_data(n_classes, neutral=True) print "--------------------------------------------------------" print X_neutral.shape #X_Happy is Emotional Data = 'Happy' print "\nGetting Emotional Data(Happy)....\n" X_Happy, Y_Happy = fe.get_data(n_classes, 'Happy', cont=False) print "--------------------------------------------------------" print X_Happy.shape #X_Sad is Emotional Data = 'Sad' print "\nGetting Emotional Data(Sad)....\n" X_Sad, Y_Sad = fe.get_data(n_classes, 'Sad', cont=False) print "--------------------------------------------------------" print X_Sad.shape
target.append(3) target = np.array(target) clf = GradientBoostingClassifier(n_estimators=200) cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=None) predict = list() for trainIndex, testIndex in cv.split(trainOther, target): trainSet = trainOther[trainIndex] trainTarget = target[trainIndex] clf.fit(trainSet, trainTarget) predict.append(clf.predict(test)[0]) return predict if __name__ == '__main__': FileName = sys.argv[1] test = np.array([Feature_extraction.get138Fea(FileName)]) judge1 = sum(classifier1(test)) if judge1 == 0: judge2 = sum(classifier2(test)) if judge2 == 0: print("ATP binding function") elif judge2 == 10: judge3 = sum(classifier3(test)) if judge3 == 0: print("heme binding function") elif judge3 == 10: print("zinc ion binding function") elif judge3 == 20: print("GTP binding function") elif judge3 == 30: print("ADP binding function")
def extract_features(doc):
    """Return the feature representation for every token position in *doc*."""
    feats = []
    for position in range(len(doc)):
        feats.append(fe.word2features(doc, position))
    return feats
def token_title(self):
    """Clear the log pane, extract keywords from the title field, show them."""
    self.logger.setPlainText('')
    title_source = self.title_text.toPlainText()
    keywords = fe.find_title_keyword(title_source)
    print(keywords)
    self.logger.appendPlainText(str(keywords))
def find_keyword(self):
    """Clear the log pane, extract keywords from the content field, show them."""
    self.logger.setPlainText('')
    content_source = self.Content_text.toPlainText()
    keywords = fe.find_content_keyword(content_source)
    print(keywords)
    self.logger.appendPlainText(str(keywords))