def process_test_url(url, output_dest):
    # Extracts features for a single URL; used only by gui.py.
    feature = []
    url = url.strip()
    if url != '':
        print('working on: ' + url)  # progress output
        ret_dict = urlfeature.feature_extract(url)
        feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
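A minimal call might look like the sketch below, assuming urlfeature and resultwriter are available from the surrounding project; the URL and output path are hypothetical.

# Hypothetical usage: extract features for one URL and write them out.
process_test_url('http://example.com/login', 'single_url_features.csv')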
Example #2
def process_test_url(url, output_dest):
    feature = []
    url = url.strip()
    if url != '':
        print('working on: ' + url)  # progress output
        ret_dict = urlfeature.feature_extract(url)
        feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
Example #3
def process_test_list(file_dest, output_dest):
    feature = []
    with open(file_dest) as file:
        for line in file:
            url = line.strip()
            if url != '':
                print('working on: ' + url)  # progress output
                ret_dict = urlfeature.feature_extract(url)
                feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
Example #4
def feature_extraction_live(w_emg, w_imu, feature_set=Constant.rehman):
    """
    Extracts the features of a given feature-set from the given raw data
    :param w_emg:matrix
            The matrix of the windowed raw data for the EMG signals
    :param w_imu:matrix
            The matrix of the windowed raw data for the IMU signals
    :param feature_set:string
            From this feature-set the features are extracted for extraction
    :return:matrix
            Matrix of extracted features. By default the features for EMG and IMU data will be combined
    """
    feature_emg, feature_imu = [], []
    # Build one feature vector per EMG window, channel by channel
    for x in w_emg:
        feature = []
        for n in range(8):  # 8 EMG channels
            if feature_set == Constant.georgi:
                feature.extend(
                    Feature_extraction.georgi([y[n] for y in x],
                                              sensor=Constant.EMG))
            elif feature_set == Constant.rehman:
                feature.extend(Feature_extraction.rehman([y[n] for y in x]))
            elif feature_set == Constant.robinson:
                feature.extend(Feature_extraction.robinson([y[n] for y in x]))
            elif feature_set == Constant.mantena:
                feature.extend(Feature_extraction.mantena([y[n] for y in x]))
            else:
                print("Could not match given feature set")
        feature_emg.append(feature)
    # Build one feature vector per IMU window, channel by channel
    for x in w_imu:
        feature = []
        for n in range(9):  # 9 IMU channels
            if feature_set == Constant.georgi:
                feature.extend(
                    Feature_extraction.georgi([y[n] for y in x],
                                              sensor=Constant.IMU))
            elif feature_set == Constant.rehman:
                feature.extend(Feature_extraction.rehman([y[n] for y in x]))
            elif feature_set == Constant.robinson:
                feature.extend(Feature_extraction.robinson([y[n] for y in x]))
            elif feature_set == Constant.mantena:
                feature.extend(Feature_extraction.mantena([y[n] for y in x]))
            else:
                print("Could not match given feature set")
        feature_imu.append(feature)

    # Interleave each window's EMG and IMU feature blocks column-wise
    features = []
    for i in range(len(feature_imu)):
        f = []
        for x in np.asarray([feature_emg[i], feature_imu[i]]).flatten('F'):
            f.extend(x)
        features.append(f)
    return features
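The merge step relies on NumPy's column-major flatten to interleave the two feature lists. A minimal, self-contained sketch of that ordering, with stand-in values in place of real feature vectors:

import numpy as np

emg = [1, 2, 3]   # stand-in for one window's EMG features
imu = [4, 5, 6]   # stand-in for the matching IMU features

# flatten('F') walks the 2 x 3 array column by column,
# so EMG and IMU entries alternate: 1, 4, 2, 5, 3, 6
print(np.asarray([emg, imu]).flatten('F'))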
def process_test_list(file_dest, output_dest):
    # Extracts features for every URL in a plain, unlabelled URL file
    # (e.g. query.txt); no 'malicious' column is added.
    feature = []
    with open(file_dest) as file:
        for line in file:
            url = line.strip()
            if url != '':
                print('working on: ' + url)  # progress output
                ret_dict = urlfeature.feature_extract(url)
                feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
def process_URL_list(file_dest, output_dest):
    # Extracts features for every URL in a labelled file (e.g. url.txt),
    # where each line is 'url,label', and adds the label as a 'malicious' column.
    feature = []
    with open(file_dest) as file:
        for line in file:
            url = line.split(',')[0].strip()
            malicious_bool = line.split(',')[1].strip()
            if url != '':
                print('working on: ' + url)  # progress output
                ret_dict = urlfeature.feature_extract(url)
                ret_dict['malicious'] = malicious_bool
                feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
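The labelled input is one comma-separated 'url,label' record per line; a sketch with hypothetical file names and label values:

# Hypothetical url.txt contents (one 'url,label' pair per line):
#   http://example.com/,0
#   http://bad.example/login,1
process_URL_list('url.txt', 'url_features.csv')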
Example #7
def process_URL_list(file_dest, output_dest):
    feature = []
    with open(file_dest) as file:
        for line in file:
            url = line.split(',')[0].strip()
            malicious_bool = line.split(',')[1].strip()
            if url != '':
                print('working on: ', url)  #showoff
                ret_dict = urlfeature.feature_extract(url)
                ret_dict['malicious'] = malicious_bool
                feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
Example #8
def submit():
    if request.method == 'POST':
        features = []
        data = request.get_json()
        print(data)
        sms = data['msg']
        # Check for a URL in the message
        url = ''
        for w in sms.split(' '):
            if (w.startswith('https:') or w.startswith('http:')
                    or w.startswith('www')):
                url = w

        if url != '':
            url = str(url)
            features.append(urlfeature.feature_extract(url))

            df = pd.DataFrame(features)
            ans_np = list(rf_model.predict(df[train_cols]))

            # print('URL is:', url)
            # print('ANS is:', ans_np[0])
            ans = int(ans_np[0])

            if ans == 1:
                features.append({
                    'ans': ans,
                    'flash': 'Site entered is SUSPICIOUS',
                    'url': url
                })
            elif ans == 0:
                features.append({
                    'ans': ans,
                    'flash': 'Site entered is SAFE',
                    'url': url
                })
            else:
                features.append({
                    'ans': ans,
                    'flash': 'Site entered is MALICIOUS',
                    'url': url
                })

            return {'features': features}
        else:
            error = 'Please enter a valid URL'
            return {'error': error}
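A client request might look like the sketch below; the /submit route and host are assumptions, since the Flask route decorator is not shown above.

import requests

# Hypothetical client call; adjust host and route to match the actual app.
resp = requests.post('http://localhost:5000/submit',
                     json={'msg': 'please check http://example.com/login'})
print(resp.json())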
Example #9
def process_URL_list(file_dest, output_dest):
    feature = []
    with open(file_dest) as file:
        for line in file:
            # Split each line into the URL and its label
            content_list = line.split(",")
            # The URL
            url = content_list[0].strip()
            # The label
            malicious_bool = content_list[1].strip()
            if url:
                print('working on: ' + url)

                # Extract the URL's features
                ret_dict = urlfeature.feature_extract(url)
                # Attach the malicious label
                ret_dict['malicious'] = malicious_bool
                feature.append([url, ret_dict])
    resultwriter(feature, output_dest)
### Feature and embedding extraction
wavlist = []
for line in lines:
    # The first whitespace-separated token on each line is the wav file path
    wavlist.append(line.rstrip().split()[0])

embeddings = {}

for filename in wavlist:
    start_time = time.time()
    base, _ = os.path.splitext(os.path.basename(filename))
    print(base)
    # nn_info = nobj.netinfo(base)

    # Extract acoustic features for this file
    feat, utt_label, utt_shape, tffilename = fe.feat_extract([filename],
                                                             FEAT_TYPE, N_FFT,
                                                             HOP, VAD, CMVN,
                                                             EXCLUDE_SHORT)
    # Run the network to get the utterance embedding
    embeddings[base] = emnet_validation.ac2.eval({x: feat, s: utt_shape})
    print(embeddings[base].shape)

    # if args.outputlayer:
    #     outputlayer.append(emnet_validation.o1.eval({x:feat, s:utt_shape}))

    elapsed_time = time.time() - start_time
    print('{} seconds elapsed for {}'.format(elapsed_time,
                                             filename.split('/')[-1]))
# embeddings = np.array(embeddings)
# np.save(args.wavlist.split('/')[-1].split('.')[0]+'_embeddings',embeddings)
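If the embeddings should be persisted, the commented-out np.save lines above hint at one option; a minimal sketch, with the archive name as an assumption:

import numpy as np

# Hypothetical: store the {utterance: embedding} dict in a single .npz archive.
np.savez('embeddings.npz', **{k: np.asarray(v) for k, v in embeddings.items()})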
def process_raw_data(user,
                     overlap,
                     window,
                     data_set,
                     sensor,
                     feature,
                     pre,
                     save_path_for_featureset="./",
                     load_path=Constant.collections_default_path):
    """
    Load raw data for user, window the data, pre process the data,
    extract  features from data, save extracted features to file
    :param user: string
        The user from which the features should be extracted
    :param overlap: float
        The size of overlap
    :param window: int
        The window size
    :param data_set: string
        The data set from user study: separate, continues,separatecontinues
    :param sensor: string
        The sensor data: EMG,IMU,EMGIMU
    :param feature: string
        The set of features  to extract
    :param pre: string
        The pre processing setting: filter, z-normalization, no pre processing
    :param save_path_for_featureset: string, default Constant.collection_path_default('./Collections/')
            Describes the save path for features. If empty the default path "./" will be used
    :param load_path: string
            Path to the Collection folder which contains the raw data
    """
    features = []
    save_path_for_featureset += user + "/features"
    try:
        load_path += user
        directories, path_add = [], []

        if Constant.SEPARATE in data_set:
            directories.append(os.listdir(load_path + Constant.SEPARATE_PATH))
            path_add.append(Constant.SEPARATE_PATH)
        if Constant.CONTINUES in data_set:
            directories.append(os.listdir(load_path + Constant.CONTINUES_PATH))
            path_add.append(Constant.CONTINUES_PATH)

        for i in range(len(directories)):  # go through all directories
            tmp_features = []
            for steps in directories[i]:  # go through all Steps
                features = []
                raw_emg, raw_imu = Save_Load.load_raw_data_for_both_sensors(
                    emg_path=load_path + path_add[i] + "/" + steps +
                    "/emg.csv",
                    imu_path=load_path + path_add[i] + "/" + steps +
                    "/imu.csv")

                w_emg, w_imu = window_data_for_both_sensor(
                    raw_emg,
                    raw_imu,
                    window=window,
                    degree_of_overlap=overlap,
                    skip_timestamp=1)

                # Preprocess each window
                if pre == Constant.filter_ and Constant.EMG in sensor:
                    w_emg = filter_emg_data(emg=w_emg, filter_type=feature)
                elif pre == Constant.z_norm:
                    w_emg = z_norm(w_emg)
                    w_imu = z_norm(w_imu)

                if Constant.EMG + Constant.IMU in sensor:
                    features.append(
                        fe.feature_extraction(w_emg,
                                              mode=feature,
                                              sensor=Constant.EMG))
                    features.append(
                        fe.feature_extraction(w_imu,
                                              mode=feature,
                                              sensor=Constant.IMU))
                    tmp = []
                    for j in range(len(features[0])):
                        merged_feature = features[0][j]['fs'] + features[1][j][
                            'fs']
                        if features[0][j]['label'] == features[1][j]['label']:
                            tmp.append({
                                "fs": merged_feature,
                                "label": features[1][j]['label']
                            })
                        else:
                            print("ERROR! Should not happen!")
                    tmp_features.append(tmp)
                    continue
                if Constant.EMG in sensor:
                    tmp_features.append(
                        fe.feature_extraction(w_emg,
                                              mode=feature,
                                              sensor=Constant.EMG))
                if Constant.IMU in sensor:
                    tmp_features.append(
                        fe.feature_extraction(w_imu,
                                              mode=feature,
                                              sensor=Constant.IMU))
            features = tmp_features
        if pre:
            save_path_for_featureset = save_path_for_featureset + "_filter"
        if not os.path.isdir(save_path_for_featureset):
            os.mkdir(save_path_for_featureset)

        filename = user + "-" + pre + "-" + data_set + "-" + sensor + "-" + str(
            window) + "-" + str(overlap) + "-" + feature
        Save_Load.save_features(
            features, save_path_for_featureset + "/" + filename + ".csv")
        print(filename + " done")
        return True
    except Exception:
        print("ERROR!", user, data_set, sensor)
        raise
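A call to this routine might look like the following sketch; every argument value here is hypothetical and depends on the project's Constant definitions.

# Hypothetical invocation: 100-sample windows, 50% overlap, combined sensors.
process_raw_data(user="User001",
                 overlap=0.5,
                 window=100,
                 data_set=Constant.SEPARATE,
                 sensor=Constant.EMG + Constant.IMU,
                 feature=Constant.rehman,
                 pre=Constant.filter_)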
    X_train = np.append(X_train, X_neutral, axis=0)
    Y_train = np.append(Y_train, Y_neutral)
    X_test = X_cont
    Y_test = Y_cont
    return X_train, X_test, Y_train, Y_test


if __name__ == '__main__':
    n_classes = 4
    p = 1
    Colors = 'rgbym'
    plt.figure()

    # X_neutral is the neutral data
    print("\nGetting Neutral Data....\n")
    X_neutral, Y_neutral = fe.get_data(n_classes, neutral=True)
    print("--------------------------------------------------------")
    print(X_neutral.shape)

    # X_Happy is the emotional data for 'Happy'
    print("\nGetting Emotional Data (Happy)....\n")
    X_Happy, Y_Happy = fe.get_data(n_classes, 'Happy', cont=False)
    print("--------------------------------------------------------")
    print(X_Happy.shape)

    # X_Sad is the emotional data for 'Sad'
    print("\nGetting Emotional Data (Sad)....\n")
    X_Sad, Y_Sad = fe.get_data(n_classes, 'Sad', cont=False)
    print("--------------------------------------------------------")
    print(X_Sad.shape)
Example #13
        target.append(3)
    target = np.array(target)
    clf = GradientBoostingClassifier(n_estimators=200)
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
    predict = list()
    # Train on each of the 10 folds and collect one prediction per fold
    for trainIndex, testIndex in cv.split(trainOther, target):
        trainSet = trainOther[trainIndex]
        trainTarget = target[trainIndex]
        clf.fit(trainSet, trainTarget)
        predict.append(clf.predict(test)[0])
    return predict


if __name__ == '__main__':
    FileName = sys.argv[1]
    test = np.array([Feature_extraction.get138Fea(FileName)])
    # Each judge sums 10 fold-predictions, so a unanimous vote for class c
    # yields 10 * c (0, 10, 20, 30)
    judge1 = sum(classifier1(test))
    if judge1 == 0:
        judge2 = sum(classifier2(test))
        if judge2 == 0:
            print("ATP binding function")
        elif judge2 == 10:
            judge3 = sum(classifier3(test))
            if judge3 == 0:
                print("heme binding function")
            elif judge3 == 10:
                print("zinc ion binding function")
            elif judge3 == 20:
                print("GTP binding function")
            elif judge3 == 30:
                print("ADP binding function")
Example #14
def extract_features(doc):
    return [fe.word2features(doc, i) for i in range(len(doc))]
    def token_title(self):
        self.logger.setPlainText('')
        title_dict = fe.find_title_keyword(self.title_text.toPlainText())
        print(title_dict)
        self.logger.appendPlainText(str(title_dict))

    def find_keyword(self):
        self.logger.setPlainText('')
        keyword_dict = fe.find_content_keyword(self.Content_text.toPlainText())
        print(keyword_dict)
        self.logger.appendPlainText(str(keyword_dict))