def filtering(dic, alpha=3, beta=5):
    """Return a copy of ``dic`` without rare or short features.

    An entry is dropped when its value is below ``alpha`` or when its key is
    shorter than ``beta``. The input dictionary itself is left untouched.

    Args:
        dic: Dictionary whose keys are features and whose values are the
            occurrence counts compared against ``alpha``.
        alpha: Minimum feature occurrence required to keep an entry
            (default 3).
        beta: Minimum feature (key) length required to keep an entry
            (default 5).

    Returns:
        A new dict holding only the entries that pass both thresholds, in
        the same order as in ``dic``.
    """
    print('Normalizing Values - Start')
    max_bar = len(dic)
    dic_n = {}
    # Count from 1 so the last progress update reports max_bar rather than
    # stopping one step short of completion.
    for incremento, (key, value) in enumerate(dic.items(), start=1):
        if not (value < alpha or len(key) < beta):
            dic_n[key] = value
        progress_bar.update(max_bar, incremento)
    print('Normalizing Values - End')
    return dic_n
def database_train(train_data):
    """Build the per-family database from the training files.

    Every file name in ``train_data`` is looked up in the module-level
    ``malware_map`` to find its family; names that are not listed there are
    filed under ``'Safe'``. ``analyze_file`` is then called with the file name
    and the dictionary accumulated so far for that family (an empty dict for
    the first file of a family), and its return value becomes the family's
    new entry.

    Returns:
        A dict keyed by family name.
    """
    print('Creating Dataset from Training Data - Start')
    total = len(train_data)
    families = {}
    for done, name in enumerate(train_data, start=1):
        family = malware_map.get(name, 'Safe')
        families[family] = analyze_file(name, families.get(family, {}))
        progress_bar.update(total, done)
    print('Creating Dataset from Training Data - End')
    return families
def create_dictionary_safe(db_train):
    """Merge all feature counts of the 'Safe' family into one dictionary.

    ``db_train['Safe']`` is read as a dict of dicts: for every outer key
    (called a prefix in this code) the inner dict maps a feature to a count.
    Counts of the same feature found under different prefixes are added
    together, and the merged dictionary is passed through ``filtering``
    before being returned.

    Args:
        db_train: Training database keyed by family name.

    Returns:
        The filtered feature dictionary of the safe family; an empty
        dictionary when ``db_train`` has no 'Safe' entry.
    """
    print('Loading Safe Dictionary - Start')
    # A training set without any safe sample has no 'Safe' key; treat that
    # as "no features" instead of failing on None below.
    prefixes = db_train.get('Safe') or {}
    max_bar = len(prefixes)
    db_prefix = {}
    for incremento, values in enumerate(prefixes.values(), start=1):
        for value, count in values.items():
            if value in db_prefix:
                db_prefix[value] += count
            else:
                db_prefix[value] = count
        progress_bar.update(max_bar, incremento)
    db_prefix_n = filtering(db_prefix)
    print('Loading Safe Dictionary - End')
    return db_prefix_n
def determine():
    '''Determine all combinations of objects that can be used as input into
    VISUO3D (run.py script) given the data gathered into the
    data/triplets.pickle file.

    The keys of the pickled dictionary are comma-separated strings. Each key
    is split into a "pair" (everything before the last comma) and an object
    (the last field). Every key is written out unchanged first; then, for
    each pair that has at least two objects, every combination of 2 up to
    4 of its objects is written as "pair,obj1,obj2,...".

    Lines are written in first-seen order of the pairs, so the output is
    the same from run to run.

    output: data/working-combos.csv
    '''
    from collections import Counter

    print("Determining working combos...")
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; data/triplets.pickle must come from a trusted source.
    with open("data/triplets.pickle", 'rb') as triplet_file:
        dictionary = pickle.load(triplet_file)
    triplets = list(dictionary.keys())

    max_iter = 4  # max number of items per combination
    pair_counts = Counter(','.join(key.split(',')[:-1]) for key in triplets)
    # Total number of inner-loop steps below, used as the progress bar total.
    sum_iter = sum(
        min(count - 1, max_iter - 1)
        for count in pair_counts.values()
        if count > 1
    )
    pairs = list(pair_counts)  # unique pairs, in first-seen order

    # set up progress bar
    progress = 0
    # longest string in pairs (0 when there are no keys at all)
    longest = max((len(p) for p in pairs), default=0)
    progress_bar.update(0, sum_iter, prefix='Progress:')

    # open/create file; the with-block closes it even if a step fails
    with open("data/working-combos.csv", 'w') as wc_file:
        wc_file.writelines(combo + '\n' for combo in triplets)
        timer = startTimer()
        for pair in pairs:
            objs = [
                t.split(',')[-1] for t in triplets
                if t.startswith(pair + ',')
            ]
            if len(objs) < 2:
                continue
            for i in range(2, min(max_iter + 1, len(objs) + 1)):
                # dict.fromkeys removes duplicates while keeping the order.
                current = dict.fromkeys(
                    ','.join([pair] + list(o))
                    for o in combinations(objs, i)
                )
                wc_file.writelines(combo + '\n' for combo in current)
                progress_bar.update(
                    progress, sum_iter, prefix='Progress:',
                    suffix=pair + '... + ' + str(int(i))
                    + ' ' * (longest - len(pair) + 8))
                progress += 1

    endMessage = (str(progress) + '/' + str(sum_iter) + ' Done! ' + '('
                  + str(endTimer(timer)) + ' sec)')
    # Pad so the final message overwrites the longest suffix shown earlier.
    endMessage = endMessage + ' ' * (longest - min(longest, len(endMessage)))
    progress_bar.update(sum_iter, sum_iter, prefix='Progress:',
                        suffix=endMessage)
if c_map == 'malware': f_tp += 1 tp += 1 else: f_tn += 1 tn += 1 else: if c_map == 'malware': f_fp += 1 fp += 1 else: f_fn += 1 fn += 1 incremento += 1 progress_bar.update(max_bar, incremento) print('True Positive for episode {} is: {}'.format( i / fold_size, f_tp)) print('True Negative for episode {} is: {}'.format( i / fold_size, f_tn)) print('False Positive for episode {} is: {}'.format( i / fold_size, f_fp)) print('False Negative for episode {} is: {}'.format( i / fold_size, f_fn)) f_error_rate = (f_fn + f_fp) / (f_tp + f_tn + f_fp + f_fn) print('Error rate for episode {} is: {}'.format( i / fold_size, f_error_rate)) f_accuracy = 1 - f_error_rate print('Accuracy for episode {} is: {}'.format(i / fold_size, f_accuracy))