def fitness_hidost_pdfrate_mean(file_paths, seed_sha1):
    """Score variants against both the 'pdfrate' and 'hidost' classifiers.

    Each file is sent to the 'pdfrate' and 'hidost' classifiers and to the
    'cuckoo' oracle through ``query_classifier``.  The raw hidost scores are
    passed through ``sigmoid`` before use (presumably to put them on the
    same scale as the pdfrate scores -- confirm against the classifiers).

    For a variant the oracle labels ``'malicious'`` the fitness is the
    negated mean of the two classifier scores, plus a bonus of 0.5 when
    both scores are below 0.5.  Every other variant gets ``LOW_SCORE``.

    Args:
        file_paths: sequence of '/'-separated path strings of the variants.
        seed_sha1: identifier of the seed sample, forwarded to the oracle.

    Returns:
        A list of fitness scores, one per entry of ``file_paths``, in order.

    Raises:
        AssertionError: if the classifiers or the oracle return a different
            number of results than there are files.
    """
    p_scores = query_classifier('pdfrate', file_paths)
    # Build a real list: on Python 3 ``map`` returns a lazy iterator, which
    # supports neither the len() check nor the indexing done below.
    h_scores = [sigmoid(raw) for raw in query_classifier('hidost', file_paths)]
    oracle_results = query_classifier('cuckoo', file_paths, seed_sha1)

    assert (len(p_scores) == len(h_scores) == len(oracle_results) == len(file_paths))

    fitness_scores = []
    for i, file_path in enumerate(file_paths):
        # Only the last three path components are logged.
        short_path = '/'.join(file_path.split('/')[-3:])
        p_score = p_scores[i]
        h_score = h_scores[i]
        oracle_result = oracle_results[i]

        if oracle_result == 'malicious':
            classify_score = [p_score, h_score]
            score = -mean(classify_score)
            # Extra reward once both classifiers score the variant below 0.5.
            if max(classify_score) < 0.5:
                score += 0.5
        else:
            # big negative fitness
            score = LOW_SCORE

        logger.info("Variant: %s %.2f %.2f %.2f %s" % (oracle_result, score, p_score, h_score, short_path))
        fitness_scores.append(score)

    return fitness_scores
def fitness_pos_neg(file_paths, seed_sha1, classifier_name, oracle_name, offset=0):
    """Score variants with one classifier, negating its (offset) score.

    Queries ``classifier_name`` for a score per file and ``oracle_name`` for
    a label per file, both through ``query_classifier``.  A variant the
    oracle labels ``'malicious'`` receives ``-(score - offset)`` as a float;
    every other variant receives ``LOW_SCORE``.

    NOTE(review): this file defines ``fitness_pos_neg`` a second time further
    down; when the module is loaded the later definition replaces this one.

    NOTE(review): the retry loop below has no upper bound -- it repeats for
    as long as either query keeps returning ``None``.

    Args:
        file_paths: sequence of '/'-separated path strings of the variants.
        seed_sha1: identifier of the seed sample, forwarded to the oracle.
        classifier_name: name passed to ``query_classifier`` for scoring.
        oracle_name: name passed to ``query_classifier`` for labelling.
        offset: value subtracted from each classifier score before negation.

    Returns:
        A list of fitness scores, one per classifier score, in order.
    """
    classified_scores = query_classifier(classifier_name, file_paths)
    oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)

    # Identity checks: ``== None`` dispatches to the result's __eq__, which is
    # not a reliable "missing result" test for array-like return values.
    while oracle_results is None or classified_scores is None:
        logger.warning("Invalid results: oracle %s classifier %s " % (oracle_results is not None, classified_scores is not None))
        classified_scores = query_classifier(classifier_name, file_paths)
        oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)

    for i, file_path in enumerate(file_paths):
        # Only the last three path components are logged.
        short_path = '/'.join(file_path.split('/')[-3:])
        logger.info("Variant: %s %s %s" % (oracle_results[i], classified_scores[i], short_path))

    fitness_scores = []
    for i, classified in enumerate(classified_scores):
        if oracle_results[i] == 'malicious':
            # Multiplying by -1.0 keeps the result a float, as before.
            score = (classified - offset) * -1.0
        else:
            # big negative fitness
            score = LOW_SCORE
        fitness_scores.append(score)

    return fitness_scores
def fitness_pre_softmax(file_paths, seed_sha1, classifier_name, oracle_name):
    """Score variants by the gap between two classifier outputs.

    Queries ``classifier_name`` and ``oracle_name`` through
    ``query_classifier``.  Each classifier result is indexed at positions 0
    and 1; per the original author's comment these are the benign and the
    malicious logit respectively (not verifiable from this function alone).
    A variant the oracle labels ``'malicious'`` receives
    ``result[0] - result[1]``; every other variant receives ``LOW_SCORE``.

    If the first pair of queries raises, the failure is logged and both
    queries are retried once; an exception from that retry propagates.

    NOTE(review): the ``None``-retry loop below has no upper bound.

    Args:
        file_paths: sequence of '/'-separated path strings of the variants.
        seed_sha1: identifier of the seed sample, forwarded to the oracle.
        classifier_name: name passed to ``query_classifier`` for scoring.
        oracle_name: name passed to ``query_classifier`` for labelling.

    Returns:
        A list of fitness scores, one per classifier result, in order.
    """
    try:
        classified_scores = query_classifier(classifier_name, file_paths)
        oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)
    except Exception as exc:
        # Retry once, but record what went wrong instead of hiding it.
        logger.warning("Query failed, retrying once: %r" % (exc,))
        classified_scores = query_classifier(classifier_name, file_paths)
        oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)

    # Identity checks: ``== None`` dispatches to the result's __eq__, which is
    # not a reliable "missing result" test for array-like return values.
    while oracle_results is None or classified_scores is None:
        logger.warning("Invalid results: oracle %s classifier %s " % (oracle_results is not None, classified_scores is not None))
        classified_scores = query_classifier(classifier_name, file_paths)
        oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)

    for i, file_path in enumerate(file_paths):
        # Only the last three path components are logged.
        short_path = '/'.join(file_path.split('/')[-3:])
        logger.info("Variant: %s %s %s" % (oracle_results[i], classified_scores[i], short_path))

    fitness_scores = []
    for i, classified in enumerate(classified_scores):
        if oracle_results[i] == 'malicious':
            # distance between benign logit and malicious logit
            score = classified[0] - classified[1]
        else:
            # big negative fitness
            score = LOW_SCORE
        fitness_scores.append(score)

    return fitness_scores
def fitness_pos_neg(file_paths, seed_sha1, classifier_name, oracle_name, offset=0):
    """Score variants with one classifier, negating its (offset) score.

    Queries ``classifier_name`` for a score per file and ``oracle_name`` for
    a label per file, both through ``query_classifier``.  A variant the
    oracle labels ``'malicious'`` receives ``-(score - offset)`` as a float;
    every other variant receives ``LOW_SCORE``.

    NOTE(review): ``fitness_pos_neg`` is also defined earlier in this file;
    being the later definition, this is the one in effect after the module
    is loaded.

    NOTE(review): the retry loop below has no upper bound -- it repeats for
    as long as either query keeps returning ``None``.

    Args:
        file_paths: sequence of '/'-separated path strings of the variants.
        seed_sha1: identifier of the seed sample, forwarded to the oracle.
        classifier_name: name passed to ``query_classifier`` for scoring.
        oracle_name: name passed to ``query_classifier`` for labelling.
        offset: value subtracted from each classifier score before negation.

    Returns:
        A list of fitness scores, one per classifier score, in order.
    """
    classified_scores = query_classifier(classifier_name, file_paths)
    oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)

    # Identity checks: ``== None`` dispatches to the result's __eq__, which is
    # not a reliable "missing result" test for array-like return values.
    while oracle_results is None or classified_scores is None:
        logger.warning("Invalid results: oracle %s classifier %s " % (oracle_results is not None, classified_scores is not None))
        classified_scores = query_classifier(classifier_name, file_paths)
        oracle_results = query_classifier(oracle_name, file_paths, seed_sha1)

    for i, file_path in enumerate(file_paths):
        # Only the last three path components are logged.
        short_path = '/'.join(file_path.split('/')[-3:])
        logger.info("Variant: %s %s %s" % (oracle_results[i], classified_scores[i], short_path))

    fitness_scores = []
    for i, classified in enumerate(classified_scores):
        if oracle_results[i] == 'malicious':
            # Multiplying by -1.0 keeps the result a float, as before.
            score = (classified - offset) * -1.0
        else:
            # big negative fitness
            score = LOW_SCORE
        fitness_scores.append(score)

    return fitness_scores