def find_lib_cluster_link(so_inst, libs):
    """Return the library in ``libs`` whose signature is closest to ``so_inst``.

    Every library with a non-empty JNI_OnLoad signature is compared against
    ``so_inst.mnemonics`` with ``tapered_levenshtein``; the similarity is
    ``1.0 - distance / num_mnemonics``. Progress is printed to stdout.

    Args:
        so_inst: object exposing ``so_file_name`` and ``mnemonics``.
        libs: iterable of mappings carrying ``"so_file_name"`` and
            ``"jni_onload_info"`` -> ``"signature"``.

    Returns:
        Tuple ``(closest_relative, max_similarity)``: the ``so_file_name`` of
        the best match and its similarity, or ``("nothing", 0.0)`` when no
        library scores above 0.0.
    """
    closest_relative, max_similarity = "nothing", 0.0

    for lib in libs:
        signature = lib["jni_onload_info"]["signature"]

        # libs with absent signatures cannot be cluster centers
        if signature == []:
            continue

        # Single-argument parenthesised prints produce the same output under
        # the Python 2 print statement and the Python 3 print function.
        print("Comparing signatures of " + lib["so_file_name"] + " and " + so_inst.so_file_name)
        print("%s %s" % (signature, so_inst.mnemonics))

        # calculate weight distance based on the shorter lengthed signature
        # TODO: Remove similar code at the start of JNI_OnLoad's
        distance, num_mnemonics = tapered_levenshtein(signature, so_inst.mnemonics)

        # A negative distance cannot be a real edit distance (presumably the
        # helper's "no mnemonics" marker -- verify against tapered_levenshtein)
        # and a zero mnemonic count would divide by zero below; neither can
        # yield a meaningful similarity, so skip this candidate.
        if distance < 0 or not num_mnemonics:
            print("no mnemonics")
            continue

        # calc similarity with this candidate
        similarity = 1.0 - float(distance) / num_mnemonics
        print("\tSimilarity is " + str(similarity) + " and distance " + str(distance) + "\n")

        # Strictly greater: on ties the earliest candidate is kept.
        if similarity > max_similarity:
            max_similarity = similarity
            closest_relative = lib["so_file_name"]

    print("Chose " + closest_relative)
    return (closest_relative, max_similarity)
def main(options, arguments):
    """Compare signatures and record ``so_inst`` in an existing cluster.

    Iterates the result of ``libraries.find()``, computes the similarity
    between ``center_obj``'s JNI_OnLoad signature and ``so_inst.mnemonics``,
    and on the first similarity of at least 0.9 inserts a non-center record
    for ``so_inst`` into ``libraries`` and returns.

    Args:
        options: not used in the body.
        arguments: not used in the body.

    Returns:
        None.
    """
    libs = libraries.find()

    # NOTE(review): ``center_obj`` and ``so_inst`` are not bound anywhere in
    # this function and the loop variables ``json_obj`` / ``comp_obj`` are
    # never read -- presumably the former are module-level names or this is
    # unfinished code; confirm before relying on it.
    # NOTE(review): both loops iterate the same ``libs`` object; if it is a
    # one-shot cursor the inner loop consumes the items the outer loop would
    # otherwise see -- verify that this is intended.
    for json_obj in libs:
        for comp_obj in libs:
            distance, num_mnemonics = tapered_levenshtein(
                center_obj["jni_onload_info"]["signature"],
                so_inst.mnemonics,
            )

            # -1.0 is taken to be the helper's "no mnemonics" marker (inferred
            # from the message below -- confirm against tapered_levenshtein).
            # The test must be equality: with ``!=`` every real distance
            # aborted the inner loop and only the marker reached the insert.
            if distance == -1.0:
                print('no mnemonics')
                break

            # calc similarity with center
            similarity = 1.0 - float(distance) / num_mnemonics
            if similarity < 0.9:
                continue

            # found cluster, end search
            # TODO: switch to insert_many with large amounts of apks
            libraries.insert_one({
                'so_file_name': so_inst.so_file_name,
                'arch': so_inst.arch,
                'apks_found_in': [so_inst.apk_filename],
                'hash': so_inst.sha1,
                'jni_onload_info': {
                    'signature': so_inst.mnemonics,
                    'is_cluster_center': False,
                    'similarity_with_center': similarity,
                    'variations': [],
                },
            })
            print('added to existing cluster')
            return