Ejemplo n.º 1
0
def get_feature_res(cursor, feature, extra_selector=""):
    """Report uniqueness and stability rates for one fingerprint feature.

    Walks every distinct ``user_id`` in the module-level ``table_name``,
    builds a CROSS and a SINGLE ``Fingerprint`` (restricted to ``feature``)
    for each of the user's images, and tallies how often each fingerprint
    occurs.

    Args:
        cursor: Database cursor used for all queries; it is also handed to
            every ``Fingerprint`` that gets built.
        feature: Attribute selection forwarded to ``Fingerprint``.
        extra_selector: SQL fragment appended verbatim to the per-user image
            query. It is interpolated without escaping.

    Returns:
        Three strings formatted with ``"{:3.1f}%"``:
        the share of distinct SINGLE fingerprints that occur exactly once,
        the share of users with more than one image for which
        ``is_all_same`` accepts their CROSS fingerprints, and the share of
        distinct CROSS fingerprints (from those users) that occur once.
    """
    cursor.execute("SELECT DISTINCT(user_id) from {}".format(table_name))
    user_rows = cursor.fetchall()

    multi_image_users = 0.0
    consistent_users = 0.0
    cross_tally = {}
    single_tally = {}

    for (user_id,) in user_rows:
        cursor.execute("SELECT image_id from {} where user_id='{}' {}".format(
            table_name, user_id, extra_selector))
        ids = [image_id for (image_id,) in cursor.fetchall()]

        cross_prints = []
        for image_id in ids:
            # Build the CROSS fingerprint first, then the SINGLE one
            cross_prints.append(
                Fingerprint(cursor, image_id, table_name,
                            Fingerprint_Type.CROSS, feature))
            single_print = Fingerprint(cursor, image_id, table_name,
                                       Fingerprint_Type.SINGLE, feature)
            single_tally[single_print] = single_tally.get(single_print, 0) + 1

        # Users with a single image cannot say anything about stability
        if len(ids) <= 1:
            continue

        multi_image_users += 1.0
        if not is_all_same(cross_prints):
            continue

        consistent_users += 1.0
        shared_print = cross_prints[0]
        cross_tally[shared_print] = cross_tally.get(shared_print, 0) + 1

    cross_unique = float(sum(1 for n in cross_tally.values() if n == 1))
    single_unique = float(sum(1 for n in single_tally.values() if n == 1))

    # Clamp the denominators so empty tallies give 0.0% instead of dividing
    # by zero
    multi_image_users = max(multi_image_users, 1.0)
    single_distinct = max(float(len(single_tally)), 1.0)
    cross_distinct = max(float(len(cross_tally)), 1.0)

    pct = "{:3.1f}%"
    return (pct.format(single_unique / single_distinct * 100),
            pct.format(consistent_users / multi_image_users * 100),
            pct.format(cross_unique / cross_distinct * 100))
Ejemplo n.º 2
0
  def __cross_helper(self, b1, b2, cursor, table_name, attrs, extra_selector):
    """Compare CROSS fingerprints of users seen in both browsers b1 and b2.

    Args:
      b1, b2: Browser names matched against the ``browser`` column.
      cursor: Database cursor used for every query and passed on to
        ``Fingerprint``.
      table_name: Table to query; interpolated into the SQL text.
      attrs: Attribute selection forwarded to ``Fingerprint``.
      extra_selector: SQL fragment appended verbatim to the user lookups.

    Returns:
      ``None`` when no user has rows for both browsers. Otherwise the tuple
      ``(number of users, matching share, unique share, entropy,
      number of matches)``.

      NOTE(review): the match count is clamped to at least 1.0 before it is
      used in the ratios and returned, so "no match at all" is reported as
      one match. Kept as-is because callers may rely on it.
    """
    cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(table_name, b1, extra_selector))
    b1_uids = [uid for (uid,) in cursor.fetchall()]

    # Keep one entry per b2 row belonging to a user that also has a b1 row
    uids = []
    for candidate in b1_uids:
      cursor.execute("SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}".format(table_name, candidate, b2, extra_selector))
      for (matched_uid,) in cursor.fetchall():
        uids.append(matched_uid)

    # Emptiness check by truthiness; `is 0` compared object identity
    if not uids:
      return None

    fp_to_count = {}
    num_cross_browser = 0.0

    for uid in uids:
      cursor.execute("SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".format(table_name, b1, uid))
      image1_id = cursor.fetchone()[0]

      cursor.execute("SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".format(table_name, b2, uid))
      image2_id = cursor.fetchone()[0]

      # Each fingerprint is given the *other* browser as its last argument
      fp_1 = Fingerprint(cursor, image1_id, table_name, Fingerprint_Type.CROSS, attrs, b2)
      fp_2 = Fingerprint(cursor, image2_id, table_name, Fingerprint_Type.CROSS, attrs, b1)

      if fp_1 == fp_2:
        num_cross_browser += 1
        fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

    entropy = 0.0
    num_unique = 0.0
    for count in fp_to_count.values():
      if count == 1:
        num_unique += 1.0

      # fp_to_count is only non-empty when num_cross_browser > 0
      P = float(count) / float(num_cross_browser)
      entropy -= P * math.log(P, 2)

    num_uids = max(float(len(uids)), 1.0)
    num_cross_browser = max(num_cross_browser, 1.0)

    return int(num_uids), num_cross_browser/num_uids, num_unique/num_cross_browser, entropy, num_cross_browser
    def get_fingerprints_countermeasure(self, countermeasure):
        """Return a ``Fingerprint`` for every stored document that matches.

        Queries ``self.collection.find`` with
        ``{'countermeasure': countermeasure}`` and wraps each result in
        ``Fingerprint``, preserving the order in which results are yielded.
        """
        matches = self.collection.find({'countermeasure': countermeasure})
        return [Fingerprint(document) for document in matches]
    def get_all_fingerprints(self):
        """Return a ``Fingerprint`` for every document in the collection.

        Calls ``self.collection.find()`` without a filter and wraps each
        result in ``Fingerprint``, preserving the order of the results.
        """
        return [Fingerprint(document) for document in self.collection.find()]
Ejemplo n.º 5
0
    def detect(self, X, y=None, threshold=None):
        """Predict whether samples of X are anomalous or not.

            Parameters
            ----------
            X : np.array of shape=(n_samples,)
                Samples to check. Each one is compared to its best match via
                its own ``compare`` method.

            y : Ignored

            threshold : float, default=None
                Minimum required match score to consider a point benign.
                Only ``None`` selects ``self.threshold``; an explicit ``0``
                is honoured as given.

            Returns
            -------
            result : np.array of shape=(n_samples,)
                Prediction of samples in X: +1 if benign, -1 if anomalous.
            """
        # Get best match for each sample; unmatched ones get an empty default
        best_match = self.predict(X, default=Fingerprint())
        # Compute match score between each sample and its best match
        scores = np.asarray(
            [x.compare(fp) for x, fp in zip(X, best_match)])
        # `threshold or self.threshold` would silently discard a threshold of
        # 0, so test for None explicitly
        limit = self.threshold if threshold is None else threshold
        # Map the boolean mask to +1 (score high enough) / -1 (too low)
        return (scores >= limit) * 2 - 1
Ejemplo n.º 6
0
  def __single_helper(self, b, cursor, table_name, attrs, extra_selector):
    """Measure how many SINGLE fingerprints of one browser are unique.

    Args:
      b: Browser name matched against the ``browser`` column.
      cursor: Database cursor used for the query and passed on to
        ``Fingerprint``.
      table_name: Table to query; interpolated into the SQL text.
      attrs: Attribute selection forwarded to ``Fingerprint``.
      extra_selector: SQL fragment appended verbatim to the query.

    Returns:
      ``None`` when the browser has no rows, otherwise
      ``(number of images, fingerprints seen exactly once / number of images)``.
    """
    cursor.execute("SELECT image_id FROM {} WHERE browser='{}' {}".format(table_name, b, extra_selector))
    image_ids = [image_id for (image_id,) in cursor.fetchall()]

    # Emptiness check by truthiness; `is 0` compared object identity
    if not image_ids:
      return None

    fp_to_count = {}
    for image_id in image_ids:
      fp = Fingerprint(cursor, image_id, table_name, Fingerprint_Type.SINGLE, attrs)
      fp_to_count[fp] = fp_to_count.get(fp, 0) + 1

    # Float numerator keeps the ratio a true division on Python 2 as well
    num_unique = float(sum(1 for count in fp_to_count.values() if count == 1))
    num_images = len(image_ids)

    return num_images, num_unique/num_images
Ejemplo n.º 7
0
def create_fingerprints(peaks, fan_value=15):
    """
    Pair every peak with the peaks that follow it and fingerprint each pair.

    Each peak is indexed as ``(frequency, time)``. A peak is paired with up
    to ``fan_value - 1`` successors; a pair is kept when the time difference
    lies in ``[0, 200]``.

    fingerprint = Fingerprint(hash, time)
    hash        = "f1,f2,t2 - t1"
    time        = t1

    Returns the de-duplicated fingerprints as a list (order follows set
    iteration and is therefore unspecified).
    """
    peaks = list(peaks)
    total = len(peaks)
    prints = []

    for anchor_idx in range(total):
        for offset in range(1, fan_value):
            target_idx = anchor_idx + offset
            # Larger offsets only move further past the end
            if target_idx >= total:
                break

            f1 = peaks[anchor_idx][0]
            f2 = peaks[target_idx][0]
            t1 = peaks[anchor_idx][1]
            t2 = peaks[target_idx][1]
            t_delta = t2 - t1

            # Only keep pairs whose peaks are at most 200 time units apart
            if 0 <= t_delta <= 200:
                pair_hash = '{},{},{}'.format(f1, f2, t_delta)
                prints.append(Fingerprint(pair_hash, t1))

    return list(set(prints))
Ejemplo n.º 8
0
def get_fingerprints_experiments(
        cur,
        min_nb_fingerprints,
        attributes,
        id_file="./data/consistent_extension_ids.csv"):
    """
        Returns a list of the fingerprints to use for the experiment
        We get only fingerprints whose associated user has at least
        min_nb_fingerprints and who have no inconsistency

        Args:
            cur: Database cursor. The query uses ``%s`` placeholders, the
                same style ``get_consistent_ids`` uses against this table.
            min_nb_fingerprints: A user is kept when its row count is
                strictly greater than this value.
            attributes: Forwarded to ``Fingerprint`` for every row.
            id_file: Text file with one header line followed by one user id
                per line.

        Returns:
            ``Fingerprint`` objects ordered by ``counter``. Rows for which
            ``Fingerprint`` raises are printed and skipped. An id file
            without ids yields ``[]`` without querying the database.
    """
    with open(id_file, "r") as f:
        # we jump header
        f.readline()
        user_ids = [line.rstrip("\r\n") for line in f]

    # Blank lines (e.g. a trailing newline) are not ids
    user_ids = [user_id for user_id in user_ids if user_id]
    if not user_ids:
        # "id in ()" is not valid SQL, and nothing could match anyway
        return []

    # Bind the ids as parameters instead of splicing file content into SQL
    placeholders = ",".join(["%s"] * len(user_ids))
    query = (
        "SELECT *, NULL as canvasJS FROM extensionDataScheme WHERE "
        "id in (" + placeholders + ") and "
        "id in (SELECT id FROM extensionDataScheme GROUP BY "
        "id having count(*) > %s) "
        "ORDER by counter ASC")
    cur.execute(query, tuple(user_ids) + (min_nb_fingerprints,))

    fp_set = []
    for fp in cur.fetchall():
        try:
            fp_set.append(Fingerprint(attributes, fp))
        except Exception as e:
            # Best effort: report the broken row and keep going
            print(e)

    return fp_set
Ejemplo n.º 9
0
    def merge_fingerprints(self, fingerprints, threshold=1):
        """Merge fingerprints based on similarity.

            Parameters
            ----------
            fingerprints : list
                List of fingerprints to merge.

            threshold : float, default=1
                ``<= 0`` replaces every entry by one fingerprint built from
                the union of all unique fingerprints; a value strictly
                between 0 and 1 merges pairs whose ``compare`` score reaches
                the threshold; anything else leaves the input unmerged.

            Returns
            -------
            result : np.array
                Merged fingerprints, one per input fingerprint.
            """
        # Default answer: every fingerprint stays as it is
        result = np.asarray(fingerprints)

        # Unique fingerprints in sorted order
        unique = sorted(set(fingerprints))

        # Case 1: everything collapses into a single merged fingerprint
        if threshold <= 0:
            result[:] = Fingerprint(set().union(*unique))
            return result

        # Case 2: merge pairs that are similar enough
        if threshold < 1:
            candidates = self.score_combinations(unique, threshold)
            pairs = {
                (fp1, fp2)
                for fp1, fp2 in candidates
                if fp1.compare(fp2) >= threshold
            }

            # original fingerprint -> merged fingerprint
            mapping = {}
            for fp1, fp2 in pairs:
                # Merge whatever each side has already been merged into
                left = mapping.get(fp1, fp1)
                right = mapping.get(fp2, fp2)
                fp_merged = left.merge(right)
                mapping[fp1] = fp_merged
                mapping[fp2] = fp_merged

            return np.array([mapping.get(fp, fp) for fp in fingerprints])

        # Case default: all fingerprints are different
        return result
Ejemplo n.º 10
0
    def load(self, *files, store=True, parameters=False):
        """Load fingerprints from files.

            Parameters
            ----------
            *files : string
                JSON files from which to load fingerprints. Each file holds
                an object whose optional ``'fingerprints'`` entry is a list
                of ``(fingerprint dict, label)`` pairs.

            store : boolean, default=True
                If True, store fingerprints in FlowPrint object

            parameters : boolean, default=False
                If True, also update FlowPrint parameters from file
                (``batch``, ``window``, ``correlation``, ``similarity``,
                ``threshold``); keys missing from the file keep their
                current value.

            Returns
            -------
            result : dict of Fingerprint -> set of labels
                Fingerprints imported from file.
            """
        # Initialise fingerprints
        fingerprints = dict()

        # Loop over all files
        for path in files:
            # The handle is only needed while parsing
            with open(path, 'r') as infile:
                data = json.load(infile)

            # Store parameters if necessary
            if parameters:
                self.batch = data.get('batch', self.batch)
                self.window = data.get('window', self.window)
                self.correlation = data.get('correlation', self.correlation)
                self.similarity = data.get('similarity', self.similarity)
                self.threshold = data.get('threshold', self.threshold)

            # A file may carry parameters only: a missing or null
            # 'fingerprints' entry counts as "no fingerprints" instead of
            # raising TypeError while iterating None
            for fp, label in data.get('fingerprints') or ():
                # Transform json to Fingerprint
                fp = Fingerprint().from_dict(fp)
                # Collect every label seen for this fingerprint
                fingerprints[fp] = fingerprints.get(fp, set()) | {label}

        # Store fingerprints if necessary
        if store:
            for fp, labels in fingerprints.items():
                self.fingerprints[fp] = self.fingerprints.get(fp, set()) | labels

        # Return fingerprints
        return fingerprints
Ejemplo n.º 11
0
def carrega_txt():
    """Upload the integers stored in ``teste_1.txt`` and store a template.

    The file content is split on ``|``; every non-blank piece that parses as
    an integer is kept, the rest is ignored. The resulting list is uploaded
    to both characteristic buffers (``0x01`` and ``0x02``), then a template
    is created and stored. The template count is printed before and after.
    """
    f = Fingerprint()
    # Close the file as soon as it has been read
    with open("teste_1.txt", 'r') as file:
        list_content = file.read().strip().split("|")
    list_valid = []

    for item in list_content:
        if item.strip():
            try:
                list_valid.append(int(item))
            except ValueError:
                # Non-numeric pieces are skipped on purpose
                pass

    print(list_valid)

    f.uploadCharacteristics(0x01, list_valid)
    f.uploadCharacteristics(0x02, list_valid)

    print(f.getTemplateCount())
    print("Create Template -> " + str(f.createTemplate()))
    print("Store Template  -> " + str(f.storeTemplate()))
    print(f.getTemplateCount())
Ejemplo n.º 12
0
def check(origin, plagiarized):
    """Print the fingerprint hashes shared by two text files.

    Args:
        origin: Path of the reference text.
        plagiarized: Path of the text to compare against it.

    Raises:
        NotImplementedError: If the shorter text has fewer than 60
            whitespace-separated words.

    The fingerprint parameters (window, k-gram length, base, modulo) are
    derived from the word count of the shorter text. Results are printed,
    nothing is returned.
    """
    with open(origin, "r") as handle:
        original_text = handle.read()

    with open(plagiarized, "r") as handle:
        suspect_text = handle.read()

    text_length = min(len(original_text.split()), len(suspect_text.split()))

    if text_length < 60:
        raise NotImplementedError("Compare texts with at least 60 words.")

    window = max(text_length // 21, 3)
    kgram = window - 1

    if text_length < 250:
        base = 11
    elif text_length < 600:
        base = 23
    else:
        base = 101

    # Five times the word count rounded to the nearest thousand, at least 1000
    modulo = max(round(text_length * 5, -3), 1000)

    fprint = Fingerprint(kgram_len=kgram, window_len=window, base=base, modulo=modulo)

    first = fprint.generate(str=original_text)
    second = fprint.generate(str=suspect_text)

    # Entries of the first fingerprint that also occur in the second
    similar = [entry for entry in first if entry in second]

    # For every hash, count the (first, second) entry pairs that share it
    similar_grams = Counter()
    for element in first:
        for sec in second:
            if sec[0] == element[0]:
                similar_grams[element[0]] += 1

    print("Identical substring hashes:")
    pprint(similar)
    print("\nIdentical grams:")
    pprint(similar_grams)
def get_identity():
    """Create the hard-coded browser identity used for the Roblox signup page.

    Returns:
        ``(fp, window)``: a ``Fingerprint`` populated with fixed values (plus
        the module-level ``user_agent``) and the ``Window`` describing the
        signup page.
    """
    def build_jsbd(w):
        # Randomised per call; ``w`` only contributes its title
        return {
            "HL": random.randint(1, 5),
            "NCE": True,
            "DT": w.title,
            "NWD": "undefined",
            "DA": None,
            "DR": None,
            "DMT": random.randint(1, 40),
            "DO": None,
            "DOT": random.randint(30, 50),
        }

    installed_fonts = "Arial,Arial Black,Arial Narrow,Book Antiqua,Bookman Old Style,Calibri,Cambria,Cambria Math,Century,Century Gothic,Century Schoolbook,Comic Sans MS,Consolas,Courier,Courier New,Garamond,Georgia,Helvetica,Impact,Lucida Bright,Lucida Calligraphy,Lucida Console,Lucida Fax,Lucida Handwriting,Lucida Sans,Lucida Sans Typewriter,Lucida Sans Unicode,Microsoft Sans Serif,Monotype Corsiva,MS Gothic,MS PGothic,MS Reference Sans Serif,MS Sans Serif,MS Serif,Palatino Linotype,Segoe Print,Segoe Script,Segoe UI,Segoe UI Light,Segoe UI Semibold,Segoe UI Symbol,Tahoma,Times,Times New Roman,Trebuchet MS,Verdana,Wingdings,Wingdings 2,Wingdings 3"
    plugins = "Chrome PDF Plugin,Chrome PDF Viewer,Native Client"

    window = Window("Roblox", "https://www.roblox.com/account/signupredir")
    fp = Fingerprint(
        user_agent=user_agent,
        protochain_hash="5d76839801bc5904a4f12f1731a7b6d1",
        sec_fetch=True,
        content_type_value="application/x-www-form-urlencoded; charset=UTF-8",
        accept_language_value="en-US,en;q=0.9",
        jsbd_gen=build_jsbd,
        DNT="unknown",
        L="en-US",
        D=24,
        PR=1,
        S="1920,1080",
        AS="1920,1040",
        SS=True,
        LS=True,
        IDB=True,
        B=False,
        ODB=True,
        CPUC="unknown",
        PK="Win32",
        JSF=installed_fonts,
        P=plugins,
        T="0,false,false",
        H="8",
        SWF=False)
    return fp, window
Ejemplo n.º 14
0
def get_consistent_ids(cur):
    """
        Returns a list of user ids having only consistent fingerprints

        Every row of ``extensionDataScheme`` is read in batches of 5000
        counters and scored per user id. A user is kept when its
        inconsistency score divided by its number of fingerprints is below
        0.02 and it is not flagged by the canvas query at the end.

        Args:
            cur: Database cursor returning rows addressable by column name
                and accepting ``%s`` placeholders.

        Returns:
            List of user ids, in the order the ids were first seen.
    """

    batch_size = 5000
    attributes = Fingerprint.INFO_ATTRIBUTES + Fingerprint.HTTP_ATTRIBUTES + \
                     Fingerprint.JAVASCRIPT_ATTRIBUTES + Fingerprint.FLASH_ATTRIBUTES
    # Users seen with one of these are pushed far above the 0.02 ratio
    excluded_oses = ("Android", "iOS", "Windows Phone", "Firefox OS",
                     "Windows 95")
    excluded_browsers = ("Safari", "IE", "Edge", "Googlebot")

    id_to_oses = dict()
    id_to_browsers = dict()
    id_to_nb_inconsistencies = dict()
    id_to_nb_fps = dict()

    cur.execute('SELECT max(counter) as nb_fps from extensionDataScheme')
    max_counter = cur.fetchone()["nb_fps"]
    # max() over an empty table is NULL; then there is nothing to scan
    nb_fps = 0 if max_counter is None else max_counter + 1

    # Half-open range [start, start + batch_size). The previous strict bounds
    # on both sides never fetched counters that are multiples of batch_size.
    sql = "SELECT * FROM extensionDataScheme where counter >= %s and counter < %s"
    for start in range(0, nb_fps, batch_size):
        print(start)
        cur.execute(sql, (start, start + batch_size))
        for fp_dict in cur.fetchall():
            try:
                fp = Fingerprint(attributes, fp_dict)
                os_name = fp.getOs()
                browser = fp.getBrowser()
                user_id = fp.getId()

                id_to_oses.setdefault(user_id, set()).add(os_name)
                id_to_browsers.setdefault(user_id, set()).add(browser)

                # The checks below overwrite each other in this order
                if len(id_to_browsers[user_id]) > 1 or \
                        len(id_to_oses[user_id]) > 1:
                    id_to_nb_inconsistencies[user_id] = 100000000

                if os_name in excluded_oses:
                    id_to_nb_inconsistencies[user_id] = 10000000000

                if browser in excluded_browsers:
                    id_to_nb_inconsistencies[user_id] = 10000000

                if fp.hasPlatformInconsistency():
                    id_to_nb_inconsistencies[user_id] = \
                        id_to_nb_inconsistencies.get(user_id, 0) + 5

                id_to_nb_fps[user_id] = id_to_nb_fps.get(user_id, 0) + 1

                # Seems weird but made on purpose !
                if user_id not in id_to_nb_inconsistencies:
                    id_to_nb_inconsistencies[user_id] = 0

            except Exception:
                # A row that cannot be processed penalises its user instead
                # of aborting the scan
                id_to_nb_inconsistencies[fp_dict["id"]] = 1000000

    user_id_consistent = [
        x for x in id_to_nb_fps
        if float(id_to_nb_inconsistencies[x]) / float(id_to_nb_fps[x]) < 0.02
    ]
    # we remove user that poison their canvas
    # we select users that changed canvas too frequently
    cur.execute(
        "SELECT id, count(distinct canvasJSHashed) as count, "
        "count(canvasJSHashed) as nb_fps FROM extensionDataScheme "
        "group by id having "
        "count(distinct canvasJSHashed)/count(canvasJSHashed) > 0.35 "
        "and count(canvasJSHashed) > 5 order by id")
    # A set keeps the membership test below constant-time
    poisoner_ids = set(row["id"] for row in cur.fetchall())

    return [
        user_id for user_id in user_id_consistent
        if user_id not in poisoner_ids
    ]
Ejemplo n.º 15
0
def limpa_db(self):
    """Clear the stored templates, printing the count before and after.

    The two labels were swapped: "Depois" (after) was printed before the
    wipe and "Antes" (before) after it.
    """
    f = Fingerprint()
    print("Antes " + str(f.getTemplateCount()))
    f.limpa_bd()
    print("Depois " + str(f.getTemplateCount()))
Ejemplo n.º 16
0
def getRes(b1,
           b2,
           cursor,
           quiet,
           attrs="hashes, langs",
           extra_selector="",
           fp_type=Fingerprint_Type.CROSS):
    """Compare fingerprints of users seen in both browsers b1 and b2.

    Besides the comparison, the function rebuilds the module-level ``mask``
    and ``instability`` globals from the ``fonts`` column: every position
    whose value differs between the two browsers for more than 0.1% of the
    users is set to 0 in ``mask``.

    Args:
        b1, b2: Browser names matched against the ``browser`` column.
        cursor: Database cursor used for every query and passed on to
            ``Fingerprint``.
        quiet: When false, progress and result figures are printed.
        attrs: Attribute selection forwarded to ``Fingerprint``.
        extra_selector: SQL fragment appended verbatim to the user lookups.
        fp_type: ``Fingerprint_Type`` forwarded to ``Fingerprint``.

    Returns:
        ``None`` when no user has rows for both browsers, otherwise
        ``(number of users, matching rate, unique rate)`` where both rates
        are strings formatted with ``"{:3.1f}%"``.
    """
    if not quiet:
        print('extra_selector="{}"'.format(extra_selector))
    global mask
    global b_mask
    mask = None
    global instability
    tuids = []
    uids = []
    cursor.execute("SELECT COUNT(DISTINCT(ip)) FROM {}".format(table_name))
    if not quiet:
        print('ip', cursor.fetchone()[0])
    cursor.execute(
        "SELECT COUNT(DISTINCT(user_id)) FROM {}".format(table_name))
    if not quiet:
        print('user', cursor.fetchone()[0])

    cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(
        table_name, b1, extra_selector))
    for uid, in cursor.fetchall():
        tuids.append(uid)

    if not quiet:
        print(b1, len(tuids))

    for candidate in tuids:
        cursor.execute(
            "SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}".
            format(table_name, candidate, b2, extra_selector))
        # Separate name so the outer loop variable is not rebound
        for matched_uid, in cursor.fetchall():
            uids.append(matched_uid)

    if not quiet:
        print(b1, 'and', b2, len(uids))

    # Emptiness check by truthiness; `is 0` compared object identity
    if not uids:
        return None

    # uids is the list of users that use both b1 and b2
    hash_all = {}
    hash_long = []
    fp_to_count = {}
    instability = {}

    for uid in uids:
        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b1, uid))
        image1_id = cursor.fetchone()[0]

        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b2, uid))
        image2_id = cursor.fetchone()[0]

        # Each fingerprint is given the *other* browser as its last argument
        fp_1 = Fingerprint(cursor, image1_id, table_name, fp_type, attrs, b2)
        fp_2 = Fingerprint(cursor, image2_id, table_name, fp_type, attrs, b1)

        try:
            # Feature to mask
            feature = "fonts"
            cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                feature, table_name, image1_id))
            hashes_1 = cursor.fetchone()[0]

            cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                feature, table_name, image2_id))
            hashes_2 = cursor.fetchone()[0]

            # The mask length is taken from the first user that has data
            if mask is None:
                mask = [1 for _ in range(len(hashes_1))]

            if len(hashes_1) == len(hashes_2):
                for i in range(len(hashes_1)):
                    if i not in hash_all:
                        hash_all.update({i: []})
                    if i not in instability:
                        instability.update({i: 0.0})

                    hash1_val = hashes_1[i]
                    hash2_val = hashes_2[i]

                    if hash1_val == hash2_val:
                        hash_all[i].append(hash1_val)
                    else:
                        # Each disagreeing user adds an equal share
                        instability[i] += 1.0 / len(uids)

        except Exception:
            # Best effort: a user whose feature data cannot be read does not
            # contribute to the mask, but is still compared below
            pass
        if fp_1 == fp_2:
            hash_long.append(fp_1)
            if fp_1 in fp_to_count:
                fp_to_count[fp_1] += 1
            else:
                fp_to_count.update({fp_1: 1})

    # Function-call form prints the same text on Python 2 and 3
    print('hashall:' + str(len(hash_all)))

    for position, rate in instability.items():
        if rate > 0.001:
            mask[position] = 0

    num_distinct = max(float(len(fp_to_count)), 1.0)
    num_unique = 0.0
    for _, count in fp_to_count.items():
        if count == 1:
            num_unique += 1.0
    num_cross_browser = float(len(hash_long))
    num_uids = max(float(len(uids)), 1.0)

    if not quiet:
        for i, d in instability.items():
            print("{}: instability: {}".format(i, d))

        print('Cross_browser', num_cross_browser)
        print('Cross_browser rate', num_cross_browser / num_uids)

        print('Cross_browser unique', num_unique / num_distinct)
        print(num_unique, num_distinct)

    return int(num_uids), "{:3.1f}%".format(
        num_cross_browser / num_uids * 100), "{:3.1f}%".format(
            num_unique / num_distinct * 100)
Ejemplo n.º 17
0
    def __getRes(self,
                 b1,
                 b2,
                 cursor,
                 quiet,
                 rate,
                 table_name,
                 attrs="",
                 extra_selector=""):
        """Score cross-browser uniqueness with unstable hash positions masked.

        First pass: for users seen in both browsers, compare the first 28
        ``&``-separated entries of the ``hashes`` column and record, per
        position, the share of users whose values differ. Positions whose
        share exceeds ``rate`` are zeroed in the mask. Second pass: build
        masked CROSS fingerprints for both browsers and count the users
        whose two fingerprints are equal.

        Args:
            b1, b2: Browser names matched against the ``browser`` column.
            cursor: Database cursor used for every query and passed on to
                ``Fingerprint``.
            quiet: When false, progress and result figures are printed.
            rate: Instability above which a hash position is masked out.
            table_name: Table to query; interpolated into the SQL text.
            attrs: Attribute selection forwarded to ``Fingerprint``.
            extra_selector: SQL fragment appended verbatim to the user
                lookups.

        Returns:
            ``(score, mask)``. ``score`` is ``0`` when no user has rows for
            both browsers; ``mask`` is a list of 28 ints (1 = keep).
        """
        num_hashes = 28

        if not quiet:
            print('extra_selector="{}"'.format(extra_selector))

        tuids = []
        uids = []

        cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(
            table_name, b1, extra_selector))
        for uid, in cursor.fetchall():
            tuids.append(uid)

        if not quiet:
            print(b1, len(tuids))

        for candidate in tuids:
            cursor.execute(
                "SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}"
                .format(table_name, candidate, b2, extra_selector))
            # Separate name so the outer loop variable is not rebound
            for matched_uid, in cursor.fetchall():
                uids.append(matched_uid)

        if not quiet:
            print(b1, 'and', b2, len(uids))

        # uids is the list of users that use both b1 and b2
        hash_long = []
        fp_to_count = {}
        instability = {}
        mask = [1 for _ in range(num_hashes)]

        if len(uids) == 0:
            return 0, mask

        # Remember each user's image ids so the second pass does not have to
        # repeat the two lookups
        image_pairs = []
        for uid in uids:
            cursor.execute(
                "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
                format(table_name, b1, uid))
            image1_id = cursor.fetchone()[0]

            cursor.execute(
                "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
                format(table_name, b2, uid))
            image2_id = cursor.fetchone()[0]
            image_pairs.append((image1_id, image2_id))

            try:
                # Feature to mask
                feature = "hashes"
                cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                    feature, table_name, image1_id))
                hashes_1 = cursor.fetchone()[0].split("&")[:num_hashes]

                cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                    feature, table_name, image2_id))
                hashes_2 = cursor.fetchone()[0].split("&")[:num_hashes]

                if len(hashes_1) == len(hashes_2):
                    for i in range(len(hashes_1)):
                        if i not in instability:
                            instability.update({i: 0.0})

                        if hashes_1[i] != hashes_2[i]:
                            # Each disagreeing user adds an equal share
                            instability[i] += 1.0 / len(uids)
            except Exception:
                # Best effort: a user whose hashes cannot be read does not
                # contribute to the mask
                pass

        for position, share in instability.items():
            if share > rate:
                mask[position] = 0

        for image1_id, image2_id in image_pairs:
            # Each fingerprint is given the *other* browser
            fp_1 = Fingerprint(cursor, image1_id, table_name,
                               Fingerprint_Type.CROSS, attrs, b2, mask)
            fp_2 = Fingerprint(cursor, image2_id, table_name,
                               Fingerprint_Type.CROSS, attrs, b1, mask)

            if fp_1 == fp_2:
                hash_long.append(fp_1)
                fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

        num_distinct = max(float(len(fp_to_count)), 1.0)
        num_unique = 0.0
        for count in fp_to_count.values():
            if count == 1:
                num_unique += 1.0
        # Clamped so the divisions below never see a zero
        num_cross_browser = max(float(len(hash_long)), 1.0)
        num_uids = max(float(len(uids)), 1.0)

        if not quiet:
            for i, d in instability.items():
                print("{}: instability: {}".format(i, d))

            print('Cross_browser', num_cross_browser)
            print('Cross_browser rate', num_cross_browser / num_uids)

            print('Cross_browser unique', num_unique / num_distinct)
            print(num_unique, num_distinct)

        return num_cross_browser / num_uids * num_unique / num_cross_browser * 100, mask
 def __init__(self, db):
     """Keep the database handle and create a ``Fingerprint`` API object.

     Args:
         db: Stored unchanged on ``self.fgp_db``.
     """
     self.fgp_db = db
     # Created with no arguments; one instance per object
     self.fgp_api = Fingerprint()
Ejemplo n.º 19
0
    def _fit_single_batch_(self, X, y=None):
        """Create fingerprints for a given batch of flows.

            Parameters
            ----------
            X : array-like of shape=(n_samples_batch,)
                Samples (Flow objects) from which to generate fingerprints.
                Each one is read for ``destination`` and ``certificate``.

            y : array-like of shape=(n_samples_batch,), optional
                Labels corresponding to X; handed to ``Cluster.fit``.

            Returns
            -------
            np.array of shape=(n_samples,)
                Resulting fingerprints corresponding to each flow.
            """
        # --- Create fingerprints -------------------------------------------

        # Cluster flows into network destinations
        cluster = Cluster()
        cluster.fit(X, y)

        # Find cliques in clusters using the configured window/correlation
        graph = CrossCorrelationGraph(
            window=self.window,
            correlation=self.correlation
        )
        cliques = graph.fit_predict(cluster)

        # Only cliques with more than one member become fingerprints
        fingerprints = [
            Fingerprint(clique) for clique in cliques if len(clique) > 1
        ]

        # --- Assign fingerprints per flow ----------------------------------

        # Get network destination per flow
        destination_ids = cluster.predict(X)
        id_to_destination = cluster.cluster_dict()
        destinations = [id_to_destination.get(d) for d in destination_ids]

        # Walking the fingerprints in sorted order lets the last (largest)
        # one win for every destination it contains
        mapping_fingerprints = {}
        for fingerprint in sorted(fingerprints):
            for destination in fingerprint:
                mapping_fingerprints[destination] = fingerprint

        # Look up by destination first, then by certificate, else empty
        assigned = []
        for flow in X:
            destination = flow.destination
            fallback = mapping_fingerprints.get(flow.certificate,
                                                Fingerprint())
            assigned.append(mapping_fingerprints.get(destination, fallback))
        prediction = np.array(assigned)

        # --- Handle unknown and similar fingerprints -----------------------

        # For unknown results assign nearest neighbour
        prediction = self.assign_nearest(X, prediction)
        # Merge similar fingerprints
        return self.merge_fingerprints(prediction, self.similarity)
Ejemplo n.º 20
0
    def assign_nearest(self, X, y):
        """Give unassigned flows the fingerprint of their neighbour in time.

            Parameters
            ----------
            X : np.array of shape=(n_flows,)
                Array of original flows. Flows must be sortable and expose
                ``time_start``.

            y : np.array of shape=(n_flows,)
                Array of fingerprints; falsy entries count as unassigned.

            Returns
            -------
            result : np.array of shape=(n_flows,)
                Array of fingerprints in the original flow order. Each run
                of unassigned entries takes the fingerprint just before or
                just after the run, whichever is closer in ``time_start``.
            """
        # Sort flows and fingerprints together, remembering the way back
        order = np.argsort(X)
        restore = np.argsort(order)
        X = X[order]
        y = y[order]
        timestamps = np.asarray([flow.time_start for flow in X])
        n_flows = X.shape[0]

        # Collect runs of consecutive unassigned indices
        blocks = []
        current = []
        for position, fingerprint in enumerate(y):
            if not fingerprint:
                current.append(position)
            elif current:
                blocks.append(np.asarray(current))
                current = []
        if current:
            blocks.append(np.asarray(current))

        for block in blocks:
            # Neighbours just outside the run
            before = min(block) - 1
            after = max(block) + 1
            has_before = before >= 0
            has_after = after < n_flows

            # A missing neighbour is infinitely far away and contributes an
            # empty fingerprint
            ts_before = X[before].time_start if has_before else float('inf')
            ts_after = X[after].time_start if has_after else float('inf')
            fp_before = y[before] if has_before else Fingerprint()
            fp_after = y[after] if has_after else Fingerprint()

            # Strictly closer to the earlier neighbour -> take that one
            distance_before = abs(timestamps[block] - ts_before)
            distance_after = abs(timestamps[block] - ts_after)
            closer_to_before = distance_before < distance_after
            y[block[closer_to_before]] = fp_before
            y[block[~closer_to_before]] = fp_after

        # Undo the sort
        return y[restore]
# -*- coding: utf-8 -*-
"""The main module finding similarity ratio between two strings."""

from fingerprint import Fingerprint
from fingerprint.fingerprint import FingerprintException

# Shared fingerprint generator used by find_similarity_ratio, configured with
# k-gram length 4, window length 3, base 101 and modulo 256.
FINGERPRINT = Fingerprint(kgram_len=4, window_len=3, base=101, modulo=256)


def find_similarity_ratio(f_string: str, s_string: str) -> float:
    """
    Compare two strings through the fingerprints produced by ``FINGERPRINT``.

    According to the original author, the fingerprints come from Rabin
    fingerprinting combined with the winnowing algorithm (Stanford).

    Both strings are fingerprinted, the first item of every fingerprint entry
    is collected, the values shared by both strings are determined and the
    length of the shorter of the two collections is computed.

    Args:
        f_string: first string.
        s_string: second string.
    Returns:
        ``0`` when fingerprinting either string raises
        ``FingerprintException`` or ``IndexError``.

    NOTE(review): the visible body ends right after computing
    ``minimal_length_of_string_hashes`` and never returns a ratio, so the
    function appears to be truncated here. Presumably the missing tail
    divides the number of common hashes by that minimal length -- confirm
    against the original module before relying on the return value.
    """
    try:
        f_string_fingerprint = FINGERPRINT.generate(str=f_string)
        s_string_fingerprint = FINGERPRINT.generate(str=s_string)
    except (FingerprintException, IndexError):
        # Fingerprinting failed for at least one input: report no similarity.
        return 0
    # Keep only the first item of each fingerprint entry (presumably the hash
    # value, going by the variable names -- TODO confirm the entry layout).
    f_string_only_hashes = [element[0] for element in f_string_fingerprint]
    s_string_only_hashes = [element[0] for element in s_string_fingerprint]
    # Values that occur in both strings' fingerprints.
    common_hashes = set(f_string_only_hashes).intersection(
        set(s_string_only_hashes))
    # Length of the shorter of the two hash lists.
    minimal_length_of_string_hashes = len(
        min(f_string_only_hashes, s_string_only_hashes, key=len))
Ejemplo n.º 22
0
def getRes(b1,
           b2,
           cursor,
           quiet,
           attrs="hashes, langs",
           extra_selector="",
           fp_type=Fingerprint_Type.CROSS):
    """Compare the fingerprints of users seen under both browsers b1 and b2.

    User ids are collected from rows whose ``browser`` equals ``b1`` and are
    then re-queried for ``b2`` (both queries get ``extra_selector``
    appended). For every resulting user id one ``image_id`` per browser is
    fetched and wrapped in a ``Fingerprint``; users whose two fingerprints
    compare equal count as cross-browser matches.

    When ``quiet`` is false, progress figures are printed and the ``fonts``
    column of both images is additionally compared position by position.
    Every position that differed for at least one user is set to 0 in the
    module-level ``mask`` (created on first use, one entry per position).
    With ``quiet`` set, this fonts comparison is skipped entirely.

    The ``print`` calls use the single-argument parenthesised form so the
    output is the same under Python 2 and Python 3.

    Args:
        b1: value matched against the ``browser`` column (first browser).
        b2: value matched against the ``browser`` column (second browser).
        cursor: database cursor providing ``execute``, ``fetchone`` and
            ``fetchall``.
        quiet: if true, print nothing and skip the fonts comparison.
        attrs: forwarded unchanged to ``Fingerprint``.
        extra_selector: raw SQL text appended to the ``WHERE`` clause of the
            two user-selection queries.
        fp_type: forwarded unchanged to ``Fingerprint``.

    Returns:
        ``None`` when no user id was found for both browsers. Otherwise a
        tuple ``(user_count, match_rate, unique_rate)``: ``user_count`` is
        the number of collected user ids (int); ``match_rate`` is the share
        of them whose fingerprints are equal; ``unique_rate`` is the share of
        distinct matching fingerprints that occur exactly once. Both rates
        are strings formatted with ``"{:3.1f}%"``; ``unique_rate`` is
        ``"0.0%"`` when no fingerprints matched at all.
    """
    global mask

    if not quiet:
        print('extra_selector="{}"'.format(extra_selector))

    # NOTE(review): every query below is built with str.format, so b1, b2,
    # the user ids and extra_selector end up verbatim in the SQL text. Only
    # call this with trusted values, or move to the driver's parameter
    # binding once its paramstyle has been confirmed.
    cursor.execute("SELECT COUNT(DISTINCT(ip)) FROM {}".format(table_name))
    if not quiet:
        print('ip {}'.format(cursor.fetchone()[0]))
    cursor.execute(
        "SELECT COUNT(DISTINCT(user_id)) FROM {}".format(table_name))
    if not quiet:
        print('user {}'.format(cursor.fetchone()[0]))

    # User ids that have at least one row for the first browser.
    cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(
        table_name, b1, extra_selector))
    tuids = [row[0] for row in cursor.fetchall()]

    if not quiet:
        print('{} {}'.format(b1, len(tuids)))

    # Keep those that also have a row for the second browser.
    uids = []
    for candidate in tuids:
        cursor.execute(
            "SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}".
            format(table_name, candidate, b2, extra_selector))
        uids.extend(row[0] for row in cursor.fetchall())

    if not quiet:
        print('{} and {} {}'.format(b1, b2, len(uids)))

    if not uids:
        return None

    fp_to_count = {}    # matching fingerprint -> number of users sharing it
    diff = {}           # fonts position -> fraction of users that differ
    uid_stability = {}  # user id -> list of [value_b1, value_b2] mismatches
    num_matching = 0    # users whose two fingerprints compare equal

    for uid in uids:
        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b1, uid))
        image1_id = cursor.fetchone()[0]
        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b2, uid))
        image2_id = cursor.fetchone()[0]

        fp_1 = Fingerprint(cursor, image1_id, table_name, fp_type, attrs)
        fp_2 = Fingerprint(cursor, image2_id, table_name, fp_type, attrs)

        # The per-position fonts comparison only feeds the verbose report and
        # the mask, so it is skipped in quiet mode.
        if not quiet:
            try:
                cursor.execute(
                    "SELECT fonts FROM {} WHERE image_id='{}'".format(
                        table_name, image1_id))
                hashes_1 = list(cursor.fetchone()[0])

                cursor.execute(
                    "SELECT fonts FROM {} WHERE image_id='{}'".format(
                        table_name, image2_id))
                hashes_2 = list(cursor.fetchone()[0])

                if mask is None:
                    mask = [1] * len(hashes_1)

                if len(hashes_1) == len(hashes_2):
                    mismatches = []
                    uid_stability[uid] = mismatches
                    for i, (hash1_val, hash2_val) in enumerate(
                            zip(hashes_1, hashes_2)):
                        # Register every position so stable ones are
                        # reported with an instability of 0.0.
                        diff.setdefault(i, 0.0)
                        if hash1_val != hash2_val:
                            diff[i] += 1.0 / len(uids)
                            mismatches.append([hash1_val, hash2_val])
            except Exception:
                # Best effort: a user with missing or malformed fonts data
                # must not abort the fingerprint comparison.
                pass

        if fp_1 == fp_2:
            num_matching += 1
            fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

    # Any position that differed for at least one user is masked out.
    for i, d in diff.items():
        if d > 0.0:
            mask[i] = 0

    num_distinct = float(len(fp_to_count))
    num_unique = float(
        sum(1 for count in fp_to_count.values() if count == 1))
    num_cross_browser = float(num_matching)
    num_uids = float(len(uids))

    match_rate = num_cross_browser / num_uids
    # No matching fingerprints means num_distinct is 0; clamp the denominator
    # (the same guard get_feature_res applies) instead of dividing by zero.
    unique_rate = num_unique / max(num_distinct, 1.0)

    if not quiet:
        for i, d in diff.items():
            print("{}: instability: {}".format(i, d))
        for u, s in uid_stability.items():
            print("{}: {}".format(u, s))

        print('Cross_browser {}'.format(num_cross_browser))
        print('Cross_browser rate {}'.format(match_rate))

        print('Cross_browser unique {}'.format(unique_rate))
        print('{} {}'.format(num_unique, num_distinct))

    return int(num_uids), "{:3.1f}%".format(
        match_rate * 100), "{:3.1f}%".format(unique_rate * 100)
Ejemplo n.º 23
0
def fingerprint_function(url):
    """Return the fingerprint generated for ``url``.

    A fresh ``Fingerprint`` (k-gram length 4, window length 1, base 10,
    modulo 1000) is built on every call and the result of its ``generate``
    method for ``url`` is returned.
    """
    return Fingerprint(
        kgram_len=4, window_len=1, base=10, modulo=1000).generate(str=url)
Ejemplo n.º 24
0
 def get_fingerprint(self, fingerprint_id):
     """Look up a stored record by id and wrap it in a ``Fingerprint``.

     ``fingerprint_id`` is converted with ``ObjectId`` and used as the
     ``_id`` filter on ``self.collection``; the first result of that query
     (index 0) is passed to ``Fingerprint``.
     """
     results = self.collection.find({"_id": ObjectId(fingerprint_id)})
     first_record = results[0]
     return Fingerprint(first_record)
Ejemplo n.º 25
0
    print("Create Template -> " + str(f.createTemplate()))
    print("Store Template  -> " + str(f.storeTemplate()))
    print(f.getTemplateCount())


def limpa_db(self):
    """Call ``limpa_bd`` on a new ``Fingerprint`` and report the template count.

    The template count is printed once before the call, labelled "Antes"
    (before), and once after it, labelled "Depois" (after). Presumably
    ``limpa_bd`` clears the stored templates -- TODO confirm.

    ``self`` is accepted for interface compatibility but is not used.
    """
    f = Fingerprint()
    # The labels were previously swapped: the count taken before clearing was
    # reported as "Depois" and the one taken afterwards as "Antes".
    print("Antes " + str(f.getTemplateCount()))
    f.limpa_bd()
    print("Depois " + str(f.getTemplateCount()))


def enroll(self):
    """Placeholder with no implementation yet: does nothing, returns ``None``."""


# Interactive menu: create a Fingerprint handle, ask for an option number and
# run the matching action.
f = Fingerprint()
try:
    resposta = int(
        input(
            "1 - Registra_digital\n2 - Passa digital\n3 - Limpa bd\n4 - Dump API"))
except ValueError:
    # Non-numeric input is handled like any other invalid option instead of
    # aborting the script with a traceback.
    resposta = None
if resposta == 1:
    f.registra_digital()
elif resposta == 2:
    f.valida_digital()
elif resposta == 3:
    f.limpa_bd()
elif resposta == 4:
    f.dump_bd()
else:
    print("dunga burro aperta direito")