Beispiel #1
0
def get_feature_res(cursor, feature, extra_selector=""):
    """Summarise how well ``feature`` identifies users, as percentages.

    For every distinct ``user_id`` in the module-level ``table_name`` the
    function builds one CROSS and one SINGLE ``Fingerprint`` per image row.

    Args:
        cursor: DB cursor used for every query (and handed to Fingerprint).
        feature: Feature selection forwarded to ``Fingerprint``.
        extra_selector: Raw SQL fragment appended to the per-user query.

    Returns:
        Tuple of three strings formatted with ``"{:3.1f}%"``:
        1. distinct SINGLE fingerprints seen exactly once / distinct SINGLE
           fingerprints,
        2. users with more than one image whose CROSS fingerprints are all
           the same / users with more than one image,
        3. distinct stable CROSS fingerprints seen exactly once / distinct
           stable CROSS fingerprints.
        Every denominator is clamped to at least 1.0.
    """
    # NOTE(review): the queries are assembled with str.format; user_id and
    # extra_selector are interpolated verbatim, so they must be trusted.
    cursor.execute("SELECT DISTINCT(user_id) from {}".format(table_name))
    user_rows = cursor.fetchall()

    single_counts = {}
    cross_counts = {}
    multi_image_users = 0.0
    stable_users = 0.0

    for (user_id,) in user_rows:
        cursor.execute("SELECT image_id from {} where user_id='{}' {}".format(
            table_name, user_id, extra_selector))
        image_ids = [image_id for (image_id,) in cursor.fetchall()]

        cross_prints = []
        for image_id in image_ids:
            cross_prints.append(
                Fingerprint(cursor, image_id, table_name,
                            Fingerprint_Type.CROSS, feature))
            single_print = Fingerprint(cursor, image_id, table_name,
                                       Fingerprint_Type.SINGLE, feature)
            single_counts[single_print] = single_counts.get(single_print, 0) + 1

        # Cross-browser stability is only defined for users with 2+ images.
        if len(image_ids) <= 1:
            continue
        multi_image_users += 1.0

        if not is_all_same(cross_prints):
            continue
        stable_users += 1.0
        shared_print = cross_prints[0]
        cross_counts[shared_print] = cross_counts.get(shared_print, 0) + 1

    cross_unique = float(
        sum(1 for seen in cross_counts.values() if seen == 1))
    single_unique = float(
        sum(1 for seen in single_counts.values() if seen == 1))

    # Clamp denominators so empty tables yield 0.0% instead of dividing by 0.
    cross_distinct = max(float(len(cross_counts)), 1.0)
    single_distinct = max(float(len(single_counts)), 1.0)
    multi_image_users = max(multi_image_users, 1.0)

    percent = "{:3.1f}%"
    return (
        percent.format(single_unique / single_distinct * 100),
        percent.format(stable_users / multi_image_users * 100),
        percent.format(cross_unique / cross_distinct * 100),
    )
Beispiel #2
0
  def __cross_helper(self, b1, b2, cursor, table_name, attrs, extra_selector):
    """Compare CROSS fingerprints of users seen in both browser b1 and b2.

    Args:
      b1, b2: Browser names matched against the ``browser`` column.
      cursor: DB cursor used for all queries (and handed to Fingerprint).
      table_name: Table to query.
      attrs: Attribute selection forwarded to ``Fingerprint``.
      extra_selector: Raw SQL fragment appended to the user look-ups.

    Returns:
      None when no user has rows for both browsers, otherwise a tuple
      ``(n_users, matched / n_users, unique / matched, entropy, matched)``
      where ``matched`` counts users whose two fingerprints are equal
      (clamped to at least 1.0 in the returned values) and ``entropy`` is the
      base-2 entropy of the matched-fingerprint distribution.
    """
    # NOTE(review): queries are built with str.format; every interpolated
    # value must come from a trusted source.
    cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(table_name, b1, extra_selector))
    candidate_uids = [uid for uid, in cursor.fetchall()]

    # Keep one entry per row the candidate has in the second browser.
    uids = []
    for candidate in candidate_uids:
      cursor.execute("SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}".format(table_name, candidate, b2, extra_selector))
      uids.extend(uid for uid, in cursor.fetchall())

    # Was `len(uids) is 0`: identity comparison with an int literal is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if not uids:
      return None

    fp_to_count = {}
    num_cross_browser = 0.0

    for uid in uids:
      cursor.execute("SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".format(table_name, b1, uid))
      image1_id = cursor.fetchone()[0]

      cursor.execute("SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".format(table_name, b2, uid))
      image2_id = cursor.fetchone()[0]

      fp_1 = Fingerprint(cursor, image1_id, table_name, Fingerprint_Type.CROSS, attrs, b2)
      fp_2 = Fingerprint(cursor, image2_id, table_name, Fingerprint_Type.CROSS, attrs, b1)

      if fp_1 == fp_2:
        num_cross_browser += 1
        fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

    entropy = 0.0
    num_unique = 0.0
    # The loop body only runs when at least one pair matched, so
    # num_cross_browser is >= 1 whenever it is used as a divisor here.
    for count in fp_to_count.values():
      if count == 1:
        num_unique += 1.0

      P = float(count) / float(num_cross_browser)
      entropy -= P * math.log(P, 2)

    num_uids = max(float(len(uids)), 1.0)
    num_cross_browser = max(num_cross_browser, 1.0)

    return int(num_uids), num_cross_browser/num_uids, num_unique/num_cross_browser, entropy, num_cross_browser
    def get_fingerprints_countermeasure(self, countermeasure):
        """Return a Fingerprint for every stored document with this countermeasure.

        The collection is queried with ``{'countermeasure': countermeasure}``
        and each returned document is wrapped in ``Fingerprint``.
        """
        matching = self.collection.find({'countermeasure': countermeasure})
        return [Fingerprint(document) for document in matching]
Beispiel #4
0
def get_fingerprints_experiments(
        cur,
        min_nb_fingerprints,
        attributes,
        id_file="./data/consistent_extension_ids.csv"):
    """Return the list of fingerprints to use for the experiment.

    Ids are read from ``id_file`` (first line is a header, one id per
    following line). Only rows whose id is in that file AND whose id occurs
    strictly more than ``min_nb_fingerprints`` times in
    ``extensionDataScheme`` are selected, ordered by ``counter``.

    Args:
        cur: DB cursor used to run the query.
        min_nb_fingerprints: Lower bound (exclusive) on rows per id.
        attributes: Forwarded as first argument to ``Fingerprint``.
        id_file: Path of the file listing the ids to keep.

    Returns:
        List of ``Fingerprint`` objects; rows that fail to convert are
        printed and skipped. An id file without any id yields ``[]`` without
        touching the database (the query would otherwise contain the invalid
        clause ``id in ()``).
    """
    # Only the file read needs the handle; release it before querying.
    with open(id_file, "r") as f:
        # Skip the header line.
        f.readline()
        quoted_ids = [
            "'" + line.replace("\n", "") + "'"
            for line in f
            if line.strip()
        ]

    if not quoted_ids:
        return []

    # NOTE(review): ids and min_nb_fingerprints are concatenated into the
    # SQL text; both must come from trusted sources.
    ids_query = ",".join(quoted_ids)
    cur.execute(
        "SELECT *, NULL as canvasJS FROM extensionDataScheme WHERE \
                    id in (" + ids_query + ") and \
                    id in (SELECT id FROM extensionDataScheme GROUP BY \
                    id having count(*) > " + str(min_nb_fingerprints) + ")\
                    ORDER by counter ASC")

    fp_set = []
    for fp in cur.fetchall():
        # Best effort: a row that cannot be parsed is reported and skipped.
        try:
            fp_set.append(Fingerprint(attributes, fp))
        except Exception as e:
            print(e)

    return fp_set
Beispiel #5
0
    def run(self):
        """Consume messages from ``input_queue`` and match them to vectors.

        Returns immediately when ``fp_vectors`` is empty. Otherwise starts
        ``self.timer`` and loops until ``self.die`` is ``True``: each message
        is compared (``Fingerprint.cmp_fv``) against the first element of
        every entry in ``self.fp_map``; the first message matching a still
        unmatched entry is stored in slot 1, the current ``self.slice`` in
        slot 2, and ``matched_vectors`` is incremented.
        """
        if len(self.fp_vectors) == 0:
            return
        self.timer.start()
        while True:
            with self.lock:
                if self.die is True:
                    return
            try:
                msg = self.input_queue.get(timeout=1)
            except Exception:
                # Was a bare `except:`, which also swallowed SystemExit and
                # KeyboardInterrupt. A failed get (typically the 1 s timeout)
                # just means: re-check the die flag and try again.
                continue

            with self.lock:
                for vector in self.fp_map:
                    matched = Fingerprint.cmp_fv(
                        msg, vector[0], self.fingerprint['features'])
                    # Only the first message matching a vector is recorded.
                    if matched is True and vector[1] is None:
                        vector[1] = msg
                        vector[2] = self.slice
                        self.matched_vectors += 1

            self.input_queue.task_done()
    def get_all_fingerprints(self):
        """Return every document of ``self.collection`` wrapped in Fingerprint."""
        return [Fingerprint(document) for document in self.collection.find()]
Beispiel #7
0
def create_fingerprints(peaks, fan_value=15):
    """Pair each peak with its following neighbours and build fingerprints.

    Every peak is combined with up to ``fan_value - 1`` peaks that follow it.
    For a pair ``(f1, t1)`` / ``(f2, t2)`` with ``0 <= t2 - t1 <= 200`` a
    ``Fingerprint("f1,f2,delta", t1)`` is produced.

    Returns:
        List of the distinct fingerprints (order not defined, duplicates
        removed through a set).
    """
    peak_list = list(peaks)
    total = len(peak_list)
    collected = []

    for anchor_idx, anchor in enumerate(peak_list):
        for offset in range(1, fan_value):
            partner_idx = anchor_idx + offset
            if partner_idx >= total:
                # Larger offsets would run past the end as well.
                break
            partner = peak_list[partner_idx]

            f1 = anchor[0]
            f2 = partner[0]
            t1 = anchor[1]
            t2 = partner[1]
            t_delta = t2 - t1

            # Only keep pairs whose time difference lies within [0, 200].
            if 0 <= t_delta <= 200:
                collected.append(
                    Fingerprint('{},{},{}'.format(f1, f2, t_delta), t1))

    return list(set(collected))
Beispiel #8
0
    def detect(self, X, y=None, threshold=None):
        """Predict whether samples of X are anomalous or not.

            Parameters
            ----------
            X : np.array of shape=(n_samples,)
                Fingerprints to check; each element is compared against its
                best match via ``x.compare(match)``.

            y : Ignored

            threshold : float, default=None
                Minimum required score to consider a point benign.
                If None is given, ``self.threshold`` is used. An explicit
                value of 0 is honoured (it used to fall back to the default
                because of an ``or`` on a falsy value).

            Returns
            -------
            result : np.array of shape=(n_samples,)
                Prediction of samples in X: +1 if benign, -1 if anomalous.
            """
        # Only fall back to the instance default when nothing was passed.
        if threshold is None:
            threshold = self.threshold

        # Get best match for each fingerprint
        best_matches = self.predict(X, default=Fingerprint())
        # Compute match score between each sample and its best match
        scores = np.asarray(
            [x.compare(fp) for x, fp in zip(X, best_matches)])
        # Map boolean "score high enough" to +1 / -1
        return (scores >= threshold) * 2 - 1
Beispiel #9
0
  def __single_helper(self, b, cursor, table_name, attrs, extra_selector):
    """Measure how many SINGLE fingerprints of browser ``b`` are unique.

    Args:
      b: Browser name matched against the ``browser`` column.
      cursor: DB cursor used for the query (and handed to Fingerprint).
      table_name: Table to query.
      attrs: Attribute selection forwarded to ``Fingerprint``.
      extra_selector: Raw SQL fragment appended to the query.

    Returns:
      None when no image row matches, otherwise
      ``(number_of_images, unique_fingerprints / number_of_images)`` where a
      fingerprint is unique when it was produced by exactly one image.
    """
    # NOTE(review): the query is built with str.format; interpolated values
    # must come from a trusted source.
    cursor.execute("SELECT image_id FROM {} WHERE browser='{}' {}".format(table_name, b, extra_selector))
    image_ids = [image_id for image_id, in cursor.fetchall()]

    # Was `len(image_ids) is 0`: identity comparison with an int literal is
    # implementation-dependent and raises a SyntaxWarning on Python 3.8+.
    if not image_ids:
      return None

    fp_to_count = {}
    for image_id in image_ids:
      fp = Fingerprint(cursor, image_id, table_name, Fingerprint_Type.SINGLE, attrs)
      fp_to_count[fp] = fp_to_count.get(fp, 0) + 1

    num_unique = 0.0
    for count in fp_to_count.values():
      if count == 1:
        num_unique += 1.0
    num_uids = max(len(image_ids), 1.0)

    return int(num_uids), num_unique/num_uids
Beispiel #10
0
    def merge_fingerprints(self, fingerprints, threshold=1):
        """Merge fingerprints based on similarity.

            Parameters
            ----------
            fingerprints : list
                List of fingerprints to merge.

            threshold : float, default=1
                Selects the merge strategy:
                - ``threshold >= 1``: nothing is merged, the input is
                  returned as an array.
                - ``threshold <= 0``: every entry is replaced by one
                  fingerprint built from the union of all unique ones.
                - ``0 < threshold < 1``: pairs yielded by
                  ``self.score_combinations`` whose ``compare`` score is
                  at least ``threshold`` are merged.

            Returns
            -------
            result : np.array
                Merged fingerprints, one entry per input fingerprint.
            """
        ####################################################################
        #           Case default: all fingerprints are different           #
        ####################################################################
        result = np.asarray(fingerprints)

        # Retrieve unique fingerprints
        unique = sorted(set(fingerprints))

        ####################################################################
        #                Case 1: all fingerprints are equal                #
        ####################################################################
        if threshold <= 0:
            # Create one big merged fingerprint out of all unique fingerprints
            result[:] = Fingerprint(set().union(*unique))

        ####################################################################
        #         Case 2: Merge fingerprints by 0 < threshold < 1          #
        ####################################################################
        elif threshold < 1:
            # Initialise fingerprinting pairs to merge
            pairs = set([
                # Define pairs
                (fp1, fp2)
                # For each combination of pairs
                for fp1, fp2 in self.score_combinations(unique, threshold)
                # Where similarity >= threshold
                if fp1.compare(fp2) >= threshold
            ])

            # Create mapping of original fingerprint -> merged fingerprint
            mapping = dict()
            # Loop over all fingerprints to be merged
            for fp1, fp2 in pairs:
                # Create merged fingerprint (re-using earlier merge results
                # of either side when they exist)
                fp_merged = mapping.get(fp1, fp1).merge(mapping.get(fp2, fp2))
                # Set mappings
                # NOTE(review): only fp1 and fp2 are re-pointed here; entries
                # mapped to an earlier merge result keep that older object, so
                # the outcome depends on the iteration order of `pairs`.
                mapping[fp1] = fp_merged
                mapping[fp2] = fp_merged

            # Apply mapping; fingerprints without a partner stay unchanged
            result = np.array([mapping.get(fp, fp) for fp in fingerprints])

        ####################################################################
        #                    Return merged fingerprints                    #
        ####################################################################
        return result
Beispiel #11
0
    def load(self, *files, store=True, parameters=False):
        """Load fingerprints from files.

            Parameters
            ----------
            *files : string
                Paths of the JSON files from which to load fingerprints.

            store : boolean, default=True
                If True, merge the loaded fingerprints into
                ``self.fingerprints``.

            parameters : boolean, default=False
                If True, also update the FlowPrint parameters (batch, window,
                correlation, similarity, threshold) from each file; keys
                missing in a file leave the current value untouched.

            Returns
            -------
            result : dict of Fingerprint -> set of labels
                Fingerprints imported from the files. A file without a
                'fingerprints' entry contributes nothing.
            """
        # Initialise fingerprints
        fingerprints = dict()

        # Loop over all files
        for path in files:
            # Parse the file, then release the handle right away
            with open(path, 'r') as infile:
                data = json.load(infile)

            # Store parameters if necessary
            if parameters:
                self.batch = data.get('batch', self.batch)
                self.window = data.get('window', self.window)
                self.correlation = data.get('correlation',
                                            self.correlation)
                self.similarity = data.get('similarity', self.similarity)
                self.threshold = data.get('threshold', self.threshold)

            # Add fingerprints; a missing or null entry used to raise a
            # TypeError when iterating over None
            for fp, label in data.get('fingerprints') or []:
                # Transform json to Fingerprint
                fp = Fingerprint().from_dict(fp)
                # Accumulate labels of identical fingerprints
                fingerprints[fp] = fingerprints.get(fp, set()) | set([label])

        # Store fingerprints if necessary
        if store:
            for k, v in fingerprints.items():
                self.fingerprints[k] = self.fingerprints.get(k, set()) | v

        # Return fingerprints
        return fingerprints
Beispiel #12
0
def check(origin, plagiarized):
    """Print the fingerprint overlap between two text files.

    Both files are read, fingerprint parameters are derived from the word
    count of the shorter text, and two reports are printed: the fingerprint
    entries of the first text that also occur in the second, and a count of
    matching first elements over all entry pairs.

    Args:
        origin: Path of the reference text file.
        plagiarized: Path of the text file to compare against it.

    Raises:
        NotImplementedError: when the shorter text has fewer than 60 words.
    """
    with open(origin, "r") as source_handle:
        origin = source_handle.read()

    with open(plagiarized, "r") as suspect_handle:
        plagiarism = suspect_handle.read()

    word_counts = (len(origin.split()), len(plagiarism.split()))
    text_length = min(word_counts)
    if text_length < 60:
        raise NotImplementedError("Compare texts with at least 60 words.")

    # Scale the fingerprint parameters with the size of the shorter text.
    window = max(text_length // 21, 3)
    if text_length < 250:
        base = 11
    elif text_length < 600:
        base = 23
    else:
        base = 101
    modulo = max(round(text_length * 5, -3), 1000)

    fprint = Fingerprint(kgram_len=window - 1, window_len=window,
                         base=base, modulo=modulo)
    first = fprint.generate(str=origin)
    second = fprint.generate(str=plagiarism)

    # Entries of the first text that appear verbatim in the second one.
    similar = []
    for entry in first:
        if entry in second:
            similar.append(entry)

    # One count per (first, second) pair sharing the same leading element.
    similar_grams = Counter()
    for element in first:
        for sec in second:
            if sec[0] == element[0]:
                similar_grams[element[0]] += 1

    print("Identical substring hashes:")
    pprint(similar)
    print("\nIdentical grams:")
    pprint(similar_grams)
def carrega_txt():
    """Upload the values stored in ``teste_1.txt`` and save them as a template.

    The file is expected to hold ``|``-separated integers. The parsed list is
    uploaded through ``uploadCharacteristics`` with both 0x01 and 0x02, then
    ``createTemplate`` and ``storeTemplate`` are called; the template count
    is printed before and after.
    """
    f = Fingerprint()

    # The original left the handle open; the context manager closes it.
    with open("teste_1.txt", 'r') as file:
        list_content = file.read().strip().split("|")

    list_valid = []
    for item in list_content:
        if item.strip():
            try:
                list_valid.append(int(item))
            except ValueError:
                # Non-numeric fragments are deliberately skipped.
                pass

    print(list_valid)

    f.uploadCharacteristics(0x01, list_valid)
    f.uploadCharacteristics(0x02, list_valid)

    print(f.getTemplateCount())
    print("Create Template -> " + str(f.createTemplate()))
    print("Store Template  -> " + str(f.storeTemplate()))
    print(f.getTemplateCount())
def get_identity():
    """Build the fixed browser identity used for the Roblox sign-up page.

    Returns:
        Tuple ``(fp, window)``: a ``Fingerprint`` configured with the
        module-level ``user_agent`` plus hard-coded browser traits, and the
        ``Window`` describing the sign-up page.
    """

    def _jsbd(w):
        # Values are partly randomised on every call; DT mirrors the title of
        # the object handed in.
        return dict(HL=random.randint(1, 5),
                    NCE=True,
                    DT=w.title,
                    NWD="undefined",
                    DA=None,
                    DR=None,
                    DMT=random.randint(1, 40),
                    DO=None,
                    DOT=random.randint(30, 50))

    window = Window("Roblox", "https://www.roblox.com/account/signupredir")

    # Kept in the original keyword order in case the consumer relies on it.
    device_traits = {
        "DNT": "unknown",
        "L": "en-US",
        "D": 24,
        "PR": 1,
        "S": "1920,1080",
        "AS": "1920,1040",
        "SS": True,
        "LS": True,
        "IDB": True,
        "B": False,
        "ODB": True,
        "CPUC": "unknown",
        "PK": "Win32",
        "JSF": "Arial,Arial Black,Arial Narrow,Book Antiqua,Bookman Old Style,Calibri,Cambria,Cambria Math,Century,Century Gothic,Century Schoolbook,Comic Sans MS,Consolas,Courier,Courier New,Garamond,Georgia,Helvetica,Impact,Lucida Bright,Lucida Calligraphy,Lucida Console,Lucida Fax,Lucida Handwriting,Lucida Sans,Lucida Sans Typewriter,Lucida Sans Unicode,Microsoft Sans Serif,Monotype Corsiva,MS Gothic,MS PGothic,MS Reference Sans Serif,MS Sans Serif,MS Serif,Palatino Linotype,Segoe Print,Segoe Script,Segoe UI,Segoe UI Light,Segoe UI Semibold,Segoe UI Symbol,Tahoma,Times,Times New Roman,Trebuchet MS,Verdana,Wingdings,Wingdings 2,Wingdings 3",
        "P": "Chrome PDF Plugin,Chrome PDF Viewer,Native Client",
        "T": "0,false,false",
        "H": "8",
        "SWF": False,
    }

    fp = Fingerprint(
        user_agent=user_agent,
        protochain_hash="5d76839801bc5904a4f12f1731a7b6d1",
        sec_fetch=True,
        content_type_value="application/x-www-form-urlencoded; charset=UTF-8",
        accept_language_value="en-US,en;q=0.9",
        jsbd_gen=_jsbd,
        **device_traits)
    return fp, window
class Worker(object):
    """Fingerprint audio, store hashes in a database and manage gridHash files.

    Relies on names defined elsewhere in the module: ``Fingerprint``,
    ``AudioHelper``, ``hlp``, ``MinHash``, ``VALID_EXT`` and ``CUSTOM_EXT``.
    """

    def __init__(self, db):
        """Keep the database handle and create the fingerprinting backend."""
        self.fgp_db = db
        self.fgp_api = Fingerprint()

    def mic_recognize(self, limit=None):
        """Record from the microphone and return the set of hashes found.

        Attributes:
            limit - recording length in seconds (10 when None)
        """
        if limit is None:
            limit = 10

        print('Microphone listening for: {} seconds'.format(limit))
        self.mic = AudioHelper()
        result = set()

        mic_data = self.mic.recognize(limit=limit)
        for channel in mic_data:
            hashes = self.fgp_api.fingerprint(channel,
                                              frame_rate=self.mic.samplerate,
                                              verbose=True,
                                              plot=True)

            result |= set(hashes)
        return result

    def fingerprint_worker(self,
                           file_path,
                           limit=None,
                           grid_only=False,
                           verbose=False,
                           plot=False):
        """Fingerprint every channel of an audio file.

        Attributes:
            file_path - audio file to read
            limit     - forwarded to the audio loader
            grid_only - when True, return the grid-only fingerprint of the
                        first channel instead of the hash set
            verbose   - forwarded to the fingerprinter
            plot      - forwarded to the fingerprinter

        Return:
            (song_name, set_of_hashes), where song_name is file_path without
            its extension. With grid_only=True the raw result of the
            grid-only fingerprint call is returned instead (no tuple).
        """
        song_name, extension = os.path.splitext(file_path)

        # mp3/mpeg need a different extraction method. The original test
        # `extension is '.mp3' or '.mpeg'` was always true, so every file
        # went through the mpeg loader.
        if extension.lower() in ('.mp3', '.mpeg'):
            _channels, frame_rate, audio_data = hlp.retrieve_audio_mpeg(
                file_path, limit)
        else:
            _channels, frame_rate, audio_data = hlp.retrieve_audio(
                file_path, limit)

        result = set()

        for channel in audio_data:
            if grid_only:
                # Checked first so no regular fingerprint pass is computed
                # only to be thrown away.
                return self.fgp_api.fingerprint(channel,
                                                frame_rate=frame_rate,
                                                grid_only=grid_only,
                                                plot=plot)

            hashes = self.fgp_api.fingerprint(channel,
                                              frame_rate=frame_rate,
                                              verbose=verbose,
                                              plot=plot)
            result |= set(hashes)

        return song_name, result

    def insert_wav_to_db(self, song_n):
        """Fingerprint one file and insert the song plus its hashes."""
        song_name, list_hash = self.fingerprint_worker(song_n, limit=None)

        print('Song name: ', song_name)
        print('Number of generated hashes: ', len(list_hash))

        self.fgp_db.insert_song(song_name, 1)

        for h in list_hash:
            self.fgp_db.insert_fingerprint(h[0], song_name, h[1])

    def get_max_track_frequency(self, list_tracks):
        """Return the largest value of a {track: frequency} dict (0 if empty)."""
        # The leading 0 keeps the original floor for empty/non-positive input.
        return max([0, *list_tracks.values()])

    def align_matches_weighted(self, list_matches):
        """Pick the most likely track from (track_name, time_delta) matches.

        Matches are grouped per time delta. Each group gets the weight
        ``e ** -|time_delta| * 1000 + highest track count in the group``;
        groups with a weight above 100 are kept and the heaviest one wins.

        Return:
            (track, candidates, res): track is a dict with the keys
            'song id', 'song name' and 'is fingerprinted'; candidates maps
            time_delta -> {track_name: count}; res is the list of kept
            (weight, time_delta, counts) tuples sorted by ascending weight.
        """
        no_result = {
            'song id': 0,
            'song name': 'No results found',
            'is fingerprinted': 0,
        }

        candidates = dict()
        for track_name, time_delta in list_matches:
            per_delta = candidates.setdefault(time_delta, dict())
            per_delta[track_name] = per_delta.get(track_name, 0) + 1

        weighted_candidates = []
        for delta, counts in candidates.items():
            cand_weight = float(math.e**(-abs(delta))) * 1000
            max_t_freq = self.get_max_track_frequency(counts)
            weighted_candidates.append((cand_weight + max_t_freq, delta, counts))

        weighted_candidates = sorted(weighted_candidates,
                                     key=lambda cand: cand[0])
        res = [elem for elem in weighted_candidates if elem[0] > 100.0]

        # escape case where list of candidates is empty
        if len(res) == 0:
            return no_result, candidates, res

        prime_candidate = res[-1]
        prime_weight = prime_candidate[0]
        max_count = 0
        query_track = ''

        # query the track with most hits
        for name_key, hits in prime_candidate[2].items():
            if hits > max_count:
                max_count = hits
                query_track = name_key

        _query_hit, song_id, name, is_fng = self.fgp_db.get_song_by_name(
            query_track)

        # cut-off weight for candidates
        CUT_OFF_WEIGHT_2 = 1010
        if prime_weight <= CUT_OFF_WEIGHT_2 and max_count <= 10:
            return no_result, candidates, res

        track = {
            'song id': song_id,
            'song name': name,
            'is fingerprinted': int(is_fng),
        }

        return track, candidates, res

    def fingerprint_songs(self, user_path='', num_tracks=None):
        """Fingerprint the audio files below user_path and store them.

        Files already reported as fingerprinted are skipped. When num_tracks
        files have been added the database connection is closed and the
        method returns. A failed hash dump deletes the song again and stops
        the whole run.
        """
        dir_structure = self.build_dir_map(user_path)

        # get fingerprinted files (as a set for O(1) membership tests)
        _number_fgp, fingerprinted_names = self.get_wavs_by_fgp(1)
        already_fingerprinted = set(fingerprinted_names)

        song_counter = 0

        # go through each file in the directory
        for file, directory in dir_structure.items():
            # don't re-fingerprint files
            if file in already_fingerprinted:
                print('Skipping: {}'.format(file))
                continue

            if song_counter == num_tracks:
                print('Added {} tracks to database.'.format(song_counter))
                self.fgp_db.connection.close()
                return

            # path of dir + actual file (was a hard-coded '\\' separator)
            path = os.path.join(directory, file)

            # avoid invalid extensions
            if not self.has_valid_extension(path):
                continue

            # insert song returns true if it managed, false otherwise
            if not self.fgp_db.insert_song(file, 1):
                print('Fingerprinting skipped')
                continue
            song_counter += 1

            # generate and insert hashes
            _, list_hashes = self.fingerprint_worker(path)
            formatted_list = [(h[0], file, h[1]) for h in list_hashes]

            # stop everything in case of failure
            if not self.fgp_db.dump_fingerprints(formatted_list):
                self.fgp_db.delete_songs([file])
                print('Fingerprinting failed for: {}'.format([file]))
                return

        print('Number of wavs: ', song_counter)

    def get_wavs_by_fgp(self, is_fgp=0):
        """Return (count, names) of the songs with the given fingerprint status."""
        res = list(self.fgp_db.get_songs_by_fgp_status(is_fgp))

        # Drops the first two and the last three characters of str(elem);
        # presumably each elem is a one-element row such as ('name',) --
        # TODO confirm against the database layer.
        clean_list = [str(elem)[2:-3] for elem in res]

        return len(clean_list), clean_list

    ######################################################################
    #
    # GRIDHASH ALGORITHM
    #
    ######################################################################

    ##### DIRECTORY STRUCTURE METHODS #####

    def _get_dir_structure(self, dir_path):
        """Returns [dirpath, filenames] for every directory below dir_path"""
        return [[dirpath, filenames]
                for dirpath, _dirnames, filenames in os.walk(dir_path)]

    def has_valid_extension(self, path_to_file):
        """Checks whether the file extension is listed in VALID_EXT"""
        _root, ext = os.path.splitext(path_to_file)
        return ext in VALID_EXT

    def build_dir_map(self, root):
        """creates a dictionary directory structure.
        It maps file names to the directory that contains them.

        file.wav -> c//dir/dir2/dir_with_wavs

        Only files with a valid extension are kept; when the same file name
        occurs in several directories the first one visited wins.

        Attributes:
            root - where to start looking

        Return:
            map  - dictionary structure
        """
        dir_map = dict()

        for current_directory, files_in_dir in self._get_dir_structure(root):
            for f in files_in_dir:
                path = os.path.join(current_directory, f)
                # add key if not already in dict and if file has a valid extension
                if f not in dir_map and self.has_valid_extension(path):
                    dir_map[f] = current_directory

        return dir_map

    ##### IO METHODS #####

    def export_file(self, file_name, data, dest_dir=''):
        """Stores gridHash file to specified location

        The target name is file_name with its extension replaced by
        CUSTOM_EXT (any extension length, not just three characters).

        Attributes:
            file_name - name of file
            data      - information to package to the file
            dest_dir  - file path

        Return:
            True on success, False when hashing or pickling failed
        """
        name = os.path.splitext(file_name)[0] + CUSTOM_EXT
        path = os.path.join(dest_dir, name)

        with open(path, mode='wb') as f:
            try:
                min_data = self.get_minHash(data)
                pickle.dump(min_data, f)
            except Exception:
                # Best effort: report the failure and let the caller decide.
                print('Export failed: {}'.format(name))
                return False

        print('Exported: {}'.format(name))
        return True

    def load_grid(self, file_name, local_dir=''):
        """Loads gridHash file from specified location.

        Attributes:
            file_name - name of file to load; its extension is replaced by
                        CUSTOM_EXT when it differs
            local_dir - load path

        Return:
            data - retrieved information
        """
        path = os.path.join(local_dir, file_name)
        root, ext = os.path.splitext(path)

        if ext != CUSTOM_EXT:
            # `path[:-len(ext)]` produced an empty string for names without
            # any extension; the splitext root is correct in every case.
            path = root + CUSTOM_EXT

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load gridHash files from trusted locations.
        with open(path, 'rb') as f:
            data = pickle.load(f)

        return data

    ##### minHash generators ######

    def get_minHash(self, input_set):
        """Generates minHash object from input set
        Attributes:
            input_set - iterable of strings to minHash

        Returns:
            minHash object
        """
        min_h = MinHash()

        for itm in input_set:
            min_h.update(itm.encode('utf8'))

        return min_h

    def export_many(self, files_in, files_out, limit=0):
        """Exports gridHash files for the audio files below files_in

        Attributes:
            files_in  - directory searched for audio files
            files_out - directory receiving (and searched for) gridHash files
            limit     - maximum number of exports; 0 means all files
        """
        # initialize counter for files to be indexed
        counter = 0
        # build directory maps
        dir_map = self.build_dir_map(files_in)
        indexed = self.build_dir_map(files_out)

        # if no number of files is specified, process all files
        if limit == 0:
            limit = len(dir_map.keys())

        print(
            'Info:\n', 'There are {} available audio files.\n'.format(
                len(dir_map.keys())),
            'There are {} available gridHash files.\n'.format(
                len(indexed.keys())))

        # go file by file
        for tr, directory in dir_map.items():
            if counter >= limit:
                break

            # check if the file has not already been exported
            pre = os.path.splitext(tr)[0] + CUSTOM_EXT
            if pre in indexed:
                print('Skipping: {} file already exists'.format(tr))
                continue

            _path = os.path.join(directory, tr)

            # ensure a valid extension
            if not self.has_valid_extension(_path):
                continue

            set_data = self.fingerprint_worker(_path,
                                               grid_only=True,
                                               plot=False)

            # generate gridhash; abort the whole run on the first failure
            if not self.export_file(tr, set_data, dest_dir=files_out):
                return
            counter += 1

        print('Exported {} grids'.format(counter))

    def compute_jaccard(self, s1, s2, grid_folder):
        """Computes jaccard similarity between two gridHash files

        Attributes:
            s1, s2      - file names of the two gridHash files
            grid_folder - directory searched for the files

        Raises:
            ValueError - when either file is not found below grid_folder
        """
        dir_map = self.build_dir_map(grid_folder)

        missing = [name for name in (s1, s2) if name not in dir_map]
        if missing:
            raise ValueError(
                'gridHash file(s) not found in {}: {}'.format(
                    grid_folder, ', '.join(str(name) for name in missing)))

        # Load from the directory the file was actually found in.
        c1 = self.load_grid(s1, local_dir=dir_map[s1])
        c2 = self.load_grid(s2, local_dir=dir_map[s2])

        return c1.jaccard(c2)
Beispiel #16
0
from fingerprint import Fingerprint

# Maintenance script: build a Fingerprint helper and call its
# clear_database() method.
# NOTE(review): presumably this wipes all stored fingerprints -- confirm
# before running against a database that holds real data.
fp = Fingerprint()
fp.clear_database()

Beispiel #17
0
    def assign_nearest(self, X, y):
        """Set unassigned labels to that of nearest neighbours.

            Entries of y that evaluate as falsy are treated as unassigned.
            Consecutive unassigned entries (after sorting by X) form a block;
            every entry of a block receives the fingerprint located directly
            before or directly after the block, whichever is strictly closer
            in ``time_start`` (ties go to the one after). A side that does
            not exist counts as infinitely far away and contributes an empty
            ``Fingerprint()``.

            Parameters
            ----------
            X : np.array of shape=(n_flows,)
                Array of original flows; elements must be sortable by
                np.argsort and expose ``time_start``.

            y : np.array of shape=(n_flows,)
                Array of fingerprints, one per flow.
                NOTE(review): the original docstring stated dtype=int, but
                the code stores Fingerprint objects in it -- confirm.

            Returns
            -------
            result : np.array of shape=(n_flows,)
                Array of Fingerprints in the original order of the input.
            """
        ####################################################################
        #             Sort flows and fingerprints by timestamp             #
        ####################################################################

        # Sort flows by time
        sort_time = np.argsort(X)
        # Inverse permutation, used to restore the caller's order at the end
        sort_orig = np.argsort(sort_time)

        # Sort by time (fancy indexing creates reordered copies)
        X = X[sort_time]
        y = y[sort_time]
        # Get timestamps
        timestamps = np.asarray([x.time_start for x in X])

        ####################################################################
        #               Assign closest fingerprints in time                #
        ####################################################################

        # Get blocks of unassigned fingerprint indices
        blocks = list()
        block = list()
        for i, fingerprint in enumerate(y):
            # An assigned fingerprint closes the block collected so far
            if fingerprint and block:
                blocks.append(np.asarray(block))
                block = list()
            elif not fingerprint:
                block.append(i)
        # Flush a block that runs until the end of the array
        if block:
            blocks.append(np.asarray(block))

        # For each block of unassigned fingerprints compute new labels
        for block in blocks:
            # Get indices before and after block
            before = min(block) - 1
            after = max(block) + 1
            # Get timestamps before and after block (inf = no neighbour)
            ts_before = X[before].time_start if before >= 0 else float('inf')
            ts_after = X[after].time_start if after < X.shape[0] else float(
                'inf')
            # Get fingerprints before and after block
            fp_before = y[before] if before >= 0 else Fingerprint()
            fp_after = y[after] if after < X.shape[0] else Fingerprint()

            # Assign new fingerprints per block: boolean mask selecting the
            # entries strictly closer to the preceding fingerprint
            block_before = abs(timestamps[block] - ts_before) <\
                           abs(timestamps[block] - ts_after )
            y[block[block_before]] = fp_before
            y[block[~block_before]] = fp_after

        # Return fingerprints in original order
        return y[sort_orig]
 def __init__(self, db):
     """Store the database handle and create a Fingerprint helper instance."""
     self.fgp_db = db
     self.fgp_api = Fingerprint()
# -*- coding: utf-8 -*-
"""The main module finding similarity ratio between two strings."""

from fingerprint import Fingerprint
from fingerprint.fingerprint import FingerprintException

# Module-wide Fingerprint instance reused by every comparison below.
FINGERPRINT = Fingerprint(kgram_len=4, window_len=3, base=101, modulo=256)


def find_similarity_ratio(f_string: str, s_string: str) -> float:
    """
    Compare two strings through their generated fingerprints.

    Both strings are passed to the shared ``FINGERPRINT`` instance; the
    original description names the technique as Rabin fingerprinting with
    winnowing (Stanford).

    Args:
        f_string: first string.
        s_string: second string.
    Returns:
        The similarity ratio between the two strings. ``0`` is returned when
        fingerprint generation raises ``FingerprintException`` or
        ``IndexError`` for either input.
    """
    try:
        f_string_fingerprint = FINGERPRINT.generate(str=f_string)
        s_string_fingerprint = FINGERPRINT.generate(str=s_string)
    except (FingerprintException, IndexError):
        # Fingerprinting failed for at least one input: report no similarity.
        return 0
    # Keep only the first item of every fingerprint entry
    # (presumably the hash value of a (hash, position) pair - verify).
    f_string_only_hashes = [element[0] for element in f_string_fingerprint]
    s_string_only_hashes = [element[0] for element in s_string_fingerprint]
    # Values present in both fingerprints.
    common_hashes = set(f_string_only_hashes).intersection(
        set(s_string_only_hashes))
    # Length of the shorter of the two hash lists.
    minimal_length_of_string_hashes = len(
        min(f_string_only_hashes, s_string_only_hashes, key=len))
    # NOTE(review): the visible code stops here without a return statement;
    # the remainder of the function appears to be missing from this excerpt.
Beispiel #20
0
    def _fit_single_batch_(self, X, y=None):
        """Build fingerprints for one batch of flows and label every flow.

            Parameters
            ----------
            X : array-like of shape=(n_samples_batch,)
                Flow objects to fingerprint.

            y : array-like of shape=(n_samples_batch,), optional
                Labels matching X. They are forwarded to the clustering step
                so they can be incorporated into the fingerprints.

            Returns
            -------
            np.array of shape=(n_samples,)
                One fingerprint per flow, after nearest-neighbour assignment
                and merging of similar fingerprints.
            """
        # --- Create fingerprints --------------------------------------------

        # Cluster flows into network destinations
        cluster = Cluster()
        cluster.fit(X, y)

        # Cliques of correlated destinations become fingerprint candidates
        graph = CrossCorrelationGraph(
            window=self.window,
            correlation=self.correlation,
        )
        cliques = graph.fit_predict(cluster)

        # Only cliques with more than one member are cast to fingerprints
        fingerprints = [
            Fingerprint(clique) for clique in cliques if len(clique) > 1
        ]

        # --- Assign fingerprints per flow -----------------------------------

        # Destination id per flow, translated through the cluster dictionary
        destination_ids = cluster.predict(X)
        translation = cluster.cluster_dict()
        destinations = [translation.get(dest_id) for dest_id in destination_ids]

        # Iterating in ascending order lets the largest fingerprint win
        # whenever a destination belongs to several fingerprints.
        mapping_fingerprints = {
            destination: fingerprint
            for fingerprint in sorted(fingerprints)
            for destination in fingerprint
        }
        lookup = mapping_fingerprints.get

        def fingerprint_of(flow):
            # Destination first, then certificate, else an empty fingerprint.
            primary = flow.destination
            fallback = lookup(flow.certificate, Fingerprint())
            return lookup(primary, fallback)

        prediction = np.array([fingerprint_of(flow) for flow in X])

        # --- Handle unknown and similar fingerprints -------------------------

        # Flows without a fingerprint take over a neighbouring one
        prediction = self.assign_nearest(X, prediction)
        # Similar fingerprints are merged
        return self.merge_fingerprints(prediction, self.similarity)
Beispiel #21
0
def getRes(b1,
           b2,
           cursor,
           quiet,
           attrs="hashes, langs",
           extra_selector="",
           fp_type=Fingerprint_Type.CROSS):
    """Measure how often fingerprints agree between two browsers.

    Users are taken from the module-level ``table_name`` table: first every
    row recorded with browser ``b1`` (filtered by ``extra_selector``), then
    narrowed to the users that also have a row for ``b2``. For each such user
    one fingerprint per browser is built and compared.

    Args:
        b1: Name of the first browser, as stored in the ``browser`` column.
        b2: Name of the second browser.
        cursor: Database cursor offering ``execute``/``fetchone``/``fetchall``.
        quiet: When true, nothing is printed and the per-position ``fonts``
            comparison (which feeds the global ``mask``) is skipped.
        attrs: Attribute list handed to ``Fingerprint``.
        extra_selector: Raw SQL appended to the user-selection queries.
        fp_type: Fingerprint type handed to ``Fingerprint``.

    Returns:
        ``None`` when no user has both browsers, otherwise a tuple
        ``(number_of_users, cross_browser_rate, unique_rate)`` whose two
        rates are strings formatted as ``"{:3.1f}%"``.

    Side effects:
        Reads and updates the module-level ``mask`` list: positions whose
        ``fonts`` value differed for at least one user are set to 0.
    """
    global mask

    if not quiet:
        print('extra_selector="{}"'.format(extra_selector))

    # Every print below passes ONE pre-formatted string so the output is the
    # same under the Python 2 print statement and the Python 3 function.
    cursor.execute("SELECT COUNT(DISTINCT(ip)) FROM {}".format(table_name))
    if not quiet:
        print('ip {}'.format(cursor.fetchone()[0]))
    cursor.execute(
        "SELECT COUNT(DISTINCT(user_id)) FROM {}".format(table_name))
    if not quiet:
        print('user {}'.format(cursor.fetchone()[0]))

    # Users seen with b1 ...
    cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(
        table_name, b1, extra_selector))
    tuids = [uid for uid, in cursor.fetchall()]

    if not quiet:
        print('{} {}'.format(b1, len(tuids)))

    # ... narrowed to those that also have a row for b2.
    uids = []
    for tuid in tuids:
        cursor.execute(
            "SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}".
            format(table_name, tuid, b2, extra_selector))
        uids.extend(uid for uid, in cursor.fetchall())

    if not quiet:
        print('{} and {} {}'.format(b1, b2, len(uids)))

    if not uids:
        return None

    # uids is the list of users that use both b1 and b2
    hash_long = []  # fingerprints that were identical in both browsers
    fp_to_count = {}  # matching fingerprint -> number of users sharing it
    diff = {}  # fonts position -> share of users whose value differed
    uid_stability = {}  # user id -> [value_b1, value_b2] pairs that differed

    for uid in uids:
        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b1, uid))
        image1_id = cursor.fetchone()[0]
        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b2, uid))
        image2_id = cursor.fetchone()[0]

        fp_1 = Fingerprint(cursor, image1_id, table_name, fp_type, attrs)
        fp_2 = Fingerprint(cursor, image2_id, table_name, fp_type, attrs)

        # The per-position statistics were only ever gathered in verbose
        # mode (the old code forced an exception when quiet); the guard
        # makes that explicit.
        if not quiet:
            try:
                cursor.execute(
                    "SELECT fonts FROM {} WHERE image_id='{}'".format(
                        table_name, image1_id))
                hashes_1 = list(cursor.fetchone()[0])

                cursor.execute(
                    "SELECT fonts FROM {} WHERE image_id='{}'".format(
                        table_name, image2_id))
                hashes_2 = list(cursor.fetchone()[0])

                if mask is None:
                    mask = [1 for _ in range(len(hashes_1))]

                if len(hashes_1) == len(hashes_2):
                    uid_stability.update({uid: []})
                    pairs = zip(hashes_1, hashes_2)
                    for i, (hash1_val, hash2_val) in enumerate(pairs):
                        if i not in diff:
                            diff[i] = 0.0
                        if hash1_val != hash2_val:
                            diff[i] += 1.0 / len(uids)
                            uid_stability[uid].append([hash1_val, hash2_val])
            except Exception:
                # Best effort: a user with missing or malformed fonts data
                # is left out of the stability statistics.
                pass

        if fp_1 == fp_2:
            hash_long.append(fp_1)
            fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

    # Any position that differed for at least one user is masked out.
    for i, d in diff.items():
        if d > 0.0:
            mask[i] = 0

    # Clamp to 1.0 so that "no matching fingerprint" yields 0.0% instead of
    # a ZeroDivisionError.
    num_distinct = max(float(len(fp_to_count)), 1.0)
    num_unique = float(
        sum(1 for count in fp_to_count.values() if count == 1))
    num_cross_browser = float(len(hash_long))
    num_uids = float(len(uids))

    if not quiet:
        for i, d in diff.items():
            print("{}: instability: {}".format(i, d))
        for u, s in uid_stability.items():
            print("{}: {}".format(u, s))

        print('Cross_browser {}'.format(num_cross_browser))
        print('Cross_browser rate {}'.format(num_cross_browser / num_uids))

        print('Cross_browser unique {}'.format(num_unique / num_distinct))
        print('{} {}'.format(num_unique, num_distinct))

    return int(num_uids), "{:3.1f}%".format(
        num_cross_browser / num_uids * 100), "{:3.1f}%".format(
            num_unique / num_distinct * 100)
    print("Create Template -> " + str(f.createTemplate()))
    print("Store Template  -> " + str(f.storeTemplate()))
    print(f.getTemplateCount())


def limpa_db(self):
    """Print the template count, call ``limpa_bd()``, print the count again.

    ``self`` is accepted but not used; a new ``Fingerprint`` is created for
    the operation. ``limpa_bd`` presumably clears the template database -
    verify against the Fingerprint class.
    """
    f = Fingerprint()
    # "Antes" = before, "Depois" = after (Portuguese). The labels used to be
    # swapped, tagging the pre-clean count as "after".
    print("Antes " + str(f.getTemplateCount()))
    f.limpa_bd()
    print("Depois " + str(f.getTemplateCount()))


def enroll(self):
    """Placeholder with no behaviour: the body is a bare ``pass``."""
    pass


# Menu choice -> name of the Fingerprint method that handles it.
_OPCOES = {
    1: "registra_digital",
    2: "valida_digital",
    3: "limpa_bd",
    4: "dump_bd",
}

f = Fingerprint()
resposta = int(
    input(
        "1 - Registra_digital\n2 - Passa digital\n3 - Limpa bd\n4 - Dump API"))
if resposta in _OPCOES:
    # Resolve the method only for the selected entry, then call it.
    getattr(f, _OPCOES[resposta])()
else:
    # Any other number is rejected with the original message.
    print("dunga burro aperta direito")
Beispiel #23
0
    def __getRes(self,
                 b1,
                 b2,
                 cursor,
                 quiet,
                 rate,
                 table_name,
                 attrs="",
                 extra_selector=""):
        """Score cross-browser fingerprints for one browser pair.

        The method works in two passes over the users that have a row for
        both ``b1`` and ``b2`` in ``table_name``:

        1. Compare the first 28 ``&``-separated entries of the ``hashes``
           column between the two browsers and record, per position, the
           share of users whose values differ. Positions whose share is
           above ``rate`` are switched off in the mask.
        2. Build one masked ``Fingerprint`` per browser for every user and
           count the users whose two fingerprints are equal.

        Args:
            b1: Name of the first browser (``browser`` column value).
            b2: Name of the second browser.
            cursor: Database cursor with ``execute``/``fetchone``/``fetchall``.
            quiet: Suppresses all printing when true.
            rate: Instability threshold above which a position is masked.
            table_name: Table the queries run against.
            attrs: Attribute list handed to ``Fingerprint``.
            extra_selector: Raw SQL appended to the user-selection queries.

        Returns:
            ``(score, mask)``. ``mask`` is a list of 28 ints (1 = kept,
            0 = masked). ``score`` is ``0`` when no user has both browsers,
            otherwise the percentage computed from the number of unique
            matching fingerprints and the number of users.
        """
        num_features = 28

        if not quiet:
            print('extra_selector="{}"'.format(extra_selector))

        # Users seen with b1 ...
        cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(
            table_name, b1, extra_selector))
        tuids = [uid for uid, in cursor.fetchall()]

        if not quiet:
            print(b1, len(tuids))

        # ... narrowed to those that also have a row for b2.
        uids = []
        for tuid in tuids:
            cursor.execute(
                "SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}"
                .format(table_name, tuid, b2, extra_selector))
            uids.extend(uid for uid, in cursor.fetchall())

        if not quiet:
            print(b1, 'and', b2, len(uids))

        mask = [1 for _ in range(num_features)]
        if not uids:
            return 0, mask

        # ---- Pass 1: per-position instability of the masked feature --------
        feature = "hashes"  # Feature to mask
        instability = {}
        image_pairs = []  # (image id for b1, image id for b2) per user
        for uid in uids:
            cursor.execute(
                "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
                format(table_name, b1, uid))
            image1_id = cursor.fetchone()[0]

            cursor.execute(
                "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
                format(table_name, b2, uid))
            image2_id = cursor.fetchone()[0]
            # Remembered so pass 2 does not have to repeat both queries.
            image_pairs.append((image1_id, image2_id))

            try:
                cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                    feature, table_name, image1_id))
                hashes_1 = cursor.fetchone()[0].split("&")[:num_features]

                cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                    feature, table_name, image2_id))
                hashes_2 = cursor.fetchone()[0].split("&")[:num_features]
            except Exception:
                # Best effort: users with missing or malformed feature data
                # do not contribute to the instability statistics.
                continue

            if len(hashes_1) != len(hashes_2):
                continue
            for position, (hash1_val, hash2_val) in enumerate(
                    zip(hashes_1, hashes_2)):
                if position not in instability:
                    instability[position] = 0.0
                if hash1_val != hash2_val:
                    instability[position] += 1.0 / len(uids)

        for position, share in instability.items():
            if share > rate:
                mask[position] = 0

        # ---- Pass 2: compare the masked fingerprints ------------------------
        hash_long = []  # fingerprints that were identical in both browsers
        fp_to_count = {}  # matching fingerprint -> number of users sharing it
        for image1_id, image2_id in image_pairs:
            fp_1 = Fingerprint(cursor, image1_id, table_name,
                               Fingerprint_Type.CROSS, attrs, b2, mask)
            fp_2 = Fingerprint(cursor, image2_id, table_name,
                               Fingerprint_Type.CROSS, attrs, b1, mask)

            if fp_1 == fp_2:
                hash_long.append(fp_1)
                fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

        # All denominators are clamped to 1.0 to avoid division by zero.
        num_distinct = max(float(len(fp_to_count)), 1.0)
        num_unique = float(
            sum(1 for count in fp_to_count.values() if count == 1))
        num_cross_browser = max(float(len(hash_long)), 1.0)
        num_uids = max(float(len(uids)), 1.0)

        if not quiet:
            for i, d in instability.items():
                print("{}: instability: {}".format(i, d))

            print('Cross_browser', num_cross_browser)
            print('Cross_browser rate', num_cross_browser / num_uids)

            print('Cross_browser unique', num_unique / num_distinct)
            print(num_unique, num_distinct)

        return num_cross_browser / num_uids * num_unique / num_cross_browser * 100, mask
def fingerprint_function(url):
    """Generate and return the fingerprint of ``url``.

    A new ``Fingerprint`` (kgram_len=4, window_len=1, base=10, modulo=1000)
    is created on every call and ``url`` is handed to its ``generate``.
    """
    return Fingerprint(
        kgram_len=4,
        window_len=1,
        base=10,
        modulo=1000,
    ).generate(str=url)
Beispiel #25
0
def get_consistent_ids(cur):
    """
        Returns a list of user ids having only consistent fingerprints

        The ``extensionDataScheme`` table is read in batches of 5000 counter
        values. Every fingerprint adds to a per-user inconsistency score:

        * more than one OS or more than one browser seen for the user,
          an excluded OS (Android, iOS, Windows Phone, Firefox OS,
          Windows 95) or an excluded browser (Safari, IE, Edge, Googlebot)
          sets the score to a very large constant;
        * a platform inconsistency reported by the fingerprint adds 5;
        * a fingerprint that cannot be processed sets the score to 1000000.

        A user is kept when score / number_of_fingerprints < 0.02. Users whose
        canvas hash changes too often (more than 35% distinct values over
        more than 5 fingerprints) are then removed as canvas poisoners.

        ``cur`` must return rows that can be indexed by column name
        (``row["id"]``, ``row["nb_fps"]``). An empty table yields ``[]``.
    """

    batch_size = 5000
    attributes = Fingerprint.INFO_ATTRIBUTES + Fingerprint.HTTP_ATTRIBUTES + \
                     Fingerprint.JAVASCRIPT_ATTRIBUTES + Fingerprint.FLASH_ATTRIBUTES
    excluded_oses = ("Android", "iOS", "Windows Phone", "Firefox OS",
                     "Windows 95")
    excluded_browsers = ("Safari", "IE", "Edge", "Googlebot")

    id_to_oses = dict()
    id_to_browsers = dict()
    id_to_nb_inconsistencies = dict()
    id_to_nb_fps = dict()

    cur.execute('SELECT max(counter) as nb_fps from extensionDataScheme')
    max_counter = cur.fetchone()["nb_fps"]
    if max_counter is None:
        # max() over an empty table is NULL: there is nothing to classify.
        return []
    nb_fps = max_counter + 1

    for i in range(0, nb_fps, batch_size):
        print(i)
        # Half-open range [i, i + batch_size): consecutive batches meet
        # without a gap, so rows whose counter is an exact multiple of
        # batch_size are no longer skipped.
        sql = "SELECT * FROM extensionDataScheme where counter >= %s and counter < %s"
        cur.execute(sql, (i, i + batch_size))
        for fp_dict in cur.fetchall():
            try:
                fp = Fingerprint(attributes, fp_dict)
                os_name = fp.getOs()
                browser = fp.getBrowser()
                fp_id = fp.getId()

                id_to_oses.setdefault(fp_id, set()).add(os_name)
                id_to_browsers.setdefault(fp_id, set()).add(browser)

                # The three checks below overwrite each other on purpose of
                # the original ordering: the last matching one wins.
                if len(id_to_browsers[fp_id]) > 1 or len(
                        id_to_oses[fp_id]) > 1:
                    id_to_nb_inconsistencies[fp_id] = 100000000

                if os_name in excluded_oses:
                    id_to_nb_inconsistencies[fp_id] = 10000000000

                if browser in excluded_browsers:
                    id_to_nb_inconsistencies[fp_id] = 10000000

                if fp.hasPlatformInconsistency():
                    id_to_nb_inconsistencies[fp_id] = \
                        id_to_nb_inconsistencies.get(fp_id, 0) + 5

                id_to_nb_fps[fp_id] = id_to_nb_fps.get(fp_id, 0) + 1

                # Seems weird but made on purpose !
                # (every counted user needs a score for the ratio below)
                id_to_nb_inconsistencies.setdefault(fp_id, 0)

            except Exception:
                # A fingerprint that cannot be processed marks its user as
                # heavily inconsistent.
                id_to_nb_inconsistencies[fp_dict["id"]] = 1000000

    user_id_consistent = [
        x for x in id_to_nb_fps
        if float(id_to_nb_inconsistencies[x]) / float(id_to_nb_fps[x]) < 0.02
    ]
    # we remove user that poison their canvas
    # we select users that changed canvas too frequently
    cur.execute(
        "SELECT id, count(distinct canvasJSHashed) as count, count(canvasJSHashed) as \
                nb_fps FROM extensionDataScheme group by id having count(distinct canvasJSHashed)/count(canvasJSHashed) > 0.35 \
                and count(canvasJSHashed) > 5 order by id")
    # A set keeps the membership test below O(1) per user.
    poisoner_ids = {row["id"] for row in cur.fetchall()}
    return [
        user_id for user_id in user_id_consistent
        if user_id not in poisoner_ids
    ]
Beispiel #26
0
def getRes(b1,
           b2,
           cursor,
           quiet,
           attrs="hashes, langs",
           extra_selector="",
           fp_type=Fingerprint_Type.CROSS):
    """Measure how often fingerprints agree between two browsers.

    Users are taken from the module-level ``table_name`` table: first every
    row recorded with browser ``b1`` (filtered by ``extra_selector``), then
    narrowed to the users that also have a row for ``b2``. For each such user
    one fingerprint per browser is built and compared, and the ``fonts``
    column is compared position by position.

    Args:
        b1: Name of the first browser, as stored in the ``browser`` column.
        b2: Name of the second browser.
        cursor: Database cursor offering ``execute``/``fetchone``/``fetchall``.
        quiet: Suppresses the progress and summary output when true (the
            ``hashall`` line is always printed).
        attrs: Attribute list handed to ``Fingerprint``.
        extra_selector: Raw SQL appended to the user-selection queries.
        fp_type: Fingerprint type handed to ``Fingerprint``.

    Returns:
        ``None`` when no user has both browsers, otherwise a tuple
        ``(number_of_users, cross_browser_rate, unique_rate)`` whose two
        rates are strings formatted as ``"{:3.1f}%"``.

    Side effects:
        Resets the module-level ``mask`` to ``None`` and rebuilds it from the
        first user's ``fonts`` value, zeroing positions whose instability is
        above 0.001. Once at least one user has both browsers, the
        module-level ``instability`` dict is replaced as well.
    """
    global mask
    global b_mask
    global instability

    if not quiet:
        print('extra_selector="{}"'.format(extra_selector))
    mask = None

    cursor.execute("SELECT COUNT(DISTINCT(ip)) FROM {}".format(table_name))
    if not quiet:
        print('ip', cursor.fetchone()[0])
    cursor.execute(
        "SELECT COUNT(DISTINCT(user_id)) FROM {}".format(table_name))
    if not quiet:
        print('user', cursor.fetchone()[0])

    # Users seen with b1 ...
    cursor.execute("SELECT user_id FROM {} WHERE browser='{}' {}".format(
        table_name, b1, extra_selector))
    tuids = [uid for uid, in cursor.fetchall()]

    if not quiet:
        print(b1, len(tuids))

    # ... narrowed to those that also have a row for b2.
    uids = []
    for tuid in tuids:
        cursor.execute(
            "SELECT user_id FROM {} WHERE user_id='{}' AND browser='{}' {}".
            format(table_name, tuid, b2, extra_selector))
        uids.extend(uid for uid, in cursor.fetchall())

    if not quiet:
        print(b1, 'and', b2, len(uids))

    if not uids:
        return None

    # uids is the list of users that use both b1 and b2
    hash_all = {}  # position -> values that were equal in both browsers
    hash_long = []  # fingerprints that were identical in both browsers
    fp_to_count = {}  # matching fingerprint -> number of users sharing it
    instability = {}  # position -> share of users whose value differed

    # Feature to mask
    feature = "fonts"

    for uid in uids:
        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b1, uid))
        image1_id = cursor.fetchone()[0]

        cursor.execute(
            "SELECT image_id FROM {} WHERE browser='{}' AND user_id='{}'".
            format(table_name, b2, uid))
        image2_id = cursor.fetchone()[0]

        fp_1 = Fingerprint(cursor, image1_id, table_name, fp_type, attrs, b2)
        fp_2 = Fingerprint(cursor, image2_id, table_name, fp_type, attrs, b1)

        try:
            cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                feature, table_name, image1_id))
            hashes_1 = cursor.fetchone()[0]

            cursor.execute("SELECT {} FROM {} WHERE image_id='{}'".format(
                feature, table_name, image2_id))
            hashes_2 = cursor.fetchone()[0]

            if mask is None:
                mask = [1 for _ in range(len(hashes_1))]

            if len(hashes_1) == len(hashes_2):
                for i, (hash1_val, hash2_val) in enumerate(
                        zip(hashes_1, hashes_2)):
                    hash_all.setdefault(i, [])
                    if i not in instability:
                        instability[i] = 0.0

                    if hash1_val == hash2_val:
                        hash_all[i].append(hash1_val)
                    else:
                        instability[i] += 1.0 / len(uids)
        except Exception:
            # Best effort: a user with missing or malformed feature data is
            # left out of the stability statistics.
            pass

        if fp_1 == fp_2:
            hash_long.append(fp_1)
            fp_to_count[fp_1] = fp_to_count.get(fp_1, 0) + 1

    # Single parenthesised argument: valid and identical on Python 2 and 3.
    print('hashall:' + str(len(hash_all)))

    for position, share in instability.items():
        if share > 0.001:
            mask[position] = 0

    # Denominators are clamped to 1.0 to avoid division by zero.
    num_distinct = max(float(len(fp_to_count)), 1.0)
    num_unique = float(
        sum(1 for count in fp_to_count.values() if count == 1))
    num_cross_browser = float(len(hash_long))
    num_uids = max(float(len(uids)), 1.0)

    if not quiet:
        for i, d in instability.items():
            print("{}: instability: {}".format(i, d))

        print('Cross_browser', num_cross_browser)
        print('Cross_browser rate', num_cross_browser / num_uids)

        print('Cross_browser unique', num_unique / num_distinct)
        print(num_unique, num_distinct)

    return int(num_uids), "{:3.1f}%".format(
        num_cross_browser / num_uids * 100), "{:3.1f}%".format(
            num_unique / num_distinct * 100)
def limpa_db(self):
    """Print the template count, call ``limpa_bd()``, print the count again.

    ``self`` is accepted but not used; a new ``Fingerprint`` is created for
    the operation. ``limpa_bd`` presumably clears the template database -
    verify against the Fingerprint class.
    """
    f = Fingerprint()
    # "Antes" = before, "Depois" = after (Portuguese). The labels used to be
    # swapped, tagging the pre-clean count as "after".
    print("Antes " + str(f.getTemplateCount()))
    f.limpa_bd()
    print("Depois " + str(f.getTemplateCount()))
 def get_fingerprint(self, fingerprint_id):
     """Load the document with the given id and wrap it in a Fingerprint.

     ``fingerprint_id`` is converted with ``ObjectId`` and matched against
     the ``_id`` field of ``self.collection``; the first result is used.
     """
     query = {"_id": ObjectId(fingerprint_id)}
     matches = self.collection.find(query)
     return Fingerprint(matches[0])
Beispiel #29
0
    if message.topic == "enroll/begin":
        fp.abort = True
        data = json.loads(message.payload)
        global search_thread
        search_thread.join()
        fp.enroll(data['identificacion'])
        if fp.abort:
            client.publish("enroll/abort", "")
            client.publish("search/finished", "")
            fp.abort = False
    if message.topic == "delete":
        fp.abort = True
        global search_thread
        search_thread.join()
        fp.delete(message.payload)
        client.publish("search/finished", "")
        fp.abort = False
    if message.topic == "search/finished":
        search_thread = threading.Thread(target=fp.search)
        search_thread.start()


# Start-up sequence: create the module-level Fingerprint object (named
# `fp`) and wire an MQTT client to the `on_message` callback.
fp = Fingerprint()
# "routine" is the first positional argument of the client constructor
# (presumably the MQTT client id - confirm against the paho version in use).
client = paho.Client("routine")
client.connect("localhost")
client.on_message = on_message
# Three topics are subscribed to.
client.subscribe("search/finished")
client.subscribe("enroll/begin")
client.subscribe("delete")
# Hand control to the client's network loop (expected to block - see paho docs).
client.loop_forever()