Example #1
    def testAdd(self):
        # Add two elements
        ht = hash_table.HashTable(5)
        array = [[None], [None], [None], [None], [None]]
        ht.add('Bob', '567-8888')
        ht.add('Ankit', '293-8625')
        if ht._get_hash('Bob') != ht._get_hash('Ankit'):
            array[ht._get_hash('Bob')] = [['Bob', '567-8888']]
            array[ht._get_hash('Ankit')] = [['Ankit', '293-8625']]
        else:
            array[ht._get_hash('Bob')] = [['Bob', '567-8888'],
                                          ['Ankit', '293-8625']]
        self.assertEqual(ht.table, array)

        # Add two elements, then replace the value for an existing key
        ht = hash_table.HashTable(5)
        array = [[None], [None], [None], [None], [None]]
        ht.add('Bob', '567-8888')
        ht.add('Ankit', '293-8625')
        ht.add('Ankit', '293-6753')
        if ht._get_hash('Bob') != ht._get_hash('Ankit'):
            array[ht._get_hash('Bob')] = [['Bob', '567-8888']]
            array[ht._get_hash('Ankit')] = [['Ankit', '293-6753']]
        else:
            array[ht._get_hash('Bob')] = [['Bob', '567-8888'],
                                          ['Ankit', '293-6753']]
        self.assertEqual(ht.table, array)
Example #2
    def testCreate(self):
        ht = hash_table.HashTable(5)
        self.assertEqual(ht._size, 5)
        self.assertEqual(ht.table, [[None], [None], [None], [None], [None]])

        ht = hash_table.HashTable(8)
        self.assertEqual(ht._size, 8)
        self.assertEqual(
            ht.table,
            [[None], [None], [None], [None], [None], [None], [None], [None]])
Example #3
    def setUp(self):
        ''' Test the HashTable constructor and
        prepare several tables for other tests. '''

        # If the algorithm works, tables could be any
        # positive integer length.

        self.size_one_hashtable = hash_table.HashTable(1)
        self.size_three_hashtable = hash_table.HashTable(3)
        self.size_ten_thousand_hashtable = hash_table.HashTable(10000)
Example #4
def create_blog_post(user_id):
    # Take the data sent in the request body and populate the BlogPost table
    data = request.get_json()
    # this fetches the first user with the given user id
    user = User.query.filter_by(id=user_id).first()
    # Check whether that user exists
    if not user:
        return jsonify({"message": "user does not exist"}), 400
    else:
        # Create a HashTable instance to stage the blog post data
        ht = hash_table.HashTable(10)
        # creating key-value pairs for all the data belonging to the blog post
        ht.add_key_value("title", data["title"])
        ht.add_key_value("body", data["body"])
        ht.add_key_value("date", now)
        ht.add_key_value("user_id", user_id)

        # Build the database record from the hash table values
        new_blog_post = BlogPost(title=ht.get_value("title"),
                                 body=ht.get_value("body"),
                                 date=ht.get_value("date"),
                                 user_id=ht.get_value("user_id"))
        # this adds the new blog post to the database
        db.session.add(new_blog_post)
        db.session.commit()

        return jsonify({"message": "blog post created"}), 200
Example #5
    def test_hash(self):
        table = hash_table.HashTable()  # create a HashTable object

        table.__setitem__("one", 1)
        self.assertEqual(table.size, 1)
        table.push("two", 2)
        self.assertEqual(table.size, 2)
        table.push("three", 3)
        self.assertEqual(table.size, 3)
        table.push("four", 4)
        self.assertEqual(table.size, 4)
        table["four"] = 4
        self.assertEqual(table.size, 4)
        table["five"] = 5
        self.assertEqual(table.size, 5)
        table.push("six", 6)
        self.assertEqual(table.size, 6)
        table.push("seven", 7)
        self.assertEqual(table.size, 7)
        table.push("eight", 8)
        self.assertEqual(table.size, 8)

        self.assertEqual(table.is_empty(), False)

        self.assertEqual(table.__getitem__("two"), 2)

        table["one"] = 123
        self.assertEqual(table.size, 8)

        table.pop("three")
        self.assertEqual(table.size, 7)
        table.pop("four")
        self.assertEqual(table.size, 6)
        table.pop("five")
        self.assertEqual(table.size, 5)
Example #6
def main():
    h = hash_table.HashTable()

    # The number of items in an empty hash_table
    test(len(h), 0)

    # Adding a key value pair to the hash table
    h.put(3, 'Brandon')
    h.put(6, 'Odiwuor')

    # The number of items in a hash_table with two items
    test(len(h), 2)

    # Testing the __contains__(self, key) method
    test(3 in h, True)
    test(5 in h, False)

    # Getting a value from the hash_table using a key
    test(h.get(6), 'Odiwuor')
    test(h[3], 'Brandon')

    # Changing the value associated with a particular key
    h[3] = 'Baker'
    test(h[3], 'Baker')

    # Deleting a key-value pair from the hash_table
    del h[6]
    test(len(h), 1)
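
The test helper used by main() is not part of this snippet; a minimal stand-in, assuming it only needs to compare a result with an expected value and report the outcome, might look like this:

def test(actual, expected):
    # Hypothetical helper: report pass/fail instead of raising, so main()
    # can run through every check even if one of them fails.
    status = "PASS" if actual == expected else "FAIL"
    print("{}: got {!r}, expected {!r}".format(status, actual, expected))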
Example #7
def match_pitch(filename, dbasename, density=18, radius=1, step=1 / 20):
    matcher = audfprint_match.Matcher()
    hash_tab = hash_table.HashTable(dbasename)
    analyzer = audfprint_analyze.Analyzer()
    analyzer.density = density

    bestScore = 0
    bestMatch = "NOMATCH"
    song = AudioSegment.from_mp3(filename)
    aa = AudioAugmentation()

    for i in range(-radius, radius + 1):
        # Speed up/slow down the sample
        if i != 0:
            octave = i * step
            shifted, _ = aa.pitch_shift(song, octave)
            shifted.export("tmp.mp3", format="mp3")

        matches, _, _ = matcher.match_file(
            analyzer, hash_tab, "tmp.mp3" if i != 0 else filename)
        if len(matches) == 0:
            continue
        songid, score = matches[0][:2]
        songname = hash_tab.names[songid]
        if score > bestScore:
            bestScore = score
            bestMatch = songname

    return bestMatch
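
A short usage sketch for match_pitch; the query filename and the fingerprint database name below are placeholders, not values from the original project:

if __name__ == "__main__":
    # Try the query at its native pitch plus one step up and one step down
    # against a previously built fingerprint database.
    best = match_pitch("query.mp3", "fpdbase.pklz", radius=1, step=1 / 20)
    print("Best match:", best)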
Example #8
def create(event, context):
    s3.Bucket(BUCKET_NAME).download_file('_test-fprint.afpt',
                                         '/tmp/_test-fprint.afpt')

    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025
    analyzer.verbose = False

    # hashbits=20, depth=100, maxtime=16384
    hash_tab = hash_table.HashTable(hashbits=20, depth=100, maxtime=16384)
    hash_tab.params['samplerate'] = analyzer.target_sr

    analyzer.ingest(hash_tab, '/tmp/_test-fprint.afpt')

    if hash_tab and hash_tab.dirty:
        hash_tab.save('/tmp/_test-db.pklz')

    s3.Bucket(BUCKET_NAME).upload_file('/tmp/_test-db.pklz', '_test-db.pklz')

    body = {
        "message": "Go Serverless v1.0! Your function executed successfully!",
        "input": event
    }

    response = {"statusCode": 200, "body": json.dumps(body)}
    return response
Example #9
def local_tester():
    test_fn = '/Users/dpwe/Downloads/carol11k.wav'
    test_ht = hash_table.HashTable()
    test_analyzer = Analyzer()

    test_analyzer.ingest(test_ht, test_fn)
    test_ht.save('httest.pklz')
Example #10
    def test_search(self):
        test_table = ht.HashTable(13)
        test_table.put(10, 30)
        test_table.put(1, 20)
        # Normal search
        self.assertEqual(test_table.search(10), 30)
        # Non-existent key search
        self.assertEqual(test_table.search(2), None)
Example #11
def location_hashmap():
    """This returns the location and address hashmaps

    Space Complexity: O(n)

    Time Complexity: O(n)
        
    Returns:
       hash_table.HashTable: location hashtable, address hashtable
       
    """
    name_hash = hash_table.HashTable()
    address_hash = hash_table.HashTable()
    full_address = parse_distance_name_data()
    
    for i, j in enumerate(full_address):
        name_hash.add(j[1], i)
        address_hash.add(j[2], i)

    return name_hash, address_hash
Example #12
def do_cmd(cmd, analyzer, hash_tab, filename_iter, matcher, outdir, type, report, skip_existing=False, strip_prefix=None):
    """ Breaks out the core part of running the command.
        These are just the single-core versions.
    """
    if cmd == 'merge' or cmd == 'newmerge':
        # files are other hash tables, merge them in
        for filename in filename_iter:
            hash_tab2 = hash_table.HashTable(filename)
            if "samplerate" in hash_tab.params:
                assert hash_tab.params["samplerate"] == hash_tab2.params["samplerate"]
            else:
                # "newmerge" fails to setup the samplerate param
                hash_tab.params["samplerate"] = hash_tab2.params["samplerate"]
            hash_tab.merge(hash_tab2)

    elif cmd == 'precompute':
        # just precompute fingerprints, single core
        for filename in filename_iter:
            report(file_precompute(analyzer, filename, outdir, type, skip_existing=skip_existing, strip_prefix=strip_prefix))

    elif cmd == 'match':
        msgs = []
        # Running query, single-core mode
        for num, filename in enumerate(filename_iter):
            msgs.append(matcher.file_match_to_msgs(analyzer, hash_tab, filename, num))
            #report(msgs)
        return msgs

    elif cmd == 'new' or cmd == 'add':
        # Adding files
        tothashes = 0
        ix = 0
        for filename in filename_iter:
            report([time.ctime() + " ingesting #" + str(ix) + ": "
                    + filename + " ..."])
            dur, nhash = analyzer.ingest(hash_tab, filename)
            tothashes += nhash
            ix += 1

        report(["Added " +  str(tothashes) + " hashes "
                + "(%.1f" % (tothashes/float(analyzer.soundfiletotaldur))
                + " hashes/sec)"])
    elif cmd == 'remove':
        # Removing files from hash table.
        for filename in filename_iter:
            hash_tab.remove(filename)

    elif cmd == 'list':
        hash_tab.list(lambda x: report([x]))

    else:
        raise ValueError("unrecognized command: "+cmd)
Example #13
def loadHashTable(words, option):
    hashSize = int(input("Insert the size of the hash: "))

    hesh = ht.HashTable(hashSize)

    # Insert differently depending on the probing option
    if option == 'linear':
        for key, word in words.items():
            hesh.insert(word, key, 'linear')
    elif option == 'quadratic':
        for key, word in words.items():
            hesh.insert(word, key, 'quadratic')

    return hesh
Example #14
    def match(self):
        matcher = audfprint_match.Matcher()
        matcher.find_time_range = True
        matcher.verbose = 1
        matcher.max_returns = 100

        analyzer = audfprint_analyze.Analyzer()
        analyzer.n_fft = 512
        analyzer.n_hop = analyzer.n_fft // 2
        analyzer.shifts = 1
        # analyzer.exact_count = True
        analyzer.density = 20.0
        analyzer.target_sr = 11025

        hash_tab = hash_table.HashTable("./samples.pklz")
        hash_tab.params['samplerate'] = analyzer.target_sr

        qry = "./Samples/viral.afpt"
        rslts, dur, nhash = matcher.match_file(analyzer, hash_tab,
                                               "./Samples/viral.afpt", 0)
        t_hop = analyzer.n_hop / float(analyzer.target_sr)
        qrymsg = qry + (' %.1f ' % dur) + "sec " + str(nhash) + " raw hashes"

        msgrslt = []
        if len(rslts) == 0:
            nhashaligned = 0
            msgrslt.append("NOMATCH " + qrymsg)
        else:
            for (tophitid, nhashaligned, aligntime, nhashraw, rank, min_time,
                 max_time) in rslts:
                # msg = ("Matched {:6.1f} s starting at {:6.1f} s in {:s}"
                #            " to time {:6.1f} s in {:s}").format(
                #         (max_time - min_time) * t_hop, min_time * t_hop, qry,
                #         (min_time + aligntime) * t_hop, hash_tab.names[tophitid])
                msg = (
                    "Matched {:6.1f} s starting at {:6.1f} s in {:s}"
                    " to time {:6.1f} s in {:n}; max {:6.1f} min {:6.1f} align {:6.1f} hop {:6.1f}"
                ).format(
                    (max_time - min_time) * t_hop,
                    min_time * t_hop,
                    qry,
                    (min_time + aligntime) * t_hop,
                    tophitid,  #),
                    max_time * t_hop,
                    min_time * t_hop,
                    aligntime * t_hop,
                    t_hop)

                msgrslt.append(msg)
        dumper.dump(msgrslt)
Example #15
def match(event, context):
    s3.Bucket(BUCKET_NAME).download_file('_test-fprint.afpt',
                                         '/tmp/_test-fprint.afpt')
    s3.Bucket(BUCKET_NAME).download_file('_test-db.pklz', '/tmp/_test-db.pklz')

    qry = '/tmp/_test-fprint.afpt'
    hashFile = '/tmp/_test-db.pklz'

    matcher = audfprint_match.Matcher()
    matcher.find_time_range = True
    matcher.verbose = False
    matcher.max_returns = 100

    matcher.exact_count = True
    matcher.max_alignments_per_id = 20

    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025
    analyzer.verbose = False

    hash_tab = hash_table.HashTable(hashFile)
    hash_tab.params['samplerate'] = analyzer.target_sr

    rslts, dur, nhash = matcher.match_file(analyzer, hash_tab, qry, 0)
    t_hop = analyzer.n_hop / float(analyzer.target_sr)
    qrymsg = qry + (' %.1f ' % dur) + "sec " + str(nhash) + " raw hashes"

    # print "duration,start,from,time,source,sourceId,nhashaligned,aligntime,nhashraw,rank,min_time,max_time, t_hop"
    matches = []
    if len(rslts) == 0:
        nhashaligned = 0
    else:
        for (tophitid, nhashaligned, aligntime, nhashraw, rank, min_time,
             max_time) in rslts:
            msg = (
                "{:f},{:f},{:s},{:f},{:s},{:n},{:n},{:n},{:n},{:n},{:n},{:n},{:f}"
            ).format((max_time - min_time) * t_hop, min_time * t_hop, qry,
                     (min_time + aligntime) * t_hop, hash_tab.names[tophitid],
                     tophitid, nhashaligned, aligntime, nhashraw, rank,
                     min_time, max_time, t_hop)
            matches.append(msg)

    response = {"statusCode": 200, "body": json.dumps(matches)}
    return response
Example #16
def make_ht_from_list(analyzer, filelist, hashbits, depth, maxtime, pipe=None):
    """ Populate a hash table from a list, used as target for
        multiprocess division.  pipe is a pipe over which to push back
        the result, else return it """
    # Create new ht instance
    ht = hash_table.HashTable(hashbits=hashbits, depth=depth, maxtime=maxtime)
    # Add in the files
    for filename in filelist:
        hashes = analyzer.wavfile2hashes(filename)
        ht.store(filename, hashes)
    # Pass back to caller
    if pipe:
        pipe.send(ht)
    else:
        return ht
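
The docstring above mentions multiprocess division over a pipe; a minimal driver sketch under those assumptions (an already-configured analyzer, picklable HashTable objects, and the merge() method shown in the do_cmd example) could look like this. It is illustrative only, not the project's actual do_cmd_multiproc logic:

import multiprocessing

def build_ht_parallel(analyzer, filelist, hashbits, depth, maxtime, ncores=2):
    # Split the files into roughly equal chunks, one per worker process.
    chunks = [filelist[i::ncores] for i in range(ncores)]
    conns, procs = [], []
    for chunk in chunks:
        parent_conn, child_conn = multiprocessing.Pipe()
        proc = multiprocessing.Process(
            target=make_ht_from_list,
            args=(analyzer, chunk, hashbits, depth, maxtime, child_conn))
        proc.start()
        conns.append(parent_conn)
        procs.append(proc)
    # Receive each partial table, then join the workers and merge the results.
    tables = [conn.recv() for conn in conns]
    for proc in procs:
        proc.join()
    merged = tables[0]
    for other in tables[1:]:
        merged.merge(other)
    return merged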
Example #17
def glob2hashtable(pattern, density=None):
    """ Build a hash table from the files matching a glob pattern """
    ht = hash_table.HashTable()
    filelist = glob.glob(pattern)
    initticks = time.clock()
    totdur = 0.0
    tothashes = 0
    for ix, file in enumerate(filelist):
        print(time.ctime(), "ingesting #", ix, ":", file, "...")
        dur, nhash = ingest(ht, file, density)
        totdur += dur
        tothashes += nhash
    elapsedtime = time.clock() - initticks
    print("Added", tothashes, "(", tothashes / float(totdur),
          "hashes/sec) at ", elapsedtime / totdur, "x RT")
    return ht
Example #18
def regular_matching():
    # get find_peaks from the analyzer
    analyzer = audfprint_analyze.Analyzer()
    hash_tab = hash_table.HashTable('fpdbase.pklz')
    matcher = audfprint_match.Matcher()

    sampling_seconds = 10
    sampling_interval = 15
    prev_resultID = []

    count = 0
    while True:
        start = time.time()
        twoSecondArray = different_record(sampling_seconds)

        peakLists = analyzer.find_peaks(twoSecondArray, 11025)
        landmarkLists = analyzer.peaks2landmarks(peakLists)
        hashesLists = audfprint_analyze.landmarks2hashes(landmarkLists)
        print(hashesLists)

        hashes_hashes = (((hashesLists[:, 0].astype(np.uint64)) << 32)
                            + hashesLists[:, 1].astype(np.uint64))
        unique_hash_hash = np.sort(np.unique(hashes_hashes))
        unique_hashes = np.hstack([
            (unique_hash_hash >> 32)[:, np.newaxis],
            (unique_hash_hash & ((1 << 32) - 1))[:, np.newaxis]
        ]).astype(np.int32)
        hashes = unique_hashes
        #now the matching
        # for num, filename in enumerate(filename_iter):
        #     # count += 1
        #     msgs = matcher.file_match_to_msgs(analyzer, hash_tab, filename, num)
        #     report(msgs)

        # file_match_to_msgs(self, analyzer, ht, qry, number=None)
        # print(matcher.file_match_to_msgs(analyzer, hash_tab, "Some qry name"))
        # rslts, dur, nhash = match_file(matcher, analyzer, hash_tab, "some query", hashesLists)
        message, results = file_match_to_msgs(matcher, analyzer, hash_tab, "FROM MICROPHONE", hashes)
        print(sampling_seconds, sampling_interval)
        
        count += 1
        end = time.time() - start
        print(end)
        
        time.sleep(sampling_interval - (end - sampling_seconds))
        
    print(count)
Example #19
def create(event, context):
    day = event.get('date')
    if not day:
        day = (date.today() - timedelta(2)).strftime('%Y%m%d')

    channel = event['channel']

    analyzer = audfprint_analyze.Analyzer()
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    analyzer.shifts = 1
    # analyzer.exact_count = True
    analyzer.density = 20.0
    analyzer.target_sr = 11025
    analyzer.verbose = False

    # hashbits=20, depth=100, maxtime=16384
    # maxtime=262144
    hash_tab = hash_table.HashTable(hashbits=20, depth=100, maxtime=262144)
    hash_tab.params['samplerate'] = analyzer.target_sr

    fingerprints = s3client.list_objects_v2(Bucket=BUCKET_NAME,
                                            Prefix='tva/{}/{}/'.format(
                                                day, channel))['Contents']

    for fingerprint in fingerprints:
        s3.Bucket(BUCKET_NAME).download_file(
            fingerprint['Key'],
            '/tmp/{}'.format(fingerprint['Key'].split('/').pop()))
        analyzer.ingest(hash_tab,
                        '/tmp/{}'.format(fingerprint['Key'].split('/').pop()))
        os.remove('/tmp/{}'.format(fingerprint['Key'].split('/').pop()))

    if hash_tab and hash_tab.dirty:
        hash_tab.save('/tmp/{}-{}.pklz'.format(channel, day))

    s3.Bucket(BUCKET_NAME).upload_file(
        '/tmp/{}-{}.pklz'.format(channel, day),
        'hash/{}/{}-{}.pklz'.format(day, channel, day))
    os.remove('/tmp/{}-{}.pklz'.format(channel, day))

    body = {"input": event, "fingerprints": len(fingerprints)}

    response = {"statusCode": 200, "body": json.dumps(body)}
    return response
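
For local experimentation the handler can be driven directly with a dict event, assuming the s3/s3client clients and BUCKET_NAME are already configured; the date and channel values below are placeholders based only on how the fields are used above:

if __name__ == "__main__":
    # 'date' is optional (it defaults to two days ago); 'channel' is required.
    event = {"date": "20200101", "channel": "example-channel"}
    print(create(event, None))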
Example #20
    def test_put(self):
        test_table = ht.HashTable(13)
        # Normal put
        test_table.put(10, 20)
        self.assertEqual(
            test_table.table,
            [[], [], [], [], [], [], [], [], [], [], [(10, 20)], [], []])
        # Replace put
        test_table.put(10, 30)
        self.assertEqual(
            test_table.table,
            [[], [], [], [], [], [], [], [], [], [], [(10, 30)], [], []])
        # Chaining put
        test_table.put(23, 20)
        self.assertEqual(
            test_table.table,
            [[], [], [], [], [], [], [], [], [], [], [(10, 30),
                                                      (23, 20)], [], []])
Example #21
def package_hashmap():
    """This returns the package hashmap

    Space Complexity: O(n)

    Time Complexity: O(n)
        
    Returns:
       hash_table.HashTable: packages hashtable
       
    """
    package_hash = hash_table.HashTable()
    packages = parse_packages()
    for i in packages:
        
        package_hash[i[0]] = package.Package(*i)

    return package_hash
Example #22
def create_post(user_id):
    data = request.get_json()
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return jsonify({'message': "user doesn't exist"}), 400

    ht = hash_table.HashTable(10)
    ht.add_key_value("title", data["title"])
    ht.add_key_value("body", data["body"])
    ht.add_key_value("date", now)
    ht.add_key_value("user_id", user_id)

    new_post = BlogPost(title=ht.get_value('title'),
                        body=ht.get_value('body'),
                        date=ht.get_value('date'),
                        user_id=ht.get_value('user_id'))
    db.session.add(new_post)
    db.session.commit()
    return jsonify({'message': "a new post created"}), 200
Example #23
    def test_on_a_long_list_of_words(self):

        # This section heavily styled after:
        # https://github.com/jbbrokaw/
        #   data-structures/blob/master/test_hashtable.py

        word_list_location = '/usr/share/dict/words'

        # Before beginning to iterate through a file using a while loop,
        # initialize each_word so that it won't fail immediately:
        each_word = "Non-null value."

        # First, figure out the ideal size for maximizing the performance
        # of the resulting hash table.

        # Figure out how many words are there:
        word_count = 0
        with io.open(word_list_location) as file_full_of_words:
            # Terminate at end of file:
            while each_word != "":
                each_word = file_full_of_words.readline().strip()
                word_count += 1

        # According to The Powers That Be, we must now multiply the expected
        # size of the hash table by one point six to divine the value of
        # the ideal size of the hash table for performance purposes.
        # Note that HashTable size must be integerized BEFORE construction.
        calculated_hashtable_size = int(word_count * 1.6)

        big_huge_hashtable = hash_table.HashTable(calculated_hashtable_size)

        # Reset the sentinel so the loop runs again for this file:
        each_word = "Non-null value."
        with io.open(word_list_location) as file_full_of_words:
            while each_word != "":
                each_word = file_full_of_words.readline().strip()
                # Make keys and values identical to ease testing this monster:
                big_huge_hashtable.set(each_word, each_word)

        # Now that the table is compiled, ensure the hasher
        # relates words to the file as expected.
        # Reset the sentinel again before the verification pass:
        each_word = "Non-null value."
        with io.open(word_list_location) as file_full_of_words:
            while each_word != "":
                each_word = file_full_of_words.readline().strip()
                assert big_huge_hashtable.get(each_word) == each_word
Example #24
def create_blog_post(user_id):
    data = request.get_json()
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return jsonify({"message": "User does not exist!"}), 400

    ht = hash_table.HashTable(10)
    ht.add_key_value("title", data["title"])
    ht.add_key_value("body", data["body"])
    ht.add_key_value("date", now)
    ht.add_key_value("user_id", user_id)

    new_blog_post = BlogPost(title=ht.get_value("title"),
                             body=ht.get_value("body"),
                             date=ht.get_value("date"),
                             user_id=ht.get_value("user_id"))
    db.session.add(new_blog_post)
    db.session.commit()
    return jsonify({"message": "new blog post created"}), 200
Example #25
    def test_delete(self):
        test_table = ht.HashTable(13)
        test_table.put(10, 20)
        self.assertEqual(
            test_table.table,
            [[], [], [], [], [], [], [], [], [], [], [(10, 20)], [], []])
        test_table.put(3, 7)
        self.assertEqual(
            test_table.table,
            [[], [], [], [(3, 7)], [], [], [], [], [], [], [(10, 20)], [], []])
        # Normal delete
        test_table.delete(10)
        self.assertEqual(
            test_table.table,
            [[], [], [], [(3, 7)], [], [], [], [], [], [], [], [], []])
        # Non-existent key delete
        test_table.delete(2)
        self.assertEqual(
            test_table.table,
            [[], [], [], [(3, 7)], [], [], [], [], [], [], [], [], []])
Example #26
def glob2hashtable(pattern, density=20.0):
    """ Build a hash table from the files matching a glob pattern """
    global g2h_analyzer
    if g2h_analyzer is None:
        g2h_analyzer = Analyzer(density=density)

    ht = hash_table.HashTable()
    filelist = glob.glob(pattern)
    initticks = time.clock()
    totdur = 0.0
    tothashes = 0
    for ix, file_ in enumerate(filelist):
        #print(time.ctime(), "ingesting #", ix, ":", file_, "...")
        dur, nhash = g2h_analyzer.ingest(ht, file_)
        totdur += dur
        tothashes += nhash
    elapsedtime = time.clock() - initticks
    #print("Added", tothashes, "(", tothashes / totdur, "hashes/sec) at ",
    #elapsedtime / totdur, "x RT")
    return ht
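
A short usage sketch; the glob pattern and output filename are placeholders, and save() is the same HashTable method used in the other examples:

# Fingerprint every matching file and persist the table for later matching.
ht = glob2hashtable("sounds/*.wav", density=20.0)
ht.save("fpdbase.pklz")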
Example #27
def fingerprint_filename(filename, dbasename):
    matcher = audfprint_match.Matcher()
    matcher.window = 2
    matcher.threshcount = 5
    matcher.max_returns = 1
    matcher.search_depth = 100
    matcher.sort_by_time = False
    matcher.exact_count = False
    matcher.illustrate = False
    matcher.illustrate_hpf = False
    matcher.verbose = 1
    matcher.find_time_range = False
    matcher.time_quantile = 0.05

    analyzer = audfprint_analyze.Analyzer()
    # Read parameters from command line/docopts
    analyzer.density = 20
    analyzer.maxpksperframe = 5
    analyzer.maxpairsperpeak = 3
    analyzer.f_sd = 30.0
    analyzer.shifts = 0
    # fixed - 512 pt FFT with 256 pt hop at 11025 Hz
    analyzer.target_sr = 11025
    analyzer.n_fft = 512
    analyzer.n_hop = analyzer.n_fft // 2
    # set default value for shifts depending on mode
    if analyzer.shifts == 0:
        # Default shift is 4 for match, otherwise 1
        analyzer.shifts = 4
    analyzer.fail_on_error = True

    hashtable = hash_table.HashTable(dbasename)
    results, dur, nhash = matcher.match_file(analyzer, hashtable, filename, 0)
    try:
        (first_hit, nhashaligned, aligntime, nhashraw, rank, min_time,
         max_time) = results[0]
        filename = hashtable.names[first_hit]

        return nhashraw, filename
    except IndexError:
        return None, None
Example #28
def create_blog_post(user_id):
    """Create a new blog post and add it to the database.
    Return a success message when the operation is done.

    Args:
        user_id (int): user id

    Returns:
        JSON: success message
    """

    data = request.get_json()

    # Check if the user is in the database
    user = User.query.filter_by(id=user_id).first()
    if not user:
        return jsonify({"message": "user does not exist!"}), 400

    # Create an instance of a HashTable
    ht = hash_table.HashTable(10)

    # Create a blog post
    ht.add_key_value("title", data["title"])
    ht.add_key_value("body", data["body"])
    ht.add_key_value("date", now)
    ht.add_key_value("user_id", user_id)

    # Add a blog post to the database
    new_blog_post = BlogPost(
        title=ht.get_value("title"),
        body=ht.get_value("body"),
        date=ht.get_value("date"),
        user_id=ht.get_value("user_id"),
    )
    db.session.add(new_blog_post)
    db.session.commit()
    return jsonify({"message": "new blog post created"}), 200
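
How this view is registered is not shown here; a minimal, hypothetical wiring and test call, assuming a standard Flask app object and that user_id is carried in the URL (the route path is an assumption, not the project's actual route):

# Hypothetical registration; the real application defines its own route.
app.add_url_rule("/blog_post/<int:user_id>", view_func=create_blog_post,
                 methods=["POST"])

with app.test_client() as client:
    resp = client.post("/blog_post/1",
                       json={"title": "Hello", "body": "First post"})
    print(resp.status_code, resp.get_json())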
Example #29
import hash_table
import numpy as np
import csv
import time
import sys

# The first argument of the HashTable constructor is size, which defines the number of slots, m.
# The method specifies which hash function will be used to get a hash value for selecting a slot.
# The different methods are: DivisionMethod, MultiplicationMethod and UniversalMethod.
# The collisionType variable is which technique to use for handling collisions.
# The different options are: Chaining and OpenAddressing.
# The probeType variable defines the probe type, when using open addressing.
# The different operations are: Linear, Quadratic, and DoubleHashing.
hash_table = hash_table.HashTable(20000,
                                  method="MultiplicationMethod",
                                  collisionType="Chaining",
                                  probeType="Linear")

# The two datasets are Video_Games.csv and disney-voice-actors.csv
# To change the dataset, edit the filename on line 18 below to one of the two datasets mentioned above.
with open('datasets/Video_Games.csv', mode='r', encoding="utf8") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    line_count = 1
    startime = time.time()
    for row in csv_reader:
        if row is not None:
            if line_count == 0:
                line_count += 1
            # The key is set to a random value between 0 and 3500 for each Name element.
            # When using Video_Games.csv, the value variable should be set to Name.
            # When using disney-voice-actors.csv, the value variable should be set to voice-actor.
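            # --- Hypothetical completion of the truncated loop body ---
            # (not in the original snippet). It assumes the instance exposes
            # an insert(value, key, probeType)-style method like the one
            # called in the loadHashTable example above.
            key = np.random.randint(0, 3500)   # random key between 0 and 3500
            value = row["Name"]                # "voice-actor" for the Disney file
            hash_table.insert(value, key, "Linear")
            line_count += 1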
Example #30
def main(argv):
    """ Main routine for the command-line interface to audfprint """
    # Other globals set from command line
    args = docopt.docopt(USAGE, version=__version__, argv=argv[1:])

    # Figure which command was chosen
    poss_cmds = [
        'new', 'add', 'precompute', 'merge', 'newmerge', 'match', 'list',
        'remove'
    ]
    cmdlist = [cmdname for cmdname in poss_cmds if args[cmdname]]
    if len(cmdlist) != 1:
        raise ValueError("must specify exactly one command")
    # The actual command as a str
    cmd = cmdlist[0]

    # Setup output function
    report = setup_reporter(args)

    # Keep track of wall time
    initticks = time.clock()

    # Command line sanity.
    if args["--maxtimebits"]:
        args["--maxtimebits"] = int(args["--maxtimebits"])
    else:
        args["--maxtimebits"] = hash_table._bitsfor(int(args["--maxtime"]))

    # Setup the analyzer if we're using one (i.e., unless "merge")
    analyzer = setup_analyzer(args) if not (cmd == "merge" or cmd == "newmerge"
                                            or cmd == "list"
                                            or cmd == "remove") else None

    precomp_type = 'hashes'

    # Set up the hash table, if we're using one (i.e., unless "precompute")
    if cmd != "precompute":
        # For everything other than precompute, we need a database name
        # Check we have one
        dbasename = args['--dbase']
        if not dbasename:
            raise ValueError("dbase name must be provided if not precompute")
        if cmd == "new" or cmd == "newmerge":
            # Check that the output directory can be created before we start
            ensure_dir(os.path.split(dbasename)[0])
            # Create a new hash table
            hash_tab = hash_table.HashTable(
                hashbits=int(args['--hashbits']),
                depth=int(args['--bucketsize']),
                maxtime=(1 << int(args['--maxtimebits'])))
            # Set its samplerate param
            if analyzer:
                hash_tab.params['samplerate'] = analyzer.target_sr

        else:
            # Load existing hash table file (add, match, merge)
            if args['--verbose']:
                report([time.ctime() + " Reading hash table " + dbasename])
            hash_tab = hash_table.HashTable(dbasename)
            if analyzer and 'samplerate' in hash_tab.params \
                   and hash_tab.params['samplerate'] != analyzer.target_sr:
                # analyzer.target_sr = hash_tab.params['samplerate']
                print("db samplerate overridden to ", analyzer.target_sr)
    else:
        # The command IS precompute
        # dummy empty hash table
        hash_tab = None
        if args['--precompute-peaks']:
            precomp_type = 'peaks'

    # Create a matcher
    matcher = setup_matcher(args) if cmd == 'match' else None

    filename_iter = filename_list_iterator(args['<file>'], args['--wavdir'],
                                           args['--wavext'], args['--list'])

    #######################
    # Run the main command
    #######################

    # How many processors to use (multiprocessing)
    ncores = int(args['--ncores'])
    if ncores > 1 and not (cmd == "merge" or cmd == "newmerge" or cmd == "list"
                           or cmd == "remove"):
        # merge/newmerge/list/remove are always single-thread processes
        do_cmd_multiproc(cmd,
                         analyzer,
                         hash_tab,
                         filename_iter,
                         matcher,
                         args['--precompdir'],
                         precomp_type,
                         report,
                         skip_existing=args['--skip-existing'],
                         ncores=ncores)
    else:
        do_cmd(cmd,
               analyzer,
               hash_tab,
               filename_iter,
               matcher,
               args['--precompdir'],
               precomp_type,
               report,
               skip_existing=args['--skip-existing'])

    elapsedtime = time.clock() - initticks
    if analyzer and analyzer.soundfiletotaldur > 0.:
        print("Processed "
              + "%d files (%.1f s total dur) in %.1f s sec = %.3f x RT" \
              % (analyzer.soundfilecount, analyzer.soundfiletotaldur,
                 elapsedtime, (elapsedtime/analyzer.soundfiletotaldur)))

    # Save the hash table file if it has been modified
    if hash_tab and hash_tab.dirty:
        # We already created the directory, if "new".
        hash_tab.save(dbasename)