Example #1
0
class Parsers:
    """
        Parse json file, store important stuff.

        Intended crawl loop (pseudo-code):

            get no of pages-> pages
            page_counter <- 1
            while(page_counter < pages):
                get_page(page_counter)
                for recording in recordings:
                    get_recording_details()
                    get_audio_file()
                page_counter++
    """

    MAX_IMAGES_URL = 4
    MAX_NO_THREADS = 100
    # config file read by load_config_file(); holds the [database] section
    CONFIG_FILE = 'bsrs.cfg'
    BIRD_SOUNDS_DIR = 'BirdSounds/'

    def __init__(self):
        """
            Create the helper objects, read the config file, open the DB
            connection and set up the work queues.

            Raises Exception (from load_config_file) when the config file
            cannot be read.
        """
        self.fetcher = Fetcher()
        self.logger = Logging()
        self.config_file = Parsers.CONFIG_FILE
        self.config = ConfigParser()

        self.load_config_file()
        creds = self.get_db_creds()
        self.database = MySQLDatabases(hostname=creds['hostname'], username=creds['username'],
                                       password=creds['passwd'], database=creds['db_name'])

        #queues
        # One Queue per attribute: a chained assignment (a = b = Queue())
        # would make two attributes share a single queue object.
        self.birdID_queue = Queue()
        self.wavFile_queue = Queue()
        self.soundtype_queue = Queue()
        self.soundURL_queue = Queue()

    def get_db_creds(self):
        """
            load db creds from config file

            Returns a dict with the keys 'hostname', 'username', 'passwd'
            and 'db_name', read from the [database] section of the config.
        """
        option_for_key = (
            ('hostname', 'db_hostname'),
            ('username', 'db_username'),
            ('passwd', 'db_password'),
            ('db_name', 'db_dbname'),
        )
        return dict((key, self.config.get('database', option))
                    for key, option in option_for_key)

    def load_config_file(self):
        """
            Load config file

            Logs the outcome to the 'fetcher' log.
            Raises Exception when the file is missing or cannot be parsed.
        """
        try:
            # read() silently skips files it cannot open and returns the
            # list of files it did parse, so an empty result means "missing"
            loaded_files = self.config.read(self.config_file)
        except Exception:
            loaded_files = []

        if not loaded_files:
            info_msg = "config file %s missing" % self.config_file
            self.logger.write_log('fetcher', 'e', info_msg)
            raise Exception(info_msg)

        info_msg = "Loaded config file %s" % self.config_file
        self.logger.write_log('fetcher', 'i', info_msg)
Example #2
0
class MySQLDatabases:
    #TODO add sql queries descriptions
    """
        MySQL connection holder plus the SQL statements run against it.

        Queries:
            INSERT_* / SELECT_* / UPDATE_* class attributes below are
            statement templates; the table name is filled in here and the
            remaining %s placeholders are left for the code that executes
            them.
    """

    #db conn keys
    # NOTE(review): __init__ shadows the KEY_* names below on the instance
    # with the actual connection values.
    CONNECTION = "connection"
    KEY_USERNAME = "******"
    KEY_PASSWORD = "******"
    KEY_DATABASE = "db_name"
    KEY_HOST = "db_host"

    #tables to use
    FINGERPRINTS_TBL = "fingerprints"
    BIRDS_TBL = "birds"
    IMAGES_TBL = "images"
    SOUNDS_TBL = "sounds"
    SOUNDS_TMP_TBL = "tmp_sounds"
    STATS_TBL = "stats"
    INBOUND_REQ_TBL = "inbound_requests"
    OUTBOUND_MATCHES_TBL = "outbound_matches"

    #tbl field names
    FIELD_BIRDNAME = "englishName"

    #sql stmts
    #TODO add queries for select,insert,drops,updates
    # NOTE(review): several statements wrap their placeholders in quotes
    # ('%s'). Those presumably get filled with Python string formatting
    # rather than driver-side parameter binding -- confirm that every value
    # interpolated that way is escaped by the executing code.

    #inserts
    INSERT_FINGERPRINT = "INSERT INTO %s(birdID, hash, start_time) values(%%s,unhex(%%s), %%s)" % (FINGERPRINTS_TBL)
    INSERT_IMAGES = " INSERT INTO %s(birdID, imageURL, siteURL) VALUES ('%%s', '%%s','%%s')" % (IMAGES_TBL)
    INSERT_SOUNDS = "INSERT INTO %s(birdID, soundType, wavFile, soundURL) values" \
                    "('%%s','%%s','%%s', '%%s')" % (SOUNDS_TBL)
    INSERT_BIRDS = "INSERT INTO %s(englishName, genericName, specificName, Recorder, Location, Country, lat_lng, xenoCantoURL) " \
                   "values('%%s', '%%s', '%%s', '%%s', '%%s', '%%s', '%%s', '%%s')" % (BIRDS_TBL)
    INSERT_TMP_SOUNDS = "INSERT INTO %s(birdID, wavFile, soundType, soundURL) values('%%s', '%%s', '%%s', '%%s')" % (
        SOUNDS_TMP_TBL)
    INSERT_STATS = "insert into %s(birdID, match_time, confidence, offset) values(%%s, %%s, %%s, %%s)" % (STATS_TBL)
    INSERT_OUTBOUND_MATCH = "INSERT INTO %s (requestID, birdID, matchResults) values(%%s, %%s, %%s)" % (
        OUTBOUND_MATCHES_TBL)
    INSERT_INBOUND_REQUEST = "INSERT INTO %s (wavFile, deviceID) VALUES ('%%s', '%%s')" % (INBOUND_REQ_TBL)

    #selects
    SELECT = "SELECT birdID, start_time FROM %s WHERE hash = UNHEX(%%s);" % (FINGERPRINTS_TBL)
    SELECT_ALL = "SELECT birdID, start_time FROM %s" % (FINGERPRINTS_TBL)

    SELECT_SOUNDS = "SELECT birdID, wavFile, fingerprinted FROM %s" % (SOUNDS_TBL)
    SELECT_NON_FINGERPRINTED_SOUNDS = "%s WHERE fingerprinted = 0" % (SELECT_SOUNDS)
    SELECT_NUM_FINGERPRINTS = "SELECT COUNT(*) AS fingerprints FROM %s" % (FINGERPRINTS_TBL)
    SELECT_ALL_BIRDS = "SELECT birdID, englishName, genericName, specificName, Recorder, Location, Country, " \
                       "lat_lng, xenoCantoURL from %s" % (BIRDS_TBL)
    SELECT_BIRD_BY_ID = "%s WHERE birdID = '%%s' " % (SELECT_ALL_BIRDS)
    SELECT_SOUND_BY_ID = "SELECT birdID, soundType, wavFile, soundURL FROM %s WHERE birdID = %%s" % (SOUNDS_TBL)

    # table name comes from SOUNDS_TMP_TBL like the other statements
    SELECT_TMP_SOUNDS = "SELECT birdID, wavFile, soundType, soundURL FROM %s ORDER BY 1 DESC" % (SOUNDS_TMP_TBL)

    SELECT_INBOUND_REQUEST = "SELECT wavFile FROM %s WHERE requestID = %%s" % (INBOUND_REQ_TBL)
    SELECT_OUTBOUND_BIRD_ID = "SELECT birdID  FROM %s WHERE outboundID = %%s" % (OUTBOUND_MATCHES_TBL)
    SELECT_MATCH_RESULTS = "SELECT matchResults FROM %s WHERE outboundID = %%s" % (OUTBOUND_MATCHES_TBL)
    SELECT_THUMBNAIL_PIC = "SELECT imageURL FROM %s WHERE birdID = %%s limit 1" % (IMAGES_TBL)
    SELECT_IMAGES = "SELECT imageURL, siteURL FROM %s WHERE birdID = %%s" % (IMAGES_TBL)

    # update
    UPDATE_SONG_FINGERPRINTED = "UPDATE %s SET fingerprinted=1 where birdID = '%%s'" % (SOUNDS_TBL)
    UPDATE_MATCHED_REQUESTS = "UPDATE %s set status = 1 WHERE requestID = %%s" % (INBOUND_REQ_TBL)

    # delete (not written yet)
    DELETE_UNFINGERPRINTED = ""
    DELETE_ORPHANS = ""

    #list tables
    LIST_TABLES = "show tables"


    def __init__(self, hostname, username, password, database):
        """
            Connect to the MySQL server and create a dict cursor.

            hostname -- server host
            username -- account name
            password -- account password
            database -- schema to use

            Autocommit is switched off on the new connection.
            Connection errors are written to the 'databases' log and NOT
            raised; in that case self.cursor stays None (and
            self.connection too, if the connect call itself failed).
        """

        self.logging = Logging()

        # defined up front so a failed connect leaves well-defined attributes
        self.connection = None
        self.cursor = None

        #connect
        try:
            self.connection = mysql.connect(host=hostname, user=username, passwd=password,
                                            db=database, cursorclass=cursors.DictCursor)
            self.KEY_USERNAME = username
            self.KEY_DATABASE = database
            self.KEY_PASSWORD = password
            self.KEY_HOST = hostname

            self.connection.autocommit(False)
            self.cursor = self.connection.cursor()
            self.logging.write_log('databases', 'i', "successfully connected to DB. DB Version: %s" %
                                                     self.connection.get_server_info())
        except mysql.Error as e:
            # not every driver error carries (code, message); fall back to
            # str(e) instead of failing inside the error handler
            if len(e.args) >= 2:
                details = "%s: %s" % (e.args[0], e.args[1])
            else:
                details = str(e)
            self.logging.write_log('databases', 'e', "Connection error %s" % details)
Example #3
0
File: nest.py Project: oguya/bsrs
class Nest:
    """
        - convert all mp3 sounds to wav sounds -> store in wavs folder
        - go to db get birdID & wavFile & fingerprint the wavfile
        - store birdID & hash in db
    """

    SOUNDS_DIR = 'BirdSounds/'
    WAV_SOUNDS_DIR = 'BirdSounds/wavSounds/'
    MAX_PROCS = 10

    def __init__(self, **kwargs):
        """
            Create the helper objects and open the DB connection.

            kwargs:
                cd -- when truthy, print the current working directory and
                      then move the process one directory up
        """
        if kwargs.get('cd'):
            print(os.getcwd())
            os.chdir('../')

        self.fingerprinter = Fingerprinter()
        self.logger = Logging()
        self.fetcher = Fetcher()
        self.parser = Parsers()
        self.config = Configs()
        creds = self.config.get_db_creds()
        self.recognizer = Recognizer()
        self.database = MySQLDatabases(hostname=creds['hostname'], username=creds['username'],
                                       password=creds['passwd'], database=creds['db_name'])

    def mp3_to_wav(self, src_dir, extension_list=('*.mp4', '*.flv', '*.mp3')):
        """
            Convert every file in src_dir matching extension_list to wav.

            The process changes into src_dir for the conversion and moves
            one directory up afterwards, also when a conversion fails.
            Wav files are written to '../' + Nest.WAV_SOUNDS_DIR relative
            to src_dir. The list of conversions is printed and written to
            the 'fingerprint' log.
        """
        os.chdir(src_dir)
        log_lines = []
        try:
            for extension in extension_list:
                for media_file in glob.glob(extension):
                    base_name = os.path.basename(media_file)
                    wav_file = "../" + Nest.WAV_SOUNDS_DIR + os.path.splitext(base_name)[0] + '.wav'
                    log_lines.append("converting %s to %s\n" % (base_name, wav_file))
                    AudioSegment.from_file(media_file).export(wav_file, format='wav')
        finally:
            # leave src_dir even if a conversion raised
            os.chdir('../')
        logs = "".join(log_lines)
        print(logs)
        self.logger.write_log(log_file='fingerprint', log_tag='i', log_msg=logs)

    def reload_creds(self):
        """
            Drop the current DB object and build a new one from the
            credentials in the config.
        """
        self.database = None

        creds = self.config.get_db_creds()
        self.database = MySQLDatabases(hostname=creds['hostname'], username=creds['username'],
                                       password=creds['passwd'], database=creds['db_name'])

    def fetch_stuff(self):
        """
            Placeholder; does nothing at the moment.
        """
        pass
        #self.parser.parse()
        #self.parser.threading_ops()

    def chunkify(self, lst, n):
        """
        split a list into n no of parts

        Part i holds every n-th element starting at index i, so the parts
        are interleaved rather than contiguous.
        """
        return [lst[i::n] for i in range(n)]

    def fetch_images(self):
        """
        get all birds from db
            - get birdID & birdName
            - get image URLS from GAPI & store in DB
        """
        for row in self.parser.database.get_all_birds():
            self.parser.parse_GAPI(birdName=row['englishName'], birdID=row['birdID'])

    def fingerprint_sounds(self):
        """
            - go to db get birdID & wavFile & fingerprint the wavfile
            - store birdID & hash in db

            The sounds are processed in random order, all in the calling
            process (MAX_PROCS / chunkify are not used here).
        """
        cursor = self.database.get_sounds()
        print(len(cursor))

        sound_details = [
            (row['birdID'], "%s%s.wav" % (Nest.WAV_SOUNDS_DIR, row['wavFile']))
            for row in cursor
        ]
        shuffle(sound_details)

        self.fingerprint_worker(sound_details)

    def fingerprint_worker(self, sound_details):
        """
            fingerprint each song & store hash in db

            sound_details -- iterable of (birdID, wavFile) pairs. Every
            channel of each wav file is fingerprinted, then the sound is
            marked as fingerprinted in the db.
        """

        for birdID, wavFile in sound_details:
            print("birdID:  %s wavFile:  %s" % (birdID, wavFile))

            channels = self.fingerprinter.extract_channels(wavFile)
            for channel_no, channel in enumerate(channels, 1):
                t_start = time()
                logs = "now fingerprinting channel %d of song %s. BirdID: %s" % (channel_no, wavFile, birdID)
                self.logger.write_log(log_file='fingerprint', log_tag='i', log_msg=logs)
                print(logs)
                self.fingerprinter.fingerprint(channel, birdID)
                logs = "time taken: %d seconds" % (time() - t_start)
                self.logger.write_log(log_file='fingerprint', log_tag='i', log_msg=logs)
                print(logs)

            #update song as fingerprinted
            self.database.update_fingerprinted_songs(birdID=birdID)

    def process_requests(self, request_id):
        """
            get wavfile from inbound request, match it against the known
            sounds, mark the request as processed & store the match.

            Returns the id of the new outbound match row.
        """
        cursor = self.database.get_inbound_request(request_id)
        if cursor is None:
            print("cursor is None!")
            # rebuild the DB object once and retry the lookup
            self.reload_creds()
            cursor = self.database.get_inbound_request(request_id)
        else:
            print("cursor is not None!")

        wavfile = cursor['wavFile']

        bird_details = self.recognizer.recognize_file(filename=wavfile, verbose=False)
        self.database.update_processed_requests(request_id)

        # a bird_id of 0 is stored as "no match" (0), anything else as 1
        match_result = 0 if bird_details['bird_id'] == 0 else 1
        outbound_id = self.database.insert_outbound_match(request_id=request_id, birdID=bird_details['bird_id'],
                                                          matchResults=match_result)
        return outbound_id

    def get_outbound_birdID(self, outboundID):
        """
            return birdID from outbound_matches tbl for a given outboundID
        """
        cursor = self.database.get_outbound_bird_id(outboundID)
        return cursor['birdID']

    def get_match_results(self, outboundID):
        """
            return matchResults from outbound_matches tbl
        """
        cursor = self.database.get_match_results(outboundID)
        return cursor['matchResults']

    def add_request(self, wavfile, deviceID):
        """
            add new unmatched request in db

            Returns the id reported by the db layer for the new request.
        """
        return self.database.insert_inbound_request(wavfile, deviceID)

    def get_bird_details(self, birdID):
        """
            get bird details from db
        """
        return self.database.get_bird_by_id(birdID)

    def get_sound_details(self, birdID):
        """
            get sounds from db for a given birdID
            birdID, soundType, wavFile, soundURL

            Returns a dict with the keys 'soundType' and 'soundURL'.
        """
        cursor = self.database.get_sound_by_id(birdID)
        return {"soundType": cursor['soundType'], "soundURL": cursor['soundURL']}

    def get_thumbnail_pic(self, birdID):
        """
            get thumbnail img from db for a given birdID
        """
        cursor = self.database.get_thumbnail_pic(birdID)
        return cursor['imageURL']

    def get_images(self, birdID):
        """
            return a list of images from db for a given birdID

            Each entry is a dict with the keys 'imageURL' and 'siteURL'.
        """
        return [
            {"imageURL": row['imageURL'], "siteURL": row['siteURL']}
            for row in self.database.get_images(birdID)
        ]