class Parsers:
    """
    Parse the JSON listing and store the important parts.

    Intended flow (pseudo-code):

        pages <- number of pages
        page_counter <- 1
        while page_counter < pages:
            get_page(page_counter)
            for recording in recordings:
                get_recording_details()
                get_audio_file()
            page_counter++
    """

    MAX_IMAGES_URL = 4
    MAX_NO_THREADS = 100
    CONFIG_FILE = 'bsrs.cfg'
    BIRD_SOUNDS_DIR = 'BirdSounds/'

    def __init__(self):
        """
        Build the helpers, load the config file, connect to the database
        and create the work queues.

        Raises:
            Exception: propagated from load_config_file() when the config
                file cannot be loaded.
        """
        self.fetcher = Fetcher()
        self.logger = Logging()
        self.config_file = Parsers.CONFIG_FILE
        self.config = ConfigParser()
        self.load_config_file()

        creds = self.get_db_creds()
        self.database = MySQLDatabases(hostname=creds['hostname'],
                                       username=creds['username'],
                                       password=creds['passwd'],
                                       database=creds['db_name'])

        # queues
        # Each name gets its own Queue(): a chained assignment
        # (a = b = Queue()) would bind both names to one shared queue and
        # mix the two kinds of items together.
        self.birdID_queue = Queue()
        self.wavFile_queue = Queue()
        self.soundtype_queue = Queue()
        self.soundURL_queue = Queue()

    def get_db_creds(self):
        """
        Load the db credentials from the 'database' section of the config.

        Returns:
            dict with the keys 'hostname', 'username', 'passwd', 'db_name'.
        """
        return {
            'hostname': self.config.get('database', 'db_hostname'),
            'username': self.config.get('database', 'db_username'),
            'passwd': self.config.get('database', 'db_password'),
            'db_name': self.config.get('database', 'db_dbname'),
        }

    def load_config_file(self):
        """
        Load self.config_file into self.config.

        Raises:
            Exception: when the file is missing or cannot be parsed; the
                same message is written to the 'fetcher' log first.
        """
        try:
            # read() does not raise for a missing file; it returns the list
            # of files it managed to parse, so that list must be checked.
            loaded = self.config.read(self.config_file)
        except Exception as e:
            loaded = []
            info_msg = "config file %s unreadable: %s" % (self.config_file, e)
        else:
            info_msg = "config file %s missing" % self.config_file

        if not loaded:
            self.logger.write_log('fetcher', 'e', info_msg)
            raise Exception(info_msg)

        info_msg = "Loaded config file %s" % self.config_file
        self.logger.write_log('fetcher', 'i', info_msg)
class MySQLDatabases:
    # TODO add sql queries descriptions
    """
    MySQL connection wrapper plus the SQL statements used against it.

    The class attributes below are statement templates: the table name is
    substituted when the class is defined, and every remaining ``%s`` is a
    placeholder left for the caller to fill.

    NOTE(review): some statements wrap their placeholders in quotes
    ('%s') and others leave them bare (%s). Which form is correct depends
    on how the query methods bind their parameters; those methods are not
    visible here, so the templates are kept exactly as they were - confirm.
    """

    # db conn keys
    CONNECTION = "connection"
    KEY_USERNAME = "******"
    KEY_PASSWORD = "******"
    KEY_DATABASE = "db_name"
    KEY_HOST = "db_host"

    # tables to use
    FINGERPRINTS_TBL = "fingerprints"
    BIRDS_TBL = "birds"
    IMAGES_TBL = "images"
    SOUNDS_TBL = "sounds"
    SOUNDS_TMP_TBL = "tmp_sounds"
    STATS_TBL = "stats"
    INBOUND_REQ_TBL = "inbound_requests"
    OUTBOUND_MATCHES_TBL = "outbound_matches"

    # tbl field names
    FIELD_BIRDNAME = "englishName"

    # sql stmts
    # TODO add queries for select,insert,drops,updates

    # inserts
    INSERT_FINGERPRINT = "INSERT INTO %s(birdID, hash, start_time) values(%%s,unhex(%%s), %%s)" % (FINGERPRINTS_TBL)
    INSERT_IMAGES = " INSERT INTO %s(birdID, imageURL, siteURL) VALUES ('%%s', '%%s','%%s')" % (IMAGES_TBL)
    INSERT_SOUNDS = "INSERT INTO %s(birdID, soundType, wavFile, soundURL) values" \
                    "('%%s','%%s','%%s', '%%s')" % (SOUNDS_TBL)
    INSERT_BIRDS = "INSERT INTO %s(englishName, genericName, specificName, Recorder, Location, Country, lat_lng, xenoCantoURL) " \
                   "values('%%s', '%%s', '%%s', '%%s', '%%s', '%%s', '%%s', '%%s')" % (BIRDS_TBL)
    INSERT_TMP_SOUNDS = "INSERT INTO %s(birdID, wavFile, soundType, soundURL) values('%%s', '%%s', '%%s', '%%s')" % (
        SOUNDS_TMP_TBL)
    INSERT_STATS = "insert into %s(birdID, match_time, confidence, offset) values(%%s, %%s, %%s, %%s)" % (STATS_TBL)
    INSERT_OUTBOUND_MATCH = "INSERT INTO %s (requestID, birdID, matchResults) values(%%s, %%s, %%s)" % (
        OUTBOUND_MATCHES_TBL)
    INSERT_INBOUND_REQUEST = "INSERT INTO %s (wavFile, deviceID) VALUES ('%%s', '%%s')" % (INBOUND_REQ_TBL)

    # selects
    SELECT = "SELECT birdID, start_time FROM %s WHERE hash = UNHEX(%%s);" % (FINGERPRINTS_TBL)
    SELECT_ALL = "SELECT birdID, start_time FROM %s" % (FINGERPRINTS_TBL)
    SELECT_SOUNDS = "SELECT birdID, wavFile, fingerprinted FROM %s" % (SOUNDS_TBL)
    SELECT_NON_FINGERPRINTED_SOUNDS = "%s WHERE fingerprinted = 0" % (SELECT_SOUNDS)
    SELECT_NUM_FINGERPRINTS = "SELECT COUNT(*) AS fingerprints FROM %s" % (FINGERPRINTS_TBL)
    SELECT_ALL_BIRDS = "SELECT birdID, englishName, genericName, specificName, Recorder, Location, Country, " \
                       "lat_lng, xenoCantoURL from %s" % (BIRDS_TBL)
    SELECT_BIRD_BY_ID = "%s WHERE birdID = '%%s' " % (SELECT_ALL_BIRDS)
    SELECT_SOUND_BY_ID = "SELECT birdID, soundType, wavFile, soundURL FROM %s WHERE birdID = %%s" % (SOUNDS_TBL)
    # Built from SOUNDS_TMP_TBL like every other statement, rather than
    # repeating the table name as a literal; the resulting text is the same.
    SELECT_TMP_SOUNDS = "SELECT birdID, wavFile, soundType, soundURL FROM %s ORDER BY 1 DESC" % (SOUNDS_TMP_TBL)
    SELECT_INBOUND_REQUEST = "SELECT wavFile FROM %s WHERE requestID = %%s" % (INBOUND_REQ_TBL)
    SELECT_OUTBOUND_BIRD_ID = "SELECT birdID FROM %s WHERE outboundID = %%s" % (OUTBOUND_MATCHES_TBL)
    SELECT_MATCH_RESULTS = "SELECT matchResults FROM %s WHERE outboundID = %%s" % (OUTBOUND_MATCHES_TBL)
    SELECT_THUMBNAIL_PIC = "SELECT imageURL FROM %s WHERE birdID = %%s limit 1" % (IMAGES_TBL)
    SELECT_IMAGES = "SELECT imageURL, siteURL FROM %s WHERE birdID = %%s" % (IMAGES_TBL)

    # update
    UPDATE_SONG_FINGERPRINTED = "UPDATE %s SET fingerprinted=1 where birdID = '%%s'" % (SOUNDS_TBL)
    UPDATE_MATCHED_REQUESTS = "UPDATE %s set status = 1 WHERE requestID = %%s" % (INBOUND_REQ_TBL)

    # delete
    DELETE_UNFINGERPRINTED = ""
    DELETE_ORPHANS = ""

    # list tables
    LIST_TABLES = "show tables"

    def __init__(self, hostname, username, password, database):
        """
        Open a connection to the given MySQL database.

        On success the connection (autocommit off) and a cursor are kept on
        the instance, and the KEY_* attributes are overwritten with the
        values that were used to connect.

        A connection error is logged, not raised. In that case
        self.connection and self.cursor stay None and the KEY_* attributes
        keep their class-level values.

        Args:
            hostname: database host.
            username: database user.
            password: password for that user.
            database: name of the database to select.
        """
        self.logging = Logging()

        # Defined up front so the attributes exist even when connecting fails.
        self.connection = None
        self.cursor = None

        # connect
        try:
            self.connection = mysql.connect(host=hostname, user=username, passwd=password,
                                            db=database, cursorclass=cursors.DictCursor)
            self.KEY_USERNAME = username
            self.KEY_DATABASE = database
            self.KEY_PASSWORD = password
            self.KEY_HOST = hostname
            self.connection.autocommit(False)
            self.cursor = self.connection.cursor()
            self.logging.write_log('databases', 'i',
                                   "successfully connected to DB. DB Version: %s"
                                   % self.connection.get_server_info())
        except mysql.Error as e:
            # Do not assume the error carries exactly (code, message);
            # indexing a shorter args tuple would raise inside the handler.
            if len(e.args) >= 2:
                error_msg = "Connection error %d: %s" % (e.args[0], e.args[1])
            else:
                error_msg = "Connection error: %s" % (e,)
            self.logging.write_log('databases', 'e', error_msg)
class Nest:
    """
    Ties together conversion, fingerprinting and request matching.

    - convert all mp3 sounds to wav sounds -> store in wavs folder
    - go to db get birdID & wavFile & fingerprint the wavfile
    - store birdID & hash in db
    """

    SOUNDS_DIR = 'BirdSounds/'
    WAV_SOUNDS_DIR = 'BirdSounds/wavSounds/'
    MAX_PROCS = 10

    def __init__(self, **kwargs):
        """
        Build the collaborators and open the database connection.

        Keyword Args:
            cd: when truthy, print the current working directory and then
                move one directory up before anything else is created.
        """
        if kwargs.get('cd'):
            print(os.getcwd())
            os.chdir('../')

        self.fingerprinter = Fingerprinter()
        self.logger = Logging()
        self.fetcher = Fetcher()
        self.parser = Parsers()
        self.config = Configs()
        creds = self.config.get_db_creds()
        self.recognizer = Recognizer()
        self.database = self._connect(creds)

    @staticmethod
    def _connect(creds):
        """Open a MySQLDatabases connection from a credentials dict."""
        return MySQLDatabases(hostname=creds['hostname'],
                              username=creds['username'],
                              password=creds['passwd'],
                              database=creds['db_name'])

    def mp3_to_wav(self, src_dir, extension_list=('*.mp4', '*.flv', '*.mp3')):
        """
        Convert every media file in src_dir matching extension_list to wav.

        The working directory is changed to src_dir for the conversion and
        moved one level up afterwards. Output files are written to
        "../" + WAV_SOUNDS_DIR, relative to src_dir. The list of
        conversions is printed and written to the 'fingerprint' log.

        Args:
            src_dir: directory holding the source media files.
            extension_list: glob patterns selecting the files to convert.
        """
        messages = []
        os.chdir(src_dir)
        try:
            for extension in extension_list:
                for media_file in glob.glob(extension):
                    base_name = os.path.basename(media_file)
                    wav_file = "../" + Nest.WAV_SOUNDS_DIR + os.path.splitext(base_name)[0] + '.wav'
                    messages.append("converting %s to %s\n" % (base_name, wav_file))
                    AudioSegment.from_file(media_file).export(wav_file, format='wav')
        finally:
            # Step back out even when a conversion fails, so an error does
            # not leave the process sitting inside src_dir.
            os.chdir('../')

        logs = "".join(messages)
        print(logs)
        self.logger.write_log(log_file='fingerprint', log_tag='i', log_msg=logs)

    def reload_creds(self):
        """Drop the current database object and connect again using the credentials from self.config."""
        self.database = None
        self.database = self._connect(self.config.get_db_creds())

    def fetch_stuff(self):
        """Placeholder; the parser calls it used to make are disabled."""
        # self.parser.parse()
        # self.parser.threading_ops()
        pass

    def chunkify(self, lst, n):
        """
        split a list into n no of parts

        Part i holds every n-th element starting at index i, so the parts
        are interleaved rather than contiguous.
        """
        return [lst[i::n] for i in range(n)]

    def fetch_images(self):
        """
        get all birds from db
        - get birdID & birdName
        - get image URLS from GAPI & store in DB
        """
        for row in self.parser.database.get_all_birds():
            self.parser.parse_GAPI(birdName=row['englishName'], birdID=row['birdID'])

    def fingerprint_sounds(self):
        """
        - go to db get birdID & wavFile & fingerprint the wavfile
        - store birdID & hash in db

        The sounds are processed one after another in a shuffled order.
        """
        cursor = self.database.get_sounds()
        print(len(cursor))

        sound_details = [
            (row['birdID'], "%s%s.wav" % (Nest.WAV_SOUNDS_DIR, row['wavFile']))
            for row in cursor
        ]
        shuffle(sound_details)

        # NOTE: an earlier multi-process variant (MAX_PROCS workers, one
        # chunk from chunkify() each) was disabled; everything runs in
        # this process.
        self.fingerprint_worker(sound_details)

    def fingerprint_worker(self, sound_details):
        """
        fingerprint each song & store hash in db

        Args:
            sound_details: iterable of (birdID, wavFile) pairs.
        """
        for birdID, wavFile in sound_details:
            print("birdID:  %s wavFile:  %s" % (birdID, wavFile))
            channels = self.fingerprinter.extract_channels(wavFile)

            for channel_no, channel in enumerate(channels, 1):
                t_start = time()
                logs = "now fingerprinting channel %d of song %s. BirdID: %s" % (channel_no, wavFile, birdID)
                self.logger.write_log(log_file='fingerprint', log_tag='i', log_msg=logs)
                print(logs)

                self.fingerprinter.fingerprint(channel, birdID)

                logs = "time taken: %d seconds" % (time() - t_start)
                self.logger.write_log(log_file='fingerprint', log_tag='i', log_msg=logs)
                print(logs)

            # update song as fingerprinted (once per song, after its channels)
            self.database.update_fingerprinted_songs(birdID=birdID)

    def process_requests(self, request_id):
        """
        get wavfile from inbound request, match it & record the outcome

        Args:
            request_id: id of the inbound request to process.

        Returns:
            The value returned by insert_outbound_match for the new row.
        """
        cursor = self.database.get_inbound_request(request_id)
        if cursor is None:
            print("cursor is None!")
            # Reconnect and try the lookup one more time.
            self.reload_creds()
            cursor = self.database.get_inbound_request(request_id)
        else:
            print("cursor is not None!")

        # NOTE(review): if the retry also returns None, the subscript below
        # raises TypeError.
        wavfile = cursor['wavFile']
        bird_details = self.recognizer.recognize_file(filename=wavfile, verbose=False)
        self.database.update_processed_requests(request_id)

        # a bird_id of 0 is stored as matchResults 0, anything else as 1
        match_result = 0 if bird_details['bird_id'] == 0 else 1
        outbound_id = self.database.insert_outbound_match(request_id=request_id,
                                                          birdID=bird_details['bird_id'],
                                                          matchResults=match_result)
        return outbound_id

    def get_outbound_birdID(self, outboundID):
        """
        return birdID from outbound_matches tbl for the given outboundID
        """
        cursor = self.database.get_outbound_bird_id(outboundID)
        return cursor['birdID']

    def get_match_results(self, outboundID):
        """
        return matchResults from outbound_matches tbl
        """
        cursor = self.database.get_match_results(outboundID)
        return cursor['matchResults']

    def add_request(self, wavfile, deviceID):
        """
        add new unmatched request in db and return what the insert returns
        """
        return self.database.insert_inbound_request(wavfile, deviceID)

    def get_bird_details(self, birdID):
        """
        get bird details from db
        """
        return self.database.get_bird_by_id(birdID)

    def get_sound_details(self, birdID):
        """
        get sounds from db for a given birdID

        Returns:
            dict with the keys 'soundType' and 'soundURL'.
        """
        cursor = self.database.get_sound_by_id(birdID)
        return {"soundType": cursor['soundType'], "soundURL": cursor['soundURL']}

    def get_thumbnail_pic(self, birdID):
        """
        get thumbnail img from db for a given birdID
        """
        cursor = self.database.get_thumbnail_pic(birdID)
        return cursor['imageURL']

    def get_images(self, birdID):
        """
        return a list of images from db for a given birdID

        Each entry is a dict with the keys 'imageURL' and 'siteURL'.
        """
        return [
            {"imageURL": row['imageURL'], "siteURL": row['siteURL']}
            for row in self.database.get_images(birdID)
        ]