def handle(self, *args, **options):
    """Import every ``.wav`` file found under ``args[0]`` as a Recording.

    For each file that is at least ``MIN_FILE_SIZE`` bytes long:

    1. If a Recording with the same MD5 already exists, ``save_canonical``
       is called on it and the file is skipped.
    2. The start time is taken from the file name (``get_starttime``) and
       the recorder/site codes from the path (``get_recorder_site``); the
       single Deployment whose ``start``/``end`` bracket the start time is
       looked up from whichever codes are available.
    3. A new Recording is saved, ``save_canonical`` is called on it, and it
       is divided into 60 second Snippets.

    Problems with an individual file are logged and the file is skipped.
    A KeyboardInterrupt raised while a file is being matched or saved stops
    the whole import.

    Args:
        *args: ``args[0]`` is the root directory to walk.
        **options: unused.
    """
    # Snippet geometry, in seconds.
    snippet_length = 60
    snippet_overlap = 0
    snippet_minimum = 59.9

    logging.debug("started")
    for root, dirs, files in os.walk(args[0]):
        for f in files:
            if not f.endswith('.wav'):
                continue
            path = os.path.join(root, f)
            if os.path.getsize(path) < MIN_FILE_SIZE:
                logging.info('small file ignored: %s', path)
                continue

            # First check to see if it exists
            md5 = get_md5(path)
            try:
                recording = Recording.objects.get(md5=md5)
                logging.info('recording with same MD5 already in database: %s', path)
                save_canonical(recording)
                continue
            except Recording.DoesNotExist:
                logging.debug('recording not already in database: %s', path)

            try:
                starttime = get_starttime(f)
            except ValueError:
                logging.error('unable to extract date and time from filename: %s', path)
                # Without a start time the deployment cannot be matched;
                # carrying on would reuse the previous file's value.
                continue

            try:
                recorder_code, site_code = get_recorder_site(path)
                logging.debug('recorder %s and site %s: %s', recorder_code, site_code, path)
                if not (site_code or recorder_code):
                    logging.error('no site or recorder identified in path: %s', path)
                    continue

                # Filter on whichever of the two codes were found.
                lookup = {'start__lt': starttime, 'end__gt': starttime}
                if recorder_code:
                    lookup['recorder__code'] = recorder_code
                if site_code:
                    lookup['site__code'] = site_code
                deployment = Deployment.objects.get(**lookup)
                logging.debug('found the deployment: %s', deployment)

                try:
                    Recording.objects.get(datetime=starttime, deployment=deployment)
                except Recording.DoesNotExist:
                    pass
                else:
                    logging.error('recording already exists with the same startime (%s) and deployment (%s): %s',
                        starttime, deployment, path)
                    continue

                recording = Recording(datetime=starttime, deployment=deployment, path=path)
                logging.debug('created the recording: %s', recording)
                recording.save()
                logging.info('added recording to database: %s', path)
                save_canonical(recording)

                # Now generate the snippets
                logging.debug('generate the snippets: %s', path)
                if not recording.snippets.count():
                    try:
                        with closing(wave.open(path, 'r')) as w:
                            frames = w.getnframes()
                            rate = w.getframerate()
                        length = frames / float(rate)
                        seconds = 0
                        # A trailing piece shorter than snippet_minimum is dropped.
                        while seconds + snippet_minimum < length:
                            offset = max(seconds - snippet_overlap, 0)
                            duration = min(snippet_length + 2 * snippet_overlap, length - offset)
                            Snippet(recording=recording, offset=offset, duration=duration).save()
                            seconds += snippet_length
                    except KeyboardInterrupt:
                        # Stop the whole import, not just this directory.
                        return
                    except Exception:
                        # Best effort: the recording stays, but keep the traceback.
                        logging.exception('error extracting snippet: %s', path)
            except Deployment.DoesNotExist:
                logging.error('no matching deployment found: %s', path)
            except Deployment.MultipleObjectsReturned:
                logging.error('multiple matching deployment found: %s', path)
            except IntegrityError:
                logging.error('integrity error when trying to save file: %s', path)
            except wave.Error:
                logging.error("doesn't seem to be a WAV file: %s", path)
            except RecorderSiteError:
                logging.error('unable to extract recorder or site from path: %s', path)
            except KeyboardInterrupt:
                # Stop the whole import, not just this directory.
                return
 def handle(self, *args, **options):
     """Import ``.wav`` files under ``args[0]``, creating sites and deployments.

     File names are expected to look like ``<a>_<b>_<yymmdd>_<HHMMSS>.wav``:
     the first two underscore-separated fields form the site code and the
     next two give the start time, which is localised to ``Etc/GMT-12``.

     The method makes sure the ``doc`` Organisation and one Site per site
     code exist, then goes through the files in sorted order. A Deployment is
     fetched or created whenever the site code changes or the file's date is
     more than one day after the previous file's date. Files smaller than
     ``MIN_FILE_SIZE`` are ignored; a file whose MD5 matches an existing
     Recording only has that Recording's path updated; every other file is
     saved as a new Recording and divided into 60 second Snippets.

     Problems with an individual file are logged and the file is skipped.
     A KeyboardInterrupt raised while a recording is being saved ends the
     import.

     Args:
         *args: ``args[0]`` is the root directory to walk.
         **options: unused.
     """
     # Snippet geometry, in seconds.
     snippet_length = 60
     snippet_overlap = 0
     snippet_minimum = 59.9

     logging.debug("started")
     recordings = []
     for root, dirs, files in os.walk(args[0]):
         for f in files:
             if f.endswith('.wav'):
                 recordings.append((f, os.path.join(root, f)))
     # Sorting by file name groups each site's files in date order.
     recordings.sort()

     # First make the organisation
     doc, created = Organisation.objects.get_or_create(
         code='doc', defaults={'name': 'Department of Conservation'})
     if created:
         logging.info('Made a new organisation: doc')

     # Now make the sites
     site_codes = set('_'.join(f.split('_')[:2]) for (f, path) in recordings)
     for code in site_codes:
         created = Site.objects.get_or_create(code=code, organisation=doc)[1]
         if created:
             logging.info('Made a new site: %s', code)

     # Now make the deployments
     tz = pytz.timezone('Etc/GMT-12')
     old_site = ''
     last_date = tz.localize(datetime.strptime('20000101', '%Y%m%d'))
     for f, path in recordings:
         fields = f.split('_')
         site_code = '_'.join(fields[:2])
         try:
             date = tz.localize(datetime.strptime(fields[2], '%y%m%d'))
             starttime = tz.localize(
                 datetime.strptime('_'.join(fields[2:4]), '%y%m%d_%H%M%S.wav'))
         except (IndexError, ValueError):
             # One badly named file must not abort the whole import.
             logging.error('unable to extract date and time from filename: %s', path)
             continue

         if site_code != old_site or (date - last_date).days > 1:
             old_site = site_code
             site = Site.objects.get(code=site_code, organisation=doc)
             # get_or_create already saves a newly created row.
             deployment, created = Deployment.objects.get_or_create(site=site, owner=doc,
                     start=date, start_timezone='Pacific/Auckland')
             if created:
                 logging.info('Made a new deployment: %s, %s', site, date)
         last_date = date

         if os.path.getsize(path) < MIN_FILE_SIZE:
             logging.info('small file ignored: %s', path)
             continue

         md5 = get_md5(path)
         try:
             recording = Recording.objects.get(md5=md5)
             logging.info('recording with same MD5 already in database: %s', path)
             # Same audio seen before: just point the record at this copy.
             recording.path = path
             recording.save()
             continue
         except Recording.DoesNotExist:
             logging.debug('recording not already in database: %s', path)

         try:
             try:
                 Recording.objects.get(datetime=starttime, deployment=deployment)
             except Recording.DoesNotExist:
                 pass
             else:
                 logging.error('recording already exists with the same startime (%s) and deployment (%s): %s',
                     starttime, deployment, path)
                 continue

             recording = Recording(datetime=starttime, deployment=deployment, path=path)
             logging.debug('created the recording: %s', recording)
             recording.save()

             # Now generate the snippets
             logging.debug('generate the snippets: %s', path)
             if not recording.snippets.count():
                 try:
                     with closing(wave.open(path, 'r')) as w:
                         frames = w.getnframes()
                         rate = w.getframerate()
                     length = frames / float(rate)
                     seconds = 0
                     # A trailing piece shorter than snippet_minimum is dropped.
                     while seconds + snippet_minimum < length:
                         offset = max(seconds - snippet_overlap, 0)
                         duration = min(snippet_length + 2 * snippet_overlap, length - offset)
                         Snippet(recording=recording, offset=offset, duration=duration).save()
                         seconds += snippet_length
                 except KeyboardInterrupt:
                     break
                 except Exception:
                     # Best effort: the recording stays, but keep the traceback.
                     logging.exception('error extracting snippet: %s', path)
         except Deployment.DoesNotExist:
             logging.error('no matching deployment found: %s', path)
         except Deployment.MultipleObjectsReturned:
             logging.error('multiple matching deployment found: %s', path)
         except IntegrityError:
             logging.error('integrity error when trying to save file: %s', path)
         except wave.Error:
             logging.error("doesn't seem to be a WAV file: %s", path)
         except RecorderSiteError:
             logging.error('unable to extract recorder or site from path: %s', path)
         except KeyboardInterrupt:
             break
Beispiel #3
0
    def handle(self, *args, **options):
        """Import ``.wav`` files under ``args[0]``, creating sites and deployments.

        File names are expected to look like
        ``<a>_<b>_<yymmdd>_<HHMMSS>.wav``: the first two underscore-separated
        fields form the site code and the next two give the start time,
        which is localised to ``Etc/GMT-12``.

        The method makes sure the ``doc`` Organisation and one Site per site
        code exist, then goes through the files in sorted order. A Deployment
        is fetched or created whenever the site code changes or the file's
        date is more than one day after the previous file's date. Files
        smaller than ``MIN_FILE_SIZE`` are ignored; a file whose MD5 matches
        an existing Recording only has that Recording's path updated; every
        other file is saved as a new Recording and divided into 60 second
        Snippets.

        Problems with an individual file are logged and the file is skipped.
        A KeyboardInterrupt raised while a recording is being saved ends the
        import.

        Args:
            *args: ``args[0]`` is the root directory to walk.
            **options: unused.
        """
        # Snippet geometry, in seconds.
        snippet_length = 60
        snippet_overlap = 0
        snippet_minimum = 59.9

        logging.debug("started")
        recordings = []
        for root, dirs, files in os.walk(args[0]):
            for f in files:
                if f.endswith('.wav'):
                    recordings.append((f, os.path.join(root, f)))
        # Sorting by file name groups each site's files in date order.
        recordings.sort()

        # First make the organisation
        doc, created = Organisation.objects.get_or_create(
            code='doc', defaults={'name': 'Department of Conservation'})
        if created:
            logging.info('Made a new organisation: doc')

        # Now make the sites
        site_codes = set(
            '_'.join(f.split('_')[:2]) for (f, path) in recordings)
        for code in site_codes:
            created = Site.objects.get_or_create(code=code,
                                                 organisation=doc)[1]
            if created:
                logging.info('Made a new site: %s', code)

        # Now make the deployments
        tz = pytz.timezone('Etc/GMT-12')
        old_site = ''
        last_date = tz.localize(datetime.strptime('20000101', '%Y%m%d'))
        for f, path in recordings:
            fields = f.split('_')
            site_code = '_'.join(fields[:2])
            try:
                date = tz.localize(datetime.strptime(fields[2], '%y%m%d'))
                starttime = tz.localize(
                    datetime.strptime('_'.join(fields[2:4]),
                                      '%y%m%d_%H%M%S.wav'))
            except (IndexError, ValueError):
                # One badly named file must not abort the whole import.
                logging.error(
                    'unable to extract date and time from filename: %s',
                    path)
                continue

            if site_code != old_site or (date - last_date).days > 1:
                old_site = site_code
                site = Site.objects.get(code=site_code, organisation=doc)
                # get_or_create already saves a newly created row.
                deployment, created = Deployment.objects.get_or_create(
                    site=site,
                    owner=doc,
                    start=date,
                    start_timezone='Pacific/Auckland')
                if created:
                    logging.info('Made a new deployment: %s, %s', site, date)
            last_date = date

            if os.path.getsize(path) < MIN_FILE_SIZE:
                logging.info('small file ignored: %s', path)
                continue

            md5 = get_md5(path)
            try:
                recording = Recording.objects.get(md5=md5)
                logging.info('recording with same MD5 already in database: %s',
                             path)
                # Same audio seen before: just point the record at this copy.
                recording.path = path
                recording.save()
                continue
            except Recording.DoesNotExist:
                logging.debug('recording not already in database: %s', path)

            try:
                try:
                    Recording.objects.get(datetime=starttime,
                                          deployment=deployment)
                except Recording.DoesNotExist:
                    pass
                else:
                    logging.error(
                        'recording already exists with the same startime (%s) and deployment (%s): %s',
                        starttime, deployment, path)
                    continue

                recording = Recording(datetime=starttime,
                                      deployment=deployment,
                                      path=path)
                logging.debug('created the recording: %s', recording)
                recording.save()

                # Now generate the snippets
                logging.debug('generate the snippets: %s', path)
                if not recording.snippets.count():
                    try:
                        with closing(wave.open(path, 'r')) as w:
                            frames = w.getnframes()
                            rate = w.getframerate()
                        length = frames / float(rate)
                        seconds = 0
                        # A trailing piece shorter than snippet_minimum is
                        # dropped.
                        while seconds + snippet_minimum < length:
                            offset = max(seconds - snippet_overlap, 0)
                            duration = min(
                                snippet_length + 2 * snippet_overlap,
                                length - offset)
                            Snippet(recording=recording,
                                    offset=offset,
                                    duration=duration).save()
                            seconds += snippet_length
                    except KeyboardInterrupt:
                        break
                    except Exception:
                        # Best effort: the recording stays, but keep the
                        # traceback.
                        logging.exception('error extracting snippet: %s',
                                          path)
            except Deployment.DoesNotExist:
                logging.error('no matching deployment found: %s', path)
            except Deployment.MultipleObjectsReturned:
                logging.error('multiple matching deployment found: %s', path)
            except IntegrityError:
                logging.error('integrity error when trying to save file: %s',
                              path)
            except wave.Error:
                logging.error("doesn't seem to be a WAV file: %s", path)
            except RecorderSiteError:
                logging.error(
                    'unable to extract recorder or site from path: %s', path)
            except KeyboardInterrupt:
                break
    def handle(self, *args, **options):
        """Import every ``.wav`` file under a recordings directory.

        The directory walked is ``args[0]`` when a positional argument is
        given, otherwise the original hard-coded recordings path.

        For each file that is at least ``MIN_FILE_SIZE`` bytes long, the
        start time is taken from the file name (``get_starttime``) and the
        recorder/site codes from the file name (``get_recorder_site``). The
        single Deployment whose ``start``/``end`` bracket the start time is
        looked up from whichever codes are available, then a new Recording
        is saved and divided into 60 second Snippets.

        Problems with an individual file are logged and the file is skipped.
        A KeyboardInterrupt raised while a file is being matched or saved
        stops the whole import.

        NOTE: this variant does not check for an existing Recording with the
        same MD5 and does not call ``save_canonical``.

        Args:
            *args: optional; ``args[0]`` overrides the default root
                directory.
            **options: unused.
        """
        default_root = "/home/jasonhideki/songscape/www/recordings/recordings"
        top = args[0] if args else default_root

        # Snippet geometry, in seconds.
        snippet_length = 60
        snippet_overlap = 0
        snippet_minimum = 59.9

        logging.debug("started")
        for root, dirs, files in os.walk(top):
            for f in files:
                if not f.endswith('.wav'):
                    continue
                path = os.path.join(root, f)
                if os.path.getsize(path) < MIN_FILE_SIZE:
                    logging.info('small file ignored: %s', path)
                    continue

                try:
                    starttime = get_starttime(f)
                except ValueError:
                    logging.error(
                        'unable to extract date and time from filename: %s',
                        path)
                    # Without a start time the deployment cannot be matched;
                    # carrying on would reuse the previous file's value.
                    continue

                try:
                    recorder_code, site_code = get_recorder_site(f)
                    logging.debug('recorder %s and site %s: %s',
                                  recorder_code, site_code, f)
                    if not (site_code or recorder_code):
                        logging.error(
                            'no site or recorder identified in path: %s',
                            path)
                        continue

                    # Filter on whichever of the two codes were found.
                    lookup = {'start__lt': starttime, 'end__gt': starttime}
                    if recorder_code:
                        lookup['recorder__code'] = recorder_code
                    if site_code:
                        lookup['site__code'] = site_code
                    deployment = Deployment.objects.get(**lookup)
                    logging.debug('found the deployment: %s', deployment)

                    try:
                        Recording.objects.get(datetime=starttime,
                                              deployment=deployment)
                    except Recording.DoesNotExist:
                        pass
                    else:
                        logging.error(
                            'recording already exists with the same startime (%s) and deployment (%s): %s',
                            starttime, deployment, path)
                        continue

                    recording = Recording(datetime=starttime,
                                          deployment=deployment,
                                          path=path)
                    logging.debug('created the recording: %s', recording)
                    recording.save()
                    logging.info('added recording to database: %s', path)

                    # Now generate the snippets
                    logging.debug('generate the snippets: %s', path)
                    if not recording.snippets.count():
                        try:
                            with closing(wave.open(path, 'r')) as w:
                                frames = w.getnframes()
                                rate = w.getframerate()
                            length = frames / float(rate)
                            seconds = 0
                            # A trailing piece shorter than snippet_minimum
                            # is dropped.
                            while seconds + snippet_minimum < length:
                                offset = max(seconds - snippet_overlap, 0)
                                duration = min(
                                    snippet_length + 2 * snippet_overlap,
                                    length - offset)
                                Snippet(recording=recording,
                                        offset=offset,
                                        duration=duration).save()
                                seconds += snippet_length
                        except KeyboardInterrupt:
                            # Stop the whole import, not just this directory.
                            return
                        except Exception:
                            # Best effort: the recording stays, but keep the
                            # traceback.
                            logging.exception('error extracting snippet: %s',
                                              path)
                except Deployment.DoesNotExist:
                    logging.error('no matching deployment found: %s', path)
                except Deployment.MultipleObjectsReturned:
                    logging.error('multiple matching deployment found: %s',
                                  path)
                except IntegrityError:
                    logging.error(
                        'integrity error when trying to save file: %s',
                        path)
                except wave.Error:
                    logging.error("doesn't seem to be a WAV file: %s",
                                  path)
                except RecorderSiteError:
                    logging.error(
                        'unable to extract recorder or site from path: %s',
                        path)
                except KeyboardInterrupt:
                    # Stop the whole import, not just this directory.
                    return
                except Exception:
                    # Catch-all so one bad file cannot abort the run; the
                    # traceback is logged so the cause is not lost.
                    logging.exception(
                        'Hmmm. Something weird happened with this file: %s',
                        path)