Beispiel #1
0
    def handle(self, *args, **options):
        """Import the ``.wav`` files found under ``args[0]`` into the database.

        The directory tree rooted at ``args[0]`` is walked.  File names are
        split on ``_``: the first two fields joined by ``_`` form the site
        code, the third field is parsed as a ``yymmdd`` date and the third and
        fourth fields together as ``yymmdd_HHMMSS.wav``.  Files whose name
        cannot be parsed this way are logged and skipped instead of aborting
        the whole run.

        The remaining files are processed in sorted order:

        * the ``doc`` organisation and one Site per site code are fetched or
          created;
        * a Deployment is fetched or created whenever the site code changes
          or the file date is two or more days after the previous file's;
        * files smaller than ``MIN_FILE_SIZE`` are skipped;
        * if a Recording with the same MD5 exists, only its path is updated;
        * otherwise a Recording is saved and consecutive Snippets of up to 60
          seconds are created (a new snippet is only started while more than
          59.9 seconds of audio remain).

        Per-file failures are logged and the loop moves on to the next file;
        a KeyboardInterrupt stops the import.

        Raises:
            IndexError: if no directory was passed in ``args``.
        """
        logging.debug("started")
        tz = pytz.timezone('Etc/GMT-12')

        # Collect the recordings.  The file name is parsed here, up front, so
        # that a single badly named file is reported and skipped rather than
        # raising from the middle of the import.
        recordings = []
        for root, dirs, files in os.walk(args[0]):
            for f in files:
                if not f.endswith('.wav'):
                    continue
                path = os.path.join(root, f)
                parts = f.split('_')
                try:
                    date = tz.localize(datetime.strptime(parts[2], '%y%m%d'))
                    starttime = tz.localize(
                        datetime.strptime('_'.join(parts[2:4]),
                                          '%y%m%d_%H%M%S.wav'))
                except (IndexError, ValueError):
                    logging.error(
                        'unable to extract date and time from filename: %s',
                        path)
                    continue
                recordings.append(
                    (f, path, '_'.join(parts[:2]), date, starttime))
        # Each tuple starts with (file name, path), so this orders the
        # recordings by file name and then by path.
        recordings.sort()

        # First make the organisation
        doc, created = Organisation.objects.get_or_create(
            code='doc', defaults={'name': 'Department of Conservation'})
        if created:
            logging.info('Made a new organisation: doc')

        # Now make the sites, remembering them so the main loop does not have
        # to query the database again for every change of site.
        sites = {}
        for code in set(entry[2] for entry in recordings):
            site, created = Site.objects.get_or_create(code=code,
                                                       organisation=doc)
            if created:
                logging.info('Made a new site: %s', code)
            sites[code] = site

        # Now make the deployments and the recordings
        old_site = ''
        last_date = tz.localize(datetime.strptime('20000101', '%Y%m%d'))
        for f, path, site_code, date, starttime in recordings:
            # A change of site, or a gap in the dates, starts a new
            # deployment.  old_site starts out empty, so the first file always
            # takes this branch and `deployment` is bound before it is used.
            if site_code != old_site or (date - last_date).days > 1:
                old_site = site_code
                site = sites[site_code]
                # get_or_create() already saves a newly created object.
                deployment, created = Deployment.objects.get_or_create(
                    site=site,
                    owner=doc,
                    start=date,
                    start_timezone='Pacific/Auckland')
                if created:
                    logging.info('Made a new deployment: %s, %s', site, date)
            last_date = date

            if os.path.getsize(path) < MIN_FILE_SIZE:
                logging.info('small file ignored: %s', path)
                continue

            # A recording that is already known by content only gets its path
            # refreshed.
            md5 = get_md5(path)
            try:
                recording = Recording.objects.get(md5=md5)
            except Recording.DoesNotExist:
                logging.debug('recording not already in database: %s', path)
            else:
                logging.info('recording with same MD5 already in database: %s',
                             path)
                recording.path = path
                recording.save()
                continue

            try:
                try:
                    Recording.objects.get(datetime=starttime,
                                          deployment=deployment)
                    logging.error(
                        'recording already exists with the same startime (%s) and deployment (%s): %s',
                        starttime, deployment, path)
                    continue
                except Recording.DoesNotExist:
                    pass
                recording = Recording(datetime=starttime,
                                      deployment=deployment,
                                      path=path)
                logging.debug('created the recording: %s', recording)
                recording.save()
                logging.debug('generate the snippets: %s', path)
                # Now generate the snippets
                if not recording.snippets.count():
                    try:
                        with closing(wave.open(path, 'r')) as w:
                            frames = w.getnframes()
                            rate = w.getframerate()
                            length = frames / float(rate)
                            snippet_length = 60
                            snippet_overlap = 0
                            snippet_minimum = 59.9
                            seconds = 0
                            while seconds + snippet_minimum < length:
                                offset = max(seconds - snippet_overlap, 0)
                                duration = min(
                                    snippet_length + 2 * snippet_overlap,
                                    length - offset)
                                Snippet(recording=recording,
                                        offset=offset,
                                        duration=duration).save()
                                seconds += snippet_length
                    except KeyboardInterrupt:
                        break
                    except Exception:
                        # Best effort: the recording itself is kept.  The
                        # traceback is logged so the cause is not lost.
                        logging.exception('error extracting snippet: %s',
                                          path)
            # NOTE(review): nothing in the try body visibly raises the
            # Deployment.* or RecorderSiteError exceptions, and wave errors
            # are already handled above; these handlers are kept as they were.
            except Deployment.DoesNotExist:
                logging.error('no matching deployment found: %s', path)
            except Deployment.MultipleObjectsReturned:
                logging.error('multiple matching deployment found: %s', path)
            except IntegrityError:
                logging.error('integrity error when trying to save file: %s',
                              path)
            except wave.Error:
                logging.error("doesn't seem to be a WAV file: %s", path)
            except RecorderSiteError:
                logging.error(
                    'unable to extract recorder or site from path: %s', path)
            except KeyboardInterrupt:
                break
    def handle(self, *args, **options):
        """Import ``.wav`` files from a directory tree as Recordings.

        The tree walked is ``args[0]`` when a directory is passed, otherwise
        the historical hard-coded recordings directory.

        For every ``.wav`` file that is at least ``MIN_FILE_SIZE`` bytes:

        * the start time is taken from the file name with ``get_starttime``;
          files for which that raises ValueError are logged and skipped;
        * the recorder and site codes are taken from the file name with
          ``get_recorder_site`` and used, together with the start time, to
          look up the single Deployment whose start/end interval contains it;
        * if no Recording exists for that start time and deployment, one is
          saved and consecutive Snippets of up to 60 seconds are created (a
          new snippet is only started while more than 59.9 seconds of audio
          remain).

        Duplicate detection by MD5 is not performed by this method.

        Per-file failures are logged and the walk continues with the next
        file; a KeyboardInterrupt stops the whole import.
        """
        logging.debug("started")
        if args:
            top = args[0]
        else:
            # Default location, used when no directory is given.
            top = "/home/jasonhideki/songscape/www/recordings/recordings"
        for root, dirs, files in os.walk(top):
            for f in files:
                if not f.endswith('.wav'):
                    continue
                path = os.path.join(root, f)
                if os.path.getsize(path) < MIN_FILE_SIZE:
                    logging.info('small file ignored: %s', path)
                    continue
                try:
                    starttime = get_starttime(f)
                except ValueError:
                    logging.error(
                        'unable to extract date and time from filename: %s',
                        path)
                    # Without a start time the file cannot be matched to a
                    # deployment; carrying on would reuse the previous file's
                    # start time (or fail on an unbound name).
                    continue
                try:
                    recorder_code, site_code = get_recorder_site(f)
                    logging.debug('recorder %s and site %s: %s',
                                  recorder_code, site_code, f)
                    if not (recorder_code or site_code):
                        logging.error(
                            'no site or recorder identified in path: %s',
                            path)
                        continue
                    # Filter on whichever of the two codes is available; the
                    # deployment must span the start time of the recording.
                    lookup = {'start__lt': starttime, 'end__gt': starttime}
                    if recorder_code:
                        lookup['recorder__code'] = recorder_code
                    if site_code:
                        lookup['site__code'] = site_code
                    deployment = Deployment.objects.get(**lookup)
                    logging.debug('found the deployment: %s', deployment)
                    try:
                        Recording.objects.get(datetime=starttime,
                                              deployment=deployment)
                        logging.error(
                            'recording already exists with the same startime (%s) and deployment (%s): %s',
                            starttime, deployment, path)
                        continue
                    except Recording.DoesNotExist:
                        pass
                    recording = Recording(datetime=starttime,
                                          deployment=deployment,
                                          path=path)
                    logging.debug('created the recording: %s', recording)
                    recording.save()
                    logging.info('added recording to database: %s', path)
                    logging.debug('generate the snippets: %s', path)
                    # Now generate the snippets
                    if not recording.snippets.count():
                        try:
                            with closing(wave.open(path, 'r')) as w:
                                frames = w.getnframes()
                                rate = w.getframerate()
                                length = frames / float(rate)
                                snippet_length = 60
                                snippet_overlap = 0
                                snippet_minimum = 59.9
                                seconds = 0
                                while seconds + snippet_minimum < length:
                                    offset = max(seconds - snippet_overlap, 0)
                                    duration = min(
                                        snippet_length + 2 * snippet_overlap,
                                        length - offset)
                                    Snippet(recording=recording,
                                            offset=offset,
                                            duration=duration).save()
                                    seconds += snippet_length
                        except KeyboardInterrupt:
                            # Stop the whole import, not just this directory.
                            return
                        except Exception:
                            # Best effort: the recording itself is kept.  The
                            # traceback is logged so the cause is not lost.
                            logging.exception('error extracting snippet: %s',
                                              path)
                except Deployment.DoesNotExist:
                    logging.error('no matching deployment found: %s', path)
                except Deployment.MultipleObjectsReturned:
                    logging.error('multiple matching deployment found: %s',
                                  path)
                except IntegrityError:
                    logging.error(
                        'integrity error when trying to save file: %s',
                        path)
                except wave.Error:
                    logging.error("doesn't seem to be a WAV file: %s",
                                  path)
                except RecorderSiteError:
                    logging.error(
                        'unable to extract recorder or site from path: %s',
                        path)
                except KeyboardInterrupt:
                    # Stop the whole import, not just this directory.
                    return
                except Exception:
                    logging.exception(
                        'Hmmm. Something weird happened with this file: %s',
                        path)