def get_syntactically_similar_pairs(request):
    extra_args = json.loads(request.POST.get('extras', '{}'))
    granularity = extra_args['granularity']
    user = request.user

    database = get_user_databases(user)
    permission = database.get_assigned_permission(user)
    if permission < DatabasePermission.ANNOTATE:
        raise CustomAssertionError('You don\'t have permission to annotate this database')

    sids, tids = get_sids_tids(database)
    label_arr = get_syllable_labels(user, granularity, sids, on_no_label='set_blank')
    cls_labels, syl_label_enum_arr = np.unique(label_arr, return_inverse=True)

    enum2label = {enum: label for enum, label in enumerate(cls_labels)}
    sid2enumlabel = {sid: enum_label for sid, enum_label in zip(sids, syl_label_enum_arr)}

    adjacency_mat, classes_info = calc_class_ajacency(database, syl_label_enum_arr, enum2label, sid2enumlabel,
                                                      count_style='forward', self_count='append')
    counter = Counter(syl_label_enum_arr)
    nlabels = len(counter)
    frequencies = np.array([counter[i] for i in range(nlabels)])

    return adjacency_mat.tolist(), frequencies.tolist(), cls_labels.tolist()
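
# Standalone sketch (not part of the view above, safe to remove) of the enumeration technique used in
# get_syntactically_similar_pairs: np.unique(return_inverse=True) turns raw syllable labels into class
# indices, and Counter over those indices yields per-class frequencies. The toy labels are made up.
def _example_label_enumeration():
    import numpy as np
    from collections import Counter

    label_arr = np.array(['A', 'B', 'A', '', 'B', 'A'])
    # cls_labels is the sorted set of distinct labels; syl_label_enum_arr maps each syllable
    # to the index of its label within cls_labels.
    cls_labels, syl_label_enum_arr = np.unique(label_arr, return_inverse=True)
    counter = Counter(syl_label_enum_arr)
    frequencies = np.array([counter[i] for i in range(len(counter))])
    return cls_labels.tolist(), frequencies.tolist()  # e.g. (['', 'A', 'B'], [1, 3, 2])
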
def get_home_page(request):
    user = request.user
    if user.is_authenticated:
        current_database = get_user_databases(user)
        if current_database is None:
            return redirect('dashboard')
        return redirect('songs')
    return render(request, 'home_page.html')
def import_history(request):
    """
    Import a HistoryEntry from any user to this user. If this operation fails, the database is left intact.
    :param request: must specify either:
        version-id, the id of the HistoryEntry object to import, or
        FILES['zipfile'], a history zip file previously exported by Koe (possibly for another user)
    :return: True if everything goes well.
    """
    version_id = request.POST.get('version-id', None)
    zip_file = request.FILES.get('zipfile', None)
    user = request.user

    current_database = get_user_databases(user)
    if current_database is None:
        raise CustomAssertionError('You don\'t have a current working database')

    assert_permission(user, current_database, DatabasePermission.ANNOTATE)

    if not (version_id or zip_file):
        raise CustomAssertionError('No ID or file provided. Abort.')

    if version_id:
        he = HistoryEntry.objects.get(id=version_id)
        file = open(history_path(he.filename), 'rb')
    else:
        file = File(file=zip_file)

    filelist = {}
    with zipfile.ZipFile(file, "r") as zip_file:
        namelist = zip_file.namelist()
        for name in namelist:
            filelist[name] = zip_file.read(name)

    meta = json.loads(get_or_error(filelist, 'meta.json'))
    version = get_or_error(meta, 'version')
    backup_type = get_or_error(meta, 'type')

    if version < 4:
        raise CustomAssertionError('This file format is too old and not supported anymore.')

    if backup_type == 'segmentation':
        retval = import_history_with_segmentation(current_database, user, filelist)
        return dict(origin='import_history', success=True, warning=None, payload=retval)

    try:
        contents = [
            get_or_error(filelist, 'segment.extraattrvalue.json'),
            get_or_error(filelist, 'audiofile.extraattrvalue.json')
        ]
        extra_attrs = json.loads(get_or_error(filelist, 'extraattr.json'))
        new_entries = []
        for content in contents:
            loaded = json.loads(content)
            new_entries += loaded
    except Exception:
        raise CustomAssertionError('The history content is malformed and cannot be parsed.')

    new_entries = change_owner_and_attr_ids(new_entries, extra_attrs)
    retval = update_extra_attr_values(user, new_entries)
    return dict(origin='import_history', success=True, warning=None, payload=retval)
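
# Standalone sketch (safe to remove) of how import_history reads an archive fully into memory as a
# {name: bytes} dict before anything is parsed, so a malformed upload fails before the database is
# touched. The archive built here is synthetic and the 'labelling' type string is a placeholder,
# not necessarily a real Koe backup type.
def _example_read_history_zip():
    import io
    import json
    import zipfile

    # Build a tiny in-memory archive standing in for an uploaded FILES['zipfile']
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, 'w') as zf:
        zf.writestr('meta.json', json.dumps({'version': 4, 'type': 'labelling'}))

    filelist = {}
    with zipfile.ZipFile(buffer, 'r') as zf:
        for name in zf.namelist():
            filelist[name] = zf.read(name)

    meta = json.loads(filelist['meta.json'])
    return meta['version'], meta['type']  # (4, 'labelling')
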
def populate_context(obj, context):
    page_name = getattr(obj, 'page_name', None)
    if page_name is None:
        page_name = obj.__class__.page_name

    user = obj.request.user
    gets = obj.request.GET

    for key, value in gets.items():
        if key.startswith('__'):
            context['external{}'.format(key)] = value
        elif key.startswith('_'):
            context['internal{}'.format(key)] = value
        else:
            context[key] = value

    current_database = get_user_databases(user)

    specified_db = None
    db_class = Database if current_database is None else current_database.__class__

    if 'database' in gets:
        specified_db = gets['database']
        db_class = Database
    elif 'tmpdb' in gets:
        specified_db = gets['tmpdb']
        db_class = TemporaryDatabase

    if specified_db and (current_database is None or specified_db != current_database.name):
        current_database = get_or_error(db_class, dict(name=specified_db))

        current_database_value = ExtraAttrValue.objects.filter(attr=settings.ATTRS.user.current_database,
                                                               owner_id=user.id, user=user).first()
        if current_database_value is None:
            current_database_value = ExtraAttrValue(attr=settings.ATTRS.user.current_database, owner_id=user.id,
                                                    user=user)
        current_database_value.value = '{}_{}'.format(db_class.__name__, current_database.id)
        current_database_value.save()

    if db_class == Database:
        db_assignment = assert_permission(user, current_database, DatabasePermission.VIEW)
    else:
        db_assignment = {'can_view': True}

    context['databases'] = get_user_accessible_databases(user)
    context['current_database'] = current_database
    context['db_assignment'] = db_assignment

    context['my_tmpdbs'] = TemporaryDatabase.objects.filter(user=user)
    # context['other_tmpdbs'] = TemporaryDatabase.objects.exclude(user=user)

    if db_class == Database:
        underlying_databases = [current_database]
    else:
        underlying_databases = current_database.get_databases()

    other_users = DatabaseAssignment.objects\
        .filter(database__in=underlying_databases, permission__gte=DatabasePermission.VIEW)\
        .values_list('user__id', flat=True)
    other_users = User.objects.filter(id__in=other_users)
    viewas = gets.get('viewas', user.username)
    viewas = get_or_error(User, dict(username=viewas))
    context['viewas'] = viewas
    context['other_users'] = other_users

    granularity = gets.get('granularity', 'label')
    context['granularity'] = granularity
    context['page'] = page_name
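
# Standalone sketch (safe to remove) of the GET-parameter routing used in populate_context: keys
# prefixed with '__' become 'external__*' context entries, keys prefixed with '_' become
# 'internal_*', and everything else is copied through unchanged. The sample query dict is made up.
def _example_route_get_params(gets):
    context = {}
    for key, value in gets.items():
        if key.startswith('__'):
            context['external{}'.format(key)] = value
        elif key.startswith('_'):
            context['internal{}'.format(key)] = value
        else:
            context[key] = value
    return context

# _example_route_get_params({'__token': 'x', '_page': '2', 'database': 'finches'})
# -> {'external__token': 'x', 'internal_page': '2', 'database': 'finches'}
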
def bulk_get_song_sequence_associations(all_songs, extras):
    granularity = extras.granularity
    current_database = get_user_databases(extras.user)
    viewas = extras.viewas
    support = float(extras.get('support', 0.01))
    use_gap = extras.usegap
    maxgap = extras.maxgap if use_gap else 1
    mingap = extras.mingap if use_gap else -99999

    if isinstance(current_database, Database):
        if isinstance(all_songs, QuerySet):
            all_songs = all_songs.filter(database=current_database)
        else:
            all_songs = [x.id for x in all_songs if x.database == current_database]

        segs = Segment.objects.filter(audio_file__in=all_songs).order_by('audio_file__name', 'start_time_ms')
    else:
        segs = Segment.objects.filter(id__in=current_database.ids)

    if use_gap:
        values = segs.values_list('id', 'audio_file__id', 'start_time_ms', 'end_time_ms')
    else:
        values = segs.values_list('id', 'audio_file__id')

    seg_ids = segs.values_list('id', flat=True)

    label_attr = ExtraAttr.objects.get(klass=Segment.__name__, name=granularity)
    labels = ExtraAttrValue.objects.filter(attr=label_attr, owner_id__in=seg_ids, user__username=viewas) \
        .values_list('owner_id', 'value')

    seg_id_to_label = {x: y for x, y in labels}
    label_set = set(seg_id_to_label.values())
    labels2enums = {y: x + 1 for x, y in enumerate(label_set)}
    enums2labels = {x: y for y, x in labels2enums.items()}

    pseudo_end_id = len(label_set) + 1
    enums2labels[pseudo_end_id] = '__PSEUDO_END__'
    enums2labels[0] = '__PSEUDO_START__'

    seg_id_to_label_enum = {x: labels2enums[y] for x, y in seg_id_to_label.items()}

    # Bagging song syllables by song name
    songs = {}
    sequences = []
    sequence_ind = 1

    for value in values:
        seg_id = value[0]
        song_id = value[1]

        label2enum = seg_id_to_label_enum.get(seg_id, None)

        if use_gap:
            start = value[2]
            end = value[3]
            seg_info = (label2enum, start, end)
        else:
            seg_info = label2enum

        if song_id not in songs:
            segs_info = []
            songs[song_id] = segs_info
        else:
            segs_info = songs[song_id]

        segs_info.append(seg_info)

    for song_id, segs_info in songs.items():
        song_sequence = []
        has_unlabelled = False

        # This helps keep track of the current position of the syllable when the song is rid of syllable
        # duration and only gaps are retained.
        accum_gap = 10

        # This helps keep track of the gap between this current syllable and the previous one,
        # such that we can decide to merge two syllables if their gap is too small (could also be negative)
        gap = 0
        last_syl_end = None

        for ind, seg_info in enumerate(segs_info):
            if use_gap:
                label2enum, start, end = seg_info

                if last_syl_end is None:
                    gap = 0
                else:
                    gap = start - last_syl_end
                last_syl_end = end
                accum_gap += gap

                # If the gap is too small, merge this one with the previous one, which means the eid stays the same
                if ind > 0 and gap < mingap:
                    song_sequence[-1][2].append(label2enum)
                else:
                    eid = accum_gap
                    song_sequence.append([sequence_ind, eid, [label2enum]])
            else:
                label2enum = seg_info
                eid = ind + 1
                song_sequence.append([sequence_ind, eid, [label2enum]])

            if label2enum is None:
                has_unlabelled = True
                break

        pseudo_start = max(0, song_sequence[0][1] - 1)
        song_sequence.insert(0, [sequence_ind, pseudo_start, [0]])
        song_sequence.append([sequence_ind, eid + 1, [pseudo_end_id]])

        if not has_unlabelled:
            sequences += song_sequence
            sequence_ind += 1

    ids = []
    rows = []

    nsequences = sequence_ind - 1
    if nsequences == 0:
        return ids, rows

    support = max(int(nsequences * support), 1) / nsequences
    try:
        result = spade(data=sequences, support=support, maxgap=maxgap)
    except RuntimeError as e:
        raise CustomAssertionError('SPADE error: {}'.format(str(e)))

    mined_objects = result['mined_objects']

    for idx, seq in enumerate(mined_objects):
        items = seq.items
        conf = -1 if seq.confidence is None else seq.confidence
        lift = -1 if seq.lift is None else seq.lift

        items_str = []
        for item in items:
            item_str = '{}' if len(item.elements) == 1 else '({})'
            labels = ' -&- '.join([enums2labels[element] for element in item.elements])
            item_str = item_str.format(labels)
            items_str.append(item_str)
        assocrule = ' => '.join(items_str)

        row = dict(id=idx, chainlength=len(items), transcount=seq.noccurs, confidence=conf, lift=lift,
                   support=seq.noccurs / nsequences, assocrule=assocrule)

        rows.append(row)
        ids.append(idx)

    return ids, rows
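
# Standalone sketch (safe to remove) of the input format assembled above for SPADE-style sequence
# mining: each row is [sequence_id, event_id, [item, ...]], bracketed by a pseudo-start item (0) and
# a pseudo-end item. The toy labels and gap-free event ids are made up; this does not call spade().
def _example_build_spade_sequence(labels2enums, song_labels, sequence_ind=1):
    pseudo_end_id = len(labels2enums) + 1
    song_sequence = []
    for ind, label in enumerate(song_labels):
        eid = ind + 1
        song_sequence.append([sequence_ind, eid, [labels2enums[label]]])
    # Bracket the song with pseudo start/end events so mined rules can anchor on song boundaries
    song_sequence.insert(0, [sequence_ind, 0, [0]])
    song_sequence.append([sequence_ind, eid + 1, [pseudo_end_id]])
    return song_sequence

# _example_build_spade_sequence({'A': 1, 'B': 2}, ['A', 'B', 'A'])
# -> [[1, 0, [0]], [1, 1, [1]], [1, 2, [2]], [1, 3, [1]], [1, 4, [3]]]
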
def bulk_get_song_sequences(all_songs, extras):
    """
    For the song sequence page. For each song, send the sequence of syllables in order of appearance
    :param all_songs: a QuerySet list of AudioFile
    :param extras:
    :return:
    """
    granularity = extras.granularity
    current_database = get_user_databases(extras.user)
    permission = current_database.get_assigned_permission(extras.user)
    viewas = extras.viewas

    if permission < DatabasePermission.VIEW:
        raise CustomAssertionError('You don\'t have permission to view this database')
    extras.permission = permission

    if isinstance(current_database, Database):
        if isinstance(all_songs, QuerySet):
            all_songs = all_songs.filter(database=current_database)
            song_ids = all_songs.values_list('id', flat=True)
        else:
            all_songs = [x.id for x in all_songs if x.database == current_database]
            song_ids = all_songs

        segs = Segment.objects.filter(audio_file__in=all_songs).order_by('audio_file__name', 'start_time_ms')
    else:
        seg_ids = current_database.ids
        segs = Segment.objects.filter(id__in=seg_ids)
        song_ids = segs.values_list('audio_file').distinct()
        all_songs = AudioFile.objects.filter(id__in=song_ids)

    values = segs.values_list('id', 'tid', 'start_time_ms', 'end_time_ms',
                              'audio_file__name', 'audio_file__id', 'audio_file__quality',
                              'audio_file__length', 'audio_file__fs', 'audio_file__added',
                              'audio_file__track__name', 'audio_file__track__date',
                              'audio_file__individual__name', 'audio_file__individual__gender',
                              'audio_file__individual__species__name')

    seg_ids = segs.values_list('id', flat=True)

    label_attr = ExtraAttr.objects.get(klass=Segment.__name__, name=granularity)
    labels = ExtraAttrValue.objects.filter(attr=label_attr, owner_id__in=seg_ids, user__username=viewas) \
        .values_list('owner_id', 'value')
    seg_id_to_label = {x: y for x, y in labels}

    ids = []
    rows = []

    # Bagging song syllables by song name
    songs = {}

    for seg_id, tid, start, end, filename, song_id, quality, length, fs, added, track, date, indv, gender, species \
            in values:
        if song_id not in songs:
            url = reverse('segmentation', kwargs={'file_id': song_id})
            url = '[{}]({})'.format(url, filename)
            duration_ms = round(length * 1000 / fs)
            song_info = dict(filename=url, track=track, individual=indv, sex=gender, quality=quality,
                             record_date=date, added=added.date(), duration=duration_ms, species=species)
            segs_info = []
            songs[song_id] = dict(song=song_info, segs=segs_info)
        else:
            segs_info = songs[song_id]['segs']

        label = seg_id_to_label.get(seg_id, '__NONE__')
        segs_info.append((tid, label, start, end))

    for song_id, info in songs.items():
        song_info = info['song']
        segs_info = info['segs']

        sequence_labels = []
        sequence_starts = []
        sequence_ends = []
        sequence_tids = []

        for tid, label, start, end in segs_info:
            sequence_labels.append(label)
            sequence_starts.append(start)
            sequence_ends.append(end)
            sequence_tids.append(tid)

        sequence_str = '-'.join('\"{}\"'.format(x) for x in sequence_labels)

        row = song_info
        row['id'] = song_id
        row['sequence'] = sequence_str
        row['sequence-labels'] = sequence_labels
        row['sequence-starts'] = sequence_starts
        row['sequence-ends'] = sequence_ends
        row['sequence-tids'] = sequence_tids

        ids.append(song_id)
        rows.append(row)

    # Now we have to deal with songs without any segmentation done
    empty_songs = all_songs.exclude(id__in=songs.keys())
    _ids, _rows = get_sequence_info_empty_songs(empty_songs)
    ids += _ids
    rows += _rows

    extra_attrs = ExtraAttr.objects.filter(klass=AudioFile.__name__)
    extra_attr_values_list = ExtraAttrValue.objects\
        .filter(user__username=viewas, attr__in=extra_attrs, owner_id__in=song_ids)\
        .values_list('owner_id', 'attr__name', 'value')

    extra_attr_values_lookup = {}
    for id, attr, value in extra_attr_values_list:
        if id not in extra_attr_values_lookup:
            extra_attr_values_lookup[id] = {}
        extra_attr_dict = extra_attr_values_lookup[id]
        extra_attr_dict[attr] = value

    for song_id, row in zip(ids, rows):
        extra_attr_dict = extra_attr_values_lookup.get(song_id, {})
        for attr in extra_attr_dict:
            row[attr] = extra_attr_dict[attr]

    return ids, rows
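
# Standalone sketch (safe to remove) of the pivot performed at the end of bulk_get_song_sequences:
# flat (owner_id, attr_name, value) triples are grouped into one dict per song and merged into the
# matching row. The sample triples and attribute names are made up for illustration.
def _example_merge_extra_attrs(ids, rows, extra_attr_values_list):
    lookup = {}
    for owner_id, attr, value in extra_attr_values_list:
        lookup.setdefault(owner_id, {})[attr] = value

    for song_id, row in zip(ids, rows):
        row.update(lookup.get(song_id, {}))
    return rows

# _example_merge_extra_attrs([1, 2], [{'id': 1}, {'id': 2}], [(1, 'note', 'check'), (1, 'site', 'N7')])
# -> [{'id': 1, 'note': 'check', 'site': 'N7'}, {'id': 2}]
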
def bulk_get_exemplars(objs, extras):
    """
    Return rows of exemplars, one row per class. Class can be label, label_family, label_subfamily
    :param objs: a list of Segments
    :param extras: must contain key 'granularity', value can be one of 'label', 'label_family', 'label_subfamily'
    :return:
    """
    granularity = extras.granularity
    viewas = extras.viewas
    current_database = get_user_databases(extras.user)

    if isinstance(current_database, Database):
        if isinstance(objs, QuerySet):
            id2tid = {x: y for x, y in
                      objs.filter(audio_file__database=current_database).values_list('id', 'tid')}
            ids = id2tid.keys()
        else:
            objs = [x for x in objs if x.audio_file.database == current_database]
            id2tid = {x.id: x.tid for x in objs}
            ids = id2tid.keys()
    else:
        ids = current_database.ids
        segs = Segment.objects.filter(id__in=ids)
        id2tid = {x: y for x, y in segs.values_list('id', 'tid')}

    values = ExtraAttrValue.objects.filter(attr__klass=Segment.__name__, attr__name=granularity,
                                           owner_id__in=ids, user__username=viewas) \
        .order_by(Lower('value'), 'owner_id').values_list('value', 'owner_id')

    class_to_exemplars = []

    current_class = ''
    current_exemplars_list = None
    current_exemplars_count = 0
    total_exemplars_count = 0

    for cls, owner_id in values:
        if cls:
            cls = cls.strip()
            if cls:
                if cls.lower() != current_class.lower():
                    class_to_exemplars.append((current_class, total_exemplars_count, current_exemplars_list))

                    current_exemplars_count = 0
                    current_class = cls
                    total_exemplars_count = 0
                    current_exemplars_list = [owner_id]
                else:
                    current_exemplars_list.append(owner_id)
                    current_exemplars_count += 1
                total_exemplars_count += 1

    class_to_exemplars.append((current_class, total_exemplars_count, current_exemplars_list))

    rows = []
    ids = []
    for cls, count, exemplar_ids in class_to_exemplars:
        if cls:
            exemplar_id2tid = [(x, id2tid[x]) for x in exemplar_ids]
            row = dict(id=cls, count=count, spectrograms=exemplar_id2tid)
            row['class'] = cls
            rows.append(row)
            ids.append(cls)

    return ids, rows
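
# Standalone sketch (safe to remove) of the grouping performed in bulk_get_exemplars: (label, owner_id)
# pairs, pre-sorted case-insensitively by label, are collapsed into one (class, count, exemplar_ids)
# entry per class, skipping blank labels. The sample values are made up for illustration.
def _example_group_exemplars(values):
    class_to_exemplars = []
    current_class = ''
    current_ids = None
    total = 0
    for cls, owner_id in values:
        cls = (cls or '').strip()
        if not cls:
            continue
        if cls.lower() != current_class.lower():
            # A new class starts: flush the previous one and reset the accumulators
            if current_class:
                class_to_exemplars.append((current_class, total, current_ids))
            current_class = cls
            current_ids = [owner_id]
            total = 1
        else:
            current_ids.append(owner_id)
            total += 1
    if current_class:
        class_to_exemplars.append((current_class, total, current_ids))
    return class_to_exemplars

# _example_group_exemplars([('a', 10), ('A', 11), ('b', 12)]) -> [('a', 2, [10, 11]), ('b', 1, [12])]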