def clone_cohort(request, cohort_id):
    """Create a copy of an existing cohort owned by the requesting user.

    Copies the Samples, Patients and Filters rows of the cohort identified
    by ``cohort_id`` onto a new cohort named ``'Copy of <name>'``, records a
    CLONE Source linking the copy to its parent, grants the requesting user
    OWNER permission, and pushes the sample list to BigQuery.

    Returns a redirect to the 'cohort_details' URL of the new cohort.
    ``Cohort.objects.get`` is not guarded, so an unknown ``cohort_id``
    propagates its exception to the caller.
    """
    if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name

    original = Cohort.objects.get(id=cohort_id)
    cohort = Cohort.objects.create(name='Copy of %s' % original.name)
    cohort.save()

    # Sample rows: (sample_id, study_id) pairs. The same queryset object is
    # handed to BigQuery at the end of the function.
    samples = Samples.objects.filter(cohort=original).values_list('sample_id', 'study_id')
    Samples.objects.bulk_create([
        Samples(cohort=cohort, sample_id=sample_id, study_id=study_id)
        for sample_id, study_id in samples
    ])

    # Patient rows.
    # TODO Some cohorts won't have them at the moment. That isn't a big deal in this function
    patient_codes = Patients.objects.filter(cohort=original).values_list('patient_id', flat=True)
    Patients.objects.bulk_create([
        Patients(cohort=cohort, patient_id=code) for code in patient_codes
    ])

    # Filters are optional; skip the insert entirely when the parent has none.
    filter_pairs = Filters.objects.filter(resulting_cohort=original).values_list('name', 'value')
    if len(filter_pairs) > 0:
        Filters.objects.bulk_create([
            Filters(name=filter_name, value=filter_value, resulting_cohort=cohort)
            for filter_name, filter_value in filter_pairs
        ])

    # Record where the copy came from.
    Source(parent=original, cohort=cohort, type=Source.CLONE).save()

    # The user who cloned the cohort owns the copy.
    Cohort_Perms(cohort=cohort, user=request.user, perm=Cohort_Perms.OWNER).save()

    # Store cohort to BigQuery
    bq_settings = settings.GET_BQ_COHORT_SETTINGS()
    bq_support = BigQueryCohortSupport(settings.BQ_PROJECT_ID, bq_settings.dataset_id, bq_settings.table_id)
    bq_support.add_cohort_with_sample_barcodes(cohort.id, samples)

    return redirect(reverse('cohort_details', args=[cohort.id]))
def set_operation(request):
    """Build a new cohort from a set operation over existing cohorts.

    Reads from ``request.POST``:
      * ``name``         -- name of the cohort to create
      * ``operation``    -- one of 'union', 'intersect', 'complement'
      * ``selected-ids`` -- cohort ids for union / intersect
      * ``base-id`` / ``subtract-ids`` -- cohort ids for complement

    When the operation yields at least one sample or patient, a new cohort
    is created, the requesting user is made its OWNER, the samples are
    pushed to BigQuery, the Patients/Samples rows are stored, and one
    SET_OPS Source row is written per contributing cohort. Otherwise a
    warning message is queued and no cohort is created.

    Returns a redirect: to 'cohort_list' when the result is empty, to
    '/cohorts/' otherwise (including non-POST requests).
    """
    if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
    redirect_url = '/cohorts/'

    if request.POST:
        name = request.POST.get('name').encode('utf8')
        cohorts = []
        base_cohort = None
        subtract_cohorts = []
        notes = ''
        patients = []
        samples = []

        op = request.POST.get('operation')
        if op == 'union':
            cohort_ids = request.POST.getlist('selected-ids')
            cohorts = Cohort.objects.filter(id__in=cohort_ids, active=True, cohort_perms__in=request.user.cohort_perms_set.all())
            notes = 'Union of ' + ', '.join([cohort.name for cohort in cohorts])
            ids = [cohort.id for cohort in cohorts]
            patients = Patients.objects.filter(cohort_id__in=ids).distinct().values_list('patient_id', flat=True)
            samples = Samples.objects.filter(cohort_id__in=ids).distinct().values_list('sample_id', 'study_id')

        elif op == 'intersect':
            cohort_ids = request.POST.getlist('selected-ids')
            # Materialise once; the list is reused below for the Source rows.
            cohorts = list(Cohort.objects.filter(id__in=cohort_ids, active=True, cohort_perms__in=request.user.cohort_perms_set.all()))
            if cohorts:
                notes = 'Intersection of ' + ', '.join([cohort.name for cohort in cohorts])
                # flat=True so the set holds bare patient ids; without it
                # values_list() yields 1-tuples, which would then be passed
                # as patient_id when the Patients rows are built below.
                cohort_patients = set(Patients.objects.filter(cohort=cohorts[0]).values_list('patient_id', flat=True))
                cohort_samples = set(Samples.objects.filter(cohort=cohorts[0]).values_list('sample_id', 'study_id'))
                for cohort in cohorts[1:]:
                    cohort_patients.intersection_update(Patients.objects.filter(cohort=cohort).values_list('patient_id', flat=True))
                    cohort_samples.intersection_update(Samples.objects.filter(cohort=cohort).values_list('sample_id', 'study_id'))
                # TODO: work this out with user data when activated
                patients = list(cohort_patients)
                samples = list(cohort_samples)

        elif op == 'complement':
            base_id = request.POST.get('base-id')
            subtract_ids = request.POST.getlist('subtract-ids')
            # NOTE(review): unlike union/intersect, these lookups are not
            # restricted to active cohorts or to the user's cohort_perms --
            # confirm whether that is intended.

            base_patients = Patients.objects.filter(cohort_id=base_id)
            subtract_patients = Patients.objects.filter(cohort_id__in=subtract_ids).distinct()
            cohort_patients = base_patients.exclude(patient_id__in=subtract_patients.values_list('patient_id', flat=True))
            patients = cohort_patients.values_list('patient_id', flat=True)

            base_samples = Samples.objects.filter(cohort_id=base_id)
            subtract_samples = Samples.objects.filter(cohort_id__in=subtract_ids).distinct()
            cohort_samples = base_samples.exclude(sample_id__in=subtract_samples.values_list('sample_id', flat=True))
            samples = cohort_samples.values_list('sample_id', 'study_id')

            base_cohort = Cohort.objects.get(id=base_id)
            # Must be bound to subtract_cohorts: the Source loop below
            # iterates that name to record every subtracted cohort.
            subtract_cohorts = Cohort.objects.filter(id__in=subtract_ids)
            notes = 'Subtracted ' + ', '.join([item.name for item in subtract_cohorts])
            notes += ' from %s.' % base_cohort.name

        if len(samples) or len(patients):
            new_cohort = Cohort.objects.create(name=name)
            perm = Cohort_Perms(cohort=new_cohort, user=request.user, perm=Cohort_Perms.OWNER)
            perm.save()

            # Store cohort to BigQuery
            project_id = settings.BQ_PROJECT_ID
            cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
            bcs = BigQueryCohortSupport(project_id, cohort_settings.dataset_id, cohort_settings.table_id)
            bcs.add_cohort_with_sample_barcodes(new_cohort.id, samples)

            # Store cohort to CloudSQL
            Patients.objects.bulk_create([
                Patients(cohort=new_cohort, patient_id=patient) for patient in patients
            ])
            Samples.objects.bulk_create([
                Samples(cohort=new_cohort, sample_id=sample[0], study_id=sample[1]) for sample in samples
            ])

            # Create Sources. objects.create() already persists the row, so
            # no follow-up save() is needed.
            if op == 'union' or op == 'intersect':
                for cohort in cohorts:
                    Source.objects.create(parent=cohort, cohort=new_cohort, type=Source.SET_OPS, notes=notes)
            elif op == 'complement':
                Source.objects.create(parent=base_cohort, cohort=new_cohort, type=Source.SET_OPS, notes=notes)
                for cohort in subtract_cohorts:
                    Source.objects.create(parent=cohort, cohort=new_cohort, type=Source.SET_OPS, notes=notes)

        else:
            message = 'Operation resulted in empty set of samples and patients. Cohort not created.'
            messages.warning(request, message)
            return redirect('cohort_list')

    return redirect(redirect_url)
def save_cohort(request, workbook_id=None, worksheet_id=None, create_workbook=False):
    """Create a cohort from POSTed filters and/or a source cohort.

    Reads from ``request.POST``: ``name``, ``source`` (optional parent
    cohort id), ``deactivate_sources``, ``filters`` (list of JSON strings)
    and the presence of ``apply-filters``.

    The sample list is fetched from the metadata API. When it is empty an
    error message is queued and nothing is created. Otherwise the cohort,
    its Samples (and Patients when ``USER_DATA_ON`` is false), the OWNER
    permission, the FILTERS Source and the Filters rows are stored, and the
    sample list is pushed to BigQuery.

    If ``workbook_id`` and ``worksheet_id`` are given, the new cohort is
    added to that worksheet; otherwise, if ``create_workbook`` is true, a
    default workbook/worksheet is created for it.

    Returns a redirect to the page appropriate for the outcome.
    """
    if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
    redirect_url = reverse('cohort_list')

    if request.POST:
        name = request.POST.get('name')
        source = request.POST.get('source')
        deactivate_sources = request.POST.get('deactivate_sources')
        filters = request.POST.getlist('filters')
        projects = request.user.project_set.all()

        token = SocialToken.objects.filter(account__user=request.user, account__provider='Google')[0].token
        data_url = METADATA_API + 'v2/metadata_sample_list'
        payload = {
            'token': token
        }
        parent = None
        filter_obj = []

        # Given cohort_id is the only source id.
        if source:
            # Only ever one source
            # NOTE(review): the source cohort is looked up by id alone, with
            # no check against the user's permissions -- confirm intended.
            payload['cohort_id'] = source
            parent = Cohort.objects.get(id=source)

        if filters:
            for raw_filter in filters:
                tmp = json.loads(raw_filter)
                # Prefer ids over display names when the client supplied them.
                key = tmp['feature']['name']
                val = tmp['value']['name']
                if 'id' in tmp['feature'] and tmp['feature']['id']:
                    key = tmp['feature']['id']
                if 'id' in tmp['value'] and tmp['value']['id']:
                    val = tmp['value']['id']

                if key == 'user_projects':
                    # A project filter is expanded to one filter per study.
                    proj = projects.get(id=val)
                    for study in proj.study_set.all():
                        filter_obj.append({
                            'key': 'user_studies',
                            'value': str(study.id)
                        })
                else:
                    filter_obj.append({
                        'key': key,
                        'value': val
                    })

            if len(filter_obj):
                payload['filters'] = json.dumps(filter_obj)

        result = urlfetch.fetch(data_url, method=urlfetch.POST, payload=json.dumps(payload), deadline=60, headers={'Content-Type': 'application/json'})
        items = json.loads(result.content)

        # It is possible that the filters produce a cohort with no samples.
        if int(items['count']) == 0:
            messages.error(request, 'The filters selected returned 0 samples. Please alter your filters and try again')
            redirect_url = reverse('cohort')
        else:
            items = items['items']
            # Extract the row data before touching the database so that a
            # malformed item fails before any cohort row is created.
            sample_rows = [(item['sample_barcode'], item.get('study_id')) for item in items]

            # Create new cohort
            cohort = Cohort.objects.create(name=name)

            Samples.objects.bulk_create([
                Samples(cohort=cohort, sample_id=barcode, study_id=study)
                for barcode, study in sample_rows
            ])

            # TODO This would be a nice to have if we have a mapped ParticipantBarcode value
            # TODO Also this gets weird with mixed mapped and unmapped ParticipantBarcode columns in cohorts
            # If we are *not* using user data, get participant barcodes from metadata_data
            if not USER_DATA_ON:
                participant_url = METADATA_API + ('v2/metadata_participant_list?cohort_id=%s' % (str(cohort.id),))
                participant_result = urlfetch.fetch(participant_url, deadline=120)
                participant_items = json.loads(participant_result.content)
                # NOTE(review): patient ids are read from the 'sample_barcode'
                # key -- presumably that is where the participant endpoint
                # puts them; confirm against the API.
                Patients.objects.bulk_create([
                    Patients(cohort=cohort, patient_id=item['sample_barcode'])
                    for item in participant_items['items']
                ])

            # Set permission for user to be owner
            perm = Cohort_Perms(cohort=cohort, user=request.user, perm=Cohort_Perms.OWNER)
            perm.save()

            # Create the source if it was given
            if source:
                Source.objects.create(parent=parent, cohort=cohort, type=Source.FILTERS)
                # Deactivate the parent only now that its replacement exists,
                # so an empty result or a failed fetch does not leave the
                # parent deactivated with nothing created in its place.
                if deactivate_sources:
                    parent.active = False
                    parent.save()

            # Create filters applied (filter_obj is empty when none were posted)
            for applied in filter_obj:
                Filters.objects.create(resulting_cohort=cohort, name=applied['key'], value=applied['value'])

            # Store cohort to BigQuery
            project_id = settings.BQ_PROJECT_ID
            cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
            bcs = BigQueryCohortSupport(project_id, cohort_settings.dataset_id, cohort_settings.table_id)
            bcs.add_cohort_with_sample_barcodes(cohort.id, cohort.samples_set.values_list('sample_id','study_id'))

            # Check if coming from applying filters and redirect accordingly
            if 'apply-filters' in request.POST:
                redirect_url = reverse('cohort_details',args=[cohort.id])
                messages.info(request, 'Changes applied successfully.')
            else:
                redirect_url = reverse('cohort_list')
                messages.info(request, 'Cohort, %s, created successfully.' % cohort.name)

            # Attaching to a worksheet needs the cohort created above, so
            # this step lives in the branch where it exists.
            if workbook_id and worksheet_id:
                Worksheet.objects.get(id=worksheet_id).add_cohort(cohort)
                redirect_url = reverse('worksheet_display', kwargs={'workbook_id':workbook_id, 'worksheet_id' : worksheet_id})
            elif create_workbook:
                workbook_model = Workbook.create("default name", "This is a default workbook description", request.user)
                worksheet_model = Worksheet.create(workbook_model.id, "worksheet 1","This is a default description")
                worksheet_model.add_cohort(cohort)
                redirect_url = reverse('worksheet_display', kwargs={'workbook_id':workbook_model.id, 'worksheet_id' : worksheet_model.id})

    return redirect(redirect_url) # redirect to search/ with search parameters just saved