Example #1
0
def clone_cohort(request, cohort_id):
    """Create a copy of an existing cohort owned by the requesting user.

    The parent's samples, patients and filters are duplicated onto a new
    cohort named 'Copy of <parent name>'. A ``Source`` row of type
    ``Source.CLONE`` links the copy to its parent, the requesting user is
    given ``Cohort_Perms.OWNER`` on the copy, and the sample list is handed
    to ``BigQueryCohortSupport.add_cohort_with_sample_barcodes``.

    Args:
        request: The current request; ``request.user`` becomes the owner.
        cohort_id: Id of the cohort to clone.

    Returns:
        A redirect to the 'cohort_details' URL of the new cohort.

    Raises:
        Whatever ``Cohort.objects.get`` raises when no cohort matches
        ``cohort_id`` (not handled here).
    """
    if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
    redirect_url = 'cohort_details'

    # NOTE(review): nothing in this function checks that request.user has any
    # permission on the parent cohort -- confirm that is enforced by the caller.
    parent_cohort = Cohort.objects.get(id=cohort_id)

    # objects.create() already persists the row; no follow-up save() needed.
    cohort = Cohort.objects.create(name='Copy of %s' % parent_cohort.name)

    # Copy sample ids. The queryset is kept so it can be reused for BigQuery.
    samples = Samples.objects.filter(cohort=parent_cohort).values_list('sample_id', 'study_id')
    Samples.objects.bulk_create([
        Samples(cohort=cohort, sample_id=sample_id, study_id=study_id)
        for sample_id, study_id in samples
    ])

    # TODO Some cohorts won't have them at the moment. That isn't a big deal in this function
    # Copy patient ids
    patients = Patients.objects.filter(cohort=parent_cohort).values_list('patient_id', flat=True)
    Patients.objects.bulk_create([
        Patients(cohort=cohort, patient_id=patient_code)
        for patient_code in patients
    ])

    # Clone the filters, but only if there are any (there may not be)
    filters = Filters.objects.filter(resulting_cohort=parent_cohort).values_list('name', 'value')
    filters_list = [
        Filters(name=filter_name, value=filter_value, resulting_cohort=cohort)
        for filter_name, filter_value in filters
    ]
    if filters_list:
        Filters.objects.bulk_create(filters_list)

    # Set source
    source = Source(parent=parent_cohort, cohort=cohort, type=Source.CLONE)
    source.save()

    # Set permissions
    perm = Cohort_Perms(cohort=cohort, user=request.user, perm=Cohort_Perms.OWNER)
    perm.save()

    # Store cohort to BigQuery
    project_id = settings.BQ_PROJECT_ID
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
    bcs = BigQueryCohortSupport(project_id, cohort_settings.dataset_id, cohort_settings.table_id)
    bcs.add_cohort_with_sample_barcodes(cohort.id, samples)

    return redirect(reverse(redirect_url, args=[cohort.id]))
Example #2
0
def set_operation(request):
    """Create a new cohort from a set operation over existing cohorts.

    Reads the following POST fields:
        name: Name for the new cohort.
        operation: One of 'union', 'intersect' or 'complement'.
        selected-ids: Cohort ids to combine (union / intersect).
        base-id: Cohort id to subtract from (complement).
        subtract-ids: Cohort ids to subtract (complement).

    When the operation yields at least one sample or patient, a new cohort is
    created with ``request.user`` as owner, its samples are sent to BigQuery,
    its patients and samples are stored, and one ``Source`` row of type
    ``Source.SET_OPS`` is written per contributing cohort.

    Returns:
        A redirect to '/cohorts/' after a successful operation or a non-POST
        request; a redirect to 'cohort_list' with a warning message when the
        operation produced no samples and no patients.
    """
    if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name
    redirect_url = '/cohorts/'

    if not request.POST:
        return redirect(redirect_url)

    name = request.POST.get('name').encode('utf8')
    notes = ''
    patients = []
    samples = []
    # Cohorts that get a Source row pointing at the new cohort.
    source_parents = []

    op = request.POST.get('operation')
    if op == 'union':
        cohort_ids = request.POST.getlist('selected-ids')
        cohorts = Cohort.objects.filter(id__in=cohort_ids, active=True, cohort_perms__in=request.user.cohort_perms_set.all())
        notes = 'Union of ' + ', '.join(cohort.name for cohort in cohorts)
        ids = [cohort.id for cohort in cohorts]
        patients = Patients.objects.filter(cohort_id__in=ids).distinct().values_list('patient_id', flat=True)
        samples = Samples.objects.filter(cohort_id__in=ids).distinct().values_list('sample_id', 'study_id')
        source_parents = cohorts

    elif op == 'intersect':
        cohort_ids = request.POST.getlist('selected-ids')
        cohorts = list(Cohort.objects.filter(id__in=cohort_ids, active=True, cohort_perms__in=request.user.cohort_perms_set.all()))
        if cohorts:
            # flat=True so patient ids are plain values (not 1-tuples), matching
            # what the union and complement branches hand to Patients(...) below.
            cohort_patients = set(Patients.objects.filter(cohort=cohorts[0]).values_list('patient_id', flat=True))
            cohort_samples = set(Samples.objects.filter(cohort=cohorts[0]).values_list('sample_id', 'study_id'))

            # TODO: work this out with user data when activated
            for cohort in cohorts[1:]:
                cohort_patients.intersection_update(Patients.objects.filter(cohort=cohort).values_list('patient_id', flat=True))
                cohort_samples.intersection_update(Samples.objects.filter(cohort=cohort).values_list('sample_id', 'study_id'))

            notes = 'Intersection of ' + ', '.join(cohort.name for cohort in cohorts)
            patients = list(cohort_patients)
            samples = list(cohort_samples)
        source_parents = cohorts

    elif op == 'complement':
        base_id = request.POST.get('base-id')
        subtract_ids = request.POST.getlist('subtract-ids')
        # NOTE(review): unlike union/intersect, the lookups in this branch are
        # not restricted to cohorts request.user has permissions on -- confirm
        # whether that is intended.

        base_patients = Patients.objects.filter(cohort_id=base_id)
        subtract_patients = Patients.objects.filter(cohort_id__in=subtract_ids).distinct()
        cohort_patients = base_patients.exclude(patient_id__in=subtract_patients.values_list('patient_id', flat=True))
        patients = cohort_patients.values_list('patient_id', flat=True)

        base_samples = Samples.objects.filter(cohort_id=base_id)
        subtract_samples = Samples.objects.filter(cohort_id__in=subtract_ids).distinct()
        cohort_samples = base_samples.exclude(sample_id__in=subtract_samples.values_list('sample_id', flat=True))
        samples = cohort_samples.values_list('sample_id', 'study_id')

        base_cohort = Cohort.objects.get(id=base_id)
        subtract_cohorts = Cohort.objects.filter(id__in=subtract_ids)
        notes = ('Subtracted '
                 + ', '.join(item.name for item in subtract_cohorts)
                 + ' from %s.' % base_cohort.name)
        # The base cohort and every subtracted cohort are recorded as sources.
        source_parents = [base_cohort] + list(subtract_cohorts)

    if not (samples or patients):
        message = 'Operation resulted in empty set of samples and patients. Cohort not created.'
        messages.warning(request, message)
        return redirect('cohort_list')

    new_cohort = Cohort.objects.create(name=name)
    perm = Cohort_Perms(cohort=new_cohort, user=request.user, perm=Cohort_Perms.OWNER)
    perm.save()

    # Store cohort to BigQuery
    project_id = settings.BQ_PROJECT_ID
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
    bcs = BigQueryCohortSupport(project_id, cohort_settings.dataset_id, cohort_settings.table_id)
    bcs.add_cohort_with_sample_barcodes(new_cohort.id, samples)

    # Store cohort to CloudSQL
    Patients.objects.bulk_create([
        Patients(cohort=new_cohort, patient_id=patient)
        for patient in patients
    ])
    Samples.objects.bulk_create([
        Samples(cohort=new_cohort, sample_id=sample[0], study_id=sample[1])
        for sample in samples
    ])

    # Create Sources (objects.create() already saves each row)
    for parent in source_parents:
        Source.objects.create(parent=parent, cohort=new_cohort, type=Source.SET_OPS, notes=notes)

    return redirect(redirect_url)
Example #3
0
def save_cohort(request, workbook_id=None, worksheet_id=None, create_workbook=False):
    """Create a cohort from POSTed filters and/or a source cohort.

    Posts the user's Google token, the optional source cohort id and the
    optional filters to the metadata sample-list endpoint, then stores the
    returned samples as a new cohort owned by ``request.user``. The applied
    filters, the source link, the participants (only when ``USER_DATA_ON`` is
    false) and the BigQuery copy are written as part of the same call.

    Reads the following POST fields:
        name: Name for the new cohort.
        source: Optional id of the cohort the filters are applied to.
        deactivate_sources: When set together with ``source``, the source
            cohort is marked inactive.
        filters: List of JSON strings, each with 'feature' and 'value'
            objects carrying a 'name' and optionally an 'id'.
        apply-filters: When present, redirect to the new cohort's details.

    Args:
        request: The current request.
        workbook_id: With ``worksheet_id``, the worksheet to attach the new
            cohort to.
        worksheet_id: See ``workbook_id``.
        create_workbook: When true (and no workbook/worksheet ids are given),
            a default workbook and worksheet are created for the cohort.

    Returns:
        A redirect whose target depends on the outcome: 'cohort_list' by
        default, 'cohort' when the filters matched no samples,
        'cohort_details' when applying filters, or 'worksheet_display' when a
        worksheet is involved.
    """
    if debug: print >> sys.stderr,'Called '+sys._getframe().f_code.co_name

    redirect_url = reverse('cohort_list')

    if not request.POST:
        return redirect(redirect_url)

    name = request.POST.get('name')
    source = request.POST.get('source')
    deactivate_sources = request.POST.get('deactivate_sources')
    filters = request.POST.getlist('filters')
    projects = request.user.project_set.all()
    parent = None

    token = SocialToken.objects.filter(account__user=request.user, account__provider='Google')[0].token
    data_url = METADATA_API + 'v2/metadata_sample_list'
    payload = {
        'token': token
    }

    # Given cohort_id is the only source id (there is only ever one source).
    if source:
        payload['cohort_id'] = source
        parent = Cohort.objects.get(id=source)
        if deactivate_sources:
            parent.active = False
            parent.save()

    # Normalise the posted filters into key/value pairs.
    filter_obj = []
    for raw_filter in filters:
        tmp = json.loads(raw_filter)
        key = tmp['feature']['name']
        val = tmp['value']['name']

        # Prefer the id over the name whenever a non-empty id is supplied.
        if 'id' in tmp['feature'] and tmp['feature']['id']:
            key = tmp['feature']['id']

        if 'id' in tmp['value'] and tmp['value']['id']:
            val = tmp['value']['id']

        if key == 'user_projects':
            # A project filter is expanded into one filter per study in it.
            proj = projects.get(id=val)
            for study in proj.study_set.all():
                filter_obj.append({
                    'key': 'user_studies',
                    'value': str(study.id)
                })
        else:
            filter_obj.append({
                'key': key,
                'value': val
            })

    if filter_obj:
        payload['filters'] = json.dumps(filter_obj)

    result = urlfetch.fetch(data_url, method=urlfetch.POST, payload=json.dumps(payload), deadline=60, headers={'Content-Type': 'application/json'})
    items = json.loads(result.content)

    # It is possible that the filters are creating a cohort with no samples
    if int(items['count']) == 0:
        messages.error(request, 'The filters selected returned 0 samples. Please alter your filters and try again')
        return redirect(reverse('cohort'))

    items = items['items']

    # Create new cohort (objects.create() already saves the row)
    cohort = Cohort.objects.create(name=name)

    # Store the returned samples; study_id is optional in each item.
    Samples.objects.bulk_create([
        Samples(cohort=cohort, sample_id=item['sample_barcode'], study_id=item.get('study_id'))
        for item in items
    ])

    # TODO This would be a nice to have if we have a mapped ParticipantBarcode value
    # TODO Also this gets weird with mixed mapped and unmapped ParticipantBarcode columns in cohorts
    # If we are *not* using user data, get participant barcodes from metadata_data
    if not USER_DATA_ON:
        participant_url = METADATA_API + ('v2/metadata_participant_list?cohort_id=%s' % (str(cohort.id),))
        participant_result = urlfetch.fetch(participant_url, deadline=120)
        participant_items = json.loads(participant_result.content)
        # NOTE(review): patient_id is read from the 'sample_barcode' key of the
        # participant list -- confirm that is the key this endpoint returns.
        Patients.objects.bulk_create([
            Patients(cohort=cohort, patient_id=participant['sample_barcode'])
            for participant in participant_items['items']
        ])

    # Set permission for user to be owner
    perm = Cohort_Perms(cohort=cohort, user=request.user, perm=Cohort_Perms.OWNER)
    perm.save()

    # Create the source if it was given
    if source:
        Source.objects.create(parent=parent, cohort=cohort, type=Source.FILTERS)

    # Record the filters that were applied
    for applied in filter_obj:
        Filters.objects.create(resulting_cohort=cohort, name=applied['key'], value=applied['value'])

    # Store cohort to BigQuery
    project_id = settings.BQ_PROJECT_ID
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
    bcs = BigQueryCohortSupport(project_id, cohort_settings.dataset_id, cohort_settings.table_id)
    bcs.add_cohort_with_sample_barcodes(cohort.id, cohort.samples_set.values_list('sample_id','study_id'))

    # Check if coming from applying filters and redirect accordingly
    if 'apply-filters' in request.POST:
        redirect_url = reverse('cohort_details', args=[cohort.id])
        messages.info(request, 'Changes applied successfully.')
    else:
        redirect_url = reverse('cohort_list')
        messages.info(request, 'Cohort, %s, created successfully.' % cohort.name)

    # A worksheet target, when given or requested, overrides the redirect.
    if workbook_id and worksheet_id:
        Worksheet.objects.get(id=worksheet_id).add_cohort(cohort)
        redirect_url = reverse('worksheet_display', kwargs={'workbook_id': workbook_id, 'worksheet_id': worksheet_id})
    elif create_workbook:
        workbook_model = Workbook.create("default name", "This is a default workbook description", request.user)
        worksheet_model = Worksheet.create(workbook_model.id, "worksheet 1", "This is a default description")
        worksheet_model.add_cohort(cohort)
        redirect_url = reverse('worksheet_display', kwargs={'workbook_id': workbook_model.id, 'worksheet_id': worksheet_model.id})

    return redirect(redirect_url)