Code example #1
 def get_regular_sample(self, sample_data, tumor_type):
     legacy_fg = Q(file__file_group=FileGroup.objects.get(
         slug="fero-legacy-data"))
     data_files = FileRepository.filter(queryset=self.files, q=legacy_fg)
     sample_id = sample_data["sample_id"]
     sample = FileRepository.filter(queryset=data_files,
                                    metadata={
                                        "cmoSampleName": sample_id,
                                        "igocomplete": True
                                    },
                                    filter_redact=True)
     if not sample:  # fall back to a DMP sample
         # use .get() so a missing key cannot leave patient_id/bait_set unbound
         patient_id = sample_data.get("patient_id")
         bait_set = sample_data.get("bait_set")
         dmp_bam_id = sample_id.replace("s_", "").replace("_", "-")
         data = FileRepository.filter(queryset=self.files,
                                      metadata={"external_id": dmp_bam_id})
         sample = list()
         for f in data:
             f.metadata = build_dmp_sample(f, patient_id, bait_set,
                                           tumor_type)["metadata"]
             sample.append(f)
     return sample
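
The pattern above recurs throughout this listing: start from a queryset, narrow it with a Q object or a metadata dict, and optionally drop redacted files. A minimal sketch of the call shapes these examples use, with hypothetical metadata values (the keyword arguments belong to beagle's FileRepository wrapper, not to the stock Django manager):

# hypothetical values; call shapes taken from the examples in this listing
by_metadata = FileRepository.filter(metadata={"patientId": "P-0000001"},
                                    filter_redact=True)
by_q = FileRepository.filter(queryset=FileRepository.all(),
                             q=Q(file__file_group__slug="fero-legacy-data"))
ids_only = FileRepository.filter(metadata={"requestId": "00000_A"},
                                 values_metadata="sampleId")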
Code example #2
 def _get_request_id(self):
     files = FileRepository.all()
     request_ids = set()
     for run_id in self.run_ids:
         run = Run.objects.get(id=run_id)
         sample_name = run.tags['sampleNameTumor']
         sample_files = FileRepository.filter(
             queryset=files, metadata={'cmoSampleName': sample_name})
         for f in sample_files:
             metadata = f.metadata
             if 'requestId' in metadata:
                 request_ids.add(metadata['requestId'])
     # sort so the joined id is deterministic across set iteration orders
     request_id = "_".join(sorted(request_ids))
     return request_id
Code example #3
def get_dmp_bam(patient_id, bait_set, tumor_type):
    """
    From a patient id and bait set, get matching dmp bam normal
    """
    file_objs = FileRepository.all()

    dmp_query = build_dmp_query(patient_id, bait_set)

    dmp_bam = FileRepository.filter(queryset=file_objs, q=dmp_query).order_by('file__file_name').first()

    if dmp_bam:
        sample = build_dmp_sample(dmp_bam, patient_id, bait_set, tumor_type)
        built_sample = build_sample([sample], ignore_sample_formatting=True)
        return built_sample
    return None
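
A usage sketch for get_dmp_bam; the patient id and bait set are hypothetical, and the "T"/"N" tumor_type convention follows _find_dmp_bams in example #15:

# hypothetical inputs; returns the built sample or None when no bam matches
normal = get_dmp_bam("P-0000001", "IMPACT468_BAITS", "N")
if normal is None:
    print("no matching DMP normal; a pooled normal would be needed instead")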
Code example #4
 def _get_samples_data(self):
     files = FileRepository.all()
     f = FileRepository.filter(queryset=files,
                               metadata={
                                   "cmoSampleName": self.tumor_sample_name,
                                   "igocomplete": True
                               },
                               filter_redact=True)
     sample = None
     if f:
         # retrieve metadata from first record (should only be one)
         meta = f[0].metadata
         sample_id = meta["sampleId"]
         sample = SampleData(sample_id)
     return sample
Code example #5
def get_pooled_normal_files(run_ids, preservation_types, bait_set):

    pooled_normals = FileRepository.all()

    query = Q(file__file_group=settings.POOLED_NORMAL_FILE_GROUP)
    run_id_query = build_run_id_query(run_ids)
    preservation_query = build_preservation_query(preservation_types)

    q = query & run_id_query & preservation_query

    pooled_normals = FileRepository.filter(queryset=pooled_normals, q=q)

    pooled_normals, descriptor, sample_name = get_descriptor(bait_set, pooled_normals, preservation_types, run_ids)

    return pooled_normals, descriptor, sample_name
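
A usage sketch for get_pooled_normal_files, with hypothetical run ids and preservation types; the three return values come straight from get_descriptor (example #22):

pooled_normals, descriptor, sample_name = get_pooled_normal_files(
    ["RUN_0001", "RUN_0002"],   # hypothetical run ids
    ["Frozen", "FFPE"],         # preservation types
    "IMPACT505_BAITS")          # hypothetical bait set, matched against 'recipe'
if not pooled_normals:
    print("no pooled normal found for %s" % sample_name)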
Code example #6
File: helper.py Project: utkarsh-stark/beagle
def generate_sample_data_content(files, pipeline_name, pipeline_github,
                                 pipeline_version):
    result = "SAMPLE_ID\tREQUEST_ID\tPROJECT_ID\tPATIENT_ID\tCOLLAB_ID\tSAMPLE_TYPE\tGENE_PANEL\tONCOTREE_CODE\tSAMPLE_CLASS\tSPECIMEN_PRESERVATION_TYPE\tSEX\tTISSUE_SITE\tIGO_ID\tPIPELINE\tPIPELINE_GITHUB_LINK\tPIPELINE_VERSION\n"
    ret_str = 'metadata__sampleId'
    query = Q(file__file_group_id=settings.IMPORT_FILE_GROUP)
    query |= Q(file__file_group__slug="origin-unknown")
    query = query & Q(file__path__in=files)
    samples = FileRepository.filter(
        q=query).order_by(ret_str).distinct(ret_str).all()
    for sample in samples:
        metadata = sample.metadata
        result += '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
            metadata.get(
                'cmoSampleName',
                format_sample_name(metadata['sampleName'],
                                   metadata['specimenType'])),
            metadata['requestId'],
            get_project_id(metadata['requestId']),
            metadata['patientId'],
            metadata['investigatorSampleId'],
            MetadataValidator.clean_value(metadata['sampleClass']),
            MetadataValidator.clean_value(metadata['recipe']),
            MetadataValidator.clean_value(metadata['oncoTreeCode']),
            MetadataValidator.clean_value(metadata['specimenType']),
            MetadataValidator.clean_value(metadata['preservation']),
            MetadataValidator.clean_value(metadata['sex']),
            MetadataValidator.clean_value(metadata['tissueLocation']),
            metadata['sampleId'],
            pipeline_name,
            pipeline_github,
            pipeline_version,
        )
    return result
Code example #7
def get_samples_from_patient_id(patient_id):
    """
    Retrieves samples from the database based on the patient_id
    """
    files = FileRepository.filter(metadata={"patientId": patient_id},
                                  filter_redact=True)
    data = list()
    for current_file in files:
        sample = dict()
        sample['id'] = current_file.file.id
        sample['path'] = current_file.file.path
        sample['file_name'] = current_file.file.file_name
        sample['metadata'] = current_file.metadata
        data.append(sample)

    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample['metadata']['sampleId']
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)

    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    samples, bad_samples = remove_with_caveats(samples)
    number_of_bad_samples = len(bad_samples)
    if number_of_bad_samples > 0:
        LOGGER.warning(
            'Patient query %s returned %i samples with invalid values',
            patient_id, number_of_bad_samples)
    return samples
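
A usage sketch for the function above (hypothetical patient id); callers get back only the samples that survived remove_with_caveats:

samples = get_samples_from_patient_id("P-0000001")  # hypothetical id
LOGGER.info("found %i usable samples for patient", len(samples))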
Code example #8
    def __init__(
        self,
        model,
        job_group_id=None,
        job_group_notifier_id=None,
        request_id=None,
        run_ids=None,
        pipeline=None,
        pairing=None,
        output_directory_prefix=None,
    ):
        if not isinstance(model, OperatorModel):
            raise Exception(
                "Must pass an instance of beagle_etl.models.Operator")

        self.model = model
        self.request_id = request_id
        self.job_group_id = job_group_id
        self.job_group_notifier_id = job_group_notifier_id
        self.run_ids = run_ids if run_ids is not None else []  # avoid a shared mutable default
        self.files = FileRepository.all()
        self.pairing = pairing
        # {"pairs": [{"tumor": "tumorSampleName", "normal": "normalSampleName"}]}
        self.output_directory_prefix = output_directory_prefix
        self._jobs = []
        self._pipeline = pipeline
Code example #9
def populate_missing_info_in_notifier(apps, _):
    JobGroupNotifier = apps.get_model("notifier", "JobGroupNotifier")
    job_group_notifiers = JobGroupNotifier.objects.all()
    for jgn in job_group_notifiers:
        if jgn.jira_id and jgn.jira_id.startswith("VADEV-"):
            project = jgn.jira_id.split("-")[0]
            jira_client = JiraClient(url=settings.JIRA_URL,
                                     username=settings.JIRA_USERNAME,
                                     password=settings.JIRA_PASSWORD,
                                     project=project)
            print("Populating status for ticket %s" % jgn.jira_id)
            jira_ticket = jira_client.get_ticket(jgn.jira_id).json()
            request_id = jira_ticket.get("fields", {}).get("summary")
            if request_id:
                file_obj = FileRepository.filter(metadata={
                    "requestId": request_id
                }).first()
                if file_obj:
                    jgn.request_id = request_id
                    jgn.PI = file_obj.metadata["labHeadName"]
                    jgn.investigator = file_obj.metadata["investigatorName"]
                    jgn.assay = file_obj.metadata["recipe"]
                    jgn.save(update_fields=("request_id", "PI", "investigator",
                                            "assay"))
                else:
                    print("Metadata can't be found")
Code example #10
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={
                                          "requestId": self.request_id,
                                          "igocomplete": True
                                      })
        data = [{
            "id": f.file.id,
            "path": f.file.path,
            "file_name": f.file.file_name,
            "metadata": f.metadata
        } for f in files]

        sample_inputs = construct_sample_inputs(data)

        number_of_inputs = len(sample_inputs)

        return [(RunCreator(
            **{
                "name": "ACCESS M1: %s, %i of %i" %
                        (self.request_id, i + 1, number_of_inputs),
                "app": self.get_pipeline_id(),
                "inputs": job,
                "tags": {"requestId": self.request_id},
            }), ) for i, job in enumerate(sample_inputs)]
Code example #11
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={
                                          'requestId': self.request_id,
                                          'igocomplete': True
                                      })
        data = [{
            "id": f.file.id,
            "path": f.file.path,
            "file_name": f.file.file_name,
            "metadata": f.metadata
        } for f in files]

        inputs = construct_inputs(data, self.request_id)

        number_of_inputs = len(inputs)

        return [(APIRunCreateSerializer(
            data={
                'name': "LEGACY FASTQ Merge: %s, %i of %i" %
                        (self.request_id, i + 1, number_of_inputs),
                'app': self.get_pipeline_id(),
                'output_metadata': {
                    key: metadata[key]
                    for key in METADATA_OUTPUT_FIELDS if key in metadata
                },
                'inputs': job,
                'tags': {
                    'requestId': self.request_id,
                    'sampleId': metadata["sampleId"]
                }
            }), job) for i, (job, metadata) in enumerate(inputs)]
Code example #12
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={'requestId': self.request_id,
                                                'igocomplete': True})
        data = [
            {
                "id": f.file.id,
                "path": f.file.path,
                "file_name": f.file.file_name,
                "metadata": f.metadata
            } for f in files
        ]

        sample_inputs = construct_sample_inputs(data)

        number_of_inputs = len(sample_inputs)

        return [
            (
                APIRunCreateSerializer(
                    data={
                        'name': "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                        'app': self.get_pipeline_id(),
                        'inputs': job,
                        'tags': {'requestId': self.request_id}}
                ),
                job
             )

            for i, job in enumerate(sample_inputs)
        ]
Code example #13
 def __init__(self, dmp_file):
     self.files = FileRepository.all()
     self.dmp_file = dmp_file
     self.bam_path = dmp_file.file.path
     self.metadata = dmp_file.metadata
     self.mutations_extended = self._set_data_muts_txt()
     self.dmp_sample_name = self._set_dmp_sample_name()
Code example #14
def get_samples_from_patient_id(patient_id):
    files = FileRepository.filter(metadata={"patientId": patient_id})

    data = list()
    for f in files:
        sample = dict()
        sample["id"] = f.file.id
        sample["path"] = f.file.path
        sample["file_name"] = f.file.file_name
        sample["metadata"] = f.metadata
        data.append(sample)

    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)

    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    samples, bad_samples = remove_with_caveats(samples)
    if len(bad_samples) > 0:
        logger.warning(
            "Patient query %s returned %i samples with invalid values",
            patient_id, len(bad_samples))
    return samples
Code example #15
 def __init__(self, sample_id):
     self.files = FileRepository.all()
     self.sample_id = sample_id
     self.patient_id, self.cmo_sample_name = self._get_sample_metadata()
     self.dmp_patient_id = self._get_dmp_patient_id()
     self.dmp_bams_tumor = self._find_dmp_bams("T")
     self.dmp_bams_normal = self._find_dmp_bams("N")
Code example #16
def get_dmp_normal(patient_id, bait_set):
    """
    From a patient id and bait set, get matching dmp bam normal
    """
    file_objs = FileRepository.all()

    dmp_query = build_dmp_query(patient_id, bait_set)

    dmp_bam = FileRepository.filter(
        queryset=file_objs, q=dmp_query).order_by("file__file_name").first()

    if dmp_bam:
        dmp_metadata = dmp_bam.metadata
        specimen_type = "DMP Normal"
        sample_name = dmp_metadata["external_id"]
        sequencingCenter = "MSKCC"
        platform = "Illumina"
        sample = dict()
        sample["id"] = dmp_bam.file.id
        sample["path"] = dmp_bam.file.path
        sample["file_name"] = dmp_bam.file.file_name
        sample["file_type"] = dmp_bam.file.file_type
        metadata = init_metadata()
        metadata["sampleId"] = sample_name
        metadata["sampleName"] = format_sample_name(sample_name, specimen_type)
        metadata["requestId"] = sample_name
        metadata["sequencingCenter"] = sequencingCenter
        metadata["platform"] = platform
        metadata["baitSet"] = bait_set
        metadata["recipe"] = bait_set
        metadata["run_id"] = ""
        metadata["preservation"] = ""
        metadata["libraryId"] = sample_name + "_1"
        metadata["R"] = "Not applicable"
        # because rgid depends on flowCellId and barcodeIndex, we will
        # spoof barcodeIndex so that pairing can work properly; see
        # build_sample in runner.operator.argos_operator.bin
        metadata["barcodeIndex"] = "DMP_BARCODEIDX"
        metadata["flowCellId"] = "DMP_FCID"
        metadata["tumorOrNormal"] = "Normal"
        metadata["patientId"] = patient_id
        metadata["specimenType"] = specimen_type
        sample["metadata"] = metadata
        built_sample = build_sample([sample], ignore_sample_formatting=True)
        return built_sample
    return None
Code example #17
def get_oncotree_codes(request_id):
    oncotree_dh = OncotreeDataHandler()
    files = FileRepository.all()
    oncotree_codes_tmp = set(
        FileRepository.filter(queryset=files, metadata={"requestId": request_id}).values_list(
            "metadata__oncoTreeCode", flat=True
        )
    )
    oncotree_codes = list()
    for val in oncotree_codes_tmp:
        if val:
            oncotree_codes.append(val)
    if not oncotree_codes:  # hack; if there are no oncotree codes, just say it's mixed
        return "mixed"
    shared_nodes = oncotree_dh.find_shared_nodes_by_code_list(oncotree_codes)
    common_anc = oncotree_dh.get_highest_level_shared_node(shared_nodes)
    if common_anc.code.lower() == "tissue":
        common_anc.code = "mixed"
    return common_anc.code.lower()
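
A usage sketch (hypothetical request id); the helper collapses every oncotree code on a request down to the highest shared ancestor, or "mixed" when no codes are found or the only common node is the tissue root:

code = get_oncotree_codes("00000_A")  # hypothetical request id
print(code)  # e.g. "mixed" or a lower-cased oncotree code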
Code example #18
File: port_view.py Project: utkarsh-stark/beagle
 def update(self, request, *args, **kwargs):
     try:
         port = Port.objects.get(id=kwargs.get('pk'))
     except Port.DoesNotExist:
         return Response({'details': 'Not Found'},
                         status=status.HTTP_404_NOT_FOUND)
     value = request.data
     if isinstance(port.schema.get('type'), dict):
         if port.schema.get('type').get('type') == 'array':
             if port.schema.get('type').get('items') != 'File':
                 port.value = {"inputs": value.get('values')}
             else:
                 input_ids = []
                 files = []
                 for val in value.get('values'):
                     try:
                         file = FileRepository.get(id=val)
                     except FileNotFoundException:
                         return Response({'details': 'Not Found'},
                                         status=status.HTTP_404_NOT_FOUND)
                     input_ids.append(val)
                     file_val = self._create_file(
                         file, port.schema.get('secondaryFiles'))
                     files.append(file_val)
                 port.value = {"refs": input_ids, "inputs": files}
     else:
         if port.schema.get('type') != 'File':
             port.value = {"inputs": value.get('values')}
         else:
             try:
                 file = FileRepository.get(pk=value.get('values')[0])
             except FileNotFoundException:
                 return Response({'details': 'Not Found'},
                                 status=status.HTTP_404_NOT_FOUND)
             port.value = {
                 "inputs": self._create_file(
                     file, port.schema.get('secondaryFiles')),
                 "refs": str(file.id)
             }
     port.save()
     response = PortSerializer(port)
     return Response(response.data, status=status.HTTP_200_OK)
Code example #19
def get_request_pi(run_id_list):
    request_pis = set()
    files = FileRepository.all()
    all_request_ids = set()
    # reducing number of queries
    for run_id in run_id_list:
        argos_run = Run.objects.get(id=run_id)
        run_request_id = argos_run.tags["requestId"]
        all_request_ids.add(run_request_id)
    for request_id in all_request_ids:
        investigator_emails = FileRepository.filter(queryset=files, metadata={"requestId": request_id}).values_list(
            "metadata__investigatorEmail", flat=True
        )
        request_pis = request_pis.union(set(investigator_emails))
    request_pis_final = list()
    for request_pi in request_pis:
        if request_pi:
            request_pis_final.append(format_msk_id(request_pi))
    return ",".join(request_pis_final)
Code example #20
File: port_view.py Project: mskcc/beagle
 def update(self, request, *args, **kwargs):
     try:
         port = Port.objects.get(id=kwargs.get("pk"))
     except Port.DoesNotExist:
         return Response({"details": "Not Found"},
                         status=status.HTTP_404_NOT_FOUND)
     value = request.data
     if isinstance(port.schema.get("type"), dict):
         if port.schema.get("type").get("type") == "array":
             if port.schema.get("type").get("items") != "File":
                 port.value = {"inputs": value.get("values")}
             else:
                 input_ids = []
                 files = []
                 for val in value.get("values"):
                     try:
                         file = FileRepository.get(id=val)
                     except FileNotFoundException:
                         return Response({"details": "Not Found"},
                                         status=status.HTTP_404_NOT_FOUND)
                     input_ids.append(val)
                     file_val = self._create_file(
                         file, port.schema.get("secondaryFiles"))
                     files.append(file_val)
                 port.value = {"refs": input_ids, "inputs": files}
     else:
         if port.schema.get("type") != "File":
             port.value = {"inputs": value.get("values")}
         else:
             try:
                 file = FileRepository.get(pk=value.get("values")[0])
             except FileNotFoundException:
                 return Response({"details": "Not Found"},
                                 status=status.HTTP_404_NOT_FOUND)
             port.value = {
                 "inputs": self._create_file(
                     file, port.schema.get("secondaryFiles")),
                 "refs": str(file.id),
             }
     port.save()
     response = PortSerializer(port)
     return Response(response.data, status=status.HTTP_200_OK)
Code example #21
def create_or_update_file(path, request_id, file_group_id, file_type, igocomplete, data, library, run, sample,
                          request_metadata, r, update=False, job_group_notifier=None):
    logger.info("Creating file %s " % path)
    try:
        file_group_obj = FileGroup.objects.get(id=file_group_id)
        file_type_obj = FileType.objects.filter(name=file_type).first()
        lims_metadata = copy.deepcopy(data)
        library_copy = copy.deepcopy(library)
        lims_metadata['requestId'] = request_id
        lims_metadata['igocomplete'] = igocomplete
        lims_metadata['R'] = r
        for k, v in library_copy.items():
            lims_metadata[k] = v
        for k, v in run.items():
            lims_metadata[k] = v
        for k, v in request_metadata.items():
            lims_metadata[k] = v
        metadata = format_metadata(lims_metadata)
        # validator = MetadataValidator(METADATA_SCHEMA)
    except Exception as e:
        logger.error("Failed to parse metadata for file %s path" % path)
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    try:
        logger.info(lims_metadata)
        # validator.validate(metadata)
    except MetadataValidationException as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    else:
        f = FileRepository.filter(path=path).first()
        if not f:
            create_file_object(path, file_group_obj, lims_metadata, metadata, file_type_obj, sample)

            if update:
                message = "File registered: %s" % path
                update = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                send_notification.delay(update)
        else:
            if update:
                before = f.file.filemetadata_set.order_by('-created_date').count()
                update_file_object(f.file, path, metadata)
                after = f.file.filemetadata_set.order_by('-created_date').count()
                if after != before:
                    all_metadata = f.file.filemetadata_set.order_by('-created_date')
                    ddiff = DeepDiff(all_metadata[1].metadata,
                                     all_metadata[0].metadata,
                                     ignore_order=True)
                    diff_file_name = "%s_metadata_update.json" % f.file.file_name
                    message = "Updating file metadata: %s, details in file %s\n" % (path, diff_file_name)
                    update = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                    diff_details_event = LocalStoreFileEvent(job_group_notifier, diff_file_name, str(ddiff)).to_dict()
                    send_notification.delay(update)
                    send_notification.delay(diff_details_event)
            else:
                raise FailedToFetchSampleException("File %s already exist with id %s" % (path, str(f.id)))
Code example #22
def get_descriptor(bait_set, pooled_normals, preservation_types, run_ids):
    """
    Need descriptor to match pooled normal "recipe", which might need to be re-labeled as bait_set

    Adding correction for IMPACT505 pooled normals
    """
    query = Q(file__file_group=settings.POOLED_NORMAL_FILE_GROUP)
    sample_name = None

    descriptor = None
    for pooled_normal in pooled_normals:
        bset_data = pooled_normal.metadata['recipe']
        if bset_data.lower() in bait_set.lower():
            descriptor = bset_data

    if descriptor:  # From returned pooled normals, we found the bait set/recipe we're looking for
        pooled_normals = FileRepository.filter(queryset=pooled_normals,
                                               metadata={'recipe': descriptor})

        # sample_name is FROZENPOOLEDNORMAL unless FFPE is in any of the preservation types
        # in preservation_types
        preservations_lower_case = set([x.lower() for x in preservation_types])
        # drop empty or falsy run id strings before joining
        run_ids_suffix_list = [i for i in run_ids if i]
        run_ids_suffix = "_".join(set(run_ids_suffix_list))
        sample_name = "FROZENPOOLEDNORMAL_" + run_ids_suffix
        if "ffpe" in preservations_lower_case:
            sample_name = "FFPEPOOLEDNORMAL_" + run_ids_suffix
    elif "impact505" in bait_set.lower():
        # We didn't find a pooled normal for IMPACT505; return "static" FROZEN or FFPE pool normal
        descriptor = "IMPACT505"
        preservations_lower_case = set([x.lower() for x in preservation_types])
        sample_name = "FROZENPOOLEDNORMAL_IMPACT505_V1"
        if "ffpe" in preservations_lower_case:
            sample_name = "FFPEPOOLEDNORMAL_IMPACT505_V1"
        q = query & Q(metadata__sampleName=sample_name)
        pooled_normals = FileRepository.filter(queryset=pooled_normals, q=q)
        if not pooled_normals:
            LOGGER.error("Could not find IMPACT505 pooled normal to pair %s",
                         sample_name)

    return pooled_normals, descriptor, sample_name
Code example #23
File: sample_view.py Project: mskcc/beagle
 def list(self, request, *args, **kwargs):
     request_id = request.query_params.get("project_id")
     if not request_id:
         return Response(status=status.HTTP_404_NOT_FOUND)
     sample_ids = list(
         FileRepository.filter(metadata={"requestId": request_id},
                               values_metadata="sampleId").all())
     samples = Sample.objects.filter(sample_id__in=sample_ids)
     response = FullSampleSerializer(samples, many=True)
     return Response(response.data, status=status.HTTP_200_OK)
Code example #24
    def get_files_for_pairs(self):
        all_files = []
        cnt_tumors = 0
        for pair in self.pairing.get('pairs'):
            tumors = FileRepository.filter(queryset=self.files,
                                           metadata={
                                               'cmoSampleName': pair['tumor'],
                                               'igocomplete': True
                                           },
                                           filter_redact=True)
            cnt_tumors += len(tumors)
            normals = FileRepository.filter(queryset=self.files,
                                            metadata={
                                                'cmoSampleName': pair['normal'],
                                                'igocomplete': True
                                            },
                                            filter_redact=True)
            if not normals and cnt_tumors > 0:  # get from DMP bams
                patient_id = tumors[0].metadata['patientId']
                bait_set = tumors[0].metadata['baitSet']
                dmp_bam_id = pair['normal']
                dmp_bam_id = dmp_bam_id.replace('s_', '').replace('_', '-')
                data = FileRepository.filter(
                    queryset=self.files, metadata={'external_id': dmp_bam_id})
                normals = list()
                for f in data:
                    f.metadata = build_dmp_sample(f, patient_id,
                                                  bait_set)['metadata']
                    normals.append(f)
            for file in list(tumors):
                if file not in all_files:
                    all_files.append(file)
            for file in list(normals):
                if file not in all_files:
                    all_files.append(file)

        return all_files, cnt_tumors
Code example #25
 def _get_muts(self, data_id):
     # There should only be one mutations file returned here, one per dmp sample
     query_results = FileRepository.filter(
         queryset=self.files, metadata={'dmp_link_id': data_id})
     results = list()
     if query_results:
         for item in query_results:
             results.append(item.file.path)
     if len(results) > 1:
         LOGGER.error("More than one mutations file found for %s", data_id)
     if results:
         return results[0]
     return None  # no match: return None rather than an empty list
Code example #26
def get_samples_from_patient_id(patient_id):
    """
    Retrieves samples from the database based on the patient_id

    Only retrieve patients from LIMS file group
    """
    all_files = FileRepository.all()
    q_pid = Q(metadata__patientId=patient_id)
    q_fg = build_argos_file_groups_query()
    q = q_pid & q_fg
    files = FileRepository.filter(queryset=all_files, q=q, filter_redact=True)
    data = list()
    for current_file in files:
        sample = dict()
        sample["id"] = current_file.file.id
        sample["path"] = current_file.file.path
        sample["file_name"] = current_file.file.file_name
        sample["metadata"] = current_file.metadata
        data.append(sample)

    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)

    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    samples, bad_samples = remove_with_caveats(samples)
    number_of_bad_samples = len(bad_samples)
    if number_of_bad_samples > 0:
        LOGGER.warning(
            "Patient query %s returned %i samples with invalid values",
            patient_id, number_of_bad_samples)
    return samples
Code example #27
 def test_construct_inputs_obj_no_dmp_bams(self):
     """
     Test the creation of the inputs obj with no dmp bams
     """
     file_group_id = FileGroup.objects.get(name="DMP BAMs").pk
     files = FileRepository.filter(file_group=file_group_id)
     for single_file in files:
         single_file.delete()
     single_run = Run.objects.get(id=self.run_ids[0])
     input_obj = InputsObj(single_run)
     input_json = input_obj._build_inputs_json()
     self.assertEqual(input_json["unindexed_bam_files"], [])
     self.assertEqual(input_json["unindexed_sample_ids"], [])
     self.assertEqual(input_json["unindexed_maf_files"], [])
Code example #28
 def test_construct_inputs_obj_no_dmp_muts(self):
     """
     Test the creation of the inputs obj with no dmp muts
     """
     file_group_id = FileGroup.objects.get(
         name="DMP Data Mutations Extended").pk
     files = FileRepository.filter(file_group=file_group_id)
     for single_file in files:
         single_file.delete()
     single_run = Run.objects.get(id=self.run_ids[0])
     input_obj = InputsObj(single_run)
     input_json = input_obj._build_inputs_json()
     self.assertEqual(input_json["unindexed_maf_files"], [])
Code example #29
 def _find_dmp_bams(self, tumor_type):
     # Retrieves dmp samples based on dmp bams
     file_list = list()
     if self.dmp_patient_id:
         files = FileRepository.filter(queryset=self.files,
                                       metadata={
                                           'patient__cmo': self.dmp_patient_id,
                                           'type': tumor_type
                                       })
         if files:
             for f in files:
                 file_list.append(BamData(f))
             return file_list
     return None
Code example #30
 def get_regular_sample(self, sample_data, tumor_type):
     sample_id = sample_data['sample_id']
     sample = FileRepository.filter(queryset=self.files,
                                    metadata={
                                        'cmoSampleName': sample_id,
                                        'igocomplete': True
                                    },
                                    filter_redact=True)
     if not sample:  # fall back to a DMP sample
         # use .get() so a missing key cannot leave patient_id/bait_set unbound
         patient_id = sample_data.get('patient_id')
         bait_set = sample_data.get('bait_set')
         dmp_bam_id = sample_id.replace('s_', '').replace('_', '-')
         data = FileRepository.filter(queryset=self.files,
                                      metadata={'external_id': dmp_bam_id})
         sample = list()
         for f in data:
             f.metadata = build_dmp_sample(f, patient_id, bait_set,
                                           tumor_type)['metadata']
             sample.append(f)
     return sample