Example #1
def convert_to_dict(runs):
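    # Collapse the list of run records into a dict keyed by runId; a duplicate runId must reference the same R1/R2 fastq pair.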
    run_dict = dict()
    for run in runs:
        if not run_dict.get(run["runId"]):
            run_dict[run["runId"]] = run
        else:
            # Duplicate runId: the stored entry and the new one must reference the same fastq pair.
            if run_dict[run["runId"]].get("fastqs"):
                logger.error("Duplicate runId %s, verifying fastq consistency" % run["runId"])
                if run_dict[run["runId"]]["fastqs"][0] != run["fastqs"][0]:
                    logger.error("File %s does not match %s" %
                                 (run_dict[run["runId"]]["fastqs"][0],
                                  run["fastqs"][0]))
                    raise FailedToFetchSampleException(
                        "File %s does not match %s" %
                        (run_dict[run["runId"]]["fastqs"][0],
                         run["fastqs"][0]))
                if run_dict[run["runId"]]["fastqs"][1] != run["fastqs"][1]:
                    logger.error("File %s does not match %s" %
                                 (run_dict[run["runId"]]["fastqs"][1],
                                  run["fastqs"][1]))
                    raise FailedToFetchSampleException(
                        "File %s does not match %s" %
                        (run_dict[run["runId"]]["fastqs"][1],
                         run["fastqs"][1]))
    return run_dict
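
A minimal usage sketch for convert_to_dict above; the run-record shape (a "runId" key plus a two-element "fastqs" list) is inferred from the function, and the paths are made up for illustration:

runs = [
    {"runId": "RUN_A", "fastqs": ["/fastq/S1_R1.fastq.gz", "/fastq/S1_R2.fastq.gz"]},
    {"runId": "RUN_A", "fastqs": ["/fastq/S1_R1.fastq.gz", "/fastq/S1_R2.fastq.gz"]},  # duplicate runId, same pair: kept once
    {"runId": "RUN_B", "fastqs": ["/fastq/S2_R1.fastq.gz", "/fastq/S2_R2.fastq.gz"]},
]
run_dict = convert_to_dict(runs)
assert sorted(run_dict) == ["RUN_A", "RUN_B"]
# A duplicate runId pointing at a different fastq pair would raise FailedToFetchSampleException instead.
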
Example #2
def fetch_sample_metadata(sample_id, igocomplete, request_id, request_metadata, redelivery=False, job_group_notifier=None):
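    # Pull the sample manifest from LIMS, check it really belongs to sample_id, validate it, then register each fastq per library/run.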
    logger.info("Fetch sample metadata for sampleId:%s" % sample_id)
    sampleMetadata = LIMSClient.get_sample_manifest(sample_id)
    try:
        data = sampleMetadata[0]
    except Exception:
        raise FailedToFetchSampleException(
            "Failed to fetch SampleManifest for sampleId:%s. Invalid response" % sample_id)
    if data['igoId'] != sample_id:
        # logger.info(data)
        logger.info("Failed to fetch SampleManifest for sampleId:%s. LIMS returned %s " % (sample_id, data['igoId']))
        raise FailedToFetchSampleException(
            "Failed to fetch SampleManifest for sampleId:%s. LIMS returned %s " % (sample_id, data['igoId']))

    validate_sample(sample_id, data.get('libraries', []), igocomplete, redelivery)

    libraries = data.pop('libraries')
    for library in libraries:
        logger.info("Processing library %s" % library)
        runs = library.pop('runs')
        run_dict = convert_to_dict(runs)
        logger.info("Processing runs %s" % run_dict)
        for run in run_dict.values():
            logger.info("Processing run %s" % run)
            fastqs = run.pop('fastqs')
            for fastq in fastqs:
                logger.info("Adding file %s" % fastq)
                create_or_update_file(fastq, request_id, settings.IMPORT_FILE_GROUP, 'fastq', igocomplete, data,
                                      library, run,
                                      request_metadata, R1_or_R2(fastq), update=redelivery,
                                      job_group_notifier=job_group_notifier)
Example #3
def create_or_update_file(path, request_id, file_group_id, file_type, igocomplete, data, library, run, sample,
                          request_metadata, r, update=False, job_group_notifier=None):
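    # Merge LIMS sample, library, run and request metadata into a single dict, then create the File record or update it on redelivery.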
    logger.info("Creating file %s " % path)
    try:
        file_group_obj = FileGroup.objects.get(id=file_group_id)
        file_type_obj = FileType.objects.filter(name=file_type).first()
        lims_metadata = copy.deepcopy(data)
        library_copy = copy.deepcopy(library)
        lims_metadata['requestId'] = request_id
        lims_metadata['igocomplete'] = igocomplete
        lims_metadata['R'] = r
        for k, v in library_copy.items():
            lims_metadata[k] = v
        for k, v in run.items():
            lims_metadata[k] = v
        for k, v in request_metadata.items():
            lims_metadata[k] = v
        metadata = format_metadata(lims_metadata)
        # validator = MetadataValidator(METADATA_SCHEMA)
    except Exception as e:
        logger.error("Failed to parse metadata for file %s" % path)
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    try:
        logger.info(lims_metadata)
        # validator.validate(metadata)
    except MetadataValidationException as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    else:
        f = FileRepository.filter(path=path).first()
        if not f:
            create_file_object(path, file_group_obj, lims_metadata, metadata, file_type_obj, sample)

            if update:
                message = "File registered: %s" % path
                update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                send_notification.delay(update_event)
        else:
            if update:
                before = f.file.filemetadata_set.order_by('-created_date').count()
                update_file_object(f.file, path, metadata)
                after = f.file.filemetadata_set.order_by('-created_date').count()
                if after != before:
                    all_metadata = f.file.filemetadata_set.order_by('-created_date')
                    ddiff = DeepDiff(all_metadata[1].metadata,
                                     all_metadata[0].metadata,
                                     ignore_order=True)
                    diff_file_name = "%s_metadata_update.json" % f.file.file_name
                    message = "Updating file metadata: %s, details in file %s\n" % (path, diff_file_name)
                    update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                    diff_details_event = LocalStoreFileEvent(job_group_notifier, diff_file_name, str(ddiff)).to_dict()
                    send_notification.delay(update_event)
                    send_notification.delay(diff_details_event)
            else:
                raise FailedToFetchSampleException("File %s already exists with id %s" % (path, str(f.id)))
Example #4
def create_file_object(path, file_group, lims_metadata, metadata, file_type):
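    # Persist the File and its FileMetadata, queue a CALCULATE_CHECKSUM job, and keep the raw LIMS payload in ImportMetadata.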
    try:
        f = File.objects.create(file_name=os.path.basename(path),
                                path=path,
                                file_group=file_group,
                                file_type=file_type)
        f.save()

        fm = FileMetadata(file=f, metadata=metadata)
        fm.save()
        Job.objects.create(
            run=TYPES["CALCULATE_CHECKSUM"],
            args={
                "file_id": str(f.id),
                "path": path
            },
            status=JobStatus.CREATED,
            max_retry=3,
            children=[],
        )
        ImportMetadata.objects.create(file=f, metadata=lims_metadata)
    except Exception as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException(
            "Failed to create file %s. Error %s" % (path, str(e)))
Example #5
def fetch_samples(request_id, import_pooled_normals=True, import_samples=True, job_group=None, job_group_notifier=None,
                  redelivery=False):
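    # Resolve the JobGroup/JobGroupNotifier, fetch the request from LIMS, then spawn child jobs for pooled normals and samples.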
    logger.info("Fetching sampleIds for requestId:%s" % request_id)
    jg = None
    jgn = None
    try:
        jg = JobGroup.objects.get(id=job_group)
        logger.debug("JobGroup found")
    except JobGroup.DoesNotExist:
        logger.debug("No JobGroup Found")
    try:
        jgn = JobGroupNotifier.objects.get(id=job_group_notifier)
        logger.debug("JobGroupNotifier found")
    except JobGroupNotifier.DoesNotExist:
        logger.debug("No JobGroupNotifier found")
    children = set()
    sample_ids = LIMSClient.get_request_samples(request_id)
    if sample_ids['requestId'] != request_id:
        raise ErrorInconsistentDataException(
            "LIMS returned wrong response for request %s. Got %s instead" % (request_id, sample_ids['requestId']))
    request_metadata = {
        "dataAnalystEmail": sample_ids['dataAnalystEmail'],
        "dataAnalystName": sample_ids['dataAnalystName'],
        "investigatorEmail": sample_ids['investigatorEmail'],
        "investigatorName": sample_ids['investigatorName'],
        "labHeadEmail": sample_ids['labHeadEmail'],
        "labHeadName": sample_ids['labHeadName'],
        "otherContactEmails": sample_ids['otherContactEmails'],
        "dataAccessEmails": sample_ids['dataAccessEmails'],
        "qcAccessEmails": sample_ids['qcAccessEmails'],
        "projectManagerName": sample_ids['projectManagerName'],
        "recipe": sample_ids['recipe'],
        "piEmail": sample_ids["piEmail"],
    }
    set_recipe_event = ETLSetRecipeEvent(job_group_notifier, request_metadata['recipe']).to_dict()
    send_notification.delay(set_recipe_event)
    pooled_normals = sample_ids.get("pooledNormals", [])
    if import_pooled_normals and pooled_normals:
        for f in pooled_normals:
            job = get_or_create_pooled_normal_job(f, jg)
            children.add(str(job.id))
    if import_samples:
        if not sample_ids.get('samples', False):
            raise FailedToFetchSampleException("No samples reported for requestId: %s" % request_id)

        for sample in sample_ids.get('samples', []):
            job = create_sample_job(sample['igoSampleId'],
                                    sample['igocomplete'],
                                    request_id,
                                    request_metadata,
                                    redelivery,
                                    jg,
                                    jgn)
            children.add(str(job.id))
    return list(children)
Example #6
def get_deliveries(timestamp):
    requestIds = requests.get(
        '%s/LimsRest/api/getDeliveries' % settings.LIMS_URL,
        params={"timestamp": timestamp},
        auth=(settings.LIMS_USERNAME, settings.LIMS_PASSWORD),
        verify=False)
    if requestIds.status_code != 200:
        raise FailedToFetchSampleException(
            "Failed to fetch new requests, status_code: %s" %
            requestIds.status_code)
    return requestIds.json()
Example #7
def get_sample_manifest(sample_id):
    sample_metadata = requests.get(
        '%s/LimsRest/api/getSampleManifest' % settings.LIMS_URL,
        params={"igoSampleId": sample_id},
        auth=(settings.LIMS_USERNAME, settings.LIMS_PASSWORD),
        verify=False)
    if sample_metadata.status_code != 200:
        raise FailedToFetchSampleException(
            "Failed to fetch SampleManifest for sampleId:%s, status_code: %s"
            % (sample_id, sample_metadata.status_code))
    return sample_metadata.json()
Example #8
def get_request_samples(request_id):
    sample_ids = requests.get(
        '%s/LimsRest/api/getRequestSamples' % settings.LIMS_URL,
        params={"request": request_id},
        auth=(settings.LIMS_USERNAME, settings.LIMS_PASSWORD),
        verify=False)
    if sample_ids.status_code != 200:
        raise FailedToFetchSampleException(
            "Failed to fetch sampleIds for request %s, status_code: %s" %
            (request_id, sample_ids.status_code))
    return sample_ids.json()
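
The three LIMS helpers above (get_deliveries, get_sample_manifest, get_request_samples) repeat the same GET-plus-status-check pattern against settings.LIMS_URL. A minimal sketch of how that pattern could be factored out; the helper name _lims_get is hypothetical and FailedToFetchSampleException is assumed to be importable from the surrounding project:

import requests
from django.conf import settings


def _lims_get(endpoint, params, error_message):
    # Hypothetical shared helper: issue the LIMS GET and raise on any non-200 response.
    response = requests.get(
        '%s/LimsRest/api/%s' % (settings.LIMS_URL, endpoint),
        params=params,
        auth=(settings.LIMS_USERNAME, settings.LIMS_PASSWORD),
        verify=False)  # TLS verification is disabled in the original helpers as well
    if response.status_code != 200:
        raise FailedToFetchSampleException(
            "%s, status_code: %s" % (error_message, response.status_code))
    return response.json()


def get_request_samples(request_id):
    return _lims_get("getRequestSamples", {"request": request_id},
                     "Failed to fetch sampleIds for request %s" % request_id)
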
Example #9
def update_file_object(file_object, path, metadata):
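    # Apply the new path/metadata through UpdateFileSerializer, attributing the change to the ETL user when that account exists.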
    data = {
        "path": path,
        "metadata": metadata,
    }
    try:
        user = User.objects.get(username=settings.ETL_USER)
        data['user'] = user.id
    except User.DoesNotExist:
        user = None
    serializer = UpdateFileSerializer(file_object, data=data)
    if serializer.is_valid():
        serializer.save()
    else:
        logger.error("Failed to update file %s: Error %s" % (path, serializer.errors))
        raise FailedToFetchSampleException(
            "Failed to update metadata for file %s: %s" % (path, serializer.errors))
Example #10
def create_or_update_file(
    path,
    request_id,
    file_group_id,
    file_type,
    igocomplete,
    data,
    library,
    run,
    request_metadata,
    r,
    update=False,
    job_group_notifier=None,
):
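    # Same flow as Example #3, except the fastq is first remapped (and copied) to a recipe-specific path before the File record is created or updated.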
    logger.info("Creating file %s " % path)
    try:
        file_group_obj = FileGroup.objects.get(id=file_group_id)
        file_type_obj = FileType.objects.filter(name=file_type).first()
        lims_metadata = copy.deepcopy(data)
        library_copy = copy.deepcopy(library)
        lims_metadata["requestId"] = request_id
        lims_metadata["igocomplete"] = igocomplete
        lims_metadata["R"] = r
        for k, v in library_copy.items():
            lims_metadata[k] = v
        for k, v in run.items():
            lims_metadata[k] = v
        for k, v in request_metadata.items():
            lims_metadata[k] = v
        metadata = format_metadata(lims_metadata)
        # validator = MetadataValidator(METADATA_SCHEMA)
    except Exception as e:
        logger.error("Failed to parse metadata for file %s" % path)
        raise FailedToFetchSampleException(
            "Failed to create file %s. Error %s" % (path, str(e)))
    try:
        logger.info(lims_metadata)
        # validator.validate(metadata)
    except MetadataValidationException as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException(
            "Failed to create file %s. Error %s" % (path, str(e)))
    else:
        recipe = metadata.get("recipe", "")
        new_path = CopyService.remap(recipe, path)  # Get copied file path
        f = FileRepository.filter(path=new_path).first()
        if not f:
            try:
                if path != new_path:
                    CopyService.copy(path, new_path)
            except Exception as e:
                if "Permission denied" in str(e):
                    raise FailedToCopyFilePermissionDeniedException(
                        "Failed to copy file %s. Error %s" % (path, str(e)))
                else:
                    raise FailedToCopyFileException(
                        "Failed to copy file %s. Error %s" % (path, str(e)))
            create_file_object(new_path, file_group_obj, lims_metadata,
                               metadata, file_type_obj)
            if update:
                message = "File registered: %s" % path
                update_event = RedeliveryUpdateEvent(
                    job_group_notifier, message).to_dict()
                send_notification.delay(update_event)
        else:
            if update:
                before = f.file.filemetadata_set.order_by(
                    "-created_date").count()
                update_file_object(f.file, f.file.path, metadata)
                after = f.file.filemetadata_set.order_by(
                    "-created_date").count()
                if after != before:
                    all_metadata = f.file.filemetadata_set.order_by(
                        "-created_date")
                    ddiff = DeepDiff(all_metadata[1].metadata,
                                     all_metadata[0].metadata,
                                     ignore_order=True)
                    diff_file_name = "%s_metadata_update_%s.json" % (
                        f.file.file_name, all_metadata[0].version)
                    message = "Updating file metadata: %s, details in file %s\n" % (
                        path, diff_file_name)
                    update_event = RedeliveryUpdateEvent(
                        job_group_notifier, message).to_dict()
                    diff_details_event = LocalStoreFileEvent(
                        job_group_notifier, diff_file_name,
                        str(ddiff)).to_dict()
                    send_notification.delay(update_event)
                    send_notification.delay(diff_details_event)
            else:
                raise FailedToFetchSampleException(
                    "File %s already exists with id %s" % (path, str(f.id)))
Example #11
def fetch_samples(
    request_id,
    import_pooled_normals=True,
    import_samples=True,
    job_group=None,
    job_group_notifier=None,
    redelivery=False,
):
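    # Variant of fetch_samples that also creates Patient and Sample records from each sample manifest before dispatching per-sample jobs.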
    logger.info("Fetching sampleIds for requestId:%s" % request_id)
    jg = None
    jgn = None
    try:
        jg = JobGroup.objects.get(id=job_group)
        logger.debug("JobGroup found")
    except JobGroup.DoesNotExist:
        logger.debug("No JobGroup Found")
    try:
        jgn = JobGroupNotifier.objects.get(id=job_group_notifier)
        logger.debug("JobGroupNotifier found")
    except JobGroupNotifier.DoesNotExist:
        logger.debug("No JobGroupNotifier found")
    children = set()
    sample_ids = LIMSClient.get_request_samples(request_id)
    if sample_ids["requestId"] != request_id:
        raise ErrorInconsistentDataException(
            "LIMS returned wrong response for request %s. Got %s instead" %
            (request_id, sample_ids["requestId"]))
    request_metadata = {
        "dataAnalystEmail": sample_ids["dataAnalystEmail"],
        "dataAnalystName": sample_ids["dataAnalystName"],
        "investigatorEmail": sample_ids["investigatorEmail"],
        "investigatorName": sample_ids["investigatorName"],
        "labHeadEmail": sample_ids["labHeadEmail"],
        "labHeadName": sample_ids["labHeadName"],
        "otherContactEmails": sample_ids["otherContactEmails"],
        "dataAccessEmails": sample_ids["dataAccessEmails"],
        "qcAccessEmails": sample_ids["qcAccessEmails"],
        "projectManagerName": sample_ids["projectManagerName"],
        "recipe": sample_ids["recipe"],
        "piEmail": sample_ids["piEmail"],
    }
    set_recipe_event = ETLSetRecipeEvent(job_group_notifier,
                                         request_metadata["recipe"]).to_dict()
    send_notification.delay(set_recipe_event)
    pooled_normals = sample_ids.get("pooledNormals", [])
    if import_pooled_normals and pooled_normals:
        for f in pooled_normals:
            job = get_or_create_pooled_normal_job(f,
                                                  jg,
                                                  jgn,
                                                  redelivery=redelivery)
            children.add(str(job.id))
    if import_samples:
        if not sample_ids.get("samples", False):
            raise FailedToFetchSampleException(
                "No samples reported for requestId: %s" % request_id)

        for sample in sample_ids.get("samples", []):
            sampleMetadata = LIMSClient.get_sample_manifest(
                sample["igoSampleId"])
            try:
                data = sampleMetadata[0]
            except Exception:
                # Bug fix: a bare `pass` here would leave `data` undefined for the lookups below.
                raise FailedToFetchSampleException(
                    "Failed to fetch SampleManifest for sampleId:%s. Invalid response" % sample["igoSampleId"])
            patient_id = format_patient_id(data.get("cmoPatientId"))

            if not Patient.objects.filter(patient_id=patient_id):
                Patient.objects.create(patient_id=patient_id)

            sample_name = data.get("cmoSampleName", None)
            specimen_type = data.get("specimenType", None)
            cmo_sample_name = format_sample_name(sample_name, specimen_type)

            if not Sample.objects.filter(sample_id=sample["igoSampleId"],
                                         sample_name=sample_name,
                                         cmo_sample_name=cmo_sample_name):
                Sample.objects.create(sample_id=sample["igoSampleId"],
                                      sample_name=sample_name,
                                      cmo_sample_name=cmo_sample_name)

            job = create_sample_job(sample["igoSampleId"],
                                    sample["igoComplete"], request_id,
                                    request_metadata, redelivery, jg, jgn)
            children.add(str(job.id))
    return list(children)