def validate_csv_sample(csvSampleDict, request):
    """
    Validate one CSV row of sample data and collect every problem found.

    Known columns are read from ``csvSampleDict`` and handed to the matching
    ``sample_validator`` check; optional columns are only checked when they
    hold a value.  User-defined attributes are checked last through
    ``_validate_csv_user_defined_attributes``.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        request: passed through to ``_validate_csv_user_defined_attributes``.

    Returns:
        Tuple ``(failed, isToSkipRow, isToAbort)``.  ``failed`` is a list of
        ``(column, errorMessage)`` pairs; ``isToSkipRow`` is True when no cell
        differs from the empty string; ``isToAbort`` is True when an
        unexpected error interrupted validation of the row.
    """
    failed = []
    isToSkipRow = False
    isToAbort = False

    logger.debug("ENTER import_sample_processor.validate_csv_sample() csvSampleDict=%s; " % (csvSampleDict))
    try:
        sampleDisplayedName = csvSampleDict.get(COLUMN_SAMPLE_NAME, '').strip()
        sampleExtId = csvSampleDict.get(COLUMN_SAMPLE_EXT_ID, '').strip()
        sampleControlType = csvSampleDict.get(COLUMN_CONTROLTYPE, '').strip()
        sampleGender = csvSampleDict.get(COLUMN_GENDER, '').strip()
        sampleGroupType = csvSampleDict.get(COLUMN_GROUP_TYPE, '').strip()
        sampleGroup = csvSampleDict.get(COLUMN_GROUP, '').strip()
        sampleDescription = csvSampleDict.get(COLUMN_SAMPLE_DESCRIPTION, '').strip()
        barcodeKit = csvSampleDict.get(COLUMN_BARCODE_KIT, '')
        barcodeAssignment = csvSampleDict.get(COLUMN_BARCODE, '')
        nucleotideType = csvSampleDict.get(COLUMN_NUCLEOTIDE_TYPE, "").strip()
        cancerType = csvSampleDict.get(COLUMN_CANCER_TYPE, "").strip()
        # NOTE(review): no string default here, so a file lacking this column
        # raises AttributeError and ends in the abort branch below -
        # presumably intended; confirm before changing.
        cellularityPct = csvSampleDict.get(COLUMN_CELLULARITY_PCT, None).strip()
        pcrPlateRow = csvSampleDict.get(COLUMN_PCR_PLATE_POSITION, "").strip()
        biopsyDays = csvSampleDict.get(COLUMN_BIOPSY_DAYS, "0").strip()
        cellNum = csvSampleDict.get(COLUMN_CELL_NUM, "").strip()
        coupleId = csvSampleDict.get(COLUMN_COUPLE_ID, "").strip()
        embryoId = csvSampleDict.get(COLUMN_EMBRYO_ID, "").strip()

        # Barcode cells are trimmed separately so that the trimming can be
        # logged; both warnings are emitted before either value is trimmed.
        if barcodeKit != barcodeKit.strip():
            logger.warning("The BarcodeKitName(%s) contains Leading/Trailing spaces and got trimmed." % barcodeKit)
        if barcodeAssignment != barcodeAssignment.strip():
            logger.warning("The BarcodeName (%s) of BarcodeKitName(%s) contains Leading/Trailing spaces and got trimmed." % (barcodeAssignment, barcodeKit))
        barcodeKit = barcodeKit.strip()
        barcodeAssignment = barcodeAssignment.strip()

        # skip blank line (judged on the raw, untrimmed cell values)
        if not any(v != '' for v in csvSampleDict.values()):
            isToSkipRow = True
            return failed, isToSkipRow, isToAbort

        # Checks that always run.
        isValid, errorMessage = sample_validator.validate_sampleDisplayedName(sampleDisplayedName)
        if not isValid:
            failed.append((COLUMN_SAMPLE_NAME, errorMessage))

        isValid, errorMessage = sample_validator.validate_sampleExternalId(sampleExtId)
        if not isValid:
            failed.append((COLUMN_SAMPLE_EXT_ID, errorMessage))

        isValid, errorMessage = sample_validator.validate_sampleDescription(sampleDescription)
        if not isValid:
            failed.append((COLUMN_SAMPLE_DESCRIPTION, errorMessage))

        isValid, errorMessage, _ = sample_validator.validate_sampleGender(sampleGender)
        if not isValid:
            failed.append((COLUMN_GENDER, errorMessage))

        isValid, errorMessage, _ = sample_validator.validate_sampleGroupType(sampleGroupType)
        if not isValid:
            failed.append((COLUMN_GROUP_TYPE, errorMessage))

        # Checks that only run when the cell holds a value.
        if sampleGroup:
            isValid, errorMessage = sample_validator.validate_sampleGroup(sampleGroup)
            if not isValid:
                failed.append((COLUMN_GROUP, errorMessage))

        if cancerType:
            isValid, errorMessage, _ = sample_validator.validate_cancerType(cancerType)
            if not isValid:
                failed.append((COLUMN_CANCER_TYPE, errorMessage))

        if cellularityPct:
            isValid, errorMessage, _ = sample_validator.validate_cellularityPct(cellularityPct)
            if not isValid:
                failed.append((COLUMN_CELLULARITY_PCT, errorMessage))

        if nucleotideType:
            isValid, errorMessage, _ = sample_validator.validate_nucleotideType(nucleotideType)
            if not isValid:
                failed.append((COLUMN_NUCLEOTIDE_TYPE, errorMessage))

        if pcrPlateRow:
            isValid, errorMessage, _ = sample_validator.validate_pcrPlateRow(pcrPlateRow)
            if not isValid:
                failed.append((COLUMN_PCR_PLATE_POSITION, errorMessage))

        if biopsyDays:
            isValid, errorMessage = sample_validator.validate_sampleBiopsyDays(biopsyDays)
            if not isValid:
                failed.append((COLUMN_BIOPSY_DAYS, errorMessage))

        if cellNum:
            isValid, errorMessage = sample_validator.validate_sampleCellNum(cellNum)
            if not isValid:
                failed.append((COLUMN_CELL_NUM, errorMessage))

        if coupleId:
            isValid, errorMessage = sample_validator.validate_sampleCoupleId(coupleId)
            if not isValid:
                failed.append((COLUMN_COUPLE_ID, errorMessage))

        if embryoId:
            isValid, errorMessage = sample_validator.validate_sampleEmbryoId(embryoId)
            if not isValid:
                failed.append((COLUMN_EMBRYO_ID, errorMessage))

        if sampleControlType:
            isValid, errorMessage, _ = sample_validator.validate_controlType(sampleControlType)
            if not isValid:
                failed.append((COLUMN_CONTROLTYPE, errorMessage))

        # Barcode kit and barcode id are validated together; ``item`` names
        # which of the two the error message belongs to.
        isValid, errorMessage, item = sample_validator.validate_barcodekit_and_id_str(barcodeKit, barcodeAssignment)
        if not isValid:
            if item == 'barcodeKit':
                failed.append((COLUMN_BARCODE_KIT, errorMessage))
            else:
                failed.append((COLUMN_BARCODE, errorMessage))

        # validate user-defined custom attributes
        failed_userDefined = _validate_csv_user_defined_attributes(csvSampleDict, request)
        failed.extend(failed_userDefined)

        logger.debug("import_sample_processor.validate_csv_sample() failed=%s" % (failed))
        return failed, isToSkipRow, isToAbort
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as a bad CSV file.
        logger.exception(format_exc())
        failed.append(("File Contents", " the CSV file does not seem to have all the columns. Click the Sample File Format button for an example. "))
        isToAbort = True
        logger.debug("import_sample_processor.validate_csv_sample() failed=%s" % (failed))
        return failed, isToSkipRow, isToAbort
def _create_sampleSetItem(csvSampleDict, request, user, sampleSet_ids):
    """
    Persist one CSV row as a Sample plus one SampleSetItem per sample set.

    The Sample is looked up (or created) by name and external id; when it
    already exists and its description differs, the description is updated.
    A SampleSetItem is then looked up or created for every id in
    ``sampleSet_ids``.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        request: not used by this function.
        user: stored as creator / lastModifiedUser of new SampleSetItems.
        sampleSet_ids: iterable of sample set ids to attach the sample to.

    Returns:
        Tuple ``(sample, sampleSetItem, ssi_sid)``.  ``sampleSetItem`` is the
        item for the last id in ``sampleSet_ids`` (``None`` when that
        iterable is empty) and ``ssi_sid`` is the value returned by
        ``transaction.savepoint()``.
    """
    sampleDisplayedName = csvSampleDict.get(COLUMN_SAMPLE_NAME, '').strip()
    sampleExtId = csvSampleDict.get(COLUMN_SAMPLE_EXT_ID, '').strip()
    sampleGender = csvSampleDict.get(COLUMN_GENDER, '').strip()
    sampleControlType = csvSampleDict.get(COLUMN_CONTROLTYPE, '').strip()
    sampleGroupType = csvSampleDict.get(COLUMN_GROUP_TYPE, None)
    sampleGroup = csvSampleDict.get(COLUMN_GROUP, '0').strip()
    sampleDescription = csvSampleDict.get(COLUMN_SAMPLE_DESCRIPTION, '').strip()
    barcodeKit = csvSampleDict.get(COLUMN_BARCODE_KIT, '').strip()
    barcodeAssignment = csvSampleDict.get(COLUMN_BARCODE, '').strip()
    nucleotideType = (csvSampleDict.get(COLUMN_NUCLEOTIDE_TYPE, "").strip()
                      or csvSampleDict.get(ALTERNATE_COLUMN_NUCLEOTIDE_TYPE, "").strip())
    cancerType = csvSampleDict.get(COLUMN_CANCER_TYPE, "").strip()
    # These two columns previously defaulted to None, so a row without them
    # crashed on None.strip(); a missing/None cell is now treated as empty.
    cellularityPct = (csvSampleDict.get(COLUMN_CELLULARITY_PCT) or "").strip()
    pcrPlateRow = csvSampleDict.get(COLUMN_PCR_PLATE_POSITION, "").strip()
    biopsyDays = csvSampleDict.get(COLUMN_BIOPSY_DAYS, "0").strip()
    cellNum = csvSampleDict.get(COLUMN_CELL_NUM, "").strip()
    coupleId = (csvSampleDict.get(COLUMN_COUPLE_ID) or "").strip()
    embryoId = csvSampleDict.get(COLUMN_EMBRYO_ID, "").strip()

    if not sampleGroup:
        sampleGroup = '0'

    # validation has been done already, this is just to get the official value
    _, _, nucleotideType_internal_value = sample_validator.validate_nucleotideType(nucleotideType)
    _, _, gender_CV_value = sample_validator.validate_sampleGender(sampleGender)
    _, _, role_CV_value = sample_validator.validate_sampleGroupType(sampleGroupType)
    _, _, controlType_CV_value = sample_validator.validate_controlType(sampleControlType)
    _, _, cancerType_CV_value = sample_validator.validate_cancerType(cancerType)
    _, _, pcrPlateRow_internal_value = sample_validator.validate_pcrPlateRow(pcrPlateRow)

    sampleName = sampleDisplayedName.replace(' ', '_')
    sample_kwargs = {
        'displayedName': sampleDisplayedName,
        'status': 'created',
        'description': sampleDescription,
        'date': timezone.now(),
    }

    sample, isCreated = Sample.objects.get_or_create(name=sampleName, externalId=sampleExtId, defaults=sample_kwargs)

    if isCreated:
        logger.debug("import_sample_processor._create_sampleSetItem() new sample created for sample=%s; id=%d" % (sampleDisplayedName, sample.id))
    elif sample.description != sampleDescription:
        sample.description = sampleDescription
        sample.save()
        logger.debug("import_sample_processor._create_sampleSetItem() just updated sample description for sample=%s; id=%d" % (sampleDisplayedName, sample.id))

    # Neither value depends on the sample set, so compute them once instead
    # of repeating the barcode query for every sample set id.
    dnabarcode = None
    if barcodeKit and barcodeAssignment:
        dnabarcode = models.dnaBarcode.objects.get(name__iexact=barcodeKit, id_str__iexact=barcodeAssignment)
    pcrPlateColumn = "1" if pcrPlateRow_internal_value else ""

    # Stays None when sampleSet_ids is empty, instead of leaving the name
    # unbound for the return statement.
    sampleSetItem = None
    for sampleSetId in sampleSet_ids:
        logger.debug("import_sample_processor._create_sampleSetItem() going to create sampleSetItem for sample=%s; sampleSetId=%s in sampleSet_ids=%s" % (sampleDisplayedName, str(sampleSetId), sampleSet_ids))

        currentDateTime = timezone.now()

        sampleSetItem_kwargs = {
            'gender': gender_CV_value,
            'relationshipRole': role_CV_value,
            'relationshipGroup': sampleGroup,
            'cancerType': cancerType_CV_value,
            'cellularityPct': cellularityPct if cellularityPct else None,
            'biopsyDays': int(biopsyDays) if biopsyDays else 0,
            "cellNum": cellNum,
            "coupleId": coupleId,
            "embryoId": embryoId,
            'creator': user,
            'creationDate': currentDateTime,
            'lastModifiedUser': user,
            'lastModifiedDate': currentDateTime,
            'description': sampleDescription,
            'controlType': controlType_CV_value,
        }

        sampleSetItem, isCreated = SampleSetItem.objects.get_or_create(
            sample=sample,
            sampleSet_id=sampleSetId,
            dnabarcode=dnabarcode,
            description=sampleDescription,
            nucleotideType=nucleotideType_internal_value,
            pcrPlateRow=pcrPlateRow_internal_value,
            pcrPlateColumn=pcrPlateColumn,
            defaults=sampleSetItem_kwargs)

        logger.debug("import_sample_processor._create_sampleSetItem() after get_or_create isCreated=%s; sampleSetItem=%s; samplesetItem.id=%d" % (str(isCreated), sampleDisplayedName, sampleSetItem.id))

    ssi_sid = transaction.savepoint()

    return sample, sampleSetItem, ssi_sid
def validate_csv_sample(csvSampleDict, request):
    """
    Validate one CSV row of sample data and collect every problem found.

    Known columns are read (whitespace-trimmed) from ``csvSampleDict`` and
    handed to the matching ``sample_validator`` check; optional columns are
    only checked when they hold a value.  User-defined attributes are
    checked last through ``_validate_csv_user_defined_attributes``.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        request: passed through to ``_validate_csv_user_defined_attributes``.

    Returns:
        Tuple ``(failed, isToSkipRow, isToAbort)``.  ``failed`` is a list of
        ``(column, errorMessage)`` pairs; ``isToSkipRow`` is True when no cell
        differs from the empty string; ``isToAbort`` is True when an
        unexpected error interrupted validation of the row.
    """
    failed = []
    isToSkipRow = False
    isToAbort = False

    logger.debug("ENTER import_sample_processor.validate_csv_sample() csvSampleDict=%s; " % (csvSampleDict))
    try:
        sampleDisplayedName = csvSampleDict.get(COLUMN_SAMPLE_NAME, '').strip()
        sampleExtId = csvSampleDict.get(COLUMN_SAMPLE_EXT_ID, '').strip()
        sampleGender = csvSampleDict.get(COLUMN_GENDER, '').strip()
        sampleGroupType = csvSampleDict.get(COLUMN_GROUP_TYPE, '').strip()
        sampleGroup = csvSampleDict.get(COLUMN_GROUP, '').strip()
        sampleDescription = csvSampleDict.get(COLUMN_SAMPLE_DESCRIPTION, '').strip()
        # NOTE(review): this constant is spelled COLUMNS_BARCODE_KIT (plural),
        # unlike its COLUMN_* neighbours - confirm it is defined under that name.
        barcodeKit = csvSampleDict.get(COLUMNS_BARCODE_KIT, '').strip()
        barcodeAssignment = csvSampleDict.get(COLUMN_BARCODE, '').strip()
        nucleotideType = csvSampleDict.get(COLUMN_NUCLEOTIDE_TYPE, "").strip()
        cancerType = csvSampleDict.get(COLUMN_CANCER_TYPE, "").strip()
        # NOTE(review): no string default here, so a file lacking this column
        # raises AttributeError and ends in the abort branch below -
        # presumably intended; confirm before changing.
        cellularityPct = csvSampleDict.get(COLUMN_CELLULARITY_PCT, None).strip()
        pcrPlateRow = csvSampleDict.get(COLUMN_PCR_PLATE_POSITION, "").strip()

        # skip blank line (judged on the raw, untrimmed cell values)
        if not any(v != '' for v in csvSampleDict.values()):
            isToSkipRow = True
            return failed, isToSkipRow, isToAbort

        # Checks that always run.
        isValid, errorMessage = sample_validator.validate_sampleDisplayedName(sampleDisplayedName)
        if not isValid:
            failed.append((COLUMN_SAMPLE_NAME, errorMessage))

        isValid, errorMessage = sample_validator.validate_sampleExternalId(sampleExtId)
        if not isValid:
            failed.append((COLUMN_SAMPLE_EXT_ID, errorMessage))

        isValid, errorMessage = sample_validator.validate_sampleDescription(sampleDescription)
        if not isValid:
            failed.append((COLUMN_SAMPLE_DESCRIPTION, errorMessage))

        isValid, errorMessage, _ = sample_validator.validate_sampleGender(sampleGender)
        if not isValid:
            failed.append((COLUMN_GENDER, errorMessage))

        isValid, errorMessage, _ = sample_validator.validate_sampleGroupType(sampleGroupType)
        if not isValid:
            failed.append((COLUMN_GROUP_TYPE, errorMessage))

        # Checks that only run when the cell holds a value.
        if sampleGroup:
            isValid, errorMessage = sample_validator.validate_sampleGroup(sampleGroup)
            if not isValid:
                failed.append((COLUMN_GROUP, errorMessage))

        if cancerType:
            isValid, errorMessage, _ = sample_validator.validate_cancerType(cancerType)
            if not isValid:
                failed.append((COLUMN_CANCER_TYPE, errorMessage))

        if cellularityPct:
            isValid, errorMessage, _ = sample_validator.validate_cellularityPct(cellularityPct)
            if not isValid:
                failed.append((COLUMN_CELLULARITY_PCT, errorMessage))

        if nucleotideType:
            isValid, errorMessage, _ = sample_validator.validate_nucleotideType(nucleotideType)
            if not isValid:
                failed.append((COLUMN_NUCLEOTIDE_TYPE, errorMessage))

        if pcrPlateRow:
            isValid, errorMessage, _ = sample_validator.validate_pcrPlateRow(pcrPlateRow)
            if not isValid:
                failed.append((COLUMN_PCR_PLATE_POSITION, errorMessage))

        # Barcode kit and barcode id are validated together; ``item`` names
        # which of the two the error message belongs to.
        isValid, errorMessage, item = sample_validator.validate_barcodekit_and_id_str(barcodeKit, barcodeAssignment)
        if not isValid:
            if item == 'barcodeKit':
                failed.append((COLUMNS_BARCODE_KIT, errorMessage))
            else:
                failed.append((COLUMN_BARCODE, errorMessage))

        # validate user-defined custom attributes
        failed_userDefined = _validate_csv_user_defined_attributes(csvSampleDict, request)
        failed.extend(failed_userDefined)

        logger.debug("import_sample_processor.validate_csv_sample() failed=%s" % (failed))
        return failed, isToSkipRow, isToAbort
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as a bad CSV file.
        logger.exception(format_exc())
        failed.append(("File Contents", " the CSV file does not seem to have all the columns. Click the Sample File Format button for an example. "))
        isToAbort = True
        logger.debug("import_sample_processor.validate_csv_sample() failed=%s" % (failed))
        return failed, isToSkipRow, isToAbort
def get_sampleSetItem_kwargs(csvSampleDict, user):
    """
    Build the keyword dictionary describing one CSV row's sample set item.

    Cell text is trimmed, the two date columns are parsed as ``%Y-%m-%d``
    dates, and controlled-vocabulary columns are run through
    ``sample_validator`` to obtain their official values.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        user: stored under the ``creator`` and ``lastModifiedUser`` keys.

    Returns:
        dict with the sample / sample-set-item values for this row.
    """

    def _cell(column, default=""):
        # Trimmed text of one cell; ``default`` applies to a missing column.
        return csvSampleDict.get(column, default).strip()

    displayed_name = _cell(COLUMN_SAMPLE_NAME)
    external_id = _cell(COLUMN_SAMPLE_EXT_ID)
    gender_text = _cell(COLUMN_GENDER)
    control_type_text = _cell(COLUMN_CONTROLTYPE)
    # Passed to the validator exactly as stored (not trimmed, may be None).
    group_type_text = csvSampleDict.get(COLUMN_GROUP_TYPE, None)
    group = _cell(COLUMN_GROUP, "0") or "0"
    description = _cell(COLUMN_SAMPLE_DESCRIPTION)
    barcode_kit = _cell(COLUMN_BARCODE_KIT)
    barcode = _cell(COLUMN_BARCODE)
    plate_row_text = _cell(COLUMN_PCR_PLATE_POSITION)
    nucleotide_text = _cell(COLUMN_NUCLEOTIDE_TYPE) or _cell(ALTERNATE_COLUMN_NUCLEOTIDE_TYPE)
    source = _cell(COLUMN_SAMPLE_SOURCE)
    panel_pool_type = _cell(COLUMN_PANEL_POOL_TYPE)

    collection_date = _cell(COLUMN_SAMPLE_COLLECTION_DATE)
    receipt_date = _cell(COLUMN_SAMPLE_RECEIPT_DATE)
    date_format = "%Y-%m-%d"
    if collection_date:
        collection_date = datetime.strptime(str(collection_date), date_format).date()
    if receipt_date:
        receipt_date = datetime.strptime(str(receipt_date), date_format).date()

    cancer_text = _cell(COLUMN_CANCER_TYPE)

    # A missing, empty or whitespace-only cellularity cell ends up as None.
    raw_cellularity = csvSampleDict.get(COLUMN_CELLULARITY_PCT, None)
    cellularity = raw_cellularity.strip() if raw_cellularity else None

    biopsy_days = _cell(COLUMN_BIOPSY_DAYS, "0")
    cell_num = _cell(COLUMN_CELL_NUM)

    # Falsy couple ids (None, "") are kept as they are; others are trimmed.
    couple_id = csvSampleDict.get(COLUMN_COUPLE_ID, None)
    couple_id = couple_id.strip() if couple_id else couple_id

    embryo_id = _cell(COLUMN_EMBRYO_ID)
    population = _cell(COLUMN_SAMPLE_POPULATION)
    mouse_strains = _cell(COLUMN_SAMPLE_MOUSE_STRAINS)

    now = timezone.now()
    sample_name = displayed_name.replace(" ", "_")

    # validation has been done already, this is just to get the official value
    nucleotide_value = sample_validator.validate_nucleotideType(
        nucleotide_text, field_label=COLUMN_NUCLEOTIDE_TYPE)[2]
    gender_value = sample_validator.validate_sampleGender(
        gender_text, field_label=COLUMN_GENDER)[2]
    role_value = sample_validator.validate_sampleGroupType(
        group_type_text, field_label=COLUMN_GROUP_TYPE)[2]
    control_type_value = sample_validator.validate_controlType(
        control_type_text, field_label=COLUMN_CONTROLTYPE)[2]
    cancer_value = sample_validator.validate_cancerType(
        cancer_text, field_label=COLUMN_CANCER_TYPE)[2]
    plate_row_value = sample_validator.validate_pcrPlateRow(
        plate_row_text, field_label=COLUMN_PCR_PLATE_POSITION)[2]

    return {
        "sampleName": sample_name,
        "sampleDisplayedName": displayed_name,
        "sampleExtId": external_id,
        "barcodeKit": barcode_kit,
        "barcodeAssignment": barcode,
        "gender": gender_value,
        "relationshipRole": role_value,
        "relationshipGroup": group,
        "cancerType": cancer_value,
        "pcrPlateRow": plate_row_value,
        "nucleotideType": nucleotide_value,
        "sampleSource": source,
        "panelPoolType": panel_pool_type,
        "cellularityPct": cellularity or None,
        "biopsyDays": int(biopsy_days) if biopsy_days else 0,
        "cellNum": cell_num,
        "coupleId": couple_id,
        "embryoId": embryo_id,
        "creator": user,
        "creationDate": now,
        "lastModifiedUser": user,
        "lastModifiedDate": now,
        "description": description,
        "controlType": control_type_value,
        "displayedName": displayed_name,
        "sampleStatus": "created",
        "sampleDescription": description,
        "sampleCollectionDate": collection_date or None,
        "sampleReceiptDate": receipt_date or None,
        "population": population,
        "mouseStrains": mouse_strains,
        "date": timezone.now(),
    }
def validate_csv_sample(csvSampleDict, request):
    """
    Check one CSV row against the sample validators.

    Every known column is read from ``csvSampleDict`` and passed to its
    ``sample_validator`` check (optional columns only when filled in), then
    the user-defined attributes are checked through
    ``_validate_csv_user_defined_attributes``.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        request: passed through to ``_validate_csv_user_defined_attributes``.

    Returns:
        Tuple ``(errors, skip_row, abort)``: a list of
        ``(column, errorMessage)`` pairs, whether the row is blank and should
        be skipped, and whether an unexpected error interrupted validation.
    """
    errors = []
    skip_row = False
    abort = False

    def _cell(column, default=""):
        # Trimmed text of one cell; ``default`` applies to a missing column.
        return csvSampleDict.get(column, default).strip()

    def _flag(column, ok, message):
        # Remember the validator's message when its check did not pass.
        if not ok:
            errors.append((column, message))

    logger.debug(
        "ENTER import_sample_processor.validate_csv_sample() csvSampleDict=%s; "
        % (csvSampleDict))

    try:
        name = _cell(COLUMN_SAMPLE_NAME)
        ext_id = _cell(COLUMN_SAMPLE_EXT_ID)
        control_type = _cell(COLUMN_CONTROLTYPE)
        gender = _cell(COLUMN_GENDER)
        group_type = _cell(COLUMN_GROUP_TYPE)
        group = _cell(COLUMN_GROUP) or None
        description = _cell(COLUMN_SAMPLE_DESCRIPTION)
        kit = csvSampleDict.get(COLUMN_BARCODE_KIT, "")
        barcode = csvSampleDict.get(COLUMN_BARCODE, "")
        nucleotide = _cell(COLUMN_NUCLEOTIDE_TYPE)
        cancer = _cell(COLUMN_CANCER_TYPE)
        # A missing column yields None here, whose .strip() raises and sends
        # the row to the handler at the bottom.
        cellularity = _cell(COLUMN_CELLULARITY_PCT, None) or None
        plate_row = _cell(COLUMN_PCR_PLATE_POSITION)
        biopsy_days = _cell(COLUMN_BIOPSY_DAYS, "0")
        cell_num = _cell(COLUMN_CELL_NUM)
        couple_id = _cell(COLUMN_COUPLE_ID)
        embryo_id = _cell(COLUMN_EMBRYO_ID)
        source = _cell(COLUMN_SAMPLE_SOURCE)
        panel_pool = _cell(COLUMN_PANEL_POOL_TYPE)
        collected_on = _cell(COLUMN_SAMPLE_COLLECTION_DATE)
        received_on = _cell(COLUMN_SAMPLE_RECEIPT_DATE)
        population = _cell(COLUMN_SAMPLE_POPULATION)
        strains = _cell(COLUMN_SAMPLE_MOUSE_STRAINS)

        # Trim off barcode and barcode kit leading and trailing spaces and
        # update the log file if any were present.
        if kit != kit.strip():
            logger.warning(
                "The BarcodeKitName(%s) contains Leading/Trailing spaces and got trimmed."
                % kit)
        if barcode != barcode.strip():
            logger.warning(
                "The BarcodeName (%s) of BarcodeKitName(%s) contains Leading/Trailing spaces and got trimmed."
                % (barcode, kit))
        kit, barcode = kit.strip(), barcode.strip()

        # skip blank line
        if all(value == "" for value in csvSampleDict.values()):
            skip_row = True
            return errors, skip_row, abort

        # Checks that always run.
        ok, message = sample_validator.validate_sampleDisplayedName(
            name, field_label=COLUMN_SAMPLE_NAME)
        _flag(COLUMN_SAMPLE_NAME, ok, message)

        ok, message = sample_validator.validate_sampleExternalId(
            ext_id, field_label=COLUMN_SAMPLE_EXT_ID)
        _flag(COLUMN_SAMPLE_EXT_ID, ok, message)

        ok, message = sample_validator.validate_sampleDescription(
            description, field_label=COLUMN_SAMPLE_DESCRIPTION)
        _flag(COLUMN_SAMPLE_DESCRIPTION, ok, message)

        ok, message, _ = sample_validator.validate_sampleGender(
            gender, field_label=COLUMN_GENDER)
        _flag(COLUMN_GENDER, ok, message)

        ok, message, _ = sample_validator.validate_sampleGroupType(
            group_type, field_label=COLUMN_GROUP_TYPE)
        _flag(COLUMN_GROUP_TYPE, ok, message)

        # Checks that only run when the cell holds a value.
        if group:
            ok, message = sample_validator.validate_sampleGroup(
                group, field_label=COLUMN_GROUP)
            _flag(COLUMN_GROUP, ok, message)

        if cancer:
            ok, message, _ = sample_validator.validate_cancerType(
                cancer, field_label=COLUMN_CANCER_TYPE)
            _flag(COLUMN_CANCER_TYPE, ok, message)

        if cellularity:
            ok, message = sample_validator.validate_cellularityPct(
                cellularity, field_label=COLUMN_CELLULARITY_PCT)
            _flag(COLUMN_CELLULARITY_PCT, ok, message)

        if plate_row:
            ok, message, _ = sample_validator.validate_pcrPlateRow(
                plate_row, field_label=COLUMN_PCR_PLATE_POSITION)
            _flag(COLUMN_PCR_PLATE_POSITION, ok, message)

        if nucleotide:
            ok, message, _ = sample_validator.validate_nucleotideType(
                nucleotide, field_label=COLUMN_NUCLEOTIDE_TYPE)
            _flag(COLUMN_NUCLEOTIDE_TYPE, ok, message)

        if biopsy_days:
            ok, message = sample_validator.validate_sampleBiopsyDays(
                biopsy_days, field_label=COLUMN_BIOPSY_DAYS)
            _flag(COLUMN_BIOPSY_DAYS, ok, message)

        if cell_num:
            ok, message = sample_validator.validate_sampleCellNum(
                cell_num, field_label=COLUMN_CELL_NUM)
            _flag(COLUMN_CELL_NUM, ok, message)

        if couple_id:
            ok, message = sample_validator.validate_sampleCoupleId(
                couple_id, field_label=COLUMN_COUPLE_ID)
            _flag(COLUMN_COUPLE_ID, ok, message)

        if embryo_id:
            ok, message = sample_validator.validate_sampleEmbryoId(
                embryo_id, field_label=COLUMN_EMBRYO_ID)
            _flag(COLUMN_EMBRYO_ID, ok, message)

        if control_type:
            ok, message, _ = sample_validator.validate_controlType(
                control_type, field_label=COLUMN_CONTROLTYPE)
            _flag(COLUMN_CONTROLTYPE, ok, message)

        if population:
            ok, message, _ = sample_validator.validate_population(population)
            _flag(COLUMN_SAMPLE_POPULATION, ok, message)

        if strains:
            ok, message, _ = sample_validator.validate_mouseStrains(strains)
            _flag(COLUMN_SAMPLE_MOUSE_STRAINS, ok, message)

        if source:
            ok, message, _ = sample_validator.validate_sampleSource(source)
            _flag(COLUMN_SAMPLE_SOURCE, ok, message)

        if panel_pool:
            ok, message, _ = sample_validator.validate_panelPoolType(panel_pool)
            _flag(COLUMN_PANEL_POOL_TYPE, ok, message)

        if collected_on:
            ok, message = sample_validator.validate_sampleCollectionDate(collected_on)
            _flag(COLUMN_SAMPLE_COLLECTION_DATE, ok, message)

        if received_on:
            ok, message = sample_validator.validate_sampleReceiptDate(
                received_on, collected_on)
            _flag(COLUMN_SAMPLE_RECEIPT_DATE, ok, message)

        # Barcode kit and barcode id are validated together; ``culprit``
        # names which of the two the message belongs to.
        ok, message, culprit = sample_validator.validate_barcodekit_and_id_str(
            kit,
            barcode,
            barcodeKit_label=COLUMN_BARCODE_KIT,
            barcode_id_str_label=COLUMN_BARCODE,
        )  # TODO: i18n
        barcode_column = COLUMN_BARCODE_KIT if culprit == "barcodeKit" else COLUMN_BARCODE
        _flag(barcode_column, ok, message)

        # validate user-defined custom attributes
        errors.extend(_validate_csv_user_defined_attributes(csvSampleDict, request))

        logger.debug(
            "import_sample_processor.validate_csv_sample() failed=%s" % (errors))
        return errors, skip_row, abort
    except Exception:
        logger.exception(format_exc())
        errors.append((
            "File Contents",
            " the CSV file does not seem to have all the columns. Click the Sample File Format button for an example. ",
        ))  # TODO: i18n
        logger.debug(
            "import_sample_processor.validate_csv_sample() failed=%s" % (errors))
        return errors, skip_row, True
def _create_sampleSetItem(csvSampleDict, request, user, sampleSet_ids):
    """
    Persist one CSV row as a Sample plus one SampleSetItem per sample set.

    The Sample is looked up (or created) by name and external id; when it
    already exists and its description differs, the description is updated.
    A SampleSetItem is then looked up or created for every id in
    ``sampleSet_ids``.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        request: not used by this function.
        user: stored as creator / lastModifiedUser of new SampleSetItems.
        sampleSet_ids: iterable of sample set ids to attach the sample to.

    Returns:
        Tuple ``(sample, sampleSetItem, ssi_sid)``.  ``sampleSetItem`` is the
        item for the last id in ``sampleSet_ids`` (``None`` when that
        iterable is empty) and ``ssi_sid`` is the value returned by
        ``transaction.savepoint()``.
    """
    sampleDisplayedName = csvSampleDict.get(COLUMN_SAMPLE_NAME, '').strip()
    sampleExtId = csvSampleDict.get(COLUMN_SAMPLE_EXT_ID, '').strip()
    sampleGender = csvSampleDict.get(COLUMN_GENDER, '').strip()
    sampleGroupType = csvSampleDict.get(COLUMN_GROUP_TYPE, None)
    sampleGroup = csvSampleDict.get(COLUMN_GROUP, '0').strip()
    sampleDescription = csvSampleDict.get(COLUMN_SAMPLE_DESCRIPTION, '').strip()
    barcodeKit = csvSampleDict.get(COLUMN_BARCODE_KIT, '').strip()
    barcodeAssignment = csvSampleDict.get(COLUMN_BARCODE, '').strip()
    nucleotideType = csvSampleDict.get(COLUMN_NUCLEOTIDE_TYPE, "").strip()
    cancerType = csvSampleDict.get(COLUMN_CANCER_TYPE, "").strip()
    # These two columns previously defaulted to None, so a row without them
    # crashed on None.strip(); a missing/None cell is now treated as empty.
    cellularityPct = (csvSampleDict.get(COLUMN_CELLULARITY_PCT) or "").strip()
    pcrPlateRow = csvSampleDict.get(COLUMN_PCR_PLATE_POSITION, "").strip()
    biopsyDays = csvSampleDict.get(COLUMN_BIOPSY_DAYS, "0").strip()
    coupleId = (csvSampleDict.get(COLUMN_COUPLE_ID) or "").strip()
    embryoId = csvSampleDict.get(COLUMN_EMBRYO_ID, "").strip()

    if not sampleGroup:
        sampleGroup = '0'

    # validation has been done already, this is just to get the official value
    _, _, nucleotideType_internal_value = sample_validator.validate_nucleotideType(
        nucleotideType)
    _, _, gender_CV_value = sample_validator.validate_sampleGender(
        sampleGender)
    _, _, role_CV_value = sample_validator.validate_sampleGroupType(
        sampleGroupType)
    _, _, cancerType_CV_value = sample_validator.validate_cancerType(
        cancerType)
    _, _, pcrPlateRow_internal_value = sample_validator.validate_pcrPlateRow(
        pcrPlateRow)

    sampleName = sampleDisplayedName.replace(' ', '_')
    sample_kwargs = {
        'displayedName': sampleDisplayedName,
        'status': 'created',
        'description': sampleDescription,
        'date': timezone.now(),
    }

    sample, isCreated = Sample.objects.get_or_create(
        name=sampleName, externalId=sampleExtId, defaults=sample_kwargs)

    if isCreated:
        logger.debug(
            "import_sample_processor._create_sampleSetItem() new sample created for sample=%s; id=%d"
            % (sampleDisplayedName, sample.id))
    elif sample.description != sampleDescription:
        sample.description = sampleDescription
        sample.save()
        logger.debug(
            "import_sample_processor._create_sampleSetItem() just updated sample description for sample=%s; id=%d"
            % (sampleDisplayedName, sample.id))

    # Neither value depends on the sample set, so compute them once instead
    # of repeating the barcode query for every sample set id.
    dnabarcode = None
    if barcodeKit and barcodeAssignment:
        dnabarcode = models.dnaBarcode.objects.get(
            name__iexact=barcodeKit, id_str__iexact=barcodeAssignment)
    pcrPlateColumn = "1" if pcrPlateRow_internal_value else ""

    # Stays None when sampleSet_ids is empty, instead of leaving the name
    # unbound for the return statement.
    sampleSetItem = None
    for sampleSetId in sampleSet_ids:
        logger.debug(
            "import_sample_processor._create_sampleSetItem() going to create sampleSetItem for sample=%s; sampleSetId=%s in sampleSet_ids=%s"
            % (sampleDisplayedName, str(sampleSetId), sampleSet_ids))

        currentDateTime = timezone.now()

        sampleSetItem_kwargs = {
            'gender': gender_CV_value,
            'relationshipRole': role_CV_value,
            'relationshipGroup': sampleGroup,
            'cancerType': cancerType_CV_value,
            'cellularityPct': cellularityPct if cellularityPct else None,
            'biopsyDays': int(biopsyDays) if biopsyDays else 0,
            "coupleId": coupleId,
            "embryoId": embryoId,
            'creator': user,
            'creationDate': currentDateTime,
            'lastModifiedUser': user,
            'lastModifiedDate': currentDateTime,
        }

        sampleSetItem, isCreated = SampleSetItem.objects.get_or_create(
            sample=sample,
            sampleSet_id=sampleSetId,
            dnabarcode=dnabarcode,
            nucleotideType=nucleotideType_internal_value,
            pcrPlateRow=pcrPlateRow_internal_value,
            pcrPlateColumn=pcrPlateColumn,
            defaults=sampleSetItem_kwargs)

        logger.debug(
            "import_sample_processor._create_sampleSetItem() after get_or_create isCreated=%s; sampleSetItem=%s; samplesetItem.id=%d"
            % (str(isCreated), sampleDisplayedName, sampleSetItem.id))

    ssi_sid = transaction.savepoint()

    return sample, sampleSetItem, ssi_sid
def validate_csv_sample(csvSampleDict, request):
    """
    Validate one CSV row of sample data and collect every problem found.

    Known columns are read (whitespace-trimmed) from ``csvSampleDict`` and
    handed to the matching ``sample_validator`` check; optional columns are
    only checked when they hold a value.  User-defined attributes are
    checked last through ``_validate_csv_user_defined_attributes``.

    Args:
        csvSampleDict: mapping of CSV column header to raw cell text.
        request: passed through to ``_validate_csv_user_defined_attributes``.

    Returns:
        Tuple ``(failed, isToSkipRow, isToAbort)``.  ``failed`` is a list of
        ``(column, errorMessage)`` pairs; ``isToSkipRow`` is True when no cell
        differs from the empty string; ``isToAbort`` is True when an
        unexpected error interrupted validation of the row.
    """
    failed = []
    isToSkipRow = False
    isToAbort = False

    logger.debug(
        "ENTER import_sample_processor.validate_csv_sample() csvSampleDict=%s; "
        % (csvSampleDict))
    try:
        sampleDisplayedName = csvSampleDict.get(COLUMN_SAMPLE_NAME, '').strip()
        sampleExtId = csvSampleDict.get(COLUMN_SAMPLE_EXT_ID, '').strip()
        sampleGender = csvSampleDict.get(COLUMN_GENDER, '').strip()
        sampleGroupType = csvSampleDict.get(COLUMN_GROUP_TYPE, '').strip()
        sampleGroup = csvSampleDict.get(COLUMN_GROUP, '').strip()
        sampleDescription = csvSampleDict.get(COLUMN_SAMPLE_DESCRIPTION, '').strip()
        barcodeKit = csvSampleDict.get(COLUMN_BARCODE_KIT, '').strip()
        barcodeAssignment = csvSampleDict.get(COLUMN_BARCODE, '').strip()
        nucleotideType = csvSampleDict.get(COLUMN_NUCLEOTIDE_TYPE, "").strip()
        cancerType = csvSampleDict.get(COLUMN_CANCER_TYPE, "").strip()
        # NOTE(review): no string default here, so a file lacking this column
        # raises AttributeError and ends in the abort branch below -
        # presumably intended; confirm before changing.
        cellularityPct = csvSampleDict.get(COLUMN_CELLULARITY_PCT, None).strip()
        pcrPlateRow = csvSampleDict.get(COLUMN_PCR_PLATE_POSITION, "").strip()
        biopsyDays = csvSampleDict.get(COLUMN_BIOPSY_DAYS, "0").strip()
        coupleId = csvSampleDict.get(COLUMN_COUPLE_ID, "").strip()
        embryoId = csvSampleDict.get(COLUMN_EMBRYO_ID, "").strip()

        # skip blank line (judged on the raw, untrimmed cell values)
        if not any(v != '' for v in csvSampleDict.values()):
            isToSkipRow = True
            return failed, isToSkipRow, isToAbort

        # Checks that always run.
        isValid, errorMessage = sample_validator.validate_sampleDisplayedName(
            sampleDisplayedName)
        if not isValid:
            failed.append((COLUMN_SAMPLE_NAME, errorMessage))

        isValid, errorMessage = sample_validator.validate_sampleExternalId(
            sampleExtId)
        if not isValid:
            failed.append((COLUMN_SAMPLE_EXT_ID, errorMessage))

        isValid, errorMessage = sample_validator.validate_sampleDescription(
            sampleDescription)
        if not isValid:
            failed.append((COLUMN_SAMPLE_DESCRIPTION, errorMessage))

        isValid, errorMessage, _ = sample_validator.validate_sampleGender(
            sampleGender)
        if not isValid:
            failed.append((COLUMN_GENDER, errorMessage))

        isValid, errorMessage, _ = sample_validator.validate_sampleGroupType(
            sampleGroupType)
        if not isValid:
            failed.append((COLUMN_GROUP_TYPE, errorMessage))

        # Checks that only run when the cell holds a value.
        if sampleGroup:
            isValid, errorMessage = sample_validator.validate_sampleGroup(
                sampleGroup)
            if not isValid:
                failed.append((COLUMN_GROUP, errorMessage))

        if cancerType:
            isValid, errorMessage, _ = sample_validator.validate_cancerType(
                cancerType)
            if not isValid:
                failed.append((COLUMN_CANCER_TYPE, errorMessage))

        if cellularityPct:
            isValid, errorMessage, _ = sample_validator.validate_cellularityPct(
                cellularityPct)
            if not isValid:
                failed.append((COLUMN_CELLULARITY_PCT, errorMessage))

        if nucleotideType:
            isValid, errorMessage, _ = sample_validator.validate_nucleotideType(
                nucleotideType)
            if not isValid:
                failed.append((COLUMN_NUCLEOTIDE_TYPE, errorMessage))

        if pcrPlateRow:
            isValid, errorMessage, _ = sample_validator.validate_pcrPlateRow(
                pcrPlateRow)
            if not isValid:
                failed.append((COLUMN_PCR_PLATE_POSITION, errorMessage))

        if biopsyDays:
            isValid, errorMessage = sample_validator.validate_sampleBiopsyDays(
                biopsyDays)
            if not isValid:
                failed.append((COLUMN_BIOPSY_DAYS, errorMessage))

        if coupleId:
            isValid, errorMessage = sample_validator.validate_sampleCoupleId(
                coupleId)
            if not isValid:
                failed.append((COLUMN_COUPLE_ID, errorMessage))

        if embryoId:
            isValid, errorMessage = sample_validator.validate_sampleEmbryoId(
                embryoId)
            if not isValid:
                failed.append((COLUMN_EMBRYO_ID, errorMessage))

        # Barcode kit and barcode id are validated together; ``item`` names
        # which of the two the error message belongs to.
        isValid, errorMessage, item = sample_validator.validate_barcodekit_and_id_str(
            barcodeKit, barcodeAssignment)
        if not isValid:
            if item == 'barcodeKit':
                failed.append((COLUMN_BARCODE_KIT, errorMessage))
            else:
                failed.append((COLUMN_BARCODE, errorMessage))

        # validate user-defined custom attributes
        failed_userDefined = _validate_csv_user_defined_attributes(
            csvSampleDict, request)
        failed.extend(failed_userDefined)

        logger.debug(
            "import_sample_processor.validate_csv_sample() failed=%s" %
            (failed))
        return failed, isToSkipRow, isToAbort
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as a bad CSV file.
        logger.exception(format_exc())
        failed.append((
            "File Contents",
            " the CSV file does not seem to have all the columns. Click the Sample File Format button for an example. "
        ))
        isToAbort = True
        logger.debug(
            "import_sample_processor.validate_csv_sample() failed=%s" %
            (failed))
        return failed, isToSkipRow, isToAbort