def _getExperimentSample(collectionPermId, expSamplePermId):
    """Retrieve the experiment sample."""

    # Get the experiment sample
    sampleCriteria = SearchCriteria()
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID,
            expSamplePermId)
    )
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID,
            collectionPermId)
    )

    # Add the experiment subcriteria
    sampleCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria)
    )

    # Search
    expSampleList = searchService.searchForSamples(sampleCriteria)
    if len(expSampleList) != 1:
        return None

    # Return the experiment sample
    return expSampleList[0]
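# Hypothetical usage sketch (not part of the original script): assuming the same
# Jython plugin context as above, where `searchService` is injected by openBIS,
# the helper might be used to resolve the experiment sample before any further
# queries. The two permIds below are placeholders.
def _exampleResolveExperimentSample():
    expSample = _getExperimentSample("20180101000000000-100",   # collection permId (placeholder)
                                     "20180101000000000-200")   # experiment sample permId (placeholder)
    if expSample is None:
        return None
    return expSample.getCode()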
def createNewBarcode(project, tr):
    search_service = tr.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    foundSamplesFilter = [
        s for s in foundSamples if 'ENTITY' not in s.getCode()
    ]
    offset = 0
    exists = True
    while exists:
        # create new barcode
        newBarcode = getNextFreeBarcode(
            project, len(foundSamplesFilter) + len(newTestSamples) + offset)
        # check if barcode already exists in database
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, newBarcode))
        found = search_service.searchForSamples(pc)
        if len(found) == 0:
            exists = False
        else:
            offset += 1
    return newBarcode
def _getDataSetsForExperiment(self):
    """
    Return a list of datasets belonging to the experiment and optionally
    to the sample. If the sample ID is empty, only the experiment is used
    in the search criteria. If none are found, return [].
    """

    # Set search criteria to retrieve all datasets for the experiment.
    # If the sample code is set, we also filter by it.
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         "MICROSCOPY_IMG_CONTAINER"))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         self._experiment.permId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria))

    if self._sample is not None:
        self._logger.info("Filter by sample " + self._sampleId)
        sampleCriteria = SearchCriteria()
        sampleCriteria.addMatchClause(
            MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                             self._sample.permId))
        searchCriteria.addSubCriteria(
            SearchSubCriteria.createSampleCriteria(sampleCriteria))

    dataSets = searchService.searchForDataSets(searchCriteria)
    if len(dataSets) == 0:
        dataSets = []
        self._message = "Could not retrieve datasets for experiment " \
                        "with id " + self._experimentId
        if self._sampleId != "":
            self._message = self._message + " and sample with id " + \
                            self._sampleId
        self._logger.error(self._message)

    # Return
    return dataSets
def _getDataSetForTube(self, tubeCode=None):
    """
    Get the datasets belonging to the tube with specified tube code.
    If none is found, return [].

    If no tubeCode is given, it is assumed that the tube is the passed
    entity with code self._entityCode.
    """

    if tubeCode is None:
        tubeCode = self._entityCode

    # Set search criteria to retrieve the dataset contained in the tube
    searchCriteria = SearchCriteria()
    tubeCriteria = SearchCriteria()
    tubeCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.CODE, tubeCode))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleCriteria(tubeCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    if len(dataSets) == 0:
        self._message = "Could not retrieve datasets for tube " \
                        "with code " + tubeCode + "."
        self._logger.error(self._message)

    # Return
    return dataSets
def _getDataSetForWell(self, wellId=None):
    """
    Get the datasets belonging to the well with specified code.
    If none are found, return [].

    If no wellId is given, it is assumed that the well is the passed
    entity with code self._entityId.
    """

    if wellId is None:
        wellId = self._entityId

    # Set search criteria to retrieve the dataset contained in the well
    searchCriteria = SearchCriteria()
    wellCriteria = SearchCriteria()
    wellCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.CODE, wellId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleCriteria(wellCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    if len(dataSets) == 0:
        self._message = "Could not retrieve datasets for well " \
                        "with code " + wellId + "."
        self._logger.error(self._message)

    # Return
    return dataSets
def _getChildSamples(parentSampleType, parentSamplePermId, sampleType):
    """Return the samples of given type for specified parent sample."""

    # The samples are of type 'sampleType'
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         sampleType))

    # The samples have given parent
    expSampleCriteria = SearchCriteria()
    expSampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         parentSampleType))
    expSampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         parentSamplePermId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleParentCriteria(expSampleCriteria))

    # Now search
    samples = searchService.searchForSamples(searchCriteria)

    # Return the samples
    return samples
def _getDataSetForTube(self, tubeCode):
    """
    Get the datasets belonging to the tube with specified tube code.
    If none is found, return [].
    """

    if _DEBUG:
        self._logger.info("Searching for tube with code " + tubeCode)

    # Set search criteria to retrieve the dataset contained in the tube
    searchCriteria = SearchCriteria()
    tubeCriteria = SearchCriteria()
    tubeCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.CODE, tubeCode))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleCriteria(tubeCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    if _DEBUG:
        self._logger.info("Retrieved " + str(len(dataSets)) +
                          " datasets for tube with code " + tubeCode)

    if len(dataSets) == 0:
        self._message = "Could not retrieve datasets for tube " \
                        "with code " + tubeCode + "."
        self._logger.error(self._message)

    # Return
    return dataSets
def _getAllTubes(self):
    """
    Get all tubes in the experiment. If the specimen is set (self._specimen),
    then return only those tubes that belong to it.
    Returns [] if none are found.
    """

    # Set search criteria to retrieve all tubes in the experiment.
    # All tubes belong to a virtual tubeset - so the set of tubes in the
    # experiment is exactly the same as the set of tubes in the virtual
    # tubeset.
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         self._experimentPrefix + "_TUBE"))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         self._experiment.permId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria))
    tubes = searchService.searchForSamples(searchCriteria)

    if len(tubes) == 0:
        self._message = "The experiment with code " + \
                        self._experimentCode + " does not contain tubes."
        self._logger.error(self._message)
        return tubes

    # Check that the specimen matches (if needed)
    if self._specimen != "":
        tubes = [tube for tube in tubes if
                 tube.getPropertyValue(self._experimentPrefix + "_SPECIMEN") ==
                 self._specimen]

    # Return the (filtered) tubes
    return tubes
def _getDataSetsForSample(sampleIdentifier, dataSetType):
    """Return the dataSet of given type for specified sample."""

    # Set search criteria to retrieve the dataSet associated with the sample
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE,
            dataSetType)
    )
    sampleCriteria = SearchCriteria()
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.CODE,
            sampleIdentifier)
    )
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleCriteria(sampleCriteria)
    )
    dataSetList = searchService.searchForDataSets(searchCriteria)
    if len(dataSetList) != 1:
        return []

    # Return the dataSet
    return dataSetList
def _getDataSetForWell(self, wellCode=None):
    """
    Get the datasets belonging to the well with specified code.
    If none are found, return [].

    If no wellCode is given, it is assumed that the well is the passed
    entity with code self._entityCode.
    """

    if wellCode is None:
        wellCode = self._entityCode

    # Set search criteria to retrieve the dataset contained in the well
    searchCriteria = SearchCriteria()
    wellCriteria = SearchCriteria()
    wellCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.CODE, wellCode))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleCriteria(wellCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    if len(dataSets) == 0:
        self._message = "Could not retrieve datasets for well " \
                        "with code " + wellCode + "."

    # Return
    return dataSets
def _retrieveAllSamplesWithTypeAndParent(self, sampleType, parentSampleId, parentSampleType):
    """
    Retrieve all samples belonging to current experiment sample and
    collection having specified type.
    """

    if _DEBUG:
        self._logger.info("Retrieving samples of type " + sampleType +
                          " with parent sample with id " + parentSampleId +
                          " and type " + parentSampleType)

    # The samples are of type 'sampleType'
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         sampleType))

    # The samples have given parent
    expSampleCriteria = SearchCriteria()
    expSampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         parentSampleType))
    expSampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.CODE,
                                         parentSampleId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleParentCriteria(expSampleCriteria))

    # Now search
    samples = searchService.searchForSamples(searchCriteria)

    # Return the samples
    return samples
def _retrieveAllSamplesWithType(self, sampleType):
    """
    Retrieve all samples belonging to current experiment sample and
    collection having specified type.
    """

    # The samples are of type 'sampleType'
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         sampleType))

    # The samples have parent _EXPERIMENT_SAMPLE
    expSampleCriteria = SearchCriteria()
    expSampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         self._expSampleType))
    expSampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         self._expSamplePermId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleParentCriteria(expSampleCriteria))

    # Now search
    samples = searchService.searchForSamples(searchCriteria)

    # Return the samples
    return samples
def get_space_from_project(transaction, project):
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()
    return space
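# Hypothetical usage sketch (not part of the original script): inside a dropbox
# process() function, the space resolved for a project could be reused to build
# full sample identifiers. The barcode slicing mirrors the `identifier[:5]`
# convention used elsewhere in these scripts; `transaction` and `barcode` are
# assumed to come from the caller.
def example_build_sample_identifier(transaction, barcode):
    space = get_space_from_project(transaction, barcode[:5])
    return "/" + space + "/" + barcode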
def _getMicroscopySampleTypeSample(self):

    # Search sample of type MICROSCOPY_SAMPLE_TYPE with specified CODE
    sampleCriteria = SearchCriteria()
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE,
            self._sampleType)
    )
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID,
            self._samplePermId)
    )

    # Search parent sample of type MICROSCOPY_EXPERIMENT with specified permId
    sampleParentCriteria = SearchCriteria()
    sampleParentCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE,
            self._expSampleType))
    sampleParentCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID,
            self._expSamplePermId))

    # Add the parent sample subcriteria
    sampleCriteria.addSubCriteria(
        SearchSubCriteria.createSampleParentCriteria(sampleParentCriteria)
    )

    # Search
    samples = searchService.searchForSamples(sampleCriteria)

    if len(samples) == 0:
        samples = []
        self._message = "Could not retrieve MICROSCOPY_SAMPLE_TYPE sample with id " + \
                        self._sampleId + " for parent sample MICROSCOPY_EXPERIMENT with id " + \
                        self._expSampleId + " from COLLECTION experiment " + self._collectionId + "."
        self._logger.error(self._message)
        return samples

    if _DEBUG:
        self._logger.info("Retrieved " + str(len(samples)) +
                          " samples of type MICROSCOPY_SAMPLE_TYPE " +
                          "for parent sample MICROSCOPY_EXPERIMENT " +
                          "with ID " + self._expSamplePermId)

    # Return
    return samples[0]
def process(tr, parameters, tableBuilder):
    #ids = sorted(parameters.get("ids"))
    types = parameters.get("types")  # sample types (tiers) that are requested for the tsv
    project = parameters.get("project")

    tableBuilder.addHeader(CODE)
    tableBuilder.addHeader(SECONDARY_NAME)
    tableBuilder.addHeader(SOURCE)
    tableBuilder.addHeader(EXTERNAL_ID)
    tableBuilder.addHeader(SAMPLE_TYPE)
    tableBuilder.addHeader(XML)
    tableBuilder.addHeader(TIER)

    # search all samples of project
    search = tr.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    fetchOptions = EnumSet.of(SampleFetchOption.ANCESTORS,
                              SampleFetchOption.PROPERTIES)
    allSamples = search.searchForSamples(sc, fetchOptions)

    # filter all samples by types
    samples = []
    for s in allSamples:
        if s.getSampleType() in types:
            samples.append(s)

    # sort remaining samples
    samples = sorted(samples)

    voc = search.getVocabulary("Q_NCBI_TAXONOMY")
    for sample in samples:
        code = sample.getCode()
        row = tableBuilder.addRow()
        row.setCell(CODE, code)
        row.setCell(SECONDARY_NAME, sample.getPropertyValue("Q_SECONDARY_NAME"))
        row.setCell(SOURCE, fetchSource([sample], voc.getTerms(), []))
        row.setCell(EXTERNAL_ID, sample.getPropertyValue("Q_EXTERNALDB_ID"))
        extrType = sample.getPropertyValue("Q_PRIMARY_TISSUE")
        if not extrType:
            extrType = sample.getPropertyValue("Q_SAMPLE_TYPE")
        if not extrType:
            extrType = ""
        if extrType == "CELL_LINE":
            extrType = sample.getPropertyValue("Q_TISSUE_DETAILED")
        row.setCell(SAMPLE_TYPE, extrType)
        row.setCell(XML, sample.getPropertyValue("Q_PROPERTIES"))
        row.setCell(TIER, sample.getSampleType())
def listSamplesForExperiment(searchService, sampleType, expID):
    sc = SearchCriteria()
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.TYPE, sampleType))
    ec = SearchCriteria()
    ec.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, expID))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(ec))
    existingSamples = searchService.searchForSamples(sc)
    return existingSamples
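# Hypothetical usage sketch (not part of the original script): the helper above
# returns openBIS sample objects, so the result can be filtered by a property
# value, as other snippets here do. The sample type, experiment code and
# property value are placeholders.
def exampleListDNASamples(searchService):
    samples = listSamplesForExperiment(searchService, "Q_TEST_SAMPLE", "QABCDE1")
    return [s for s in samples if s.getPropertyValue("Q_SAMPLE_TYPE") == "DNA"]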
def _getDataSetsForPlate(self, plateCode=None):
    """
    Return a list of datasets belonging to the plate with specified ID
    optionally filtered by self._specimen. If none are found, return [].

    If no plateCode is given, it is assumed that the plate is the passed
    entity with code self._entityCode.
    """

    if plateCode is None:
        plateCode = self._entityCode

    # Set search criteria to retrieve all wells contained in the plate
    searchCriteria = SearchCriteria()
    plateCriteria = SearchCriteria()
    plateCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.CODE, plateCode))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleContainerCriteria(plateCriteria))
    wells = searchService.searchForSamples(searchCriteria)

    if len(wells) == 0:
        self._message = "Could not retrieve wells for plate with " \
                        "code " + plateCode + "."
        self._logger.error(self._message)
        return wells

    # Check that the specimen matches (if needed)
    if self._specimen != "":
        wells = [well for well in wells if
                 well.getPropertyValue(self._experimentPrefix + "_SPECIMEN") ==
                 self._specimen]

    # Now iterate over the samples and retrieve their datasets
    dataSets = []
    for well in wells:
        wellCode = well.getCode()
        dataSetsForWell = self._getDataSetForWell(wellCode)
        dataSets.extend(dataSetsForWell)

    if len(dataSets) == 0:
        self._message = "Could not retrieve datasets for wells in " \
                        "plate with code " + plateCode + " from experiment " \
                        "with code " + self._experimentCode + "."
        self._logger.error(self._message)

    # Return
    return dataSets
def _getSamples(self, expSampleType, expSamplePermId, sampleType):
    """
    Return a list of samples of the requested type belonging to the
    MICROSCOPY_EXPERIMENT sample with given permId.
    If none are found, return [].
    """

    if _DEBUG:
        self._logger.info("* Requested experiment sample type: " + expSampleType)
        self._logger.info("* Requested experiment sample permId: " + expSamplePermId)
        self._logger.info("* Requested sample type: " + sampleType)

    # Search samples of type MICROSCOPY_SAMPLE_TYPE
    sampleCriteria = SearchCriteria()
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE,
            sampleType)
    )

    # Search parent sample of type MICROSCOPY_EXPERIMENT with specified permId
    sampleParentCriteria = SearchCriteria()
    sampleParentCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE,
            expSampleType))
    sampleParentCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID,
            expSamplePermId))

    # Add the parent sample subcriteria
    sampleCriteria.addSubCriteria(
        SearchSubCriteria.createSampleParentCriteria(sampleParentCriteria)
    )

    # Search
    samples = searchService.searchForSamples(sampleCriteria)

    # Return
    return samples
def _getAllPlates(self):
    """
    Get all plates in the experiment. Returns [] if none are found.
    """

    # Set search criteria to retrieve all plates in the experiment
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         self._experimentPrefix + "_PLATE"))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         self._experiment.permId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria))
    plates = searchService.searchForSamples(searchCriteria)

    if len(plates) == 0:
        self._message = "Could not retrieve plates for experiment with code " + \
                        self._experimentCode + "."
        return plates

    # Return the plates
    return plates
def aggregate(parameters, tableBuilder):
    codes = parameters.get("codes")
    tableBuilder.addHeader(PROJECT)
    tableBuilder.addHeader(DATASETS)

    allCodes = ""
    for code in codes:
        allCodes += code + " "

    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, allCodes))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    found = searchService.searchForDataSets(sc)

    # count datasets per project
    projectMap = {}
    for ds in found:
        project = ds.getExperiment().getExperimentIdentifier().split("/")[2]
        try:
            projectMap[project] = projectMap[project] + 1
        except:
            projectMap[project] = 1

    for key in projectMap:
        row = tableBuilder.addRow()
        row.setCell(PROJECT, key)
        row.setCell(DATASETS, projectMap[key])
def _getAccessoryDataSetsForExperiment(self):
    """
    Return a list of datasets belonging to the experiment and optionally
    to the sample. If the sample ID is empty, only the experiment is used
    in the search criteria. If none are found, return [].
    """

    # Set search criteria to retrieve all datasets of type
    # MICROSCOPY_ACCESSORY_FILE for the experiment.
    # If the sample code is set, we also filter by it.
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         "MICROSCOPY_ACCESSORY_FILE"))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         self._experiment.permId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria))

    if self._sample is not None:
        self._logger.info("Filter by sample " + self._sampleId)
        sampleCriteria = SearchCriteria()
        sampleCriteria.addMatchClause(
            MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                             self._sample.permId))
        searchCriteria.addSubCriteria(
            SearchSubCriteria.createSampleCriteria(sampleCriteria))

    accessoryDataSets = searchService.searchForDataSets(searchCriteria)

    # Report the accessory datasets
    if len(accessoryDataSets) != 0:
        self._message = "Found " + str(len(accessoryDataSets)) + \
                        " accessory datasets for experiment " \
                        "with id " + self._experimentId
        if self._sampleId != "":
            self._message = self._message + " and sample with id " + \
                            self._sampleId
        self._logger.info(self._message)

    # Return
    return accessoryDataSets
def createNewBarcode(project, tr):
    search_service = tr.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()
    foundSamplesFilter = [
        s for s in foundSamples if 'ENTITY' not in s.getCode()
    ]
    offset = 0
    exists = True
    while exists:
        # create new barcode
        newBarcode = getNextFreeBarcode(
            project, len(foundSamplesFilter) + len(newTestSamples) + offset)
        # check if barcode already exists in database
        #pc = SearchCriteria()
        #pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, newBarcode))
        #found = search_service.searchForSamples(pc)
        # try to fetch the sample, safer if it's new and not indexed yet
        sampleIdentifier = "/" + space + "/" + newBarcode
        if not tr.getSampleForUpdate(sampleIdentifier):
            exists = False
        else:
            offset += 1
    return newBarcode
def find_and_register_vcf(transaction, jsonContent):
    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []

    for key in jsonContent.keys():
        if key == "type" or key == "files":
            pass
        else:
            geneticIDS.append(jsonContent[key]["id_genetics"])
            qbicBarcodes.append(jsonContent[key]["id_qbic"])
            sampleSource.append(jsonContent[key]["tumor"])

    expType = jsonContent["type"]
    project = qbicBarcodes[0][:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()

    datasetSample = None
    sampleFound = False
    parentIdentifiers = []
    testParentIdentifiers = []
    global numberOfExperiments

    for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
        if geneticID in newNGSSamples:
            parentIdentifiers.append(newNGSSamples[geneticID])
            testParentIdentifiers.append(oldTestSamples[geneticID])
        else:
            for samp in foundSamples:
                qbicBarcodeID = '/' + space + '/' + barcode
                if qbicBarcodeID in samp.getParentSampleIdentifiers():
                    testParentID = samp.getSampleIdentifier()
                    for s in foundSamples:
                        sampleType = s.getSampleType()
                        secName = s.getPropertyValue("Q_SECONDARY_NAME")
                        extDB = s.getPropertyValue("Q_EXTERNALDB_ID")
                        if (testParentID in s.getParentSampleIdentifiers()) and \
                                (sampleType == "Q_NGS_SINGLE_SAMPLE_RUN") and \
                                (((secName != None) and (secName in geneticID)) or
                                 ((extDB != None) and (extDB in geneticID))):
                            sampleIdent = s.getSampleIdentifier()
                            parentIdentifiers.append(sampleIdent)
                            testParentIdentifiers.append(testParentID)

    #numberOfExperiments = len(search_service.listExperiments("/" + space + "/" + project)) + 1 #TEST
    numberOfExperiments += 1
    newVCExp = transaction.createNewExperiment(
        '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments),
        "Q_NGS_VARIANT_CALLING")

    identString = ''
    for genID in geneticIDS:
        identString += genID.split('_')[-1]
    identString2 = ''
    for tpi in testParentIdentifiers:
        identString2 += tpi.split('/')[-1]

    #newVCSample = transaction.createNewSample('/' + space + '/' + 'VC'+ project + qbicBarcodes[0][5:] + qbicBarcodes[1][5:] + identString, "Q_NGS_VARIANT_CALLING")
    newVCSample = transaction.createNewSample(
        '/' + space + '/' + 'VC' + identString2 + identString,
        "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    additionalInfo = ""
    secName = ""
    for i, parentBarcode in enumerate(qbicBarcodes):
        additionalInfo += '%s %s Tumor: %s \n' % (
            qbicBarcodes[i], geneticIDS[i], sampleSource[i])
        secName += '%s ' % (geneticIDS[i])
    secName = secName.strip()

    #additionalInfo = '%s %s Tumor: %s \n %s %s Tumor: %s' % (qbicBarcodes[0], geneticIDS[0], sampleSource[0], qbicBarcodes[1], geneticIDS[1], sampleSource[1])
    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    #secName = '%s-%s' % (geneticIDS[0], geneticIDS[1])
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)
    datasetSample = newVCSample

    return datasetSample
def find_and_register_ngs(transaction, jsonContent):
    qcValues = jsonContent["sample1"]["qc"]
    genome = jsonContent["sample1"]["genome"]
    idGenetics = jsonContent["sample1"]["id_genetics"]
    qbicBarcode = jsonContent["sample1"]["id_qbic"]
    system = jsonContent["sample1"]["processing_system"]
    tumor = jsonContent["sample1"]["tumor"]
    expType = jsonContent["type"]

    project = qbicBarcode[:5]
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)

    datasetSample = None
    sampleFound = False
    sampleIdent = None
    space = foundSamples[0].getSpace()
    testSampleCode = None
    knownCodes = []

    for samp in foundSamples:
        qbicBarcodeID = '/' + samp.getSpace() + '/' + qbicBarcode
        knownCodes.append(samp.getCode())
        print "code: " + samp.getCode()
        if qbicBarcodeID in samp.getParentSampleIdentifiers() or qbicBarcode == samp.getCode():
            sampleType = samp.getSampleType()
            if sampleType == "Q_TEST_SAMPLE":
                print "searching: " + idGenetics.split('_')[0]
                print samp.getPropertyValue("Q_EXTERNALDB_ID")
                if (samp.getPropertyValue("Q_SAMPLE_TYPE") == typesDict[expType]) and \
                        ((samp.getPropertyValue("Q_SECONDARY_NAME") == idGenetics.split('_')[0]) or
                         (samp.getPropertyValue("Q_EXTERNALDB_ID") == idGenetics.split('_')[0])):
                    sampleIdent = samp.getSampleIdentifier()
                    testSampleCode = samp.getCode()
                    oldTestSamples[idGenetics] = sampleIdent

    if not sampleIdent:
        if not idGenetics in newTestSamples:
            for samp in foundSamples:
                if qbicBarcode == samp.getCode():
                    testSampleCode = createNewBarcode(project, transaction)
                    sampleIdent = '/' + space + '/' + testSampleCode
                    testSample = transaction.createNewSample(sampleIdent, "Q_TEST_SAMPLE")
                    testSample.setParentSampleIdentifiers([samp.getSampleIdentifier()])
                    testSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics.split('_')[0])
                    testSample.setPropertyValue('Q_SAMPLE_TYPE', typesDict[expType])
                    global numberOfExperiments
                    numberOfExperiments += 1
                    newTestSampleExperiment = transaction.createNewExperiment(
                        '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments),
                        "Q_SAMPLE_PREPARATION")
                    testSample.setExperiment(newTestSampleExperiment)
                    newTestSamples[idGenetics] = sampleIdent

    for s in foundSamples:
        # There is already a registered NGS run
        if (s.getSampleType() == "Q_NGS_SINGLE_SAMPLE_RUN") and \
                (sampleIdent in s.getParentSampleIdentifiers() and
                 (s.getPropertyValue("Q_SECONDARY_NAME") in idGenetics)):
            sa = transaction.getSampleForUpdate(s.getSampleIdentifier())
            sa.setPropertyValue("Q_SECONDARY_NAME", idGenetics)
            datasetSample = sa
            sampleFound = True

    if not sampleFound:
        # register new experiment and sample
        numberOfExperiments += 1
        newNGSMeasurementExp = transaction.createNewExperiment(
            '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments),
            "Q_NGS_MEASUREMENT")
        newNGSMeasurementExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_MODE', 'PAIRED_END')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCER_DEVICE', 'IMGAG_ILLUMINA_HISEQ_2500')
        newNGSMeasurementExp.setPropertyValue('Q_ADDITIONAL_INFO', system)
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_TYPE', typesDict[expType])
        newNGSID = '/' + space + '/' + 'NGS' + idGenetics.split('_')[-1] + testSampleCode
        newNGSrunSample = transaction.createNewSample(newNGSID, "Q_NGS_SINGLE_SAMPLE_RUN")
        newNGSrunSample.setParentSampleIdentifiers([sampleIdent])
        newNGSrunSample.setExperiment(newNGSMeasurementExp)
        newNGSSamples[idGenetics] = newNGSID
        additionalInfo = '%s: %s\n' % ("Genome", genome)
        for qc in qcValues:
            line = str(qc)
            additionalInfo += '%s\n' % line.replace('{', '').replace('}', '')
        newNGSrunSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
        newNGSrunSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics)
        datasetSample = newNGSrunSample

    return datasetSample
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1

    # Get the name of the incoming folder
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        pass
        #experiment = identifier[1:5]
        #parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"

    project = identifier[:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()

    global numberOfExperiments
    numberOfExperiments = len(
        search_service.listExperiments("/" + space + "/" + project))

    src = os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt'))
    if os.path.isfile(src):
        os.remove(src)

    print "start registration"

    #dataSet = None
    for f in os.listdir(os.path.join(incomingPath, name)):
        if f.endswith('metadata'):
            jsonContent = parse_metadata_file(
                os.path.realpath(os.path.join(os.path.join(incomingPath, name), f)))
            rawFiles = jsonContent["files"]
            vcfs = []
            fastqs = []
            gsvars = []
            print "metadata read"
            for rawFile in rawFiles:
                print rawFile
                if rawFile.endswith("vcf") or rawFile.endswith("vcf.gz"):
                    vcfs.append(rawFile)
                if rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"):
                    fastqs.append(rawFile)
                if rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"):
                    gsvars.append(rawFile)
            #if rawFiles[0].endswith("vcf") or rawFiles[0].endswith("vcf.gz"):
            #    datasetSample = find_and_register_vcf(transaction, jsonContent)
            #    dataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
            #    dataSet.setSample(datasetSample)
            #elif rawFiles[0].endswith("fastq") or rawFiles[0].endswith("fastq.gz"):
            #    datasetSample = find_and_register_ngs(transaction, jsonContent)
            #    dataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
            #    dataSet.setSample(datasetSample)
            #os.remove(os.path.realpath(os.path.join(os.path.join(incomingPath,name),f)))
        else:
            pass

    folder = os.path.join(incomingPath, name)

    if len(fastqs) > 0:
        fastqSample = find_and_register_ngs(transaction, jsonContent)
        fastqDataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
        fastqDataSet.setSample(fastqSample)
        fastqFolder = os.path.join(folder, "raw")
        os.mkdir(fastqFolder)
        for f in fastqs:
            os.rename(os.path.join(folder, f), os.path.join(fastqFolder, f))
        transaction.moveFile(fastqFolder, fastqDataSet)

    for vc in vcfs:
        vcfSample = find_and_register_vcf(transaction, jsonContent)
        vcfDataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
        vcfDataSet.setSample(vcfSample)
        vcfFolder = os.path.join(folder, "vcf")
        os.mkdir(vcfFolder)
        for f in vcfs:
            os.rename(os.path.join(folder, f), os.path.join(vcfFolder, f))
            for g in gsvars:
                if (f.split('.')[0].replace('_vc_strelka', '') == g.split('.')[0]):
                    os.rename(os.path.join(folder, g), os.path.join(vcfFolder, g))
        transaction.moveFile(vcfFolder, vcfDataSet)
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1

    # Get the name of the incoming file
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        experiment = identifier[1:5]
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_FASTA_DATA")
    dataSet.setMeasuredData(False)

    search_service = transaction.getSearchService()

    vcf = re.compile("VCQ\w{4}[0-9]{3}[A-Z]\w[A-Z]*")
    vcfCodes = vcf.findall(name)

    if len(vcfCodes) > 0:
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, vcfCodes[0]))
        foundSamples = search_service.searchForSamples(sc)
        vcSample = transaction.getSampleForUpdate(foundSamples[0].getSampleIdentifier())
    else:
        # vcf sample needs to be created
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, parentCode))
        foundSamples = search_service.searchForSamples(sc)
        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
        sa = transaction.getSampleForUpdate(parentSampleIdentifier)

        # register new experiment and sample
        existingExperimentIDs = []
        existingExperiments = search_service.listExperiments("/" + space + "/" + project)
        numberOfExperiments = len(existingExperiments) + 1
        for eexp in existingExperiments:
            existingExperimentIDs.append(eexp.getExperimentIdentifier())
        newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
        while newExpID in existingExperimentIDs:
            numberOfExperiments += 1
            newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)

        newVariantCallingExperiment = transaction.createNewExperiment(newExpID, "Q_FASTA_INFO")
        newVariantCallingExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

        search_service = transaction.getSearchService()
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        foundSamples2 = search_service.searchForSamples(sc)

        vcNumber = 1
        newSampleID = '/' + space + '/' + 'FASTA' + str(vcNumber) + parentCode
        existingSampleIDs = []
        for samp in foundSamples2:
            existingSampleIDs.append(samp.getSampleIdentifier())
        # search in known ids, but also try to fetch the sample in case it wasn't indexed yet
        while newSampleID in existingSampleIDs or transaction.getSampleForUpdate(newSampleID):
            vcNumber += 1
            newSampleID = '/' + space + '/' + 'FASTA' + str(vcNumber) + parentCode

        vcSample = transaction.createNewSample(newSampleID, "Q_FASTA")
        vcSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
        vcSample.setExperiment(newVariantCallingExperiment)

    resultsname = name.replace('.fasta', '').replace('.fsa', '')
    new_folder = os.path.realpath(os.path.join(incomingPath, resultsname))
    os.mkdir(new_folder)

    for f in os.listdir(incomingPath):
        if f.endswith('origlabfilename'):
            origName = open(os.path.join(incomingPath, f), 'r')
            secondaryName = origName.readline().strip().split('_')[0]
            origName.close()
            sa.setPropertyValue('Q_SECONDARY_NAME', secondaryName)
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))
        elif f.endswith('sha256sum') or f.endswith('fasta') or f.endswith('fsa'):
            os.rename(os.path.realpath(os.path.join(incomingPath, f)),
                      os.path.join(new_folder, f))
        elif not os.path.isdir(os.path.join(incomingPath, f)):
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))

    dataSet.setSample(vcSample)
    transaction.moveFile(new_folder, dataSet)
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1

    # Get the name of the incoming file
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        experiment = identifier[1:5]
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, parentCode))
    foundSamples = search_service.searchForSamples(sc)

    if len(foundSamples) > 0:
        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
    else:
        search_service = transaction.getSearchService()
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        foundSamples = search_service.searchForSamples(sc)
        if len(foundSamples) > 0:
            space = foundSamples[0].getSpace()
            parentSampleIdentifier = "/" + space + "/" + parentCode
        else:
            # no sample found in this project, they are probably not indexed yet.
            # try parsing space from file name instead
            space = name.split("_")[0]
            parentSampleIdentifier = "/" + space + "/" + parentCode

    sa = transaction.getSampleForUpdate(parentSampleIdentifier)

    # register new experiment and sample
    existingExperimentIDs = []
    existingExperiments = search_service.listExperiments("/" + space + "/" + project)
    numberOfExperiments = len(search_service.listExperiments("/" + space + "/" + project)) + 1
    for eexp in existingExperiments:
        existingExperimentIDs.append(eexp.getExperimentIdentifier())
    newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)

    newHLATypingExperiment = transaction.createNewExperiment(newExpID, "Q_NGS_HLATYPING")
    newHLATypingExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

    if os.path.isdir(incomingPath):
        for root, subFolders, files in os.walk(incomingPath):
            if subFolders:
                subFolder = subFolders[0]
            for f in files:
                if f.endswith('.alleles'):
                    resultPath = os.path.join(root, f)
                    resultFile = open(resultPath, 'r')
    else:
        resultPath = incomingPath
        resultFile = open(resultPath, 'r')

    resultContent = resultFile.read()

    mhcClass = "MHC_CLASS_II"
    mhcSuffix = "2"
    # check for MHC class
    if 'A*' in resultContent:
        mhcClass = "MHC_CLASS_I"
        mhcSuffix = "1"

    # does HLA sample of this class already exist?
    hlaCode = 'HLA' + mhcSuffix + parentCode
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, hlaCode))
    foundSamples = search_service.searchForSamples(sc)

    if len(foundSamples) < 1:
        newHLATypingSample = transaction.createNewSample(
            '/' + space + '/' + hlaCode, "Q_NGS_HLATYPING")
        newHLATypingSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
        newHLATypingSample.setExperiment(newHLATypingExperiment)
        newHLATypingSample.setPropertyValue("Q_HLA_CLASS", mhcClass)
    else:
        newHLATypingSample = transaction.getSampleForUpdate(
            foundSamples[0].getSampleIdentifier())

    newHLATypingSample.setPropertyValue("Q_HLA_TYPING", resultContent)

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_HLATYPING_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(newHLATypingSample)
    transaction.moveFile(resultPath, dataSet)
def handle_BSA_Run(transaction):
    # Get the name of the incoming file
    name = transaction.getIncoming().getName()
    incomingPath = transaction.getIncoming().getAbsolutePath()
    stem, ext = os.path.splitext(name)

    # Convert the raw file and write it to an mzml tmp folder.
    # Sadly, I can not see a way to make this part of the transaction.
    tmpdir = tempfile.mkdtemp(dir=MZML_TMP)
    try:
        convert = partial(convert_raw,
                          remote_base=REMOTE_BASE,
                          host=MSCONVERT_HOST,
                          timeout=CONVERSION_TIMEOUT,
                          user=MSCONVERT_USER)
        if ext.lower() in VENDOR_FORMAT_EXTENSIONS:
            openbis_format_code = VENDOR_FORMAT_EXTENSIONS[ext.lower()]
        else:
            raise ValueError("Invalid incoming file %s" % incomingPath)
        mzml_path = os.path.join(tmpdir, stem + '.mzML')
        raw_path = os.path.join(incomingPath, name)
        convert(raw_path, mzml_path)
        mzml_name = os.path.basename(mzml_path)
        mzml_dest = os.path.join(DROPBOX_PATH, mzml_name)
        os.rename(mzml_path, mzml_dest)
    finally:
        shutil.rmtree(tmpdir)

    # The MS experiment
    msExp = transaction.getExperiment(BSA_MPC_EXPERIMENT_ID)

    #TODO create new ms sample? if so, use normal qbic barcodes?
    msCode = "MS" + BSA_MPC_BARCODE

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, BSA_MPC_PROJECT))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)

    run = 1
    for samp in foundSamples:
        if samp.getSampleType() == "Q_MS_RUN":
            existingRun = int(samp.getCode().split("_")[-1])
            if existingRun >= run:
                run = existingRun + 1

    msSample = transaction.createNewSample(
        '/' + BSA_MPC_SPACE + '/' + msCode + "_" + str(run), "Q_MS_RUN")
    # set parent sample, always the same for bsa run
    msSample.setParentSampleIdentifiers([BSA_MPC_SAMPLE_ID])
    msSample.setExperiment(msExp)

    createRawDataSet(transaction, raw_path, msSample, openbis_format_code)
    GZipAndMoveMZMLDataSet(transaction, mzml_dest, msSample)

    for f in os.listdir(incomingPath):
        if ".testorig" in f:
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1

    # Get the name of the incoming file
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        experiment = identifier[1:5]
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
    dataSet.setMeasuredData(False)

    search_service = transaction.getSearchService()

    vcf = re.compile("VCQ\w{4}[0-9]{3}[A-Z]\w[A-Z]*")
    vcfCodes = vcf.findall(name)

    if len(vcfCodes) > 0:
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, vcfCodes[0]))
        foundSamples = search_service.searchForSamples(sc)
        vcSample = transaction.getSampleForUpdate(foundSamples[0].getSampleIdentifier())
    else:
        # vcf sample needs to be created
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, parentCode))
        foundSamples = search_service.searchForSamples(sc)
        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
        sa = transaction.getSampleForUpdate(parentSampleIdentifier)

        # register new experiment and sample
        #numberOfExperiments = len(search_service.listExperiments("/" + space + "/" + project)) + 1
        #newVariantCallingExperiment = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_NGS_VARIANT_CALLING")
        #newVariantCallingExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        existingExperimentIDs = []
        existingExperiments = search_service.listExperiments("/" + space + "/" + project)
        numberOfExperiments = len(existingExperiments) + 1
        for eexp in existingExperiments:
            existingExperimentIDs.append(eexp.getExperimentIdentifier())
        newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
        while newExpID in existingExperimentIDs:
            numberOfExperiments += 1
            newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)

        newVariantCallingExperiment = transaction.createNewExperiment(newExpID, "Q_NGS_VARIANT_CALLING")
        newVariantCallingExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

        search_service = transaction.getSearchService()
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        foundSamples2 = search_service.searchForSamples(sc)

        vcNumber = 1
        newSampleID = '/' + space + '/' + 'VC' + str(vcNumber) + parentCode
        existingSampleIDs = []
        for samp in foundSamples2:
            existingSampleIDs.append(samp.getSampleIdentifier())
        while newSampleID in existingSampleIDs:
            vcNumber += 1
            newSampleID = '/' + space + '/' + 'VC' + str(vcNumber) + parentCode

        vcSample = transaction.createNewSample(newSampleID, "Q_NGS_VARIANT_CALLING")
        vcSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
        vcSample.setExperiment(newVariantCallingExperiment)

    cegat = False
    sourceLabFile = open(os.path.join(incomingPath, 'source_dropbox.txt'), 'r')
    sourceLab = sourceLabFile.readline().strip()
    sourceLabFile.close()
    if sourceLab == 'dmcegat':
        cegat = True
    os.remove(os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt')))

    for f in os.listdir(incomingPath):
        if f.endswith('origlabfilename') and cegat:
            origName = open(os.path.join(incomingPath, f), 'r')
            secondaryName = origName.readline().strip().split('_')[0]
            origName.close()
            #entitySample = transaction.getSampleForUpdate('/%s/%s' % (space,parentCode))
            sa.setPropertyValue('Q_SECONDARY_NAME', secondaryName)
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))
        elif f.endswith('sha256sum') or f.endswith('vcf'):
            pass
            #transaction.moveFile(os.path.join(incomingPath,f), dataSet)
        else:
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))

    dataSet.setSample(vcSample)
    transaction.moveFile(incomingPath, dataSet)
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1

    # Get the name of the incoming file
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        experiment = identifier[1:5]
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, parentCode))
    foundSamples = search_service.searchForSamples(sc)
    parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    sa = transaction.getSampleForUpdate(parentSampleIdentifier)

    # find or register new experiment
    expType = "Q_NGS_MAPPING"
    experiments = search_service.listExperiments("/" + space + "/" + project)
    experimentIDs = []
    for exp in experiments:
        experimentIDs.append(exp.getExperimentIdentifier())
    # no existing experiment for samples of this sample preparation found
    expID = experimentIDs[0]
    i = 0
    while expID in experimentIDs:
        i += 1
        expNum = len(experiments) + i
        expID = '/' + space + '/' + project + '/' + project + 'E' + str(expNum)

    #newMappingSample = transaction.createNewSample('/' + space + '/' + 'MP'+ parentCode, "Q_NGS_MAPPING")
    #newMappingSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
    #newMappingSample.setExperiment(mapExperiment)

    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    allSamples = search_service.searchForSamples(sc)

    #existingSampleIDs = []
    ngsParents = []
    for samp in allSamples:
        #existingSampleIDs.append(samp.getSampleIdentifier())
        if samp.getSampleType() == "Q_NGS_SINGLE_SAMPLE_RUN":
            if sa.getSampleIdentifier() in samp.getParentSampleIdentifiers():
                ngsParents.append(samp.getSampleIdentifier())

    #replNumber = 1
    #if len(ngsParents > 1):
    mapSampleID = '/' + space + '/' + 'MP' + parentCode
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, mapSampleID))
    foundMapSample = search_service.searchForSamples(sc)
    #while newSampleID in existingSampleIDs:
    #    vcNumber += 1
    #    newSampleID = '/' + space + '/' + 'MP' + str(vcNumber) + parentCode

    if len(foundMapSample) == 0:
        mapExperiment = transaction.createNewExperiment(expID, expType)
        mapExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        mappingSample = transaction.createNewSample(mapSampleID, "Q_NGS_MAPPING")
        mappingSample.setParentSampleIdentifiers(ngsParents)
        mappingSample.setExperiment(mapExperiment)
    else:
        mappingSample = transaction.getSampleForUpdate(foundMapSample[0].getSampleIdentifier())

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_MAPPING_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(mappingSample)
    transaction.moveFile(incomingPath, dataSet)
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()
    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1
    # Get the name of the incoming file
    name = transaction.getIncoming().getName()
    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        experiment = identifier[1:5]
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, parentCode))
    foundSamples = search_service.searchForSamples(sc)
    parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    sa = transaction.getSampleForUpdate(parentSampleIdentifier)
    # find or register new experiment
    expType = "Q_NGS_MAPPING"
    experiments = search_service.listExperiments("/" + space + "/" + project)
    experimentIDs = []
    for exp in experiments:
        experimentIDs.append(exp.getExperimentIdentifier())
    # no existing experiment for samples of this sample preparation found
    expID = experimentIDs[0]
    i = 0
    while expID in experimentIDs:
        i += 1
        expNum = len(experiments) + i
        expID = '/' + space + '/' + project + '/' + project + 'E' + str(expNum)
    #newMappingSample = transaction.createNewSample('/' + space + '/' + 'MP' + parentCode, "Q_NGS_MAPPING")
    #newMappingSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
    #newMappingSample.setExperiment(mapExperiment)
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    allSamples = search_service.searchForSamples(sc)
    #existingSampleIDs = []
    ngsParents = []
    for samp in allSamples:
        #existingSampleIDs.append(samp.getSampleIdentifier())
        if samp.getSampleType() == "Q_NGS_SINGLE_SAMPLE_RUN":
            if sa.getSampleIdentifier() in samp.getParentSampleIdentifiers():
                ngsParents.append(samp.getSampleIdentifier())
    #replNumber = 1
    #if len(ngsParents > 1):
    mapSampleID = '/' + space + '/' + 'MP' + parentCode
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, mapSampleID))
    foundMapSample = search_service.searchForSamples(sc)
    #while newSampleID in existingSampleIDs:
    #    vcNumber += 1
    #    newSampleID = '/' + space + '/' + 'MP' + str(vcNumber) + parentCode
    if len(foundMapSample) == 0:
        mapExperiment = transaction.createNewExperiment(expID, expType)
        mapExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        mappingSample = transaction.createNewSample(mapSampleID, "Q_NGS_MAPPING")
        mappingSample.setParentSampleIdentifiers(ngsParents)
        mappingSample.setExperiment(mapExperiment)
    else:
        mappingSample = transaction.getSampleForUpdate(
            foundMapSample[0].getSampleIdentifier())
    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_MAPPING_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(mappingSample)
    transaction.moveFile(incomingPath, dataSet)
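# The "find the next free experiment identifier" loop above recurs in several of
# these dropboxes. A minimal, self-contained sketch of how it could be factored
# into a shared helper; the identifier layout /SPACE/PROJECT/PROJECTE<n> is taken
# from the scripts in this file, everything else (names, example values) is
# illustrative only, not part of the original code.
def next_free_experiment_id(space, project, existing_experiment_ids):
    """Return the first /SPACE/PROJECT/PROJECTE<n> identifier that is not
    already contained in existing_experiment_ids."""
    num = len(existing_experiment_ids) + 1
    exp_id = '/%s/%s/%sE%s' % (space, project, project, num)
    while exp_id in existing_experiment_ids:
        num += 1
        exp_id = '/%s/%s/%sE%s' % (space, project, project, num)
    return exp_id

# Example (made-up identifiers):
#   next_free_experiment_id('TEST_SPACE', 'QABCD',
#                           ['/TEST_SPACE/QABCD/QABCDE1', '/TEST_SPACE/QABCD/QABCDE2'])
#   -> '/TEST_SPACE/QABCD/QABCDE3'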
def find_and_register_vcf(transaction, jsonContent, varcode):  # varcode example: GS130715_03-GS130717_03 (verified in startup.log)
    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    varcodekey = ''
    for key in jsonContent.keys():
        if key == "type" or key == "files":
            pass
        else:  # keys: "sample1" and "sample2"
            geneticIDS.append(jsonContent[key]["id_genetics"])  # GS130715_03 and GS130717_03
            qbicBarcodes.append(jsonContent[key]["id_qbic"])
            sampleSource.append(jsonContent[key]["tumor"])
            if jsonContent[key]["id_genetics"] == varcode:
                varcodekey = key
    expType = jsonContent["type"]
    project = qbicBarcodes[0][:5]
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()
    datasetSample = None
    sampleFound = False
    parentIdentifiers = []
    testParentIdentifiers = []
    global numberOfExperiments
    additionalInfo = ''
    secName = ''
    if len(geneticIDS) >= 2:
        somaticIdent = '%s-%s' % (geneticIDS[0], geneticIDS[1])  # if there is more than one sample we have to concatenate the identifiers
        secName = somaticIdent
        if somaticIdent == varcode:
            for i, parentBarcode in enumerate(qbicBarcodes):
                additionalInfo += '%s %s Tumor: %s \n' % (qbicBarcodes[i], geneticIDS[i], sampleSource[i])
            for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
                genShortID = geneticID.split('_')[0]
                if geneticID in newNGSSamples:
                    parentIdentifiers.append(newNGSSamples[geneticID])
                    testParentIdentifiers.append(oldTestSamples[geneticID])
                else:
                    for samp in foundSamples:
                        # some short variables to clean up the long if case
                        code = samp.getCode()
                        sType = samp.getSampleType()
                        qbicBarcodeID = '/' + space + '/' + barcode  # qbic identifier from the metadata that came in (probably tissue sample)
                        parentIDs = samp.getParentSampleIdentifiers()
                        analyte = samp.getPropertyValue("Q_SAMPLE_TYPE")
                        curSecName = samp.getPropertyValue("Q_SECONDARY_NAME")
                        extID = samp.getPropertyValue("Q_EXTERNALDB_ID")
                        # we are looking for either the test sample with this barcode OR a test sample with parent with this barcode,
                        # the right analyte (e.g. DNA) and the short genetics ID in secondary name or external ID
                        print code
                        print qbicBarcodeID in parentIDs
                        print analyte == typesDict[expType]
                        print (curSecName != None) and (genShortID in curSecName)
                        print (extID != None) and (genShortID in extID)
                        if ((barcode == code) and (sType == "Q_TEST_SAMPLE")) or ((qbicBarcodeID in parentIDs) and (analyte == typesDict[expType]) and (((curSecName != None) and (genShortID in curSecName)) or ((extID != None) and (genShortID in extID)))):
                            testParentID = samp.getSampleIdentifier()
                            # this time we are looking for the NGS Single Sample Run attached to the test sample we just found
                            for s in foundSamples:
                                sampleType = s.getSampleType()
                                curSecName = s.getPropertyValue("Q_SECONDARY_NAME")
                                extDB = s.getPropertyValue("Q_EXTERNALDB_ID")
                                if (testParentID in s.getParentSampleIdentifiers()) and (sampleType == "Q_NGS_SINGLE_SAMPLE_RUN") and (((curSecName != None) and (geneticID in curSecName)) or ((extDB != None) and (geneticID in extDB))):
                                    sampleIdent = s.getSampleIdentifier()
                                    # if we found the right one, we append it for later, as every related test sample is needed for registration
                                    parentIdentifiers.append(sampleIdent)
                                    testParentIdentifiers.append(testParentID)
    else:
        geneticID = varcode
        genShortID = geneticID.split('_')[0]
        barcode = jsonContent[varcodekey]["id_qbic"]
        additionalInfo = '%s %s Tumor: %s \n' % (barcode, geneticID, jsonContent[varcodekey]["tumor"])
        secName += '%s ' % geneticID
        if geneticID in newNGSSamples:
            parentIdentifiers.append(newNGSSamples[geneticID])
            testParentIdentifiers.append(oldTestSamples[geneticID])
        else:
            for samp in foundSamples:
                # some short variables to clean up the long if case
                code = samp.getCode()
                sType = samp.getSampleType()
                qbicBarcodeID = '/' + space + '/' + barcode  # qbic identifier from the metadata that came in (probably tissue sample)
                parentIDs = samp.getParentSampleIdentifiers()
                analyte = samp.getPropertyValue("Q_SAMPLE_TYPE")
                curSecName = samp.getPropertyValue("Q_SECONDARY_NAME")
                extID = samp.getPropertyValue("Q_EXTERNALDB_ID")
                # we are looking for either the test sample with this barcode OR a test sample with parent with this barcode,
                # the right analyte (e.g. DNA) and the short genetics ID in secondary name or external ID
                if ((barcode == code) and (sType == "Q_TEST_SAMPLE")) or ((qbicBarcodeID in parentIDs) and (analyte == typesDict[expType]) and (((curSecName != None) and (genShortID in curSecName)) or ((extID != None) and (genShortID in extID)))):
                    testParentID = samp.getSampleIdentifier()
                    for s in foundSamples:
                        sampleType = s.getSampleType()
                        curSecName = s.getPropertyValue("Q_SECONDARY_NAME")
                        extDB = s.getPropertyValue("Q_EXTERNALDB_ID")
                        if (testParentID in s.getParentSampleIdentifiers()) and (sampleType == "Q_NGS_SINGLE_SAMPLE_RUN") and (((curSecName != None) and (geneticID in curSecName)) or ((extDB != None) and (geneticID in extDB))):
                            sampleIdent = s.getSampleIdentifier()
                            parentIdentifiers.append(sampleIdent)
                            testParentIdentifiers.append(testParentID)
    numberOfExperiments += 1
    existingExperimentIDs = []
    existingExperiments = search_service.listExperiments("/" + space + "/" + project)
    for eexp in existingExperiments:
        existingExperimentIDs.append(eexp.getExperimentIdentifier())
    newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
    newVCExp = transaction.createNewExperiment(newExpID, "Q_NGS_VARIANT_CALLING")
    identString = varcode  # not used atm
    #for genID in geneticIDS:
    #    identString += genID.split('_')[-1]
    identString2 = ''
    for tpi in testParentIdentifiers:
        identString2 += '_' + tpi.split('/')[-1]
    #newVCSample = transaction.createNewSample('/' + space + '/' + 'VC' + project + qbicBarcodes[0][5:] + qbicBarcodes[1][5:] + identString, "Q_NGS_VARIANT_CALLING")
    newVCSample = transaction.createNewSample('/' + space + '/' + 'VC' + identString2, "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)
    #additionalInfo = ""
    #secName = ""
    #for i, parentBarcode in enumerate(qbicBarcodes):
    #    additionalInfo += '%s %s Tumor: %s \n' % (qbicBarcodes[i], geneticIDS[i], sampleSource[i])
    #    secName += '%s ' % (geneticIDS[i])
    secName = secName.strip()
    #additionalInfo = '%s %s Tumor: %s \n %s %s Tumor: %s' % (qbicBarcodes[0], geneticIDS[0], sampleSource[0], qbicBarcodes[1], geneticIDS[1], sampleSource[1])
    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    #secName = '%s-%s' % (geneticIDS[0], geneticIDS[1])
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)
    datasetSample = newVCSample
    return datasetSample
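# find_and_register_vcf and find_and_register_ngs consume a metadata dictionary
# parsed from the dropbox's *.metadata file. Based on the keys accessed above,
# the layout looks roughly like the sketch below; only the key names come from
# the code, all values are invented placeholders.
example_json_content = {
    "type": "somatic",                       # mapped via typesDict to an analyte, e.g. DNA
    "files": ["GS130715_03-GS130717_03_vc_strelka.vcf.gz"],
    "sample1": {
        "id_genetics": "GS130715_03",        # external genetics ID
        "id_qbic": "QABCD001A1",             # QBiC barcode, pattern Q[A-Z]{4}\d{3}\w{2}
        "tumor": "yes",
        "qc": [{"name": "read count", "value": 123456}],
        "genome": "GRCh37",
        "processing_system": "example_system",
    },
    "sample2": {
        "id_genetics": "GS130717_03",
        "id_qbic": "QABCD002A2",
        "tumor": "no",
        "qc": [],
        "genome": "GRCh37",
        "processing_system": "example_system",
    },
}

# The per-sample entries are everything except "type" and "files":
example_sample_keys = [k for k in example_json_content if k not in ("type", "files")]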
def find_and_register_ngs(transaction, jsonContent):
    qcValues = jsonContent["sample1"]["qc"]
    genome = jsonContent["sample1"]["genome"]
    idGenetics = jsonContent["sample1"]["id_genetics"]
    qbicBarcode = jsonContent["sample1"]["id_qbic"]
    system = jsonContent["sample1"]["processing_system"]
    tumor = jsonContent["sample1"]["tumor"]
    expType = jsonContent["type"]
    project = qbicBarcode[:5]
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    datasetSample = None
    sampleFound = False
    sampleIdent = None
    space = foundSamples[0].getSpace()
    testSampleCode = None
    knownCodes = []
    for samp in foundSamples:
        qbicBarcodeID = '/' + samp.getSpace() + '/' + qbicBarcode
        knownCodes.append(samp.getCode())
        print "code: " + samp.getCode()
        if qbicBarcodeID in samp.getParentSampleIdentifiers() or qbicBarcode == samp.getCode():
            sampleType = samp.getSampleType()
            if sampleType == "Q_TEST_SAMPLE":
                print "searching: " + idGenetics.split('_')[0]
                print samp.getPropertyValue("Q_EXTERNALDB_ID")
                if (samp.getPropertyValue("Q_SAMPLE_TYPE") == typesDict[expType]) and ((samp.getPropertyValue("Q_SECONDARY_NAME") == idGenetics.split('_')[0]) or (samp.getPropertyValue("Q_EXTERNALDB_ID") == idGenetics.split('_')[0])):
                    sampleIdent = samp.getSampleIdentifier()
                    testSampleCode = samp.getCode()
                    oldTestSamples[idGenetics] = sampleIdent
    if not sampleIdent:
        if not idGenetics in newTestSamples:
            for samp in foundSamples:
                if qbicBarcode == samp.getCode():
                    testSampleCode = createNewBarcode(project, transaction)
                    sampleIdent = '/' + space + '/' + testSampleCode
                    testSample = transaction.createNewSample(sampleIdent, "Q_TEST_SAMPLE")
                    testSample.setParentSampleIdentifiers([samp.getSampleIdentifier()])
                    testSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics.split('_')[0])
                    testSample.setPropertyValue('Q_SAMPLE_TYPE', typesDict[expType])
                    global numberOfExperiments
                    numberOfExperiments += 1
                    newTestSampleExperiment = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_SAMPLE_PREPARATION")
                    testSample.setExperiment(newTestSampleExperiment)
                    newTestSamples[idGenetics] = sampleIdent
    for s in foundSamples:
        # There is already a registered NGS run
        if (s.getSampleType() == "Q_NGS_SINGLE_SAMPLE_RUN") and (sampleIdent in s.getParentSampleIdentifiers() and (s.getPropertyValue("Q_SECONDARY_NAME") in idGenetics)):
            sa = transaction.getSampleForUpdate(s.getSampleIdentifier())
            sa.setPropertyValue("Q_SECONDARY_NAME", idGenetics)
            datasetSample = sa
            sampleFound = True
    if not sampleFound:
        # register new experiment and sample
        numberOfExperiments += 1
        newNGSMeasurementExp = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_NGS_MEASUREMENT")
        newNGSMeasurementExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_MODE', 'PAIRED_END')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCER_DEVICE', 'IMGAG_ILLUMINA_HISEQ_2500')
        newNGSMeasurementExp.setPropertyValue('Q_ADDITIONAL_INFO', system)
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_TYPE', typesDict[expType])
        newNGSID = '/' + space + '/' + 'NGS' + idGenetics.split('_')[-1] + testSampleCode
        newNGSrunSample = transaction.createNewSample(newNGSID, "Q_NGS_SINGLE_SAMPLE_RUN")
        newNGSrunSample.setParentSampleIdentifiers([sampleIdent])
        newNGSrunSample.setExperiment(newNGSMeasurementExp)
        newNGSSamples[idGenetics] = newNGSID
        additionalInfo = '%s: %s\n' % ("Genome", genome)
        for qc in qcValues:
            line = str(qc)
            additionalInfo += '%s\n' % line.replace('{', '').replace('}', '')
        newNGSrunSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
        newNGSrunSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics)
        datasetSample = newNGSrunSample
    return datasetSample
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()
    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1
    # Get the name of the incoming file
    name = transaction.getIncoming().getName()
    foundBarcode = barcode.findall(name)[0]
    wfSample = sPattern.findall(name)[0]
    project = foundBarcode[:5]
    parentCode = foundBarcode[:10]
    ss = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, wfSample))
    foundSamples = ss.searchForSamples(sc)
    samplehit = foundSamples[0]
    space = foundSamples[0].getSpace()
    sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier())
    newNumber = 1
    newSampleID = '/' + space + '/' + 'VAC' + str(newNumber) + wfSample
    existingSampleIDs = []
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples2 = ss.searchForSamples(sc)
    for samp in foundSamples2:
        existingSampleIDs.append(samp.getSampleIdentifier())
    # search in known ids, but also try to fetch the sample in case it wasn't indexed yet
    while newSampleID in existingSampleIDs or transaction.getSampleForUpdate(newSampleID):
        newNumber += 1
        newSampleID = '/' + space + '/' + 'VAC' + str(newNumber) + wfSample
    newSample = transaction.createNewSample(newSampleID, "Q_VACCINE_CONSTRUCT")
    newSample.setParentSampleIdentifiers([samplehit.getSampleIdentifier()])
    existingExperimentIDs = []
    existingExperiments = ss.listExperiments("/" + space + "/" + project)
    numberOfExperiments = len(existingExperiments) + 1
    for eexp in existingExperiments:
        existingExperimentIDs.append(eexp.getExperimentIdentifier())
    newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments)
    experiment = transaction.createNewExperiment(newExpID, "Q_NGS_EPITOPE_SELECTION")
    experiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
    newSample.setExperiment(experiment)
    # Register files
    dataSetRes = transaction.createNewDataSet('Q_VACCINE_CONSTRUCT_DATA')
    dataSetRes.setMeasuredData(False)
    dataSetRes.setSample(newSample)
    os.remove(os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt')))
    resultsname = name.replace(foundBarcode + '__', '').replace('.txt', '')
    new_folder = os.path.realpath(os.path.join(incomingPath, resultsname))
    os.mkdir(new_folder)
    for f in os.listdir(incomingPath):
        if f.endswith('origlabfilename'):
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))
        elif not os.path.isdir(os.path.join(incomingPath, f)):
            new_name = f.replace(foundBarcode + '__', '')
            os.rename(os.path.realpath(os.path.join(incomingPath, f)),
                      os.path.join(new_folder, new_name))
    transaction.moveFile(new_folder, dataSetRes)
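# The numbered VAC<n><wfSample> lookup above mixes two checks: a list of already
# known identifiers and a direct fetch via transaction.getSampleForUpdate, which
# also catches samples that exist but are not yet indexed by the search service.
# A hedged sketch of that logic as a reusable helper (names and the optional
# lookup callback are illustrative, not part of the original script):
def next_free_vac_code(space, wf_sample, known_ids, lookup=None):
    """Return the first /SPACE/VAC<n><wf_sample> identifier that is neither in
    known_ids nor resolvable via the optional lookup callback."""
    number = 1
    candidate = '/%s/VAC%d%s' % (space, number, wf_sample)
    while candidate in known_ids or (lookup is not None and lookup(candidate)):
        number += 1
        candidate = '/%s/VAC%d%s' % (space, number, wf_sample)
    return candidate

# In the dropbox above, lookup would be transaction.getSampleForUpdate.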
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()
    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1
    # Get the name of the incoming file
    name = transaction.getIncoming().getName()
    # identifier = pattern.findall(name)[0]
    # if isExpected(identifier):
    #     project = identifier[:5]
    #     #parentCode = identifier[:10]
    # else:
    #     print "The identifier "+identifier+" did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"
    propertyMap = mangleFilenameForAttributes(name)
    # we'll get qbic code and patient id
    expID = propertyMap['expID']
    code = propertyMap['qbicID']
    projectCode = code[:5]
    patientID = propertyMap['patientID']
    timepoint = propertyMap['timepoint']
    modality = propertyMap['modality']
    tracer = propertyMap['tracer']
    tissue = propertyMap['tissue']
    timestamp = propertyMap['datestr']
    # print "look for: ", code
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    # Find the patient according to code
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, code))
    foundSamples = search_service.searchForSamples(sc)
    if not len(foundSamples) > 0:
        raise SampleNotFoundError(
            'openBIS query of ' + code + ' failed. Please recheck your QBiC code!')
    # produces an IndexError if sample code does not exist (will check before)
    sampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    rootSample = transaction.getSampleForUpdate(sampleIdentifier)
    # print code, "was found in space", space, "as", sampleIdentifier
    # get or create imaging-specific experiment/sample and
    # attach to the patient sample
    expType = "Q_BMI_GENERIC_IMAGING"
    # load imaging experiments to append new data
    activeExperiment = None
    experiments = search_service.listExperiments(
        "/" + space + "/" + projectCode)
    experimentIDs = []
    fullExpIdentifier = '/' + space + '/' + projectCode + '/' + expID
    for exp in experiments:
        if exp.getExperimentType() == expType and exp.getExperimentIdentifier() == fullExpIdentifier:
            activeExperiment = exp
    # if expID is not found...
    if (activeExperiment == None):
        raise ExperimentNotFoundError(
            'Experiment with ID ' + expID + ' could not be found! Check the ID.')
    sc = SearchCriteria()
    # Find the imaging runs already attached to this experiment
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.TYPE, "Q_BMI_GENERIC_IMAGING_RUN"))
    ec = SearchCriteria()
    ec.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, expID))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(ec))
    existingSamples = search_service.searchForSamples(sc)
    imagingSampleCode = modality + '-' + tracer + '-' + tissue + '-' + \
        patientID + '-' + timepoint + '-' + \
        str(len(existingSamples) + 1).zfill(3)
    # let's first check if such an imaging run was registered before
    sc = SearchCriteria()
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, imagingSampleCode))
    foundSamples = search_service.searchForSamples(sc)
    if len(foundSamples) > 0:
        raise SampleAlreadyCreatedError(
            'Sample ' + imagingSampleCode + ' has been created already. '
            'Please re-check to avoid duplicates!\nOffending file: ' + incomingPath)
    imagingSample = transaction.createNewSample(
        '/' + space + '/' + imagingSampleCode, "Q_BMI_GENERIC_IMAGING_RUN")
    imagingSample.setParentSampleIdentifiers(
        [rootSample.getSampleIdentifier()])
    imagingSample.setExperiment(activeExperiment)
    sampleLabel = modality + ' imaging (' + patientID + ', ' + timepoint + ')'
    imagingSample.setPropertyValue('Q_SECONDARY_NAME', sampleLabel)
    imagingSample.setPropertyValue('Q_TIMEPOINT', timepoint)
    if tissue == 'liver':
        imagingSample.setPropertyValue('Q_IMAGED_TISSUE', 'LIVER')
    elif tissue == 'tumor':
        imagingSample.setPropertyValue(
            'Q_IMAGED_TISSUE', 'HEPATOCELLULAR_CARCINOMA')
    openbisTimestamp = buildOpenBisTimestamp(timestamp)
    imagingSample.setPropertyValue('Q_MSHCC_IMAGING_DATE', openbisTimestamp)
    # create new dataset
    imagingDataset = transaction.createNewDataSet('Q_BMI_IMAGING_DATA')
    imagingDataset.setMeasuredData(False)
    imagingDataset.setSample(imagingSample)
    imagingDataset.setPropertyValue(
        'Q_SECONDARY_NAME', modality + ' data (' + patientID + ', ' + timepoint + ')')
    # disable hash computation for now... resulted in outOfMemory errors for some bigger files
    #incomingFileSha256Sum = hashlib.sha256(
    #    open(incomingPath, 'rb').read()).hexdigest()
    incomingFileSha256Sum = 'MISSING!'
    imagingDataset.setPropertyValue(
        'Q_TARBALL_SHA256SUM', incomingFileSha256Sum)
    # finish the transaction
    transaction.moveFile(incomingPath, imagingDataset)
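# The commented-out checksum above reads the whole tarball into memory at once,
# which is what caused the OutOfMemory errors mentioned in the comment. If the
# checksum is ever re-enabled, a streaming variant along these lines would avoid
# that; this is a sketch, not part of the original script.
import hashlib

def sha256_of_file(path, chunk_size=16 * 1024 * 1024):
    """Compute the SHA-256 of a file in fixed-size chunks so that large
    tarballs are never loaded into memory in one piece."""
    digest = hashlib.sha256()
    handle = open(path, 'rb')
    try:
        chunk = handle.read(chunk_size)
        while chunk:
            digest.update(chunk)
            chunk = handle.read(chunk_size)
    finally:
        handle.close()
    return digest.hexdigest()

# e.g. incomingFileSha256Sum = sha256_of_file(incomingPath)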
def find_and_register_vcf(transaction, jsonContent):
    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    for key in jsonContent.keys():
        if key == "type" or key == "files":
            pass
        else:
            geneticIDS.append(jsonContent[key]["id_genetics"])
            qbicBarcodes.append(jsonContent[key]["id_qbic"])
            sampleSource.append(jsonContent[key]["tumor"])
    expType = jsonContent["type"]
    project = qbicBarcodes[0][:5]
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    datasetSample = None
    sampleFound = False
    parentIdentifiers = []
    testParentIdentifiers = []
    for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
        for samp in foundSamples:
            space = samp.getSpace()
            qbicBarcodeID = '/' + space + '/' + barcode
            print qbicBarcodeID
            print geneticID
            if qbicBarcodeID in samp.getParentSampleIdentifiers():
                testParentID = samp.getSampleIdentifier()
                for s in foundSamples:
                    sampleType = s.getSampleType()
                    print sampleType
                    print testParentID
                    print s.getParentSampleIdentifiers()
                    print s.getPropertyValue("Q_SECONDARY_NAME")
                    print geneticID
                    if (testParentID in s.getParentSampleIdentifiers()) and (sampleType == "Q_NGS_SINGLE_SAMPLE_RUN") and (s.getPropertyValue("Q_SECONDARY_NAME") in geneticID):
                        sampleIdent = s.getSampleIdentifier()
                        parentIdentifiers.append(sampleIdent)
                        testParentIdentifiers.append(testParentID)
    numberOfExperiments = len(search_service.listExperiments("/" + space + "/" + project)) + 1
    newVCExp = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_NGS_VARIANT_CALLING")
    newVCExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
    identString = ''
    for genID in geneticIDS:
        identString += genID.split('_')[-1]
    identString2 = ''
    for tpi in testParentIdentifiers:
        identString2 += tpi.split('/')[-1]
    #newVCSample = transaction.createNewSample('/' + space + '/' + 'VC' + project + qbicBarcodes[0][5:] + qbicBarcodes[1][5:] + identString, "Q_NGS_VARIANT_CALLING")
    newVCSample = transaction.createNewSample('/' + space + '/' + 'VC' + identString2 + identString, "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)
    additionalInfo = '%s %s Tumor: %s \n %s %s Tumor: %s' % (qbicBarcodes[0], geneticIDS[0], sampleSource[0], qbicBarcodes[1], geneticIDS[1], sampleSource[1])
    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    secName = '%s-%s' % (geneticIDS[0], geneticIDS[1])
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)
    datasetSample = newVCSample
    return datasetSample
def process(transaction, parameters, tableBuilder):
    """Update old flow experiments that have some missing or incorrect
    information.
    """

    # Latest experiment version
    EXPERIMENT_VERSION = 1

    # Set up logging
    _logger = setUpLogging()

    # Prepare the return table
    tableBuilder.addHeader("success")
    tableBuilder.addHeader("message")

    # Add a row for the results
    row = tableBuilder.addRow()

    # Retrieve parameters from client
    expPermId = parameters.get("expPermId")

    # Log parameter info
    _logger.info("Requested update of experiment " + expPermId + ".")

    # Get the experiment
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, expPermId))
    experiments = searchService.searchForExperiments(expCriteria)

    # If we did not get the experiment, return here with an error
    if len(experiments) != 1:
        # Prepare the return arguments
        success = False
        message = "The experiment with permID " + expPermId + " could not be found."
        # Log the error
        _logger.error(message)
        # Add the results to current row
        row.setCell("success", success)
        row.setCell("message", message)
        # Return here
        return

    # Get the experiment
    experiment = experiments[0]

    # Get the experiment type
    experimentType = experiment.getExperimentType()

    # Log
    _logger.info("Successfully retrieved Experiment with permId " + expPermId +
                 " and type " + experimentType + ".")

    # Build the corresponding dataset type
    experimentPrefix = experimentType[0:experimentType.find("_EXPERIMENT")]
    dataSetType = experimentPrefix + "_FCSFILE"

    # Retrieve all FCS files contained in the experiment
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, dataSetType))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, expPermId))
    searchCriteria.addSubCriteria(SearchSubCriteria.createExperimentCriteria(expCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    # Log
    _logger.info("Retrieved " + str(len(dataSets)) +
                 " dataset(s) for experiment with permId " + expPermId + ".")

    # If we did not get the datasets, return here with an error
    if dataSets is None:
        # Prepare the return arguments
        success = False
        message = "No FCS files could be found for experiment with permID " + \
            expPermId + "."
        # Log the error
        _logger.error(message)
        # Add the results to current row
        row.setCell("success", success)
        row.setCell("message", message)
        # Return here
        return

    # Get the file from the first dataset
    files = getFileForCode(dataSets[0].getDataSetCode())
    if len(files) != 1:
        # Prepare the return arguments
        success = False
        message = "Could not retrieve the FCS file to process!"
        # Log the error
        _logger.error(message)
        # Add the results to current row
        row.setCell("success", success)
        row.setCell("message", message)
        # Return here
        return

    # Get the file
    fcsFile = files[0]

    # Log
    _logger.info("Reading file " + fcsFile + ".")

    # Open the FCS file
    reader = FCSReader(java.io.File(fcsFile), False)

    # Parse the file with data
    if not reader.parse():
        # Prepare the return arguments
        success = False
        message = "Could not process file " + os.path.basename(fcsFile)
        # Log the error
        _logger.error(message)
        # Add the results to current row
        row.setCell("success", success)
        row.setCell("message", message)
        # Return here
        return

    #
    # EXPERIMENT NAME
    #

    # Get the experiment name from the file
    expNameFromFile = reader.getCustomKeyword("EXPERIMENT NAME")

    # Get the experiment name from the registered Experiment
    currentExpName = experiment.getPropertyValue(experimentType + "_NAME")

    # We need the Experiment to be mutable
    mutableExperiment = transaction.makeExperimentMutable(experiment)

    # Are the experiment names matching?
    if expNameFromFile == currentExpName:
        # Log
        _logger.info("Registered experiment name matches the experiment " +
                     "name from the FCS file.")
    else:
        # Update the registered Experiment name
        mutableExperiment.setPropertyValue(experimentType + "_NAME", expNameFromFile)
        # Log
        _logger.info("Updated registered experiment name from '" +
                     currentExpName + "' to '" + expNameFromFile + "'.")

    #
    # FCS FILE PARAMETERS AND ACQUISITION DATE
    #

    hardwareString = experimentType[0:experimentType.find("_EXPERIMENT")]
    parameterProperty = hardwareString + "_FCSFILE_PARAMETERS"
    acqDateProperty = hardwareString + "_FCSFILE_ACQ_DATE"

    # Log
    _logger.info("Checking properties of " + str(len(dataSets)) + " file(s).")

    for dataSet in dataSets:

        # Check whether the parameters are stored for the file
        parameters = dataSet.getPropertyValue(parameterProperty)

        # Check whether the acquisition date is stored for the file
        acqDate = dataSet.getPropertyValue(acqDateProperty)

        if parameters is None or acqDate is None:

            # Make the DataSet mutable for update
            mutableDataSet = transaction.makeDataSetMutable(dataSet)

            # Get the file from the dataset
            files = getFileForCode(dataSet.getDataSetCode())
            if len(files) != 1:
                # Prepare the return arguments
                success = False
                message = "Could not retrieve the FCS file to process!"
                # Log the error
                _logger.error(message)
                # Add the results to current row
                row.setCell("success", success)
                row.setCell("message", message)
                # Return here
                return

            # Get the FCS file
            fcsFile = files[0]

            # Open and parse the FCS file
            reader = FCSReader(java.io.File(fcsFile), True)

            # Parse the file with data
            if not reader.parse():
                # Prepare the return arguments
                success = False
                message = "Could not process file " + os.path.basename(fcsFile)
                # Log the error
                _logger.error(message)
                # Add the results to current row
                row.setCell("success", success)
                row.setCell("message", message)
                # Return here
                return

            if acqDate is None:
                # Get and format the acquisition date
                dateStr = formatExpDateForPostgreSQL(
                    reader.getStandardKeyword("$DATE"))
                # Update the dataSet
                mutableDataSet.setPropertyValue(acqDateProperty, dateStr)
                # Log
                _logger.info("The acquisition date of file " + str(fcsFile) +
                             " was set to: " + dateStr + ".")

            if parameters is None:
                # Get the parameters
                parametersAttr = reader.parametersAttr
                if parametersAttr is None:
                    # Prepare the return arguments
                    success = False
                    message = "Could not read parameters from file " + \
                        os.path.basename(fcsFile)
                    # Log the error
                    _logger.error(message)
                    # Add the results to current row
                    row.setCell("success", success)
                    row.setCell("message", message)
                    # Return here
                    return

                # Convert the parameters to XML
                parametersXML = dictToXML(parametersAttr)

                # Now store them in the dataSet
                mutableDataSet.setPropertyValue(parameterProperty, parametersXML)

                # Log
                _logger.info("The parameters for file " + str(fcsFile) +
                             " were successfully stored (in XML).")

    # Update the version of the experiment
    mutableExperiment.setPropertyValue(experimentType + "_VERSION",
                                       str(EXPERIMENT_VERSION))

    success = True
    message = "Congratulations! The experiment was successfully upgraded " + \
        "to the latest version."

    # Log
    _logger.info(message)

    # Add the results to current row
    row.setCell("success", success)
    row.setCell("message", message)
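# dictToXML is defined elsewhere in the plugin; the call above only relies on it
# turning the parameter dictionary into an XML string before it is stored in the
# dataset property. A rough stand-in with that contract could look like the
# sketch below (hypothetical helper and example values, shown only to make the
# data flow concrete; the real implementation may structure the XML differently).
from xml.etree import ElementTree as ET

def dict_to_xml(attributes, root_tag="Parameters"):
    """Serialize a flat dictionary by storing every key/value pair as an
    attribute of a single root element and returning the serialized XML."""
    root = ET.Element(root_tag)
    for key, value in attributes.items():
        root.set(str(key), str(value))
    return ET.tostring(root)

# e.g. dict_to_xml({"P1N": "FSC-A", "P1R": "262144"}) yields something like
#      '<Parameters P1N="FSC-A" P1R="262144" />'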
def _getDataSets(self, expSampleType, expSamplePermId, sampleType, samplePermId,
                 requestedDatasetType="MICROSCOPY_IMG_CONTAINER"):
    """
    Return a list of datasets of requested type belonging to the
    MICROSCOPY_EXPERIMENT sample and a specific sample of type
    MICROSCOPY_SAMPLE_TYPE. If none are found, return [].
    """

    # Only two dataset types are allowed
    assert requestedDatasetType == "MICROSCOPY_IMG_CONTAINER" or \
        requestedDatasetType == "MICROSCOPY_ACCESSORY_FILE", \
        "Input argument 'requestedDatasetType' must be one of MICROSCOPY_IMG_CONTAINER or MICROSCOPY_ACCESSORY_FILE."

    self._logger.info("_getDataSets() called with requested data type " +
                      requestedDatasetType)

    if _DEBUG:
        self._logger.info("* Requested dataset type: " + requestedDatasetType)
        self._logger.info("* Requested experiment sample type: " + expSampleType)
        self._logger.info("* Requested experiment sample permId: " + expSamplePermId)
        self._logger.info("* Requested sample type: " + sampleType)
        self._logger.info("* Requested sample permId: " + samplePermId)

    # Dataset criteria
    datasetSearchCriteria = SearchCriteria()
    datasetSearchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE, requestedDatasetType)
    )

    # Add search criteria for the experiment sample with specified type and permId
    sampleExpCriteria = SearchCriteria()
    sampleExpCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE, expSampleType)
    )
    sampleExpCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID, expSamplePermId)
    )

    # Add search criteria for the sample with specified type and permId
    sampleCriteria = SearchCriteria()
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.TYPE, sampleType)
    )
    sampleCriteria.addMatchClause(
        MatchClause.createAttributeMatch(
            MatchClauseAttribute.PERM_ID, samplePermId)
    )
    sampleCriteria.addSubCriteria(
        SearchSubCriteria.createSampleParentCriteria(
            sampleExpCriteria)
    )

    # Add the sample criteria (with the experiment sample as parent) as
    # subcriterion of the dataset search
    datasetSearchCriteria.addSubCriteria(
        SearchSubCriteria.createSampleCriteria(sampleCriteria)
    )

    # Retrieve the datasets
    dataSets = searchService.searchForDataSets(datasetSearchCriteria)

    # Inform
    self._logger.info("Retrieved " + str(len(dataSets)) +
                      " dataSets of type " + requestedDatasetType + ".")

    # Return
    return dataSets
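# A hypothetical call site for the method above (the permId values are
# placeholders, not real identifiers; the type names come from the docstring):
#
#     dataSets = self._getDataSets(
#         expSampleType="MICROSCOPY_EXPERIMENT",
#         expSamplePermId="20240101000000000-1234",
#         sampleType="MICROSCOPY_SAMPLE_TYPE",
#         samplePermId="20240101000000000-5678",
#         requestedDatasetType="MICROSCOPY_IMG_CONTAINER")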
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()
    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1
    # Get the name of the incoming folder
    name = transaction.getIncoming().getName()
    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        pass
        #experiment = identifier[1:5]
        #parentCode = identifier[:10]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"
    project = identifier[:5]
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()
    global numberOfExperiments
    numberOfExperiments = len(search_service.listExperiments("/" + space + "/" + project))
    src = os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt'))
    if os.path.isfile(src):
        os.remove(src)
    print "start registration"
    #dataSet = None
    for f in os.listdir(os.path.join(incomingPath, name)):
        if f.endswith('metadata'):
            jsonContent = parse_metadata_file(os.path.realpath(os.path.join(os.path.join(incomingPath, name), f)))
            rawFiles = jsonContent["files"]
            vcfs = []
            fastqs = []
            gsvars = []
            print "metadata read"
            for rawFile in rawFiles:
                print rawFile
                if rawFile.endswith("vcf") or rawFile.endswith("vcf.gz"):
                    vcfs.append(rawFile)
                if rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"):
                    fastqs.append(rawFile)
                if rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"):
                    gsvars.append(rawFile)
            #if rawFiles[0].endswith("vcf") or rawFiles[0].endswith("vcf.gz"):
            #    datasetSample = find_and_register_vcf(transaction, jsonContent)
            #
            #    dataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
            #    dataSet.setSample(datasetSample)
            #elif rawFiles[0].endswith("fastq") or rawFiles[0].endswith("fastq.gz"):
            #    datasetSample = find_and_register_ngs(transaction, jsonContent)
            #    dataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
            #    dataSet.setSample(datasetSample)
            #os.remove(os.path.realpath(os.path.join(os.path.join(incomingPath, name), f)))
        else:
            pass
    folder = os.path.join(incomingPath, name)
    if len(fastqs) > 0:
        fastqSample = find_and_register_ngs(transaction, jsonContent)
        fastqDataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
        fastqDataSet.setSample(fastqSample)
        fastqFolder = os.path.join(folder, "raw")
        os.mkdir(fastqFolder)
        for f in fastqs:
            os.rename(os.path.join(folder, f), os.path.join(fastqFolder, f))
        transaction.moveFile(fastqFolder, fastqDataSet)
    for vc in vcfs:
        vcfSample = find_and_register_vcf(transaction, jsonContent)
        vcfDataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
        vcfDataSet.setSample(vcfSample)
        vcfFolder = os.path.join(folder, "vcf")
        os.mkdir(vcfFolder)
        for f in vcfs:
            os.rename(os.path.join(folder, f), os.path.join(vcfFolder, f))
            # move the GSvar file that belongs to this vcf along with it
            for g in gsvars:
                if f.split('.')[0].replace('_vc_strelka', '') == g.split('.')[0]:
                    os.rename(os.path.join(folder, g), os.path.join(vcfFolder, g))
        transaction.moveFile(vcfFolder, vcfDataSet)
def process(transaction):
    context = transaction.getRegistrationContext().getPersistentMap()
    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    key = context.get("RETRY_COUNT")
    if (key == None):
        key = 1
    # Get the name of the incoming file
    name = transaction.getIncoming().getName()
    identifier = pattern.findall(name)[0]
    #identifier = name
    if isExpected(identifier):
        project = identifier[:5]
    else:
        print "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, identifier))
    foundSamples = search_service.searchForSamples(sc)
    sampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    sa = transaction.getSampleForUpdate(sampleIdentifier)
    sampleType = "Q_NGS_SINGLE_SAMPLE_RUN"
    if sa.getSampleType() != sampleType:
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, "NGS" + identifier))
        foundSamples = search_service.searchForSamples(sc)
        if len(foundSamples) > 0:
            sampleIdentifier = foundSamples[0].getSampleIdentifier()
        else:
            search_service = transaction.getSearchService()
            sc = SearchCriteria()
            pc = SearchCriteria()
            pc.addMatchClause(
                SearchCriteria.MatchClause.createAttributeMatch(
                    SearchCriteria.MatchClauseAttribute.PROJECT, project))
            sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
            foundSamples = search_service.searchForSamples(sc)
            space = foundSamples[0].getSpace()
            sampleIdentifier = "/" + space + "/" + "NGS" + identifier
        if transaction.getSampleForUpdate(sampleIdentifier):
            sa = transaction.getSampleForUpdate(sampleIdentifier)
        else:
            # create NGS-specific experiment/sample and
            # attach to the test sample
            expType = "Q_NGS_MEASUREMENT"
            ngsExperiment = None
            experiments = search_service.listExperiments("/" + space + "/" + project)
            experimentIDs = []
            for exp in experiments:
                experimentIDs.append(exp.getExperimentIdentifier())
            expID = experimentIDs[0]
            i = 0
            while expID in experimentIDs:
                i += 1
                expNum = len(experiments) + i
                expID = '/' + space + '/' + project + \
                    '/' + project + 'E' + str(expNum)
            ngsExperiment = transaction.createNewExperiment(expID, expType)
            ngsExperiment.setPropertyValue(
                'Q_SEQUENCER_DEVICE', "UNSPECIFIED_ILLUMINA_HISEQ_2500")  # change this
            newID = 'NGS' + identifier
            ngsSample = transaction.createNewSample('/' + space + '/' + newID, sampleType)
            ngsSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
            ngsSample.setExperiment(ngsExperiment)
            sa = ngsSample
    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(sa)
    for f in os.listdir(incomingPath):
        if ".testorig" in f:
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))
        if ".origlabfilename" in f:
            nameFile = open(os.path.join(incomingPath, f))
            origName = nameFile.readline().strip()
            nameFile.close()
    transaction.moveFile(incomingPath, dataSet)
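# Several of these dropboxes delete the same bookkeeping files
# (source_dropbox.txt, *.origlabfilename, *.testorig, *.sha256sum) before moving
# the payload into the dataset. A small shared helper could centralise that; the
# suffix list below is collected from the scripts in this file, the function
# itself is only a sketch.
import os

MARKER_SUFFIXES = ('origlabfilename', 'testorig', 'sha256sum')

def remove_marker_files(incoming_path, suffixes=MARKER_SUFFIXES):
    """Delete dropbox bookkeeping files from the incoming folder and return
    the names of the remaining entries."""
    source_marker = os.path.join(incoming_path, 'source_dropbox.txt')
    if os.path.isfile(source_marker):
        os.remove(source_marker)
    remaining = []
    for entry in os.listdir(incoming_path):
        if entry.endswith(suffixes):
            os.remove(os.path.join(incoming_path, entry))
        else:
            remaining.append(entry)
    return remaining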