def prepend_old_log(self):
    """Prepend the old log to the new log.

    The location of the old log file is retrieved through the REST api.
    In order to work, the script should be executed on the LIMS server,
    since the location on disk is parsed out of the sftp string and then
    used for a local copy of the file.

    This method does not use logging, since that could mess up the logging
    settings; instead, warnings are printed to stderr.
    """
    try:
        log_artifact = Artifact(self.lims, id=self.log_file)
        log_artifact.get()
        if log_artifact.files:
            log_path = log_artifact.files[0].content_location.split(
                self.lims.baseuri.split(':')[1])[1]
            destination = os.path.join(os.getcwd(), self.log_file)
            copy(log_path, destination)
            with open(destination, 'a') as f:
                f.write('=' * 80 + '\n')
    except HTTPError:  # Probably no artifact found, skip prepending
        print(('No log file artifact found '
               'for id: {0}').format(self.log_file), file=sys.stderr)
    except IOError as e:  # Probably some path was wrong in copy
        print(('Log could not be prepended, '
               'make sure {0} and {1} are '
               'proper paths.').format(log_path, self.log_file),
              file=sys.stderr)
        raise e
def prepend_old_log(self, external_log_file=None):
    """Prepend the old log to the new log.

    The location of the old log file is retrieved through the REST api.
    In order to work, the script should be executed on the LIMS server,
    since the location on disk is parsed out of the sftp string and then
    used for a local copy of the file.

    This method does not use logging, since that could mess up the logging
    settings; instead, warnings are printed to stderr.
    """
    if external_log_file:
        log_file_name = external_log_file
    else:
        log_file_name = self.log_file

    local_log_path = os.path.join(os.getcwd(), log_file_name)
    if not os.path.isfile(local_log_path):
        try:
            log_artifact = Artifact(self.lims, id=log_file_name)
            log_artifact.get()
            if log_artifact.files:
                log_path = log_artifact.files[0].content_location.split(
                    self.lims.baseuri.split(':')[1])[1]
                copy(log_path, local_log_path)
                with open(local_log_path, 'a') as f:
                    f.write('=' * 80 + '\n')
        except HTTPError:  # Probably no artifact found, skip prepending
            print(('No log file artifact found '
                   'for id: {0}').format(log_file_name), file=sys.stderr)
        except IOError as e:  # Probably some path was wrong in copy
            print(('Log could not be prepended, '
                   'make sure {0} and {1} are '
                   'proper paths.').format(log_path, log_file_name),
                  file=sys.stderr)
            raise e
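# A minimal usage sketch for prepend_old_log above. The EppLogger class name,
# its constructor arguments and the artifact ids are assumptions made for
# illustration only; they are not confirmed by the code in this section.
#
#     logger = EppLogger(lims=lims, log_file='92-1234')
#     logger.prepend_old_log()            # prepend the default log artifact
#     logger.prepend_old_log('92-5678')   # or prepend an external log artifact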
def test_input_artifact_list(self):
    a = Artifact(uri=self.lims.get_uri('artifacts', 'a1'), lims=self.lims)
    with patch('requests.Session.get',
               return_value=Mock(content=self.root_artifact_xml, status_code=200)):
        assert a.input_artifact_list() == []
def get(self):
    limsl = lims.Lims(BASEURI, USERNAME, PASSWORD)
    # Sequencing queues are currently taken as the following:
    # MiSeq   - Step 7:  Denature, Dilute and Load Sample
    # NovaSeq - Step 11: Load to Flow Cell
    queues = {}
    queues['MiSeq'] = Queue(limsl, id='55')
    queues['NovaSeq'] = Queue(limsl, id='1662')
    methods = queues.keys()
    pools = {}

    for method in methods:
        pools[method] = {}
        if queues[method].artifacts:
            tree = ET.fromstring(queues[method].xml())
            for artifact in tree.iter('artifact'):
                queue_time = artifact.find('queue-time').text
                container = Container(
                    limsl,
                    uri=artifact.find('location').find('container').attrib['uri']).name
                attr_name = Artifact(limsl, uri=artifact.attrib['uri']).name
                value = artifact.find('location').find('value').text
                proj_and_samples = {}
                conc_qpcr = ''
                is_rerun = False  # default, in case no 'Rerun' UDF is found below
                art = Artifact(limsl, uri=artifact.attrib['uri'])
                if method == 'MiSeq':  # use ==, not 'is', for string comparison
                    # Finished library
                    if 'Concentration' in dict(art.udf.items()):
                        conc_qpcr = art.udf['Concentration']
                    # In-house library
                    elif 'Pool Conc. (nM)' in dict(art.udf.items()):
                        conc_qpcr = str(art.udf['Pool Conc. (nM)'])
                    is_rerun = art.udf.get('Rerun', False)
                elif method == 'NovaSeq':
                    if 'Concentration' in dict(art.udf.items()):
                        conc_qpcr = art.udf['Concentration']
                        is_rerun = art.udf.get('Rerun', False)
                    else:
                        new_art = art.parent_process.input_output_maps[0][0]
                        # The loop iterates at most 4 times, since the values
                        # were found within the first 4 preceding parent
                        # processes (through trial and error). If the values are
                        # not found within 4 iterations, they can be looked up
                        # manually in LIMS. The loop is structured this way
                        # because it is not clear in the genologics API which of
                        # the parent processes will contain the values in post
                        # process, and 4 seemed to get everything for the data
                        # at hand.
                        i = 0
                        while i < 4:
                            if 'Concentration' in dict(new_art['post-process-uri'].udf.items()):
                                conc_qpcr = new_art['post-process-uri'].udf['Concentration']
                                is_rerun = new_art['post-process-uri'].udf.get('Rerun', False)
                                break
                            else:
                                new_art = new_art['parent-process'].input_output_maps[0][0]
                                i += 1

                for sample in art.samples:
                    project = sample.project.id
                    if project in pools[method]:
                        if container in pools[method][project]['plates']:
                            pools[method][project]['plates'][container]['samples'].append(sample.name)
                        else:
                            pools[method][project]['plates'][container] = {
                                'samples': [sample.name],
                                'well': value,
                                'queue_time': queue_time,
                                'conc_pool_qpcr': conc_qpcr,
                                'is_rerun': is_rerun
                            }
                    else:
                        setup = sample.project.udf['Sequencing setup']
                        lanes = sample.project.udf['Sequence units ordered (lanes)']
                        librarytype = sample.project.udf['Library construction method']
                        runmode = sample.project.udf['Sequencing platform']
                        final_loading_conc = 'TBD'
                        if method == 'NovaSeq':
                            try:
                                final_loading_conc = Artifact(
                                    limsl, uri=artifact.attrib['uri']
                                ).udf['Final Loading Concentration (pM)']
                            except KeyError:
                                pass
                        pools[method][project] = {
                            'name': sample.project.name,
                            'setup': setup,
                            'lanes': lanes,
                            'runmode': runmode,
                            'final_loading_conc': final_loading_conc,
                            'librarytype': librarytype,
                            'plates': {
                                container: {
                                    'samples': [sample.name],
                                    'well': value,
                                    'queue_time': queue_time,
                                    'conc_pool_qpcr': conc_qpcr,
                                    'is_rerun': is_rerun
                                }
                            }
                        }

    self.set_header("Content-type", "application/json")
    self.write(json.dumps(pools))
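# For reference, a sketch of the JSON structure the handler above writes.
# All project ids, container ids and values below are made up, not taken
# from a real LIMS.
import json

example_pools = {
    'MiSeq': {
        'P123': {
            'name': 'Example_Project',
            'setup': '2x150',
            'lanes': 1,
            'runmode': 'MiSeq',
            'final_loading_conc': 'TBD',
            'librarytype': 'Finished library',
            'plates': {
                '27-123456': {
                    'samples': ['Sample_1', 'Sample_2'],
                    'well': '1:1',
                    'queue_time': '2024-01-01T00:00:00.000+01:00',
                    'conc_pool_qpcr': '2.1',
                    'is_rerun': False,
                },
            },
        },
    },
    'NovaSeq': {},
}

print(json.dumps(example_pools, indent=2))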
def update_samplesheet(lims, process_id, artifact_id, output_file):
    """Update illumina samplesheet."""
    process = Process(lims, id=process_id)
    trim_last_base = True  # Used to set Read1EndWithCycle

    def get_project(projects, urgent=False):
        """Inner function to get a project name for samples."""
        if urgent:  # Sort projects for urgent samples on name
            projects = sorted(projects.items(), key=operator.itemgetter(0))
            for project in projects:
                if project[1] < 9:
                    return project[0]  # return first project with < 9 samples
        else:  # Sort projects for other samples on number of samples
            projects = sorted(projects.items(), key=operator.itemgetter(1))
            return projects[0][0]  # return project with the least amount of samples

    # Parse families
    families = {}
    for artifact in process.all_inputs():
        for sample in artifact.samples:
            if (
                'Dx Familienummer' in list(sample.udf)
                and 'Dx NICU Spoed' in list(sample.udf)
                and 'Dx Protocolomschrijving' in list(sample.udf)
            ):
                # Dx production sample
                family = sample.udf['Dx Familienummer']

                # Create family if it does not exist
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': 'unknown_project',
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False  # merge, deep sequencing (5x), etc. samples
                    }

                # Update family information
                if sample.udf['Dx Onderzoeksreden'] == 'Research':  # Dx research sample
                    for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
                        if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
                            project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
                            families[family]['project_type'] = project_type
                            families[family]['split_project_type'] = False
                            break
                else:  # Dx clinic sample
                    newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
                    if 'SNP fingerprint MIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'Fingerprint'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif 'PID09.V7_smMIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'ERARE'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif sample.udf['Dx NICU Spoed']:
                        families[family]['NICU'] = True
                        project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                    elif 'elidS30409818' in newest_protocol and not families[family]['NICU']:
                        project_type = 'CREv2'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True
                    elif 'elidS31285117' in newest_protocol and not families[family]['NICU']:
                        project_type = 'SSv7'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True

                # Set urgent status
                if 'Dx Spoed' in list(sample.udf) and sample.udf['Dx Spoed']:
                    families[family]['urgent'] = True

                # Set deviating status, remove urgent status if deviating
                if (
                    ('Dx Mergen' in list(sample.udf) and sample.udf['Dx Mergen'])
                    or ('Dx Exoomequivalent' in list(sample.udf) and sample.udf['Dx Exoomequivalent'] > 1)
                ):
                    families[family]['deviating'] = True
                    families[family]['urgent'] = False
            else:  # Other samples
                if 'GIAB' in sample.name.upper() and not sample.project:  # GIAB control samples
                    family = 'GIAB'
                else:
                    family = sample.project.name
                    # Remove 'dx' (ignore case) and strip leading space or _
                    family = re.sub('^dx[ _]*', '', family, flags=re.IGNORECASE)
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': family,
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False
                    }

            # Add sample to family
            families[family]['samples'].append(sample)

    # Get all project types and count samples
    project_types = {}
    for family in families.values():
        if family['project_type'] in project_types:
            project_types[family['project_type']]['sample_count'] += len(family['samples'])
        else:
            project_types[family['project_type']] = {
                'sample_count': len(family['samples']),
                'projects': {},
                'split_project_type': family['split_project_type']
            }

    # Define projects per project_type
    for project_type in project_types:
        project_types[project_type]['index'] = 0
        if project_types[project_type]['split_project_type']:
            for i in range(0, int(project_types[project_type]['sample_count']/9+1)):
                project_types[project_type]['projects']['{0}_{1}'.format(project_type, i+1)] = 0
        else:
            project_types[project_type]['projects'][project_type] = 0

    # Set sample projects
    sample_projects = {}
    sample_sequence_names = {}

    # Urgent families / samples, skip deviating
    for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Deviating families / samples
    for family in [family for family in families.values() if family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Non urgent and non deviating families / samples
    normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
    for family in sorted(normal_families, key=lambda fam: len(fam['samples']), reverse=True):
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Check sequencer type.
    # NextSeq runs need to reverse complement 'index2' for dual barcodes and
    # 'index' for single barcodes.
    nextseq_run = 'nextseq' in process.type.name.lower()

    # Edit clarity samplesheet
    sample_header = ''  # empty until [Data] section
    settings_section = False
    samplesheet_artifact = Artifact(lims, id=artifact_id)
    file_id = samplesheet_artifact.files[0].id

    for line in lims.get_file_contents(id=file_id).rstrip().split('\n'):
        if line.startswith('[Settings]') and trim_last_base:
            output_file.write('{line}\n'.format(line=line))
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            settings_section = True
        elif line.startswith('[Data]') and trim_last_base and not settings_section:
            output_file.write('[Settings]\n')
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            output_file.write('{line}\n'.format(line=line))
        elif line.startswith('Sample_ID'):  # Samples header line
            sample_header = line.rstrip().split(',')
            sample_id_index = sample_header.index('Sample_ID')
            sample_name_index = sample_header.index('Sample_Name')
            sample_project_index = sample_header.index('Sample_Project')
            if 'index2' in sample_header:
                index_index = sample_header.index('index2')
            else:
                index_index = sample_header.index('index')
            output_file.write('{line}\n'.format(line=line))
        elif sample_header:  # Samples header seen, so continue with samples.
            data = line.rstrip().split(',')

            # Fix sample name -> use sequence name
            if data[sample_name_index] in sample_sequence_names:
                data[sample_name_index] = sample_sequence_names[data[sample_name_index]]

            # Set Sample_Project
            if data[sample_name_index] in sample_projects:
                data[sample_project_index] = sample_projects[data[sample_name_index]]

            # Overwrite Sample_ID with Sample_Name to get the correct conversion
            # output folder structure
            data[sample_id_index] = data[sample_name_index]

            # Reverse complement index for NextSeq runs
            if nextseq_run:
                data[index_index] = clarity_epp.export.utils.reverse_complement(data[index_index])

            output_file.write('{line}\n'.format(line=','.join(data)))
        else:  # Leave other lines untouched.
            output_file.write('{line}\n'.format(line=line))
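# update_samplesheet calls clarity_epp.export.utils.reverse_complement for
# NextSeq runs. A minimal sketch of what such a helper typically does; this
# is an illustration, not the library's actual implementation.
def reverse_complement(sequence):
    """Return the reverse complement of a DNA index sequence."""
    complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'N': 'N'}
    return ''.join(complement[base] for base in reversed(sequence.upper()))

assert reverse_complement('ACGT') == 'ACGT'   # palindromic index
assert reverse_complement('AACC') == 'GGTT'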
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    if not args.path:
        args.path = os.getcwd()
    file_list = os.listdir(args.path)

    # Find all per input result files
    io = p.input_output_maps
    io_filtered = [(x, y) for x, y in io
                   if y['output-generation-type'] == 'PerInput'
                   and y['output-type'] == 'ResultFile']

    artifact_missing_file = []
    artifact_multiple_file = []
    found_files = []

    for input, output in io_filtered:
        i_a = Artifact(lims, id=input['limsid'])
        o_a = Artifact(lims, id=output['limsid'])

        # Input Well, Input Container
        i_w, i_c = i_a.location[1], i_a.location[0]

        # Well is typed without colon in filename:
        i_w = ''.join(i_w.split(':'))

        info = {
            'well': i_w,
            'container_id': i_c.id,
            'input_artifact_id': i_a.id
        }

        # Use a regular expression to find the file name given the container
        # and sample. This all assumes the driver template name ends with:
        # ${INPUT.CONTAINER.PLACEMENT}_${INPUT.NAME}_${INPUT.CONTAINER.LIMSID}_${INPUT.LIMSID}
        # However, names are excluded to improve robustness.
        re_str = '.*{well}_.*_.*{container_id}_.*{input_artifact_id}'.format(**info)
        im_file_r = re.compile(re_str)
        fns = list(filter(im_file_r.match, file_list))

        logging.info(
            ("Looking for file for artifact id: {input_artifact_id} "
             "from container with id: {container_id}.").format(**info))

        if len(fns) == 0:
            logging.warning(
                "No image file found for artifact with id {0}".format(i_a.id))
            artifact_missing_file.append(i_a)
        elif len(fns) > 1:
            logging.warning(
                ("Multiple image files found for artifact with id {0}, "
                 "please attach files manually").format(i_a.id))
            artifact_multiple_file.append(i_a)
        else:
            fn = fns[0]
            found_files.append(fn)
            logging.info(
                "Found image file {0} for artifact with id {1}".format(fn, i_a.id))
            fp = os.path.join(args.path, fn)

            # Attach file to the LIMS
            location = attach_file(fp, o_a)
            logging.debug("Moving {0} to {1}".format(fp, location))

    warning = ""
    if len(artifact_missing_file):
        warning = "Did not find any file for {0} artifact(s). ".format(
            len(artifact_missing_file))
    if len(artifact_multiple_file):
        warning += "Found multiple files for {0} artifact(s), none of these were uploaded.".format(
            len(artifact_multiple_file))
    if warning:
        warning = "Warning: " + warning

    abstract = "Uploaded {0} file(s). {1}".format(len(found_files), warning)
    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
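# A small self-contained example of the filename pattern built in main above.
# The well, container id and artifact id values here are made up for
# illustration.
import re

info = {'well': 'A1', 'container_id': '27-1234', 'input_artifact_id': '2-5678'}
pattern = re.compile('.*{well}_.*_.*{container_id}_.*{input_artifact_id}'.format(**info))

assert pattern.match('A1_SampleX_27-1234_2-5678.jpg')        # matches
assert not pattern.match('B2_SampleY_27-9999_2-0000.jpg')    # does not match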
def _get_individual_artifacts(self, sample):
    # Assuming the first artifact is always named sample.id + 'PA1'.
    first_sample_artifact = Artifact(lims, id=sample.id + 'PA1')
    self.uniq_artifacts[sample.id] = first_sample_artifact
def get_artifacts(self):
    for inp, outp in self.input_output_maps:
        if outp.get("output-generation-type") == "PerAllInputs":
            continue
        self.artifacts.append(Artifact(self.lims, id=outp['limsid']))