def get_sampleIDs_from_samplesheet(iem_file):
    """
    Collect the ``Sample_ID`` of every sample listed in an IEM formatted
    samplesheet (SampleSheet.csv).

    Parameters
    ----------
    iem_file
        Path handed to ``samplesheet.IEMFile`` as its ``path`` argument.

    Returns
    -------
    list
        One ``Sample_ID`` value per entry of ``data['Data']['Samples']``,
        in the order the entries appear there.
    """
    parsed = samplesheet.IEMFile(path=iem_file)
    sample_rows = parsed.data['Data']['Samples']
    return [row['Sample_ID'] for row in sample_rows]
def load_samplesheet_data(samplesheet_file):
    """
    Read a samplesheet and split it into run-level settings and samples.

    The ``Reads``, ``Header`` and ``Settings`` sections are merged into a
    single dict in that order, so a key that appears in more than one
    section ends up with the value from the later section.

    Returns
    -------
    tuple
        ``(run_settings, run_samples)`` where ``run_settings`` is the merged
        dict and ``run_samples`` is ``data['Data']['Samples']``.

    Examples
    --------
    Example usage::

        x = "SampleSheet.csv"
        run_settings, run_samples = load_samplesheet_data(x)

    """
    sections = samplesheet.IEMFile(path=samplesheet_file).data

    run_settings = {}
    # merge order matters: later sections overwrite earlier ones
    for section_name in ('Reads', 'Header', 'Settings'):
        run_settings.update(sections[section_name])

    return run_settings, sections['Data']['Samples']
# Backfill the ``seq_type`` field of SequencingSampleSheet database entries
# from the 'seqtype.txt' file that sits next to each SampleSheet.csv.
#
# Usage: pass the directory to search for samplesheets as the first
# command line argument.
import os
import sys
import csv
from util import samplesheet
from util import find
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tuco.settings")
import django
django.setup()
from lims.models import SequencingSampleSheet

sheets_dir = os.path.realpath(sys.argv[1])

# find all the samplesheets in the dir
for sheet_file in find.find(search_dir=sheets_dir, inclusion_patterns=['SampleSheet.csv']):
    sheet = samplesheet.IEMFile(path=os.path.realpath(sheet_file))
    seqtype_file = os.path.join(os.path.dirname(sheet_file), 'seqtype.txt')
    Run_ID = os.path.basename(os.path.dirname(sheet_file))

    # load seqtype; reset it for every sheet so that a run without a
    # seqtype.txt neither raises NameError nor inherits the value read for
    # the previous run
    Seq_Type = ''
    if os.path.exists(seqtype_file):
        with open(seqtype_file) as f:
            # readline() gives '' for an empty file instead of raising
            Seq_Type = f.readline().strip()

    # nothing to record for this sheet
    if not Seq_Type:
        continue

    # get the samplesheet entry from database
    sheet_instance = SequencingSampleSheet.objects.get(md5=sheet.meta['Sheet_md5'])

    # only fill in seq_type when the entry does not have one yet
    if not sheet_instance.seq_type:
        sheet_instance.seq_type = Seq_Type
        # persist the change; assigning the attribute alone does not
        # touch the database
        sheet_instance.save()
# Register every SampleSheet.csv found under a directory in the database.
#
# Usage: pass the directory to search as the first command line argument.
# The name of the directory that holds a samplesheet is used as its run ID.
import os
import sys
import csv
from util import samplesheet
from util import find
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tuco.settings")
import django
django.setup()
from lims.models import SequencingRun, SequencingSampleSheet

sheets_dir = os.path.realpath(sys.argv[1])

# walk over all the samplesheets in the dir
for sheet_file in find.find(search_dir=sheets_dir,
                            inclusion_patterns=['SampleSheet.csv']):
    sheet = samplesheet.IEMFile(path=sheet_file)

    run_dir = os.path.dirname(sheet_file)
    Run_ID = os.path.basename(run_dir)
    # not used by the code shown here
    seqtype_file = os.path.join(run_dir, 'seqtype.txt')

    # register the SampleSheet in the database if it is not there already;
    # all four fields take part in the lookup
    sheet_fields = {
        'run_id': SequencingRun.objects.get(run_id=Run_ID),
        'path': os.path.realpath(sheet.path),
        'md5': sheet.md5,
        'host': sheet.meta.get('Sheet_host', ''),
    }
    SequencingSampleSheet.objects.get_or_create(**sheet_fields)
    # Header fields that are not recorded (yet): IEMFileVersion,
    # Investigator_Name, Project_Name, Experiment_Name, Date, Workflow
def samplesheet_upload(request):
    """
    Upload a samplesheet for a sequencing run and import its samples.

    Any request other than POST just renders the upload form. A POST is
    expected to carry a ``run_id`` field and a ``samplesheet`` file. The file
    is saved under ``settings.MEDIA_TMP/<YYYY-MM-DD>/`` (the samplesheet
    module reads from a path), parsed, validated, checked against the
    database for duplicates by md5, stored as a ``SequencingSampleSheet`` and
    finally every flattened record is imported as a ``SequencingSample``.

    The response always renders ``lims/samplesheet_upload.html`` with a
    ``message`` describing the outcome. Status codes:

    - 400: the POST has no ``samplesheet`` file or no ``run_id``
    - 409: a samplesheet with the same md5 is already in the database
    - 422: the file could not be parsed, failed validation, or could not be
      imported into the database
    - default status otherwise
    """
    template = 'lims/samplesheet_upload.html'
    context = {'form': SequencingSampleSheetForm(), 'message': ''}

    def respond(message, status=None):
        # render the upload page with an outcome message
        context['message'] = message
        return render(request, template, context, status=status)

    if request.method != 'POST':
        return render(request, template, context)

    # use .get() so that an incomplete POST is answered with an error page
    # instead of an unhandled key error
    sheet = request.FILES.get('samplesheet')
    if not sheet:
        return respond("ERROR: No samplesheet file was uploaded.", 400)
    run_id = request.POST.get('run_id')
    if run_id is None:
        return respond("ERROR: No run ID was provided.", 400)

    # set up temp storage location for file; needed for samplesheet module
    # read methods..
    # MEDIA_TMP/<date>/filename
    fs = FileSystemStorage()
    tmp_save_path = os.path.join(
        settings.MEDIA_TMP, now().strftime('%Y-%m-%d'), sheet.name)
    # save file to disk at location
    filename = fs.save(tmp_save_path, sheet)

    # try to parse the file as a SampleSheet.csv file
    try:
        # fs.path() resolves the saved name against the storage root; the
        # name returned by save() is not relative to the working directory
        sheet_obj = samplesheet.IEMFile(path=fs.path(filename))
    except Exception:
        return respond(
            "ERROR: Samplesheet file could not be parsed. Is it in the correct format?",
            422)

    # validate the contents of the samplesheet
    try:
        sheet_obj.isValid(_raise=True)
    except Exception:
        # report only the validations that have a truthy value
        failures = [{k: v}
                    for k, v in sheet_obj.get_validations().items() if v]
        return respond(
            "ERROR: Samplesheet contains errors;\n{0}".format(
                json.dumps(failures)),
            422)

    # make sure the sheet is not already in the database
    if SequencingSampleSheet.objects.filter(md5=sheet_obj.md5).exists():
        return respond(
            "ERROR: This exact samplesheet has already been uploaded, please re-assign the existing version instead.",
            409)

    # try to put the samplesheet into the database
    try:
        # looked up once and reused for every sample below
        run = SequencingRun.objects.get(run_id=run_id)
        sheet_instance = SequencingSampleSheet.objects.create(
            run_id=run,
            file=sheet,
            md5=sheet_obj.md5,
            host=sheet_obj.meta.get('Sheet_host', ''))
    except Exception:
        return respond(
            "ERROR: Samplesheet could not be imported into the database.",
            422)

    # try to import samples from the samplesheet
    try:
        imported = []
        not_imported = []
        for record in sheet_obj.flatten():
            # paired_normal is not set here; it is obtained separately
            instance, created = SequencingSample.objects.get_or_create(
                run_id=run,
                sample=record.get('Sample_ID', ''),
                sample_name=record.get('Sample_Name', ''),
                i7_index=record.get('I7_Index_ID', ''),
                index=record.get('index', ''),
                sample_project=record.get('Sample_Project', ''),
                description=record.get('Description', ''),
                genome_folder=record.get('GenomeFolder', ''),
                samplesheet=sheet_instance)
            # samples that already existed are reported as not imported
            (imported if created else not_imported).append(instance)
        return respond(
            "The following samples from the samplesheet were successfully imported:\n{0}\n\nThe following samples were not successfully imported:\n{1}".format(
                '\n'.join(str(i) for i in imported),
                '\n'.join(str(i) for i in not_imported)))
    except Exception:
        return respond(
            "ERROR: An error occured while importing samples from the samplesheet into the database.",
            422)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Mark a run's output as 'passed' by placing a symlink in the output directory

Usage: pass the path to the run's samplesheet as the first command line
argument. For every distinct Sample_Project in the samplesheet a symlink
``output/reads/passed<N>`` is created that points at the project's directory
name, replacing any link of that name that is already there.
"""
import sys
import os
from util import samplesheet

samplesheet_file = sys.argv[1]
outputDir = os.path.join("output", "reads")

sheet_obj = samplesheet.IEMFile(path=samplesheet_file)

# sort the distinct projects so that the passed<N> numbering is the same on
# every run; plain set ordering is not stable between runs
sample_projects = sorted({
    sample['Sample_Project'] for sample in sheet_obj.data['Data']['Samples']
})

for i, sample_project in enumerate(sample_projects):
    src = os.path.join(outputDir, sample_project)
    dest = os.path.join(outputDir, "passed{0}".format(i))
    print(">>> Linking {0} to {1}".format(src, dest))
    # lexists() is also true for a dangling symlink, which exists() misses
    # and which would make os.symlink() fail with "File exists"
    if os.path.lexists(dest):
        os.unlink(dest)
    # the target is relative to the directory that holds the link
    os.symlink(sample_project, dest)