Esempio n. 1
0
def get_sampleIDs_from_samplesheet(iem_file):
    """
    Return the ``Sample_ID`` of every sample listed in an IEM samplesheet.

    Parameters
    ----------
    iem_file
        Path to an IEM formatted samplesheet (SampleSheet.csv); it is handed
        to ``samplesheet.IEMFile`` for parsing.

    Returns
    -------
    list
        One ``Sample_ID`` value per entry of ``data['Data']['Samples']``,
        in the order the entries are stored.
    """
    samples = samplesheet.IEMFile(path=iem_file).data['Data']['Samples']
    sample_ids = []
    for entry in samples:
        sample_ids.append(entry['Sample_ID'])
    return sample_ids
Esempio n. 2
0
def load_samplesheet_data(samplesheet_file):
    """
    Load the run settings and the sample entries from a samplesheet.

    The 'Reads', 'Header' and 'Settings' sections of the parsed sheet are
    merged with ``dict.update`` in that order, so a key present in more than
    one section keeps the value from the later section.

    Parameters
    ----------
    samplesheet_file
        Path to the samplesheet; it is handed to ``samplesheet.IEMFile``.

    Returns
    -------
    tuple
        ``(run_settings, run_samples)`` where ``run_settings`` is the merged
        dict and ``run_samples`` is ``data['Data']['Samples']`` of the sheet.

    Examples
    --------
    Example usage::

        x = "SampleSheet.csv"
        run_settings, run_samples = load_samplesheet_data(x)

    """
    parsed = samplesheet.IEMFile(path=samplesheet_file).data
    run_settings = {}
    for section in ('Reads', 'Header', 'Settings'):
        run_settings.update(parsed[section])
    return run_settings, parsed['Data']['Samples']
Esempio n. 3
0
import os
import sys
import csv
from util import samplesheet
from util import find
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tuco.settings")
import django
django.setup()
from lims.models import SequencingSampleSheet


# Directory to search, taken from the first command line argument
sheets_dir = os.path.realpath(sys.argv[1])

# find all the samplesheets in the dir
for sheet_file in find.find(search_dir=sheets_dir, inclusion_patterns=['SampleSheet.csv']):
    sheet = samplesheet.IEMFile(path=os.path.realpath(sheet_file))
    # seqtype.txt is expected next to the samplesheet; the directory name is the run ID
    seqtype_file = os.path.join(os.path.dirname(sheet_file), 'seqtype.txt')
    Run_ID = os.path.basename(os.path.dirname(sheet_file))

    # load seqtype
    if os.path.exists(seqtype_file):
        with open(seqtype_file) as f:
            # only the first line is used; readline() returns '' for an empty
            # file, where indexing readlines()[0] would raise IndexError
            Seq_Type = f.readline().strip()

            # get the samplesheet entry from database
            sheet_instance = SequencingSampleSheet.objects.get(md5=sheet.meta['Sheet_md5'])

            # Only fill in a missing seq_type, and only when the file supplied a
            # non-empty value. The previous unparenthesised `not a or a == '' and b`
            # parsed as `not a or (a == '' and b)`, so the non-empty check on
            # Seq_Type never took effect.
            # NOTE(review): no sheet_instance.save() is visible after this
            # assignment; confirm the change is persisted further down.
            if not sheet_instance.seq_type and Seq_Type:
                sheet_instance.seq_type = Seq_Type
Esempio n. 4
0
import os
import sys
import csv
from util import samplesheet
from util import find
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tuco.settings")
import django
django.setup()
from lims.models import SequencingRun, SequencingSampleSheet

# Directory to scan, taken from the first command line argument
sheets_dir = os.path.realpath(sys.argv[1])

# walk every SampleSheet.csv that find.find() reports for that directory
sheet_paths = find.find(search_dir=sheets_dir,
                        inclusion_patterns=['SampleSheet.csv'])
for sheet_file in sheet_paths:
    sheet = samplesheet.IEMFile(path=sheet_file)
    # the name of the directory holding the sheet is used as the run ID
    run_dir = os.path.dirname(sheet_file)
    Run_ID = os.path.basename(run_dir)
    seqtype_file = os.path.join(run_dir, 'seqtype.txt')

    # register the SampleSheet in the database if it's not there already;
    # all four fields below are used together as the get_or_create lookup
    sheet_fields = {
        'run_id': SequencingRun.objects.get(run_id=Run_ID),
        'path': os.path.realpath(sheet.path),
        'md5': sheet.md5,
        'host': sheet.meta.get('Sheet_host', ''),
    }
    SequencingSampleSheet.objects.get_or_create(**sheet_fields)
    # IEMFileVersion = record.get('IEMFileVersion',''),
    # Investigator_Name = record.get('Investigator_Name',''), # Investigator Name
    # Project_Name = record.get('Project_Name',''), # Project Name
    # Experiment_Name = record.get('Experiment_Name',''), # Experiment Name
    # Date = record.get('Date',''),
    # Workflow = record.get('Workflow',''),
Esempio n. 5
0
def samplesheet_upload(request):
    """
    Handle the samplesheet upload form.

    Any request that is not a POST carrying a 'samplesheet' file just renders
    the empty upload form. Otherwise the uploaded file is saved to a temporary
    location, parsed and validated as an IEM samplesheet, registered as a
    ``SequencingSampleSheet`` for the run given in the 'run_id' POST field,
    and its samples are imported as ``SequencingSample`` rows.

    Parameters
    ----------
    request
        The incoming request; ``method``, ``POST['run_id']`` and
        ``FILES['samplesheet']`` are read from it.

    Returns
    -------
    The rendered 'lims/samplesheet_upload.html' template. ``context['message']``
    describes the outcome and the status code is:

    - 200 (default) for the plain form and for a completed import,
    - 422 when the file cannot be parsed, fails validation, cannot be stored
      in the database, or its samples cannot be imported,
    - 409 when a samplesheet with the same md5 is already in the database.
    """
    template = 'lims/samplesheet_upload.html'
    form = SequencingSampleSheetForm()
    message = ''
    context = {'form': form, 'message': message}

    # .get() so that a POST without the file field falls through to the plain
    # form instead of raising on the missing key
    sheet = request.FILES.get('samplesheet') if request.method == 'POST' else None
    if not sheet:
        return render(request, template, context)

    # a missing run_id makes the run lookup below fail, which is reported as
    # a 422 response rather than an unhandled KeyError
    run_id = request.POST.get('run_id', '')

    # set up temp storage location for file; needed for samplesheet module read methods..
    # MEDIA_TMP/<date>/filename
    fs = FileSystemStorage()
    tmp_save_path = os.path.join(
        settings.MEDIA_TMP, now().strftime('%Y-%m-%d'), sheet.name)

    # save file to disk at location
    filename = fs.save(tmp_save_path, sheet)

    # try to parse the file as a SampleSheet.csv file; fs.path() resolves the
    # saved name against the storage location instead of the process CWD
    try:
        sheet_obj = samplesheet.IEMFile(
            path=os.path.realpath(fs.path(filename)))
    except Exception:
        context[
            'message'] = "ERROR: Samplesheet file could not be parsed. Is it in the correct format?"
        return render(request, template, context, status=422)

    # validate the contents of the samplesheet
    try:
        sheet_obj.isValid(_raise=True)
    except Exception:
        # report only the validations that have a truthy value
        flagged = [{k: v} for k, v in sheet_obj.get_validations().items() if v]
        context[
            'message'] = "ERROR: Samplesheet contains errors;\n{0}".format(
                json.dumps(flagged))
        return render(request, template, context, status=422)

    # make sure the sheet is not already in the database
    if SequencingSampleSheet.objects.filter(md5=sheet_obj.md5).exists():
        context[
            'message'] = "ERROR: This exact samplesheet has already been uploaded, please re-assign the existing version instead."
        return render(request, template, context, status=409)

    # try to put the samplesheet into the database
    try:
        # looked up once and reused for every sample below
        run = SequencingRun.objects.get(run_id=run_id)
        sheet_instance = SequencingSampleSheet.objects.create(
            run_id=run,
            file=sheet,
            md5=sheet_obj.md5,
            host=sheet_obj.meta.get('Sheet_host', ''))
    except Exception:
        context[
            'message'] = "ERROR: Samplesheet could not be imported into the database."
        return render(request, template, context, status=422)

    # try to import samples from the samplesheet
    try:
        all_created = []
        not_created = []
        for record in sheet_obj.flatten():
            instance, created = SequencingSample.objects.get_or_create(
                run_id=run,
                sample=record.get('Sample_ID', ''),
                sample_name=record.get('Sample_Name', ''),
                # paired_normal = record.get('Paired_Normal',''), # get this separately!
                i7_index=record.get('I7_Index_ID', ''),
                index=record.get('index', ''),
                sample_project=record.get('Sample_Project', ''),
                description=record.get('Description', ''),
                genome_folder=record.get('GenomeFolder', ''),
                samplesheet=sheet_instance)
            # get_or_create reports created=False for rows that already existed
            (all_created if created else not_created).append(instance)
        context[
            'message'] = "The following samples from the samplesheet were successfully imported:\n{0}\n\nThe following samples were not successfully imported:\n{1}".format(
                '\n'.join(str(i) for i in all_created),
                '\n'.join(str(i) for i in not_created))
    except Exception:
        context[
            'message'] = "ERROR: An error occured while importing samples from the samplesheet into the database."
        return render(request, template, context, status=422)

    return render(request, template, context)
Esempio n. 6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Mark a run's output as 'passed' by placing a symlink in the output directory
"""
import sys
import os
from util import samplesheet

# Path to the samplesheet, taken from the first command line argument
samplesheet_file = sys.argv[1]
outputDir = os.path.join("output", "reads")

sheet_obj = samplesheet.IEMFile(path=samplesheet_file)

# unique project names, sorted so that the passed<N> numbering is the same on
# every run (plain set iteration order is not stable between runs)
sample_projects = sorted({
    sample['Sample_Project']
    for sample in sheet_obj.data['Data']['Samples']
})

for i, sample_project in enumerate(sample_projects):
    src = os.path.join(outputDir, sample_project)
    dest = os.path.join(outputDir, "passed{0}".format(i))
    print(">>> Linking {0} to {1}".format(src, dest))
    # lexists() also detects a dangling symlink, which exists() reports as
    # absent and which would make os.symlink() fail with "file exists"
    if os.path.lexists(dest):
        os.unlink(dest)
    # the link target is resolved relative to the link's own directory
    # (outputDir), hence the bare project name rather than src
    os.symlink(sample_project, dest)