Code Example #1
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    udf_check = 'Conc. Units'
    value_check = 'ng/ul'
    concentration_udf = 'Concentration'
    size_udf = 'Size (bp)'

    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "ResultFile"]

    correct_artifacts, no_concentration = check_udf_is_defined(
        artifacts, concentration_udf)
    correct_artifacts, no_size = check_udf_is_defined(correct_artifacts,
                                                      size_udf)
    correct_artifacts, wrong_value = check_udf_has_value(
        correct_artifacts, udf_check, value_check)

    apply_calculations(lims, correct_artifacts, concentration_udf, size_udf,
                       udf_check, epp_logger)

    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_value) + len(no_size) + len(no_concentration)
    }

    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
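The helpers check_udf_is_defined and check_udf_has_value are called throughout these examples but never shown. A minimal sketch of the contract the call sites imply, assuming artifact.udf behaves like a dict; a hypothetical reconstruction, not the project's source:

def check_udf_is_defined(artifacts, udf):
    """Split artifacts into those with the UDF set and those without."""
    defined, undefined = [], []
    for artifact in artifacts:
        if udf in artifact.udf:
            defined.append(artifact)
        else:
            undefined.append(artifact)
    return defined, undefined


def check_udf_has_value(artifacts, udf, value):
    """Split artifacts on whether the UDF matches the expected value(s)."""
    accepted = value if isinstance(value, list) else [value]  # example #22 passes a list
    correct, wrong = [], []
    for artifact in artifacts:
        if udf in artifact.udf and artifact.udf[udf] in accepted:
            correct.append(artifact)
        else:
            wrong.append(artifact)
    return correct, wrong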
Code Example #2
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    udf_factor1 = 'Concentration (ng/ul)'
    result_udf = 'Concentration nM'
    udf_factor2 = 'Size (bp)'

    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "Analyte"]

    correct_artifacts, wrong_factor1 = check_udf_is_defined(artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(correct_artifacts, udf_factor2)

    f = open(args.res, "a")

    if correct_artifacts:
        apply_calculations(lims, correct_artifacts, udf_factor1,
                           udf_factor2, result_udf, epp_logger, f)
    
    f.close()


    d = {'ca': len(correct_artifacts),
         'ia': len(wrong_factor1) + len(wrong_factor2)}

    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #3
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    udf_check = "Conc. Units"
    value_check = "ng/ul"
    concentration_udf = "Concentration"
    size_udf = "Size (bp)"

    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "ResultFile"]

    correct_artifacts, no_concentration = check_udf_is_defined(artifacts, concentration_udf)
    correct_artifacts, no_size = check_udf_is_defined(correct_artifacts, size_udf)
    correct_artifacts, wrong_value = check_udf_has_value(correct_artifacts, udf_check, value_check)

    apply_calculations(lims, correct_artifacts, concentration_udf, size_udf, udf_check, epp_logger)

    d = {"ca": len(correct_artifacts), "ia": len(wrong_value) + len(no_size) + len(no_concentration)}

    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #4
def main(lims, args, epp_logger):
    pro = Process(lims, id=args.pid)
    source_udf = 'Reference genome'
    destination_udf = 'Reference Genome'

    artifacts = pro.all_inputs(unique=True)
    projects = all_projects_for_artifacts(artifacts)

    correct_projects, incorrect_udf = check_udf_is_defined(
        projects, source_udf)
    correct_samples = filter_samples(artifacts, correct_projects)

    session = Session(pro, source_udf, destination_udf)
    session.copy_main(correct_samples)

    if len(incorrect_udf) == 0:
        warning = "no projects"
    else:
        warning = "WARNING: skipped {0} project(s)".format(len(incorrect_udf))

    d = {'cs': len(correct_samples), 'warning': warning}

    abstract = (
        "Updated {cs} sample(s), {warning} with incorrect udf info.").format(
            **d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #5
def samplesheet_pool_samples(lims, process_id, output_file):
    """Create manual pipetting samplesheet for pooling samples."""
    process = Process(lims, id=process_id)

    # print header
    output_file.write('Sample\tContainer\tWell\tPool\n')

    # Get all input artifact and store per container
    input_containers = {}
    for input_artifact in process.all_inputs(resolve=True):
        container = input_artifact.location[0].name
        well = ''.join(input_artifact.location[1].split(':'))

        if container not in input_containers:
            input_containers[container] = {}

        input_containers[container][well] = input_artifact

    # print pool scheme per input artifact
    # sort on container and well
    for input_container in sorted(input_containers.keys()):
        input_artifacts = input_containers[input_container]
        for well in clarity_epp.export.utils.sort_96_well_plate(
                input_artifacts.keys()):
            output_file.write('{sample}\t{container}\t{well}\t{pool}\n'.format(
                sample=input_artifacts[well].name,
                container=input_artifacts[well].location[0].name,
                well=well,
                pool=process.outputs_per_input(input_artifacts[well].id,
                                               Analyte=True)[0].name))
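clarity_epp.export.utils.sort_96_well_plate is used here to order wells column by column. A plausible one-liner, assuming wells are named 'A1'..'H12'; a sketch, not the library's implementation:

def sort_96_well_plate(wells):
    # Column-major order: A1, B1, ... H1, A2, B2, ... H12
    return sorted(wells, key=lambda well: (int(well[1:]), well[0]))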
Code Example #6
def main(lims, args, epp_logger):
    pro = Process(lims, id=args.pid)
    source_udf = 'Reference genome'
    destination_udf = 'Reference Genome'

    artifacts = pro.all_inputs(unique=True)
    projects = all_projects_for_artifacts(artifacts)

    correct_projects, incorrect_udf = check_udf_is_defined(projects, source_udf)
    correct_samples = filter_samples(artifacts, correct_projects)

    session = Session(pro, source_udf, destination_udf)
    session.copy_main(correct_samples)

    if len(incorrect_udf) == 0:
        warning = "no projects"
    else:
        warning = "WARNING: skipped {0} project(s)".format(len(incorrect_udf))

    d = {'cs': len(correct_samples),
         'warning': warning}

    abstract = ("Updated {cs} sample(s), {warning} with incorrect udf info.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #7
def samplesheet_dilute_library_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for sequencing pools."""
    output_file.write('Sample\tContainer\tWell\tul Sample\tul EB\n')
    process = Process(lims, id=process_id)

    output = []  # save pool data to list, to be able to sort on pool number.
    nM_pool = process.udf['Dx Pool verdunning (nM)']
    output_ul = process.udf['Eindvolume (ul)']

    for input in process.all_inputs():
        search_number = re.search(r'Pool #(\d+)_', input.name)
        if search_number:
            input_number = int(search_number.group(1))
        else:
            input_number = 0
        qc_artifact = input.input_artifact_list()[0]

        size = float(qc_artifact.udf['Dx Fragmentlengte (bp)'])
        concentration = float(
            qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])

        nM_dna = (concentration * 1000 * (1 / 660.0) * (1 / size)) * 1000
        ul_sample = (nM_pool / nM_dna) * output_ul
        ul_EB = output_ul - ul_sample

        line = '{pool_name}\t{container}\t{well}\t{ul_sample:.2f}\t{ul_EB:.2f}\n'.format(
            pool_name=input.name,
            container=input.location[0].name,
            well=input.location[1],
            ul_sample=ul_sample,
            ul_EB=ul_EB)
        output.append((input_number, line))

    for number, line in sorted(output):
        output_file.write(line)
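The nM conversion above is the standard dsDNA molarity formula, using an average molar mass of roughly 660 g/mol per base pair. Factored out with a worked number:

def ng_per_ul_to_nM(concentration_ng_ul, fragment_size_bp):
    """Convert a dsDNA concentration in ng/ul to nM, assuming ~660 g/mol per bp."""
    return concentration_ng_ul * 1e6 / (660.0 * fragment_size_bp)

# ng_per_ul_to_nM(20, 450) ~= 67.3 nM, the same value as
# (20 * 1000 * (1 / 660.0) * (1 / 450)) * 1000 in the loop above.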
Code Example #8
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    udf_check = "Conc. Units"
    value_check = "ng/ul"
    udf_factor1 = "Concentration"
    udf_factor2 = "Volume (ul)"
    result_udf = "Amount (ng)"

    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "ResultFile"]

    correct_artifacts, wrong_factor1 = check_udf_is_defined(artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(correct_artifacts, udf_factor2)

    correct_artifacts, wrong_value = check_udf_has_value(correct_artifacts, udf_check, value_check)

    if correct_artifacts:
        apply_calculations(lims, correct_artifacts, udf_factor1, "*", udf_factor2, result_udf, epp_logger, p)

    d = {"ca": len(correct_artifacts), "ia": len(wrong_factor1) + len(wrong_factor2) + len(wrong_value)}

    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #9
File: email.py Project: UMCUGenetics/clarity_epp
def sequencing_run(lims, email_settings, process_id):
    process = Process(lims, id=process_id)
    artifact = process.all_inputs()[0]

    subject = "LIMS QC Controle - {0}".format(artifact.name)

    message = "Sequencing Run: {0}\n".format(artifact.name)
    message += "Technician: {0}\n".format(process.technician.name)
    message += "LIMS Next Action: {0}\n\n".format(
        process.step.actions.next_actions[0]['action'])

    message += "UDF - Conversie rapport OK?: {0}\n".format(
        process.udf['Conversie rapport OK?'])
    if 'Fouten registratie (uitleg)' in process.udf:
        message += "UDF - Fouten registratie (uitleg): {0}\n".format(
            process.udf['Fouten registratie (uitleg)'])
    if 'Fouten registratie (oorzaak)' in process.udf:
        message += "UDF - Fouten registratie (oorzaak): {0}\n".format(
            process.udf['Fouten registratie (oorzaak)'])

    if process.step.actions.escalation:
        message += "\nManager Review LIMS:\n"
        message += "{0}: {1}\n".format(
            process.step.actions.escalation['author'].name,
            process.step.actions.escalation['request'])
        message += "{0}: {1}\n".format(
            process.step.actions.escalation['reviewer'].name,
            process.step.actions.escalation['answer'])

    send_email(email_settings['server'], email_settings['from'],
               email_settings['to_sequencing_run_complete'], subject, message)
Code Example #10
def main(lims, args):

    p = Process(lims, id=args.pid)
    log = []
    datamap = {}
    wsname = None
    username = "******".format(p.technician.first_name,
                                p.technician.last_name)
    user_email = p.technician.email
    for art in p.all_inputs():
        if len(art.samples) != 1:
            log.append(
                "Warning : artifact {0} has more than one sample".format(
                    art.id))
        for sample in art.samples:
            # take care of lambda DNA
            if sample.project:
                if sample.project.id not in datamap:
                    datamap[sample.project.id] = [sample.name]
                else:
                    datamap[sample.project.id].append(sample.name)

    for art in p.all_outputs():
        try:
            wsname = art.location[0].name
            break
        except:
            pass

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    for pid in datamap:
        pj = Project(lims, id=pid)
        if len(datamap[pid]) > 1:
            rnt = "{0} samples planned for {1}".format(len(datamap[pid]),
                                                       wsname)
        else:
            rnt = "{0} sample planned for {1}".format(len(datamap[pid]),
                                                      wsname)

        running_note = {
            "note": rnt,
            "user": username,
            "email": user_email,
            "category": "Workset"
        }
        write_note_to_couch(pid, now, running_note, lims.get_uri())
        log.append(
            "Updated project {0} : {1}, {2} samples in this workset".format(
                pid, pj.name, len(datamap[pid])))

    with open("EPP_Notes.log", "w") as flog:
        flog.write("\n".join(log))
    for out in p.all_outputs():
        #attach the log file
        if out.name == "RNotes Log":
            attach_file(os.path.join(os.getcwd(), "EPP_Notes.log"), out)

    sys.stderr.write("Updated {0} projects successfully".format(len(datamap)))
Code Example #11
def main(lims, args):
    process = Process(lims, id=args.p)
    duplicates = get_duplicate_samples(process.all_inputs())

    if duplicates:
        sys.exit('Samples: ' + ', '.join(duplicates) +
                 ' appeared more than once in this step.')
    else:
        print('No duplicated samples!', file=sys.stderr)
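get_duplicate_samples is not shown; a minimal sketch consistent with the error message above (names appearing more than once across the step's inputs):

from collections import Counter


def get_duplicate_samples(artifacts):
    """Names of samples that occur more than once across the input artifacts."""
    counts = Counter(sample.name
                     for artifact in artifacts
                     for sample in artifact.samples)
    return [name for name, count in counts.items() if count > 1]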
Code Example #12
File: pool.py Project: UMCUGenetics/clarity_epp
def unpooling(lims, process_id):
    """Unpool samples after sequencing."""
    process = Process(lims, id=process_id)

    pool_artifact = process.all_inputs()[0]
    pool_artifact_parent_process = pool_artifact.parent_process
    pool_artifact_demux = lims.get(pool_artifact.uri + '/demux')

    run_id = pool_artifact.name  # Assume run id is set as pool name using placement/artifact/set_runid_name
    sample_artifacts = []  # sample artifacts before pooling
    sample_projects = {}

    for artifact in pool_artifact_parent_process.result_files():
        if (artifact.name == 'SampleSheet csv'
                or artifact.name == 'Sample Sheet') and artifact.files:
            file_id = artifact.files[0].id
            sample_sheet = lims.get_file_contents(id=file_id)
            project_index = None
            sample_index = None
            for line in sample_sheet.split('\n'):
                data = line.rstrip().split(',')

                if 'Sample_Project' in data and 'Sample_ID' in data:
                    project_index = data.index('Sample_Project')
                    sample_index = data.index('Sample_ID')
                elif project_index is not None and len(data) > project_index:
                    sample_projects[data[sample_index]] = data[project_index]

    for node in pool_artifact_demux.iter('artifact'):
        if node.find('samples') is not None:
            if len(node.find('samples').findall('sample')) == 1:
                sample_artifact = Artifact(lims, uri=node.attrib['uri'])
                sample = sample_artifact.samples[0]  # 1 sample per artifact.

                # Get sample sequencing run and project from samplesheet
                sample_artifact.udf['Dx Sequencing Run ID'] = run_id
                if 'Sample Type' in sample.udf and 'library' in sample.udf['Sample Type']:
                    # Use sample.name for external (clarity_portal) samples
                    sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample.name]
                else:  # Use sample_artifact.name for Dx samples (upload via Helix)
                    sample_artifact.udf['Dx Sequencing Run Project'] = sample_projects[sample_artifact.name]
                sample_artifact.put()

                # Only move DX production samples to post sequencing workflow
                if (sample_artifact.samples[0].project and
                        sample_artifact.samples[0].project.udf['Application'] == 'DX'):
                    sample_artifacts.append(sample_artifact)

    lims.route_artifacts(sample_artifacts,
                         workflow_uri=Workflow(
                             lims, id=config.post_sequencing_workflow).uri)
Code Example #13
def samplesheet_normalization(lims, process_id, output_file):
    """Create manual pipetting samplesheet for normalizing (MIP) samples."""
    output_file.write(
        'Sample\tConcentration (ng/ul)\tVolume sample (ul)\tVolume water (ul)\tOutput (ng)\tIndampen\n'
    )
    process = Process(lims, id=process_id)

    # Find all QC process types
    qc_process_types = clarity_epp.export.utils.get_process_types(
        lims, ['Dx Qubit QC', 'Dx Tecan Spark 10M QC'])

    for input_artifact in process.all_inputs(resolve=True):
        artifact = process.outputs_per_input(input_artifact.id, Analyte=True)[0]  # assume one artifact per input
        sample = input_artifact.samples[0]  # assume one sample per input artifact

        # Find last qc process for artifact
        qc_process = sorted(
            lims.get_processes(type=qc_process_types,
                               inputartifactlimsid=input_artifact.id),
            key=lambda process: int(process.id.split('-')[-1]))[-1]

        # Find concentration measurement
        for qc_artifact in qc_process.outputs_per_input(input_artifact.id):
            if qc_artifact.name.split(' ')[0] == artifact.name:
                concentration = float(
                    qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])

        final_volume = float(artifact.udf['Dx Eindvolume (ul)'])
        input_ng = float(artifact.udf['Dx Input (ng)'])
        if 'Dx pipetteervolume (ul)' in artifact.udf:
            input_ng = concentration * float(
                artifact.udf['Dx pipetteervolume (ul)'])
        sample_volume = input_ng / concentration
        water_volume = final_volume - sample_volume
        evaporate = 'N'

        if sample_volume < 0.5:
            sample_volume = 0.5
            water_volume = final_volume - sample_volume
        elif sample_volume > final_volume:
            evaporate = 'J'
            water_volume = 0

        output_file.write(
            '{sample}\t{concentration:.1f}\t{sample_volume:.1f}\t{water_volume:.1f}\t{output:.1f}\t{evaporate}\n'
            .format(sample=sample.name,
                    concentration=concentration,
                    sample_volume=sample_volume,
                    water_volume=water_volume,
                    output=input_ng,
                    evaporate=evaporate))
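The volume arithmetic above is plain mass/concentration bookkeeping with a 0.5 ul pipetting floor. Isolated into a standalone function with a worked call (the function name is illustrative, not part of clarity_epp):

def normalization_volumes(input_ng, concentration, final_volume):
    sample_volume = input_ng / concentration
    water_volume = final_volume - sample_volume
    evaporate = 'N'
    if sample_volume < 0.5:  # below the pipetting floor: take 0.5 ul anyway
        sample_volume = 0.5
        water_volume = final_volume - sample_volume
    elif sample_volume > final_volume:  # cannot fit: evaporate ('J') instead
        evaporate = 'J'
        water_volume = 0
    return sample_volume, water_volume, evaporate

# normalization_volumes(50, 25.0, 10.0) -> (2.0, 8.0, 'N')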
Code Example #14
def main(lims, args):

    p = Process(lims, id=args.pid)
    log = []
    datamap = {}
    wsname = None
    username = "{0} {1}".format(p.technician.first_name, p.technician.last_name)
    user_email = p.technician.email
    for art in p.all_inputs():
        if len(art.samples) != 1:
            log.append("Warning : artifact {0} has more than one sample".format(art.id))
        for sample in art.samples:
            # take care of lambda DNA
            if sample.project:
                if sample.project.id not in datamap:
                    datamap[sample.project.id] = [sample.name]
                else:
                    datamap[sample.project.id].append(sample.name)

    for art in p.all_outputs():
        try:
            wsname = art.location[0].name
            break
        except:
            pass

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    for pid in datamap:
        pj = Project(lims, id=pid)
        running_notes = json.loads(pj.udf['Running Notes'])
        if len(datamap[pid]) > 1:
            rnt = "{0} samples planned for {1}".format(len(datamap[pid]), wsname)
        else:
            rnt = "{0} sample planned for {1}".format(len(datamap[pid]), wsname)

        running_notes[now] = {"note": rnt, "user": username, "email": user_email, "category": "Workset"}

        pj.udf['Running Notes'] = json.dumps(running_notes)
        pj.put()
        log.append("Updated project {0} : {1}, {2} samples in this workset".format(pid, pj.name, len(datamap[pid])))

    with open("EPP_Notes.log", "w") as flog:
        flog.write("\n".join(log))
    for out in p.all_outputs():
        # attach the log file
        if out.name == "RNotes Log":
            attach_file(os.path.join(os.getcwd(), "EPP_Notes.log"), out)

    sys.stderr.write("Updated {0} projects successfully".format(len(datamap)))
Code Example #15
    def get(self):
        data = {}
        lims_url = self.request.query
        lims_id = "24-{}".format(lims_url.split("/")[-1])
        mylims = lims.Lims(BASEURI, USERNAME, PASSWORD)
        try:
            p = Process(mylims, id=lims_id)
            if p.type.name != 'Setup Workset/Plate':
                raise Exception("Wrong process type")
        except:
            self.set_status(
                400, reason="Wrong process type : use a Setup Workset/Plate")
            self.finish()
            return

        data['comments'] = {}
        data['samples'] = {}
        for i in p.all_inputs():
            sample_name = i.samples[0].name
            if not i.samples[0].project:
                continue
            else:
                project = i.samples[0].project
            if 'Project Comment' in project.udf and project.id not in data['comments']:
                data['comments'][project.id] = project.udf['Project Comment']
            data['samples'][sample_name] = {}
            data['samples'][sample_name]['amount'] = i.udf['Amount (ng)']
            data['samples'][sample_name]['previous_preps'] = {}
            if 'Library construction method' in project.udf:
                data['samples'][sample_name]['lib_method'] = project.udf['Library construction method']
            if 'Sequencing platform' in project.udf:
                data['samples'][sample_name]['seq_pl'] = project.udf['Sequencing platform']
            other_preps = mylims.get_processes(inputartifactlimsid=i.id,
                                               type="Setup Workset/Plate")
            for op in other_preps:
                if op.id != p.id:
                    for o in op.all_outputs():
                        if o.type == "Analyte" and o.samples[0].name == sample_name:
                            data['samples'][sample_name]['previous_preps'][o.location[0].name] = {
                                'position': o.location[1],
                                'amount': o.udf['Amount taken (ng)']
                            }

        self.set_header("Content-type", "application/json")
        self.write(json.dumps(data))
Code Example #16
def samplesheet_dilute(lims, process_id, output_file):
    """Create Caliper samplesheet for diluting samples."""
    output_file.write(
        'Monsternummer\tPlate_Id_input\tWell\tPlate_Id_output\tPipetteervolume DNA (ul)\tPipetteervolume H2O (ul)\n'
    )
    process = Process(lims, id=process_id)

    output = {}  # save output data to dict, to be able to sort on well.
    nM_pool = process.udf['Dx Pool verdunning (nM)']
    output_ul = process.udf['Eindvolume (ul)']

    for input_artifact in process.all_inputs():
        output_artifact = process.outputs_per_input(input_artifact.id,
                                                    Analyte=True)[0]

        # Get QC stats
        size = float(input_artifact.udf['Dx Fragmentlengte (bp)'])
        concentration = float(
            input_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])

        # Calculate dilution
        nM_dna = (concentration * 1000 * (1 / 660.0) * (1 / size)) * 1000
        ul_sample = (nM_pool / nM_dna) * output_ul
        ul_water = output_ul - ul_sample

        # Store output lines by well
        well = ''.join(input_artifact.location[1].split(':'))
        output[well] = ('{name}\t{plate_id_input}\t{well}\t{plate_id_output}\t'
                        '{volume_dna:.1f}\t{volume_water:.1f}\n').format(
                            name=input_artifact.name,
                            plate_id_input=input_artifact.location[0].name,
                            well=well,
                            plate_id_output=output_artifact.location[0].name,
                            volume_dna=ul_sample,
                            volume_water=ul_water)

    wells = []
    for col in range(1, 13):
        wells.extend([
            '{}{}'.format(row, str(col)) for row in string.ascii_uppercase[:8]
        ])

    for well in wells:
        if well in output:
            output_file.write(output[well])
        else:
            output_file.write(
                'Leeg\tNone\t{well}\t{plate_id_output}\t0\t0\n'.format(
                    well=well,
                    plate_id_output=output_artifact.location[0].name,
                ))
Code Example #17
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    target_concentration = float(args.targetConcentration)
    target_volume = float(args.targetVolume)
    threshold_conc_no_normalize = float(args.thresholdConcNoNormalize)

    # Create/truncate the output CSV; rows are appended per sample below
    with open(args.newCsvFilename, 'w', newline='') as csvfile:
        pass

    samples_in = p.all_inputs(unique=True)

    well_re = re.compile("([A-Z]):*([0-9]{1,2})")
    samples_in.sort(
        key=lambda sample: sort_samples_columnwise(sample, well_re)
    )  # wrap the call in a lambda to be able to pass in the regex

    if args.concOnOutput:
        samples = [find_output_artifact(s.name, p)
                   for s in samples_in]  # required for the WGS step
    else:
        samples = samples_in


    for i, sample in enumerate(samples):
        if not args.concOnOutput and sample.type != "Analyte":
            # if 16S, only work on analytes (not result files)
            # but WGS should work on result files
            continue
        concentration = get_udf_if_exists(sample,
                                          args.concentrationUDF,
                                          default=None)
        if concentration is not None:
            concentration = float(concentration)
            sample_required, water_required = calculate_volumes_required(
                concentration, target_concentration, target_volume,
                threshold_conc_no_normalize, is_control(sample.name))
            sample_required = format_volume(sample_required)
            water_required = format_volume(water_required)
        else:
            raise RuntimeError("Could not find UDF '%s' of sample '%s'" %
                               (args.concentrationUDF, sample.name))
        well = samples_in[i].location[1].split(':')
        well = ''.join(well)

        with open(args.newCsvFilename, 'a') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter='\t')
            csv_writer.writerow([well, water_required, sample_required])
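calculate_volumes_required is not shown. A sketch of a C1*V1 = C2*V2 dilution under the contract the call suggests (controls and samples at or below the no-normalize threshold are taken neat); this is an assumption, not the script's actual helper:

def calculate_volumes_required(concentration, target_concentration,
                               target_volume, threshold_conc_no_normalize,
                               sample_is_control):
    if sample_is_control or concentration <= threshold_conc_no_normalize:
        return target_volume, 0.0  # take the sample neat, add no water
    sample_volume = (target_concentration * target_volume) / concentration
    return sample_volume, target_volume - sample_volume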
Code Example #18
def samplesheet_pool_magnis_pools(lims, process_id, output_file):
    """Create manual pipetting samplesheet for pooling magnis pools. Correct for pools with < 8 samples"""
    process = Process(lims, id=process_id)

    # print header
    output_file.write('Pool\tContainer\tSample count\tVolume (ul)\n')

    # Get input pools, sort by name and print volume
    for input_artifact in sorted(process.all_inputs(resolve=True),
                                 key=lambda artifact: artifact.name):
        output_file.write(
            '{pool}\t{container}\t{sample_count}\t{volume}\n'.format(
                pool=input_artifact.name,
                container=input_artifact.container.name,
                sample_count=len(input_artifact.samples),
                volume=len(input_artifact.samples) * 1.25))
Code Example #19
def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for multiplex sequence pools."""

    process = Process(lims, id=process_id)
    input_pools = []
    total_sample_count = 0
    total_load_uL = 0

    for input_pool in process.all_inputs():
        input_pool_conc = float(
            input_pool.udf['Dx Concentratie fluorescentie (ng/ul)'])
        input_pool_size = float(input_pool.udf['Dx Fragmentlengte (bp)'])
        input_pool_nM = (input_pool_conc * 1000 * (1.0 / 660.0) *
                         (1 / input_pool_size)) * 1000
        input_pool_pM = (input_pool_nM * 1000) / 5

        input_pool_sample_count = 0

        for sample in input_pool.samples:
            if 'Dx Exoomequivalent' in sample.udf:
                input_pool_sample_count += sample.udf['Dx Exoomequivalent']
            else:
                input_pool_sample_count += 1
        total_sample_count += input_pool_sample_count
        input_pools.append({
            'name': input_pool.name,
            'nM': input_pool_nM,
            'pM': input_pool_pM,
            'sample_count': input_pool_sample_count
        })

    # print header
    output_file.write('Naam\tuL\n')

    # Last calculations and print per pool
    for input_pool in input_pools:
        input_pool_load_pM = (float(process.udf['Dx Laadconcentratie (pM)']) /
                              total_sample_count) * input_pool['sample_count']
        input_pool_load_uL = 150.0 / (input_pool['pM'] / input_pool_load_pM)
        total_load_uL += input_pool_load_uL
        output_file.write('{0}\t{1:.2f}\n'.format(input_pool['name'],
                                                  input_pool_load_uL))

    tris_HCL_uL = 150 - total_load_uL
    output_file.write('{0}\t{1:.2f}\n'.format('Tris-HCL', tris_HCL_uL))
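With made-up numbers, the loading arithmetic above works out as follows for two equal pools:

# Two pools of 12 samples each (total 24), both at 1000 pM,
# with 'Dx Laadconcentratie (pM)' set to 100:
pool_load_pM = (100.0 / 24) * 12                 # 50.0 pM allotted per pool
pool_load_uL = 150.0 / (1000.0 / pool_load_pM)   # 7.5 ul loaded per pool
tris_HCL_uL = 150 - 2 * pool_load_uL             # 135.0 ul Tris-HCL to fill up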
Code Example #20
File: artifact.py Project: UMCUGenetics/clarity_epp
def set_runid_name(lims, process_id):
    """Change artifact name to run id."""
    process = Process(lims, id=process_id)
    analyte = process.analytes()[0][0]
    input_artifact = process.all_inputs()[0]

    container_name = analyte.container.name

    # Find sequencing process
    # Assume one sequence process per input artifact
    for sequence_process_type in config.sequence_process_types:
        sequence_processes = lims.get_processes(
            type=sequence_process_type, inputartifactlimsid=input_artifact.id)
        for sequence_process in sequence_processes:
            if sequence_process.analytes()[0][0].container.name == container_name:
                analyte.name = sequence_process.udf['Run ID']
                analyte.put()
Code Example #21
File: workset_placement.py Project: Galithil/status
    def get(self):
        data = {}
        lims_url = self.request.query
        lims_id = "24-{}".format(lims_url.split("/")[-1])
        mylims = lims.Lims(BASEURI, USERNAME, PASSWORD)
        try:
            p = Process(mylims, id=lims_id)
            if p.type.name != 'Setup Workset/Plate':
                raise Exception("Wrong process type")
        except:
            self.set_status(400, reason="Wrong process type : use a Setup Workset/Plate")
            self.finish()
            return


        data['comments'] = {}
        data['samples'] = {}
        for i in p.all_inputs():
            sample_name = i.samples[0].name
            if not i.samples[0].project:
                continue
            else:
                project = i.samples[0].project
            if 'Project Comment' in project.udf and project.id not in data['comments']:
                data['comments'][project.id] = project.udf['Project Comment']
            data['samples'][sample_name] = {}
            data['samples'][sample_name]['amount'] = i.udf['Amount (ng)']
            data['samples'][sample_name]['previous_preps'] = {}
            if 'Library construction method' in project.udf:
                data['samples'][sample_name]['lib_method'] = project.udf['Library construction method']
            if 'Sequencing platform' in project.udf:
                data['samples'][sample_name]['seq_pl'] = project.udf['Sequencing platform']
            other_preps = mylims.get_processes(inputartifactlimsid=i.id, type="Setup Workset/Plate")
            for op in other_preps:
                if op.id != p.id:
                    for o in op.all_outputs():
                        if o.type == "Analyte" and o.samples[0].name == sample_name:
                            data['samples'][sample_name]['previous_preps'][o.location[0].name] = {
                                'position': o.location[1],
                                'amount': o.udf['Amount taken (ng)']
                            }


        self.set_header("Content-type", "application/json")
        self.write(json.dumps(data))
Code Example #22
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    if p.type.name == 'Aggregate QC (Library Validation) 4.0':
        udf_check = 'Conc. Units'
        value_check = ['nM', 'pM']
        udf_factor1 = 'Concentration'
        udf_factor2 = 'Volume (ul)'
        result_udf = 'Amount (fmol)'
    else:
        udf_check = 'Conc. Units'
        value_check = ['ng/ul', 'ng/uL']
        udf_factor1 = 'Concentration'
        udf_factor2 = 'Volume (ul)'
        result_udf = 'Amount (ng)'

    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "ResultFile"]

    correct_artifacts, wrong_factor1 = check_udf_is_defined(
        artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(
        correct_artifacts, udf_factor2)

    correct_artifacts, wrong_value = check_udf_has_value(
        correct_artifacts, udf_check, value_check)

    if correct_artifacts:
        apply_calculations(lims, correct_artifacts, udf_factor1, '*',
                           udf_factor2, result_udf, epp_logger, p)

    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_factor1) + len(wrong_factor2) + len(wrong_value)
    }

    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #23
def samplesheet_mip_pool_dilution(lims, process_id, output_file):
    """Create manual pipetting samplesheet for smMIP pool dilution"""
    process = Process(lims, id=process_id)

    # Write header
    output_file.write(
        '{sample}\t{ul_sample_10}\t{ul_EB_10}\t{ul_sample_20}\t{ul_EB_20}\t{ul_sample_40}\t{ul_EB_40}\t\n'
        .format(
            sample='Sample',
            ul_sample_10='ul Sample (10 ul)',
            ul_EB_10='ul EB buffer (10 ul)',
            ul_sample_20='ul Sample (20 ul)',
            ul_EB_20='ul EB buffer (20 ul)',
            ul_sample_40='ul Sample (40 ul)',
            ul_EB_40='ul EB buffer (40 ul)',
        ))

    for input_artifact in process.all_inputs(resolve=True):
        concentration = float(
            input_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])
        fragment_length = float(input_artifact.udf['Dx Fragmentlengte (bp)'])

        dna = (concentration * (10.0**3.0 / 1.0) * (1.0 / 649.0) *
               (1.0 / fragment_length)) * 1000.0
        ul_sample = 2 / dna * 10
        ul_EB = 10 - ul_sample

        output_file.write(
            '{sample}\t{ul_sample_10:.2f}\t{ul_EB_10:.2f}\t{ul_sample_20:.2f}\t{ul_EB_20:.2f}\t{ul_sample_40:.2f}\t{ul_EB_40:.2f}\t\n'
            .format(
                sample=input_artifact.name,
                ul_sample_10=ul_sample,
                ul_EB_10=ul_EB,
                ul_sample_20=ul_sample * 2,
                ul_EB_20=ul_EB * 2,
                ul_sample_40=ul_sample * 4,
                ul_EB_40=ul_EB * 4,
            ))
Code Example #24
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)

    # Create/truncate the output CSV; rows are appended per sample below
    with open(args.newCsvFilename, 'w', newline='') as csvfile:
        pass

    # the well location information is on the input samples
    well_re = re.compile("([A-Z]):*([0-9]{1,2})")
    samples_in = p.all_inputs(unique=True)
    samples_in.sort(
        key=lambda sample: sort_samples_columnwise(sample, well_re)
    )  # wrap the call in a lambda to be able to pass in the regex

    if args.concOnOutput:
        samples = [find_output_artifact(s.name, p)
                   for s in samples_in]  # required in the WGS step
    else:
        samples = samples_in

    for i, sample in enumerate(samples):
        concentration = get_udf_if_exists(sample, args.concUdf, default=None)
        if concentration is not None:
            concentration = float(concentration)
        else:
            raise RuntimeError("Could not find UDF '%s' of sample '%s'" %
                               (args.concUdf, sample.name))

        if concentration == 0.0:
            concentration = 0.01

        well = samples_in[i].location[1].split(':')
        well = ''.join(well)

        with open(args.newCsvFilename, 'a') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';')
            csv_writer.writerow([well, concentration])
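get_udf_if_exists and sort_samples_columnwise recur in these scripts without being shown. Plausible sketches, assuming wells are encoded as 'A:1' in artifact.location[1] (hypothetical reconstructions, not the scripts' own helpers):

def get_udf_if_exists(artifact, udf, default=""):
    """Return the UDF value, or the default when the field is not set."""
    if udf in artifact.udf:
        return artifact.udf[udf]
    return default


def sort_samples_columnwise(sample, well_re):
    """Sort key: by column, then row, giving A1, B1, ... H1, A2, B2, ..."""
    row, column = well_re.match(sample.location[1]).groups()
    return (int(column), row)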
Code Example #25
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)
    udf_check = 'Conc. Units'
    value_check = 'ng/ul'
    udf_factor1 = 'Concentration'
    udf_factor2 = 'Volume (ul)'
    result_udf = 'Amount (ng)'

    if args.aggregate:
        artifacts = p.all_inputs(unique=True)
    else:
        all_artifacts = p.all_outputs(unique=True)
        artifacts = [a for a in all_artifacts if a.output_type == "ResultFile"]

    correct_artifacts, wrong_factor1 = check_udf_is_defined(
        artifacts, udf_factor1)
    correct_artifacts, wrong_factor2 = check_udf_is_defined(
        correct_artifacts, udf_factor2)

    correct_artifacts, wrong_value = check_udf_has_value(
        correct_artifacts, udf_check, value_check)

    if correct_artifacts:
        apply_calculations(lims, correct_artifacts, udf_factor1, '*',
                           udf_factor2, result_udf, epp_logger, p)

    d = {
        'ca': len(correct_artifacts),
        'ia': len(wrong_factor1) + len(wrong_factor2) + len(wrong_value)
    }

    abstract = ("Updated {ca} artifact(s), skipped {ia} artifact(s) with "
                "wrong and/or blank values for some udfs.").format(**d)

    print(abstract, file=sys.stderr)  # stderr will be logged and printed in GUI
Code Example #26
def process_samples(lims_process: Process) -> Generator[str, None, None]:
    """Retrieve LIMS input samples from a process."""
    for lims_artifact in lims_process.all_inputs():
        for lims_sample in lims_artifact.samples:
            yield lims_sample.id
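A usage sketch for the generator (connection details and the process id are illustrative):

# lims = Lims(BASEURI, USERNAME, PASSWORD)
# process = Process(lims, id='24-12345')
# sample_ids = list(process_samples(process))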
Code Example #27
File: workflow.py Project: UMCUGenetics/clarity_epp
def helix_magnis(lims, process_id, output_file):
    """Export workflow information in helix table format (Magnis Workflow)."""
    output_file.write((
        "meet_id\twerklijst_nummer\tonderzoeknr\tmonsternummer\tZuivering OK?\tZuivering herh?\tSampleprep OK?\t"
        "Sampleprep herh?\tSequencen OK?\tSequencen herh?\tBfx analyse OK?\tSamplenaam\tvcf-file\tCNV vcf-file\n"
    ))
    process = Process(lims, id=process_id)

    for artifact in process.all_inputs():
        for sample in artifact.samples:
            if 'Dx Werklijstnummer' in sample.udf:  # Only check samples with a 'Werklijstnummer'
                sample_artifacts = lims.get_artifacts(samplelimsid=sample.id,
                                                      type='Analyte')
                # Filter artifacts without parent_process
                sample_artifacts = [
                    sample_artifact for sample_artifact in sample_artifacts
                    if sample_artifact.parent_process
                ]
                # Sort artifact by parent process id
                sample_artifacts = sorted(
                    sample_artifacts,
                    key=lambda artifact: int(
                        artifact.parent_process.id.split('-')[-1]))

                sample_all_processes = {}
                # reset after 'Dx Sample registratie zuivering' process -> this is a new import from helix, should not be counted as a repeat
                sample_filter_processes = {}

                for sample_artifact in sample_artifacts:
                    if 'Dx Sample registratie zuivering' in sample_artifact.parent_process.type.name:
                        sample_filter_processes = {}  # reset after new helix import
                    process_id = sample_artifact.parent_process.id
                    process_name = sample_artifact.parent_process.type.name

                    if process_name in sample_all_processes:
                        sample_all_processes[process_name].add(process_id)
                    else:
                        sample_all_processes[process_name] = set([process_id])

                    if process_name in sample_filter_processes:
                        sample_filter_processes[process_name].add(process_id)
                    else:
                        sample_filter_processes[process_name] = set([process_id])

                # Determine meetw
                repeat_cutoff = len(
                    sample.udf['Dx Werklijstnummer'].split(';')) * 2
                meetw_zui, meetw_zui_herh = determin_meetw(
                    config.meetw_zui_processes, sample_all_processes,
                    repeat_cutoff)
                meetw_sampleprep, meetw_sampleprep_herh = determin_meetw(
                    config.meetw_sampleprep_processes, sample_filter_processes,
                    2)
                meetw_seq, meetw_seq_herh = determin_meetw(
                    config.meetw_seq_processes, sample_filter_processes, 2)

                # Determine vcf files
                gatk_vcf = ''
                exomedepth_vcf = ''
                if 'Dx GATK vcf' in artifact.udf:
                    gatk_vcf = artifact.udf['Dx GATK vcf']
                if 'Dx ExomeDepth vcf' in artifact.udf:
                    exomedepth_vcf = artifact.udf['Dx ExomeDepth vcf']

                output_file.write((
                    "{meet_id}\t{werklijst}\t{onderzoeksnummer}\t{monsternummer}\t{meetw_zui}\t{meetw_zui_herh}\t"
                    "{meetw_sampleprep}\t{meetw_sampleprep_herh}\t{meetw_seq}\t{meetw_seq_herh}\t{meetw_bfx}\t"
                    "{sample_name}\t{vcf_file}\t{cnv_vcf_file}\n"
                ).format(
                    meet_id=sample.udf['Dx Meet ID'].split(';')[0],
                    werklijst=sample.udf['Dx Werklijstnummer'].split(';')[0],
                    onderzoeksnummer=sample.udf['Dx Onderzoeknummer'].split(
                        ';')[0],
                    monsternummer=sample.udf['Dx Monsternummer'],
                    meetw_zui=meetw_zui,
                    meetw_zui_herh=meetw_zui_herh,
                    meetw_sampleprep=meetw_sampleprep,
                    meetw_sampleprep_herh=meetw_sampleprep_herh,
                    meetw_seq=meetw_seq,
                    meetw_seq_herh=meetw_seq_herh,
                    meetw_bfx='J',
                    sample_name=get_sequence_name(sample),
                    vcf_file=gatk_vcf,
                    cnv_vcf_file=exomedepth_vcf,
                ))
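determin_meetw is only referenced here. A guess at its contract from the call sites, returning 'J'/'N' for performed and for repeated once the run count reaches the cutoff; the real clarity_epp helper may differ:

def determin_meetw(meetw_processes, sample_processes, repeat_cutoff):
    meetw, meetw_herh = 'N', 'N'
    run_count = sum(len(sample_processes[name])
                    for name in meetw_processes if name in sample_processes)
    if run_count:
        meetw = 'J'
    if run_count >= repeat_cutoff:
        meetw_herh = 'J'
    return meetw, meetw_herh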
Code Example #28
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)

    new_workbook = xlwt.Workbook()
    new_sheet = new_workbook.add_sheet('Sheet 1')
    for col, heading in enumerate(fields.keys()):
        new_sheet.write(0, col, heading)

    well_re = re.compile("([A-Z]):*([0-9]{1,2})")
    artifacts = p.all_inputs(unique=True)
    artifacts.sort(
        key=lambda sample: sort_samples_columnwise(sample, well_re)
    )  # wrap the call in a lambda to be able to pass in the regex

    if args.udfsOnOutput:
        outputs = [find_output_artifact(s.name, p)
                   for s in artifacts]  # required for the WGS step

    for i, artifact in enumerate(artifacts):
        sample = artifact.samples[0]  # the original, submitted sample
        fields["Sample Name"] = artifact.name
        fields["Original DNA Plate LIMS ID"] = ""
        if artifact.location:
            fields["Container Name"] = artifact.location[0].name
            fields["Well"] = artifact.location[1]
        else:
            fields["Container Name"] = "Unknown Container"
            fields["Well"] = "Unknown Well"
        if sample.project:
            fields["Project"] = sample.project.name
        else:
            fields["Project"] = ""
        fields["Sample Origin"] = get_udf_if_exists(sample, "Sample Origin")
        fields["Sample Buffer"] = get_udf_if_exists(sample, "Sample Buffer")
        fields["Indexes"] = artifact.reagent_labels
        fields["PCR Method"] = ""
        if args.udfsOnOutput:
            udf_sample = outputs[
                i]  # use the equivalent output of the sample to find the UDF measurement
        else:
            udf_sample = artifact
        fields["QuantIt HS Concentration"] = get_udf_if_exists(
            udf_sample, "QuantIt HS Concentration")
        fields["QuantIt BR Concentration"] = get_udf_if_exists(
            udf_sample, "QuantIt BR Concentration")
        fields["Qubit Concentration"] = get_udf_if_exists(
            udf_sample, "Qubit Concentration")
        fields["Chosen Concentration"] = get_udf_if_exists(
            udf_sample, "Concentration")
        fields["QuantIt HS Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "QuantIt HS Concentration (nM)")
        fields["QuantIt BR Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "QuantIt BR Concentration (nM)")
        fields["Qubit Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "Qubit Concentration (nM)")
        fields["Chosen Concentration (nM)"] = get_udf_if_exists(
            udf_sample, "Concentration (nM)")
        for col, field in enumerate(fields.values()):
            style = get_field_style(field, float(args.redTextConcThreshold),
                                    float(args.orangeTextConcThreshold))
            new_sheet.write(i + 1, col, field, style)

    new_workbook.save(args.outputFile)
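get_field_style is not shown; a sketch that colours low numeric concentrations, assuming the two thresholds mark red and orange bands (an illustration, not the script's helper):

import xlwt


def get_field_style(field, red_threshold, orange_threshold):
    try:
        value = float(field)
    except (TypeError, ValueError):
        return xlwt.Style.default_style  # non-numeric fields keep the default style
    if value < red_threshold:
        return xlwt.easyxf('font: colour red')
    if value < orange_threshold:
        return xlwt.easyxf('font: colour orange')
    return xlwt.Style.default_style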
Code Example #29
def update_samplesheet(lims, process_id, artifact_id, output_file):
    """Update illumina samplesheet."""
    process = Process(lims, id=process_id)
    trim_last_base = True  # Used to set Read1EndWithCycle

    def get_project(projects, urgent=False):
        """Inner function to get a project name for samples."""
        if urgent:  # Sort projects for urgent samples on name
            projects = sorted(projects.items(), key=operator.itemgetter(0))
            for project in projects:
                if project[1] < 9:
                    return project[0]  # return first project with < 9 samples
        else:  # Sort projects for other samples on number of samples
            projects = sorted(projects.items(), key=operator.itemgetter(1))
            return projects[0][0]  # return project with least amount of samples.

    # Parse families
    families = {}
    for artifact in process.all_inputs():
        for sample in artifact.samples:
            if (
                'Dx Familienummer' in list(sample.udf) and
                'Dx NICU Spoed' in list(sample.udf) and
                'Dx Protocolomschrijving' in list(sample.udf)
            ):
                # Dx production sample
                family = sample.udf['Dx Familienummer']

                # Create family if not exist
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': 'unknown_project',
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False  # merge, deep sequencing (5x), etc samples
                    }

                # Update family information
                if sample.udf['Dx Onderzoeksreden'] == 'Research':  # Dx research sample
                    for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
                        if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
                            project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
                            families[family]['project_type'] = project_type
                            families[family]['split_project_type'] = False
                            break

                else:  # Dx clinic sample
                    newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
                    if 'SNP fingerprint MIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'Fingerprint'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif 'PID09.V7_smMIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'ERARE'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif sample.udf['Dx NICU Spoed']:
                        families[family]['NICU'] = True
                        project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                    elif 'elidS30409818' in newest_protocol and not families[family]['NICU']:
                        project_type = 'CREv2'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True
                    elif 'elidS31285117' in newest_protocol and not families[family]['NICU']:
                        project_type = 'SSv7'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True

                    # Set urgent status
                    if 'Dx Spoed' in list(sample.udf) and sample.udf['Dx Spoed']:
                        families[family]['urgent'] = True

                    # Set deviating status, remove urgent status if deviating
                    if (
                        ('Dx Mergen' in list(sample.udf) and sample.udf['Dx Mergen']) or
                        ('Dx Exoomequivalent' in list(sample.udf) and sample.udf['Dx Exoomequivalent'] > 1)
                    ):
                        families[family]['deviating'] = True
                        families[family]['urgent'] = False

            else:  # Other samples
                if 'GIAB' in sample.name.upper() and not sample.project:  # GIAB control samples
                    family = 'GIAB'
                else:
                    family = sample.project.name
                    # Remove 'dx' (ignore case) and strip leading space or _
                    family = re.sub('^dx[ _]*', '', family, flags=re.IGNORECASE)
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': family,
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False
                    }

            # Add sample to family
            families[family]['samples'].append(sample)

    # Get all project types and count samples
    project_types = {}
    for family in families.values():
        if family['project_type'] in project_types:
            project_types[family['project_type']]['sample_count'] += len(family['samples'])
        else:
            project_types[family['project_type']] = {
                'sample_count': len(family['samples']),
                'projects': {},
                'split_project_type': family['split_project_type']
            }

    # Define projects per project_type
    for project_type in project_types:
        project_types[project_type]['index'] = 0
        if project_types[project_type]['split_project_type']:
            for i in range(0, int(project_types[project_type]['sample_count']/9+1)):
                project_types[project_type]['projects']['{0}_{1}'.format(project_type, i+1)] = 0
        else:
            project_types[project_type]['projects'][project_type] = 0

    # Set sample projects
    sample_projects = {}
    sample_sequence_names = {}

    # Urgent families / samples, skip deviating
    for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Deviating families / samples
    for family in [family for family in families.values() if family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Non urgent and non deviating families / samples
    normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
    for family in sorted(normal_families, key=lambda fam: (len(fam['samples'])), reverse=True):
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Check sequencer type
    # NextSeq runs need to reverse complement 'index2' for dual barcodes and 'index' for single barcodes.
    if 'nextseq' in process.type.name.lower():
        nextseq_run = True
    else:
        nextseq_run = False

    # Edit clarity samplesheet
    sample_header = ''  # empty until [data] section
    settings_section = False
    samplesheet_artifact = Artifact(lims, id=artifact_id)
    file_id = samplesheet_artifact.files[0].id

    for line in lims.get_file_contents(id=file_id).rstrip().split('\n'):
        if line.startswith('[Settings]') and trim_last_base:
            output_file.write('{line}\n'.format(line=line))
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            settings_section = True

        elif line.startswith('[Data]') and trim_last_base and not settings_section:
            output_file.write('[Settings]\n')
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            output_file.write('{line}\n'.format(line=line))

        elif line.startswith('Sample_ID'):  # Samples header line
            sample_header = line.rstrip().split(',')
            sample_id_index = sample_header.index('Sample_ID')
            sample_name_index = sample_header.index('Sample_Name')
            sample_project_index = sample_header.index('Sample_Project')

            if 'index2' in sample_header:
                index_index = sample_header.index('index2')
            else:
                index_index = sample_header.index('index')

            output_file.write('{line}\n'.format(line=line))

        elif sample_header:  # Samples header seen, so continue with samples.
            data = line.rstrip().split(',')

            # Fix sample name -> use sequence name
            if data[sample_name_index] in sample_sequence_names:
                data[sample_name_index] = sample_sequence_names[data[sample_name_index]]

            # Set Sample_Project
            if data[sample_name_index] in sample_projects:
                data[sample_project_index] = sample_projects[data[sample_name_index]]

            # Overwrite Sample_ID with Sample_Name to get the correct conversion output folder structure
            data[sample_id_index] = data[sample_name_index]

            # Reverse complement index for NextSeq runs
            if nextseq_run:
                data[index_index] = clarity_epp.export.utils.reverse_complement(data[index_index])

            output_file.write('{line}\n'.format(line=','.join(data)))
        else:  # Leave other lines untouched.
            output_file.write('{line}\n'.format(line=line))
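The reverse complement step above relies on clarity_epp.export.utils.reverse_complement, which this listing does not show. A minimal sketch of such a helper, assuming index sequences contain only the characters A, C, G, T and N (not the actual clarity_epp implementation):

# Minimal sketch of a reverse-complement helper for index sequences.
# Assumption: indexes contain only A, C, G, T and N.
COMPLEMENT = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'N': 'N'}

def reverse_complement(sequence):
    """Return the reverse complement of an index sequence."""
    return ''.join(COMPLEMENT[base] for base in reversed(sequence.upper()))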
Code example #30
def samplesheet_multiplex_library_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for multiplexing(pooling) samples."""
    process = Process(lims, id=process_id)
    inputs = list(set(process.all_inputs()))
    outputs = list(set(process.all_outputs()))

    sample_concentration = {}
    sample_size = {}
    trio_statuses = {}
    ul_sample = {}
    ng_sample = {}
    udf_output = []
    udf_ul_sample = {}
    udf_name_ul_sample = {}
    plate_id = {}
    well_id = {}
    pools_not_3 = []
    order = [
        'A1', 'B1', 'C1', 'D1', 'E1', 'F1', 'G1', 'H1', 'A2', 'B2', 'C2', 'D2',
        'E2', 'F2', 'G2', 'H2', 'A3', 'B3', 'C3', 'D3', 'E3', 'F3', 'G3', 'H3',
        'A4', 'B4', 'C4', 'D4', 'E4', 'F4', 'G4', 'H4', 'A5', 'B5', 'C5', 'D5',
        'E5', 'F5', 'G5', 'H5', 'A6', 'B6', 'C6', 'D6', 'E6', 'F6', 'G6', 'H6',
        'A7', 'B7', 'C7', 'D7', 'E7', 'F7', 'G7', 'H7', 'A8', 'B8', 'C8', 'D8',
        'E8', 'F8', 'G8', 'H8', 'A9', 'B9', 'C9', 'D9', 'E9', 'F9', 'G9', 'H9',
        'A10', 'B10', 'C10', 'D10', 'E10', 'F10', 'G10', 'H10', 'A11', 'B11',
        'C11', 'D11', 'E11', 'F11', 'G11', 'H11', 'A12', 'B12', 'C12', 'D12',
        'E12', 'F12', 'G12', 'H12'
    ]
    order = dict(zip(order, range(len(order))))
    well_order = {}
    sample_well_pool = []

    # get input udfs 'Dx sample volume (ul)' and 'Dx Samplenaam' per output analyte
    for output in outputs:
        if output.type == 'Analyte':
            if 'Dx sample volume (ul)' in output.udf and 'Dx Samplenaam' in output.udf:
                udf_ul_sample[output.name] = output.udf['Dx sample volume (ul)']
                # if the samplename is a complete sequence name, take only the monsternummer
                if re.search(r'U\d{6}\D{2}', output.udf['Dx Samplenaam']):
                    udf_name_ul_sample[output.name] = output.udf['Dx Samplenaam'][9:]
                else:
                    udf_name_ul_sample[output.name] = output.udf['Dx Samplenaam']
                udf_output.append(output.name)

    # get concentration, size, containername and well per input artifact
    for input in inputs:
        sample = input.samples[0]
        samplename = sample.name

        if 'Dx Concentratie fluorescentie (ng/ul)' in input.udf:
            measurement = input.udf['Dx Concentratie fluorescentie (ng/ul)']
            # only keep measurements with QC flag UNKNOWN or PASSED
            if input.qc_flag in ('UNKNOWN', 'PASSED'):
                sample_concentration[samplename] = measurement

        if 'Dx Fragmentlengte (bp)' in input.udf:
            measurement = input.udf['Dx Fragmentlengte (bp)']
            if input.qc_flag in ('UNKNOWN', 'PASSED'):
                sample_size[samplename] = measurement

        plate_id[samplename] = input.container.name
        placement = input.location[1]
        placement = ''.join(placement.split(':'))
        well_id[samplename] = placement
        well_order[sample.name] = order[placement]

    # get familystatus per sample in output analyte and determine trio composition if number of samples in pool = 3
    for output in outputs:
        if output.type == 'Analyte':
            sample_given_ul = ''
            if len(output.samples) == 3:
                samplestatus = []

                for sample in output.samples:
                    # First check GIAB controls
                    if 'CFGIAB' in sample.name.upper():
                        sample.udf['Dx Familie status'] = 'Kind'
                    elif 'PFGIAB' in sample.name.upper() or 'PMGIAB' in sample.name.upper():
                        sample.udf['Dx Familie status'] = 'Ouder'

                    if 'Dx Onderzoeksreden' in sample.udf and sample.udf['Dx Onderzoeksreden'] == 'Research':
                        samplestatus.append('Kind')
                    else:
                        samplestatus.append(sample.udf['Dx Familie status'])

                if samplestatus == ['Kind'] * 3 or samplestatus == ['Ouder'] * 3:
                    trio_statuses[output.name] = 'CCC'
                elif sorted(samplestatus) == ['Kind', 'Ouder', 'Ouder']:
                    trio_statuses[output.name] = 'CPP'
                elif sorted(samplestatus) == ['Kind', 'Kind', 'Ouder']:
                    trio_statuses[output.name] = 'CCP'

                # if udfs 'Dx sample volume (ul)' and 'Dx Samplenaam' are not empty, change trio status and do pre-calculation
                if output.name in udf_output:
                    trio_statuses[output.name] = 'adapted'

                    for sample in output.samples:
                        if sample.name == udf_name_ul_sample[output.name]:
                            sample_given_ul = sample
                            ng_sample[sample.name] = library_dilution_calculator_fixed_volume(
                                sample_concentration[sample.name],
                                sample_size[sample.name],
                                udf_ul_sample[output.name])

                    for sample in output.samples:
                        if sample.name != udf_name_ul_sample[output.name]:
                            ng_sample[sample.name] = library_dilution_calculator_fixed_ng(
                                sample_concentration[sample.name],
                                sample_size[sample.name],
                                sample.udf['Dx Familie status'],
                                ng_sample[udf_name_ul_sample[output.name]],
                                sample_given_ul.udf['Dx Familie status'])

                    output.udf['Dx input pool (ng)'] = round(
                        ng_sample[output.samples[0].name] +
                        ng_sample[output.samples[1].name] +
                        ng_sample[output.samples[2].name], 2)
                    output.put()

                else:
                    output.udf['Dx input pool (ng)'] = 750
                    output.put()

            # if number of samples in pool is not 3 set trio status and prepare error warning output file
            else:
                trio_statuses[output.name] = 'not_3'
                pools_not_3.append(output.name)

            # calculate ul per sample; handles both cases: udfs 'Dx sample volume (ul)' and 'Dx Samplenaam' empty or filled
            if not sample_given_ul:
                for sample in output.samples:
                    if 'Dx Onderzoeksreden' in sample.udf and sample.udf['Dx Onderzoeksreden'] == 'Research':
                        sample_pedigree = 'Kind'
                    else:
                        sample_pedigree = sample.udf['Dx Familie status']
                    ul_sample[sample.name] = library_dilution_calculator(
                        concentration=sample_concentration[sample.name],
                        size=sample_size[sample.name],
                        trio=trio_statuses[output.name],
                        pedigree=sample_pedigree,
                        ng=0)
            else:
                for sample in output.samples:
                    if 'Dx Onderzoeksreden' in sample.udf and sample.udf['Dx Onderzoeksreden'] == 'Research':
                        sample_pedigree = 'Kind'
                    else:
                        sample_pedigree = sample.udf['Dx Familie status']
                    ul_sample[sample.name] = library_dilution_calculator(
                        concentration=sample_concentration[sample.name],
                        size=sample_size[sample.name],
                        trio=trio_statuses[output.name],
                        pedigree=sample_pedigree,
                        ng=ng_sample[sample.name])

            # sorting pools then wells for output file
            sort_pool_name = output.name
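            # pad single-digit pool numbers ('#1_' -> '#01_') so string sorting matches numeric pool order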
            if re.search(r'#\d_', sort_pool_name):
                sort_pool_name = re.sub('#', '#0', sort_pool_name)
            for sample in output.samples:
                sample_well_pool.append([
                    sample, well_order[sample.name], sort_pool_name,
                    output.name
                ])

    sorted_samples = sorted(sample_well_pool,
                            key=lambda sample: (sample[2], sample[1]))

    # write output file per output analyte sorted on pool number
    output_file.write('Sample\tul Sample\tPlaat_id\twell_id\tpool\n')
    if pools_not_3:
        output_file.write(
            'De volgende pool(s) hebben een ander aantal samples dan 3: {pools}\n'
            .format(pools=pools_not_3))

    for sorted_sample in sorted_samples:
        sample = sorted_sample[0]

        output_file.write(
            '{sample}\t{ul_sample:.2f}\t{plate_id}\t{well_id}\t{pool}\n'.
            format(sample=sample.name,
                   ul_sample=ul_sample[sample.name],
                   plate_id=plate_id[sample.name],
                   well_id=well_id[sample.name],
                   pool=sorted_sample[3]))
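The hardcoded 96-well order above runs down each column (A1-H1, then A2-H2, and so on). An equivalent sketch that generates the same mapping, shown here only to make the ordering explicit:

# Sketch: generate the same column-major 96-well order used above.
order = ['{row}{column}'.format(row=row, column=column)
         for column in range(1, 13)
         for row in 'ABCDEFGH']
order = dict(zip(order, range(len(order))))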
Code example #31
def samplesheet_mip_multiplex_pool(lims, process_id, output_file):
    """Create manual pipetting samplesheet for smMIP multiplexing"""
    process = Process(lims, id=process_id)
    input_artifacts = []

    # Find all Dx Tapestation 2200/4200 QC process types
    qc_process_types = clarity_epp.export.utils.get_process_types(
        lims, ['Dx Tapestation 2200 QC', 'Dx Tapestation 4200 QC'])

    # Write header
    output_file.write('Sample\tVolume\tPlaat_id\tWell_id\tConcentratie\tHandmatig\n')

    for input_artifact in process.all_inputs(resolve=True):
        # Find last qc process for artifact
        qc_process = sorted(
            lims.get_processes(type=qc_process_types,
                               inputartifactlimsid=input_artifact.id),
            key=lambda process: int(process.id.split('-')[-1]))[-1]

        # Find concentration measurement; assumes the QC process has a matching output artifact
        concentration = None
        for qc_artifact in qc_process.outputs_per_input(input_artifact.id):
            if qc_artifact.name == input_artifact.name:
                concentration = float(qc_artifact.udf['Dx Concentratie fluorescentie (ng/ul)'])

        input_artifacts.append({
            'name': input_artifact.name,
            'concentration': concentration,
            'plate_id': input_artifact.location[0].id,
            'well_id': ''.join(input_artifact.location[1].split(':')),
            'manual': input_artifact.samples[0].udf['Dx Handmatig']
        })

    # Calculate avg concentration for all non manual samples
    concentrations = [
        input_artifact['concentration'] for input_artifact in input_artifacts
        if not input_artifact['manual']
    ]
    avg_concentration = sum(concentrations) / len(concentrations)

    # Set volume and store input_artifact per plate so the samplesheet can be printed sorted on plate and well
    input_containers = {}
    for input_artifact in input_artifacts:
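        # Bucket the pipetting volume by concentration relative to the plate average:
        # below 50% of the average -> 20 ul, above 150% -> 2 ul, otherwise 5 ul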
        if input_artifact['concentration'] < avg_concentration * 0.5:
            input_artifact['volume'] = 20
        elif input_artifact['concentration'] > avg_concentration * 1.5:
            input_artifact['volume'] = 2
        else:
            input_artifact['volume'] = 5

        if input_artifact['plate_id'] not in input_containers:
            input_containers[input_artifact['plate_id']] = {}

        input_containers[input_artifact['plate_id']][input_artifact['well_id']] = input_artifact

    for input_container in sorted(input_containers.keys()):
        input_artifacts = input_containers[input_container]
        for well in clarity_epp.export.utils.sort_96_well_plate(
                input_artifacts.keys()):
            input_artifact = input_artifacts[well]
            output_file.write(
                '{sample}\t{volume}\t{plate_id}\t{well_id}\t{concentration}\t{manual}\n'
                .format(
                    sample=input_artifact['name'],
                    volume=input_artifact['volume'],
                    plate_id=input_artifact['plate_id'],
                    well_id=input_artifact['well_id'],
                    concentration=input_artifact['concentration'],
                    manual=input_artifact['manual'],
                ))
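The plate- and well-sorted output relies on clarity_epp.export.utils.sort_96_well_plate, not shown here. A minimal sketch under the assumption that well ids look like 'A1'..'H12' and should be ordered row by row; the actual utility may use a different order:

# Sketch, assuming well ids like 'A1'..'H12' and row-major order (A1, A2, ..., H12).
def sort_96_well_plate(well_ids):
    """Sort well ids by row letter, then by numeric column."""
    return sorted(well_ids, key=lambda well: (well[0], int(well[1:])))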
Code example #32
def process_samples(self, lims_process: Process):
    """Yield the LIMS sample ids of all input samples of a process."""
    for lims_artifact in lims_process.all_inputs():
        for lims_sample in lims_artifact.samples:
            yield lims_sample.id
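Because process_samples is a generator, callers can stream the ids or collect them. A hypothetical usage sketch; the LIMS URL, credentials, process id and the 'handler' instance are placeholders, not values from the source:

# Hypothetical usage: collect the input sample ids of a process.
from genologics.entities import Process
from genologics.lims import Lims

lims = Lims('https://lims.example.com', username='user', password='secret')  # placeholder credentials
lims_process = Process(lims, id='24-12345')  # hypothetical process id
sample_ids = set(handler.process_samples(lims_process))  # handler: instance of the class defining this method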