Example #1
    def _upload(self, local_file, commit, artifacts_to_stdout):
        self.logger.debug("Shared files: {}".format(
            [artifact.name for artifact in
             self.extension.context.shared_files]))
        self.logger.debug("Shared file from extension: {}".format(
            self.extension.shared_file()))
        artifacts = [shared_file for shared_file in self.extension.context.shared_files
                    if shared_file.name == self.extension.shared_file()]
        assert len(artifacts) == 1
        artifact = artifacts[0]

        self.logger.info("Uploading local file {} to the LIMS placeholder at {}".format(local_file, artifact.id))
        if commit:
            # Find the output on the current step
            self.logger.info("Uploading to the LIMS server")
            attach_file(local_file, artifact)
        else:
            # When not connected to an actual server, we copy the file to another directory for integration tests
            upload_path = os.path.join(self.result_path, "uploaded")
            self.logger.info("Commit is set to false, copying the file to {}".format(upload_path))
            if os.path.exists(upload_path):
                os.rmdir(upload_path)
            os.mkdir(upload_path)
            # The LIMS always prefixes the uploaded file name with the artifact ID:
            new_file_name = "{}_{}".format(artifact.id, os.path.basename(local_file))
            new_file_path = os.path.join(upload_path, new_file_name)
            shutil.copyfile(local_file, new_file_path)

        if artifacts_to_stdout:
            print "--- {} => {} ({})".format(local_file, artifact.name, artifact.id)
            with open(local_file, 'r') as f:
                print f.read()
            print "---"
Example #2
def prepooling(currentStep, lims):
    log=[]
    # First, grab the volumes of the input artifacts. The method is ... rather unique.
    data=compute_transfer_volume(currentStep, lims, log)
    with open("bravo.csv", "w") as csvContext:
        for s in data:
            if s['vol_to_take'] > MAX_WARNING_VOLUME:
                log.append("Volume for sample {} is above {}, redo the calculations manually".format(s['name'], MAX_WARNING_VOLUME))
            if s['vol_to_take'] < MIN_WARNING_VOLUME:
                log.append("Volume for sample {} is below {}, redo the calculations manually".format(s['name'], MIN_WARNING_VOLUME))
            csvContext.write("{0},{1},{2},{3},{4}\n".format(s['src_fc_id'], s['src_well'], s['vol_to_take'], s['dst_fc'], s['dst_well']))
    if log:
        with open("bravo.log", "w") as logContext:
            logContext.write("\n".join(log))
    for out in currentStep.all_outputs():
        #attach the csv file and the log file
        if out.name=="Bravo CSV File":
            attach_file(os.path.join(os.getcwd(), "bravo.csv"), out)
        if log and out.name=="Bravo Log":
            attach_file(os.path.join(os.getcwd(), "bravo.log"), out)
    if log:
        #to get an error display in the lims, you need a non-zero exit code AND a message in STDERR
        sys.stderr.write("Errors were met, please check the Log file\n")
        sys.exit(2)
    else:
        logging.info("Work done")
Example #3
def setup_workset(currentStep):
    checkTheLog=[False]
    with open("bravo.csv", "w") as csvContext:
        with open("bravo.log", "w") as logContext:
            #working directly with the map allows easier input/output handling
            for art_tuple in currentStep.input_output_maps:
                #filter out result files
                if art_tuple[0]['uri'].type=='Analyte' and art_tuple[1]['uri'].type=='Analyte': 
                    source_fc=art_tuple[0]['uri'].location[0].name
                    source_well=art_tuple[0]['uri'].location[1]
                    dest_fc=art_tuple[1]['uri'].location[0].name
                    dest_well=art_tuple[1]['uri'].location[1]
                    try:
                        #might not be filled in
                        final_volume=art_tuple[1]['uri'].udf["Total Volume (uL)"]
                    except KeyError as e:
                        logContext.write("No Total Volume found for sample {0}\n".format(art_tuple[0]['uri'].samples[0].name))
                        checkTheLog[0]=True
                    else:
                        volume=calc_vol(art_tuple, logContext, checkTheLog)
                        csvContext.write("{0},{1},{2},{3},{4},{5}\n".format(source_fc, source_well, volume, dest_fc, dest_well, final_volume)) 
    for out in currentStep.all_outputs():
        #attach the csv file and the log file
        if out.name=="Bravo CSV File":
            attach_file(os.path.join(os.getcwd(), "bravo.csv"), out)
        if out.name=="Bravo Log":
            attach_file(os.path.join(os.getcwd(), "bravo.log"), out)
    if checkTheLog[0]:
        #to get an error display in the lims, you need a non-zero exit code AND a message in STDERR
        sys.stderr.write("Errors were met, please check the Log file\n")
        sys.exit(2)
    else:
        logging.info("Work done")
Example #4
def setup_workset(currentStep):
    checkTheLog=[False]
    with open("bravo.csv", "w") as csvContext:
        with open("bravo.log", "w") as logContext:
            #working directly with the map allows easier input/output handling
            for art_tuple in currentStep.input_output_maps:
                #filter out result files
                if art_tuple[0]['uri'].type=='Analyte' and art_tuple[1]['uri'].type=='Analyte':
                    source_fc=art_tuple[0]['uri'].location[0].name
                    source_well=art_tuple[0]['uri'].location[1]
                    dest_fc=art_tuple[1]['uri'].location[0].id
                    dest_well=art_tuple[1]['uri'].location[1]
                    try:
                        #might not be filled in
                        final_volume=art_tuple[1]['uri'].udf["Total Volume (uL)"]
                    except KeyError as e:
                        logContext.write("No Total Volume found for sample {0}\n".format(art_tuple[0]['uri'].samples[0].name))
                        checkTheLog[0]=True
                    else:
                        volume=calc_vol(art_tuple, logContext, checkTheLog)
                        csvContext.write("{0},{1},{2},{3},{4},{5}\n".format(source_fc, source_well, volume, dest_fc, dest_well, final_volume))
    for out in currentStep.all_outputs():
        #attach the csv file and the log file
        if out.name=="Bravo CSV File":
            attach_file(os.path.join(os.getcwd(), "bravo.csv"), out)
        if out.name=="Bravo Log":
            attach_file(os.path.join(os.getcwd(), "bravo.log"), out)
    if checkTheLog[0]:
        #to get an error display in the lims, you need a non-zero exit code AND a message in STDERR
        sys.stderr.write("Errors were met, please check the Log file\n")
        sys.exit(2)
    else:
        logging.info("Work done")
Example #5
def main(lims, pid, file):
    """Uploads a given file to the first output artifact of the process

    lims: The LIMS instance
    pid: Process Lims id
    file: File to be attached
    """
    p = Process(lims, id=pid)

    # Fetch all input-output artifact pairs
    io = p.input_output_maps

    # Filter them so that only PerInput output artifacts remains
    io_filtered = [
        x for x in io if x[1]['output-generation-type'] == 'PerInput'
    ]

    # Fetch the first input-output artifact pair
    (input, output) = io_filtered[0]

    # Instantiate the output artifact
    output_artifact = Artifact(lims, id=output['limsid'])

    # Attach the file
    attach_file(file, output_artifact)
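
These main() functions are normally driven by a small command-line wrapper that the LIMS invokes as an EPP script. Below is a minimal sketch of such a wrapper, assuming the genologics package layout (Lims in genologics.lims, credentials in genologics.config); the flag names are illustrative.

import argparse

from genologics.lims import Lims
from genologics.config import BASEURI, USERNAME, PASSWORD

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--pid", required=True, help="LIMS id of the current process")
    parser.add_argument("--file", required=True, help="path of the file to attach")
    args = parser.parse_args()

    # Connect to the LIMS and hand over to main(); check_version() verifies the API version.
    lims = Lims(BASEURI, USERNAME, PASSWORD)
    lims.check_version()
    main(lims, args.pid, args.file)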
Example #6
def main(lims, args, logger):
    """This should be run at project summary level"""
    p = Process(lims,id = args.pid)
    samplenb=0
    errnb=0
    summary={}
    logart=None
    for output_artifact in p.all_outputs():
        #filter to only keep solo sample demultiplexing output artifacts
        if output_artifact.type=='Analyte' and len(output_artifact.samples)==1:
            sample=output_artifact.samples[0]
            samplenb+=1
            #update the total number of reads
            total_reads=sumreads(sample, summary)
            sample.udf['Total Reads (M)']=total_reads
            output_artifact.udf['Set Total Reads']=total_reads
            logging.info("Total reads is {0} for sample {1}".format(sample.udf['Total Reads (M)'],sample.name))
            try:
                if sample.udf['Reads Min'] >= sample.udf['Total Reads (M)']:
                    sample.udf['Status (auto)']="In Progress"
                    sample.udf['Passed Sequencing QC']="False"
                elif sample.udf['Reads Min'] < sample.udf['Total Reads (M)'] : 
                    sample.udf['Passed Sequencing QC']="True"
                    sample.udf['Status (auto)']="Finished"
            except KeyError as e:
                print e
                logging.warning("No reads minimum found, cannot set the status auto flag for sample {0}".format(sample.name))
                errnb+=1

            #commit the changes
            sample.put()
            output_artifact.put()
        elif(output_artifact.type=='Analyte') and len(output_artifact.samples)!=1:
            logging.error("Found {0} samples for the ouput analyte {1}, that should not happen".format(len(output_artifact.samples()),output_artifact.id))
        elif(output_artifact.type=="ResultFile" and output_artifact.name=="AggregationLog"):
            logart=output_artifact


    #write the csv summary file; the first "sep=," line tells Excel which delimiter is used
    with open("AggregationLog.csv", "w") as f:
        f.write("sep=,\n")
        f.write('sample name,number of flowcells,number of lanes,flowcell1:lane1|lane2;flowcell2:lane1|lane2|lane3 ...\n')
        for sample in summary:
            view=[]
            totfc=len(summary[sample])
            totlanes=0
            for fc in summary[sample]:
                view.append("{0}:{1}".format(fc, "|".join(summary[sample][fc])))
                totlanes+=len(summary[sample][fc])
            f.write('{0},{1},{2},{3}\n'.format(sample, totfc, totlanes, ";".join(view)))
    try:
        attach_file(os.path.join(os.getcwd(), "AggregationLog.csv"), logart)
        logging.info("updated {0} samples with {1} errors".format(samplenb, errnb))
    except AttributeError:
        #happens if the log artifact does not exist, if the step has been started before the configuration changes
        logging.info("Could not upload the log file")
Example #7
def normalization(current_step):
    log = []
    with open("normalization.csv", "w") as csv:
        for art in current_step.input_output_maps:
            src = art[0]["uri"]
            dest = art[1]["uri"]
            if src.type == dest.type == "Analyte":
                # Source sample:
                src_plate = src.location[0].id
                src_well = src.location[1]
                src_tot_volume = float(src.udf["Volume (ul)"])
                try:
                    src_volume = float(dest.udf["Volume to take (uL)"])
                except:
                    sys.stderr.write("Field 'Volume to take (uL)' is empty for artifact {0}\n".format(dest.name))
                    sys.exit(2)

                src_conc = src.udf["Concentration"]
                # Diluted sample:
                dest_plate = dest.location[0].id
                dest_well = dest.location[1]
                try:
                    dest_conc = dest.udf["Normalized conc. (nM)"]
                except:
                    sys.stderr.write("Field 'Normalized conc. (nM)' is empty for artifact {0}\n".format(dest.name))
                    sys.exit(2)
                if src.udf["Conc. Units"] != "nM":
                    log.append("ERROR: No valid concentration found for sample {0}".format(src.samples[0].name))
                elif src_conc < dest_conc:
                    log.append("ERROR: Too low concentration for sample {0}".format(src.samples[0].name))
                else:
                    # Warn if volume to take > volume available or max volume is
                    # exceeded but still do the calculation:
                    if src_volume > src_tot_volume:
                        log.append("WARNING: Not enough available volume of sample {0}".format(src.samples[0].name))
                    final_volume = src_conc * src_volume / dest_conc
                    if final_volume > MAX_WARNING_VOLUME:
                        log.append("WARNING: Maximum volume exceeded for sample {0}".format(src.samples[0].name))
                    csv.write("{0},{1},{2},{3},{4},{5}\n".format(src_plate, src_well, src_volume, dest_plate, dest_well, final_volume))
    if log:
        with open("normalization.log", "w") as log_context:
            log_context.write("\n".join(log))
    for out in current_step.all_outputs():
        #attach the csv file and the log file
        if out.name == "Normalization buffer volumes CSV":
            attach_file(os.path.join(os.getcwd(), "normalization.csv"), out)
        elif out.name == "Normalization Log" and log:
            attach_file(os.path.join(os.getcwd(), "normalization.log"), out)
    if log:
        #to get an error display in the lims, you need a non-zero exit code AND a message in STDERR
        sys.stderr.write("Errors were met, please check the log file\n")
        sys.exit(2)
    else:
        logging.info("Work done")
Example #8
def generate_csv(lims, step_id, logfile):
    logger = setupLog(logfile)
    pro = Process(lims, id=step_id)
    data = generate_data(pro)
    with open("neoprep_input.csv", 'wb') as f:
        f.write(data)
    for out in pro.all_outputs():
        #attach the csv file
        if out.name == "Input CSV File":
            attach_file(os.path.join(os.getcwd(), "neoprep_input.csv"), out)
        if out.name == "Log File":
            attach_file(os.path.join(os.getcwd(), logfile), out)
Example #9
def generate_csv(lims, step_id, logfile):
    logger=setupLog(logfile)
    pro=Process(lims, id=step_id)
    data=generate_data(pro)
    with open("neoprep_input.csv", 'wb') as f:
        f.write(data)
    for out in pro.all_outputs():
        #attach the csv file 
        if out.name=="Input CSV File":
            attach_file(os.path.join(os.getcwd(), "neoprep_input.csv"), out)
        if out.name=="Log File":
            attach_file(os.path.join(os.getcwd(), logfile), out)
Example #10
def main(lims, args):

    p=Process(lims, id=args.pid)
    log=[]
    datamap={}
    wsname=None
    username="******".format(p.technician.first_name, p.technician.last_name)
    user_email=p.technician.email
    for art in p.all_inputs():
        if len(art.samples)!=1:
            log.append("Warning : artifact {0} has more than one sample".format(art.id))
        for sample in art.samples:
            #take care of lambda DNA (control samples that have no project)
            if sample.project:
                if sample.project.id not in datamap:
                    datamap[sample.project.id]=[sample.name]
                else:
                    datamap[sample.project.id].append(sample.name)

    for art in p.all_outputs():
        try:
            wsname=art.location[0].name
            break
        except:
            pass

    now=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    for pid in datamap:
        pj=Project(lims, id=pid)
        running_notes=json.loads(pj.udf['Running Notes'])
        if len(datamap[pid]) > 1:
            rnt="{0} samples planned for {1}".format(len(datamap[pid]), wsname)
        else:
            rnt="{0} sample planned for {1}".format(len(datamap[pid]), wsname)

        running_notes[now]={"note": rnt, "user" : username, "email":user_email, "category":"Workset"}

        pj.udf['Running Notes']=json.dumps(running_notes)
        pj.put()
        log.append("Updated project {0} : {1}, {2} samples in this workset".format(pid,pj.name, len(datamap[pid])))


    with open("EPP_Notes.log", "w") as flog:
        flog.write("\n".join(log))
    for out in p.all_outputs():
        #attach the log file
        if out.name=="RNotes Log":
            attach_file(os.path.join(os.getcwd(), "EPP_Notes.log"), out)

    sys.stderr.write("Updated {0} projects successfully".format(len(datamap.keys())))
Example #11
def converter(demux_process, epp_logger):
    #Fetches workflow info
    proc_stats = manipulate_workflow(demux_process)
    #Sets up the process values
    manipulate_process(demux_process, proc_stats)
    #Create the demux output file
    parser_struct = write_demuxfile(proc_stats)
    #Alters artifacts
    set_sample_values(demux_process, parser_struct, proc_stats)
    
    #Attaches output files to the lims process; craziness
    for out in demux_process.all_outputs():
        if out.name == "Demultiplex Stats":
            attach_file(os.path.join(os.getcwd(), 'demuxstats' + '_' + proc_stats['Flow Cell ID'] + '_' + timestamp + '.csv'), out)
        elif out.name == "QC Log File":
            attach_file(os.path.join(os.getcwd(), 'runtime_'+ timestamp + '.log'), out)
Example #12
def prepooling(currentStep, lims):
    log=[]
    # First, grab the volumes of the input artifacts. The method is ... rather unique.
    data=compute_transfer_volume(currentStep, lims, log)
    with open("bravo.csv", "w") as csvContext:
        for s in data:
            csvContext.write("{0},{1},{2},{3},{4}\n".format(s['src_fc'], s['src_well'], s['vol_to_take'], s['dst_fc'], s['dst_well']))
    if log:
        with open("bravo.log", "w") as logContext:
            logContext.write("\n".join(log))
    for out in currentStep.all_outputs():
        #attach the csv file and the log file
        if out.name=="Bravo CSV File":
            attach_file(os.path.join(os.getcwd(), "bravo.csv"), out)
        if log and out.name=="Bravo Log":
            attach_file(os.path.join(os.getcwd(), "bravo.log"), out)
    if log:
        #to get an error display in the lims, you need a non-zero exit code AND a message in STDERR
        sys.stderr.write("Errors were met, please check the Log file\n")
        sys.exit(2)
    else:
        logging.info("Work done")
Example #13
def main(lims, pid, file):
    """Uploads a given file to the first output artifact of the process

    lims: The LIMS instance
    pid: Process Lims id
    file: File to be attached
    """
    p=Process(lims,id=pid)

    # Fetch all input-output artifact pairs
    io = p.input_output_maps

    # Filter them so that only PerInput output artifacts remains
    io_filtered = filter(lambda (x,y): y['output-generation-type']=='PerInput',io)

    # Fetch the first input-output artifact pair
    (input,output) = io_filtered[0]

    # Instantiate the output artifact
    output_artifact = Artifact(lims, id=output['limsid'])

    # Attach the file
    attach_file(file, output_artifact)
Example #14
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)

    if not args.path:
        args.path = os.getcwd()

    file_list = os.listdir(args.path)

    # Find all per input result files
    io = p.input_output_maps
    io_filtered = filter(
        lambda (x, y): y['output-generation-type'] == 'PerInput', io)
    io_filtered = filter(lambda (x, y): y['output-type'] == 'ResultFile',
                         io_filtered)

    artifact_missing_file = []
    artifact_multiple_file = []
    found_files = []

    for input, output in io_filtered:
        i_a = Artifact(lims, id=input['limsid'])
        o_a = Artifact(lims, id=output['limsid'])

        # Input Well, Input Container
        i_w, i_c = i_a.location[1], i_a.location[0]

        # Well is typed without colon in filename:
        i_w = ''.join(i_w.split(':'))

        info = {
            'well': i_w,
            'container_id': i_c.id,
            'input_artifact_id': i_a.id
        }

        # Use a regular expression to find the file name given
        # the container and sample. This is all assuming the driver template name ends with:
        # ${INPUT.CONTAINER.PLACEMENT}_${INPUT.NAME}_${INPUT.CONTAINER.LIMSID}_${INPUT.LIMSID}
        # However, names are excluded to improve robustness.
        re_str = '.*{well}_.*_.*{container_id}_.*{input_artifact_id}'\
                                   .format(**info)

        im_file_r = re.compile(re_str)
        fns = filter(im_file_r.match, file_list)
        logging.info(
            ("Looking for file for artifact id: {input_artifact_id} "
             "from container with id: {container_id}.").format(**info))

        if len(fns) == 0:
            logging.warning(
                "No image file found for artifact with id {0}".format(i_a.id))
            artifact_missing_file.append(i_a)
        elif len(fns) > 1:
            logging.warning(
                ("Multiple image files found for artifact with id {0}, "
                 "please attach files manually").format(i_a.id))
            artifact_multiple_file.append(i_a)
        else:
            fn = fns[0]
            found_files.append(fn)
            logging.info(
                "Found image file {0} for artifact with id {1}".format(
                    fn, i_a.id))
            fp = os.path.join(args.path, fn)

            # Attach file to the LIMS
            location = attach_file(fp, o_a)
            logging.debug("Moving {0} to {1}".format(fp, location))

    warning = ""
    if len(artifact_missing_file):
        warning = "Did not find any file for {0} artifact(s). ".format(
            len(artifact_missing_file))

    if len(artifact_multiple_file):
        warning += "Found multiple files for {0} artifact(s), none of these were uploaded.".format(
            len(artifact_multiple_file))

    if warning:
        warning = "Warning: " + warning

    abstract = "Uploaded {0} file(s). {1}".format(len(found_files), warning)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
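
To make the file-matching step concrete, here is a small, self-contained illustration of the regular expression built above; the well, container id, artifact id and file names are made up.

import re

info = {'well': 'A1', 'container_id': '27-1234', 'input_artifact_id': '2-5678'}
re_str = '.*{well}_.*_.*{container_id}_.*{input_artifact_id}'.format(**info)

# A driver-template style name for the right well/container/artifact matches:
assert re.match(re_str, 'A1_SampleX_27-1234_2-5678.png')
# A file for a different well and container does not:
assert re.match(re_str, 'B2_SampleX_27-9999_2-0000.png') is None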
Example #15
def read_log(lims, pid, logfile):
    logger=setupLog(logfile)
    pro=Process(lims, id=pid)
    file_contents = None  # will hold the machine log, if found
    data = {}
    for out in pro.all_outputs():
        if out.type == "ResultFile" and out.name == "NeoPrep Output Log File":
            try:
                fid=out.files[0].id
            except IndexError:
                logger.error("Can't find the machine log file")
                print("Cannot find the NeoPrep Output Log File", file=sys.stderr)
                exit(2)

            file_contents=lims.get_file_contents(id=fid)
            logger.info("Found the machine log file")

    if file_contents:
        data={}
        read=False
        #default values
        sample_idx=2
        conc_idx=6
        norm_idx=7
        stat_idx=8
        logger.info("Reading the file")
        for line in file_contents.split('\n') :
            #This does something close to csv.DictReader, but the file is FUBAR
            if not line.rstrip():
                read=False
            if read:
                if "Start Well" in line:
                    #Header row
                    #identify which column goes with which index
                    elements=line.split('\t')
                    for idx, el in enumerate(elements):
                        if el == "Name":
                            sample_idx=idx
                        elif el == "Quant":
                            conc_idx=idx
                        elif el == "Norm":
                            norm_idx=idx
                        elif el == "Status":
                            stat_idx=idx
                else:
                    elements=line.split('\t')
                    #data rows
                    data[elements[sample_idx]]={}
                    data[elements[sample_idx]]['conc']=elements[conc_idx]
                    data[elements[sample_idx]]['norm']=elements[norm_idx]
                    data[elements[sample_idx]]['stat']=elements[stat_idx]

            if "[Sample Information]" in line:
                read=True
        logger.info("obtained data for samples {0}".format(data.keys()))

    for inp in pro.all_inputs():
        #save the data from the logfile to the lims artifacts
        if inp.name in data:
            inp.udf['Molar Conc. (nM)']=float(data[inp.name]['conc'])
            inp.udf['Normalized conc. (nM)']=float(data[inp.name]['norm'])
            inp.udf['NeoPrep Machine QC']=data[inp.name]['stat']
            inp.put()
            logger.info("updated sample {0}".format(inp.name))

    for out in pro.all_outputs():
        #attach the epp log
        if out.name=="EPP Log":
            attach_file(os.path.join(os.getcwd(), logfile), out)
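
The parser above expects a tab-separated "[Sample Information]" section terminated by a blank line, with a header row containing "Start Well" plus "Name", "Quant", "Norm" and "Status" columns. A made-up fragment in that shape follows; the extra column names are guesses, but the positions line up with the parser's default indices (2, 6, 7, 8).

example_log_section = (
    "[Sample Information]\n"
    "Start Well\tEnd Well\tName\tIndex\tWell\tPos\tQuant\tNorm\tStatus\n"
    "A1\tA1\tP123_101\tA001\tA1\t1\t12.3\t10.0\tPass\n"
    "\n"
)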
Example #16
def read_log(lims, pid, logfile):
    logger = setupLog(logfile)
    pro = Process(lims, id=pid)
    file_contents = None  # will hold the machine log, if found
    data = {}
    for out in pro.all_outputs():
        if out.type == "ResultFile" and out.name == "NeoPrep Output Log File":
            fid = out.files[0].id
            file_contents = lims.get_file_contents(id=fid)
            logger.info("Found the machine log file")

    if file_contents:
        data = {}
        read = False
        #default values
        sample_idx = 2
        conc_idx = 6
        norm_idx = 7
        stat_idx = 8
        logger.info("Reading the file")
        for line in file_contents.split('\n'):
            #This does something close to csv.DictReader, but the file is FUBAR
            if not line.rstrip():
                read = False
            if read:
                if "Start Well" in line:
                    #Header row
                    #identify which column goes with which index
                    elements = line.split('\t')
                    for idx, el in enumerate(elements):
                        if el == "Name":
                            sample_idx = idx
                        elif el == "Quant":
                            conc_idx = idx
                        elif el == "Norm":
                            norm_idx = idx
                        elif el == "Status":
                            stat_idx = idx
                else:
                    elements = line.split('\t')
                    #data rows
                    data[elements[sample_idx]] = {}
                    data[elements[sample_idx]]['conc'] = elements[conc_idx]
                    data[elements[sample_idx]]['norm'] = elements[norm_idx]
                    data[elements[sample_idx]]['stat'] = elements[stat_idx]

            if "[Sample Information]" in line:
                read = True
        logger.info("obtained data for samples {0}".format(data.keys()))

    for inp in pro.all_inputs():
        #save the data from the logfile to the lims artifacts
        if inp.name in data:
            inp.udf['Molar Conc. (nM)'] = float(data[inp.name]['conc'])
            inp.udf['Normalized conc. (nM)'] = float(data[inp.name]['norm'])
            inp.udf['NeoPrep Machine QC'] = data[inp.name]['stat']
            inp.put()
            logger.info("updated sample {0}".format(inp.name))

    for out in pro.all_outputs():
        #attach the epp log
        if out.name == "EPP Log":
            attach_file(os.path.join(os.getcwd(), logfile), out)
Example #17
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)

    if not args.path:
        args.path = os.getcwd()

    file_list = os.listdir(args.path)
    
    # Find all per input result files
    io = p.input_output_maps
    io_filtered = filter(lambda (x,y): y['output-generation-type']=='PerInput', io)
    io_filtered = filter(lambda (x,y): y['output-type']=='ResultFile', io_filtered)
    
    artifact_missing_file = []
    artifact_multiple_file = []
    found_files = []

    for input, output in io_filtered:
        i_a = Artifact(lims,id=input['limsid'])
        o_a = Artifact(lims,id=output['limsid'])

        # Input Well, Input Container
        i_w, i_c = i_a.location[1], i_a.location[0]

        # Well is typed without colon in filename:
        i_w = ''.join(i_w.split(':'))
        
        info = {'well':i_w,
                'container_id':i_c.id,
                'input_artifact_id':i_a.id}

        # Use a regular expression to find the file name given
        # the container and sample. This is all assuming the driver template name ends with:
        # ${INPUT.CONTAINER.PLACEMENT}_${INPUT.NAME}_${INPUT.CONTAINER.LIMSID}_${INPUT.LIMSID}
        # However, names are excluded to improve robustness.
        re_str = '.*{well}_.*_.*{container_id}_.*{input_artifact_id}'\
                                   .format(**info)

        im_file_r = re.compile(re_str)
        fns = filter(im_file_r.match, file_list)
        logging.info(("Looking for file for artifact id: {input_artifact_id} "
                      "from container with id: {container_id}.").format(**info))

        if len(fns) == 0:
            logging.warning("No image file found for artifact with id {0}".format(i_a.id))
            artifact_missing_file.append(i_a)
        elif len(fns) > 1:
            logging.warning(("Multiple image files found for artifact with id {0}, "
                            "please attach files manually").format(i_a.id))
            artifact_multiple_file.append(i_a)
        else:
            fn = fns[0]
            found_files.append(fn)
            logging.info("Found image file {0} for artifact with id {1}".format(fn, i_a.id))
            fp = os.path.join(args.path, fn)
            
            # Attach file to the LIMS
            location = attach_file(fp, o_a)
            logging.debug("Moving {0} to {1}".format(fp,location))

    warning = ""
    if len(artifact_missing_file):
        warning = "Did not find any file for {0} artifact(s). ".format(len(artifact_missing_file))

    if len(artifact_multiple_file):
        warning += "Found multiple files for {0} artifact(s), none of these were uploaded.".format(len(artifact_multiple_file))
    
    if warning:
       warning = "Warning: " + warning

    abstract = "Uploaded {0} file(s). {1}".format(len(found_files), warning)
    print >> sys.stderr, abstract # stderr will be logged and printed in GUI
Example #18
def main(lims, args, logger):
    """This should be run at project summary level"""
    p = Process(lims, id=args.pid)
    samplenb = 0
    errnb = 0
    summary = {}
    logart = None
    for output_artifact in p.all_outputs():
        #filter to only keep solo sample demultiplexing output artifacts
        if output_artifact.type == 'Analyte' and len(
                output_artifact.samples) == 1:
            sample = output_artifact.samples[0]
            samplenb += 1
            #update the total number of reads
            total_reads = sumreads(sample, summary)
            sample.udf['Total Reads (M)'] = total_reads
            output_artifact.udf['Set Total Reads'] = total_reads
            logging.info("Total reads is {0} for sample {1}".format(
                sample.udf['Total Reads (M)'], sample.name))
            try:
                if sample.udf['Reads Min'] >= sample.udf['Total Reads (M)']:
                    sample.udf['Status (auto)'] = "In Progress"
                    sample.udf['Passed Sequencing QC'] = "False"
                elif sample.udf['Reads Min'] < sample.udf['Total Reads (M)']:
                    sample.udf['Passed Sequencing QC'] = "True"
                    sample.udf['Status (auto)'] = "Finished"
            except KeyError as e:
                print e
                logging.warning(
                    "No reads minimum found, cannot set the status auto flag for sample {0}"
                    .format(sample.name))
                errnb += 1

            #commit the changes
            sample.put()
            output_artifact.put()
        elif (output_artifact.type
              == 'Analyte') and len(output_artifact.samples) != 1:
            logging.error(
                "Found {0} samples for the ouput analyte {1}, that should not happen"
                .format(len(output_artifact.samples()), output_artifact.id))
        elif (output_artifact.type == "ResultFile"
              and output_artifact.name == "AggregationLog"):
            logart = output_artifact

    #write the csv summary file; the first "sep=," line tells Excel which delimiter is used
    with open("AggregationLog.csv", "w") as f:
        f.write("sep=,\n")
        f.write(
            'sample name,number of flowcells,number of lanes,flowcell1:lane1|lane2;flowcell2:lane1|lane2|lane3 ...\n'
        )
        for sample in summary:
            view = []
            totfc = len(summary[sample])
            totlanes = 0
            for fc in summary[sample]:
                view.append("{0}:{1}".format(fc,
                                             "|".join(summary[sample][fc])))
                totlanes += len(summary[sample][fc])
            f.write('{0},{1},{2},{3}\n'.format(sample, totfc, totlanes,
                                               ";".join(view)))
    try:
        attach_file(os.path.join(os.getcwd(), "AggregationLog.csv"), logart)
        logging.info("updated {0} samples with {1} errors".format(
            samplenb, errnb))
    except AttributeError:
        #happens if the log artifact does not exist, if the step has been started before the configuration changes
        logging.info("Could not upload the log file")
Example #19
            sample.put()
        elif(output_artifact.type=='Analyte') and len(output_artifact.samples)!=1:
            logging.error("Found {0} samples for the ouput analyte {1}, that should not happen".format(len(output_artifact.samples()),output_artifact.id))
        elif(output_artifact.type=="ResultFile" and output_artifact.name=="AggregationLog"):
            logart=output_artifact


    with open("AggregationLog.csv", "w") as f:
       f.write("sample name | number of flowcells | number of lanes [ list of <flowcells:lane>")
        for sample in summary:
            view=set("{0}:{1}".format(s[0],s[1]) for s in summary[sample])
            totfc=len(set([s[0] for s in summary[sample]]))
            totlanes=len(view)
            f.write("{0} | {1} | {2} | {3}\n".format(sample, totfc, totlanes, ";".join(view)))
    attach_file(os.path.join(os.getcwd(), "AggregationLog.csv"), logart)
    logging.info("updated {0} samples with {1} errors".format(samplenb, errnb))
def demnumber(sample):
    """Returns the number of distinct demultiplexing processes for a given sample"""
    expectedName="{0} (FASTQ reads)".format(sample.name)
    dem=set()
    arts=lims.get_artifacts(sample_name=sample.name,process_type=DEMULTIPLEX.values(), name=expectedName)   
    for a in arts:
        if a.udf["Include reads"] == "YES":
            dem.add(a.parent_process.id)
    return len(dem)
    
def sumreads(sample, summary):
    if sample.name not in summary:
        summary[sample.name]=[]