Code example #1
File: epp.py Project: b97pla/genologics
    def prepend_old_log(self):
        """Prepend the old log to the new log. 

        The location of the old log file is retrieved through the REST api. 
        In order to work, the script should be executed on the LIMS server
        since the location on the disk is parsed out from the sftp string
        and then used for local copy of file. 

        This method does not use logging since that could mess up the
        logging settings, instead warnings are printed to stderr."""
        try:
            log_artifact = Artifact(self.lims, id=self.log_file)
            log_artifact.get()
            if log_artifact.files:
                # Parse the local disk path out of the sftp content location.
                log_path = log_artifact.files[0].content_location.split(
                    self.lims.baseuri.split(":")[1])[1]
                destination = os.path.join(os.getcwd(), self.log_file)
                copy(log_path, destination)
                with open(destination, "a") as f:
                    f.write("=" * 80 + "\n")
        except HTTPError:  # Probably no artifact found, skip prepending
            print("No log file artifact found for id: {0}".format(self.log_file),
                  file=sys.stderr)
        except IOError:  # Probably some path was wrong in copy
            print("Log could not be prepended, make sure {0} and {1} are "
                  "proper paths.".format(log_path, self.log_file),
                  file=sys.stderr)
            raise
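For context, a minimal standalone sketch of the same prepend logic; the function name, the "92-1234" artifact id, and the credentials import are illustrative assumptions, while the Lims/Artifact calls mirror the snippet above:

import os
from shutil import copy

from genologics.config import BASEURI, USERNAME, PASSWORD
from genologics.entities import Artifact
from genologics.lims import Lims


def prepend_log_sketch(lims, log_file_id):
    # Fetch the log artifact and derive a local disk path from its
    # sftp content location, as in the method above.
    log_artifact = Artifact(lims, id=log_file_id)
    log_artifact.get()
    if log_artifact.files:
        host_part = lims.baseuri.split(":")[1]
        log_path = log_artifact.files[0].content_location.split(host_part)[1]
        destination = os.path.join(os.getcwd(), log_file_id)
        copy(log_path, destination)
        with open(destination, "a") as f:
            f.write("=" * 80 + "\n")  # separator between old and new log


lims = Lims(BASEURI, USERNAME, PASSWORD)
prepend_log_sketch(lims, "92-1234")  # "92-1234" is a made-up artifact id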
Code example #2
File: epp.py Project: EdinburghGenomics/genologics
    def prepend_old_log(self, external_log_file=None):
        """Prepend the old log to the new log. 

        The location of the old log file is retrieved through the REST api. 
        In order to work, the script should be executed on the LIMS server
        since the location on the disk is parsed out from the sftp string
        and then used for local copy of file. 

        This method does not use logging since that could mess up the
        logging settings, instead warnings are printed to stderr."""
        if external_log_file:
            log_file_name = external_log_file
        else:
            log_file_name = self.log_file

        local_log_path = os.path.join(os.getcwd(), log_file_name)
        if not os.path.isfile(local_log_path):
            try:
                log_artifact = Artifact(self.lims, id=log_file_name)
                log_artifact.get()
                if log_artifact.files:
                    log_path = log_artifact.files[0].content_location.split(
                        self.lims.baseuri.split(':')[1])[1]
                    copy(log_path, local_log_path)
                    with open(local_log_path, 'a') as f:
                        f.write('=' * 80 + '\n')
            except HTTPError:  # Probably no artifact found, skip prepending
                print('No log file artifact found for id: {0}'.format(log_file_name),
                      file=sys.stderr)
            except IOError:  # Probably some path was wrong in copy
                print('Log could not be prepended, make sure {0} and {1} are '
                      'proper paths.'.format(log_path, log_file_name),
                      file=sys.stderr)
                raise
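This variant adds an optional external_log_file argument and skips the copy when the log already exists locally. A hedged call-site sketch (the epp_logger instance name is illustrative):

# Illustrative only: assumes an instance exposing prepend_old_log().
epp_logger.prepend_old_log()  # prepend the artifact named in self.log_file
epp_logger.prepend_old_log(external_log_file='92-1234')  # made-up artifact id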
Code example #3
File: test_entities.py Project: senthil10/genologics
    def test_input_artifact_list(self):
        a = Artifact(uri=self.lims.get_uri('artifacts', 'a1'), lims=self.lims)
        with patch('requests.Session.get',
                   return_value=Mock(content=self.root_artifact_xml, status_code=200)):
            assert a.input_artifact_list() == []
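The test stubs requests.Session.get so no live LIMS is needed; any entity fetch inside the with block receives the canned XML. A minimal sketch of the same pattern (the abridged XML payload here is a made-up fixture; the real test suite defines root_artifact_xml with a full artifact payload):

from unittest.mock import patch, Mock

# Abridged, made-up fixture standing in for self.root_artifact_xml.
root_artifact_xml = b'<art:artifact xmlns:art="http://genologics.com/ri/artifact"/>'

with patch('requests.Session.get',
           return_value=Mock(content=root_artifact_xml, status_code=200)):
    pass  # any entity GET inside this block returns the canned XML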
Code example #4
File: queues.py Project: FranBonath/genomics-status
    def get(self):
        limsl = lims.Lims(BASEURI, USERNAME, PASSWORD)
        # Sequencing queues are currently taken from the following steps:
        # MiSeq - Step 7: Denature, Dilute and load sample
        # NovaSeq - Step 11: Load to flow cell
        queues = {}
        queues['MiSeq'] = Queue(limsl, id='55')
        queues['NovaSeq'] = Queue(limsl, id='1662')

        methods = queues.keys()
        pools = {}

        for method in methods:
            pools[method] = {}
            if queues[method].artifacts:
                tree = ET.fromstring(queues[method].xml())
                for artifact in tree.iter('artifact'):
                    queue_time = artifact.find('queue-time').text
                    container = Container(limsl,
                                          uri=artifact.find('location').find(
                                              'container').attrib['uri']).name
                    attr_name = Artifact(limsl,
                                         uri=artifact.attrib['uri']).name
                    value = artifact.find('location').find('value').text
                    proj_and_samples = {}
                    conc_qpcr = ''
                    is_rerun = False  # default in case no 'Rerun' UDF is found
                    art = Artifact(limsl, uri=artifact.attrib['uri'])
                    if method == 'MiSeq':
                        # FinishedLibrary
                        if 'Concentration' in dict(art.udf.items()):
                            conc_qpcr = art.udf['Concentration']
                        # InhouseLibrary
                        elif 'Pool Conc. (nM)' in dict(art.udf.items()):
                            conc_qpcr = str(art.udf['Pool Conc. (nM)'])
                        is_rerun = art.udf.get('Rerun', False)
                    elif method == 'NovaSeq':
                        if 'Concentration' in dict(art.udf.items()):
                            conc_qpcr = art.udf['Concentration']
                            is_rerun = art.udf.get('Rerun', False)
                        else:
                            new_art = art.parent_process.input_output_maps[0][
                                0]
                            # Walk back through parent processes. Trial and error
                            # showed the values are found within the first 4
                            # preceding parent processes; if not found within 4
                            # iterations, they can be looked up manually in LIMS.
                            # The genologics API does not make clear which parent
                            # process holds the post-process values, and 4 covered
                            # the data at hand.
                            i = 0
                            while i < 4:
                                if 'Concentration' in dict(
                                        new_art['post-process-uri'].udf.items(
                                        )).keys():
                                    conc_qpcr = new_art[
                                        'post-process-uri'].udf[
                                            "Concentration"]
                                    is_rerun = new_art[
                                        'post-process-uri'].udf.get(
                                            'Rerun', False)
                                    break
                                else:
                                    new_art = new_art[
                                        'parent-process'].input_output_maps[0][
                                            0]
                                    i += 1

                    for sample in art.samples:
                        project = sample.project.id
                        if project in pools[method]:
                            if container in pools[method][project]['plates']:
                                pools[method][project]['plates'][container][
                                    'samples'].append(sample.name)
                            else:
                                pools[method][project]['plates'][container] = {
                                    'samples': [sample.name],
                                    'well': value,
                                    'queue_time': queue_time,
                                    'conc_pool_qpcr': conc_qpcr,
                                    'is_rerun': is_rerun
                                }
                        else:
                            setup = sample.project.udf['Sequencing setup']
                            lanes = sample.project.udf[
                                'Sequence units ordered (lanes)']
                            librarytype = sample.project.udf[
                                'Library construction method']
                            runmode = sample.project.udf['Sequencing platform']
                            final_loading_conc = 'TBD'
                            if method == 'NovaSeq':
                                try:
                                    final_loading_conc = Artifact(
                                        limsl, uri=artifact.attrib['uri']
                                    ).udf['Final Loading Concentration (pM)']
                                except KeyError:
                                    pass
                            pools[method][project] = {
                                'name': sample.project.name,
                                'setup': setup,
                                'lanes': lanes,
                                'runmode': runmode,
                                'final_loading_conc': final_loading_conc,
                                'librarytype': librarytype,
                                'plates': {
                                    container: {
                                        'samples': [sample.name],
                                        'well': value,
                                        'queue_time': queue_time,
                                        'conc_pool_qpcr': conc_qpcr,
                                        'is_rerun': is_rerun
                                    }
                                }
                            }
        self.set_header("Content-type", "application/json")
        self.write(json.dumps(pools))
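The pools structure serialized at the end is keyed first by method, then by project id. A hedged sketch with made-up ids and values, matching the keys assigned above:

# Made-up illustration of the `pools` dict right before json.dumps(pools).
pools = {
    'MiSeq': {
        'P123': {
            'name': 'Example_Project',
            'setup': '1x50',
            'lanes': 1,
            'runmode': 'MiSeq',
            'final_loading_conc': 'TBD',
            'librarytype': 'Finished library',
            'plates': {
                '27-12345': {
                    'samples': ['P123_101', 'P123_102'],
                    'well': '1:1',
                    'queue_time': '2020-01-01T00:00:00+01:00',
                    'conc_pool_qpcr': '2.5',
                    'is_rerun': False,
                },
            },
        },
    },
    'NovaSeq': {},
}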
Code example #5
def update_samplesheet(lims, process_id, artifact_id, output_file):
    """Update illumina samplesheet."""
    process = Process(lims, id=process_id)
    trim_last_base = True  # Used to set Read1EndWithCycle

    def get_project(projects, urgent=False):
        """Inner function to get a project name for samples."""
        if urgent:  # Sort projects for urgent samples by name
            projects = sorted(projects.items(), key=operator.itemgetter(0))
            for project in projects:
                if project[1] < 9:
                    return project[0]  # return first project with < 9 samples
            # Implicitly returns None if every project already has >= 9 samples.
        else:  # Sort projects for other samples by number of samples
            projects = sorted(projects.items(), key=operator.itemgetter(1))
            return projects[0][0]  # return the project with the fewest samples

    # Parse families
    families = {}
    for artifact in process.all_inputs():
        for sample in artifact.samples:
            if (
                'Dx Familienummer' in list(sample.udf) and
                'Dx NICU Spoed' in list(sample.udf) and
                'Dx Protocolomschrijving' in list(sample.udf)
            ):
                # Dx production sample
                family = sample.udf['Dx Familienummer']

                # Create family if not exist
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': 'unknown_project',
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False  # merge, deep sequencing (5x), etc. samples
                    }

                # Update family information
                if sample.udf['Dx Onderzoeksreden'] == 'Research':  # Dx research sample
                    for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
                        if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
                            project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
                            families[family]['project_type'] = project_type
                            families[family]['split_project_type'] = False
                            break

                else:  # Dx clinic sample
                    newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
                    if 'SNP fingerprint MIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'Fingerprint'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif 'PID09.V7_smMIP' in newest_protocol and not families[family]['NICU']:
                        project_type = 'ERARE'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                        trim_last_base = False
                    elif sample.udf['Dx NICU Spoed']:
                        families[family]['NICU'] = True
                        project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = False
                    elif 'elidS30409818' in newest_protocol and not families[family]['NICU']:
                        project_type = 'CREv2'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True
                    elif 'elidS31285117' in newest_protocol and not families[family]['NICU']:
                        project_type = 'SSv7'
                        families[family]['project_type'] = project_type
                        families[family]['split_project_type'] = True

                    # Set urgent status
                    if 'Dx Spoed' in list(sample.udf) and sample.udf['Dx Spoed']:
                        families[family]['urgent'] = True

                    # Set deviating status, remove urgent status if deviating
                    if (
                        ('Dx Mergen' in list(sample.udf) and sample.udf['Dx Mergen']) or
                        ('Dx Exoomequivalent' in list(sample.udf) and sample.udf['Dx Exoomequivalent'] > 1)
                    ):
                        families[family]['deviating'] = True
                        families[family]['urgent'] = False

            else:  # Other samples
                if 'GIAB' in sample.name.upper() and not sample.project:  # GIAB control samples
                    family = 'GIAB'
                else:
                    family = sample.project.name
                    # Remove 'dx' (ignore case) and strip leading space or _
                    family = re.sub('^dx[ _]*', '', family, flags=re.IGNORECASE)
                if family not in families:
                    families[family] = {
                        'samples': [],
                        'NICU': False,
                        'project_type': family,
                        'split_project_type': False,
                        'urgent': False,
                        'deviating': False
                    }

            # Add sample to family
            families[family]['samples'].append(sample)

    # Get all project types and count samples
    project_types = {}
    for family in families.values():
        if family['project_type'] in project_types:
            project_types[family['project_type']]['sample_count'] += len(family['samples'])
        else:
            project_types[family['project_type']] = {
                'sample_count': len(family['samples']),
                'projects': {},
                'split_project_type': family['split_project_type']
            }

    # Define projects per project_type
    for project_type in project_types:
        project_types[project_type]['index'] = 0
        if project_types[project_type]['split_project_type']:
            for i in range(0, int(project_types[project_type]['sample_count']/9+1)):
                project_types[project_type]['projects']['{0}_{1}'.format(project_type, i+1)] = 0
        else:
            project_types[project_type]['projects'][project_type] = 0

    # Set sample projects
    sample_projects = {}
    sample_sequence_names = {}

    # Urgent families / samples, skip deviating
    for family in [family for family in families.values() if family['urgent'] and not family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'], urgent=True)
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Deviating families / samples
    for family in [family for family in families.values() if family['deviating']]:
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Non urgent and non deviating families / samples
    normal_families = [family for family in families.values() if not family['urgent'] and not family['deviating']]
    for family in sorted(normal_families, key=lambda fam: (len(fam['samples'])), reverse=True):
        family_project = get_project(project_types[family['project_type']]['projects'])
        for sample in family['samples']:
            sample_sequence_name = get_sequence_name(sample)
            sample_sequence_names[sample.name] = sample_sequence_name
            sample_projects[sample_sequence_name] = family_project
            project_types[family['project_type']]['projects'][family_project] += 1

    # Check sequencer type
    # NextSeq runs need to reverse complement 'index2' for dual barcodes and 'index' for single barcodes.
    nextseq_run = 'nextseq' in process.type.name.lower()

    # Edit clarity samplesheet
    sample_header = ''  # empty until [data] section
    settings_section = False
    samplesheet_artifact = Artifact(lims, id=artifact_id)
    file_id = samplesheet_artifact.files[0].id

    for line in lims.get_file_contents(id=file_id).rstrip().split('\n'):
        if line.startswith('[Settings]') and trim_last_base:
            output_file.write('{line}\n'.format(line=line))
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            settings_section = True

        elif line.startswith('[Data]') and trim_last_base and not settings_section:
            output_file.write('[Settings]\n')
            output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
            output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
            output_file.write('{line}\n'.format(line=line))

        elif line.startswith('Sample_ID'):  # Samples header line
            sample_header = line.rstrip().split(',')
            sample_id_index = sample_header.index('Sample_ID')
            sample_name_index = sample_header.index('Sample_Name')
            sample_project_index = sample_header.index('Sample_Project')

            if 'index2' in sample_header:
                index_index = sample_header.index('index2')
            else:
                index_index = sample_header.index('index')

            output_file.write('{line}\n'.format(line=line))

        elif sample_header:  # Samples header seen, so continue with samples.
            data = line.rstrip().split(',')

            # Fix sample name -> use sequence name
            if data[sample_name_index] in sample_sequence_names:
                data[sample_name_index] = sample_sequence_names[data[sample_name_index]]

            # Set Sample_Project
            if data[sample_name_index] in sample_projects:
                data[sample_project_index] = sample_projects[data[sample_name_index]]

            # Overwrite Sample_ID with Sample_Name to get correct conversion output folder structure
            data[sample_id_index] = data[sample_name_index]

            # Reverse complement index for NextSeq runs
            if nextseq_run:
                data[index_index] = clarity_epp.export.utils.reverse_complement(data[index_index])

            output_file.write('{line}\n'.format(line=','.join(data)))
        else:  # Leave other lines untouched.
            output_file.write('{line}\n'.format(line=line))
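A hedged sketch of how this function might be invoked; the LIMS ids and output path are placeholders, while the Lims setup mirrors the other examples:

from genologics.config import BASEURI, USERNAME, PASSWORD
from genologics.lims import Lims

lims = Lims(BASEURI, USERNAME, PASSWORD)
with open('SampleSheet_updated.csv', 'w') as output_file:
    # '24-123456' and '92-654321' are made-up process and artifact ids.
    update_samplesheet(lims, process_id='24-123456',
                       artifact_id='92-654321', output_file=output_file)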
Code example #6
def main(lims, args, epp_logger):
    p = Process(lims, id=args.pid)

    if not args.path:
        args.path = os.getcwd()

    file_list = os.listdir(args.path)

    # Find all per input result files
    io = p.input_output_maps
    io_filtered = [
        (i, o) for i, o in io
        if o['output-generation-type'] == 'PerInput'
        and o['output-type'] == 'ResultFile'
    ]

    artifact_missing_file = []
    artifact_multiple_file = []
    found_files = []

    for input, output in io_filtered:
        i_a = Artifact(lims, id=input['limsid'])
        o_a = Artifact(lims, id=output['limsid'])

        # Input Well, Input Container
        i_w, i_c = i_a.location[1], i_a.location[0]

        # Well is typed without colon in filename:
        i_w = ''.join(i_w.split(':'))

        info = {
            'well': i_w,
            'container_id': i_c.id,
            'input_artifact_id': i_a.id
        }

        # Use a regular expression to find the file name given the container
        # and sample. This assumes the driver template file name ends with:
        # ${INPUT.CONTAINER.PLACEMENT}_${INPUT.NAME}_${INPUT.CONTAINER.LIMSID}_${INPUT.LIMSID}
        # However, names are excluded from the pattern to improve robustness.
        re_str = '.*{well}_.*_.*{container_id}_.*{input_artifact_id}'.format(**info)

        im_file_r = re.compile(re_str)
        fns = list(filter(im_file_r.match, file_list))  # list() so len() works below
        logging.info(
            ("Looking for file for artifact id: {input_artifact_id} "
             "from container with id: {container_id}.").format(**info))

        if len(fns) == 0:
            logging.warning(
                "No image file found for artifact with id {0}".format(i_a.id))
            artifact_missing_file.append(i_a)
        elif len(fns) > 1:
            logging.warning(
                ("Multiple image files found for artifact with id {0}, "
                 "please attach files manually").format(i_a.id))
            artifact_multiple_file.append(i_a)
        else:
            fn = fns[0]
            found_files.append(fn)
            logging.info(
                "Found image file {0} for artifact with id {1}".format(
                    fn, i_a.id))
            fp = os.path.join(args.path, fn)

            # Attach file to the LIMS
            location = attach_file(fp, o_a)
            logging.debug("Moving {0} to {1}".format(fp, location))

    warning = ""
    if len(artifact_missing_file):
        warning = "Did not find any file for {0} artifact(s). ".format(
            len(artifact_missing_file))

    if len(artifact_multiple_file):
        warning += "Found multiple files for {0} artifact(s), none of these were uploaded.".format(
            len(artifact_multiple_file))

    if warning:
        warning = "Warning: " + warning

    abstract = "Uploaded {0} file(s). {1}".format(len(found_files), warning)
    print >> sys.stderr, abstract  # stderr will be logged and printed in GUI
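To illustrate the filename convention the regular expression expects, a small hedged example with invented well, container, and artifact ids:

import re

# Driver template file name pattern (sample names excluded for robustness):
# ${INPUT.CONTAINER.PLACEMENT}_${INPUT.NAME}_${INPUT.CONTAINER.LIMSID}_${INPUT.LIMSID}
re_str = '.*{well}_.*_.*{container_id}_.*{input_artifact_id}'.format(
    well='A1', container_id='27-1111', input_artifact_id='2-2222')
print(bool(re.match(re_str, 'A1_SampleX_27-1111_2-2222.jpg')))  # True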
Code example #7
    def _get_individual_artifacts(self, sample):
        # Assume the first artifact is always named sample.id + 'PA1'.
        first_sample_artifact = Artifact(lims, id=sample.id + 'PA1')
        self.uniq_artifacts[sample.id] = first_sample_artifact
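A hedged note on the naming assumption: the analyte id is taken to be the submitted sample id with a 'PA1' suffix, e.g.:

# Illustrative only -- 'ABC123' is a made-up sample id.
sample_id = 'ABC123'
artifact_id = sample_id + 'PA1'  # -> 'ABC123PA1'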
Code example #8
    def get_artifacts(self):
        for inp, outp in self.input_output_maps:
            if outp.get("output-generation-type") == "PerAllInputs":
                continue
            self.artifacts.append(Artifact(self.lims, id=outp['limsid']))
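For comparison, the same per-input filtering can be written as a list comprehension; a sketch assuming a lims handle and a process exposing input_output_maps, as in example #6:

# Sketch: collect per-input output artifacts, skipping shared outputs.
artifacts = [
    Artifact(lims, id=outp['limsid'])
    for inp, outp in process.input_output_maps
    if outp.get('output-generation-type') != 'PerAllInputs'
]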