Exemple #1
0
class builder(Node):
    """Builds and adds a :term:`SKETCH` from given :term:`FORM` string to the :class:`.Case` using ideal SSE elements.

    If corrections are available and specified, these will be applied onto the :term:`SKETCH`.

    .. caution::
        In order to apply secondary structure or per layer corrections, the :mod:`.corrector` plugin
        needs to be set in the :class:`.Pipeline`.

    :param connectivity: Expected secondary structure connectivity. *Important*: at the moment only a single
                         connectivity is supported (default: True).
    :param motif: Expected Motif to be added to the :term:`SKETCH` (default: False).
    :param pick_aa: Desired amino acid type to use for the :term:`SKETCH` sequence. If not specified, it will
                    pseudorandomly assign amino acid types based on secondary structure propensity scores.
    :param write2disc: Dump the :term:`SKETCH` (default: True).

    :raises:
        :NodeDataError: On **check**. If the required fields to be executed are not there.
        :NodeDataError: On **execution**. If the :class:`.Case` contains anything other than one defined connectivity.
    """
    REQUIRED_FIELDS = ('topology.architecture', 'topology.connectivity')
    RETURNED_FIELDS = ()
    VERSION = 'v1.0'

    def __init__(self,
                 tag: int,
                 connectivity: Optional[bool] = True,
                 motif: Optional[bool] = False,
                 pick_aa: Optional[str] = None,
                 write2disc: Optional[bool] = True):
        """Store the node options; see the class documentation for their meaning."""
        super(builder, self).__init__(tag)

        self.connectivity = connectivity
        self.motif = motif
        self.pick_aa = pick_aa
        # Boolean switch (default True), hence annotated as bool rather than str.
        self.write2disc = write2disc

    def single_check(self, dummy: Dict) -> Dict:
        """Verify that every field in ``REQUIRED_FIELDS`` is present.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The data of the checked :class:`.Case`; this node adds no keywords.
        :raises NodeDataError: If a required field evaluates to ``None``.
        """
        kase = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if kase[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # This node does not add any keyword to the Case.
        return kase.data

    def single_execute(self, data: Dict) -> Dict:
        """Validate the connectivity count of the :class:`.Case` when connectivity is requested.

        :param data: Raw :class:`.Case` data.
        :raises NodeDataError: If ``connectivity`` is set and the case does not hold exactly one connectivity.
        """
        case = Case(data)

        # Apply connectivity?
        if self.connectivity:
            if case.connectivity_count == 0:
                raise NodeDataError(
                    'Minimum a single connectivity must be provided.')
            if case.connectivity_count > 1:
                raise NodeDataError(
                    'Only single connectivity cases can be build.')
Exemple #2
0
    def single_execute(self, data: Dict) -> Dict:
        """Append the configured subnames to the name of the :class:`.Case`.

        :param data: Raw :class:`.Case` data.
        :return: The case data; unchanged when the name already ends with the joined subnames.
        """
        kase = Case(data)
        suffix = '_'.join(self.subnames)

        # Check name was not already added.
        if kase.name.endswith(suffix):
            self.log.notice(f'Seems the subnames {suffix} already existed.')
            self.log.notice('Will NOT re-append.')
            # Return the raw data, exactly like the regular exit path below.
            return kase.data

        # Add new names
        oname = kase.name
        kase.data['configuration']['name'] = '_'.join([oname, *self.subnames])

        self.log.debug(f'Renamed case {oname} to {kase.name}')
        return kase.data
 def test_empty(self):
     """An empty definition must be rejected with both top-level error messages."""
     expected = {
         'configuration': ['Configuration data is required'],
         'topology': ['A topological definition is required'],
     }
     with pytest.raises(ValidationError) as excinfo:
         Case({})
     assert excinfo.value.messages == expected
Exemple #4
0
    def single_execute(self, data: Dict) -> Dict:
        """Apply the protocol-provided corrections followed by those stored in the case.

        :param data: Raw :class:`.Case` data.
        :return: The data of the corrected :class:`.Case`.
        """
        kase = Case(data)

        # Corrections attached to the case itself; a missing entry counts as none.
        from_case = kase['metadata.corrections']
        if from_case is None:
            from_case = []

        # Work on a private copy so the node's own list is not extended.
        pending = copy.deepcopy(self.corrections)
        pending.extend(from_case)

        # Apply each set of corrections, in order
        for correction in pending:
            self.log.info('Applying correction: {0}\n'.format(correction))
            kase = kase.apply_corrections(correction)

        self.log.debug(f'Applied a total of {len(pending)} corrections.')
        self.log.debug(f'{len(from_case)} from within the Case definition.')
        self.log.debug(f'{len(self.corrections)} from protocol-provided data.')
        return kase.data
Exemple #5
0
    def single_check(self, dummy: Dict) -> Dict:
        """Verify that every field in ``REQUIRED_FIELDS`` is present.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The data of the checked :class:`.Case`; no keyword is added.
        :raises NodeDataError: For the first required field that evaluates to ``None``.
        """
        kase = Case(dummy)

        # Look for the first required field that is missing, if any.
        missing = next(
            (field for field in self.REQUIRED_FIELDS if kase[field] is None),
            None)
        if missing is not None:
            raise NodeDataError(f'Field "{missing}" is required')

        return kase.data
Exemple #6
0
    def single_check(self, dummy: Dict) -> Dict:
        """Verify the required fields and declare the ``metadata.motif_picker`` keyword.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The case data, with ``metadata.motif_picker`` defaulted to an empty list.
        :raises NodeDataError: If a required field evaluates to ``None``.
        """
        kase = Case(dummy)

        # Every required field must be present.
        for field in self.REQUIRED_FIELDS:
            if kase[field] is not None:
                continue
            raise NodeDataError(f'Field "{field}" is required')

        # Declare the keyword this node provides.
        metadata = kase.data.setdefault('metadata', {})
        metadata.setdefault('motif_picker', [])
        return kase.data
 def test_empty_config(self):
     """An empty configuration must report the missing name and the missing topology."""
     expected = {
         'configuration': {'name': ['A case identifier is required']},
         'topology': ['A topological definition is required'],
     }
     with pytest.raises(ValidationError) as excinfo:
         Case({'configuration': {}})
     assert excinfo.value.messages == expected
Exemple #8
0
    def single_check(self, dummy: Dict) -> Dict:
        """Verify the required fields and declare the loop-related metadata keywords.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The case data, with ``metadata.loop_fragments`` and ``metadata.loop_lengths``
                 defaulted to empty lists.
        :raises NodeDataError: If a required field evaluates to ``None``.
        """
        kase = Case(dummy)

        # Every required field must be present.
        absent = next(
            (field for field in self.REQUIRED_FIELDS if kase[field] is None),
            None)
        if absent is not None:
            raise NodeDataError(f'Field "{absent}" is required')

        # Declare the keywords this node provides.
        metadata = kase.data.setdefault('metadata', {})
        for keyword in ('loop_fragments', 'loop_lengths'):
            metadata.setdefault(keyword, [])
        return kase.data
Exemple #9
0
    def execute(self, data: List[Dict]) -> List[Dict]:
        """Create one image per requested plot type for the given cases.

        When no output location is set, an ``images`` folder under the main path of the first
        case is used. Existing image files are skipped with a warning unless the
        ``system.overwrite`` option is enabled.

        :param data: Raw data of every :class:`.Case` to plot.
        :return: The input ``data``, unchanged.
        """
        kase = Case(data[0])

        # File management
        if self.outfile is None:
            self.outfile = kase.main_path.joinpath('images').resolve()
            self.outfile.mkdir(parents=True, exist_ok=True)
        if isinstance(self.outfile, str):
            self.outfile = Path(self.outfile).resolve()

        # Get output format
        outformat = TBcore.get_option('system', 'image')

        for ptype in self.plot_types:
            if self.outfile.is_dir():
                # The default prefix is fixed the first time it is needed and then reused.
                self.prefix = self.prefix if self.prefix is not None else ".".join(
                    [str(os.getppid()), f'{ptype}'])
                thisoutfile = self.outfile.joinpath(".".join(
                    [self.prefix, ptype + outformat]))
            else:
                thisoutfile = Path(str(self.outfile) + '.' + ptype + outformat)
            thisoutfile.parent.mkdir(parents=True, exist_ok=True)
            if not TBcore.get_option('system',
                                     'overwrite') and thisoutfile.is_file():
                self.log.warning(
                    f'Unable to overwrite file {thisoutfile}: Already exists')
                continue

            # Plot parameters are only forwarded when some were provided for this type.
            if not self.plot_params[ptype]:
                fig, ax = getattr(pts, ptype)(self.log,
                                              [Case(i) for i in data])
            else:
                fig, ax = getattr(pts,
                                  ptype)(self.log, [Case(i) for i in data],
                                         self.plot_params[ptype])
            plt.tight_layout()
            plt.savefig(str(thisoutfile), dpi=300)

            self.log.info(f'Creating new image at: {str(thisoutfile)}')

        # Return after the loop so that every plot type is processed and the data is
        # handed back even when nothing was drawn.
        return data
Exemple #10
0
class fragment_maker(Node):
    """Creates or mixes fragments that are needed in multiple Rosetta protocols. Multiple ways of creating fragments
    are possible through different protocols.

    .. note::
        Currently, solely the ``loop_fragment`` protocol is implemented.

    .. caution::
        In order to create fragments with the ``loop_fragment`` protocol, the :mod:`.loop_fragments` plugin
        needs to be set in the :class:`.Pipeline`.

    :param protocol: Fragment creation protocol to be used.
    :param script: Rosetta script to pick fragments.

    :raises:
        :NodeDataError: On **check**. If the required fields to be executed are not there.
        :NodeDataError: On **execution**. If the :class:`.Case` contains anything other than one defined connectivity.
        :NodeMissingError: On **execution**. If required variable inputs are not there.
    """
    REQUIRED_FIELDS = ('metadata.loop_fragments', 'metadata.loop_lengths')
    # The trailing comma makes this a one-element tuple instead of a plain string.
    RETURNED_FIELDS = ('metadata.fragments', )
    VERSION = 'v1.0'

    def __init__(self,
                 tag: int,
                 protocol: str,
                 script: Optional[Union[Path, str]] = None):
        """Store the node options; see the class documentation for their meaning."""
        super(fragment_maker, self).__init__(tag)

        self.protocol = protocol
        self.script = script

    def single_check(self, dummy: Dict) -> Dict:
        """Verify the required fields and declare the ``metadata.fragments`` keyword.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The case data, with ``metadata.fragments`` defaulted to an empty dictionary.
        :raises NodeDataError: If a required field evaluates to ``None``.
        """
        kase = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if kase[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # Include what keywords it adds
        kase.data.setdefault('metadata', {}).setdefault('fragments', {})
        return kase.data

    def single_execute(self, data: Dict) -> Dict:
        """Prepare the fragment description and reject cases with several connectivities.

        :param data: Raw :class:`.Case` data.
        :raises NodeDataError: If the case holds more than one connectivity.
        """
        case = Case(data)
        # From here on ``data`` holds the fragment description, not the raw case data.
        data = {'protocol': self.protocol, 'files': []}

        # Fragments can only be made for a full, reoriented Case.
        if case.connectivity_count > 1:
            raise NodeDataError(
                'FunFolDes can only be applied to one connectivity.')
    def test_architecture(self):
        """Check architecture parsing, its derived shapes, and sketch plotting with corrections.

        :return: Figure with the plain sketch and two ``xalign``-corrected variants.
        """

        def layer_box(width):
            # Expected bounding box of a layer that only extends along its width.
            return {
                'bottom': 0,
                'hight': 0,
                'left': 0,
                'right': width,
                'top': 0,
                'width': width
            }

        # Architecture without explicit lengths.
        case = Case('test_architecture').add_architecture('5E.2H')
        assert case.shape == (5, 2)
        assert case.shape_len == ((7, 7, 7, 7, 7), (13, 13))
        assert case.architecture_str == '5E.2H'
        assert case.center_shape == {'A': layer_box(19.4), 'B': layer_box(10.0)}

        # A second architecture cannot be added on top of an existing one.
        with pytest.raises(ValidationError) as excinfo:
            case.add_architecture('5E:8:8:7:7:7.2H:18:19')
        assert excinfo.value.messages == [
            'An arquitecture is already defined.'
        ]

        # Architecture with explicit lengths.
        case = Case('test_architecture').add_architecture('5E:8:8:7:7:7.2H:18:19')
        assert case.shape == (5, 2)
        assert case.shape_len == ((8, 8, 7, 7, 7), (18, 19))
        assert case.connectivity_count == 0
        assert case.connectivities_str == []

        # One panel per alignment of layer B; ``None`` stands for the uncorrected case.
        fig = plt.figure(figsize=(15, 5))
        for column, xalign in enumerate((None, 'center', 'right')):
            axis = plt.subplot2grid((1, 3), (0, column), fig=fig)
            if xalign is None:
                sketch = case
            else:
                sketch = case.apply_corrections({'B': {'xalign': xalign}})
            plot_case_sketch(sketch, axis)
        return fig
 def test_minimal(self):
     """A case built from a name alone has an empty architecture and can be written out."""
     case = Case('test_minimal')
     assert case['configuration.name'] == 'test_minimal'
     assert case['topology.architecture'] == [[]]
     assert 'architecture' not in case
     assert case.shape == ()

     # Write once with the default format and once explicitly as JSON.
     for options in ({}, {'format': 'json'}):
         case.write(self.datadir, **options)
Exemple #13
0
    def single_execute(self, data: Dict) -> Dict:
        """Evaluate representatives for the case and return the first result.

        :param data: Raw :class:`.Case` data.
        :return: The first element produced by ``eval_representatives``.
        :raises IndexError: If ``eval_representatives`` produces nothing.
        """
        kase = Case(data)

        # If connectivities are pre-specified, only make those; otherwise explore them first.
        if kase.connectivity_count > 0:
            source = kase
        else:
            source = self.explore_connectivities(kase)

        new_cases = list(
            self.eval_representatives(source, self.representatives,
                                      self.sampling))
        self.log.notice(f'case count: {len(new_cases)}')
        return new_cases[0]
Exemple #14
0
    def single_execute(self, data: Dict) -> Dict:
        """Load the motif and append its description to ``metadata.motif_picker``.

        :param data: Raw :class:`.Case` data.
        :return: The case data including the new ``motif_picker`` entry.
        :raises NodeDataError: If loading the motif raises a ``StructuralError``.
        """
        kase = Case(data)
        motif_id = f'motif_{self.identifier}'

        # Create a working folder
        workdir = kase.undirected_path.joinpath(motif_id)
        workdir.mkdir(parents=True, exist_ok=True)

        # Load Structure and create eigens
        try:
            motifs = self.Motif(*reverse_motif(
                self.log, self.source, self.selection, self.attach,
                self.hotspot, self.identifier, self.binder))
        except StructuralError as se:
            raise NodeDataError(str(se))

        # Attach data and return
        result = {'id': motif_id, 'data_dir': str(workdir), 'motifs': motifs}
        picked = kase.data.setdefault('metadata', {}).setdefault(
            'motif_picker', [])
        picked.append(result)
        return kase.data
Exemple #15
0
    def next(self):
        """Advance to, and execute, the next plugin on the protocol list."""
        self.__next__()

    def _load_case(self, case):
        """Store working and backup copies of the given case(s).

        :param case: A single :class:`.Case` or a list of items accepted by :class:`.Case`.
                     Any other input leaves the instance untouched.
        """
        if isinstance(case, Case):
            sources = [case]
        elif isinstance(case, list):
            sources = case
        else:
            return

        # Two independent lists, so the backup does not share objects with the working set.
        self.case = [Case(item) for item in sources]
        self._bckcase = [Case(item) for item in sources]

    def __next__(self):
        if self.current + 1 >= len(self.protocols):
            raise StopIteration('Finished interactive protocols.')
        self.current += 1

        # Get current plugin identifier
        current = copy.deepcopy(self.protocols[self.current])
        name = current.pop('name', None)

        # Execute current plugin
        try:
            self.case = topobuilder.plugin_source.load_plugin(name).apply(
                self.case, **current, prtid=-1)
Exemple #16
0
    protocols = kase['configuration.protocols']
    if protocols is not None:
        if len(protocols) == 1 and not bool(protocols[0]):
            protocols = None
    if protocols is None and protocol is None:
        raise EmptyProtocolError('There are no protocols to run')
    if protocol is not None and protocols is not None:
        raise ProtocolIncompatibilityError(
            'Protocols are provided both through file and in the Case. Pick one.'
        )
    if protocol is not None:
        protocol = str(Path(protocol).resolve())
        try:
            protocols = json.loads("".join(
                [x.strip() for x in open(protocol).readlines()]))
            case_format = 'json'
        except json.JSONDecodeError:
            protocols = yaml.load(open(protocol), Loader=yaml.Loader)
            case_format = 'yaml'

    p = Pipeline(protocols).check(kase.data)
    cases = p.execute([
        kase.data,
    ])
    for i, c in enumerate(cases):
        cases[i] = Case(c).assign_protocols(protocols)
        p.log.notice(
            f'New case file created at {cases[i].write(format=case_format)}')
    return cases
Exemple #17
0
class statistics( Node ):
    """Various statistics on the sequence and structure level are computed depending on available scripts.

    .. note::
        Depends on the ``statistic.molprobity`` configuration option.
        Depends on the ``statistic.tmalign`` configuration option.
        Depends on the ``statistic.trrosetta_repo`` configuration option.
        Depends on the ``statistic.trrosetta_wts`` configuration option.
        Depends on the ``statistic.trrosetta_env`` configuration option.

    .. caution::
        In order to execute this :class:`.Node`, we highly recommend to install `trRosetta` with all dependencies.
        The external conda environment can be specified in the ``statistic.trrosetta_env`` configuration option.

    .. admonition:: To Developers

        Due to its use in multiple :class:`.Node`, functions to deal with this :class:`.Node` are mostly located
        in the respective module file and external scripts are located in this :class:`.Node` directory.

    :param source: Plugin designs come from, e.g. :class:`funfoldes`.
    :param stage: The type of design, e.g. folded or designed.
    :param analysis: Geometric or quality assessment.
    :param metric: Type of geometric or quality assessment.
    :param kwargs: Extra keyword arguments; accepted but not stored by the constructor.

    :raises:
        :NodeDataError: On **check**. If the required fields to be executed are not there.
    """
    REQUIRED_FIELDS = ()
    RETURNED_FIELDS = ()
    VERSION = 'v1.0'

    def __init__( self, tag: int,
                        source: str,
                        stage: str,
                        analysis: str,
                        metric: Optional[str] = None,
                        **kwargs ) -> None:
        """Store the node options; see the class documentation for their meaning."""
        super(statistics, self).__init__(tag)

        self.source = source
        self.stage = stage
        self.analysis = analysis
        self.metric = metric


    def single_check( self, dummy: Dict ) -> Dict:
        """Verify the required fields and declare the keyword of the selected analysis.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The case data; for the ``geometry`` and ``quality`` analyses the matching
                 ``metadata.statistic`` entry is defaulted to an empty string.
        :raises NodeDataError: If a required field evaluates to ``None``.
        """
        case = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if case[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # Include what keywords it adds. ``metadata`` is created on demand so a case
        # without it does not fail here.
        if self.analysis in ('geometry', 'quality'):
            statistic = case.data.setdefault('metadata', {}).setdefault('statistic', {})
            statistic.setdefault(self.analysis, '')
        return case.data
Exemple #18
0
        case = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if case[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # Include what keywords it adds (in this instance, nothing)
        if self.analysis == 'geometry':
            case['metadata'].setdefault('statistic', {}).setdefault('geometry', '')
        if self.analysis == 'quality':
            case['metadata'].setdefault('statistic', {}).setdefault('quality', '')
        return case.data

    def single_execute( self, data: Dict ) -> Dict:
        """Prepare the statistic folders and queue the PDB extraction when none was done yet.

        :param data: Raw :class:`.Case` data.
        """
        case = Case(data)

        # Folder tree for the first connectivity, plus the internal folder for PDB files.
        wfolder = case.connectivities_paths[0].joinpath(f'statistic/{self.source}_{self.stage}/')
        thisfolder = wfolder.joinpath('_pdb_files')
        for folder in (wfolder, thisfolder):
            folder.mkdir(parents=True, exist_ok=True)

        # Commands
        commands = []

        # Get data by source: only needed while the PDB folder is still empty.
        if not os.listdir(thisfolder):
            commands.extend(self.funfoldes2pdb(case, thisfolder))
Exemple #19
0
    def single_execute(self, data: Dict) -> Dict:
        """Search loops for each configured group of secondary structures with MASTER.

        For every entry of ``self.steps`` a working folder is created, stored checkpoints
        are reloaded, a PDB query is built and, unless the MASTER output file already
        exists, MASTER is executed and its matches are turned into fragment data. Loop
        fragments and loop lengths are appended to ``metadata.loop_fragments`` and
        ``metadata.loop_lengths``.

        :param data: Raw :class:`.Case` data.
        :return: The updated case.
            NOTE(review): the :class:`.Case` object itself is returned, not ``kase.data``,
            although the annotation says ``Dict`` -- confirm what callers expect.
        :raises NodeDataError: If the case does not hold exactly one connectivity.
        """
        kase = Case(data)
        # Loop MASTER is only applied to a Case with one single connectivity
        if kase.connectivity_count != 1:
            err = f'{self.nodeID} can only be applied to one connectivity. '
            err += f'Current case contains a total of {kase.connectivity_count}.'
            raise NodeDataError(err)
        # And has to be reoriented
        if not kase.is_reoriented:
            self.log.debug(
                'Topology was provided without oriented SSE -> orienting.')
            kase = kase.apply_topologies()[0]

        # Generate the folder tree for a single connectivity.
        folders = kase.connectivities_paths[0].joinpath('loopgroup_master')
        folders.mkdir(parents=True, exist_ok=True)

        # Global step distance
        # NOTE(review): only referenced by commented-out code below.
        loop_step = kase.cast_absolute(
        )['configuration.defaults.distance.loop_step']

        # Output keys
        kase.data.setdefault('metadata', {}).setdefault('loop_fragments', [])
        kase.data.setdefault('metadata', {}).setdefault('loop_lengths', [])

        # Find steps: Each pair of secondary structure.
        #it = kase.connectivities_str[0].split('.')
        #steps = [it[i:i + 2] for i in range(0, len(it) - 1)]
        lengths = kase.connectivity_len[0]
        # Running first position of the next SSE/loop/SSE stretch (starts at 1).
        start = 1

        for i, (group, infos) in enumerate(self.steps.items()):
            self.log.info(f'Search at: {group}')

            # 1. Make folders and files
            wfolder = folders.joinpath(f'loopgroup{i + 1:02d}')
            wfolder.mkdir(parents=True, exist_ok=True)
            outfile = wfolder.joinpath(f'loopgroup_master.iter{i + 1:02d}.pdb')
            outfilePDS = wfolder.joinpath(
                f'loopgroup_master.iter{i + 1:02d}.pds')
            masfile = outfile.with_suffix('.master')

            # Loop identifiers of this group: ';'-separated, with 'x' entries dropped.
            # NOTE(review): looked up under the key 'groupNN' rather than through ``infos``;
            # presumably both refer to the same entry -- confirm.
            gr = self.steps[f'group{i + 1:02d}'][-1].split(';')
            gr = [int(g) for g in gr if g != 'x']
            for g in gr:
                checkpoint = wfolder.joinpath(f'loop{g:02d}/checkpoint.json')
                # 2. Check if checkpoint exists, retrieve and skip
                reload = TButil.checkpoint_in(self.log, checkpoint)
                if reload is not None:
                    self.log.debug(
                        f'Reloading loopgroup{i + 1:02d} with loop{g:02d}')
                    kase.data['metadata']['loop_fragments'].append(reload)
                    kase.data['metadata']['loop_lengths'].append(
                        int(reload['edges']['loop']))
                    start += (int(reload['edges']['sse1']) +
                              int(reload['edges']['loop']))
                    # Only moves on to the next loop of this group; the group itself
                    # still goes through steps 3 and 4 below.
                    continue

            # 3. Check hairpin
            # Get SSEs and identifiers
            sses = [kase.get_sse_by_id(sse) for sse in infos[0]]
            #sse1_name, sse2_name = sse1['id'], sse2['id']
            #is_hairpin = self.check_hairpin(sse1_name, sse2_name)

            # 4. Generate structures
            sses = TBstructure.build_pdb_object(self.log,
                                                sses,
                                                5,
                                                concat=False,
                                                outfile=outfile)

            # MASTER and everything after it only run when its output file is missing.
            if not masfile.is_file():
                # 5. calculate expected loop length by loop_step
                #Mdis, mdis = TBstructure.get_loop_length(self.log, sse1, sse2, loop_step, self.loop_range)

                # 6. Run MASTER
                #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                self.log.debug(f'FILE {outfilePDS}')
                # -> make PDS query
                cmd = TBMaster.createPDS(outfile, outfilePDS)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                run(cmd, stdout=DEVNULL)
                # -> run MASTER
                cmd = TBMaster.master_groupedgap(outfilePDS, self.pdsdb,
                                                 masfile, infos[1],
                                                 self.rmsd_cut)
                self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                # NOTE(review): ``result`` is only referenced by the commented-out code below.
                result = run(cmd, stdout=DEVNULL)

                # TODO: implement motif compability
                # if result.returncode: # no loop between that connection, e.g. a motif ranging over multiple sse with keeping the loops
                #     # 4. Generate structures
                #     self.log.debug('generate combined structure')
                #     sse = pd.concat([sse1, sse2], sort=False)
                #
                #     # 6. Run MASTER
                #     self.log.debug(Path(outfile))
                #     #outfilePDS = outfile if outfile is not None else Path(outfile).with_suffix('.pds')
                #     self.log.debug(f'FILE {outfilePDS}')
                #     # -> make PDS query
                #     cmd = TBMaster.createPDS(outfile, outfilePDS)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #     # -> run MASTER
                #     cmd = TBMaster.master_nogap(outfilePDS, self.pdsdb, masfile, self.rmsd_cut)
                #     self.log.debug(f'EXECUTE: {" ".join(cmd)}')
                #     run(cmd, stdout=DEVNULL)
                #
                #     # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                #     match_count = self.minimize_master_file(masfile)
                #     self.log.debug(f'match count here {match_count}')
                #
                #     # 7. Retrieve MASTER data
                #     dfloop = self.process_master_data_no_gap(masfile, sse1_name, sse2_name)
                #     sse1l, loopl, sse2l = lengths[i], int(dfloop['loop_length'].values[0]), lengths[i + 1]
                #     total_len = sse1l + loopl + sse2l
                #     end_edge = total_len + start - 1
                #     edges = {'ini': int(start), 'end': int(end_edge), 'sse1': int(sse1l), 'loop': int(loopl), 'sse2': int(sse2l)}
                #     self.log.debug(f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}')
                #     self.log.debug(dfloop.to_string())
                #
                #     # 8. Bring and Combine fragments from the different sources.
                #     loop_data = self.make_fragment_files(dfloop, edges, masfile)
                #     loop_data['match_count'] += match_count

                #else:

                # 6. Minimize master data (pick top_loopsx3 lines to read and minimize the files)
                match_count = self.minimize_master_file(masfile)
                # 7. Retrieve MASTER data
                df_container = self.process_master_data(
                    masfile, infos[0], infos[1], infos[2])

                # One pass per distinct ``order`` value found in the MASTER results.
                for indx in list(df_container.order.drop_duplicates()):
                    dfloop = df_container[df_container.order == indx]
                    # NOTE(review): the SSE lengths are indexed with the group counter ``i``
                    # on every pass of this inner loop -- confirm this is intended when a
                    # group yields several ``order`` values.
                    sse1l, loopl, sse2l = lengths[i], int(
                        dfloop['loop_length'].values[0]), lengths[i + 1]
                    total_len = sse1l + loopl + sse2l
                    end_edge = total_len + start - 1
                    edges = {
                        'ini': int(start),
                        'end': int(end_edge),
                        'sse1': int(sse1l),
                        'loop': int(loopl),
                        'sse2': int(sse2l)
                    }
                    self.log.debug(
                        f'INI: {start}; END: {end_edge}; SSE1: {sse1l}; LOOP: {loopl}; SSE2: {sse2l}'
                    )
                    self.log.debug(dfloop.to_string())

                    # 8. Bring and Combine fragments from the different sources.
                    loop_data, nfolder = self.make_fragment_files(dfloop,
                                                                  edges,
                                                                  masfile,
                                                                  no_loop=True)
                    loop_data['match_count'] += match_count

                    # 9. Save data in the Case
                    kase.data['metadata']['loop_fragments'].append(loop_data)
                    kase.data['metadata']['loop_lengths'].append(int(loopl))
                    # The next stretch begins after this SSE and its loop.
                    start += (sse1l + loopl)

                    # 10. Checkpoint save
                    checkpoint = nfolder.joinpath('checkpoint.json')
                    TButil.checkpoint_out(self.log, checkpoint, loop_data)
        return kase
    def test_topology(self):
        """Check topology parsing, duplicate and mismatch handling, and splitting into single cases.

        :return: Figure with one sketch per applied topology.
        """
        single = 'A2E.A1E.B1H.A3E.B2H.A5E.A4E'

        # A topology on an empty case also defines the architecture.
        case = Case('test_topology').add_topology(single)
        assert case.shape == (5, 2)
        assert case.shape_len == ((7, 7, 7, 7, 7), (13, 13))
        assert case.architecture_str == '5E.2H'
        assert case.connectivity_count == 1
        assert case.connectivities_str == (single, )

        # Adding the same topology again is accepted; a mismatching one is rejected.
        case = case.add_topology(single)
        with pytest.raises(ValidationError) as excinfo:
            case.add_topology('A2E.A1E.B1H.A3E.B2H.A5E.A4E.B3H')
        assert excinfo.value.messages == [
            'Provided topology does not match existing architecture.'
        ]
        assert case.connectivity_count == 1
        assert case.connectivities_str == (single, )

        applied = case.apply_topologies()
        assert len(applied) == 1
        assert applied[0].shape_len == case.shape_len
        assert applied[0].shape == case.shape
        assert applied[0].connectivities_str == case.connectivities_str

        # Several topologies on top of an explicit architecture.
        case = Case('test_topology').add_architecture('5E:8:8:7:7:7.2H:18:19')
        for topology in ('A2E8.A1E8.B1H18.A3E7.B2H19.A5E7.A4E7',
                         'A2E8.A1E8.B1H18.A3E7.B2H19.A4E7.A5E7',
                         'A1E8.A2E8.B1H18.A3E7.B2H19.A4E7.A5E7'):
            case = case.add_topology(topology)

        expected = ('A2E.A1E.B1H.A3E.B2H.A5E.A4E',
                    'A2E.A1E.B1H.A3E.B2H.A4E.A5E',
                    'A1E.A2E.B1H.A3E.B2H.A4E.A5E')
        assert case.shape == (5, 2)
        assert case.shape_len == ((8, 8, 7, 7, 7), (18, 19))
        assert case.architecture_str == '5E.2H'
        assert case.connectivity_count == 3
        assert case.connectivities_str == expected

        # Each applied topology becomes a case with exactly one connectivity.
        applied = case.apply_topologies()
        assert len(applied) == 3
        for column, connectivity in enumerate(expected):
            assert applied[column].connectivity_count == 1
            assert applied[column].connectivities_str == (connectivity, )

        fig = plt.figure(figsize=(15, 5))
        for column in range(3):
            axis = plt.subplot2grid((1, 3), (0, column), fig=fig)
            plot_case_sketch(applied[column], axis)
        return fig
Exemple #21
0
class hybridize(Node):
    """Run Rosetta's hybridize protocol to generate designs.

    .. caution::
        Due to the ``FastDesignMover``, this :class:`funfoldes` may take *a lot* of time. If possible, please
        use the ``slurm.use`` configuration. In case this is not possible, you may reduce the number of decoys
        to be generated via the `nstruct` parameter option.


    :param nstruct: Number of decoys to be generated (default: 2000).
    :param natbias: Score function bias towards per secondary structure types (default: 2.5).
    :param layer_design: If :class:`funfoldes` should use a layer design approach (default: True).

    :raises:
        :NodeDataError: On **check**. If the required fields to be executed are not there.
        :NodeMissingError: On **execution**. If required variable inputs are not there.
    """
    REQUIRED_FIELDS = ('metadata.fragments', 'metadata.loop_lengths')
    # The trailing comma makes this a one-element tuple instead of a plain string.
    RETURNED_FIELDS = ('metadata.hybridize', )
    VERSION = 'v1.0'

    def __init__(self,
                 tag: int,
                 nstruct: Optional[int] = 2000,
                 natbias: Optional[float] = 2.5,
                 layer_design: Optional[bool] = True) -> None:
        """Store the node options; see the class documentation for their meaning."""
        super(hybridize, self).__init__(tag)

        self.nstruct = nstruct
        self.natbias = natbias
        self.layer_design = layer_design

    def single_check(self, dummy: Dict) -> Dict:
        """Verify the required fields and declare the ``metadata.hybridize`` keyword.

        :param dummy: Raw :class:`.Case` data used for the dry-run check.
        :return: The case data, with ``metadata.hybridize`` defaulted to an empty dictionary.
        :raises NodeDataError: If a required field evaluates to ``None``.
        """
        kase = Case(dummy)

        # Check what it needs
        for itag in self.REQUIRED_FIELDS:
            if kase[itag] is None:
                raise NodeDataError(f'Field "{itag}" is required')

        # Include what keywords it adds
        kase.data.setdefault('metadata', {}).setdefault('hybridize', {})
        return kase.data

    def single_execute(self, data: Dict) -> Dict:
        """Set up the run description and reload a stored checkpoint when there is one.

        :param data: Raw :class:`.Case` data.
        :return: When a checkpoint is found, the case with ``metadata.hybridize`` set to it.
            NOTE(review): that path returns the :class:`.Case` object rather than its
            ``data`` although the annotation says ``Dict`` -- confirm what callers expect.
        """
        case = Case(data)

        # From here on ``data`` is the description of this run, with one entry per stage
        # (assembly and design).
        data = {
            'script': {
                'assembly': '',
                'design': ''
            },
            'cmd': {
                'assembly': [
                    Path(TBcore.get_option('rosetta', 'scripts')),
                    '-parser:protocol'
                ],
                'design': [
                    Path(TBcore.get_option('rosetta', 'scripts')),
                    '-parser:protocol'
                ]
            },
            'silent_files': {
                'assembly': [],
                'design': []
            },
            'minisilent': {
                'assembly': '',
                'design': ''
            }
        }

        # Generate the folder tree for a single connectivity.
        wpaths = utils.folder_structure(case)

        # Check if checkpoint exists, retrieve and skip
        reload = TButil.checkpoint_in(self.log, wpaths['checkpoint'])
        if reload is not None:
            case.data['metadata']['hybridize'] = reload
            return case
Exemple #22
0
        stepfolder = wfolder.joinpath('step{:02d}'.format(i + 1))
        stepfolder.mkdir(parents=True, exist_ok=True)
        query = stepfolder.joinpath('imaster.query{:02d}.pdb'.format(i + 1))
        checkpoint = stepfolder.joinpath('checkpoint.json')

        reload = TButil.checkpoint_in(checkpoint)
        if reload is not None:
            kase.data['metadata']['imaster'].setdefault('step{:02d}'.format(i + 1), reload)
            kase.data['metadata']['corrections'].append(reload['corrections'])
            corrections.update(reload['corrections'])
            done_l.update(reload['layers'])
            # CKase = CKase.apply_corrections(corrections)
            continue

        # Apply corrections from previous steps and rebuild
        CKase = Case(kase).apply_corrections(corrections)
        with TBcore.on_option_value('system', 'overwrite', True):
            CKase = plugin_source.load_plugin('builder').case_apply(CKase, connectivity=True)

        # Generate structure query and get layer displacements
        layers = set(itemgetter(*step)(ascii_uppercase))
        sses = [sse for sse in CKase.ordered_structures if sse['id'][0] in layers]
        structure, cends = TButil.build_pdb_object(sses, 3)
        TButil.plugin_filemaker('Writing structure {0}'.format(query))
        structure.write(output_file=str(query), format='pdb', clean=True, force=True)

        flip = cycle([CKase['configuration.flip_first'], not CKase['configuration.flip_first']])
        counts = np.asarray([sse['length'] for sse in CKase.ordered_structures])
        cends = np.cumsum(counts)
        cstrs = cends - counts + 1