예제 #1
0
def get_feature(molgrp):
    """Load every mapped feature of a molecule group as an array.

    Args:
        molgrp: HDF5-like group of one molecule. It must provide the
            datasets ``grid_points/x``, ``grid_points/y`` and
            ``grid_points/z`` and a ``mapped_features`` group that
            contains one sub-group per feature type, each holding one
            group per feature.

    Returns:
        dict: ``{feature_name: array}``. Features flagged as sparse (their
        ``sparse`` attribute is true) are rebuilt with
        ``sparse.FLANgrid(...).to_dense()`` using the grid shape; the
        others are returned as stored in their ``value`` dataset.

    Note:
        The result is keyed by feature name only, so two features with the
        same name in different feature-type groups overwrite each other
        (the last one visited wins).
    """
    # the dense shape is the number of grid points along each axis
    shape = tuple(len(molgrp['grid_points/' + axis]) for axis in 'xyz')

    data_dict = {}

    # outer level: feature types, inner level: individual features
    for featgrp in molgrp['mapped_features'].values():
        for name, subgrp in featgrp.items():

            # ``dataset[()]`` reads the whole dataset; the former
            # ``dataset.value`` accessor no longer exists in h5py >= 3.0
            if not subgrp.attrs['sparse']:
                data_dict[name] = subgrp['value'][()]
            else:
                spg = sparse.FLANgrid(sparse=True,
                                      index=subgrp['index'][()],
                                      value=subgrp['value'][()],
                                      shape=shape)
                data_dict[name] = spg.to_dense()

    return data_dict
예제 #2
0
    def hdf5_grid_data(self, dict_data, data_name):
        """Write the elements of one mapped feature into the hdf5 file.

        Each entry of ``dict_data`` is stored in its own sub-group of
        ``<mol_basename>/mapped_features/<data_name>``. An existing
        sub-group with the same key is deleted and recreated.

        Args:
            dict_data (dict): feature values stored as a dict
            data_name (str): feature name
        """
        # all datasets are written with the same compression settings
        gzip_opts = {'compression': 'gzip', 'compression_opts': 9}

        # group of the feature, created on first use
        group_path = self.mol_basename + '/mapped_features/' + data_name
        feat_group = self.hdf5.require_group(group_path)

        for key, value in dict_data.items():

            # start from a fresh sub-group for this element
            if key in feat_group:
                del feat_group[key]
            sub_feat_group = feat_group.create_group(key)

            # no sparse attempt requested: store the values as given
            if not self.try_sparse:
                sub_feat_group.attrs['sparse'] = False
                sub_feat_group.attrs['type'] = 'sparse_matrix'
                sub_feat_group.create_dataset('value', data=value,
                                              **gzip_opts)
                continue

            # let FLANgrid decide whether the grid is sparse, timing the
            # conversion when requested
            start = time()
            spg = sparse.FLANgrid()
            spg.from_dense(value, beta=1E-2)
            if self.time:
                print('      SPG time %f ms' % ((time() - start) * 1000))

            # the attributes are identical for both outcomes; only a
            # sparse grid additionally stores its index dataset
            sub_feat_group.attrs['sparse'] = spg.sparse
            sub_feat_group.attrs['type'] = 'sparse_matrix'
            if spg.sparse:
                sub_feat_group.create_dataset('index', data=spg.index,
                                              **gzip_opts)
            sub_feat_group.create_dataset('value', data=spg.value,
                                          **gzip_opts)
예제 #3
0
def _context_sparse(item, treeview, position):
    """Handle the context menu of a mapped-feature item.

    Two operations are offered through ``get_actions``:

    * ``Load Matrix``: read the feature (rebuilding a dense array through
      ``sparse.FLANgrid`` when its ``sparse`` attribute is true) and emit
      it through ``treeview.emitDict``.
    * ``Plot Histogram``: emit the raw ``value`` dataset, then emit an
      ``exec_cmd`` entry holding the plotting commands.

    Args:
        item: tree item; ``data_file``, ``name``, ``basename`` and
            ``parent`` are read from it.
        treeview: view owning the ``emitDict`` signal.
        position: position forwarded to ``get_actions``.
    """
    # NOTE(review): this menu object is never used below; kept because the
    # original created it -- confirm whether it can be dropped.
    menu = QtWidgets.QMenu()
    list_operations = ['Load Matrix', 'Plot Histogram']
    action, actions = get_actions(treeview, position, list_operations)

    name = item.basename + '_' + item.name.split('/')[2]

    if action == actions['Load Matrix']:

        subgrp = item.data_file[item.name]
        data_dict = {}

        # ``dataset[()]`` reads the whole dataset; the former
        # ``dataset.value`` accessor no longer exists in h5py >= 3.0
        if not subgrp.attrs['sparse']:
            # NOTE(review): the dense branch is keyed by the full
            # ``item.name`` while the sparse branch uses ``name`` --
            # confirm which key the receiver expects.
            data_dict[item.name] = subgrp['value'][()]
        else:
            # the molecule group sits three levels above the feature item
            molgrp = item.data_file[item.parent.parent.parent.name]
            shape = tuple(len(molgrp['grid_points/' + axis])
                          for axis in 'xyz')
            spg = sparse.FLANgrid(sparse=True,
                                  index=subgrp['index'][()],
                                  value=subgrp['value'][()],
                                  shape=shape)
            data_dict[name] = spg.to_dense()
        treeview.emitDict.emit(data_dict)

    if action == actions['Plot Histogram']:

        # first send the data, then the command that plots it
        value = item.data_file[item.name]['value'][()]
        data_dict = {'value': value}
        treeview.emitDict.emit(data_dict)

        cmd = "%matplotlib inline\nimport matplotlib.pyplot as plt\nplt.hist(value,25)\nplt.show()\n"
        data_dict = {'exec_cmd': cmd}
        treeview.emitDict.emit(data_dict)
예제 #4
0
def visualize3Ddata(hdf5=None, mol_name=None, out=None):
    """Export the complex and the mapped features of one molecule.

    The group ``mol_name`` is read from the HDF5 file ``hdf5``. Its
    ``complex`` dataset is written to ``<outdir>/complex.pdb`` through
    ``pdb2sql``. Then, for every group found under ``mapped_features``,
    the features are loaded (sparse ones are converted to dense arrays)
    and handed to ``export_cube_files`` together with the grid points and
    the output directory.

    Args:
        hdf5 (str): path of the HDF5 file to read.
        mol_name (str): name of the molecule group inside the file.
        out (str, optional): output directory. Defaults to ``mol_name``.
            The directory is created when it does not exist.

    Raises:
        ValueError: if neither ``out`` nor ``mol_name`` names a directory.
        FileNotFoundError: if the HDF5 file cannot be opened.
        LookupError: if ``mol_name`` cannot be found in the file.
    """
    outdir = mol_name if out is None else out
    if not outdir:
        raise ValueError('An output directory is required: '
                         'provide out or mol_name')

    # export_cube_files receives the directory with a trailing slash
    if not outdir.endswith('/'):
        outdir += '/'

    # also creates missing parents and tolerates an existing directory
    os.makedirs(outdir, exist_ok=True)

    try:
        f5 = h5py.File(hdf5, 'r')
    except (OSError, TypeError, ValueError) as err:
        raise FileNotFoundError(
            'HDF5 file %s could not be opened' % hdf5) from err

    # the context manager closes the file on every exit path
    with f5:
        try:
            molgrp = f5[mol_name]
        except (KeyError, TypeError) as err:
            raise LookupError(
                'Molecule %s not found in %s' % (mol_name, hdf5)) from err

        # create the pdb file; ``dataset[()]`` replaces the ``.value``
        # accessor that no longer exists in h5py >= 3.0
        sqldb = pdb2sql(molgrp['complex'][()])
        try:
            sqldb.exportpdb(os.path.join(outdir, 'complex.pdb'))
        finally:
            sqldb.close()

        # get the grid
        grid = {axis: molgrp['grid_points/' + axis][()] for axis in 'xyz'}
        shape = (len(grid['x']), len(grid['y']), len(grid['z']))

        # loop through all the feature types
        for data_name, featgrp in molgrp['mapped_features'].items():

            # create a dict of the feature {name : value}
            data_dict = {}
            for ff, subgrp in featgrp.items():
                if not subgrp.attrs['sparse']:
                    data_dict[ff] = subgrp['value'][()]
                else:
                    spg = sparse.FLANgrid(sparse=True,
                                          index=subgrp['index'][()],
                                          value=subgrp['value'][()],
                                          shape=shape)
                    data_dict[ff] = spg.to_dense()

            # export the cube file
            export_cube_files(data_dict, data_name, grid, outdir)
예제 #5
0
    def _extract_data(self):
        """Extract the data from the different maps.

        Opens ``self.fname`` read-only and, for every molecule in it:

        * adds the mean and variance of each mapped feature (except those
          listed in ``self.skip_feature``) to the matching ``NormParam``
          stored in ``self.parameters['features'][type][name]``;
        * updates the ``MinMaxParam`` stored in
          ``self.parameters['targets'][name]`` with each target value
          (except those listed in ``self.skip_target``).

        ``self.nmol`` is set to the number of molecules in the file.
        """
        # the context manager closes the file even when a read or a
        # statistic raises half-way through
        with h5py.File(self.fname, 'r') as f5:
            mol_names = list(f5.keys())
            self.nmol = len(mol_names)

            # loop over the molecules
            for mol in mol_names:

                # get the mapped features group
                data_group = f5.get(mol + '/mapped_features/')

                # loop over all the feature types
                for feat_types, feat_names in data_group.items():

                    # if feature type not in param add
                    feat_params = self.parameters['features'].setdefault(
                        feat_types, {})

                    # loop over all the feature
                    for name in feat_names:

                        # we skip the target
                        if name in self.skip_feature:
                            continue

                        # create the param if it doesn't already exists
                        if name not in feat_params:
                            feat_params[name] = NormParam()

                        # load the matrix, densifying sparse storage
                        feat_data = data_group[feat_types + '/' + name]
                        if feat_data.attrs['sparse']:
                            mat = sparse.FLANgrid(
                                sparse=True,
                                index=feat_data['index'][:],
                                value=feat_data['value'][:],
                                shape=self.shape).to_dense()
                        else:
                            mat = feat_data['value'][:]

                        # add the parameter (mean and var)
                        feat_params[name].add(np.mean(mat), np.var(mat))

                # get the target groups
                target_group = f5.get(mol + '/targets')

                # loop over all the targets
                for tname, tval in target_group.items():

                    # we skip the already computed target
                    if tname in self.skip_target:
                        continue

                    # create a new item if needed
                    if tname not in self.parameters['targets']:
                        self.parameters['targets'][tname] = MinMaxParam()

                    # update the value
                    self.parameters['targets'][tname].update(tval[()])
예제 #6
0
    def load_one_molecule(self, fname, mol=None):
        '''Load the feature/target of a single molecule.

        The features listed in ``self.select_feature`` (a mapping
        ``{feature_type: feature_names}``) are read in that order; sparse
        ones are converted to dense arrays of shape ``self.grid_shape``.
        The target named ``self.select_target`` is read from ``targets``.

        Args:
            fname (str): hdf5 file name
            mol (None or str, optional): name of the complex in the hdf5.
                When None, the first key of the file is used.

        Returns:
            np.array,float: features, targets (both cast to float32)

        Raises:
            KeyError: if the molecule, a feature type, a feature or the
                target is missing from the file.
        '''

        outtype = 'float32'

        # the context manager closes the file even when a lookup fails
        with h5py.File(fname, 'r') as fh5:

            if mol is None:
                mol = list(fh5.keys())[0]

            # get the mol; Group.get returns None for a missing member
            mol_data = fh5.get(mol)
            if mol_data is None:
                raise KeyError('Molecule %s not found in file %s' %
                               (mol, fname))

            # get the features
            feature = []
            for feat_type, feat_names in self.select_feature.items():

                # see if the feature type exists
                feat_dict = mol_data.get('mapped_features/' + feat_type)
                if feat_dict is None:
                    raise KeyError(
                        'Feature type %s not found in file %s for '
                        'molecule %s. Possible feature types are : %s' %
                        (feat_type, fname, mol,
                         ', '.join(mol_data['mapped_features'].keys())))

                # loop through all the desired feat names
                for name in feat_names:

                    # extract the group; fail loudly instead of carrying
                    # on with the data of a previous feature
                    try:
                        data = feat_dict[name]
                    except KeyError as err:
                        raise KeyError(
                            'Feature %s not found in file %s for mol %s '
                            'and feature type %s. Possible feature are : '
                            '%s' % (name, fname, mol, feat_type,
                                    ', '.join(feat_dict.keys()))) from err

                    # check its sparse attribute
                    # if true get a FLAN
                    # if false direct import
                    if data.attrs['sparse']:
                        mat = sparse.FLANgrid(
                            sparse=True,
                            index=data['index'][:],
                            value=data['value'][:],
                            shape=self.grid_shape).to_dense()
                    else:
                        mat = data['value'][:]

                    # append to the list of features
                    feature.append(mat)

            # get the target value (KeyError when the target is missing)
            target = mol_data['targets/' + self.select_target][()]

        # make sure all the feature have exact same type
        # if they don't  collate_fn in the creation of the minibatch will fail.
        # Note returning torch.FloatTensor makes each epoch twice longer ...
        return (np.array(feature).astype(outtype),
                np.array([target]).astype(outtype))