Example #1
def dmif_data_structure(grid, get_partners):
    """ This function generates the central data structure for dmif analysis of trajectories. It returns a grid as a
    numpy structured array whose first three fields hold the coordinates and whose remaining fields hold the scores
    filled in during later trajectory analysis. Additionally, a list of lists of lists of the same length as the grid
    is returned, which is used to save coordinates of interaction partners, e.g. for hydrogen bonds. """
    # feature types ordered by their column index in the module-level lookup tables
    sorted_score_keys = [key for key, _ in sorted(grid_score_dict.items(), key=operator.itemgetter(1))]
    sorted_list_keys = [key for key, _ in sorted(grid_list_dict.items(), key=operator.itemgetter(1))]
    grid_score = []
    grid_partners = []
    for position in grid:
        # coordinates followed by one zero-initialized score per non-coordinate field
        grid_score.append(position + [0] * (len(grid_score_dict) - 3))
        if get_partners:
            # 'hda' features keep separate sublists for hydrogen bond donor and acceptor partners
            grid_partners.append([[[], []] if key == 'hda' else [] for key in sorted_list_keys])
    grid_score = np.array([tuple(x) for x in grid_score], dtype=[(key, float) for key in sorted_score_keys])
    return [grid_score, grid_partners]
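For orientation, a minimal usage sketch follows; grid_score_dict and grid_list_dict stand in for the module-level lookup tables that map feature names to column indices, and the values below are assumptions for illustration only.

import operator

import numpy as np

# assumed stand-ins for the module-level lookup tables (illustrative values only)
grid_score_dict = {'x': 0, 'y': 1, 'z': 2, 'shape': 3, 'hb': 4}
grid_list_dict = {'hb': 0, 'hda': 1}

grid = [[0.0, 0.0, 0.0], [0.5, 0.0, 0.0]]  # grid positions as [x, y, z] lists
grid_score, grid_partners = dmif_data_structure(grid, get_partners=True)
print(grid_score.dtype.names)  # ('x', 'y', 'z', 'shape', 'hb')
print(grid_partners[0])        # [[], [[], []]] -> 'hda' holds donor and acceptor sublists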
Example #2
def generate_features(positions, feature_scores, feature_type,
                      features_per_feature_type, directory, partner_path,
                      debugging, total_number_of_features, start,
                      feature_counter, results):
    """ This function generates features with variable tolerance based on a global maximum search algorithm. The
    features are described with a list of properties as follows.

    Format:
    ------------------------------------------------------------------------
    0   1 2             3   4                             5          6     7
    ------------------------------------------------------------------------
    0  hi M [0.0,0.0,0.0] 1.5                            []        0.0   1.0
    1  pi M [0.0,0.0,0.0] 1.5                            []        0.0   1.0
    2  ni M [0.0,0.0,0.0] 1.5                            []        0.0   1.0
    3  hd M [0.0,0.0,0.0] 1.5               [[3.0,0.0,0.0]]  1.9499999   1.0
    4  ha M [0.0,0.0,0.0] 1.5               [[3.0,0.0,0.0]]  1.9499999   1.0
    5 hd2 M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   1.0
    6 ha2 M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   1.0
    7 hda M [0.0,0.0,0.0] 1.5 [[3.0,0.0,0.0],[0.0,3.0,0.0]]  1.9499999   1.0
    8  ai M [0.0,0.0,0.0] 1.5               [[1.0,0.0,0.0]] 0.43633232   1.0
    ------------------------------------------------------------------------
    Legend:
    0 - index
    1 - type
    2 - flag (O - optional, M - mandatory)
    3 - core position
    4 - core tolerance
    5 - partner positions (for the hda feature, donor coordinates come first, then acceptor coordinates)
    6 - partner tolerance
    7 - weight
    """
    logger = setup_logger('_'.join(['features', feature_type]), directory,
                          debugging)
    if partner_path is None:
        partner_path = directory + '/data'
    if feature_type in grid_list_dict:
        partners = pickle_reader(partner_path + '/' + feature_type + '.pkl',
                                 feature_type + '.pkl', logger)
    else:
        partners = [[]] * len(positions)
    score_minimum = 1
    tree = cKDTree(positions)
    generated_features = []
    not_used = list(range(len(feature_scores)))
    used = []
    while len(not_used) > 0 and feature_scores[not_used].max() >= score_minimum:
        feature_maximum = feature_scores[not_used].max()
        logger.debug(
            'Feature {} maximum of remaining grid points at {}.'.format(
                feature_type, feature_maximum))
        indices_not_checked = np.where(
            abs(feature_scores - feature_maximum) < 1e-8)[0]
        indices = []
        # check whether grid points within the minimum tolerance were already used for other features
        for index_not_checked in indices_not_checked:
            feature_indices = tree.query_ball_point(
                positions[index_not_checked], r=1.5)
            if len(feature_indices) + len(used) == len(
                    set(feature_indices + used)):
                indices.append(index_not_checked)
            else:
                not_used = [x for x in not_used if x != index_not_checked]
        if len(indices) > 0:
            # check if only one grid point
            if len(indices) == 1:
                index = indices[0]
                core_tolerance, feature_indices = get_core_tolerance(
                    positions[index], tree, feature_scores, feature_maximum)
            # if more than one grid point, search for the ones with the largest tolerance
            else:
                core_tolerance, indices_maximal_tolerance, feature_indices_list = \
                    get_maximal_core_tolerance(indices, positions, tree, feature_scores, feature_maximum)
                # if more than one grid point has the largest tolerance, pick the one with the largest sum of scores
                if len(indices_maximal_tolerance) > 1:
                    index, feature_indices = get_maximal_sum_of_scores(
                        feature_scores, indices_maximal_tolerance,
                        feature_indices_list)
                else:
                    index = indices_maximal_tolerance[0]
                    feature_indices = feature_indices_list[0]
            if len(feature_indices) + len(used) > len(
                    set(feature_indices + used)):
                not_used = [x for x in not_used if x != index]
                used.append(index)
            else:
                generated_features.append([
                    index, feature_type, 'M', positions[index], core_tolerance,
                    get_partner_positions(feature_type, partners[index]),
                    get_partner_tolerance(feature_type, core_tolerance), 1.0
                ])
                not_used = [x for x in not_used if x not in feature_indices]
                used += feature_indices
                with feature_counter.get_lock():
                    feature_counter.value += 1
                update_progress(
                    feature_counter.value / total_number_of_features,
                    'Progress of feature generation',
                    ((time.time() - start) / feature_counter.value) *
                    (total_number_of_features - feature_counter.value))
            if len(generated_features) >= features_per_feature_type:
                break
    if len(generated_features) < features_per_feature_type:
        with feature_counter.get_lock():
            feature_counter.value += features_per_feature_type - len(
                generated_features)
        update_progress(feature_counter.value / total_number_of_features,
                        'Progress of feature generation',
                        ((time.time() - start) / feature_counter.value) *
                        (total_number_of_features - feature_counter.value))
    results += generated_features
    return
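To make the feature format concrete, here is a small hypothetical example of unpacking one entry of results; the values are copied from the docstring table above, not from a real run.

# hypothetical feature entry, field order as in the docstring legend
feature = [3, 'hd', 'M', [0.0, 0.0, 0.0], 1.5, [[3.0, 0.0, 0.0]], 1.9499999, 1.0]
index, feature_type, flag, core, core_tolerance, \
    partner_positions, partner_tolerance, weight = feature
print('{} feature at {} with core tolerance {} and partner tolerance {}'.format(
    feature_type, core, core_tolerance, partner_tolerance))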
Example #3
    # excerpt: the enclosing loop that iterates over chunks of worker processes is elided
    for process in chunk:
        process.start()
    for process in chunk:
        process.join()
update_user('Processing results.', logger)
# convert the manager-backed multiprocessing list into a true Python list
results_list = list(results)
results = None  # release the shared list
dmif, partners = post_processing(results_list, total_number_of_frames)
results_list = None  # release the raw results
update_user('Writing raw data to {}/data.'.format(directory), logger)
pickle_writer(dmif, 'dmif', '{}/{}'.format(directory, 'data'))
if get_partners:
    for key in grid_list_dict:
        pickle_writer(partners[key].tolist(), key, '/'.join([directory, 'data']))
partners = None
update_user('Writing maps to {}/dmifs.'.format(directory), logger)
for map_format in map_formats:
    for feature_type in [x for x in dmif.dtype.names if x not in ['x', 'y', 'z']]:
        dmif_writer(dmif[feature_type],
                    np.array([[x, y, z] for x, y, z in zip(dmif['x'], dmif['y'], dmif['z'])]),
                    map_format, feature_type, '{}/{}'.format(directory, 'dmifs'), logger)
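The chunk-wise start/join pattern used above can be reproduced in isolation. The following self-contained sketch illustrates the pattern itself, not the module's workers; it fills a manager-backed list like the results list consumed above.

import multiprocessing


def worker(results):
    # hypothetical worker; the real processes analyze trajectory frames
    results.append('done')


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    results = manager.list()  # shared list filled by the workers
    processes = [multiprocessing.Process(target=worker, args=(results,))
                 for _ in range(4)]
    chunk_size = 2
    # start and join the processes chunk-wise to bound the number of parallel workers
    for chunk in [processes[i:i + chunk_size]
                  for i in range(0, len(processes), chunk_size)]:
        for process in chunk:
            process.start()
        for process in chunk:
            process.join()
    print(list(results))  # ['done', 'done', 'done', 'done']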
Example #4
def grid_partners_to_array(grid_partners):
    """ Convert the grid partners list of lists into a numpy structured array with one object field per feature type,
    ordered by the column indices in the module-level grid_list_dict. """
    sorted_list_keys = [key for key, _ in sorted(grid_list_dict.items(), key=operator.itemgetter(1))]
    grid_partners = np.array([tuple(x) for x in grid_partners],
                             dtype=[(key, 'O') for key in sorted_list_keys])
    return grid_partners
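A minimal usage sketch, again with an assumed stand-in for the module-level grid_list_dict:

import operator

import numpy as np

# assumed stand-in for the module-level lookup table (illustrative values only)
grid_list_dict = {'hb': 0, 'hda': 1}

# two grid points, shaped as dmif_data_structure builds them with get_partners=True
grid_partners = [[[], [[], []]], [[], [[], []]]]
partners_array = grid_partners_to_array(grid_partners)
print(partners_array.dtype.names)  # ('hb', 'hda')
print(partners_array['hda'][0])    # [[], []] -> donor and acceptor partner lists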