Example #1
0
def batch_processing(op):
    """
    Scan a directory tree for pdb files and run SP.convert on each of them.

    One task is created for every combination of structure, spacing and
    resolution; the tasks are executed through
    aitom.parallel.multiprocessing.util.run_batch and the results are
    collected into a nested dict.

    :param op: option dict. Keys read here:
        'pdb_dir'          root directory, walked recursively for *.pdb files
        'spacing_s'        iterable of spacing values
        'resolution_s'     iterable of resolution values
        'pdb_id_selected'  (optional) ids to keep; others are ignored
        op must not already contain 'resolution' or 'spacing'. Each task
        receives a deep copy of op with 'pdb_id', 'pdb_file', 'resolution'
        and 'spacing' filled in.
    :return: dict such that re[pdb_id][spacing][resolution] is the 'result'
        entry reported for that task.
    :raises AssertionError: if two files share the same pdb id, if op already
        holds 'resolution' or 'spacing', or if the same
        (pdb_id, spacing, resolution) result is reported twice.

    NOTE: ``uuid`` and ``SP`` are not imported inside this function; they are
    expected to be provided at module level.
    """
    import copy
    import os

    extension = '.pdb'

    # walk through every subdir, find all pdb files
    pdb_path = {}
    for root, _sub_folders, files in os.walk(op['pdb_dir']):
        for file_t in files:
            if not file_t.endswith(extension):
                continue

            # file name without the extension is used as the structure id
            pdb_id = file_t[:-len(extension)]
            candidate = os.path.join(root, file_t)

            # the pdb_id must be unique across the whole tree
            assert pdb_id not in pdb_path, (
                'duplicate pdb id %r: %s and %s'
                % (pdb_id, pdb_path.get(pdb_id), candidate))

            pdb_path[pdb_id] = candidate

    if 'pdb_id_selected' in op:
        # keep only the requested ids that were actually found on disk
        selected = set(pdb_path.keys()) & set(op['pdb_id_selected'])
        pdb_path = {pdb_id: pdb_path[pdb_id] for pdb_id in selected}

    # Single pre-formatted argument so the output is the same on Python 2 and
    # Python 3 (the former print statement is a syntax error on Python 3).
    print('generating maps for  %d structures' % len(pdb_path))

    ts = {}
    for pdb_id in pdb_path:
        for spacing in op['spacing_s']:
            for resolution in op['resolution_s']:

                # every task gets its own private copy of the options
                op_t = copy.deepcopy(op)
                op_t['pdb_id'] = pdb_id
                op_t['pdb_file'] = pdb_path[pdb_id]

                assert 'resolution' not in op_t
                op_t['resolution'] = resolution
                assert 'spacing' not in op_t
                op_t['spacing'] = spacing

                ts[uuid.uuid4()] = {'func': SP.convert, 'kwargs': {'op': op_t}}

    import aitom.parallel.multiprocessing.util as TPMU
    cre_s = TPMU.run_batch(ts)

    # regroup the flat task results as re[pdb_id][spacing][resolution]
    re = {}
    for cre in cre_s:
        pdb_id = cre['result']['pdb_id']
        resolution = cre['result']['resolution']
        spacing = cre['result']['spacing']

        by_resolution = re.setdefault(pdb_id, {}).setdefault(spacing, {})

        assert resolution not in by_resolution

        by_resolution[resolution] = cre['result']

    return re
Example #2
0
def batch_processing(op):
    """
    Find pdb files below op['pdb_dir'] and run SP.convert on each of them.

    A task is queued for every (structure, spacing, resolution) combination
    taken from the files found, op['spacing_s'] and op['resolution_s']. When
    op contains 'pdb_id_selected', only those ids are processed. Tasks are
    handed to aitom.parallel.multiprocessing.util.run_batch, and the reported
    results are returned as a nested dict indexed as
    [pdb_id][spacing][resolution].

    op itself must not contain 'resolution' or 'spacing'; these, together
    with 'pdb_id' and 'pdb_file', are set on a per-task deep copy of op.
    """
    import copy
    import os

    suffix = '.pdb'

    # recursively collect the pdb files, keyed by file name without suffix
    found = {}
    for folder, _, names in os.walk(op['pdb_dir']):
        for name in names:
            if name.endswith(suffix):
                stem = name[:-len(suffix)]

                assert stem + suffix == name
                assert stem not in found  # each pdb id may occur only once

                found[stem] = os.path.join(folder, name)

    if 'pdb_id_selected' in op:
        # restrict to the requested ids that actually exist on disk
        keep = set(found.keys()) & set(op['pdb_id_selected'])
        found = dict((pid, found[pid]) for pid in keep)

    print('generating maps for ', len(found), 'structures')

    tasks = {}
    for pid, pdb_file in found.items():
        for spacing in op['spacing_s']:
            for resolution in op['resolution_s']:
                # each task works on its own copy of the options
                task_op = copy.deepcopy(op)

                assert 'resolution' not in task_op
                assert 'spacing' not in task_op

                task_op.update(
                    pdb_id=pid,
                    pdb_file=pdb_file,
                    resolution=resolution,
                    spacing=spacing,
                )

                task_key = uuid.uuid4()
                tasks[task_key] = {
                    'func': SP.convert,
                    'kwargs': {'op': task_op},
                }

    import aitom.parallel.multiprocessing.util as TPMU
    batch_results = TPMU.run_batch(tasks, worker_num=0, verbose=True)

    # regroup the flat results as out[pdb_id][spacing][resolution]
    out = {}
    for entry in batch_results:
        result = entry['result']
        pid = result['pdb_id']
        resolution = result['resolution']
        spacing = result['spacing']

        by_resolution = out.setdefault(pid, {}).setdefault(spacing, {})

        assert resolution not in by_resolution

        by_resolution[resolution] = result

    return out