def batch_processing(op):
    """Find PDB files under ``op['pdb_dir']`` and run ``SP.convert`` on each.

    Every sub-directory of ``op['pdb_dir']`` is walked and each file whose
    name ends in ``.pdb`` is registered under its file name without the
    extension (the "pdb id").  One task is created for every combination of
    pdb id, ``op['spacing_s']`` entry and ``op['resolution_s']`` entry, and
    all tasks are handed to ``TPMU.run_batch``.

    NOTE(review): this file appears to contain a second, later definition of
    ``batch_processing``; inside a single module the later definition
    replaces this one -- confirm which variant is meant to be kept.

    Args:
        op: option dict.  Keys read here:
            ``'pdb_dir'`` -- root directory to scan;
            ``'spacing_s'`` / ``'resolution_s'`` -- iterables of values to
            combine;
            ``'pdb_id_selected'`` (optional) -- if present, only these pdb
            ids are processed (ids that were not found are dropped
            silently).
            ``op`` must not already contain ``'spacing'`` or
            ``'resolution'``; those keys are filled in per task on a deep
            copy, together with ``'pdb_id'`` and ``'pdb_file'``.

    Returns:
        Nested dict ``{pdb_id: {spacing: {resolution: result}}}`` where
        ``result`` is the ``'result'`` entry of each item returned by
        ``TPMU.run_batch``.

    Raises:
        AssertionError: if two files share the same pdb id, if ``op``
            already holds ``'resolution'`` or ``'spacing'``, or if the same
            (pdb id, spacing, resolution) result is reported twice.
    """
    import copy
    import os

    # walk through every subdir, find all pdb files
    extension = '.pdb'
    pdb_path = {}
    for root, _sub_folders, files in os.walk(op['pdb_dir']):
        for file_t in files:
            if not file_t.endswith(extension):
                continue
            pdb_id = file_t[:-len(extension)]
            # the pdb_id must be unique, even across different sub-directories
            assert pdb_id not in pdb_path
            pdb_path[pdb_id] = os.path.join(root, file_t)

    if 'pdb_id_selected' in op:
        # keep only the requested ids; walk order is preserved
        selected = set(op['pdb_id_selected'])
        pdb_path = {
            pdb_id: path
            for pdb_id, path in pdb_path.items()
            if pdb_id in selected
        }

    # print() call form: the Python 2 print statement is a syntax error on
    # Python 3; the emitted text is unchanged.
    print('generating maps for ', len(pdb_path), 'structures')

    # one task per (pdb_id, spacing, resolution); each task gets its own
    # deep copy of the options so tasks cannot affect one another
    ts = {}
    for pdb_id, pdb_file in pdb_path.items():
        for spacing in op['spacing_s']:
            for resolution in op['resolution_s']:
                op_t = copy.deepcopy(op)
                op_t['pdb_id'] = pdb_id
                op_t['pdb_file'] = pdb_file
                assert 'resolution' not in op_t
                op_t['resolution'] = resolution
                assert 'spacing' not in op_t
                op_t['spacing'] = spacing
                ts[uuid.uuid4()] = {'func': SP.convert, 'kwargs': {'op': op_t}}

    import aitom.parallel.multiprocessing.util as TPMU
    cre_s = TPMU.run_batch(ts)

    # regroup the flat results as results[pdb_id][spacing][resolution]
    results = {}
    for cre in cre_s:
        result = cre['result']
        by_resolution = (
            results.setdefault(result['pdb_id'], {})
            .setdefault(result['spacing'], {})
        )
        assert result['resolution'] not in by_resolution
        by_resolution[result['resolution']] = result

    return results
def batch_processing(op):
    """
    Scan ``op['pdb_dir']`` for pdb files and convert them to density maps.

    Every ``*.pdb`` file found in any sub-directory is identified by its file
    name without the extension.  For each such id, and for each combination
    of ``op['spacing_s']`` and ``op['resolution_s']``, a ``SP.convert`` task
    is built on a deep copy of ``op`` (extended with ``pdb_id``, ``pdb_file``,
    ``resolution`` and ``spacing``) and submitted through
    ``TPMU.run_batch``.  If ``op`` contains ``'pdb_id_selected'``, only the
    ids listed there are processed.

    The collected results are returned as
    ``{pdb_id: {spacing: {resolution: result}}}``; this function does not
    itself write anything to disk.
    """
    import os
    import copy

    # walk through every subdir, find all pdb files
    suffix = '.pdb'
    found = {}
    for folder, _dirs, names in os.walk(op['pdb_dir']):
        for name in names:
            if name.endswith(suffix):
                stem = name[:-len(suffix)]
                assert (stem + suffix) == name
                # the pdb id must be unique
                assert stem not in found
                found[stem] = os.path.join(folder, name)

    if 'pdb_id_selected' in op:
        wanted = set(found) & set(op['pdb_id_selected'])
        found = {key: found[key] for key in wanted}

    print('generating maps for ', len(found), 'structures')

    # build one conversion task per (pdb id, spacing, resolution)
    tasks = {}
    for stem in found:
        for step in op['spacing_s']:
            for res in op['resolution_s']:
                task_op = copy.deepcopy(op)
                task_op['pdb_id'] = stem
                task_op['pdb_file'] = found[stem]
                assert 'resolution' not in task_op
                task_op['resolution'] = res
                assert 'spacing' not in task_op
                task_op['spacing'] = step
                tasks[uuid.uuid4()] = {
                    'func': SP.convert,
                    'kwargs': {'op': task_op},
                }

    import aitom.parallel.multiprocessing.util as TPMU
    completed = TPMU.run_batch(tasks, worker_num=0, verbose=True)

    # regroup the flat results by pdb id, then spacing, then resolution
    collected = {}
    for item in completed:
        outcome = item['result']
        stem = outcome['pdb_id']
        res = outcome['resolution']
        step = outcome['spacing']
        bucket = collected.setdefault(stem, {}).setdefault(step, {})
        assert res not in bucket
        bucket[res] = item['result']

    return collected