def main():
    """Prepare one run directory per Drude coordinate file and index the jobs.

    For every ``*.d.cor`` file in the current directory this:

    1. creates the matching ``<root>.cor`` file with dedrude() if it does
       not exist yet;
    2. creates the directory ``<root>`` if needed and copies the run pack
       into it with copywild();
    3. symlinks both coordinate files into that directory under
       upper-cased names;
    4. collects the jobs returned by iteratejobs().

    The collected jobs are written to ``sgearraytasklist.txt`` (one
    1-based index plus the job fields per line), and the SGE module is
    asked to echo the command for submitting the array job.
    """
    # Kept as function-local imports, as in the original; done first so a
    # missing SGE module is reported before any files are touched.
    import sys

    import SGE

    alljobs = []
    topdir = os.getcwd()

    for coordfile in glob('*.d.cor'):
        coordfileroot = coordfile[:-len('.d.cor')]

        # Generate the coordinate file without Drude particles.
        nonpolcoordfile = coordfileroot + '.cor'
        if not os.path.exists(nonpolcoordfile):
            dedrude(coordfile, nonpolcoordfile)

        print('Preparing %s' % coordfileroot)
        if not os.path.exists(coordfileroot):
            os.mkdir(coordfileroot)

        os.chdir(coordfileroot)
        try:
            # Copy runpack
            copywild('/home/cjh/rmt/runpack/*', '.')

            # Link the coordinate files.
            # NB Due to a limitation of CHARMM's environment variable
            #    parsing mechanism, the coordinate filenames
            #    must be in upper case.
            for name in (coordfile, nonpolcoordfile):
                linkname = name.upper()
                if not os.path.exists(linkname):
                    os.symlink(os.path.join('..', name), linkname)
        finally:
            # Always return to the starting directory, even if the copy or
            # link step fails, so later relative paths stay valid.
            os.chdir(topdir)

        alljobs += iteratejobs(coordfileroot)

    with open('sgearraytasklist.txt', 'w') as f:
        for index, job in enumerate(alljobs, 1):
            f.write('%d \t%s\n' % (index, ' '.join(job)))

    numjobs = len(alljobs)
    print('Indexed %d jobs' % numjobs)

    # Now create SGE array job and submit self as an array job
    x = SGE.qsubOptions()
    x.args.N = 'h2pc-qmmm-rmt'
    x.args.t = '1-' + str(numjobs)
    # Escape '$' so the placeholders are not expanded before qsub sees them.
    x.args.o = '$JOB_NAME.$JOB_ID.$TASK_ID.stdout.log'.replace('$', '\\$')
    x.args.command = sys.argv[0]
    print('#To submit the array job, run this command:')
    x.execute(mode='echo')
def PrepareJobs(runpack='/home/cjh/rmt/runpack/*',
                joblist='sgearraytasklist.txt',
                sge_jobname='h2pc-qmmm-rmt',
                DoOverwrite=False,
                h5filename='h2pc-data.h5'):
    """
    Prepares jobs to be executed in a :term:`SGE` array job environment.

    If the file named by joblist already exists and DoOverwrite is false,
    the task list is left untouched and its jobs are only counted (a job
    is a line with exactly four whitespace-separated fields; blank lines
    are ignored).

    Otherwise this does the following:

    1. Opens the HDF5 file h5filename in append mode.

    2. Fills the ``/Model/MolList`` table with the integer molecule ids
       read from ``../mol.list`` or, failing that, ``mol.list``. Lines
       that are not integers are skipped.

    3. For every file matching ``runpack + '.rtf'``, appends the second
       field (type) and fourth field (charge) of each line to the
       ``/Model/CHARMM_RTF`` table. Lines without a numeric fourth field
       are skipped.

    4. For every file matching ``*.cor`` in the current directory, loads
       the :term:`CHARMM card file` into ``/<root>/CHARMM_CARD`` using
       LoadCHARMM_CARD() and collects the jobs returned by iteratejobs().

    5. Writes the collected jobs, one numbered line each, to joblist.

    In both cases it finally asks the homebrew :py:mod:`SGE` module to
    echo the command line needed to submit the array job.

    :param runpack: glob prefix of the run pack; ``.rtf`` is appended to
        locate the CHARMM topology files.
    :param joblist: name of the task list file to read or write.
    :param sge_jobname: value used for the ``N`` option of the SGE job.
    :param DoOverwrite: if true, regenerate joblist even if it exists; it
        is also forwarded to OpenHDF5Table().
    :param h5filename: name of the HDF5 file that collects the data.
    :raises ValueError: if an existing joblist contains a non-empty line
        that does not have four fields, or if no ``mol.list`` file can be
        found when the molecule list has to be populated.

    .. versionadded:: 0.1
    """
    logger = logging.getLogger('SGEInterface.PrepareJobs')

    if os.path.exists(joblist) and not DoOverwrite:
        logger.info('Skipping population of %s', joblist)
        # Count the jobs already listed instead of regenerating them.
        numjobs = 0
        with open(joblist) as tasklist:
            for line in tasklist:
                len_line = len(line.split())
                if len_line == 4:
                    numjobs += 1
                elif len_line != 0:
                    raise ValueError('Invalid format of line:' + line)
    else:
        alljobs = []

        # Collect all data into HDF5 file
        h5data = tables.openFile(h5filename, 'a')
        try:
            #############################
            # Load in residues to do QM #
            #############################
            # NOTE(review): OpenHDF5Table presumably returns None when the
            # table already exists and must not be touched -- confirm.
            MolList = OpenHDF5Table(h5data, '/Model', 'MolList', ResidList,
                                    'List of molecule ids for which QM data exist',
                                    DoOverwrite)
            if MolList is not None:
                if os.path.exists('../mol.list'):
                    MolListFileName = '../mol.list'
                elif os.path.exists('mol.list'):
                    MolListFileName = 'mol.list'
                else:
                    raise ValueError("Could not find list of molecule IDs")

                # Logged once per file rather than once per line.
                logger.info('Adding mol.list')
                with open(MolListFileName) as mol_file:
                    for res_id in mol_file:
                        try:
                            data = MolList.row
                            data['ResID'] = int(res_id)
                            data.append()
                        except ValueError:
                            # Deliberately skip lines that are not integers.
                            pass
                MolList.flush()

            ###############################################
            # Load in atomic charges from CHARMM topology #
            ###############################################
            for CHARMM_RTFile in glob(runpack + '.rtf'):
                RTF = OpenHDF5Table(h5data, '/Model', 'CHARMM_RTF',
                                    CHARMM_RTF_Short, 'CHARMM Topology File',
                                    DoOverwrite, DoAppend=True)
                if RTF is not None:
                    # Logged once per file rather than once per line.
                    logger.info('Adding CHARMM RTF: %s', CHARMM_RTFile)
                    with open(CHARMM_RTFile) as rtf_file:
                        for line in rtf_file:
                            t = line.split()
                            try:
                                data = RTF.row
                                data['Type'] = t[1]
                                data['Charge'] = float(t[3])
                                data.append()
                            except (ValueError, IndexError):
                                # Deliberately skip lines that do not carry
                                # a type and a numeric charge.
                                pass
                    RTF.flush()

            #####################################
            # Load coordinates from CHARMM CARD #
            #####################################
            for coordfile in glob('*.cor'):
                logger.info('Adding CHARMM CARD: %s', coordfile)
                coordfileroot = coordfile.split('.')[0]
                Coords = OpenHDF5Table(h5data, '/' + coordfileroot,
                                       'CHARMM_CARD', CHARMM_CARD,
                                       'CHARMM CARD File', DoOverwrite)
                if Coords is not None:
                    LoadCHARMM_CARD(Coords, coordfile)

                alljobs += iteratejobs(coordfileroot)
        finally:
            # Release the HDF5 handle even if loading fails part-way.
            h5data.close()

        with open(joblist, 'w') as f:
            for index, job in enumerate(alljobs, 1):
                f.write('%d \t%s\n' % (index, ' '.join(job)))

        numjobs = len(alljobs)
        logger.info('Indexed %d jobs', numjobs)

    # Now create SGE array job and submit self as an array job
    x = SGE.qsubOptions()
    x.args.N = sge_jobname
    x.args.t = '1-' + str(numjobs)
    x.args.p = '-500'
    # Escape '$' so the placeholders are not expanded before qsub sees them.
    x.args.o = '$JOB_NAME.$JOB_ID.$TASK_ID.stdout.log'.replace('$', '\\$')
    # os.path.join keeps the path relative when sys.argv[0] has no directory
    # part; plain concatenation with os.sep would point at the filesystem root.
    FullPathToJobHandler = os.path.join(os.path.dirname(sys.argv[0]),
                                        'JobHandler.py')
    x.args.command = 'python ' + FullPathToJobHandler + ' --taskfile ' + joblist
    x.args.b = 'y'
    print('#To submit the array job, run this command:')
    x.execute(mode='echo')