import os

import slurm  # project-local module (assumed to wrap SLURM accounting queries such as sacct)


def getjoblist(sourcedir='/home-1/[email protected]/work/log/biased2_base/sim'):
    """Scan a log directory for simulation ('sw') and control ('cw') logs,
    extract each job's SLURM_JOB_ID, and return its execution info from SLURM."""
    ls = os.listdir(sourcedir)
    ls = [i for i in ls if (i.startswith('sw') or i.startswith('cw'))]
    jobidlist = []
    type_map = {}
    for logfile in sorted(ls):
        mt_type = 'sim' if logfile.startswith('sw') else 'ctl'
        with open(os.path.join(sourcedir, logfile)) as src:
            log = src.read().split('\n')
        for l in log:
            if 'SLURM_JOB_ID' in l:
                _, jobid = l.split(':')
                jobid = int(jobid.strip())
                jobidlist.append(jobid)
                type_map[jobid] = mt_type
                break  # one job id per log file
    job_data = slurm.jobexecinfo(jobidlist)
    for job in job_data:
        job['type'] = type_map[job['jobid']]
    return job_data
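# `sw_file_parser` below converts SLURM's elapsed-time strings with `time2sec`,
# which (like `get_expid` and `conn`) is defined elsewhere in this project. A
# minimal sketch of such a helper is shown here for reference, assuming sacct's
# '[DD-]HH:MM:SS' elapsed format; the real implementation may differ.
def _time2sec_sketch(elapsed):
    """Convert a SLURM elapsed string like '1-02:03:04' or '02:03:04' to seconds."""
    days = 0
    if '-' in elapsed:
        d, elapsed = elapsed.split('-')
        days = int(d)
    parts = [int(p) for p in elapsed.split(':')]
    while len(parts) < 3:  # pad 'MM:SS' to 'HH:MM:SS'
        parts.insert(0, 0)
    h, m, s = parts
    return ((days * 24 + h) * 3600) + m * 60 + s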
def sw_file_parser(name, source_dir=None):
    """Parse the 'sw' (simulation worker) log files for one experiment, enrich
    each record with SLURM accounting data, and insert rows into the sw table."""
    global conn
    home = os.getenv('HOME')
    sourcedir = home + '/work/log/' + name if source_dir is None else source_dir
    jobinfo = {}
    ls = [i for i in os.listdir(sourcedir) if i.startswith('sw')]
    appl = os.path.basename(sourcedir)
    expid = get_expid(appl)
    print('Processing %d files' % len(ls))
    for filename in sorted(ls):
        info = {}
        info['expid'] = expid
        info['swname'] = os.path.splitext(filename)[0]
        if not info['swname'].startswith('sw'):
            print('Only processing sw files in this method')
            continue
        # exists = runquery("Select count(swname) from sw where expid=%(expid)d and swname='%(swname)s';" % (info))
        # if exists:
        #     print('File %s already parsed for experiment %s' % (info['swname'], appl))
        #     return
        with open(os.path.join(sourcedir, filename)) as src:
            log = src.read().split('\n')
        jobid = None
        for l in log:
            if 'name:' in l:
                info['jobname'] = l.split()[2].strip()
            elif 'src_bin:' in l:
                info['src_bin'] = l[-6:]
            elif 'src_index:' in l:
                info['src_index'] = int(l.split()[2].strip())
            elif 'src_hcube:' in l:
                info['src_hcube'] = l.split()[2].strip()
            elif 'SLURM_JOB_ID' in l:
                _, jobid = l.split(':')
                jobid = int(jobid.strip())
            elif 'mdtraj.Trajectory' in l:
                info['numobs'] = int(l.split()[6])
        if jobid is None or 'jobname' not in info:
            print('ERROR. Failed to retrieve jobid for ', info['swname'])
            continue
        jobinfo[jobid] = info
    print('Retrieving data from slurm for %d jobs' % len(jobinfo))
    jobdata = slurm.jobexecinfo(list(jobinfo.keys()))
    for job in jobdata:
        job['time'] = time2sec(job['time'])  # normalize SLURM elapsed time to seconds
        jobinfo[job['jobid']].update(job)
        # Default any fields the log did not provide so the INSERT below cannot KeyError
        jobinfo[job['jobid']].setdefault('src_index', -1)
        jobinfo[job['jobid']].setdefault('src_hcube', 'D')
        jobinfo[job['jobid']].setdefault('numobs', -1)
    print('Inserting %d rows into database' % len(jobdata))
    for job in jobinfo.values():
        try:
            # NOTE: values are interpolated directly into the SQL string; this is
            # only acceptable because all inputs come from our own log files.
            query = """INSERT INTO sw VALUES
                (%(expid)d, '%(swname)s', '%(jobname)s', %(jobid)d, '%(src_bin)s',
                 %(src_index)d, '%(src_hcube)s', '%(submit)s', '%(start)s', %(time)d,
                 '%(cpu)s', '%(exitcode)s', '%(node)s', %(numobs)d);""" % job
            cur = conn.cursor()
            cur.execute(query)
        except Exception as inst:
            print('Failed to insert jobid %(jobid)d (%(swname)s): ' % job)
            print(inst)
    conn.commit()
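# Example driver (a sketch, not part of the original module): assumes `conn` is
# a DB-API connection set up at module level (e.g., psycopg2) and that the
# experiment name matches a directory under $HOME/work/log/. The database name,
# credentials, and experiment name here are placeholders.
if __name__ == '__main__':
    import psycopg2
    conn = psycopg2.connect(dbname='jobs', user='postgres')
    sw_file_parser('biased2_base')  # parses $HOME/work/log/biased2_base/sw* logs
    for job in getjoblist():
        print(job['jobid'], job['type'])
    conn.close()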