Example #1
    def run(self, date):
        print("Processing for ", date)
        if not os.path.exists(
                os.path.join(self.acct_path,
                             date.strftime("%Y-%m-%d") + ".txt")):
            print("No accounting file for ", date)
            return

        acct = self.acct_reader(
            os.path.join(self.acct_path,
                         date.strftime("%Y-%m-%d") + ".txt"))
        try:
            os.makedirs(
                os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d")))
        except OSError:  # output directory may already exist
            pass

        val_file = os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"),
                                "validated")
        # job IDs already validated on a previous run
        val_jids = []
        if os.path.exists(val_file):
            with open(val_file, 'r') as fd:
                val_jids = fd.read().splitlines()

        acct_jids = [x['id'] for x in acct if "+" not in x['id']]

        ntot = len(acct_jids)
        print(len(acct_jids), 'Job records in accounting file')

        run_jids = sorted(list(set(acct_jids) - set(val_jids)))
        if self.jobids:
            run_jids += self.jobids  # assuming self.jobids is a list of requested job IDs
        print(len(run_jids), 'Jobs to process')
        ntod = len(run_jids)

        acct = [job for job in acct if job['id'] in run_jids]

        if not self.jobids:
            # skip very large jobs (over 1,728,000 node-seconds, i.e. 20 node-days)
            acct = [
                job for job in acct
                if job['nodes'] * (job['end_time'] - job['start_time']) < 1728000
            ]
        ctr = 0
        with open(val_file, "a") as fd:
            # serial alternative: for result in map(self.partial_pickle, acct):
            for result in self.pool.imap(self.partial_pickle, acct):
                if result[1]:
                    fd.write("%s\n" % result[0])
                fd.flush()
                ctr += 1.0
                progress(ctr + (ntot - ntod), ntot, date.strftime("%Y-%m-%d"))
        print("Completed ", date)
Example #2
    def run(self, date):
        print("Processing for ", date)
        if not os.path.exists(os.path.join(self.acct_path, date.strftime("%Y-%m-%d") + ".txt")): 
            print("No accounting file for ", date)
            return

        acct = self.acct_reader(os.path.join(self.acct_path, date.strftime("%Y-%m-%d") + ".txt"))
        try:
            os.makedirs(os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d")))
        except OSError:  # output directory may already exist
            pass

        val_file = os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"), "validated")
        # job IDs already validated on a previous run
        val_jids = []
        if os.path.exists(val_file):
            with open(val_file, 'r') as fd:
                val_jids = fd.read().splitlines()

        acct_jids = [x['id'] for x in acct if "+" not in x['id']]

        ntot = len(acct_jids)
        print(len(acct_jids),'Job records in accounting file')

        run_jids = sorted(list(set(acct_jids) - set(val_jids)))
        if self.jobids:
            run_jids += self.jobids  # assuming self.jobids is a list of requested job IDs
        print(len(run_jids),'Jobs to process')
        ntod = len(run_jids)

        acct = [job for job in acct if job['id'] in run_jids]            

        if not self.jobids:
            # skip very large jobs (over 1,728,000 node-seconds, i.e. 20 node-days)
            acct = [job for job in acct if job['nodes']*(job['end_time']-job['start_time']) < 1728000]
        ctr = 0
        with open(val_file, "a") as fd:
            # serial alternative: for result in map(self.partial_pickle, acct):
            for result in self.pool.imap(self.partial_pickle, acct):
                if result[1]:
                    fd.write("%s\n" % result[0])
                fd.flush()
                ctr += 1.0
                progress(ctr + (ntot - ntod), ntot, date.strftime("%Y-%m-%d"))
        print("Completed ", date)
Example #3
    def run(self):
        for date in self.daterange(self.start, self.end):
            if not os.path.exists(
                    os.path.join(self.acct_path,
                                 date.strftime("%Y-%m-%d") + ".txt")):
                continue
            acct = self.acct_reader(
                os.path.join(self.acct_path,
                             date.strftime("%Y-%m-%d") + ".txt"))

            try:
                os.makedirs(
                    os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d")))
            except OSError:  # output directory may already exist
                pass

            vfile = os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"),
                                 "validated")
            # map of job ID -> validation status from previous runs
            val_stat = {}
            if os.path.exists(vfile):
                with open(vfile, 'r') as fdv:
                    for line in sorted(list(set(fdv.readlines()))):
                        jobid, stat = line.split()
                        val_stat[jobid] = stat
            ntot = len(acct)
            print(len(acct), 'Job records in accounting file')
            # keep jobs that have never been validated or whose validation failed
            acct = [
                x for x in acct
                if val_stat.get(x['id']) in ("False", None)
            ]
            print(len(acct), 'Jobs to process')
            ntod = len(acct)
            ctr = 0
            with open(vfile, "a+") as fdv:
                for result in self.pool.imap(self.partial_pickle, acct):
                    fdv.write("%s %s\n" % result)  # result is a (jobid, status) pair
                    fdv.flush()
                    ctr += 1.0
                    progress(ctr + (ntot - ntod), ntot,
                             date.strftime("%Y-%m-%d"))
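
This variant walks a whole date range via a daterange helper that is not shown. A minimal sketch, assuming start and end are datetime.date objects and the range excludes end (only the name comes from the call above; the body is an assumption):

from datetime import timedelta

def daterange(start, end):
    # Hypothetical generator: yield each date from start up to, but not
    # including, end, one day at a time.
    d = start
    while d < end:
        yield d
        d += timedelta(days=1)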
Example #4
def update_acct(date, rerun=False):
    ftr = [3600, 60, 1]  # seconds per hour, minute, second for HH:MM:SS parsing
    tz = pytz.timezone('US/Central')
    ctr = 0

    with open(os.path.join(cfg.acct_path,
                           date.strftime("%Y-%m-%d") + '.txt'),
              encoding="latin1") as fd:
        # first pass just counts records (one per line) for the progress bar
        nrecords = sum(1 for record in csv.DictReader(fd))
        fd.seek(0)

        for job in csv.DictReader(fd, delimiter='|'):
            if '+' in job['JobID']:
                # heterogeneous/array job IDs look like "base+offset";
                # collapse them to a single integer ID
                jid, rid = job['JobID'].split('+')
                job['JobID'] = int(jid) + int(rid)

            if not rerun and Job.objects.filter(id=job['JobID']).exists():
                ctr += 1
                continue
            json = {}

            json['id'] = job['JobID']
            json['project'] = job['Account']
            json['start_time'] = tz.localize(parse(job['Start']))
            json['end_time'] = tz.localize(parse(job['End']))
            json['start_epoch'] = calendar.timegm(
                json['start_time'].utctimetuple())
            json['end_epoch'] = calendar.timegm(
                json['end_time'].utctimetuple())
            json['run_time'] = json['end_epoch'] - json['start_epoch']

            try:
                if '-' in job['Timelimit']:
                    days, time = job['Timelimit'].split('-')
                else:
                    time = job['Timelimit']
                    days = 0
                # right-align the unit weights so "MM:SS" is not misread as "HH:MM"
                parts = [int(i) for i in time.split(":")]
                secs = sum(a * b for a, b in zip(ftr[-len(parts):], parts))
                json['requested_time'] = (int(days) * 86400 + secs) / 60
            except (KeyError, ValueError):
                pass  # leave requested_time unset if Timelimit is missing or malformed

            json['queue_time'] = int(parse(job['Submit']).strftime('%s'))  # '%s' (epoch) is a platform-specific strftime extension
            try:
                json['queue'] = job['Partition']
                json['name'] = job['JobName'][0:128]
                json['status'] = job['State'].split()[0]
                json['nodes'] = int(job['NNodes'])
                json['cores'] = int(job['ReqCPUS'])
                json['wayness'] = json['cores'] / json['nodes']
                json['date'] = json['end_time'].date()
                json['user'] = job['User']
            except (KeyError, ValueError):
                print(job)  # record is missing fields or has bad values; skip it
                continue
            if "user" in json:
                try:
                    json['uid'] = int(pwd.getpwnam(json['user']).pw_uid)
                except:
                    pass

            host_list = hostlist.expand_hostlist(job['NodeList'])
            del job['NodeList']

            Job.objects.filter(id=json['id']).delete()
            obj, created = Job.objects.update_or_create(**json)

            ### If xalt is available add data to the DB
            xd = None
            try:
                xd = run.objects.using('xalt').filter(job_id=json['id'])[0]
            except IndexError:  # no xalt record for this job
                pass

            if xd:
                obj.exe = xd.exec_path.split('/')[-1][0:128]
                obj.exec_path = xd.exec_path
                obj.cwd = xd.cwd[0:128]
                obj.threads = xd.num_threads
                obj.save()
                for join in join_run_object.objects.using('xalt').filter(
                        run_id=xd.run_id):
                    object_path = lib.objects.using('xalt').get(
                        obj_id=join.obj_id).object_path
                    module_name = lib.objects.using('xalt').get(
                        obj_id=join.obj_id).module_name
                    if not module_name: module_name = 'none'
                    library = Libraries(object_path=object_path,
                                        module_name=module_name)
                    library.save()
                    library.jobs.add(obj)

            ### Build host table
            for host_name in host_list:
                h = Host(name=host_name)
                h.save()
                h.jobs.add(obj)

            ctr += 1
            progress(ctr, nrecords, date)

    # mark every job listed in the day's "validated" file as validated
    with open(
            os.path.join(cfg.pickles_dir, date.strftime("%Y-%m-%d"),
                         "validated")) as fd:
        for line in fd.readlines():
            Job.objects.filter(id=int(line)).update(validated=True)
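
The Timelimit handling above turns Slurm-style "[D-]HH:MM:SS" strings into minutes by weighting each time field with the matching entry of ftr. A small standalone illustration of that conversion (the helper name is ours, not part of the original code):

def timelimit_minutes(limit):
    # Convert a Slurm-style "[D-]HH:MM:SS" limit to whole minutes.
    ftr = [3600, 60, 1]  # seconds per hour, minute, second
    days, _, time = limit.rpartition('-')
    parts = [int(p) for p in time.split(':')]
    secs = sum(a * b for a, b in zip(ftr[-len(parts):], parts))
    return (int(days or 0) * 86400 + secs) // 60

print(timelimit_minutes("2-00:30:00"))  # 2910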
Example #5
def update(date, rerun=False):

    tz = pytz.timezone('US/Central')
    pickle_dir = os.path.join(cfg.pickles_dir, date)

    ctr = 0
    for root, directory, pickle_files in os.walk(pickle_dir):
        num_files = len(pickle_files)
        print "Number of pickle files in", root, '=', num_files
        for pickle_file in sorted(pickle_files):

            ctr += 1
            try:
                if not rerun and Job.objects.filter(id=pickle_file).exists():
                    continue
            except ValueError:  # filename is not a numeric job ID
                print(pickle_file, "doesn't look like a pickled job")
                continue

            pickle_path = os.path.join(root, str(pickle_file))
            try:
                with open(pickle_path, 'rb') as f:
                    data = pickle.load(f)
                    json = data.acct
                    hosts = data.hosts.keys()
            except EOFError:
                print(pickle_file, "is empty")
                continue

            if 'yesno' in json: del json['yesno']
            utc_start = datetime.utcfromtimestamp(
                json['start_time']).replace(tzinfo=pytz.utc)
            utc_end = datetime.utcfromtimestamp(
                json['end_time']).replace(tzinfo=pytz.utc)
            json['run_time'] = json['end_time'] - json['start_time']

            if 'unknown' in json:
                json['requested_time'] = json['unknown'] * 60
                del json['unknown']
            elif 'requested_time' in json:
                json['requested_time'] = json['requested_time'] * 60
            else:
                json['requested_time'] = 0
            json['start_epoch'] = json['start_time']
            json['end_epoch'] = json['end_time']
            json['start_time'] = utc_start.astimezone(tz)
            json['end_time'] = utc_end.astimezone(tz)
            json['date'] = json['end_time'].date()
            json['name'] = json['name'][0:128]
            json['wayness'] = json['cores'] / json['nodes']
            if 'state' in json:
                json['status'] = json['state']
                del json['state']
            json['status'] = json['status'].split()[0]
            try:
                if 'user' in json:
                    json['uid'] = int(pwd.getpwnam(json['user']).pw_uid)
                elif 'uid' in json:
                    json['user'] = pwd.getpwuid(int(json['uid']))[0]
            except KeyError:  # account not known on this system
                json['user'] = '******'

            ### If xalt is available add data to the DB
            xd = None
            try:
                xd = run.objects.using('xalt').filter(job_id=json['id'])[0]
                json['user'] = xd.user
                json['exe'] = xd.exec_path.split('/')[-1][0:128]
                json['exec_path'] = xd.exec_path
                json['cwd'] = xd.cwd[0:128]
                json['threads'] = xd.num_threads
            except IndexError:  # no xalt record for this job
                xd = False

            if 'host_list' in json:
                del json['host_list']

            Job.objects.filter(id=json['id']).delete()
            obj, created = Job.objects.update_or_create(**json)
            for host_name in hosts:
                h = Host(name=host_name)
                h.save()
                h.jobs.add(obj)

            if xd:
                for join in join_run_object.objects.using('xalt').filter(
                        run_id=xd.run_id):
                    try:
                        object_path = lib.objects.using('xalt').get(
                            obj_id=join.obj_id).object_path
                        module_name = lib.objects.using('xalt').get(
                            obj_id=join.obj_id).module_name
                        if not module_name: module_name = 'none'
                        library = Libraries(object_path=object_path,
                                            module_name=module_name)
                        library.save()
                        library.jobs.add(obj)
                    except lib.DoesNotExist:
                        pass  # object row missing from the xalt DB

            progress(ctr, num_files, date)
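
The pickle files consumed by this loader are assumed to hold an object with an acct dict of accounting fields and a hosts mapping keyed by host name; the exact field set is inferred from the attribute accesses above. A hypothetical stand-in for that shape, handy for exercising update() on synthetic data:

import pickle

class PickledJob:
    # Hypothetical stand-in matching the attributes update() reads.
    def __init__(self, acct, hosts):
        self.acct = acct    # accounting fields: id, start_time, end_time, ...
        self.hosts = hosts  # per-host stats, keyed by host name

job = PickledJob(
    acct={'id': '12345', 'start_time': 1500000000, 'end_time': 1500003600,
          'cores': 48, 'nodes': 2, 'name': 'test', 'status': 'COMPLETED'},
    hosts={'c401-101': {}, 'c401-102': {}})
with open('12345', 'wb') as f:
    pickle.dump(job, f)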