Example no. 1
0
def parse_gdata_file(filename):
    """Parse a gdata usage dump file and load its records into per-project databases.

    The filename must contain a storage point string such as 'gdata1'; parsed
    usage rows are added to the global ``databases`` mapping, creating a
    per-project sqlite database as needed.

    Args:
        filename: path to a gdata usage dump file.

    Raises:
        ValueError: if no 'gdata' storage point can be found in the filename.
    """
    db = None

    storageptstring = 'gdata'

    start = filename.find(storageptstring)
    if start < 0:
        # The original bare `raise` had no active exception to re-raise, which
        # itself errors out; raise an explicit, meaningful exception instead.
        raise ValueError(
            'Could not find storage point (e.g gdata1) in filename {}'.format(filename))
    # Include the character after 'gdata' so we capture e.g. 'gdata1'
    storagept = filename[start:start+len(storageptstring)+1]
    print('Storage Point: {}'.format(storagept))

    with open(filename) as f:

        # Need this loop to support old method of having multiple dumps per file
        while True:
            # Need this try block to gracefully exit the above loop at end of file
            try:
                for line in f:
                    if line.startswith("%%%%%%%%%%%%%%%%%"):
                        # Grab date string from the following line
                        # (next(f) replaces the Python-2-only f.next())
                        date = datetime.datetime.strptime(next(f).strip(os.linesep), "%a %b %d %H:%M:%S %Z %Y")
                        year, quarter = datetoyearquarter(date)
                        # Gobble another line
                        line = next(f)
                        break

                # Assume a certain structure ....
                line = next(f)
                project = line.split()[4].strip(':')
                if project not in databases:
                    dbfile = 'sqlite:///'+os.path.join(dbfileprefix,"usage_{}_{}.db".format(project,date.year))
                    databases[project] = ProjectDataset(project,dbfile)
                db = databases[project]

                # Gobble the three header lines
                line = next(f); line = next(f); line = next(f)

                for line in f:
                    try:
                        (folder,user,size,inodes,scandate) = line.strip(os.linesep).split()
                    except ValueError:
                        # Row no longer matches the 5-column table: table finished
                        break
                    db.adduser(user)
                    if verbose: print('Adding gdata ',folder,user,size,inodes,scandate)
                    db.addgdatausage(storagept,folder,user,parse_size(size.upper()),inodes,scandate)
            except Exception:
                # End of file (StopIteration) or unexpected dump structure:
                # stop parsing this file (best-effort, as before)
                break
Example no. 2
0
def parse_short_file(filename):
    """Parse a /short filesystem usage dump and load records into per-project databases.

    Supports the old format of multiple dumps per file. Creates per-project
    sqlite databases in the global ``databases`` mapping as required.

    Args:
        filename: path to a short usage dump file.
    """
    db = None

    with open(filename) as f:

        # Need this loop to support old method of having multiple dumps per file
        while True:
            # Need this try block to gracefully exit the above loop at end of file
            try:
                for line in f:
                    if line.startswith("%%%%%%%%%%%%%%%%%"):
                        # Grab date string from the following line
                        # (next(f) replaces the Python-2-only f.next())
                        date = datetime.datetime.strptime(next(f).strip(os.linesep), "%a %b %d %H:%M:%S %Z %Y")
                        year, quarter = datetoyearquarter(date)
                        # Gobble another line
                        line = next(f)
                        break

                # Assume a certain structure ....
                line = next(f)
                project = line.split()[4].strip(':')
                if project not in databases:
                    dbfile = 'sqlite:///'+os.path.join(dbfileprefix,"usage_{}_{}.db".format(project,date.year))
                    databases[project] = ProjectDataset(project,dbfile)
                db = databases[project]

                # Gobble the three header lines
                line = next(f); line = next(f); line = next(f)

                for line in f:
                    try:
                        (folder,user,size,inodes,scandate) = line.strip(os.linesep).split()
                    except ValueError:
                        # Row no longer matches the 5-column table: table finished
                        break
                    db.adduser(user)
                    # Converted from a Python 2 print statement (was a SyntaxError
                    # under Python 3) to the print function used elsewhere in the file
                    if verbose: print('Adding short ',folder,user,size,inodes,scandate)
                    db.addshortusage(folder,user,parse_size(size.upper()),inodes,scandate)
            except Exception:
                # End of file (StopIteration) or unexpected dump structure:
                # stop parsing this file (best-effort, as before)
                break
Example no. 3
0
def parse_qstat_json_dump(filename, dbfile, verbose=False):
    """Parse a ``qstat`` JSON dump and insert/update job records in a jobs database.

    Args:
        filename: path to a qstat JSON dump; either the raw jobid->info mapping
            or one nested under a top-level 'Jobs' key.
        dbfile: path to the sqlite jobs database file.
        verbose: if True, print each record before adding it.

    Prints a summary of entries found and records added. Any parsing error is
    reported with the offending jobid and then re-raised.
    """
    db = JobsDataset("sqlite:///{}".format(dbfile))

    numrecords = db.getnumrecords()

    nentries = 0

    with open(filename) as f:

        data = json.load(f)

        # Some dumps nest everything under a top-level 'Jobs' key
        if 'Jobs' in data:
            data = data['Jobs']

        for jobid, info in data.items():

            # Skip the qstat template entry
            if jobid == '_default': continue

            try:
                # Strip off '.r-man2' suffix if it exists
                jobid = jobid.split('.')[0]

                # Must have
                ctime = maybe_get_time(info, 'ctime', must=True)
                qtime = maybe_get_time(info, 'qtime', must=True)
                mtime = maybe_get_time(info, 'mtime', must=True)

                # Store all times as offset from creation time in seconds
                qtime = (qtime - ctime).total_seconds()
                mtime = (mtime - ctime).total_seconds()

                """
                    B  Array job: at least one subjob has started.
                    E  Job is exiting after having run.
                    F  Job is finished.
                    H  Job is held.
                    M  Job was moved to another server.
                    Q  Job is queued.
                    R  Job is running.
                    S  Job is suspended.
                    T  Job is being moved to new location.
                    U  Cycle-harvesting job is suspended due to keyboard activity.
                    W  Job is waiting for its submitter-assigned start time to be reached.
                    X  Subjob has completed execution or has been deleted.
                """

                # Put in some logic checking for job_state?
                stime = maybe_get_time(info, 'stime')

                # Needed to calculate time in the queue
                if stime is None:
                    # Job has not started: measure queue time up to "now" and
                    # flag the start offset as unknown
                    start = datetime.datetime.now()
                    stime = -1.
                else:
                    start = stime
                    stime = (stime - ctime).total_seconds()

                # Create a derived field which is the total time spent queuing
                # before the job started
                waitime = (start - ctime).total_seconds()

                # year = int(info['qtime'].split()[-1])
                year = ctime.year

                username = info['Job_Owner'].split('@')[0]

                resources = info['Resource_List']
                resources_used = info.get('resources_used',{})

                maxwalltime = walltime_to_seconds(resources['walltime'])
                walltime = walltime_to_seconds(resources_used.get('walltime', None))
                maxmem = int(parse_size(resources.get('mem', '0b').upper()))
                ncpus = resources.get('ncpus', None)
                mem = int(parse_size(resources_used.get('mem', '0b').upper()))
                cputime = walltime_to_seconds(resources_used.get('cput', None))
                try:
                    cpuutil = cputime/(walltime*ncpus)
                except (ZeroDivisionError, TypeError):
                    # walltime == 0, or walltime/cputime/ncpus unavailable:
                    # flag utilisation as unknown with the -1 sentinel.
                    # NOTE(review): TypeError added — for jobs that have not run,
                    # resources_used is empty and these values can be None, which
                    # previously escalated through the outer handler and aborted
                    # the whole parse.
                    cpuutil = -1.

                exe = strip_ml(info.get('executable', ''))
                arglist = strip_ml(info.get('argument_list', ''))
                subarglist = info.get('Submit_arguments', '')

                # Use -999 to signify no exit status
                exit_status = info.get('Exit_status',-999)

                if verbose:
                    print(year, info['queue'], jobid, info['project'], username,
                        info['job_state'], info['Job_Name'], resources['jobprio'], exe, arglist + subarglist,
                        ctime, mtime, qtime, stime, waitime,
                        maxwalltime, maxmem, ncpus,
                        walltime, mem, cputime, cpuutil, exit_status)
                db.addjob(year, info['queue'], jobid, info['project'], username,
                        info['job_state'], info['Job_Name'], resources['jobprio'], exe, arglist + subarglist,
                        ctime, mtime, qtime, stime, waitime,
                        maxwalltime, maxmem, ncpus,
                        walltime, mem, cputime, cpuutil, exit_status)
                nentries += 1
            except Exception:
                # Report which job failed before re-raising
                print("Error parsing {}".format(jobid))
                print(info)
                raise

    newrecords = db.getnumrecords() - numrecords

    print("Found {} entries. Added {} new records, {} records updated or unchanged".format(nentries, newrecords, nentries - newrecords))
Example no. 4
0
def parse_SU_file(filename):
    """Parse a service-unit (SU) usage report and load grants, per-user usage
    and storage allocations into per-project databases.

    The report is a stanza-based text file; the boolean state flags track which
    table (system queues, per-user usage, storage) the current line belongs to.

    Args:
        filename: path to an SU usage report file.
    """
    insystem = False; instorage = False; inuser = False

    with open(filename) as f:

        year = ''; quarter = ''
        for line in f:
            if line.startswith("%%%%%%%%%%%%%%%%%"):
                # Grab date string from the following line
                # (next(f) replaces the Python-2-only f.next())
                date = datetime.datetime.strptime(next(f).strip(os.linesep), "%a %b %d %H:%M:%S %Z %Y").date()
            elif line.startswith("Usage Report:") and "Compute" in line:
                words = line.split()
                project = words[2].split('=')[1]
                year, quarter = words[4].split('=')[1].split('.')
                # Converted from a Python 2 print statement (was a SyntaxError
                # under Python 3)
                print(year, quarter)
                startdate, enddate = words[5].split('-')
                startdate = datetime.datetime.strptime(startdate.strip('('),"%d/%m/%Y").date()
                enddate = datetime.datetime.strptime(enddate.strip(')'),"%d/%m/%Y").date()
                if project not in databases:
                    dbfile = 'sqlite:///'+os.path.join(dbfileprefix,"usage_{}_{}.db".format(project,year))
                    databases[project] = ProjectDataset(project,dbfile)
                db = databases[project]
                db.addquarter(year,quarter,startdate,enddate)
            elif line.startswith("Total Grant:"):
                total = line.split()[2]
                db.addgrant(year,quarter,total)
            elif line.startswith("System       Queue"):
                insystem = True
                # Skip the header underline
                next(f)
            elif insystem:
                try:
                    (system,queue,weight,usecpu,usewall,usesu,tmp,tmp,tmp) = line.strip(os.linesep).split()
                except ValueError:
                    # Row no longer matches the table: table finished
                    insystem = False
                    continue
                db.addsystemqueue(system,queue,weight)
                db.addprojectusage(date,system,queue,usecpu,usewall,usesu)
            elif line.startswith("Batch Queue Usage per User"):
                inuser = True
                # Gobble three lines
                next(f); next(f); next(f)
            elif inuser:
                try:
                    (user,usecpu,usewall,usesu,tmp) = line.strip(os.linesep).split()
                except ValueError:
                    inuser = False
                    continue
                db.adduser(user)
                # Converted from a Python 2 print statement
                if verbose: print('Add usage ',date,user,usecpu,usewall,usesu)
                db.adduserusage(date,user,usecpu,usewall,usesu)
            elif line.startswith("System    StoragePt"):
                instorage = True
                # Skip the header underline
                next(f)
            elif instorage:
                try:
                    (systemname,storagept,grant,tmp,tmp,igrant,tmp,tmp) = line.strip(os.linesep).split()
                except ValueError:
                    instorage = False
                    continue
                print(year, quarter)
                db.addsystemstorage(systemname,storagept,year,quarter,parse_size(grant.upper()),parse_inodenum(igrant))