Ejemplo n.º 1
0
def read_app_started_csv(path):
    """Read the CSV at ``path`` and build one ``Job`` per row.

    For every row the job's ``name`` is taken from the ``queue`` column,
    ``run_time`` is the ``elapsedTime`` column multiplied by 0.001, and
    ``memory_seconds`` is copied from the ``memorySeconds`` column.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A list of ``Job`` objects, one per row, in row order.
    """
    def to_job(record):
        entry = Job()
        entry.name = record['queue']
        # presumably elapsedTime is in milliseconds, making this seconds
        # -- TODO confirm against the data source
        entry.run_time = record['elapsedTime'] * 0.001
        entry.memory_seconds = record['memorySeconds']
        return entry

    frame = pd.read_csv(path)
    return [to_job(record) for _, record in frame.iterrows()]
Ejemplo n.º 2
0
def read_app_csv(path):
    """Read the CSV at ``path`` and build one ``Job`` per row.

    For every row the job's ``name`` is taken from the ``queue`` column,
    ``run_time`` is the ``elapsedTime`` column multiplied by 0.001, and
    ``memory_seconds`` is the ``allocatedMB`` column multiplied by 300.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A list of ``Job`` objects, one per row, in row order.
    """
    # Five minutes per sampling, expressed in seconds.
    seconds_per_sample = 300

    def to_job(record):
        entry = Job()
        entry.name = record['queue']
        # presumably elapsedTime is in milliseconds -- TODO confirm
        entry.run_time = record['elapsedTime'] * 0.001
        entry.memory_seconds = record['allocatedMB'] * seconds_per_sample
        return entry

    frame = pd.read_csv(path)
    return [to_job(record) for _, record in frame.iterrows()]
Ejemplo n.º 3
0
def read_app_started_csv(path):
    """Turn each row of the CSV at ``path`` into a ``Job``.

    Column mapping per row:
      * ``queue``         -> ``Job.name``
      * ``elapsedTime``   -> ``Job.run_time`` (value multiplied by 0.001)
      * ``memorySeconds`` -> ``Job.memory_seconds``

    Args:
        path: CSV file location handed to ``pd.read_csv``.

    Returns:
        List of the created ``Job`` objects in row order.
    """
    table = pd.read_csv(path)
    started = []
    for _, record in table.iterrows():
        item = Job()
        item.name = record['queue']
        # presumably a milliseconds-to-seconds conversion -- TODO confirm
        item.run_time = record['elapsedTime'] * 0.001
        item.memory_seconds = record['memorySeconds']
        started.append(item)
    return started
Ejemplo n.º 4
0
def read_app_csv(path):
    """Turn each row of the CSV at ``path`` into a ``Job``.

    Column mapping per row:
      * ``queue``       -> ``Job.name``
      * ``elapsedTime`` -> ``Job.run_time`` (value multiplied by 0.001)
      * ``allocatedMB`` -> ``Job.memory_seconds`` (value multiplied by 300)

    Args:
        path: CSV file location handed to ``pd.read_csv``.

    Returns:
        List of the created ``Job`` objects in row order.
    """
    # Five minutes per sampling, expressed in seconds.
    sampling_seconds = 300

    table = pd.read_csv(path)
    sampled = []
    for _, record in table.iterrows():
        item = Job()
        item.name = record['queue']
        # presumably a milliseconds-to-seconds conversion -- TODO confirm
        item.run_time = record['elapsedTime'] * 0.001
        item.memory_seconds = record['allocatedMB'] * sampling_seconds
        sampled.append(item)
    return sampled
Ejemplo n.º 5
0
def read_app_csv(path):
    """Generate a random batch of synthetic ``Job`` objects.

    Despite the name, nothing is read from disk: ``path`` is accepted but
    never used by this implementation. The number of jobs and each job's
    queue name, wait time and run time are drawn from ``np.random``, so the
    result depends on NumPy's global random state.

    Args:
        path: Unused; kept so the signature matches the CSV-backed readers.

    Returns:
        A list of ``Job`` objects with ``name``, ``wait_time``, ``run_time``
        and ``memory_seconds`` populated.

    Side effects:
        Prints how many jobs were generated.
    """
    job_count = np.random.randint(10, 50)
    queue = ['spark', 'hive', 'ProgrammerAlliance']
    jobs = []
    for _ in range(job_count):
        job = Job()
        # randint's upper bound is exclusive; tie it to the list length so
        # adding or removing queue names cannot cause an IndexError or leave
        # a queue unreachable.
        job.name = queue[np.random.randint(0, len(queue))]
        job.wait_time = np.random.randint(0, 25)
        job.run_time = np.random.randint(10, 40)
        job.memory_seconds = 1024 * job.run_time * 0.05
        jobs.append(job)
    print('%d jobs finished during this interval' % job_count)
    return jobs
Ejemplo n.º 6
0
def read_app_stopped_csv(path):
    """Read the CSV at ``path`` and build one ``Job`` per row.

    For every row the job's ``name`` comes from the ``queue`` column,
    ``run_time`` is ``elapsedTime`` multiplied by 0.001, and
    ``memory_seconds`` starts as the ``memorySeconds`` column. When
    ``run_time`` exceeds 150, ``memory_seconds`` is scaled down by the
    factor ``150 / run_time``.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A list of ``Job`` objects, one per row, in row order.
    """
    # Run-time threshold above which memory_seconds is scaled down
    # proportionally (presumably a window length in seconds -- TODO confirm).
    run_time_cap = 150

    def to_job(record):
        entry = Job()
        entry.name = record['queue']
        entry.run_time = record['elapsedTime'] * 0.001
        entry.memory_seconds = record['memorySeconds']
        if entry.run_time > run_time_cap:
            entry.memory_seconds = (
                entry.memory_seconds * run_time_cap / entry.run_time
            )
        return entry

    frame = pd.read_csv(path)
    return [to_job(record) for _, record in frame.iterrows()]