def read_app_stopped_csv(path):
    """Load the CSV at ``path`` and build one ``Job`` per row.

    For every row the job's ``name`` comes from the ``queue`` column, its
    ``run_time`` is ``elapsedTime`` multiplied by 0.001, and its
    ``memory_seconds`` comes from the ``memorySeconds`` column.  When the
    run time is greater than 150, ``memory_seconds`` is scaled by
    ``150 / run_time``.

    Returns:
        A list of ``Job`` objects in row order.
    """
    def _to_job(record):
        # Build a single Job from one CSV row.
        entry = Job()
        entry.name = record['queue']
        # presumably milliseconds -> seconds; verify against the CSV producer
        entry.run_time = record['elapsedTime'] * 0.001
        entry.memory_seconds = record['memorySeconds']
        if entry.run_time > 150:
            # Keep only the share of memory-seconds that corresponds to
            # 150 units of run time.
            entry.memory_seconds = entry.memory_seconds * 150 / entry.run_time
        return entry

    frame = pd.read_csv(path)
    return [_to_job(record) for _, record in frame.iterrows()]
def read_app_csv(path):
    """Load the CSV at ``path`` and build one ``Job`` per row.

    For every row the job's ``name`` comes from the ``queue`` column, its
    ``run_time`` is ``elapsedTime`` multiplied by 0.001, and its
    ``memory_seconds`` is ``allocatedMB`` multiplied by 300.

    Returns:
        A list of ``Job`` objects in row order.
    """
    # Five minutes per sampling, expressed in seconds.
    sampling_seconds = 300

    table = pd.read_csv(path)
    collected = []
    for _, record in table.iterrows():
        entry = Job()
        entry.name = record['queue']
        # presumably milliseconds -> seconds; verify against the CSV producer
        entry.run_time = record['elapsedTime'] * 0.001
        entry.memory_seconds = record['allocatedMB'] * sampling_seconds
        collected.append(entry)
    return collected
def read_app_stopped_csv(path):
    """Read the CSV at ``path`` and return one ``Job`` per row.

    Each job is populated from the row as follows:

    * ``name`` -- the ``queue`` column.
    * ``run_time`` -- the ``elapsedTime`` column multiplied by 0.001.
    * ``memory_seconds`` -- the ``memorySeconds`` column; when ``run_time``
      is greater than 150 the value is scaled by ``150 / run_time``.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A list of ``Job`` objects in row order (empty if the CSV has no rows).
    """
    df = pd.read_csv(path)
    jobs = []
    for _, row in df.iterrows():
        job = Job()
        job.name = row['queue']
        # NOTE: wait_time is not set here; a commented-out draft filled it
        # with a random number as a temporary stand-in.
        # presumably milliseconds -> seconds; verify against the CSV producer
        job.run_time = row['elapsedTime'] * 0.001
        job.memory_seconds = row['memorySeconds']
        if job.run_time > 150:
            # Keep only the share of memory-seconds that corresponds to
            # 150 units of run time.
            job.memory_seconds = job.memory_seconds * 150 / job.run_time
        jobs.append(job)
    return jobs
def read_app_started_csv(path):
    """Read the CSV at ``path`` and return one ``Job`` per row.

    Each job is populated from the row as follows:

    * ``name`` -- the ``queue`` column.
    * ``run_time`` -- the ``elapsedTime`` column multiplied by 0.001.
    * ``memory_seconds`` -- the ``memorySeconds`` column, unmodified.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A list of ``Job`` objects in row order (empty if the CSV has no rows).
    """
    df = pd.read_csv(path)
    jobs = []
    for _, row in df.iterrows():
        job = Job()
        job.name = row['queue']
        # presumably milliseconds -> seconds; verify against the CSV producer
        job.run_time = row['elapsedTime'] * 0.001
        job.memory_seconds = row['memorySeconds']
        jobs.append(job)
    return jobs
def read_app_csv(path):
    """Read the CSV at ``path`` and return one ``Job`` per row.

    Each job is populated from the row as follows:

    * ``name`` -- the ``queue`` column.
    * ``run_time`` -- the ``elapsedTime`` column multiplied by 0.001.
    * ``memory_seconds`` -- the ``allocatedMB`` column multiplied by 300.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A list of ``Job`` objects in row order (empty if the CSV has no rows).
    """
    df = pd.read_csv(path)
    jobs = []
    for _, row in df.iterrows():
        job = Job()
        job.name = row['queue']
        # presumably milliseconds -> seconds; verify against the CSV producer
        job.run_time = row['elapsedTime'] * 0.001
        # 300 s: five minutes per sampling.
        job.memory_seconds = row['allocatedMB'] * 300
        jobs.append(job)
    return jobs
def read_app_csv(path):
    """Generate a random batch of ``Job`` objects instead of reading a file.

    The number of jobs is drawn from ``np.random.randint(10, 50)``.  Each job
    gets a queue name picked at random from a fixed list, a random
    ``wait_time`` from ``randint(0, 25)``, a random ``run_time`` from
    ``randint(10, 40)``, and ``memory_seconds = 1024 * run_time * 0.05``.
    A summary line with the job count is printed before returning.

    Args:
        path: Not read by this implementation; kept so the signature matches
            the CSV-backed readers.

    Returns:
        A list of the generated ``Job`` objects.
    """
    job_count = np.random.randint(10, 50)
    queue = ['spark', 'hive', 'ProgrammerAlliance']
    jobs = []
    for _ in range(job_count):
        job = Job()
        # Bound the index by the list length so that editing ``queue`` cannot
        # produce an out-of-range index (randint's upper bound is exclusive).
        job.name = queue[np.random.randint(0, len(queue))]
        job.wait_time = np.random.randint(0, 25)
        job.run_time = np.random.randint(10, 40)
        job.memory_seconds = 1024 * job.run_time * 0.05
        jobs.append(job)
    print('%d jobs finished during this interval' % job_count)
    return jobs