def run_taro_experiments():
    experiments = get_taro_experiments()
    completed_experiment_procs = []
    logging.info('Going to run {} experiments.'.format(len(experiments)))
    running_experiment = 1
    for experiment in experiments.values():
        try:
            print('Running experiment {}/{}'.format(running_experiment,
                                                    len(experiments)))
            cctestbed.run_local_command(
                '/opt/bess/bessctl/bessctl daemon stop')
            proc = experiment.run()
            completed_experiment_procs.append(proc)
            running_experiment += 1
        except Exception as e:
            print('ERROR RUNNING EXPERIMENT: {}'.format(e))
    for proc in completed_experiment_procs:
        logging.info('Waiting for subprocess to finish PID={}'.format(proc.pid))
        proc.wait()
        if proc.returncode != 0:
            logging.warning('Error running cmd PID={}'.format(proc.pid))

def start_bess_for_local_video(exp, duration):
    with ExitStack() as stack:
        exp._run_tcpdump('server', stack)
        exp._run_tcpdump('server', stack, capture_http=True)
        cctestbed.stop_bess()
        stack.enter_context(exp._run_bess(
            ping_source='client',
            skip_ping=False,
            bess_config_name='active-middlebox-pmd-fairness'))
        # give bess time to start
        time.sleep(5)
        exp._show_bess_pipeline()
        stack.enter_context(exp._run_bess_monitor())
        util.start_apache_server(exp.flows[0])
        video_flow = start_single_local_video_flow(exp.flows[0], exp, stack)
        logging.info('Waiting for flow to finish')
        # wait for flow to finish
        video_flow._wait()
        logging.info('Video flow finished')
        # add a time buffer before finishing up the experiment
        time.sleep(5)
        exp._show_bess_pipeline()
        cmd = '/opt/bess/bessctl/bessctl command module queue0 get_status EmptyArg'
        print(cctestbed.run_local_command(cmd))
        util.stop_local_server_and_cleanup(exp)

def get_ping_rtt(instance_ip):
    #cmd = "nping --icmp -v-1 -H -c 5 {} | grep -oP 'Avg rtt:\s+\K.*(?=ms)'".format(instance_ip)
    # NOTE: the original piped through awk with broken quoting (the shell
    # expanded $4 to nothing) and a stray trailing comma made `cmd` a tuple.
    # The split('=')/split('/') parsing below works directly on the ping
    # summary line, so the awk stage is dropped.
    cmd = 'ping -c 5 {} | tail -1'.format(instance_ip)
    line = cctestbed.run_local_command(cmd, shell=True)
    print("get ping rtt: ", line)
    rtt = float(line.split('=')[-1].split('/')[1])
    return rtt

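# Hedged sanity check (assumption: Linux iputils ping summary format). The
# parsing above expects the last line of `ping -c 5` output to look like
# "rtt min/avg/max/mdev = 20.1/23.4/30.2/3.1 ms"; this minimal sketch
# exercises the same split logic on a canned sample line.
def _parse_ping_avg(summary_line):
    # split on '=' to isolate "20.1/23.4/30.2/3.1 ms", then take field 1 (avg)
    return float(summary_line.split('=')[-1].split('/')[1])

assert _parse_ping_avg('rtt min/avg/max/mdev = 20.1/23.4/30.2/3.1 ms') == 23.4
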
def get_website_ip(url):
    url_parts = list(urlsplit(url.strip()))
    hostname = url_parts[1]
    ip_addrs = cctestbed.run_local_command(
        "nslookup {} | awk '/^Address: / {{ print $2 ; exit }}'".format(hostname),
        shell=True)
    ip_addr = ip_addrs.split('\n')[0]
    if ip_addr.strip() == '':
        raise ValueError('Could not find IP addr for {}'.format(url))
    return ip_addr

def get_video_server_host(url):
    video_urls = cctestbed.run_local_command(
        "youtube-dl --youtube-skip-dash-manifest -g {}".format(url),
        shell=True)
    # print(video_urls)
    video_url = video_urls.split('\n')[0]
    audio_url = video_urls.split('\n')[1]
    print("Video URL: ", video_url)
    print("Audio URL: ", audio_url)
    url_parts = list(urlsplit(video_url.strip()))
    hostname = url_parts[1]
    print("video hostname: {}".format(hostname))
    return hostname

def get_video_server_ip(hostname):
    video_ip_addrs = cctestbed.run_local_command(
        "nslookup {} | awk '/^Address: / {{ print $2 ; exit }}'".format(hostname),
        shell=True)
    print("Video IPs: ", video_ip_addrs)
    video_ip_addr = video_ip_addrs.split('\n')[0]
    print("ip address of video server: {}".format(video_ip_addr))
    if video_ip_addr.strip() == '':
        # NOTE: the original referenced an undefined `video_url` here;
        # report the hostname we actually tried to resolve instead
        raise ValueError('Could not find IP addr for {}'.format(hostname))
    return video_ip_addr

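# Hedged usage sketch (assumptions: the helpers above and a reachable video
# URL; this wrapper is illustrative, not part of the original code). Shows how
# the video helpers chain together to derive the residual delay an experiment
# must add on top of the server's natural RTT, mirroring the
# `rtt - website_rtt` pattern used by run_experiment below.
def _residual_delay_for_video(url, target_rtt=35):
    hostname = get_video_server_host(url)       # resolve CDN hostname via youtube-dl
    server_ip = get_video_server_ip(hostname)   # resolve hostname to an IP
    natural_rtt = get_ping_rtt(server_ip)       # measure the path's own RTT
    if natural_rtt >= target_rtt:
        raise ValueError('Natural RTT {} >= target RTT {}'.format(
            natural_rtt, target_rtt))
    return target_rtt - natural_rtt
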
def start_bess_for_combinatory_flows(experiment, includeWebsite, duration):
    with ExitStack() as stack:
        util.prerequisite_for_combinatory_tests(experiment, stack, includeWebsite)
        ssh_client = util.get_ssh_client_for_server_node(experiment)
        with ssh_client as ssh_client:
            local_web_service_required, single_local_flow_details = util.isApacheNeeded(
                experiment)
            print("local_web_service_required & single_local_flow_details >",
                  local_web_service_required, single_local_flow_details)
            if local_web_service_required:
                print("Start Apache")
                util.start_apache_server(single_local_flow_details)
            # TODO: measure RTT of third-party services again after the
            # remaining delay is set
            # iperf
            flowImpl.start_iperf_flows(experiment, stack)
            # Web video
            contains_webvideo_flows = flowImpl.start_web_video_flows(
                experiment, stack)
            # Local video
            contains_localvideo_flows = flowImpl.start_local_video_flows(
                experiment, stack)
            # Local website
            contains_local_website_flows = flowImpl.start_local_website_flows(
                ssh_client, experiment, stack)
            # Website
            contains_website_flows = flowImpl.start_website_flows(
                ssh_client, experiment, stack)
            time.sleep(duration + 5)
            # Save website info onto a file
            if contains_webvideo_flows:
                clean_up_web_video(experiment, duration)
            if contains_webvideo_flows or contains_website_flows:
                util.write_webdata_to_log(experiment, duration)
            if contains_localvideo_flows or contains_local_website_flows:
                util.stop_local_server_and_cleanup(experiment)
            experiment._show_bess_pipeline()
            cmd = '/opt/bess/bessctl/bessctl command module queue0 get_status EmptyArg'
            print(cctestbed.run_local_command(cmd))

def start_bess_for_iperf(exp, duration):
    with ExitStack() as stack:
        exp._run_tcpdump('server', stack)
        cctestbed.stop_bess()
        stack.enter_context(exp._run_bess(
            ping_source='client',
            skip_ping=False,
            bess_config_name='active-middlebox-pmd-fairness'))
        # give bess time to start
        time.sleep(5)
        exp._show_bess_pipeline()
        stack.enter_context(exp._run_bess_monitor())
        start_iperf_flows(exp, stack)
        time.sleep(duration + 5)
        exp._show_bess_pipeline()
        cmd = '/opt/bess/bessctl/bessctl command module queue0 get_status EmptyArg'
        print(cctestbed.run_local_command(cmd))

def start_bess_for_website(exp, duration, web_data):
    with ExitStack() as stack:
        print(web_data)
        stack.enter_context(util.add_dnat_rule(exp, web_data['url_ip']))
        stack.enter_context(util.add_route(exp, web_data['url_ip']))
        stack.enter_context(util.add_dns_rule(exp, web_data['website'],
                                              web_data['url_ip']))
        exp._run_tcpdump('server', stack)
        # run the flow
        # turns out there is a bug when using subprocess and Popen in Python 3.5
        # so skip ping needs to be true
        # https://bugs.python.org/issue27122
        cctestbed.stop_bess()
        stack.enter_context(exp._run_bess(
            ping_source='server',
            skip_ping=False,
            bess_config_name='active-middlebox-pmd-fairness'))
        # give bess some time to start
        time.sleep(5)
        exp._show_bess_pipeline()
        stack.enter_context(exp._run_bess_monitor())
        stack.enter_context(exp._run_rtt_monitor())
        ssh_client = cctestbed.get_ssh_client(exp.server.ip_wan,
                                              exp.server.username,
                                              key_filename=exp.server.key_filename)
        with ssh_client as ssh_client:
            start_website_flows(ssh_client, exp, stack)
            # exit_status = stdout.channel.recv_exit_status()
            time.sleep(duration + 5)
            # flow_end_time = time.time()
            logging.info('Flow ran for {} seconds'.format(duration + 5))
        exp._show_bess_pipeline()
        cmd = '/opt/bess/bessctl/bessctl command module queue0 get_status EmptyArg'
        print(cctestbed.run_local_command(cmd))
        logging.info('Dumping website data to log: {}'.format(
            exp.logs['website_log']))
        with open(exp.logs['website_log'], 'w') as f:
            website_info = {}
            website_info['website'] = web_data['website']
            website_info['url'] = web_data['url']
            website_info['website_rtt'] = web_data['website_rtt']
            website_info['experiment_rtt'] = web_data['experiment_rtt']
            website_info['delay'] = web_data['delay']
            website_info['url_ip'] = web_data['url_ip']
            website_info['flow_runtime'] = duration + 5
            json.dump(website_info, f)

def run_experiment(website, url, btlbw=10, queue_size=128, rtt=35, force=False):
    experiment_name = '{}bw-{}rtt-{}q-{}'.format(btlbw, rtt, queue_size, website)
    if not force and is_completed_experiment(experiment_name):
        return (None, '')
    logging.info('Creating experiment for website: {}'.format(website))
    url_ip = get_website_ip(url)
    logging.info('Got website IP: {}'.format(url_ip))
    website_rtt = int(float(get_nping_rtt(url_ip)))
    logging.info('Got website RTT: {}'.format(website_rtt))
    if website_rtt >= rtt:
        logging.warning('Skipping experiment with website RTT {} >= {}'.format(
            website_rtt, rtt))
        return (-1, '')
    client = HOST_CLIENT_TEMPLATE
    client['ip_wan'] = url_ip
    client = cctestbed.Host(**client)
    server = HOST_SERVER
    server_nat_ip = HOST_CLIENT.ip_wan  # '128.104.222.182' taro
    server_port = 5201
    client_port = 5555
    flow = {'ccalg': 'reno',
            'end_time': 60,
            'rtt': rtt - website_rtt,
            'start_time': 0}
    flows = [cctestbed.Flow(ccalg=flow['ccalg'],
                            start_time=flow['start_time'],
                            end_time=flow['end_time'],
                            rtt=flow['rtt'],
                            server_port=server_port,
                            client_port=client_port,
                            client_log=None,
                            server_log=None,
                            kind='website',
                            client=client)]
    exp = cctestbed.Experiment(name=experiment_name,
                               btlbw=btlbw,
                               queue_size=queue_size,
                               flows=flows,
                               server=server,
                               client=client,
                               config_filename='experiments-all-ccalgs-aws.yaml',
                               server_nat_ip=server_nat_ip)
    logging.info('Running experiment: {}'.format(exp.name))
    # make sure tcpdump is cleaned up
    logging.info('Making sure tcpdump is cleaned up')
    with cctestbed.get_ssh_client(
            exp.server.ip_wan,
            username=exp.server.username,
            key_filename=exp.server.key_filename) as ssh_client:
        cctestbed.exec_command(ssh_client, exp.client.ip_wan,
                               'sudo pkill -9 tcpdump')
    with ExitStack() as stack:
        # add DNAT rule
        stack.enter_context(add_dnat_rule(exp, url_ip))
        # add route to URL
        stack.enter_context(add_route(exp, url_ip))
        # add DNS entry
        stack.enter_context(add_dns_rule(exp, website, url_ip))
        exp._run_tcpdump('server', stack)
        # run the flow
        # turns out there is a bug when using subprocess and Popen in Python 3.5
        # so skip ping needs to be true
        # https://bugs.python.org/issue27122
        cctestbed.stop_bess()
        stack.enter_context(exp._run_bess(ping_source='server', skip_ping=False))
        # give bess some time to start
        time.sleep(5)
        exp._show_bess_pipeline()
        stack.enter_context(exp._run_bess_monitor())
        stack.enter_context(exp._run_rtt_monitor())
        with cctestbed.get_ssh_client(exp.server.ip_wan,
                                      exp.server.username,
                                      key_filename=exp.server.key_filename) as ssh_client:
            filename = os.path.basename(url)
            if filename.strip() == '':
                logging.warning('Could not get filename from URL')
            start_flow_cmd = ('timeout 65s wget --no-check-certificate --no-cache '
                              '--delete-after --connect-timeout=10 --tries=3 '
                              '--bind-address {} -P /tmp/ "{}" '
                              '|| rm -f /tmp/{}.tmp*').format(
                                  exp.server.ip_lan, url, filename)
            # won't return until the flow is done
            flow_start_time = time.time()
            _, stdout, _ = cctestbed.exec_command(ssh_client, exp.server.ip_wan,
                                                  start_flow_cmd)
            exit_status = stdout.channel.recv_exit_status()
            flow_end_time = time.time()
            logging.info('Flow ran for {} seconds'.format(
                flow_end_time - flow_start_time))
        exp._show_bess_pipeline()
        cmd = '/opt/bess/bessctl/bessctl command module queue0 get_status EmptyArg'
        print(cctestbed.run_local_command(cmd))
        logging.info('Dumping website data to log: {}'.format(
            exp.logs['website_log']))
        with open(exp.logs['website_log'], 'w') as f:
            website_info = {}
            website_info['website'] = website
            website_info['url'] = url
            website_info['website_rtt'] = website_rtt
            website_info['url_ip'] = url_ip
            website_info['flow_runtime'] = flow_end_time - flow_start_time
            json.dump(website_info, f)
        if exit_status != 0:
            if exit_status == 124:  # timeout exit status
                print('Timeout. Flow longer than 65s.')
                logging.warning('Timeout. Flow longer than 65s.')
            else:
                logging.error(stdout.read())
                raise RuntimeError('Error running flow.')
    proc = exp._compress_logs_url()
    return (proc, '{}-{}'.format(experiment_name, exp.exp_time))

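# Hedged driver sketch (assumptions: a `websites` iterable of (name, url)
# pairs supplied by the caller; run_experiment as defined above; this wrapper
# is illustrative, not part of the original code). It mirrors the
# wait-for-compression pattern of run_taro_experiments: collect the compress
# procs that run_experiment returns, then reap them at the end.
def run_website_experiments(websites):
    completed = []
    for website, url in websites:
        proc, tarname = run_experiment(website, url)
        # run_experiment returns (None, '') for completed experiments and
        # (-1, '') for skipped ones; only real procs need to be reaped
        if proc not in (None, -1):
            completed.append((proc, tarname))
    for proc, tarname in completed:
        logging.info('Waiting for compression of {} (PID={})'.format(
            tarname, proc.pid))
        proc.wait()
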
def run_rtt_monitor(url_ip):
    # BUG: the second format argument is an empty string, so the shell
    # redirect has no target ("> &") and the command fails; a log file path
    # needs to be supplied here. Left unfilled since the original does not
    # specify one.
    cmd = "nping --delay 5s {} > {} &".format(url_ip, '')
    rtt = cctestbed.run_local_command(cmd, shell=True)
    return rtt

def get_nping_rtt(url_ip):
    # nping prints a summary like
    # "Max rtt: 30.28ms | Min rtt: 29.57ms | Avg rtt: 29.87ms";
    # the grep extracts the text between "Avg rtt: " and "ms"
    cmd = r"nping -v-1 -H -c 5 {} | grep -oP 'Avg rtt:\s+\K.*(?=ms)'".format(url_ip)
    rtt = cctestbed.run_local_command(cmd, shell=True)
    return rtt

def run_experiment(website1, url1, website2, url2, btlbw=10, queue_size=128,
                   rtt=35, force=False):
    experiment_name = '{}bw-{}rtt-{}q-{}-{}'.format(btlbw, rtt, queue_size,
                                                    website1, website2)
    if not force and is_completed_experiment(experiment_name):
        return
    else:
        if ran_experiment_today(experiment_name):
            return
    logging.info('Creating experiment for website1: {} website2: {}'.format(
        website1, website2))
    url_ip1 = get_website_ip(url1)
    url_ip2 = get_website_ip(url2)
    logging.info('Got website1 IP: {} website2 IP: {}'.format(url_ip1, url_ip2))
    website_rtt1 = int(float(get_nping_rtt(url_ip1)))
    website_rtt2 = int(float(get_nping_rtt(url_ip2)))
    logging.info('Got website1 RTT: {} website2 RTT: {}'.format(
        website_rtt1, website_rtt2))
    if website_rtt1 >= rtt:
        logging.warning('Skipping experiment with website1 RTT {} >= {}'.format(
            website_rtt1, rtt))
        return -1
    elif website_rtt2 >= rtt:
        logging.warning('Skipping experiment with website2 RTT {} >= {}'.format(
            website_rtt2, rtt))
        return -1
    client = HOST_CLIENT_TEMPLATE
    # TODO: which IP should be used for the client?
    client['ip_wan'] = url_ip1
    client = cctestbed.Host(**client)
    server = HOST_SERVER
    server_nat_ip = HOST_CLIENT.ip_wan  # '128.104.222.182' taro
    server_port = 5201
    client_port = 5555
    flow1 = {'ccalg': 'reno', 'end_time': 60, 'rtt': rtt - website_rtt1,
             'start_time': 0}
    flow2 = {'ccalg': 'reno', 'end_time': 60, 'rtt': rtt - website_rtt2,
             'start_time': 0}
    flows = [cctestbed.Flow(ccalg=flow1['ccalg'],
                            start_time=flow1['start_time'],
                            end_time=flow1['end_time'],
                            rtt=flow1['rtt'],
                            server_port=server_port,
                            client_port=client_port,
                            client_log=None,
                            server_log=None),
             cctestbed.Flow(ccalg=flow2['ccalg'],
                            start_time=flow2['start_time'],
                            end_time=flow2['end_time'],
                            rtt=flow2['rtt'],
                            server_port=server_port,
                            client_port=client_port,
                            client_log=None,
                            server_log=None)]
    exp = cctestbed.Experiment(name=experiment_name,
                               btlbw=btlbw,
                               queue_size=queue_size,
                               flows=flows,
                               server=server,
                               client=client,
                               config_filename='experiments-all-ccalgs-aws.yaml',
                               server_nat_ip=server_nat_ip)
    logging.info('Running experiment: {}'.format(exp.name))
    # make sure tcpdump is cleaned up
    logging.info('Making sure tcpdump is cleaned up')
    with cctestbed.get_ssh_client(
            exp.server.ip_wan,
            username=exp.server.username,
            key_filename=exp.server.key_filename) as ssh_client:
        cctestbed.exec_command(ssh_client, exp.client.ip_wan,
                               'sudo pkill -9 tcpdump')
    with ExitStack() as stack:
        # add DNAT rules
        stack.enter_context(add_dnat_rule(exp, url_ip1))
        stack.enter_context(add_dnat_rule(exp, url_ip2))
        # add routes to URLs
        stack.enter_context(add_route(exp, url_ip1))
        stack.enter_context(add_route(exp, url_ip2))
        # add DNS entries
        stack.enter_context(add_dns_rule(exp, website1, url_ip1))
        stack.enter_context(add_dns_rule(exp, website2, url_ip2))
        exp._run_tcpdump('server', stack)
        # run the flow
        # turns out there is a bug when using subprocess and Popen in Python 3.5
        # so skip ping needs to be true
        # https://bugs.python.org/issue27122
        cctestbed.stop_bess()
        stack.enter_context(exp._run_bess(ping_source='server', skip_ping=False))
        # give bess some time to start
        time.sleep(5)
        exp._show_bess_pipeline()
        stack.enter_context(exp._run_bess_monitor())
        stack.enter_context(exp._run_rtt_monitor())
        with cctestbed.get_ssh_client(exp.server.ip_wan,
                                      exp.server.username,
                                      key_filename=exp.server.key_filename) as ssh_client:
            filename1 = os.path.basename(url1)
            filename2 = os.path.basename(url2)
            if filename1.strip() == '':
                logging.warning('Could not get filename from URL 1')
            if filename2.strip() == '':
                logging.warning('Could not get filename from URL 2')
            # start the first flow in the background and the second in the foreground
            start_flow_cmd1 = ('timeout 65s wget --no-cache --delete-after '
                               '--connect-timeout=10 --tries=3 --bind-address {} '
                               '-P /tmp/ {} || rm -f /tmp/{}.tmp* &').format(
                                   exp.server.ip_lan, url1, filename1)
            start_flow_cmd2 = ('timeout 65s wget --no-cache --delete-after '
                               '--connect-timeout=10 --tries=3 --bind-address {} '
                               '-P /tmp/ {} || rm -f /tmp/{}.tmp*').format(
                                   exp.server.ip_lan, url2, filename2)
            # won't return until the flow is done
            flow_start_time = time.time()
            _, _, _ = cctestbed.exec_command(ssh_client, exp.server.ip_wan,
                                             start_flow_cmd1)
            _, stdout, _ = cctestbed.exec_command(ssh_client, exp.server.ip_wan,
                                                  start_flow_cmd2)
            exit_status = stdout.channel.recv_exit_status()
            flow_end_time = time.time()
            logging.info('Flow ran for {} seconds'.format(
                flow_end_time - flow_start_time))
        exp._show_bess_pipeline()
        cmd = '/opt/bess/bessctl/bessctl command module queue0 get_status EmptyArg'
        print(cctestbed.run_local_command(cmd))
        if exit_status != 0:
            if exit_status == 124:  # timeout exit status
                print('Timeout. Flow longer than 65s.')
                logging.warning('Timeout. Flow longer than 65s.')
            else:
                logging.error(stdout.read())
                raise RuntimeError('Error running flow.')
    proc = exp._compress_logs_url()
    return (proc, exp.tar_filename, experiment_name)

def load_experiments(experiment_name_patterns, remote=True, force_local=False,
                     remote_username=REMOTE_USERNAME, remote_ip=REMOTE_IP_ADDR,
                     load_queue=False, clean=False, parallel=True,
                     min_num_files=0, min_date=None, remove_duplicates=True):
    """Load all experiments into experiment analyzers.

    experiment_name_patterns : list of str
        Each pattern is expanded as '{}.tar.gz'.format(experiment_name_pattern).
    remote : bool, (default: True)
        If True, look for experiments remotely. If False, only look locally.
    force_local : bool, (default: False)
        If True, always look for local experiments. If False, only look for
        local experiments when no remote experiments are found.
    clean : bool
        If True, delete all local files matching this experiment name pattern
        before downloading again.
    parallel : bool
        If True, download experiments in parallel.
    min_num_files : int
        If greater than 0, expect to find at least this many files.
    min_date : str
        Only return experiments dated greater than or equal to min_date.
    remove_duplicates : bool
        Remove experiments with the same name, keeping the most recent one.
    """
    assert type(experiment_name_patterns) is list
    tarfile_remotepaths = []
    # this code feels too dangerous since there is an rm command ...
    if clean:
        for experiment_name_pattern in experiment_name_patterns:
            print('Deleting local files matching experiment pattern: {}'.format(
                experiment_name_pattern))
            run_local_command('rm {}.h5'.format(
                os.path.join(DATAPATH_PROCESSED, experiment_name_pattern)))
    if remote:
        print('Searching for experiments on remote machine: {}'.format(remote_ip))
        with get_ssh_client(ip_addr=remote_ip,
                            username=remote_username) as ssh_client:
            cmd = 'ls -1 ' + ' '.join(
                ['/tmp/{}.tar.gz'] * len(experiment_name_patterns)).format(
                    *experiment_name_patterns)
            print(cmd)
            _, stdout, _ = ssh_client.exec_command(cmd)
            tarfile_remotepaths += [filename.strip()
                                    for filename in stdout.readlines()]
        print('Found {} experiment(s) on remote machine: {}'.format(
            len(tarfile_remotepaths), tarfile_remotepaths))
    else:
        print('Not searching remote machine for experiments.')
    if force_local or len(tarfile_remotepaths) == 0:
        num_local_files = 0
        for experiment_name_pattern in experiment_name_patterns:
            local_filepaths = glob.glob(
                os.path.join(DATAPATH_RAW, experiment_name_pattern + '.tar.gz'))
            tarfile_remotepaths += local_filepaths
            num_local_files += len(local_filepaths)
        if len(tarfile_remotepaths) == 0:
            raise ValueError(('Found no experiments on remote or local machine '
                              '{} with name pattern {}').format(
                                  remote_ip, experiment_name_pattern))
        if num_local_files > 0:
            print('Found {} experiment(s) on local machine: {}'.format(
                num_local_files, tarfile_remotepaths[-num_local_files:]))
        else:
            print('Found 0 experiment(s) on local machines.')
    if min_date is not None:
        # copy the list so we can modify it while iterating
        remotepaths = tarfile_remotepaths[:]
        num_wrong_date = 0
        for remotepath in remotepaths:
            date = os.path.basename(remotepath).split('-')[-1]
            if date < min_date:
                num_wrong_date += 1
                tarfile_remotepaths.remove(remotepath)
        if num_wrong_date > 0:
            print('Found {} experiment(s) with date smaller than {}.'.format(
                num_wrong_date, min_date))
    if remove_duplicates:
        # keep only the most recent experiment for each name
        tmp = pd.DataFrame(tarfile_remotepaths)
        num_duplicates = len(tmp)
        tarfile_remotepaths = tmp.loc[
            tmp[0].sort_values()
                  .apply(lambda x: '-'.join(x.split('-')[:-1]))
                  .drop_duplicates(keep='last')
                  .index][0].tolist()
        num_duplicates = num_duplicates - len(tarfile_remotepaths)
        if num_duplicates > 0:
            print('Found {} experiment(s) with duplicate prefixes.'.format(
                num_duplicates))
    if min_num_files > 0:
        if len(tarfile_remotepaths) < min_num_files:
            print('Wanted min number of {} experiment(s), but only found {}.'.format(
                min_num_files, len(tarfile_remotepaths)))
            tarfile_remotepaths = []
    num_proc = 10
    num_tarfiles = len(tarfile_remotepaths)
    num_tarfiles_per_process = int(num_tarfiles / num_proc) + 1
    if parallel and num_tarfiles > 1:
        with mp.Pool(num_proc) as pool:
            analyzers = pool.starmap(
                get_experiment,
                zip(tarfile_remotepaths,
                    it.repeat(remote_ip, num_tarfiles),
                    it.repeat(remote_username, num_tarfiles),
                    it.repeat(load_queue, num_tarfiles)),
                chunksize=num_tarfiles_per_process)
    else:
        analyzers = [get_experiment(tarfile_remotepath, remote_ip,
                                    remote_username, load_queue)
                     for tarfile_remotepath in tarfile_remotepaths]
    experiment_analyzers = ExperimentAnalyzers()
    for analyzer in analyzers:
        experiment_analyzers['{}-{}'.format(
            analyzer.experiment.name, analyzer.experiment.exp_time)] = analyzer
    return experiment_analyzers

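# Hedged usage sketch (assumptions: REMOTE_* constants and data paths are
# configured as above; the pattern strings are illustrative only). Loads every
# analyzer matching the given name patterns, keyed by '<name>-<exp_time>':
#
#   analyzers = load_experiments(['10bw-35rtt-*'], remote=True,
#                                min_date='20180101', remove_duplicates=True)
#   for exp_name, analyzer in analyzers.items():
#       print(exp_name, analyzer.experiment.name)
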
def _create_hdf_queue(self, raw_queue_log_tarpath, raw_queue_log_localpath,
                      processed_queue_log_localpath):
    # haven't created the HDF5 store yet; create it now
    # bad lines are those that do not have all 9 comma-separated fields
    find_bad_lines_cmd = 'grep ^.*,.*,.*,.*,.*,.*,.*,.*,.*$ {} -v -n'.format(
        raw_queue_log_localpath)
    badlines = run_local_command(find_bad_lines_cmd, shell=False).split('\n')
    if len(badlines) >= 1 and badlines[0] != '':
        sort_cmd = 'sort -k 2 -o {} {}'.format(raw_queue_log_localpath,
                                               raw_queue_log_localpath)
        print('Found {} bad lines:\n {}'.format(len(badlines), badlines))
    else:
        tmp_queue_filename = raw_queue_log_localpath + '.tmp'
        sort_cmd = ('sort -k 2 -o {} {} '
                    '&& grep ^.*,.*,.*,.*,.*,.*,.*,.*,.*$ {} > {} '
                    '&& mv {} {} ').format(
                        raw_queue_log_localpath, raw_queue_log_localpath,
                        raw_queue_log_localpath, tmp_queue_filename,
                        tmp_queue_filename, raw_queue_log_localpath)
    with untarfile(self.experiment.tarfile_localpath, raw_queue_log_tarpath,
                   postprocess_cmd=sort_cmd) as f:
        with pd.HDFStore(processed_queue_log_localpath, mode='w') as store:
            df = pd.read_csv(f,
                             names=['dequeued', 'time', 'src', 'seq', 'datalen',
                                    'size', 'dropped', 'queued', 'batch'],
                             converters={'seq': tohex, 'src': tohex},
                             dtype={'dequeued': bool,
                                    'time': np.uint64,
                                    'datalen': np.uint16,
                                    'size': np.uint32,
                                    'dropped': bool,
                                    'queued': np.uint16,
                                    'batch': np.uint16},
                             skip_blank_lines=True)
            df['seq'] = df['seq'].astype(np.uint32)
            df['src'] = df['src'].astype(np.uint16)
            #chunk['time'] = pd.to_datetime(chunk['time'], infer_datetime_format=True, unit='ns')
            df['lineno'] = df.index + 1
            df = df.set_index('time')
            # one-hot encode enqueues as +1 per source ...
            df_enq = (pd
                      .get_dummies(df[(df.dequeued == 0) & (df.dropped == 0)]['src'])
                      .astype(np.uint8))
            # ... and dequeues as -1 per source
            df_deq = (pd
                      .get_dummies(df[df.dequeued == 1]['src'])
                      .replace(1, -1)
                      .astype(np.int8))
            # running per-source queue occupancy: cumulative sum of +1/-1 events
            df_flows = (df_enq
                        .append(df_deq)
                        .sort_index()
                        .cumsum()
                        .fillna(0)
                        .astype(np.uint32))
            # cumsum may return negative numbers; converting those to uint32
            # wraps them around to very large numbers
            df = (df
                  .join(df_flows)
                  .sort_index()
                  .ffill())
            df.index = pd.to_datetime(df.index, infer_datetime_format=True,
                                      unit='ns')
            store.append('df_queue', df, format='table',
                         data_columns=['src', 'dropped', 'dequeued'])

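# Hedged read-back sketch (assumption: the store path used above). The queue
# log is saved under the 'df_queue' key with data_columns declared, so HDF5
# on-disk filtering can select e.g. drops without loading the whole table:
#
#   with pd.HDFStore(processed_queue_log_localpath, mode='r') as store:
#       drops = store.select('df_queue', where='dropped == True')
#       print(len(drops), 'dropped packets')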