def parse_csv_data(data_file, csv_file_fields, test, data_resolution):
    df = pd.read_csv(
        data_file,
        index_col=0,
        low_memory=False,
        names=csv_file_fields,
    )
    project_id = test.project.id
    test_id = test.id
    data_resolution_id = TestDataResolution.objects.get(
        frequency=data_resolution).id
    logger.info('[DAEMON] Removing {}.'.format(data_file))
    os.remove(data_file)
    logger.info('[DAEMON] File {} was removed.'.format(data_file))
    df.dropna(inplace=True)
    df = df[~df['url'].str.contains('exclude_', na=False)]
    # Convert epoch-millisecond timestamps to a datetime index
    df.index = pd.to_datetime(dateconv((df.index.values / 1000)))
    unique_urls = df['url'].unique()
    for url in unique_urls:
        url = str(url)
        if not Action.objects.filter(url=url, project_id=project_id).exists():
            a = Action(url=url, project_id=project_id)
            a.save()
            logger.info('[DAEMON] Adding new action: {}'.format(url))
        a = Action.objects.get(url=url, project_id=project_id)
        action_id = a.id
        df_url = df[(df.url == url)]
        url_data = pd.DataFrame()
        df_url_gr_by_ts = df_url.groupby(pd.Grouper(freq=data_resolution))
        url_data['avg'] = df_url_gr_by_ts.response_time.mean()
        url_data['median'] = df_url_gr_by_ts.response_time.median()
        url_data['count'] = df_url_gr_by_ts.success.count()
        # Errors per time bucket: resample only the failed samples
        df_url_gr_by_ts_only_errors = df_url[(
            df_url.success == False)].groupby(pd.Grouper(freq=data_resolution))
        url_data['errors'] = df_url_gr_by_ts_only_errors.success.count()
        url_data['test_id'] = test_id
        url_data['url'] = url
        output_json = json.loads(
            url_data.to_json(orient='index', date_format='iso'),
            object_pairs_hook=OrderedDict)
        for row in output_json:
            logger.info('[DAEMON] {} {}'.format(url, row))
            data = {
                'timestamp': row,
                'avg': output_json[row]['avg'],
                'median': output_json[row]['median'],
                'count': output_json[row]['count'],
                'url': output_json[row]['url'],
                'errors': output_json[row]['errors'],
                'test_id': output_json[row]['test_id'],
            }
            test_action_data = TestActionData(
                test_id=output_json[row]['test_id'],
                action_id=action_id,
                data_resolution_id=data_resolution_id,
                data=data)
            test_action_data.save()
        logger.info('[DAEMON] Check aggregate data: {}'.format(url))
        url_agg_data = dict(
            json.loads(df_url['response_time'].describe().to_json()))
        url_agg_data['99%'] = df_url['response_time'].quantile(.99)
        url_agg_data['90%'] = df_url['response_time'].quantile(.90)
        url_agg_data['weight'] = float(df_url['response_time'].sum())
        url_agg_data['errors'] = float(
            df_url[(df_url['success'] == False)]['success'].count())
        if not TestActionAggregateData.objects.filter(
                action_id=action_id, test_id=test_id).exists():
            logger.info('[DAEMON] Adding new aggregate data.')
            test_action_aggregate_data = TestActionAggregateData(
                test_id=test_id, action_id=action_id, data=url_agg_data)
            test_action_aggregate_data.save()
        else:
            logger.info('[DAEMON] Refreshing aggregate data.')
            d = TestActionAggregateData.objects.get(
                action_id=action_id, test_id=test_id)
            old_data = d.data
            new_data = url_agg_data
            # Merge the new chunk into the stored aggregates:
            # running extremes, summed counts/weights and a weighted mean.
            maximum = max(new_data['max'], old_data['max'])
            minimum = min(new_data['min'], old_data['min'])
            p50 = max(new_data['50%'], old_data['50%'])
            p75 = max(new_data['75%'], old_data['75%'])
            p90 = max(new_data['90%'], old_data['90%'])
            p99 = max(new_data['99%'], old_data['99%'])
            std = new_data['std']
            old_data = {
                'mean': (old_data['weight'] + new_data['weight']) /
                        (old_data['count'] + new_data['count']),
                'max': maximum,
                'min': minimum,
                'count': old_data['count'] + new_data['count'],
                'errors': old_data['errors'] + new_data['errors'],
                'weight': old_data['weight'] + new_data['weight'],
                '50%': p50,
                '75%': p75,
                '90%': p90,
                '99%': p99,
                'std': std,
            }
            d.data = old_data
            d.save()
    logger.info('[DAEMON] Adding test overall data.')
    test_overall_data = pd.DataFrame()
    df_gr_by_ts = df.groupby(pd.Grouper(freq=data_resolution))
    test_overall_data['avg'] = df_gr_by_ts.response_time.mean()
    test_overall_data['median'] = df_gr_by_ts.response_time.median()
    test_overall_data['count'] = df_gr_by_ts.response_time.count()
    test_overall_data['test_id'] = test_id
    output_json = json.loads(
        test_overall_data.to_json(orient='index', date_format='iso'),
        object_pairs_hook=OrderedDict)
    for row in output_json:
        data = {
            'timestamp': row,
            'avg': output_json[row]['avg'],
            'median': output_json[row]['median'],
            'count': output_json[row]['count']
        }
        test_data = TestData(
            test_id=output_json[row]['test_id'],
            data_resolution_id=data_resolution_id,
            data=data)
        test_data.save()
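

# Illustrative sketch (not part of the production flow): the core per-URL
# resampling pattern that parse_csv_data() relies on, shown on a tiny
# synthetic frame so it can be run standalone. The helper name and the
# sample data are assumptions made purely for demonstration.
def _demo_per_url_resample(data_resolution='1Min'):
    import pandas as pd

    df = pd.DataFrame(
        {
            'url': ['/login', '/login', '/home', '/home'],
            'response_time': [120, 340, 80, 95],
            'success': [True, False, True, True],
        },
        index=pd.to_datetime([
            '2020-01-01 10:00:05', '2020-01-01 10:00:45',
            '2020-01-01 10:01:10', '2020-01-01 10:01:30'
        ]))
    for url in df['url'].unique():
        df_url = df[df.url == url]
        grouped = df_url.groupby(pd.Grouper(freq=data_resolution))
        url_data = pd.DataFrame()
        url_data['avg'] = grouped.response_time.mean()
        url_data['median'] = grouped.response_time.median()
        url_data['count'] = grouped.success.count()
        # Errors are counted by resampling only the failed samples;
        # buckets without failures come out as NaN, as in the code above.
        errors = df_url[df_url.success == False].groupby(
            pd.Grouper(freq=data_resolution))
        url_data['errors'] = errors.success.count()
        print(url, url_data.to_dict(orient='index'))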
def generate_data(t_id):
    print("Parse and generate test data: " + str(t_id))
    test_running = TestRunning.objects.get(id=t_id)
    if not Test.objects.filter(path=test_running.workspace).exists():
        test = Test(
            project_id=test_running.project_id,
            path=test_running.workspace,
            display_name=test_running.display_name,
            start_time=test_running.start_time,
            end_tiem=test_running.end_time,
            build_number=0,
            show=True)
        test.save()
    else:
        test = Test.objects.get(path=test_running.workspace)
    project_id = test.project_id
    test_id = test.id
    jmeter_results_file = test_running.result_file_dest
    if os.path.exists(jmeter_results_file):
        df = pd.DataFrame()
        if os.stat(jmeter_results_file).st_size > 1000007777:
            print("Executing a parse for a huge file")
            chunks = pd.read_table(
                jmeter_results_file, sep=',', index_col=0, chunksize=3000000)
            for chunk in chunks:
                chunk.columns = [
                    'response_time', 'url', 'responseCode', 'success',
                    'threadName', 'failureMessage', 'grpThreads', 'allThreads'
                ]
                chunk = chunk[~chunk['url'].str.contains('exclude_', na=False)]
                df = df.append(chunk)
            print("Parsing a huge file, size: " + str(df.size))
        else:
            df = pd.read_csv(
                jmeter_results_file, index_col=0, low_memory=False)
            df.columns = [
                'response_time', 'url', 'responseCode', 'success',
                'threadName', 'failureMessage', 'grpThreads', 'allThreads'
            ]
            df = df[~df['url'].str.contains('exclude_', na=False)]
        # Convert epoch-millisecond timestamps to a normal datetime index
        df.index = pd.to_datetime(dateconv((df.index.values / 1000)))
        num_lines = df['response_time'].count()
        print("Number of lines in file: %d." % num_lines)
        unique_urls = df['url'].unique()
        for url in unique_urls:
            url = str(url)
            if not Action.objects.filter(url=url,
                                         project_id=project_id).exists():
                print("Adding new action: " + str(url) + " project_id: " +
                      str(project_id))
                a = Action(url=url, project_id=project_id)
                a.save()
            a = Action.objects.get(url=url, project_id=project_id)
            action_id = a.id
            if not TestActionData.objects.filter(action_id=action_id,
                                                 test_id=test_id).exists():
                print("Adding action data: " + url)
                df_url = df[(df.url == url)]
                url_data = pd.DataFrame()
                df_url_gr_by_ts = df_url.groupby(pd.TimeGrouper(freq='1Min'))
                url_data['avg'] = df_url_gr_by_ts.response_time.mean()
                url_data['median'] = df_url_gr_by_ts.response_time.median()
                url_data['count'] = df_url_gr_by_ts.success.count()
                df_url_gr_by_ts_only_errors = df_url[(
                    df_url.success == False)].groupby(
                        pd.TimeGrouper(freq='1Min'))
                url_data['errors'] = df_url_gr_by_ts_only_errors.success.count()
                url_data['test_id'] = test_id
                url_data['url'] = url
                output_json = json.loads(
                    url_data.to_json(orient='index', date_format='iso'),
                    object_pairs_hook=OrderedDict)
                for row in output_json:
                    data = {
                        'timestamp': row,
                        'avg': output_json[row]['avg'],
                        'median': output_json[row]['median'],
                        'count': output_json[row]['count'],
                        'url': output_json[row]['url'],
                        'errors': output_json[row]['errors'],
                        'test_id': output_json[row]['test_id'],
                    }
                    test_action_data = TestActionData(
                        test_id=output_json[row]['test_id'],
                        action_id=action_id,
                        data=data)
                    test_action_data.save()
        zip_results_file(jmeter_results_file)
        test_overall_data = pd.DataFrame()
        df_gr_by_ts = df.groupby(pd.TimeGrouper(freq='1Min'))
        test_overall_data['avg'] = df_gr_by_ts.response_time.mean()
        test_overall_data['median'] = df_gr_by_ts.response_time.median()
        test_overall_data['count'] = df_gr_by_ts.response_time.count()
        test_overall_data['test_id'] = test_id
        output_json = json.loads(
            test_overall_data.to_json(orient='index', date_format='iso'),
            object_pairs_hook=OrderedDict)
        for row in output_json:
            data = {
                'timestamp': row,
                'avg': output_json[row]['avg'],
                'median': output_json[row]['median'],
                'count': output_json[row]['count']
            }
            test_data = TestData(test_id=output_json[row]['test_id'],
                                 data=data)
            test_data.save()
    else:
        print("Result file does not exist")
    monitoring_results_file = test_running.monitoring_file_dest
    if os.path.exists(monitoring_results_file):
        # Strip header lines (those containing 'start') before parsing
        f = open(monitoring_results_file, "r")
        lines = f.readlines()
        f.close()
        f = open(monitoring_results_file, "w")
        for line in lines:
            if not ('start' in line):
                f.write(line)
        f.close()
        monitoring_df = pd.read_csv(
            monitoring_results_file, index_col=1, sep=";")
        monitoring_df.columns = [
            'server_name', 'Memory_used', 'Memory_free', 'Memory_buff',
            'Memory_cached', 'Net_recv', 'Net_send', 'Disk_read',
            'Disk_write', 'System_la1', 'CPU_user', 'CPU_system', 'CPU_iowait'
        ]
        monitoring_df.index = pd.to_datetime(
            dateconv((monitoring_df.index.values)))
        monitoring_df.index.names = ['timestamp']
        unique_servers = monitoring_df['server_name'].unique()
        for server_ in unique_servers:
            if not Server.objects.filter(server_name=server_).exists():
                print("Adding new server: " + server_)
                s = Server(server_name=server_)
                s.save()
            server_id = Server.objects.get(server_name=server_).id
            if not ServerMonitoringData.objects.filter(
                    server_id=server_id, test_id=test_id).exists():
                df_server = monitoring_df[(
                    monitoring_df.server_name == server_)]
                output_json = json.loads(
                    df_server.to_json(orient='index', date_format='iso'),
                    object_pairs_hook=OrderedDict)
                for row in output_json:
                    data = {
                        'timestamp': row,
                        'Memory_used': output_json[row]['Memory_used'],
                        'Memory_free': output_json[row]['Memory_free'],
                        'Memory_buff': output_json[row]['Memory_buff'],
                        'Memory_cached': output_json[row]['Memory_cached'],
                        'Net_recv': output_json[row]['Net_recv'],
                        'Net_send': output_json[row]['Net_send'],
                        'Disk_read': output_json[row]['Disk_read'],
                        'Disk_write': output_json[row]['Disk_write'],
                        'System_la1': output_json[row]['System_la1'],
                        'CPU_user': output_json[row]['CPU_user'],
                        'CPU_system': output_json[row]['CPU_system'],
                        'CPU_iowait': output_json[row]['CPU_iowait']
                    }
                    server_monitoring_data = ServerMonitoringData(
                        test_id=test_id, server_id=server_id, data=data)
                    server_monitoring_data.save()
    else:
        print("Monitoring result file does not exist")
    return True
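

# Illustrative sketch (hypothetical file path, synthetic threshold): the
# chunked-reading strategy used above for very large JMeter result files,
# written against the current pandas API (pd.concat instead of the
# deprecated DataFrame.append, pd.to_datetime(unit='ms') instead of the
# module's dateconv helper).
def _demo_chunked_jmeter_read(results_file='/tmp/jmeter_results.csv',
                              chunksize=3000000):
    import pandas as pd

    columns = [
        'response_time', 'url', 'responseCode', 'success', 'threadName',
        'failureMessage', 'grpThreads', 'allThreads'
    ]
    parts = []
    for chunk in pd.read_csv(results_file, index_col=0, chunksize=chunksize):
        chunk.columns = columns
        # Drop rows whose URL is explicitly excluded from reporting
        chunk = chunk[~chunk['url'].str.contains('exclude_', na=False)]
        parts.append(chunk)
    df = pd.concat(parts)
    # Epoch milliseconds -> datetime index, mirroring generate_data()
    df.index = pd.to_datetime(df.index.values, unit='ms')
    return df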
def generate_test_results_data(test_id,
                               project_id,
                               jmeter_results_file_path='',
                               monitoring_results_file_path='',
                               jmeter_results_file_fields=[],
                               monitoring_results_file_fields=[],
                               data_resolution='1Min',
                               mode=''):
    data_resolution_id = TestDataResolution.objects.get(
        frequency=data_resolution).id
    if not jmeter_results_file_fields:
        jmeter_results_file_fields = [
            'response_time', 'url', 'responseCode', 'success', 'threadName',
            'failureMessage', 'grpThreads', 'allThreads'
        ]
    if not monitoring_results_file_fields:
        monitoring_results_file_fields = [
            'server_name', 'Memory_used', 'Memory_free', 'Memory_buff',
            'Memory_cached', 'Net_recv', 'Net_send', 'Disk_read',
            'Disk_write', 'System_la1', 'CPU_user', 'CPU_system', 'CPU_iowait'
        ]
    jmeter_results_file = jmeter_results_file_path
    if os.path.exists(jmeter_results_file):
        df = pd.DataFrame()
        if os.stat(jmeter_results_file).st_size > 1000007777:
            logger.debug("Executing a parse for a huge file")
            chunks = pd.read_table(
                jmeter_results_file, sep=',', index_col=0, chunksize=3000000)
            for chunk in chunks:
                chunk.columns = jmeter_results_file_fields
                chunk = chunk[~chunk['url'].str.contains('exclude_', na=False)]
                df = df.append(chunk)
        else:
            df = pd.read_csv(
                jmeter_results_file, index_col=0, low_memory=False)
            df.columns = jmeter_results_file_fields
            df = df[~df['url'].str.contains('exclude_', na=False)]
        # If gathering data "online", just clean up (zip) the result file
        zip_results_file(jmeter_results_file)
        df.index = pd.to_datetime(dateconv((df.index.values / 1000)))
        num_lines = df['response_time'].count()
        logger.debug('Number of lines in file: {}'.format(num_lines))
        unique_urls = df['url'].unique()
        for url in unique_urls:
            url = str(url)
            if not Action.objects.filter(url=url,
                                         project_id=project_id).exists():
                logger.debug("Adding new action: " + str(url) +
                             " project_id: " + str(project_id))
                a = Action(url=url, project_id=project_id)
                a.save()
            a = Action.objects.get(url=url, project_id=project_id)
            action_id = a.id
            if not TestActionData.objects.filter(
                    action_id=action_id,
                    test_id=test_id,
                    data_resolution_id=data_resolution_id).exists():
                logger.debug("Adding action data: {}".format(url))
                df_url = df[(df.url == url)]
                url_data = pd.DataFrame()
                df_url_gr_by_ts = df_url.groupby(
                    pd.Grouper(freq=data_resolution))
                url_data['avg'] = df_url_gr_by_ts.response_time.mean()
                url_data['median'] = df_url_gr_by_ts.response_time.median()
                url_data['count'] = df_url_gr_by_ts.success.count()
                df_url_gr_by_ts_only_errors = df_url[(
                    df_url.success == False)].groupby(
                        pd.Grouper(freq=data_resolution))
                url_data['errors'] = df_url_gr_by_ts_only_errors.success.count()
                url_data['test_id'] = test_id
                url_data['url'] = url
                output_json = json.loads(
                    url_data.to_json(orient='index', date_format='iso'),
                    object_pairs_hook=OrderedDict)
                for row in output_json:
                    data = {
                        'timestamp': row,
                        'avg': output_json[row]['avg'],
                        'median': output_json[row]['median'],
                        'count': output_json[row]['count'],
                        'url': output_json[row]['url'],
                        'errors': output_json[row]['errors'],
                        'test_id': output_json[row]['test_id'],
                    }
                    test_action_data = TestActionData(
                        test_id=output_json[row]['test_id'],
                        action_id=action_id,
                        data_resolution_id=data_resolution_id,
                        data=data)
                    test_action_data.save()
                if not TestActionAggregateData.objects.filter(
                        action_id=action_id, test_id=test_id).exists():
                    url_agg_data = dict(
                        json.loads(
                            df_url['response_time'].describe().to_json()))
                    url_agg_data['99%'] = float(
                        df_url['response_time'].quantile(.99))
                    url_agg_data['90%'] = float(
                        df_url['response_time'].quantile(.90))
                    url_agg_data['weight'] = float(
                        df_url['response_time'].sum())
                    url_agg_data['errors'] = float(df_url[(
                        df_url['success'] == False)]['success'].count())
                    logger.debug(url_agg_data)
                    test_action_aggregate_data = TestActionAggregateData(
                        test_id=test_id,
                        action_id=action_id,
                        data=url_agg_data)
                    test_action_aggregate_data.save()
        if not TestData.objects.filter(
                test_id=test_id,
                data_resolution_id=data_resolution_id).exists():
            test_overall_data = pd.DataFrame()
            df_gr_by_ts = df.groupby(pd.Grouper(freq=data_resolution))
            test_overall_data['avg'] = df_gr_by_ts.response_time.mean()
            test_overall_data['median'] = df_gr_by_ts.response_time.median()
            test_overall_data['count'] = df_gr_by_ts.response_time.count()
            test_overall_data['test_id'] = test_id
            output_json = json.loads(
                test_overall_data.to_json(orient='index', date_format='iso'),
                object_pairs_hook=OrderedDict)
            for row in output_json:
                data = {
                    'timestamp': row,
                    'avg': output_json[row]['avg'],
                    'median': output_json[row]['median'],
                    'count': output_json[row]['count']
                }
                test_data = TestData(
                    test_id=output_json[row]['test_id'],
                    data_resolution_id=data_resolution_id,
                    data=data)
                test_data.save()
    monitoring_results_file = monitoring_results_file_path
    if os.path.exists(monitoring_results_file):
        # Strip header lines (those containing 'start') before parsing
        f = open(monitoring_results_file, "r")
        lines = f.readlines()
        f.close()
        f = open(monitoring_results_file, "w")
        for line in lines:
            if not ('start' in line):
                f.write(line)
        f.close()
        monitoring_df = pd.read_csv(
            monitoring_results_file, index_col=1, sep=";")
        monitoring_df.columns = monitoring_results_file_fields
        monitoring_df.index = pd.to_datetime(
            dateconv((monitoring_df.index.values)))
        monitoring_df.index.names = ['timestamp']
        unique_servers = monitoring_df['server_name'].unique()
        for server_ in unique_servers:
            if not Server.objects.filter(server_name=server_).exists():
                s = Server(server_name=server_)
                s.save()
            server_id = Server.objects.get(server_name=server_).id
            if not ServerMonitoringData.objects.filter(
                    server_id=server_id,
                    test_id=test_id,
                    data_resolution_id=data_resolution_id).exists():
                df_server = monitoring_df[(
                    monitoring_df.server_name == server_)]
                output_json = json.loads(
                    df_server.to_json(orient='index', date_format='iso'),
                    object_pairs_hook=OrderedDict)
                for row in output_json:
                    data = {
                        'timestamp': row,
                        'Memory_used': output_json[row]['Memory_used'],
                        'Memory_free': output_json[row]['Memory_free'],
                        'Memory_buff': output_json[row]['Memory_buff'],
                        'Memory_cached': output_json[row]['Memory_cached'],
                        'Net_recv': output_json[row]['Net_recv'],
                        'Net_send': output_json[row]['Net_send'],
                        'Disk_read': output_json[row]['Disk_read'],
                        'Disk_write': output_json[row]['Disk_write'],
                        'System_la1': output_json[row]['System_la1'],
                        'CPU_user': output_json[row]['CPU_user'],
                        'CPU_system': output_json[row]['CPU_system'],
                        'CPU_iowait': output_json[row]['CPU_iowait']
                    }
                    server_monitoring_data = ServerMonitoringData(
                        test_id=test_id,
                        data_resolution_id=data_resolution_id,
                        server_id=server_id,
                        data=data)
                    server_monitoring_data.save()
    else:
        logger.info("Monitoring result file does not exist")
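

# Illustrative sketch (synthetic frame): how a resampled overall-data frame
# is turned into per-timestamp dicts via to_json(orient='index'), which is
# the shape stored in the JSONField of TestData / TestActionData above.
# The helper name and sample values are assumptions for demonstration only.
def _demo_overall_rows(data_resolution='1Min'):
    import json
    from collections import OrderedDict

    import pandas as pd

    df = pd.DataFrame(
        {'response_time': [100, 150, 90, 110]},
        index=pd.to_datetime([
            '2020-01-01 10:00:10', '2020-01-01 10:00:50',
            '2020-01-01 10:01:20', '2020-01-01 10:01:40'
        ]))
    grouped = df.groupby(pd.Grouper(freq=data_resolution))
    overall = pd.DataFrame()
    overall['avg'] = grouped.response_time.mean()
    overall['median'] = grouped.response_time.median()
    overall['count'] = grouped.response_time.count()
    output_json = json.loads(
        overall.to_json(orient='index', date_format='iso'),
        object_pairs_hook=OrderedDict)
    rows = []
    for row in output_json:
        rows.append({
            'timestamp': row,  # ISO timestamp string used as the key
            'avg': output_json[row]['avg'],
            'median': output_json[row]['median'],
            'count': output_json[row]['count'],
        })
    return rows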
def generate_data(t_id):
    print("Parse and generate test data: " + str(t_id))
    test_running = TestRunning.objects.get(id=t_id)
    if not Test.objects.filter(path=test_running.workspace).exists():
        test = Test(
            project_id=test_running.project_id,
            path=test_running.workspace,
            display_name=test_running.display_name,
            start_time=test_running.start_time,
            end_tiem=test_running.end_time,
            build_number=0,
            show=True)
        test.save()
    else:
        test = Test.objects.get(path=test_running.workspace)
    project_id = test.project_id
    test_id = test.id
    jmeter_results_file = test_running.result_file_dest
    if os.path.exists(jmeter_results_file):
        df = pd.DataFrame()
        if os.stat(jmeter_results_file).st_size > 1000007777:
            print("Executing a parse for a huge file")
            chunks = pd.read_table(
                jmeter_results_file, sep=',', index_col=0, chunksize=3000000)
            for chunk in chunks:
                chunk.columns = [
                    'response_time', 'url', 'responseCode', 'success',
                    'threadName', 'failureMessage', 'grpThreads', 'allThreads'
                ]
                chunk = chunk[~chunk['url'].str.contains('exclude_', na=False)]
                df = df.append(chunk)
            print("Parsing a huge file, size: " + str(df.size))
        else:
            df = pd.read_csv(
                jmeter_results_file, index_col=0, low_memory=False)
            df.columns = [
                'response_time', 'url', 'responseCode', 'success',
                'threadName', 'failureMessage', 'grpThreads', 'allThreads'
            ]
            df = df[~df['url'].str.contains('exclude_', na=False)]
        # Convert epoch-millisecond timestamps to a normal datetime index
        df.index = pd.to_datetime(dateconv((df.index.values / 1000)))
        num_lines = df['response_time'].count()
        print("Number of lines in file: %d." % num_lines)
        unique_urls = df['url'].unique()
        for url in unique_urls:
            url = str(url)
            if not Action.objects.filter(url=url,
                                         project_id=project_id).exists():
                print("Adding new action: " + str(url) + " project_id: " +
                      str(project_id))
                a = Action(url=url, project_id=project_id)
                a.save()
            a = Action.objects.get(url=url, project_id=project_id)
            action_id = a.id
            if not TestActionData.objects.filter(action_id=action_id,
                                                 test_id=test_id).exists():
                print("Adding action data: " + url)
                df_url = df[(df.url == url)]
                url_data = pd.DataFrame()
                df_url_gr_by_ts = df_url.groupby(pd.TimeGrouper(freq='1Min'))
                url_data['avg'] = df_url_gr_by_ts.response_time.mean()
                url_data['median'] = df_url_gr_by_ts.response_time.median()
                url_data['count'] = df_url_gr_by_ts.success.count()
                df_url_gr_by_ts_only_errors = df_url[(
                    df_url.success == False)].groupby(
                        pd.TimeGrouper(freq='1Min'))
                url_data['errors'] = df_url_gr_by_ts_only_errors.success.count()
                url_data['test_id'] = test_id
                url_data['url'] = url
                output_json = json.loads(
                    url_data.to_json(orient='index', date_format='iso'),
                    object_pairs_hook=OrderedDict)
                for row in output_json:
                    data = {
                        'timestamp': row,
                        'avg': output_json[row]['avg'],
                        'median': output_json[row]['median'],
                        'count': output_json[row]['count'],
                        'url': output_json[row]['url'],
                        'errors': output_json[row]['errors'],
                        'test_id': output_json[row]['test_id'],
                    }
                    test_action_data = TestActionData(
                        test_id=output_json[row]['test_id'],
                        action_id=action_id,
                        data=data)
                    test_action_data.save()
        try:
            by_url = df.groupby('url')
            # Mean response time per URL
            agg = by_url.response_time.mean().round(1).to_frame(name='average')
            agg['median'] = by_url.response_time.median().round(1)
            agg['percentile_75'] = by_url.response_time.quantile(.75).round(1)
            agg['percentile_90'] = by_url.response_time.quantile(.90).round(1)
            agg['percentile_99'] = by_url.response_time.quantile(.99).round(1)
            agg['maximum'] = by_url.response_time.max().round(1)
            agg['minimum'] = by_url.response_time.min().round(1)
            agg['cnt'] = by_url.success.count().round(1)
            # Error percentage: 100% minus the share of successful samples
            agg['errors'] = (
                (1 - df[(df.success == True)].groupby('url')['success'].count()
                 / by_url['success'].count()) * 100).round(1)
            agg['weight'] = by_url.response_time.sum()
            agg['test_id'] = test_id
            action_df = DataFrame(
                list(
                    Action.objects.values('id', 'url').filter(
                        project_id=project_id)))
            action_df.columns = ['action_id', 'url']
            action_df = action_df.set_index('url')
            agg.index.names = ['url']
            agg = pd.merge(action_df, agg, left_index=True, right_index=True)
            # agg = agg.set_index('action_id')
            print(agg.columns)
            for index, row in agg.iterrows():
                print("add row: " + str(row))
                aggr = Aggregate(test_id=int(row['test_id']),
                                 action_id=int(row['action_id']),
                                 average=row['average'],
                                 median=row['median'],
                                 percentile_75=row['percentile_75'],
                                 percentile_90=row['percentile_90'],
                                 percentile_99=row['percentile_99'],
                                 maximum=row['maximum'],
                                 minimum=row['minimum'],
                                 count=int(row['cnt']),
                                 errors=int(row['errors']),
                                 weight=row['weight'])
                aggr.save()
            zip_results_file(jmeter_results_file)
        except ValueError as e:
            print("error", e)
        test_overall_data = pd.DataFrame()
        df_gr_by_ts = df.groupby(pd.TimeGrouper(freq='1Min'))
        test_overall_data['avg'] = df_gr_by_ts.response_time.mean()
        test_overall_data['median'] = df_gr_by_ts.response_time.median()
        test_overall_data['count'] = df_gr_by_ts.response_time.count()
        test_overall_data['test_id'] = test_id
        output_json = json.loads(
            test_overall_data.to_json(orient='index', date_format='iso'),
            object_pairs_hook=OrderedDict)
        for row in output_json:
            data = {
                'timestamp': row,
                'avg': output_json[row]['avg'],
                'median': output_json[row]['median'],
                'count': output_json[row]['count']
            }
            test_data = TestData(test_id=output_json[row]['test_id'],
                                 data=data)
            test_data.save()
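

# Illustrative sketch (synthetic frame): the per-URL aggregate statistics
# computed in the legacy generate_data() above -- mean, percentiles and an
# error percentage per URL -- shown standalone with pandas only. The helper
# name and the sample data are assumptions for demonstration.
def _demo_url_aggregates():
    import pandas as pd

    df = pd.DataFrame({
        'url': ['/login', '/login', '/login', '/home'],
        'response_time': [120.0, 340.0, 200.0, 80.0],
        'success': [True, False, True, True],
    })
    by_url = df.groupby('url')
    agg = by_url.response_time.mean().round(1).to_frame(name='average')
    agg['median'] = by_url.response_time.median().round(1)
    agg['percentile_90'] = by_url.response_time.quantile(.90).round(1)
    agg['percentile_99'] = by_url.response_time.quantile(.99).round(1)
    agg['maximum'] = by_url.response_time.max().round(1)
    agg['minimum'] = by_url.response_time.min().round(1)
    agg['cnt'] = by_url.success.count()
    # Error rate: 100% minus the share of successful samples per URL
    agg['errors'] = (
        (1 - df[df.success == True].groupby('url')['success'].count() /
         by_url['success'].count()) * 100).round(1)
    agg['weight'] = by_url.response_time.sum()
    return agg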