def process_elb_access_log(elb_buckets_dict, queue):
    """
    Spawn one thread per ELB to collect and process its S3 access logs, then
    put the aggregated per-station data counts on the given queue.

    :param elb_buckets_dict:    map of '<station>:<region>' strings to the
                                name of the S3 bucket holding that ELB's
                                access logs
    :param queue:               queue used to return (data_in, data_out)
                                when run in a thread
    :return:
    """

    # collecting access log for each elb with different threads
    elb_data_manager = ThreadingManager()

    for elb_region_str, bucket_name in elb_buckets_dict.iteritems():
        c = S3Connection()
        bucket = c.get_bucket(bucket_name)

        elb_data_manager.start_tasks(target_func=counting_elb_data,
                                     name="elb_data_collector",
                                     para=[bucket, elb_region_str])

    # waiting for all threads to finish parsing S3 log
    # by which time it will be the end of measurement interval
    result_queue = elb_data_manager.collect_results()
    # collect the total data processed by each ELB
    data_in = dict()
    data_out = dict()

    while not result_queue.empty():
        data_tuple = result_queue.get()
        station = data_tuple[0]
        # the amount of data sent and received by each client
        # *of ONE station
        client_sent, client_received = data_tuple[1]
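        # client_sent / client_received appear to be per-client dicts
        # (main() later iterates them with .iteritems()), so data_in and
        # data_out end up shaped as <station: <client: bytes>>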

        data_in.update({station: client_sent})
        data_out.update({station: client_received})

    # debug
    print_message('Data in (bytes) from clients: %s' % data_in)
    print_message('Data out (bytes) from clients: %s' % data_out)

    queue.put((data_in, data_out))
def calculate_key_prefix(elb_region, elb_name, last_expected_time):
    """Function that calculate the prefix for bucket key searching

    :param elb_region:
    :param elb_name:
    :return:
    """
    print_message('Retrieving access log for %s ...' % elb_name)

    next_expected_time, max_waiting_time\
        = get_next_nth_elb_log_time(1, last_expected_time)

    last_expected_time = next_expected_time

    year, month, day, hour, minute = next_expected_time.year, \
                                     next_expected_time.month, \
                                     next_expected_time.day, \
                                     next_expected_time.hour, \
                                     next_expected_time.minute

    # convert month, day, hour and minute to 2 digit representation
    month = '%02d' % month
    day = '%02d' % day
    hour = '%02d' % hour
    minute = '%02d' % minute
    # The time string that the expected log file should contain
    time_str = "%s%s%sT%s%sZ" % (year, month, day, hour, minute)

    aws_account_id = str(305933725014)
    region = elb_region
    load_balancer_name = elb_name
    end_time = time_str

    key_prefix = 'AWSLogs/{0}/elasticloadbalancing/{1}/{2}/{3}/{4}/{5}' \
                 '_elasticloadbalancing_{6}_{7}_{8}' \
        .format(aws_account_id, region, year, month, day,
                aws_account_id, region, load_balancer_name, end_time)

    request_headers = {'prefix': unicode(key_prefix), 'delimiter': '.log'}

    return request_headers, max_waiting_time, last_expected_time
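For illustration, a minimal sketch of the prefix this function builds. The
account id is the one hard-coded above; the region, ELB name and timestamp
are hypothetical:

# Hypothetical values, for illustration only
aws_account_id = '305933725014'
example_prefix = (
    'AWSLogs/{0}/elasticloadbalancing/{1}/{2}/{3}/{4}/'
    '{5}_elasticloadbalancing_{6}_{7}_{8}'
    .format(aws_account_id, 'us-east-1', 2015, '06', '01',
            aws_account_id, 'us-east-1', 'my-elb', '20150601T1005Z'))
# example_prefix ==
# 'AWSLogs/305933725014/elasticloadbalancing/us-east-1/2015/06/01/'
# '305933725014_elasticloadbalancing_us-east-1_my-elb_20150601T1005Z'
# and it is passed as the 'prefix' request header with '.log' as the delimiter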
Example #3
def measure_round_trip_delay(from_host, to_host, from_host_pk,
                             measurement_interval, queue):

    from_host_ip, from_host_name = from_host
    to_host_ip, to_host_name = to_host

    start_time = time.time()
    current_time = start_time
    time_elapsed = current_time - start_time

    measurements = []   # list to store series of measurement

    while time_elapsed < measurement_interval:   # in the unit of seconds

        cmd = ' '.join(["ping", "-c 5", to_host_ip])
        out, err = execute_remote_command(from_host_ip, cmd, 'ubuntu', '',
                                          from_host_pk)

        rt_str = [text for text in out.split('\n')
                  if 'min/avg/max/' in text]
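        # The matched summary line looks something like (hypothetical values):
        #   rtt min/avg/max/mdev = 20.321/23.456/30.123/3.210 ms
        # so splitting on '=' gives the metric names on the left and the
        # slash-separated values on the right; the position of 'avg' among
        # the names gives the index of the average value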

        stat_str = rt_str[0].split('=')
        metric_name = stat_str[0]
        values = stat_str[1].split('/')
        avg_idx = metric_name.split('/').index('avg')

        avg_round_trip_delay = values[avg_idx]
        print_message('Average round trip delay between %s and %s: %s (ms)'
                      % (from_host_name, to_host_name, avg_round_trip_delay))

        # record current measurement
        measurements.append(float(avg_round_trip_delay) / 1000)

        time.sleep(30)  # sleep for half a min

        current_time = time.time()
        time_elapsed = current_time - start_time

    average_round_trip_delay = numpy.mean(measurements)

    queue.put((from_host, to_host, average_round_trip_delay))
Example #4
def main():
    setup_logging()

    # line counter for reading csparql logs of each service station
    line_counters = dict()

    # bucket and ELB info for getting access log from S3
    elb_buckets_dict = dict()
    elb_regions = get_station_region()
    elb_buckets = get_elb_buckets_map()
    for station, region in elb_regions.iteritems():
        elb_region_str = '%s:%s' % (station, region)
        elb_buckets_dict.update(
            {elb_region_str: unicode(elb_buckets[station])})

    stations = get_available_stations()
    for station in stations:
        line_counters.update({station: 0})

    log_base_dir = time.strftime("%Y_%m%d_%H%M")
    total_num_users = cfg.get_int('Default', 'total_num_users')

    # Get all available client region
    available_clients = get_available_clients()

    # counter = 0  # For testing
    while True:

        # Processing csparql log and ELB access log simultaneously by 2 threads

        # Start csparql log parsing first since the ELB access log is
        # emitted with a delay

        # First we need to calculate how long to wait before retrieving
        # csparql logs, i.e. how long the actual measurement time is. Since
        # S3 only emits logs at the 5th, 10th, 15th etc. minute of the hour,
        # we can only measure from the start of the measurement until the
        # last expected log emission time, which is not the actual time the
        # log is obtained, since there is a delay
        measurement_interval = calculate_waiting_time()

        # no need to wait until log actually being obtained
        measurement_interval -= 300

        # Measuring latency between each client region and service station
        latency_manager = ThreadingManager()
        latency_manager.start_task(
            target_func=measure_latency,
            name="latency_manager",
            para=[available_clients, stations, measurement_interval])

        server_log_processor = ThreadingManager()
        server_log_processor.start_task(target_func=process_server_logs,
                                        name="server_log_processor",
                                        para=[
                                            log_base_dir, line_counters,
                                            total_num_users,
                                            measurement_interval
                                        ])

        # Begin gathering info of the amount of data
        # transferred through each service station
        data_counting_task = ThreadingManager()
        data_counting_task.start_task(target_func=process_elb_access_log,
                                      name="elb_access_log_processor",
                                      para=[elb_buckets_dict])

        latency_results_dict = latency_manager.collect_results().get()

        # collect csparql logs first since their processing completes first,
        # while the ELB data may be delayed
        server_metrics_queue = server_log_processor.collect_results()
        (station_metric_list, total_request) = server_metrics_queue.get()
        line_counters = server_metrics_queue.get()

        print_message('')
        print_message('Service station logs processing finished\n')

        # collecting elb data now
        elb_data_queue = data_counting_task.collect_results()
        data_in, data_out = elb_data_queue.get()
        """ Preparing optimisation parameters """
        # Calculate The "average amount of data involved in each request" for
        # each service station and the "total number of requests"

        # These 2 dictionaries store the average data sent and received per
        # request by *each service station* from *each client*. The length
        # of each dictionary should equal the number of clients (regions)

        # <Client_name: <Station: data_in_per_req>>
        avg_data_in_per_reqs = dict()
        # <Client_name: <Station: data_out_per_req>>
        avg_data_out_per_reqs = dict()

        # initialise
        for cli_name in available_clients:
            avg_data_in_per_reqs.update({cli_name: {}})
            avg_data_out_per_reqs.update({cli_name: {}})

        # requests arrival rate and service rate of each service station
        arrival_rates = dict()
        service_rates = dict()

        for station_metric in station_metric_list:
            # getting metric
            station_name = station_metric.station_name
            arrival_rate = station_metric.arrival_rate
            service_rate = station_metric.service_rate

            requests = station_metric.total_requests

            print '\nTotal requests for station %s: %s' \
                  % (station_name, requests)

            log_info(
                metric_record_file, '\nTotal requests for station %s: %s' %
                (station_name, requests))

            # arrival_rate and service_rate
            arrival_rates.update({station_name: arrival_rate})
            service_rates.update({station_name: service_rate})

            response_time = \
                math.pow(service_rate, -1) / (1 - math.pow(service_rate, -1) *
                                              arrival_rate)
            print '[Debug] predicted current response time of service station ' \
                  '\'%s\': %s' % (station_name, response_time)

            log_info(
                metric_record_file,
                '[Debug] predicted current response time of service '
                'station \'%s\': %s' % (station_name, response_time))

        # TODO: calculate total requests for each client
        # TODO: if c-sparql could record the source of each request
        # TODO: things would be much easier
        total_request_per_client = dict()
        data_in_sum = 0
        client_data_in_sum = dict()

        for a_client in available_clients:
            client_data_in_sum.update({a_client: 0})

        for station_name in stations:
            d_in = data_in.get(station_name)
            for c, sent_data in d_in.iteritems():
                # calculate total data sent by each client
                # and the total data sent by all client
                client_data_in_sum[c] += sent_data
                data_in_sum += sent_data

        # calculate total amount of requests sent by each client
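        # e.g. (hypothetical) with total_request = 1000 and a client that
        # contributed 30% of all bytes sent in, that client is attributed
        # ceil(1000 * 0.3) = 300 requests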
        for ac in available_clients:
            t_request = math.ceil(total_request *
                                  (client_data_in_sum[ac] / data_in_sum))
            t_request = int(t_request)

            # build this so that it could be used by calculating out data
            total_request_per_client.update({ac: t_request})

        for station_name in stations:
            d_in = data_in.get(station_name)

            for c, sent_data in d_in.iteritems():
                t_request = total_request_per_client[c]
                # convert the amount of data to GB
                sent_data = float(sent_data / math.pow(1024, 3))
                avg_data_in_per_req = sent_data / t_request
                avg_data_in_per_reqs[c].update(
                    {station_name: avg_data_in_per_req})

        for station_name in stations:
            d_out = data_out.get(station_name)

            for c1, received_data in d_out.iteritems():
                t_request = total_request_per_client[c1]
                received_data = float(received_data / math.pow(1024, 3))
                avg_data_out_per_req = received_data / t_request
                avg_data_out_per_reqs[c1].update(
                    {station_name: avg_data_out_per_req})

        # For testing purpose
        info_str = \
            '\n[Debug] total_request: %s\n' \
            '[Debug] avg_data_in_per_reqs: %s\n' \
            '[Debug] avg_data_out_per_reqs: %s\n' \
            '[Debug] arrival_rates: %s\n' \
            '[Debug] service_rates: %s\n' \
            % (total_request,
               avg_data_in_per_reqs,
               avg_data_out_per_reqs,
               arrival_rates, service_rates)

        print info_str
        log_info(metric_record_file, info_str)

        # TODO: Get elb price from config
        # ELB pricing
        elb_prices = [0.008, 0.008]

        # optimise for each client...
        # run the optimisation for each client in a new thread
        optimiser = ThreadingManager()
        for client in available_clients:
            optimiser.start_tasks(target_func=clients_optimisation,
                                  name="optimiser",
                                  para=[
                                      avg_data_in_per_reqs,
                                      avg_data_out_per_reqs, client,
                                      elb_prices, latency_results_dict,
                                      measurement_interval, service_rates,
                                      stations, total_request_per_client
                                  ])

        # synchronising threads
        optimiser.collect_results()

        # it takes up to 60 seconds for Route 53 record changes to take effect
        time.sleep(60)
Example #5
def _generate_data(base_path, queue):
    response_file = base_path + '/ResponseInfo.txt'
    cpu_file = base_path + '/CPUUtil.txt'
    # cell(6,2);
    data = [[] for j in range(7)]
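    # Inferred layout of the rows of `data` (a best guess based on how
    # update_data_array and format_data use them):
    #   data[0]: sample timestamps        data[1]: CPU utilisation samples
    #   data[2]: request arrival times    data[3]: response times
    #   data[4]: mean response times      data[5]: throughput per sample
    #   data[6]: completions per sample   data[7]: arrivals per sample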
    category_map = dict()  # containers.Map;
    category_index = 0
    category_count = 0
    category_list = []
    count = 0

    # No ResponseInfo available in observer log yet
    if not os.path.exists(response_file):
        print_message('%s does not exist yet\n' % response_file)
        return

    with open(response_file) as f:
        line = f.readline()
        while line:

            # skip odd line
            if count % 2 != 0:
                line = f.readline()
                count += 1
                continue

            split_str = line.split(',')

            if len(split_str) < 7:
                line = f.readline()
                count += 1
                continue

            date_str = [split_str[j] for j in xrange(7)]

            date = datetime.strptime("".join(date_str), '%Y%m%d%H%M%S%f')

            # date = None
            # # python strptime thread safety bug
            # # http://bugs.python.org/issue11108
            # while not date:
            #     try:
            #         date = strptime("".join(date_str), '%Y%m%d%H%M%S%f')
            #     except AttributeError as e:
            #         print "[Debug]: strptime reported AttributeError\n" + \
            #               "Details: %s" % e

            date_milli = mktime(
                date.timetuple()) * 1e3 + date.microsecond / 1e3

            category_str = split_str[8]

            if category_str not in category_map:
                category_map[category_str] = category_index
                category_list.append(category_str)
                category_count += 1

                category = category_index

                data = update_data_array(data, 5, category, [])
                data = update_data_array(data, 6, category, [])
                data = update_data_array(data, 7, category, [])

                category_index += 1
            else:
                category = category_map[category_str]

            if split_str[9] == 'Request Begun':
                # not a completed request; advance to the next line so the
                # loop does not spin on the same line forever
                line = f.readline()
                count += 1
                continue

            response_time = float(split_str[10])
            arrival_time = date_milli - response_time * 1000
            data = update_data_array(data, 2, category, arrival_time)
            data = update_data_array(data, 3, category, response_time)

            line = f.readline()
            count += 1

        update_data_array(data, 2, category_index, [[]])

        # fill in the gap between length of data and the target index
        for i in xrange(len(data)):
            for j in range(len(data[2])):
                if len(data[i]) < len(data[2]):
                    data[i].append([])

        raw_data = data

        data = format_data(raw_data, 60000, category_list, cpu_file)

        seg = base_path.split('/')
        vm_name = seg[len(seg) - 1]

        results = (vm_name, data)
        queue.put(results)
def process_server_logs(base_dir, line_counters, total_users, waiting_time,
                        queue):
    """

    :param base_dir:        Base directory of monitor log
    :param line_counters:   Counter for continuously reading the single log file
    :param total_users:     The total number of users simulated
    :param queue:           Queue to store results when using in thread
    :param waiting_time:    The measurement time
    :return:
    """

    print_message('')
    print_message('Waiting for the next batch of service station monitoring '
                  'logs (%s seconds)...\n' % waiting_time)
    time.sleep(waiting_time)

    module_path = os.path.dirname(client_server.__file__)
    base_dir = module_path + '/logs/' + base_dir + '/'

    # retrieve service station and observer mapping
    station_observers = get_station_csparql()
    # retrieve and process the log of each service station in a new thread
    csparql_reader = ThreadingManager()

    # python strptime thread safety bug. Has to call strptime once before
    # creating thread. Details can be found on:
    # http://bugs.python.org/issue11108
    time.strptime("30 Nov 00", "%d %b %y")

    for station_name, observer_ip in station_observers.iteritems():
        observer_addr = '%s=%s' % (station_name, observer_ip)

        csparql_reader.start_tasks(target_func=process_monitor_log,
                                   name='csparql_reader',
                                   para=[base_dir, observer_addr,
                                         line_counters[station_name]])

    # wait for all threads to finish and collect their results
    result_queue = csparql_reader.collect_results()

    total_requests = 0

    # Now collect metric data from all service stations and then calculate
    # the metrics necessary for the optimisation, since these metrics are
    # calculated for the entire online service
    service_station_metric_list = []

    while not result_queue.empty():
        # get metrics returned
        result_dict = result_queue.get()

        station_name = result_dict['station_name']
        station_total_requests = result_dict['total_requests']
        arrival_rate = result_dict['arrival_rate']
        service_rate_para_list = result_dict['service_rate_para_list']
        line_counter = result_dict['line_counter']

        total_requests += station_total_requests

        service_station_metric = ServiceStationMetric(station_name,
                                                      station_total_requests,
                                                      arrival_rate / 60,
                                                      service_rate_para_list,
                                                      service_rate=0)

        service_station_metric_list.append(service_station_metric)

        # update the current line counter
        line_counters[station_name] = line_counter

    # now calculate service rate for each station
    for station_metric in service_station_metric_list:

        # parameter needed for calculating service rate for servers from
        # one service station
        mu_para_list = station_metric.service_rate_para_list

        # Calculating service rate of each server in one station
        service_time_list = []  # list to store service time of each server

        # for service rate calculation parameters for each server ...
        for s_para in mu_para_list:
            # number of users for this vm
            num_of_requests = s_para['num_of_requests']
            num_of_user = int(math.ceil(
                total_users * (num_of_requests / total_requests)))
            num_of_cores = s_para['cpu_cores']
            data = s_para['data']

            mean_service_time = calculate_service_rate(num_of_user,
                                                       num_of_cores, data)
            service_time_list.append(mean_service_time)

            print_message('Mean service time of VM \'%s\' at station \'%s\': %s'
                          % (s_para['vm_name'], station_metric.station_name,
                             str(mean_service_time)))

        # The overall service rate is calculated as the number of requests
        # completed by all servers within the time that the slowest server
        # takes to complete a single request, divided by that time
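        # e.g. (hypothetical) with service times of [2.0, 4.0] seconds the
        # slowest server takes 4.0s; within that window the two servers
        # complete 4/2 + 4/4 = 3 requests, giving an overall service rate of
        # 3 / 4.0 = 0.75 requests per second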
        max_time = max(service_time_list)

        comp_req_sum = 0
        for service_time in service_time_list:
            comp_req_sum += max_time / service_time

        overall_service_rate = comp_req_sum / max_time

        station_metric.service_rate = overall_service_rate

    # store the result of this thread in the result queue
    queue.put((service_station_metric_list, total_requests))
    queue.put(line_counters)
Example #7
def optimisation(num_of_stations, total_requests, elb_prices,
                 avg_data_in_per_reqs, avg_data_out_per_reqs, in_bandwidths,
                 out_bandwidths, budget, service_rates, measurement_interval,
                 station_latency):
    variables = [1 for i in xrange(num_of_stations)]

    feasible_tuple = []

    # get all combinations that satisfy the constraints
    for i in f_range(1, 99, 0.0001):
        variables[0] = float(i) / 100.0
        variables[1] = 1 - float(i) / 100.0

        satisfy_constrains = constrains_check(
            variables, total_requests, avg_data_in_per_reqs,
            avg_data_out_per_reqs, elb_prices, measurement_interval, budget,
            in_bandwidths, out_bandwidths, service_rates, station_latency)
        if satisfy_constrains:
            feasible_tuple.append((variables[0], variables[1]))

    if len(feasible_tuple) == 0:
        print_message('No feasible solution found')
        return

    smallest = float("inf")
    answer = (1, 1)
    # minimisation - find the feasible tuple that gives the minimal value
    for f_tuple_idx, f_tuple_val in enumerate(feasible_tuple):
        objective_result = objective_function(f_tuple_val, total_requests,
                                              avg_data_in_per_reqs,
                                              avg_data_out_per_reqs,
                                              elb_prices, measurement_interval,
                                              service_rates, station_latency)
        if objective_result < smallest:
            smallest = objective_result
            answer = f_tuple_val

    #### test ####
    total_cost = 0
    for i in xrange(len(answer)):
        elb_cost = \
            total_requests * (avg_data_in_per_reqs[i] +
                              avg_data_out_per_reqs[i]) * \
            elb_prices[i] * answer[i]

        total_data_out = total_requests * answer[i] * \
                         avg_data_out_per_reqs[i]
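        # Tiered EC2 data-transfer-out pricing as hard-coded below (USD per
        # GB, assuming total_data_out is in GB): the first 1 GB is free, up
        # to 10 TB is charged at $0.12, the next 40 TB at $0.09, the next
        # 100 TB at $0.07 and the next 350 TB at $0.05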
        ec2_cost = 0
        if total_data_out < 1:
            ec2_cost = 0
        elif 1 < total_data_out <= 10240:
            ec2_cost = total_data_out * 0.12
        elif 10240 < total_data_out <= 51200:
            ec2_cost = (total_data_out - 10240) * 0.09 + 10240 * 0.12
        elif 51200 < total_data_out <= 153600:
            ec2_cost = \
                (total_data_out - 51200) * 0.07 + 40960 * 0.09 + 10240 * 0.12
        elif 153600 < total_data_out <= 512000:
            ec2_cost = \
                (total_data_out - 153600) * 0.05 + 102400 * 0.07 + \
                40960 * 0.09 + 10240 * 0.12

        total_cost += elb_cost + ec2_cost

    print_message('')
    print_message('Total cost: $%s ' % total_cost)
    # #### test ####

    return answer
Example #8
def optimise(num_of_stations,
             total_requests,
             elb_prices,
             avg_data_in_per_reqs,
             avg_data_out_per_reqs,
             in_bandwidths,
             out_bandwidths,
             budget,
             sla_response_t,
             service_rates,
             measurement_interval,
             station_latency,
             k,
             ec2_prices_ranges=None,
             cost_mode=None):
    """
    :param num_of_stations: total number of recipients of client requests
                            (i.e. service stations)
    :param total_requests:  total number of requests generated by client
    :param elb_prices:      list of pricing of every ELB involved

    :param avg_data_in_per_reqs:    the average amount of data transferred in
                                    per request for requests received at
                                    *each service station*

    :param avg_data_out_per_reqs:   the average amount of data transferred out
                                    per request for requests received at
                                    *each service station*

    :param in_bandwidths:   The capacity of the link used by each service
                            station to receive request
    :param out_bandwidths:  The capacity of the link used by each service
                            station to send response

    :param service_rates:   Overall service rate of each service station
    :param station_latency: latency between this client and each station
    :param sla_response_t:  Service Level Agreement of the response time
                            for each service station

    :param budget:          Budget of the OSP

    :param k:               coefficient to reflect on how much additional
                            cost to pay for one unit of throughput improvement

    :param measurement_interval: The length of measurement time (in seconds)

    :param ec2_prices_ranges:   The price charged by EC2 based on different
                                amount of data sent out of EC2 (Dict of dict)
    :param cost_mode:           Mode for selecting different EC2 data
                                transfer pricing

    :return:                The current best weight for each request recipient

    # Objective: (e.g. number of service stations = 2)
    # maximise  Cost + K * throughput
    #
    ///////////////////
     Update: New objective function:
             Minimise Cost + latency perceived by users

     Only the objective function and the response time constraint need to
     change
    ////////////////////
    #
    #         ************ Deprecated *************
    #
    #         total_requests *
    #         (avg_data_in_per_req[0] + avg_data_out_per_req[0]) *
    #         elb_price[0] *
    #         P[0]
    #         +
    #         total_requests * avg_data_out_per_req[0] * P[0] * ec2_price_range
    #         +
    #         K * total_requests *
    #         P[0]

    #         +

    #         total_requests *
    #         (avg_data_in_per_req[1] + avg_data_out_per_req[1]) *
    #         elb_price[1] *
    #         P[1]
    #         +
    #         total_requests * avg_data_out_per_req[0] * P[0] * ec2_price_range
    #         +
    #         K * total_requests *
    #         P[1]

    #         "Actually formulation":

    #         (total_requests *
    #         (avg_data_in_per_req[0] + avg_data_out_per_req[0]) *
    #         elb_price[0]
    #         +
    #         K * total_requests
    #         +
    #         total_requests * avg_data_out_per_req[0] * ec2_price_range) * P[0]
    #
    #         +
    #
    #         (total_requests *
    #         (avg_data_in_per_req[1] + avg_data_out_per_req[1]) *
    #         elb_price[1]
    #         +
    #         K * total_requests
    #         +
    #         total_requests * avg_data_out_per_req[1] * ec2_price_range) * P[1]
    #
    #         ************ End of Deprecated *************
    #
    # Subject to:

    #   "data_in/second < bandwidth of the in_link"

    #   (total_requests * avg_data_in_per_req[0] * P[0]))
    #   / measurement_interval < in_bandwidth[0]

    #   (total_requests * avg_data_in_per_req[1] * P[1]))
    #   / measurement_interval < in_bandwidth[1]

    #   "data_out/second < bandwidth of the out_link"

    #   (total_requests * avg_data_out_per_req[0] * P[0]))
    #   / measurement_interval < out_bandwidth[0]

    #   (total_requests * avg_data_out_per_req[1] * P[1]))
    #   / measurement_interval < out_bandwidth[1]

    #   "Actually formulation":
    #
    #   total_requests * avg_data_in_per_req[0] / measurement_interval * P[0] +
    #                    0                                             * P[1]
    #                                                      <= in_bandwidth[0]

    #                    0                                             * P[0] +
    #   total_requests * avg_data_in_per_req[1] / measurement_interval * P[1]
    #                                                      <= in_bandwidth[1]

    #   total_requests * avg_data_out_per_req[0] / measurement_interval * P[0] +
    #                    0                                              * P[1]
    #                                                      <= out_bandwidth[0]

    #                    0                                              * P[0] +
    #   total_requests * avg_data_out_per_req[1] / measurement_interval * P[1]
    #                                                      <= out_bandwidth[1]



    #   "(Deprecated)Response time constrain"

    #   D_sla[0] * service_rates[0]^-1 * total_requests * P[0]
    #           <= measurement_interval * (D_sla[0] - service_rates[0]^-1)

    #   D_sla[1] * service_rates[1]^-1 * total_requests * P[1]
    #           <= measurement_interval * (D_sla[1] - service_rates[1]^-1)

    #   "Actually formulation":

    #   D_sla[0] * service_rates[0]^-1 * total_requests * P[0] +
    #                    0                              * P[1]
    #           <= measurement_interval * (D_sla[0] - service_rates[0]^-1)

    #                    0                              * P[0] +
    #   D_sla[1] * service_rates[1]^-1 * total_requests * P[1]
    #           <= measurement_interval * (D_sla[1] - service_rates[1]^-1)


    #   "Budget of OSP" (EC2 cost is calculated differently)
    #   total_requests * (avg_data_in_per_reqs[0] +
    #                     avg_data_out_per_reqs[0]) * elb_prices[0] * P[0]
    #   +
    #
    #   total_requests * (avg_data_in_per_reqs[1] +
    #                     avg_data_out_per_reqs[1]) * elb_prices[1] * P[1]
    #           <= budget


    #   "Sum of weights is 1"
    #   P[0] + P[1] + ... P[num Of Servers - 1] = 1

    #   "P are all positive"
    #   1 * P[0] + 0 * P[1] + 0 * P[2] .... > 0
    #   0 * P[0] + 1 * P[1] + 0 * P[2] .... > 0
    #   0 * P[0] + 0 * P[1] + 1 * P[2] .... > 0
    #   ... ...
    #
    # Variable: P[i]
    """

    coefficients = []
    # right hand side of constraints
    right_hand_side = []
    # coefficients in objective function
    obj_func_coef = []

    for i in xrange(num_of_stations):
        # Building coefficients for the constraint inequalities.

        # Collecting coefficients of each variable of each constraint
        # inequality
        """ In bandwidth constraints """
        # | t*a/m  0    0    0   ... | < in_bandwidth[0]
        # |   0  t*a/m  0    0   ... | < in_bandwidth[1]
        # |   0    0  t*a/m  0   ... | < in_bandwidth[2]
        # |   0    0    0  t*a/m ... | ... ...
        in_bandwidth_coef = [0 for i1 in xrange(num_of_stations)]
        in_bandwidth_coef[i] = \
            total_requests * avg_data_in_per_reqs[i] / measurement_interval
        """ Out bandwidth constrains """
        out_bandwidth_coef = [0 for i2 in xrange(num_of_stations)]
        out_bandwidth_coef[i] = \
            total_requests * avg_data_out_per_reqs[i] / measurement_interval

        # """ Response time constrain """
        response_t_coef = [0 for i3 in xrange(num_of_stations)]
        response_t_coef[i] = \
            sla_response_t[i] * math.pow(service_rates[i], -1) * total_requests
        """ All variable (weights) are positive """
        all_pos_coef = [0 for i4 in xrange(num_of_stations)]
        all_pos_coef[i] = -1  # convert to standard form
        """ coefficient for the "sum of weights is 1" constrain (i.e all 1) """
        sum_p_coef = 1
        """ Cost less then or equal to budget """

        cost_coef = \
            total_requests * (avg_data_in_per_reqs[i] +
                              avg_data_out_per_reqs[i]) * elb_prices[i] + \
            0.120 * total_requests * avg_data_out_per_reqs[i]

        #### test ####
        print_message('Cost coefficient for station %s: $%s' % (i, cost_coef))
        #### test ####

        # Store all coefficients for this variable in the above order
        """ Order matters """
        coefficients_for_p_i = []
        coefficients_for_p_i.extend(in_bandwidth_coef)
        coefficients_for_p_i.extend(out_bandwidth_coef)
        coefficients_for_p_i.extend(response_t_coef)
        coefficients_for_p_i.extend(all_pos_coef)
        # in order to turn the "sum of weights is 1" equality constraint into
        # inequality constraints, replace the original equality constraint
        # with 2 new inequalities that represent a very tiny range around the
        # original right hand side of the equality constraint
        # P1 + P2 + P3 + .... > 1 - 0.0000000001
        # P1 + P2 + P3 + .... < 1 + 0.0000000001
        coefficients_for_p_i.append(sum_p_coef * -1)
        coefficients_for_p_i.append(sum_p_coef)
        coefficients_for_p_i.append(cost_coef)

        # add this list in the coefficient collection as the coefficient of
        # current variable i.e weight
        coefficients.append(coefficients_for_p_i)

        # Building objective function coefficient for this variable
        service_time = math.pow(service_rates[i], -1)
        obj_p_i_coef = \
            total_requests * (avg_data_in_per_reqs[i] +
                              avg_data_out_per_reqs[i]) * elb_prices[i] + \
            0.120 * total_requests * avg_data_out_per_reqs[i] + \
            (measurement_interval - service_time * total_requests) / \
            (service_time * measurement_interval)

        # maximise = minimise the negative form
        obj_func_coef.append(obj_p_i_coef * -1)
    """ Order Matters """
    # Now adding the right hand side.
    # Right hand side values have to be added in the order the coefficients
    # were added, e.g. in_bandwidths -> out_bandwidths -> response time
    # constraints -> ...
    right_hand_side.extend([in_bandwidths[n] for n in xrange(num_of_stations)])
    right_hand_side.extend(
        [out_bandwidths[m] for m in xrange(num_of_stations)])
    right_hand_side.extend([
        measurement_interval *
        (sla_response_t[s] - math.pow(service_rates[s], -1))
        for s in xrange(num_of_stations)
    ])
    right_hand_side.extend([0 for j in xrange(num_of_stations)])
    right_hand_side.append(0.0000000001 - 1)
    right_hand_side.append(1 + 0.0000000001)
    right_hand_side.append(budget)

    print 'coefficients: %s' % coefficients
    print 'right_hand_side: %s' % right_hand_side
    print 'obj_func_coef: %s' % obj_func_coef

    a = matrix(coefficients)
    b = matrix(right_hand_side)
    c = matrix(obj_func_coef)

    sol = solvers.lp(c, a, b)

    return sol['x']
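A minimal, purely illustrative sketch of how optimise might be invoked for
two stations. All numbers are hypothetical (data volumes in GB per request,
bandwidths in GB/s, rates in requests per second) and are not taken from the
system above:

if __name__ == '__main__':
    example_weights = optimise(num_of_stations=2,
                               total_requests=10000,
                               elb_prices=[0.008, 0.008],
                               avg_data_in_per_reqs=[1e-5, 1e-5],
                               avg_data_out_per_reqs=[1e-4, 1e-4],
                               in_bandwidths=[0.01, 0.01],
                               out_bandwidths=[0.01, 0.01],
                               budget=1000,
                               sla_response_t=[1.0, 1.0],
                               service_rates=[50.0, 40.0],
                               measurement_interval=300,
                               station_latency=[0.08, 0.12],
                               k=0.001)
    print 'Example weights: %s' % example_weights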
def format_data(data, period, category_list, cpu_file):
    metric_list = ['addtocartbulk', 'checkLogin', 'checkoutoptions', 'login',
                   'logout', 'main', 'orderhistory', 'quickadd']
    delete = []

    for i in xrange(len(data[2]) - 1):
        if not data[2][i]:
            delete.append(i)

    # delete data
    data, category_list, delete = remove_data(data, category_list, delete)

    # find out those metrics that are not in the metric_list
    for i in xrange(len(data[2]) - 1):
        flag = 0
        for j in range(8):
            if category_list[i] == metric_list[j]:
                flag = 1

        if flag == 0:
            delete.append(i)

    # delete data
    data, category_list, delete = remove_data(data, category_list, delete)

    start_time = min(data[2][0])
    max_time = max(data[2][0])

    for i in xrange(1, len(data[2]) - 1):
        if data[2][i] and start_time > min(data[2][i]):
            start_time = min(data[2][i])
        if data[2][i] and max_time > max(data[2][i]):
            max_time = max(data[2][i])

    samples = int(math.floor(((max_time - start_time) / period)))

    print_message('Number of samples (interval:%s) : %s' % (period, samples))

    for i in xrange(len(data[2]) - 1):
        end_time = start_time

        departure = [a + r * 1000 for a, r in zip(data[2][i], data[3][i])]

        for k in xrange(samples):

            index = [v[0] for v in enumerate(departure)
                     if end_time <= v[1] < (end_time + period)]

            arr_index = [v[0] for v in enumerate(data[2][i])
                         if end_time <= v[1] < (end_time + period)]

            response_times = [0]
            if index:
                response_times = [data[3][i][idx] for idx in index]
            data[4][i].append(scipy.mean(response_times))
            data[5][i].append(len(index) / period * 1000)
            data[6][i].append(len(index))
            data[7][i].append(len(arr_index))

            data[0][i].append(end_time + period)
            end_time += period

    # Number of samples for each request might not be equal
    max_num_requests = 0
    max_requests_idx = 0
    for i in xrange(len(data[2]) - 1):
        if max_num_requests < len(data[2][i]):
            max_num_requests = len(data[2][i])
            max_requests_idx = i

    for i in xrange(len(data[2]) - 1):
        data[0][i] = data[0][max_requests_idx]
        if len(data[4][i]) < len(data[0][i]):
            # pad each series with zeros up to the common length
            # (extend rather than append, so the zeros become individual
            # samples instead of one nested list)
            data[4][i].extend([0] * (len(data[0][i]) - len(data[4][i])))
            data[5][i].extend([0] * (len(data[0][i]) - len(data[5][i])))
            data[6][i].extend([0] * (len(data[0][i]) - len(data[6][i])))

    data[0][len(data[0]) - 1] = data[0][0]

    with open(cpu_file) as f:
        count = 0
        cpu = []
        cpu_time = []
        flag = 0
        line = f.readline()
        while line:
            cpu_num = float(line)

            if count % 2 == 0:
                if cpu_num > 1 or math.isnan(cpu_num):
                    flag = 1
                else:
                    cpu.append(cpu_num)
            else:
                if flag:
                    flag = 0
                else:
                    cpu_time.append(cpu_num)

            count += 1
            line = f.readline()

    cpu_time = [e - 3600 * 1000 for e in cpu_time]
    indices = [i[0] for i in sorted(enumerate(cpu_time), key=lambda x: x[1])]
    cpu_time = [cpu_time[i] for i in indices]
    cpu = [cpu[i] for i in indices]

    for i in xrange(len(data[0][0])):
        indices_found = [v[0] for v in enumerate(cpu_time)
                         if data[0][0][i] <= v[1] < data[0][0][i] + period]

        if indices_found:
            mean = scipy.mean([cpu[i] for i in indices_found])
            data[1][len(data[1]) - 1].append(mean)

    return data
def process_access_log(bucket, elb_region, elb_name):
    """
    function that retrieve access logs of ELB that stored in S3 buckets and
    calculate the total amount of data processed by the ELB

    log file format

    {Bucket}/{Prefix}/AWSLogs/{AWS AccountID}/elasticloadbalancing/{Region}/
    {Year}/{Month}/{Day}/{AWS Account ID}_elasticloadbalancing_{Region}_
    {Load Balancer Name}_{End Time}_{Load Balancer IP}_{Random String}.log

    :param bucket: the S3 bucket that stores the access logs of the ELB
    :param elb_region:  region of the elastic load balancer of which
                        access logs are being retrieved
    :param elb_name:    name of the elastic load balancer
    :return:            total amount of data being processed by the elb
                        during the measurement interval
    """

    if not bucket:
        print 'S3 bucket object required'
        return

    # Start new threads for downloading and reading each matching log file
    data_accumulator = DataAccumulatorManager()

    # record the start time to calculate time elapsed
    # start_time = time.time()

    # Since the actual log emission time varies, we determine when to stop
    # waiting for logs by the number of logs retrieved. Each log represents
    # ELB access for either 5 minutes or 1 hour

    # Counter for the number of logs obtained.
    logs_obtained = 0
    expected_logs_to_obtain = get_expected_num_logs()

    # keep track of the last log emission minute that has been dealt with
    last_expected_time = None

    # flag indicating that the process needs to stop before obtaining the
    # expected number of logs matching the measurement interval
    need_to_stop = False

    # check whether we have reached the end of the measurement interval
    # while (time.time() - start_time) / 60 <= m_interval:
    while logs_obtained < expected_logs_to_obtain and not need_to_stop:

        request_headers, max_waiting_time, last_expected_time \
            = calculate_key_prefix(elb_region, elb_name, last_expected_time)

        matching_keys = []
        # In case the total waiting time exceeds the S3
        # logging interval (e.g. 5 min) we need to recalculate the next
        # expected log name
        time_counter = 0
        # Wait for polling interval while the log is not available
        while not matching_keys or len(matching_keys) < 2:

            print_message('')
            print_message('Searching for bucket key(s) that start with: %s' %
                          request_headers['prefix'])
            matching_keys = bucket.search_key(parameters=request_headers)

            if matching_keys:
                for m_key in matching_keys:
                    print_message('Found %s' % m_key)
                if len(matching_keys) > 1:
                    break

            print_message('Time elapsed in the current search: %s min(s)' %
                          (time_counter / 60))

            # Check whether we need to wait for new log.
            # There could be up to 5 mins delay for actual log delivery
            # http://docs.aws.amazon.com/ElasticLoadBalancing/latest/
            # DeveloperGuide/access-log-collection.html
            # The next expected minute should be recalculated based on its
            # last value, i.e. the last "next expected minute"
            if time_counter > max_waiting_time:
                # Generally, if the waiting time exceeds the maximum time
                # calculated, there has either been some error during ELB
                # access log emission to S3, in which case the waiting time
                # is unpredictable, OR it is the end of the simulation. In
                # either case we need to stop waiting for S3
                need_to_stop = True
                break

            print_message('Waiting for log to be emitted (polling interval %s '
                          'seconds) ...\n' % log_polling_interval)

            time.sleep(log_polling_interval)
            time_counter += log_polling_interval

        if need_to_stop:
            break

        for key_name in matching_keys:
            key = bucket.get_key(key_name=key_name)

            # compose log file directory
            segment = key_name.split('/')
            log_file_name = segment[len(segment) - 1]
            log_file_path_dir = log_file_dir + bucket.name

            if not os.path.exists(log_file_path_dir):
                os.makedirs(log_file_path_dir)

            log_file_path = log_file_path_dir + '/' + log_file_name

            # download and process each log file concurrently
            data_accumulator.start_tasks(data_accumulator.read_log,
                                         'data_accumulator',
                                         (key, log_file_path))

        # Collect results from each threads
        data_accumulator.collect_results()

        print_message('Total amount of data (bytes) received by \'%s\' from '
                      'clients so far: %s' %
                      (elb_name, data_accumulator.client_sent))
        print_message('Total amount of data (bytes) sent by \'%s\' to '
                      'clients so far: %s' %
                      (elb_name, data_accumulator.client_receive))

        logs_obtained += 1
        print_message('Access log of \'%s\' obtained so far : %s\n' %
                      (elb_name, logs_obtained))

    return data_accumulator.client_sent, data_accumulator.client_receive
Example #12
def clients_optimisation(avg_data_in_per_reqs, avg_data_out_per_reqs, client,
                         elb_prices, latency_results_dict, measurement_interval,
                         service_rates, stations, total_request_per_client,
                         queue):
    # bandwidths for each client
    in_bandwidths = []
    out_bandwidths = []
    # average data per requests
    avg_in_data = []
    avg_out_data = []
    service_rates_list = []
    station_latency = []

    in_band_dict, out_band_dict = get_stations_bandwidth(client)
    # Budget e.g 100,000 / 30 / 24 / 60 / interval
    # Not total budget. Abstract Budget for the interval
    budget = 1000
    client_avg_data_in_per_reqs = avg_data_in_per_reqs[client]
    client_avg_data_out_per_reqs = avg_data_out_per_reqs[client]
    request_sum = total_request_per_client[client]
    # get the latency from this client to each station
    station_latency_dict = dict()
    for key_str, latency_val in latency_results_dict.iteritems():
        src_host, dst_host = key_str.split(',')
        if client == src_host:
            station_latency_dict.update({dst_host: latency_val})
    for station in stations:
        # convert from Mb/s to GB/s
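        # e.g. (hypothetical) 100 Mb/s -> 100 / 8 / 1024 ~= 0.0122 GB/s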
        in_band = float(in_band_dict[station]) / 8 / 1024
        out_band = float(out_band_dict[station]) / 8 / 1024
        in_bandwidths.append(in_band)
        out_bandwidths.append(out_band)

        avg_in_data.append(client_avg_data_in_per_reqs[station])
        avg_out_data.append(client_avg_data_out_per_reqs[station])
        service_rates_list.append(service_rates[station])
        station_latency.append(station_latency_dict[station])

    weights = optimisation(num_of_stations=2,
                           total_requests=request_sum,
                           elb_prices=elb_prices,
                           avg_data_in_per_reqs=avg_in_data,
                           avg_data_out_per_reqs=avg_out_data,
                           in_bandwidths=in_bandwidths,
                           out_bandwidths=out_bandwidths,
                           budget=budget,
                           service_rates=service_rates_list,
                           measurement_interval=measurement_interval,
                           station_latency=station_latency)

    print_message('Weights calculated for client %s: %s' % (client, weights))

    # weights are fractions initially, but Route 53 only accepts weights
    # that are integers between 0 and 255, so we convert the ratio of
    # weights into a ratio of integers; this scaling should match the
    # search step used in the optimisation
    weights = [int(val * 255) for val in weights]
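    # e.g. (hypothetical) fractional weights (0.6, 0.4) become (153, 102),
    # preserving the 3:2 ratio within Route 53's integer weight range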

    route53_conn = Route53Connection()
    zone = route53_conn.get_zone(base_domain)
    elb_records = station_metadata_map['StationELBDNS']

    alias_zone_id = {'xueshi-station-1': 'Z32O12XQLNTSW2',
                     'xueshi-station-2': 'Z35SXDOTRQ7X7K'}

    # need to be mapped to IPs
    clients_regions = {'ap_south_1_client_1': 'ap-southeast-1',
                       'us_east_1_client_1': 'us-east-1',
                       'us_west_1_client_1': 'us-west-1'}

    identifiers = dict(cfg.items('StationWRRAliasIdentifiers'))

    stations = get_available_stations()
    # Since we passed the optimisation parameters in the order of the
    # available stations, the output weights should be in the same order
    station_weights = {}
    for idx in xrange(len(stations)):
        station_weights.update({stations[idx]: int(round(weights[idx]))})

    rrs = ResourceRecordSets(route53_conn, zone.id)

    for s_name, weights_val in station_weights.iteritems():
        alias_dns_name = elb_records[s_name]
        host_zone_id = alias_zone_id[s_name]

        # Client region not station region
        region_name = clients_regions[client]

        dns_record_name = '%s.%s' % (region_name, base_domain)
        identifier = identifiers[s_name]
        base_record = dict(name=dns_record_name,
                           record_type="A", weight=weights_val,
                           identifier=identifier)

        print_message('[Debug]: weight before sending change request %s'
                      % weights_val)

        new = rrs.add_change(action="UPSERT", **base_record)
        new.set_alias(host_zone_id, unicode(alias_dns_name), False)

    # retry in case the request is rejected because Route 53 is still
    # processing a prior request
    succeed = False
    while not succeed:
        try:
            rrs.commit()
            succeed = True
        except UnsuccessfulRequestError as e:

            retriable_err = 'The request was rejected because Route 53 ' \
                            'was still processing a prior request'
            if retriable_err in e.body:
                # pause for a while before sending another request
                time.sleep(2.5)
                print_message('Previous request to Route 53 in progress.\n '
                              'Re-sending request...')
            else:
                # not a retriable error, so re-raise instead of looping
                raise

    print_message('Weights set for client %s: %s' % (client, weights))
    log_info(metric_record_file,
             'Weights set for client %s: %s' % (client, weights))

    queue.put((client, weights))
Example #13
def main():
    setup_logging()

    # line counter for reading csparql logs of each service station
    line_counters = dict()

    # bucket and ELB info for getting access log from S3
    elb_buckets_dict = dict()
    elb_regions = get_station_region()
    elb_buckets = get_elb_buckets_map()
    for station, region in elb_regions.iteritems():
        elb_region_str = '%s:%s' % (station, region)
        elb_buckets_dict.update({elb_region_str: unicode(elb_buckets[station])})

    stations = get_available_stations()
    for station in stations:
        line_counters.update({station: 0})

    log_base_dir = time.strftime("%Y_%m%d_%H%M")
    total_num_users = cfg.get_int('Default', 'total_num_users')

    # Get all available client region
    available_clients = get_available_clients()

    # counter = 0  # For testing
    while True:

        # Processing csparql log and ELB access log simultaneously by 2 threads

        # Start csparql log parsing first since the ELB access log is
        # emitted with a delay

        # First we need to calculate how long to wait before retrieving
        # csparql logs, i.e. how long the actual measurement time is. Since
        # S3 only emits logs at the 5th, 10th, 15th etc. minute of the hour,
        # we can only measure from the start of the measurement until the
        # last expected log emission time, which is not the actual time the
        # log is obtained, since there is a delay
        measurement_interval = calculate_waiting_time()

        # no need to wait until log actually being obtained
        measurement_interval -= 300

        # Measuring latency between each client region and service station
        latency_manager = ThreadingManager()
        latency_manager.start_task(
            target_func=measure_latency,
            name="latency_manager",
            para=[available_clients, stations, measurement_interval]
        )

        server_log_processor = ThreadingManager()
        server_log_processor.start_task(
            target_func=process_server_logs,
            name="server_log_processor",
            para=[log_base_dir, line_counters, total_num_users,
                  measurement_interval]
        )

        # Begin gathering info of the amount of data
        # transferred through each service station
        data_counting_task = ThreadingManager()
        data_counting_task.start_task(
            target_func=process_elb_access_log,
            name="elb_access_log_processor",
            para=[elb_buckets_dict]
        )

        latency_results_dict = latency_manager.collect_results().get()

        # collect csparql logs first since their processing completes first,
        # while the ELB data may be delayed
        server_metrics_queue = server_log_processor.collect_results()
        (station_metric_list, total_request) = server_metrics_queue.get()
        line_counters = server_metrics_queue.get()

        print_message('')
        print_message('Service station logs processing finished\n')

        # collecting elb data now
        elb_data_queue = data_counting_task.collect_results()
        data_in, data_out = elb_data_queue.get()

        """ Preparing optimisation parameters """
        # Calculate The "average amount of data involved in each request" for
        # each service station and the "total number of requests"

        # These 2 dictionaries store the average data sent and received per
        # request by *each service station* from *each client*. The length
        # of each dictionary should equal the number of clients (regions)

        # <Client_name: <Station: data_in_per_req>>
        avg_data_in_per_reqs = dict()
        # <Client_name: <Station: data_out_per_req>>
        avg_data_out_per_reqs = dict()

        # initialise
        for cli_name in available_clients:
            avg_data_in_per_reqs.update({cli_name: {}})
            avg_data_out_per_reqs.update({cli_name: {}})

        # requests arrival rate and service rate of each service station
        arrival_rates = dict()
        service_rates = dict()

        for station_metric in station_metric_list:
            # getting metric
            station_name = station_metric.station_name
            arrival_rate = station_metric.arrival_rate
            service_rate = station_metric.service_rate

            requests = station_metric.total_requests

            print '\nTotal requests for station %s: %s' \
                  % (station_name, requests)

            log_info(metric_record_file,
                     '\nTotal requests for station %s: %s'
                     % (station_name, requests))

            # arrival_rate and service_rate
            arrival_rates.update({station_name: arrival_rate})
            service_rates.update({station_name: service_rate})

            response_time = \
                math.pow(service_rate, -1) / (1 - math.pow(service_rate, -1) *
                                              arrival_rate)
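            # (M/M/1 estimate: with service rate mu and arrival rate lambda,
            # the expression above is (1/mu) / (1 - lambda/mu) = 1/(mu - lambda))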
            print '[Debug] predicted current response time of service station ' \
                  '\'%s\': %s' % (station_name, response_time)

            log_info(metric_record_file,
                     '[Debug] predicted current response time of service '
                     'station \'%s\': %s' % (station_name, response_time))

        # TODO: calculate total requests for each client
        # TODO: if c-sparql could record the source of each request,
        # TODO: this would be much easier
        total_request_per_client = dict()
        data_in_sum = 0
        client_data_in_sum = dict()

        for a_client in available_clients:
            client_data_in_sum.update({a_client: 0})

        for station_name in stations:
            d_in = data_in.get(station_name)
            for c, sent_data in d_in.iteritems():
                # calculate total data sent by each client
                # and the total data sent by all client
                client_data_in_sum[c] += sent_data
                data_in_sum += sent_data

        # calculate total amount of requests sent by each client
        for ac in available_clients:
            # float() avoids Python 2 integer division truncating the ratio
            t_request = math.ceil(
                total_request * (float(client_data_in_sum[ac]) / data_in_sum))
            t_request = int(t_request)
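            # e.g. with 10,000 total requests and a client contributing 30% of
            # the inbound bytes, roughly 3,000 requests are attributed to it
            # (assuming a client's byte share approximates its request share)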

            # store this so it can also be used when calculating outbound data
            total_request_per_client.update({ac: t_request})

        for station_name in stations:
            d_in = data_in.get(station_name)

            for c, sent_data in d_in.iteritems():
                t_request = total_request_per_client[c]
                # convert the amount of data to GB
                sent_data = float(sent_data / math.pow(1024, 3))
                avg_data_in_per_req = sent_data / t_request
                avg_data_in_per_reqs[c].update(
                    {station_name: avg_data_in_per_req})

        for station_name in stations:
            d_out = data_out.get(station_name)

            for c1, received_data in d_out.iteritems():
                t_request = total_request_per_client[c1]
                received_data = float(received_data / math.pow(1024, 3))
                avg_data_out_per_req = received_data / t_request
                avg_data_out_per_reqs[c1].update(
                    {station_name: avg_data_out_per_req})

        # For testing purposes
        info_str = \
            '\n[Debug] total_request: %s\n' \
            '[Debug] avg_data_in_per_reqs: %s\n' \
            '[Debug] avg_data_out_per_reqs: %s\n' \
            '[Debug] arrival_rates: %s\n' \
            '[Debug] service_rates: %s\n' \
            % (total_request,
               avg_data_in_per_reqs,
               avg_data_out_per_reqs,
               arrival_rates, service_rates)

        print info_str
        log_info(metric_record_file, info_str)

        # TODO: Get elb price from config
        # ELB pricing
        elb_prices = [0.008, 0.008]

        # run the optimisation for each client in a separate thread
        optimiser = ThreadingManager()
        for client in available_clients:
            optimiser.start_tasks(
                target_func=clients_optimisation,
                name="optimiser",
                para=[avg_data_in_per_reqs,
                      avg_data_out_per_reqs, client,
                      elb_prices, latency_results_dict,
                      measurement_interval, service_rates,
                      stations, total_request_per_client]
            )

        # synchronising threads
        optimiser.collect_results()

        # it can take up to 60 seconds for Route 53 record changes to take effect
        time.sleep(60)
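
# A minimal standalone sketch (not part of the original example) of the
# per-request data averaging above: apportion a station's total requests to
# each client by its share of inbound bytes, then express bytes per request
# in GB. The function name and the sample numbers are illustrative only.
import math


def avg_data_per_request_gb(bytes_per_client, total_requests):
    total_bytes = float(sum(bytes_per_client.values()))
    result = {}
    for client, sent_bytes in bytes_per_client.iteritems():
        # use float division to avoid Python 2 integer truncation
        client_requests = int(math.ceil(total_requests *
                                        (sent_bytes / total_bytes)))
        result[client] = (sent_bytes / math.pow(1024, 3)) / client_requests
    return result

# e.g. avg_data_per_request_gb({'client_a': 3 * 1024 ** 3,
#                               'client_b': 1 * 1024 ** 3}, 1000)
# returns {'client_a': 0.004, 'client_b': 0.004}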
Example #14
def process_monitor_log(base_dir, observer_addr, line_counter, queue):
    """Parse the monitor log and calculate various metric for all servers in
    the service station monitored by the observer

    :param base_dir:        Base directory of monitor log
    :param observer_addr:   Service station name and observer ips pair
    :param line_counter:    Counter for continuously reading the single log file

    :param queue:           Queue that store metrics needed for optimisation
                            generated by current thread
    """

    station_name, observer_ip = observer_addr.split('=')

    # every thread stores data in a separate folder
    base_dir = base_dir + station_name + '/'
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    monitor_log_path = base_dir + 'observer_log.txt'

    # Flag indicates whether the logs of all servers contain useful data or not
    all_has_info = False

    while not all_has_info:

        print_message('')
        print_message('Synchronising log from observer at: %s' % observer_ip)

        module_path = os.path.dirname(Resources.__file__)
        private_key_file_path = module_path + '/ec2_private_key'

        sync_files(host_ip=observer_ip,
                   username='******',
                   host_file_path='~/results.txt',
                   pk_path=private_key_file_path,
                   dst_loc=monitor_log_path)

        parsed_log_dir, line_counter = parse_monitor_log(
            base_dir, int(line_counter))
        result_queue = generate_data(parsed_log_dir)

        # the observer log contains ResponseInfo entries if the queue is not empty

        # check that all servers have response info
        # TODO: the number of server can be read from config
        if result_queue.qsize() > 1:
            all_has_info = True
        else:
            time.sleep(5)

    data_list = []
    while not result_queue.empty():
        server_data = result_queue.get()
        data_list.append(server_data)

    total_requests, arrival_rate, service_rate_para_list \
        = calculate_metrics(data_list)

    result_dict = {
        'station_name': station_name,
        'total_requests': total_requests,
        'arrival_rate': arrival_rate,
        'service_rate_para_list': service_rate_para_list,
        'line_counter': line_counter
    }

    queue.put(result_dict)
Example #15
def calculate_metrics(data_list):
    """
    Function to calculate metrics needed for optimisation.

    What needs to be calculated:
    1. The total number of requests
    2. Requests arrival rate of the station (lambda)
    3. Prepares metrics for calculating Service Rate of the station (mu)

    :param data_list:   list of data for each server in current service station
    """

    # "vm, number of requests, cpu_core, data" dict list
    service_rate_para_list = []
    # list that stores the average arrival rate of each server
    avg_server_arrival_rate = []

    # collecting relative parameters
    for vm_data in data_list:
        vm_name = vm_data[0]
        data = vm_data[1]

        arrivals_list = []  # list that stores the average arrival
        # rate of "each sampling interval" for
        # a single server

        # sum the arrival rate for each request at the same sampling interval
        for i in xrange(len(data[0][0])):
            sampling_interval_arrivals = 0
            for j in xrange(len(data[2]) - 1):
                sampling_interval_arrivals += data[7][j][i]

            # store overall arrival rate of each sampling interval
            # in order to estimate service rate with CPU utilisation
            # which is also collected during each sampling interval
            arrivals_list.append(sampling_interval_arrivals)

        # The mean of the arrivals over all sampling intervals is the arrival
        # rate, whose unit time is the sampling interval (in this case
        # 1 minute)
        avg_server_arrival_rate.append(numpy.mean(arrivals_list))

        # calculate service rate
        num_of_requests = calculate_total_requests(data)
        para_tuple = {
            'vm_name': vm_name,
            'num_of_requests': num_of_requests,
            'data': data
        }

        vm_cpu_spec = dict(cfg.items('VMSpec'))
        cpu_core = [
            vm_cpu_spec[spec] for spec in vm_cpu_spec.keys() if spec in vm_name
        ]

        if not cpu_core:
            print 'No specification configured for VM \'%s\'' % vm_name
            return

        print_message('[Debug] Number of CPUs of \'%s\': %s' %
                      (vm_name, cpu_core))

        para_tuple.update({'cpu_cores': cpu_core})

        service_rate_para_list.append(para_tuple)

    # calculate total number of requests
    total_requests = sum(
        [p['num_of_requests'] for p in service_rate_para_list])

    station_arrival_rate = sum(avg_server_arrival_rate)

    return total_requests, station_arrival_rate, service_rate_para_list
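
# A minimal standalone sketch (not from the original code) of the arrival-rate
# aggregation above: each server's arrival rate is the mean of its per-interval
# arrival counts, and the station's arrival rate is the sum over servers.
# The sample numbers are illustrative only.
import numpy


def station_arrival_rate(per_server_interval_arrivals):
    return sum(numpy.mean(arrivals) for arrivals in per_server_interval_arrivals)

# e.g. station_arrival_rate([[10, 12, 14], [4, 6, 8]]) returns 12.0 + 6.0 = 18.0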
Example #16
def parse_monitor_log(input_files_dir, line_counter):
    if not input_files_dir:
        print 'Please supply the directory that contains observer results'
        return

    original_file_dir = input_files_dir
    parsed_file_dir = original_file_dir + "parsed_results/"

    if not os.path.exists(parsed_file_dir):
        os.makedirs(parsed_file_dir)

    # get time to store current reading
    current_time = time.strftime("%Y_%m%d_%H%M")

    files = [
        f for f in os.listdir(original_file_dir)
        if f.startswith('observer_log')
    ]

    # for i in xrange(len(files)):
    file_name = files[0]
    sub_folder_name = '%s/%s/' % (file_name[0:file_name.rfind('.')],
                                  current_time)

    metric_name = None

    # keep track of last metric id
    # in order to save reading of last metric when
    # encounter new metric
    last_metric_id = None

    metric_value = None
    timestamps = None
    vm_id = None

    file_path = os.path.abspath(original_file_dir + '/' + file_name)

    # debug
    # with open(file_path) as test_f:
    #     print_message('File length: %s' % (len(test_f.readlines())))
    #     print_message('Line counter %s\n' % line_counter)

    # counter for the number of entries skipped due to incorrectly
    # formatted csparql log entries
    skip_counter = 0

    # counter to skip to the last
    with open(file_path) as f:

        # count = 0  # counter for skip to the line left over last time
        # while count < line_counter:
        # try:
        # f.next()
        # except StopIteration as e:
        # # print 'StopIteration\n' + str(e.message)
        # count += 1
        #     count += 1

        # current_line = 1  # default to 1 to enable the while loop

        count = 0  # counter for skip to the line left over last time
        if line_counter != 0:
            for current_line in f:
                if count == int(line_counter):
                    break
                count += 1

        # Skip to the line after ObserverReceivedTimestamp
        for current_line in f:
            line_counter += 1
            line_segments = current_line.split("\t")
            if len(line_segments) < 3:
                continue
            if 'ObserverReceivedTimesampt' in line_segments[1]:
                break

        # The csparql log sometimes contains "MonitoringDatum" entries that
        # are mixed up, so we explicitly expect the metric properties in a
        # fixed order and skip any mixed-up entries: they are too difficult
        # to parse reliably from an external program like this one, and the
        # proper fix is for the csparql observer to print metrics in a
        # consistent format
        expected_properties = [
            'isAbout', 'isProducedBy', 'hasMonitoredMetric', 'hasValue',
            'hasTimeStamp'
        ]
        expected_prop_idx = 0  # counter to keep track of order of
        # expected properties encountered

        for current_line in f:
            line_counter += 1

            # parse current line of the file
            # current_line = f.readline()
            # line_counter += 1

            # [metric_id, metric_prop, value, dump]
            # = current_line.split("\t")
            line_segments = current_line.split("\t")
            if len(line_segments) < 3:
                continue

            metric_id = line_segments[0]
            metric_prop = line_segments[1]
            value = line_segments[2]

            # if the line is not about metric continue to read next line
            if 'MonitoringDatum' not in metric_id:
                continue

            # For the first time
            if not last_metric_id:
                last_metric_id = metric_id

            # Check that the metric property is the one currently expected and
            # that the metric id matches the one whose other properties we
            # have been reading

            # If the property is not the expected one, skip to the next metric
            # bundle, i.e. the next 'isAbout'. If it is expected but the metric
            # ID differs from the one we have been reading properties for,
            # skip as well. The exception is when expecting 'isAbout', i.e. the
            # first property: there we don't check the metric ID, since it may
            # legitimately be a new metric
            if metric_prop != expected_properties[expected_prop_idx] or \
               (expected_prop_idx != 0 and metric_id != last_metric_id):

                # as long as it enters here it is one skip already
                skip_counter += 1

                # if an 'isAbout' is encountered we assume the following
                # lines could possibly be in order
                if metric_prop == 'isAbout':
                    # start fresh
                    expected_prop_idx = 0
                    last_metric_id = metric_id
                else:
                    # Skip to the next 'isAbout' line
                    expected_prop_idx = 0  # expecting the 'isAbout'
                    last_metric_id = None
                    continue

            # if the metric property is expected then expect the next one
            if expected_prop_idx == len(expected_properties) - 1:
                expected_prop_idx = 0
            else:
                expected_prop_idx += 1

            # save reading of last metric since following reading
            # will be of a new metric id
            if last_metric_id != metric_id:
                # check whether any of the 4 properties :
                # vm_id, metric name, metric value, timestamp
                # is none, which means that the very end of the file
                # has one record that is only partially written, hence
                # ignore this record

                # This "if" shouldn't be here since I skipped those records that
                # are mixed up... but for some reason values below could
                # still be None... It is much more convenient to fix csparql log
                # rather than thinking out logic to patching up for this
                if vm_id and metric_name and metric_value and timestamps:

                    result_dir_path = parsed_file_dir + sub_folder_name + vm_id

                    if not os.path.exists(result_dir_path):
                        os.makedirs(result_dir_path)

                    result_file_path = \
                        result_dir_path + '/' + metric_name + '.txt'

                    with open(result_file_path, 'a') as parsed_f:
                        parsed_f.write(metric_value + '\n')
                        parsed_f.write(timestamps + '\n')

                # the current metric becomes the last one once its value
                # has been processed
                last_metric_id = metric_id

            if metric_prop == "isAbout":
                vm_id = value.replace("Compute#", "")
            elif metric_prop == "hasMonitoredMetric":
                metric_name = value.replace("QoSMetric#", "")
            elif metric_prop == "hasValue":
                metric_value = value
            elif metric_prop == "hasTimeStamp":
                timestamps = value

                # record the position left over
                # line_counter = f.tell()

    print_message('[Debug] Skipped: %s' % skip_counter)

    return parsed_file_dir + sub_folder_name, line_counter
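
# A minimal standalone sketch (not part of the original example) of the
# ordering check used above: group (metric_id, property, value) triples into
# one record per metric, dropping bundles whose properties arrive out of
# order. The function name and sample data are illustrative only.
EXPECTED_PROPS = ['isAbout', 'isProducedBy', 'hasMonitoredMetric',
                  'hasValue', 'hasTimeStamp']


def group_metric_bundles(triples):
    records = []
    current_id = None
    current = {}
    expected_idx = 0

    for metric_id, prop, value in triples:
        if prop != EXPECTED_PROPS[expected_idx] or \
                (expected_idx != 0 and metric_id != current_id):
            # out-of-order bundle: restart at the next 'isAbout'
            expected_idx = 0
            current_id = None
            current = {}
            if prop != 'isAbout':
                continue

        if expected_idx == 0:
            current_id = metric_id
            current = {}

        current[prop] = value
        expected_idx += 1

        if expected_idx == len(EXPECTED_PROPS):
            records.append(current)
            expected_idx = 0

    return records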
Example #17
def clients_optimisation(avg_data_in_per_reqs, avg_data_out_per_reqs, client,
                         elb_prices, latency_results_dict,
                         measurement_interval, service_rates, stations,
                         total_request_per_client, queue):
    # bandwidths for each client
    in_bandwidths = []
    out_bandwidths = []
    # average data per requests
    avg_in_data = []
    avg_out_data = []
    service_rates_list = []
    station_latency = []

    in_band_dict, out_band_dict = get_stations_bandwidth(client)
    # Not the total budget but an abstract budget for this interval,
    # e.g. 100,000 / 30 / 24 / 60 / interval
    budget = 1000
    client_avg_data_in_per_reqs = avg_data_in_per_reqs[client]
    client_avg_data_out_per_reqs = avg_data_out_per_reqs[client]
    request_sum = total_request_per_client[client]
    # get the latency from this client to each station
    station_latency_dict = dict()
    for key_str, latency_val in latency_results_dict.iteritems():
        src_host, dst_host = key_str.split(',')
        if client == src_host:
            station_latency_dict.update({dst_host: latency_val})
    for station in stations:
        # convert from Mb/s to GB/s
        in_band = float(in_band_dict[station]) / 8 / 1024
        out_band = float(out_band_dict[station]) / 8 / 1024
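        # e.g. 100 Mb/s -> 100 / 8 = 12.5 MB/s -> 12.5 / 1024 ~= 0.0122 GB/s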
        in_bandwidths.append(in_band)
        out_bandwidths.append(out_band)

        avg_in_data.append(client_avg_data_in_per_reqs[station])
        avg_out_data.append(client_avg_data_out_per_reqs[station])
        service_rates_list.append(service_rates[station])
        station_latency.append(station_latency_dict[station])

    weights = optimisation(num_of_stations=2,
                           total_requests=request_sum,
                           elb_prices=elb_prices,
                           avg_data_in_per_reqs=avg_in_data,
                           avg_data_out_per_reqs=avg_out_data,
                           in_bandwidths=in_bandwidths,
                           out_bandwidths=out_bandwidths,
                           budget=budget,
                           service_rates=service_rates_list,
                           measurement_interval=measurement_interval,
                           station_latency=station_latency)

    print_message('Weights calculated for client %s: %s' % (client, weights))

    # The weights are initially fractions, but Route 53 only accepts integer
    # weights between 0 and 255, so we convert the ratio of fractions into a
    # ratio of integers. This scaling should match the search step used in
    # the optimisation
    weights = [int(val * 255) for val in weights]
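    # e.g. fractional weights [0.25, 0.75] become [63, 191],
    # roughly preserving the 1:3 ratio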

    route53_conn = Route53Connection()
    zone = route53_conn.get_zone(base_domain)
    elb_records = station_metadata_map['StationELBDNS']

    alias_zone_id = {
        'xueshi-station-1': 'Z32O12XQLNTSW2',
        'xueshi-station-2': 'Z35SXDOTRQ7X7K'
    }

    # client names mapped to their regions (used to build the DNS record name)
    clients_regions = {
        'ap_south_1_client_1': 'ap-southeast-1',
        'us_east_1_client_1': 'us-east-1',
        'us_west_1_client_1': 'us-west-1'
    }

    identifiers = dict(cfg.items('StationWRRAliasIdentifiers'))

    stations = get_available_stations()
    # Since the optimisation parameters were supplied in the order of the
    # available stations, the output weights are in the same order
    station_weights = {}
    for idx in xrange(len(stations)):
        station_weights.update({stations[idx]: int(round(weights[idx]))})

    rrs = ResourceRecordSets(route53_conn, zone.id)

    for s_name, weights_val in station_weights.iteritems():
        alias_dns_name = elb_records[s_name]
        host_zone_id = alias_zone_id[s_name]

        # Client region not station region
        region_name = clients_regions[client]

        dns_record_name = '%s.%s' % (region_name, base_domain)
        identifier = identifiers[s_name]
        base_record = dict(name=dns_record_name,
                           record_type="A",
                           weight=weights_val,
                           identifier=identifier)

        print_message('[Debug]: weight before sending change request %s' %
                      weights_val)

        new = rrs.add_change(action="UPSERT", **base_record)
        new.set_alias(host_zone_id, unicode(alias_dns_name), False)

    # retry in case the request is rejected because a prior
    # request is still being processed by Route 53
    succeed = False
    while not succeed:
        try:
            rrs.commit()
            succeed = True
        except UnsuccessfulRequestError as e:

            retriable_err = 'The request was rejected because Route 53 ' \
                            'was still processing a prior request'
            if retriable_err in e.body:
                # pause for a while before sending another request
                time.sleep(2.5)
                print_message('Previous request to Route 53 in progress.\n '
                              'Re-sending request...')
            else:
                # not a retriable error; propagate it instead of looping forever
                raise

    print_message('Weights set for client %s: %s' % (client, weights))
    log_info(metric_record_file,
             'Weights set for client %s: %s' % (client, weights))

    queue.put((client, weights))
def calculate_metrics(data_list):
    """
    Function to calculate metrics needed for optimisation.

    What needs to be calculated:
    1. The total number of requests
    2. Requests arrival rate of the station (lambda)
    3. Prepares metrics for calculating Service Rate of the station (mu)

    :param data_list:   list of data for each server in current service station
    """

    # "vm, number of requests, cpu_core, data" dict list
    service_rate_para_list = []
    # list that stores the average arrival rate of each server
    avg_server_arrival_rate = []

    # collecting relative parameters
    for vm_data in data_list:
        vm_name = vm_data[0]
        data = vm_data[1]

        arrivals_list = []  # list that stores the average arrival
        # rate of "each sampling interval" for
        # a single server

        # sum the arrival rate for each request at the same sampling interval
        for i in xrange(len(data[0][0])):
            sampling_interval_arrivals = 0
            for j in xrange(len(data[2]) - 1):
                sampling_interval_arrivals += data[7][j][i]

            # store overall arrival rate of each sampling interval
            # in order to estimate service rate with CPU utilisation
            # which is also collected during each sampling interval
            arrivals_list.append(sampling_interval_arrivals)

        # The mean of the arrivals over all sampling intervals is the arrival
        # rate, whose unit time is the sampling interval (in this case
        # 1 minute)
        avg_server_arrival_rate.append(numpy.mean(arrivals_list))

        # calculate service rate
        num_of_requests = calculate_total_requests(data)
        para_tuple = {'vm_name': vm_name, 'num_of_requests': num_of_requests,
                      'data': data}

        vm_cpu_spec = dict(cfg.items('VMSpec'))
        cpu_core = [vm_cpu_spec[spec] for spec in vm_cpu_spec.keys()
                    if spec in vm_name]

        if not cpu_core:
            print 'No specification configured for VM \'%s\'' % vm_name
            return

        print_message('[Debug] Number of CPUs of \'%s\': %s'
                      % (vm_name, cpu_core))

        para_tuple.update({'cpu_cores': cpu_core})

        service_rate_para_list.append(para_tuple)

    # calculate total number of requests
    total_requests = sum([p['num_of_requests'] for p in service_rate_para_list])

    station_arrival_rate = sum(avg_server_arrival_rate)

    return total_requests, station_arrival_rate, service_rate_para_list
Example #19
def format_data(data, period, category_list, cpu_file):
    metric_list = [
        'addtocartbulk', 'checkLogin', 'checkoutoptions', 'login', 'logout',
        'main', 'orderhistory', 'quickadd'
    ]
    delete = []

    for i in xrange(len(data[2]) - 1):
        if not data[2][i]:
            delete.append(i)

    # delete data
    data, category_list, delete = remove_data(data, category_list, delete)

    # find out those metrics that are not in the metric_list
    for i in xrange(len(data[2]) - 1):
        if category_list[i] not in metric_list:
            delete.append(i)

    # delete data
    data, category_list, delete = remove_data(data, category_list, delete)

    start_time = min(data[2][0])
    max_time = max(data[2][0])

    for i in xrange(1, len(data[2]) - 1):
        if data[2][i] and start_time > min(data[2][i]):
            start_time = min(data[2][i])
        if data[2][i] and max_time > max(data[2][i]):
            max_time = max(data[2][i])

    samples = int(math.floor(((max_time - start_time) / period)))

    print_message('Number of samples (interval:%s) : %s' % (period, samples))

    for i in xrange(len(data[2]) - 1):
        end_time = start_time

        departure = [a + r * 1000 for a, r in zip(data[2][i], data[3][i])]

        for k in xrange(samples):

            index = [
                v[0] for v in enumerate(departure)
                if end_time <= v[1] < (end_time + period)
            ]

            arr_index = [
                v[0] for v in enumerate(data[2][i])
                if end_time <= v[1] < (end_time + period)
            ]

            response_times = [0]
            if index:
                response_times = [data[3][i][idx] for idx in index]
            data[4][i].append(scipy.mean(response_times))
            data[5][i].append(len(index) / period * 1000)
            data[6][i].append(len(index))
            data[7][i].append(len(arr_index))

            data[0][i].append(end_time + period)
            end_time += period

    # Number of samples for each request might not be equal
    max_num_requests = 0
    max_requests_idx = 0
    for i in xrange(len(data[2]) - 1):
        if max_num_requests < len(data[2][i]):
            max_num_requests = len(data[2][i])
            max_requests_idx = i

    for i in xrange(len(data[2]) - 1):
        data[0][i] = data[0][max_requests_idx]
        if len(data[4][i]) < len(data[0][i]):
            # pad with zeros (extend, not append) so every series ends up
            # with the same number of samples
            data[4][i].extend([0] * (len(data[0][i]) - len(data[4][i])))
            data[5][i].extend([0] * (len(data[0][i]) - len(data[5][i])))
            data[6][i].extend([0] * (len(data[0][i]) - len(data[6][i])))

    data[0][len(data[0]) - 1] = data[0][0]

    with open(cpu_file) as f:
        count = 0
        cpu = []
        cpu_time = []
        flag = 0
        line = f.readline()
        while line:
            cpu_num = float(line)

            if count % 2 == 0:
                if cpu_num > 1 or math.isnan(cpu_num):
                    flag = 1
                else:
                    cpu.append(cpu_num)
            else:
                if flag:
                    flag = 0
                else:
                    cpu_time.append(cpu_num)

            count += 1
            line = f.readline()

    cpu_time = [e - 3600 * 1000 for e in cpu_time]
    indices = [i[0] for i in sorted(enumerate(cpu_time), key=lambda x: x[1])]
    cpu_time = [cpu_time[i] for i in indices]
    cpu = [cpu[i] for i in indices]

    for i in xrange(len(data[0][0])):
        indices_found = [
            v[0] for v in enumerate(cpu_time)
            if data[0][0][i] <= v[1] < data[0][0][i] + period
        ]

        if indices_found:
            # use a separate name to avoid clobbering the outer loop index i
            mean = scipy.mean([cpu[idx] for idx in indices_found])
            data[1][len(data[1]) - 1].append(mean)

    return data
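
# A minimal standalone sketch (not part of the original example) of the
# binning step above: given request arrival timestamps (in ms) and a sampling
# period (in ms), count the arrivals that fall into each interval. The
# function name and sample values are illustrative only.
import math


def arrivals_per_interval(arrival_times_ms, period_ms):
    start = min(arrival_times_ms)
    end = max(arrival_times_ms)
    num_samples = int(math.floor((end - start) / float(period_ms)))

    counts = []
    interval_start = start
    for _ in xrange(num_samples):
        counts.append(len([t for t in arrival_times_ms
                           if interval_start <= t < interval_start + period_ms]))
        interval_start += period_ms
    return counts

# e.g. arrivals_per_interval([0, 100, 150, 60500, 61000], 60000) returns [3]
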
def process_monitor_log(base_dir, observer_addr, line_counter, queue):
    """Parse the monitor log and calculate various metric for all servers in
    the service station monitored by the observer

    :param base_dir:        Base directory of monitor log
    :param observer_addr:   Service station name and observer ips pair
    :param line_counter:    Counter for continuously reading the single log file

    :param queue:           Queue that store metrics needed for optimisation
                            generated by current thread
    """

    station_name, observer_ip = observer_addr.split('=')

    # every thread stores data in a separate folder
    base_dir = base_dir + station_name + '/'
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    monitor_log_path = base_dir + 'observer_log.txt'

    # Flag indicates whether the logs of all servers contain useful data or not
    all_has_info = False

    while not all_has_info:

        print_message('')
        print_message('Synchronising log from observer at: %s' % observer_ip)

        module_path = os.path.dirname(Resources.__file__)
        private_key_file_path = module_path + '/ec2_private_key'

        sync_files(host_ip=observer_ip, username='******',
                   host_file_path='~/results.txt',
                   pk_path=private_key_file_path, dst_loc=monitor_log_path)

        parsed_log_dir, line_counter = parse_monitor_log(base_dir,
                                                         int(line_counter))
        result_queue = generate_data(parsed_log_dir)

        # the observer log contains ResponseInfo entries if the queue is not empty

        # check that all servers have response info
        # TODO: the number of server can be read from config
        if result_queue.qsize() > 1:
            all_has_info = True
        else:
            time.sleep(5)

    data_list = []
    while not result_queue.empty():
        server_data = result_queue.get()
        data_list.append(server_data)

    total_requests, arrival_rate, service_rate_para_list \
        = calculate_metrics(data_list)

    result_dict = {'station_name': station_name,
                   'total_requests': total_requests,
                   'arrival_rate': arrival_rate,
                   'service_rate_para_list': service_rate_para_list,
                   'line_counter': line_counter}

    queue.put(result_dict)
def process_server_logs(base_dir, line_counters, total_users, waiting_time,
                        queue):
    """

    :param base_dir:        Base directory of monitor log
    :param line_counters:   Counter for continuously reading the single log file
    :param total_users:     The total number of users simulated
    :param queue:           Queue to store results when using in thread
    :param waiting_time:    The measurement time
    :return:
    """

    print_message('')
    print_message('Waiting for the next batch of service station monitoring '
                  'logs (%s seconds)...\n' % waiting_time)
    time.sleep(waiting_time)

    module_path = os.path.dirname(client_server.__file__)
    base_dir = module_path + '/logs/' + base_dir + '/'

    # retrieve service station and observer mapping
    station_observers = get_station_csparql()
    # retrieve and process the log of each service station in a new thread
    csparql_reader = ThreadingManager()

    # python strptime thread safety bug. Has to call strptime once before
    # creating thread. Details can be found on:
    # http://bugs.python.org/issue11108
    time.strptime("30 Nov 00", "%d %b %y")

    for station_name, observer_ip in station_observers.iteritems():
        observer_addr = '%s=%s' % (station_name, observer_ip)

        csparql_reader.start_tasks(
            target_func=process_monitor_log,
            name='csparql_reader',
            para=[base_dir, observer_addr, line_counters[station_name]])

    # wait for all threads to finish and collect their results
    result_queue = csparql_reader.collect_results()

    total_requests = 0

    # Now collect metric data from all service stations and calculate the
    # metrics needed for the optimisation, since these metrics are computed
    # for the online service as a whole
    service_station_metric_list = []

    while not result_queue.empty():
        # get metrics returned
        result_dict = result_queue.get()

        station_name = result_dict['station_name']
        station_total_requests = result_dict['total_requests']
        arrival_rate = result_dict['arrival_rate']
        service_rate_para_list = result_dict['service_rate_para_list']
        line_counter = result_dict['line_counter']

        total_requests += station_total_requests

        service_station_metric = ServiceStationMetric(station_name,
                                                      station_total_requests,
                                                      arrival_rate / 60,
                                                      service_rate_para_list,
                                                      service_rate=0)

        service_station_metric_list.append(service_station_metric)

        # update the current line counter
        line_counters[station_name] = line_counter

    # now calculate service rate for each station
    for station_metric in service_station_metric_list:

        # parameter needed for calculating service rate for servers from
        # one service station
        mu_para_list = station_metric.service_rate_para_list

        # Calculating service rate of each server in one station
        service_time_list = []  # list to store service time of each server

        # for service rate calculation parameters for each server ...
        for s_para in mu_para_list:
            # number of users for this vm
            num_of_requests = s_para['num_of_requests']
            # float() avoids Python 2 integer division truncating the ratio
            num_of_user = int(math.ceil(
                total_users * (float(num_of_requests) / total_requests)))
            num_of_cores = s_para['cpu_cores']
            data = s_para['data']

            mean_service_time = calculate_service_rate(num_of_user,
                                                       num_of_cores, data)
            service_time_list.append(mean_service_time)

            print_message(
                'Mean service time of VM \'%s\' at station \'%s\': %s' %
                (s_para['vm_name'], station_metric.station_name,
                 str(mean_service_time)))

        # The overall service rate is calculated by the number of requests
        # completed by all servers within the time that the slowest server
        # takes to complete a single request
        max_time = max(service_time_list)

        comp_req_sum = 0
        for service_time in service_time_list:
            comp_req_sum += max_time / service_time

        overall_service_rate = comp_req_sum / max_time
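        # e.g. service times of [0.2, 0.5] s: max_time = 0.5, completed
        # requests = 0.5/0.2 + 0.5/0.5 = 3.5, so the overall rate is
        # 3.5 / 0.5 = 7 req/s (equivalently 1/0.2 + 1/0.5)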

        station_metric.service_rate = overall_service_rate

    # store result of this thread in result the queue
    queue.put((service_station_metric_list, total_requests))
    queue.put(line_counters)
def parse_monitor_log(input_files_dir, line_counter):
    if not input_files_dir:
        print 'Please supply the directory that contains observer results'
        return

    original_file_dir = input_files_dir
    parsed_file_dir = original_file_dir + "parsed_results/"

    if not os.path.exists(parsed_file_dir):
        os.makedirs(parsed_file_dir)

    # get time to store current reading
    current_time = time.strftime("%Y_%m%d_%H%M")

    files = [f for f in os.listdir(original_file_dir)
             if f.startswith('observer_log')]

    # for i in xrange(len(files)):
    file_name = files[0]
    sub_folder_name = '%s/%s/' % (file_name[0: file_name.rfind('.')],
                                  current_time)

    metric_name = None

    # keep track of last metric id
    # in order to save reading of last metric when
    # encounter new metric
    last_metric_id = None

    metric_value = None
    timestamps = None
    vm_id = None

    file_path = os.path.abspath(original_file_dir + '/' + file_name)

    # debug
    # with open(file_path) as test_f:
    #     print_message('File length: %s' % (len(test_f.readlines())))
    #     print_message('Line counter %s\n' % line_counter)

    # counter for the number of entries skipped due to incorrectly
    # formatted csparql log entries
    skip_counter = 0

    # counter to skip to the last
    with open(file_path) as f:

        # count = 0  # counter for skip to the line left over last time
        # while count < line_counter:
        # try:
        # f.next()
        # except StopIteration as e:
        # # print 'StopIteration\n' + str(e.message)
        # count += 1
        #     count += 1

        # current_line = 1  # default to 1 to enable the while loop

        count = 0  # counter for skip to the line left over last time
        if line_counter != 0:
            for current_line in f:
                if count == int(line_counter):
                    break
                count += 1

        # Skip to the line after ObserverReceivedTimestamp
        for current_line in f:
            line_counter += 1
            line_segments = current_line.split("\t")
            if len(line_segments) < 3:
                continue
            if 'ObserverReceivedTimesampt' in line_segments[1]:
                break

        # The csparql log sometimes contains "MonitoringDatum" entries that
        # are mixed up, so we explicitly expect the metric properties in a
        # fixed order and skip any mixed-up entries: they are too difficult
        # to parse reliably from an external program like this one, and the
        # proper fix is for the csparql observer to print metrics in a
        # consistent format
        expected_properties = ['isAbout', 'isProducedBy',
                               'hasMonitoredMetric', 'hasValue',
                               'hasTimeStamp']
        expected_prop_idx = 0  # counter to keep track of order of
        # expected properties encountered

        for current_line in f:
            line_counter += 1

            # parse current line of the file
            # current_line = f.readline()
            # line_counter += 1

            # [metric_id, metric_prop, value, dump]
            # = current_line.split("\t")
            line_segments = current_line.split("\t")
            if len(line_segments) < 3:
                continue

            metric_id = line_segments[0]
            metric_prop = line_segments[1]
            value = line_segments[2]

            # if the line is not about metric continue to read next line
            if 'MonitoringDatum' not in metric_id:
                continue

            # For the first time
            if not last_metric_id:
                last_metric_id = metric_id

            # Check that the metric property is the one currently expected and
            # that the metric id matches the one whose other properties we
            # have been reading

            # If the property is not the expected one, skip to the next metric
            # bundle, i.e. the next 'isAbout'. If it is expected but the metric
            # ID differs from the one we have been reading properties for,
            # skip as well. The exception is when expecting 'isAbout', i.e. the
            # first property: there we don't check the metric ID, since it may
            # legitimately be a new metric
            if metric_prop != expected_properties[expected_prop_idx] or \
               (expected_prop_idx != 0 and metric_id != last_metric_id):

                # as long as it enters here it is one skip already
                skip_counter += 1

                # if an 'isAbout' is encountered we assume the following
                # lines could possibly be in order
                if metric_prop == 'isAbout':
                    # start fresh
                    expected_prop_idx = 0
                    last_metric_id = metric_id
                else:
                    # Skip to the next 'isAbout' line
                    expected_prop_idx = 0  # expecting the 'isAbout'
                    last_metric_id = None
                    continue

            # if the metric property is expected then expect the next one
            if expected_prop_idx == len(expected_properties) - 1:
                expected_prop_idx = 0
            else:
                expected_prop_idx += 1

            # save reading of last metric since following reading
            # will be of a new metric id
            if last_metric_id != metric_id:
                # check whether any of the 4 properties :
                # vm_id, metric name, metric value, timestamp
                # is none, which means that the very end of the file
                # has one record that is only partially written, hence
                # ignore this record

                # This "if" shouldn't be here since I skipped those records that
                # are mixed up... but for some reason values below could
                # still be None... It is much more convenient to fix csparql log
                # rather than thinking out logic to patching up for this
                if vm_id and metric_name and metric_value and timestamps:

                    result_dir_path = parsed_file_dir + sub_folder_name + vm_id

                    if not os.path.exists(result_dir_path):
                        os.makedirs(result_dir_path)

                    result_file_path = \
                        result_dir_path + '/' + metric_name + '.txt'

                    with open(result_file_path, 'a') as parsed_f:
                        parsed_f.write(metric_value + '\n')
                        parsed_f.write(timestamps + '\n')

                # the current metric becomes the last one once its value
                # has been processed
                last_metric_id = metric_id

            if metric_prop == "isAbout":
                vm_id = value.replace("Compute#", "")
            elif metric_prop == "hasMonitoredMetric":
                metric_name = value.replace("QoSMetric#", "")
            elif metric_prop == "hasValue":
                metric_value = value
            elif metric_prop == "hasTimeStamp":
                timestamps = value

                # record the position left over
                # line_counter = f.tell()

    print_message('[Debug] Skipped: %s' % skip_counter)

    return parsed_file_dir + sub_folder_name, line_counter