Example #1
    def initialize(self, arg="all"):
        """Clear all instance data and user data in memory"""
        self.search = FGSearch()
        self.chart = FGCharts()
        self.instances = FGInstances()
        self.instances.db.conf()
        self.instances.db.connect()
Example #2
class FGMetricShell(Cmd):

    instances = None
    search = None

    def __init__(self):
        Cmd.__init__(self)
        self.prompt = "fg-metric> "
        self.intro = "Welcome to FutureGrid Cloud Metrics!"

    def initialize(self, arg="all"):
        """Clear all instance data and user data on the memory"""
        self.search = FGSearch()
        self.chart = FGCharts()
        self.instances = FGInstances()
        self.instances.db.conf()
        self.instances.db.connect()

    def init_objects(self):
        self.search.__init__()
        self.chart.__init__()

    def load_db(self, option=None):
        """Read the statistical data from database (MySQL, etc)"""

        print "\rloading database ..."
        # Get data from the database
        self.instances.read_from_db()
        # Get also userinfo data from the database
        self.instances.read_userinfo_from_db()
        self.instances.read_projectinfo_from_db()
        print "\r... loaded"

    def show_dbinfo(self, param=None):
        pprint(vars(self.instances.db), indent=2)

    def show_filter_setting(self, param=None):
        pprint(vars(self.search.get_filter()))
        # res = vars(self.search.get_filter()).copy()
        # del res["selected"]
        # pprint(res)

    def measure(self):

        total_counts = self.instances.count()
        print "Calculating metrics in " + str(total_counts) + " records...\n"

        cnt = cnt2 = cnt3 = 0
        for i in range(0, total_counts):
            try:
                instance = self.instances.get_data(
                    i, self.search._is_userinfo_needed())[0]
                cnt += 1
                if not self.search._is_in_date(instance):
                    continue
                cnt2 += 1
                if not self.search._is_filtered(instance):
                    continue
                cnt3 += 1
                res = self.search.collect(instance)

            except:
                # print sys.exc_info()
                pass  # raise

        print self.search.get_metric()
        # print cnt, cnt2, cnt3

        '''
        This is where a dict/list for chart data should be created.
        What needs to be done:
        1) choose which column to collect. This should be done by the 'metric' filter
        2) get the value from the instance
        3) create a data structure for the result
        4) if there are groupby(s), create a multi-dimensional dict/list to save the value at a depth
           e.g. res[groupby1][groupby2] =
           e.g. res = { groupby1 : { groupby2: val1, ... } }

        5) fill in missing dates for the chart format? This should be done in a chart module
        6) convert the result data structure to chart-formatted data
        '''
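
        # A minimal sketch of step 4 above (hypothetical field names, not part
        # of this class): nesting collected values under two groupby keys so
        # that res = { groupby1: { groupby2: value } }:
        #
        #     from collections import defaultdict
        #     res = defaultdict(dict)
        #     for inst in matched_instances:
        #         g1, g2 = inst["groupby1"], inst["groupby2"]
        #         res[g1][g2] = res[g1].get(g2, 0) + inst["value"]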

    def set_configfile(self, filename):
        self.instances.db.set_conf(filename)
        self.instances.db.update_conf()

        print filename + " loaded."
        print "refresh db may required."

    def create_csvfile(self, data, dirname="./", filename="default.csv"):

        try:
            writer = csv.writer(open(dirname + filename, 'wb'), delimiter=",",
                                quotechar="\"", quoting=csv.QUOTE_NONNUMERIC)  # or csv.QUOTE_MINIMAL
            for row in data:
                writer.writerow(row)

            msg = filename + " is created"
        except:
            msg = filename + " is not created"
            print sys.exc_info()

        print msg

    @options([
        make_option('-f', '--start_date', type="string",
                    help="start time of the interval (format: YYYY-MM-DDThh:mm:ss)"),
        make_option('-t', '--end_date', type="string",
                    help="end time of the interval (format: YYYY-MM-DDThh:mm:ss)"),
        make_option('-M', '--month', type="int",
                    help="month to analyze (format: MM)"),
        make_option('-Y', '--year', type="int",
                    help="year to analyze (format: YYYY)"),
        make_option('-m', '--metric', dest="metric", type="string",
                    help="item name to measure (e.g. runtime, count)"),
        make_option('-P', '--period', dest="period",
                    type="string", help="search period (monthly, daily)")
    ])
    def do_analyze(self, line, opts=None):
        """Run analysis for cloud usage data.

        Typically, set platform ***, set nodename ***, set date *** *** are required prior to this command.
        Once analysis is finished, the 'chart' command usually follows to generate results in a chart html file.

            Args:
                line(str): input line
            Returns:
                n/a
            Raises:
                n/a

        """
        try:
            self.set_parameters(opts)
            self.search.check_vailidity()
            self.search.init_stats()
            self.show_filter_setting()
            self.measure()
        except ValueError as e:
            print e
        except:
            print sys.exc_info()

    @options([
        make_option('-o', '--output', type="string",
                    dest="filepath", help="filepath to export a csv file")
    ])
    def do_csv(self, line, opts=None):
        """Export statistics as a csv file"""
        try:
            data = self.search.get_csv()
            if not opts.filepath:
                filedir = "./"
                filename = self.search.get_filename() + ".csv"
            else:
                filedir, filename = split(opts.filepath)

            self.create_csvfile(data, filedir, filename)
        except:
            print "no dataset is available to export."
            print "please perform 'analyze' first to export data"
            print

    @options([
        make_option('-o', '--directory', type="string",
                    dest="DIR", help="change to directory DIR"),
        make_option('-t', '--type', type="string", dest="ctype",
                    default="column", help="chart type, e.g. bar, line, column, pie, and motion"),
        make_option(
            '-a', '--api', type="string", dest="api", default="highcharts",
            help="chart API, e.g. highcharts, google, jquery sparkline")
    ])
    def do_chart(self, line, opts=None):
        ''' Generate an HTML chart file based on the statistics from the 'analyze' command '''
        self.chart.set_chart_api(opts.api)
        self.chart.set_type(opts.ctype)
        self.chart.set_output_path(opts.DIR)
        self.chart.set_filename(
            self.search.get_filename() + "." + self.chart.output_type)

        for key, data in self.search.get_metric().iteritems():
            # self.chart.set_xaxis(key) TBD
            if key == "All":
                self.chart.set_data_beta(
                    data, self.search.metric, self.search.period, self.search.groupby)
            else:
                new_key = self.search.adjust_stats_keys(key)
                self.chart.set_data_beta2(new_key, data, ''.join(
                    self.search.metric), self.search.period or "Total")
            # self.chart.set_series_beta(data)

        self.chart.set_series(self.search.get_series())
        self.chart.set_title_beta(', '.join(
            self.search.metric), self.search.period, self.search.groupby)
        self.chart.set_subtitle("source: " + str(
            self.search.get_platform_names()) + " on " + str(self.search.get_node_names()))
        self.chart.set_yaxis(self.search.timetype or "")
        self.chart.display()

    @options([
        make_option('-u', '--user', type="string",
                    help="Show only image numbers owned by the userid specified."),
        make_option('-d', '--detail', action="store_true",
                    default=False, help="Show details about images"),
        make_option('-s', '--summary', action="store_true",
                    default=False, help="Show summary values about images")
    ])
    def count_images(self, arg, opts=None):
        """Count bucket images per user (development level)

            Counts virtual machine images grouped by user or account, based on euca2ools.
            It shows which user or account currently owns how many virtual machine images on the system.
            This metric is based on the euca2ools command 'euca-describe-images', with which a eucalyptus
            user can see a list of machine images.
        """
        bucket_dict = {}
        details = {}
        max_user = ["", 0]
        bin_path = subprocess.check_output(["which", "euca-describe-images"])
        eucabin = bin_path.split("\n")
        output = subprocess.check_output(["python2.7", eucabin[0]])
        # Split the output by end-of-line chars.
        lines = output.split("\n")
        chart_labels = []

        # Loop through lines. The image path is the third item.
        # Split by "/" to get bucket and key.
        for line in lines:
            if line:
                try:
                    values = line.split()
                    bucket, key = values[2].split("/")
                    # Replace the bucket with accountId - hrlee
                    # No reason to gather the bucket name. Instead, the
                    # accountId is meaningful.
                    bucket = values[3] + "(" + values[3] + ")"
                    count = bucket_dict.get(bucket, 0)
                    details.setdefault(bucket, {})[count] = line
                    bucket_dict[bucket] = count + 1
                    if bucket_dict[bucket] > max_user[1]:
                        max_user[0] = bucket
                        max_user[1] = bucket_dict[bucket]
                except:
                    continue

        for key, value in bucket_dict.items():
            if opts.user:
                if opts.user != key:
                    continue
            print("\t".join([key, str(value)]))
            chart_labels.append(key + ":" + str(value))

        # show detail information of image owned by a specific user from -u,
        # --user option
        if opts.user and opts.detail:
            for key, value in details[opts.user].items():
                print value

        # Show summary of images. i.e. number of total images, number of users,
        # average numbers of images, and maximum numbers of images.
        if opts.summary:
            total_image_count = len(lines) - 1  # exclude the trailing empty line
            total_user_count = len(bucket_dict)
            print ""
            print "= Summary ="
            print "Total image counts:\t" + str(total_image_count)
            print "Total user counts:\t" + str(total_user_count)
            print "Average image counts per user:\t" + str(float(total_image_count) / total_user_count)
            print "Maximum image counts and userid:\t" + max_user[0] + " has " + str(max_user[1])
            print "=========="

    def do_refresh(self, line, opts=None):
        """Refresh component (same as 'load')

        Usage example:
        fg-metric> refresh db"""
        self.do_load(line, opts)

    def do_load(self, line, opts=None):
        """Load component

        Usage example:
        fg-metric> load db"""
        self.call_attr(line, "load_")

    def do_showconf(self, line, opts=None):
        """Display current settings

        Usage example:
        fg-metric> showconf dbinfo
        fg-metric> showconf filter_setting"""
        self.call_attr(line, "show_")

    def do_show(self, line, opts=None):
        '''show search options set by a user'''
        self.call_attr(line, "show_", "self.search")

    def do_get(self, line, opts=None):
        """Show current settings

        Usage example:
        fg-metric> get filter"""
        self.call_attr(line, "get_", "self.search")

    def do_setconf(self, line, opts=None):
        """Set a configuration"""
        self.call_attr(line, "set_")

    def do_set(self, line, opts=None):
        """Set a function with parameter(s)"""
        self.call_attr(line, "set_", "self.search")

    def do_count(self, line, opts=None):
        """Set a function with parameter(s)"""
        self.call_attr(line, "count_")

    def call_attr(self, line, prefix="_", obj_name="self"):

        try:
            args = line.split()
            cmd = args[0]

            if len(args) == 1:
                params = ""
            elif len(args) == 2:
                params = args[1]
            else:
                params = args[1:]
        except:
            cmd = None
            params = ""

        function = prefix + str(cmd)

        try:
            func = getattr(eval(obj_name), function)
            if callable(func):
                func(params)
                print function + " is called (" + "".join(params) + ")"
        except:
            print sys.exc_info()

    def set_parameters(self, opts):
        """Set search options from opt parse variables

        What variables are set:
            a. dates
            b. metric
            c. period

        Setting priority:
        1. start_date, end_date
        2. year, month
        3. set date $from $to (set prior to the analyze command)

        For example,
        if opts.start_date and opts.end_date are given, opts.year and opts.month will be ignored.

            Args:
                opts.start_date
                opts.end_date
                opts.year
                opts.month
                opts.period
                opts.metric

        """

        if opts.year or opts.month:
            now = datetime.now()
            from_date = datetime(opts.year or now.year, opts.month or 1, 1)
            to_date = datetime(opts.year or now.year, opts.month or 12, monthrange(
                opts.year or now.year, opts.month or 12)[1])
            self.search.set_date([from_date, to_date])
        if opts.start_date and opts.end_date:
            self.search.set_date([opts.start_date, opts.end_date])
        if opts.period:
            self.search.set_period(opts.period)
        if opts.metric:
            self.search.set_metric(opts.metric)

    def help_analyze(self):
        print "Run analysis for cloud usage data"

    def do_clear(self, line):
        """Clear settings for analysis. (e.g. nodename, platform, date will be cleared)"""
        self.init_objects()

    def preloop(self):
        self.initialize()
        self.load_db()

    def postloop(self):
        print "Bye ..."
Example #3
    def insert_userinfo(self):
        '''Store userinfo into the database by reading userid(s) from a
        text file or standard input. This command reads userid(s),
        does an ldapsearch to find the userinfo, and then stores the
        userinfo into the mysql database.

        Usage: $ fg-metrics-utility insert_userinfo -i filename [hostname]
               or
               $ fg-metrics-utility insert_userinfo userid [hostname]
        '''

        i = FGInstances()
        filename = ""
        userid = ""
        ownerid = ""
        username = ""
        project = ""

        if len(sys.argv) < 3 or sys.argv[1] != "insert_userinfo":
            print "usage: ./$ fg-metrics-utility insert_userinfo -i filename [hostname] \n\
                   or \n\
                   $ fg-metrics-utility insert_userinfo userid [hostname]"
            return

        if sys.argv[2] == "-i":
            filename = sys.argv[3]
            hostname = sys.argv[4]
        else:
            userid = sys.argv[2]
            hostname = sys.argv[3]

        if os.path.exists(filename):
            f = open(filename, "r")
            for line in f:

                ownerid = line.rstrip()
                # For comma separated lines
                # E.g. 5TQVNLFFHPWOH22QHXERX,hyunjoo,fg45
                # Ownerid, username, projectid
                m = re.search(r'(.*),(.*),(.*)', line.rstrip())

                if m:
                    try:
                        userid = m.group(1)
                        username = m.group(2)
                        project = m.group(3)
                    except:
                        m = None

                    # In euca3.0+, username is an ownerid of past version of
                    # euca
                    if username:
                        ownerid = username
                res = self.retrieve_userinfo_ldap(ownerid)
                if res:
                    if m:
                        # if m exists, res (dict) should be merged with the
                        # comma separated values in order to store the info
                        # into db
                        res["ownerid"] = userid
                        res["username"] = username
                        res["project"] = project
                        if hostname:
                            res["hostname"] = hostname
                    print res
                    i.userinfo_data.append(res)
        else:
            res = self.retrieve_userinfo_ldap(userid)
            if res:
                i.userinfo_data.append(res)

        i.write_userinfo_to_db()
Example #4
class FGMetricAPI:
    """ FG Metric Python API

    This API provides usage statistics the FG Metric way, but relies on database queries.

    In a nutshell,
    FG Metric retrieves all records from the database and filters matching records in Python programmatically.
    FG Metric API, by contrast, retrieves records from the database with a search condition; in particular, 'ownerid' is a required field for the search.
    Mainly, this API queries the database with a statement like "select * from instance where start >= date and end <= date and ownerid = id".
    Other than that, the rest of the process is the same as in FG Metric.

    The return value is a Python 'dict'.

    Description
    ===========
    FG Metric Python API provides usage data like FG Metric's cmd tool, but through a Python API.

    Requirement
    ^^^^^^^^^^^
    set_user(ownerid) must be called before get_stats();
    get_stats() then returns the selected statistics.

    Sample Usage
    ^^^^^^^^^^^^
    api = FGMetricAPI()
    api.set_user('hrlee')
    api.set_date('2012-01-01', '2012-12-31')
    api.set_metric('count')
    res = api.get_stats()
    print res

    Development Status :: 2 - Pre-Alpha

    """

    def __init__(self):
        self.search = FGSearch()
        self.instances = FGInstances()
        self.init_db()
        self.init_vars()

    def init_db(self):
        self.instances.db.conf()
        self.instances.db.connect()

    def init_vars(self):
        self.start_date = None
        self.end_date = None
        self.metric = None
        self.cloud = None
        self.hostname = None
        self.period = None
        self.project = None
        self.username = None
        self.groupby = None
        self.userinfo = None
        self.projectinfo = None

    def set_date(self, *dates):
        self.start_date = dates[0]
        self.end_date = dates[1]

    def set_metric(self, name):
        self.metric = name

    def set_user(self, name):
        self.username = name

    def set_project(self, name):
        self.project = name

    def set_cloud(self, name):
        self.cloud = name

    def set_hostname(self, name):
        self.hostname = name

    def get_metric(self):
        # TBD
        return

    def get_cloud(self):
        # TBD
        return

    def set_period(self, name):
        self.period = name

    def get_period(self):
        # TBD
        return

    def set_groupby(self, name):
        self.groupby = name

    def get_groupby(self):
        return self.groupby

    def get_stats(self):
        ownerids = self._get_ownerids()
        self._get_instances(ownerids)
        self.search.init_stats()
        self._set_search_vars()
        # pprint(vars(self.search.get_filter()))
        self._calculate_stats()
        return self.search.get_metric()

    def get_realtime(self):
        return

    def get_series(self):
        return self.search.get_series()

    def _set_search_vars(self):
        self.search.set_date([self.start_date, self.end_date])
        self.search.set_metric(self.metric)
        self.search.set_platform(self.cloud)
        self.search.set_nodename(self.hostname)
        self.search.set_period(self.period)
        self.search.set_groupby(self.groupby)

    def _calculate_stats(self):
        for i in range(0, self.instances.count()):
            instance = self.instances.get_data(
                i, self.search._is_userinfo_needed())[0]
            if not self.search._is_in_date(instance):
                continue
            if not self.search._is_filtered(instance):
                continue
            res = self.search.collect(instance)

    def _get_ownerids(self):
        try:
            self.instances.read_userinfo({"username": self.username})
            userinfo = self.instances.userinfo
            ownerids = [element['ownerid'] for element in userinfo]
            return ownerids
        except:
            return None

    def _get_instances(self, ownerids=None):
        if ownerids:
            if len(ownerids) == 1:
                # avoid the trailing comma of a 1-tuple, e.g. "('abc',)"
                whereclause = " and ownerid = '" + str(ownerids[0]) + "'"
            else:
                # e.g. " and ownerid in ('abc', 'def')"
                whereclause = " and ownerid in " + str(tuple(ownerids))
        else:
            whereclause = ""
        self.instances.read_instances({}, whereclause)

    def get_instances(self):
        return self.instances.instance

    def get_userinfo(self):
        """Return all users"""
        if self.userinfo:
            return self.userinfo
        try:
            self.instances.read_userinfo_detail()  # ({}, " group by username ")
            self.userinfo = self.instances.get_userinfo()
            return self.userinfo
        except:
            print "failed to read userinfo %s" % sys.exc_info()
            return None

    def get_projectinfo(self):
        """Return all projects"""
        if self.projectinfo:
            return self.projectinfo
        try:
            prj_info = self.instances.get_projectinfo()
            self.projectinfo = prj_info
            return self.projectinfo
            #self.instances.read_projectinfo()
            #self.projectinfo = self.instances.projectinfo
        except:
            print "failed to read project info %s" % sys.exc_info()
            return None

    def _set_dict_vars(self):
        self.result = {
            "start_date":   self.start_date,
            "end_date":   self.end_date,
            "ownerid":   self.username,
            "metric":   self.metric,
            "period":   self.period or "All",
            "clouds":   self.cloud or "All",
            "hostname":   self.hostname or "All"
        }
        return self.result
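
A minimal end-to-end sketch of FGMetricAPI (hypothetical ownerid, dates, and groupby value; mirrors the Sample Usage in the class docstring, with period and groupby added):

api = FGMetricAPI()
api.set_user('hrlee')
api.set_date('2012-01-01', '2012-12-31')
api.set_metric('count')
api.set_period('monthly')
api.set_groupby('user')
res = api.get_stats()
print res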
Example #5
    def __init__(self):
        self.search = FGSearch()
        self.instances = FGInstances()
        self.init_db()
        self.init_vars()
Example #6
class FGLogParser:

    debug = False
    progress = True

    args = None
    instances = None

    def __init__(self):
        self.instances = FGInstances()

    def convert_data_to_list(self, data, attribute):
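        # Example (hypothetical value): "[vol-1 vol-2]" becomes
        # "['vol-1' , 'vol-2']", which eval() turns into ['vol-1', 'vol-2'].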
        rest = data[attribute]
        rest = re.sub(" ", "' , '", rest)
        rest = "['" + rest[1:-1] + "']"
        restdata = eval(rest)
        data[attribute] = restdata

    def convert_data_to_dict(self, data, attribute):
        rest = data[attribute]
        rest = self.convert_str_to_dict_str(rest[1:-1])
        restdata = eval(rest)
        data[attribute] = restdata

    def convert_str_to_dict_str(self, line):
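        # Example (hypothetical value): "a=1 b=2" becomes
        # "{'a' : '1','b' : '2'}", which eval() turns into {'a': '1', 'b': '2'}.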
        line = re.sub(' +', ' ', line)
        line = line.strip(" ")
        line = re.sub(',', '%2C', line)  # protect commas inside values as '%2C'
        line = re.sub(' ', ',', line)

        # quote keys and values to form a regular dict literal
        line = re.sub('=', '\'=\'', line)
        line = re.sub(',', '\',\'', line)
        line = re.sub('=', ' : ', line)
        line = re.sub('%2C', ',', line)  # restore the protected commas
        return '{\'' + line + '\'}'

    def parse_type_and_date(self, line, data):
        # Split the line after the third ']' to find date, id, and msgtype;
        # put the remainder in the string "rest".
        try:
            m = re.search(
                r'\[(.*)\]\[(.*)\]\[(.*)\](.*)', line, re.M | re.I)
            data['date'] = datetime.strptime(
                m.group(1), '%a %b %d %H:%M:%S %Y')
            data['date'] = fgmetric.util.FGTimeZone.convert_timezone(
                data['date'], self.args.timezone, "EST")
            data['id'] = m.group(2)
            data['msgtype'] = m.group(3)
            rest = m.group(4)
            rest = re.sub(' +}', '}', rest).strip()
            if rest.startswith("running"):
                data['linetype'] = "running"
                return rest
            elif rest.startswith("calling"):
                data['linetype'] = "calling"
                return rest
            else:
                location = rest.index(":")
                linetype = rest[0:location]
                data['linetype'] = re.sub(r'\(\)', '', linetype).strip()
                rest = rest[location + 1:].strip()
                return rest
        except (ValueError, AttributeError):
            data['linetype'] = "IGNORE"
            return
        except:
            data['linetype'] = "IGNORE"
            # print sys.exc_info()
            return

    def ccInstance_parser(self, rest, data):
        """parses the line and returns a dict"""

        # replace print_ccInstance(): with linetype=print_ccInstance
        # rest = rest.replace("print_ccInstance():","linetype=print_ccInstance")
        # replace refresh_instances(): with calltype=refresh_instances

        rest = rest.replace(
            "RunInstances():", "calltype=run_instances")
        rest = rest.replace(
            "refresh_instances():", "calltype=refresh_instances")

        # separate easy assignments from those that would contain groups, for now simply put groups as a string
        # all others are merged into a string with *=* into rest
        m = re.search(
            r'(.*)keyName=(.*)ccnet=(.*)ccvm=(.*)ncHostIdx=(.*)volumes=(.*)groupNames=(.*)', rest, re.M | re.I)

        # Version 3.0.2
        # Deleted: emiId, kernelId, ramdiskId, emiURL, kernelURL and ramdiskURL
        # Added: accountId, platform, and bundleTaskStateName
        # Changed: value of ownerId is changed

        try:
            data['keyName'] = m.group(2).strip()
            data["ccnet"] = m.group(3).strip()
            data["ccvm"] = m.group(4).strip()
            data["volumes"] = m.group(6).strip()
            data["groupNames"] = m.group(7).strip()
            # assemble the rest string
            rest = m.group(1) + "ncHostIdx=" + m.group(5)
        except:
            return

        # GATHER ALL SIMPLE *=* assignments into a single rest line and add
        # each entry to dict via eval
        rest = self.convert_str_to_dict_str(rest)
        try:
            restdata = eval(rest)
        except:
            print "eval failed:(" + str(sys.exc_info()[0]) + "), (" + str(rest) + ")"
            return

        data.update(restdata)

        # convert ccvm and ccnet to dict
        self.convert_data_to_dict(data, "ccvm")
        self.convert_data_to_dict(data, "ccnet")

        # convert volumes and groupNames to lists
        self.convert_data_to_list(data, "groupNames")
        self.convert_data_to_list(data, "volumes")

        # convert the timestamp
        data["ts"] = datetime.fromtimestamp(int(data["ts"]))

        return data

    def refresh_resource_parser(self, rest, data):
        # [Wed Nov  9 19:50:08 2011][008128][EUCADEBUG ] refresh_resources(): received data from node=i2 mem=24276/22740 disk=306400/305364 cores=8/6
        if rest.find("received") > -1:
            rest = re.sub("received data from", "", rest).strip()
            # node=i2 mem=24276/22740 disk=306400/305364 cores=8/6
            m = re.search(
                r'node=(.*) mem=(.*)[/](.*) disk=(.*)/(.*) cores=(.*)/(.*)', rest, re.M | re.I)
            data["node"] = m.group(1)
            data["mem"] = m.group(2)
            data["mem_max"] = m.group(3)
            data["disk"] = m.group(4)
            data["disk_max"] = m.group(5)
            data["cores"] = m.group(6)
            data["cores_max"] = m.group(7)
        else:
            data["calltype"] = "ignore"
        return data

    def terminate_instances_param_parser(self, rest, data):

        rest = rest.strip()
        if rest.startswith("params"):
            # params: userId=(null), instIdsLen=1, firstInstId=i-417B07B2
            rest = re.sub("params:", "", rest).strip()
            m = re.search(
                r'userId=(.*) instIdsLen=(.*) firstInstId=(.*)', rest, re.M | re.I)
            userid = m.group(1)
            if userid == "(null),":
                data["userId"] = "null"
            else:
                data["userId"] = m.group(1)
            data["instIdsLen"] = m.group(2)
            data["firstInstId"] = m.group(3)
        else:
            data["calltype"] = "ignore"
        return data

    def print_counter(self, label, counter):
        print label + " = " + str(counter)

    def set_argparser(self):
        def_s_date = "19700101"
        def_e_date = "29991231"
        def_conf = "futuregrid.cfg"
        def_linetypes = [
            "TerminateInstances", "refresh_resources", "print_ccInstance"]
        def_platform = "eucalyptus"
        def_platform_version = "3.0.2"

        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-s", "--from", dest="from_date", default=def_s_date,
            help="start date to begin parsing (type: YYYYMMDD)")
        parser.add_argument("-e", "--to", dest="to_date", default=def_e_date,
                            help="end date to finish parsing (type: YYYYMMDD)")
        parser.add_argument("-i", "--input_dir", dest="dirname", required=True,
                            help="Absolute path where the files (e.g. 2012-02-16-00-21-17-cc.log generated by fg-unix) exist")
        parser.add_argument("--conf", dest="conf",
                            help="configuraton file of the database to be used")
        parser.add_argument(
            "-l", "--parse", nargs="+", dest="linetypes", default=def_linetypes,
            help="specify function names which you want to parse (types: print_ccInstance, refresh_resources)")
        parser.add_argument("-z", "--gzip", action="store_true", default=False,
                            help="gzip compressed files will be loaded")
        parser.add_argument(
            "-d", "--debug", action="store_true", default=False,
            help="debug on|off")
        parser.add_argument("-p", "--platform", default=def_platform,
                            help="Cloud platform name, required. (e.g. nimbus, openstack, eucalyptus, etc)")
        parser.add_argument(
            "-pv", "--platform_version", default=def_platform_version,
            help="Cloud platform version. (e.g. 2.9 for nimbus, essex for openstack, and  2.0 or 3.1 for eucalyptus)")
        parser.add_argument("-n", "--nodename", required=True,
                            help="Hostname of the cloud platform, required. (e.g., hotel, sierra, india, alamo, foxtrot)")
        parser.add_argument(
            "-tz", "--timezone", dest="timezone", default="local()",
            help="timezone of the log files (e.g. EST; default: local())")

        args = parser.parse_args()
        print args

        '''
        How can we use argparse in this file?
        -------------------------------------
        1) fg-parser.py -s start date -e end date; will parse logs between the period that specified by -s and -e options
           ex) fg-parser.py -s 20120216 -e 20120216
               => 2012-02-16-00-21-17-cc.log ~ 2012-02-16-23-47-16-cc.log will be parsed
        2) fg-parser.py -f filename; Only parse the file that specified by -f option
           ex) fg-parser.py -f 2012-02-16-00-21-17-cc.log
               => Only that file will be parsed
        '''

        self.args = args

    def check_argparser(self):
        if self.args.conf:
            self.instances.db.set_conf(self.args.conf)
            self.instances.db.update_conf()

        if self.args.gzip:
            import zlib
            CHUNKSIZE = 1024
            self.gz = zlib.decompressobj(16+zlib.MAX_WBITS)

        if self.args.debug:
            self.debug = True

    def read_compressed_line(self, line):
        if self.args.gzip:
            return self.gz.decompress(line)
        else:
            return line

    def read_logs(self):
        if self.args.dirname == "-":
            self.read_stdin()
        else:
            self.read_files()

    def read_files(self):

        from_date = datetime.strptime(
            self.args.from_date + " 00:00:00", '%Y%m%d %H:%M:%S')
        to_date = datetime.strptime(
            self.args.to_date + " 23:59:59", '%Y%m%d %H:%M:%S')
        dirname = self.args.dirname

        try:
            listdir = os.listdir(dirname)
        except:
            listdir = ""

        for filename in listdir:
            try:
                single_date = datetime.strptime(str(
                    filename).split(".")[0], '%Y-%m-%d-%H-%M-%S-cc')
                if from_date <= single_date <= to_date:
                    print "Processing file is: " + filename
                    self.parse_log(
                        dirname + "/" + filename, self.instances.update_traceinfo)
            except ValueError:
                print "error occurred parsing: " + filename
                self.debug_output(sys.exc_info())
                continue
            except:
                print "error occurred parsing: " + filename
                print sys.exc_info()
                self.debug_output(sys.exc_info())
                continue

    def read_stdin(self):
        try:
            print "Processing stdin... "
            self.parse_log(None, self.instances.update_traceinfo)
        except:
            print sys.exc_info()

    def parse_log(self, filename, analyze):

        lines_total = lines_ignored = 0
        count_terminate_instances = count_refresh_resource = 0
        count_ccInstance_parser = read_bytes = 0
        parse_types = self.args.linetypes

        print filename

        if filename:
            file_size = os.path.getsize(filename)
            self.debug_output("SIZE>:" + str(file_size))

        for line in fileinput.input(filename, openhook=fileinput.hook_compressed):
            # line = self.read_compressed_line(line)
            line = line.rstrip()
            ignore = False
            lines_total += 1
            read_bytes += len(line)
            data = {}
            if (self.debug or self.progress) and filename and ((lines_total % 1000) == 0):
                percent = int(100 * read_bytes / file_size)
                sys.stdout.write("\r%2d%%" % percent)
                sys.stdout.flush()
            # self.debug_output("DEBUG " + str(lines_total) +"> " + line)
            rest = self.parse_type_and_date(line, data)

            '''
            Temporarily, print_ccInstance is the only line type being parsed.

            if data["linetype"] == "TerminateInstances" and "TerminateInstances" in parse_types:
                count_terminate_instances += 1
                terminate_instances_param_parser(rest, data)
            elif data["linetype"] == "refresh_resources" and "refresh_resources" in parse_types:
                count_refresh_resource += 1
                refresh_resource_parser(rest, data)
            el'''
            if data["linetype"] == "print_ccInstance" and "print_ccInstance" in parse_types:
                count_ccInstance_parser += 1
                if not self.ccInstance_parser(rest, data):
                    ignore = True
                else:
                    # cloudplatformid
                    data["cloudPlatformIdRef"] = self.cloudplatform_id

                    analyze(data)
            else:
                ignore = True

            if ignore:
                lines_ignored += 1
                # self.debug_output("IGNORED LAST LINE> ")

            # For Debugging to make it faster terminate at 5
            # if self.debug and (len(self.instances.data) > 5):
            #    break

        fileinput.close()

        self.print_counter("lines total", lines_total)
        self.print_counter("lines ignored = ", lines_ignored)
        self.print_counter(
            "count_terminate_instances", count_terminate_instances)
        self.print_counter("count_refresh_resource", count_refresh_resource)
        self.print_counter("count_ccInstance_parser ", count_ccInstance_parser)

    def store_parsed(self):
        self.instances.db.connect()
        self.instances.write_to_db()
        self.instances.set_userinfo()
        self.instances.write_userinfo_to_db()

        self.print_counter("======================", "")
        self.print_counter("instance stored total", len(
            self.instances.instance))
        self.print_counter("userinfo stored total", len(
            self.instances.userinfo))

    def get_cloudplatform_info(self):
        self.instances.db.conf()
        self.instances.db.connect()
        whereclause = {"platform": self.args.platform, "hostname":
                       self.args.nodename, "version": self.args.platform_version}
        self.cloudplatform_id = self.instances.get_cloudplatform_id(
            whereclause)

    def debug_output(self, msg):
        if not self.debug:
            return
        print msg

    def test_file_read(self, filename):
        self.parse_log(filename, self.instances.update_traceinfo)
        self.instances.dump()

    def test_sql_read(self):
        self.instances.read_from_db()
        self.instances.dump()

    def test_sql_write(self, filename):
        self.parse_log(filename, self.instances.update_traceinfo)
        self.instances.write_to_db()
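
A minimal sketch of driving FGLogParser (assumed call order inferred from the methods above, not from the source):

if __name__ == "__main__":
    parser = FGLogParser()
    parser.set_argparser()           # parse CLI options into self.args
    parser.check_argparser()         # apply --conf, --gzip, --debug
    parser.get_cloudplatform_info()  # resolve cloudplatform_id from the db
    parser.read_logs()               # parse log files or stdin
    parser.store_parsed()            # write instances and userinfo to the db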
Example #7
    def __init__(self):
        self.instances = FGInstances()