Example #1
0
    def __init__(self, process_info=None):
        """
        Build the command line parser, parse the arguments and keep them.

        Every option has a default value, so the collector can be started
        without any argument:

        - ``-f/--file``: sqlite file holding the browser history,
        - ``-r/--root``: URI of the kTBS root,
        - ``-o/--origin``: origin of the created trace,
        - ``-l/--limit``: maximum number of items to collect (integer),
        - ``-p/--profile``, ``-s/--stats``, ``-v/--verbose``: boolean flags.

        The value options use ``nargs="?"`` with ``const`` equal to the
        default, so giving the flag without a value also yields the default.

        :param process_info: optional process information object. It is kept
            in ``self.process_info``; when ``--stats`` is given it is replaced
            by a ``ProcessInfo`` built for the current process.
        """
        # Adjacent string literals are used instead of backslash
        # continuations inside the literals, which embedded the source
        # indentation in the text. argparse collapses whitespace when it
        # renders help, so the displayed help is unchanged.
        self._parser = ArgumentParser(
            description="Fill a stored trace with browser history items "
                        "as obsels.")

        self._parser.add_argument(
            "-f", "--file",
            nargs="?",
            const=FIREFOX_HISTORY,
            default=FIREFOX_HISTORY,
            help=("File containings the sqlite data to parse. "
                  "Default is %s" % FIREFOX_HISTORY))

        self._parser.add_argument(
            "-r", "--root",
            nargs="?",
            const=KTBS_ROOT,
            default=KTBS_ROOT,
            help=("Enter the uri of the kTBS root. "
                  "Default is %s" % KTBS_ROOT))

        self._parser.add_argument(
            "-o", "--origin",
            nargs="?",
            const=TRACE_ORIGIN,
            default=TRACE_ORIGIN,
            help=("Enter the trace origin. "
                  "Default is %s" % TRACE_ORIGIN))

        self._parser.add_argument(
            "-l", "--limit",
            nargs="?",
            type=int,
            const=NB_MAX_ITEMS,
            default=NB_MAX_ITEMS,
            help=("Enter the maximun number of items to collect. "
                  "Default is %s" % NB_MAX_ITEMS))

        self._parser.add_argument("-p", "--profile",
                                  action="store_true",
                                  help="Profile current code")

        self._parser.add_argument("-s", "--stats",
                                  action="store_true",
                                  help="Mesure execution time")

        self._parser.add_argument("-v", "--verbose",
                                  action="store_true",
                                  help="Display print messages")

        # display() reads self._args, so it must be set before the call.
        self._args = self._parser.parse_args()
        self.display("Parsed with argparse: %s" % str(self._args))

        # Always define the attribute so that reading it never raises
        # AttributeError when --stats was not requested.
        self.process_info = process_info

        if self._args.stats:
            # To get process information without callback mechanism
            self.process_info = ProcessInfo(os.getpid())
Example #2
0
class BrowserHistoryCollector(object):
    """
    Collect Firefox browsing history and store it as obsels in a kTBS trace.

    The command line is parsed when the collector is instantiated. The other
    methods create the kTBS base, model and stored trace, then fill the
    trace from the ``moz_places`` table of the history database.
    """

    # (URI suffix of the model attribute type, moz_places column feeding it).
    # The moz_places column is named "frecency" while the model attribute
    # created by create_ktbs_model_for_history() is "#frequency".
    _ATTRIBUTE_COLUMNS = (
        ("visit_count", "visit_count"),
        ("title", "title"),
        ("frequency", "frecency"),
    )

    def __init__(self, process_info=None):
        """
        Build the command line parser, parse the arguments and keep them.

        Every option has a default value, so the collector can be started
        without any argument:

        - ``-f/--file``: sqlite file holding the browser history,
        - ``-r/--root``: URI of the kTBS root,
        - ``-o/--origin``: origin of the created trace,
        - ``-l/--limit``: maximum number of items to collect (integer),
        - ``-p/--profile``, ``-s/--stats``, ``-v/--verbose``: boolean flags.

        The value options use ``nargs="?"`` with ``const`` equal to the
        default, so giving the flag without a value also yields the default.

        :param process_info: optional process information object. It is kept
            in ``self.process_info``; when ``--stats`` is given it is replaced
            by a ``ProcessInfo`` built for the current process.
        """
        # Adjacent string literals are used instead of backslash
        # continuations inside the literals, which embedded the source
        # indentation in the text. argparse collapses whitespace when it
        # renders help, so the displayed help is unchanged.
        self._parser = ArgumentParser(
            description="Fill a stored trace with browser history items "
                        "as obsels.")

        self._parser.add_argument(
            "-f", "--file",
            nargs="?",
            const=FIREFOX_HISTORY,
            default=FIREFOX_HISTORY,
            help=("File containings the sqlite data to parse. "
                  "Default is %s" % FIREFOX_HISTORY))

        self._parser.add_argument(
            "-r", "--root",
            nargs="?",
            const=KTBS_ROOT,
            default=KTBS_ROOT,
            help=("Enter the uri of the kTBS root. "
                  "Default is %s" % KTBS_ROOT))

        self._parser.add_argument(
            "-o", "--origin",
            nargs="?",
            const=TRACE_ORIGIN,
            default=TRACE_ORIGIN,
            help=("Enter the trace origin. "
                  "Default is %s" % TRACE_ORIGIN))

        self._parser.add_argument(
            "-l", "--limit",
            nargs="?",
            type=int,
            const=NB_MAX_ITEMS,
            default=NB_MAX_ITEMS,
            help=("Enter the maximun number of items to collect. "
                  "Default is %s" % NB_MAX_ITEMS))

        self._parser.add_argument("-p", "--profile",
                                  action="store_true",
                                  help="Profile current code")

        self._parser.add_argument("-s", "--stats",
                                  action="store_true",
                                  help="Mesure execution time")

        self._parser.add_argument("-v", "--verbose",
                                  action="store_true",
                                  help="Display print messages")

        # display() reads self._args, so it must be set before the call.
        self._args = self._parser.parse_args()
        self.display("Parsed with argparse: %s" % str(self._args))

        # Always define the attribute so that reading it never raises
        # AttributeError when --stats was not requested.
        self.process_info = process_info

        if self._args.stats:
            # To get process information without callback mechanism
            self.process_info = ProcessInfo(os.getpid())

    def display(self, msg):
        """
        Print ``msg`` only when the verbose flag was given.
        """
        if self._args.verbose:
            # Single-argument call form: same output on Python 2 and 3.
            print(msg)

    def profiling_asked(self):
        """
        Tell whether profiling has been asked on the command line.

        :return: value of the ``--profile`` flag.
        """
        return self._args.profile

    def create_ktbs_base_for_history(self):
        """
        Return the "BrowserHistory/" base of the configured kTBS root.

        The base is created when ``get_base`` returns ``None``.
        """
        root = get_ktbs(self._args.root)

        base = root.get_base(id="BrowserHistory/")
        if base is None:
            base = root.create_base(id="BrowserHistory/")

        return base

    def create_ktbs_model_for_history(self, base=None):
        """
        Create the "BHModel" model describing browser history obsels.

        The model gets one obsel type (``BH_OBSEL_ID``) with three attribute
        types: ``#visit_count`` (integer), ``#title`` (string) and
        ``#frequency`` (integer).

        :param base: kTBS base in which the model is created.
        :return: the created model.
        """
        model = base.create_model(id="BHModel")

        bh_obsel_type = model.create_obsel_type(id=BH_OBSEL_ID,
                                                label=BH_OBSEL_LABEL)

        # moz_places columns: id, url, title, rev_host, visit_count, hidden,
        # typed, favicon_id, frecency, last_visit_date. Only the three below
        # are modelled as obsel attributes.
        for attr_id, data_type in (("#visit_count", XSD.integer),
                                   ("#title", XSD.string),
                                   ("#frequency", XSD.integer)):
            model.create_attribute_type(id=attr_id,
                                        obsel_type=bh_obsel_type,
                                        data_type=data_type)

        return model

    def create_ktbs_trace_for_history(self, base=None, model=None):
        """
        Create the "RawHistory/" stored trace for browser history data.

        :param base: kTBS base in which the trace is created.
        :param model: model of the trace; its URI is given to kTBS.
        :return: the created stored trace, whose origin is the ``--origin``
            command line value.
        """
        return base.create_stored_trace(id="RawHistory/",
                                        model=model.get_uri(),
                                        origin=self._args.origin)

    def _attribute_uris_by_column(self, model):
        """
        Map each collected moz_places column to its attribute type URI.

        :param model: model whose attribute types are inspected.
        :return: dict ``{column name: attribute type URI}``.
        :raises LookupError: when the model lacks one of the expected
            attribute types.
        """
        uris = {}
        for attr_type in model.list_attribute_types():
            attr_uri = attr_type.get_uri()
            for suffix, column in self._ATTRIBUTE_COLUMNS:
                if attr_uri.endswith(suffix):
                    uris[column] = attr_uri
                    break

        missing = [suffix for suffix, column in self._ATTRIBUTE_COLUMNS
                   if column not in uris]
        if missing:
            raise LookupError("Trace model has no attribute type for: %s"
                              % ", ".join(missing))
        return uris

    def collect_history_items(self, trace=None):
        """
        Open the browser history database, extract history items and
        populate a kTBS stored trace with them.

        Rows of ``moz_places`` having a ``last_visit_date`` are read in
        chronological order and turned into obsels until ``--limit`` obsels
        have been created. Each obsel carries the visit count, title and
        frecency of the row; the row URL is used as subject.

        :param trace: stored trace receiving the obsels.
        :return: list of the created obsels. When a sqlite error occurs it
            is reported on standard output and the obsels created so far
            are returned.
        :raises LookupError: when the trace model lacks an expected
            attribute type.
        """
        obsels_list = []
        stats = self._args.stats
        limit = self._args.limit
        nb_browser_items = 0  # rows read from the database
        nb_obsels = 0         # obsels actually created

        if stats:
            # time.clock() was removed in Python 3.8; it is only used where
            # time.process_time() does not exist (Python 2).
            cpu_clock = getattr(time, "process_time", None) or time.clock
            start_time = time.time()
            start_cpu = cpu_clock()

        conn = None
        try:
            # http://docs.python.org/library/sqlite3.html#accessing-columns-
            # by-name-instead-of-by-index
            conn = sqlite3.connect(self._args.file,
                                   detect_types=sqlite3.PARSE_COLNAMES)
            conn.row_factory = sqlite3.Row

            cursor = conn.cursor()

            # If obsels are not inserted in chronological order
            # we get a "Non-monotonic collection error".
            cursor.execute("SELECT * FROM moz_places "
                           "WHERE last_visit_date IS NOT NULL "
                           "ORDER BY last_visit_date")

            model = trace.get_model()
            bh_obsel_type_uri = model.get(id=BH_OBSEL_ID).get_uri()
            attr_uris = self._attribute_uris_by_column(model)

            for row in cursor:
                # Stop before reading a row that would exceed the limit.
                if nb_obsels >= limit:
                    break

                nb_browser_items += 1

                last_visit = row['last_visit_date']
                if last_visit is None:
                    # We do not create obsels with no date in kTBS
                    continue

                # last_visit_date is divided by 10**6 to get the seconds
                # expected by fromtimestamp().
                last_visit = datetime.datetime.fromtimestamp(
                    int(last_visit // 1000000))

                # NULL columns are left out rather than sent as None.
                # NOTE(review): confirm how kTBS handles None values.
                attributes = {}
                for column, attr_uri in attr_uris.items():
                    value = row[column]
                    if value is not None:
                        attributes[attr_uri] = value

                # Insert history items as obsels
                obsel = trace.create_obsel(type=bh_obsel_type_uri,
                                           begin=last_visit,
                                           end=last_visit,
                                           attributes=attributes,
                                           subject=row['url'])
                obsels_list.append(obsel)

                self.display("id: %s, url: %s, visit_count: %s, "
                             "frecency: %s, last_visit_date: %s"
                             % (row['id'], row['url'], row['visit_count'],
                                row['frecency'], last_visit))

                nb_obsels += 1

                # To display Process information
                if stats and nb_obsels % 100 == 0:
                    values = self.process_info.get_values()
                    print("=====> PROCESS INFO = %s" % str(values))

            cursor.close()

            if stats:
                end_cpu = cpu_clock()
                end_time = time.time()
                print("Program execution time %f seconds"
                      % (end_time - start_time))
                print("Program CPU execution time %f seconds"
                      % (end_cpu - start_cpu))
                print("Created %i obsels on %i items"
                      % (nb_obsels, nb_browser_items))

        except sqlite3.Error as err:
            print("An error occurred: %s" % err.args[0])

        finally:
            # Release the database file whatever happened above.
            if conn is not None:
                conn.close()

        return obsels_list
Example #3
0
    def __init__(self, process_info=None):
        """
        Build the command line parser, parse the arguments and keep them.

        Every option has a default value, so the collector can be started
        without any argument:

        - ``-f/--file``: sqlite file holding the browser history,
        - ``-r/--root``: URI of the kTBS root,
        - ``-o/--origin``: origin of the created trace,
        - ``-l/--limit``: maximum number of items to collect (integer),
        - ``-p/--profile``, ``-s/--stats``, ``-v/--verbose``: boolean flags.

        The value options use ``nargs="?"`` with ``const`` equal to the
        default, so giving the flag without a value also yields the default.

        :param process_info: optional process information object. It is kept
            in ``self.process_info``; when ``--stats`` is given it is replaced
            by a ``ProcessInfo`` built for the current process.
        """
        # Adjacent string literals are used instead of backslash
        # continuations inside the literals, which embedded the source
        # indentation in the text. argparse collapses whitespace when it
        # renders help, so the displayed help is unchanged.
        self._parser = ArgumentParser(
            description="Fill a stored trace with browser history items "
                        "as obsels.")

        self._parser.add_argument(
            "-f", "--file",
            nargs="?",
            const=FIREFOX_HISTORY,
            default=FIREFOX_HISTORY,
            help=("File containings the sqlite data to parse. "
                  "Default is %s" % FIREFOX_HISTORY))

        self._parser.add_argument(
            "-r", "--root",
            nargs="?",
            const=KTBS_ROOT,
            default=KTBS_ROOT,
            help=("Enter the uri of the kTBS root. "
                  "Default is %s" % KTBS_ROOT))

        self._parser.add_argument(
            "-o", "--origin",
            nargs="?",
            const=TRACE_ORIGIN,
            default=TRACE_ORIGIN,
            help=("Enter the trace origin. "
                  "Default is %s" % TRACE_ORIGIN))

        self._parser.add_argument(
            "-l", "--limit",
            nargs="?",
            type=int,
            const=NB_MAX_ITEMS,
            default=NB_MAX_ITEMS,
            help=("Enter the maximun number of items to collect. "
                  "Default is %s" % NB_MAX_ITEMS))

        self._parser.add_argument("-p", "--profile",
                                  action="store_true",
                                  help="Profile current code")

        self._parser.add_argument("-s", "--stats",
                                  action="store_true",
                                  help="Mesure execution time")

        self._parser.add_argument("-v", "--verbose",
                                  action="store_true",
                                  help="Display print messages")

        # display() reads self._args, so it must be set before the call.
        self._args = self._parser.parse_args()
        self.display("Parsed with argparse: %s" % str(self._args))

        # Always define the attribute so that reading it never raises
        # AttributeError when --stats was not requested.
        self.process_info = process_info

        if self._args.stats:
            # To get process information without callback mechanism
            self.process_info = ProcessInfo(os.getpid())
Example #4
0
class BrowserHistoryCollector(object):
    """
    Collect Firefox browsing history and store it as obsels in a kTBS trace.

    The command line is parsed when the collector is instantiated. The other
    methods create the kTBS base, model and stored trace, then fill the
    trace from the ``moz_places`` table of the history database.
    """

    # (URI suffix of the model attribute type, moz_places column feeding it).
    # The moz_places column is named "frecency" while the model attribute
    # created by create_ktbs_model_for_history() is "#frequency".
    _ATTRIBUTE_COLUMNS = (
        ("visit_count", "visit_count"),
        ("title", "title"),
        ("frequency", "frecency"),
    )

    def __init__(self, process_info=None):
        """
        Build the command line parser, parse the arguments and keep them.

        Every option has a default value, so the collector can be started
        without any argument:

        - ``-f/--file``: sqlite file holding the browser history,
        - ``-r/--root``: URI of the kTBS root,
        - ``-o/--origin``: origin of the created trace,
        - ``-l/--limit``: maximum number of items to collect (integer),
        - ``-p/--profile``, ``-s/--stats``, ``-v/--verbose``: boolean flags.

        The value options use ``nargs="?"`` with ``const`` equal to the
        default, so giving the flag without a value also yields the default.

        :param process_info: optional process information object. It is kept
            in ``self.process_info``; when ``--stats`` is given it is replaced
            by a ``ProcessInfo`` built for the current process.
        """
        # Adjacent string literals are used instead of backslash
        # continuations inside the literals, which embedded the source
        # indentation in the text. argparse collapses whitespace when it
        # renders help, so the displayed help is unchanged.
        self._parser = ArgumentParser(
            description="Fill a stored trace with browser history items "
                        "as obsels.")

        self._parser.add_argument(
            "-f", "--file",
            nargs="?",
            const=FIREFOX_HISTORY,
            default=FIREFOX_HISTORY,
            help=("File containings the sqlite data to parse. "
                  "Default is %s" % FIREFOX_HISTORY))

        self._parser.add_argument(
            "-r", "--root",
            nargs="?",
            const=KTBS_ROOT,
            default=KTBS_ROOT,
            help=("Enter the uri of the kTBS root. "
                  "Default is %s" % KTBS_ROOT))

        self._parser.add_argument(
            "-o", "--origin",
            nargs="?",
            const=TRACE_ORIGIN,
            default=TRACE_ORIGIN,
            help=("Enter the trace origin. "
                  "Default is %s" % TRACE_ORIGIN))

        self._parser.add_argument(
            "-l", "--limit",
            nargs="?",
            type=int,
            const=NB_MAX_ITEMS,
            default=NB_MAX_ITEMS,
            help=("Enter the maximun number of items to collect. "
                  "Default is %s" % NB_MAX_ITEMS))

        self._parser.add_argument("-p", "--profile",
                                  action="store_true",
                                  help="Profile current code")

        self._parser.add_argument("-s", "--stats",
                                  action="store_true",
                                  help="Mesure execution time")

        self._parser.add_argument("-v", "--verbose",
                                  action="store_true",
                                  help="Display print messages")

        # display() reads self._args, so it must be set before the call.
        self._args = self._parser.parse_args()
        self.display("Parsed with argparse: %s" % str(self._args))

        # Always define the attribute so that reading it never raises
        # AttributeError when --stats was not requested.
        self.process_info = process_info

        if self._args.stats:
            # To get process information without callback mechanism
            self.process_info = ProcessInfo(os.getpid())

    def display(self, msg):
        """
        Print ``msg`` only when the verbose flag was given.
        """
        if self._args.verbose:
            # Single-argument call form: same output on Python 2 and 3.
            print(msg)

    def profiling_asked(self):
        """
        Tell whether profiling has been asked on the command line.

        :return: value of the ``--profile`` flag.
        """
        return self._args.profile

    def create_ktbs_base_for_history(self):
        """
        Return the "BrowserHistory/" base of the configured kTBS root.

        The base is created when ``get_base`` returns ``None``.
        """
        root = get_ktbs(self._args.root)

        base = root.get_base(id="BrowserHistory/")
        if base is None:
            base = root.create_base(id="BrowserHistory/")

        return base

    def create_ktbs_model_for_history(self, base=None):
        """
        Create the "BHModel" model describing browser history obsels.

        The model gets one obsel type (``BH_OBSEL_ID``) with three attribute
        types: ``#visit_count`` (integer), ``#title`` (string) and
        ``#frequency`` (integer).

        :param base: kTBS base in which the model is created.
        :return: the created model.
        """
        model = base.create_model(id="BHModel")

        bh_obsel_type = model.create_obsel_type(id=BH_OBSEL_ID,
                                                label=BH_OBSEL_LABEL)

        # moz_places columns: id, url, title, rev_host, visit_count, hidden,
        # typed, favicon_id, frecency, last_visit_date. Only the three below
        # are modelled as obsel attributes.
        for attr_id, data_type in (("#visit_count", XSD.integer),
                                   ("#title", XSD.string),
                                   ("#frequency", XSD.integer)):
            model.create_attribute_type(id=attr_id,
                                        obsel_type=bh_obsel_type,
                                        data_type=data_type)

        return model

    def create_ktbs_trace_for_history(self, base=None, model=None):
        """
        Create the "RawHistory/" stored trace for browser history data.

        :param base: kTBS base in which the trace is created.
        :param model: model of the trace; its URI is given to kTBS.
        :return: the created stored trace, whose origin is the ``--origin``
            command line value.
        """
        return base.create_stored_trace(id="RawHistory/",
                                        model=model.get_uri(),
                                        origin=self._args.origin)

    def _attribute_uris_by_column(self, model):
        """
        Map each collected moz_places column to its attribute type URI.

        :param model: model whose attribute types are inspected.
        :return: dict ``{column name: attribute type URI}``.
        :raises LookupError: when the model lacks one of the expected
            attribute types.
        """
        uris = {}
        for attr_type in model.list_attribute_types():
            attr_uri = attr_type.get_uri()
            for suffix, column in self._ATTRIBUTE_COLUMNS:
                if attr_uri.endswith(suffix):
                    uris[column] = attr_uri
                    break

        missing = [suffix for suffix, column in self._ATTRIBUTE_COLUMNS
                   if column not in uris]
        if missing:
            raise LookupError("Trace model has no attribute type for: %s"
                              % ", ".join(missing))
        return uris

    def collect_history_items(self, trace=None):
        """
        Open the browser history database, extract history items and
        populate a kTBS stored trace with them.

        Rows of ``moz_places`` having a ``last_visit_date`` are read in
        chronological order and turned into obsels until ``--limit`` obsels
        have been created. Each obsel carries the visit count, title and
        frecency of the row; the row URL is used as subject.

        :param trace: stored trace receiving the obsels.
        :return: list of the created obsels. When a sqlite error occurs it
            is reported on standard output and the obsels created so far
            are returned.
        :raises LookupError: when the trace model lacks an expected
            attribute type.
        """
        obsels_list = []
        stats = self._args.stats
        limit = self._args.limit
        nb_browser_items = 0  # rows read from the database
        nb_obsels = 0         # obsels actually created

        if stats:
            # time.clock() was removed in Python 3.8; it is only used where
            # time.process_time() does not exist (Python 2).
            cpu_clock = getattr(time, "process_time", None) or time.clock
            start_time = time.time()
            start_cpu = cpu_clock()

        conn = None
        try:
            # http://docs.python.org/library/sqlite3.html#accessing-columns-
            # by-name-instead-of-by-index
            conn = sqlite3.connect(self._args.file,
                                   detect_types=sqlite3.PARSE_COLNAMES)
            conn.row_factory = sqlite3.Row

            cursor = conn.cursor()

            # If obsels are not inserted in chronological order
            # we get a "Non-monotonic collection error".
            cursor.execute("SELECT * FROM moz_places "
                           "WHERE last_visit_date IS NOT NULL "
                           "ORDER BY last_visit_date")

            model = trace.get_model()
            bh_obsel_type_uri = model.get(id=BH_OBSEL_ID).get_uri()
            attr_uris = self._attribute_uris_by_column(model)

            for row in cursor:
                # Stop before reading a row that would exceed the limit.
                if nb_obsels >= limit:
                    break

                nb_browser_items += 1

                last_visit = row['last_visit_date']
                if last_visit is None:
                    # We do not create obsels with no date in kTBS
                    continue

                # last_visit_date is divided by 10**6 to get the seconds
                # expected by fromtimestamp().
                last_visit = datetime.datetime.fromtimestamp(
                    int(last_visit // 1000000))

                # NULL columns are left out rather than sent as None.
                # NOTE(review): confirm how kTBS handles None values.
                attributes = {}
                for column, attr_uri in attr_uris.items():
                    value = row[column]
                    if value is not None:
                        attributes[attr_uri] = value

                # Insert history items as obsels
                obsel = trace.create_obsel(type=bh_obsel_type_uri,
                                           begin=last_visit,
                                           end=last_visit,
                                           attributes=attributes,
                                           subject=row['url'])
                obsels_list.append(obsel)

                self.display("id: %s, url: %s, visit_count: %s, "
                             "frecency: %s, last_visit_date: %s"
                             % (row['id'], row['url'], row['visit_count'],
                                row['frecency'], last_visit))

                nb_obsels += 1

                # To display Process information
                if stats and nb_obsels % 100 == 0:
                    values = self.process_info.get_values()
                    print("=====> PROCESS INFO = %s" % str(values))

            cursor.close()

            if stats:
                end_cpu = cpu_clock()
                end_time = time.time()
                print("Program execution time %f seconds"
                      % (end_time - start_time))
                print("Program CPU execution time %f seconds"
                      % (end_cpu - start_cpu))
                print("Created %i obsels on %i items"
                      % (nb_obsels, nb_browser_items))

        except sqlite3.Error as err:
            print("An error occurred: %s" % err.args[0])

        finally:
            # Release the database file whatever happened above.
            if conn is not None:
                conn.close()

        return obsels_list