def __init__(self, process_info=None):
    """
    Build the command line parser, parse the arguments and prepare the
    optional process statistics.

    Every option has a default value (history file, kTBS root uri, trace
    origin, maximum number of items), so none of them is mandatory.

    :param process_info: optional object used to report process
        information when ``--stats`` is given. When it is omitted and
        ``--stats`` is set, a ``ProcessInfo`` bound to the current
        process id is created.
    """
    self._parser = ArgumentParser(
        description="Fill a stored trace with browser history items as "
                    "obsels.")

    # "%(default)s" lets argparse insert the default itself, so a "%"
    # character inside a default value cannot break the help formatting.
    self._parser.add_argument(
        "-f", "--file", nargs="?",
        const=FIREFOX_HISTORY, default=FIREFOX_HISTORY,
        help="File containings the sqlite data to parse. "
             "Default is %(default)s")

    self._parser.add_argument(
        "-r", "--root", nargs="?",
        const=KTBS_ROOT, default=KTBS_ROOT,
        help="Enter the uri of the kTBS root. Default is %(default)s")

    self._parser.add_argument(
        "-o", "--origin", nargs="?",
        const=TRACE_ORIGIN, default=TRACE_ORIGIN,
        help="Enter the trace origin. Default is %(default)s")

    self._parser.add_argument(
        "-l", "--limit", nargs="?", type=int,
        const=NB_MAX_ITEMS, default=NB_MAX_ITEMS,
        help="Enter the maximun number of items to collect. "
             "Default is %(default)s")

    self._parser.add_argument("-p", "--profile", action="store_true",
                              help="Profile current code")

    self._parser.add_argument("-s", "--stats", action="store_true",
                              help="Mesure execution time")

    self._parser.add_argument("-v", "--verbose", action="store_true",
                              help="Display print messages")

    self._args = self._parser.parse_args()
    self.display("Parsed with argparse: %s" % str(self._args))

    # Always define the attribute so that reading it never raises
    # AttributeError, and honour an object supplied by the caller.
    self.process_info = process_info
    if self._args.stats and self.process_info is None:
        # To get process information without callback mechanism
        self.process_info = ProcessInfo(os.getpid())
class BrowserHistoryCollector(object):
    """
    Collects browser history items and stores them as obsels in a kTBS.

    This code is for Firefox browser: it reads the ``moz_places`` table of
    the sqlite history file given on the command line.
    """

    # (moz_places column, suffix of the model attribute type uri storing it)
    _COLUMN_TO_ATTRIBUTE = (("visit_count", "visit_count"),
                            ("title", "title"),
                            ("frecency", "frequency"))

    def __init__(self, process_info=None):
        """
        Build the command line parser, parse the arguments and prepare the
        optional process statistics.

        Every option has a default value (history file, kTBS root uri,
        trace origin, maximum number of items), so none is mandatory.

        :param process_info: optional object whose ``get_values()`` method
            is called to report process information when ``--stats`` is
            given. When it is omitted and ``--stats`` is set, a
            ``ProcessInfo`` bound to the current process id is created.
        """
        self._parser = ArgumentParser(
            description="Fill a stored trace with browser history items as "
                        "obsels.")

        # "%(default)s" lets argparse insert the default itself, so a "%"
        # character inside a default value cannot break the help output.
        self._parser.add_argument(
            "-f", "--file", nargs="?",
            const=FIREFOX_HISTORY, default=FIREFOX_HISTORY,
            help="File containings the sqlite data to parse. "
                 "Default is %(default)s")

        self._parser.add_argument(
            "-r", "--root", nargs="?",
            const=KTBS_ROOT, default=KTBS_ROOT,
            help="Enter the uri of the kTBS root. Default is %(default)s")

        self._parser.add_argument(
            "-o", "--origin", nargs="?",
            const=TRACE_ORIGIN, default=TRACE_ORIGIN,
            help="Enter the trace origin. Default is %(default)s")

        self._parser.add_argument(
            "-l", "--limit", nargs="?", type=int,
            const=NB_MAX_ITEMS, default=NB_MAX_ITEMS,
            help="Enter the maximun number of items to collect. "
                 "Default is %(default)s")

        self._parser.add_argument("-p", "--profile", action="store_true",
                                  help="Profile current code")

        self._parser.add_argument("-s", "--stats", action="store_true",
                                  help="Mesure execution time")

        self._parser.add_argument("-v", "--verbose", action="store_true",
                                  help="Display print messages")

        self._args = self._parser.parse_args()
        self.display("Parsed with argparse: %s" % str(self._args))

        # Always define the attribute so that reading it never raises
        # AttributeError, and honour an object supplied by the caller.
        self.process_info = process_info
        if self._args.stats and self.process_info is None:
            # To get process information without callback mechanism
            self.process_info = ProcessInfo(os.getpid())

    def display(self, msg):
        """
        Display the messages only in verbose mode.

        :param msg: text to print when ``--verbose`` was given.
        """
        if self._args.verbose:
            print(msg)

    def profiling_asked(self):
        """
        Has profiling been asked in command line ?

        :return: the value of the ``--profile`` flag.
        """
        return self._args.profile

    def create_ktbs_base_for_history(self):
        """
        Get the "BrowserHistory/" base of the kTBS root, creating it when
        it does not exist yet.

        :return: the kTBS base.
        """
        root = get_ktbs(self._args.root)

        base = root.get_base(id="BrowserHistory/")
        if base is None:
            base = root.create_base(id="BrowserHistory/")

        return base

    def create_ktbs_model_for_history(self, base=None):
        """
        Creates a kTBS Model for browser history data.

        The model holds one obsel type and three attribute types
        (visit count, title, frequency) attached to it.

        :param base: kTBS base in which the model is created (required
            despite the ``None`` default).
        :return: the created model.
        """
        model = base.create_model(id="BHModel")

        bh_obsel_type = model.create_obsel_type(id=BH_OBSEL_ID,
                                                label=BH_OBSEL_LABEL)

        # Browser history obsel attributes
        # id, url, title, rev_host, visit_count, hidden, typed, favicon_id,
        # frecency, last_visit_date
        for attr_id, data_type in (("#visit_count", XSD.integer),
                                   ("#title", XSD.string),
                                   ("#frequency", XSD.integer)):
            model.create_attribute_type(id=attr_id,
                                        obsel_type=bh_obsel_type,
                                        data_type=data_type)

        return model

    def create_ktbs_trace_for_history(self, base=None, model=None):
        """
        Creates the "RawHistory/" stored trace for browser history data.

        :param base: kTBS base in which the trace is created.
        :param model: model of the trace; its uri is passed to the kTBS.
        :return: the created stored trace.
        """
        return base.create_stored_trace(id="RawHistory/",
                                        model=model.get_uri(),
                                        origin=self._args.origin)

    def _map_columns_to_attributes(self, model):
        """
        Map each collected moz_places column to its attribute type uri.

        :param model: model whose attribute types are inspected.
        :return: dict ``{column name: attribute type uri}``; a column whose
            attribute type is absent from the model is left out.
        """
        column_uris = {}
        for attr_type in model.list_attribute_types():
            uri = attr_type.get_uri()
            for column, suffix in self._COLUMN_TO_ATTRIBUTE:
                if uri.endswith(suffix):
                    column_uris[column] = uri
                    break
        return column_uris

    def collect_history_items(self, trace=None):
        """
        Open the browser history database, extract history items and
        populates a kTBS stored trace with it.

        At most ``--limit`` obsels are created, in chronological order of
        ``last_visit_date``. A sqlite error is reported on standard output
        and stops the collection.

        :param trace: stored trace receiving the obsels (required despite
            the ``None`` default).
        :return: the list of values returned by ``trace.create_obsel``.
        """
        obsels_list = []
        stats = self._args.stats
        conn = None

        try:
            if stats:
                # time.clock() was removed in Python 3.8: use
                # time.process_time() whenever it is available.
                cpu_clock = getattr(time, "process_time", None) or time.clock
                start_time = time.time()
                start_cpu = cpu_clock()

            # http://docs.python.org/library/sqlite3.html#accessing-columns-
            # by-name-instead-of-by-index
            conn = sqlite3.connect(self._args.file,
                                   detect_types=sqlite3.PARSE_COLNAMES)
            conn.row_factory = sqlite3.Row

            cursor = conn.cursor()

            # If obsels are not inserted in chronological order
            # We get a "Non-monotonic collection error"
            cursor.execute('SELECT * FROM moz_places '
                           'WHERE last_visit_date IS NOT NULL '
                           'ORDER BY last_visit_date')

            # Model information is loop invariant: fetch it only once
            model = trace.get_model()
            obsel_type_uri = model.get(id=BH_OBSEL_ID).get_uri()
            column_uris = self._map_columns_to_attributes(model)

            nb_browser_items = 0  # to be replaced by select count(id) ...
            nb_obsels = 0

            for row in cursor:
                # Stop as soon as the requested number of obsels is reached
                if nb_obsels >= self._args.limit:
                    break

                nb_browser_items += 1

                last_visit = row['last_visit_date']
                if last_visit is None:
                    # We do not create obsels with no date in kTBS
                    continue

                # last_visit_date is divided by 1000000 to get seconds
                last_visit = datetime.datetime.fromtimestamp(
                    int(last_visit // 1000000))

                # Prepare obsel attributes; NULL columns are omitted
                # rather than stored as a None value.
                attributes = {}
                for column, uri in column_uris.items():
                    value = row[column]
                    if value is not None:
                        attributes[uri] = value

                # Insert history items as obsels
                obsel = trace.create_obsel(type=obsel_type_uri,
                                           begin=last_visit,
                                           end=last_visit,
                                           attributes=attributes,
                                           subject=row['url'])
                obsels_list.append(obsel)

                self.display(
                    "id: %s, url: %s, visit_count: %s, frecency: %s, "
                    "last_visit_date: %s"
                    % (row['id'], row['url'], row['visit_count'],
                       row['frecency'], last_visit))

                nb_obsels += 1

                # To display Process information
                if stats and nb_obsels % 100 == 0:
                    values = self.process_info.get_values()
                    print("=====> PROCESS INFO = %s" % str(values))

            cursor.close()

            if stats:
                end_cpu = cpu_clock()
                end_time = time.time()

                print("Program execution time %f seconds"
                      % (end_time - start_time))
                print("Program CPU execution time %f seconds"
                      % (end_cpu - start_cpu))
                print("Created %i obsels on %i items"
                      % (nb_obsels, nb_browser_items))

        except sqlite3.Error as err:
            print("An error occurred: %s" % err.args[0])

        finally:
            # Release the database even when the collection failed
            if conn is not None:
                conn.close()

        return obsels_list
def __init__(self, process_info=None):
    """
    Build the command line parser, parse the arguments and prepare the
    optional process statistics.

    Every option has a default value (history file, kTBS root uri, trace
    origin, maximum number of items), so none of them is mandatory.

    :param process_info: optional object used to report process
        information when ``--stats`` is given. When it is omitted and
        ``--stats`` is set, a ``ProcessInfo`` bound to the current
        process id is created.
    """
    self._parser = ArgumentParser(
        description="Fill a stored trace with browser history items as "
                    "obsels.")

    # "%(default)s" lets argparse insert the default itself, so a "%"
    # character inside a default value cannot break the help formatting.
    self._parser.add_argument(
        "-f", "--file", nargs="?",
        const=FIREFOX_HISTORY, default=FIREFOX_HISTORY,
        help="File containings the sqlite data to parse. "
             "Default is %(default)s")

    self._parser.add_argument(
        "-r", "--root", nargs="?",
        const=KTBS_ROOT, default=KTBS_ROOT,
        help="Enter the uri of the kTBS root. Default is %(default)s")

    self._parser.add_argument(
        "-o", "--origin", nargs="?",
        const=TRACE_ORIGIN, default=TRACE_ORIGIN,
        help="Enter the trace origin. Default is %(default)s")

    self._parser.add_argument(
        "-l", "--limit", nargs="?", type=int,
        const=NB_MAX_ITEMS, default=NB_MAX_ITEMS,
        help="Enter the maximun number of items to collect. "
             "Default is %(default)s")

    self._parser.add_argument("-p", "--profile", action="store_true",
                              help="Profile current code")

    self._parser.add_argument("-s", "--stats", action="store_true",
                              help="Mesure execution time")

    self._parser.add_argument("-v", "--verbose", action="store_true",
                              help="Display print messages")

    self._args = self._parser.parse_args()
    self.display("Parsed with argparse: %s" % str(self._args))

    # Always define the attribute so that reading it never raises
    # AttributeError, and honour an object supplied by the caller.
    self.process_info = process_info
    if self._args.stats and self.process_info is None:
        # To get process information without callback mechanism
        self.process_info = ProcessInfo(os.getpid())
class BrowserHistoryCollector(object):
    """
    Collects browser history items and stores them as obsels in a kTBS.

    This code is for Firefox browser: it reads the ``moz_places`` table of
    the sqlite history file given on the command line.
    """

    # (moz_places column, suffix of the model attribute type uri storing it)
    _COLUMN_TO_ATTRIBUTE = (("visit_count", "visit_count"),
                            ("title", "title"),
                            ("frecency", "frequency"))

    def __init__(self, process_info=None):
        """
        Build the command line parser, parse the arguments and prepare the
        optional process statistics.

        Every option has a default value (history file, kTBS root uri,
        trace origin, maximum number of items), so none is mandatory.

        :param process_info: optional object whose ``get_values()`` method
            is called to report process information when ``--stats`` is
            given. When it is omitted and ``--stats`` is set, a
            ``ProcessInfo`` bound to the current process id is created.
        """
        self._parser = ArgumentParser(
            description="Fill a stored trace with browser history items as "
                        "obsels.")

        # "%(default)s" lets argparse insert the default itself, so a "%"
        # character inside a default value cannot break the help output.
        self._parser.add_argument(
            "-f", "--file", nargs="?",
            const=FIREFOX_HISTORY, default=FIREFOX_HISTORY,
            help="File containings the sqlite data to parse. "
                 "Default is %(default)s")

        self._parser.add_argument(
            "-r", "--root", nargs="?",
            const=KTBS_ROOT, default=KTBS_ROOT,
            help="Enter the uri of the kTBS root. Default is %(default)s")

        self._parser.add_argument(
            "-o", "--origin", nargs="?",
            const=TRACE_ORIGIN, default=TRACE_ORIGIN,
            help="Enter the trace origin. Default is %(default)s")

        self._parser.add_argument(
            "-l", "--limit", nargs="?", type=int,
            const=NB_MAX_ITEMS, default=NB_MAX_ITEMS,
            help="Enter the maximun number of items to collect. "
                 "Default is %(default)s")

        self._parser.add_argument("-p", "--profile", action="store_true",
                                  help="Profile current code")

        self._parser.add_argument("-s", "--stats", action="store_true",
                                  help="Mesure execution time")

        self._parser.add_argument("-v", "--verbose", action="store_true",
                                  help="Display print messages")

        self._args = self._parser.parse_args()
        self.display("Parsed with argparse: %s" % str(self._args))

        # Always define the attribute so that reading it never raises
        # AttributeError, and honour an object supplied by the caller.
        self.process_info = process_info
        if self._args.stats and self.process_info is None:
            # To get process information without callback mechanism
            self.process_info = ProcessInfo(os.getpid())

    def display(self, msg):
        """
        Display the messages only in verbose mode.

        :param msg: text to print when ``--verbose`` was given.
        """
        if self._args.verbose:
            print(msg)

    def profiling_asked(self):
        """
        Has profiling been asked in command line ?

        :return: the value of the ``--profile`` flag.
        """
        return self._args.profile

    def create_ktbs_base_for_history(self):
        """
        Get the "BrowserHistory/" base of the kTBS root, creating it when
        it does not exist yet.

        :return: the kTBS base.
        """
        root = get_ktbs(self._args.root)

        base = root.get_base(id="BrowserHistory/")
        if base is None:
            base = root.create_base(id="BrowserHistory/")

        return base

    def create_ktbs_model_for_history(self, base=None):
        """
        Creates a kTBS Model for browser history data.

        The model holds one obsel type and three attribute types
        (visit count, title, frequency) attached to it.

        :param base: kTBS base in which the model is created (required
            despite the ``None`` default).
        :return: the created model.
        """
        model = base.create_model(id="BHModel")

        bh_obsel_type = model.create_obsel_type(id=BH_OBSEL_ID,
                                                label=BH_OBSEL_LABEL)

        # Browser history obsel attributes
        # id, url, title, rev_host, visit_count, hidden, typed, favicon_id,
        # frecency, last_visit_date
        for attr_id, data_type in (("#visit_count", XSD.integer),
                                   ("#title", XSD.string),
                                   ("#frequency", XSD.integer)):
            model.create_attribute_type(id=attr_id,
                                        obsel_type=bh_obsel_type,
                                        data_type=data_type)

        return model

    def create_ktbs_trace_for_history(self, base=None, model=None):
        """
        Creates the "RawHistory/" stored trace for browser history data.

        :param base: kTBS base in which the trace is created.
        :param model: model of the trace; its uri is passed to the kTBS.
        :return: the created stored trace.
        """
        return base.create_stored_trace(id="RawHistory/",
                                        model=model.get_uri(),
                                        origin=self._args.origin)

    def _map_columns_to_attributes(self, model):
        """
        Map each collected moz_places column to its attribute type uri.

        :param model: model whose attribute types are inspected.
        :return: dict ``{column name: attribute type uri}``; a column whose
            attribute type is absent from the model is left out.
        """
        column_uris = {}
        for attr_type in model.list_attribute_types():
            uri = attr_type.get_uri()
            for column, suffix in self._COLUMN_TO_ATTRIBUTE:
                if uri.endswith(suffix):
                    column_uris[column] = uri
                    break
        return column_uris

    def collect_history_items(self, trace=None):
        """
        Open the browser history database, extract history items and
        populates a kTBS stored trace with it.

        At most ``--limit`` obsels are created, in chronological order of
        ``last_visit_date``. A sqlite error is reported on standard output
        and stops the collection.

        :param trace: stored trace receiving the obsels (required despite
            the ``None`` default).
        :return: the list of values returned by ``trace.create_obsel``.
        """
        obsels_list = []
        stats = self._args.stats
        conn = None

        try:
            if stats:
                # time.clock() was removed in Python 3.8: use
                # time.process_time() whenever it is available.
                cpu_clock = getattr(time, "process_time", None) or time.clock
                start_time = time.time()
                start_cpu = cpu_clock()

            # http://docs.python.org/library/sqlite3.html#accessing-columns-
            # by-name-instead-of-by-index
            conn = sqlite3.connect(self._args.file,
                                   detect_types=sqlite3.PARSE_COLNAMES)
            conn.row_factory = sqlite3.Row

            cursor = conn.cursor()

            # If obsels are not inserted in chronological order
            # We get a "Non-monotonic collection error"
            cursor.execute('SELECT * FROM moz_places '
                           'WHERE last_visit_date IS NOT NULL '
                           'ORDER BY last_visit_date')

            # Model information is loop invariant: fetch it only once
            model = trace.get_model()
            obsel_type_uri = model.get(id=BH_OBSEL_ID).get_uri()
            column_uris = self._map_columns_to_attributes(model)

            nb_browser_items = 0  # to be replaced by select count(id) ...
            nb_obsels = 0

            for row in cursor:
                # Stop as soon as the requested number of obsels is reached
                if nb_obsels >= self._args.limit:
                    break

                nb_browser_items += 1

                last_visit = row['last_visit_date']
                if last_visit is None:
                    # We do not create obsels with no date in kTBS
                    continue

                # last_visit_date is divided by 1000000 to get seconds
                last_visit = datetime.datetime.fromtimestamp(
                    int(last_visit // 1000000))

                # Prepare obsel attributes; NULL columns are omitted
                # rather than stored as a None value.
                attributes = {}
                for column, uri in column_uris.items():
                    value = row[column]
                    if value is not None:
                        attributes[uri] = value

                # Insert history items as obsels
                obsel = trace.create_obsel(type=obsel_type_uri,
                                           begin=last_visit,
                                           end=last_visit,
                                           attributes=attributes,
                                           subject=row['url'])
                obsels_list.append(obsel)

                self.display(
                    "id: %s, url: %s, visit_count: %s, frecency: %s, "
                    "last_visit_date: %s"
                    % (row['id'], row['url'], row['visit_count'],
                       row['frecency'], last_visit))

                nb_obsels += 1

                # To display Process information
                if stats and nb_obsels % 100 == 0:
                    values = self.process_info.get_values()
                    print("=====> PROCESS INFO = %s" % str(values))

            cursor.close()

            if stats:
                end_cpu = cpu_clock()
                end_time = time.time()

                print("Program execution time %f seconds"
                      % (end_time - start_time))
                print("Program CPU execution time %f seconds"
                      % (end_cpu - start_cpu))
                print("Created %i obsels on %i items"
                      % (nb_obsels, nb_browser_items))

        except sqlite3.Error as err:
            print("An error occurred: %s" % err.args[0])

        finally:
            # Release the database even when the collection failed
            if conn is not None:
                conn.close()

        return obsels_list