def test_complete_load(self):
    """Check the records produced by ``DataLoader.complete_load``.

    A fresh loader runs ``complete_load`` on the data-loader test dataset;
    its first three records must then carry ``visitor_useragent_trimmed``
    and ``visitor_continent`` next to the two raw fields.
    """
    loader = DataLoader()
    loader.complete_load('./data/tests/issuu_test_data_loader.json')

    expected_records = [
        {
            "visitor_useragent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36",
            "visitor_country": "MX",
            "visitor_useragent_trimmed": "Mozilla Safari",
            "visitor_continent": "North America",
        },
        {
            "visitor_useragent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/540.36 (KHTML, like Gecko) Chrome/34.0.1750.112 Safari/538.36",
            "visitor_country": "MX",
            "visitor_useragent_trimmed": "Mozilla Safari",
            "visitor_continent": "North America",
        },
        {
            "visitor_useragent": "P3P Validator",
            "visitor_country": "FR",
            "visitor_useragent_trimmed": "P3P Validator",
            "visitor_continent": "Europe",
        },
    ]

    # Compare record by record, in order, so the first mismatch is reported.
    for position, expected in enumerate(expected_records):
        self.assertEqual(loader.dicts[position], expected)
class GraphHandlerTestCase(unittest.TestCase):
    """Tests for the files written by ``GraphHandler.create_graph``."""

    def setUp(self):
        """Load the graph-handler test dataset and build the handler under test."""
        fixture_path = './data/tests/issuu_test_graph_handler.json'
        self.dl = DataLoader()
        self.dl.load_dataset_from(fixture_path)
        self.gh = GraphHandler(self.dl.dicts)

    def test_file_created(self):
        """After ``create_graph`` both expected output files must exist.

        The paths are relative to the current working directory.
        """
        import os.path

        self.gh.create_graph(base_visitor_uuid="0000",
                             base_document_uuid="aaaa",
                             render=False)

        for produced_file in ('graphs/alaaaa.dot', 'graphs/alaaaa.dot.pdf'):
            self.assertTrue(os.path.exists(produced_file))
def __init__(self, *args, **kwargs):
    '''
    Build the main window: application state, the four frames and the two
    UUID entries, then delegate the remaining widgets to the init_* methods.

    Parameters
    ==========
    *args, **kwargs:
        Forwarded unchanged to tk.Tk.__init__.
    '''
    tk.Tk.__init__(self, *args, **kwargs)

    # APPLICATION STATE
    # =================
    # Only the loader exists up front; the other components start as None.
    self.al_list = []
    self.dl = DataLoader()
    self.dv = self.af = self.gh = None

    # GRAPHICAL INITIALISATION
    # ========================
    self.title("ISSUU Tracker")
    self.geometry("520x330")
    self.grid_rowconfigure(1, weight=1)
    self.grid_columnconfigure(0, weight=1)

    # One frame per section, stacked vertically in this order.
    frame_padding = {"padx": 3, "pady": 10}
    self.df_frame = tk.Frame(self, **frame_padding)
    self.stat_frame = tk.Frame(self, **frame_padding)
    self.uuid_frame = tk.Frame(self, **frame_padding)
    self.al_frame = tk.Frame(self, **frame_padding)
    stacked_frames = (self.df_frame, self.stat_frame,
                      self.uuid_frame, self.al_frame)
    for row_index, frame in enumerate(stacked_frames):
        frame.grid(row=row_index)

    # Entries are kept as instance variables so callbacks can read them;
    # each one is pre-filled with a default UUID.
    visitor_entry = tk.Entry(self.uuid_frame)
    visitor_entry.insert("end", "2f63e0cca690da91")
    visitor_entry.grid(column=2, row=2)
    self.visitor_uuid_entry = visitor_entry

    document_entry = tk.Entry(self.uuid_frame)
    document_entry.insert(
        "end", "140219141540-c900b41f845c67cc08b58911155c681c")
    document_entry.grid(column=2, row=3)
    self.document_uuid_entry = document_entry

    self.al_label_list = []

    for build_frame in (self.init_df_frame, self.init_stat_frame,
                        self.init_uuid_frame, self.init_al_frame):
        build_frame()
class AffinityFinderTestCase(unittest.TestCase): def setUp(self): self.dl = DataLoader() self.dl.load_dataset_from('./data/tests/issuu_test_affinity_finder.json') self.af = AffinityFinder(self.dl.dicts) def list_equal(self,l1,l2): print(l1) print(l2) return len(l1)==len(l2) and sorted(l1)==sorted(l2) def test_readers_of(self): list_result = ["0000","1111"] readers_of = self.af.readers_of("aaaa") self.assertTrue(self.list_equal(list_result,readers_of)) def test_has_read(self): list_result = ["aaaa","bbbb"] has_read = self.af.has_read("1111") self.assertTrue(self.list_equal(list_result,has_read)) def test_also_likes_no_user(self): list_result = ["aaaa","bbbb"] also_likes = self.af.also_likes("aaaa") self.assertTrue(self.list_equal(list_result,also_likes)) def test_also_likes_user0(self): list_result = ["aaaa","bbbb"] also_likes = self.af.also_likes("aaaa","0000") self.assertTrue(self.list_equal(list_result,also_likes)) def test_also_likes_user1(self): list_result = ["bbbb"] also_likes = self.af.also_likes("bbbb","1111") self.assertTrue(self.list_equal(list_result,also_likes))
class DataVisualiserTestCase(unittest.TestCase):
    """Tests for ``DataVisualiser.create_histogram_dict`` on the visualiser test dataset."""

    def setUp(self):
        """Run a complete load of the test dataset and build the visualiser."""
        fixture_path = './data/tests/issuu_test_data_visualiser.json'
        self.dl = DataLoader()
        self.dl.complete_load(fixture_path)
        self.dv = DataVisualiser(self.dl.dicts)

    def _histogram_of(self, feature):
        """Return the histogram dict of ``feature`` over the visualiser's own records."""
        return self.dv.create_histogram_dict(self.dv.dicts, feature)

    def test_histogram_countries(self):
        """Counts per ``visitor_country``."""
        observed = self._histogram_of('visitor_country')
        self.assertEqual(observed, {'MX': 2, 'FR': 1})

    def test_histogram_continents(self):
        """Counts per ``visitor_continent``."""
        observed = self._histogram_of('visitor_continent')
        self.assertEqual(observed, {'North America': 2, 'Europe': 1})

    def test_histogram_browsers_verbose(self):
        """Counts per full ``visitor_useragent`` string."""
        observed = self._histogram_of('visitor_useragent')
        expected = {
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36': 1,
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/540.36 (KHTML, like Gecko) Chrome/34.0.1750.112 Safari/538.36': 1,
            'P3P Validator': 1,
        }
        self.assertEqual(observed, expected)

    def test_histogram_browsers_trimmed(self):
        """Counts per ``visitor_useragent_trimmed``."""
        observed = self._histogram_of('visitor_useragent_trimmed')
        self.assertEqual(observed, {'Mozilla Safari': 2, 'P3P Validator': 1})
def setUp(self):
    """Create a ``DataLoader`` and load the data-loader test dataset into it."""
    fixture_path = './data/tests/issuu_test_data_loader.json'
    self.dl = DataLoader()
    self.dl.load_dataset_from(fixture_path)
class DataLoaderTestCase(unittest.TestCase):
    """Tests for ``DataLoader``: raw loading, browser trimming and continent lookup."""

    _FIXTURE_PATH = './data/tests/issuu_test_data_loader.json'

    # Raw fields expected for the first three records of the test dataset.
    _RAW_RECORDS = (
        {
            "visitor_useragent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36",
            "visitor_country": "MX",
        },
        {
            "visitor_useragent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/540.36 (KHTML, like Gecko) Chrome/34.0.1750.112 Safari/538.36",
            "visitor_country": "MX",
        },
        {
            "visitor_useragent": "P3P Validator",
            "visitor_country": "FR",
        },
    )
    # Derived values expected for those same records, position by position.
    _TRIMMED_BROWSERS = ("Mozilla Safari", "Mozilla Safari", "P3P Validator")
    _CONTINENTS = ("North America", "North America", "Europe")

    def setUp(self):
        """Load the raw test dataset, with no derived fields added."""
        self.dl = DataLoader()
        self.dl.load_dataset_from(self._FIXTURE_PATH)

    def _expected_records(self, trimmed=False, continent=False):
        """Build fresh expected dicts, optionally with the derived fields."""
        records = []
        for raw, browser, area in zip(self._RAW_RECORDS,
                                      self._TRIMMED_BROWSERS,
                                      self._CONTINENTS):
            record = dict(raw)
            if trimmed:
                record["visitor_useragent_trimmed"] = browser
            if continent:
                record["visitor_continent"] = area
            records.append(record)
        return records

    def _assert_first_records(self, loader, expected_records):
        """Compare ``loader.dicts`` with the expected records, by position."""
        for position, expected in enumerate(expected_records):
            self.assertEqual(loader.dicts[position], expected)

    def test_load_dataset(self):
        """A plain load yields only the raw fields."""
        self._assert_first_records(self.dl, self._expected_records())

    def test_trimmed_browser_regex_match(self):
        """``add_trimmed_browser`` on record 0 must give 'Mozilla Safari'."""
        record = self.dl.dicts[0]
        self.dl.add_trimmed_browser(record)
        self.assertEqual(record['visitor_useragent_trimmed'], 'Mozilla Safari')

    def test_trimmed_browser_no_regex_match(self):
        """``add_trimmed_browser`` on record 2 must leave 'P3P Validator' as is."""
        record = self.dl.dicts[2]
        self.dl.add_trimmed_browser(record)
        self.assertEqual(record['visitor_useragent_trimmed'], 'P3P Validator')

    def test_map_trim_browser(self):
        """``map_trim`` adds the trimmed browser to every record."""
        self.dl.map_trim()
        self._assert_first_records(self.dl,
                                   self._expected_records(trimmed=True))

    def test_continent(self):
        """``add_continent`` on records 0 and 2 must add the expected continent."""
        mexican_record, french_record = self.dl.dicts[0], self.dl.dicts[2]
        for record in (mexican_record, french_record):
            self.dl.add_continent(record)
        self.assertEqual(mexican_record['visitor_continent'], 'North America')
        self.assertEqual(french_record['visitor_continent'], 'Europe')

    def test_map_add_continent(self):
        """``map_continents`` adds the continent to every record."""
        self.dl.map_continents()
        self._assert_first_records(self.dl,
                                   self._expected_records(continent=True))

    def test_complete_load(self):
        """``complete_load`` on a fresh loader yields both derived fields."""
        loader = DataLoader()
        loader.complete_load('./data/tests/issuu_test_data_loader.json')
        self._assert_first_records(
            loader, self._expected_records(trimmed=True, continent=True))
def main():
    """Parse the command line and run the requested ISSUU Tracker task.

    Task ids accepted by ``-t/--task_id``:

    * ``2a`` / ``2b`` -- complete load, then plot countries / continents
    * ``3a`` / ``3b`` -- complete load, then plot browsers (verbose / trimmed)
    * ``4d`` -- load the dataset and print ``also_likes`` for ``--doc_uuid``
    * ``5``  -- load the dataset and create the graph for ``--doc_uuid``
      and ``--user_uuid``
    * ``6``  -- start the GUI

    Any other task id is rejected by argparse with a usage error. Omitting
    the task id does nothing, as before.
    """
    # COMPONENTS CREATION
    # ===================
    dl = DataLoader()

    # The four plotting tasks differ only in the DataVisualiser method called.
    plot_tasks = {
        "2a": DataVisualiser.plot_countries,
        "2b": DataVisualiser.plot_continents,
        "3a": DataVisualiser.plot_browsers_verbose,
        "3b": DataVisualiser.plot_browsers,
    }
    task_ids = [*plot_tasks, "4d", "5", "6"]

    # ARGUMENTS PARSING
    # =================
    parser = argparse.ArgumentParser(description='ISSUU Tracker')
    parser.add_argument("-u", "--user_uuid", help="UUID of the user")
    parser.add_argument("-d", "--doc_uuid", help="UUID of the document")
    # metavar keeps the usage line short now that choices are declared.
    parser.add_argument("-t", "--task_id", choices=task_ids,
                        metavar="TASK_ID",
                        help="ID of the task (2a,2b,3a,3b,4d,5,6)")
    parser.add_argument("-f", "--file_name", help="JSON file of the dataset")
    args = parser.parse_args()

    # TASK DISPATCH
    # =============
    task = args.task_id
    if task in plot_tasks:
        dl.complete_load(args.file_name)
        dv = DataVisualiser(dl.dicts)
        plot_tasks[task](dv)
    elif task == "4d":
        dl.load_dataset_from(args.file_name)
        af = AffinityFinder(dl.dicts)
        print(af.also_likes(args.doc_uuid))
    elif task == "5":
        dl.load_dataset_from(args.file_name)
        gh = GraphHandler(dicts=dl.dicts)
        gh.create_graph(base_document_uuid=args.doc_uuid,
                        base_visitor_uuid=args.user_uuid)
    elif task == "6":
        gui = GUI()
        gui.mainloop()
Dictionaries loaded by a DataLoader. Default None (instance variable) ''' if dicts is None: dicts = self.dicts self.plot_feature(dicts, 'visitor_useragent', 'Browser', 'Count') def plot_browsers(self, dicts=None): ''' Plot a histogram of the home continents of the visitors. Parameters ========== dicts: dictionary list Dictionaries loaded by a DataLoader. Default None (instance variable) ''' if dicts is None: dicts = self.dicts self.plot_feature(dicts, 'visitor_useragent_trimmed', 'Browser', 'Count') if __name__ == "__main__": # IMPORTS TO TEST from IssuuTracker.data_loader import DataLoader, path_base_dataset dl_full = DataLoader() dl_full.complete_load(path_base_dataset) dv = DataVisualiser(dl_full.dicts) dv.plot_countries() dv.plot_continents() dv.plot_browsers()
# Sorting by frequency sort_list = sorted(docs_list, key=counts.get, reverse=True) # Removing duplicates sort_list = list(set(sort_list)) if doc_uuid in sort_list: sort_list.remove(doc_uuid) return [doc_uuid] + sort_list[:9] def id(self, docs_list, doc_uuid): return docs_list if __name__ == "__main__": # IMPORTS TO TEST from IssuuTracker.data_loader import DataLoader, path_base_dataset, path_100k_dataset dl_full = DataLoader() dl_full.load_dataset_from(path_base_dataset) af = AffinityFinder(dl_full.dicts) # Test 1 # print(af.readers_of_list('130228184234-6fd07690237d48aaa7be4e20cb767b13')) # print(af.has_read_list('bd378ce6df7cb9cd')) # print(af.also_likes_list('120928161916-bbf9b86bb865460a8e674d5338115a18')) # Test 2 print(af.has_read('2f63e0cca690da91')) print(af.also_likes('140219141540-c900b41f845c67cc08b58911155c681c')) # dl_100k = DataLoader() # dl_100k.load_dataset_from(path_100k_dataset)
graph.attr('node',style='filled',color='green') if not(document_uuid in graph.node_attr): graph.node(document_uuid[-4:]) graph.edge(visitor_uuid[-4:],document_uuid[-4:]) else: graph.edge(visitor_uuid[-4:],document_uuid[-4:]) graph.attr('node',style='solid',color='black') if __name__ == "__main__": # IMPORT TESTS from IssuuTracker.data_loader import DataLoader,path_base_dataset,path_100k_dataset,path_400k_dataset,path_600k_dataset,path_3m_dataset # BASE DATASET TESTS # ================== dl_base = DataLoader() dl_base.load_dataset_from(path_base_dataset) gh = GraphHandler(dl_base.dicts) # gh.create_graph(dl_base.df,'bd378ce6df7cb9cd','130228184234-6fd07690237d48aaa7be4e20cb767b13') # gh.create_graph(dl_base.dicts,'2f63e0cca690da91','140219141540-c900b41f845c67cc08b58911155c681c') # # 100K DATASET TESTS # # ================== dl_100k = DataLoader() dl_100k.load_dataset_from(path_100k_dataset) gh = GraphHandler(dicts=dl_100k.dicts) # gh.create_graph(base_visitor_uuid='00000000deadbeef',base_document_uuid='100806162735-00000000115598650cb8b514246272b5') gh.create_graph(base_visitor_uuid='00000000deadbeef',base_document_uuid='aaaaaaaaaaaa-00000000df1ad06a86c40000000feadbe') # # 400K DATASET TESTS
class GUI(tk.Tk):
    '''
    Main window of the ISSUU Tracker.

    GUI is a subclass of tk.Tk and holds as instance variables the
    DataLoader (dl) and, once a dataset has been loaded through
    load_dataset, the DataVisualiser (dv), the AffinityFinder (af) and the
    GraphHandler (gh) built from the loaded dictionaries. Until then dv, af
    and gh are None and the action buttons only show a warning.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Build the window, its four frames and the two UUID entries.

        Parameters
        ==========
        *args, **kwargs:
            Forwarded unchanged to tk.Tk.__init__.
        '''
        tk.Tk.__init__(self, *args, **kwargs)

        self.al_list = []
        self.dl = DataLoader()
        self.dv = None
        self.af = None
        self.gh = None

        # GRAPHICAL INITIALISATION
        # ========================
        self.title("ISSUU Tracker")
        self.geometry("520x330")
        self.grid_rowconfigure(1, weight=1)
        self.grid_columnconfigure(0, weight=1)

        self.df_frame = tk.Frame(self, padx=3, pady=10)
        self.stat_frame = tk.Frame(self, padx=3, pady=10)
        self.uuid_frame = tk.Frame(self, padx=3, pady=10)
        self.al_frame = tk.Frame(self, padx=3, pady=10)

        self.df_frame.grid(row=0)
        self.stat_frame.grid(row=1)
        self.uuid_frame.grid(row=2)
        self.al_frame.grid(row=3)

        # The entries are instance variables so the callbacks can read them.
        self.visitor_uuid_entry = tk.Entry(self.uuid_frame)
        self.visitor_uuid_entry.insert("end", "2f63e0cca690da91")
        self.visitor_uuid_entry.grid(column=2, row=2)

        self.document_uuid_entry = tk.Entry(self.uuid_frame)
        self.document_uuid_entry.insert(
            "end", "140219141540-c900b41f845c67cc08b58911155c681c")
        self.document_uuid_entry.grid(column=2, row=3)

        # Labels currently showing the "also likes" result.
        self.al_label_list = []

        self.init_df_frame()
        self.init_stat_frame()
        self.init_uuid_frame()
        self.init_al_frame()

    # Initialisation Functions:
    # =========================
    def init_df_frame(self):
        '''
        Initialise the label and the "Choose Dataset" button of the dataset
        frame.
        '''
        label_df = tk.Label(self.df_frame, text="Dataset")
        label_df.grid(column=0, columnspan=10, row=0)

        load_dataset_button = tk.Button(self.df_frame, text="Choose Dataset",
                                        command=self.load_dataset)
        load_dataset_button.grid(column=0, row=1)

    def init_stat_frame(self):
        '''
        Initialise the different buttons and labels of the stat frame.
        '''
        label_stat = tk.Label(self.stat_frame,
                              text="Statistical visualisations")
        label_stat.grid(column=0, columnspan=10, row=0)

        plot_countries_button = tk.Button(self.stat_frame,
                                          text="Plot Countries",
                                          command=self.gui_plot_countries)
        plot_countries_button.grid(column=0, row=1)

        plot_continents_button = tk.Button(self.stat_frame,
                                           text="Plot Continents",
                                           command=self.gui_plot_continents)
        plot_continents_button.grid(column=1, row=1)

        plot_browsers_verbose_button = tk.Button(
            self.stat_frame, text="Plot Browsers (Verbose)",
            command=self.gui_plot_browsers_verbose)
        plot_browsers_verbose_button.grid(column=2, row=1)

        plot_browsers_button = tk.Button(self.stat_frame,
                                         text="Plot Browsers",
                                         command=self.gui_plot_browsers)
        plot_browsers_button.grid(column=3, row=1)

    def init_uuid_frame(self):
        '''
        Initialise the different labels of the uuid frame.
        Note that the entries have been put as instance variables to access
        their content.
        '''
        label_uuid = tk.Label(self.uuid_frame, text="UUIDs")
        label_uuid.grid(column=0, columnspan=10, row=0)

        visitor_uuid_label = tk.Label(self.uuid_frame, text="Visitor UUID: ")
        visitor_uuid_label.grid(column=1, row=2)

        document_uuid_label = tk.Label(self.uuid_frame,
                                       text="Document UUID: ")
        document_uuid_label.grid(column=1, row=3)

    def init_al_frame(self):
        '''
        Initialise the different buttons of the also likes frame.
        '''
        al_uuid = tk.Label(self.al_frame, text="Also Likes")
        al_uuid.grid(column=0, columnspan=10, row=0)

        al_list_button = tk.Button(self.al_frame, text="List Documents",
                                   command=self.gui_also_likes_list)
        al_list_button.grid(column=0, row=1)

        al_graph_button = tk.Button(self.al_frame, text="Plot Graph",
                                    command=self.gui_also_likes_graph)
        al_graph_button.grid(column=0, row=2)

    # Functions:
    # ==========
    def _dataset_loaded(self):
        '''
        Tell whether a dataset has been loaded, warning the user when not.

        Returns
        =======
        bool
            True when dv, af and gh have been created by load_dataset,
            False (after showing a warning box) otherwise.
        '''
        if self.dv is None or self.af is None or self.gh is None:
            # Local import: messagebox is only needed on this path.
            from tkinter import messagebox
            messagebox.showwarning("No dataset",
                                   "Please choose a dataset first.")
            return False
        return True

    def load_dataset(self):
        '''
        Open a file dialog and allow the user to choose a file.
        Initialise the different components by loading it. Nothing happens
        when the dialog is cancelled or the chosen file is not a .json file.
        '''
        filename = filedialog.askopenfilename(
            title="Select file",
            filetypes=(("json files", "*.json"), ("all files", "*.*")))

        # A cancelled dialog yields an empty value; depending on the Tk
        # build this may be an empty string or an empty tuple.
        if not filename:
            return

        if filename.endswith('.json'):
            self.dl.complete_load(filename)
            self.dv = DataVisualiser(self.dl.dicts)
            self.af = AffinityFinder(self.dl.dicts)
            self.gh = GraphHandler(self.dl.dicts)

    # DF FRAME FUNCTIONS
    def gui_plot_countries(self):
        '''
        Plot the countries of the users of the dataset
        '''
        if self._dataset_loaded():
            self.dv.plot_countries()

    def gui_plot_continents(self):
        '''
        Plot the continents of the users of the dataset
        '''
        if self._dataset_loaded():
            self.dv.plot_continents()

    def gui_plot_browsers_verbose(self):
        '''
        Plot the full visitor_user_agent of the users of the dataset.
        '''
        if self._dataset_loaded():
            self.dv.plot_browsers_verbose()

    def gui_plot_browsers(self):
        '''
        Plot the trimmed visitor_user_agent of the users of the dataset.
        '''
        if self._dataset_loaded():
            self.dv.plot_browsers()

    def gui_also_likes_list(self):
        '''
        Display the list of "also liked" documents as labels next to the
        button, replacing the labels of any previous result.
        '''
        if not self._dataset_loaded():
            return

        visitor_uuid = self.visitor_uuid_entry.get()
        document_uuid = self.document_uuid_entry.get()
        al_list = self.af.also_likes(document_uuid, visitor_uuid)

        # Remove the previous result and forget the destroyed widgets so
        # the list does not grow with every click.
        for elt in self.al_label_list:
            elt.destroy()
        self.al_label_list.clear()

        # Row 0 holds the frame title, so results start at row 1.
        for row, doc in enumerate(al_list, start=1):
            al_doc = tk.Label(self.al_frame, text=doc)
            al_doc.grid(column=1, columnspan=10, row=row)
            self.al_label_list.append(al_doc)

    def gui_also_likes_graph(self):
        '''
        Generate and display the "also likes" graph.
        '''
        if not self._dataset_loaded():
            return

        visitor_uuid = self.visitor_uuid_entry.get()
        document_uuid = self.document_uuid_entry.get()
        self.gh.reset_graph()
        self.gh.create_graph(base_visitor_uuid=visitor_uuid,
                             base_document_uuid=document_uuid)
def setUp(self):
    """Load the affinity-finder test dataset and build the ``AffinityFinder`` under test."""
    fixture_path = './data/tests/issuu_test_affinity_finder.json'
    self.dl = DataLoader()
    self.dl.load_dataset_from(fixture_path)
    self.af = AffinityFinder(self.dl.dicts)
def setUp(self):
    """Run a complete load of the visualiser test dataset and build the ``DataVisualiser``."""
    fixture_path = './data/tests/issuu_test_data_visualiser.json'
    self.dl = DataLoader()
    self.dl.complete_load(fixture_path)
    self.dv = DataVisualiser(self.dl.dicts)
def setUp(self):
    """Load the graph-handler test dataset and build the ``GraphHandler`` under test."""
    fixture_path = './data/tests/issuu_test_graph_handler.json'
    self.dl = DataLoader()
    self.dl.load_dataset_from(fixture_path)
    self.gh = GraphHandler(self.dl.dicts)