コード例 #1
0
 def test_complete_load(self):
     # After complete_load, the first three records must carry both derived
     # fields: the trimmed user agent and the visitor continent.
     loader = DataLoader()
     loader.complete_load('./data/tests/issuu_test_data_loader.json')
     expected_records = [
         {
             "visitor_useragent":
             "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36",
             "visitor_country": "MX",
             "visitor_useragent_trimmed": "Mozilla Safari",
             "visitor_continent": "North America"
         },
         {
             "visitor_useragent":
             "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/540.36 (KHTML, like Gecko) Chrome/34.0.1750.112 Safari/538.36",
             "visitor_country": "MX",
             "visitor_useragent_trimmed": "Mozilla Safari",
             "visitor_continent": "North America"
         },
         {
             "visitor_useragent": "P3P Validator",
             "visitor_country": "FR",
             "visitor_useragent_trimmed": "P3P Validator",
             "visitor_continent": "Europe"
         },
     ]
     # Indexed access (rather than zip) so a short result list still fails.
     for index, expected in enumerate(expected_records):
         self.assertEqual(loader.dicts[index], expected)
コード例 #2
0
class GraphHandlerTestCase(unittest.TestCase):
    # Tests for GraphHandler, run against the graph-handler fixture dataset.

    def setUp(self):
        fixture_path = './data/tests/issuu_test_graph_handler.json'
        self.dl = DataLoader()
        self.dl.load_dataset_from(fixture_path)
        self.gh = GraphHandler(self.dl.dicts)

    def test_file_created(self):
        # os.path is only needed by this test, so it is imported locally.
        import os.path
        self.gh.create_graph(base_visitor_uuid="0000",
                             base_document_uuid="aaaa",
                             render=False)
        # Both the .dot source and its .pdf companion are expected on disk.
        for produced in ('graphs/alaaaa.dot', 'graphs/alaaaa.dot.pdf'):
            self.assertTrue(os.path.exists(produced))
コード例 #3
0
    def __init__(self, *args, **kwargs):
        '''
        Build the main window: initialise the component slots, create the
        four section frames and the two UUID entries, then let the init_*
        methods add the remaining widgets.
        '''
        tk.Tk.__init__(self, *args, **kwargs)

        # COMPONENTS
        # ==========
        # Only the DataLoader exists up front; the other components stay
        # None until they are assigned elsewhere.
        self.dl = DataLoader()
        self.dv = self.af = self.gh = None
        self.al_list = []
        self.al_label_list = []

        # GRAPHICAL INITIALISATION
        # ========================
        self.title("ISSUU Tracker")
        self.geometry("520x330")
        self.grid_rowconfigure(1, weight=1)
        self.grid_columnconfigure(0, weight=1)

        # One frame per section, stacked top to bottom.
        frame_options = {"padx": 3, "pady": 10}
        self.df_frame = tk.Frame(self, **frame_options)
        self.stat_frame = tk.Frame(self, **frame_options)
        self.uuid_frame = tk.Frame(self, **frame_options)
        self.al_frame = tk.Frame(self, **frame_options)
        for row, frame in enumerate((self.df_frame, self.stat_frame,
                                     self.uuid_frame, self.al_frame)):
            frame.grid(row=row)

        # UUID entries, pre-filled with default identifiers.
        default_visitor_uuid = "2f63e0cca690da91"
        default_document_uuid = "140219141540-c900b41f845c67cc08b58911155c681c"

        self.visitor_uuid_entry = tk.Entry(self.uuid_frame)
        self.visitor_uuid_entry.insert("end", default_visitor_uuid)
        self.visitor_uuid_entry.grid(column=2, row=2)

        self.document_uuid_entry = tk.Entry(self.uuid_frame)
        self.document_uuid_entry.insert("end", default_document_uuid)
        self.document_uuid_entry.grid(column=2, row=3)

        for build_section in (self.init_df_frame, self.init_stat_frame,
                              self.init_uuid_frame, self.init_al_frame):
            build_section()
コード例 #4
0
class AffinityFinderTestCase(unittest.TestCase):

    def setUp(self):
        self.dl = DataLoader()
        self.dl.load_dataset_from('./data/tests/issuu_test_affinity_finder.json')
        self.af = AffinityFinder(self.dl.dicts)

    def list_equal(self,l1,l2):
        print(l1)
        print(l2)
        return len(l1)==len(l2) and sorted(l1)==sorted(l2)


    def test_readers_of(self):
        list_result = ["0000","1111"]
        readers_of = self.af.readers_of("aaaa")
        self.assertTrue(self.list_equal(list_result,readers_of))

    def test_has_read(self):
        list_result = ["aaaa","bbbb"]
        has_read = self.af.has_read("1111")
        self.assertTrue(self.list_equal(list_result,has_read))

    def test_also_likes_no_user(self):
        list_result = ["aaaa","bbbb"]
        also_likes = self.af.also_likes("aaaa")
        self.assertTrue(self.list_equal(list_result,also_likes))

    def test_also_likes_user0(self):
        list_result = ["aaaa","bbbb"]
        also_likes = self.af.also_likes("aaaa","0000")
        self.assertTrue(self.list_equal(list_result,also_likes))

    def test_also_likes_user1(self):
        list_result = ["bbbb"]
        also_likes = self.af.also_likes("bbbb","1111")
        self.assertTrue(self.list_equal(list_result,also_likes))
コード例 #5
0
class DataVisualiserTestCase(unittest.TestCase):
    # Tests for DataVisualiser.create_histogram_dict, run against the fully
    # loaded data-visualiser fixture dataset.

    def setUp(self):
        fixture_path = './data/tests/issuu_test_data_visualiser.json'
        self.dl = DataLoader()
        self.dl.complete_load(fixture_path)
        self.dv = DataVisualiser(self.dl.dicts)

    def _assert_histogram(self, feature, expected_counts):
        # Build the histogram of `feature` over the visualiser's records and
        # compare it with the expected value -> count mapping.
        histogram = self.dv.create_histogram_dict(self.dv.dicts, feature)
        self.assertEqual(histogram, expected_counts)

    def test_histogram_countries(self):
        self._assert_histogram('visitor_country', {'MX': 2, 'FR': 1})

    def test_histogram_continents(self):
        self._assert_histogram('visitor_continent',
                               {'North America': 2, 'Europe': 1})

    def test_histogram_browsers_verbose(self):
        # Each full user-agent string appears exactly once.
        full_user_agents = (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/540.36 (KHTML, like Gecko) Chrome/34.0.1750.112 Safari/538.36',
            'P3P Validator',
        )
        self._assert_histogram('visitor_useragent',
                               {agent: 1 for agent in full_user_agents})

    def test_histogram_browsers_trimmed(self):
        self._assert_histogram('visitor_useragent_trimmed',
                               {'Mozilla Safari': 2, 'P3P Validator': 1})
コード例 #6
0
 def setUp(self):
     # Give every test a fresh DataLoader holding the raw fixture records.
     fixture_path = './data/tests/issuu_test_data_loader.json'
     self.dl = DataLoader()
     self.dl.load_dataset_from(fixture_path)
コード例 #7
0
class DataLoaderTestCase(unittest.TestCase):
    # Tests for DataLoader, run against the data-loader fixture dataset.

    FIXTURE_PATH = './data/tests/issuu_test_data_loader.json'

    # (user agent, country, trimmed user agent, continent) expected for the
    # first three records, in order.
    FIXTURE_ROWS = (
        ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36",
         "MX", "Mozilla Safari", "North America"),
        ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/540.36 (KHTML, like Gecko) Chrome/34.0.1750.112 Safari/538.36",
         "MX", "Mozilla Safari", "North America"),
        ("P3P Validator", "FR", "P3P Validator", "Europe"),
    )

    def setUp(self):
        self.dl = DataLoader()
        self.dl.load_dataset_from(self.FIXTURE_PATH)

    def _expected_records(self, trimmed=False, continent=False):
        # Build the expected dictionaries, optionally including the derived
        # 'visitor_useragent_trimmed' and 'visitor_continent' fields.
        records = []
        for useragent, country, trimmed_agent, continent_name in self.FIXTURE_ROWS:
            record = {
                "visitor_useragent": useragent,
                "visitor_country": country,
            }
            if trimmed:
                record["visitor_useragent_trimmed"] = trimmed_agent
            if continent:
                record["visitor_continent"] = continent_name
            records.append(record)
        return records

    def _assert_records(self, loader, expected_records):
        # Compare the loader's leading records, by index, with the expected
        # ones.
        for index, expected in enumerate(expected_records):
            self.assertEqual(loader.dicts[index], expected)

    def test_load_dataset(self):
        self._assert_records(self.dl, self._expected_records())

    def test_trimmed_browser_regex_match(self):
        record = self.dl.dicts[0]
        self.dl.add_trimmed_browser(record)
        self.assertEqual(record['visitor_useragent_trimmed'],
                         'Mozilla Safari')

    def test_trimmed_browser_no_regex_match(self):
        record = self.dl.dicts[2]
        self.dl.add_trimmed_browser(record)
        self.assertEqual(record['visitor_useragent_trimmed'],
                         'P3P Validator')

    def test_map_trim_browser(self):
        self.dl.map_trim()
        self._assert_records(self.dl, self._expected_records(trimmed=True))

    def test_continent(self):
        mexican, french = self.dl.dicts[0], self.dl.dicts[2]
        for record in (mexican, french):
            self.dl.add_continent(record)
        self.assertEqual(mexican['visitor_continent'], 'North America')
        self.assertEqual(french['visitor_continent'], 'Europe')

    def test_map_add_continent(self):
        self.dl.map_continents()
        self._assert_records(self.dl, self._expected_records(continent=True))

    def test_complete_load(self):
        # Uses its own loader so complete_load starts from scratch.
        loader = DataLoader()
        loader.complete_load(self.FIXTURE_PATH)
        self._assert_records(
            loader, self._expected_records(trimmed=True, continent=True))
コード例 #8
0
ファイル: __main__.py プロジェクト: QDucasse/IssuuTracker
def main(argv=None):
    '''
    Command-line entry point: parse the arguments and run the requested task.
    An unknown or missing task id, a missing dataset file (every task except
    6) or a missing document UUID (tasks 4d and 5) is reported through the
    parser and exits with status 2, instead of being silently ignored or
    failing later inside a component.
    Parameters
    ==========
    argv: string list
        Arguments to parse. Default None (argparse then reads sys.argv[1:]).
    '''
    # ARGUMENTS PARSING
    # =================
    parser = argparse.ArgumentParser(description='ISSUU Tracker')
    parser.add_argument("-u", "--user_uuid", help="UUID of the user")
    parser.add_argument("-d", "--doc_uuid", help="UUID of the document")
    parser.add_argument("-t", "--task_id", help="ID of the task (2a,2b,3a,3b,4d,5,6)")
    parser.add_argument("-f", "--file_name", help="JSON file of the dataset")
    args = parser.parse_args(argv)

    # The plot tasks only differ by the DataVisualiser method they call.
    plot_tasks = {
        "2a": "plot_countries",
        "2b": "plot_continents",
        "3a": "plot_browsers_verbose",
        "3b": "plot_browsers",
    }
    document_tasks = ("4d", "5")

    # ARGUMENTS VALIDATION
    # ====================
    task_id = args.task_id
    if task_id not in plot_tasks and task_id not in document_tasks and task_id != "6":
        parser.error("unknown or missing task id %r "
                     "(expected one of 2a, 2b, 3a, 3b, 4d, 5, 6)" % (task_id,))

    # The GUI lets the user pick the dataset, so it needs no other argument.
    if task_id == "6":
        gui = GUI()
        gui.mainloop()
        return

    if args.file_name is None:
        parser.error("task %s requires a dataset file (-f/--file_name)" % task_id)
    if task_id in document_tasks and args.doc_uuid is None:
        parser.error("task %s requires a document UUID (-d/--doc_uuid)" % task_id)

    # COMPONENTS CREATION AND TASK EXECUTION
    # ======================================
    dl = DataLoader()
    if task_id in plot_tasks:
        dl.complete_load(args.file_name)
        dv = DataVisualiser(dl.dicts)
        getattr(dv, plot_tasks[task_id])()
    elif task_id == "4d":
        dl.load_dataset_from(args.file_name)
        af = AffinityFinder(dl.dicts)
        print(af.also_likes(args.doc_uuid))
    else:
        # Task 5: "also likes" graph.
        dl.load_dataset_from(args.file_name)
        gh = GraphHandler(dicts=dl.dicts)
        gh.create_graph(base_document_uuid=args.doc_uuid,
                        base_visitor_uuid=args.user_uuid)
コード例 #9
0
            Dictionaries loaded by a DataLoader. Default None (instance variable)
        '''
        if dicts is None:
            dicts = self.dicts
        self.plot_feature(dicts, 'visitor_useragent', 'Browser', 'Count')

    def plot_browsers(self, dicts=None):
        '''
        Plot the trimmed browser ('visitor_useragent_trimmed') of the
        visitors.
        Parameters
        ==========
        dicts: dictionary list
            Dictionaries loaded by a DataLoader. Default None (instance variable)
        '''
        records = self.dicts if dicts is None else dicts
        self.plot_feature(records, 'visitor_useragent_trimmed', 'Browser',
                          'Count')


if __name__ == "__main__":
    # Manual smoke run, executed only when this module is run as a script.
    # IMPORTS TO TEST
    from IssuuTracker.data_loader import DataLoader, path_base_dataset
    # Load the base dataset through complete_load before plotting.
    dl_full = DataLoader()
    dl_full.complete_load(path_base_dataset)

    # Draw the country, continent and trimmed-browser plots one after the
    # other.
    dv = DataVisualiser(dl_full.dicts)
    dv.plot_countries()
    dv.plot_continents()
    dv.plot_browsers()
コード例 #10
0
        # Sorting by frequency
        sort_list = sorted(docs_list, key=counts.get, reverse=True)
        # Removing duplicates
        sort_list = list(set(sort_list))
        if doc_uuid in sort_list:
            sort_list.remove(doc_uuid)
        return [doc_uuid] + sort_list[:9]

    def id(self, docs_list, doc_uuid):
        '''
        Identity function: return docs_list unchanged.
        Parameters
        ==========
        docs_list: list
            Documents to return as they are.
        doc_uuid: string
            Not used by this method.
            NOTE(review): presumably present so that this method shares a
            signature with other sorting functions; confirm against callers.
        '''
        return docs_list


if __name__ == "__main__":
    # Manual smoke run, executed only when this module is run as a script.
    # IMPORTS TO TEST
    # path_100k_dataset is only referenced by the commented-out lines at the
    # end of this block.
    from IssuuTracker.data_loader import DataLoader, path_base_dataset, path_100k_dataset
    # Load the base dataset without the extra complete_load processing.
    dl_full = DataLoader()
    dl_full.load_dataset_from(path_base_dataset)

    af = AffinityFinder(dl_full.dicts)

    # Test 1
    # print(af.readers_of_list('130228184234-6fd07690237d48aaa7be4e20cb767b13'))
    # print(af.has_read_list('bd378ce6df7cb9cd'))
    # print(af.also_likes_list('120928161916-bbf9b86bb865460a8e674d5338115a18'))

    # Test 2
    # Print the documents read by one visitor, then the "also likes" result
    # for one document.
    print(af.has_read('2f63e0cca690da91'))
    print(af.also_likes('140219141540-c900b41f845c67cc08b58911155c681c'))

    # dl_100k = DataLoader()
    # dl_100k.load_dataset_from(path_100k_dataset)
コード例 #11
0
            graph.attr('node',style='filled',color='green')

        if not(document_uuid in graph.node_attr):
            graph.node(document_uuid[-4:])
            graph.edge(visitor_uuid[-4:],document_uuid[-4:])
        else:
            graph.edge(visitor_uuid[-4:],document_uuid[-4:])

        graph.attr('node',style='solid',color='black')

if __name__ == "__main__":
    # Manual smoke run, executed only when this module is run as a script.
    # IMPORT TESTS
    from IssuuTracker.data_loader import DataLoader,path_base_dataset,path_100k_dataset,path_400k_dataset,path_600k_dataset,path_3m_dataset
    # BASE DATASET TESTS
    # ==================
    # The base dataset is loaded and wrapped in a GraphHandler, but both
    # create_graph calls below are commented out, so no graph is produced
    # for it.
    dl_base = DataLoader()
    dl_base.load_dataset_from(path_base_dataset)
    gh = GraphHandler(dl_base.dicts)
    # gh.create_graph(dl_base.df,'bd378ce6df7cb9cd','130228184234-6fd07690237d48aaa7be4e20cb767b13')
    # gh.create_graph(dl_base.dicts,'2f63e0cca690da91','140219141540-c900b41f845c67cc08b58911155c681c')

    # # 100K DATASET TESTS
    # # ==================
    # gh is rebound to a handler over the 100k dataset; only the second
    # create_graph call is active.
    dl_100k = DataLoader()
    dl_100k.load_dataset_from(path_100k_dataset)
    gh = GraphHandler(dicts=dl_100k.dicts)
    # gh.create_graph(base_visitor_uuid='00000000deadbeef',base_document_uuid='100806162735-00000000115598650cb8b514246272b5')
    gh.create_graph(base_visitor_uuid='00000000deadbeef',base_document_uuid='aaaaaaaaaaaa-00000000df1ad06a86c40000000feadbe')


    # # 400K DATASET TESTS
コード例 #12
0
class GUI(tk.Tk):
    '''
    GUI is a subclass of tk.Tk and holds as instance variables the
    DataLoader (dl) and, once a dataset has been chosen, the DataVisualiser
    (dv), AffinityFinder (af) and GraphHandler (gh) built from it, along with
    the frames and UUID entries of the window.
    '''
    def __init__(self, *args, **kwargs):
        '''
        Create the window, its four section frames and the two UUID entries,
        then let the init_* methods add the remaining widgets.
        '''
        tk.Tk.__init__(self, *args, **kwargs)

        self.al_list = []
        self.dl = DataLoader()
        # These three stay None until load_dataset succeeds.
        self.dv = None
        self.af = None
        self.gh = None
        # GRAPHICAL INITIALISATION
        # ========================

        self.title("ISSUU Tracker")
        self.geometry("520x330")
        self.grid_rowconfigure(1, weight=1)
        self.grid_columnconfigure(0, weight=1)

        self.df_frame = tk.Frame(self, padx=3, pady=10)
        self.stat_frame = tk.Frame(self, padx=3, pady=10)
        self.uuid_frame = tk.Frame(self, padx=3, pady=10)
        self.al_frame = tk.Frame(self, padx=3, pady=10)
        self.df_frame.grid(row=0)
        self.stat_frame.grid(row=1)
        self.uuid_frame.grid(row=2)
        self.al_frame.grid(row=3)

        # The entries are pre-filled with default identifiers.
        self.visitor_uuid_entry = tk.Entry(self.uuid_frame)
        self.visitor_uuid_entry.insert("end", "2f63e0cca690da91")
        self.visitor_uuid_entry.grid(column=2, row=2)
        self.document_uuid_entry = tk.Entry(self.uuid_frame)
        self.document_uuid_entry.insert(
            "end", "140219141540-c900b41f845c67cc08b58911155c681c")
        self.document_uuid_entry.grid(column=2, row=3)

        # Labels currently displaying the "also likes" result.
        self.al_label_list = []

        self.init_df_frame()
        self.init_stat_frame()
        self.init_uuid_frame()
        self.init_al_frame()

    # Initialisation Functions:
    # =========================
    def init_df_frame(self):
        '''
        Initialise the label and the "Choose Dataset" button of the dataset
        frame.
        '''
        label_df = tk.Label(self.df_frame, text="Dataset")
        label_df.grid(column=0, columnspan=10, row=0)
        load_dataset_button = tk.Button(self.df_frame,
                                        text="Choose Dataset",
                                        command=self.load_dataset)
        load_dataset_button.grid(column=0, row=1)

    def init_stat_frame(self):
        '''
        Initialise the different buttons and labels of the stat frame.
        '''
        label_stat = tk.Label(self.stat_frame,
                              text="Statistical visualisations")
        label_stat.grid(column=0, columnspan=10, row=0)

        plot_countries_button = tk.Button(self.stat_frame,
                                          text="Plot Countries",
                                          command=self.gui_plot_countries)
        plot_countries_button.grid(column=0, row=1)

        plot_continents_button = tk.Button(self.stat_frame,
                                           text="Plot Continents",
                                           command=self.gui_plot_continents)
        plot_continents_button.grid(column=1, row=1)

        plot_browsers_verbose_button = tk.Button(
            self.stat_frame,
            text="Plot Browsers (Verbose)",
            command=self.gui_plot_browsers_verbose)
        plot_browsers_verbose_button.grid(column=2, row=1)

        plot_browsers_button = tk.Button(self.stat_frame,
                                         text="Plot Browsers",
                                         command=self.gui_plot_browsers)
        plot_browsers_button.grid(column=3, row=1)

    def init_uuid_frame(self):
        '''
        Initialise the different labels of the uuid frame. Note that the entries
        have been put as instance variables to access their content.
        '''
        label_uuid = tk.Label(self.uuid_frame, text="UUIDs")
        label_uuid.grid(column=0, columnspan=10, row=0)

        visitor_uuid_label = tk.Label(self.uuid_frame, text="Visitor UUID: ")
        visitor_uuid_label.grid(column=1, row=2)

        document_uuid_label = tk.Label(self.uuid_frame, text="Document UUID: ")
        document_uuid_label.grid(column=1, row=3)

    def init_al_frame(self):
        '''
        Initialise the different buttons of the also likes frame.
        '''
        al_uuid = tk.Label(self.al_frame, text="Also Likes")
        al_uuid.grid(column=0, columnspan=10, row=0)

        al_list_button = tk.Button(self.al_frame,
                                   text="List Documents",
                                   command=self.gui_also_likes_list)
        al_list_button.grid(column=0, row=1)

        al_graph_button = tk.Button(self.al_frame,
                                    text="Plot Graph",
                                    command=self.gui_also_likes_graph)
        al_graph_button.grid(column=0, row=2)

    # Functions:
    # ==========
    def _dataset_loaded(self):
        '''
        Return True when the components built by load_dataset are available.
        Otherwise show a warning asking the user to choose a dataset and
        return False, so callers can skip their action instead of failing on
        a None component.
        '''
        if (self.dv is not None and self.af is not None
                and self.gh is not None):
            return True
        # Imported here so the block does not rely on a module-level import.
        from tkinter import messagebox
        messagebox.showwarning("ISSUU Tracker",
                               "Please choose a dataset first.")
        return False

    def load_dataset(self):
        '''
        Open a file dialog and allow the user to choose a file. Initialise the different
        component by loading it. Nothing happens when the dialog is cancelled
        or when the chosen file is not a .json file.
        '''
        filename = filedialog.askopenfilename(title="Select file",
                                              filetypes=(("json files",
                                                          "*.json"),
                                                         ("all files", "*.*")))
        # A cancelled dialog yields an empty value (an empty string; some Tk
        # builds are reported to return an empty tuple, which has no
        # endswith method).
        if not filename:
            return
        if filename.endswith('.json'):
            self.dl.complete_load(filename)
            self.dv = DataVisualiser(self.dl.dicts)
            self.af = AffinityFinder(self.dl.dicts)
            self.gh = GraphHandler(self.dl.dicts)

    # DF FRAME FUNCTIONS
    def gui_plot_countries(self):
        '''
        Plot the countries of the users of the dataset
        '''
        if not self._dataset_loaded():
            return
        self.dv.plot_countries()

    def gui_plot_continents(self):
        '''
        Plot the continents of the users of the dataset
        '''
        if not self._dataset_loaded():
            return
        self.dv.plot_continents()

    def gui_plot_browsers_verbose(self):
        '''
        Plot the full visitor_user_agent of the users of the dataset.
        '''
        if not self._dataset_loaded():
            return
        self.dv.plot_browsers_verbose()

    def gui_plot_browsers(self):
        '''
        Plot the trimmed visitor_user_agent of the users of the dataset.
        '''
        if not self._dataset_loaded():
            return
        self.dv.plot_browsers()

    def gui_also_likes_list(self):
        '''
        Display the list of "also liked" documents as labels next to the button.
        '''
        if not self._dataset_loaded():
            return
        visitor_uuid = self.visitor_uuid_entry.get()
        document_uuid = self.document_uuid_entry.get()
        al_list = self.af.also_likes(document_uuid, visitor_uuid)
        for elt in self.al_label_list:
            elt.destroy()
        # Drop the references to the destroyed labels; otherwise the list
        # keeps growing with dead widgets on every click.
        self.al_label_list = []

        for i, doc in enumerate(al_list):
            al_doc = tk.Label(self.al_frame, text=doc)
            al_doc.grid(column=1, columnspan=10, row=i + 1)
            self.al_label_list.append(al_doc)

    def gui_also_likes_graph(self):
        '''
        Generate and display the "also likes" graph.
        '''
        if not self._dataset_loaded():
            return
        visitor_uuid = self.visitor_uuid_entry.get()
        document_uuid = self.document_uuid_entry.get()
        self.gh.reset_graph()
        self.gh.create_graph(base_visitor_uuid=visitor_uuid,
                             base_document_uuid=document_uuid)
コード例 #13
0
 def setUp(self):
     # Give every test an AffinityFinder built from the raw fixture records.
     fixture_path = './data/tests/issuu_test_affinity_finder.json'
     self.dl = DataLoader()
     self.dl.load_dataset_from(fixture_path)
     self.af = AffinityFinder(self.dl.dicts)
コード例 #14
0
 def setUp(self):
     # Give every test a DataVisualiser built from the fully loaded fixture.
     fixture_path = './data/tests/issuu_test_data_visualiser.json'
     self.dl = DataLoader()
     self.dl.complete_load(fixture_path)
     self.dv = DataVisualiser(self.dl.dicts)
コード例 #15
0
 def setUp(self):
     # Give every test a GraphHandler built from the raw fixture records.
     fixture_path = './data/tests/issuu_test_graph_handler.json'
     self.dl = DataLoader()
     self.dl.load_dataset_from(fixture_path)
     self.gh = GraphHandler(self.dl.dicts)