Exemple #1
0
 def test_thesis(self):
     """Run the LSH machine once per thesis test case and journal.

     For each test name ``T001`` .. ``T006`` every journal in
     ``self.journals`` is processed, visiting the mapping's keys in
     sorted order. All runs write into one shared working directory.
     The test passes as long as no call raises; nothing is asserted
     about the produced output.
     """
     outdir = get_working_dir()
     thesis_cases = ('T001', 'T002', 'T003', 'T004', 'T005', 'T006')
     for case_name in thesis_cases:
         # Keys are re-sorted for every case so the visiting order is
         # deterministic and independent of the mapping's own order.
         for key in sorted(self.journals):
             MaschineLSH(
                 config,
                 self.journals[key],
                 outdir=outdir,
                 testname=case_name,
             ).process()
Exemple #2
0
    def test_journal_001(self):
        """Check entry lookup and cross construction for the test domain.

        Expected values (path, timestamp, wlid, cross id) are read from
        the config section named by ``self.test_domain``. The test then
        verifies that the journal entry found for that path/timestamp
        carries the expected wlid, that the cross built from the entry
        has the expected unique id, an all-ones bit tuple and the same
        wlid on its ``cur`` entry, and finally that a ``DBCEMethodLSH``
        can be constructed from the cross without raising.
        """
        domain = self.test_domain

        # Reference values for the entry under test.
        entry_path = config.get(domain, 'check_path1')
        entry_ts = config.getint(domain, 'check_ts11')
        expected_wlid = config.get(domain, 'check_uid11')

        outdir = get_working_dir()
        machine = MaschineLSH(config, self.journal, outdir=outdir)

        # The fixture journal must belong to the domain being checked.
        self.assertEqual(domain, self.journal.domain)
        entry = machine.journal.get_entry(entry_path, entry_ts)
        self.assertEqual(expected_wlid, entry['warc_entry'].wlid)

        cross = machine.get_cross(entry)
        expected_cross_id = config.get(domain, 'check_cross11')
        self.assertEqual(expected_cross_id, cross.get_unique_id())

        all_bits_set = (1,) * 5
        self.assertEqual(all_bits_set, cross.to_bits())
        self.assertEqual(expected_wlid, cross.cur['warc_entry'].wlid)

        # Construction only: the instance is not used any further here.
        DBCEMethodLSH(config, cross)
Exemple #3
0
    def test_process_001(self):
        """Process the fixture journal for test cases T001, T002 and T006.

        A ``MaschineLSH`` is built for ``self.journal`` once per test
        name and ``process()`` is called on it; all runs share one
        working directory. The test passes as long as nothing raises.
        """
        section = self.test_domain

        # NOTE(review): the results of these three lookups are never used
        # below. The calls are kept so that any error they raise still
        # surfaces before processing starts - confirm whether intended.
        config.get(section, 'check_path1')
        config.getint(section, 'check_ts11')
        config.get(section, 'check_uid11')

        outdir = get_working_dir()
        for case_name in ('T001', 'T002', 'T006'):
            MaschineLSH(
                config,
                self.journal,
                outdir=outdir,
                testname=case_name,
            ).process()
Exemple #4
0
        parser.error('missing arguments')

    elif args.report_dir:
        try:
            erc = load_eval_results(config, args.report_dir)
        except:
            import sys
            import traceback
            atype, value, tb = sys.exc_info()
            traceback.print_exc()
            pdb.post_mortem(tb)

    elif args.mode == 'idx':
        journals = idx_to_journals(config, args.idx_sources)
        if args.lsh_only and not args.manual_annotate:
            workdir = get_working_dir()
            for domain in sorted(journals):
                journal = journals[domain]
                try:
                    maschine = MaschineLSH(config, journal, outdir=workdir)
                    maschine.process()
                except:
                    import sys
                    import traceback
                    atype, value, tb = sys.exc_info()
                    traceback.print_exc()
                    pdb.post_mortem(tb)

        elif args.manual_annotate:
            if len(args.domain) != 1:
                parser.error('manual annotation needs exactly one domain')