Esempio n. 1
0
    def test_creds_default_location_beneath_home(self):
        """With no ``--creds`` flag, ``creds`` resolves to ``self.default_creds``."""
        argv = 'scrape_user -u test_user --all'.split()

        namespace = parse_scraper_args(self.parser.parse_args(argv),
                                       self.parser)

        self.assertEqual(namespace.creds, self.default_creds)
Esempio n. 2
0
    def test_output_dir_default(self):
        """With no output directory given, ``output_dir`` is the current working directory."""
        argv = 'scrape_user -u test_user --all'.split()

        namespace = parse_scraper_args(self.parser.parse_args(argv),
                                       self.parser)
        expected_dir = os.getcwd()

        self.assertEqual(namespace.output_dir, expected_dir)
Esempio n. 3
0
    def test_implicit_mode_defaults_to_all_choices(self):
        """Without ``--mode``, ``mode`` equals the full choice list of the sub-command."""
        user_commands = (
            'scrape_user -u test_user --all',
            'scrape_user -u test_user --start 2020-01-01 --end 2020-02-01',
        )
        publication_commands = (
            'scrape_publication -u test_pub --all',
            'scrape_publication -u test_pub --start 2020-01-01 --end 2020-02-01',
        )

        def resolve_all(commands):
            # Run the argparse stage for every command before the
            # post-processing stage, then hand back the final namespaces.
            namespaces = [self.parser.parse_args(c.split()) for c in commands]
            return [parse_scraper_args(ns, self.parser) for ns in namespaces]

        for resolved in resolve_all(user_commands):
            self.assertEqual(resolved.mode, USER_MODE_CHOICES)

        for resolved in resolve_all(publication_commands):
            self.assertEqual(resolved.mode, PUB_MODE_CHOICES)
Esempio n. 4
0
    def test_period_end_defaults_to_most_recent_full_day_utc(self):
        """With no ``--end`` flag, ``end`` is midnight UTC of the current day.

        The clock is sampled both before and after the parser runs so the
        test cannot fail spuriously when the UTC date rolls over in between.
        """
        input_ = 'scrape_user -u test_user --start 2020-01-01'.split()

        def utc_midnight_today():
            # Truncate the current UTC time to the start of its day.
            return datetime.now(timezone.utc).replace(hour=0,
                                                      minute=0,
                                                      second=0,
                                                      microsecond=0)

        before = utc_midnight_today()
        args = self.parser.parse_args(input_)
        args = parse_scraper_args(args, self.parser)
        after = utc_midnight_today()

        # `before` and `after` only differ if midnight UTC passed while the
        # arguments were being parsed; either value is then a correct default.
        self.assertIn(args.end, (before, after))
Esempio n. 5
0
    def test_period_start_defaults_beginning_day_prior_to_end(self):
        """With no ``--start`` flag, ``start`` is midnight UTC of the day before ``--end``.

        Checked for both a date-only and a date-time ``--end`` value.
        """
        commands = (
            'scrape_user -u test_user --end 2020-02-01'.split(),
            'scrape_user -u test_user --end 2020-02-01T12:00:00'.split(),
        )

        expected_start = datetime.strptime(
            '2020-01-31', '%Y-%m-%d').replace(tzinfo=timezone.utc)

        for argv in commands:
            namespace = parse_scraper_args(self.parser.parse_args(argv),
                                           self.parser)
            self.assertEqual(namespace.start, expected_start)
Esempio n. 6
0
    def test_mode_flag_returns_list(self):
        """``mode`` is a list whether ``--mode`` gets one value, several, or is omitted."""
        commands = (
            'scrape_user -u test_user --all --mode events',
            'scrape_user -u test_user --all --mode events referrers',
            'scrape_user -u test_user --start 2020-01-01 --end 2020-02-01',
        )

        namespaces = [self.parser.parse_args(c.split()) for c in commands]
        resolved = [parse_scraper_args(ns, self.parser) for ns in namespaces]

        for namespace in resolved:
            self.assertIsInstance(namespace.mode, list)
Esempio n. 7
0
    def test_sid_and_uid_required_together_if_input(self):
        """Passing only one of ``--sid`` / ``--uid`` exits with a message naming both."""
        commands = (
            'scrape_user --sid foo -u test_user',
            'scrape_user --uid bar -u test_user',
        )
        namespaces = [self.parser.parse_args(c.split()) for c in commands]

        for namespace in namespaces:
            with self.assertRaises(SystemExit):
                with capture_sys_output() as (_, captured_err):
                    parse_scraper_args(namespace, self.parser)
            # The captured buffer stays readable after the context exits.
            self.assertIn('both "sid" and "uid" arguments',
                          captured_err.getvalue())
Esempio n. 8
0
    def test_all_and_period_flags_mutually_exclusive(self):
        """Combining ``--all`` with ``--start`` and/or ``--end`` exits with an error message."""
        commands = (
            'scrape_user -u test_user --all --start 2020-01-01',
            'scrape_user -u test_user --all --end 2020-01-01',
            'scrape_user -u test_user --all --start 2020-01-01 --end 2020-02-01',
        )
        namespaces = [self.parser.parse_args(c.split()) for c in commands]

        for namespace in namespaces:
            with self.assertRaises(SystemExit):
                with capture_sys_output() as (_, captured_err):
                    parse_scraper_args(namespace, self.parser)
            # The captured buffer stays readable after the context exits.
            self.assertIn('Can\'t use "--all" flag with',
                          captured_err.getvalue())
Esempio n. 9
0
    def test_period_set(self):
        """A command with none of ``--all`` / ``--start`` / ``--end`` exits with 'Period must be set'.

        Checked both with an explicit ``--creds`` path and without one.
        """
        # TODO - this hits error because creds path needs to be mocked
        commands = (
            'scrape_user --creds ~/.medium_creds.ini -u test_user',
            'scrape_user -u test_user',
        )
        namespaces = [self.parser.parse_args(c.split()) for c in commands]

        for namespace in namespaces:
            with self.assertRaises(SystemExit):
                with capture_sys_output() as (_, captured_err):
                    parse_scraper_args(namespace, self.parser)
            # The captured buffer stays readable after the context exits.
            self.assertIn('Period must be set', captured_err.getvalue())
Esempio n. 10
0
    def test_period_flags_obey_correct_time_order(self):
        """An ``--end`` earlier than ``--start`` exits with an error message.

        Checked for both date-only and date-time flag values.
        """
        start, end = '2020-01-02', '2020-01-01'
        start_dt, end_dt = '2020-01-02T00:00:00', '2020-01-01T00:00:00'

        commands = (
            f'scrape_user -u test_user --start {start} --end {end}',
            f'scrape_user -u test_user --start {start_dt} --end {end_dt}',
        )
        namespaces = [self.parser.parse_args(c.split()) for c in commands]

        for namespace in namespaces:
            with self.assertRaises(SystemExit):
                with capture_sys_output() as (_, captured_err):
                    parse_scraper_args(namespace, self.parser)
            # The captured buffer stays readable after the context exits.
            self.assertIn('"--end" cannot be prior to "--start"',
                          captured_err.getvalue())
Esempio n. 11
0
def main():
    """Run the command-line entry point.

    Parses the command line and dispatches on ``args.command``:

    * ``fetch_cookies`` - signs in through ``MediumAuthorizer`` and saves
      the cookies to ``args.creds``.
    * ``scrape_user`` / ``scrape_publication`` - builds the matching stat
      grabber, creates the output directories and writes the stats for
      every requested mode.

    Any other command skips straight to the closing message.
    """
    parser = get_argparser()
    args = parser.parse_args()

    # TODO - make this a plain path; not a directory argument

    command = args.command

    if command == 'fetch_cookies':
        from medium_stats.cookie_fetcher import MediumAuthorizer

        email, password = unpack_email_pwd(args)

        authorizer = MediumAuthorizer(args.u, email, password)
        authorizer.sign_in()
        authorizer.save_cookies(args.creds)
        print(section_break)

    elif command in ('scrape_user', 'scrape_publication'):
        args = parse_scraper_args(args, parser)

        # sid/uid come from the config helper built on ``args.creds`` when
        # that is set, otherwise directly from the command line.
        if not args.creds:
            sid, uid = args.sid, args.uid
        else:
            cfg = MediumConfigHelper(args.creds, args.u)
            sid, uid = cfg.sid, cfg.uid

        modes = list(args.mode)

        print('\nGetting Preliminary Data...', end='\n\n')
        if command == 'scrape_user':
            grabber = StatGrabberUser(args.u,
                                      sid,
                                      uid,
                                      args.start,
                                      args.end,
                                      already_utc=True)
            folders = [user_mode_attrs[mode]['folder'] for mode in modes]
            sub_dir = create_directories(args.output_dir, grabber.slug,
                                         folders)

            # get summary stats to derive article_ids and user creation_time
            data = grabber.get_summary_stats()
            articles = grabber.get_article_ids(data)
            preliminary_mode = 'summary'
        else:
            grabber = StatGrabberPublication(args.s,
                                             sid,
                                             uid,
                                             args.start,
                                             args.end,
                                             already_utc=True)
            folders = [pub_mode_attrs[mode]['folder'] for mode in modes]
            sub_dir = create_directories(args.output_dir, grabber.slug,
                                         folders)

            data = grabber.get_all_story_overview()
            articles = grabber.get_article_ids(data)
            preliminary_mode = 'story_overview'

        # The preliminary data is always fetched (it yields the article
        # ids) but only written out when its mode was requested.
        if preliminary_mode in modes:
            write_stats(grabber, data, preliminary_mode, grabber.now, sub_dir)

        # go through remainder of modes
        for mode in modes:
            if mode in ('summary', 'story_overview'):
                continue
            # 'events' is the only mode fetched without the article ids
            extra_args = () if mode == 'events' else (articles, )
            data = get_stats(grabber, mode, grabber.now, *extra_args)
            write_stats(grabber, data, mode, grabber.now, sub_dir)

    print('All done!')