def test_fifteen_retries(yahoo_response):
    """A persistently failing (HTTP 500) endpoint is requested 15 times before Recoverable surfaces."""
    mocked = yahoo_response('v1/groups/groupname/', ygError={}, status=500)
    api = YahooGroupsAPI('groupname')

    with raises(yahoogroupsapi.Recoverable):
        api.HackGroupInfo()

    # Checked after the context manager: the raising call above ends the block.
    attempts = len(mocked.calls)
    assert attempts == 15
def test_one_retry(yahoo_response):
    """A single HTTP 500 followed by a good response yields the payload after exactly two requests."""
    expected = {'ok': 'on second try'}

    # First registration fails with a 500; the second one answers with the payload.
    yahoo_response('v1/groups/groupname/', ygError={}, status=500)
    mocked = yahoo_response('v1/groups/groupname/', expected)

    api = YahooGroupsAPI('groupname')
    payload = api.HackGroupInfo()

    assert len(mocked.calls) == 2
    assert payload == expected
def test_unauthorized_error(yahoo_response):
    """A 401 error response raises Unauthorized after a single request (no retries)."""
    error_body = {
        "hostname": "gapi17.grp.bf1.yahoo.com",
        "httpStatus": 401,
        "errorMessage": "User does not have READ permission for GROUP. Mess...",
        "errorCode": 1103,
        "sid": "SID:YHOO:groups.yahoo.com:00000000000000000000000000000000:0",
    }
    mocked = yahoo_response(
        'v1/groups/groupname/',
        ygError=error_body,
        ygPerms=YGPERMS_NONE,
        status=401,
    )
    api = YahooGroupsAPI('groupname')

    with raises(yahoogroupsapi.Unauthorized):
        api.HackGroupInfo()

    assert len(mocked.calls) == 1
def test_get_json(yahoo_response, cookies):
    """get_json requests the v2 URL built from its path parts and keyword params.

    Also checks that the cookies given to the client are sent with the request
    and that the mocked payload is what get_json returns.
    """
    # The query string must be 'param1=c&param2=4' to correspond to the
    # keyword arguments passed to get_json below.
    response = yahoo_response(
        'v2/groups/groupname/files/a/2?param1=c&param2=4',
        {'result': 'returned data'},
    )

    yga = YahooGroupsAPI('groupname', cookies)
    data = yga.get_json('files', 'a', 2, param1='c', param2=4)

    # Parse the outgoing Cookie header back into name -> value pairs.
    request = response.calls[0].request
    request_cookies = SimpleCookie()
    request_cookies.load(request.headers['Cookie'])

    assert dict(cookies) == {k: v.value for k, v in request_cookies.items()}
    assert data == {'result': 'returned data'}
def test_warc_enabled():
    """With a WARC writer set, one API call is recorded as a response record then a request record."""
    # Note that this does not use the responses mocking framework, as it conflicts with the warc captures.
    # This makes a real request to Yahoo, so might fail.
    target = 'https://groups.yahoo.com/api/v1/groups/test/'

    api = YahooGroupsAPI('test')
    warc_buffer = BufferWARCWriter(gzip=False)
    api.set_warc_writer(warc_buffer)
    api.HackGroupInfo()

    # Collect (target URI, record type) for every record written to the buffer.
    captured = []
    for record in ArchiveIterator(warc_buffer.get_stream()):
        captured.append((record.rec_headers['WARC-Target-URI'], record.rec_type))

    wanted = [(target, 'response'), (target, 'request')]
    assert wanted == captured
def test_get_json(response, cookies):
    """get_json requests the v2 URL built from its path parts and keyword params.

    The mocked body wraps the payload in 'ygData'; the test expects get_json to
    return the inner payload and the client's cookies to be sent with the request.
    """
    # The query string must be 'param1=c&param2=4' to correspond to the
    # keyword arguments passed to get_json below.
    response.add(
        responses.GET,
        'https://groups.yahoo.com/api/v2/groups/groupname/files/a/2?param1=c&param2=4',
        json={'ygData': {'result': 'returned data'}},
    )

    yga = YahooGroupsAPI('groupname', cookies)
    data = yga.get_json('files', 'a', 2, param1='c', param2=4)

    # Parse the outgoing Cookie header back into name -> value pairs.
    request = response.calls[0].request
    request_cookies = Cookie.SimpleCookie()
    request_cookies.load(request.headers['Cookie'])

    # items() gives the same pairs as iteritems() here.
    assert dict(cookies) == {k: v.value for k, v in request_cookies.items()}
    assert data == {'result': 'returned data'}
help='Only archive database') pe = p.add_argument_group(title='Email Options') pe.add_argument('-r', '--no-reattach', action='store_true', help="Don't reattach attachment files to email") pe.add_argument('-s', '--no-save', action='store_true', help="Don't save email attachments as individual files") p.add_argument('--overwrite', action='store_true', help="Re-download and overwrite existing files and messages") p.add_argument('group', type=str) args = p.parse_args() yga = YahooGroupsAPI(args.group, args.cookie_t, args.cookie_y) if args.username: password = args.password or getpass.getpass() print "logging in..." if not yga.login(args.username, password): print "Login failed" sys.exit(1) if not (args.email or args.files or args.photos or args.database): args.email = args.files = args.photos = args.database = True skip_existing = not args.overwrite with Mkchdir(args.group): if args.email: with Mkchdir('email'): archive_email(yga,
raise e coloredlogs.install(level=log_level, **log_format) else: log_stdout_handler = logging.StreamHandler(sys.stdout) log_stdout_handler.setLevel(log_level) log_stdout_handler.setFormatter(log_formatter) root_logger.addHandler(log_stdout_handler) cookie_jar = init_cookie_jar(args.cookie_file, args.cookie_t, args.cookie_y, args.cookie_e) headers = {} if args.user_agent: headers['User-Agent'] = args.user_agent yga = YahooGroupsAPI(args.group, cookie_jar, headers, delay=args.delay) if not (args.email or args.files or args.photos or args.database or args.links or args.calendar or args.about or args.polls or args.attachments or args.members): args.email = args.files = args.photos = args.database = args.links = args.calendar = args.about = \ args.polls = args.attachments = args.members = True with Mkchdir(args.group, sanitize=False): log_file_handler = logging.FileHandler('archive.log') log_file_handler.setFormatter(log_formatter) root_logger.addHandler(log_file_handler) if args.warc: try: from warcio import WARCWriter
help= 'Output WARC file of raw network requests. [Requires warcio package installed]' ) p.add_argument('-v', '--verbose', action='store_true') p.add_argument('group', type=str) args = p.parse_args() if not args.verbose: log_stdout_handler.setLevel(logging.INFO) cookie_jar = init_cookie_jar(args.cookie_file, args.cookie_t, args.cookie_y, args.cookie_e) yga = YahooGroupsAPI(args.group, cookie_jar) if not (args.email or args.files or args.photos or args.database or args.links or args.calendar or args.about or args.polls or args.attachments or args.members): args.email = args.files = args.photos = args.database = args.links = args.calendar = args.about = \ args.polls = args.attachments = args.members = True with Mkchdir(args.group): log_file_handler = logging.FileHandler('archive.log') log_file_handler.setFormatter(log_formatter) root_logger.addHandler(log_file_handler) if args.warc: try: from warcio import WARCWriter
def test_not_authenticated_error(yahoo_response):
    """A 307 response ends in NotAuthenticated, with 15 requests recorded."""
    mocked = yahoo_response('v1/groups/groupname/', status=307)
    api = YahooGroupsAPI('groupname')

    with raises(yahoogroupsapi.NotAuthenticated):
        api.HackGroupInfo()

    request_count = len(mocked.calls)
    assert request_count == 15
def test_correct_ua(yahoo_response):
    """A User-Agent supplied through ``headers`` is sent on the outgoing request."""
    custom_headers = {'User-Agent': 'test'}
    mocked = yahoo_response('v1/groups/groupname/', {})

    api = YahooGroupsAPI('groupname', headers=custom_headers)
    api.HackGroupInfo()

    sent = mocked.calls[0].request
    assert sent.headers['user-agent'] == 'test'
po.add_argument('-i', '--photos', action='store_true', help='Only archive photo galleries') po.add_argument('-d', '--database', action='store_true', help='Only archive database') pe = p.add_argument_group(title='Email Options') pe.add_argument('-r', '--no-reattach', action='store_true', help="Don't reattach attachment files to email") pe.add_argument('-s', '--no-save', action='store_true', help="Don't save email attachments as individual files") p.add_argument('group', type=str) args = p.parse_args() yga = YahooGroupsAPI(args.group, args.cookie_t, args.cookie_y) if args.username: password = args.password or getpass.getpass() print "logging in..." if not yga.login(args.username, password): print "Login failed" sys.exit(1) if not (args.email or args.files or args.photos or args.database): args.email = args.files = args.photos = args.database = True with Mkchdir(args.group): if args.email: with Mkchdir('email'): archive_email(yga, reattach=(not args.no_reattach), save=(not args.no_save)) if args.files:
pe.add_argument('-s', '--no-save', action='store_true', help="Don't save email attachments as individual files") pe.add_argument('--html', action='store_false', help="Don't save the non-raw version of message") p.add_argument('group', type=str) args = p.parse_args() if not args.verbose: log_stdout_handler.setLevel(logging.INFO) yga = YahooGroupsAPI(args.group, args.cookie_t, args.cookie_y, args.cookie_e) if not (args.email or args.files or args.photos or args.database or args.links or args.calendar or args.about or args.polls or args.attachments or args.members): args.email = args.files = args.photos = args.database = args.links = args.calendar = args.about = \ args.polls = args.attachments = args.members = True with Mkchdir(args.group): log_file_handler = logging.FileHandler('archive.log') log_file_handler.setFormatter(log_formatter) root_logger.addHandler(log_file_handler) if args.email: with Mkchdir('email'): archive_email(yga, save=(not args.no_save), html=args.html)
def test_not_authenticated_error(yahoo_response):
    """A 307 response currently ends in Recoverable, with 15 requests recorded."""
    mocked = yahoo_response('v1/groups/groupname/', status=307)
    api = YahooGroupsAPI('groupname')

    # Temporary fix, replaced: yahoogroupsapi.NotAuthenticated
    expected_error = yahoogroupsapi.Recoverable
    with raises(expected_error):
        api.HackGroupInfo()

    request_count = len(mocked.calls)
    assert request_count == 15
def main():
    """Command-line entry point: set up logging, build the API client and run the selected archivers.

    Steps, in order:
      1. Parse arguments and configure the root logger (stdout handler or
         ``coloredlogs`` when ``args.colour`` is set).
      2. Build the cookie jar, optional User-Agent header and ``YahooGroupsAPI``.
      3. If no content type was selected, enable every type except ``email``.
      4. Inside the group directory: attach ``archive.log``, optionally open a
         WARC writer on ``data.warc.gz``, then run each selected archiver in
         its own sub-directory.

    Exits the process (via ``sys.exit``) when ``coloredlogs`` or ``warcio`` is
    requested but not importable.
    """
    args = parse_arguments()

    # Setup logging
    # The root logger passes everything through; each handler applies its own threshold.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)

    log_format = {
        'fmt': '%(asctime)s %(levelname)s %(name)s %(message)s',
        'datefmt': '%Y-%m-%d %H:%M:%S.%f %Z',
    }
    log_formatter = CustomFormatter(**log_format)

    if args.verbose:
        log_level = logging.DEBUG
    elif args.quiet:
        log_level = logging.ERROR
    else:
        log_level = logging.INFO

    if args.colour:
        try:
            import coloredlogs
        except ImportError:
            sys.exit("Error: Coloured logging output requires the 'coloredlogs' package to be installed.")
        coloredlogs.install(level=log_level, **log_format)
    else:
        log_stdout_handler = logging.StreamHandler(sys.stdout)
        log_stdout_handler.setLevel(log_level)
        log_stdout_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_stdout_handler)

    cookie_jar = init_cookie_jar(args.cookie_file, args.cookie_t, args.cookie_y, args.cookie_e)

    headers = {}
    if args.user_agent:
        headers['User-Agent'] = args.user_agent

    yga = YahooGroupsAPI(args.group, cookie_jar, headers, min_delay=args.delay)

    # Default to all unique content. This includes topics and raw email,
    # but not the full email download since that would duplicate html emails we get through topics.
    if not (args.email or args.files or args.photos or args.database or args.links or args.calendar or args.about
            or args.polls or args.attachments or args.members or args.topics or args.raw):
        args.files = args.photos = args.database = args.links = args.calendar = args.about = \
            args.polls = args.attachments = args.members = args.topics = args.raw = True

    with Mkchdir(args.group, sanitize=False):
        # No level is set on the file handler, so it is limited only by the root logger's level.
        log_file_handler = logging.FileHandler('archive.log', 'a', 'utf-8')
        log_file_handler.setFormatter(log_formatter)
        root_logger.addHandler(log_file_handler)

        fhwarc = None
        try:
            if args.warc:
                # Only the import is guarded, so unrelated errors are not misreported as a missing package.
                try:
                    from warcio import WARCWriter
                except ImportError:
                    logging.error('WARC output requires the warcio package to be installed.')
                    sys.exit(1)

                fhwarc = open('data.warc.gz', 'ab')
                warc_writer = WARCWriter(fhwarc)
                warcmeta = warc_writer.create_warcinfo_record(fhwarc.name, WARC_META_PARAMS)
                warc_writer.write_record(warcmeta)
                yga.set_warc_writer(warc_writer)

            if args.overwrite:
                # NOTE(review): presumably makes the file-writing helpers overwrite existing
                # files -- confirm against the code that reads hacky_vars.
                hacky_vars['file'] = True

            if args.email:
                with Mkchdir('email'):
                    archive_email(yga, message_subset=args.ids, start=args.start, stop=args.stop,
                                  noAttachments=args.noattachments)
            if args.files:
                with Mkchdir('files'):
                    archive_files(yga)
            if args.photos:
                with Mkchdir('photos'):
                    archive_photos(yga)
            if args.topics:
                with Mkchdir('topics'):
                    archive_topics(yga, noAttachments=args.noattachments)
            if args.raw:
                # Raw messages go into the same 'email' directory as the full email download.
                with Mkchdir('email'):
                    archive_email(yga, message_subset=args.ids, start=args.start, stop=args.stop, skipHTML=True)
            if args.database:
                with Mkchdir('databases'):
                    archive_db(yga)
            if args.links:
                with Mkchdir('links'):
                    archive_links(yga)
            if args.about:
                with Mkchdir('about'):
                    archive_about(yga)
            if args.polls:
                with Mkchdir('polls'):
                    archive_polls(yga)
            if args.attachments:
                with Mkchdir('attachments'):
                    archive_attachments(yga)
            if args.members:
                with Mkchdir('members'):
                    archive_members(yga)
            if args.calendar:
                with Mkchdir('calendar'):
                    archive_calendar(yga)
        finally:
            # Close the WARC file even when an archiver raises, so the handle is not leaked.
            if fhwarc is not None:
                fhwarc.close()