def LoginTumblrGDPR( self ):
    """
    Click through tumblr's GDPR consent page.
    
    Step one GETs the tumblr front page and reads the 'content' attribute of the meta tag whose id is
    'tumblr_form_key'. Step two POSTs a JSON consent body to the privacy consent endpoint, sending that
    form key in the 'X-tumblr-form-key' header. Both jobs are marked with SetForLogin( True ), queued on
    self.engine and waited on with WaitUntilDone() before this method carries on.
    
    Raises HydrusExceptions.ParseException if the front page does not yield exactly one form key.
    """
    
    # t-thanks, EU
    # approach cribbed from poking around here:
    # https://github.com/johanneszab/TumblThree/commit/3563d6cebf1a467151d6b8d6eee9806ddd6e6364
    
    # NOTE(review): this first request is plain http while the consent POST below is https -- confirm
    # whether that is intentional.
    form_key_job = ClientNetworkingJobs.NetworkJob( 'GET', 'http://www.tumblr.com/' )
    
    form_key_job.SetForLogin( True )
    
    self.engine.AddJob( form_key_job )
    
    form_key_job.WaitUntilDone()
    
    front_page_html = form_key_job.GetContent()
    
    form_key_rule = ClientParsing.ParseRuleHTML( rule_type = ClientParsing.HTML_RULE_TYPE_DESCENDING, tag_name = 'meta', tag_attributes = { 'id' : 'tumblr_form_key' } )
    
    form_key_formula = ClientParsing.ParseFormulaHTML( tag_rules = [ form_key_rule ], content_to_fetch = ClientParsing.HTML_CONTENT_ATTRIBUTE, attribute_to_fetch = "content" )
    
    found_keys = form_key_formula.Parse( {}, front_page_html )
    
    # anything other than exactly one hit means we cannot trust what we parsed
    if len( found_keys ) == 1:
        
        tumblr_form_key = found_keys[0]
        
    else:
        
        raise HydrusExceptions.ParseException( 'Could not figure out the tumblr form key for the GDPR click-through.' )
        
    
    #
    
    consent_body = '{\"eu_resident\":true,\"gdpr_is_acceptable_age\":true,\"gdpr_consent_core\":true,\"gdpr_consent_first_party_ads\":true,\"gdpr_consent_third_party_ads\":true,\"gdpr_consent_search_history\":true,\"redirect_to\":\"\"}'
    
    consent_referral_url = 'https://www.tumblr.com/privacy/consent?redirect='
    
    consent_job = ClientNetworkingJobs.NetworkJob( 'POST', 'https://www.tumblr.com/svc/privacy/consent', body = consent_body, referral_url = consent_referral_url )
    
    consent_job.SetForLogin( True )
    
    # headers are added in this fixed order
    consent_headers = (
        ( 'Accept', 'application/json, text/javascript, */*; q=0.01' ),
        ( 'Content-Type', 'application/json' ),
        ( 'X-Requested-With', 'XMLHttpRequest' ),
        ( 'X-tumblr-form-key', tumblr_form_key )
    )
    
    for ( header_name, header_value ) in consent_headers:
        
        consent_job.AddAdditionalHeader( header_name, header_value )
        
    
    self.engine.AddJob( consent_job )
    
    consent_job.WaitUntilDone()
    
    # test cookies here or something
    
    HydrusData.ShowText( 'Looks like tumblr GDPR click-through worked! You should be good for a year, at which point we should have an automatic solution for this!' )
def ConvertBooruToNewObjects(booru):
    """Build new-style downloader objects from a legacy booru definition.

    Reads the booru's name (GetName) and data tuple (GetData) and produces:

    * a gallery URL generator built from the legacy search URL, with the
      '%index%' placeholder replaced by '1' or '0' depending on
      advance_by_page_num;
    * a gallery page parser that collects the href of <a> tags found under
      elements carrying the thumbnail class;
    * a post page parser that collects the file URL (located by image_id,
      or else by the link text image_data, when either is set) and one
      tag parser per entry in tag_classnames_to_namespaces.

    Returns:
        The tuple (gug, gallery_parser, post_parser).
    """
    descending = ClientParsing.HTML_RULE_TYPE_DESCENDING

    def descend_rule(tag_name=None, tag_attributes=None, **extra):
        # Every rule built here is a descending search with no tag index.
        return ClientParsing.ParseRuleHTML(
            rule_type=descending,
            tag_name=tag_name,
            tag_attributes=tag_attributes,
            tag_index=None,
            **extra)

    def url_content_parser(parser_name, rules, attribute, priority):
        # A parser that reads `attribute` from the matched tags as a
        # desired URL with the given priority.
        attribute_formula = ClientParsing.ParseFormulaHTML(
            tag_rules=rules,
            content_to_fetch=ClientParsing.HTML_CONTENT_ATTRIBUTE,
            attribute_to_fetch=attribute)

        return ClientParsing.ContentParser(
            name=parser_name,
            content_type=HC.CONTENT_TYPE_URLS,
            formula=attribute_formula,
            additional_info=(HC.URL_TYPE_DESIRED, priority))

    name = ('zzz - auto-generated from legacy booru system - ' +
            booru.GetName())

    (search_url, search_separator, advance_by_page_num, thumb_classname,
     image_id, image_data, tag_classnames_to_namespaces) = booru.GetData()

    first_index = '1' if advance_by_page_num else '0'

    search_url = search_url.replace('%index%', first_index)

    gug = ClientNetworkingDomain.GalleryURLGenerator(
        name + ' search',
        url_template=search_url,
        replacement_phrase='%tags%',
        search_terms_separator=search_separator,
        initial_search_text='tag search',
        example_search_text='blonde_hair blue_eyes')

    # Gallery page: thumbnail container -> <a> -> href.

    thumb_rules = [
        descend_rule(tag_attributes={'class': thumb_classname}),
        descend_rule(tag_name='a'),
    ]

    thumb_content_parser = url_content_parser(
        'get post urls (based on old booru thumb search)',
        thumb_rules, 'href', 50)

    gallery_parser = ClientParsing.PageParser(
        name + ' gallery page parser',
        content_parsers=[thumb_content_parser],
        example_urls=[gug.GetExampleURL()])

    # Post page: file URL(s) first, then tags.

    content_parsers = []

    if image_id is not None:
        # The id may sit on a link (href, priority 75) or on the image
        # itself (src, priority 50); register a parser for each.
        content_parsers.append(url_content_parser(
            'get image file link url (based on old booru parser)',
            [descend_rule(tag_name='a', tag_attributes={'id': image_id})],
            'href', 75))

        content_parsers.append(url_content_parser(
            'get image file src url (based on old booru parser)',
            [descend_rule(tag_name='img', tag_attributes={'id': image_id})],
            'src', 50))

    elif image_data is not None:
        # No id to go on: find the <a> whose text is exactly image_data.
        link_text_match = ClientParsing.StringMatch(
            match_type=ClientParsing.STRING_MATCH_FIXED,
            match_value=image_data,
            example_string=image_data)

        link_text_rule = descend_rule(
            tag_name='a',
            should_test_tag_string=True,
            tag_string_string_match=link_text_match)

        content_parsers.append(url_content_parser(
            'get image file url (based on old booru parser)',
            [link_text_rule], 'href', 50))

    for (classname, namespace) in tag_classnames_to_namespaces.items():
        # Tags are the text of <a> tags under elements with this class.
        namespace_rules = [
            descend_rule(tag_attributes={'class': classname}),
            descend_rule(tag_name='a'),
        ]

        text_formula = ClientParsing.ParseFormulaHTML(
            tag_rules=namespace_rules,
            content_to_fetch=ClientParsing.HTML_CONTENT_STRING)

        content_parsers.append(ClientParsing.ContentParser(
            name='get "' + namespace + '" tags',
            content_type=HC.CONTENT_TYPE_MAPPINGS,
            formula=text_formula,
            additional_info=namespace))

    post_parser = ClientParsing.PageParser(
        name + ' post page parser',
        content_parsers=content_parsers,
        example_urls=[])

    return (gug, gallery_parser, post_parser)