def __init__(self, pattern):
    """Create the parser and remember ``pattern``.

    Starts with an empty URL list, an empty ``dirs`` mapping, an empty
    ``target_ref`` string and ``is_done`` cleared.
    """
    SGMLParser.__init__(self)
    self.pattern = pattern
    self.target_ref = ""
    self.is_done = False
    self.urls = []
    self.dirs = {}
def __init__(self, email, password, proxy_url='http://10.17.75.253:3128'):
    """Set up parser state and install a global cookie-aware URL opener.

    Args:
        email: account e-mail, stored on ``self.email``.
        password: account password, stored on ``self.password``.
        proxy_url: HTTP proxy to route requests through.  Defaults to the
            address that used to be hard-coded here; pass a false value
            to build the opener without the explicit proxy handlers.

    Side effects:
        Calls ``urllib2.install_opener``, which replaces the process-wide
        default opener used by ``urllib2.urlopen``.
    """
    SGMLParser.__init__(self)

    # Tag-tracking flags, all cleared initially.
    self.h3 = False
    self.h3_is_ready = False
    self.div = False
    self.h3_and_div = False
    self.a = False
    self.depth = 0

    # Collected output.
    self.names = ""
    self.dic = {}

    # Account details.
    self.email = email
    self.password = password
    self.domain = 'renren.com'

    # The former ``try/except: raise/else`` wrapper only re-raised, so any
    # error from these calls propagates exactly as it did before.
    cookie = cookielib.CookieJar()
    handlers = [urllib2.HTTPCookieProcessor(cookie)]
    if proxy_url:
        handlers.append(urllib2.ProxyHandler({'http': proxy_url}))
        handlers.append(urllib2.ProxyBasicAuthHandler())
    urllib2.install_opener(urllib2.build_opener(*handlers))
def __init__(self):
    """Initialise the base parser with cleared table/row/cell flags.

    ``row_count`` starts at zero and ``out`` starts as an empty list.
    """
    SGMLParser.__init__(self)
    self.is_tbl = self.is_tr = self.is_td = False
    self.row_count = 0
    self.out = []
def __init__(self):
    """Initialise the base parser and the empty result lists.

    Both boolean flags start cleared and ``level`` starts at zero.
    """
    SGMLParser.__init__(self)
    self.is_div_day = self.get_title_data = False
    self.level = 0
    self.comments = []
    self.postTitle = []
def __init__(self, markup='', parseOnlyThese=None, fromEncoding=None,
             markupMassage=True, smartQuotesTo=XML_ENTITIES,
             convertEntities=None, selfClosingTags=None, isHTML=False):
    """Store the parsing options, then parse ``markup`` immediately.

    ``markup`` may be a string or any object with a ``read()`` method.
    ``convertEntities`` selects one of the three entity-conversion modes
    (``HTML_ENTITIES``, ``XHTML_ENTITIES``, ``XML_ENTITIES``); whenever it
    is truthy, ``smartQuotesTo`` is forced to ``None``.  A ``StopParsing``
    raised while feeding is swallowed, and ``self.markup`` is cleared once
    parsing has finished.
    """
    self.parseOnlyThese = parseOnlyThese
    self.fromEncoding = fromEncoding
    self.smartQuotesTo = smartQuotesTo
    self.convertEntities = convertEntities

    if self.convertEntities:
        # Smart-quote conversion is switched off whenever entities are
        # being converted.
        self.smartQuotesTo = None
        if convertEntities == self.HTML_ENTITIES:
            (self.convertXMLEntities,
             self.convertHTMLEntities,
             self.escapeUnrecognizedEntities) = (False, True, True)
        elif convertEntities == self.XHTML_ENTITIES:
            (self.convertXMLEntities,
             self.convertHTMLEntities,
             self.escapeUnrecognizedEntities) = (True, True, False)
        elif convertEntities == self.XML_ENTITIES:
            (self.convertXMLEntities,
             self.convertHTMLEntities,
             self.escapeUnrecognizedEntities) = (True, False, False)
    else:
        (self.convertXMLEntities,
         self.convertHTMLEntities,
         self.escapeUnrecognizedEntities) = (False, False, False)

    self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
    SGMLParser.__init__(self)

    # File-like input is read in full before parsing.
    if hasattr(markup, 'read'):
        markup = markup.read()
    self.markup = markup
    self.markupMassage = markupMassage
    try:
        self._feed(isHTML=isHTML)
    except StopParsing:
        pass
    # Drop the reference to the raw markup once it has been consumed.
    self.markup = None
def __init__(self):
    """Initialise the base parser, three cleared flags and two empty lists."""
    SGMLParser.__init__(self)
    self.flag = self.a_flag = self.b_flag = False
    self.urls = []
    self.titles = []
def __init__(self, context=None, request=None):
    """Initialise the parser and keep the optional ``context``/``request``.

    ``pieces`` starts empty, ``current_status`` is ``None`` and ``in_link``
    is cleared.
    """
    SGMLParser.__init__(self)
    self.context = context
    self.request = request
    self.current_status = None
    self.in_link = False
    self.pieces = []
def __init__(self, term, verbose=False):
    """Initialise the parser for ``term``.

    Stores ``term`` and ``verbose``, clears the ``inside_pre`` and
    ``inside_a`` flags, and sets ``self.major`` to the first element of
    ``Major.objects.all()`` (indexing raises if there is none).
    """
    SGMLParser.__init__(self)
    self.inside_pre = self.inside_a = False
    self.term = term
    self.verbose = verbose
    all_majors = Major.objects.all()
    self.major = all_majors[0]
def __init__(self):
    """Initialise the parser with no channel data collected yet.

    ``state`` starts as a one-element list holding ``"init"``;
    ``current_href`` is ``None`` and ``success`` is ``False``.
    """
    SGMLParser.__init__(self)
    self.state = ["init"]
    self.channels_data = []
    self.current_href = None
    self.success = False
def __init__(self):
    """Initialise the parser with empty markers, zeroed counters and no names."""
    SGMLParser.__init__(self)
    self.is_div = ""
    self.is_a = ""
    self.div_counter = 0
    self.a_counter = 0
    self.name = []
def __init__(self, url):
    """Initialise the parser state for the page at ``url``.

    Args:
        url: stored unchanged on ``self.url_name``.

    Every ``is_*`` marker starts as ``None``, every counter at zero and
    every ``*_name`` buffer as an empty string.
    """
    SGMLParser.__init__(self)
    # NOTE(review): SGMLParser.__init__ already calls self.reset(); this
    # explicit base-class reset is kept from the original and looks
    # redundant -- confirm before removing.
    SGMLParser.reset(self)

    self.vul_value = []
    self.url_value = ""

    # Position markers (the original assigned ``is_tr`` twice; once is
    # enough).
    self.is_table = None
    self.is_date = None
    self.is_tr = None
    self.is_span = None
    self.is_vul_name = None
    self.is_td = None
    self.is_name = None
    self.is_vul = None
    self.is_name_desc = None
    self.is_date_desc = None
    self.is_cvss_desc = None
    self.is_a = None
    self.is_cvss = None
    self.is_href_desc = None
    self.is_url = None
    self.is_vul_desc = None

    # Counters.
    self.table_count = 0
    self.td_count = 0
    self.a_count = 0

    # Text buffers.
    self.cvss_name = ""
    self.vul_name = ""
    self.href_name = ""
    self.vul_type_name = ""
    self.date_name = ""
    self.cve_name = ""
    self.desc_name = ""

    self.url_name = url
def __init__(self, input_file, mapping):
    """Rewind ``input_file`` and prepare to parse it with ``mapping``.

    The file is seeked to offset 0 first; the offset and count trackers
    start at zero.  The base parser is initialised last.
    """
    input_file.seek(0)
    self.input_file, self.mapping = input_file, mapping
    self.current_offset = self.count = 0
    SGMLParser.__init__(self)
def __init__(self, httpResponse, normalizeMarkup=True, verbose=0):
    """Initialise both base classes and parse ``httpResponse`` right away.

    After the internal containers are set up, ``_regex_url_parse`` and then
    ``_preParse`` are called with the response, so construction already
    performs the parsing work.
    """
    abstractParser.__init__(self, httpResponse)
    SGMLParser.__init__(self, verbose)

    # Constants: tags and attributes that are inspected for URLs.
    self._tagsContainingURLs = (
        'go', 'a', 'img', 'link', 'script', 'iframe', 'object', 'embed',
        'area', 'frame', 'applet', 'input', 'base', 'div', 'layer',
        'ilayer', 'bgsound', 'form',
    )
    self._urlAttrs = ('href', 'src', 'data', 'action')

    # Internal result containers.
    self._tag_and_url = []
    self._parsed_URLs = []
    self._re_URLs = []
    self._encoding = httpResponse.getCharset()
    self._forms = []

    # "Currently inside <tag>" flags.
    self._insideForm = False
    self._insideSelect = False
    self._insideTextarea = False
    self._insideScript = False

    self._commentsInDocument = []
    self._scriptsInDocument = []

    # Meta tags.
    self._metaRedirs = []
    self._metaTags = []

    self._normalizeMarkup = normalizeMarkup

    # Fill the self._re_URLs list with url objects.
    self._regex_url_parse(httpResponse)

    # Everything is in place; run the actual parse.
    self._preParse(httpResponse)
def __init__(self):
    """Initialise both base classes and zero every ``is_<tag>`` counter.

    ``req`` starts as ``None``.
    """
    BaseModule.__init__(self)
    SGMLParser.__init__(self)
    tracked_tags = (
        'b', 'dl', 'p', 'br', 'tr', 'td', 'a', 'span', 'font', 'li', 'div',
        'dt', 'ul', 'dd', 'table', 'em', 'tbody', 'img', 'h1', 'h3', 'i',
    )
    # One integer attribute per tag, named ``is_<tag>``, all starting at 0.
    for tag in tracked_tags:
        setattr(self, 'is_' + tag, 0)
    self.req = None
def __init__(self, email, password):
    """Set up parser state, helper clients and a global cookie-aware opener.

    Args:
        email: account e-mail, stored on ``self.email``.
        password: account password, stored on ``self.password``.

    Side effects:
        Creates a ``Ghost`` instance and a ``MongoClient`` for
        127.0.0.1:27017, installs a process-wide ``urllib2`` opener backed
        by an ``LWPCookieJar`` bound to ``renren_cookie.txt``, and prints a
        confirmation line.
    """
    SGMLParser.__init__(self)

    # Tag-tracking flags, all cleared initially.
    self.h3 = False
    self.h3_is_ready = False
    self.div = False
    self.h3_and_div = False
    self.a = False
    self.depth = 0

    # Collected output.
    self.names = ""
    self.dic = {}

    # Account details.
    self.email = email
    self.password = password
    self.domain = 'renren.com'

    self.file = None
    self.friend_file = None
    self.ghost = Ghost()
    self.cookie = None
    self.group_url = "http://friend.renren.com/groupsdata"
    self.group_home = "http://friend.renren.com/managefriends"
    self.file_url = "renren_cookie.txt"
    self.mongodb = MongoClient("127.0.0.1", 27017)

    # The former ``try/except: raise/else`` wrapper only re-raised, so any
    # error from these calls propagates exactly as it did before.
    self.cookie = cookielib.LWPCookieJar(self.file_url)
    cookie_processor = urllib2.HTTPCookieProcessor(self.cookie)
    urllib2.install_opener(urllib2.build_opener(cookie_processor))

    # Single-argument call form: same output on Python 2, and it also
    # parses on Python 3.
    print("init finished successfully!!")
def __init__(self, baseuri):
    """Initialise the parser for ``baseuri`` with no links found yet.

    ``startlink``, ``homelink`` and ``prearchiveslink`` all start as
    ``None``.
    """
    SGMLParser.__init__(self)
    self.baseuri = baseuri
    self.links = []
    self.startlink = self.homelink = self.prearchiveslink = None
def __init__(self):
    """Set the per-page defaults, then initialise the base parser.

    All attributes except ``typ`` are assigned before
    ``SGMLParser.__init__`` runs; ``typ`` is set afterwards.
    """
    self.url = None
    self.links = []
    self.linkpos = {}
    self.images = []

    # Page metadata: keywords, description and title.
    self.keywords = []
    self.description = ''
    self.title = ''
    self.title_flag = True

    # Handling of <base href="..."> links: flag plus the base URL itself.
    self.base_href = False
    self.base = None

    # Anchor-links flag.
    self._anchors = True

    # Flags for the META robots tag.
    self.can_index = self.can_follow = True

    # Tag currently being processed.
    self._tag = ''

    SGMLParser.__init__(self)

    # Type.
    self.typ = 0
def __init__(self):
    """Initialise the parser with an empty ``majors`` mapping.

    The row/capture flags start cleared, ``col`` at zero and the text
    buffer ``data`` empty.
    """
    SGMLParser.__init__(self)
    self.majors = {}
    self.inside_row = self.capture = False
    self.col = 0
    self.data = ''
def __init__(self, username, password):
    """Store the credentials and install a global cookie-aware opener.

    ``urllib2.install_opener`` replaces the process-wide default opener
    with one that uses a fresh in-memory ``CookieJar``.
    """
    SGMLParser.__init__(self)
    self.username, self.password = username, password
    jar = cookielib.CookieJar()
    cookie_processor = urllib2.HTTPCookieProcessor(jar)
    urllib2.install_opener(urllib2.build_opener(cookie_processor))
def __init__(self, site):
    """Initialise the parser for ``site`` with empty result lists.

    ``state`` is created last, as ``StateBase(self)``, so it receives the
    parser after the lists exist.
    """
    SGMLParser.__init__(self)
    self.site = site
    self.city = []
    self.name = []
    self.url = []
    self.state = StateBase(self)
def __init__(self):
    """Set the result/buffer defaults, then initialise the base parser.

    The attributes are assigned before ``SGMLParser.__init__`` is called
    (with ``0`` as its positional argument).
    """
    self.result = []
    self.data = ""
    self.link = ""
    self.TAG_BEG = self.TAG_END = False
    SGMLParser.__init__(self, 0)
def __init__(self, base, username, password):
    """Set up parser state, login fields and a global cookie-aware opener.

    Args:
        base: stored unchanged on ``self.base``.
        username: stored on ``self.username``.
        password: stored on ``self.password``.

    Side effects:
        Calls ``urllib2.install_opener``, which replaces the process-wide
        default opener with one backed by a fresh ``CookieJar``.
    """
    SGMLParser.__init__(self)

    # Tag-tracking flags, all cleared initially.
    self.h3 = False
    self.h3_is_ready = False
    self.div = False
    self.h3_and_div = False
    self.a = False
    self.depth = 0

    # Collected output.
    self.names = ""
    self.dic = {}

    # Login-related values.
    self.base = base
    self.fastloginfield = "username"
    self.username = username
    self.password = password
    self.quickforward = "yes"
    self.handlekey = "ls"

    # The former ``try/except: raise/else`` wrapper only re-raised, so any
    # error from these calls propagates exactly as it did before.
    cookie = cookielib.CookieJar()
    cookie_processor = urllib2.HTTPCookieProcessor(cookie)
    urllib2.install_opener(urllib2.build_opener(cookie_processor))
def __init__(self, verbose=0):
    """Initialise the base parser (forwarding ``verbose``) and empty results.

    ``savedata`` starts as ``None``; ``title`` and ``body`` are empty
    strings and ``metatags`` is an empty dict.
    """
    SGMLParser.__init__(self, verbose)
    self.savedata = None
    self.title = ''
    self.body = ''
    self.metatags = {}
def __init__(self):
    """Set the buffer/link/title defaults, then initialise the base parser.

    The attributes are assigned before ``SGMLParser.__init__`` is called
    (with ``0`` as its positional argument).
    """
    self.data = ""
    self.links = []
    self.title = ""
    self.TAG_BEG = self.TAG_END = False
    SGMLParser.__init__(self, 0)
def __init__(self):
    """Initialise the parser with empty string markers and result lists.

    Every ``*Flag`` attribute, ``name`` and ``icon`` start as empty
    strings; every collection attribute starts as its own empty list.
    """
    SGMLParser.__init__(self)

    text_attrs = (
        'infoFlag', 'descriptionFlag', 'descriptionPFlag', 'appnameFlag',
        'whatisnewFlag', 'allstartFlag', 'customerstartFlag',
        'customerFlag', 'iphonescreenshotsFlag', 'ipadscreenshotsFlag',
        'nameFlag', 'name', 'icon',
    )
    for attr in text_attrs:
        setattr(self, attr, '')

    list_attrs = (
        'appurl', 'appname', 'description', 'info', 'iphonescreenshots',
        'ipadscreenshots', 'whatisnew', 'allstart', 'customerstart',
        'customer',
    )
    # A fresh list per attribute, so no two attributes share one object.
    for attr in list_attrs:
        setattr(self, attr, [])
def __init__(self, wordname):
    """Create the ``Word`` being filled in and zero all ``*_found`` markers.

    The ``word.Word`` instance is created and named before the base parser
    is initialised; the markers and temporary sentence buffers are set up
    afterwards.
    """
    self._word = word.Word()
    self._word.set_name(wordname)
    SGMLParser.__init__(self)

    found_markers = (
        '_phonetic_found',
        '_etcTrans_found',
        '_etcTrans_li_found',
        '_p_additional_found',
        '_synonyms_found',
        '_synonyms_a_found',
        '_tPowerTrans_found',
        '_tPowerTrans_ul_sense_ex_found',
        '_tPowerTrans_ul_sense_ex_p_found',
        '_examplesToggle_found',
        '_examplesToggle_bilingual_found',
        '_examplesToggle_bilingual_p_found',
        # Used to deal with the "报错" ("report an error") <p> element.
        '_examplesToggle_bilingual_p_noattrs_found',
    )
    for marker in found_markers:
        setattr(self, marker, 0)

    self._tmp_sentence_list = []
    self._tmp_sentence = ''
def __init__(self):
    """Initialise the parser with an empty ``terms`` mapping.

    The text buffer and ``current_id`` start empty; the ``capture`` and
    ``inside_select`` flags start cleared.
    """
    SGMLParser.__init__(self)
    self.terms = {}
    self.data = ''
    self.current_id = ''
    self.capture = self.inside_select = False
def __init__(self):
    """Initialise the parser with empty hyperlink and description lists."""
    SGMLParser.__init__(self)
    self.data = ""
    self.inside_a_element = 0
    # Separate list objects for the hyperlinks and their descriptions.
    self.hyperlinks = []
    self.descriptions = []
def __init__(self, verbose=0):
    """Create the parser, forwarding ``verbose`` to ``SGMLParser``.

    The hyperlink and description lists start empty and both integer
    markers start at zero.
    """
    SGMLParser.__init__(self, verbose)
    self.hyperlinks = []
    self.descriptions = []
    self.inside_a_element = self.starting_description = 0
def __init__(self, source, type):
    """Store ``source`` and ``type``, then initialise the base parser.

    The title marker, title text and both result lists are set up before
    ``SGMLParser.__init__`` runs.
    """
    self.source, self.type = source, type
    self.is_title = 0
    self.title_text = ""
    self.title_list = []
    self.href_list = []
    SGMLParser.__init__(self)
def __init__(self, verbose=0):
    """Create the parser (forwarding ``verbose``) with empty result lists.

    ``links`` and ``iframes`` start empty; ``starting_description`` is
    ``False``.
    """
    SGMLParser.__init__(self, verbose)
    self.starting_description = False
    self.links = []
    self.iframes = []
def __init__(self, content_start_comment='', content_end_comment=''):
    """Initialise the parser and register extra character entities.

    Args:
        content_start_comment: accepted for interface compatibility; not
            used by this initialiser.
        content_end_comment: accepted for interface compatibility; not
            used by this initialiser.

    The extra entity definitions are added to a per-instance copy of
    ``entitydefs``.  Updating the inherited dict in place would change the
    entity table shared by every parser that resolves ``entitydefs``
    through the class.
    """
    SGMLParser.__init__(self)
    self.buf = ""
    self.output = ""
    # Copy first so the shared class-level dict is left untouched.
    self.entitydefs = dict(self.entitydefs)
    self.entitydefs.update({'auml':'ä','ouml':'ö','aring':'å','Auml':'Ä','Ouml':'Ö','Aring':'Å','nbsp':' ',})
def __init__(self):
    """Initialise the parser with nothing collected yet.

    ``data``, ``currenttable`` and ``lastcaption`` all start as ``None``.
    """
    SGMLParser.__init__(self)
    self.data = self.currenttable = self.lastcaption = None
def __init__(self):
    """Initialise the parser with three empty result lists."""
    SGMLParser.__init__(self)
    self.hrefs = []
    self.srcs = []
    # List that stores the videos.
    self.hrefs2 = []
def __init__(self, verbose=0):
    """Run ``_init``, set the fall-back defaults, then initialise the base.

    ``_modFunct`` defaults to ``None`` and ``_as`` to ``'http'``;
    ``verbose`` is forwarded to ``SGMLParser.__init__``, which runs last.
    """
    self._init()

    # Fall-back defaults.
    self._as = 'http'
    self._modFunct = None

    SGMLParser.__init__(self, verbose)
def __init__(self, valid_tags=None, nasty_tags=None):
    """Initialise the parser with the tag lists to apply.

    Args:
        valid_tags: collection of tags to use; ``None`` selects the
            module-level ``VALID_TAGS``.
        nasty_tags: collection of tags to use; ``None`` selects the
            module-level ``NASTY_TAGS``.

    Only ``None`` triggers the default.  The previous ``x or DEFAULT`` form
    also replaced an explicitly passed empty collection, which made it
    impossible to ask for "no tags".
    """
    SGMLParser.__init__(self)
    self.result = ""
    self.valid_tags = VALID_TAGS if valid_tags is None else valid_tags
    self.nasty_tags = NASTY_TAGS if nasty_tags is None else nasty_tags
def __init__(self, baseUrl):
    """Initialise the parser for ``baseUrl`` with empty result lists.

    ``leaves`` and ``directories`` both start empty.
    """
    SGMLParser.__init__(self)
    self.baseUrl = baseUrl
    self.directories = []
    self.leaves = []
def __init__(self, verbose=0):
    """Create the parser (forwarding ``verbose``) and call ``_reset``.

    ``title`` and ``data`` are set to ``None`` before ``_reset`` runs.
    """
    SGMLParser.__init__(self, verbose)
    self.title = None
    self.data = None
    self._reset()
def __init__(self):
    """Initialise the underlying ``SGMLParser``; no extra state is added."""
    SGMLParser.__init__(self)
def __init__(self):
    """Initialise both base classes and clear the form-parsing flag.

    ``SGMLParser`` is initialised first, then
    ``phpbb_prompt.PromptParser``.
    """
    SGMLParser.__init__(self)
    phpbb_prompt.PromptParser.__init__(self)
    self._parsing_form = False
def __init__(self):
    """Initialise the parser with an empty ``img_locations`` list."""
    SGMLParser.__init__(self)
    self.img_locations = []
def __init__(self):
    """Initialise the parser with an empty ``scripts`` list."""
    SGMLParser.__init__(self)
    self.scripts = []
"""HTML 2.0 parser.
def __init__(self):
    """Create the empty ``text`` list, then initialise the base parser.

    ``text`` is assigned before ``SGMLParser.__init__`` is called.
    """
    self.text = []
    SGMLParser.__init__(self)
def __init__(self):
    """Initialise the parser with no section ids collected yet.

    The ``inside_table`` and ``capture`` flags start cleared and the text
    buffer ``data`` starts empty.
    """
    SGMLParser.__init__(self)
    self.inside_table = self.capture = False
    self.data = ''
    self.section_ids = []
def __init__(self):
    """Initialise the parser with an empty ``result`` string."""
    SGMLParser.__init__(self)
    self.result = ""
def __init__(self, site="163"):
    """Initialise the parser and remember ``site`` (default ``"163"``)."""
    SGMLParser.__init__(self)
    self.site = site
def __init__(self):
    """Initialise the parser with an empty ``is_h4`` marker and name list."""
    SGMLParser.__init__(self)
    self.name = []
    self.is_h4 = ""
def __init__(self, verbose=0):
    """Initialise the base parser (forwarding ``verbose``) and empty results.

    ``savedata`` starts as ``None``; ``title`` and ``body`` are empty
    strings and ``metatags`` is an empty dict.
    """
    SGMLParser.__init__(self, verbose)
    self.savedata = None
    self.title = ''
    self.body = ''
    self.metatags = {}
def __init__(self):
    """Initialise the parser with an empty ``docs`` list."""
    SGMLParser.__init__(self)
    self.docs = []
def __init__(self):
    """Initialise the parser with an empty ``list`` attribute."""
    SGMLParser.__init__(self)
    self.list = []
def __init__(self):
    """Initialise the parser with an empty ``majors`` mapping.

    The text buffer starts empty; the ``capture`` and ``inside_select``
    flags start cleared.
    """
    SGMLParser.__init__(self)
    self.majors = {}
    self.data = ''
    self.capture = self.inside_select = False
def __init__(self, out):
    """Initialise the ``HTMLParser`` base and keep ``out`` on the instance."""
    HTMLParser.__init__(self)
    self.out = out
def __init__(self):
    """Initialise the underlying ``SGMLParser``; no extra state is added."""
    SGMLParser.__init__(self)
def __init__(self):
    """Initialise the parser with no packages collected yet.

    ``is_package`` starts as an empty string and ``count`` at zero.
    """
    SGMLParser.__init__(self)
    self.packages = []
    self.count = 0
    self.is_package = ""
def __init__(self, url):
    """Fetch the page for ``url`` and parse it during construction.

    After the flags and buffers are set up, ``_get_page_datas(url)`` is
    called and ``self._page_datas`` is passed to ``feed``.
    """
    SGMLParser.__init__(self)
    self._is_weather = False
    self._is_td = False
    self._page_datas = None  # page data
    self._weather = []  # weather data
    self._get_page_datas(url)
    self.feed(self._page_datas)
def __init__(self):
    """Initialise the parser with three empty lists and no recorded error.

    ``hasil``, ``baris`` and ``data`` each get their own empty list;
    ``last_error`` starts as ``None``.
    """
    SGMLParser.__init__(self)
    self.last_error = None
    self.hasil = []
    self.baris = []
    self.data = []
def __init__(self):
    """Initialise the parser with an empty ``majors`` mapping.

    The text buffer starts empty; the ``capture`` and ``major_list`` flags
    start cleared.
    """
    SGMLParser.__init__(self)
    self.majors = {}
    self.data = ''
    self.capture = self.major_list = False
def __init__(self):
    """Initialise the parser with no links collected and an empty title."""
    SGMLParser.__init__(self)
    self.link = []
    self.is_a = ""
    self.Title = ""
def __init__(self):
    """Initialise both base classes and the search-result state.

    ``SGMLParser`` is initialised first, then
    ``phpbb_prompt.PromptParser``.  ``search_result`` starts as an empty
    set.
    """
    SGMLParser.__init__(self)
    phpbb_prompt.PromptParser.__init__(self)
    # Double-underscore attributes are name-mangled per class, so they are
    # assigned explicitly rather than through setattr().
    self.__parsing_search_result = False
    self.__parsing_post_body = False
    self.search_result = set()