Example #1
0
	def __init__(self):
		"""Initialise the SGML parser base and the per-instance state."""
		SGMLParser.__init__(self)
		# Collected values start as an empty list.
		self.name = []
		# `is_div` / `is_a` start as empty strings, both counters at zero.
		self.is_div = self.is_a = ""
		self.div_counter = self.a_counter = 0
Example #2
0
    def __init__(self, email, password):
        """Initialise parser state, account settings and the global URL opener.

        Args:
            email: stored on the instance as ``self.email``.
            password: stored on the instance as ``self.password``.

        Side effects:
            Creates a ``Ghost`` instance and a ``MongoClient`` for
            127.0.0.1:27017, installs a process-wide ``urllib2`` opener backed
            by an ``LWPCookieJar`` bound to ``renren_cookie.txt``, and prints
            a confirmation message. Any exception raised while doing so
            propagates to the caller.
        """
        SGMLParser.__init__(self)

        # Parser flags and accumulators.
        self.h3 = False
        self.h3_is_ready = False
        self.div = False
        self.h3_and_div = False
        self.a = False
        self.depth = 0
        self.names = ""
        self.dic = {}

        # Account and site settings.
        self.email = email
        self.password = password
        self.domain = 'renren.com'
        self.file = None
        self.friend_file = None
        self.ghost = Ghost()
        self.cookie = None
        self.group_url = "http://friend.renren.com/groupsdata"
        self.group_home = "http://friend.renren.com/managefriends"
        self.file_url = "renren_cookie.txt"

        self.mongodb = MongoClient("127.0.0.1", 27017)

        # The former ``try/except: raise/else`` wrapper only re-raised, so the
        # statements now run unguarded with exactly the same outcome.
        # NOTE(review): the jar is only bound to the file name here; nothing
        # in this method loads cookies from it.
        self.cookie = cookielib.LWPCookieJar(self.file_url)
        cookieProc = urllib2.HTTPCookieProcessor(self.cookie)
        opener = urllib2.build_opener(cookieProc)
        urllib2.install_opener(opener)

        # Parenthesised form prints the same text on Python 2 and 3.
        print("init finished successfully!!")
Example #3
0
 def __init__(self, term, verbose=False):
     """Initialise the parser for ``term``.

     Args:
         term: stored as ``self.term``.
         verbose: stored as ``self.verbose`` (after the base initialiser,
             so this value is the one that remains).
     """
     SGMLParser.__init__(self)
     self.term, self.verbose = term, verbose
     # Neither tag flag is set at start.
     self.inside_pre = self.inside_a = False
     # NOTE(review): takes the first element of ``Major.objects.all()``;
     # presumably this fails when there is none -- confirm with callers.
     self.major = Major.objects.all()[0]
Example #4
0
 def reset(self):
     """Reset the parser and start over with an empty ``urls`` list.

     Best-effort: a failure in the base-class reset is reported on stdout
     and signalled by returning 0 instead of propagating. On success the
     method returns None.
     """
     # Set first so the attribute exists even when the base reset fails.
     self.urls = []
     try:
         SGMLParser.reset(self)
     except Exception as e:
         # Same output as the old ``print "...", e`` statement, but valid
         # syntax on both Python 2.6+ and Python 3.
         print("reset111111111111 %s" % (e,))
         return 0
Example #5
0
    def __init__(self, verbose=0):
        """Initialise the base parser and empty the extracted-content fields.

        Args:
            verbose: forwarded to ``SGMLParser.__init__``.
        """
        SGMLParser.__init__(self, verbose)
        self.savedata = None
        self.metatags = {}
        # Both text fields start as empty strings.
        self.title = self.body = ''
Example #6
0
 def reset(self):
     """Reset the base parser and return every field to its start value."""
     SGMLParser.reset(self)
     # Accumulators start empty.
     self.currentMeaning = ''
     self.meanings = []
     # Only `processMoreMeanings` starts switched on.
     self.addText = self.processingMeaning = False
     self.processMoreMeanings = True
Example #7
0
 def reset(self):
     """Reset the base parser and clear the collected majors and flags."""
     SGMLParser.reset(self)
     self.majors = {}
     self.current_name = None
     self.data = ''
     self.inside_majors = self.capture = False
Example #8
0
    def __init__(self, httpResponse, normalizeMarkup=True, verbose=0):
        """Initialise both base classes, clear the bookkeeping and pre-parse.

        Args:
            httpResponse: passed to ``abstractParser.__init__``,
                ``_regex_url_parse`` and ``_preParse``; the result of its
                ``getCharset()`` is stored as ``self._encoding``.
            normalizeMarkup: stored as ``self._normalizeMarkup``.
            verbose: forwarded to ``SGMLParser.__init__``.
        """
        abstractParser.__init__(self, httpResponse)
        SGMLParser.__init__(self, verbose)

        # Constants: attribute names and tag names.
        self._urlAttrs = ('href', 'src', 'data', 'action')
        self._tagsContainingURLs = (
            'go', 'a', 'img', 'link', 'script', 'iframe', 'object',
            'embed', 'area', 'frame', 'applet', 'input', 'base',
            'div', 'layer', 'ilayer', 'bgsound', 'form',
        )

        self._normalizeMarkup = normalizeMarkup
        self._encoding = httpResponse.getCharset()

        # Result lists, all empty until parsing fills them.
        self._tag_and_url = []
        self._parsed_URLs = []
        self._re_URLs = []
        self._forms = []
        self._commentsInDocument = []
        self._scriptsInDocument = []

        # Meta tags.
        self._metaRedirs = []
        self._metaTags = []

        # "Currently inside <tag>" flags, all off at start.
        self._insideForm = False
        self._insideSelect = False
        self._insideTextarea = False
        self._insideScript = False

        # Fill self._re_URLs with url objects.
        self._regex_url_parse(httpResponse)

        # Everything is set up; start working on the response.
        self._preParse(httpResponse)
Example #9
0
 def __init__(self):
     """Initialise the base parser and the empty term-collection state."""
     SGMLParser.__init__(self)
     self.terms = {}
     # Text fields start as empty strings, flags switched off.
     self.data = self.current_id = ''
     self.capture = self.inside_select = False
Example #10
0
 def __init__(self,url):
     """Initialise the parser state for one page.

     Args:
         url: stored unchanged as ``self.url_name``.
     """
     SGMLParser.__init__(self)
     # NOTE(review): SGMLParser.__init__ already calls self.reset(); this
     # explicit base-class reset only matters if the class overrides
     # reset() without chaining to the base -- confirm before removing.
     SGMLParser.reset(self)

     self.vul_value = []
     self.url_value = ""

     # Flags all start as None. `is_tr` used to be assigned twice; each
     # flag is now set exactly once.
     self.is_table = None
     self.is_date = None
     self.is_tr = None
     self.is_span = None
     self.is_vul_name = None
     self.is_td = None
     self.is_name = None
     self.is_vul = None
     self.is_name_desc = None
     self.is_date_desc = None
     self.is_cvss_desc = None
     self.is_a = None
     self.is_cvss = None
     self.is_href_desc = None
     self.is_url = None
     self.is_vul_desc = None

     # Counters start at zero.
     self.table_count = 0
     self.td_count = 0
     self.a_count = 0

     # Text fields start empty, except `url_name` which holds the argument.
     self.cvss_name = ""
     self.vul_name = ""
     self.href_name = ""
     self.vul_type_name = ""
     self.date_name = ""
     self.cve_name = ""
     self.desc_name = ""
     self.url_name = url
Example #11
0
    def __init__(self):
        """Initialise both base classes and zero every ``is_<tag>`` attribute."""
        BaseModule.__init__(self)
        SGMLParser.__init__(self)

        # One integer attribute per tag name, each starting at 0.
        for tag in ('b', 'dl', 'p', 'br', 'tr', 'td', 'a', 'span', 'font',
                    'li', 'div', 'dt', 'ul', 'dd', 'table', 'em', 'tbody',
                    'img', 'h1', 'h3', 'i'):
            setattr(self, 'is_' + tag, 0)

        self.req = None
 def reset(self):
   """Reset the base parser and restore the Jokes2go.com settings and state."""
   SGMLParser.reset(self)
   # Source name and the URL used for it.
   self.name = "Jokes2go.com"
   self.url = "http://www.jokes2go.com/cgi-perl/randjoke.cgi?type=j"
   # Nothing collected yet.
   self.quote = []
   self.current_quote = ""
   # True while the parser is processing content inside <pre></pre>.
   self.inside_pre_element = False
	def reset(self):
		"""Clear the collected output and counters, then reset the base parser."""
		self.pieces, self.refs = [], []
		self.enum = 0
		# String tuples; the third entry of each begins with a literal tab.
		self.enumstrs = ("", "  * ", "	   + ")
		self.enumbrks = ("", "    ", "	     ")
		# The base reset runs last, after the attributes above exist.
		SGMLParser.reset(self)
Example #14
0
 def __init__(self, input_file, mapping):
   """Rewind ``input_file``, store the arguments and initialise the parser.

   Args:
     input_file: object with a ``seek`` method; it is rewound to offset 0
       and kept as ``self.input_file``.
     mapping: stored as ``self.mapping``.
   """
   input_file.seek(0)
   self.input_file, self.mapping = input_file, mapping
   self.current_offset = self.count = 0
   # Base initialisation runs last, once the attributes above exist.
   SGMLParser.__init__(self)
Example #15
0
 def __init__(self,username,password):
     """Store the credentials and install a cookie-aware global URL opener.

     Args:
         username: stored as ``self.username``.
         password: stored as ``self.password``.
     """
     SGMLParser.__init__(self)
     self.username, self.password = username, password
     # Install a urllib2 opener whose cookies live in a fresh CookieJar.
     cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
     urllib2.install_opener(urllib2.build_opener(cookie_handler))
Example #16
0
 def reset(self):
     """Reset the base parser and zero all ``in_*`` / ``exist_*`` fields."""
     SGMLParser.reset(self)
     self.in_div = 0
     self.in_a_watchers = self.in_a_forks = 0
     self.in_fork_flag = 0
     # Presence check: anything that could possibly be missing has to be
     # checked for existence. TODO: find a way to modularise this.
     self.exist_in_fork_flag = 0
Example #17
0
    def __init__(self, base, username, password):
        """Initialise parser state, login form fields and the global opener.

        Args:
            base: stored as ``self.base``.
            username: stored as ``self.username``.
            password: stored as ``self.password``.

        Side effects:
            Installs a process-wide ``urllib2`` opener backed by a fresh
            in-memory ``CookieJar``. Any exception raised while doing so
            propagates to the caller.
        """
        SGMLParser.__init__(self)

        # Parser flags and accumulators.
        self.h3 = False
        self.h3_is_ready = False
        self.div = False
        self.h3_and_div = False
        self.a = False
        self.depth = 0
        self.names = ""
        self.dic = {}

        # Login-related values.
        self.base = base
        self.fastloginfield = "username"
        self.username = username
        self.password = password
        self.quickforward = "yes"
        self.handlekey = "ls"

        # The former ``try/except: raise/else`` wrapper only re-raised, so the
        # statements now run unguarded with exactly the same outcome.
        cookie = cookielib.CookieJar()
        cookieProc = urllib2.HTTPCookieProcessor(cookie)
        opener = urllib2.build_opener(cookieProc)
        urllib2.install_opener(opener)
 def reset(self):
   """Reset the base parser and restore the 100blagues.com URL and state."""
   SGMLParser.reset(self)
   self.url = "http://www.100blagues.com/random"
   # List of quotes to be filled, plus the quote currently being built.
   self.quote = []
   self.current_quote = ""
   # True while inside the <div class="left comment">...</div> tag.
   self.inside_div_element = False
   # True while inside <div class="left comment">...<a href>...</a>...</div>.
   self.inside_div_a_element = False
Example #19
0
 def reset(self):
     """Reset the base parser and clear the collected terms and flags."""
     SGMLParser.reset(self)
     self.terms = {}
     self.current_id = None
     self.data = ''
     self.inside_terms = self.capture = False
 def __init__(self):
     """Set the page-level defaults, then initialise the base parser."""
     # Page identity and the items gathered from it.
     self.url = None
     self.links, self.images, self.keywords = [], [], []
     self.linkpos = {}
     # Description and title of the page.
     self.description = self.title = ''
     self.title_flag = True
     # Fix for <base href="..."> links, and the base url that goes with it.
     self.base_href = False
     self.base = None
     # Anchor links flag.
     self._anchors = True
     # For the META robots tag.
     self.can_index = self.can_follow = True
     # Current tag.
     self._tag = ''
     # Base initialisation happens only after the attributes above exist.
     SGMLParser.__init__(self)
     # Type.
     self.typ = 0
Example #21
0
 def reset(self):
     """Reset the base parser, empty both URL lists and reset the flags."""
     SGMLParser.reset(self)
     self.url, self.next_url = [], []
     # Only `is_target` starts switched on.
     self.is_td = self.is_just = False
     self.is_target = True
 def reset(self):
   """Reset the base parser and restore the vitadimerda.it URL and state."""
   SGMLParser.reset(self)
   self.url = "http://www.vitadimerda.it/aleatorie"
   # List of quotes to be filled, plus the quote currently being built.
   self.quote = []
   self.current_quote = ""
   # True while inside the <div></div> tag; the second flag also starts off.
   self.inside_div_element = False
   self.inside_div_p_element = False
Example #23
0
	def reset(self):
		"""Reset the base parser, empty ``urls`` and zero the integer fields."""
		SGMLParser.reset(self)
		self.urls = []
		# Every integer field starts at 0.
		self.is_a = self.is_href = 0
		self.key_num = self.tag = 0
Example #24
0
 def __init__(self, site):
     """Initialise the parser for ``site`` with empty result lists.

     Args:
         site: stored as ``self.site``.
     """
     SGMLParser.__init__(self)
     self.site = site
     self.city, self.name, self.url = [], [], []
     # Created last: the state object receives this instance.
     self.state = StateBase(self)
Example #25
0
    def reset(self):
        r"""
        Set every attribute the processor needs to its starting value,
        then reset the base parser.  Invoked through
        :meth:`SGMLParser.__init__`.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor()    #indirect doctest
            sage: d.keep_data
            False
            sage: d.in_highlight_div
            False
            sage: d.temp_pieces
            []
            sage: d.all_pieces
            u''
            sage: d.cellcount
            0
        """
        # counter
        self.cellcount = 0

        # what the parser keeps
        self.all_pieces = ''
        self.temp_pieces = []

        # flags
        self.in_highlight_div = False
        self.keep_data = False  # nothing before the <body> tag is kept

        SGMLParser.reset(self)
Example #26
0
 def reset(self):
     """Reset the base parser and clear the token-collection state."""
     SGMLParser.reset(self)
     # Finished tokens and the token under construction.
     self.tokens = []
     self.token = {}
     self.state = ''
     self.in_directory_index = self.in_row = False
Example #27
0
 def reset(self):
     """Clear the extracted fields and flags, then reset the base parser."""
     self.picture = []
     # Text fields start as empty strings, flags switched off.
     self.title = self.url = ""
     self.flag = self.title_flag = False
     # The base reset runs last, after the attributes above exist.
     SGMLParser.reset(self)
Example #28
0
 def reset(self):
     """Reset the base parser and clear the song-collection state."""
     SGMLParser.reset(self)
     self.songs = {}
     # Text fields start as empty strings, flags switched off.
     self.cursong = self.name = ''
     self.insong = self.newsong = False
 def __init__(self, context=None, request=None):
     """Initialise the parser and remember the optional context and request.

     Args:
         context: stored as ``self.context`` (default None).
         request: stored as ``self.request`` (default None).
     """
     SGMLParser.__init__(self)
     self.context, self.request = context, request
     self.current_status = None
     self.pieces = []
     self.in_link = False
Example #30
0
 def reset(self):
     """Reset the base parser, empty ``text`` and zero the tag counters."""
     SGMLParser.reset(self)
     self.text = []
     # `ul`, `ol` and `pre` all start at 0.
     self.ul = self.ol = self.pre = 0
     self.a_flag = False