Example #1
def MAIN(murl):
    if 'TV' in murl:
        main.addDir('Movies','MOVIES',451,art+'/shush.png')
        link=main.OPENURL('http://www.shush.se/index.php?shows')
        link=link.replace('\r','').replace('\n','').replace('\t','').replace('&nbsp;','').replace('&raquo;','')
        match=re.compile('(?sim)class="shows"><a href="([^"]+)"><img src="([^"]+)" alt="Watch (.+?) online').findall(link)
        for url,thumb,name in match:
            main.addDirT(name.title(),'http://www.shush.se/'+url,452,thumb,'','','','','')
    else:
        main.addDir('TV','TV',451,art+'/shush.png')
        link=main.OPENURL('http://www.shush.se/index.php?movies')
        link=link.replace('\r','').replace('\n','').replace('\t','').replace('&nbsp;','').replace('&raquo;','')
        match=re.compile('(?sim)class="shows"><a href="([^"]+)"><img src="([^"]+)" alt="([^"]+)" title=').findall(link)
        dialogWait = xbmcgui.DialogProgress()
        ret = dialogWait.create('Please wait until Movie list is cached.')
        totalLinks = len(match)
        loadedLinks = 0
        remaining_display = 'Movies loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
        dialogWait.update(0,'[B]Will load instantly from now on[/B]',remaining_display)
        xbmc.executebuiltin("XBMC.Dialog.Close(busydialog,true)")
        for url,thumb,name in match:
            main.addPlayM(name.title(),'http://www.shush.se/'+url,453,thumb,'','','','','')
            loadedLinks = loadedLinks + 1
            percent = (loadedLinks * 100)/totalLinks
            remaining_display = 'Movies loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
            dialogWait.update(percent,'[B]Will load instantly from now on[/B]',remaining_display)
            if dialogWait.iscanceled():
                return False
        dialogWait.close()
        del dialogWait
Example #2
    def __parse_positions(self, var):
        listPattern = re.compile(r"\[[^\[]+?\]")
        tagPattern = re.compile(r"<.+?>")
        betweenTagPattern = re.compile(r">(.+?)<")
        numberPattern = re.compile(r"-?\d+\.?\d*")
        stringPattern = re.compile(r'".*?[^\\]"')

        positions = []
        columns = ("pid", "date", "stock", "percentage", "shares", "notes")
        for text in listPattern.findall(var):
            data = stringPattern.findall(text[1:-1])
            stock = betweenTagPattern.findall(data[0])[0]
            if self.user == list_user:
                percentage = shares = "NULL"
                notes = tagPattern.sub(" ", data[-1][1:-1])
            else:
                comments = tagPattern.split(data[-1][1:-1])
                try:
                    percentage = float(numberPattern.findall(comments[0])[0])
                except (IndexError, ValueError):
                    percentage = 0
                try:
                    shares = float(numberPattern.findall(comments[1])[0])
                except (IndexError, ValueError):
                    shares = 0
                try:
                    notes = comments[2]
                except IndexError:
                    notes = ""
            positions.append(
                dict(zip(columns, (self.id, self.now, stock, percentage, shares, notes.encode("ascii", "ignore"))))
            )
        return positions
Example #3
 def get_user_info(self, uid):
     """
     获取用户基本信息
     :param uid: 用户id
     :return: 用户基本信息
     """
     user_info_url = 'http://weibo.cn/%s/info' % uid
     user_info_page = self.get_page(user_info_url)
     sex_pattern = re.compile(r'性别:(.*?)<br/>')
     area_pattern = re.compile(r'地区:(.*?)<br/>')
     birth_pattern = re.compile(r'生日:(\d*?)-.*?<br/>')
     sex = re.search(sex_pattern, user_info_page)
     area = re.search(area_pattern, user_info_page)
     birth = re.search(birth_pattern, user_info_page)
     if sex:
         sex = sex.group(1)
     if area:
         area = area.group(1)
     if birth:
         birth = birth.group(1)
         if int(birth) != 1:    # skip users whose birth year is Weibo's default value ("0001")
             info = {'性别': sex, '地区': area, '年龄': 2016-int(birth)}
             return info
     info = {'性别': sex, '地区': area, '年龄': None}
     return info
Example #4
def makeconfig(infp, outfp, modules, with_ifdef=0):
    m1 = re.compile('-- ADDMODULE MARKER 1 --')
    m2 = re.compile('-- ADDMODULE MARKER 2 --')
    while 1:
        line = infp.readline()
        if not line: break
        outfp.write(line)
        if m1 and m1.search(line):
            m1 = None
            for mod in modules:
                if mod in never:
                    continue
                if with_ifdef:
                    outfp.write("#ifndef init%s\n"%mod)
                outfp.write('extern void init%s(void);\n' % mod)
                if with_ifdef:
                    outfp.write("#endif\n")
        elif m2 and m2.search(line):
            m2 = None
            for mod in modules:
                if mod in never:
                    continue
                outfp.write('\t{"%s", init%s},\n' %
                            (mod, mod))
    if m1:
        sys.stderr.write('MARKER 1 never found\n')
    if m2:
        sys.stderr.write('MARKER 2 never found\n')
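A minimal driving sketch (names are illustrative; `never`, the module-level blacklist the function consults, is assumed empty):

import io
import sys

never = []  # assumed module-level blacklist of modules to skip

template = io.StringIO(
    "/* -- ADDMODULE MARKER 1 -- */\n"
    "/* -- ADDMODULE MARKER 2 -- */\n"
)
makeconfig(template, sys.stdout, ["spam", "eggs"], with_ifdef=1)
# emits an #ifndef-guarded extern declaration per module after marker 1
# and a '\t{"spam", initspam},' style table entry per module after marker 2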
Example #5
    def acquire(self, testname, buf, status, command):
        # record failures based on exit status
        if status:
            self.failures.append("Exit %s: %s" % (status, command))

        # scan test log for magical tokens
        # see also: http://hg.mozilla.org/automation/logparser/
        passre = re.compile("^TEST-(PASS|EXPECTED-FAIL).*")
        failre = re.compile("^TEST-UNEXPECTED-.*")
        tback = re.compile("^Traceback.*")
        excpt = re.compile("^Exception:.*")

        self.text[testname] = []

        for line in buf:
            print line
            if passre.match(line):
                self.passes.append(line)
            elif failre.match(line):
                self.failures.append(line)
            elif tback.match(line):
                self.failures.append(line)
            elif excpt.match(line):
                self.failures.append(line)
            else:
                self.info.append(line)
            self.text[testname].append(line)
Example #6
def check_easyblocks_for_environment(home):
    """ check whether os.putenv or os.environ[]= is used inside easyblocks """

    files = glob.glob(os.path.join(home, 'easybuild/easyblocks/[a-z]/*.py'))
    eb_files = filter(lambda x: os.path.basename(x) != '__init__.py', files)

    os_env_re = re.compile(r"os\.environ\[['\"]?\w+['\"]?\]\s*=\s*")  # keys are usually quoted strings
    os_putenv_re = re.compile(r"os\.putenv")

    found = []
    for eb_file in eb_files:
        f = open(eb_file, "r")
        text = f.read()
        f.close()

        if os_putenv_re.search(text) or os_env_re.search(text):
            found.append(eb_file)

    for faulty in found:
        warning("found os.environ or os.putenv inside eb_file: %s" % faulty)

    if found:
        warning("Only easybuild.tools.environment.set should be used for setting environment variables.")

    return len(found) == 0
Example #7
def catchweibo():
	c = 3362
	# c is the crawler's starting page
	for i in range(6906):
		pn = (i+c)
		url = 'http://weibo.cn/1767797335/profile?filter=0&page='+str(pn)	
		# The URL above is the Weibo profile of the user to crawl; weibo.cn is used because it has far fewer access restrictions
		print url
		req = urllib2.Request(url)
		req.add_header("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36")
		req.add_header("Cookie", "_T_WM=edf4469bb5245a50aa32006460daa5ae; _T_WL=1; _WEIBO_UID=5638019231; SUB=_2A254gp9aDeTxGeNI6FoR8SfOyD2IHXVbjCESrDV6PUJbrdAKLXOnkW1HSRVVWhAfa6SQUOfsMJvV5z1nWg..; gsid_CTandWM=4u3Fdd4a1W8HT0Rlp91lUnEHN3J")
		# Replace the line above with your own cookie; one cookie lasts for roughly 1000 pages. With a single account, wait an hour between crawls
		try:
			res = urllib2.urlopen(req)
			print 'ok1'
		except:
			print 'error open'
			continue
		html = res.read()
		print html

		reg1 = re.compile(r'(<div class="c" id="M_[\d\D]*?)<div class="s"></div>')
		reg2 = re.compile(r'<span class="ct">(.*?)&nbsp;')
		yuanchuang = reg1.findall(html)
		# atime = reg2.findall(html)
		if not yuanchuang:
			print 'reg none'
			c = c-1
			continue
		for j in range(0, len(yuanchuang)):
			print len(yuanchuang)
			print yuanchuang[j]
			print '\n'
			fout.write(yuanchuang[j]+'\n'+'\n<br><br>')
Example #8
    def listsItems(self, url):
        query_data = { 'url': url, 'use_host': True, 'host': HOST, 'use_cookie': True, 'save_cookie': True, 'load_cookie': False,
                      'cookiefile': self.COOKIEFILE, 'use_post': False, 'return_data': True }
        link = self.cm.getURLRequestData(query_data)
        HEADER = {'Accept-Language': 'pl,en-US;q=0.7,en;q=0.3',
                  'Referer': url, 'User-Agent': HOST,
                  'X-Requested-With':'XMLHttpRequest',
                  'Content-Type': 'application/json'}

        #http://www.cda.pl/tick.php?ts=1443133845
        #query_data2 = { 'url': url, 'use_host': True, 'host': HOST,  'use_header': True, 'header': HEADER,
        #               'use_cookie': True, 'save_cookie': False, 'load_cookie': True,
        #              'cookiefile': self.COOKIEFILE, 'use_post': True, 'return_data': True }
        #link = self.cm.getURLRequestData(query_data2)
        #print("Link", link)
        match = re.compile('<label(.*?)>(.*?)</label>', re.DOTALL).findall(link)
        if len(match) > 0:
            for i in range(len(match)):
                match1 = re.compile('<img height="90" width="120" src="(.*?)" (.*?)>(.*?)<span class="timeElem">(.*?)</span>(.*?)</a>(.*?)<a class="titleElem" href="(.*?)">(.*?)</a>', re.DOTALL).findall(match[i][1])
                if len(match1) > 0:
                    self.add('cdapl', 'playSelectedMovie', 'None', self.cm.html_special_chars(match1[0][7]) + ' - '+ match1[0][3].strip(), match1[0][0], mainUrlb+match1[0][6], 'aaaa', 'None', False, False)
        else:
            match2 = re.compile('<div class="block upload" id="dodane_video">(.*?)<div class="paginationControl">', re.DOTALL).findall(link)
            match3 = re.compile('<div class="videoElem">\n                  <a href="(.*?)" style="position:relative;width:120px;height:90px" title="(.*?)">\n                    <img width="120" height="90" src="(.*?)" title="(.*?)" alt="(.*?)" />\n ', re.DOTALL).findall(match2[0])
            if len(match3) > 0:
                for i in range(len(match3)):
                    self.add('cdapl', 'playSelectedMovie', 'None', self.cm.html_special_chars(match3[i][1]) , match3[i][2], mainUrlb+match3[i][0], 'aaaa', 'None', True, False)
        #                     <span class="next-wrapper"><a onclick="javascript:changePage(2);return false;"       class="sbmBigNext btn-my btn-large fiximg" href="     "> &nbsp;Następna strona ></a></span>
        match10 = re.compile('<span class="next-wrapper"><a onclick="javascript:changePage\((.*?)\);return false;" class="sbmBigNext btn-my btn-large fiximg" href="(.*?)">(.*?)></a></span>', re.DOTALL).findall(link)
        print("M10000",match10)
        if len(match10) > 0:
            self.add('cdapl', 'categories-menu', 'Następna strona', 'None', 'None', mainUrlb+match10[0][1], 'None', 'None', True, False,match10[0][0])
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
Example #9
def __cut_internal(sentence,HMM=True):
    if not ( type(sentence) is unicode):
        try:
            sentence = sentence.decode('utf-8')
        except:
            sentence = sentence.decode('gbk','ignore')
    re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(ur"(\r\n|\s)")
    re_eng,re_num = re.compile(ur"[a-zA-Z0-9]+"), re.compile(ur"[\.0-9]+")
    blocks = re_han.split(sentence)
    if HMM:
        __cut_blk = __cut_DAG
    else:
        __cut_blk = __cut_DAG_NO_HMM

    for blk in blocks:
        if re_han.match(blk):
            for word in __cut_blk(blk):
                yield word
        else:
            tmp = re_skip.split(blk)
            for x in tmp:
                if re_skip.match(x):
                    yield pair(x,'x')
                else:
                    for xx in x:
                        if re_num.match(xx):
                            yield pair(xx,'m')
                        elif re_eng.match(x):
                            yield pair(xx,'eng')
                        else:
                            yield pair(xx,'x')
Example #10
 def _create_regs(self, clist=None):
     """Creates regular expressions for all connected routes"""
     if clist is None:
         if self.directory:
             clist = self.controller_scan(self.directory)
         elif callable(self.controller_scan):
             clist = self.controller_scan()
         elif not self.controller_scan:
             clist = []
         else:
             clist = self.controller_scan
     
     for key, val in self.maxkeys.iteritems():
         for route in val:
             route.makeregexp(clist)
     
     regexps = []
     routematches = []
     for route in self.matchlist:
         if not route.static:
             routematches.append(route)
             regexps.append(route.makeregexp(clist, include_names=False))
     self._routematches = routematches
     
     # Create our regexp to strip the prefix
     if self.prefix:
         self._regprefix = re.compile(self.prefix + '(.*)')
     
     # Save the master regexp
     regexp = '|'.join(['(?:%s)' % x for x in regexps])
     self._master_reg = regexp
     self._master_regexp = re.compile(regexp)
     self._created_regs = True
Example #11
def fromUrl( streamUrl ):
    """
    Two types of valid stream URLs:

    hdhomerun://<device-id>-<tuner>/ch<physical-channel>-<program-number>

    hdhomerun://<device-id>-<tuner>/tuner<tuner>?channel=<modulation>:<frequency>&program=<program-number>

    """
    Log( "Channel.py fromUrl ..." )

    channel = Channel()
    
    urlRe = re.compile( r'^\s*hdhomerun\:\/\/([\w\-]+)\-(\d+)\/tuner(\d+)\?channel\=([^\:]+)\:(.+)\&program\=(.+)$' )
    reMatch = urlRe.match( streamUrl )
    if reMatch:
        deviceId = reMatch.group(1)
        tunerId1 = reMatch.group(2)
        tunerId2 = reMatch.group(3)
        channel.Modulation = reMatch.group(4)
        channel.Frequency = reMatch.group(5)
        channel.ProgramNumber = reMatch.group(6)
        return channel

    urlRe = re.compile( r'^\s*hdhomerun\:\/\/([\w\-]+)\-(\d+)\/ch([^\-]+)-(\w+)$' )
    reMatch = urlRe.match( streamUrl )
    if reMatch:
        deviceId = reMatch.group(1)
        tunerId1 = reMatch.group(2)
        channel.PhysicalChannel = reMatch.group(3)
        channel.ProgramNumber = reMatch.group(4)
        return channel

    return None
Example #12
def parse_stat_str(s):
    print 'Parsing %s: ' % s
    d = {'mode': 'NOTPARSED', 'zxid': 'NOTPARSED'}
    d['mode'] = re.compile(r'.*Mode:\s(.*)').search(s).group(1)
    d['zxid'] = re.compile(r'.*Zxid:\s(.*)').search(s).group(1)
    print 'Parsed %s: ' % d
    return d
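Fed the output of ZooKeeper's `stat` four-letter command, usage might look like this (the input format here is an assumption):

sample = ("Zookeeper version: 3.4.6\n"
          "Mode: follower\n"
          "Zxid: 0x1a2b3c\n")
info = parse_stat_str(sample)
# info == {'mode': 'follower', 'zxid': '0x1a2b3c'}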
Example #13
def clean(sentence):
    """
    Takes the tweet as an input, cleans it using the regular expression
    defined and explained below and returns the cleaned string.

    All the "stop words" are removed by using the below list of regexs. Of the
    following the regex r"http[s]*://\S+", selects all the links in the sentence.
    r" q.\d+", selects the strings like q.1653 from the sentence.
    r"[#@]\w+", selects the @ mentions and hashtags in the sentence.
    r"[^A-Za-z0-9]", selects all the special characters in the sentence.
    r"\w+[-']\w+" selects all the words with "-" or "'" in between them.
    """

    common = [r"\bi\b", r"\bi[nfs]\b", r"\bo[nfr]\b", r"\ba\b", r"\ba[nts]\b",
              r"^i", r"\bother\b", r"\bhe\b", r"\bhave\b", r"\bus\b",
              r"\b[gdtsn]o\b", r"\bnot\b", r"\b[wb]e\b", r"\byour[s]*\b",
              r"\bwhich\b", r"\bthat\b", r"\bha[sd]\b", r"\band\b", r"\bby\b",
              r"\bthe[y]*\b", r"\b[t]*his\b", r"\bit[s]*\b", r"\bfor\b", r"\byou\b",
              r"\bwill\b", r"\bg[eo]t\b", r"\bbut\b", r"\bour\b", r"\bwas\b",
              r"\bcan\b", r"\balso\b", r"\byet\b", r"\bafter\b", r"\bwith\b",
              r"\bthem\b", r"\bdid\b", r"\bare\b", r"\bfrom\b", r"http[s]*://\S+",
              r" q.\d+", r"[#@]\w+", r"[^A-Za-z0-9]", r"\w+[-']\w+"]

    pattern = r"(" + r"|".join(common) + r")"
    p = re.compile(pattern)

    sentence = p.sub(" ", sentence)

    p = re.compile("  +")
    sentence = p.sub(" ", sentence).strip()

    return sentence
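A quick illustration of the cleaning (a sketch; exactly which words survive depends on the stop-word list above):

tweet = "Check this out https://t.co/abc123 #wow so great!"
print(clean(tweet))  # URL, hashtag, punctuation and stop words stripped:
                     # -> "Check out great"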
Example #14
    def find_bug_ids(self, ctx):
        '''find valid bug ids that are referred to in changeset
        comments and that do not already have references to this
        changeset.'''

        if bugzilla._bug_re is None:
            bugzilla._bug_re = re.compile(
                self.ui.config('bugzilla', 'regexp', bugzilla._default_bug_re),
                re.IGNORECASE)
            bugzilla._split_re = re.compile(r'\D+')
        start = 0
        ids = set()
        while True:
            m = bugzilla._bug_re.search(ctx.description(), start)
            if not m:
                break
            start = m.end()
            for id in bugzilla._split_re.split(m.group(1)):
                if not id: continue
                ids.add(int(id))
        if ids:
            ids = self.filter_real_bug_ids(ids)
        if ids:
            ids = self.filter_unknown_bug_ids(ctx.node(), ids)
        return ids
Example #15
def interact(line, stdin, process):
  global tot_sec
  global VIDEO_PAUSED
  global omx_stdin
  global omx_process

  omx_stdin = stdin
  omx_process = process

  # video regexp
  video_curr_rexp = re.compile(r'V :\s*([\d.]+).*')
  video_total_rexp = re.compile(r'Length : *([\d.]+)*')

  # get current video time
  curr = video_curr_rexp.search(line)

  if curr and tot_sec:
    pts = curr.group(1)
    sec = int(pts.split(".")[0]) / 1000000
    print(sec, tot_sec)
    # stop video to last seconds
    if tot_sec == sec and VIDEO_PAUSED == False:
      VIDEO_PAUSED = True
      stdin.put('p')
      print("---- PAUSE ----")

  else:
    total = video_total_rexp.search(line)

    if total:
      tot_pts = total.group(1)
      tot_sec = (int(tot_pts) / 1000) - 11
Example #16
  def ParseMethodAnnotation(self, annotation):
    if annotation.find('reservable = true') >= 0:
      self._is_reservable = True

    delegate_re = re.compile(r'delegate\s*=\s*'
        r'(?P<delegate>(true|false))')
    for match in re.finditer(delegate_re, annotation):
      delegate = match.group('delegate')
      if delegate == 'true':
        self._is_delegate = True
      elif delegate == 'false':
        self._is_delegate = False

    disable_reflect_method_re = re.compile(r'disableReflectMethod\s*=\s*'
        r'(?P<disableReflectMethod>(true|false))')
    for match in re.finditer(disable_reflect_method_re, annotation):
      disable_reflect_method = match.group('disableReflectMethod')
      if disable_reflect_method == 'true':
        self._disable_reflect_method = True
      else:
        self._disable_reflect_method = False

    pre_wrapline_re = re.compile(r'preWrapperLines\s*=\s*\{\s*'
        r'(?P<pre_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(pre_wrapline_re, annotation):
      pre_wrapline = self.FormatWrapperLine(match.group('pre_wrapline'))
      self._method_annotations[self.ANNOTATION_PRE_WRAPLINE] = pre_wrapline

    post_wrapline_re = re.compile(r'postWrapperLines\s*=\s*\{\s*'
        r'(?P<post_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(post_wrapline_re, annotation):
      post_wrapline = self.FormatWrapperLine(match.group('post_wrapline'))
      self._method_annotations[self.ANNOTATION_POST_WRAPLINE] = post_wrapline
Example #17
def LISTTV4(murl):
        main.addDir('Search Rlsmix','rlsmix',136,"%s/art/search.png"%selfAddon.getAddonInfo("path"))
        main.addLink('[COLOR red]First turbobit Link could be HD[/COLOR]','',"%s/art/tvb.png"%selfAddon.getAddonInfo("path"))
        urllist=['http://www.rlsmix.net/category/tv-shows/']+['http://www.rlsmix.net/category/tv-shows/page/%d/'%i for i in range(2,11)]
        dialogWait = xbmcgui.DialogProgress()
        ret = dialogWait.create('Please wait until Show list is cached.')
        totalLinks = 10
        loadedLinks = 0
        remaining_display = 'Pages loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
        dialogWait.update(0,'[B]Will load instantly from now on[/B]',remaining_display)
        for murl in urllist:
                link=main.OPENURL(murl)
                link=link.replace('\r','').replace('\n','').replace('\t','').replace('&nbsp;','')
                match=re.compile('<h1 class="titles"><a href="(.+?)" title="Permanent Link to (.+?)">.+?src="http://uppix.net/(.+?)"').findall(link)
                for url,name,thumb in match:
                        match2=re.compile('TV Round Up').findall(name)
                        name=name.replace('\xc2\xa0','').replace('" ','').replace(' "','').replace('"','').replace("&#039;","'").replace("&amp;","and").replace("&#8217;","'").replace("amp;","and").replace("#8211;","-")
                        if len(match2)==0:
                            main.addDir(name,url,62,'http://uppix.net/'+thumb)
                
                loadedLinks = loadedLinks + 1
                percent = (loadedLinks * 100)/totalLinks
                remaining_display = 'Pages loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
                dialogWait.update(percent,'[B]Will load instantly from now on[/B]',remaining_display)
                if dialogWait.iscanceled():
                    return False
        dialogWait.close()
        del dialogWait
        main.GA("TV","Rlsmix")
Example #18
def getDiseaseLink(category):
    '''
    Fetch each category's list of diseases, based on the category list.
    '''
    global disease_list
    global host
    global lock
    
    re1 = re.compile(r'\d+')
    re2 = re.compile(r'.*(/dkd/disease/.*/)".*') 
    
    page = 1
    while True:
        replace_url = host + re1.sub(str(page), category)
        try:
            content = getContent(replace_url)
        except Exception:
            break
        else:
            links = getMatchItems(re2, content)
            
            if links:
                lock.acquire()
                try:
                    page += 1
                    for link in links:
                        if link not in disease_list:
                            disease_list.append(link)
                finally:
                    lock.release()
            else:
                break
                    
    return disease_list
Example #19
def listA(name, type, url, thumb):
    link = GetHttpData(url)
    match1 = re.compile("<!-- 剧集列表 start -->(.+?)<!-- 剧集列表 end -->", re.DOTALL).findall(link)
    match2 = re.compile('<div class="left">(.+?)</div>', re.DOTALL).findall(match1[0])
    if match2:
        match = re.compile(r"'videoListCon', '(.+?)'", re.DOTALL).findall(match2[0])
        if match:
            FindItems(type, match1[0])
            for url in match:
                link = GetHttpData("http://www.juchang.com" + url)
                link = link.decode("gbk").encode("utf8")
                FindItems(type, link)
                match2 = re.compile('<a href="#" class="one"(.+?)<a class="two"', re.DOTALL).findall(link)
                if match2:
                    match3 = re.compile(r"'videoListCon','(.+?)'", re.DOTALL).findall(match2[0])
                    for urla in match3:
                        link = GetHttpData("http://www.juchang.com" + urla)
                        link = link.decode("gbk").encode("utf8")
                        FindItems(type, link)
        else:
            FindItems(type, match1[0])
    else:
        FindItems(type, match1[0])

    xbmcplugin.setContent(int(sys.argv[1]), "movies")
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
Example #20
    def loginWithCheckCode(self):
        checkcode = raw_input('请输入验证码')  # prompt: "please enter the captcha"
        self.post['TPL_checkcode'] = checkcode
        self.postData = urllib.urlencode(self.post)
        try:
            request = urllib2.Request(self.loginURL,self.postData,self.loginHeaders)
            response = self.opener.open(request)
            content = response.read().decode('gbk')
            pattern = re.compile(u'\u9a8c\u8bc1\u7801\u9519\u8bef',re.S)  # the characters for "验证码错误" (captcha incorrect)
            result = re.search(pattern,content)
            if result:
                print u"验证码输入错误"
                return False
            else:
                tokenPattern = re.compile('id="J_HToken" value="(.*?)"')
                tokenMatch = re.search(tokenPattern,content)
                if tokenMatch:
                    print u"验证码输入正确"
                    print tokenMatch.group(1)
                    return tokenMatch.group(1)
                else:
                    print u"J_Token"
                    return False

        except urllib2.HTTPError,e:
            print u"出错",e.reason
            return False
Example #21
def GetSetting(setting, timeout=5):
    if not IsSetting(setting):    
        raise ValueError("Not a setting.")
    if not Global.serverlog:
        raise RuntimeError("Script wasn't started with run.py or Global.serverlog wasn't set.")
    startpattern=r"\[0[^\]]*\] "
    pattern1=re.compile(startpattern+setting.upper()+r" is currently set to (?P<value>[^.]*)\.")
    pattern2=re.compile(startpattern+setting.upper()+r" changed from (?P<value>((([^t]|t+[^o])*)(to)*)*)to \.")
    serverlog=open(Global.serverlog, encoding="latin-1")
    serverlog.seek(0,2)
    SendCommand(setting.upper())
    match=None
    for i in range(timeout*2): #@UnusedVariable
        for line in serverlog.readlines():
            match=pattern1.search(line)
            if match is None:
                match=pattern2.search(line)
            if match is not None:
                break
        if match is not None:
            break
        time.sleep(0.5)
    if match is None:
        return ""
    value=match.group("value")
    SendCommand(setting.upper()+" "+value)
    return value.strip()
Example #22
def normalizeTitle(title):
  # remove leading whitespace and underscores
  title = title.strip(' _')
  # replace sequences of whitespace and underscore chars with a single space
  title = re.compile(r'[\s_]+').sub(' ', title)

  m = re.compile(r'([^:]*):(\s*)(\S(?:.*))').match(title)
  if m:
      prefix = m.group(1)
      if m.group(2):
          optionalWhitespace = ' '
      else:
          optionalWhitespace = ''
      rest = m.group(3)

      ns = prefix.capitalize()
      if ns in acceptedNamespaces:
          # If the prefix designates a known namespace, then it might be
          # followed by optional whitespace that should be removed to get
          # the canonical page name
          # (e.g., "Category:  Births" should become "Category:Births").
          title = ns + ":" + rest.capitalize()
      else:
          # No namespace, just capitalize first letter.
          # If the part before the colon is not a known namespace, then we must
          # not remove the space after the colon (if any), e.g.,
          # "3001: The_Final_Odyssey" != "3001:The_Final_Odyssey".
          # However, to get the canonical page name we must contract multiple
          # spaces into one, because
          # "3001:   The_Final_Odyssey" != "3001: The_Final_Odyssey".
          title = prefix.capitalize() + ":" + optionalWhitespace + rest
  else:
      # no namespace, just capitalize first letter
      title = title.capitalize()
  return title
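For instance, assuming the global acceptedNamespaces contains 'Category':

normalizeTitle("  category:  births ")       # -> "Category:Births"
normalizeTitle("3001:   The_Final_Odyssey")  # -> "3001: The Final Odyssey"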
Example #23
    def get_ticket_numbers(cls, build):
        """Extract ticket ids from the changeset of a Jenkins build"""
        items = build.get_changeset_items()
        ticket_numbers = []
        regex = re.compile(cls.TICKET_REGEX)

        for entry in items:
            message = entry["msg"]
            print("-- found message: ", message)

            noissue = re.compile(r"#noissue")
            if not noissue.search(message):
                match = regex.search(message)
                if match is None:
                    print(
                        "found malformed message in build: ",
                        build.get_number(), "\n",
                        "with message: ",
                        message
                    )
                else:
                    ticket = match.group(1)
                    if ticket not in ticket_numbers:
                        ticket_numbers.append(ticket)

        return ticket_numbers
Example #24
def parse_replace_hook(s):
    """
        Returns a (pattern, regex, replacement) tuple.

        The general form for a replacement hook is as follows:

            /patt/regex/replacement

        The first character specifies the separator. Example:

            :~q:foo:bar

        If only two clauses are specified, the pattern is set to match
        universally (i.e. ".*"). Example:

            /foo/bar/

        Clauses are parsed from left to right. Extra separators are taken to be
        part of the final clause. For instance, the replacement clause below is
        "foo/bar/":

            /one/two/foo/bar/

        Checks that pattern and regex are both well-formed. Raises
        ParseException on error.
    """
    patt, regex, replacement = _parse_hook(s)
    try:
        re.compile(regex)
    except re.error, e:
        raise ParseException("Malformed replacement regex: %s"%str(e.message))
    return patt, regex, replacement
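Assuming _parse_hook splits the clauses as the docstring describes, usage would look like:

patt, regex, replacement = parse_replace_hook(":~q:foo:bar")
# -> ("~q", "foo", "bar"); a malformed regex clause raises ParseException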
Example #25
    def __load_book_menu(self, lines):
        r1 = re.compile(u'^\s*目\s*录\s*$')   # a "目录" (table of contents) heading line
        r2 = re.compile(u'^\s*([^·…]+)\s*[·.…]{2,}\s*([l\d]+)\s*$')   # "title ···· page"
        menus = {}
        start = False
        not_match = 0
        for line in lines:
            words = line.decode(self.default_coding)
            words = words.strip('\n')
            if re.match(r1, words):
                start = True
                continue
            elif start:
                m = re.match(r2, words)
                if m:
                    title = m.group(1)
                    page = m.group(2)
                    page = page.replace('l', '1')  # OCR often misreads '1' as 'l'
                    page = int(page.encode(self.default_coding))
                    menus[page] = self.__get_simple_string(title)
                    not_match = 0
                else:
                    not_match += 1
                    if not_match > 10:
                        break

        return menus
Example #26
def Episodes(url, name):
    # try:
    link = GetContentMob(url)
    newlink = "".join(link.splitlines()).replace("\t", "")
    match = re.compile(
        '<td style="text-align:justify" class="movieepisode"><strong>' + name + "</strong>(.+?)</td>"
    ).findall(newlink)
    mirrors = re.compile("<a [^>]*href=[\"']?([^>^\"^']+)[\"']?[^>]*>(.+?)</a>").findall(match[0])

    if len(mirrors) >= 1:
        i = 1
        for mcontent in mirrors:
            vLinktemp, vLinkName = mcontent
            vLink = ""
            j = 1
            k = 1
            for mlink in mirrors:
                vLink1, vLinkName1 = mlink
                if j >= i:
                    if i == len(mirrors) or j == len(mirrors) or k == 12:
                        vLink += viddomain + vLink1 + "+++" + vLinkName1
                    else:
                        vLink += viddomain + vLink1 + "+++" + vLinkName1 + "***"
                    if k % 12 == 0:
                        break
                    k += 1
                j += 1
            i += 1
            # addLink("tập:  " + RemoveHTML(vLinkName).strip(),mobileurl+"/"+vLink,3,'',"")
            addLink("Tập:  " + RemoveHTML(vLinkName).strip(), vLink, 3, "", "")
            print vLink
Example #27
def _translate(version, rules, standard):
    """Translate Python version into Debian one.

    >>> _translate('1.C2betac', ['s/c//gi'], None)
    '1.2beta'
    >>> _translate('5-fooa1.2beta3-fooD',
    ...     ['s/^/1:/', 's/-foo//g', 's:([A-Z]):+$1:'], 'PEP386')
    '1:5~a1.2~beta3+D'
    >>> _translate('x.y.x.z', ['tr/xy/ab/', 'y,z,Z,'], None)
    'a.b.a.Z'
    """
    for rule in rules:
        # uscan supports s, tr and y operations
        if rule.startswith(('tr', 'y')):
            # Note: no support for escaped separator in the pattern
            pos = 1 if rule.startswith('y') else 2
            tmp = rule[pos + 1:].split(rule[pos])
            version = version.translate(str.maketrans(tmp[0], tmp[1]))
        elif rule.startswith('s'):
            # uscan supports: g, u and x flags
            tmp = rule[2:].split(rule[1])
            pattern = re.compile(tmp[0])
            count = 1
            if tmp[2:]:
                flags = tmp[2]
                if 'g' in flags:
                    count = 0
                if 'i' in flags:
                    pattern = re.compile(tmp[0], re.I)
            version = pattern.sub(_pl2py(tmp[1]), version, count)
        else:
            log.warn('unknown rule ignored: %s', rule)
    if standard == 'PEP386':
        version = PRE_VER_RE.sub(r'~\g<1>', version)
    return version
Example #28
    def add(self, irc, msg, args, channel, regexp, action):
        """[<channel>] <regexp> <action>

        Associates <regexp> with <action>.  <channel> is only
        necessary if the message isn't sent on the channel
        itself.  Action is echoed upon regexp match, with variables $1, $2, 
        etc. being interpolated from the regexp match groups."""
        if not self._checkManageCapabilities(irc, msg, channel):
            capabilities = self.registryValue('requireManageCapability')
            irc.errorNoCapability(capabilities, Raise=True)
        db = self.getDb(channel)
        cursor = db.cursor()
        cursor.execute("SELECT id, usage_count, locked FROM triggers WHERE regexp=?", (regexp,))
        results = cursor.fetchall()
        if len(results) != 0:
            (id, usage_count, locked) = map(int, results[0])
        else:
            locked = 0
            usage_count = 0
        if not locked:
            try:
                re.compile(regexp)
            except Exception, e:
                irc.error('Invalid python regexp: %s' % (e,))
                return
            if ircdb.users.hasUser(msg.prefix):
                name = ircdb.users.getUser(msg.prefix).name
            else:
                name = msg.nick
            cursor.execute("""INSERT INTO triggers VALUES
                              (NULL, ?, ?, ?, ?, ?, ?)""",
                            (regexp, name, int(time.time()), usage_count, action, locked,))
            db.commit()
            irc.replySuccess()
Example #29
def init_db(db_url):
    regex = re.compile('^mongodb:\\/\\/(.*?):(.*?)@(.*?):([0-9]+)\\/(.*)$')
    match = regex.match(db_url)

    if not match:
        regex = re.compile('^mongodb:\\/\\/(.*?)\\/(.*)$')
        match = regex.match(db_url)

        username = None
        password = None
        host = match.group(1)
        port = None
        db_name = match.group(2)
    else:
        username = match.group(1)
        password = match.group(2)
        host = match.group(3)
        port = int(match.group(4))
        db_name = match.group(5)

    conn = mongoengine.connect(db_name,
            host=host,
            port=port,
            username=username,
            password=password)

    return conn[db_name]
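Both accepted URL shapes, as a usage sketch (host and credentials are placeholders):

db = init_db('mongodb://appuser:secret@db.example.com:27017/appdb')  # with auth
db = init_db('mongodb://localhost/appdb')                            # without auth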
Example #30
 def __init__(self, **kwargs):
     for k, v in kwargs.items():
         if hasattr(self, k):
             setattr(self, k, v)
     self.keys = AttrDictSimple(
         name=("Name", SENTENCE_SYMBOLS_RE, "Name " +
               SENTENCE_SYMBOLS_WARNING),
         ident=("Identifier", re.compile(r"^[a-zA-Z][a-zA-Z\d_]{2,}$"),
                "Identifier must be 3 or more alphanumeric characters"
                " (underscore allowed)."),
         desc=("Description", SENTENCE_SYMBOLS_RE, "Description " +
               SENTENCE_SYMBOLS_WARNING),
         prefix=("Prefix", re.compile(r"^[a-zA-Z][a-zA-Z\d_]{2,4}$"),
                 "Prefix must be 3 to 5 alphanumeric characters"
                 " (underscores allowed)."),
         domain=("Domain",
                 re.compile(r"^([a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*\.)*"
                            "[a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*"
                            "\.[a-zA-Z]{2,4}$"),
                 "Domain must be a valid domain name."),
         mimetype=("MIME type", re.compile(r"^[a-zA-Z0-9]+"
                                           "\/[a-zA-Z0-9\-]+$"),
                   "Please use alphanumeric characters and dashes in the"
                   " format: application/x-firebreath"),
         disable_gui=("has no UI", re.compile(r"^true$|false$"),
                      "Please enter valid input: true or false"),
     )
Example #31
import re
from datetime import time

import aniso8601
import pytz

# Constants for upgrading date-based intervals to full datetimes.
START_OF_DAY = time(0, 0, 0, tzinfo=pytz.UTC)
END_OF_DAY = time(23, 59, 59, 999999, tzinfo=pytz.UTC)

# https://code.djangoproject.com/browser/django/trunk/django/core/validators.py
# basic auth added by frank

url_regex = re.compile(
    r"^(?:http|ftp)s?://"  # http:// or https://
    r"(?:[^:@]+?:[^:@]*?@|)"  # basic auth
    r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+"
    r"(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
    r"localhost|"  # localhost...
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|"  # ...or ipv4
    r"\[?[A-F0-9]*:[A-F0-9:]+\]?)"  # ...or ipv6
    r"(?::\d+)?"  # optional port
    r"(?:/?|[/?]\S+)$",
    re.IGNORECASE,
)


def url(value):
    """Validate a URL.

    :param string value: The URL to validate
    :returns: The URL if valid.
    :raises: ValueError
    """
    if not url_regex.search(value):
        raise ValueError("Invalid URL: %s" % value)
    return value
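Usage then follows the docstring (a sketch):

url('https://example.com/path?q=1')  # returns the URL unchanged
url('not a url')                     # raises ValueError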
Example #32
class EventsBackend(BaseBackend):
    ACCOUNT_ID = re.compile(r"^(\d{1,12}|\*)$")
    STATEMENT_ID = re.compile(r"^[a-zA-Z0-9-_]{1,64}$")

    def __init__(self, region_name):
        self.rules = {}
        # This array tracks the order in which the rules have been added, since
        # 2.6 doesn't have OrderedDicts.
        self.rules_order = []
        self.next_tokens = {}
        self.region_name = region_name
        self.event_buses = {}
        self.event_sources = {}

        self._add_default_event_bus()

    def reset(self):
        region_name = self.region_name
        self.__dict__ = {}
        self.__init__(region_name)

    def _add_default_event_bus(self):
        self.event_buses["default"] = EventBus(self.region_name, "default")

    def _get_rule_by_index(self, i):
        return self.rules.get(self.rules_order[i])

    def _gen_next_token(self, index):
        token = os.urandom(128).encode("base64")  # Python 2 str API; on Python 3 use base64.b64encode(os.urandom(128)).decode()
        self.next_tokens[token] = index
        return token

    def _process_token_and_limits(self, array_len, next_token=None, limit=None):
        start_index = 0
        end_index = array_len
        new_next_token = None

        if next_token:
            start_index = self.next_tokens.pop(next_token, 0)

        if limit is not None:
            new_end_index = start_index + int(limit)
            if new_end_index < end_index:
                end_index = new_end_index
                new_next_token = self._gen_next_token(end_index)

        return start_index, end_index, new_next_token

    def delete_rule(self, name):
        self.rules_order.pop(self.rules_order.index(name))
        return self.rules.pop(name) is not None

    def describe_rule(self, name):
        return self.rules.get(name)

    def disable_rule(self, name):
        if name in self.rules:
            self.rules[name].disable()
            return True

        return False

    def enable_rule(self, name):
        if name in self.rules:
            self.rules[name].enable()
            return True

        return False

    def list_rule_names_by_target(self, target_arn, next_token=None, limit=None):
        matching_rules = []
        return_obj = {}

        start_index, end_index, new_next_token = self._process_token_and_limits(
            len(self.rules), next_token, limit
        )

        for i in range(start_index, end_index):
            rule = self._get_rule_by_index(i)
            for target in rule.targets:
                if target["Arn"] == target_arn:
                    matching_rules.append(rule.name)

        return_obj["RuleNames"] = matching_rules
        if new_next_token is not None:
            return_obj["NextToken"] = new_next_token

        return return_obj

    def list_rules(self, prefix=None, next_token=None, limit=None):
        match_string = ".*"
        if prefix is not None:
            match_string = "^" + prefix + match_string

        match_regex = re.compile(match_string)

        matching_rules = []
        return_obj = {}

        start_index, end_index, new_next_token = self._process_token_and_limits(
            len(self.rules), next_token, limit
        )

        for i in range(start_index, end_index):
            rule = self._get_rule_by_index(i)
            if match_regex.match(rule.name):
                matching_rules.append(rule)

        return_obj["Rules"] = matching_rules
        if new_next_token is not None:
            return_obj["NextToken"] = new_next_token

        return return_obj

    def list_targets_by_rule(self, rule, next_token=None, limit=None):
        # We'll let a KeyError exception be thrown for response to handle if
        # rule doesn't exist.
        rule = self.rules[rule]

        start_index, end_index, new_next_token = self._process_token_and_limits(
            len(rule.targets), next_token, limit
        )

        returned_targets = []
        return_obj = {}

        for i in range(start_index, end_index):
            returned_targets.append(rule.targets[i])

        return_obj["Targets"] = returned_targets
        if new_next_token is not None:
            return_obj["NextToken"] = new_next_token

        return return_obj

    def put_rule(self, name, **kwargs):
        rule = Rule(name, self.region_name, **kwargs)
        self.rules[rule.name] = rule
        self.rules_order.append(rule.name)
        return rule.arn

    def put_targets(self, name, targets):
        rule = self.rules.get(name)

        if rule:
            rule.put_targets(targets)
            return True

        return False

    def put_events(self, events):
        num_events = len(events)

        if num_events < 1:
            raise JsonRESTError("ValidationError", "Need at least 1 event")
        elif num_events > 10:
            raise JsonRESTError("ValidationError", "Can only submit 10 events at once")

        # We dont really need to store the events yet
        return []

    def remove_targets(self, name, ids):
        rule = self.rules.get(name)

        if rule:
            rule.remove_targets(ids)
            return True

        return False

    def test_event_pattern(self):
        raise NotImplementedError()

    def put_permission(self, event_bus_name, action, principal, statement_id):
        if not event_bus_name:
            event_bus_name = "default"

        event_bus = self.describe_event_bus(event_bus_name)

        if action is None or action != "events:PutEvents":
            raise JsonRESTError(
                "ValidationException",
                "Provided value in parameter 'action' is not supported.",
            )

        if principal is None or self.ACCOUNT_ID.match(principal) is None:
            raise JsonRESTError(
                "InvalidParameterValue", "Principal must match ^(\d{1,12}|\*)$"
            )

        if statement_id is None or self.STATEMENT_ID.match(statement_id) is None:
            raise JsonRESTError(
                "InvalidParameterValue", "StatementId must match ^[a-zA-Z0-9-_]{1,64}$"
            )

        event_bus._permissions[statement_id] = {
            "Action": action,
            "Principal": principal,
        }

    def remove_permission(self, event_bus_name, statement_id):
        if not event_bus_name:
            event_bus_name = "default"

        event_bus = self.describe_event_bus(event_bus_name)

        if not len(event_bus._permissions):
            raise JsonRESTError(
                "ResourceNotFoundException", "EventBus does not have a policy."
            )

        if not event_bus._permissions.pop(statement_id, None):
            raise JsonRESTError(
                "ResourceNotFoundException",
                "Statement with the provided id does not exist.",
            )

    def describe_event_bus(self, name):
        if not name:
            name = "default"

        event_bus = self.event_buses.get(name)

        if not event_bus:
            raise JsonRESTError(
                "ResourceNotFoundException",
                "Event bus {} does not exist.".format(name),
            )

        return event_bus

    def create_event_bus(self, name, event_source_name):
        if name in self.event_buses:
            raise JsonRESTError(
                "ResourceAlreadyExistsException",
                "Event bus {} already exists.".format(name),
            )

        if not event_source_name and "/" in name:
            raise JsonRESTError(
                "ValidationException", "Event bus name must not contain '/'."
            )

        if event_source_name and event_source_name not in self.event_sources:
            raise JsonRESTError(
                "ResourceNotFoundException",
                "Event source {} does not exist.".format(event_source_name),
            )

        self.event_buses[name] = EventBus(self.region_name, name)

        return self.event_buses[name]

    def list_event_buses(self, name_prefix):
        if name_prefix:
            return [
                event_bus
                for event_bus in self.event_buses.values()
                if event_bus.name.startswith(name_prefix)
            ]

        return list(self.event_buses.values())

    def delete_event_bus(self, name):
        if name == "default":
            raise JsonRESTError(
                "ValidationException", "Cannot delete event bus default."
            )

        self.event_buses.pop(name, None)
Example #33
 def group_callback(test_id, regex=re.compile(group_regex)):
     match = regex.match(test_id)
     if match:
         return match.group(0)
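The regex is bound once, as a default argument, when the function is defined; with a hypothetical group_regex such as r'\w+\.\w+' the callback buckets test ids by their first two dotted components:

group_regex = r'\w+\.\w+'  # hypothetical grouping pattern
group_callback('pkg.tests.test_mod.TestCase.test_x')  # -> 'pkg.tests'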
Example #34
def get_hostmask_regex(mask):
    """Return a compiled `re.RegexObject` for an IRC hostmask"""
    mask = re.escape(mask)
    mask = mask.replace(r'\*', '.*')
    return re.compile(mask + '$', re.I)
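For example (a minimal sketch):

rx = get_hostmask_regex('*!*@*.example.com')
assert rx.match('Nick!user@host.example.com')  # wildcards expand, match is case-insensitive
assert not rx.match('Nick!user@example.org')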
Example #35
 def test_setting_regexp(self):
     with Settings(SENTRY_ALLOW_ORIGIN=[re.compile(r'https?\://(.*\.)?example\.com')]):
         assert is_valid_origin('http://example.com')
Example #36
class CSV(object):
    eval_re = re.compile(r'eval\((.*)\)')

    def __init__(self, fpath, newnames=None, delimiter=None, transpose=False):
        f = csv_open(fpath)
        if delimiter is None:
            dialect = csv.Sniffer().sniff(f.read(1024))
#            dialect = csv.Sniffer().sniff(f.read(1024), ',:|\t')
            f.seek(0)
            data_stream = csv.reader(f, dialect)
        else:
            data_stream = csv.reader(f, delimiter=delimiter)
        if transpose:
            transposed = transpose_table(list(data_stream))
            data_stream = iter(transposed)
        else:
            transposed = None
        self.fpath = fpath
        if newnames is not None:
            # TODO: move this junk out of the class
            basename = os.path.splitext(os.path.basename(fpath))[0]
            for k in newnames:
                m = self.eval_re.match(newnames[k])
                if m:
                    eval_str = m.group(1)
                    newnames[k] = eval_with_template(eval_str,
                                                     {'basename': basename})
        self.newnames = newnames
        self.transposed = transposed
        self.f = f
        self.data_stream = data_stream
        self._fields = None
        self._field_names = None
        self._numlines = None

    def __iter__(self):
        return iter(self.data_stream)

    def next(self):
        return next(self.data_stream)

    def rewind(self):
        if self.transposed is not None:
            self.data_stream = iter(self.transposed)
        else:
            self.f.seek(0)

    def close(self):
        self.f.close()

    @property
    def field_names(self):
        if self._field_names is None:
            # TODO: use self._fields instead if it was already computed
            # read the first line in the file
            self.rewind()
            fnames = self.next()
            if self.newnames is not None:
                fnames = [self.newnames.get(name, name) for name in fnames]
            self._field_names = fnames

        return self._field_names

    @property
    def fields(self):
        if self._fields is None:
            self.rewind()
            fields = detect_column_types(self.data_stream)
            if self.newnames is not None:
                fields = [(self.newnames.get(name, name), type_)
                          for name, type_ in fields]
            self._fields = fields
        return self._fields

    @property
    def numlines(self):
        if self._numlines is None:
            if self.transposed is not None:
                self._numlines = len(self.transposed) - 1
            else:
                self._numlines = countlines(self.fpath) - 1
        return self._numlines

    def read(self, fields=None):
        """imports one Xsv file with all columns
           * columns can be in any order (they will be reordered if needed)
           * row order is preserved
        """
        print(" - reading", self.fpath)
        if fields is None:
            fields = self.fields
            positions = None
        else:
            available = self.field_names
            missing = set(name for name, _ in fields) - set(available)
            if missing:
                raise Exception("%s does not contain any field(s) named: %s"
                                % (self.fpath, ", ".join(missing)))
            positions = [available.index(name) for name, _ in fields]
        self.rewind()
        self.next()
        return convert(self.data_stream, fields, positions)

    def as_array(self, fields=None):
        if fields is None:
            fields = self.fields

        # csv file is assumed to be in the correct order (ie by period then id)
        datastream = self.read(fields)
        return fromiter(datastream, dtype=np.dtype(fields),
                        count=self.numlines)
Example #37
 def __init__(self, pattern, flags=0):
     self.pattern = pattern
     self.re = re.compile(pattern, flags)
Example #38
def _setup_new_database(cur, database_engine):
    """Sets up the database by finding a base set of "full schemas" and then
    applying any necessary deltas.

    The "full_schemas" directory has subdirectories named after versions. This
    function searches for the highest version less than or equal to
    `SCHEMA_VERSION` and executes all .sql files in that directory.

    The function will then apply all deltas for all versions after the base
    version.

    Example directory structure:

        schema/
            delta/
                ...
            full_schemas/
                3/
                    test.sql
                    ...
                11/
                    foo.sql
                    bar.sql
                ...

    In the example foo.sql and bar.sql would be run, and then any delta files
    for versions strictly greater than 11.
    """
    current_dir = os.path.join(dir_path, "schema", "full_schemas")
    directory_entries = os.listdir(current_dir)

    valid_dirs = []
    pattern = re.compile(r"^\d+(\.sql)?$")
    for filename in directory_entries:
        match = pattern.match(filename)
        abs_path = os.path.join(current_dir, filename)
        if match and os.path.isdir(abs_path):
            ver = int(match.group(0))
            if ver <= SCHEMA_VERSION:
                valid_dirs.append((ver, abs_path))
        else:
            logger.warn("Unexpected entry in 'full_schemas': %s", filename)

    if not valid_dirs:
        raise PrepareDatabaseException(
            "Could not find a suitable base set of full schemas"
        )

    max_current_ver, sql_dir = max(valid_dirs, key=lambda x: x[0])

    logger.debug("Initialising schema v%d", max_current_ver)

    directory_entries = os.listdir(sql_dir)

    for filename in fnmatch.filter(directory_entries, "*.sql"):
        sql_loc = os.path.join(sql_dir, filename)
        logger.debug("Applying schema %s", sql_loc)
        executescript(cur, sql_loc)

    cur.execute(
        database_engine.convert_param_style(
            "INSERT INTO schema_version (version, upgraded)"
            " VALUES (?,?)"
        ),
        (max_current_ver, False,)
    )

    _upgrade_existing_database(
        cur,
        current_version=max_current_ver,
        applied_delta_files=[],
        upgraded=False,
        database_engine=database_engine,
        config=None,
        is_empty=True,
    )
Example #39
    def beginSpider(self,url):
        user = "******"
        pwd = "94c05j02p"
        # 1. Setup: log in and open the target page
        browser = webdriver.Chrome()
        self.loginzhihu(browser,user,pwd)
        browser.get(url)
        #x = browser.get_cookies()
        #browser.refresh()
        try:
            # First load all of the content: keep clicking as long as the page still has a "More" button
            while True:
                # Note: Selenium 2 does not support class names that contain spaces
                try:
                    self.scroll(browser)
                    time.sleep(18)
                    more = browser.find_element_by_css_selector("button.Button.QuestionMainAction")
                    actions = ActionChains(browser)
                    actions.move_to_element(more)
                    actions.click(more)
                    actions.perform()
                    # more.click()  # if a "More" button was found, click it and wait a moment
                except NoSuchElementException as e:
                    break
            # Once everything is loaded, grab the full page source
            soup = BeautifulSoup(browser.page_source,"html.parser")
            # 2. Work on the soup: extract the title and the list of items holding the content
            titles = soup.find("title").text.replace('\n', '').replace('?', '').split()
            title = titles[0]
            print(title)
            # Create a folder named after the title in the current directory if it does not exist yet
            dirpath = os.getcwd() + "\\" + title + "\\"
            if not os.path.exists(dirpath):
                os.makedirs(dirpath)

            items = soup.find_all("div", class_=re.compile("List-item"))
            nimingCount = 0
            for item in items:
                # Two cases: 1. an anonymous user without a leading avatar 2. an anonymous user with one
                userName = item.find('img', class_="Avatar AuthorInfo-avatar").get("alt")
                if "匿名" in userName:
                    userName = "******" + str(nimingCount)
                    nimingCount += 1
                count = 0  # a single user may have several photos
                images = item.find_all('img',class_ = "origin_image zh-lightbox-thumb lazy")

                for image in images:
                    # save the image
                    imageSrc = image.get("src")
                    picName = dirpath + userName + '_' + str(count) + ".jpg"
                    count += 1

                    try:
                        imageData = requests.get(imageSrc, stream=True).content
                        try:
                            with open(picName, 'wb') as jpg:
                                jpg.write(imageData)
                        except IOError as e:
                            print(userName + "的一张图片写入错误")
                    except MissingSchema as e:
                        print(userName + "的一张图片获取失败")
                        print("地址为:" + imageSrc)
        finally:
            # Remember to quit the browser at the end, or it will stay open forever
            browser.quit()
Example #40
    '''
    response.view = 'generic.json'
    # read NexSON from 'nexson' arg or (more likely) the request body
    nexson = extract_nexson_from_http_call(
        request, **request.vars)  # web2py equivalent to **kwargs

    try:
        o = merge_otus_and_trees(nexson)
        return {'data': o, 'error': 0}
    except Exception, x:
        s = str(x)
        return {'error': 1, 'description': s}


UPLOADID_PAT = re.compile(r'^[a-zA-Z_][-_.a-zA-Z0-9]{4,84}$')
ID_PREFIX_PAT = re.compile(r'^[a-zA-Z_][-_.a-zA-Z0-9]*$')


def to_nexson():
    global UPLOADID_PAT
    from externalproc import get_external_proc_dir_for_upload, get_logger, invoc_status, \
            ExternalProcStatus, get_conf, write_input_files, write_ext_proc_content, do_ext_proc_launch
    import os
    import datetime
    import codecs
    import locket
    import shutil
    import uuid
    from peyotl.nexson_syntax import can_convert_nexson_forms, \
                                     get_ot_study_info_from_nexml, \
Example #41
    L_cheru = {W_cheru ∪ `\\W`}*
    The cheru language is formed by joining cheru words with punctuation
"""
from nonebot.message import escape
from itertools import zip_longest
import re

from nonebot import *

bot = get_bot()


CHERU_SET = '切卟叮咧哔唎啪啰啵嘭噜噼巴拉蹦铃'
CHERU_DIC = { c: i for i, c in enumerate(CHERU_SET) }
ENCODING = 'gb18030'
rex_split = re.compile(r'\b', re.U)
rex_word = re.compile(r'^\w+$', re.U)
rex_cheru_word: re.Pattern = re.compile(rf'切[{CHERU_SET}]+', re.U)


def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)


def word2cheru(w:str) -> str:
    c = ['切']
    for b in w.encode(ENCODING):
        c.append(CHERU_SET[b & 0xf])
        c.append(CHERU_SET[(b >> 4) & 0xf])
    return ''.join(c)
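

# A hedged sketch of the inverse transform (not shown in this snippet):
# read the cheru characters after the leading '切' back as (low, high)
# nibble pairs, mirroring word2cheru above.
def cheru2word(c: str) -> str:
    if not c.startswith('切'):
        return c
    b = bytearray()
    for lo, hi in grouper(c[1:], 2, CHERU_SET[0]):
        b.append((CHERU_DIC[hi] << 4) | CHERU_DIC[lo])
    return b.decode(ENCODING, 'replace')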
Example No. 42
from __future__ import print_function
# Usage: python trafoXML_visualize.py input.trafoXML output_dir
import re, sys
import numpy as np
from matplotlib.mlab import *
from matplotlib.pyplot import *

resdir = ''
file_in = 'small_002.trafoXML'
if len(sys.argv) > 1: file_in = sys.argv[1]
if len(sys.argv) > 2: resdir = sys.argv[2]
f = open(file_in)
text = f.read()
f.close()

# parse the input file into pairs of x/y coordinates
pair_re = re.compile('<Pair from="([^ ]*)" to="([^ ]*)"/>')
x = []
y = []
for pair in pair_re.finditer(text):
    x.append(float(pair.group(1)))
    y.append(float(pair.group(2)))

# calculate least squares regression
A = np.vstack([x, np.ones(len(x))]).T
m, c = np.linalg.lstsq(A, y)[0]
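# (lstsq finds the m, c minimizing ||A @ [m, c] - y||, i.e. the best-fit line y = m*x + c)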

print("Use linear fit", m, c)

# calculate residuals
residues = []
predicted = []
Example No. 43
                              DistutilsFileError)
from setuptools.command.egg_info import manifest_maker
from setuptools.dist import Distribution
from setuptools.extension import Extension

try:
    import ConfigParser as configparser
except ImportError:
    import configparser

from pbr import extra_files
import pbr.hooks

# A simplified RE for this; just checks that the line ends with version
# predicates in ()
_VERSION_SPEC_RE = re.compile(r'\s*(.*?)\s*\((.*)\)\s*$')
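# Illustrative example (not from the original source): the pattern splits a
# line like "pbr (>=1.8)" into ('pbr', '>=1.8') via match(...).groups().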

# Mappings from setup() keyword arguments to setup.cfg options;
# The values are (section, option) tuples, or simply (section,) tuples if
# the option has the same name as the setup() argument
D1_D2_SETUP_ARGS = {
    "name": ("metadata", ),
    "version": ("metadata", ),
    "author": ("metadata", ),
    "author_email": ("metadata", ),
    "maintainer": ("metadata", ),
    "maintainer_email": ("metadata", ),
    "url": ("metadata", "home_page"),
    "description": ("metadata", "summary"),
    "keywords": ("metadata", ),
    "long_description": ("metadata", "description"),
Example No. 44
# Write a function that uses regular expressions to make sure the password
# string it is passed is strong. A strong password is defined as one that is
# at least eight characters long, contains both uppercase and lowercase
# characters, and has at least one digit. You may need to test the string
# against multiple regex patterns to validate its strength.
# (The solution below enforces an even stricter policy than that minimum;
# a sketch matching just the stated minimum follows it.)

import re

passworddetectionRegex = re.compile(
    r'''(
    ^(?=.*[A-Z].*[A-Z])          # at least two capital letters
    (?=.*[!@#$&*])               # at least one special character
    (?=.*[0-9].*[0-9])           # at least two digits
    (?=.*[a-z].*[a-z].*[a-z])    # at least three lowercase letters
    .{10,}                       # at least ten characters in total
    $
)''', re.VERBOSE)


def passwordDetection():
    passwordCheck = input("Please enter your password: ")
    if passworddetectionRegex.search(passwordCheck):
        print("Strong password.")
    else:
        print("Not a strong password.")


passwordDetection()
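
# A minimal alternative sketch (illustrative, not part of the original
# solution) that checks exactly the stated minimum: at least eight
# characters, both uppercase and lowercase letters, and at least one digit.
def is_strong(password):
    checks = (
        re.compile(r'.{8,}'),   # at least eight characters
        re.compile(r'[A-Z]'),   # an uppercase letter
        re.compile(r'[a-z]'),   # a lowercase letter
        re.compile(r'\d'),      # a digit
    )
    return all(p.search(password) for p in checks)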
Example No. 45
#!/usr/bin/env python

import os
import re
import urllib2

#base= "http://dl.film2movie.biz/serial/The%20Walking%20Dead/S01/"
get = raw_input("enter url: ")
response = urllib2.urlopen(get)
html = response.read()
mas = re.compile(r'<[a-z]\s\w{3,4}=".*a>')
check = mas.findall(html)

for i in check:
    mov = i
    prep = re.compile(r'>[a-z]?[A-Z]?.*\.mkv')
    prep1 = prep.findall(mov)
    for p in prep1:
        ok = p.replace('>', '')
        print get + ok
Example No. 46
 def __init__(self, ignore):
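     # Build a single alternation from shell-style ignore patterns, treating
     # '*' as a wildcard; other regex metacharacters are not escaped.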
     self.__ignore = re.compile(
         r'(%s)' % ('|'.join([pat.replace('*', '.*') for pat in ignore])))
Example No. 47
class EST_SIZE():
    IP = len('111.222.333.444')
    PORT = 6
    USER = 42
    PASSWORD = 42


ssh_version = it.cycle(['SSH-2.0-OpenSSH_6.1', 'SSH-2.0-OpenSSH_5.1', 'SSH-2.0-OpenSSH_4.1'])

DOMAIN_RE = re.compile(
    r"([A-Za-z]{3,9}:(?:\/\/)?)?" +   # match protocol, allow in format http:// or mailto:
    r"(?P<domain>" +                  # domain part
    #r"(?:[\-;:&=\+\$,\w]+@)?" +      # allow something@ for email addresses
    #r"[A-Za-z0-9\.\-]+" +            # anything looking at all like a domain, non-unicode domains
    #r"|" +                           # or instead of above
    r"(?:www\.|[\-;:&=\+\$,\w]+@)?" + # starting with something@ or www.
    r"[A-Za-z0-9\.\-]+" +             # anything looking at all like a domain
    r")" +
    r"(?P<params>" +                  # path / query-string part
    r"(?:\/[\+~%\/\.\w\-_]*)" +       # allow optional /path
    r"?\??(?:[\-\+=&;%@\.\w_]*)" +    # allow optional query string starting with ?
    r"#?(?:[\.\!\/\\\w]*)"            # allow optional anchor #anchor
    r")?")
IP_RE = re.compile(r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$")
RNGE_RE = re.compile(r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" +
                     r"-(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$")
MASK_RE = re.compile(r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])" +
                     r"?\/([0-9]|[1-3][0-2])+$")

MAX_THREADS = 100
Q_TIMEOUT = 1
FB_PORT = None
Example No. 48
MAX_COL_WIDTHS = 30
MIN_PASS_LEN = 6
ADMIN_USER = False
INITIALLY_ACTIVE = True
# first users under db_init -
RIBCAGE_KEY01 = os.environ.get('RIBCAGE_ADMIN_KEYS1', 'BLANK BLANK').split()
RIBCAGE_KEY02 = os.environ.get('RIBCAGE_ADMIN_KEYS2', 'BLANK BLANK').split()

# ====================
# CLIENT IP HANDLING
# ====================
VALID_IP = re.compile(
    r"""
\b
(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
\b
""", re.VERBOSE)

# ====================
# NAME OF YOUR LOG FILE
# ====================
LOGOUT = 'Devel_logs.log'

# ====================
# MAILGUN VARIABLES
# ====================
MAILGUN_URL = 'https://api.mailgun.net/v3/{}/messages'
SANDBOX = 'sandbox26a6aabbd3e946feba81293c4b4d9dcc.mailgun.org'
Example No. 49
from settings import LOGGING
import logging, logging.config
import urllib, urllib2
import re, urlparse
import traceback
from database import CrawlerDb

# Debugging
# import pdb;pdb.set_trace()

# Logging
logging.config.dictConfig(LOGGING)
logger = logging.getLogger("crawler_logger")

google_adurl_regex = re.compile(r'adurl=(.*?)"')
google_url_regex = re.compile(r'url\?q=(.*?)&amp;sa=')
email_regex = re.compile(r'([A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4})',
                         re.IGNORECASE)
url_regex = re.compile(r'<a\s.*?href=[\'"](.*?)[\'"].*?>')
# The url_regex variant below runs into 'Catastrophic Backtracking'!
# http://stackoverflow.com/questions/8010005/python-re-infinite-execution
# url_regex = re.compile(r'<a\s(?:.*?\s)*?href=[\'"](.*?)[\'"].*?>')
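# Illustrative check: the simpler pattern above extracts href targets without
# backtracking trouble, e.g.
#   url_regex.findall('<a class="x" href="http://e.com">e</a>') -> ['http://e.com']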

# Maximum number of search results to start the crawl

EMAILS_FILENAME = 'data/emails.csv'
DOMAINS_FILENAME = 'data/domains.csv'

# Set up the database
db = CrawlerDb()
db.connect()
Example No. 50
from __future__ import print_function

import os
import re
import sys


def execute_cmd(cmd):
    print(cmd)
    os.system(cmd)


# The regular expression we use to match compiler-crasher lines.
regex = re.compile(
    r'.*Swift(.*) :: '
    r'(compiler_crashers|compiler_crashers_2|IDE/crashers|SIL/crashers)'
    r'/(.*\.swift|.*\.sil).*')

# Take the output of lit as standard input.
for line in sys.stdin:
    match = regex.match(line)
    if match:
        suffix = match.group(2)
        filename = match.group(3)

        # Move the test over to the fixed suite.
        from_filename = 'validation-test/%s/%s' % (suffix, filename)
        to_filename = 'validation-test/%s_fixed/%s' % (suffix, filename)
        git_mv_cmd = 'git mv %s %s' % (from_filename, to_filename)
        execute_cmd(git_mv_cmd)
Example No. 51
import pyperclip, re
phoneRegex = re.compile(
    r'''(
    (\d{3}|\(\d{3}\))?              # area code
    (\s|-|\.)?                      # separator
    (\d{3})                         # first three digits
    (\s|-|\.)                       # separator
    (\d{4})                         # last four digits
    (\s*(ext|x|ext.)\s*(\d{2,5}))?  # extension
    )''', re.VERBOSE)
emailRegex = re.compile(
    r'''([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+(\.[a-zA-Z]{2,6}))''', re.VERBOSE)

text = str(pyperclip.paste())
matches = []
for groups in phoneRegex.findall(text):
    phoneNum = '-'.join([groups[1], groups[3], groups[5]])
    if groups[8] != '':
        phoneNum += ' x' + groups[8]
    matches.append(phoneNum)
for groups in emailRegex.findall(text):
    matches.append(groups[0])
if len(matches) > 0:
    pyperclip.copy('\n'.join(matches))
    print('Copied to clipboard:')
    print('\n'.join(matches))
else:
    print('No phone numbers or email addresses found in clipboard.')
Example No. 52
import re
import unidecode

from babel.dates import format_timedelta as babel_format_timedelta
from flask_babelplus import lazy_gettext as _
from flask_themes2 import render_theme_template, get_themes_list
from flask_login import current_user

# from flaskbb.user.models import User

from werkzeug.local import LocalProxy

from flaskbb._compat import range_method, text_type, iteritems, to_unicode, to_bytes
from flaskbb.extensions import redis_store, babel
from flaskbb.utils.settings import flaskbb_config
from flaskbb.utils.markup import markdown
from flask_allows import Permission

_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')


def slugify(text, delim=u'-'):
    """Generates an slightly worse ASCII-only slug.
    Taken from the Flask Snippets page.

   :param text: The text which should be slugified
   :param delim: Default "-". The delimeter for whitespace
    """
    text = unidecode.unidecode(text)
    result = []
    for word in _punct_re.split(text.lower()):
        if word:
            result.append(word)
    return text_type(delim.join(result))
Example No. 53
import re
from sopel import web
from sopel.module import commands, example
import json
import time


def formatnumber(n):
    """Format a number with beautiful commas."""
    parts = list(str(n))
    for i in range((len(parts) - 3), 0, -3):
        parts.insert(i, ',')
    return ''.join(parts)

r_bing = re.compile(r'<h3><a href="([^"]+)"')


def bing_search(query, lang='en-GB'):
    query = web.quote(query)
    base = 'http://www.bing.com/search?mkt=%s&q=' % lang
    bytes = web.get(base + query)
    m = r_bing.search(bytes)
    if m:
        return m.group(1)

r_duck = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')


def duck_search(query):
    query = query.replace('!', '')
Example No. 54
            # printouts will tell us about problems if this API changes
            import traceback
            traceback.print_exc()

        self.debugger = Pdb(colors)

    def __call__(self):
        """Starts an interactive debugger at the point where called.

        This is similar to the pdb.set_trace() function from the std lib, but
        using IPython's enhanced debugger."""

        self.debugger.set_trace(sys._getframe().f_back)


RGX_EXTRA_INDENT = re.compile(r'(?<=\n)\s+')


def strip_indentation(multiline_string):
    return RGX_EXTRA_INDENT.sub('', multiline_string)
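
# e.g. strip_indentation('a\n    b\nc') -> 'a\nb\nc' (every run of whitespace
# that follows a newline is removed)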


def decorate_fn_with_doc(new_fn, old_fn, additional_text=""):
    """Make new_fn have old_fn's doc string. This is particularly useful
    for the ``do_...`` commands that hook into the help system.
    Adapted from from a comp.lang.python posting
    by Duncan Booth."""
    def wrapper(*args, **kw):
        return new_fn(*args, **kw)
    if old_fn.__doc__:
        wrapper.__doc__ = strip_indentation(old_fn.__doc__) + additional_text
Example No. 55
 def resp_regex_compiled():
     return re.compile(SBE16NOCalibrationParticle.resp_regex(), re.DOTALL)
Example No. 56
#!/bin/python3

import os
import sys
import re
import fdb_embedded as fdb

# Use database, or use actual dir?

# 1: fetch all _1280 files from CGI.db, build list in file
# path/file_1280.ext
# 2: for each file_, lookup in download.db
# if _raw is found, should be moved
# if no _raw is found, should be listed in separate list for further investigation
#
# path/file_1280.ext path/file_raw.ext

# make list of 1280
# exclude those to delete (list from tumbler_scrape.txt)
# -> list TODELETE (move them before delete)
# make list of 1280 with no corresponding _raw -> list REVERSE_SEARCH (move them)

repattern_tumblr_1280 = re.compile(r'tumblr_.*_1280.*', re.I)
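
# Illustrative helper (the name is an assumption, not from the original):
# filter candidate tumblr *_1280 filenames with the pattern defined above.
def find_1280(paths):
    return [p for p in paths if repattern_tumblr_1280.search(p)]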


class FDBEMBEDDED():
    """handles queries to the fdb databases"""
Example No. 57
import re
from tempfile import TemporaryFile
from cStringIO import StringIO
from itertools import chain, repeat
from functools import update_wrapper

from _internal import _decode_unicode, _empty_stream
from urls import url_decode_stream
from wsgi import LimitedStream, make_line_iter
from exceptions import RequestEntityTooLarge
from datastructures import Headers, FileStorage, MultiDict
from http import parse_options_header


#: an iterator that yields empty strings
_empty_string_iter = repeat('')

#: a regular expression for multipart boundaries
_multipart_boundary_re = re.compile('^[ -~]{0,200}[!-~]$')

#: HTTP transfer encodings that are also available in Python and that we
#: support for multipart messages.
_supported_multipart_encodings = frozenset(['base64', 'quoted-printable'])


def default_stream_factory(total_content_length, filename, content_type,
                           content_length=None):
    """The stream factory that is used per default."""
    if total_content_length > 1024 * 500:
        return TemporaryFile('wb+')
    return StringIO()


def parse_form_data(environ, stream_factory=None, charset='utf-8',
Example No. 58
    'BoolFacet',
    'TermsFacet',
    'ModelTermsFacet',
    'RangeFacet',
    'TemporalCoverageFacet',
    'BoolBooster',
    'FunctionBooster',
    'ValueFactor',
    'GaussDecay',
    'ExpDecay',
    'LinearDecay',
)

ES_NUM_FAILURES = '-Infinity', 'Infinity', 'NaN', None

RE_TIME_COVERAGE = re.compile(r'\d{4}-\d{2}-\d{2}-\d{4}-\d{2}-\d{2}')
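# e.g. matches '2016-01-01-2016-12-31' (a start and end date joined by '-')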

OR_SEPARATOR = '|'
OR_LABEL = _('OR')


def obj_to_string(obj):
    '''Render an object into a unicode string if possible'''
    if not obj:
        return None
    elif isinstance(obj, bytes):
        return obj.decode('utf-8')
    elif isinstance(obj, str):
        return obj
    elif is_lazy_string(obj):
        return obj.value
Example No. 59
access_key = u'182309114-zBJTzx72PGS3p5yTikSMi6lTXPEIR3f92Ky8KsLU'
access_secret = u'2R9TwGSfvH7z8eDpMaHsoHFwRLA2r7bMixG4wfIhJU'

# create twitter API object
auth = OAuth1(consumer_key,
              consumer_secret,
              access_key,
              access_secret,
              signature_type='query')
stream = TwitterStream(auth=auth, secure=True)

# iterate over tweets matching this filter text
# IMPORTANT! this is not quite the same as a standard twitter search
tweet_iter = stream.statuses.filter(track=search_term)

pattern = re.compile("%s" % search_term, re.IGNORECASE)

for tweet in tweet_iter:
    # check whether this is a valid tweet
    if tweet.get('text'):

        # turn the date string into a date object that python can handle
        timestamp = parsedate(tweet["created_at"])
        # now format this nicely into HH:MM:SS format
        timetext = strftime("%H:%M:%S", timestamp)

        # colour our tweet's time, user and text
        time_colored = colored(timetext, color="white", attrs=["bold"])
        user_colored = colored(tweet["user"]["screen_name"], "green")
        text_colored = tweet["text"]
        # replace each instance of our search terms with a highlighted version
Example No. 60
 def resp_regex_compiled():
     """
     get the compiled regex pattern
     @return: compiled re
     """
     return re.compile(SBE16NOHardwareParticle.resp_regex(), re.DOTALL)