Example #1
    def indent_code(self, code):
        """Accepts a string of code or a list of code lines"""

        # code mostly copied from ccode
        if isinstance(code, string_types):
            code_lines = self.indent_code(code.splitlines(True))
            return "".join(code_lines)

        tab = "  "
        inc_regex = ("^function ", "^if ", "^elseif ", "^else$", "^for ")
        dec_regex = ("^end$", "^elseif ", "^else$")

        # pre-strip left-space from the code
        code = [line.lstrip(" \t") for line in code]

        increase = [int(any([search(re, line) for re in inc_regex])) for line in code]
        decrease = [int(any([search(re, line) for re in dec_regex])) for line in code]

        pretty = []
        level = 0
        for n, line in enumerate(code):
            if line == "" or line == "\n":
                pretty.append(line)
                continue
            level -= decrease[n]
            pretty.append("%s%s" % (tab * level, line))
            level += increase[n]
        return pretty
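
A minimal, self-contained Python 3 sketch of the same technique, regex-driven indent/dedent scanning, outside the original printer context; indent_matlab_like and the tiny input listing are illustrative, not from the original:

import re

TAB = "  "
INC_REGEX = ("^function ", "^if ", "^elseif ", "^else$", "^for ")
DEC_REGEX = ("^end$", "^elseif ", "^else$")

def indent_matlab_like(lines):
    # strip existing indentation, then re-indent by tracking block depth
    lines = [line.lstrip(" \t") for line in lines]
    level, pretty = 0, []
    for line in lines:
        level -= any(re.search(p, line) for p in DEC_REGEX)   # dedent before printing
        pretty.append(TAB * level + line)
        level += any(re.search(p, line) for p in INC_REGEX)   # indent following lines
    return pretty

print("\n".join(indent_matlab_like(
    ["if x > 0", "y = 1;", "else", "y = 2;", "end"])))
# if x > 0
#   y = 1;
# else
#   y = 2;
# end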
Example #2
        def check_date_format(date):
            """
            Checks whether a date is already in ISO datetime format; converts dates in other
            recognized formats to ISO datetime, or raises an error on an unknown format
            """
            # check if date is already in the proper format
            datetime_pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$")

            # regex and its accompanying strptime format
            misc_date_formats = (
                (re.compile(r"\d{2}/\d{2}/\d{4}\+\d{2}:\d{2}T\d{2}:\d{2}:\d{2}$"), "%m/%d/%Y+%H:%MT%H:%M:%S"),
                (re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$"), "%Y-%m-%dT%H:%M:%S"),
                (re.compile(r"\d{4}/\d{2}/\d{2}$"), "%d/%m/%YT%H:%M:%S"),
                (re.compile(r"\d{4}/\d{2}/\d{2}$"), "%/%d/%YT%H:%M:%S"),
                (re.compile(r"\d{2}/\d{2}/\d{4}\+\d{2}:\d{2}$"), "%m/%d/%Y+%H:%M"),
                (re.compile(r"\d{4}-\d{2}-\d{2}$"), "%Y-%m-%d"),
                (re.compile(r"\d{2}/\d{2}/\d{4}$"), "%d/%m/%Y"),
                (re.compile(r"\d{2}/\d{2}/\d{4}$"), "%m/%d/%Y"),
            )

            matched = re.search(datetime_pattern, date)
            if matched:
                return date
            else:
                for date_format_tuple in misc_date_formats:
                    matched = re.search(date_format_tuple[0], date)
                    if matched:
                        try:
                            timestruct = time.strptime(date, date_format_tuple[1])
                            timedatetime = datetime.datetime.fromtimestamp(time.mktime(timestruct))
                            return timedatetime.strftime("%Y-%m-%dT%H:%M:%S")
                        except ValueError:
                            continue
                else:
                    raise TypeError("unknown date format given: %s" % date)
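
A condensed, runnable Python 3 sketch of the same normalize-or-raise approach, with the format table trimmed to two entries; normalize_date is an illustrative name:

import datetime
import re
import time

ISO_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$")
MISC_FORMATS = (
    (re.compile(r"\d{4}-\d{2}-\d{2}$"), "%Y-%m-%d"),
    (re.compile(r"\d{2}/\d{2}/\d{4}$"), "%m/%d/%Y"),
)

def normalize_date(date):
    if ISO_PATTERN.search(date):
        return date  # already in ISO datetime form
    for pattern, fmt in MISC_FORMATS:
        if pattern.search(date):
            try:
                struct = time.strptime(date, fmt)
            except ValueError:
                continue  # regex matched but strptime disagreed; try the next format
            return datetime.datetime.fromtimestamp(time.mktime(struct)).strftime("%Y-%m-%dT%H:%M:%S")
    raise TypeError("unknown date format given: %s" % date)

print(normalize_date("12/25/2020"))  # 2020-12-25T00:00:00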
Example #3
def initialize():
	try:
		print '[!] Preparing SMB2 listener...'
		pkt =("\x00\x00\x00\x01")
		sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		# bind and listen for a connection 
		sock.bind(("", 445))
		try:
			print '[!] Waiting for connection...'
			sock.listen(1)
			connection, addr = sock.accept()
		except KeyboardInterrupt:
			return
		print '[!] Connection from %s, waiting for negotiation...'%str(addr)
		while True:
			try:
				npkt = connection.recv(1024)
				# we're responding to the negotiation packet
				if npkt[8] == 'r':
					connection.send(pkt)
					break	
			except Exception, j:
				Error('Connection error [%s]'%j)
				break
		sock.close()
		print '[!] Complete, checking remote address...'
		rval = commands.getoutput('ping -c 1 -w 1 %s'%addr[0])
		up = re.search('\d.*? received', rval)
		if re.search('0', up.group(0)) is None:
			Msg('Host appears to be up')
		else:
			print '[+] Host is not responding - it is either down or rejecting our probes.'
Example #4
def _CheckLGTMsForPublicAPI(input_api, output_api):
    """Check LGTMs for public API changes.

  For public API files make sure there is an LGTM from the list of owners in
  PUBLIC_API_OWNERS.
  """
    results = []
    requires_owner_check = False
    for affected_file in input_api.AffectedFiles():
        affected_file_path = affected_file.LocalPath()
        file_path, file_ext = os.path.splitext(affected_file_path)
        # We only care about files that end in .h and are under the top-level
        # include dir.
        if file_ext == ".h" and "include" == file_path.split(os.path.sep)[0]:
            requires_owner_check = True

    if not requires_owner_check:
        return results

    lgtm_from_owner = False
    issue = input_api.change.issue
    if issue and input_api.rietveld:
        issue_properties = input_api.rietveld.get_issue_properties(issue=int(issue), messages=True)
        if re.match(REVERT_CL_SUBJECT_PREFIX, issue_properties["subject"], re.I):
            # It is a revert CL, ignore the public api owners check.
            return results

        if re.search(r"^COMMIT=false$", issue_properties["description"], re.M):
            # Ignore public api owners check for COMMIT=false CLs since they are not
            # going to be committed.
            return results

        match = re.search(r"^TBR=(.*)$", issue_properties["description"], re.M)
        if match:
            tbr_entries = match.group(1).strip().split(",")
            for owner in PUBLIC_API_OWNERS:
                if owner in tbr_entries or owner.split("@")[0] in tbr_entries:
                    # If an owner is specified in the TBR= line then ignore the public
                    # api owners check.
                    return results

        if issue_properties["owner_email"] in PUBLIC_API_OWNERS:
            # An owner created the CL that is an automatic LGTM.
            lgtm_from_owner = True

        messages = issue_properties.get("messages")
        if messages:
            for message in messages:
                if message["sender"] in PUBLIC_API_OWNERS and "lgtm" in message["text"].lower():
                    # Found an lgtm in a message from an owner.
                    lgtm_from_owner = True
                    break

    if not lgtm_from_owner:
        results.append(
            output_api.PresubmitError(
                "Since the CL is editing public API, you must have an LGTM from " "one of: %s" % str(PUBLIC_API_OWNERS)
            )
        )
    return results
Example #5
    def loadAccountInfo(self, user, req):
        validuntil = None
        trafficleft = None
        premium = None

        html = req.load("http://uploading.com/")

        premium = re.search(self.PREMIUM_PATTERN, html) is None

        m = re.search(self.VALID_UNTIL_PATTERN, html)
        if m:
            expiredate = m.group(1).strip()
            self.logDebug("Expire date: " + expiredate)

            try:
                validuntil = time.mktime(time.strptime(expiredate, "%b %d, %Y"))

            except Exception, e:
                self.logError(e)

            else:
                if validuntil > time.mktime(time.gmtime()):
                    premium = True
                else:
                    premium = False
                    validuntil = None
Example #6
 def get_user_info(self, uid):
     """
     获取用户基本信息
     :param uid: 用户id
     :return: 用户基本信息
     """
     user_info_url = 'http://weibo.cn/%s/info' % uid
     user_info_page = self.get_page(user_info_url)
     sex_pattern = re.compile('性别:(.*?)<br/>')
     area_pattern = re.compile('地区:(.*?)<br/>')
     birth_pattern = re.compile('生日:(\d*?)-.*?<br/>')
     sex = re.search(sex_pattern, user_info_page)
     area = re.search(area_pattern, user_info_page)
     birth = re.search(birth_pattern, user_info_page)
     if sex:
         sex = sex.group(1)
     if area:
         area = area.group(1)
     if birth:
         birth = birth.group(1)
         if int(birth) != 1:    # filter out users whose birth year is Weibo's default placeholder
             info = {'sex': sex, 'area': area, 'age': 2016 - int(birth)}
             return info
     info = {'sex': sex, 'area': area, 'age': None}
     return info
Example #7
 def _get_snippet_bounds(self):
     init_snippet, end_snippet = self._textline_bounds()
     while not re.search(r' *>>> ', self.doc.get_text(init_snippet, end_snippet)):
         if not re.search(r' *>>> | *\.\.\. ', self.doc.get_text(init_snippet, end_snippet)):
             return end_snippet, end_snippet
         init_snippet.backward_line()
     return init_snippet, end_snippet
Example #8
 def GenVerilog(self,fp,config):
   for bodyextension in ('_Rx.v','_Tx.v',):
     body = self.LoadCore(self.peripheralFile,bodyextension);
     if hasattr(self,'RTR') or hasattr(self,'RTRn'):
       body = re.sub(r'@RTR_BEGIN@\n','',body);
       body = re.sub(r'@RTR_END@\n','',body);
     else:
       if re.search(r'@RTR_BEGIN@',body):
         body = re.sub(r'@RTR_BEGIN@.*?@RTR_END@\n','',body,flags=re.DOTALL);
     for subpair in (
         ( r'@RTR_SIGNAL@',    self.RTR if hasattr(self,'RTR') else self.RTRn if hasattr(self,'RTRn') else '', ),
         ( r'@RTR_INVERT@',    '' if hasattr(self,'RTR') else '!', ),
         ( r'\bL__',           'L__@NAME@__',          ),
         ( r'\bgen__',         'gen__@NAME@__',        ),
         ( r'\bs__',           's__@NAME@__',          ),
         ( r'@INPORT@',        self.insignal,          ),
         ( r'@BAUDMETHOD@',    str(self.baudmethod),   ),
         ( r'@SYNC@',          str(self.sync),         ),
         ( r'@DEGLITCH@',      str(self.deglitch),     ),
         ( r'@INFIFO@',        str(self.inFIFO),       ),
         ( r'@ENABLED@',       self.CTS if hasattr(self,'CTS') else ('!%s' % self.CTSn) if hasattr(self,'CTSn') else '1\'b1', ),
         ( r'@NSTOP@',         str(self.nStop),        ),
         ( r'@OUTFIFO@',       str(self.outFIFO),      ),
         ( r'@NAME@',          self.namestring,        ),
       ):
       if re.search(subpair[0],body):
         body = re.sub(subpair[0],subpair[1],body);
     body = self.GenVerilogFinal(config,body);
     fp.write(body);
Example #9
 def clean_up(self):
     """ Move DQ outputs to their appropriate directory """
     try:
         data_dir = os.environ["DATA"]
         plots_dir = os.environ["PLOTS"]
         logs_dir = os.environ["LOGS"]
     except KeyError as detail:
         print "GenerateSpectrum.clean_up: error", detail, "not set"
         print " --> source analysis environment scripts before running!"
         sys.exit(1)
     for root, dirs, files in os.walk(os.getcwd()):
         for file in files:
             is_data = re.search(r".*\.root$", file)
             is_plot = re.search(r".*\.png$", file)
             hostname = socket.gethostname()
             is_log =  re.search(r"^rat\."+hostname+r"\.[0-9]+\.log$", file)
             if is_data:
                 try:
                     root_file = TFile(file)
                     tree = root_file.Get("T")
                     tree.ls()
                 except ReferenceError as detail:
                     "generate_spectrum.clean_up: error in TFile,", detail
                     sys.exit(1)
                 file_manips.copy_file(os.path.join(root, file), data_dir)
             elif is_plot:
                 file_manips.copy_file(os.path.join(root, file), plots_dir)
             elif is_log:
                 file_manips.copy_file(os.path.join(root, file), logs_dir)
Example #10
 def _parse_alt(self, str):
     if re.search('[\[\]]', str) is not None:
         # Paired breakend
         items = re.split('[\[\]]', str)
         remoteCoords = items[1].split(':')
         chr = remoteCoords[0]
         if chr[0] == '<':
             chr = chr[1:-1]
             withinMainAssembly = False
         else:
             withinMainAssembly = True
         pos = remoteCoords[1]
         orientation = (str[0] == '[' or str[0] == ']')
         remoteOrientation = (re.search('\[', str) is not None)
         if orientation:
             connectingSequence = items[2]
         else:
             connectingSequence = items[0]
         return _Breakend(chr, pos, orientation, remoteOrientation, connectingSequence, withinMainAssembly)
     elif str[0] == '.' and len(str) > 1:
         return _SingleBreakend(True, str[1:])
     elif str[-1] == '.' and len(str) > 1:
         return _SingleBreakend(False, str[:-1])
     elif str[0] == "<" and str[-1] == ">":
         return _SV(str[1:-1])
     else:
         return _Substitution(str)
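
The bracket-splitting trick above can be exercised in isolation; the sample ALT value is the paired-breakend form from the VCF specification:

import re

alt = "G]17:198982]"
if re.search(r"[\[\]]", alt):
    items = re.split(r"[\[\]]", alt)          # ['G', '17:198982', '']
    chrom, pos = items[1].split(":")
    remote_orientation = re.search(r"\[", alt) is not None
    print(chrom, pos, remote_orientation)     # 17 198982 False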
Example #11
def resolve_resources(br, path):
    lecture = []
    b_video = []
    video   = []
    pdf     = []
    pptx    = []

    for l in br.links():
        m_video = re.search(r'https:[\S]+download.mp4[\S]+\'', str(l))
        m_pdf = re.search(r'https*:[\S]+/([\S]+\.pdf)', str(l))
        m_pptx = re.search(r'https*:[\S]+/([\S]+\.pptx*)', str(l))
    
        if m_video:
            b_video.append(m_video.group().rstrip("'"))
        if m_pdf:
            pdf.append([resolve_name_with_hex(m_pdf.group(1)), m_pdf.group()])
        if m_pptx:
            pptx.append([resolve_name_with_hex(m_pptx.group(1)), m_pptx.group()])

    for l in b_video:
        br.open(l)
        tmp_l = br.geturl()
        index = tmp_l.find('?')
        tmp_l = tmp_l[ : index]
        video.append(tmp_l)
        index = tmp_l.rfind('/')
        lecture.append(resolve_name_with_hex(tmp_l[index+1 :]))

    if len(lecture) == len(video):
        mp4 = zip(lecture, video)
    else:
        print 'Error resolving video names. Ignoring videos...'
        mp4 = []
    return mp4, pdf, pptx
Example #12
def valid_password(password):
    ''' Verify the password meets complexity requirements. '''

    return len(password) >= 8 and \
           re.search(_LOWER_ALPHA, password) and \
           re.search(_UPPER_ALPHA, password) and \
           re.search(_NUMERIC, password)
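
The three patterns are defined elsewhere in that project; a self-contained version under the plausible assumption that they are plain compiled character classes:

import re

_LOWER_ALPHA = re.compile(r"[a-z]")
_UPPER_ALPHA = re.compile(r"[A-Z]")
_NUMERIC = re.compile(r"[0-9]")

def valid_password(password):
    ''' Verify the password meets complexity requirements. '''
    return (len(password) >= 8
            and _LOWER_ALPHA.search(password)
            and _UPPER_ALPHA.search(password)
            and _NUMERIC.search(password))

print(bool(valid_password("Secret123")))  # True
print(bool(valid_password("short")))      # False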
Example #13
def initialize_constants():
    global __version__, __appname__, modules, functions, basenames, scripts

    src = open('src/calibre/constants.py', 'rb').read()
    nv = re.search(r'numeric_version\s+=\s+\((\d+), (\d+), (\d+)\)', src)
    __version__ = '%s.%s.%s'%(nv.group(1), nv.group(2), nv.group(3))
    __appname__ = re.search(r'__appname__\s+=\s+(u{0,1})[\'"]([^\'"]+)[\'"]',
            src).group(2)
    epsrc = re.compile(r'entry_points = (\{.*?\})', re.DOTALL).\
            search(open('src/calibre/linux.py', 'rb').read()).group(1)
    entry_points = eval(epsrc, {'__appname__': __appname__})

    def e2b(ep):
        return re.search(r'\s*(.*?)\s*=', ep).group(1).strip()

    def e2s(ep, base='src'):
        return (base+os.path.sep+re.search(r'.*=\s*(.*?):', ep).group(1).replace('.', '/')+'.py').strip()

    def e2m(ep):
        return re.search(r'.*=\s*(.*?)\s*:', ep).group(1).strip()

    def e2f(ep):
        return ep[ep.rindex(':')+1:].strip()

    basenames, functions, modules, scripts = {}, {}, {}, {}
    for x in ('console', 'gui'):
        y = x + '_scripts'
        basenames[x] = list(map(e2b, entry_points[y]))
        functions[x] = list(map(e2f, entry_points[y]))
        modules[x] = list(map(e2m, entry_points[y]))
        scripts[x] = list(map(e2s, entry_points[y]))
Example #14
def ftp_download():
	ftp_host = 'ftp.uniprot.org'
	ftp_user = '******'
	ftp_pass = ''
	ftp_path = '/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes'

	ftp = FTP(ftp_host)
	ftp.login(ftp_user, ftp_pass)
	ftp.getwelcome()
	ftp.cwd(ftp_path)

	dirs = ftp.nlst()
	# print(dirs)
	p = 0

	# Navigate to the required directory and thereby download data.
	for dir in dirs:
		if re.search(species, dir):
			path = ftp_path + '/' + str(species)
			# print(path)
			ftp.cwd(path)
			types = ftp.nlst()
			for x in types:
				if not re.search('DNA.fasta.gz', x) and re.search('fasta.gz', x):
					final = path + '/' + str(x)
					# print(final)
					fullfilename = os.path.join(store, str(x))
					urllib.urlretrieve('ftp://' + ftp_host + str(final), fullfilename)
					p+=1
				else:
					pass

	print("Number of viruses: " + str(p))

	print(ftp.pwd())
Example #15
  def _apache_index(self, url):
    r = requests.get(url)
    if r.status_code != 200:
      raise ValueError(url+" status:"+str(r.status_code))
    r.dirs = []
    r.files = []
    for l in r.content.split("\n"):
      # '<img src="/icons/folder.png" alt="[DIR]" /> <a href="7.0/">7.0/</a>       03-Dec-2014 19:57    -   '
      # ''<img src="/icons/tgz.png" alt="[   ]" /> <a href="owncloud_7.0.4-2.diff.gz">owncloud_7.0.4-2.diff.gz</a>                     09-Dec-2014 16:53  9.7K   <a href="owncloud_7.0.4-2.diff.gz.mirrorlist">Details</a>'
      # 
      m = re.search("<a\s+href=[\"']?([^>]+?)[\"']?>([^<]+?)[\"']?</a>\s*([^<]*)", l, re.I)
      if m:
        # ('owncloud_7.0.4-2.diff.gz', 'owncloud_7.0.4-2.diff.gz', '09-Dec-2014 16:53  9.7K   ')
        m1, m2, m3 = m.groups()

        if re.match("(/|\?|\w+://)", m1):  # skip absolute urls, query strings and foreign urls
          continue
        if re.match("\.?\./?$", m1):  # skip . and ..
          continue

        m3 = re.sub("[\s-]+$", "", m3)
        if re.search("/$", m1):
          r.dirs.append([m1, m3])
        else:
          r.files.append([m1, m3])
    return r
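
The href pattern can be checked on its own against the sample row quoted in the comments:

import re

line = ('<img src="/icons/folder.png" alt="[DIR]" /> '
        '<a href="7.0/">7.0/</a>       03-Dec-2014 19:57    -   ')
m = re.search(r"""<a\s+href=["']?([^>]+?)["']?>([^<]+?)["']?</a>\s*([^<]*)""", line, re.I)
print(m.groups())  # ('7.0/', '7.0/', '03-Dec-2014 19:57    -   ')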
Example #16
    def loginWithCheckCode(self):
        checkcode = raw_input('Enter the verification code: ')
        self.post['TPL_checkcode'] = checkcode
        self.postData = urllib.urlencode(self.post)
        try:
            request = urllib2.Request(self.loginURL,self.postData,self.loginHeaders)
            response = self.opener.open(request)
            content = response.read().decode('gbk')
            pattern = re.compile(u'\u9a8c\u8bc1\u7801\u9519\u8bef',re.S)
            result = re.search(pattern,content)
            if result:
                print u"验证码输入错误"
                return False
            else:
                tokenPattern = re.compile('id="J_HToken" value="(.*?)"')
                tokenMatch = re.search(tokenPattern,content)
                if tokenMatch:
                    print u"验证码输入正确"
                    print tokenMatch.group(1)
                    return tokenMatch.group(1)
                else:
                    print u"J_Token"
                    return False

        except urllib2.HTTPError,e:
            print u"出错",e.reason
            return False
Example #17
def create_movie_tiles_content(movies):
    '''Generates a string with the movie tile markup.'''
    # The HTML content for this section of the page
    content = ''
    for movie in movies:
        # Extract the youtube ID from the url
        youtube_id_match = re.search(
            r'(?<=v=)[^&#]+',
            movie.trailer_youtube_url
        )
        youtube_id_match = youtube_id_match or re.search(
            r'(?<=be/)[^&#]+',
            movie.trailer_youtube_url
        )
        trailer_youtube_id = youtube_id_match.group(0) if youtube_id_match else None

        # Append the tile for the movie with its content filled in
        content += MOVIE_TILE_CONTENT.format(
            movie_id=movie.movie_id,
            movie_title=movie.title,
            poster_image_url=movie.poster_image_url,
            trailer_youtube_id=trailer_youtube_id,
            actors=create_actor_list_content(movie.actors),
            year=movie.year,
            synopsis=movie.synopsis
        )

    return content
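
The two lookbehind patterns between them cover both common YouTube URL shapes; a quick standalone check with sample URLs, not from the original project:

import re

for url in ("https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://youtu.be/dQw4w9WgXcQ"):
    m = re.search(r'(?<=v=)[^&#]+', url) or re.search(r'(?<=be/)[^&#]+', url)
    print(m.group(0) if m else None)  # dQw4w9WgXcQ, both times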
Example #18
	def check_filter(self, mac, network):
		if self.config["filter_type"][network["type"]] == False:
			return False
		
		crypts = decode_cryptset(network["cryptset"])
		if crypts == ["none"]:
			crypt = "none"
		elif "aes_ccm" in crypts or "aes_ocb" in crypts:
			crypt = "wpa2"
		elif "wpa" in crypts:
			crypt = "wpa"
		elif "wep" in crypts:
			crypt = "wep"
		else:
			crypt = "other"
		if self.config["filter_crypt"][crypt] == False:
			return False
		
		if self.config["filter_regexpr"]["ssid"] != "":
			if re.search(r"%s" % self.config["filter_regexpr"]["ssid"], network["ssid"]) is None:
				return False
		if self.config["filter_regexpr"]["bssid"] != "":
			if re.search(r"%s" % self.config["filter_regexpr"]["bssid"], mac) is None:
				return False
		
		return True
Example #19
 def get_mediaid(self):
     match = re.search(r"mediaId = '([^']+)';", self.get_urldata())
     if not match:
         match = re.search(r'media-id="([^"]+)"', self.get_urldata())
     if not match:
         match = re.search(r'screen9-mid="([^"]+)"', self.get_urldata())
     if not match:
         match = re.search(r'data-id="([^"]+)"', self.get_urldata())
     if not match:
         match = re.search(r'data-id=([^ ]+) ', self.get_urldata())
     if not match:
         match = re.search(r'data-videoid="([^"]+)"', self.get_urldata())
     if not match:
         match = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)', self.get_urldata())
         if match:
             data = self.http.request("get", match.group(1))
             match = re.search(r'mediaid": "([^"]+)"', data.text)
         if not match:
             match = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())
             if match:
                 url = "http:{0}".format(match.group(1))
                 data = self.http.request("get", url)
                 match = re.search(r"mediaid: '([^']+)'", data.text)
     if not match:
         urlp = urlparse(self.url)
         match = urlp.fragment
     return match
Example #20
 def process_page(self, page):
     talk_page = page
     page = talk_page.toggleTalkPage()
     #find the edit where {{good article}} was added
     found_oldid = False
     oldid = None
     while not found_oldid:
         self.site.loadrevisions(page, getText=True, rvdir=False,
                                 step=10, total=10, startid=oldid)
         hist = page.fullVersionHistory(total=10)  # This should fetch nothing...
         for revision in hist:
             if re.search('\{\{(good|ga) article\}\}', revision[3], re.IGNORECASE):
                 oldid = revision[0]
             else:
                 #current oldid is the right one
                 found_oldid = True
                 break
     #add the oldid in the template
     if not oldid:
         self.output('* ERROR: Could not find oldid for [[%s]]' % talk_page.title())
         return
     self.output('* Adding |oldid=%s to [[%s]]' % (oldid, talk_page.title()))
     oldtext = talk_page.get()
     search = re.search('\{\{GA\s?\|(.*?)\}\}', oldtext)
     newtext = oldtext.replace(search.group(0), '{{GA|%s|oldid=%s}}' % (search.group(1), oldid))
     pywikibot.showDiff(oldtext, newtext)
     talk_page.put(newtext, 'BOT: Adding |oldid=%s to {{[[Template:GA|GA]]}}' % oldid)
Example #21
def get_topic(target_branch):

    branch_name = get_branch_name(target_branch)

    branch_parts = branch_name.split("/")
    if len(branch_parts) >= 3 and branch_parts[0] == "review":
        return use_topic("Using change number %s "
                         "for the topic of the change submitted",
                         "/".join(branch_parts[2:]))

    log_output = run_command("git log HEAD^1..HEAD")
    bug_re = r'\b([Bb]ug|[Ll][Pp])\s*[:]?\s*[#]?\s*(\d+)'

    match = re.search(bug_re, log_output)
    if match is not None:
        return use_topic("Using bug number %s "
                         "for the topic of the change submitted",
                         "bug/%s" % match.group(2))

    bp_re = r'\b([Bb]lue[Pp]rint|[Bb][Pp])\s*[#:]?\s*([0-9a-zA-Z-_]+)'
    match = re.search(bp_re, log_output)
    if match is not None:
        return use_topic("Using blueprint number %s "
                         "for the topic of the change submitted",
                         "bp/%s" % match.group(2))

    return use_topic("Using local branch name %s "
                     "for the topic of the change submitted",
                     branch_name)
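
The bug pattern tolerates several separators between the keyword and the number; a quick check with a sample commit message, not from the original project:

import re

bug_re = r'\b([Bb]ug|[Ll][Pp])\s*[:]?\s*[#]?\s*(\d+)'
log_output = "Fix crash on resume\n\nCloses-Bug: #1734204"
match = re.search(bug_re, log_output)
print(match.group(2) if match else None)  # 1734204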
Example #22
    def annotation2arg(self, arg, annotation):
        '''Convert GObject-introspection annotations to arg options'''

        if 'allow-none' in annotation:
            arg[2]['optional'] = True
        if re.search(r'\(\s*out\s*\)', annotation):
            arg[2]['out'] = True
        if re.search(r'\(\s*in\s*\)', annotation):
            arg[2]['in'] = True
        m = re.search(r'\(\s*default\s*([^ )]*)\s*\)', annotation)
        if m:
            prefix = ''
            if is_boolean(arg):
                prefix = 'b:'
            elif is_int(arg, self.binding_data):
                prefix = 'c:'
            else:
                raise Exception('should not happen: could not find type for default: ' + annotation)
            arg[2]['default'] = prefix + m.group(1)
            arg[2]['optional'] = True
        m = re.search(r'\(\s*element-type\s+(\w+)(?:\s+(\w+))?', annotation)
        if m:
            if m.group(2):
                arg[2]['key-type'] = \
                        convert_type_from_gobject_annotation(m.group(1))
                arg[2]['value-type'] = \
                        convert_type_from_gobject_annotation(m.group(2))
            else:
                arg[2]['element-type'] = \
                        convert_type_from_gobject_annotation(m.group(1))
        m = re.search(r'\(\s*transfer\s+(\w+)', annotation)
        if m:
            arg[2]['transfer'] = m.group(1)
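
The element-type pattern always yields a two-slot groups() tuple, with the second slot None when only one type is given, which is why the code tests m.group(2); a standalone check with an invented annotation:

import re

ann = "(element-type utf8 gint)"
m = re.search(r'\(\s*element-type\s+(\w+)(?:\s+(\w+))?', ann)
print(m.groups())  # ('utf8', 'gint')

m = re.search(r'\(\s*element-type\s+(\w+)(?:\s+(\w+))?', "(element-type utf8)")
print(m.groups())  # ('utf8', None)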
Example #23
    def status_recv(self):
       try:
           tmp = str(self.cxi.recv(1024), 'utf-8')
       except (socket.error, socket.timeout):
           print ("ERROR:socket got disconnected")
           return -1, "socket timeout/disconnected" 

       if DEBUG:
               print ("recv got, ",tmp)

       if tmp == 0:
           return -1, "socket timeout/disconnected" 

       if len(tmp) == 0:
           return -1, "did not receive anything"

       ## out of ribbon
       if re.search("^o",tmp) != None:
           return -1, "Out of ribbon"
       ## out of paper
       if re.search("^O",tmp) != None:
           return -1, "Out of paper"

       ## printing error
       if re.search("^ERROR", tmp) != None:
           return -1, "some ERROR with status check"

       ## print is done
       if re.search("^R00000",tmp) != None:
           return 1, "Success"
       return -1,tmp
Example #24
def parse_changelog():
    with open('CHANGES') as f:
        lineiter = iter(f)
        for line in lineiter:
            match = re.search('^Version\s+(.*)', line.strip())

            if match is None:
                continue

            version = match.group(1).strip()

            if lineiter.next().count('-') != len(line.strip()):
                fail('Invalid hyphen count below version line: %s', line.strip())

            while 1:
                released = lineiter.next().strip()
                if released:
                    break

            match = re.search(r'Released (\w+\s+\d+\w+\s+\d+)', released)

            if match is None:
                fail('Could not find release date in version %s' % version)

            datestr = parse_date(match.group(1).strip())

            return version, datestr
Example #25
    def get_status(self):
        self.zd_up = 0
        self.zd_pid = 0
        self.zd_should_be_up = 0
        self.zd_status = None
        resp = self.send_action("status")
        if not resp:
            return resp
        m = re.search("(?m)^application=(\d+)$", resp)
        if not m:
            return resp
        self.zd_up = 1
        self.zd_pid = int(m.group(1))
        self.zd_status = resp
        m = re.search("(?m)^should_be_up=(\d+)$", resp)
        if m:
            self.zd_should_be_up = int(m.group(1))
        else:
            self.zd_should_be_up = 1
        m = re.search("(?m)^testing=(\d+)$", resp)
        if m:
            self.zd_testing = int(m.group(1))
        else:
            self.zd_testing = 0

        return resp
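
The (?m) prefix turns on MULTILINE inside the pattern itself, so ^ and $ anchor at each line of the multi-line status response; a quick check with an invented response blob:

import re

resp = "application=4242\nshould_be_up=1\ntesting=0"
m = re.search(r"(?m)^application=(\d+)$", resp)
print(int(m.group(1)))  # 4242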
Example #26
def parser(content, option, outfp):
    score = 0
    for lines in re.findall(
                    "=+LINUX\s+DATA\s+BELOW\s*=+\n(.*?)\n\*\s+Trademarks",
                    content,
                    re.DOTALL):
        if lines:
            line_list = lines.splitlines()
            for i in range(0, len(line_list)):
                if re.search("MEMORY\s+INDEX", line_list[i]):
                    memory_line = line_list[i]
                elif re.search("INTEGER\s+INDEX", line_list[i]):
                    int_line = line_list[i]
                else:
                    if re.search("FLOATING-POINT", line_list[i]):
                        float_line = line_list[i]
            if option == "int":
                line_list.remove(memory_line)
                line_list.remove(float_line)
                score = int_line.split(":")[1].strip()
            elif option == "float":
                line_list.remove(int_line)
                line_list.remove(memory_line)
                score = float_line.split(":")[1].strip()
            else:
                if option == "memory":
                    line_list.remove(int_line)
                    line_list.remove(float_line)
                    score = memory_line.split(":")[1].strip()

            for i in range(0, len(line_list)):
                outfp.write(line_list[i] + '\n')
            return score
Example #27
    def do_changes(self, resource):
        changes = self.list_changes(resource)
        changed = False

        if "state" in changes and changes["state"][0] != changes["state"][1]:
            action = "start"
            if changes["state"][1] == "stopped":
                action = "stop"

            # start or stop the service
            result = self._io.run("/usr/bin/systemctl",
                            [action, "%s.service" % resource.name])

            if re.search("^Failed", result[1]):
                raise Exception("Unable to %s %s: %s" % (action, resource.name, result[1]))

            changed = True

        if "enabled" in changes and changes["enabled"][0] != changes["enabled"][1]:
            action = "enable"

            if changes["enabled"][1] == False:
                action = "disable"

            result = self._io.run("/usr/bin/systemctl",
                            [action, "%s.service" % resource.name])
            changed = True

            if re.search("^Failed", result[1]):
                raise Exception("Unable to %s %s: %s" % (action, resource.name, result[1]))

        return changed
Example #28
    def do_changes(self, resource):
        changes = self.list_changes(resource)
        changed = False

        if "state" in changes and changes["state"][0] != changes["state"][1]:
            action = "start"
            if changes["state"][1] == "stopped":
                action = "stop"

            # start or stop the service
            result = self._io.run("/sbin/service",
                            [resource.name, action])

            if re.search("^Failed", result[1]):
                raise Exception("Unable to %s %s: %s" % (action, resource.name, result[1]))

            changed = True

        if "enabled" in changes and changes["enabled"][0] != changes["enabled"][1]:
            action = "on"

            if changes["enabled"][1] == False:
                action = "off"

            result = self._io.run("/sbin/chkconfig", [resource.name, action])
            changed = True

            if re.search("^Failed", result[1]):
                raise Exception("Unable to %s %s: %s" % (action, resource.name, result[1]))

        return changed
Example #29
    def nameQuality(name):

        name = os.path.basename(name)

        # if we have our exact text then assume we put it there
        for x in Quality.qualityStrings:
            if x == Quality.UNKNOWN:
                continue

            regex = '\W'+Quality.qualityStrings[x].replace(' ','\W')+'\W'
            regex_match = re.search(regex, name, re.I)
            if regex_match:
                return x

        checkName = lambda list, func: func([re.search(x, name, re.I) for x in list])

        if checkName(["pdtv.xvid", "hdtv.xvid", "dsr.xvid"], any) and not checkName(["720p"], all):
            return Quality.SDTV
        elif checkName(["dvdrip.xvid", "bdrip.xvid", "dvdrip.divx", "dvdrip.ws.xvid"], any) and not checkName(["720p"], all):
            return Quality.SDDVD
        elif checkName(["720p", "hdtv", "x264"], all) or checkName(["hr.ws.pdtv.x264"], any):
            return Quality.HDTV
        elif checkName(["720p", "web.dl"], all) or checkName(["720p", "itunes", "h.?264"], all):
            return Quality.HDWEBDL
        elif checkName(["720p", "bluray", "x264"], all) or checkName(["720p", "hddvd", "x264"], all):
            return Quality.HDBLURAY
        elif checkName(["1080p", "bluray", "x264"], all) or checkName(["1080p", "hddvd", "x264"], all):
            return Quality.FULLHDBLURAY
        else:
            return Quality.UNKNOWN
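
The checkName lambda reduces a list of patterns with any or all; the same idea standalone, with a sample release name that is not from the original project:

import re

name = "Show.S01E01.720p.HDTV.x264-GRP"
check_name = lambda patterns, func: func(re.search(p, name, re.I) for p in patterns)

print(bool(check_name(["720p", "hdtv", "x264"], all)))       # True  -> HDTV quality
print(bool(check_name(["dvdrip.xvid", "bdrip.xvid"], any)))  # False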
Example #30
def parse_contact(raw):
    contact_details = {'name': '', 'email': ''}
    links = []

    match_mail = re.search(contact_mail_pattern, raw)
    match_url = re.search(contact_url_pattern, raw)
    match_intern = re.search(contact_intern_pattern, raw)

    if match_mail:
        if re.match(mail_address_pattern, match_mail.group('email')):
            # found an email address
            contact_details = {'name': match_mail.group('name'),
                               'email': match_mail.group('email')}
        else:
            contact_details = {'name': match_mail.group('name'),
                               'email': ''}
    elif match_url:
        # found a hyperlink
        links.append({'ref': 'contact', 'href': match_url.group('url')})
    elif match_intern:
        # found a link to the wiki page '/contact'
        contact_details = {'name': ' / '.join(name for name in match_intern.groupdict().values() if (name is not None)), 'email': ''}
    else:
        name = raw.replace("[", "").replace("]", "").replace("|", "").strip()
        if name:
            # found a name
            contact_details = {'name': name, 'email': ''}
        else:
            # found nothing
            pass
    return contact_details, links
Example #31
 def checkName(options, func):
     return func(
         [re.search(option, file_quality, re.I) for option in options])
Example #32
    def parse(self, response):
        # treat any response other than 200 OK as a failed request
        if response.status != 200:
            print('Error - {} is not available to access'.format(response.url))
            return

        # hand the response HTML to the BeautifulSoup parser so we can filter by tag
        soup = BeautifulSoup(response.text)

        
        # grab the main article body
        main_content = soup.find(id='main-content')
        
        # if the article has meta blocks, extract the author, title and date from them
        metas = main_content.select('div.article-metaline')
        author = ''
        title = ''
        date = ''
        if metas:
            if metas[0].select('span.article-meta-value')[0]:
                author = metas[0].select('span.article-meta-value')[0].string
            if metas[1].select('span.article-meta-value')[0]:
                title = metas[1].select('span.article-meta-value')[0].string
            if metas[2].select('span.article-meta-value')[0]:
                date = metas[2].select('span.article-meta-value')[0].string

            # remove the meta info (author, title, date and other board info) from main_content
            #
            # for the .extract() method, see the official docs
            #  - https://www.crummy.com/software/BeautifulSoup/bs4/doc/#extract
            for m in metas:
                m.extract()
            for m in main_content.select('div.article-metaline-right'):
                m.extract()
        
        # grab the comment (push) section
        pushes = main_content.find_all('div', class_='push')
        for p in pushes:
            p.extract()
        
        # if the article contains the banner「※ 發信站: 批踢踢實業坊(ptt.cc), 來自: xxx.xxx.xxx.xxx」,
        # extract the IP with a regular expression
        # the string mixes special symbols and Chinese, so the unicode literal form u'...' is recommended
        try:
            ip = main_content.find(text=re.compile(u'※ 發信站:'))
            ip = re.search('[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*', ip).group()
        except Exception as e:
            ip = ''
        
        # strip '※ 發信站:', '◆ From:', blank lines and extra whitespace from the body (※ = u'\u203b', ◆ = u'\u25c6')
        # keep alphanumerics, Chinese text and punctuation, URLs and a few special symbols
        #
        # .stripped_strings strips extra whitespace and yields the text; see the official docs
        #  - https://www.crummy.com/software/BeautifulSoup/bs4/doc/#strings-and-stripped-strings
        filtered = []
        for v in main_content.stripped_strings:
            # keep every string that does not start with a special symbol or with '--'
            if v[0] not in [u'※', u'◆'] and v[:2] not in [u'--']:
                filtered.append(v)

        # define a filter for special symbols and full-width punctuation
        expr = re.compile(u'[^一-龥。;,:“”()、?《》\s\w:/-_.?~%()]')
        for i in range(len(filtered)):
            filtered[i] = re.sub(expr, '', filtered[i])
        
        # drop empty strings; joining the filtered pieces gives the article body (content)
        filtered = [i for i in filtered if i]
        content = ' '.join(filtered)
        
        # process the comment (push) section
        # p counts upvotes (推)
        # b counts downvotes (噓)
        # n counts neutral arrows
        p, b, n = 0, 0, 0
        messages = []
        for push in pushes:
            # skip any comment block without a push-tag
            if not push.find('span', 'push-tag'):
                continue
            
            # strip extra whitespace and newlines
            # push_tag tells whether the comment is an upvote, an arrow or a downvote
            # push_userid identifies the commenter
            # push_content is the comment text
            # push_ipdatetime is the comment date and time
            push_tag = push.find('span', 'push-tag').string.strip(' \t\n\r')
            push_userid = push.find('span', 'push-userid').string.strip(' \t\n\r')
            push_content = push.find('span', 'push-content').strings
            push_content = ' '.join(push_content)[1:].strip(' \t\n\r')
            push_ipdatetime = push.find('span', 'push-ipdatetime').string.strip(' \t\n\r')

            # pack up the comment info and tally the push/boo counts
            messages.append({
                'push_tag': push_tag,
                'push_userid': push_userid,
                'push_content': push_content,
                'push_ipdatetime': push_ipdatetime})
            if push_tag == u'推':
                p += 1
            elif push_tag == u'噓':
                b += 1
            else:
                n += 1
        
        # tally the comments
        # count is upvotes minus downvotes, i.e. whether the article skews positive or negative
        # all is the total number of comments
        message_count = {'all': p+b+n, 'count': p-b, 'push': p, 'boo': b, 'neutral': n}
        
        # assemble the article record
        data = {
            'url': response.url,
            'article_author': author,
            'article_title': title,
            'article_date': date,
            'article_content': content,
            'ip': ip,
            'message_count': message_count,
            'messages': messages
        }
        yield data
Example #33
 def js_type_annotation(ann):
   return re.search('^@.*Returns', ann) or re.search('^@.*Creates', ann)
Example #34
def main():
    parser = ArgumentParser()
    parser.add_argument('patient_dir', help='path to the patient directory')
    mutex = parser.add_mutually_exclusive_group()
    mutex.add_argument('--shift-file', help='mapping of patient to the amount of time (hours) we want to shift the data by')
    mutex.add_argument('--new-cohort-file', help='make a new cohort file with patient data. Allows us to track patients that we\'ve already processed. The difference between this and --shift-file is that that shift-file is already made, whereas this argument presumes no prior thought from the user')
    parser.add_argument('--rm-old-dir', help='remove old (non-anonymized) directory', action='store_true')
    parser.add_argument('--new-dir', help='specify a new directory path to save patient data. If not specified then script will save data into 1 level above where patient directory is located')
    parser.add_argument('--only-shift-date', action='store_true', help='only shift the date of the filename and not the patient. Helpful in cases where the patient name is already anonymized')
    args = parser.parse_args()

    match = re.search(patient_pattern, args.patient_dir)
    if args.only_shift_date:
        patient = None
    elif not match:
        raise NoPatientError('Patient pattern not found for directory {}. Did you mean to shift the files without a patient identifier?'.format(args.patient_dir))
    elif match:
        patient = match.groups()[0]


    shift_hours = randint(min_years*24*365, max_years*24*365)

    if args.only_shift_date:
        new_patient_id = None

    elif args.shift_file:
        new_patient_id = randint(0, max_patient_id)
        shift_data = pd.read_csv(args.shift_file)
        patient_data = shift_data[shift_data.patient == patient]
        if len(patient_data) != 1:
            raise NoPatientError('patient {} not found in shift file, or may be duplicated'.format(patient))
        shift_hours = patient_data.iloc[0].shift_hours
        new_patient_id = patient_data.iloc[0].new_patient_id

    elif args.new_cohort_file:
        new_patient_id = randint(0, max_patient_id)
        try:
            cohort_data = pd.read_csv(args.new_cohort_file)
            new_patient_ids = cohort_data.new_patient_id.unique()
            cohort_data = cohort_data.values.tolist()
        except:
            cohort_data = []
            new_patient_ids = []

        while new_patient_id in new_patient_ids:
            new_patient_id = randint(0, max_patient_id)

    print("shifting patient: {} data by hours: {} new id: {}".format(patient, shift_hours, new_patient_id))

    files = glob(os.path.join(args.patient_dir, '*.csv'))
    files += glob(os.path.join(args.patient_dir, '*.processed.npy'))
    if len(files) == 0:
        raise NoFilesError('No files found in directory {}'.format(args.patient_dir))

    new_files_to_move = []
    remove_files_from_arr = []
    for filename in files:
        file_obj = File(filename, shift_hours, patient, new_patient_id, args.only_shift_date)
        processed_ok, new_filename = file_obj.process_file()

        if not processed_ok:
            remove_files_from_arr.append(filename)
        else:
            new_files_to_move.append(new_filename)

    for file in remove_files_from_arr:
        idx = files.index(file)
        files.pop(idx)

    if len(files) == 0:
        raise NoFilesError("No files were found to move for patient {} after final check".format(patient))

    new_dir = args.patient_dir.replace(patient, str(new_patient_id)) if not args.new_dir else os.path.join(args.new_dir, str(new_patient_id))
    os.mkdir(new_dir)
    for i, file in enumerate(files):
        new_filename = new_files_to_move[i]
        new_filepath = os.path.join(new_dir, os.path.basename(new_files_to_move[i]))
        shutil.move(new_filename, new_filepath)
        # Only .processed.npy files appear in the list of collected files, but the
        # .raw.npy files have to be moved as well. There is nothing to do with them
        # except change their name: we reference the .processed.npy file and, since
        # there is a 1-1 mapping between processed and raw files, a string replacement
        # yields the matching raw filename.
        if file.endswith('.processed.npy'):
            old_raw_file = file.replace('.processed.npy', '.raw.npy')
            shutil.copy(old_raw_file, new_filepath.replace('.processed.npy', '.raw.npy'))

    if args.rm_old_dir:
        shutil.rmtree(args.patient_dir)

    if args.new_cohort_file:
        cohort_data.append([patient, new_patient_id, shift_hours])
        df = pd.DataFrame(cohort_data, columns=['patient_id', 'new_patient_id', 'shift_hours'])
        df.to_csv(args.new_cohort_file, index=False)
Example #35
parser.add_argument('inputdll', help='input dll')
parser.add_argument('outputdll', help='output dll')
args = parser.parse_args()

# dump the dll exports using dumpbin
process = subprocess.Popen(['dumpbin', '/EXPORTS', args.inputdll], stdout=subprocess.PIPE)
out, err = process.communicate()

# get all the function definitions
lines = out.split('\n')
pattern = r'^\s*(\d+)\s+[A-Z0-9]+\s+[A-Z0-9]{8}\s+([^ ]+)'

library_output = 'EXPORTS \n'

for line in lines:
    matches = re.search(pattern, line)

    if matches is not None:
        #ordinal = matches.group(1)
        function_name = matches.group(2)
        library_output = library_output + function_name + '\n'

# write the def file
deffile_name = args.outputdll[:-4] + '.def'
with open(deffile_name, 'w') as f:
    f.write(library_output)

process = subprocess.Popen(['lib', '/MACHINE:X64', '/DEF:' + deffile_name], )
out, err = process.communicate()

# copy the dll over
Example #36
#--------------------------------------------------------------------------
# Check the presence of Wkhtmltopdf and return its version at Odoo start-up
#--------------------------------------------------------------------------
wkhtmltopdf_state = 'install'
try:
    process = subprocess.Popen([_get_wkhtmltopdf_bin(), '--version'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
except (OSError, IOError):
    _logger.info('You need Wkhtmltopdf to print a pdf version of the reports.')
else:
    _logger.info('Will use the Wkhtmltopdf binary at %s' %
                 _get_wkhtmltopdf_bin())
    out, err = process.communicate()
    version = re.search('([0-9.]+)', out).group(0)
    if LooseVersion(version) < LooseVersion('0.12.0'):
        _logger.info('Upgrade Wkhtmltopdf to (at least) 0.12.0')
        wkhtmltopdf_state = 'upgrade'
    else:
        wkhtmltopdf_state = 'ok'

    if config['workers'] == 1:
        _logger.info(
            'You need to start Odoo with at least two workers to print a pdf version of the reports.'
        )
        wkhtmltopdf_state = 'workers'


class Report(osv.Model):
    _name = "report"
Example #37
def get_version(package):
    """
    Return package version as listed in `__version__` in `__init__.py`.
    """
    with open(os.path.join(package, '__init__.py')) as f:
        return re.search("__version__ = ['\"]([^'\"]+)['\"]", f.read()).group(1)
Example #38
    def from_params(params: Params,
                    serialization_dir: str,
                    recover: bool = False) -> 'TrainerPieces':
        all_datasets = training_util.datasets_from_params(params)
        datasets_for_vocab_creation = set(
            params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(
                    f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info(
            "From dataset instances, %s will be considered for vocabulary creation.",
            ", ".join(datasets_for_vocab_creation))

        if recover and os.path.exists(
                os.path.join(serialization_dir, "vocabulary")):
            vocab = Vocabulary.from_files(
                os.path.join(serialization_dir, "vocabulary"))
            params.pop("vocabulary", {})
        else:
            vocab = Vocabulary.from_params(params.pop(
                "vocabulary", {}), (instance
                                    for key, dataset in all_datasets.items()
                                    for instance in dataset
                                    if key in datasets_for_vocab_creation))

        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # If vocab extension is ON for training, embedding extension should also be
        # done. If vocab and embeddings are already in sync, it would be a no-op.
        model.extend_embedder_vocab()

        # Initializing the model can have side effect of expanding the vocabulary
        vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(model.vocab)
        validation_iterator_params = params.pop("validation_iterator", None)
        if validation_iterator_params:
            validation_iterator = DataIterator.from_params(
                validation_iterator_params)
            validation_iterator.index_with(model.vocab)
        else:
            validation_iterator = None

        train_data = all_datasets['train']
        validation_data = all_datasets.get('validation')
        test_data = all_datasets.get('test')

        trainer_params = params.pop("trainer")
        no_grad_regexes = trainer_params.pop("no_grad", ())
        for name, parameter in model.named_parameters():
            if any(re.search(regex, name) for regex in no_grad_regexes):
                parameter.requires_grad_(False)

        frozen_parameter_names, tunable_parameter_names = \
                    get_frozen_and_tunable_parameter_names(model)
        logger.info("Following parameters are Frozen  (without gradient):")
        for name in frozen_parameter_names:
            logger.info(name)
        logger.info("Following parameters are Tunable (with gradient):")
        for name in tunable_parameter_names:
            logger.info(name)

        return TrainerPieces(model, iterator, train_data, validation_data,
                             test_data, validation_iterator, trainer_params)
Example #39
def load_and_numberize_data(path="../data/", nb_words=None, maxlen=None, seed=113, start_char=1, oov_char=2, index_from=3, init_type="random", embfile=None, dev_train_merge=0, map_labels_to_five_class=0):

    """ numberize the train, dev and test files """

    # read the vocab from the entire corpus (train + test + dev)
    vocab = Counter()

    sentences_train = []
    y_train = []

    sentences_test  = []
    y_test = []

    sentences_dev   = []
    y_dev  = []

    for filename in glob.glob(os.path.join(path, '*.csv')):
#        print "Reading vocabulary from" + filename
        reader  = csv.reader(open(filename, 'rb'))
        print (filename)
        for rowid, row in enumerate (reader):
            if rowid == 0: #header
                continue
            if re.search("train.csv", filename.lower()):    
                sentences_train.append(row[1])
                y_train.append(row[2])    

            elif re.search("test.csv", filename.lower()):    
                sentences_test.append(row[1])    
                y_test.append(row[2])    

            elif re.search("dev.csv", filename.lower()):    
                sentences_dev.append(row[1])    
                y_dev.append(row[2])    

            for wrd in row[1].split():
                vocab[wrd] += 1

#    print (sentences_train)
    #print (y_test)
    #print (sentences_dev)

    print "Nb of tweets: train: " + str (len(sentences_train)) + " test: " + str (len(sentences_test)) + " dev: " + str (len(sentences_dev))
    print "Total vocabulary size: " + str (len(vocab))

    if nb_words is None:
        pr_perc  = 100
        nb_words = len(vocab)
    else: # now take a fraction
        pr_perc  = nb_words
        nb_words = int ( len(vocab) * (nb_words / 100.0) )

#    if nb_words is None or nb_words > len(vocab):
#        nb_words = len(vocab) 

    vocab = dict (vocab.most_common(nb_words))

    print "Pruned vocabulary size: " + str (pr_perc) + "% =" + str (len(vocab))

    #Create vocab dictionary that maps word to ID
    vocab_list = vocab.keys()
    vocab_idmap = {}
    for i in range(len(vocab_list)):
        vocab_idmap[vocab_list[i]] = i

    # Numberize the sentences
    X_train = numberize_sentences(sentences_train, vocab_idmap, oov_char=oov_char)
    X_test  = numberize_sentences(sentences_test,  vocab_idmap, oov_char=oov_char)
    X_dev   = numberize_sentences(sentences_dev,   vocab_idmap, oov_char=oov_char)


    #Create label dictionary that map label to ID
    merge_labels = None
    

    if map_labels_to_five_class:                

        merge_labels = {
                    # QUESTION/REQUEST
                    "QH":"Ques",\
                    "QO":"Ques",\
                    "QR":"Ques",\
                    "QW":"Ques",\
                    "QY":"Ques",\
                    # APPRECIATION/ASSESSMENT/POLITE 
                    "AA":"Polite",\
                    "P":"Polite",\
                    # STATEMENT 
                    "S":"St",\
                    # RESPONSE 
                    "A":"Res",\
                    "R":"Res",\
                    "U":"Res",\
                    #SUGGESTION
                    "AC":"Sug"}

        y_train = remap_labels(y_train, merge_labels=merge_labels)
        y_test  = remap_labels(y_test,  merge_labels=merge_labels)
        y_dev   = remap_labels(y_dev,   merge_labels=merge_labels)

    label_list = sorted( list (set(y_train)))

    label_map  = {}
    for lab_id, lab in enumerate (label_list):
        label_map[lab] = lab_id  

    # Numberize the labels
    (y_train, y_train_freq)   = numberize_labels(y_train, label_map)
    (y_test,  y_test_freq)    = numberize_labels(y_test,  label_map)
    (y_dev,   y_dev_freq)     = numberize_labels(y_dev,   label_map)


    assert len(X_train) == len(y_train) and len(X_test) == len(y_test) and len(X_dev) == len(y_dev)

    #randomly shuffle the training data

    print("Random seed", str(seed))
    np.random.seed(seed)
    np.random.shuffle(X_train)
    np.random.seed(seed)
    np.random.shuffle(y_train)


    X_train, y_train = adjust_index(X_train, y_train, start_char=start_char, index_from=index_from, maxlen=maxlen)
    X_test,  y_test  = adjust_index(X_test,  y_test,  start_char=start_char, index_from=index_from, maxlen=maxlen)
    X_dev,   y_dev   = adjust_index(X_dev,   y_dev,   start_char=start_char, index_from=index_from, maxlen=maxlen)

    if dev_train_merge:
        X_train.extend(X_dev)
        y_train.extend(y_dev)
#        y_train=np.concatenate ((y_train, y_dev)) # need if y_train is numpy array
 
    # load the embeddeings
    if init_type.lower() != "random" and embfile:
        E = load_emb(embfile, vocab_idmap, index_from)
    else:
        E = None


    return (X_train, y_train), (X_test, y_test), (X_dev, y_dev), nb_words + index_from, E, label_map
Example #40
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"],
                          cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, [
        "describe", "--tags", "--dirty", "--always", "--long", "--match",
        "%s*" % tag_prefix
    ],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'" %
                               describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" %
                               (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                       cwd=root)[0].strip()
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
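# --- Illustrative aside (not part of the original snippet) ---
# A minimal standalone sketch of how the TAG-NUM-gHEX[-dirty] parsing above
# behaves; the sample describe outputs are made up.
import re

for out in ["v1.2.3-14-gabc1234", "v1.2.3-14-gabc1234-dirty", "abc1234"]:
    dirty = out.endswith("-dirty")
    if dirty:
        out = out[:out.rindex("-dirty")]
    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', out)
    if mo:
        print(mo.group(1), int(mo.group(2)), mo.group(3), dirty)
    else:
        print(None, None, out[:7], dirty)  # bare HEX: no reachable tag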
Exemple #41
0
def validate_email(email):
    print(email)
    match = re.search(r'[a-zA-Z._0-9]+@[a-z]+\.[a-z]+', email)
    # guard against a non-match before calling .group()
    if match and len(email) == len(match.group()):
        return True
    return False
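# An alternative sketch (not the original author's code): re.fullmatch
# already requires the whole string to match, so the length check above
# can be dropped.
def validate_email_full(email):
    return re.fullmatch(r'[a-zA-Z._0-9]+@[a-z]+\.[a-z]+', email) is not None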
Exemple #42
0
    def search(self, search_params, age=0, ep_obj=None):  # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        results = []
        if not self.login():
            return results

        self.categories = "cat=" + str(self.cat)

        for mode in search_params:
            items = []
            logger.log("Search Mode: {0}".format(mode), logger.DEBUG)
            for search_string in search_params[mode]:

                if mode == 'RSS':
                    self.page = 2

                last_page = 0
                y = int(self.page)

                if search_string == '':
                    continue

                search_string = str(search_string).replace('.', ' ')

                for x in range(0, y):
                    z = x * 20
                    if last_page:
                        break

                    if mode != 'RSS':
                        search_url = (self.urls['search_page'] +
                                      '&filter={2}').format(
                                          z, self.categories, search_string)
                    else:
                        search_url = self.urls['search_page'].format(
                            z, self.categories)

                    if mode != 'RSS':
                        logger.log(
                            "Search string: {0}".format(
                                search_string.decode("utf-8")), logger.DEBUG)

                    data = self.get_url(search_url, returns='text')
                    if not data:
                        logger.log("No data returned from provider",
                                   logger.DEBUG)
                        continue

                    try:
                        with BS4Parser(data, 'html5lib') as html:
                            torrent_table = html.find('table',
                                                      class_='copyright')
                            torrent_rows = torrent_table(
                                'tr') if torrent_table else []

                            # Continue only if one Release is found
                            if len(torrent_rows) < 3:
                                logger.log(
                                    "Data returned from provider does not contain any torrents",
                                    logger.DEBUG)
                                last_page = 1
                                continue

                            if len(torrent_rows) < 42:
                                last_page = 1

                            for result in torrent_table('tr')[2:]:

                                try:
                                    link = result.find('td').find('a')
                                    title = link.string
                                    download_url = self.urls[
                                        'download'] % result('td')[8].find(
                                            'a')['href'][-8:]
                                    leechers = result('td')[3]('td')[0].text
                                    leechers = int(leechers.strip('[]'))
                                    seeders = result('td')[3]('td')[1].text
                                    seeders = int(seeders.strip('[]'))
                                    torrent_size = result('td')[3](
                                        'td')[3].text.strip('[]') + " GB"
                                    size = convert_size(torrent_size) or -1
                                except (AttributeError, TypeError):
                                    continue

                                filename_qt = self._reverseQuality(
                                    self._episodeQuality(result))
                                for text in self.hdtext:
                                    title1 = title
                                    title = title.replace(text, filename_qt)
                                    if title != title1:
                                        break

                                if Quality.nameQuality(
                                        title) == Quality.UNKNOWN:
                                    title += filename_qt

                                if not self._is_italian(
                                        result) and not self.subtitle:
                                    logger.log(
                                        "Torrent is subtitled, skipping: {0} ".
                                        format(title), logger.DEBUG)
                                    continue

                                if self.engrelease and not self._is_english(
                                        result):
                                    logger.log(
                                        "Torrent isnt english audio/subtitled , skipping: {0} "
                                        .format(title), logger.DEBUG)
                                    continue

                                # note: the original pattern [Ss][\d{1,2}]+ put the
                                # repetition braces inside a character class by mistake
                                search_show = re.split(r'([Ss]\d{1,2})',
                                                       search_string)[0]
                                show_title = search_show
                                rindex = re.search(r'([Ss]\d{1,2})', title)
                                if rindex:
                                    show_title = title[:rindex.start()]
                                    ep_params = title[rindex.start():]
                                if show_title.lower() != search_show.lower(
                                ) and search_show.lower() in show_title.lower(
                                ):
                                    new_title = search_show + ep_params
                                    title = new_title

                                if not all([title, download_url]):
                                    continue

                                if self._is_season_pack(title):
                                    title = re.sub(r'([Ee]\d{1,2}(?:-\d{1,2})?)', '',
                                                   title)

                                # Filter unseeded torrent
                                if seeders < self.minseed or leechers < self.minleech:
                                    if mode != 'RSS':
                                        logger.log(
                                            "Discarding torrent because it doesn't meet the minimum seeders or leechers: {0} (S:{1} L:{2})"
                                            .format(title, seeders,
                                                    leechers), logger.DEBUG)
                                    continue

                                item = {
                                    'title': title,
                                    'link': download_url,
                                    'size': size,
                                    'seeders': seeders,
                                    'leechers': leechers,
                                    'hash': ''
                                }
                                if mode != 'RSS':
                                    logger.log(
                                        "Found result: {0} with {1} seeders and {2} leechers"
                                        .format(title, seeders,
                                                leechers), logger.DEBUG)

                                items.append(item)

                    except Exception:
                        logger.log(
                            "Failed parsing provider. Traceback: {0}".format(
                                traceback.format_exc()), logger.ERROR)

                # For each search mode, sort all the items by seeders if available
                items.sort(key=lambda d: try_int(d.get('seeders', 0)),
                           reverse=True)

                results += items

        return results
Exemple #43
0
    def convert(self, output):
        """
        Convert the output from LaTeX into images

        Arguments:
        output -- output file object

        """
        if not self.command and self.executeConverter is Imager.executeConverter:
            log.warning('No imager command is configured.  ' +
                        'No images will be created.')
            return

        cwd = os.getcwd()

        # Make a temporary directory to work in
        tempdir = tempfile.mkdtemp()
        os.chdir(tempdir)

        # Execute converter
        rc, images = self.executeConverter(output)
        if rc:
            log.warning('Image converter did not exit properly.  ' +
                        'Images may be corrupted or missing.')

        # Get a list of all of the image files
        if images is None:
            images = [
                f for f in os.listdir('.') if re.match(r'^img\d+\.\w+$', f)
            ]
        if len(images) != len(self.images):
            log.warning(
                'The number of images generated (%d) and the number of images requested (%d) are not the same.'
                % (len(images), len(self.images)))

        # Sort by creation date
        #images.sort(lambda a,b: cmp(os.stat(a)[9], os.stat(b)[9]))

        images.sort(
            lambda a, b: cmp(int(re.search(r'(\d+)\.\w+$', a).group(1)),
                             int(re.search(r'(\d+)\.\w+$', b).group(1))))

        os.chdir(cwd)

        if PILImage is None:
            log.warning('PIL (Python Imaging Library) is not installed.  ' +
                        'Images will not be cropped.')

        # Move images to their final location
        for src, dest in zip(images, self.images.values()):
            # Move the image
            directory = os.path.dirname(dest.path)
            if directory and not os.path.isdir(directory):
                os.makedirs(directory)
            try:
                shutil.copy2(os.path.join(tempdir, src), dest.path)
            except OSError:
                shutil.copy(os.path.join(tempdir, src), dest.path)

            # Crop the image
            try:
                dest.crop()
                status.dot()
            except Exception, msg:
                import traceback
                traceback.print_exc()
                log.warning('failed to crop %s (%s)', dest.path, msg)
Exemple #44
0
    def fetch(self, resource, params={}):
        api_resource = self._full_category + '/' + resource
        beg = re.search('[^/]', api_resource).start()
        api_resource = api_resource[beg:]

        return self._accessor.fetch(api_resource, params)
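# Aside (not the original code): re.search('[^/]') finds the index of the
# first non-slash character, so the slice above is equivalent to
#     api_resource = api_resource.lstrip('/')
# (except that an all-slash string makes .start() fail on None here).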
Exemple #45
0
# serve to show the default.

import os
import re

_docs_path = os.path.dirname(__file__)
_version_path = os.path.abspath(
    os.path.join(_docs_path, "..", "aiosignal", "__init__.py")
)
with open(_version_path, encoding="latin1") as fp:
    try:
        _version_info = re.search(
            r'^__version__ = "'
            r"(?P<major>\d+)"
            r"\.(?P<minor>\d+)"
            r"\.(?P<patch>\d+)"
            r'(?P<tag>.*)?"$',
            fp.read(),
            re.M,
        ).groupdict()
    except (IndexError, AttributeError):  # AttributeError if re.search matched nothing
        raise RuntimeError("Unable to determine version.")


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
Exemple #46
0
def validate_email1(email):
    print(email)
    match = re.search(r'^[a-z._0-9]+@', email)
    if match:
        return True
    return False
Exemple #47
0
class StockInfo:
    url = 'http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/{}.phtml?year={}&jidu={}'

    year = '2017'
    jidu = '2'

    target = year + "-"

    # list_temp = []
    # list_open = []
    # list_max = []
    # list_close = []
    # list_min = []
    # list_volume = []  # trading volume
    # list_amount = []  # trading amount

    def __init__(self):
        self.list_temp = []
        self.list_open = []
        self.list_max = []
        self.list_close = []
        self.list_min = []
        self.list_volume = []  # trading volume
        self.list_amount = []  # trading amount

    def getData(self, stockcode, jidu):
        '''Fetch data for the given span of time, e.g. the last quarter or the last four quarters; note: the unit can only be a quarter'''
        self.stockcode = stockcode
        year = int(
            time.strftime('%Y-%m', time.localtime(time.time())).split('-')[0])
        jidu_current = int(
            time.strftime('%Y-%m', time.localtime(
                time.time())).split('-')[1]) / 3 + 1
        # jidu_temp = None
        # year_temp = None
        for i in range(jidu):
            if jidu_current - i > 0:
                jidu_temp = jidu_current - i
                year_temp = year
            else:
                jidu_temp = jidu_current + 1 - i
                year_temp = year - 1
                pass
            self.visiteSina(stockcode, year_temp, jidu_temp)

            pass
        # lightly process the fetched data and keep it in memory
        i = 0
        for item in self.list_temp:
            if i + 7 < len(self.list_temp) + 1:
                self.list_open.append(self.list_temp[i + 1])
                self.list_max.append(self.list_temp[i + 2])
                self.list_close.append(self.list_temp[i + 3])
                self.list_min.append(self.list_temp[i + 4])
                self.list_volume.append(self.list_temp[i + 5])
                self.list_amount.append(self.list_temp[i + 6])
                i += 7

    def price_info(self, day):
        '''Quote info: the max and min within the given number of days, and the change ratio of the current price relative to them'''
        # if len(self.list_max):
        if self.list_max:
            max_price = max(self.list_max[0:day])
            min_price = min(self.list_min[0:day])
            current_price = self.list_close[0]

            # print "%d天内的最大值是:" % (day), max_price
            # print "%d天内的最小值是:" % (day), min_price
            # print "当前值是:", current_price
            change_percent = (float(max_price) -
                              float(current_price)) / float(max_price) * 100

            print "代码为%s当前值相对%d天内的最大值下跌了:" % (self.stockcode,
                                              day), str(change_percent), "%"
        else:
            print "无法获取代码为%s:" % (self.stockcode), "的股票信息"
        pass

    def visiteSina(self, stockcode, year, jidu):
        '''Fetch the data from Sina'''
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        headers = {'User-Agent': user_agent}
        context = ssl._create_unverified_context()
        response = None
        try:
            request = urllib2.Request(self.url.format(stockcode, year, jidu),
                                      headers=headers)
            response = urllib2.urlopen(request, context=context)
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason
        if response is None:
            print "无法抓取网页"

        else:
            target = response.read()
            tree = lxml.html.fromstring(target)
            td = tree.cssselect('tr td')
            beginflag = False
            split_target = str(year) + "-"
            if td:
                for item in td:
                    if re.search(r'%s' % (split_target), item.text_content()):
                        beginflag = True
                    if beginflag:
                        self.list_temp.append(item.text_content().strip())

        pass
Exemple #48
0

import re
user_input=str(input())

#pattern='(\$|T)x*G+x*(T|\$)' # This regex searches for every occurrence of '$' or 'T', followed by zero or more occurrences of 'x', then one or more occurrences of 'G', then zero or more occurrences of 'x', then a 'T' or '$'

alarm_pattern='(Tx*[$]x*)|(x*[$]x*Tx*)x*G*' # For the sake of figuring out the last test case I decided to
# match the patterns that will set off an alarm; every other pattern is then regarded as quiet,
# although we should note that this approach isn't safe or secure for real-life scenarios

# The same is achieved below by splitting the pattern into two, but using one pattern is probably more efficient and reduces the number of if statements

#pattern='\$x*Gx*T'
#pattern1='Tx*Gx*\$'
if re.search(alarm_pattern,user_input):
    print("ALARM")
#elif re.search(pattern1,user_input):
#    print("quiet")
else:
    print("quiet")


#Adapted from code by Mansi

user_input=input()
check=0
for index_of_a_particular_letter in range(len(user_input)-1):
    if user_input[index_of_a_particular_letter] =='$'or user_input[index_of_a_particular_letter]=='T':
       for index_of_each_letter in range(index_of_a_particular_letter+1,len(user_input)):
           if user_input[index_of_each_letter]=='G':
Exemple #49
0
                exit(1)
            assert (resp_obj['id'] == x)  # assume replies are in-sequence
            print(resp_obj['result'])

        height += num_blocks


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: linearize-hashes.py CONFIG-FILE")
        sys.exit(1)

    f = open(sys.argv[1])
    for line in f:
        # skip comment lines
        m = re.search(r'^\s*#', line)
        if m:
            continue

        # parse key=value lines
        m = re.search(r'^(\w+)\s*=\s*(\S.*)$', line)
        if m is None:
            continue
        settings[m.group(1)] = m.group(2)
    f.close()

    if 'host' not in settings:
        settings['host'] = '127.0.0.1'
    if 'port' not in settings:
        settings['port'] = 52273
    if 'min_height' not in settings:
Exemple #50
0
def check_ligname(ligname):
    '''
    Check that a ligand name is valid
    '''

    return re.search('[0-9A-Z]{2,3}$', ligname)
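# Assumed usage (not part of the snippet): check_ligname('HEM') returns a
# match object (truthy); check_ligname('hem') returns None.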
Exemple #51
0
def saved_variables(formula, args):
    # find which arguments need to be saved
    saved = []

    REPLACEMENTS = [
        # replace self.sizes() with self_sizes
        (r'{}.sizes\(\)', {
            'suffix': '_sizes',
            'type': 'IntArrayRef',
        }),
        # replace zeros_like(self) with self_info
        (r'zeros_like\({}\)', {
            'suffix': '_info',
            'type': 'TypeAndSize',
            'expr': lambda name: name,  # at save-time
            'res': lambda name: name + '_info.zeros()',  # at eval-time
        }),
        # replace self.size(2) with self_size_2
        (r'{}.size\((\w+)\)', {
            'suffix': lambda m: '_argsize_{}'.format(*m.groups()),
            'type': 'int64_t',
        }),
        # replace self.numel() with self_numel
        (r'{}.numel\(\)', {
            'suffix': '_numel',
            'type': 'int64_t',
        }),
        # replace to_args_sizes(self) with self_args_sizes
        (r'to_args_sizes\({}\)', {
            'suffix': '_args_sizes',
            'type': 'std::vector<std::vector<int64_t>>',
        }),
        # replace TensorGeometry(self) with self_geometry
        (r'TensorGeometry\({}\)', {
            'suffix': '_geometry',
            'type': 'TensorGeometry',
        }),
        (r'{}.scalar_type\(\)', {
            'suffix': '_scalar_type',
            'type': 'ScalarType',
        }),
    ]

    for arg in args:
        if 'name' not in arg:
            # some returned arguments do not have names
            continue

        name = arg['name']

        # First search the formula for expressions which can be evaluated
        # when the autograd Function is created to avoid saving variables
        for regex, info in REPLACEMENTS:
            def repl(m):
                suffix = info['suffix']
                suffix = suffix(m) if callable(suffix) else suffix
                expr = info['expr'](name) if 'expr' in info else m.group(0)
                saved.append({
                    'name': name + suffix,
                    'type': info['type'],
                    'expr': expr,
                })
                if 'res' in info:
                    return info['res'](name)
                return name + suffix

            formula = re.sub(regex.format(name), repl, formula)

        # Find any variables which remain in the formula and save them
        if re.search(IDENT_REGEX.format(name), formula):
            arg = copy.deepcopy(arg)
            arg['type'] = arg['type'].replace('const ', '').replace(' &', '')
            saved.append(arg)

    return formula, saved
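# --- Illustrative aside (made-up formula, not from the real codegen) ---
# How one REPLACEMENTS entry rewrites a formula and records a saved value:
import re

def _demo_saved_variables():
    saved = []
    name = 'self'
    formula = 'grad * self.numel()'

    def repl(m):
        saved.append({'name': name + '_numel', 'type': 'int64_t',
                      'expr': m.group(0)})
        return name + '_numel'

    formula = re.sub(r'{}.numel\(\)'.format(name), repl, formula)
    return formula, saved

print(_demo_saved_variables())  # ('grad * self_numel', [{'name': 'self_numel', ...}])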
Exemple #52
0
        response = None
        try:
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request, context=context)
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason

        target = response.read()
        tree = lxml.html.fromstring(target)
        a = tree.cssselect('li a')
        for item in a:
            text = item.text_content().strip()
            result = re.search(r'\((.*?)\)', text)
            if result:
                list_stockcodes.append(result.group(1))  # keep the captured code, not the match object
        return list_stockcodes
        pass


# StockInfo().getData('601006', 1)
# StockInfo().price_info(10)
#

# StockInfo.getData('601006', 1)
# StockInfo.price_info(10)

# stockinfo = StockInfo()
# stockinfo.getData('601006', 1)
Exemple #53
0
def parseImage(imgData):
    # parse canvas bytes and save as output.png
    imgstr = re.search(b'base64,(.*)', imgData).group(1)
    with open('output.png', 'wb') as output:
        output.write(base64.decodebytes(imgstr))
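# Assumed input shape (not stated in the snippet): imgData like
# b"data:image/png;base64,iVBORw0KG...", e.g. the bytes of a canvas
# toDataURL() payload, so the regex captures everything after "base64,".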
Exemple #54
0
    'xavier',
    'xavier.aws',
    'xavier.slack',
    'xavier.sentry',
]

requires = [
    "requests>=2.4",
    "routes>=2.4",
    "frozendict",
    "jsonpickle",
]
test_requirements = ['pytest>=2.8.0']

with open('xavier/__init__.py', 'r') as fd:
    version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
                        fd.read(), re.MULTILINE).group(1)

if not version:
    raise RuntimeError('Cannot find version information')

with open('README.rst', 'r', 'utf-8') as f:
    readme = f.read()


setup(
    name='xavier',
    version=version,
    description='Lambda bot',
    long_description=readme,
    author='Alex Kessinger',
    author_email='*****@*****.**',
Exemple #55
0
def signal_handler(sig, frame):
    connection_with_server.send("END".encode())
    print("Connexion fermée")
    connection_with_server.close()
    sys.exit(0)

ip = sys.argv[1]
chamber = sys.argv[2]
booked = False

# the original pattern was a triple-quoted string broken across lines, which
# embedded literal spaces and newlines into the regex and could never match;
# plain string concatenation keeps the pattern intact
regex = (r'^(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
         r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
         r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)\.'
         r'(25[0-5]|2[0-4][0-9]|[0-1]?[0-9][0-9]?)$')

if re.search(regex, ip):
    print("Valid IP address")
else:
    print("Invalid IP address")
    exit(0)

GPIO.setmode(GPIO.BOARD)
r = Rasp(GPIO)

connection_with_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
connection_with_server.connect((ip, port))
print(f"Connexion établie avec le serveur sur le {port}")
connection_with_server.send(chamber.encode())

msg = b""
retry = 0
Exemple #56
0
import re
import urllib.request

url = "https://www.ips.media.osaka-cu.ac.jp/?page_id=180"
encoding="utf-8"
proxy = {"http": "http://prxy.ex.media.osaka-cu.ac.jp:10080",
         "https": "http://prxy.ex.media.osaka-cu.ac.jp:10080"}

count = 0
with urllib.request.FancyURLopener(proxy).open(url) as fh:
    for l in fh:
        line = l.decode(encoding).rstrip()
        match = re.search(r"<h[1-6]>", line)
        if match:
            count += 1
print(count)
Exemple #57
0
    def __init__(self, report_path, offset=0):
        super(Report, self).__init__()
        self._path = report_path
        self._offset = offset
        self._version = None
        self._profile_name = None
        self._mode = None
        self._tree_decider = None
        self._leaf_decider = None
        self._power_budget = None
        self._total_runtime = None
        self._total_energy = None
        self._total_ignore_runtime = None
        self._total_mpi_runtime = None
        self._node_name = None

        found_totals = False
        (region_name, region_id, runtime, energy, frequency, mpi_runtime, count) = None, None, None, None, None, None, None
        float_regex = r'([-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)'

        with open(self._path, 'r') as fid:
            fid.seek(self._offset)
            line = fid.readline()
            while len(line) != 0:
                if self._version is None:
                    match = re.search(r'^##### geopm (\S+) #####$', line)
                    if match is not None:
                        self._version = match.group(1)
                elif self._profile_name is None:
                    match = re.search(r'^Profile: (\S+)$', line)
                    if match is not None:
                        self._profile_name = match.group(1)
                elif self._mode is None:
                    match = re.search(r'^Policy Mode: (\S+)$', line)
                    if match is not None:
                        self._mode = match.group(1)
                elif self._tree_decider is None:
                    match = re.search(r'^Tree Decider: (\S+)$', line)
                    if match is not None:
                        self._tree_decider = match.group(1)
                elif self._leaf_decider is None:
                    match = re.search(r'^Leaf Decider: (\S+)$', line)
                    if match is not None:
                        self._leaf_decider = match.group(1)
                elif self._power_budget is None:
                    match = re.search(r'^Power Budget: (\S+)$', line)
                    if match is not None:
                        self._power_budget = int(match.group(1))
                if self._node_name is None:
                    match = re.search(r'^Host: (\S+)$', line)
                    if match is not None:
                        self._node_name = match.group(1)
                elif region_name is None:
                    match = re.search(r'^Region (\S+) \(([0-9]+)\):', line)
                    if match is not None:
                        region_name = match.group(1)
                        region_id = match.group(2)
                elif runtime is None:
                    match = re.search(r'^\s+runtime.+: ' + float_regex, line)
                    if match is not None:
                        runtime = float(match.group(1))
                elif energy is None:
                    match = re.search(r'^\s+energy.+: ' + float_regex, line)
                    if match is not None:
                        energy = float(match.group(1))
                elif frequency is None:
                    match = re.search(r'^\s+frequency.+: ' + float_regex, line)
                    if match is not None:
                        frequency = float(match.group(1))
                elif mpi_runtime is None:
                    match = re.search(r'^\s+mpi-runtime.+: ' + float_regex, line)
                    if match is not None:
                        mpi_runtime = float(match.group(1))
                elif count is None:
                    match = re.search(r'^\s+count: ' + float_regex, line)
                    if match is not None:
                        count = float(match.group(1))
                        self[region_name] = Region(region_name, region_id, runtime, energy, frequency, mpi_runtime, count)
                        (region_name, region_id, runtime, energy, frequency, mpi_runtime, count) = \
                            None, None, None, None, None, None, None
                if not found_totals:
                    match = re.search(r'^Application Totals:$', line)
                    if match is not None:
                        found_totals = True
                elif self._total_runtime is None:
                    match = re.search(r'\s+runtime.+: ' + float_regex, line)
                    if match is not None:
                        self._total_runtime = float(match.group(1))
                elif self._total_energy is None:
                    match = re.search(r'\s+energy.+: ' + float_regex, line)
                    if match is not None:
                        self._total_energy = float(match.group(1))
                elif self._total_mpi_runtime is None:
                    match = re.search(r'\s+mpi-runtime.+: ' + float_regex, line)
                    if match is not None:
                        self._total_mpi_runtime = float(match.group(1))
                elif self._total_ignore_runtime is None:
                    match = re.search(r'\s+ignore-time.+: ' + float_regex, line)
                    if match is not None:
                        self._total_ignore_runtime = float(match.group(1))
                        break # End of report blob

                line = fid.readline()
            self._offset = fid.tell()

        # Check static vars to see if they were parsed.  if not, use the Report vals
        if self._version is None and Report._version:
            self._version = Report._version
        elif self._version:
            Report._version = self._version
        else:
            raise SyntaxError('Unable to parse version information from report!')
        if self._profile_name is None and Report._profile_name:
            self._profile_name = Report._profile_name
        elif self._profile_name:
            Report._profile_name = self._profile_name
        else:
            raise SyntaxError('Unable to parse name information from report!')
        if self._mode is None and Report._mode:
            self._mode = Report._mode
        elif self._mode:
            Report._mode = self._mode
        else:
            raise SyntaxError('Unable to parse mode information from report!')
        if self._tree_decider is None and Report._tree_decider:
            self._tree_decider = Report._tree_decider
        elif self._tree_decider:
            Report._tree_decider = self._tree_decider
        else:
            raise SyntaxError('Unable to parse tree_decider information from report!')
        if self._leaf_decider is None and Report._leaf_decider:
            self._leaf_decider = Report._leaf_decider
        elif self._leaf_decider:
            Report._leaf_decider = self._leaf_decider
        else:
            raise SyntaxError('Unable to parse leaf_decider information from report!')
        if self._power_budget is None and Report._power_budget:
            self._power_budget = Report._power_budget
        elif self._power_budget:
            Report._power_budget = self._power_budget
        else:
            raise SyntaxError('Unable to parse power_budget information from report!')

        if (len(line) != 0 and (region_name is not None or not found_totals or
            None in (self._total_runtime, self._total_energy, self._total_ignore_runtime, self._total_mpi_runtime))):
            raise SyntaxError('Unable to parse report {} before offset {}: '.format(self._path, self._offset))
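# --- Illustrative aside (made-up report line, not real output) ---
# A quick check of the float_regex used throughout the parser above:
import re
float_regex = r'([-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)'
m = re.search(r'^\s+runtime.+: ' + float_regex, '    runtime (sec): 12.5')
print(m.group(1))  # -> 12.5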
Exemple #58
0
def assert_no_java(msg=""):
    ps_output = os.popen("ps wux").readlines()
    javalines = [x for x in ps_output if re.search(r'\bbin/java\b', x)]
    print(''.join(javalines))
    assert len(javalines) == 0, msg
Exemple #59
0
def create_block(typestr, placement, item_shape=None, num_offset=0):
    """
    Supported typestr:

        * float, f8, f4, f2
        * int, i8, i4, i2, i1
        * uint, u8, u4, u2, u1
        * complex, c16, c8
        * bool
        * object, string, O
        * datetime, dt, M8[ns], M8[ns, tz]
        * timedelta, td, m8[ns]
        * sparse (SparseArray with fill_value=0.0)
        * sparse_na (SparseArray with fill_value=np.nan)
        * category, category2

    """
    placement = BlockPlacement(placement)
    num_items = len(placement)

    if item_shape is None:
        item_shape = (N, )

    shape = (num_items, ) + item_shape

    mat = get_numeric_mat(shape)

    if typestr in ('float', 'f8', 'f4', 'f2', 'int', 'i8', 'i4', 'i2', 'i1',
                   'uint', 'u8', 'u4', 'u2', 'u1'):
        values = mat.astype(typestr) + num_offset
    elif typestr in ('complex', 'c16', 'c8'):
        values = 1.j * (mat.astype(typestr) + num_offset)
    elif typestr in ('object', 'string', 'O'):
        values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset],
                            shape)
    elif typestr in ('b', 'bool', ):
        values = np.ones(shape, dtype=np.bool_)
    elif typestr in ('datetime', 'dt', 'M8[ns]'):
        values = (mat * 1e9).astype('M8[ns]')
    elif typestr.startswith('M8[ns'):
        # datetime with tz
        m = re.search(r'M8\[ns,\s*(\w+\/?\w*)\]', typestr)
        assert m is not None, "incompatible typestr -> {0}".format(typestr)
        tz = m.groups()[0]
        assert num_items == 1, "must have only 1 num items for a tz-aware"
        values = DatetimeIndex(np.arange(N) * 1e9, tz=tz)
    elif typestr in ('timedelta', 'td', 'm8[ns]'):
        values = (mat * 1).astype('m8[ns]')
    elif typestr in ('category', ):
        values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
    elif typestr in ('category2', ):
        values = Categorical(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd'
                              ])
    elif typestr in ('sparse', 'sparse_na'):
        # FIXME: doesn't support num_rows != 10
        assert shape[-1] == 10
        assert all(s == 1 for s in shape[:-1])
        if typestr.endswith('_na'):
            fill_value = np.nan
        else:
            fill_value = 0.0
        values = SparseArray([fill_value, fill_value, 1, 2, 3, fill_value,
                              4, 5, fill_value, 6], fill_value=fill_value)
        arr = values.sp_values.view()
        arr += (num_offset - 1)
    else:
        raise ValueError('Unsupported typestr: "%s"' % typestr)

    return make_block(values, placement=placement, ndim=len(shape))
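# --- Illustrative aside (assumed input string) ---
# The M8[ns, tz] branch above extracts the timezone with this pattern:
import re
m = re.search(r'M8\[ns,\s*(\w+\/?\w*)\]', 'M8[ns, US/Eastern]')
print(m.groups()[0])  # -> US/Eastern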
Exemple #60
0
    return total

filename = "input.txt"
bots = dict()
outputs = dict()
chips = dict()
for i in range(210):
    bots[i] = bot(i)
for i in range(21):
    outputs[i] = bot(i)
inputted_chips = 0
with open(filename) as f:
    straightRGX = r"value (\d+) goes to bot (\d+)"
    conditionalRGX = r"bot (\d+) gives low to (output|bot) (\d+) and high to (output|bot) (\d+)"
    for line in f:
        m = re.search(straightRGX, line)
        if m is not None:
            bots[int(m.group(2))].chips.append(int(m.group(1)))
            chips[int(m.group(1))] = [int(m.group(2))]
            inputted_chips += 1
        m = re.search(conditionalRGX, line)
        if m is not None:
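            # eval maps "bot"/"output" onto the bots/outputs dicts defined
            # above; fine for a puzzle input, but unsafe on untrusted data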
            bots[int(m.group(1))].low = eval(m.group(2) + "s")[int(m.group(3))]
            bots[int(m.group(1))].high = eval(m.group(4) + "s")[int(m.group(5))]

while (chips_at_outputs(outputs) < inputted_chips):
    for key in bots:
        if len(bots[key].chips) == 2:
            bots[key].low.chips.append(min(bots[key].chips))
            chips[min(bots[key].chips)].append(bots[key].low.number)
            bots[key].high.chips.append(max(bots[key].chips))