def get_phonenumbers(file_name):
    """Extract phone-directory entries from an HTML file.

    Scans *file_name* for table rows containing ``<td>...</td>`` cells,
    skipping any line mentioning "911", and splits each row into a
    description cell and a value cell.  If the value looks like a URL the
    entry is rendered as an HTML link, otherwise as
    ``description<br>number``.

    :param file_name: path of the HTML file to parse.
    :returns: list of HTML snippet strings, one per entry.
    """
    page_phones = []
    # "with" guarantees the file is closed even if parsing raises
    # (the original leaked the handle on any exception).
    with open(file_name, "r") as f:
        for line in f:
            if not re.search("<td>.+</td>", line):
                continue
            if re.search("911", line):
                # Emergency numbers are deliberately excluded.
                continue

            # First cell: strip <td...> openers, everything from the first
            # </td> on, then any remaining tags.
            page_phonedesc = re.sub("<td.*?>", "", line)
            page_phonedesc = re.sub("</td>.*", "", page_phonedesc)
            page_phonedesc = re.sub("<.*?>", " ", page_phonedesc)

            # Second cell: drop everything up to the last "</td><td...>",
            # then any remaining tags.
            page_phonenumb = re.sub(".*</td><td.*?>", "", line)
            page_phonenumb = re.sub("<.*?>", "", page_phonenumb)

            if re.search("https?:.+", page_phonenumb):
                # The "number" is actually a URL -- emit a link instead.
                page_phones.append(
                    "<a href='" + page_phonenumb.strip() + "'>" + page_phonedesc.strip() + "</a>"
                )
            else:
                page_phones.append(
                    page_phonedesc.strip() + "<br><span class='srchsub'>" + page_phonenumb.strip() + "</span>"
                )
    return page_phones
Example #2
1
    def handle(self, app_or_project, name, target=None, **options):
        """Validate *name*, resolve the target directory and create it.

        :param app_or_project: "app" or "project"; only used in messages.
        :param name: new app/project name; must be a letter or underscore
            followed by word characters.
        :param target: optional parent directory; defaults to the cwd.
        :raises CommandError: if the name is invalid or the directory
            cannot be created.
        """
        self.app_or_project = app_or_project
        self.paths_to_remove = []
        self.verbosity = int(options.get("verbosity"))

        # If it's not a valid directory name.
        if not re.search(r"^[_a-zA-Z]\w*$", name):
            # Provide a smart error message, depending on the error.
            if not re.search(r"^[_a-zA-Z]", name):
                message = "make sure the name begins " "with a letter or underscore"
            else:
                message = "use only numbers, letters and underscores"
            raise CommandError("%r is not a valid %s name. Please %s." % (name, app_or_project, message))

        # if some directory is given, make sure it's nicely expanded
        if target is None:
            target = os.getcwd()
        else:
            target = path.expanduser(target)

        top_dir = path.join(target, name)
        try:
            os.makedirs(top_dir)
        except OSError as e:  # was "except OSError, e": Python-2-only syntax
            raise CommandError(e)
Example #3
1
        def check_date_format(date):
            """Normalize *date* to ISO ``YYYY-MM-DDTHH:MM:SS`` format.

            If *date* already matches the ISO datetime layout it is
            returned unchanged; otherwise each known regex/strptime pair
            is tried in order and the first successful parse is
            reformatted.  Raises TypeError for unrecognized layouts.
            """
            # check if date is already in the proper format
            datetime_pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$")

            # regex and its accompanying strptime format, tried in order.
            # Ambiguous layouts (dd/mm vs mm/dd) appear twice on purpose:
            # the first format that parses wins.
            misc_date_formats = (
                (re.compile(r"\d{2}/\d{2}/\d{4}\+\d{2}:\d{2}T\d{2}:\d{2}:\d{2}$"), "%m/%d/%Y+%H:%MT%H:%M:%S"),
                (re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$"), "%Y-%m-%dT%H:%M:%S"),
                # BUG FIX: the two YYYY/MM/DD entries used the formats
                # "%d/%m/%YT%H:%M:%S" and "%/%d/%YT%H:%M:%S", neither of
                # which can ever parse a bare YYYY/MM/DD string, so such
                # dates always fell through to the TypeError below.
                (re.compile(r"\d{4}/\d{2}/\d{2}$"), "%Y/%m/%d"),
                (re.compile(r"\d{2}/\d{2}/\d{4}\+\d{2}:\d{2}$"), "%m/%d/%Y+%H:%M"),
                (re.compile(r"\d{4}-\d{2}-\d{2}$"), "%Y-%m-%d"),
                (re.compile(r"\d{2}/\d{2}/\d{4}$"), "%d/%m/%Y"),
                (re.compile(r"\d{2}/\d{2}/\d{4}$"), "%m/%d/%Y"),
            )

            matched = re.search(datetime_pattern, date)
            if matched:
                return date
            else:
                for date_format_tuple in misc_date_formats:
                    matched = re.search(date_format_tuple[0], date)
                    if matched:
                        try:
                            timestruct = time.strptime(date, date_format_tuple[1])
                            timedatetime = datetime.datetime.fromtimestamp(time.mktime(timestruct))
                            return timedatetime.strftime("%Y-%m-%dT%H:%M:%S")
                        except (ValueError, re.error):
                            # re.error: a malformed strptime format (e.g. a
                            # repeated directive, as in the first entry)
                            # surfaces as a regex error, not a ValueError.
                            continue
                else:
                    raise TypeError("unknown date format given: %s" % date)
Example #4
1
def processResponse(page, responseHeaders):
    """Post-process one HTTP response: parse it for forms/errors and keep
    ASP.NET state parameters (ViewState/EventValidation) in sync.

    :param page: response body (may be None; treated as "").
    :param responseHeaders: headers of the response; only parsed for the
        first PARSE_HEADERS_LIMIT responses.
    """
    kb.processResponseCounter += 1

    page = page or ""

    # Stop passing headers to the parser after the limit is reached.
    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None)

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg)

    if kb.originalPage is None:
        # First response seen: pick up ASP.NET anti-tampering tokens so
        # subsequent POSTs carry the server's current values.
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        # Stored value is still present in the page -- no update needed.
                        continue
                    conf.paramDict[PLACE.POST][name] = value
                # Rewrite the raw POST data with the refreshed token value.
                conf.parameters[PLACE.POST] = re.sub(
                    "(?i)(%s=)[^&]+" % name, r"\g<1>%s" % value, conf.parameters[PLACE.POST]
                )

    if re.search(BLOCKED_IP_REGEX, page):
        errMsg = "it appears that you have been blocked by the target server"
        singleTimeLogMessage(errMsg, logging.ERROR)
    def _get_episode_url(self, show_url, video):
        """Walk a show's post pages looking for the URL of *video*'s episode.

        Pages are scanned (following the "nextpostslink" pagination) until a
        matching release -- or, in title-match mode, a matching episode
        title -- is found, or posts become older than the wanted episode.

        :param show_url: site-relative URL of the show's post listing.
        :param video: object describing the wanted episode (``ep_title`` etc.).
        :returns: pathified episode URL, or implicitly None when not found.
        """
        force_title = scraper_utils.force_title(video)
        title_fallback = kodi.get_setting("title-fallback") == "true"
        norm_title = scraper_utils.normalize_title(video.ep_title)
        page_url = [show_url]
        too_old = False
        while page_url and not too_old:
            url = urlparse.urljoin(self.base_url, page_url[0])
            html = self._http_get(url, require_debrid=True, cache_limit=1)
            posts = dom_parser.parse_dom(html, "div", {"id": "post-\d+"})
            for post in posts:
                if self.__too_old(post):
                    # Posts are newest-first; once one is too old, stop entirely.
                    too_old = True
                    break
                if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                    match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                    if match:
                        url, title = match.groups()
                        if not force_title:
                            # Normal mode: match on release naming.
                            if scraper_utils.release_check(video, title, require_title=False):
                                return scraper_utils.pathify_url(url)
                        else:
                            # Title-match mode: compare normalized episode titles.
                            if title_fallback and norm_title:
                                match = re.search("</strong>(.*?)</p>", post)
                                if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                    return scraper_utils.pathify_url(url)

            # Follow the "next page" link, if any; empty list ends the loop.
            page_url = dom_parser.parse_dom(html, "a", {"class": "nextpostslink"}, ret="href")
def getWeather(place):
    """Fetch a weather summary for *place* from rssweather.com.

    *place* may be "City, ST" (two-letter state) or a 5-digit ZIP code.
    Returns a human-readable summary string, or an error message string.
    NOTE: Python 2 code (print statements, urllib.urlopen).
    """
    if (re.match("[a-zA-Z]+,* [a-zA-Z]{2}", place)) != None:
        # NOTE(review): Match.string returns the whole original input, not
        # the matched span -- city and state are NOT actually separated
        # here, as the inline comment below already admits.
        city = re.search("[a-zA-Z]+ ", place).string.strip()  # Doesn't actually seperate city/state. Needs to!
        state = re.search(" [a-zA-Z]{2}", place).string.strip()

        print "city, state = " + city + ", " + state
    elif re.match("\d{5}", place) != None:
        zip = place
        print "zip code = ", zip
    else:
        return "Please enter a real location"

    # Build the feed URL from either city/state or ZIP, depending on which
    # branch above bound variables.
    if "city" in locals() and "state" in locals():
        content = urllib.urlopen("http://www.rssweather.com/wx/us/" + state + "/" + city + "/rss.php")
    else:
        content = urllib.urlopen("http://www.rssweather.com/zipcode/" + zip + "/wx.php")

    xmldoc = minidom.parse(content)

    try:
        pubDate = xmldoc.getElementsByTagName("pubDate")[0].firstChild.data
    except:
        return "I can't find the weather for that city."
    # Keep only the time-of-day and timezone fields of the RFC-822 date.
    pubDate = pubDate.split(" ")[4] + " GMT " + pubDate.split(" ")[5]
    summary = xmldoc.getElementsByTagName("description")[1].firstChild.data
    reply = summary + " (" + pubDate + ")"

    return reply
Example #7
1
    def indent_code(self, code):
        """Accepts a string of code or a list of code lines."""

        # A single string is split into lines, indented recursively and
        # re-joined.
        if isinstance(code, string_types):
            return "".join(self.indent_code(code.splitlines(True)))

        tab = "  "
        inc_regex = ("^function ", "^if ", "^elseif ", "^else$", "^for ")
        dec_regex = ("^end$", "^elseif ", "^else$")

        # Strip existing leading whitespace; indentation is recomputed.
        stripped = [line.lstrip(" \t") for line in code]

        def block_delta(patterns, line):
            # 1 if the line opens/closes a block per *patterns*, else 0.
            return int(any(search(pat, line) for pat in patterns))

        increase = [block_delta(inc_regex, line) for line in stripped]
        decrease = [block_delta(dec_regex, line) for line in stripped]

        pretty = []
        level = 0
        for idx, line in enumerate(stripped):
            if line in ("", "\n"):
                pretty.append(line)
                continue
            # Dedent before emitting (so "end"/"else" line up with their
            # opener), indent after.
            level -= decrease[idx]
            pretty.append("%s%s" % (tab * level, line))
            level += increase[idx]
        return pretty
Example #8
0
    def get_versioning_status(self, headers=None):
        """
        Returns the current status of versioning on the bucket.

        :rtype: dict
        :returns: A dictionary containing a key named 'Versioning'
                  that can have a value of either Enabled, Disabled,
                  or Suspended. Also, if MFADelete has ever been enabled
                  on the bucket, the dictionary will contain a key
                  named 'MFADelete' which will have a value of either
                  Enabled or Suspended.
        """
        response = self.connection.make_request("GET", self.name, query_args="versioning", headers=headers)
        body = response.read()
        boto.log.debug(body)
        if response.status != 200:
            raise self.connection.provider.storage_response_error(response.status, response.reason, body)
        status = {}
        # Pull the Versioning / MfaDelete values out of the XML body; each
        # key is only present when its pattern matches.
        for key, pattern in (("Versioning", self.VersionRE), ("MfaDelete", self.MFADeleteRE)):
            found = re.search(pattern, body)
            if found:
                status[key] = found.group(1)
        return status
 def get_domain_name(self, url, headers, count):
     """Fetch the WebLogic console server table page and extract the domain name.

     Retries up to *count* times; on success stores the name in
     ``self.domain_name`` and returns True, otherwise returns False.
     NOTE: Python 2 code (print statements, urllib.unquote).
     """
     ctest = 0
     while True:
         if ctest == count:
             return False
         else:
             if ctest != 0:
                 ctest += 1
                 print "Retry %s times" % ctest
             else:
                 ctest += 1
             try:
                 domain_data = requests.get(
                     url + "/console.portal?_nfpb=true&_pageLabel=CoreServerServerTablePage",
                     headers=headers,
                     cookies=self.cookies,
                     timeout=10,
                 )
                 domain_soup = BeautifulSoup(domain_data.text)
                 # Look for a link whose href carries ":Name=<domain>,Type=Domain".
                 for name in domain_soup.find_all("a"):
                     if name.get("href") != None:
                         name = re.search(r"\:Name=[\w]*\,Type\=Domain", urllib.unquote(name.get("href")))
                         if name:
                             self.domain_name = name.group()
                             break
                 # Strip down to the bare name (drop the "Name=" prefix).
                 self.domain_name = re.search(r"Name=[\w]*", self.domain_name)
                 self.domain_name = self.domain_name.group()[5:]
                 print "DomainName:%s\r\n" % self.domain_name
                 return True
             except:
                 # Any failure (network, parse, no match) counts as one retry.
                 print "get_domain_name Error!\n"
                 if ctest == 3:
                     f = open("error.txt", "a")
                     f.write("get_domain_name Error! " + url + "\n")
                     f.close()
 def get_path(self, url, warname, count):
     """Locate the deployment path of *warname* in previously fetched data.

     Searches ``self.path_data`` for a Windows (drive-letter) or Linux
     (slash-rooted) path near the war name, records it in ``self.path``
     and ``self.system`` (1 = Windows, 2 = Linux) and returns True;
     returns False after *count* attempts.
     NOTE: Python 2 code (print statements).
     """
     ctest = 0
     while True:
         if ctest == count:
             return False
         else:
             if ctest != 0:
                 ctest += 1
                 print "Retry %s times" % ctest
             else:
                 ctest += 1
             try:
                 # The war name followed by a printable run should contain the path.
                 path_exp = warname + " [^ -~]*[ -~]*"
                 path_sea = re.search(path_exp, self.path_data.text)
                 if path_sea:
                     path = re.search(r"[a-zA-Z]:(\\[\w .]+)+", path_sea.group())  # Windows
                     if path:
                         self.path = path.group()
                         self.system = 1
                         print "Target system: Windows\n"
                     else:
                         path = re.search(r"(/[\w .]+)+", path_sea.group())  # Linux
                         if path:
                             self.path = path.group()
                             self.system = 2
                             print "Target system: Linux\n"
                 time.sleep(1)
                 print "Upload Path:%s\r\n" % self.path
                 return True
             except:
                 # Any failure counts as one retry; log to file on the third.
                 print "get_path Error!\n"
                 if ctest == 3:
                     f = open("error.txt", "a")
                     f.write("get_path Error! " + url + "\n")
                     f.close()
Example #11
0
def setCookie(srDomain):
    """Ensure a valid, logged-in cookie for *srDomain*.

    Reads the cached cookie file (if any), checks its expiry timestamp and
    "SrLoggedIn" marker, and re-authenticates (fetching a fresh CSRF token
    and POSTing the stored credentials) when the cookie is missing, expired
    or not logged in.  Relies on module-level ``cookie_file``, ``user`` and
    ``passw``.
    """
    from t0mm0.common.net import Net as net

    cookieExpired = False
    # BUG FIX: initialize these so a failing open() below cannot leave them
    # unbound -- the original raised NameError on "cookie" in that case.
    cookie = ""
    loggedin = None
    if os.path.exists(cookie_file):
        try:
            cookie = open(cookie_file).read()
            expire = re.search('expires="(.*?)"', cookie, re.I)
            if expire:
                expire = str(expire.group(1))
                import time

                if time.time() > time.mktime(time.strptime(expire, "%Y-%m-%d %H:%M:%SZ")):
                    cookieExpired = True
        except Exception:
            # Unreadable/corrupt cookie file: treat as expired and re-login.
            cookieExpired = True
        loggedin = re.search("SrLoggedIn", cookie, re.I)
    if not os.path.exists(cookie_file) or cookieExpired or (not loggedin and user != "" and passw != ""):
        link = main.OPENURL(srDomain + "/sign_in")
        match = re.findall('<meta content="([^<]+)" name="csrf-token" />', link, re.DOTALL)
        token = match[0]
        net().http_GET(srDomain + "/sign_in")
        net().http_POST(
            srDomain + "/sign_in", {"authenticity_token": token, "user[email]": user, "user[password]": passw}
        )
        net().save_cookies(cookie_file)
    else:
        net().set_cookies(cookie_file)
Example #12
0
    def loadAccountInfo(self, user, req):
        """Determine premium status and expiry date of an uploading.com account.

        Scrapes the landing page for the premium marker and the "valid
        until" date.  NOTE(review): the computed validuntil / trafficleft /
        premium values are only held in locals here -- presumably the
        plugin framework expects them returned; confirm against the full
        original method.
        """
        validuntil = None
        trafficleft = None
        premium = None

        html = req.load("http://uploading.com/")

        # No free-account marker on the page => provisionally premium.
        premium = re.search(self.PREMIUM_PATTERN, html) is None

        m = re.search(self.VALID_UNTIL_PATTERN, html)
        if m:
            expiredate = m.group(1).strip()
            self.logDebug("Expire date: " + expiredate)

            try:
                validuntil = time.mktime(time.strptime(expiredate, "%b %d, %Y"))

            except Exception as e:  # was "except Exception, e": Python-2-only syntax
                self.logError(e)

            else:
                # Premium only while the expiry date is still in the future.
                if validuntil > time.mktime(time.gmtime()):
                    premium = True
                else:
                    premium = False
                    validuntil = None
Example #13
0
 def __parsingVariablePointer__(self, Global, CrossReference):
     """Parse the variable-pointer section of the current file listing.

     Skips lines until the "FILE ORDER PREFIX LAYGO MESSAGE" header, then
     collects each referenced file number, resolves it via
     *CrossReference* and stores the resulting list on ``self._field``.
     """
     index, fileList, found = 0, None, False
     indentValue = self.__getDefaultIndentLevel__(self._curFile, self.DEFAULT_NAME_INDENT)
     for index in range(len(self._lines)):
         if not found:
             # Still searching for the section header line.
             if re.search(
                 "^ {%d,%d}FILE  ORDER  PREFIX    LAYGO  MESSAGE$" % (self.DEFAULT_NAME_INDENT, indentValue),
                 self._lines[index],
             ):
                 found = True
             continue
         else:
             # A line of only spaces terminates the section.
             if re.search("^ {%d,}$" % indentValue, self._lines[index]):
                 break
             else:
                 result = re.search("^ +(?P<File>[0-9\.]+) +", self._lines[index])
                 if result:
                     filePointedTo = CrossReference.getGlobalByFileNo(result.group("File"))
                     if not filePointedTo:
                         # log an error for now, will handle this case later
                         logger.error("INVALID File! File is %s, Global is %s" % (result.group("File"), Global))
                         continue
                     if not fileList:
                         fileList = []
                     fileList.append(filePointedTo)
     self._field.setPointedToFiles(fileList)
Example #14
0
def harvest_output(outtext):
    """Function to separate portions of a PSI4 output file *outtext*.

    Returns a (psivar, psivar_coord, psivar_grad) triple; only psivar is
    populated here (coordinates and gradient stay None).
    """
    psivar = PreservingDict()
    psivar_coord = None
    psivar_grad = None

    # Signed fixed-point number with an optional D/E exponent.
    NUMBER = "((?:[-+]?\\d*\\.\\d+(?:[DdEe][-+]?\\d+)?)|(?:[-+]?\\d+\\.\\d*(?:[DdEe][-+]?\\d+)?))"

    # Process PsiVariables: the block under the "Variable Map:" header.
    variable_map = re.search(
        r"^(?:  Variable Map:)\s*" + r"^\s*(?:-+)\s*" + r"^(.*?)" + r"^(?:\s*?)$", outtext, re.MULTILINE | re.DOTALL
    )

    if variable_map:
        # Each entry line looks like:  "NAME" => number
        entry_pattern = r"^\s+" + r'"(.+?)"' + r"\s+=>\s+" + NUMBER + r"\s*$"
        for pv in variable_map.group(1).split("\n"):
            entry = re.search(entry_pattern, pv)
            if entry:
                psivar["%s" % (entry.group(1))] = entry.group(2)

    # Process Completion marker.
    if re.search(r"PSI4 exiting successfully. Buy a developer a beer!", outtext, re.MULTILINE):
        psivar["SUCCESS"] = True

    return psivar, psivar_coord, psivar_grad
Example #15
0
 def containsRegexp(self, regexp, flags):
     """
     Returns whether this widget's passage contains a regexp.

     Searches both the passage title and the passage text; True when the
     pattern matches either.
     """
     # "is not None" is the idiomatic (PEP 8) None comparison; the original
     # used "!= None".
     return (
         re.search(regexp, self.passage.title, flags) is not None
         or re.search(regexp, self.passage.text, flags) is not None
     )
Example #16
0
 def find_in_2011style_dir(version):
     # The 2011 (compiler v12) dirs are inconsistent, so just redo the search from
     # get_all_compiler_versions and look for a match (search the newest form first)
     def has_icc(candidate):
         # A candidate dir only counts if it actually contains an icc binary.
         return os.path.exists(os.path.join(candidate, "bin", "ia32", "icc")) or os.path.exists(
             os.path.join(candidate, "bin", "intel64", "icc")
         )

     # Newest layout, e.g. /opt/intel/composer_xe_2011_sp1.11.344.
     # The _sp1 is useless, the installers are named 2011.9.x, 2011.10.x, 2011.11.x
     for d in glob.glob("/opt/intel/composer_xe_*"):
         m = re.search(r"([0-9]{0,4})(?:_sp\d*)?\.([0-9][0-9.]*)$", d)
         if m and "%s.%s" % (m.group(1), m.group(2)) == version and has_icc(d):
             return d

     # Older layout, e.g. /opt/intel/composerxe-2011.4.184.
     for d in glob.glob("/opt/intel/composerxe-*"):
         m = re.search(r"([0-9][0-9.]*)$", d)
         if m and m.group(1) == version and has_icc(d):
             return d

     # No matching install found.
     return None
    def parse_class_and_level_info(string_to_parse, default_class_name=None):
        """Returns a dict containing the class and level identified in the string.
        default_class_name - If no class is found in the string, raise an exception or use this class name.

        dict contains:
        class name
        level

        :raises TypeError: when no level is present, or when no class name
            is present and no default was given.
        """

        # Search for the level string.
        level_regex = r"level.?(\d+)"
        matches = re.search(level_regex, string_to_parse)

        # If none are found, raise an error.
        if matches is None:
            print("No level found in %s " % string_to_parse)
            raise TypeError
        level = matches.group(1)

        # Search for a class name: everything before the word "level".
        # BUG FIX: the old pattern "^.+level" had no capture group, so
        # matches.group(1) below always raised IndexError whenever a class
        # name was actually present.
        class_name_regex = r"^(.+?)\s*level"
        matches = re.search(class_name_regex, string_to_parse)

        # If none are found, use the default or raise an error.
        if matches is None:
            if default_class_name is None:
                print("No class name found in %s, and no default provided. " % string_to_parse)
                raise TypeError
            else:
                class_name = default_class_name
        else:
            class_name = matches.group(1)
        return {"class name": class_name, "level": level}
Example #18
0
    def fTable(self, match):
        """Render a textile table match as an HTML <table>.

        group(1) holds the table attribute string, group(2) the body:
        newline-separated rows of "|"-separated cells.  Rows and cells may
        each carry their own attribute prefix ("<atts>. ").
        """
        tatts = self.pba(match.group(1), "table")
        rows = []
        for row in [x for x in match.group(2).split("\n") if x]:
            # Row-level attributes, e.g. "(class). |a|b|".
            rmtch = re.search(r"^(%s%s\. )(.*)" % (self.a, self.c), row.lstrip())
            if rmtch:
                ratts = self.pba(rmtch.group(1), "tr")
                row = rmtch.group(2)
            else:
                ratts = ""

            cells = []
            for cell in row.split("|"):
                # A leading "_" marks a header cell (<th> instead of <td>).
                ctyp = "d"
                if re.search(r"^_", cell):
                    ctyp = "h"
                # Cell-level attributes, e.g. "_(class). text".
                cmtch = re.search(r"^(_?%s%s%s\. )(.*)" % (self.s, self.a, self.c), cell)
                if cmtch:
                    catts = self.pba(cmtch.group(1), "td")
                    cell = cmtch.group(2)
                else:
                    catts = ""

                cell = self.graf(self.span(cell))

                # Empty cells (e.g. from the leading "|") are dropped.
                if cell.strip() != "":
                    cells.append("\t\t\t<t%s%s>%s</t%s>" % (ctyp, catts, cell, ctyp))
            rows.append("\t\t<tr%s>\n%s\n\t\t</tr>" % (ratts, "\n".join(cells)))
            cells = []
            catts = None
        return "\t<table%s>\n%s\n\t</table>\n\n" % (tatts, "\n".join(rows))
Example #19
0
def get_gpu():
    """Returns video controller info as listed by WMI.

    :returns: a (dimensions, state) tuple -- sorted vendor-id and
        "vendor:device" id strings, and sorted VideoProcessor names --
        or (None, None) when pywin32 is unavailable.
        NOTE: Python 2 code (``unicode``).
    """
    try:
        import win32com.client  # pylint: disable=F0401
    except ImportError:
        # win32com is included in pywin32, which is an optional package that is
        # installed by Swarming devs. If you find yourself needing it to run without
        # pywin32, for example in cygwin, please send us a CL with the
        # implementation that doesn't use pywin32.
        return None, None

    wmi_service = win32com.client.Dispatch("WbemScripting.SWbemLocator")
    wbem = wmi_service.ConnectServer(".", "root\\cimv2")
    dimensions = set()
    state = set()
    # https://msdn.microsoft.com/library/aa394512.aspx
    for device in wbem.ExecQuery("SELECT * FROM Win32_VideoController"):
        vp = device.VideoProcessor
        if vp:
            state.add(vp)

        # The string looks like:
        #  PCI\VEN_15AD&DEV_0405&SUBSYS_040515AD&REV_00\3&2B8E0B4B&0&78
        pnp_string = device.PNPDeviceID
        ven_id = u"UNKNOWN"
        dev_id = u"UNKNOWN"
        match = re.search(r"VEN_([0-9A-F]{4})", pnp_string)
        if match:
            ven_id = match.group(1).lower()
        match = re.search(r"DEV_([0-9A-F]{4})", pnp_string)
        if match:
            dev_id = match.group(1).lower()
        dimensions.add(unicode(ven_id))
        dimensions.add(u"%s:%s" % (ven_id, dev_id))
    return sorted(dimensions), sorted(state)
Example #20
0
    def fList(self, match):
        """Render a textile list match (lines prefixed with "*"/"#") as HTML.

        Tracks nesting by the length of the bullet prefix, opening a
        <ul>/<ol> level when a new prefix appears and closing levels when
        the following line's prefix is shorter.
        """
        text = match.group(0).split("\n")
        result = []
        lists = []
        for i, line in enumerate(text):
            try:
                nextline = text[i + 1]
            except IndexError:
                nextline = ""

            m = re.search(r"^([#*]+)(%s%s) (.*)$" % (self.a, self.c), line, re.S)
            if m:
                # tl = bullet prefix, atts = attribute string, content = text.
                tl, atts, content = m.groups()
                nl = ""
                # The next line's prefix decides whether to close levels.
                nm = re.search(r"^([#*]+)\s.*", nextline)
                if nm:
                    nl = nm.group(1)
                if tl not in lists:
                    # First time at this depth: open a new list level.
                    lists.append(tl)
                    atts = self.pba(atts)
                    line = "\t<%sl%s>\n\t\t<li>%s" % (self.lT(tl), atts, self.graf(content))
                else:
                    line = "\t\t<li>" + self.graf(content)

                if len(nl) <= len(tl):
                    line = line + "</li>"
                # Close every level deeper than the next line's prefix.
                # NOTE(review): lists is mutated while iterating reversed(lists);
                # appears to work because removal happens behind the cursor --
                # confirm before restructuring.
                for k in reversed(lists):
                    if len(k) > len(nl):
                        line = line + "\n\t</%sl>" % self.lT(k)
                        if len(k) > 1:
                            line = line + "</li>"
                        lists.remove(k)

            result.append(line)
        return "\n".join(result)
Example #21
0
def set_attach_to_forum(cookie, site="http://m.dimonvideo.ru/"):
    """Enable the "attach uploads to forum" option in the site profile.

    Fetches the settings form, preserves every already-checked option and
    re-submits the form with "forumupl" switched on.

    :returns: True if the option was already enabled, None on connection
        or authorization failure, otherwise the status text parsed from
        the response page.
    """
    page = send_mail("{}settings.php?op=5&id=3".format(site), cookie, [])

    if page:
        # Each form row is one <tr>; skip the first and last fragments.
        lst = page.split("<tr>")[1:-1]
        L = []

        for i in lst:
            mo = re.search(r"name *= *'([^']+)'", i)
            if not mo:
                continue
            name = mo.group(1)
            mo = re.search(r"checked *= *'([^']+)'", i)

            if mo:
                # The option is already enabled on the site.
                if name == "forumupl" and mo.group(1) == "checked":
                    return True
                # Keep every other already-checked option checked.
                L.append((name, "1"))

        L.extend([("forumupl", "1"), ("op", "1"), ("id", "3"), ("submit", "Сохранить")])
        page = send_mail("{0}settings.php?op=5&id=3".format(site), L)

    if page is None:
        return None  # connection error
    elif page == 0:
        return None  # authorization error
    else:
        page = page.split("<h4")
        if len(page) < 2:
            # NOTE(review): a bare "raise" outside an except block raises
            # RuntimeError -- presumably a placeholder for a real error.
            raise
        else:
            return page[1].split(">")[1].split("</h")[0].decode("cp1251").encode("utf-8")
 def __isGoldSection__(self, input, curLine):
     """Return True when *curLine* starts a "gold section" banner.

     A gold section is a dashed rule, a line beginning with
     "VA Enterprise VistA", then another dashed rule.  Note: up to two
     lines are consumed from *input* during the check.
     """
     if not re.search("^-+$", curLine):
         return False
     bannerLine = input.readline()
     if not bannerLine.startswith("VA Enterprise VistA"):
         return False
     # Banner confirmed; the section is gold only if the closing rule follows.
     return bool(re.search("^-+$", input.readline()))
Example #23
0
    def _get_streams(self):
        """Resolve the live RTMP stream for the channel in ``self.url``.

        Extracts the channel id/key from the player page's FlashVars, asks
        the load balancer for an RTMP server, and returns a dict with a
        single "live" RTMPStream.

        :raises NoStreamsError: when no FlashVars are found on the page.
        :raises PluginError: when the balancer response has no redirect.
        """
        channelname = urlparse(self.url).path.rstrip("/").rpartition("/")[-1].lower()

        self.logger.debug("Fetching stream info")

        headers = {"Referer": self.url}

        res = urlget(self.PlayerURL.format(channelname), headers=headers)
        match = re.search("'FlashVars', '(id=\d+)&s=(.+?)&", res.text)
        if not match:
            raise NoStreamsError(self.url)

        # Recombine as "<stream key>?<id=...>" for the RTMP playpath.
        channelname = "{0}?{1}".format(match.group(2), match.group(1))
        res = urlget(self.BalancerURL, headers=headers)

        match = re.search("redirect=(.+)", res.text)
        if not match:
            raise PluginError("Error retrieving RTMP address from loadbalancer")

        rtmp = match.group(1)
        streams = {}
        streams["live"] = RTMPStream(
            self.session,
            {
                "rtmp": "rtmp://{0}/live/{1}".format(rtmp, channelname),
                "pageUrl": self.url,
                "swfVfy": self.SWFURL,
                "conn": "S:OK",
                "live": True,
            },
        )

        return streams
    def search(self, video_type, title, year, season=""):
        """Search the site's TV-series index for *title*.

        Walks every index page (following the ">>" pagination link) and
        collects entries whose normalized title contains the normalized
        query title.

        :returns: list of {"url", "title", "year"} result dicts.
        """
        results = []
        page_url = urlparse.urljoin(self.base_url, "/tvseries/index.php?&page=1")
        while page_url:
            html = self._http_get(page_url, cache_limit=48)
            # Drop HTML comments so commented-out rows are not matched.
            html = re.sub("<!--.*?-->", "", html)
            norm_title = scraper_utils.normalize_title(title)
            for td in dom_parser.parse_dom(html, "td", {"class": "topic_content"}):
                match_url = re.search('href="([^"]+)', td)
                match_title_year = dom_parser.parse_dom(td, "img", ret="alt")
                if match_url and match_title_year:
                    match_url = match_url.group(1)
                    # Relative links live under /tvseries/.
                    if not match_url.startswith("/"):
                        match_url = "/tvseries/" + match_url
                    match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        result = {
                            "url": scraper_utils.pathify_url(match_url),
                            "title": scraper_utils.cleanse_title(match_title),
                            "year": match_year,
                        }
                        results.append(result)

            # The ">>" link points at the next page; stop when absent.
            match = re.search('href="([^"]+)[^>]*>>', html)
            if match:
                page_url = urlparse.urljoin(self.base_url, match.group(1))
            else:
                page_url = ""

        return results
Example #25
0
 def __init__(self, name, src, dst, features=".*"):
     """
     :param name: A unique name for this weight group.
     :param src: The set of source tags that should be used for
         this weight group, specified as either a list of state
         names or a regular expression.
     :param dst: The set of destination tags that should be used
         for this weight group, specified as either a list of state
         names or a regular expression.
     :param features: The set of input feature that should be used
         for this weight group, specified as either a list of
         feature names or a regular expression.  WARNING: currently,
         this regexp is passed straight to java -- i.e., it must
         be a java-style regexp!
     :raises ValueError: if *name* contains whitespace or a double quote.
     """
     # Raw string: "\s" inside a plain literal is an invalid escape
     # sequence (a SyntaxWarning on modern Python).
     if re.search(r"\s", name):
         raise ValueError("weight group name may not " "contain whitespace.")
     if re.search('"', name):
         raise ValueError("weight group name may not contain '\"'.")
     self.name = name
     self.src = src
     self.dst = dst
     self.features = features
     # Per-instance caches for src/dst tag-match results.
     self._src_match_cache = {}
     self._dst_match_cache = {}
    def test_StencilInteriorIter_and_StencilNeighborIter(self):
        """The processed kernel AST should contain both stencil iterator nodes."""
        import re

        argdict = {"in_grid": self.in_grid, "out_grid": self.out_grid}
        # Dump the transformed AST to text and look for the iterator node names.
        output_as_string = ast.dump(StencilKernel.StencilProcessAST(argdict).visit(self.kernel.kernel_ast))
        self.assertTrue(re.search("StencilInteriorIter", output_as_string))
        self.assertTrue(re.search("StencilNeighborIter", output_as_string))
Example #27
0
File: clang.py Project: d-tk/spack
    def default_version(self, comp):
        """The '--version' option works for clang compilers.
           On most platforms, output looks like this::

               clang version 3.1 (trunk 149096)
               Target: x86_64-unknown-linux-gnu
               Thread model: posix

          On Mac OS X, it looks like this:

               Apple LLVM version 7.0.2 (clang-700.1.81)
               Target: x86_64-apple-darwin15.2.0
               Thread model: posix

        """
        # Serve a cached answer when we have one.
        if comp in cpr._version_cache:
            return cpr._version_cache[comp]

        output = Executable(comp)("--version", output=str, error=str)

        ver = "unknown"
        apple_match = re.search(r"^Apple LLVM version ([^ )]+)", output)
        if apple_match:
            # Apple's LLVM compiler has its own versions, so suffix them.
            ver = apple_match.group(1) + "-apple"
        else:
            # Normal clang compiler versions are left as-is
            clang_match = re.search(r"^clang version ([^ )]+)", output)
            if clang_match:
                ver = clang_match.group(1)

        cpr._version_cache[comp] = ver
        return cpr._version_cache[comp]
    def test_StencilConvertAST_array_macro(self):
        """The generated macro definition should be a #define of an array_macro."""
        import re

        result = StencilKernel.StencilConvertAST(self.argdict).gen_array_macro_definition("in_grid")

        self.assertTrue(re.search("array_macro", str(result)))
        self.assertTrue(re.search("#define", str(result)))
Example #29
0
def _CheckLGTMsForPublicAPI(input_api, output_api):
    """Check LGTMs for public API changes.

  For public API files make sure there is an LGTM from the list of owners in
  PUBLIC_API_OWNERS.

  Returns a list with a PresubmitError when a public header is touched and
  no owner LGTM (or an accepted override: revert CL, COMMIT=false,
  TBR=owner, or an owner-authored CL) is found; an empty list otherwise.
  """
    results = []
    requires_owner_check = False
    for affected_file in input_api.AffectedFiles():
        affected_file_path = affected_file.LocalPath()
        file_path, file_ext = os.path.splitext(affected_file_path)
        # We only care about files that end in .h and are under the top-level
        # include dir.
        if file_ext == ".h" and "include" == file_path.split(os.path.sep)[0]:
            requires_owner_check = True

    if not requires_owner_check:
        return results

    lgtm_from_owner = False
    issue = input_api.change.issue
    if issue and input_api.rietveld:
        issue_properties = input_api.rietveld.get_issue_properties(issue=int(issue), messages=True)
        if re.match(REVERT_CL_SUBJECT_PREFIX, issue_properties["subject"], re.I):
            # It is a revert CL, ignore the public api owners check.
            return results

        if re.search(r"^COMMIT=false$", issue_properties["description"], re.M):
            # Ignore public api owners check for COMMIT=false CLs since they are not
            # going to be committed.
            return results

        match = re.search(r"^TBR=(.*)$", issue_properties["description"], re.M)
        if match:
            # TBR entries may be full emails or bare usernames.
            tbr_entries = match.group(1).strip().split(",")
            for owner in PUBLIC_API_OWNERS:
                if owner in tbr_entries or owner.split("@")[0] in tbr_entries:
                    # If an owner is specified in the TBR= line then ignore the public
                    # api owners check.
                    return results

        if issue_properties["owner_email"] in PUBLIC_API_OWNERS:
            # An owner created the CL that is an automatic LGTM.
            lgtm_from_owner = True

        messages = issue_properties.get("messages")
        if messages:
            for message in messages:
                if message["sender"] in PUBLIC_API_OWNERS and "lgtm" in message["text"].lower():
                    # Found an lgtm in a message from an owner.
                    lgtm_from_owner = True
                    break

    if not lgtm_from_owner:
        results.append(
            output_api.PresubmitError(
                "Since the CL is editing public API, you must have an LGTM from " "one of: %s" % str(PUBLIC_API_OWNERS)
            )
        )
    return results
def get_player_var_set(url, var_set):
    """Collect the names of all per-player stat variables for a match.

    Follows the "Match Centre" link from *url*, downloads the live
    statistics page, evaluates its "initialData" JavaScript literal and
    adds every player-stat variable name to *var_set* (mutated in place).
    NOTE: Python 2 code (print statement).
    """
    tmp_html = web.URL(url).download(cached=False)
    regM = re.search('href="(/Matches.{1,128}?)".{1,10}Match Centre', tmp_html, re.DOTALL)
    match_centre_url = "http://www.whoscored.com" + regM.group(1)
    player_stats_url = match_centre_url.replace("Live", "LiveStatistics")

    html = web.URL(player_stats_url).download(cached=False)
    regM = re.search("var initialData = (.*?);", html, re.DOTALL)
    data = regM.group(1)

    # Fill empty slots (",,") so the literal parses with literal_eval.
    while ",," in data:
        data = data.replace(",,", ",' ',")

    data = ast.literal_eval(data)

    match_overview = data[0][0]
    match_details = data[0][1]

    print match_overview[2], match_overview[3]

    for team in match_details:
        player_stats = team[4]

        for p in player_stats:
            # p[3][0] appears to hold (variable-name, value) pairs --
            # TODO confirm against a live initialData payload.
            for var in p[3][0]:
                var_set.add(var[0])