Example #1
1
def InsertGameIDs():
    """Fetch game records via getgameid() and insert one row per game into
    the ``gameid`` table of db.sqlite3.

    Each row is (GAME_ID, date-code, year, month, day, HOME_TEAM_ID,
    VISITOR_TEAM_ID, SEASON).  The date-code is the leading digit run of
    GAMECODE; year/month/day come from GAME_DATE_EST, assumed to look like
    YYYY-MM-DD -- TODO confirm.  Commits only when every insert succeeds.
    """
    conn = sqlite3.connect("db.sqlite3")
    try:
        cur = conn.cursor()
        try:
            gameids = getgameid()
            # Parameterized statement: values are bound, never interpolated.
            para = "insert into gameid values(?,?,?,?,?,?,?,?)"
            pattern1 = re.compile(r"\d+")
            pattern2 = re.compile(r"(\d+)-(\d+)-(\d+)")
            for i in range(len(gameids)):
                # First digit run of the game code.
                date = pattern1.findall(gameids["GAMECODE"][i])[0]
                year, month, day = pattern2.findall(gameids["GAME_DATE_EST"][i])[0]
                fillset = [
                    gameids["GAME_ID"][i],
                    date,
                    year,
                    month,
                    day,
                    gameids["HOME_TEAM_ID"][i],
                    gameids["VISITOR_TEAM_ID"][i],
                    gameids["SEASON"][i],
                ]
                cur.execute(para, fillset)
            conn.commit()
        finally:
            # Bug fix: the original leaked the cursor and connection when an
            # insert raised; release them on every path.
            cur.close()
    finally:
        conn.close()
def find_research(url, area):
    """Return True when *area* (title-cased) matches one of the research-area
    link texts scraped from *url*; False otherwise, including when the URL
    cannot be opened.  Python 2 code (urllib.urlopen).
    """
    research = []
    flag = False

    # Get webpage contents
    try:
        web_page = urllib.urlopen(url)
    except IOError:
        print("Not a valid URL.")
        # Bug fix: the original fell off the end here and implicitly
        # returned None; report the documented "not found" result instead.
        return flag
    else:
        content = web_page.read()
        web_page.close()

        # Anchor tags pointing at research-area pages.
        links = re.findall('<a href="../../research[\w./-]+.shtml">', content)

        # Strip the surrounding markup to get the bare area text.
        for link in links:
            for match in re.findall(link + "[\w\s./-]+</a>", content):
                research.append(match.replace(link, "").replace("</a>", ""))

        # .title() guards against user-supplied casing.
        for each in research:
            if area.title() == each:
                flag = True
                break  # first hit is enough

        return flag
Example #3
1
    def __call__(self):
        """Render the market page for one application.

        The application name is taken from the request URL (everything
        after URL_APP); a breadcrumb menu is built from the HTTP referer so
        the user can jump back to the search/category page they came from.
        Returns the rendered page (CTK Render result).
        """
        page = Page_Market_App(_("Application"))

        # Figure out which app the URL refers to; show "not found" when the
        # URL does not match URL_APP/<name>.
        tmp = re.findall("^%s/(.+)$" % (URL_APP), CTK.request.url)
        if not tmp:
            page += CTK.RawHTML("<h2>%s</h2>" % (_("Application not found")))
            return page.Render()

        app_name = tmp[0]

        # Menu: always link back to the market home page.
        self.menu = Menu([CTK.Link(URL_MAIN, CTK.RawHTML(_("Market Home")))])
        page.mainarea += self.menu

        # Inspect the referer to add a contextual breadcrumb entry.
        referer = CTK.request.headers.get("HTTP_REFERER", "")
        ref_search = re.findall("%s/(.+)" % (URL_SEARCH), referer)
        ref_category = re.findall("%s/([\w ]+)" % (URL_CATEGORY), referer)

        if ref_search:
            self.menu += CTK.Link(
                "%s/%s" % (URL_SEARCH, ref_search[0]),
                CTK.RawHTML("%s %s" % (_("Search"), CTK.unescape_html(ref_search[0]))),
            )
        elif ref_category:
            self.menu += CTK.Link(
                "%s/%s" % (URL_CATEGORY, ref_category[0]),
                CTK.RawHTML("%s %s" % (_("Category"), CTK.unescape_html(ref_category[0]))),
            )

        # App Info widget for the selected application.
        page.mainarea += AppInfo(app_name)

        # Final render
        return page.Render()
Example #4
1
def ntk_get_result(ans, field):
    """Split an NTK answer line into fields and return field number *field*.

    *ans* starts with a command word followed by whitespace and a
    comma-separated payload; payload items may be bare tokens, double-quoted
    strings, or parenthesised groups.  Field 0 is the command word.

    Raises ValueError when *field* is out of range.
    """
    # Head word and the remainder of the line.
    s1 = re.findall("^(\S+)\s+(\S.*)", ans)
    s = [s1[0][0]]
    # Comma-separated items: quoted string, (...) group, or bare token.
    s.extend(re.findall('("[^"]*"|\([^\)]*\)|[^"(),\s]*)(?:\s*[,]|\s*$)', s1[0][1]))
    # Bug fix: the original tested "len(s) < field", so field == len(s)
    # slipped past the check and raised IndexError below instead of the
    # intended ValueError.
    if field >= len(s):
        raise ValueError
    return s[field]
Example #5
1
    def parseResponse(self):
        """Extract the two translation word arrays from self.Response.

        Scans the response line by line for the JavaScript declarations
        "var c1Arr" (English) and "var c2Arr" (German) and fills
        self.engWords / self.deWords with the quoted entries (quotes kept).
        Returns False when either array line is missing.
        """
        self.engWords = []
        self.deWords = []

        eng_line = ""
        de_line = ""

        # Locate the line declaring each array; the last occurrence wins.
        for raw_line in self.Response.split("\n"):
            if raw_line.find("var c1Arr") >= 0:
                eng_line = raw_line
            elif raw_line.find("var c2Arr") >= 0:
                de_line = raw_line

        if not eng_line or not de_line:
            return False

        # Each array entry is a double-quoted, comma-free string.
        pattern = '"[^,]+"'
        self.engWords = re.findall(pattern, eng_line)
        self.deWords = re.findall(pattern, de_line)
Example #6
1
def getcallback_functions(filename):
    """Parse *filename* for callback-function declarations.

    Applies regex.CALLBACK_FUNCTIONS to the file contents; each match is a
    (return type, function name, argument string) triple.

    Returns (functionCalls, functionData), both keyed by function name:
    functionCalls maps to the string built by buildcall(), functionData to
    (name, [(arg type, arg name), ...], return type).
    """
    # Close the file deterministically; the original leaked the handle.
    with open(filename, "r") as f:
        stream = f.read()

    # Bug fix: the original also had a dead, misspelled duplicate of this
    # line ("ouput = re.findall(...)") that ran the regex a second time.
    output = re.findall(regex.CALLBACK_FUNCTIONS, stream)

    functionData = {}
    functionCalls = {}

    for retType, functionName, arguments in output:
        # Build a list of (type, name) tuples from the argument string,
        # splitting each argument on its last space:
        # "const char *name" -> ("const char *", "name").
        argumentList = arguments.split(",")
        retType = retType.strip()
        for i, argument in enumerate(argumentList):
            argType, argName = argument.strip().rsplit(" ", 1)
            argumentList[i] = (argType, argName)

        call = buildcall(functionName, argumentList, retType)

        functionCalls[functionName] = call
        functionData[functionName] = (functionName, argumentList, retType)

    return functionCalls, functionData
def _get_game_data(game_url):
    """Scrape metadata (genre, release year, studio, plot) for one game page.

    Always returns a dict with keys "genre", "release", "studio" and
    "plot"; each value defaults to "" and stays empty when the matching
    snippet is not found.  Any exception aborts scraping and the dict
    collected so far is returned.  Python 2 code (urllib2, print statement).
    """
    print game_url
    gamedata = {}
    gamedata["genre"] = ""
    gamedata["release"] = ""
    gamedata["studio"] = ""
    gamedata["plot"] = ""
    try:
        req = urllib2.Request(game_url)
        # Send a desktop-browser User-Agent; presumably the site rejects the
        # default urllib2 agent -- TODO confirm.
        req.add_unredirected_header(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31",
        )
        f = urllib2.urlopen(req)
        # Join the page into one line so the non-greedy patterns below can
        # span what were originally multiple lines.
        page = f.read().replace("\r\n", "")
        game_genre = re.findall(
            '<li class="crumb top-crumb"><a href="/(.*?)">(.*?)</a></li><li class="crumb"><a href="/(.*?)/list-(.*?)">(.*?)</a></li>',
            page,
        )
        if game_genre:
            # Fifth capture group: the text of the second breadcrumb.
            gamedata["genre"] = game_genre[0][4]
        game_release = re.findall('Release: <a href="(.*?)">(.*?) &raquo;</a>', page)
        if game_release:
            # Keep only the trailing 4 characters -- the year.
            gamedata["release"] = game_release[0][1][-4:]
        game_studio = re.findall('<li><a href="/features/company/(.*?)">(.*?)</a>', page)
        if game_studio:
            # Strip any nested markup from the studio name.
            p = re.compile(r"<.*?>")
            gamedata["studio"] = p.sub("", game_studio[0][1])
        game_plot = re.findall('Description</h2></div><div class="body game_desc"><div class="desc">(.*?)</div>', page)
        if game_plot:
            gamedata["plot"] = unescape(game_plot[0])
        return gamedata
    except:
        # NOTE(review): bare except silently swallows every error (network,
        # parsing, even KeyboardInterrupt) and returns partial data.
        return gamedata
Example #8
0
def bale_CRITs_indicator(base_url, data, indicator_que):
    """ One thread of adding indicators to CRITs"""
    # Drains indicator_que; each queued item is a tuple where [0] is the
    # indicator value, [1] its type ('IPv4' or 'FQDN') and [3] a reference
    # URL.  `data` carries the shared POST payload and is mutated in place.
    while not indicator_que.empty():
        indicator = indicator_que.get()
        if indicator[1] == 'IPv4':
            # using the IP API
            url = base_url + 'ips/'
            data['add_indicator'] = "true"
            data['ip'] = indicator[0]
            data['ip_type'] = 'Address - ipv4-addr'
            data['reference'] = indicator[3]
            # getting the source automatically: the hostname between '//'
            # and the next '/' of the reference URL.
            source = re.findall(r'\/\/(.*?)\/', data['reference'])
            if source:
                data['source'] = source[0]
            # NOTE(review): verify=False disables TLS certificate checks.
            res = requests.post(url, data=data, verify=False)
            if not res.status_code in [201, 200, 400]:
                logger.info("Issues with adding: %s" % data['ip'])
        elif indicator[1] == "FQDN":
            # using the Domain API
            url = base_url + 'domains/'
            data['add_indicator'] = "true"
            data['domain'] = indicator[0]
            data['reference'] = indicator[3]
            # getting the source automatically:
            source = re.findall(r'\/\/(.*?)\/', data['reference'])
            if source:
                data['source'] = source[0]
            res = requests.post(url, data=data, verify=False)
            if not res.status_code in [201, 200, 400]:
                logger.info("Issues with adding: %s" % data['domain'])
        else:
            # Unsupported indicator type: log and drop.
            logger.info("don't yet know what to do with: %s[%s]" % (indicator[1], indicator[0]))
Example #9
0
    def test_error_text_inline(self):
        """Validation errors render as inline elements by default and as
        <p class="help-block"> once helper.error_text_inline is False."""
        form = TestForm({"email": "invalidemail"})
        form.helper = FormHelper()
        layout = Layout(
            AppendedText("first_name", "wat"),
            PrependedText("email", "@"),
            PrependedAppendedText("last_name", "@", "wat"),
        )
        form.helper.layout = layout
        form.is_valid()
        html = render_crispy_form(form)

        # bootstrap3 renamed the inline error class.
        help_class = "help-inline"
        if self.current_template_pack == "bootstrap3":
            help_class = "help-block"

        # One error span per field in the layout.
        matches = re.findall('<span id="error_\d_\w*" class="%s"' % help_class, html, re.MULTILINE)
        self.assertEqual(len(matches), 3)

        # Re-render with inline error text disabled: errors become <p> tags.
        form = TestForm({"email": "invalidemail"})
        form.helper = FormHelper()
        form.helper.layout = layout
        form.helper.error_text_inline = False
        html = render_crispy_form(form)

        matches = re.findall('<p id="error_\d_\w*" class="help-block"', html, re.MULTILINE)
        self.assertEqual(len(matches), 3)
    def ER_ovenh_pro_pro_adder():
        """Rewrite promoter-"ovenh" interactions as promoter-promoter pairs.

        Interactions whose feature tag is "ovenh" are expanded into one
        promoter-promoter pair per surviving promoter behind the overlap;
        all other interactions are kept and returned as an ndarray.

        NOTE(review): this closure reads promoter_overlaps_enhancer_file,
        promoter_enhancer_inter, filtered_promoter_promoter_inter and
        pro_survived from the enclosing scope -- confirm they are defined
        where this function is created.
        """
        filtered_promoter_enhancer_inter = []
        # Columns 4 and 8 of the overlaps file: promoter index and
        # overlapping-enhancer index.
        overlaps = np.loadtxt(promoter_overlaps_enhancer_file, delimiter="\t", usecols=(4, 8), dtype=int)
        overlaps_promoter_enhancer_inter = overlaps
        diction_overlaps_ovenh = {}

        # Map each overlap id to the list of promoters sharing it.
        for overl in overlaps[:, 1]:

            promoters = list(overlaps[overl == overlaps[:, 1], 0])
            diction_overlaps_ovenh[overl] = promoters

        for el in promoter_enhancer_inter:

            # Numeric ids of both end-points and the feature tag.
            # NOTE(review): feature_2 is parsed from el[1] while the index it
            # gates (index_2) comes from el[2] -- looks inconsistent; confirm.
            index_1 = int(re.findall("\d+", el[1])[0])
            index_2 = int(re.findall("\d+", el[2])[0])
            feature_2 = re.findall("\D+", el[1])[0]

            if feature_2 == "ovenh":
                # Replace the overlap by every surviving promoter behind it.
                for pro_dict in diction_overlaps_ovenh[index_2]:
                    # pro_pro_int = [index_1, dict_pro_survived[pro_dict]]
                    pro_pro_int = [index_1, pro_dict]
                    if pro_pro_int not in filtered_promoter_promoter_inter and pro_dict in pro_survived:
                        filtered_promoter_promoter_inter.append(pro_pro_int)

            else:
                filtered_promoter_enhancer_inter.append(el)
        return np.array(filtered_promoter_enhancer_inter)
Example #11
0
def parse_ltrace(ltrace):
    """Yield (function, args, ret) triples parsed from ltrace output lines.

    Lines produced by ``ltrace -f`` carry a leading PID, which is stripped.
    Only lines whose function name is listed in ``operations`` are kept.
    Calls that never returned yield ret = None before sanitizing;
    unparseable lines are reported on stderr and skipped.  Arguments and
    the return value are passed through sanitize().
    """
    call_re = r"^([a-z_]+)\((.*)\) += (.*)"
    noreturn_re = r"^([a-z_]+)\((.*) <no return \.\.\.>"

    for raw in ltrace:
        line = raw

        # ltrace -f prefixes each line with the PID; drop it.
        pid, _, rest = line.partition(" ")
        if pid.isdigit():
            line = rest

        # Only keep traced functions we care about.
        if not any(line.startswith(op) for op in operations):
            continue

        parsed = re.findall(call_re, line)
        if parsed:
            name, arg_str, ret = parsed[0]
        else:
            # Maybe the call stopped the program and never returned.
            parsed = re.findall(noreturn_re, line)
            if not parsed:
                print("ignoring line: %s" % line, file=sys.stderr)
                continue
            name, arg_str = parsed[0]
            ret = None

        yield name, list(map(sanitize, arg_str.split(", "))), sanitize(ret)
Example #12
0
def _strClean(x):
    """ Helper function that translates csv values to reasonable floats (or header values to strings). """
    if x == "OPEN":
        return 1.0
    elif x == "CLOSED":
        return 0.0
        # Look for strings of the type '+32.0+68.32d':
    elif x == "-1.#IND":
        return 0.0
    if x.endswith("d"):
        matches = re.findall("^([+-]?\d+\.?\d*e?[+-]?\d+)[+-](\d+\.?\d*e?[+-]?\d*)d$", x)
        if len(matches) == 0:
            return 0.0
        else:
            floatConv = map(float, matches[0])
            squares = map(lambda x: x ** 2, floatConv)
            return math.sqrt(sum(squares))
    elif re.findall("^([+-]?\d+\.?\d*e?[+-]?\d*)$", x) != []:
        matches = re.findall("([+-]?\d+\.?\d*e?[+-]?\d*)", x)
        if len(matches) == 0:
            return 0.0
        else:
            try:
                return float(matches[0])
            except:
                return 0.0  # Hack for crazy WTF occasional Gridlab output.
    else:
        return x
Example #13
0
def vidqual(url, dfunc, referer):
    """Probe the video at *url* for container type, dimensions and size.

    dfunc(url, ..., range=(a, b)) must download the given byte range, so
    only a handful of bytes are fetched.  Fills a dict with "type"
    ("mp4"/"flv"), "width", "height" and "size" (from Content-Length) when
    detection succeeds.  Python 2 code: atom/header bytes compared as str.
    """
    # find_atom presumably reads dfunc from globals -- TODO confirm.
    globals()["dfunc"] = dfunc
    ret = {}
    # First 8 bytes: big-endian size plus 4-char atom type (MP4 "ftyp").
    first8 = dfunc(url, None, range=(0, 7), referer=referer)
    offset, atom = struct.unpack(">i4s", first8)
    if atom == "ftyp":
        ret["type"] = "mp4"
        cur, offset = find_atom(url, referer, offset, "moov")
        while True:
            cur, offset = find_atom(url, referer, cur + 8, "trak")
            ##it is possible not to meet first trak as video
            cur2, offset2 = find_atom(url, referer, cur + 8, "tkhd")
            # Width/height are read as two uint32s at a fixed offset inside
            # the tkhd atom; implausible values mean "not the video track".
            w, h = struct.unpack(">II", dfunc(url, None, referer=referer, range=(cur2 + 82, cur2 + 89)))
            if abs(w) < 5000 and abs(h) < 5000:
                ret["width"] = int(w)
                ret["height"] = int(h)
                resp = dfunc(url, None, referer=referer, head=True)
                ret["size"] = int(resp.info().getheader("Content-Length"))
                break
    elif first8[:3] == "FLV":
        # Combine three bytes (offsets 14-16) into a 24-bit header length.
        b1, b2, b3 = struct.unpack("3B", dfunc(url, None, referer=referer, range=(14, 16)))
        size = (b1 << 16) + (b2 << 8) + b3
        header = dfunc(url, None, referer=referer, range=(27, 27 + size))
        # The 8 bytes after the "width"/"height" keys unpack as big-endian
        # doubles (">d" below).
        width = re.findall("width.(........)", header)
        height = re.findall("height.(........)", header)
        if len(width) > 0:
            ret["width"] = int(struct.unpack(">d", width[0])[0])
        if len(height) > 0:
            ret["height"] = int(struct.unpack(">d", height[0])[0])
        resp = dfunc(url, None, referer=referer, head=True)
        ret["size"] = int(resp.info().getheader("Content-Length"))
        ret["type"] = "flv"
    return ret
Example #14
0
def protocol_check():

    u"""Check whether the CDP and LLDP discovery protocols are enabled.

    Queries the device CLI for both protocols and prints a colored status
    line per protocol.  Returns a counter: starts at 2, +1 when CDP is
    enabled, -1 when LLDP is enabled, reset to 0 by any disabled protocol.
    """

    lldp_output = cli.cli("show feature | egrep lldp")
    proto_lldp = re.findall(r"enabled", lldp_output)
    cdp_output = cli.cli("show cdp global | sed -n 2p")
    proto_cdp = re.findall(r"enabled", cdp_output)
    # The findall result lists are stringified; re.search below probes the
    # repr for "enabled".
    proto = {"cdp": str(proto_cdp), "lldp": str(proto_lldp)}
    counter = 2
    print("\nChecking CDP/LLDP protocols...")

    for name, state in proto.items():
        try:
            if re.search(r"enabled", state):
                print(name.upper() + " protocol is" + color.GREEN + " enabled" + color.ENDCOLOR)
                if name == "cdp":
                    counter += 1
                elif name == "lldp":
                    counter -= 1
            else:
                counter = 0
                print(name.upper() + " protocol is" + color.RED + " disabled" + color.ENDCOLOR)
        except AttributeError:
            print("Check device. Need to be Nexus 7000/9000.")

    return counter
Example #15
0
    def _extract_brightcove_urls(cls, webpage):
        """Return a list of all Brightcove URLs from the webpage """

        # Preferred source: the og:video meta property.
        url_m = re.search(
            r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
            webpage,
        )
        if url_m:
            url = unescapeHTML(url_m.group(1))
            # Some sites don't add it, we can't download with this url, for example:
            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
            if "playerKey" in url or "videoId" in url:
                return [url]

        # Fall back to <object> embeds: either a BrightcoveExperience class
        # or a "movie" param pointing at brightcove.com.
        matches = re.findall(
            r"""(?sx)<object
            (?:
                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
            ).+?>\s*</object>""",
            webpage,
        )
        if matches:
            # Drop embeds the builder could not turn into URLs (None).
            return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))

        # Last resort: customBC.createVideo(...) javascript snippets.
        return list(
            filter(
                None,
                [
                    cls._build_brighcove_url_from_js(custom_bc)
                    for custom_bc in re.findall(r"(customBC\.createVideo\(.+?\);)", webpage)
                ],
            )
        )
Example #16
0
    def login(self):
        """Handle the login page: download the captcha image, ask the user
        to solve it on stdin, fill self.post with the captcha fields, then
        (when already redirected to the doumail page) list the inbox.

        NOTE(review): self.post is filled but no POST request is issued in
        this method, and self.response is not refreshed before the second
        geturl() check -- confirm the submission happens elsewhere.
        Python 2 code (print statement, raw_input, urllib.urlretrieve).
        """
        if self.response.geturl() == self.url:
            print "logining..."
            html = self.response.read()
            # Captcha image URL embedded in the login form.
            reg = r'<img id="captcha_image" src="(.*?)" alt="captcha" class="captcha_image"/>'
            imglist = re.findall(reg, html)
            # Save the captcha under a random name so the user can open it.
            urllib.urlretrieve(imglist[0], "%d.jpg" % random.randint(1, 100))
            captcha = raw_input("captcha is: ")
            # Hidden captcha-id field that must be posted back.
            regid = r'<input type="hidden" name="captcha-id" value="(.*?)"/>'
            ids = re.findall(regid, html)
            self.post["captcha-solution"] = captcha
            self.post["captcha-id"] = ids[0]
            self.post["user_login"] = "登录"
            self.post["redir"] = "http://www.douban.com/doumail/"
            if self.response.geturl() == "http://www.douban.com/doumail/":
                print "login success !"
                soup = BeautifulSoup(self.response.read())
                # Sender names and mail URLs from the inbox listing.
                tag = soup.find_all("span", attrs={"class": "from"})
                tag2 = soup.find_all("a", attrs={"class": "url"})
                a = []
                for x in tag:
                    a.append(x.get_text())
                b = []
                for y in tag2:
                    b.append(y.get_text())

                def split(num):
                    # Print one "sender  url" row.
                    print a[num] + "  " + b[num]
                    print

                print "-" * 30, "豆瓣豆邮", "-" * 30
                for x in range(len(a)):
                    split(x)
                print "-" * 80
Example #17
0
    def _extract_urls(webpage):
        # Reference:
        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html

        entries = []

        # Look for iframe embeds [1]
        for _, url in re.findall(
            r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage
        ):
            entries.append(url)

        # Look for embed_in_page embeds [2]
        for video_id, account_id, player_id, embed in re.findall(
            # According to examples from [3] it's unclear whether video id
            # may be optional and what to do when it is
            r"""(?sx)
                    <video[^>]+
                        data-video-id=["\'](\d+)["\'][^>]*>.*?
                    </video>.*?
                    <script[^>]+
                        src=["\'](?:https?:)?//players\.brightcove\.net/
                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
                """,
            webpage,
        ):
            entries.append(
                "http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s"
                % (account_id, player_id, embed, video_id)
            )

        return entries
    def test_01(self):
        """passwordChange: password-change-script works

        Installs a shell script that logs each password it receives, points
        vasd at it, restarts vasd, then asserts the log shows 2-5 password
        changes with lengths in (1, 33].
        """
        # Remove artifacts from previous runs.
        self.computer.run("rm -f /tmp/passwordChange*")
        self.computer.run(
            "cat <<EOF>{}".format(self.passwordChangeScript), expectPrompt=False, handleOutputManually=True
        )
        # Heredoc body: the script reads the new password on stdin and
        # appends a "PID/DATE/PASSLEN/PASSWORD" line to the output file.
        self.computer.run(
            '#!/bin/sh\nexec >>{}\ndd bs=1 of={}.\$\$\nPASSLEN=\`wc -c {}.\$\$ | sed -e \'s/^[ ][ ]*//\' | cut -f1 -d" "\`\nPASSWORD=\`cat {}.\$\$\`\n\necho "PID: (\$\$) DATE: (\`date\`) PASSLEN: (\$PASSLEN) PASSWORD: <\$PASSWORD>"\nEOF'.format(
                self.passwordChangeOut, self.passwordChangePwd, self.passwordChangePwd, self.passwordChangePwd
            )
        )
        self.computer.run("chmod 755 {}".format(self.passwordChangeScript))
        # Interval -1 -- presumably forces frequent changes; TODO confirm.
        self.computer.vastool.configureVas.password_change_interval(-1)
        self.computer.vastool.configureVas.password_change_script(self.passwordChangeScript)
        self.addCleanup(self.computer.vastool.configureVas.password_change_interval)
        self.addCleanup(self.computer.vastool.configureVas.password_change_script)
        self.assertTrue(vasConf.vasdRestart(self.computer, 60), "Unable to restart vasd")
        # Give vasd a minute to perform some password changes.
        time.sleep(60)
        res = self.computer.run("cat {}".format(self.passwordChangeOut), returnOutput=True)
        self.assertEqual(res[1], 0, "cat {} failed".format(self.passwordChangeOut))
        match = re.findall("PASSWORD:", res[0])
        self.assertTrue(
            len(match) < 6 and len(match) > 1, "password changes {} < 6 and {} > 1".format(len(match), len(match))
        )
        # Every logged password must have a sane, non-empty length.
        self.assertGreater(len(re.findall("PASSLEN: \(([0-9]*)\)", res[0])), 0)
        for m in re.finditer("PASSLEN: \(([0-9]*)\)", res[0]):
            self.assertGreater(len(m.group(1)), 0, "Empty PASSLEN in output")
            self.assertTrue(
                int(m.group(1)) <= 33 and int(m.group(1)) > 1,
                "Password length {} must be <= 33 and > 1".format(m.group(1)),
            )
def google(names):
    """Search for the title *names*[0] on *domain* via ump.web_search and
    return the first result URL whose page title matches one of *refnames*;
    False when nothing matches.

    Relies on module-level helpers/globals: ump, domain, encoding, refnames.
    NOTE(review): orgname may be unbound if a result page lacks a "Title:"
    row -- the IndexError on the sheet-table lookup usually filters such
    pages first, but confirm.
    """
    ump.add_log("animeram is searching %s on %s" % (names[0], "google"))
    results = ump.web_search('site:%s %s "Alternative Title:"' % (domain, names[0]))
    if results is not None:
        for result in results:
            page = ump.get_page(result, encoding)
            # Raises IndexError when the page has no info sheet, skipping it
            # via the caller's error handling; kept deliberately.
            info_table = re.findall('<table\sclass="sheet">(.+?)</table>', page, re.DOTALL)[0]
            info = re.findall(
                '<td\sclass="header"><label>(.+?)</label></td>\s<td\sclass="content1">(.+?)</td>', page, re.DOTALL
            )
            for info_title, info_value in info:
                if "Title:" in info_title:
                    orgname = info_value
                    break

            for info_title, info_value in info:
                if "Alternative Title:" in info_title:
                    names = [orgname]
                    # Bug fix: the original used `is not "-"`, which tests
                    # object identity rather than string equality.
                    if info_value != "-":
                        # TODO: implement after such found.
                        raise

            for name in refnames:
                for name1 in names:
                    if ump.is_same(name1, name, strict=True):
                        ump.add_log("animeram found %s" % name)
                        return result
    return False
Example #20
0
 def parseForm(self, data):
     """Pull the hidden ``dsh`` and ``GALX`` form values out of *data*.

     Returns a dict with keys "dsh" and "GALX"; raises IndexError when
     either hidden field is missing from the page.
     """
     dsh_re = """name="dsh" id="dsh" value="(.+?)"\n"""
     galx_re = """name="GALX"\n         value="(.+?)">"""
     # re.S lets the non-greedy groups span line breaks.
     return {
         "dsh": re.findall(dsh_re, data, re.S)[0],
         "GALX": re.findall(galx_re, data, re.S)[0],
     }
Example #21
0
 def run(self):
     """Post-process the catalog written by new_catalog.run().

     Replaces every empty msgstr (except the header entry) with the filler
     word self.string repeated once per word of the msgid, preserving any
     %-style formatting directives in brackets, stamps the Last-Translator,
     rewrites the .po file and compiles it to .mo with msgfmt.
     Python 2 code (print statement).
     NOTE(review): strlen would be unbound if the very first line contained
     '""' without being a msgid -- confirm the catalog format rules this out.
     """
     new_catalog.run(self)
     handle = open(self.output_file)
     res = ""
     counter = 0
     formatting_string = False
     for line in handle:
         if not ('""' in line):
             # Word count of the msgid line (minus the "msgid" keyword).
             strlen = len(re.findall(r"\S+", line)) - 1
         if "%" in line:
             # Collect %-directives so they survive into the fake msgstr.
             formatting_string = re.findall(r"%\S+", line)
             strlen -= len(formatting_string)
             formatting_string = (", ".join(formatting_string)).strip('"')
         if '""' in line and not ("msgid" in line):
             # Empty msgstr: fill with one filler word per msgid word.
             strlen = 1 if strlen < 1 else strlen
             string = " ".join([self.string for x in range(0, strlen)])
             if formatting_string:
                 string = "%s [%s]" % (string, formatting_string)
                 formatting_string = ""
             string = '"%s"' % string
             # counter == 0 is the .po header entry: keep it empty.
             res += line if counter < 1 else line.replace('""', string)
             counter += 1
         elif line.startswith('"Last-Translator:'):
             res += line.replace("FULL NAME", "Babel Phish")
         else:
             res += line
     handle.close()
     handle = open(self.output_file, "w")
     handle.write(res)
     handle.close()
     # Compile the rewritten catalog to a binary .mo file.
     cmd = 'msgfmt -o "%s" "%s"' % (self.output_file_mo, self.output_file)
     handle = os.popen(cmd)
     print "converting to MO file..."
     print handle.read(),
     handle.close()
Example #22
0
 def write(self, inStr):
     """Append *inStr* to the rich-text control, styling each line:
     file/line traceback locations become bold blue URLs, WARNING lines
     green, ERROR lines red, everything else plain text."""
     self.MoveEnd()  # always 'append' text rather than 'writing' it
     """tracebacks have the form:
     Traceback (most recent call last):
     File "C:\Program Files\wxPython2.8 Docs and Demos\samples\hangman\hangman.py", line 21, in <module>
         class WordFetcher:
     File "C:\Program Files\wxPython2.8 Docs and Demos\samples\hangman\hangman.py", line 23, in WordFetcher
     """
     for thisLine in inStr.splitlines(True):
         if len(re.findall('".*", line.*', thisLine)) > 0:
             # this line contains a file/line location so write as URL
             # self.BeginStyle(self.urlStyle) #this should be done with
             # styles, but they don't exist in wx as late as 2.8.4.0
             self.BeginBold()
             self.BeginTextColour(wx.BLUE)
             self.BeginURL(thisLine)
             self.WriteText(thisLine)
             self.EndURL()
             self.EndBold()
             self.EndTextColour()
         elif len(re.findall("WARNING", thisLine)) > 0:
             # Warnings in green.
             self.BeginTextColour([0, 150, 0])
             self.WriteText(thisLine)
             self.EndTextColour()
         elif len(re.findall("ERROR", thisLine)) > 0:
             # Errors in red.
             self.BeginTextColour([150, 0, 0])
             self.WriteText(thisLine)
             self.EndTextColour()
         else:
             # line to write as simple text
             self.WriteText(thisLine)
     self.MoveEnd()  # go to end of stdout so user can see updated text
     self.ShowPosition(self.GetLastPosition())
Example #23
0
def AddEnteries(Fromurl):
    """Scrape video entries from *Fromurl* and add them to the listing.

    Each matched (page URL, title, thumbnail) triple becomes a playable
    item via addDir(); a trailing pagination link adds a "Next Page"
    folder.  Python 2 code (print statement).
    """
    print "getting enteries %s" % Fromurl

    link = getHtml(Fromurl)

    # Earlier layouts of the site, kept for reference:
    # 	print link
    # 	print "addshows"
    # 	match=re.compile('<param name="URL" value="(.+?)">').findall(link)
    # 	match=re.compile('<a href="(.+?)"').findall(link)
    # 	match=re.compile('onclick="playChannel\(\'(.*?)\'\);">(.*?)</a>').findall(link)
    # 	match =re.findall('onclick="playChannel\(\'(.*?)\'\);">(.*?)</a>', link, re.DOTALL|re.IGNORECASE)
    # 	match =re.findall('onclick="playChannel\(\'(.*?)\'\);".?>(.*?)</a>', link, re.DOTALL|re.IGNORECASE)
    # 	match =re.findall('<div class=\"post-title\"><a href=\"(.*?)\".*<b>(.*)<\/b><\/a>', link, re.IGNORECASE)
    # 	match =re.findall('<img src="(.*?)" alt=".*".+<\/a>\n*.+<div class="post-title"><a href="(.*?)".*<b>(.*)<\/b>', link, re.UNICODE)
    # 	print Fromurl
    # 	match =re.findall('<div class="videopart">\s*<div class="paneleft">\s*<a class="pthumb" href="(.*?)" title="(.*?)".*?img.*?src="(.*?)" class="attachment-index-post-thumbnail wp-post-image"', link, re.M|re.DOTALL)
    # Current layout: (page URL, title, thumbnail) per entry.
    match = re.findall(
        '<div class="video_thumnail_hover" href="#">\s*<a class="pthumb" href="(.*?)" title="(.*?)" ><img alt="" src=".*?hover_bg.png"><\/a>\s*<\/div>\s*<a class="pthumb"\s+href=".*?" title=".*?" ><span><\/span>\s*<img width="\d+" height="\d+" src="(.*?)" class="attachment-index-post-thumbnail wp-post-image".*?\/><\/a>',
        link,
        re.M | re.DOTALL,
    )
    # 	print Fromurl

    # print match

    for cname in match:
        # cname = (page URL, title, thumbnail).
        addDir(cname[1], cname[0], 4, cname[2], isItFolder=False)

    # "&rsaquo;" pagination arrow linking to the next page, if present.
    match = re.findall("<div class='pagination'>.*<a href='([^']*)'>&rsaquo;</a>", link, re.IGNORECASE | re.DOTALL)
    # print 'match', match

    if len(match) == 1:
        addDir("Next Page", match[0], 3, "")
Example #24
0
    def _on_authenticate_page(self, response):
        """
        STEP3:
        After the Twitter login page has been fetched, POST the username
        and password to it.
        """

        if response.error:
            raise tornado.web.HTTPError(403, "Get Authenticate Message Failed ~")

        # Hidden CSRF/OAuth tokens embedded in the login form; IndexError
        # here means the page layout changed.
        authenticity_token = re.findall(
            '<input name="authenticity_token" type="hidden" value="(.+)" \/>', response.body
        )[0]
        oauth_token = re.findall(
            '<input id="oauth_token" name="oauth_token" type="hidden" value="(.+)" \/>', response.body
        )[0]

        # POST the collected credentials to the Twitter authorize endpoint.

        args = {
            "authenticity_token": authenticity_token,
            "oauth_token": oauth_token,
            "session[username_or_email]": self.get_argument("user"),
            "session[password]": self.get_argument("passwd"),
        }
        http = tornado.httpclient.AsyncHTTPClient()
        http.fetch(
            "https://api.twitter.com/oauth/authorize",
            method="POST",
            body=urllib.urlencode(args),
            callback=self._on_authorize_page,
        )
def link_list(url):
    """Scrape faculty profile links and names from *url*.

    Returns (profiles, names): profiles is a list of absolute profile URLs,
    names a list of (" lastname", firstname) tuples (first and middle names
    joined).  Returns two empty lists when the URL cannot be opened.
    Python 2 code (urllib.urlopen).
    """
    profiles = []
    names = []

    # Get webpage contents
    try:
        web_page = urllib.urlopen(url)
    except IOError:
        print("Not a valid URL.")
        # Bug fix: the original implicitly returned None here while the
        # success path returns a tuple; keep the shape consistent.
        return profiles, names
    else:
        content = web_page.read()
        web_page.close()

        # Anchor tags pointing at individual profile pages.
        links = re.findall('<td><h3><a href="profiles/[\w.-]+.shtml">', content)

        for link in links:
            # Get names using each link as a starting point.
            titles = re.findall(link + "[\w\s./-]+</a>", content)
            for name in titles:
                name = name.replace(link, "")
                name = name.replace("</a>", "")
                words = name.split()
                if len(words) == 3:
                    # Join first and middle name.
                    first, middle, last = words
                    first = first + " " + middle
                elif len(words) == 2:
                    first, last = words
                else:
                    # Bug fix: names of any other word count used to reuse
                    # first/last from the previous iteration (or raise
                    # NameError on the very first name); skip them instead.
                    continue
                names.append((" " + last, first))
            link = link.replace('<td><h3><a href="', "http://www.soic.indiana.edu/people/")
            link = link.replace('">', "")
            profiles.append(link)

        # Return list of names, and profile links
        return profiles, names
Example #26
0
    def __init__(self, src, fmt=""):
        """Parse every mana cost occurring in *src*.

        fmt == "json" selects the JSON mana syntax (utils.mana_json_regex);
        anything else uses the raw card-text syntax (utils.mana_regex).
        Each matched cost substring is replaced in self.text by
        utils.reserved_mana_marker; self.valid goes False when any cost
        fails to parse or stray mana delimiters remain in the text.
        """
        # source fields
        self.raw = None
        self.json = None
        # flags
        self.valid = True
        # default values for all fields
        self.text = src
        self.costs = []

        if fmt == "json":
            self.json = src
            manastrs = re.findall(utils.mana_json_regex, src)
        else:
            self.raw = src
            manastrs = re.findall(utils.mana_regex, src)

        for manastr in manastrs:
            # Each cost substring becomes a Manacost object; an invalid cost
            # invalidates the whole card text.
            cost = Manacost(manastr, fmt)
            if not cost.valid:
                self.valid = False
            self.costs += [cost]
            # Replace only the first occurrence so repeated costs each get
            # their own marker.
            self.text = self.text.replace(manastr, utils.reserved_mana_marker, 1)

        # Leftover delimiters mean a cost the regex failed to match.
        if (
            utils.mana_open_delimiter in self.text
            or utils.mana_close_delimiter in self.text
            or utils.mana_json_open_delimiter in self.text
            or utils.mana_json_close_delimiter in self.text
        ):
            self.valid = False
def _GetReviewersFromBisectLog(results_output):
    """Collect reviewer e-mail addresses for the CL(s) a bisect found.

    Extracts every "Link    : ..." revision URL from the bisect output,
    fetches each revision page, pulls the Rietveld issue id out of its
    "Review URL" link, and reads the reviewers list from the Rietveld JSON
    API.

    Note: This method doesn't get called when bisect reports multiple CLs
    by different authors, but will get called when there are multiple CLs
    by the same owner.

    Args:
      results_output: Bisect results output.

    Returns:
      List of email addresses from the committed CL.
    """
    reviewers = []
    # De-duplicate the revision links (the log may repeat them).
    links = {rev.strip() for rev in re.findall(r"Link    : (.*)", results_output)}
    # Revision pages can mention several "Review URL" strings when CLs were
    # reverted; reverted ones are quoted with a leading ">" (&gt;), which
    # the negative lookbehind skips.
    issue_re = r'(?<!&gt;\s)Review URL: <a href=[\'"]' r'https://codereview.chromium.org/(\d+)[\'"].*>'
    for link in links:
        # Fetch the commit page to find its codereview link.
        page = _FetchURL(link)
        if not page:
            continue
        for issue_id in re.findall(issue_re, page.content):
            # Fetch the codereview JSON and read the reviewers list.
            issue_page = _FetchURL("https://codereview.chromium.org/api/%s" % issue_id)
            if not issue_page:
                continue
            issue_data = json.loads(issue_page.content)
            reviewers.extend(str(item) for item in issue_data["reviewers"])
    return reviewers
Example #28
0
 def sentences(self):
     """Iterate over all sentences (sentence_id, sentence) in the document, sentence is a list of 4-tuples (word,id,pos,lemma)"""
     # Previous paragraph / sentence / word numbers, used to detect where a
     # new sentence starts.
     prevp = 0
     prevs = 0
     prevw = 0
     sentence = []
     sentence_id = ""
     # NOTE(review): sentence_id is computed when a boundary is crossed, so
     # the very first sentence is yielded with the initial "" id -- confirm
     # callers tolerate that.
     for word, id, pos, lemma in iter(self):
         try:
             # Paragraph-style ids: "<doc>.(p|head).<n>.s.<n>.w.<n>".
             doc_id, ptype, p, s, w = re.findall("([\w\d-]+)\.(p|head)\.(\d+)\.s\.(\d+)\.w\.(\d+)", id)[0]
             if ((p != prevp) or (s != prevs)) and sentence:
                 # Sentence boundary: emit the collected words.
                 yield sentence_id, sentence
                 sentence = []
                 sentence_id = doc_id + "." + ptype + "." + str(p) + ".s." + str(s)
             prevp = p
         except IndexError:
             # Fallback for ids without a paragraph part: "<doc>.s.<n>.w.<n>".
             doc_id, s, w = re.findall("([\w\d-]+)\.s\.(\d+)\.w\.(\d+)", id)[0]
             if s != prevs and sentence:
                 yield sentence_id, sentence
                 sentence = []
                 sentence_id = doc_id + ".s." + str(s)
         sentence.append((word, id, pos, lemma))
         prevs = s
         prevw = w
     # Flush the final, unterminated sentence.
     if sentence:
         yield sentence_id, sentence
Example #29
0
 def get_cdrom_file(vm, qemu_cdrom_device):
     """
     :param vm: VM object
     :param qemu_cdrom_device: qemu monitor device
     :return: file associated with $qemu_cdrom_device device, or None
     """
     blocks = vm.monitor.info("block")
     # The human monitor returns one text blob; QMP returns a list of dicts.
     if isinstance(blocks, str):
         # Old qemu format: "<dev>: ... file=<path> ...".
         matches = re.findall(r"%s: .*file=(\S*) " % qemu_cdrom_device, blocks)
         if not matches:
             # Newer qemu format: "<dev>: <path> (...)".
             matches = re.findall(r"%s: (\S*) \(.*\)" % qemu_cdrom_device, blocks)
         return matches[0] if matches else None
     for block in blocks:
         if block["device"] == qemu_cdrom_device:
             try:
                 return block["inserted"]["file"]
             except KeyError:
                 # Matching device without an inserted medium: keep looking.
                 continue
     return None
Example #30
0
def parse_log(log_file):
    """Parse a Caffe-style training log at path *log_file*.

    Returns (loss_iterations, losses, accuracy_iterations, accuracies,
    accuracies_iteration_checkpoints_ind): the first four are numpy arrays,
    the last lists indices of accuracy entries that land on positive
    multiples of 10000 iterations.
    """
    with open(log_file, "r") as handle:
        text = handle.read()

    # "Iteration N, loss = V" lines; groups beyond [0]/[1] come from the
    # nested float sub-pattern and are ignored.
    loss_re = r"Iteration (?P<iter_num>\d+), loss = (?P<loss_val>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    loss_matches = re.findall(loss_re, text)
    loss_iterations = np.array([int(m[0]) for m in loss_matches])
    losses = np.array([float(m[1]) for m in loss_matches])

    # "Iteration N, Testing net (#0)" with the accuracy on the next line.
    acc_re = r"Iteration (?P<iter_num>\d+), Testing net \(#0\)\n.* accuracy = (?P<accuracy>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    accuracy_iterations = []
    accuracies = []
    checkpoint_indices = []
    for m in re.findall(acc_re, text):
        iteration = int(m[0])
        # Accuracy is reported as a fraction; convert to percent.
        accuracy = float(m[1]) * 100
        if iteration % 10000 == 0 and iteration > 0:
            checkpoint_indices.append(len(accuracy_iterations))
        accuracy_iterations.append(iteration)
        accuracies.append(accuracy)

    return (
        loss_iterations,
        losses,
        np.array(accuracy_iterations),
        np.array(accuracies),
        checkpoint_indices,
    )