Code example #1
# Assumed imports for this snippet: requests, bs4's BeautifulSoup, and an alias
# such as `from re import search as find` (the bare `find()` used below must
# return a match object; no such function exists in the standard library).
def range(lower, upper, sem):  # NOTE: shadows the built-in range()
    with open(str(lower) + "-" + str(upper) + ".csv", "w") as log:
        while (lower <= upper):
            with requests.Session() as req:
                dom = BeautifulSoup(
                    req.post("http://www.wbutech.net/show-result.php",
                             data={
                                 "semno": sem,
                                 "rollno": lower,
                                 "rectype": 1
                             },
                             headers={
                                 "Referer":
                                 "http://www.wbutech.net/result_odd.php"
                             }).text)
                if len(dom.find_all("th")) == 14:
                    name = find("Name : (.+)",
                                dom.find_all("th")[1].text.strip()).group(1)
                    reg = find("Registration No. : (.+) OF",
                               dom.find_all("th")[3].text.strip()).group(1)
                    sgpa = find("SEMESTER : ([0-9.]+)",
                                dom.find_all("td")[54].text.strip()).group(1)
                    print(name + ", " + str(lower) + ", " + reg + ", " + sgpa)
                    log.write("\"" + name + "\",\"" + str(lower) + "\",\"" +
                              reg + "\"," + sgpa + "\n")
                else:
                    print(str(lower) + " MISSING")
                req.close()  # redundant: the with-block already closes the session
                lower += 1
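
A note that applies to many of the examples on this page: the standard re
module has no find() function. Snippets like the one above only run if the
project aliases one of the real calls, e.g. `from re import search as find`.
A minimal sketch of that convention (the sample text is illustrative):

from re import search as find  # the alias these snippets appear to rely on

text = "Name : Jane Doe"
m = find(r"Name : (.+)", text)  # re.search returns a re.Match or None
if m:
    print(m.group(1))  # -> Jane Doe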
Code example #2
def phoneticcase(filetotopen):
    f = open(filetotopen, 'r', encoding="utf8")
    message = f.readlines()
    f.close()

    phoneticarr = []
    for m in message:
        for i in m.strip().split(","):
            # `re` shadowed the regex module in the original; renamed to `tok`
            # (.find() here is str.find, which does exist)
            for tok in i.strip().split("'"):
                if tok.find("[") == -1 and tok.find("]") == -1:
                    if tok not in (" ", ""):
                        phoneticarr.append(tok.lower())  # tok is already a str

    # deduplicate (replaces the commented-out manual loop in the original)
    newphonecticlist = list(set(phoneticarr))
    return newphonecticlist
Code example #3
def post_request(cookies, class_code, hashkey, img_data, pred_type="ydm"):
    global THREAD_FLAG
    # while count < 50:
    #     check_url = 'https://dean.bjtu.edu.cn/course_selection/courseselecttask/selects_action/?action=load&iframe=school&page=1&perpage=500'
    #     res = requests.get(check_url, cookies=cookies, headers=check_classheader)
    #     count += 1
    try:

        if pred_type == "ydm":
            req_id, answer = yundama.decode(img_data, 2004, 20)
        elif pred_type == "chaoren":
            res = chaoren_client.recv_byte(img_data)
            answer, req_id = res[u'result'], res[u'imgId']
        elif pred_type == "pp":
            answer, req_id = api.Predict(40400, img_data)
        elif pred_type == "cjy":
            answer, req_id = chaojiying.PostPic(img_data, 2004)
        data = {
            'checkboxs': class_code,
            # 'is_cross':True,
            'hashkey': hashkey,
            'answer': answer
        }
        # `re` shadowed the regex module in the original; renamed to `resp`
        resp = requests.post(
            'https://dean.bjtu.edu.cn/course_selection/courseselecttask/selects_action/?action=submit',
            cookies=cookies,
            headers=robclass_headers,
            allow_redirects=False,
            data=data)
        if resp.status_code == 503:
            print(resp.status_code)
            print("resubmitting the course-grab request")
            time.sleep(0.3)
            # the original discarded this result; return it so the caller sees it
            return post_request(cookies, class_code, hashkey, img_data, pred_type)
        raw = resp.headers['Set-Cookie']
        message = raw[raw.find('[['):raw.find(']]') + 2]  # str.find, not a regex call
        res = str(json.loads(eval("'" + message + "'")))  # NOTE: eval on server data is unsafe
        print(pred_type + " request: " + str(data))
        print(res)
        if "选课成功" in res:
            THREAD_FLAG = True
            return 200
        elif "课堂无课余量" in res:
            return 404
        elif "验证码" in res:
            if pred_type == 'pp':
                api.Justice(req_id)
            elif pred_type == 'cjy':
                chaojiying.ReportError(req_id)
            elif pred_type == 'ydm':
                yundama.report(req_id)
            else:
                chaoren_client.report_err(req_id)
            return 403
            # hard failure, e.g. a similar course is already selected
        else:
            return 500
    except Exception as e:
        print("139postreq bug :" + str(e))
        return 403
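
The 503 branch above retries by calling post_request() recursively, which can
grow the call stack under sustained load. A bounded retry loop is the more
conventional shape; a minimal sketch (the retry count and delay are
illustrative, not taken from the original project):

import time

def submit_with_retry(do_submit, max_retries=50, delay=0.3):
    """Call do_submit() until it stops returning a 503-style response."""
    for _ in range(max_retries):
        resp = do_submit()
        if resp.status_code != 503:
            return resp
        time.sleep(delay)
    return None  # give up after max_retries attempts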
Code example #4
 def getip_66ip(self):
     for page in range(1, 4):
         url = 'http://www.66ip.cn/{}.html'.format(page)
         html = getpage(url)
         if html:
             doc = pq(html)
             res = doc('#main table tr:gt(0)').items()
             # `re` shadowed the regex module; renamed to `row`
             for row in res:
                 address = row.find('td').eq(0).text()  # pyquery's .find(), not a regex call
                 port = row.find('td').eq(1).text()
                 if address and port:
                     result = '{0}:{1}'.format(address, port)
                     yield result.replace(' ', '')
Code example #5
def get_video_img_info(html):
    # find site name
    site_url = 'http://'
    url_p = '^http://v.qq.com/.+?tm|^/cover/'
    sub_url_p = '^/'
    video_with_img = []
    # `re.find` does not exist, and `elif (x = ...)` is a syntax error;
    # assign first, then test
    finded = re.search('qq.com', html)
    if finded:
        site_url = site_url + 'v.qq.com/'
        url_p = '^http://v.qq.com/.+tm|^/cover/'
    else:
        finded = re.search('youku.com', html)
        if finded:
            site_url = site_url + 'v.youku.com'
            url_p = '^http://v.youku.com/.+?_show'
Code example #6
File: result_ex.py Project: bhskt/wbutResultEx
def range(lower, upper, sem):  # NOTE: shadows the built-in range()
    while lower <= upper:
        with requests.Session() as req:
            dom = BeautifulSoup(req.post("http://www.wbutech.net/show-result.php",
                                         data={"semno": sem, "rollno": lower, "rectype": 1},
                                         headers={"Referer": "http://www.wbutech.net/result_odd.php"}).text)
            if len(dom.find_all("th")) == 14:
                name = find("Name : (.+)", dom.find_all("th")[1].text.strip()).group(1)
                reg = find("Registration No. : (.+) OF", dom.find_all("th")[3].text.strip()).group(1)
                sgpa = find("SEMESTER : ([0-9.]+)", dom.find_all("td")[54].text.strip()).group(1)
                print(name + ", " + str(lower) + ", " + reg + ", " + sgpa)  # Python 2 prints converted
            else:
                print(str(lower) + " MISSING")
            req.close()  # redundant inside the with-block
            lower += 1
Code example #7
 def get_chapterurl(self, response):
     item = DingdianItem()
     item['name'] = str(response.meta['name']).replace('\xa0', '')
     item['novelurl'] = response.meta['novelurl']
     soup = BeautifulSoup(response.text, 'lxml')  # `re` shadowed the regex module; renamed
     category = soup.find('table').find('a').get_text()  # category
     author = soup.find('table').find_all('td')[1].get_text()
     bash_url = soup.find('p', class_='btnlinks').find('a',
                                                       class_='read')['href']
     name_id = str(bash_url)[-6:-1].replace('/', '')
     item['category'] = str(category).replace('/', '')
     item['author'] = str(author).replace('/', '')  # fixed typo: replase -> replace
     item['name_id'] = name_id
     return item
Code example #8
def get_video_site_info(html):
    # site_url was never initialized in the original (NameError); assuming
    # the same 'http://' prefix used by get_video_img_info above
    site_url = 'http://'
    finded = re.search('qq.com', html)  # re.find does not exist
    if finded:
        site_url = site_url + 'v.qq.com/'
        return 'qq'
    finded = re.search('youku.com', html)
    if finded:
        site_url = site_url + 'v.youku.com'
        return 'youku'
    finded = re.search('tudou.com', html)
    if finded:
        site_url = site_url + 'v.tudou.com'
        return 'tudou'
    else:
        print('error not support yet...')
        return ''
Code example #9
def parse_proxies(country="all"):
    pattern = re.compile(r"(?:\d{1,3}\.){3}\d{1,3}:\d+")
    proxies = []

    for page_num in range(1, PAGES_COUNT + 1):
        print("page:", page_num)
        soup = get_soup(PROXIES_URL % (country.lower(), page_num))
        table = soup.find("table", id="proxy_list")

        if not table: 
            continue

        rows = table.find_all("tr")  # BeautifulSoup's method is find_all, not findall

        print("rows:", len(rows))

        for row in rows:
            script = row.find("script").text
            cipher = re.findall(r'Base64.decode\("(.+)"\)', script)[0]
            ip = base64.b64decode(cipher).decode()  # bytes -> str so the pattern can match
            proxy = ip

            if proxy and pattern.search(proxy):  # re.find does not exist; use the compiled pattern
                proxies.append(proxy)

    return proxies
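
For reference, base64.b64decode() returns bytes, so the decoded proxy has to
be converted to str before a str-pattern search; a standalone check (the
address is a documentation example, not real data):

import base64
import re

pattern = re.compile(r"(?:\d{1,3}\.){3}\d{1,3}:\d+")
cipher = base64.b64encode(b"203.0.113.7:8080").decode()  # stand-in for the scraped value
proxy = base64.b64decode(cipher).decode()                # bytes -> str before matching
assert pattern.search(proxy) is not None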
Code example #10
def get_subclass_name_from_item(item):
    match = re.search(r'\n\n(?P<subclass>.+)\n', item)  # re.find does not exist
    subclass_name = match.group('subclass')

    print("\tsubclass: " + subclass_name)

    return subclass_name
Code example #11
File: speedparser.py Project: puzzlet/speedparser
def parse(document, clean_html=True, unix_timestamp=False, encoding=None):
    """Parse a document and return a feedparser dictionary with attr key access.
    If clean_html is False, the html in the feed will not be cleaned.  If
    clean_html is True, a sane version of lxml.html.clean.Cleaner will be used.
    If it is a Cleaner object, that cleaner will be used.  If unix_timestamp is
    True, the date information will be a numerical unix timestamp rather than a
    struct_time.  If encoding is provided, the encoding of the document will be
    manually set to that."""
    if isinstance(document, six.text_type):
        encoding = 'utf8'
        # re.find does not exist; document is still text here, so use a str pattern
        m = re.search(r'''<\?xml.*?encoding=['"](.*?)['"].*\?>''', document)
        document = document.encode(encoding)
    if isinstance(clean_html, bool):
        cleaner = default_cleaner if clean_html else fake_cleaner
    else:
        cleaner = clean_html
    result = feedparser.FeedParserDict()
    result['feed'] = feedparser.FeedParserDict()
    result['entries'] = []
    result['bozo'] = 0
    try:
        parser = SpeedParser(document, cleaner, unix_timestamp, encoding)
        parser.update(result)
    except Exception as e:
        if isinstance(e, UnicodeDecodeError) and encoding is True:
            encoding = chardet.detect(document)['encoding']
            document = document.decode(encoding, 'replace').encode('utf-8')
            return parse(document, clean_html, unix_timestamp, encoding)
        import traceback
        result['bozo'] = 1
        result['bozo_exception'] = e
        result['bozo_tb'] = traceback.format_exc()
    return result
Code example #12
def get_data(url):
    res = requests.get(url)
    result_json = res.json()
    print(result_json)
    next_page_url = result_json["paging"].get("next")
    data = result_json["data"]

    # get first data
    for item in data:
        message = item.get("message")
        if message:
            # whitelist posts
            if any(whitelist in message.lower() for whitelist in whitelists):
                # check if there are blacklisted words
                if not any(blacklist in message.lower()
                           for blacklist in blacklists):
                    number = ""
                    match = pattern.search(message)
                    if match:
                        if match.group(0):
                            number = match.group(0).replace("/", "")
                        elif match.group(1):
                            number = match.group(1).replace("/+", "")
                            # ensure we only keep the phone digits (re.find does not exist)
                            number = re.sub(r"\D", "", number)
                        # check if number is exists.
                        if number not in numbers:
                            messages.append(message)
                            numbers.append(number)

    return next_page_url
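
A quick illustration of the digits-only step used above (the phone number is
made up):

import re

raw = "+62 812/3456-7890"
digits = re.sub(r"\D", "", raw)  # strip everything that is not a digit
assert digits == "6281234567890"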
Code example #13
def reorderLines(logFileSize, logfile):
    # WRITE YOUR CODE HERE
    id_map = {}
    content_words = []
    content_numbers = []
    for log_line in logfile:
        log_contents = log_line.split(' ')
        id = log_contents[0]
        # join to a string: a list is unhashable and cannot key id_map
        content = ' '.join(log_contents[1:])
        id_map[content] = id
        if re.search(r'\d+', content):  # re.find does not exist
            content_numbers.append(content)
        else:
            content_words.append(content)

    #sort the content_words lexicographically
    content_words_sorted = sorted(content_words)

    result = []
    for sorted_line in content_words_sorted:
        if sorted_line in id_map:
            result_line = id_map[sorted_line] + " " + sorted_line
            result.append(result_line)  # list has append, not add

    for number_line in content_numbers:
        if number_line in id_map:
            result_line = id_map[number_line] + " " + number_line
            result.append(result_line)

    return result
Code example #14
File: views.py Project: WillGuan105/data_monitor
def groupon_poster_gz(request):
    cmd = 'curl -H "Host:groupon.mlapi.meilishuo.com" 10.0.0.55/groupon/groupon_poster'
    out = os.popen(cmd).read()  # `re` shadowed the regex module; renamed to `out`
    reStatus = "success"
    if out.find('"error_code":0') < 0:  # str.find, not a regex call
        reStatus = "fail"
    return HttpResponse(reStatus)
Code example #15
File: calculator.py Project: JcGNeon/codework
def getNumbers(varInput):
    # assumes `find` is an alias for re.findall, which returns a list
    variables = find(r'[\d.]+', varInput)

    if len(variables) >= 2:  # findall returns [] on no match, never None
        return float(variables[0]), float(variables[1])
    else:
        print("'%s' is not valid input. Please try again.\n" % varInput)
Code example #16
    def BIOtagSingleOffset_NoOverlap(self, element, text2, i1, i2,
                                     last_offset):

        # before drug words start
        before_part = text2[last_offset:i1]
        element["text_splits"].append(before_part)

        # drug words
        element["text_splits"].append(self.BItagWSpace)

        # now add the next word only? or what?

        # re.find does not exist; take the position of the next word boundary
        m = re.search(r'\b', text2[i1:])
        i2 = m.start() if m else -1

        if i2 > -1:

            entity_text = text2[i1:]
            element["text_splits"].append(entity_text)
            #entity_text2 = self.BIOTagWord(entity_text)
            #deviation += len(entity_text2) - len(entity_text)
            element["text_splits"].append(self.BIOtagWSpace)

            last_offset = i1 + len(entity_text) + 1

        return last_offset
Code example #17
def get_class_and_subclass_codes_from_item(item):
    match = re.search(r'(?P<class>\d{2} )(?P<subclass>\d{2}\n)', item)  # re.find does not exist
    class_code = match.group('class')
    subclass_code = match.group('subclass')

    print("\tclass: " + class_code + "\n\tsubclass: " + subclass_code)

    return [class_code , subclass_code]
Code example #18
File: chinese_project-3.py Project: 211706127/--2
def println(name):
    global var
    rule = r'“(.*?)”'
    try:
        print("\n")
        print(var[name])
    except KeyError:  # a bare except hides real errors
        print("\n")
        # the original re.find(rule) had no target string; searching the
        # name itself is a guess at the intent
        match = re.search(rule, name)
        print(match.group(1) if match else None)
Code example #19
File: __init__.py Project: kaeza/KaeChat
def is_valid_channel(name):
    """Returns whether NAME is a valid channel name, that is, it starts with
    any of '#', '&', '+', or '!', and does not contain NUL, BEL, CR, LF, ' ',
    ',', or ':'.
    """
    return (
      (name[0] in CHANNEL_PREFIXES)
      # re.find does not exist; re.search returns None (not -1) on no match
      and (re.search(r'[\0\7\r\n ,:]', name) is None)
    )
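
The original `== -1` test is str.find semantics; re.search signals "no match"
with None instead of an index. Side by side (the strings are illustrative):

import re

s = "hello world"
print(s.find("z"))                 # -1   (str.find returns an index, or -1)
print(re.search(r"z", s))          # None (re.search returns a Match, or None)
print(re.search(r"z", s) is None)  # True -> "no match"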
Code example #20
def gerarAfndGramatica(afnd, gramatica, alfabeto):  # builds the NFA for the grammars; assumes `from re import findall as find`
    if not afnd:
        afnd.update({0: {}})
    aTemp = {}
    mpRgs = {}
    for regra in gramatica:
        simbolos = find(r'(\w*<\w+>|\w+|&)', regra)
        if simbolos[0] in mpRgs.keys(
        ):  # check whether the rule was already created and stored in the rule map
            iRg = mpRgs[simbolos[0]]  # iRg holds the rule's index
        else:
            iRg = len(aTemp)
            aTemp.update({iRg: {}})
            mpRgs.update({simbolos[0]: iRg})
        for simbolo in simbolos[1:]:
            term = find(r'^\w+', simbolo)
            nTerm = find(r'<\w+>', simbolo)
            term = '&' if not term else term[0]
            if term not in alfabeto:
                alfabeto.append(term)
            if not nTerm:  # production with no non-terminal: emit a terminal rule
                rg = aTemp[iRg]
                if term in rg.keys():
                    rg[term].append(len(aTemp))
                else:
                    rg.update({term: [len(aTemp)]})
                aTemp.update({len(aTemp): {'*': [1]}})
            else:
                nTerm = nTerm[0]
                if nTerm in mpRgs.keys():
                    rg = mpRgs[nTerm]
                else:
                    rg = len(aTemp)
                    mpRgs.update({nTerm: rg})
                    aTemp.update({rg: {}})
                mp = aTemp[iRg]
                if term in mp.keys():
                    mp[term].append(rg)
                else:
                    mp.update({term: [rg]})

    unirAutomatos(afnd,
                  aTemp)  # merges the automata, sharing the initial symbol
Code example #21
def get_attributes(tag):
    keys = re.findall(' ([\w-]+?)=', tag)
    result = {}
    for key in keys:
        match = re.search(r'%s="(.*?)"' % key, tag)  # re.find does not exist
        if match is None:
            print('cannot find attribute "%s" in %s' % (key, tag))
            continue
        result[key] = match.group(1)
    check_attributes(keys, tag)
    return result
Code example #22
File: bot.py Project: raghu3111x/F.R.I.D.A.Y
def usd_to_inr():
    from bs4 import BeautifulSoup
    import requests
    import re

    # `x` is assumed to be the spoken command text captured elsewhere in the bot
    match = re.search(r'\d+', x)  # re.find does not exist
    amount = match.group(0) if match else '1'  # fall back to 1 unit (assumption)
    url = 'https://www.xe.com/currencyconverter/convert/?Amount=' + amount + '&From=USD&To=INR'
    html_text = requests.get(url).text
    soup = BeautifulSoup(html_text, 'lxml')
    value = soup.find('p', class_='result__BigRate-sc-1bsijpp-1 iGrAod').text
    print_and_say(value)
Code example #23
File: pureSXLinux.py Project: badstranger/Special
def getIP():
    try:
        res = urllib2.urlopen('http://whois.pconline.com.cn/ipJson.jsp', timeout=2)
    except:
        return None
    if res.getcode() != 200:
        return None
    body = res.read().decode('gbk').encode('utf8')  # `re` shadowed the regex module; renamed
    res.close()
    body = body[body.rfind('{'):body.find('}') + 1]  # str.find/rfind, not regex calls
    return json.loads(body)
Code example #24
File: sxbase.py Project: aircyan/sx_pi
 def getIP(self):
     try:
         # NOTE: urllib2's timeout is in seconds; 2000 is likely meant to be 2
         res = urllib2.urlopen('http://whois.pconline.com.cn/ipJson.jsp', timeout=2000)
     except:
         return None
     if res.getcode() != 200:
         return None
     body = res.read().decode('gbk').encode('utf8')  # `re` shadowed the regex module; renamed
     res.close()
     body = body[body.rfind('{'):body.find('}') + 1]  # str.find/rfind, not regex calls
     return json.loads(body)
Code example #25
 def get_dependencies(self):
     dependencies = []
     dependencies_in_angular_quotes = []
     dependencies_in_angular_braces = re.findall('\<(.*?)\>',
                                                 self.raw_content)
     print(self.raw_content)
     for line in self.raw_content.split("\n"):
         if line.startswith('#'):  # avoids an IndexError on empty lines
             # the original assigned to .append; extend with every quoted name instead
             dependencies_in_angular_quotes.extend(
                 re.findall(r'"([^"]*)"', line))
     self.dependencies = dependencies_in_angular_braces + dependencies_in_angular_quotes
Code example #26
File: Banner.py Project: white111/CMtestpy
def fill(self, char):
    if not self: return self[FILL]
    # re.find does not exist, and .lower is a method that must be called
    if re.search("reset", char.lower()):
        del self[FILL]
        self[_CHANGE]  # NOTE: bare lookup kept from the original (likely flags a change)
        return None    # `undef` is Perl; Python returns None

    char = char[0:1]  # only one character allowed for fill value.
    char = re.sub("[^\x20-\x7f]+", "", char)  # re.sub returns the result; assign it
    if char: self[FILL] = char
    return self[FILL]
Code example #27
File: homework4.py Project: sarnthil/python
def matching_one(regexes, string):
    '''Returns True if and only if one of the given regexes (a list) matches the
    string. For example, if regexes is ['abc','foo'] and the string is 'blabcar',
    it should return True, but not if regexes is ['lalala','foo'] and the string
    is 'blabcar'.

    3/3 points
    '''
    for item in regexes:
        if re.search(item, string):  # re.find does not exist
            return True
    return False
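
The loop above collapses to a single expression; a drop-in equivalent:

import re

def matching_one(regexes, string):
    return any(re.search(item, string) for item in regexes)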
Code example #28
File: jmdict.py Project: maxigaz/jiten
def parse_jmdict(file=JMDICT_FILE):  # {{{1
    alang = "{http://www.w3.org/XML/1998/namespace}lang"
    data = []
    with gzip.open(file) as f:
        with click.progressbar(ET.parse(f).getroot(),
                               width=0,
                               label="parsing jmdict") as bar:
            for e in bar:
                seq, pos = int(e.find("ent_seq").text), ()
                kanji, reading, sense = [], [], []
                for ke in e.findall("k_ele"):  # 0+ kanji elem
                    keb = ke.find("keb").text.strip()  # word/phrase w/ kanji
                    info = tuple(x.text.strip() for x in ke.findall("ke_inf"))
                    assert all("\n" not in x and "\x1e" not in x for x in info)
                    kanji.append(
                        Kanji(keb, _kanji_chars(keb), info, _prio_k(ke)))
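                # NOTE: the loop variable `re` below shadows the regex module;
                # its .find()/.findall() calls are ElementTree methods, which
                # do exist, so this example is correct as written.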
                for re in e.findall("r_ele"):  # 1+ reading elem
                    reb = re.find("reb").text.strip()  # reading elem
                    restr = tuple(x.text.strip()
                                  for x in re.findall("re_restr"))
                    # reading only applies to keb subset
                    info = tuple(x.text.strip() for x in re.findall("re_inf"))
                    assert all("\n" not in x and "\x1e" not in x
                               for xs in [restr, info] for x in xs)
                    reading.append(Reading(reb, restr, info, _prio_r(re)))
                for se in e.findall("sense"):  # 1+ sense elem
                    pos = tuple(x.text.strip()
                                for x in se.findall("pos")) or pos
                    # part of speech, applies to following senses too
                    lang, gloss = None, []
                    for x in se.findall("gloss"):
                        l = x.get(alang, "eng")
                        if l in LANGS and x.text:
                            assert lang is None or lang == l
                            lang = l
                            gloss.append(x.text.strip())
                    if lang is None: continue
                    s_inf = tuple(x.text.strip() for x in se.findall("s_inf"))
                    misc = tuple(x.text.strip() for x in se.findall("misc"))
                    xref = tuple(y.strip() for x in se.findall("xref")
                                 for y in x.text.split("・")
                                 if not y.strip().isdigit())
                    assert seq < MAXSEQ
                    assert all("\n" not in x and "\x1e" not in x
                               for xs in [pos, gloss, s_inf, misc, xref]
                               for x in xs)
                    sense.append(
                        Sense(pos, lang, tuple(gloss), s_inf + misc, xref))
                krs = (tuple(x) for x in [kanji, reading, sense])
                jlpt = jlpt_level(kanji, reading, _usukana(sense))
                data.append(Entry(seq, jlpt, *krs))
            return data
Code example #29
def check_attributes(attributes, tag):
    check = tag
    for key in attributes:
        m = re.search('%s=".*?"' % key, tag)  # re.find does not exist
        if m is None:
            continue
        check = check.replace(m.group(0), '')
    check = check.replace('<img', '')
    check = check.replace('/>', '')
    check = check.replace('>', '')
    check = check.replace(' ', '')
    if len(check) > 0:
        print('tag has unexpected attributes: %s\n%s\n%s' %
              (attributes, check, tag))
        raise Exception('unexpected attributes')
Code example #30
File: homework4.py Project: sarnthil/python
def grep(filename, string):
    '''Returns all lines that match a given string. Interpret the string as a
    regular expression, so e.g. if the string is "foo[bp]ar" it should return
    all those lines that contain the words foobar and/or foopar.

    It shouldn't return the lines altogether in a list, but rather one by one
    using yield.

    5/5 points
    '''
    with open(filename) as f:
        for line in f:
            if re.search(string, line):  # re.find does not exist
                yield line
Code example #31
 def sanitize_token(self, token, strip_tokens=False):
     if token["type"] in (tokenTypes["StartTag"], tokenTypes["EndTag"], 
                          tokenTypes["EmptyTag"]):
         if token["name"] in self.allowed_elements:
             if token.has_key("data"):
                 attrs = dict([(name,val) for name,val in
                               token["data"][::-1] 
                               if name in self.allowed_attributes])
                 for attr in self.attr_val_is_uri:
                     if not attrs.has_key(attr):
                         continue
                     val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                            unescape(attrs[attr])).lower()
                     if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
                         (val_unescaped.split(':')[0] not in 
                          self.allowed_protocols)):
                         del attrs[attr]
                 for attr in self.svg_attr_val_allows_ref:
                     if attr in attrs:
                         attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                              ' ',
                                              unescape(attrs[attr]))
                 if (token["name"] in self.svg_allow_local_href and
                     'xlink:href' in attrs and re.find('^\s*[^#\s].*',
                                                       attrs['xlink:href'])):
                     del attrs['xlink:href']
                 if attrs.has_key('style'):
                     attrs['style'] = self.sanitize_css(attrs['style'])
                 token["data"] = [[name,val] for name,val in attrs.items()]
             return token
         else:
             if strip_tokens:
                 return None
             if token["type"] == tokenTypes["EndTag"]:
                 token["data"] = "</%s>" % token["name"]
             elif token["data"]:
                 attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
                 token["data"] = "<%s%s>" % (token["name"],attrs)
             else:
                 token["data"] = "<%s>" % token["name"]
             if token["type"] == tokenTypes["EmptyTag"]:
                 token["data"]=token["data"][:-1] + "/>"
             token["type"] = tokenTypes["Characters"]
             del token["name"]
             return token
     elif token["type"] == tokenTypes["Comment"]:
         pass
     else:
         return token
Code example #32
File: sanitizer.py Project: glebourgeois/Pywemil
 def sanitize_token(self, token):
     if token["type"] in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
         if token["name"] in self.allowed_elements:
             if token.has_key("data"):
                 attrs = dict([(name, val)
                               for name, val in token["data"][::-1]
                               if name in self.allowed_attributes])
                 for attr in self.attr_val_is_uri:
                     if not attrs.has_key(attr):
                         continue
                     val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                            unescape(attrs[attr])).lower()
                     if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped)
                             and (val_unescaped.split(':')[0]
                                  not in self.allowed_protocols)):
                         del attrs[attr]
                 for attr in self.svg_attr_val_allows_ref:
                     if attr in attrs:
                         attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                              ' ', unescape(attrs[attr]))
                 if (token["name"] in self.svg_allow_local_href
                         and 'xlink:href' in attrs
                         and re.find('^\s*[^#\s].*', attrs['xlink:href'])):
                     del attrs['xlink:href']
                 if attrs.has_key('style'):
                     attrs['style'] = self.sanitize_css(attrs['style'])
                 token["data"] = [[name, val]
                                  for name, val in attrs.items()]
             return token
         else:
             if token["type"] == tokenTypes["EndTag"]:
                 token["data"] = "</%s>" % token["name"]
             elif token["data"]:
                 attrs = ''.join([
                     ' %s="%s"' % (k, escape(v)) for k, v in token["data"]
                 ])
                 token["data"] = "<%s%s>" % (token["name"], attrs)
             else:
                 token["data"] = "<%s>" % token["name"]
             if token["type"] == tokenTypes["EmptyTag"]:
                 token["data"] = token["data"][:-1] + "/>"
             token["type"] = tokenTypes["Characters"]
             del token["name"]
             return token
     elif token["type"] == tokenTypes["Comment"]:
         pass
     else:
         return token
Code example #33
def _get_conflict_file_name(outfile):
    """
    """
    outdir = '/'.join(outfile.split('/')[:-1])
    outfilename = outfile.split('/')[-1]
    filelist = os.listdir(outdir)
    previous_conflicts = [filename.split('.')[2] for filename in filelist 
                          if filename.startswith(outfilename) and len(filename.split('.')) == 3]
    if previous_conflicts:
        max_conflict = max([int(re.findall(r'[0-9]{3}', conflict)[0])  # re.find does not exist
                            for conflict in previous_conflicts])
    else:
        max_conflict = -1
    
    return '{}.conflict_{:03.0f}'.format(outfile,max_conflict+1)
Code example #34
def capo(hope_comments_count):
    url_capogames = "http://www.capogames.net/samw/board/board.do"
    souping_capogames = souping(url_capogames)

    for row in souping_capogames.find_all('tr'):
        row_count = row.find('span').contents[0]
        reply_count = re.search(r'\d+', row_count).group(0)  # r'wd+' was presumably a typo for \d+
        if int(reply_count) > hope_comments_count:
            site_name = 'samw'
            number = row.find('td').contents[0].contents[0]
            link = 'http://www.capogames.net/samw/'+row.find('td').a.get('href')
            title = row.find('td').a.contents[0]
            name = row.find('td').a.contents[0]
            raw_date = row.find('td').contents[0]  # assumed: the original re.find('td') meant row.find('td')
            best_article_list([site_name, number, title, link, reply_count])
Code example #35
File: SanitizeHTML.py Project: mcyph/char_data
def sanitize_html(tag_name, D, SElms=SElms, SAttr=SAttr, SProtocols=SURITypes):
    """
    tag_name -> The tag's name
    D -> The tag's attributes dict
    DElms -> The allowed elements
    DAttr -> The allowed attributes
    DProtocols -> The allowed protocols (see Tags.DURITypes)
    """

    tag_name = tag_name.lower()  # HACK!
    if tag_name in SElms:
        for k in list(D.keys()):
            # Delete unallowed attributes
            if not k in SAttr:
                del D[k]

        for attr in SURIAttrs:
            # Validate URLs using REs
            if not attr in D:
                continue

            val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                   unescape(D[attr])).lower()

            if re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and \
                (val_unescaped.split(':')[0] not in SURITypes):
                del D[attr]

        for attr in svg_attr_val_allows_ref:
            # SVG something something...
            if attr in D:
                D[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ',
                                 unescape(D[attr]))

        if (tag_name in svg_allow_local_href and 'xlink:href' in D
                and re.search(r'^\s*[^#\s].*', D['xlink:href'])):  # re.find does not exist
            # ???
            # Disable SVG links?
            del D['xlink:href']

        if 'style' in D and D['style']:
            # Sanitize the CSS
            D['style'] = sanitize_css(D['style'])
        return tag_name, D

    else:
        # Don't allow!
        return None, None
Code example #36
def clean(t_list, comm_trig=False):  # cleaning up the name html.
    # the original built a pandas DataFrame and discarded every .append result;
    # a plain list matches how the values are actually used below
    list_ = []
    command = []
    for i in t_list:
        try:
            list_.append(i.get('data-name'))
            if i.get('data-name') is None:
                # assumed intent: grab the card link when there is no data-name
                command.append(i.find('a', href=re.compile(r'/mtg-card/')))
        except:  # need to come up with a conditional to define the commander..
            print(
                'if there are more than 3-4 of these there is a problem here!!!'
            )
    return command + list_  # remember partner commanders, need to add cleaning for that.
Code example #37
    def countNumDrugWords(self, element):

        count = 0
        element["verified_drug_words"] = []

        # get all sorted offsets
        # merge offsets that overlap
        overlaped_offsets = []
        for off in element["offsets"]:
            if len(overlaped_offsets) == 0:
                overlaped_offsets.append(off)
            else:
                i1 = off[0]
                i2 = off[1]

                if i2 <= overlaped_offsets[-1][1]:
                    # as the offsets are sorted by i1,
                    # we could skip this offsets as it is contained in the previous one
                    pass

                elif i2 > overlaped_offsets[-1][1] and i1 < overlaped_offsets[
                        -1][1]:
                    # this one overlaps but includes more words after it
                    # so we should increment i2 of the previous offset to current i2
                    overlaped_offsets[-1][1] = i2

                else:
                    overlaped_offsets.append(off)

        for groupoffset in overlaped_offsets:
            if len(groupoffset) == 2:
                i1 = groupoffset[0]
                i2 = groupoffset[1]
                fragment = element["text_original"][i1:i2 + 1]
                fwords = self.tokenize(fragment)
                element["verified_drug_words"].extend(fwords)
                count += sum([1 for word in fwords])
            else:
                i1 = int(groupoffset[0])
                # re.find does not exist; use the next word boundary's position
                m = re.search(r'\b', element["text_original"][i1:])
                i2 = i1 + (m.start() if m else 0)
                if i2 > i1:
                    fragment = element["text_original"][i1:i2 + 1]
                    fwords = self.tokenize(fragment)
                    element["verified_drug_words"].extend(fwords)
                    count += sum([1 for word in fwords])

        return count
Code example #38
File: routing.py Project: gbour/Mother
def fromurl(url):
    """Return app/class/method/function pointed by an url

    1. raise ValueError if url is external
    2. return None      if url does not match any callable app/class/...

    else return target item
    """
    # re.find does not exist; also `/-+` inside a character class is a
    # reversed range (re.error), so the '-' is moved to the end
    if re.search(r"[^\w/+#-]", url):
        raise ValueError

    target = None
    for part in url.split("/"):
        pass
        # aaa/bb/cc/dd

    return
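
The original character class deserves a note: inside [...], a '-' between two
characters denotes a range, and '/-+' runs backwards ('/' is 0x2F, '+' is
0x2B), so the original pattern does not even compile. A quick demonstration:

import re

try:
    re.compile(r"[^\w/-+#]")  # '-' is read as a range from '/' down to '+'
except re.error as exc:
    print("compile failed:", exc)

re.compile(r"[^\w/+#-]")      # fine: a trailing '-' is a literal dash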
Code example #39
File: permissionbi.py Project: hephaestus9/Ironworks
    def updatePermissions(self, permissions):
        for p in permissions:
            obj = {}
            obj["permission_k"] = p["permission_k"]
            obj["date_created"] = datetime.datetime.now().strftime(
                "%Y-%m-%d %H:%M:%S")

            # //remove this permission from all roles
            roles = self.role.getByPermissions(p["permission_k"])
            for role in roles:
                self.permission.deleteRolePermissions(role)

            #//add this permission to each role
            for key in p.keys():
                match = re.find("/^role_/", key)
                if match:
                    obj["role_k"] = key[5:]
                    obj["value"] = p[key]
                    self.permissions.addRolePermissions(obj)

        return {"success": True, "message": "Permissions successfully saved"}
Code example #40
def fetch(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            # print(line)
            a, b = line.split(':')
            anchors.append((a, b))
    txt = '| 主播 | 标题 | 状态 | 订阅 |\n|:---:|:---:|:---:|:---:|\n'  # columns: anchor | title | status | subscribers
    for suffix, anchor in anchors:
        # time.sleep(100)
        print('watching:', anchor)
        try:
            r = requests.get(huya_url + suffix, timeout=3)
            html = r.content.decode('utf-8')
            # re.find does not exist; '更换为' = "changed to" (page text)
            m = re.search(r'更换为.+href="https://www.huya.com/(.+)"', html)
            newaddr = m.group(1) if m else None
            print(newaddr)
            if newaddr:
                print('NEW:', anchor, suffix, '->', newaddr)
                anchors.append((newaddr, anchor))
                continue
            title = re.findall(r'<h1 id="J_roomTitle">(.+)</h1>', html)[0]
            status = re.findall(r'id="live-count">(.+?)</em></span>', html)
            fans = re.findall(r'id="activityCount">(\d+)</div>', html)[0]
            last_live = '未直播'  # "not live"
            if status and status[0]:
                last_live = status[0]
        except:
            print('ERROR:' + huya_url + suffix)
        else:
            txt += ('|' + anchor + '|' + title + '|' + last_live + '|' + fans +
                    '|\n')
    print(txt)
    return  # NOTE: early return (present in the original) leaves the push below unreachable
    if send_msg('主播直播状态', txt):  # '主播直播状态' = "streamer live status"
        print('wechat message push success.')
    else:
        print('wechat message push failed.')
Code example #41
File: utils.py Project: acmewebservices/openstates
def get_session_details(s):
    url = 'http://www.azleg.gov/xml/sessions.asp'
    with s.urlopen(url) as page:
        root = etree.fromstring(page)
        session_file = open('session_details.py', 'w')
        detail = """
                 '%s':
                    {'type': '%s', 'session_id': %s,
                     'start_date': datetime.date(%s),
                     'end_date': datetime.date(%s)},
                 """
        for session in root.xpath('//session'):
            session_type = 'primary' if re.search('Regular', session.get('Session_Full_Name')) else 'special'  # re.find does not exist
            start_date = datetime.datetime.strptime(
                                              session.get('Session_Start_Date'),
                                              '%Y-%m-%dT%H:%M:%S')
            end_date = datetime.datetime.strptime(session.get('Sine_Die_Day'),
                                                  '%Y-%m-%dT%H:%M:%S')
            session_file.write(detail % ( session.get('Session_Full_Name'),
                                           session_type,
                                           session.get('Session_ID'),
                                           start_date,
                                           end_date))
Code example #42
def replace_image_tag(html, path=None):
    tags = re.findall('<img.+?src="/img.+?>', html)
    for tag in tags:
        m = re.search(r'src="/img/(.+?)"', tag)  # re.find does not exist
        image = m.group(1) if m else None
        if image is None:
            print('image path not found in tag: "%s" in %s' % (tag, path))
            continue
        imagepaths.append(image)
        attributes = get_attributes(tag)
        attributes.pop('src')
        if len(attributes) > 0:
            # http://railsdoc.com/references/image_tag
            # <img src="/img/hoge.png" alt="hogera"> -> <%= image_tag("hoge.png", alt: "hogera") %>
            attributes = ', '.join(
                ['%s: "%s"' % o for o in attributes.items()])
            replaced = '<%%= image_tag("%s", %s) %%>' % (image, attributes)
        else:
            replaced = '<%%= image_tag("%s") %%>' % (image)
        if is_dry:
            print('"%s" -> "%s"' % (tag, replaced))
            continue
        html = html.replace(tag, replaced)
    return html
Code example #43
def dict_to_hstore(python_dict):
    """ 
    There's an implementation of this here ( HstoreAdapter ) https://github.com/psycopg/psycopg2/blob/master/lib/extras.py
    but the comments say that it is "painfully inefficient!"
    
    """
    hstore = StringIO()
    first_row = True
    for key in python_dict:
        ## Prune the hash--if it's empty
        value = python_dict[key]
        if value:
            if not first_row:
                hstore.write("|")
            else:
                first_row=False
            # don't allow quotes within the value. Not sure if we should check this here. 
            value = value.replace('"','')
            hstore.write("\"%s\"=>\"%s\"" % (key, value))  # the original omitted this closing paren
    return hstore.getvalue()
    

## If we were using a comma as a delimiter, we'd need to use a regex to allow for the possibility that a comma was inside the commas--so use the below. But we don't have to do that if we use the bar as delimiter. We already clean bars out in utf8clean.

# key_pair_re = re.compile('"(.+?)"=>"(.+?)"(?:,|$)')
key_pair_re = re.compile('"(.+?)"=>"(.+?)"')
def hstore_to_dict(text_string):
    return_dict = {}
    keypairs = text_string.split("|")
    for keypair in keypairs:
        m = key_pair_re.search(keypair)  # re.find does not exist; use the compiled pattern
        return_dict[m.group(1)] = m.group(2)
    return return_dict
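
A round-trip check of the two helpers above (the sample dict is illustrative;
values containing '|' or '"' are assumed to have been cleaned out upstream, as
the comments say):

d = {"name": "Alice", "city": "Springfield"}
encoded = dict_to_hstore(d)  # '"name"=>"Alice"|"city"=>"Springfield"'
assert hstore_to_dict(encoded) == d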
Code example #44
Copyright (c) 2014 Beckersweet. All rights reserved.
"""

from commands import getoutput as command
from json import loads as decodeJSON
from json import dumps as encodeJSON
from mininet.cli import CLI
from mininet.net import Mininet
from mininet.node import Node, RemoteController, CPULimitedHost
from mininet.util import pmonitor
import pp
from re import findall as find

ifconfig = command('ifconfig')
try:
    localIp = find(r'addr:(192\.168\.56\.\d+) ', ifconfig)[0]  # find is re.findall (aliased above)
except IndexError:  # findall returned no matches
    print "Network settings not configured. Try running 'sudo dhclient eth1'."

NETWORK_CONTROLLER_PORT = 6633
NUMBER_OF_HOSTS = 3
TCP_REQUEST_COMMAND = "python tcpRequest.py " + localIp + " 9999 "
JOB_SERVER_COMMAND = "sudo python dynamic_ncpus.py "
BENCHMARK_RESULTS_FILE_NAME = "OpMub_benchmarking.out"

print
print "Creating network:"

virtualNetwork = Mininet(controller=RemoteController,
							   host=CPULimitedHost,
							  build=False)
Code example #45
File: subtitles.py Project: mekza/moviepy
    def match_expr(self, expr):
        # re.find does not exist; each subtitle is ((t_start, t_end), text),
        # so match the regex against the text part
        return SubtitlesClip([e for e in self.subtitles
                              if re.findall(expr, e[1]) != []])
Code example #46
File: __init__.py Project: kaeza/KaeChat
def is_valid_nickname(name):
    """Returns whether NAME is a valid nickname, that is, it contains only
    letters, numbers, '_', '[', ']', '{', '}', '\', '|', '`', or '^'.
    """
    # re.find does not exist; re.search returns None (not -1) when nothing matches
    return re.search(r'[^A-Za-z0-9_\[\]\{\}\\\|\`\^]', name) is None
Code example #47
File: qconnect.py Project: MikeDacre/qconnect
def check_queue(uid):
    """ Check the queue for any uid string, return job list with running
        node information. """
    # Assumed aliases from elsewhere in qconnect: `find` = re.findall,
    # `s` = re.split, `rn` = subprocess.check_output.
    from re import compile as mkregex

    qstat = rn(['qstat', '-u', uid, '-n', '-1']).decode('utf8').rstrip().split('\n')[5:]

    # If there are no job return nothing
    if not qstat:
        return

    jobs = {}
    for i in qstat:
        f = s(r' +', i.rstrip())

        # Only look at jobs in the interactive queue
        if not f[2] == short_queue_name:
            continue

        # Skip completed jobs
        if f[9] == 'C':
            continue

        # Get node name, if there is one
        if f[11] == '--':
            node = ''
        else:
            nodes = set(find(r'node[0-9][0-9]', f[11]))
            if len(nodes) > 1:
                continue
            node = str(list(nodes)[0])

        # Get job number
        job_id = find(r'[0-9]+', f[0])[0]

        # Now that we have a limited job set, use qstat -f to get the
        # complete job and queue name
        find_queue = mkregex(r'queue = (.*)$')
        find_name  = mkregex(r'Job_Name = (.*)$')

        for i in subprocess.check_output(['qstat', '-f', job_id]).decode().rstrip().split('\n'):
            # Get Queue Name
            if find_queue.search(i):
                try:
                    queue = find_queue.findall(i)[0]
                except IndexError:
                    # Queue parsing failed, report this and continue
                    print("Failed to parse queue for job number:{:^3}\nskipping".format(job_id), file=stderr)
                    continue
                if not queue == interactive_queue:
                    continue
            elif find_name.search(i):
                try:
                    names = find_name.findall(i)[0].split('_')
                except IndexError:
                    # Queue parsing failed, report this and continue
                    print("Failed to parse queue for job number:{:^3}\nskipping".format(job_id), file=stderr)
                    continue

        # Check that this is actually one of our jobs
        identifier = '_'.join(names[-2:])
        if identifier == 'int_tmux':
            type = 'tmux'
        elif identifier == 'int_vnc':
            type = 'vnc'
        elif identifier == 'int_gui':
            type = 'gui'
        else:
            continue

        # Fix queue name
        name = '_'.join(names[:-2])
        name = name if name else type

        # Assemble the dictionary
        jobs[job_id] = {'queue'    : queue,
                        'job_name' : name,
                        'type'     : type,
                        'node'     : node,
                        'state'    : f[9]}

    # Sort the dictionary
    jobs = OrderedDict(sorted(jobs.items()))

    return(jobs)
Code example #48
File: runAll.py Project: astephens4/Iriss
#!/usr/bin/python27

import os;
import re;
from subprocess import call;

dirStruct = os.walk("../../raspi/LineAnalysis/testImages");

for roots, dirs, files in dirStruct :
    for fname in files :
        if re.find("jpg", fname) != None :
            ret = call(["./stats"], ["-i"], ["
Code example #49
File: client.py Project: usbuild/distmem
            # list values use ',' as the delimiter; a ',' inside a value must be escaped as '\,'
            data_arr = []
            for d in data:
                symbol = getFlag(d)
                if d is None:
                    print 'Unknown Value'
                    continue
                data_arr.append(symbol + str(d).replace('\\', '\\\\').replace(',', '\,'))
            data_str = ','.join(data_arr)
            request += "$" + str(len(data_str) + 1) + "\r\n"
            request += getFlag(data) + data_str + "\r\n"
        else:
            print pred('Unknown Value')
            continue

    sock.send(request + "\n")
    time.sleep(0.05)
    resp = sock.recv(2048)  # `re` shadowed the regex module; renamed
    if resp[0] in ('+', '-'):
        print resp[1:],
    elif resp[0] == '$':
        resp_len = int(resp[1:resp.find("\r\n")])  # str.find, not a regex call
        if resp_len == -1:
            print pred("Not Found")
            continue
        data_start = resp.find("\r\n") + 2
        data = resp[data_start:data_start + resp_len]
        data = parseCMD(data)
        print "(" + pyellow(type(data).__name__) + ")", pgreen(data)
sock.close()
Code example #50
File: re1.py Project: borkarfaiz/python
def demo_bad_catch():
    try:
        var = input("Enter variable name")
        # `reg` is assumed to be a pattern defined elsewhere in the module
        if re.search(reg, var):  # re.find does not exist
            print('The input is valid')
    except ValueError as e:
        print(e)  # the handler body was missing in the original snippet
Code example #51
File: qconnect.py Project: MikeDacre/qconnect
def attach_job(job_id, attempt_gui=False):
    """ Attach to a currently running job, default is tmux.
        To attach to a GUI running in tmux, pass attempt_gui """

    # Get details
    job_list = check_queue(uid)
    try:
        node  = job_list[job_id]['node']
        type  = job_list[job_id]['type']
        state = job_list[job_id]['state']
    except KeyError:
        print("Sorry, that job number doesn't exist. Please try again")
        print_jobs(job_list)
        sys.exit(1)

    if not state == 'R':
        print("Job not running, cannot attach")
        return

    if type == 'gui' or attempt_gui:
        # Confirm GUI Possible
        if not xpra_installed:
            print("It appears that xpra is not in your PATH, I cannot run GUI jobs", file=stderr)
            print("Exiting", file=stderr)
            sys.exit(-1)

        # Display xpra instructions
        print("You MUST NOT close your program by closing the window unless you want to")
        print("terminate your session\n")
        print("To preserve your session, you need to Ctrl-C in the command line, not close")
        print("the window\n")
        sleep(1)

        # Actually attach to the session!
        subprocess.call(['xpra', 'attach', 'ssh:' + uid + '@' + node + ':' + job_id])
        return

    elif type == 'tmux':
        # Do not attach if running from within a tmux session already
        if rn('echo $TMUX', shell=True).decode().rstrip():
            print("You are already running a tmux session, sessions should be nested with care")
            print("To force run, unset the $TMUX variable, but I suggest you just detatch your")
            print("current session and try the same command again")
            return

        # Attempt to initially attach to xpra, fail gracefully without
        # notifying user
        if xpra_installed:
            GUI_PID=''
            if subprocess.call("xpra attach ssh:" + uid + "@" + node + ":" + job_id + " >/dev/null 2>/dev/null &", shell=True) == 0:
                GUI_PID = subprocess.check_output('ps axo pid,user,cmd | grep "xpra attach" | grep "' + job_id + '$"| awk \'{print $1}\'', shell=True).decode().rstrip()

        # Actually attach to the session!
        job_string = ' '.join(['ssh', node, '-t', 'DISPLAY=:' + job_id, 'tmux', 'a', '-t', job_id])
        subprocess.call(job_string, shell=True)

        # Kill GUI if open
        if xpra_installed and GUI_PID:
            subprocess.call(['kill', GUI_PID])

    elif type == 'vnc':
        # Check that vnc can run
        if not vnc_installed:
            print("It appears that vncviewer is not in your PATH, I cannot run connect to a VNC session", file=stderr)
            print("Exiting", file=stderr)
            sys.exit(-1)

        # Get VNC Port
        ports = []
        files = subprocess.check_output('ssh ' + node + ' "ls $HOME/.vnc"', shell=True).decode().rstrip().split('\n')
        for i in files:
            if i.startswith(node) and i.endswith('pid'):
                port = find(r':([0-9]+)\.pid', i)[0]  # find is re.findall (aliased in qconnect)
                ports.append(port)

        if not ports:
            print("It appears no VNC servers are running on the selected server.")
            print("If the job is still running in the queue, there is a problem.")
            print("Try clearing out the *.log and *.pid files in $HOME/.vnc, and killing")
            print("the running VNC queue job")
            return

        if len(ports) > 1:
            print("There is more than one vnc server running for you on that node.")
            print("That isn't allowed and I don't know which one to join. It may")
            print("be that your last session exited without cleaning $HOME/.vnc")
            print("Check in there and clean out log files for vnc servers that")
            print("aren't running to prevent problems")
            return

        subprocess.call(['vncviewer', node + ':' + ports[0]])
        return

    else:
        print("I don't understand the job type")
        return
Code example #52
        return ''

def get_video_img_info(html):
    # find site name
    site_url = 'http://'
    url_p = '^http://v.qq.com/.+?tm|^/cover/'
    sub_url_p = '^/'
    video_with_img = []
    # `re.find` does not exist, and `elif (x = ...)` is a syntax error;
    # assign first, then test
    finded = re.search('qq.com', html)
    if finded:
        site_url = site_url + 'v.qq.com/'
        url_p = '^http://v.qq.com/.+tm|^/cover/'
    else:
        finded = re.search('youku.com', html)
        if finded:
            site_url = site_url + 'v.youku.com'
            url_p = '^http://v.youku.com/.+?_show'
        else:
            finded = re.search('tudou.com', html)
            if finded:
                site_url = site_url + 'v.tudou.com'
                url_p = 'http://v.tudou.com/'
            else:
                print('error not support yet...')
                return []

    soup = BeautifulSoup(html)
    all_img = soup.select("a > img")
    for img in all_img:
        img_attrs = img.attrs
        for attr in img_attrs:
            if attr == 'src' or attr == '_src':
                imgurl = img[attr]
            if attr == 'alt':
                imgalt = img[attr]
Code example #53
from sys import argv
import os  # os.listdir is used below but was never imported
import re

script, directory, csv = argv

identified = open(csv).read()  # build the list of identified queries from the cleaned-up csv

identified_peaks = []
for line in open(csv):
    identified_peaks.append(line[:-1])
print('total identifed peaks: ', len(identified_peaks))

output = open('identfied.mgf','w') # add new name for new mgf file

for mgf in os.listdir(directory):
    if not re.search('mgf', mgf):  # re.find does not exist
        continue

    s = open(mgf).read()

    queries = {}

    match = re.compile('BEGIN IONS.*?END IONS\n', re.DOTALL)

    peak_list = re.findall(match, s)  # build the list of queries

    print('total MS/MS spectra: ', len(peak_list))

    for query in peak_list:
        title = re.search('TITLE=.*? ', query)  # extract the TITLE line from the query
        #print(title.group()[6:])
Code example #54
File: hlir.py Project: zjuan22/general
def hstack(name):
    # re.find does not exist; callers presumably want the bracketed index
    m = re.search(r'\[([0-9]+)\]', name)
    return m.group(1) if m else None
Code example #55
File: qconnect.py Project: MikeDacre/qconnect
def create_job(cores=default_cores, mem='', gui='', name='', vnc=False):
    """ Create a job in the queue, wait for it to run, and then attach
        Ctl-C after submission will not kill job, it will only kill attach
        queue """

    # Figure out memory request
    try:
        mem = str(int(cores*default_max_mem/default_max_cores)) + 'GB' if not mem else str(int(mem)) + 'GB'
    except ValueError:
        print("Incorrect formatting for memory request, please submit an integer multiple in GB")
        sys.exit(1)

    # Create job name
    if gui:
        gui_name = gui.split(' ')[0]
        job_name = name + '_' + gui_name + '_int_gui' if name else gui_name + '_int_gui'
    elif vnc:
        job_name = name + '_int_vnc' if name else 'int_vnc'
    else:
        job_name = name + '_int_tmux' if name else 'int_tmux'

    # Prep the job
    template = "#!/bin/bash\n#PBS -S /bin/bash\n"
    template = ''.join([template, "#PBS -q ", interactive_queue,
                        "\n#PBS -N ", job_name,
                        '\n#PBS -l nodes=1:ppn=' + str(cores),
                        '\n#PBS -l mem=' + mem,
                        '\n#PBS -e ' + os.environ['HOME'] + '/.' + job_name + '.error',
                        '\n#PBS -o /dev/null'])

    if gui:
        template = template + ("\n\nexport QCONNECT=gui"
                               "\n\njob_id=$(echo $PBS_JOBID | sed 's#\..*##g')\n"
                               "xpra start :$job_id\n"
                               "export DISPLAY=:${job_id}\n"
                               "sleep 1\n" +
                               gui + "\n"
                               "PID=$!\n"
                               "sleep 1\n"
                               "while true\n"
                               "do\n"
                               "  if kill -0 $PID > /dev/null 2>&1; then\n"
                               "    sleep 5\n"
                               "  else\n"
                               "    xpra stop :${job_id}\n"
                               "    xpra list >/dev/null 2>/dev/null\n"
                               "    rm ~/.xpra/:${job_id}.log 2>/dev/null\n"
                               "    exit 0\n"
                               "  fi\n"
                               "done\n")

    elif vnc:
        if not vnc_installed:
            print("It appears that vncviewer is not in your PATH, I cannot create a VNC connection", file=stderr)
            print("Exiting", file=stderr)
            sys.exit(-1)

        template = template + ("\n\nexport QCONNECT=vnc\n\nvncserver -geometry " + vnc_geometry + " -fg\n")

    else:
        template = template + ( "\n\nexport QCONNECT=tmux"
                                "\n\nsession_id=$(echo $PBS_JOBID | sed 's#\..*##g')\n")
        if xpra_installed:
            template = template + ("if xpra start :$session_id >/dev/null 2>/dev/null; then\n"
                                   "    export DISPLAY=:$session_id\n"
                                   "fi\n")

        template = template + ( "CMD=\"tmux new-session -s $session_id -d\"\n"
                                "$CMD\n"
                                "PID=$(ps axo pid,user,cmd | grep tmux | grep $USER | grep -v grep | awk '{print $1}')\n"
                                "while true\n"
                                "do\n"
                                "  if kill -0 $PID > /dev/null 2>&1; then\n"
                                "    if [[ ! $(tmux ls | grep $session_id) ]]; then\n")
        if xpra_installed:
            template = template + ("      xpra stop :$session_id >/dev/null 2>/dev/null\n"
                                   "      xpra list >/dev/null 2>/dev/null\n"
                                   "      rm ~/.xpra/:$session_id.log 2>/dev/null\n")

        template = template + ( "      exit 0\n"
                                "    else\n"
                                "      sleep 5\n"
                                "    fi\n"
                                "  else\n")
        # indentation fixed: this tail belongs inside the tmux else-branch above,
        # and the closing "exit 0 / fi / done" must be appended unconditionally
        if xpra_installed:
            template = template + ("      xpra stop :$session_id >/dev/null 2>/dev/null\n"
                                   "      xpra list >/dev/null 2>/dev/null\n"
                                   "      rm ~/.xpra/:$session_id.log 2>/dev/null\n")

        template = template + ("    exit 0\n"
                               "  fi\n"
                               "done\n")
    if debug:
        print(template)

    pbs_command = (['qsub'])

    # Submit the job
    pbs_submit = subprocess.Popen(pbs_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pbs_submit.stdin.write(template.encode())
    pbs_submit.stdin.close()

    # Get job number
    job_no = (pbs_submit.stdout.read().decode().rstrip())
    try:
        job_no = find(r'[0-9]+', job_no)[0]
    except IndexError:
        print("PBS Submission failed with message:\n{}".format(job_no), file=stderr)
        sys.exit(1)
    print("Job", job_name, "created with job id", job_no, "\n")
    sleep(1)

    return(job_no)
Code example #56
File: sqlparser.py Project: moin18/sync_db_schema
 def get_table_info(self, table):
     # re.find does not exist; the original pattern also had unbalanced
     # parentheses and never bound its {table_name} placeholder
     pattern = r'CREATE TABLE {table_name} \((.*\n?)\)'.format(table_name=re.escape(table))
     table_data = re.search(pattern, self._file_content, re.MULTILINE)
     return table_data
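
A minimal check of the fixed pattern against a toy schema (the SQL is
illustrative):

import re

_file_content = "CREATE TABLE users (id INT, name TEXT)\n"
pattern = r'CREATE TABLE {table_name} \((.*\n?)\)'.format(table_name=re.escape('users'))
m = re.search(pattern, _file_content, re.MULTILINE)
print(m.group(1) if m else None)  # -> id INT, name TEXT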